Inline cost functions

94463ca7 · Nicolas Pope · a5838f17 · 94463ca7 · 94463ca7
Commit 94463ca7 authored Oct 6, 2019 by Nicolas Pope
--- a/applications/reconstruct/src/ilw/ilw.cpp
+++ b/applications/reconstruct/src/ilw/ilw.cpp
@@ -109,7 +109,7 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
 	params_.range = 0.1f;

    for (int i=0; i<iterations_; ++i) {
-        _phase1(fs, value("cost_function",0), stream);
+        _phase1(fs, value("cost_function",3), stream);
        //for (int j=0; j<3; ++j) {
            _phase2(fs, motion_rate_, stream);
        //}


--- a/applications/reconstruct/src/ilw/ilw.cu
+++ b/applications/reconstruct/src/ilw/ilw.cu
@@ -89,20 +89,25 @@ template<int FUNCTION>
 __device__ float costFunction(const ftl::cuda::ILWParams &params, float dweight, float cweight);

 template <>
-__device__ float costFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+__device__ inline float costFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
 	return 1.0f - (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
 }

 template <>
-__device__ float costFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
+__device__ inline float costFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
 	return 1.0f - (cweight * cweight * dweight);
 }

 template <>
-__device__ float costFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
+__device__ inline float costFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
 	return 1.0f - (dweight * dweight * cweight);
 }

+template <>
+__device__ inline float costFunction<3>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+	return (dweight == 0.0f) ? 1.0f : 1.0f - (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
+}
+
 template<int COR_STEPS, int FUNCTION> 
 __global__ void correspondence_energy_vector_kernel(
        TextureObject<float> d1,
@@ -159,17 +164,6 @@ __global__ void correspondence_energy_vector_kernel(

        if (screen.x >= cam2.width || screen.y >= cam2.height) continue;

-        // Small window around suggested point
-        //for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
-        //for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
-        //const int u = 0;
-        //const int v = 0;
-
-            // Now do correspondence evaluation at "screen" location in camera 2
-            //const float3 world2 = make_float3(p2.tex2D((int)screen.x+u, (int)screen.y+v));
-			//if ((params.flags & ftl::cuda::kILWFlag_IgnoreBad) && world2.x == MINF) continue;
-			
-
 		// Generate a depth correspondence value
 		const float depth2 = d2.tex2D((int)screen.x, (int)screen.y);
 		const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
@@ -178,13 +172,6 @@ __global__ void correspondence_energy_vector_kernel(
 		const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y);
 		const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);

-
-			// Cost eq 1: summed contributions
-			//cost = 1.0f - (params.cost_ratio * (ccost) + (1.0f - params.cost_ratio) * cost);
-			
-			// Cost eq 2: Multiplied
-			//cost = 1.0f - (ccost * ccost * cost);
-
 		const float cost = costFunction<FUNCTION>(params, dweight, cweight);

 		// Cost is so bad, don't even consider this a valid option
@@ -196,9 +183,6 @@ __global__ void correspondence_energy_vector_kernel(
 			bestdepth = depth_adjust;
 			bestcost = cost;
 		}
-
-        //}
-        //}
    }

 	//count = warpSum(count);
@@ -253,6 +237,7 @@ void ftl::cuda::correspondence(
    case 0: correspondence_energy_vector_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 	case 1: correspondence_energy_vector_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 	case 2: correspondence_energy_vector_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
+	case 3: correspondence_energy_vector_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 	}

    cudaSafeCall( cudaGetLastError() );