From 94463ca7562819c2edc11a28be6f9648bd6e7775 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Sun, 6 Oct 2019 13:09:03 +0300
Subject: [PATCH] Inline cost functions

---
 applications/reconstruct/src/ilw/ilw.cpp |  2 +-
 applications/reconstruct/src/ilw/ilw.cu  | 65 +++++++++---------------
 2 files changed, 26 insertions(+), 41 deletions(-)

diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp
index a4a31e27d..445db06b9 100644
--- a/applications/reconstruct/src/ilw/ilw.cpp
+++ b/applications/reconstruct/src/ilw/ilw.cpp
@@ -109,7 +109,7 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
 	params_.range = 0.1f;
 
     for (int i=0; i<iterations_; ++i) {
-        _phase1(fs, value("cost_function",0), stream);
+        _phase1(fs, value("cost_function",3), stream);
         //for (int j=0; j<3; ++j) {
             _phase2(fs, motion_rate_, stream);
         //}
diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu
index 8e6bd9009..b011a1bd8 100644
--- a/applications/reconstruct/src/ilw/ilw.cu
+++ b/applications/reconstruct/src/ilw/ilw.cu
@@ -89,20 +89,25 @@ template<int FUNCTION>
 __device__ float costFunction(const ftl::cuda::ILWParams &params, float dweight, float cweight);
 
 template <>
-__device__ float costFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+__device__ inline float costFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
 	return 1.0f - (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
 }
 
 template <>
-__device__ float costFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
+__device__ inline float costFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
 	return 1.0f - (cweight * cweight * dweight);
 }
 
 template <>
-__device__ float costFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
+__device__ inline float costFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
 	return 1.0f - (dweight * dweight * cweight);
 }
 
+template <>
+__device__ inline float costFunction<3>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+	return (dweight == 0.0f) ? 1.0f : 1.0f - (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
+}
+
 template<int COR_STEPS, int FUNCTION> 
 __global__ void correspondence_energy_vector_kernel(
         TextureObject<float> d1,
@@ -159,46 +164,25 @@ __global__ void correspondence_energy_vector_kernel(
 
         if (screen.x >= cam2.width || screen.y >= cam2.height) continue;
 
-        // Small window around suggested point
-        //for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
-        //for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
-        //const int u = 0;
-        //const int v = 0;
-
-            // Now do correspondence evaluation at "screen" location in camera 2
-            //const float3 world2 = make_float3(p2.tex2D((int)screen.x+u, (int)screen.y+v));
-			//if ((params.flags & ftl::cuda::kILWFlag_IgnoreBad) && world2.x == MINF) continue;
-			
-
-			// Generate a depth correspondence value
-			const float depth2 = d2.tex2D((int)screen.x, (int)screen.y);
-            const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
-			
-			// Generate a colour correspondence value
-			const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y);
-			const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
-
-
-			// Cost eq 1: summed contributions
-			//cost = 1.0f - (params.cost_ratio * (ccost) + (1.0f - params.cost_ratio) * cost);
-			
-			// Cost eq 2: Multiplied
-			//cost = 1.0f - (ccost * ccost * cost);
-
-			const float cost = costFunction<FUNCTION>(params, dweight, cweight);
+		// Generate a depth correspondence value
+		const float depth2 = d2.tex2D((int)screen.x, (int)screen.y);
+		const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
+		
+		// Generate a colour correspondence value
+		const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y);
+		const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
 
-			// Cost is so bad, don't even consider this a valid option
-			if (cost >= params.cost_threshold) continue;
+		const float cost = costFunction<FUNCTION>(params, dweight, cweight);
 
-            ++count;
-			avgcost += cost;
-            if (cost < bestcost) {
-                bestdepth = depth_adjust;
-                bestcost = cost;
-            }
+		// Cost is so bad, don't even consider this a valid option
+		if (cost >= params.cost_threshold) continue;
 
-        //}
-        //}
+		++count;
+		avgcost += cost;
+		if (cost < bestcost) {
+			bestdepth = depth_adjust;
+			bestcost = cost;
+		}
     }
 
 	//count = warpSum(count);
@@ -253,6 +237,7 @@ void ftl::cuda::correspondence(
     case 0: correspondence_energy_vector_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 	case 1: correspondence_energy_vector_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 	case 2: correspondence_energy_vector_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
+	case 3: correspondence_energy_vector_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 	}
 
     cudaSafeCall( cudaGetLastError() );
-- 
GitLab