From 94463ca7562819c2edc11a28be6f9648bd6e7775 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Sun, 6 Oct 2019 13:09:03 +0300
Subject: [PATCH] Inline cost functions

---
 applications/reconstruct/src/ilw/ilw.cpp |  2 +-
 applications/reconstruct/src/ilw/ilw.cu  | 65 +++++++++---------------
 2 files changed, 26 insertions(+), 41 deletions(-)

diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp
index a4a31e27d..445db06b9 100644
--- a/applications/reconstruct/src/ilw/ilw.cpp
+++ b/applications/reconstruct/src/ilw/ilw.cpp
@@ -109,7 +109,7 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
 	params_.range = 0.1f;
 
 	for (int i=0; i<iterations_; ++i) {
-		_phase1(fs, value("cost_function",0), stream);
+		_phase1(fs, value("cost_function",3), stream);
 		//for (int j=0; j<3; ++j) {
 			_phase2(fs, motion_rate_, stream);
 		//}
diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu
index 8e6bd9009..b011a1bd8 100644
--- a/applications/reconstruct/src/ilw/ilw.cu
+++ b/applications/reconstruct/src/ilw/ilw.cu
@@ -89,20 +89,25 @@ template<int FUNCTION>
 __device__ float costFunction(const ftl::cuda::ILWParams &params, float dweight, float cweight);
 
 template <>
-__device__ float costFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+__device__ inline float costFunction<0>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
 	return 1.0f - (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
 }
 
 template <>
-__device__ float costFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
+__device__ inline float costFunction<1>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
 	return 1.0f - (cweight * cweight * dweight);
 }
 
 template <>
-__device__ float costFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
+__device__ inline float costFunction<2>(const ftl::cuda::ILWParams &param, float dweight, float cweight) {
 	return 1.0f - (dweight * dweight * cweight);
 }
 
+template <>
+__device__ inline float costFunction<3>(const ftl::cuda::ILWParams &params, float dweight, float cweight) {
+	return (dweight == 0.0f) ? 1.0f : 1.0f - (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
+}
+
 template<int COR_STEPS, int FUNCTION>
 __global__ void correspondence_energy_vector_kernel(
 		TextureObject<float> d1,
@@ -159,46 +164,25 @@ __global__ void correspondence_energy_vector_kernel(
 
 		if (screen.x >= cam2.width || screen.y >= cam2.height) continue;
 
-		// Small window around suggested point
-		//for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
-			//for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) {
-				//const int u = 0;
-				//const int v = 0;
-
-				// Now do correspondence evaluation at "screen" location in camera 2
-				//const float3 world2 = make_float3(p2.tex2D((int)screen.x+u, (int)screen.y+v));
-				//if ((params.flags & ftl::cuda::kILWFlag_IgnoreBad) && world2.x == MINF) continue;
-
-
-				// Generate a depth correspondence value
-				const float depth2 = d2.tex2D((int)screen.x, (int)screen.y);
-				const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
-
-				// Generate a colour correspondence value
-				const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y);
-				const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
-
-
-				// Cost eq 1: summed contributions
-				//cost = 1.0f - (params.cost_ratio * (ccost) + (1.0f - params.cost_ratio) * cost);
-
-				// Cost eq 2: Multiplied
-				//cost = 1.0f - (ccost * ccost * cost);
-
-				const float cost = costFunction<FUNCTION>(params, dweight, cweight);
+		// Generate a depth correspondence value
+		const float depth2 = d2.tex2D((int)screen.x, (int)screen.y);
+		const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
+
+		// Generate a colour correspondence value
+		const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y);
+		const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
 
-				// Cost is so bad, don't even consider this a valid option
-				if (cost >= params.cost_threshold) continue;
+		const float cost = costFunction<FUNCTION>(params, dweight, cweight);
 
-				++count;
-				avgcost += cost;
-				if (cost < bestcost) {
-					bestdepth = depth_adjust;
-					bestcost = cost;
-				}
+		// Cost is so bad, don't even consider this a valid option
+		if (cost >= params.cost_threshold) continue;
 
-			//}
-		//}
+		++count;
+		avgcost += cost;
+		if (cost < bestcost) {
+			bestdepth = depth_adjust;
+			bestcost = cost;
+		}
 	}
 
 	//count = warpSum(count);
@@ -253,6 +237,7 @@ void ftl::cuda::correspondence(
 		case 0: correspondence_energy_vector_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 		case 1: correspondence_energy_vector_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 		case 2: correspondence_energy_vector_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
+		case 3: correspondence_energy_vector_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, pose1, pose1_inv, pose2, cam1, cam2, params);
 	}
 
 	cudaSafeCall( cudaGetLastError() );
-- 
GitLab