From aedd78bd564c57768283df785b8654ed20e13fa1 Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nwpope@utu.fi> Date: Mon, 9 Dec 2019 15:20:58 +0200 Subject: [PATCH] Reduce pose multiplications --- components/operators/src/correspondence.cu | 24 +++++++++------------- components/operators/src/mvmls.cpp | 9 ++------ components/operators/src/mvmls_cuda.hpp | 4 +--- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/components/operators/src/correspondence.cu b/components/operators/src/correspondence.cu index 2ffb5a042..35f414f28 100644 --- a/components/operators/src/correspondence.cu +++ b/components/operators/src/correspondence.cu @@ -93,9 +93,7 @@ __global__ void corresponding_point_kernel( TextureObject<short2> screenOut, TextureObject<float> conf, TextureObject<int> mask, - float4x4 pose1, - float4x4 pose1_inv, - float4x4 pose2, // Inverse + float4x4 pose, Camera cam1, Camera cam2, ftl::cuda::MvMLSParams params) { @@ -115,7 +113,7 @@ __global__ void corresponding_point_kernel( //const float4 temp = vout.tex2D(x,y); //vout(x,y) = make_float4(depth1, 0.0f, temp.z, temp.w); - const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); + //const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f); @@ -130,9 +128,9 @@ __global__ void corresponding_point_kernel( int count = 0; //float contrib = 0.0f; - const float3 camPosOrigin = pose2 * world1; + const float3 camPosOrigin = pose * cam1.screenToCam(x,y,depth1); const float2 lineOrigin = cam2.camToScreen<float2>(camPosOrigin); - const float3 camPosDistant = pose2 * (pose1 * cam1.screenToCam(x,y,depth1 + 10.0f)); + const float3 camPosDistant = pose * cam1.screenToCam(x,y,depth1 + 10.0f); const float2 lineDistant = cam2.camToScreen<float2>(camPosDistant); const float lineM = (lineDistant.y - lineOrigin.y) / (lineDistant.x - lineOrigin.x); const float depthM = 10.0f / (lineDistant.x - lineOrigin.x); @@ -216,8 +214,6 @@ void ftl::cuda::correspondence( 
TextureObject<short2> &screen, TextureObject<float> &conf, TextureObject<int> &mask, - float4x4 &pose1, - float4x4 &pose1_inv, float4x4 &pose2, const Camera &cam1, const Camera &cam2, const MvMLSParams &params, int func, @@ -232,12 +228,12 @@ void ftl::cuda::correspondence( //printf("COR SIZE %d,%d\n", p1.width(), p1.height()); switch (func) { - case 0: corresponding_point_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 1: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 2: corresponding_point_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 3: corresponding_point_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 4: corresponding_point_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 5: corresponding_point_kernel<16,5><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; + case 0: corresponding_point_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 1: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 2: corresponding_point_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 3: corresponding_point_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 4: 
corresponding_point_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; + case 5: corresponding_point_kernel<16,5><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break; } cudaSafeCall( cudaGetLastError() ); diff --git a/components/operators/src/mvmls.cpp b/components/operators/src/mvmls.cpp index e85f82711..87a23090c 100644 --- a/components/operators/src/mvmls.cpp +++ b/components/operators/src/mvmls.cpp @@ -103,12 +103,9 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda // No, so skip this combination if (d1.dot(d2) <= 0.0) continue; - auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); - auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); - auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); - auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>()); + auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse() * s1->getPose().cast<float>()); - auto transform = pose2 * pose1; + //auto transform = pose2 * pose1; //Calculate screen positions of estimated corresponding points ftl::cuda::correspondence( @@ -120,8 +117,6 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda f1.getTexture<short2>(Channel::Screen), f1.getTexture<float>(Channel::Confidence), f1.getTexture<int>(Channel::Mask), - pose1, - pose1_inv, pose2, s1->parameters(), s2->parameters(), diff --git a/components/operators/src/mvmls_cuda.hpp b/components/operators/src/mvmls_cuda.hpp index 93b1e8d88..5faeb4753 100644 --- a/components/operators/src/mvmls_cuda.hpp +++ b/components/operators/src/mvmls_cuda.hpp @@ -28,9 +28,7 @@ void correspondence( ftl::cuda::TextureObject<short2> &screen, ftl::cuda::TextureObject<float> &conf, ftl::cuda::TextureObject<int> &mask, - float4x4 &pose1, - float4x4 &pose1_inv, - float4x4 &pose2, + 
float4x4 &pose, const ftl::rgbd::Camera &cam1, const ftl::rgbd::Camera &cam2, const ftl::cuda::MvMLSParams &params, int func, cudaStream_t stream); -- GitLab