From aedd78bd564c57768283df785b8654ed20e13fa1 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Mon, 9 Dec 2019 15:20:58 +0200
Subject: [PATCH] Reduce pose multiplications

---
 components/operators/src/correspondence.cu | 24 +++++++++-------------
 components/operators/src/mvmls.cpp         |  9 ++------
 components/operators/src/mvmls_cuda.hpp    |  4 +---
 3 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/components/operators/src/correspondence.cu b/components/operators/src/correspondence.cu
index 2ffb5a042..35f414f28 100644
--- a/components/operators/src/correspondence.cu
+++ b/components/operators/src/correspondence.cu
@@ -93,9 +93,7 @@ __global__ void corresponding_point_kernel(
         TextureObject<short2> screenOut,
 		TextureObject<float> conf,
 		TextureObject<int> mask,
-        float4x4 pose1,
-        float4x4 pose1_inv,
-        float4x4 pose2,  // Inverse
+        float4x4 pose,
         Camera cam1,
         Camera cam2, ftl::cuda::MvMLSParams params) {
 	
@@ -115,7 +113,7 @@ __global__ void corresponding_point_kernel(
         //const float4 temp = vout.tex2D(x,y);
         //vout(x,y) =  make_float4(depth1, 0.0f, temp.z, temp.w);
         
-        const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1);
+        //const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1);
 
         const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f);
 
@@ -130,9 +128,9 @@ __global__ void corresponding_point_kernel(
         int count = 0;
         //float contrib = 0.0f;
 		
-		const float3 camPosOrigin = pose2 * world1;
+		const float3 camPosOrigin = pose * cam1.screenToCam(x,y,depth1);
         const float2 lineOrigin = cam2.camToScreen<float2>(camPosOrigin);
-        const float3 camPosDistant = pose2 * (pose1 * cam1.screenToCam(x,y,depth1 + 10.0f));
+        const float3 camPosDistant = pose * cam1.screenToCam(x,y,depth1 + 10.0f);
         const float2 lineDistant = cam2.camToScreen<float2>(camPosDistant);
         const float lineM = (lineDistant.y - lineOrigin.y) / (lineDistant.x - lineOrigin.x);
 		const float depthM = 10.0f / (lineDistant.x - lineOrigin.x);
@@ -216,8 +214,6 @@ void ftl::cuda::correspondence(
         TextureObject<short2> &screen,
 		TextureObject<float> &conf,
 		TextureObject<int> &mask,
-        float4x4 &pose1,
-        float4x4 &pose1_inv,
         float4x4 &pose2,
         const Camera &cam1,
         const Camera &cam2, const MvMLSParams &params, int func,
@@ -232,12 +228,12 @@ void ftl::cuda::correspondence(
     //printf("COR SIZE %d,%d\n", p1.width(), p1.height());
 
 	switch (func) {
-    case 0: corresponding_point_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 1: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 2: corresponding_point_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 3: corresponding_point_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 4: corresponding_point_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
-	case 5: corresponding_point_kernel<16,5><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+    case 0: corresponding_point_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break;
+	case 1: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break;
+	case 2: corresponding_point_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break;
+	case 3: corresponding_point_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break;
+	case 4: corresponding_point_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break;
+	case 5: corresponding_point_kernel<16,5><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose2, cam1, cam2, params); break;
 	}
 
     cudaSafeCall( cudaGetLastError() );
diff --git a/components/operators/src/mvmls.cpp b/components/operators/src/mvmls.cpp
index e85f82711..87a23090c 100644
--- a/components/operators/src/mvmls.cpp
+++ b/components/operators/src/mvmls.cpp
@@ -103,12 +103,9 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
                     // No, so skip this combination
                     if (d1.dot(d2) <= 0.0) continue;
 
-                    auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>());
-                    auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse());
-                    auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
-					auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>());
+                    auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse() * s1->getPose().cast<float>());
 
-                    auto transform = pose2 * pose1;
+                    //auto transform = pose2 * pose1;
 
                     //Calculate screen positions of estimated corresponding points
                     ftl::cuda::correspondence(
@@ -120,8 +117,6 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
                         f1.getTexture<short2>(Channel::Screen),
                         f1.getTexture<float>(Channel::Confidence),
                         f1.getTexture<int>(Channel::Mask),
-                        pose1,
-                        pose1_inv,
                         pose2,
                         s1->parameters(),
                         s2->parameters(),
diff --git a/components/operators/src/mvmls_cuda.hpp b/components/operators/src/mvmls_cuda.hpp
index 93b1e8d88..5faeb4753 100644
--- a/components/operators/src/mvmls_cuda.hpp
+++ b/components/operators/src/mvmls_cuda.hpp
@@ -28,9 +28,7 @@ void correspondence(
         ftl::cuda::TextureObject<short2> &screen,
 		ftl::cuda::TextureObject<float> &conf,
 		ftl::cuda::TextureObject<int> &mask,
-        float4x4 &pose1,
-        float4x4 &pose1_inv,
-        float4x4 &pose2,
+        float4x4 &pose,
         const ftl::rgbd::Camera &cam1,
         const ftl::rgbd::Camera &cam2, const ftl::cuda::MvMLSParams &params, int func,
         cudaStream_t stream);
-- 
GitLab