WIP: Initial move points cuda kernel

e1bcc046 · Nicolas Pope · d1e3fc6b · e1bcc046 · e1bcc046 · e1bcc046
Commit e1bcc046 authored 5 years ago by Nicolas Pope
--- a/applications/reconstruct/src/ilw.cu
+++ b/applications/reconstruct/src/ilw.cu
@@ -31,7 +31,11 @@ __global__ void correspondence_energy_vector_kernel(
    const int y = blockIdx.y*blockDim.y + threadIdx.y;
 	const float3 world1 = make_float3(p1.tex2D(x, y));
-	if (world1.x == MINF) return;
+	if (world1.x == MINF) {
+        vout(x,y) = make_float4(0.0f);
+        eout(x,y) = 0.0f;
+        return;
+    }
    const float3 camPos2 = pose2 * world1;
 	const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);

--- a/applications/reconstruct/src/ilw/ilw.cpp
+++ b/applications/reconstruct/src/ilw/ilw.cpp
@@ -27,7 +27,7 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
    //for (int i=0; i<2; ++i) {
        _phase1(fs, stream);
        //for (int j=0; j<3; ++j) {
-        //    _phase2(fs);
+            _phase2(fs, 0.1f, stream);
        //}
 		// TODO: Break if no time left
@@ -118,12 +118,21 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
    return true;
 }
-bool ILW::_phase2(ftl::rgbd::FrameSet &fs) {
+bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) {
    // Run energies and motion kernel
 	// Smooth vectors across a window and iteratively
 	// strongly disagreeing vectors should cancel out
 	// A weak vector is overriden by a stronger one.
+    for (auto &f : fs.frames) {
+        ftl::cuda::move_points(
+             f.getTexture<float4>(Channel::Points),
+             f.getTexture<float4>(Channel::EnergyVector),
+             rate,
+             stream
+        );
+    }
    return true;
 }
--- a/applications/reconstruct/src/ilw/ilw.cu
+++ b/applications/reconstruct/src/ilw/ilw.cu
@@ -15,6 +15,9 @@ __device__ inline float warpMax(float e) {
 	return e;
 }
+#define COR_WIN_RADIUS 16
+#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS)
 __global__ void correspondence_energy_vector_kernel(
        TextureObject<float4> p1,
        TextureObject<float4> p2,
@@ -33,34 +36,39 @@ __global__ void correspondence_energy_vector_kernel(
 	const float3 world1 = make_float3(p1.tex2D(x, y));
 	if (world1.x == MINF) return;
    const float3 camPos2 = pose2 * world1;
-	const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
+    const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
-    const int upsample = 8;
+    float bestconf = 0.0f;
+    float3 bestpoint;
    // Project to p2 using cam2
    // Each thread takes a possible correspondence and calculates a weighting
    const int lane = tid % WARP_SIZE;
-	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+	for (int i=lane; i<COR_WIN_SIZE; i+=WARP_SIZE) {
-		const float u = (i % upsample) - (upsample / 2);
+		const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
-        const float v = (i / upsample) - (upsample / 2);
+        const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
 		const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
 		if (world2.x == MINF) continue;
        // Determine degree of correspondence
        const float confidence = 1.0f / length(world1 - world2);
-        const float maxconf = warpMax(confidence);
+        if (confidence > bestconf) {
-        // This thread has best confidence value
+            bestpoint = world2;
-        if (maxconf == confidence) {
+            bestconf = confidence;
-            vout(x,y) = vout.tex2D(x, y) + make_float4(
-                (world1.x - world2.x) * maxconf,
-                (world1.y - world2.y) * maxconf,
-                (world1.z - world2.z) * maxconf,
-                maxconf);
-            eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf;
        }
    }
+    const float maxconf = warpMax(bestconf);
+    if (maxconf == bestconf && maxconf > 0.0f) {
+        vout(x,y) = vout.tex2D(x, y) + make_float4(
+            (world1.x - bestpoint.x) * maxconf,
+            (world1.y - bestpoint.y) * maxconf,
+            (world1.z - bestpoint.z) * maxconf,
+            maxconf);
+        eout(x,y) = eout.tex2D(x,y) + length(world1 - bestpoint)*maxconf;
+    }
 }
 void ftl::cuda::correspondence_energy_vector(
@@ -84,3 +92,34 @@ void ftl::cuda::correspondence_energy_vector(
    );
    cudaSafeCall( cudaGetLastError() );
 }
+//==============================================================================
+__global__ void move_points_kernel(
+    ftl::cuda::TextureObject<float4> p,
+    ftl::cuda::TextureObject<float4> v,
+    float rate) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+    if (x < p.width() && y < p.height()) {
+        p(x,y) = p(x,y) + rate * v.tex2D((int)x,(int)y);
+    }
+}
+void ftl::cuda::move_points(
+        ftl::cuda::TextureObject<float4> &p,
+        ftl::cuda::TextureObject<float4> &v,
+        float rate,
+        cudaStream_t stream) {
+    const dim3 gridSize((p.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (p.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+    move_points_kernel<<<gridSize, blockSize, 0, stream>>>(p,v,rate);
+    cudaSafeCall( cudaGetLastError() );
+}
--- a/applications/reconstruct/src/ilw/ilw.hpp
+++ b/applications/reconstruct/src/ilw/ilw.hpp
@@ -56,7 +56,7 @@ class ILW : public ftl::Configurable {
    /*
     * Calculate energies and move the points.
     */
-    bool _phase2(ftl::rgbd::FrameSet &fs);
+    bool _phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream);
    std::vector<detail::ILWData> data_;
 };

--- a/applications/reconstruct/src/ilw/ilw_cuda.hpp
+++ b/applications/reconstruct/src/ilw/ilw_cuda.hpp
@@ -20,6 +20,13 @@ void correspondence_energy_vector(
    cudaStream_t stream
 );
+void move_points(
+    ftl::cuda::TextureObject<float4> &p,
+    ftl::cuda::TextureObject<float4> &v,
+    float rate,
+    cudaStream_t stream
+);
 }
 }

--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -214,8 +214,10 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 	} else {
 		if (ftl::rgbd::isFloatChannel(chan)) {
 			out.create<GpuMat>(chan, Format<float>(camera.width, camera.height));
+			out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream);
 		} else {
 			out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height));
+			out.get<GpuMat>(chan).setTo(cv::Scalar(76,76,76,255), cvstream);
 		}
 		renderChannel(params, out, chan, stream);
 	}