From cd9eb9b512d8872f22511ef4809aa35541762cf1 Mon Sep 17 00:00:00 2001
From: Nicolas Pope <nwpope@utu.fi>
Date: Mon, 22 Jul 2019 14:52:35 +0300
Subject: [PATCH] Add colour smoothing

---
 .../include/ftl/voxel_hash_params.hpp         |  1 +
 applications/reconstruct/src/depth_camera.cu  | 67 ++++++++++++++++---
 .../reconstruct/src/depth_camera_cuda.hpp     |  2 +
 applications/reconstruct/src/integrators.cu   | 13 ++--
 applications/reconstruct/src/voxel_scene.cpp  |  2 +
 5 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/applications/reconstruct/include/ftl/voxel_hash_params.hpp b/applications/reconstruct/include/ftl/voxel_hash_params.hpp
index a08bc7a61..91d19f8fb 100644
--- a/applications/reconstruct/include/ftl/voxel_hash_params.hpp
+++ b/applications/reconstruct/include/ftl/voxel_hash_params.hpp
@@ -28,6 +28,7 @@ struct __align__(16) HashParams {
 	float3 m_minBounds;
 	float3 m_maxBounds;
 	float m_spatialSmoothing;
+	float m_colourSmoothing;
 	float m_confidenceThresh;
 };
 
diff --git a/applications/reconstruct/src/depth_camera.cu b/applications/reconstruct/src/depth_camera.cu
index 4ba171c8e..39a6dcf3b 100644
--- a/applications/reconstruct/src/depth_camera.cu
+++ b/applications/reconstruct/src/depth_camera.cu
@@ -12,11 +12,53 @@ using ftl::voxhash::HashParams;
 
 extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS];
 
+__global__ void clear_depth_kernel(ftl::cuda::TextureObject<float> depth) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth.width() && y < depth.height()) {
+		depth(x,y) = 1000.0f; //PINF;
+		//colour(x,y) = make_uchar4(76,76,82,0);
+	}
+}
+
+void ftl::cuda::clear_depth(const ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) {
+	const dim3 clear_gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 clear_blockSize(T_PER_BLOCK, T_PER_BLOCK);
+	clear_depth_kernel<<<clear_gridSize, clear_blockSize, 0, stream>>>(depth);
+}
+
 /// ===== MLS Smooth
 
 // TODO:(Nick) Put this in a common location (used in integrators.cu)
 extern __device__ float spatialWeighting(float r);
 
+/*
+ * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
+ * Real-time foreground-background segmentation using codebook model.
+ * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
+ */
+ __device__ float colordiffFloat(const uchar4 &pa, const uchar4 &pb) {
+	float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z;
+	float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z;
+	float xv_2 = pow(pb.x * pa.x + pb.y * pa.y + pb.z * pa.z, 2);
+	float p_2 = xv_2 / v_2;
+	return sqrt(x_2 - p_2);
+}
+
+/*
+ * Colour weighting as suggested in:
+ * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014.
+ * c = colour distance
+ */
+ __device__ float colourWeighting(float c) {
+	const float h = c_hashParams.m_colourSmoothing;
+	if (c >= h) return 0.0f;
+	float ch = c / h;
+	ch = 1.0f - ch*ch;
+	return ch*ch*ch*ch;
+}
+
 #define WINDOW_RADIUS 5
 
 __global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float> output, HashData hashData, HashParams hashParams, int numcams, int cam) {
@@ -29,10 +71,9 @@ __global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float> output, HashDa
 	const DepthCameraCUDA &mainCamera = c_cameras[cam];
 
 	if (x < width && y < height) {
-		output(x,y) = 1000.0f;
-		__syncthreads();
 
 		float depth = tex2D<float>(mainCamera.depth2, x, y);
+		uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y);
 
 		float3 wpos = make_float3(0.0f);
 		float3 wnorm = make_float3(0.0f);
@@ -54,10 +95,10 @@ __global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float> output, HashDa
 					for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) {
 						if (screenPos.x+u < width && screenPos.y+v < height) {	//on screen
 							float depth = tex2D<float>(camera.depth2, screenPos.x+u, screenPos.y+v);
-
+							uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v);
 							//float4 normal = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v);
 							const float3 worldPos = camera.pose * camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth);
-							const float weight = spatialWeighting(length(mPos - worldPos));
+							const float weight = spatialWeighting(length(mPos - worldPos))*colourWeighting(colordiffFloat(c1,c2));
 
 							wpos += weight*worldPos;
 							//wnorm += weight*make_float3(normal);
@@ -68,13 +109,19 @@ __global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float> output, HashDa
 			}
 
 			wpos /= weights;
-		}
+			//wnorm /= weights;
+		
+
+			//float sdf = wnorm.x * (mPos.x - wpos.x) + wnorm.y * (mPos.y - wpos.y) + wnorm.z * (mPos.z - wpos.z);
+
+			//wpos = wpos - (wnorm * (mPos - wpos));
+			wpos = mainCamera.poseInverse * wpos;
+			const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(wpos));
+			if (screenPos.x < output.width() && screenPos.y < output.height()) {
+				output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? wpos.z : 1000.0f;
+				//output(x,y) = (weights >= hashParams.m_confidenceThresh) ? wpos.z : 1000.0f;
+			}
 
-		wpos = mainCamera.poseInverse * wpos;
-		const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(wpos));
-		if (screenPos.x < output.width() && screenPos.y < output.height()) {
-			output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? wpos.z : 1000.0f;
-			//output(x,y) = (weights >= hashParams.m_confidenceThresh) ? wpos.z : 1000.0f;
 		}
 	}
 }
diff --git a/applications/reconstruct/src/depth_camera_cuda.hpp b/applications/reconstruct/src/depth_camera_cuda.hpp
index 54bc45374..90090f643 100644
--- a/applications/reconstruct/src/depth_camera_cuda.hpp
+++ b/applications/reconstruct/src/depth_camera_cuda.hpp
@@ -7,6 +7,8 @@
 namespace ftl {
 namespace cuda {
 
+void clear_depth(const TextureObject<float> &depth, cudaStream_t stream);
+
 void mls_smooth(TextureObject<float> &output, const ftl::voxhash::HashData &hashData, const ftl::voxhash::HashParams &hashParams, int numcams, int cam, cudaStream_t stream);
 
 void point_cloud(float3* output, const ftl::voxhash::DepthCameraCUDA &depthCameraData, cudaStream_t stream);
diff --git a/applications/reconstruct/src/integrators.cu b/applications/reconstruct/src/integrators.cu
index 0b7f79365..d37591036 100644
--- a/applications/reconstruct/src/integrators.cu
+++ b/applications/reconstruct/src/integrators.cu
@@ -31,6 +31,11 @@ __device__ float colourDistance(const uchar4 &c1, const uchar3 &c2) {
 	return x*x + y*y + z*z;
 }
 
+/*
+ * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005).
+ * Real-time foreground-background segmentation using codebook model.
+ * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004
+ */
 __device__ bool colordiff(const uchar4 &pa, const uchar3 &pb, float epsilon) {
 	float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z;
 	float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z;
@@ -54,8 +59,6 @@ __device__ float spatialWeighting(float r) {
 }
 
 
-
-
 __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParams, int numcams) {
 	__shared__ uint all_warp_ballot;
 	__shared__ uint voxels[16];
@@ -186,7 +189,7 @@ __global__ void integrateDepthMapsKernel(HashData hashData, HashParams hashParam
 	}
 }
 
-#define WINDOW_RADIUS 5
+#define WINDOW_RADIUS 0
 #define PATCH_SIZE 32
 
 __global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int numcams) {
@@ -273,7 +276,7 @@ __global__ void integrateMLSKernel(HashData hashData, HashParams hashParams, int
 	// Calculate voxel sign values across a warp
 	int warpNum = i / WARP_SIZE;
 
-	uint solid_ballot = __ballot_sync(0xFFFFFFFF, (fabs(sdf) < hashParams.m_virtualVoxelSize && aweights > hashParams.m_confidenceThresh) ? 1 : 0);
+	uint solid_ballot = __ballot_sync(0xFFFFFFFF, (fabs(sdf) < hashParams.m_virtualVoxelSize && aweights >= 0.5f) ? 1 : 0);
 	//uint solid_ballot = __ballot_sync(0xFFFFFFFF, (sdf < 0.0f ) ? 1 : 0);
 
 	// Aggregate each warp result into voxel mask
@@ -310,7 +313,7 @@ const dim3 gridSize(NUM_CUDA_BLOCKS, 1);
 const dim3 blockSize(threadsPerBlock, 1);
 
 //if (hashParams.m_numOccupiedBlocks > 0) {	//this guard is important if there is no depth in the current frame (i.e., no blocks were allocated)
-	integrateDepthMapsKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams);
+	integrateMLSKernel << <gridSize, blockSize, 0, stream >> >(hashData, hashParams, numcams);
 //}
 
 //cudaSafeCall( cudaGetLastError() );
diff --git a/applications/reconstruct/src/voxel_scene.cpp b/applications/reconstruct/src/voxel_scene.cpp
index 6e12bd9cb..424783e84 100644
--- a/applications/reconstruct/src/voxel_scene.cpp
+++ b/applications/reconstruct/src/voxel_scene.cpp
@@ -287,6 +287,7 @@ HashParams SceneRep::_parametersFromConfig() {
 	params.m_integrationWeightSample = value("SDFIntegrationWeightSample", 10);
 	params.m_integrationWeightMax = value("SDFIntegrationWeightMax", 255);
 	params.m_spatialSmoothing = value("spatialSmoothing", 0.04f); // 4cm
+	params.m_colourSmoothing = value("colourSmoothing", 20.0f);
 	params.m_confidenceThresh = value("confidenceThreshold", 20.0f);
 	params.m_maxBounds = make_float3(
 		value("bbox_x_max", 2.0f),
@@ -356,6 +357,7 @@ void SceneRep::_compactifyAllocated() {
 
 void SceneRep::_integrateDepthMaps() {
 	for (size_t i=0; i<cameras_.size(); ++i) {
+		ftl::cuda::clear_depth(*(cameras_[i].gpu.depth_tex_), integ_stream_);
 		ftl::cuda::mls_smooth(*(cameras_[i].gpu.depth_tex_), m_hashData, m_hashParams, cameras_.size(), i, integ_stream_);
 	}
 	ftl::cuda::integrateDepthMaps(m_hashData, m_hashParams, cameras_.size(), integ_stream_);
-- 
GitLab