#include <ftl/cuda/normals.hpp> #include <ftl/cuda/weighting.hpp> #define T_PER_BLOCK 16 #define MINF __int_as_float(0xff800000) __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, ftl::cuda::TextureObject<float4> input) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if(x >= input.width() || y >= input.height()) return; output(x,y) = make_float4(0, 0, 0, 0); if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) { const float3 CC = make_float3(input.tex2D((int)x+0, (int)y+0)); //[(y+0)*width+(x+0)]; const float3 PC = make_float3(input.tex2D((int)x+0, (int)y+1)); //[(y+1)*width+(x+0)]; const float3 CP = make_float3(input.tex2D((int)x+1, (int)y+0)); //[(y+0)*width+(x+1)]; const float3 MC = make_float3(input.tex2D((int)x+0, (int)y-1)); //[(y-1)*width+(x+0)]; const float3 CM = make_float3(input.tex2D((int)x-1, (int)y+0)); //[(y+0)*width+(x-1)]; if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { const float3 n = cross(PC-MC, CP-CM); const float l = length(n); if(l > 0.0f) { output(x,y) = make_float4(n/-l, 1.0f); } } } } __device__ inline bool isValid(const ftl::rgbd::Camera &camera, const float3 &d) { return d.z >= camera.minDepth && d.z <= camera.maxDepth; } __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, ftl::cuda::TextureObject<int> input, ftl::rgbd::Camera camera, float3x3 pose) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if(x >= input.width() || y >= input.height()) return; output(x,y) = make_float4(0, 0, 0, 0); if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) { const float3 CC = camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 1000.0f); const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 1000.0f); const float3 CP = camera.screenToCam(x+1, y+0, (float)input.tex2D((int)x+1, (int)y+0) / 1000.0f); const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 1000.0f); const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 1000.0f); //if(CC.z < && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { if (isValid(camera,CC) && isValid(camera,PC) && isValid(camera,CP) && isValid(camera,MC) && isValid(camera,CM)) { const float3 n = cross(PC-MC, CP-CM); const float l = length(n); if(l > 0.0f) { output(x,y) = make_float4((n/-l), 1.0f); } } } } template <int RADIUS> __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, ftl::cuda::TextureObject<float4> output, ftl::cuda::TextureObject<float4> points, ftl::rgbd::Camera camera, float3x3 pose, float smoothing) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if(x >= points.width() || y >= points.height()) return; const float3 p0 = make_float3(points.tex2D((int)x,(int)y)); float3 nsum = make_float3(0.0f); float contrib = 0.0f; if (p0.x == MINF) return; for (int v=-RADIUS; v<=RADIUS; ++v) { for (int u=-RADIUS; u<=RADIUS; ++u) { const float3 p = make_float3(points.tex2D((int)x+u,(int)y+v)); if (p.x == MINF) continue; const float s = ftl::cuda::spatialWeighting(p0, p, smoothing); //const float s = 1.0f; if (s > 0.0f) { const float4 n = norms.tex2D((int)x+u,(int)y+v); if (n.w > 0.0f) { nsum += make_float3(n) * s; contrib += s; } } } } // Compute dot product of normal with camera to obtain measure of how // well this point faces the source camera, a measure of confidence float3 ray = pose * camera.screenToCam(x, y, 1.0f); ray = ray / length(ray); nsum /= contrib; nsum /= length(nsum); output(x,y) = (contrib > 0.0f) ? make_float4(nsum, dot(nsum, ray)) : make_float4(0.0f); } template <int RADIUS> __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, ftl::cuda::TextureObject<float4> output, ftl::cuda::TextureObject<int> depth, ftl::rgbd::Camera camera, float3x3 pose, float smoothing) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if(x >= depth.width() || y >= depth.height()) return; const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 1000.0f); float3 nsum = make_float3(0.0f); float contrib = 0.0f; if (p0.z < camera.minDepth || p0.z > camera.maxDepth) return; for (int v=-RADIUS; v<=RADIUS; ++v) { for (int u=-RADIUS; u<=RADIUS; ++u) { const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 1000.0f); if (p.z < camera.minDepth || p.z > camera.maxDepth) continue; const float s = ftl::cuda::spatialWeighting(p0, p, smoothing); //const float s = 1.0f; //if (s > 0.0f) { const float4 n = norms.tex2D((int)x+u,(int)y+v); if (n.w > 0.0f) { nsum += make_float3(n) * s; contrib += s; } //} } } // Compute dot product of normal with camera to obtain measure of how // well this point faces the source camera, a measure of confidence float3 ray = camera.screenToCam(x, y, 1.0f); ray = ray / length(ray); nsum /= contrib; nsum /= length(nsum); output(x,y) = (contrib > 0.0f) ? make_float4(pose*nsum, 1.0f) : make_float4(0.0f); } void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, ftl::cuda::TextureObject<float4> &temp, ftl::cuda::TextureObject<float4> &input, int radius, float smoothing, const ftl::rgbd::Camera &camera, const float3x3 &pose,cudaStream_t stream) { const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input); cudaSafeCall( cudaGetLastError() ); switch (radius) { case 9: smooth_normals_kernel<9><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break; case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break; case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break; case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break; } cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif } void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, ftl::cuda::TextureObject<float4> &temp, ftl::cuda::TextureObject<int> &input, int radius, float smoothing, const ftl::rgbd::Camera &camera, const float3x3 &pose_inv, const float3x3 &pose,cudaStream_t stream) { const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input, camera, pose); cudaSafeCall( cudaGetLastError() ); switch (radius) { case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); } cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif } //============================================================================== __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm, ftl::cuda::TextureObject<uchar4> output, float3 direction, uchar4 diffuse, uchar4 ambient) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if(x >= norm.width() || y >= norm.height()) return; output(x,y) = make_uchar4(0,0,0,0); float3 ray = direction; ray = ray / length(ray); float3 n = make_float3(norm.tex2D((int)x,(int)y)); float l = length(n); if (l == 0) return; n /= l; const float d = max(dot(ray, n), 0.0f); output(x,y) = make_uchar4( min(255.0f, diffuse.x*d + ambient.x), min(255.0f, diffuse.y*d + ambient.y), min(255.0f, diffuse.z*d + ambient.z), 255); } void ftl::cuda::normal_visualise(ftl::cuda::TextureObject<float4> &norm, ftl::cuda::TextureObject<uchar4> &output, const float3 &light, const uchar4 &diffuse, const uchar4 &ambient, cudaStream_t stream) { const dim3 gridSize((norm.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norm.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); vis_normals_kernel<<<gridSize, blockSize, 0, stream>>>(norm, output, light, diffuse, ambient); cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif } //============================================================================== __global__ void filter_normals_kernel(ftl::cuda::TextureObject<float4> norm, ftl::cuda::TextureObject<float4> output, ftl::rgbd::Camera camera, float4x4 pose, float thresh) { const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; if(x >= norm.width() || y >= norm.height()) return; float3 ray = pose.getFloat3x3() * camera.screenToCam(x,y,1.0f); ray = ray / length(ray); float3 n = make_float3(norm.tex2D((int)x,(int)y)); float l = length(n); if (l == 0) { output(x,y) = make_float4(MINF); return; } n /= l; const float d = dot(ray, n); if (d <= thresh) { output(x,y) = make_float4(MINF); } } void ftl::cuda::normal_filter(ftl::cuda::TextureObject<float4> &norm, ftl::cuda::TextureObject<float4> &output, const ftl::rgbd::Camera &camera, const float4x4 &pose, float thresh, cudaStream_t stream) { const dim3 gridSize((norm.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norm.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); filter_normals_kernel<<<gridSize, blockSize, 0, stream>>>(norm, output, camera, pose, thresh); cudaSafeCall( cudaGetLastError() ); #ifdef _DEBUG cudaSafeCall(cudaDeviceSynchronize()); //cutilCheckMsg(__FUNCTION__); #endif }