diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index dce81e19c91a41f7e8bba90a2149a6602d73e5aa..931802aedce15c0f94617385d05aeed141d24fdd 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -20,6 +20,7 @@ set(REPSRC
 	src/ilw/fill.cu
 	src/ilw/discontinuity.cu
 	src/ilw/correspondence.cu
+	src/filters/smoothing.cu
 )
 
 add_executable(ftl-reconstruct ${REPSRC})
diff --git a/applications/reconstruct/src/filters/smoothing.cu b/applications/reconstruct/src/filters/smoothing.cu
new file mode 100644
index 0000000000000000000000000000000000000000..21f30cafda69682e15c987b692a8f712e4373d7d
--- /dev/null
+++ b/applications/reconstruct/src/filters/smoothing.cu
@@ -0,0 +1,131 @@
+#include "smoothing.hpp"
+
+#include <ftl/cuda/weighting.hpp>
+
+#define T_PER_BLOCK 8
+
+/**
+ * Colour and distance weighted smoothing of a depth map, one thread per pixel.
+ *
+ * Expects a 2D launch covering depth_in; threads beyond the image exit early.
+ * depth_out must be at least as large as depth_in and must be a different
+ * texture, because neighbours are read from depth_in while depth_out is
+ * written.
+ *
+ * Each valid neighbour in the (2*RADIUS+1)^2 window gets the weight
+ * spatialWeighting(posN, pos, factor * colourWeighting(c, cN, thresh)) and the
+ * output is the weighted mean of those neighbour depths. A pixel whose own
+ * depth lies outside [camera.minDepth, camera.maxDepth], or which gathers no
+ * weight, is written as 0.
+ */
+template <int RADIUS>
+__global__ void depth_smooth_kernel(
+		ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<uchar4> colour_in,
+		ftl::cuda::TextureObject<float> depth_out,
+		ftl::rgbd::Camera camera,
+		float factor, float thresh) {
+
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	// The grid is rounded up to whole blocks, so drop the overhanging threads.
+	if (x >= depth_in.width() || y >= depth_in.height()) return;
+
+	const int width = (int)depth_in.width();
+	const int height = (int)depth_in.height();
+
+	const float d = depth_in.tex2D((int)x, (int)y);
+
+	// Pixels without a usable depth stay invalid rather than being filled in.
+	if (d < camera.minDepth || d > camera.maxDepth) {
+		depth_out(x,y) = 0.0f;
+		return;
+	}
+
+	const uchar4 c = colour_in.tex2D((int)x, (int)y);
+	const float3 pos = camera.screenToCam(x,y,d);
+
+	float contrib = 0.0f;
+	float new_depth = 0.0f;
+
+	for (int v=-RADIUS; v<=RADIUS; ++v) {
+		const int ny = (int)y + v;
+		// Signed test: an unsigned y+v would wrap to a huge value at the top edge.
+		if (ny < 0 || ny >= height) continue;
+
+		for (int u=-RADIUS; u<=RADIUS; ++u) {
+			const int nx = (int)x + u;
+			if (nx < 0 || nx >= width) continue;
+
+			const float dN = depth_in.tex2D(nx, ny);
+			// Holes and out-of-range samples must not drag the average around.
+			if (dN < camera.minDepth || dN > camera.maxDepth) continue;
+
+			// Get colour difference to center
+			const uchar4 cN = colour_in.tex2D(nx, ny);
+			const float colourWeight = ftl::cuda::colourWeighting(c, cN, thresh);
+			const float3 posN = camera.screenToCam(nx, ny, dN);
+			const float weight = ftl::cuda::spatialWeighting(posN, pos, factor * colourWeight);
+
+			contrib += weight;
+			new_depth += dN * weight;
+		}
+	}
+
+	depth_out(x,y) = (contrib > 0.0f) ? new_depth / contrib : 0.0f;
+}
+
+/**
+ * Enqueue one smoothing pass from src to dst on the given stream.
+ *
+ * The kernel is templated on the radius, so the runtime value is mapped onto
+ * one of the instantiations for 1 to 5. Any other radius launches nothing.
+ */
+static void depth_smooth_pass(
+		ftl::cuda::TextureObject<float> &src,
+		ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<float> &dst,
+		const ftl::rgbd::Camera &camera,
+		int radius, float factor, float thresh,
+		const dim3 &gridSize, const dim3 &blockSize, cudaStream_t stream) {
+
+	switch (radius) {
+	case 5 : depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 4 : depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 3 : depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 2 : depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	case 1 : depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(src, colour, dst, camera, factor, thresh); break;
+	default: break;
+	}
+	// Launch configuration errors only show up through the last-error state.
+	cudaSafeCall( cudaGetLastError() );
+}
+
+/**
+ * Run `iters` double passes of depth smoothing.
+ *
+ * Each iteration smooths depth_in into depth_out and then depth_out back into
+ * depth_in, so the final result is left in depth_in and depth_out is scratch
+ * space. Both depth textures need the same dimensions: the grid is sized from
+ * depth_out while the kernel bounds-checks against its input texture.
+ */
+void ftl::cuda::depth_smooth(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		const ftl::rgbd::Camera &camera,
+		int radius, float factor, float thresh, int iters, cudaStream_t stream) {
+
+	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	for (int n=0; n<iters; ++n) {
+		depth_smooth_pass(depth_in, colour_in, depth_out, camera, radius, factor, thresh, gridSize, blockSize, stream);
+		depth_smooth_pass(depth_out, colour_in, depth_in, camera, radius, factor, thresh, gridSize, blockSize, stream);
+	}
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
diff --git a/applications/reconstruct/src/filters/smoothing.hpp b/applications/reconstruct/src/filters/smoothing.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d5d6a47053140dc33d5ee97fa606f4b00650036d
--- /dev/null
+++ b/applications/reconstruct/src/filters/smoothing.hpp
@@ -0,0 +1,34 @@
+#ifndef _FTL_CUDA_SMOOTHING_HPP_
+#define _FTL_CUDA_SMOOTHING_HPP_
+
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace cuda {
+
+/**
+ * Smooth a depth map on the GPU with a colour and distance weighted average.
+ *
+ * Runs `iters` iterations of two passes each (depth_in to depth_out, then
+ * depth_out back to depth_in). The result is left in depth_in; depth_out is
+ * scratch space and must have the same dimensions as depth_in.
+ *
+ * @param radius Half-width of the square window. Only 1 to 5 do any work.
+ * @param factor Scaled by the colour weight and passed to spatialWeighting.
+ * @param thresh Passed as the third argument of colourWeighting.
+ * @param stream CUDA stream the kernels are enqueued on. The call only
+ *               blocks in _DEBUG builds.
+ */
+void depth_smooth(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		const ftl::rgbd::Camera &camera,
+		int radius, float factor, float thresh, int iters,
+		cudaStream_t stream);
+
+}
+}
+
+#endif // _FTL_CUDA_SMOOTHING_HPP_
diff --git a/applications/reconstruct/src/ilw.cpp b/applications/reconstruct/src/ilw.cpp
deleted file mode 100644
index 435cd886eba1b83d7530f19f28ccfb09f7e37f3a..0000000000000000000000000000000000000000
--- a/applications/reconstruct/src/ilw.cpp
+++ /dev/null
@@ -1,125 +0,0 @@
-#include "ilw.hpp"
-#include <ftl/utility/matrix_conversion.hpp>
-#include <ftl/rgbd/source.hpp>
-#include <ftl/cuda/points.hpp>
-#include <loguru.hpp>
-
-#include "ilw_cuda.hpp"
-
-using ftl::ILW;
-using ftl::detail::ILWData;
-using ftl::codecs::Channel;
-using ftl::codecs::Channels;
-using ftl::rgbd::Format;
-using cv::cuda::GpuMat;
-
-ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
-
-}
-
-ILW::~ILW() {
-
-}
-
-bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
-	_phase0(fs, stream);
-
-	//for (int i=0; i<2; ++i) {
-		_phase1(fs, stream);
-		//for (int j=0; j<3; ++j) {
-		//	_phase2(fs);
-		//}
-
-		// TODO: Break if no time left
-	//}
-
-	return true;
-}
-
-bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { - // Make points channel... - for (size_t i=0; i<fs.frames.size(); ++i) { - auto &f = fs.frames[i]; - auto *s = fs.sources[i]; - - if (f.empty(Channel::Depth + Channel::Colour)) { - LOG(ERROR) << "Missing required channel"; - continue; - } - - auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); - auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); - ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream); - - // TODO: Create energy vector texture and clear it - // Create energy and clear it - - // Convert colour from BGR to BGRA if needed - if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) { - // Convert to 4 channel colour - auto &col = f.get<GpuMat>(Channel::Colour); - GpuMat tmp(col.size(), CV_8UC4); - cv::cuda::swap(col, tmp); - cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); - } - - f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); - f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size())); - f.createTexture<uchar4>(Channel::Colour); - - cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - - f.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); - f.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream); - } - - return true; -} - -bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { - // Run correspondence kernel to create an energy vector - - // For each camera combination - for (size_t i=0; i<fs.frames.size(); ++i) { - for (size_t j=0; j<fs.frames.size(); ++j) { - if (i == j) continue; - - LOG(INFO) << "Running phase1"; - - auto &f1 = fs.frames[i]; - auto &f2 = fs.frames[j]; - //auto s1 = fs.frames[i]; - auto s2 = fs.sources[j]; - - auto pose = 
MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); - - try { - //Calculate energy vector to best correspondence - ftl::cuda::correspondence_energy_vector( - f1.getTexture<float4>(Channel::Points), - f2.getTexture<float4>(Channel::Points), - f1.getTexture<uchar4>(Channel::Colour), - f2.getTexture<uchar4>(Channel::Colour), - // TODO: Add normals and other things... - f1.getTexture<float4>(Channel::EnergyVector), - f1.getTexture<float>(Channel::Energy), - pose, - s2->parameters(), - stream - ); - } catch (ftl::exception &e) { - LOG(ERROR) << "Exception in correspondence: " << e.what(); - } - - LOG(INFO) << "Correspondences done... " << i; - } - } - - return true; -} - -bool ILW::_phase2(ftl::rgbd::FrameSet &fs) { - // Run energies and motion kernel - - return true; -} diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu deleted file mode 100644 index 999b5ec9031eed08fc4bc527471961c3236d7445..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw.cu +++ /dev/null @@ -1,90 +0,0 @@ -#include "ilw_cuda.hpp" - -using ftl::cuda::TextureObject; -using ftl::rgbd::Camera; - -#define WARP_SIZE 32 -#define T_PER_BLOCK 8 -#define FULL_MASK 0xffffffff - -__device__ inline float warpMax(float e) { - for (int i = WARP_SIZE/2; i > 0; i /= 2) { - const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); - e = max(e, other); - } - return e; -} - -__global__ void correspondence_energy_vector_kernel( - TextureObject<float4> p1, - TextureObject<float4> p2, - TextureObject<uchar4> c1, - TextureObject<uchar4> c2, - TextureObject<float4> vout, - TextureObject<float> eout, - float4x4 pose2, // Inverse - Camera cam2) { - - // Each warp picks point in p1 - const int tid = (threadIdx.x + threadIdx.y * blockDim.x); - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - const float3 world1 = make_float3(p1.tex2D(x, y)); - if (world1.x == MINF) { - 
vout(x,y) = make_float4(0.0f); - eout(x,y) = 0.0f; - return; - } - const float3 camPos2 = pose2 * world1; - const uint2 screen2 = cam2.camToScreen<uint2>(camPos2); - - const int upsample = 8; - - // Project to p2 using cam2 - // Each thread takes a possible correspondence and calculates a weighting - const int lane = tid % WARP_SIZE; - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); - - const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v)); - if (world2.x == MINF) continue; - - // Determine degree of correspondence - const float confidence = 1.0f / length(world1 - world2); - const float maxconf = warpMax(confidence); - - // This thread has best confidence value - if (maxconf == confidence) { - vout(x,y) = vout.tex2D(x, y) + make_float4( - (world1.x - world2.x) * maxconf, - (world1.y - world2.y) * maxconf, - (world1.z - world2.z) * maxconf, - maxconf); - eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf; - } - } -} - -void ftl::cuda::correspondence_energy_vector( - TextureObject<float4> &p1, - TextureObject<float4> &p2, - TextureObject<uchar4> &c1, - TextureObject<uchar4> &c2, - TextureObject<float4> &vout, - TextureObject<float> &eout, - float4x4 &pose2, - const Camera &cam2, - cudaStream_t stream) { - - const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); - - printf("COR SIZE %d,%d\n", p1.width(), p1.height()); - - correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>( - p1, p2, c1, c2, vout, eout, pose2, cam2 - ); - cudaSafeCall( cudaGetLastError() ); -} diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index 9ed20069fc53a5e72d08b1947c08a285a5c788e1..e6a7d2d8b3e7f39bb47bbd3b43b4cd3ce166db38 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ 
-30,6 +30,7 @@ #include <opencv2/opencv.hpp> #include <ftl/net/universe.hpp> +#include "filters/smoothing.hpp" #include <ftl/registration.hpp> #include <cuda_profiler_api.h> @@ -244,9 +245,11 @@ static void run(ftl::Configurable *root) { bool busy = false; + auto *filter = ftl::config::create<ftl::Configurable>(root, "filters"); + group->setLatency(4); group->setName("ReconGroup"); - group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls](ftl::rgbd::FrameSet &fs) -> bool { + group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls,filter](ftl::rgbd::FrameSet &fs) -> bool { //cudaSetDevice(scene->getCUDADevice()); //if (slave.isPaused()) return true; @@ -261,13 +264,46 @@ static void run(ftl::Configurable *root) { // Swap the entire frameset to allow rapid return fs.swapTo(scene_A); - ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align](int id) { + ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align, filter](int id) { //cudaSetDevice(scene->getCUDADevice()); // TODO: Release frameset here... //cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream())); UNIQUE_LOCK(scene_A.mtx, lk); + cv::cuda::GpuMat tmp; + float factor = filter->value("smooth_factor", 0.4f); + float colour_limit = filter->value("colour_limit", 30.0f); + bool do_smooth = filter->value("pre_smooth", false); + int iters = filter->value("iterations", 3); + int radius = filter->value("radius", 5); + + if (do_smooth) { + // Presmooth... 
+ for (int i=0; i<scene_A.frames.size(); ++i) { + auto &f = scene_A.frames[i]; + auto s = scene_A.sources[i]; + + // Convert colour from BGR to BGRA if needed + if (f.get<cv::cuda::GpuMat>(Channel::Colour).type() == CV_8UC3) { + //cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + // Convert to 4 channel colour + auto &col = f.get<cv::cuda::GpuMat>(Channel::Colour); + tmp.create(col.size(), CV_8UC4); + cv::cuda::swap(col, tmp); + cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0); + } + + ftl::cuda::depth_smooth( + f.createTexture<float>(Channel::Depth), + f.createTexture<uchar4>(Channel::Colour), + f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())), + s->parameters(), + radius, factor, colour_limit, iters, 0 + ); + } + } + // Send all frames to GPU, block until done? //scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream. align->process(scene_A); diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu index 145a43198cb8dce691be352f782c43c3be6fb18f..976b5804120bac3e000d493e87a90f2ec4b30d1c 100644 --- a/components/renderers/cpp/src/normals.cu +++ b/components/renderers/cpp/src/normals.cu @@ -45,11 +45,11 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, output(x,y) = make_float4(0, 0, 0, 0); if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) { - const float3 CC = camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 10000.0f); - const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 10000.0f); - const float3 CP = camera.screenToCam(x+1, y+0, (float)input.tex2D((int)x+1, (int)y+0) / 10000.0f); - const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 10000.0f); - const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 10000.0f); + const float3 CC = 
camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 100000.0f); + const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 100000.0f); + const float3 CP = camera.screenToCam(x+1, y+0, (float)input.tex2D((int)x+1, (int)y+0) / 100000.0f); + const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 100000.0f); + const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 100000.0f); //if(CC.z < && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { if (isValid(camera,CC) && isValid(camera,PC) && isValid(camera,CP) && isValid(camera,MC) && isValid(camera,CM)) { @@ -118,7 +118,7 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, if(x >= depth.width() || y >= depth.height()) return; - const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 10000.0f); + const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 100000.0f); float3 nsum = make_float3(0.0f); float contrib = 0.0f; @@ -128,7 +128,7 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, for (int v=-RADIUS; v<=RADIUS; ++v) { for (int u=-RADIUS; u<=RADIUS; ++u) { - const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 10000.0f); + const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 100000.0f); if (p.z < camera.minDepth || p.z > camera.maxDepth) continue; const float s = ftl::cuda::spatialWeighting(p0, p, smoothing); //const float s = 1.0f; diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu index 6ffd9b2d4c30955694e186d645bee5e503db6d4d..f07b8956369492cb9002b2cc2068f87894fa817e 100644 --- a/components/renderers/cpp/src/reprojection.cu +++ b/components/renderers/cpp/src/reprojection.cu @@ -71,7 +71,7 @@ __global__ void reprojection_kernel( const int x = (blockIdx.x*blockDim.x + 
threadIdx.x); const int y = blockIdx.y*blockDim.y + threadIdx.y; - const float d = (float)depth_in.tex2D((int)x, (int)y) / 10000.0f; + const float d = (float)depth_in.tex2D((int)x, (int)y) / 100000.0f; if (d < params.camera.minDepth || d > params.camera.maxDepth) return; const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); @@ -192,7 +192,7 @@ __global__ void reprojection_kernel( const int x = (blockIdx.x*blockDim.x + threadIdx.x); const int y = blockIdx.y*blockDim.y + threadIdx.y; - const float d = (float)depth_in.tex2D((int)x, (int)y) / 10000.0f; + const float d = (float)depth_in.tex2D((int)x, (int)y) / 100000.0f; if (d < params.camera.minDepth || d > params.camera.maxDepth) return; const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp index cfd81347c6bb1c2e2d4547e0445f0a24d554185b..c579a04bdb8dfaa14af89af7b3376fc4a394b07f 100644 --- a/components/renderers/cpp/src/tri_render.cpp +++ b/components/renderers/cpp/src/tri_render.cpp @@ -482,7 +482,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { ftl::cuda::normals(accum_.createTexture<float4>(Channel::Normals, Format<float4>(camera.width, camera.height)), temp_.createTexture<float4>(Channel::Normals), temp_.getTexture<int>(Channel::Depth2), - 1, 0.02f, + value("normal_radius", 1), value("normal_smoothing", 0.02f), params_.camera, params_.m_viewMatrix.getFloat3x3(), params_.m_viewMatrixInverse.getFloat3x3(), stream_); // Reprojection of colours onto surface @@ -491,7 +491,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { if (chan == Channel::Depth) { // Just convert int depth to float depth - temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 10000.0f, cvstream); + 
temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); } else if (chan == Channel::Normals) { // Visualise normals to RGBA out.create<GpuMat>(Channel::Normals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu index 891557a3fa9e04295846f5703fd848ab0b0954f4..77694ed1d2f194a266b2e67cda8fc8c51575f073 100644 --- a/components/renderers/cpp/src/triangle_render.cu +++ b/components/renderers/cpp/src/triangle_render.cu @@ -146,7 +146,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) { if (isBarycentricCoordInBounds(baryCentricCoordinate)) { float new_depth = getZAtCoordinate(baryCentricCoordinate, d); - atomicMin(&depth_out(sx,sy), int(new_depth*10000.0f)); + atomicMin(&depth_out(sx,sy), int(new_depth*100000.0f)); } } }