diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f866eda014e6b6353824435cbacbc500fe2c683..af6a4173c4f2a08d18b2bb1f3a68336898240fa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -225,6 +225,7 @@ add_subdirectory(components/codecs) add_subdirectory(components/net) add_subdirectory(components/rgbd-sources) add_subdirectory(components/control/cpp) +add_subdirectory(components/filters) add_subdirectory(applications/calibration) add_subdirectory(applications/groupview) add_subdirectory(applications/player) diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index cca8a40f35c6e679597185668661fe4a814e504f..debb8e73d5cf6f1f1bc1d685bb7298350014614c 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -433,6 +433,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() { cv::Mat tmp; switch(channel_) { + case Channel::Smoothing: case Channel::Confidence: if (im2_.rows == 0) { break; } visualizeEnergy(im2_, tmp, 1.0); @@ -462,7 +463,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() { texture2_.update(tmp);*/ break; - case Channel::Flow: + //case Channel::Flow: case Channel::Normals: case Channel::Right: if (im2_.rows == 0 || im2_.type() != CV_8UC3) { break; } diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index 931802aedce15c0f94617385d05aeed141d24fdd..1896417ead0e05d137e818af2338e243753a059a 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -20,7 +20,6 @@ set(REPSRC src/ilw/fill.cu src/ilw/discontinuity.cu src/ilw/correspondence.cu - src/filters/smoothing.cu ) add_executable(ftl-reconstruct ${REPSRC}) @@ -37,6 +36,6 @@ set_property(TARGET ftl-reconstruct PROPERTY CUDA_SEPARABLE_COMPILATION ON) endif() #target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include) -target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} ftlctrl ftlnet ftlrender) 
+target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} ftlctrl ftlnet ftlrender ftlfilter) diff --git a/applications/reconstruct/src/filters/smoothing.cu b/applications/reconstruct/src/filters/smoothing.cu deleted file mode 100644 index 21f30cafda69682e15c987b692a8f712e4373d7d..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/filters/smoothing.cu +++ /dev/null @@ -1,85 +0,0 @@ -#include "smoothing.hpp" - -#include <ftl/cuda/weighting.hpp> - -#define T_PER_BLOCK 8 - -template <int RADIUS> -__global__ void depth_smooth_kernel( - ftl::cuda::TextureObject<float> depth_in, - ftl::cuda::TextureObject<uchar4> colour_in, - ftl::cuda::TextureObject<float> depth_out, - ftl::rgbd::Camera camera, - float factor, float thresh) { - - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < depth_in.width() && y < depth_in.height()) { - float d = depth_in.tex2D((int)x,(int)y); - depth_out(x,y) = 0.0f; - - if (d < camera.minDepth || d > camera.maxDepth) return; - - uchar4 c = colour_in.tex2D((int)x, (int)y); - float3 pos = camera.screenToCam(x,y,d); - - float contrib = 0.0f; - float new_depth = 0.0f; - - for (int v=-RADIUS; v<=RADIUS; ++v) { - for (int u=-RADIUS; u<=RADIUS; ++u) { - // Get colour difference to center - const uchar4 cN = colour_in.tex2D((int)x+u, (int)y+v); - const float colourWeight = ftl::cuda::colourWeighting(c, cN, thresh); - const float dN = depth_in.tex2D((int)x + u, (int)y + v); - const float3 posN = camera.screenToCam(x+u, y+v, dN); - const float weight = ftl::cuda::spatialWeighting(posN, pos, factor * colourWeight); - - contrib += weight; - new_depth += dN * weight; - } - } - - if (contrib > 0.0f) { - depth_out(x,y) = new_depth / contrib; - } - } -} - -void ftl::cuda::depth_smooth( - ftl::cuda::TextureObject<float> &depth_in, - ftl::cuda::TextureObject<uchar4> &colour_in, - ftl::cuda::TextureObject<float> &depth_out, - 
const ftl::rgbd::Camera &camera, - int radius, float factor, float thresh, int iters, cudaStream_t stream) { - - const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - for (int n=0; n<iters; ++n) { - switch (radius) { - case 5 : depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; - case 4 : depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; - case 3 : depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; - case 2 : depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; - case 1 : depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; - default: break; - } - cudaSafeCall( cudaGetLastError() ); - - switch (radius) { - case 5 : depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; - case 4 : depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; - case 3 : depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; - case 2 : depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; - case 1 : depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; - default: break; - } - cudaSafeCall( cudaGetLastError() ); - } - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif -} diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index 
e6a7d2d8b3e7f39bb47bbd3b43b4cd3ce166db38..a3e5504a41b11ef0f2b020710b88dae050b0d9c1 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -30,7 +30,8 @@ #include <opencv2/opencv.hpp> #include <ftl/net/universe.hpp> -#include "filters/smoothing.hpp" +#include <ftl/filters/smoothing.hpp> +#include <ftl/cuda/normals.hpp> #include <ftl/registration.hpp> #include <cuda_profiler_api.h> @@ -245,11 +246,11 @@ static void run(ftl::Configurable *root) { bool busy = false; - auto *filter = ftl::config::create<ftl::Configurable>(root, "filters"); + auto *smooth = ftl::config::create<ftl::DepthSmoother>(root, "filters"); group->setLatency(4); group->setName("ReconGroup"); - group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls,filter](ftl::rgbd::FrameSet &fs) -> bool { + group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls,smooth](ftl::rgbd::FrameSet &fs) -> bool { //cudaSetDevice(scene->getCUDADevice()); //if (slave.isPaused()) return true; @@ -264,7 +265,7 @@ static void run(ftl::Configurable *root) { // Swap the entire frameset to allow rapid return fs.swapTo(scene_A); - ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align, filter](int id) { + ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align, smooth](int id) { //cudaSetDevice(scene->getCUDADevice()); // TODO: Release frameset here... //cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream())); @@ -272,13 +273,14 @@ static void run(ftl::Configurable *root) { UNIQUE_LOCK(scene_A.mtx, lk); cv::cuda::GpuMat tmp; - float factor = filter->value("smooth_factor", 0.4f); + /*float factor = filter->value("smooth_factor", 0.4f); float colour_limit = filter->value("colour_limit", 30.0f); bool do_smooth = filter->value("pre_smooth", false); int iters = filter->value("iterations", 3); int radius = filter->value("radius", 5); + float var_thesh = filter->value("variance_threshold", 0.02f);*/ - if (do_smooth) { + //if (do_smooth) { // Presmooth... 
for (int i=0; i<scene_A.frames.size(); ++i) { auto &f = scene_A.frames[i]; @@ -294,15 +296,27 @@ static void run(ftl::Configurable *root) { cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0); } - ftl::cuda::depth_smooth( + smooth->smooth(f, s); + + /*ftl::cuda::smoothing_factor( + f.createTexture<float>(Channel::Depth), + f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())), + f.createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())), + //f.createTexture<uchar4>(Channel::Colour), + f.createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())), + var_thesh, + s->parameters(), 0 + );*/ + + /*ftl::cuda::depth_smooth( f.createTexture<float>(Channel::Depth), f.createTexture<uchar4>(Channel::Colour), f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())), s->parameters(), radius, factor, colour_limit, iters, 0 - ); + );*/ } - } + //} // Send all frames to GPU, block until done? //scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream. 
diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp
index 11262579e73d6d62424f2515ce021265d94ea529..85cd9fffc45ddcda1b6cc13a160c74c30797b804 100644
--- a/components/codecs/include/ftl/codecs/channels.hpp
+++ b/components/codecs/include/ftl/codecs/channels.hpp
@@ -24,6 +24,7 @@ enum struct Channel : int {
 	Contribution = 7, // 32F
 	EnergyVector = 8, // 32FC4
 	Flow = 9, // 32F
+	Smoothing = 9, // 32F (alias: shares id 9 with Flow)
 	Energy = 10, // 32F
 	Mask = 11, // 32U
 	Density = 12, // 32F
@@ -124,6 +125,7 @@ inline bool isFloatChannel(ftl::codecs::Channel chan) {
 	case Channel::Depth :
 	//case Channel::Normals :
 	case Channel::Confidence:
+	case Channel::Flow : // also matches Channel::Smoothing (same id)
 	case Channel::Density:
 	case Channel::Energy : return true;
 	default : return false;
diff --git a/components/filters/CMakeLists.txt b/components/filters/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..92601b3714ac5e166d4e4f8eecf22c8f04edc58f
--- /dev/null
+++ b/components/filters/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_library(ftlfilter
+	src/smoothing.cpp
+	src/smoothing.cu
+)
+
+# These cause errors in CI build and are being removed from PCL in newer versions
+# target_compile_options(ftlfilter PUBLIC ${PCL_DEFINITIONS})
+
+target_include_directories(ftlfilter PUBLIC
+	${PCL_INCLUDE_DIRS}
+	$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+	$<INSTALL_INTERFACE:include>
+	PRIVATE src)
+target_link_libraries(ftlfilter ftlrender ftlrgbd ftlcommon Eigen3::Eigen Threads::Threads ${OpenCV_LIBS})
+
+#ADD_SUBDIRECTORY(test)
diff --git a/components/filters/include/ftl/filters/smoothing.hpp b/components/filters/include/ftl/filters/smoothing.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..5e5035f58981107d94f20ad948afe80297254edb
--- /dev/null
+++ b/components/filters/include/ftl/filters/smoothing.hpp
@@ -0,0 +1,34 @@
+#ifndef _FTL_SMOOTHING_HPP_
+#define _FTL_SMOOTHING_HPP_
+
+#include <ftl/configurable.hpp>
+#include <ftl/cuda_common.hpp>
+#include <ftl/rgbd/source.hpp>
+#include <ftl/rgbd/frame.hpp>
+
+namespace ftl {
+
+/**
+ * Configurable depth pre-smoothing filter. The options read by smooth() are
+ * "pre_smooth", "iterations", "levels" and "variance_threshold".
+ */
+class DepthSmoother : public ftl::Configurable {
+	public:
+	explicit DepthSmoother(nlohmann::json &config);
+	~DepthSmoother();
+
+	/**
+	 * Smooth the depth channel of `frame` in place; the correction is left
+	 * in Channel::Smoothing. Does nothing unless "pre_smooth" is enabled.
+	 * `src` supplies the camera parameters and is dereferenced unchecked.
+	 */
+	void smooth(ftl::rgbd::Frame &frame, ftl::rgbd::Source *src);
+
+	private:
+	cv::cuda::GpuMat temp_;			// Scratch for a level's upsampled output
+	ftl::rgbd::Frame frames_[4];	// One reduced-resolution frame per level
+};
+
+}
+
+#endif  // _FTL_SMOOTHING_HPP_
diff --git a/components/filters/src/smoothing.cpp b/components/filters/src/smoothing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..edd6a072fbb51cc498b243c8530f3abdb550e592
--- /dev/null
+++ b/components/filters/src/smoothing.cpp
@@ -0,0 +1,69 @@
+#include <ftl/filters/smoothing.hpp>
+#include "smoothing_cuda.hpp"
+
+#include <algorithm>
+
+using ftl::DepthSmoother;
+using ftl::codecs::Channel;
+using cv::cuda::GpuMat;
+
+DepthSmoother::DepthSmoother(nlohmann::json &config) : ftl::Configurable(config) {
+
+}
+
+DepthSmoother::~DepthSmoother() {
+
+}
+
+/*
+ * Runs the CUDA smoothing pass "iterations" times on the full resolution
+ * depth, then once on each of "levels" reduced-size copies of that depth,
+ * adding every level's upsampled output to Channel::Smoothing of the frame.
+ */
+void DepthSmoother::smooth(ftl::rgbd::Frame &f, ftl::rgbd::Source *s) {
+	// Never use more levels than frames_ has slots for.
+	const int max_levels = static_cast<int>(sizeof(frames_) / sizeof(frames_[0]));
+
+	float var_thresh = value("variance_threshold", 0.0002f);
+	bool do_smooth = value("pre_smooth", false);
+	int levels = std::max(0, std::min(value("levels",0), max_levels));
+	int iters = value("iterations",5);
+
+	if (!do_smooth) return;
+
+	for (int i=0; i<iters; ++i) {
+		ftl::cuda::smoothing_factor(
+			f.createTexture<float>(Channel::Depth),
+			f.createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+			f.createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(f.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+			var_thresh,
+			s->parameters(), 0
+		);
+	}
+
+	// NOTE(review): logged on every call; looks like leftover debug output.
+	LOG(INFO) << "PARAMS DEPTHS " << s->parameters().minDepth << "," << s->parameters().maxDepth;
+
+	// NOTE(review): the level passes read Channel::Smoothing of `f`, which is
+	// only created by the loop above - confirm behaviour when "iterations" <= 0.
+	for (int i=0; i<levels; ++i) {
+		var_thresh *= 2.0f;
+		auto &dmat = f.get<GpuMat>(Channel::Depth);
+		// NOTE(review): the divisors are 2, 4, 6, 8 rather than powers of two - confirm intent.
+		cv::cuda::resize(dmat, frames_[i].create<GpuMat>(Channel::Depth), cv::Size(dmat.cols / (2*(i+1)), dmat.rows / (2*(i+1))), 0.0, 0.0, cv::INTER_NEAREST);
+
+		ftl::cuda::smoothing_factor(
+			frames_[i].createTexture<float>(Channel::Depth),
+			frames_[i].createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(frames_[i].get<GpuMat>(Channel::Depth).size())),
+			frames_[i].createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(frames_[i].get<GpuMat>(Channel::Depth).size())),
+			var_thresh,
+			s->parameters(), 0
+		);
+
+		cv::cuda::resize(frames_[i].get<GpuMat>(Channel::Smoothing), temp_, f.get<cv::cuda::GpuMat>(Channel::Depth).size(), 0.0, 0.0, cv::INTER_LINEAR);
+		cv::cuda::add(temp_, f.get<GpuMat>(Channel::Smoothing), f.get<GpuMat>(Channel::Smoothing));
+	}
+
+	//cv::cuda::subtract(f.get<GpuMat>(Channel::Depth), f.get<GpuMat>(Channel::Smoothing), f.get<GpuMat>(Channel::Depth));
+}
+
diff --git a/components/filters/src/smoothing.cu b/components/filters/src/smoothing.cu
new file mode 100644
index 0000000000000000000000000000000000000000..43cb1f6bd0a56760aa2c6d4d3fff4c664631c404
--- /dev/null
+++ b/components/filters/src/smoothing.cu
@@ -0,0 +1,264 @@
+#include "smoothing_cuda.hpp"
+
+#include <ftl/cuda/weighting.hpp>
+
+using ftl::cuda::TextureObject;
+
+#define T_PER_BLOCK 8
+
+/*
+ * Weighted mean of the depths in a (2*RADIUS+1)^2 window around each pixel.
+ * A neighbour's weight is spatialWeighting(neighbour position, centre position,
+ * factor * colourWeighting(centre colour, neighbour colour, thresh)).
+ * Pixels whose own depth is outside [minDepth, maxDepth] are written as 0.
+ * Expects a 2D launch covering depth_in; surplus threads do nothing.
+ */
+template <int RADIUS>
+__global__ void depth_smooth_kernel(
+		ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<uchar4> colour_in,
+		ftl::cuda::TextureObject<float> depth_out,
+		ftl::rgbd::Camera camera,
+		float factor, float thresh) {
+
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth_in.width() && y < depth_in.height()) {
+		float d = depth_in.tex2D((int)x,(int)y);
+		depth_out(x,y) = 0.0f;
+
+		if (d < camera.minDepth || d > camera.maxDepth) return;
+
+		uchar4 c = colour_in.tex2D((int)x, (int)y);
+		float3 pos = camera.screenToCam(x,y,d);
+
+		float contrib = 0.0f;
+		float new_depth = 0.0f;
+
+		for (int v=-RADIUS; v<=RADIUS; ++v) {
+			for (int u=-RADIUS; u<=RADIUS; ++u) {
+				// Get colour difference to center
+				const uchar4 cN = colour_in.tex2D((int)x+u, (int)y+v);
+				const float colourWeight = ftl::cuda::colourWeighting(c, cN, thresh);
+				const float dN = depth_in.tex2D((int)x + u, (int)y + v);
+				const float3 posN = camera.screenToCam(x+u, y+v, dN);
+				const float weight = ftl::cuda::spatialWeighting(posN, pos, factor * colourWeight);
+
+				contrib += weight;
+				new_depth += dN * weight;
+			}
+		}
+
+		if (contrib > 0.0f) {
+			depth_out(x,y) = new_depth / contrib;
+		}
+	}
+}
+
+// Launch one depth_smooth_kernel pass; radii outside 1..5 launch nothing.
+static void depth_smooth_pass(
+		int radius, const dim3 &gridSize, const dim3 &blockSize, cudaStream_t stream,
+		ftl::cuda::TextureObject<float> &in,
+		ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<float> &out,
+		const ftl::rgbd::Camera &camera,
+		float factor, float thresh) {
+
+	switch (radius) {
+	case 5 : depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(in, colour, out, camera, factor, thresh); break;
+	case 4 : depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(in, colour, out, camera, factor, thresh); break;
+	case 3 : depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(in, colour, out, camera, factor, thresh); break;
+	case 2 : depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(in, colour, out, camera, factor, thresh); break;
+	case 1 : depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(in, colour, out, camera, factor, thresh); break;
+	default: break;
+	}
+	cudaSafeCall( cudaGetLastError() );
+}
+
+/*
+ * Runs `iters` iterations of two passes each (depth_in -> depth_out, then
+ * depth_out -> depth_in), so the last pass writes to depth_in.
+ */
+void ftl::cuda::depth_smooth(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		const ftl::rgbd::Camera &camera,
+		int radius, float factor, float thresh, int iters, cudaStream_t stream) {
+
+	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	for (int n=0; n<iters; ++n) {
+		depth_smooth_pass(radius, gridSize, blockSize, stream, depth_in, colour_in, depth_out, camera, factor, thresh);
+		depth_smooth_pass(radius, gridSize, blockSize, stream, depth_out, colour_in, depth_in, camera, factor, thresh);
+	}
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
+
+// ==== SMOOTHING FACTOR ==========
+
+/*
+ * Mean of the samples at (x+x1,y+y1) and (x+x2,y+y2). With DERIV=false, `dd`
+ * is returned instead if either sample is <= cam.minDepth or > cam.maxDepth;
+ * with DERIV=true the two samples are always averaged.
+ */
+template <bool DERIV>
+__device__ inline float getAverage(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2);
+
+template <>
+__device__ inline float getAverage<false>(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2) {
+	float a = d.tex2D(x+x1,y+y1);
+	float b = d.tex2D(x+x2,y+y2);
+	return (a <= cam.minDepth || a > cam.maxDepth || b <= cam.minDepth || b > cam.maxDepth) ? dd : (a+b) / 2.0f;
+}
+
+template <>
+__device__ inline float getAverage<true>(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2) {
+	float a = d.tex2D(x+x1,y+y1);
+	float b = d.tex2D(x+x2,y+y2);
+	return (a+b) / 2.0f;
+}
+
+// Replace minvar with v when v has the smaller magnitude; the sign of v is kept.
+__device__ inline void absmin(float &minvar, float v) {
+	if (fabsf(minvar) > fabsf(v)) minvar = v;
+}
+
+/*
+ * For every pixel, subtract from the centre value the mean of each opposing
+ * neighbour pair (both diagonals, vertical, horizontal) and write the
+ * difference of smallest magnitude to `smoothing`. The running minimum starts
+ * at 10, so 10 is written if no difference has a smaller magnitude.
+ * Expects a 2D launch covering depth_in; surplus threads do nothing.
+ */
+template <bool DERIV>
+__global__ void smoothing_factor_kernel(
+		ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<float> smoothing,
+		ftl::rgbd::Camera camera) {
+
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth_in.width() && y < depth_in.height()) {
+		const float d = depth_in.tex2D((int)x,(int)y);
+		float min_var = 10.0f;
+
+		absmin(min_var, d - getAverage<DERIV>(camera, d, depth_in, x, y, -1, -1, 1, 1));
+		absmin(min_var, d - getAverage<DERIV>(camera, d, depth_in, x, y, 0, -1, 0, 1));
+		absmin(min_var, d - getAverage<DERIV>(camera, d, depth_in, x, y, 1, -1, -1, 1));
+		absmin(min_var, d - getAverage<DERIV>(camera, d, depth_in, x, y, -1, 0, 1, 0));
+
+		smoothing(x,y) = min_var;
+	}
+}
+
+/*
+ * out = s * error_value, where s = min(1, |min(in, thresh)| / thresh).
+ * The host code passes the same texture as `in` and `out`; each thread only
+ * reads the pixel that it subsequently writes.
+ */
+__global__ void norm_thresh_kernel(
+		ftl::cuda::TextureObject<float> in,
+		ftl::cuda::TextureObject<float> error_value,
+		ftl::cuda::TextureObject<float> out,
+		float thresh) {
+
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < in.width() && y < in.height()) {
+		// Clamp to threshold
+		float min_var = min(in.tex2D((int)x,(int)y), thresh);
+		float s = min(1.0f, (fabsf(min_var) / thresh));
+		out(x,y) = s * error_value(x,y);
+	}
+}
+
+// Subtract the per-pixel value in smooth_strength from depth, in place.
+__global__ void do_smooth_kernel(
+		ftl::cuda::TextureObject<float> smooth_strength,
+		ftl::cuda::TextureObject<float> depth) {
+
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth.width() && y < depth.height()) {
+		depth(x,y) = depth(x,y) - smooth_strength(x,y);
+	}
+}
+
+/*
+ * Weighted mean of depth_in over a (2*RADIUS+1)^2 window, each neighbour
+ * weighted by spatialWeighting(centre position, neighbour position, alpha).
+ * Writes 0 if the weights sum to zero. No host code in this file launches it.
+ */
+template <int RADIUS>
+__global__ void sum_neighbors_kernel(
+		ftl::cuda::TextureObject<float> depth_in,
+		ftl::cuda::TextureObject<float> depth_out,
+		ftl::rgbd::Camera camera, float alpha) {
+
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth_out.width() && y < depth_out.height()) {
+		float avg = 0.0f;
+		float contrib = 0.0f;
+
+		float d0 = depth_in.tex2D((int)x, (int)y);
+		float3 pos0 = camera.screenToCam(x,y,d0);
+
+		for (int v=-RADIUS; v<=RADIUS; ++v) {
+			#pragma unroll
+			for (int u=-RADIUS; u<=RADIUS; ++u) {
+				float dN = depth_in.tex2D((int)x + u, (int)y + v);
+				float3 posN = camera.screenToCam(x+u,y+v,dN);
+				float weight = ftl::cuda::spatialWeighting(pos0, posN, alpha);
+				avg += weight * dN;
+				contrib += weight;
+			}
+		}
+
+		// Avoid 0/0 when no neighbour contributes, as depth_smooth_kernel does.
+		depth_out(x,y) = (contrib > 0.0f) ? avg / contrib : 0.0f;
+	}
+}
+
+/*
+ * One in-place smoothing pass over depth_in, queued on `stream`:
+ *   1. temp      = smoothing_factor_kernel<false> applied to depth_in
+ *   2. smoothing = smoothing_factor_kernel<true> applied to temp
+ *   3. smoothing = norm_thresh_kernel(smoothing, temp, thresh)
+ *   4. depth_in  = depth_in - smoothing
+ * The grid is sized from `smoothing`. Only _DEBUG builds synchronise here.
+ */
+void ftl::cuda::smoothing_factor(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &temp,
+		ftl::cuda::TextureObject<float> &smoothing,
+		float thresh,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((smoothing.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (smoothing.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	smoothing_factor_kernel<false><<<gridSize, blockSize, 0, stream>>>(depth_in, temp, camera);
+	cudaSafeCall( cudaGetLastError() );
+	smoothing_factor_kernel<true><<<gridSize, blockSize, 0, stream>>>(temp, smoothing, camera);
+	cudaSafeCall( cudaGetLastError() );
+	norm_thresh_kernel<<<gridSize, blockSize, 0, stream>>>(smoothing, temp, smoothing, thresh);
+	cudaSafeCall( cudaGetLastError() );
+	do_smooth_kernel<<<gridSize, blockSize, 0, stream>>>(smoothing, depth_in);
+	cudaSafeCall( cudaGetLastError() );
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
diff --git a/applications/reconstruct/src/filters/smoothing.hpp b/components/filters/src/smoothing_cuda.hpp
similarity index 59%
rename from applications/reconstruct/src/filters/smoothing.hpp
rename to components/filters/src/smoothing_cuda.hpp
index d5d6a47053140dc33d5ee97fa606f4b00650036d..da800f883f2e39a44253929fbc7675360e3a543d 100644
--- a/applications/reconstruct/src/filters/smoothing.hpp
+++ b/components/filters/src/smoothing_cuda.hpp
@@ -15,6 +15,18 @@ void depth_smooth(
 		int radius, float factor, float thresh, int iters,
 		cudaStream_t stream);
 
+/**
+ * Queue one in-place smoothing pass over depth_in on `stream`. `temp` and
+ * `smoothing` are overwritten as working and output buffers.
+ */
+void smoothing_factor(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &temp,
+		ftl::cuda::TextureObject<float> &smoothing,
+		float thresh,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream);
+
 }
 }
 
diff --git
a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp
index da2247723206cc9a1167ffbb0bc659ec847208c2..227ab536208b51321131b1e3eb7de98775bed971 100644
--- a/components/renderers/cpp/include/ftl/cuda/normals.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp
@@ -24,6 +24,11 @@ void normals(ftl::cuda::TextureObject<float4> &output,
 		const ftl::rgbd::Camera &camera,
 		const float3x3 &pose_inv, const float3x3 &pose, cudaStream_t stream);
 
+void normals(ftl::cuda::TextureObject<float4> &output,
+		ftl::cuda::TextureObject<float> &input,  // Float depth values
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream);
+
 void normal_visualise(ftl::cuda::TextureObject<float4> &norm,
 		ftl::cuda::TextureObject<uchar4> &output,
 		const float3 &light, const uchar4 &diffuse, const uchar4 &ambient,
diff --git a/components/renderers/cpp/include/ftl/render/splat_params.hpp b/components/renderers/cpp/include/ftl/render/splat_params.hpp
index 0509ee37f0d85163fde2b462ad0871f8a824070b..7c6b8416896f10aae816565c52813108c80bf385 100644
--- a/components/renderers/cpp/include/ftl/render/splat_params.hpp
+++ b/components/renderers/cpp/include/ftl/render/splat_params.hpp
@@ -18,6 +18,7 @@ struct __align__(16) SplatParams {
 	uint m_flags;
 	//float voxelSize;
 	float depthThreshold;
+	int triangle_limit;
 	ftl::rgbd::Camera camera;
 };
 
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
index 976b5804120bac3e000d493e87a90f2ec4b30d1c..7dcdf2f5fc705f78538cd799a196c1630f9dfc42 100644
--- a/components/renderers/cpp/src/normals.cu
+++ b/components/renderers/cpp/src/normals.cu
@@ -63,6 +63,44 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
 	}
 }
 
+/*
+ * Per-pixel normal from float depth: the cross product of the vertical and
+ * horizontal central differences of the camera-space positions, negated and
+ * normalised, with w = 1. Border pixels, pixels where any of the five samples
+ * fails isValid(), and zero-length results are written as (0,0,0,0).
+ */
+__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
+        ftl::cuda::TextureObject<float> input, ftl::rgbd::Camera camera) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if(x >= input.width() || y >= input.height()) return;
+
+	// Default result; replaced below only when a normal can be computed.
+	output(x,y) = make_float4(0, 0, 0, 0);
+
+	// The central differences need all four direct neighbours.
+	if(x == 0 || y == 0 || x >= input.width()-1 || y >= input.height()-1) return;
+
+	const int px = (int)x;
+	const int py = (int)y;
+	const float3 centre = camera.screenToCam(x, y, input.tex2D(px, py));
+	const float3 yNext = camera.screenToCam(x, y+1, input.tex2D(px, py+1));
+	const float3 xNext = camera.screenToCam(x+1, y, input.tex2D(px+1, py));
+	const float3 yPrev = camera.screenToCam(x, y-1, input.tex2D(px, py-1));
+	const float3 xPrev = camera.screenToCam(x-1, y, input.tex2D(px-1, py));
+
+	const bool all_valid = isValid(camera,centre) && isValid(camera,yNext) && isValid(camera,xNext)
+			&& isValid(camera,yPrev) && isValid(camera,xPrev);
+	if (!all_valid) return;
+
+	const float3 n = cross(yNext-yPrev, xNext-xPrev);
+	const float l = length(n);
+	if(l > 0.0f) {
+		output(x,y) = make_float4((n/-l), 1.0f);
+	}
+}
+
 template <int RADIUS>
 __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
         ftl::cuda::TextureObject<float4> output,
@@ -238,6 +276,25 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
 	#endif
 }
 
+// Compute a normal for every pixel of the float depth texture `input`.
+void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
+		ftl::cuda::TextureObject<float> &input,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+	// One thread per pixel, rounding the grid up to whole blocks.
+	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
+	const dim3 blocks(
+		(input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK,
+		(input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+
+	computeNormals_kernel<<<blocks, threads, 0, stream>>>(output, input, camera);
+	cudaSafeCall( cudaGetLastError() );
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
+
 //==============================================================================
 
 __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm,
diff --git a/components/renderers/cpp/src/reprojection.cu
b/components/renderers/cpp/src/reprojection.cu index f07b8956369492cb9002b2cc2068f87894fa817e..c11d23dd137972f51628dd844a76b20e3a25993a 100644 --- a/components/renderers/cpp/src/reprojection.cu +++ b/components/renderers/cpp/src/reprojection.cu @@ -93,6 +93,8 @@ __global__ void reprojection_kernel( const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y); const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); + + // TODO: Z checks need to interpolate between neighbors if large triangles are used float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f); /* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */ diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp index be84e5a64063210a74e4314f86eb52f22dc650e5..7f93a8f683507d6c5c393bb4b9bfbeedf5cab756 100644 --- a/components/renderers/cpp/src/tri_render.cpp +++ b/components/renderers/cpp/src/tri_render.cpp @@ -406,6 +406,8 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { // Parameters object to pass to CUDA describing the camera SplatParams &params = params_; + params.triangle_limit = value("triangle_limit", 200); + params.depthThreshold = value("depth_threshold", 0.04f); params.m_flags = 0; //if () params.m_flags |= ftl::render::kShowDisconMask; if (value("normal_weight_colours", true)) params.m_flags |= ftl::render::kNormalWeightColours; @@ -478,7 +480,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { if (aligned_source >= 0 && aligned_source < scene_->frames.size()) { // FIXME: Output may not be same resolution as source! 
cudaSafeCall(cudaStreamSynchronize(stream_)); - scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour, out); + scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing, out); if (chan == Channel::Normals) { // Convert normal to single float value diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu index cdf42fff9dff9fd1acf3b1cb2e1ea585b22292ef..7311e50b9bbbd7b7ba78f106e360998b937646d8 100644 --- a/components/renderers/cpp/src/triangle_render.cu +++ b/components/renderers/cpp/src/triangle_render.cu @@ -121,7 +121,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) { d[2] = depth_in.tex2D(x,y+B); // Is this triangle valid - if (fabs(d[0] - d[1]) > 0.04f || fabs(d[0] - d[2]) > 0.04f) return; + if (fabs(d[0] - d[1]) > params.depthThreshold || fabs(d[0] - d[2]) > params.depthThreshold) return; if (d[0] < params.camera.minDepth || d[0] > params.camera.maxDepth) return; short2 v[3]; @@ -138,7 +138,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) { const int maxY = max(v[0].y, max(v[1].y, v[2].y)); // Remove really large triangles - if ((maxX - minX) * (maxY - minY) > 200) return; + if ((maxX - minX) * (maxY - minY) > params.triangle_limit) return; for (int sy=minY; sy <= maxY; ++sy) { for (int sx=minX; sx <= maxX; ++sx) {