diff --git a/components/operators/include/ftl/cuda/fixed.hpp b/components/operators/include/ftl/cuda/fixed.hpp index efb0c09caabd5e41f024679e8e80a2c6ea2b6df3..16e1e6ae0769fbfa16bad8931a58ac23ff43cb7b 100644 --- a/components/operators/include/ftl/cuda/fixed.hpp +++ b/components/operators/include/ftl/cuda/fixed.hpp @@ -3,7 +3,7 @@ template <int FRAC> __device__ inline float fixed2float(short v) { - return v / (1 << FRAC); + return float(v) / float(1 << FRAC); } template <int FRAC> diff --git a/components/renderers/cpp/include/ftl/render/CUDARender.hpp b/components/renderers/cpp/include/ftl/render/CUDARender.hpp index a4e65bce6ba1aa40dbce3c4a8ba24747f258300a..3b933821450aed45c2c91aebf3ee8f84ed8755ac 100644 --- a/components/renderers/cpp/include/ftl/render/CUDARender.hpp +++ b/components/renderers/cpp/include/ftl/render/CUDARender.hpp @@ -59,8 +59,8 @@ class CUDARender : public ftl::render::FSRenderer { ftl::cuda::TextureObject<int> contrib_; //ftl::cuda::TextureObject<half4> normals_; - std::list<ftl::cuda::TextureObject<short2>*> screen_buffers_; - std::list<ftl::cuda::TextureObject<float>*> depth_buffers_; + std::list<cv::cuda::GpuMat*> screen_buffers_; + std::list<cv::cuda::GpuMat*> depth_buffers_; ftl::cuda::TextureObject<float> depth_out_; ftl::cuda::Collision *collisions_; @@ -118,8 +118,8 @@ class CUDARender : public ftl::render::FSRenderer { bool _alreadySeen() const { return last_frame_ == scene_->timestamp(); } void _adjustDepthThresholds(const ftl::rgbd::Camera &fcam); - ftl::cuda::TextureObject<float> &_getDepthBuffer(const cv::Size &); - ftl::cuda::TextureObject<short2> &_getScreenBuffer(const cv::Size &); + cv::cuda::GpuMat &_getDepthBuffer(const cv::Size &); + cv::cuda::GpuMat &_getScreenBuffer(const cv::Size &); inline ftl::codecs::Channel _getDepthChannel() const { return (out_chan_ == ftl::codecs::Channel::Colour) ? ftl::codecs::Channel::Depth : ftl::codecs::Channel::Depth2; } inline ftl::codecs::Channel _getNormalsChannel() const { return (out_chan_ == ftl::codecs::Channel::Colour) ? ftl::codecs::Channel::Normals : ftl::codecs::Channel::Normals2; } diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp index c274e136fd5348ce7c982a26e51e151e99679aec..418ffd9132cec9e3013f8d9ca0934f9aafa7edbe 100644 --- a/components/renderers/cpp/src/CUDARender.cpp +++ b/components/renderers/cpp/src/CUDARender.cpp @@ -218,20 +218,20 @@ void CUDARender::_adjustDepthThresholds(const ftl::rgbd::Camera &fcam) { params_.depthCoef = fcam.baseline*fcam.fx; } -ftl::cuda::TextureObject<float> &CUDARender::_getDepthBuffer(const cv::Size &size) { +cv::cuda::GpuMat &CUDARender::_getDepthBuffer(const cv::Size &size) { for (auto *b : depth_buffers_) { - if (b->width() == static_cast<size_t>(size.width) && b->height() == static_cast<size_t>(size.height)) return *b; + if (b->cols == static_cast<size_t>(size.width) && b->rows == static_cast<size_t>(size.height)) return *b; } - auto *nb = new ftl::cuda::TextureObject<float>(size.width, size.height); + auto *nb = new cv::cuda::GpuMat(size, CV_16S); depth_buffers_.push_back(nb); return *nb; } -ftl::cuda::TextureObject<short2> &CUDARender::_getScreenBuffer(const cv::Size &size) { +cv::cuda::GpuMat &CUDARender::_getScreenBuffer(const cv::Size &size) { for (auto *b : screen_buffers_) { - if (b->width() == static_cast<size_t>(size.width) && b->height() == static_cast<size_t>(size.height)) return *b; + if (b->cols == static_cast<size_t>(size.width) && b->rows == static_cast<size_t>(size.height)) return *b; } - auto *nb = new ftl::cuda::TextureObject<short2>(size.width, size.height); + auto *nb = new cv::cuda::GpuMat(size, CV_16SC2); screen_buffers_.push_back(nb); return *nb; } @@ -284,14 +284,14 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre if (use_depth) { if (f.hasChannel(Channel::Depth)) { ftl::cuda::screen_coord( - f.createTexture<float>(Channel::Depth), + f.get<cv::cuda::GpuMat>(Channel::Depth), depthbuffer, screenbuffer, params_, transform, f.getLeftCamera(), stream ); } else if (f.hasChannel(Channel::GroundTruth)) { ftl::cuda::screen_coord( - f.createTexture<float>(Channel::GroundTruth), + f.get<cv::cuda::GpuMat>(Channel::GroundTruth), depthbuffer, screenbuffer, params_, transform, f.getLeftCamera(), stream @@ -316,7 +316,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre // Decide on and render triangles around each point ftl::cuda::triangle_render1( depthbuffer, - temp_.createTexture<int>((do_blend) ? Channel::Depth : Channel::Depth2), + temp_.create<cv::cuda::GpuMat>((do_blend) ? Channel::Depth : Channel::Depth2), screenbuffer, params_, stream ); @@ -357,7 +357,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre ); } else { //ftl::cuda::merge_convert_depth(temp_.getTexture<int>(Channel::Depth2), out.createTexture<float>(_getDepthChannel()), 1.0f / 100000.0f, stream_); - ftl::cuda::merge_convert_depth(temp_.getTexture<int>(Channel::Depth2), depth_out_, 1.0f / 100000.0f, stream_); + ftl::cuda::merge_convert_depth(temp_.createTexture<int>(Channel::Depth2), depth_out_, 1.0f / 100000.0f, stream_); } // Now merge new render to any existing frameset render, detecting collisions diff --git a/components/renderers/cpp/src/screen.cu b/components/renderers/cpp/src/screen.cu index 15473b9ad4781957b1b9f1c6949469db1c6d230c..1fda3275953bccf54be24cc4b68d7e046d30f6cf 100644 --- a/components/renderers/cpp/src/screen.cu +++ b/components/renderers/cpp/src/screen.cu @@ -2,6 +2,7 @@ #include "splatter_cuda.hpp" #include <ftl/rgbd/camera.hpp> #include <ftl/cuda_common.hpp> +#include <ftl/cuda/fixed.hpp> using ftl::rgbd::Camera; using ftl::cuda::TextureObject; @@ -34,29 +35,23 @@ __device__ inline uint2 convertToScreen<ViewPortMode::Stretch>(const Parameters return make_uint2(params.viewport.map(params.camera, params.camera.camToScreen<float2>(camPos))); } -/*template <> -__device__ inline uint2 convertToScreen<ViewPortMode::Warping>(const Parameters ¶ms, const float3 &camPos) { - float2 pt = params.camera.camToScreen<float2>(camPos); //params.viewport.map(params.camera, params.camera.camToScreen<float2>(camPos)); - const float coeff = 1.0f / (params.viewport.warpMatrix.entries[6] * pt.x + params.viewport.warpMatrix.entries[7] * pt.y + params.viewport.warpMatrix.entries[8]); - const float xcoo = coeff * (params.viewport.warpMatrix.entries[0] * pt.x + params.viewport.warpMatrix.entries[1] * pt.y + params.viewport.warpMatrix.entries[2]); - const float ycoo = coeff * (params.viewport.warpMatrix.entries[3] * pt.x + params.viewport.warpMatrix.entries[4] * pt.y + params.viewport.warpMatrix.entries[5]); - return make_uint2(xcoo, ycoo); -}*/ - /* * Convert source screen position to output screen coordinates. */ template <ftl::render::ViewPortMode VPMODE, Projection PROJECT> - __global__ void screen_coord_kernel(TextureObject<float> depth, - TextureObject<float> depth_out, - TextureObject<short2> screen_out, Parameters params, float4x4 pose, Camera camera) { + __global__ void screen_coord_kernel( + const float* __restrict__ depth, + short* __restrict__ depth_out, + short2* __restrict__ screen_out, + int pitch4, int pitch2, Parameters params, float4x4 pose, Camera camera) +{ const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; - if (x >= 0 && y >= 0 && x < depth.width() && y < depth.height()) { + if (x >= 0 && y >= 0 && x < camera.width && y < camera.height) { //uint2 screenPos = make_uint2(30000,30000); - const float d = depth.tex2D(x, y); + const float d = depth[y*pitch4+x]; // Find the virtual screen position of current point const float3 camPos = (d > camera.minDepth && d < camera.maxDepth) ? pose * camera.screenToCam(x,y,d) : make_float3(0.0f,0.0f,0.0f); @@ -69,34 +64,42 @@ __device__ inline uint2 convertToScreen<ViewPortMode::Warping>(const Parameters screenPos.x >= params.camera.width || screenPos.y >= params.camera.height) screenPos = make_float3(30000,30000,0); - screen_out(x,y) = make_short2(screenPos.x, screenPos.y); - depth_out(x,y) = screenPos.z; + + screen_out[y*pitch4+x] = make_short2(screenPos.x, screenPos.y); + depth_out[y*pitch2+x] = float2fixed<10>(screenPos.z); } } -void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> &depth_out, - TextureObject<short2> &screen_out, const Parameters ¶ms, +void ftl::cuda::screen_coord(const cv::cuda::GpuMat &depth, cv::cuda::GpuMat &depth_out, + cv::cuda::GpuMat &screen_out, const Parameters ¶ms, const float4x4 &pose, const Camera &camera, cudaStream_t stream) { - const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + static constexpr int THREADS_X = 8; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((depth.cols + THREADS_X - 1)/THREADS_X, (depth.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + depth_out.create(depth.size(), CV_16S); + screen_out.create(depth.size(), CV_16SC2); if (params.projection == Projection::PERSPECTIVE) { switch (params.viewPortMode) { - case ViewPortMode::Disabled: screen_coord_kernel<ViewPortMode::Disabled, Projection::PERSPECTIVE><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; - case ViewPortMode::Clipping: screen_coord_kernel<ViewPortMode::Clipping, Projection::PERSPECTIVE><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; - case ViewPortMode::Stretch: screen_coord_kernel<ViewPortMode::Stretch, Projection::PERSPECTIVE><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; + case ViewPortMode::Disabled: screen_coord_kernel<ViewPortMode::Disabled, Projection::PERSPECTIVE><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; + case ViewPortMode::Clipping: screen_coord_kernel<ViewPortMode::Clipping, Projection::PERSPECTIVE><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; + case ViewPortMode::Stretch: screen_coord_kernel<ViewPortMode::Stretch, Projection::PERSPECTIVE><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; } } else if (params.projection == Projection::EQUIRECTANGULAR) { switch (params.viewPortMode) { - case ViewPortMode::Disabled: screen_coord_kernel<ViewPortMode::Disabled, Projection::EQUIRECTANGULAR><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; - case ViewPortMode::Clipping: screen_coord_kernel<ViewPortMode::Clipping, Projection::EQUIRECTANGULAR><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; - case ViewPortMode::Stretch: screen_coord_kernel<ViewPortMode::Stretch, Projection::EQUIRECTANGULAR><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; + case ViewPortMode::Disabled: screen_coord_kernel<ViewPortMode::Disabled, Projection::EQUIRECTANGULAR><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; + case ViewPortMode::Clipping: screen_coord_kernel<ViewPortMode::Clipping, Projection::EQUIRECTANGULAR><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; + case ViewPortMode::Stretch: screen_coord_kernel<ViewPortMode::Stretch, Projection::EQUIRECTANGULAR><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; } } else if (params.projection == Projection::ORTHOGRAPHIC) { switch (params.viewPortMode) { - case ViewPortMode::Disabled: screen_coord_kernel<ViewPortMode::Disabled, Projection::ORTHOGRAPHIC><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; - case ViewPortMode::Clipping: screen_coord_kernel<ViewPortMode::Clipping, Projection::ORTHOGRAPHIC><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; - case ViewPortMode::Stretch: screen_coord_kernel<ViewPortMode::Stretch, Projection::ORTHOGRAPHIC><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; + case ViewPortMode::Disabled: screen_coord_kernel<ViewPortMode::Disabled, Projection::ORTHOGRAPHIC><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; + case ViewPortMode::Clipping: screen_coord_kernel<ViewPortMode::Clipping, Projection::ORTHOGRAPHIC><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; + case ViewPortMode::Stretch: screen_coord_kernel<ViewPortMode::Stretch, Projection::ORTHOGRAPHIC><<<gridSize, blockSize, 0, stream>>>(depth.ptr<float>(), depth_out.ptr<short>(), screen_out.ptr<short2>(), depth.step1(), depth_out.step1(), params, pose, camera); break; } } cudaSafeCall( cudaGetLastError() ); @@ -109,12 +112,16 @@ void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> & * Convert source screen position to output screen coordinates. Assumes a * constant depth of 1m instead of using a depth channel input. */ - __global__ void screen_coord_kernel(TextureObject<float> depth_out, - TextureObject<short2> screen_out, Camera vcamera, float4x4 pose, Camera camera) { + __global__ void screen_coord_kernel( + short* __restrict__ depth_out, + short2* __restrict__ screen_out, + int pitch4, int pitch2, + Camera vcamera, float4x4 pose, Camera camera) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; - if (x >= 0 && y >= 0 && x < depth_out.width() && y < depth_out.height()) { + if (x >= 0 && y >= 0 && x < camera.width && y < camera.height) { //uint2 screenPos = make_uint2(30000,30000); const float d = camera.maxDepth; @@ -128,15 +135,21 @@ void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> & screenPos.y >= vcamera.height) screenPos = make_uint2(30000,30000); - screen_out(x,y) = make_short2(screenPos.x, screenPos.y); - depth_out(x,y) = camPos.z; + screen_out[y*pitch4+x] = make_short2(screenPos.x, screenPos.y); + depth_out[y*pitch2+x] = float2fixed<10>(camPos.z); } } -void ftl::cuda::screen_coord(TextureObject<float> &depth_out, TextureObject<short2> &screen_out, const Parameters ¶ms, const float4x4 &pose, const Camera &camera, cudaStream_t stream) { - const dim3 gridSize((screen_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); +void ftl::cuda::screen_coord(cv::cuda::GpuMat &depth_out, cv::cuda::GpuMat &screen_out, const Parameters ¶ms, const float4x4 &pose, const Camera &camera, cudaStream_t stream) { + static constexpr int THREADS_X = 8; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((camera.width + THREADS_X - 1)/THREADS_X, (camera.height + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + depth_out.create(camera.height, camera.width, CV_16S); + screen_out.create(camera.height, camera.width, CV_16SC2); - screen_coord_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, screen_out, params.camera, pose, camera); + screen_coord_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out.ptr<short>(), screen_out.ptr<short2>(), screen_out.step1()/2, depth_out.step1(), params.camera, pose, camera); cudaSafeCall( cudaGetLastError() ); } diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index 147ccf99479674f26ab6b7b5d86ee493c18f9123..e8be62b96f28baa390823575ad8bcd3572a6a278 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -7,26 +7,26 @@ namespace ftl { namespace cuda { void screen_coord( - ftl::cuda::TextureObject<float> &depth, - ftl::cuda::TextureObject<float> &depth_out, - ftl::cuda::TextureObject<short2> &screen_out, + const cv::cuda::GpuMat &depth, + cv::cuda::GpuMat &depth_out, + cv::cuda::GpuMat &screen_out, const ftl::render::Parameters ¶ms, const float4x4 &pose, const ftl::rgbd::Camera &camera, cudaStream_t stream); void screen_coord( - ftl::cuda::TextureObject<float> &depth_out, - ftl::cuda::TextureObject<short2> &screen_out, + cv::cuda::GpuMat &depth_out, + cv::cuda::GpuMat &screen_out, const ftl::render::Parameters ¶ms, const float4x4 &pose, const ftl::rgbd::Camera &camera, cudaStream_t stream); void triangle_render1( - ftl::cuda::TextureObject<float> &depth_in, - ftl::cuda::TextureObject<int> &depth_out, - ftl::cuda::TextureObject<short2> &screen, + const cv::cuda::GpuMat &depth_in, // short + cv::cuda::GpuMat &depth_out, // int + const cv::cuda::GpuMat &screen, // short2 const ftl::render::Parameters ¶ms, cudaStream_t stream); diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu index 3b45d1315d2ef3bc486fe97acc7f7a3a6f163e0c..40ee1250e2b05e90c8f4d8122022493925ced613 100644 --- a/components/renderers/cpp/src/triangle_render.cu +++ b/components/renderers/cpp/src/triangle_render.cu @@ -2,6 +2,7 @@ #include "splatter_cuda.hpp" #include <ftl/rgbd/camera.hpp> #include <ftl/cuda_common.hpp> +#include <ftl/cuda/fixed.hpp> #include <ftl/cuda/weighting.hpp> @@ -107,7 +108,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) { * being inside or outside (using bary centric coordinate method). If inside * then atomically write to the depth map. */ -__device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const Parameters ¶ms, TextureObject<int> &depth_out) { +__device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const Parameters ¶ms, int* depth_out, int out_pitch4) { const int minX = min(v[0].x, min(v[1].x, v[2].x)); const int minY = min(v[0].y, min(v[1].y, v[2].y)); const int maxX = max(v[0].x, max(v[1].x, v[2].x)); @@ -124,7 +125,7 @@ __device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const Pa if (sx < params.camera.width && sx >= 0 && sy < params.camera.height && sy >= 0 && isBarycentricCoordInBounds(baryCentricCoordinate)) { float new_depth = getZAtCoordinate(baryCentricCoordinate, d); - atomicMin(&depth_out(sx,sy), int(new_depth*100000.0f)); + atomicMin(&depth_out[sx+sy*out_pitch4], int(new_depth*100000.0f)); } } } @@ -145,11 +146,11 @@ __device__ inline bool isValidTriangle(const short2 (&v)[3]) { * which verticies to load. */ template <int A, int B> -__device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const TextureObject<float> &depth_in, const TextureObject<short2> &screen) { - d[1] = depth_in.tex2D(x+A,y); - d[2] = depth_in.tex2D(x,y+B); - v[1] = screen.tex2D(x+A,y); - v[2] = screen.tex2D(x,y+B); +__device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const short* __restrict__ depth_in, const short2* __restrict__ screen, int pitch4, int pitch2) { + d[1] = fixed2float<10>(depth_in[y*pitch2+x+A]); + d[2] = fixed2float<10>(depth_in[(y+B)*pitch2+x]); + v[1] = screen[y*pitch4+x+A]; + v[2] = screen[(y+B)*pitch4+x]; return isValidTriangle(v); } @@ -157,37 +158,44 @@ __device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const * Convert source screen position to output screen coordinates. */ __global__ void triangle_render_kernel( - TextureObject<float> depth_in, - TextureObject<int> depth_out, - TextureObject<short2> screen, Parameters params) { + const short* __restrict__ depth_in, + int* depth_out, + const short2* __restrict__ screen, + int width, int height, int pitch2, int pitch4, int out_pitch4, Parameters params) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; - if (x >= 1 && x < depth_in.width()-1 && y >= 1 && y < depth_in.height()-1) { + if (x >= 1 && x < width-1 && y >= 1 && y < height-1) { float d[3]; - d[0] = depth_in.tex2D(x,y); + d[0] = fixed2float<10>(depth_in[y*pitch2+x]); short2 v[3]; - v[0] = screen.tex2D(x,y); + v[0] = screen[y*pitch4+x]; if (v[0].x < 30000) { // Calculate discontinuity threshold. //const float threshold = (params.depthCoef / ((params.depthCoef / d[0]) - params.disconDisparities)) - d[0]; // Draw (optionally) 4 triangles as a diamond pattern around the central point. - if (loadTriangle<1,1>(x, y, d, v, depth_in, screen)) drawTriangle(d, v, params, depth_out); - if (loadTriangle<1,-1>(x, y, d, v, depth_in, screen)) drawTriangle(d, v, params, depth_out); - if (loadTriangle<-1,1>(x, y, d, v, depth_in, screen)) drawTriangle(d, v, params, depth_out); - if (loadTriangle<-1,-1>(x, y, d, v, depth_in, screen)) drawTriangle(d, v, params, depth_out); + if (loadTriangle<1,1>(x, y, d, v, depth_in, screen, pitch4, pitch2)) drawTriangle(d, v, params, depth_out, out_pitch4); + if (loadTriangle<1,-1>(x, y, d, v, depth_in, screen, pitch4, pitch2)) drawTriangle(d, v, params, depth_out, out_pitch4); + if (loadTriangle<-1,1>(x, y, d, v, depth_in, screen, pitch4, pitch2)) drawTriangle(d, v, params, depth_out, out_pitch4); + if (loadTriangle<-1,-1>(x, y, d, v, depth_in, screen, pitch4, pitch2)) drawTriangle(d, v, params, depth_out, out_pitch4); } } } -void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<int> &depth_out, TextureObject<short2> &screen, const Parameters ¶ms, cudaStream_t stream) { - const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); +void ftl::cuda::triangle_render1(const cv::cuda::GpuMat &depth_in, cv::cuda::GpuMat &depth_out, const cv::cuda::GpuMat &screen, const Parameters ¶ms, cudaStream_t stream) { + static constexpr int THREADS_X = 8; + static constexpr int THREADS_Y = 8; + + const dim3 gridSize((depth_in.cols + THREADS_X - 1)/THREADS_X, (depth_in.rows + THREADS_Y - 1)/THREADS_Y); + const dim3 blockSize(THREADS_X, THREADS_Y); + + depth_out.create(params.camera.height, params.camera.width, CV_32S); - triangle_render_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + triangle_render_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in.ptr<short>(), depth_out.ptr<int>(), screen.ptr<short2>(), depth_in.cols, depth_in.rows, depth_in.step1(), screen.step1()/2, depth_out.step1(), params); cudaSafeCall( cudaGetLastError() ); } @@ -218,8 +226,6 @@ void ftl::cuda::merge_convert_depth(TextureObject<int> &depth_in, TextureObject< // ==== Reverse Verify Result ================================================== -// ==== Merge convert =========== - __global__ void reverse_check_kernel( TextureObject<float> depth_in, TextureObject<float> depth_original, @@ -234,6 +240,7 @@ __global__ void reverse_check_kernel( if (x < 0 || x >= depth_in.width() || y < 0 || y >= depth_in.height()) return; float d = depth_in.tex2D(x,y); + float3 campos = transformR * vintrin.screenToCam(x,y,d); int2 spos = ointrin.camToScreen<int2>(campos); int ox = spos.x; @@ -242,11 +249,14 @@ __global__ void reverse_check_kernel( if (campos.z > 0.0f && ox >= 0 && ox < ointrin.width && oy >= 0 && oy < ointrin.height) { float d2 = depth_original.tex2D(ox,oy); - if (d2 < ointrin.maxDepth && d2 - campos.z > 0.002f) { + if (d2 < ointrin.maxDepth && d2 - campos.z > d2*0.001f) { //printf("Original %f, %f\n", d2, campos.z); - depth_in(x,y) = 0.0f; //(transform * ointrin.screenToCam(ox,oy,d2)).z; + //depth_in(x,y) = 1.5f; //(transform * ointrin.screenToCam(ox,oy,d2)).z; + d = 0.0f; } } + + depth_in(x,y) = d; } void ftl::cuda::reverse_verify(TextureObject<float> &depth_in, TextureObject<float> &depth_original, const float4x4 &transformR, const float4x4 &transform, const ftl::rgbd::Camera &vintrin, const ftl::rgbd::Camera &ointrin, cudaStream_t stream) {