diff --git a/CMakeLists.txt b/CMakeLists.txt index 53dc9b883599a657c3c971487ac298bbdf0b46cf..305168889f4427fbfa26f5a33a7926f5f7c2dd98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -276,7 +276,7 @@ else() add_definitions(-DUNIX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -msse3 -Werror=return-type") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG -pg -Wall") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -mfpmath=sse") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -Wall -mfpmath=sse") set(OS_LIBS "dl") endif() diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index 6db9462d6a3d9d31c8786dc97cef2e12df2b6cec..e33ec641ca9b8a0b90340bad56088e524aa03e90 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -237,52 +237,94 @@ void ftl::gui::Camera::draw(std::vector<ftl::rgbd::FrameSet*> &fss) { if (data.size() > 0) { auto &d = *data.rbegin(); - //cv::Mat over_depth; - //over_depth.create(im1_.size(), CV_32F); + cv::Mat over_depth; + over_depth.create(im1_.size(), CV_32F); auto cam = ftl::rgbd::Camera::from(intrinsics_); - float screenWidth = intrinsics_->value("screen_size", 1.2f); // In meters + float screenWidth = intrinsics_->value("screen_size", 0.6f); // In meters float screenHeight = (9.0f/16.0f) * screenWidth; - //Eigen::Vector3f pc(-screenWidth/2.0f,-screenHeight/2.0f,0.0); - Eigen::Vector3f pa(-screenWidth/2.0f,screenHeight/2.0f,0.0); - //Eigen::Vector3f pb(screenWidth/2.0f,screenHeight/2.0f,0.0); + float screenDistance = (d.depth > cam.minDepth && d.depth < cam.maxDepth) ? d.depth : intrinsics_->value("screen_dist_default", 0.5f); // Face distance from screen in meters - float screenDistance = (d.depth > cam.minDepth && d.depth < cam.maxDepth) ? d.depth : intrinsics_->value("screen_dist_default", 1.2f); // Face distance from screen in meters - - auto pos = f.getLeft().screenToCam(d.box.x+(d.box.width/2), d.box.y+(d.box.height/2), -screenDistance); + auto pos = f.getLeft().screenToCam(d.box.x+(d.box.width/2), d.box.y+(d.box.height/2), screenDistance); Eigen::Vector3f eye; - eye[0] = pos.x; - eye[1] = -pos.y; - eye[2] = pos.z; + eye[0] = -pos.x; + eye[1] = pos.y; + eye[2] = -pos.z; + //eye[3] = 0; Eigen::Translation3f trans(eye); Eigen::Affine3f t(trans); Eigen::Matrix4f viewPose = t.matrix(); - // Top Left norm dist - Eigen::Vector3f tl = (pa - eye); - Eigen::Vector3f princ = tl; - - Eigen::Matrix4d windowPose; - windowPose.setIdentity(); - - Eigen::Matrix4d fakepose = viewPose.cast<double>().inverse() * state_.getPose(); - ftl::rgbd::Camera fakecam = cam; - fakecam.fx = screenDistance; //eye.norm(); - fakecam.fy = fakecam.fx; - fakecam.cx = (princ[0]) * fakecam.fx / princ[2] / screenWidth * cam.width; - fakecam.cy = ((princ[1]) * fakecam.fy / princ[2] / screenHeight * cam.height) - cam.height; - fakecam.fx = fakecam.fx / screenWidth * cam.width; - fakecam.fy = fakecam.fy / screenHeight * cam.height; + // Calculate where the screen is within current camera space + Eigen::Vector4f p1 = viewPose.cast<float>() * (Eigen::Vector4f(screenWidth/2.0, screenHeight/2.0, 0, 1)); + Eigen::Vector4f p2 = viewPose.cast<float>() * (Eigen::Vector4f(screenWidth/2.0, -screenHeight/2.0, 0, 1)); + Eigen::Vector4f p3 = viewPose.cast<float>() * (Eigen::Vector4f(-screenWidth/2.0, screenHeight/2.0, 0, 1)); + Eigen::Vector4f p4 = viewPose.cast<float>() * (Eigen::Vector4f(-screenWidth/2.0, -screenHeight/2.0, 0, 1)); + p1 = p1 / p1[3]; + p2 = p2 / p2[3]; + p3 = p3 / p3[3]; + p4 = p4 / p4[3]; + float2 p1screen = cam.camToScreen<float2>(make_float3(p1[0],p1[1],p1[2])); + float2 p2screen = cam.camToScreen<float2>(make_float3(p2[0],p2[1],p2[2])); + float2 p3screen = cam.camToScreen<float2>(make_float3(p3[0],p3[1],p3[2])); + float2 p4screen = cam.camToScreen<float2>(make_float3(p4[0],p4[1],p4[2])); + + //cv::line(im1_, cv::Point2f(p1screen.x, p1screen.y), cv::Point2f(p2screen.x, p2screen.y), cv::Scalar(255,0,255,0)); + //cv::line(im1_, cv::Point2f(p4screen.x, p4screen.y), cv::Point2f(p2screen.x, p2screen.y), cv::Scalar(255,0,255,0)); + //cv::line(im1_, cv::Point2f(p1screen.x, p1screen.y), cv::Point2f(p3screen.x, p3screen.y), cv::Scalar(255,0,255,0)); + //cv::line(im1_, cv::Point2f(p3screen.x, p3screen.y), cv::Point2f(p4screen.x, p4screen.y), cv::Scalar(255,0,255,0)); + //LOG(INFO) << "DRAW LINE: " << p1screen.x << "," << p1screen.y << " -> " << p2screen.x << "," << p2screen.y; + + std::vector<cv::Point2f> quad_pts; + std::vector<cv::Point2f> squre_pts; + quad_pts.push_back(cv::Point2f(p1screen.x,p1screen.y)); + quad_pts.push_back(cv::Point2f(p2screen.x,p2screen.y)); + quad_pts.push_back(cv::Point2f(p3screen.x,p3screen.y)); + quad_pts.push_back(cv::Point2f(p4screen.x,p4screen.y)); + squre_pts.push_back(cv::Point2f(0,0)); + squre_pts.push_back(cv::Point2f(0,cam.height)); + squre_pts.push_back(cv::Point2f(cam.width,0)); + squre_pts.push_back(cv::Point2f(cam.width,cam.height)); + + cv::Mat transmtx = cv::getPerspectiveTransform(quad_pts,squre_pts); + cv::Mat transformed = cv::Mat::zeros(im1_.rows, im1_.cols, CV_8UC4); + //cv::warpPerspective(im1_, im1_, transmtx, im1_.size()); + + ftl::render::ViewPort vp; + vp.x = std::min(p4screen.x, std::min(p3screen.x, std::min(p1screen.x,p2screen.x))); + vp.y = std::min(p4screen.y, std::min(p3screen.y, std::min(p1screen.y,p2screen.y))); + vp.width = std::max(p4screen.x, std::max(p3screen.x, std::max(p1screen.x,p2screen.x))) - vp.x; + vp.height = std::max(p4screen.y, std::max(p3screen.y, std::max(p1screen.y,p2screen.y))) - vp.y; + renderer_->setViewPort(ftl::render::ViewPortMode::Warping, vp); + + //Eigen::Matrix4d windowPose; + //windowPose.setIdentity(); + + //Eigen::Matrix4d fakepose = (viewPose.cast<double>()).inverse() * state_.getPose(); + //ftl::rgbd::Camera fakecam = cam; + + //eye[1] = -eye[1]; + //eye[2] = -eye[2]; + //Eigen::Translation3f trans2(eye); + //Eigen::Affine3f t2(trans2); + //Eigen::Matrix4f viewPose2 = t2.matrix(); // Use face tracked window pose for virtual camera - state_.getLeft() = fakecam; + //state_.getLeft() = fakecam; transform_ix_ = fset->id; // Disable keyboard/mouse pose setting + state_.setPose(transforms_[transform_ix_] * viewPose.cast<double>()); - //ftl::overlay::draw3DLine(state_.getLeft(), im1_, over_depth, state_.getPose().inverse() * Eigen::Vector4d(eye[0],eye[1],eye[2],1), state_.getPose().inverse() * Eigen::Vector4d(0,0,0,1), cv::Scalar(0,0,255,0)); + //Eigen::Vector4d pt1 = state_.getPose().inverse() * Eigen::Vector4d(eye[0],eye[1],eye[2],1); + //pt1 /= pt1[3]; + //Eigen::Vector4d pt2 = state_.getPose().inverse() * Eigen::Vector4d(0,0,0,1); + //pt2 /= pt2[3]; + + + //ftl::overlay::draw3DLine(state_.getLeft(), im1_, over_depth, pt1, pt2, cv::Scalar(0,0,255,0)); //ftl::overlay::drawRectangle(state_.getLeft(), im1_, over_depth, windowPose.inverse() * state_.getPose(), cv::Scalar(255,0,0,0), screenWidth, screenHeight); //ftl::overlay::drawCamera(state_.getLeft(), im1_, over_depth, fakecam, fakepose, cv::Scalar(255,0,255,255), 1.0,screen_->root()->value("show_frustrum", false)); } @@ -326,7 +368,7 @@ void ftl::gui::Camera::_draw(std::vector<ftl::rgbd::FrameSet*> &fss) { for (auto *fs : fss) { if (!usesFrameset(fs->id)) continue; - for (int i=0; i<fs->frames.size(); ++i) { + for (size_t i=0; i<fs->frames.size(); ++i) { auto pose = fs->frames[i].getPose().inverse() * state_.getPose(); Eigen::Vector4d pos = pose.inverse() * Eigen::Vector4d(0,0,0,1); pos /= pos[3]; @@ -396,7 +438,7 @@ void ftl::gui::Camera::update(std::vector<ftl::rgbd::FrameSet*> &fss) { ftl::rgbd::Frame *frame = nullptr; - if (fid_ >= fs->frames.size()) return; + if ((size_t)fid_ >= fs->frames.size()) return; frame = &fs->frames[fid_]; _downloadFrames(frame); auto n = frame->get<std::string>("name"); @@ -599,6 +641,8 @@ cv::Mat ftl::gui::Camera::visualizeActiveChannel() { break; case Channel::Right: result = im2_; + break; + default: break; } return result; } diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index f7c34113ad11973b6a5437591791e44fffc15f09..39e4d51f82be600103f91d23e4a0564fff7ca32f 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -4,21 +4,7 @@ set(REPSRC src/main.cpp - #src/voxel_scene.cpp - #src/ray_cast_sdf.cu src/camera_util.cu - #src/ray_cast_sdf.cpp - #src/virtual_source.cpp - #src/splat_render.cpp - #src/dibr.cu - #src/mls.cu - #src/depth_camera.cu - #src/depth_camera.cpp - #src/ilw/ilw.cpp - #src/ilw/ilw.cu - #src/ilw/fill.cu - #src/ilw/discontinuity.cu - #src/ilw/correspondence.cu src/reconstruction.cpp ) diff --git a/applications/reconstruct/src/ilw/correspondence.cu b/applications/reconstruct/src/ilw/correspondence.cu deleted file mode 100644 index 9fb440ae41bf6e1515d72759207154fa8224a588..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw/correspondence.cu +++ /dev/null @@ -1,198 +0,0 @@ -#include "ilw_cuda.hpp" -#include <ftl/cuda/weighting.hpp> - -using ftl::cuda::TextureObject; -using ftl::rgbd::Camera; -using ftl::cuda::Mask; - -#define T_PER_BLOCK 8 - -template<int FUNCTION> -__device__ float weightFunction(const ftl::cuda::ILWParams ¶ms, float dweight, float cweight); - -template <> -__device__ inline float weightFunction<0>(const ftl::cuda::ILWParams ¶ms, float dweight, float cweight) { - return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); -} - -template <> -__device__ inline float weightFunction<1>(const ftl::cuda::ILWParams ¶m, float dweight, float cweight) { - return (cweight * cweight * dweight); -} - -template <> -__device__ inline float weightFunction<2>(const ftl::cuda::ILWParams ¶m, float dweight, float cweight) { - return (dweight * dweight * cweight); -} - -template <> -__device__ inline float weightFunction<3>(const ftl::cuda::ILWParams ¶ms, float dweight, float cweight) { - return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight); -} - -template <> -__device__ inline float weightFunction<4>(const ftl::cuda::ILWParams ¶ms, float dweight, float cweight) { - return cweight; -} - -template<int COR_STEPS, int FUNCTION> -__global__ void correspondence_energy_vector_kernel( - TextureObject<float> d1, - TextureObject<float> d2, - TextureObject<uchar4> c1, - TextureObject<uchar4> c2, - TextureObject<float> dout, - TextureObject<float> conf, - TextureObject<int> mask, - float4x4 pose1, - float4x4 pose1_inv, - float4x4 pose2, // Inverse - Camera cam1, - Camera cam2, ftl::cuda::ILWParams params) { - - // Each warp picks point in p1 - //const int tid = (threadIdx.x + threadIdx.y * blockDim.x); - const int x = (blockIdx.x*blockDim.x + threadIdx.x); // / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - //const float3 world1 = make_float3(p1.tex2D(x, y)); - const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z; // Initial starting depth - if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return; - - // TODO: Temporary hack to ensure depth1 is present - //const float4 temp = vout.tex2D(x,y); - //vout(x,y) = make_float4(depth1, 0.0f, temp.z, temp.w); - - const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1); - - const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f); - - float bestdepth = 0.0f; - float bestweight = 0.0f; - float bestcolour = 0.0f; - float bestdweight = 0.0f; - float totalcolour = 0.0f; - int count = 0; - float contrib = 0.0f; - - const float step_interval = params.range / (COR_STEPS / 2); - - const float3 rayStep_world = pose1.getFloat3x3() * cam1.screenToCam(x,y,step_interval); - const float3 rayStart_2 = pose2 * world1; - const float3 rayStep_2 = pose2.getFloat3x3() * rayStep_world; - - // Project to p2 using cam2 - // Each thread takes a possible correspondence and calculates a weighting - //const int lane = tid % WARP_SIZE; - for (int i=0; i<COR_STEPS; ++i) { - const int j = i - (COR_STEPS/2); - const float depth_adjust = (float)j * step_interval + depth1; - - // Calculate adjusted depth 3D point in camera 2 space - const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust)); - const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos; - const float2 screen = cam2.camToScreen<float2>(camPos); - - if (screen.x >= cam2.width || screen.y >= cam2.height) continue; - - // Generate a depth correspondence value - const float depth2 = d2.tex2D(int(screen.x+0.5f), int(screen.y+0.5f)); - const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth); - //const float dweight = ftl::cuda::weighting(fabs(depth_adjust - depth1), 2.0f*params.range); - - // Generate a colour correspondence value - const auto colour2 = c2.tex2D(screen.x, screen.y); - const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth); - - const float weight = weightFunction<FUNCTION>(params, dweight, cweight); - - ++count; - contrib += weight; - bestcolour = max(cweight, bestcolour); - bestdweight = max(dweight, bestdweight); - totalcolour += cweight; - if (weight > bestweight) { - bestweight = weight; - bestdepth = depth_adjust; - } - } - - const float avgcolour = totalcolour/(float)count; - const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour; - - Mask m(mask.tex2D(x,y)); - - //if (bestweight > 0.0f) { - float old = conf.tex2D(x,y); - - if (bestweight * confidence > old) { - dout(x,y) = bestdepth; - conf(x,y) = bestweight * confidence; - } - //} - - // If a good enough match is found, mark dodgy depth as solid - if ((m.isFilled() || m.isDiscontinuity()) && (bestweight > params.match_threshold)) mask(x,y) = 0; -} - -void ftl::cuda::correspondence( - TextureObject<float> &d1, - TextureObject<float> &d2, - TextureObject<uchar4> &c1, - TextureObject<uchar4> &c2, - TextureObject<float> &dout, - TextureObject<float> &conf, - TextureObject<int> &mask, - float4x4 &pose1, - float4x4 &pose1_inv, - float4x4 &pose2, - const Camera &cam1, - const Camera &cam2, const ILWParams ¶ms, int func, - cudaStream_t stream) { - - const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - //printf("COR SIZE %d,%d\n", p1.width(), p1.height()); - - switch (func) { - case 0: correspondence_energy_vector_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 1: correspondence_energy_vector_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 2: correspondence_energy_vector_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 3: correspondence_energy_vector_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - case 4: correspondence_energy_vector_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, dout, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break; - } - - cudaSafeCall( cudaGetLastError() ); -} - -//============================================================================== - -__global__ void mask_filter_kernel( - TextureObject<float> depth, - TextureObject<int> mask) { - - const int x = (blockIdx.x*blockDim.x + threadIdx.x); - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x >= 0 && x < depth.width() && y >=0 && y < depth.height()) { - Mask m(mask.tex2D(x,y)); - if (m.isFilled()) { - depth(x,y) = MINF; - } - } -} - - -void ftl::cuda::mask_filter( - TextureObject<float> &depth, - TextureObject<int> &mask, - cudaStream_t stream) { - const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - mask_filter_kernel<<<gridSize, blockSize, 0, stream>>>( - depth, mask - ); - cudaSafeCall( cudaGetLastError() ); -} diff --git a/applications/reconstruct/src/ilw/discontinuity.cu b/applications/reconstruct/src/ilw/discontinuity.cu deleted file mode 100644 index fcadde03efa6078c8041a244b717c99fbb24581f..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw/discontinuity.cu +++ /dev/null @@ -1,56 +0,0 @@ -#include "ilw_cuda.hpp" - -#define T_PER_BLOCK 8 - -using ftl::cuda::Mask; - -template <int RADIUS> -__global__ void discontinuity_kernel(ftl::cuda::TextureObject<int> mask_out, ftl::cuda::TextureObject<float> depth, - const cv::Size size, const double minDepth, const double maxDepth) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < size.width && y < size.height) { - Mask mask(0); - - const float d = depth.tex2D((int)x, (int)y); - - // Calculate depth between 0.0 and 1.0 - //float p = (d - params.minDepth) / (params.maxDepth - params.minDepth); - - if (d >= minDepth && d <= maxDepth) { - /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */ - // Is there a discontinuity nearby? - for (int u=-RADIUS; u<=RADIUS; ++u) { - for (int v=-RADIUS; v<=RADIUS; ++v) { - // If yes, the flag using w = -1 - if (fabs(depth.tex2D((int)x+u, (int)y+v) - d) > 0.1f) mask.isDiscontinuity(true); - } - } - } - - mask_out(x,y) = (int)mask; - } -} - -void ftl::cuda::discontinuity(ftl::cuda::TextureObject<int> &mask_out, ftl::cuda::TextureObject<float> &depth, - const cv::Size size, const double minDepth, const double maxDepth, - uint discon, cudaStream_t stream) { - - const dim3 gridSize((size.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (size.height + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - switch (discon) { - case 5 : discontinuity_kernel<5><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break; - case 4 : discontinuity_kernel<4><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break; - case 3 : discontinuity_kernel<3><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break; - case 2 : discontinuity_kernel<2><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break; - case 1 : discontinuity_kernel<1><<<gridSize, blockSize, 0, stream>>>(mask_out, depth, size, minDepth, maxDepth); break; - default: break; - } - cudaSafeCall( cudaGetLastError() ); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif -} diff --git a/applications/reconstruct/src/ilw/fill.cu b/applications/reconstruct/src/ilw/fill.cu deleted file mode 100644 index 4109c1034d045ecdede9e28582cd19df670f5a63..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw/fill.cu +++ /dev/null @@ -1,71 +0,0 @@ -#include "ilw_cuda.hpp" -#include <ftl/cuda/weighting.hpp> - -using ftl::cuda::Mask; - -#define T_PER_BLOCK 8 - -template <int RADIUS> -__global__ void preprocess_kernel( - ftl::cuda::TextureObject<float> depth_in, - ftl::cuda::TextureObject<float> depth_out, - ftl::cuda::TextureObject<uchar4> colour, - ftl::cuda::TextureObject<int> mask, - ftl::rgbd::Camera camera, - ftl::cuda::ILWParams params) { - - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - float d = depth_in.tex2D((int)x,(int)y); - Mask main_mask(mask.tex2D((int)x,(int)y)); - uchar4 c = colour.tex2D((int)x,(int)y); - - // Calculate discontinuity mask - - // Fill missing depths - if (d < camera.minDepth || d > camera.maxDepth) { - float depth_accum = 0.0f; - float contrib = 0.0f; - - int v=0; - //for (int v=-RADIUS; v<=RADIUS; ++v) { - for (int u=-RADIUS; u<=RADIUS; ++u) { - uchar4 c2 = colour.tex2D((int)x+u,(int)y+v); - float d2 = depth_in.tex2D((int)x+u,(int)y+v); - Mask m(mask.tex2D((int)x+u,(int)y+v)); - - if (!m.isDiscontinuity() && d2 >= camera.minDepth && d2 <= camera.maxDepth) { - float w = ftl::cuda::colourWeighting(c, c2, params.fill_match); - depth_accum += d2*w; - contrib += w; - } - } - //} - - if (contrib > params.fill_threshold) { - d = depth_accum / contrib; - main_mask.isFilled(true); - } - } - - mask(x,y) = (int)main_mask; - depth_out(x,y) = d; -} - -void ftl::cuda::preprocess_depth( - ftl::cuda::TextureObject<float> &depth_in, - ftl::cuda::TextureObject<float> &depth_out, - ftl::cuda::TextureObject<uchar4> &colour, - ftl::cuda::TextureObject<int> &mask, - const ftl::rgbd::Camera &camera, - const ftl::cuda::ILWParams ¶ms, - cudaStream_t stream) { - - const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - preprocess_kernel<10><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, colour, mask, camera, params); - - cudaSafeCall( cudaGetLastError() ); -} diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp deleted file mode 100644 index a686f35c337573a04415b0f13a598ec8a3538057..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw/ilw.cpp +++ /dev/null @@ -1,363 +0,0 @@ -#include "ilw.hpp" -#include <ftl/utility/matrix_conversion.hpp> -#include <ftl/rgbd/source.hpp> -#include <ftl/cuda/points.hpp> -#include <loguru.hpp> - -#include "ilw_cuda.hpp" - -using ftl::ILW; -using ftl::detail::ILWData; -using ftl::codecs::Channel; -using ftl::codecs::Channels; -using ftl::rgbd::Format; -using cv::cuda::GpuMat; - -// TODO: Put in common place -static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { - Eigen::Affine3d rx = - Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0))); - Eigen::Affine3d ry = - Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0))); - Eigen::Affine3d rz = - Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1))); - return rz * rx * ry; -} - -ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { - enabled_ = value("ilw_align", false); - iterations_ = value("iterations", 4); - motion_rate_ = value("motion_rate", 0.8f); - motion_window_ = value("motion_window", 3); - use_lab_ = value("use_Lab", false); - params_.colour_smooth = value("colour_smooth", 50.0f); - params_.spatial_smooth = value("spatial_smooth", 0.04f); - params_.cost_ratio = value("cost_ratio", 0.2f); - params_.cost_threshold = value("cost_threshold", 1.0f); - discon_mask_ = value("discontinuity_mask",2); - fill_depth_ = value("fill_depth", false); - - on("fill_depth", [this](const ftl::config::Event &e) { - fill_depth_ = value("fill_depth", false); - }); - - on("ilw_align", [this](const ftl::config::Event &e) { - enabled_ = value("ilw_align", false); - }); - - on("iterations", [this](const ftl::config::Event &e) { - iterations_ = value("iterations", 4); - }); - - on("motion_rate", [this](const ftl::config::Event &e) { - motion_rate_ = value("motion_rate", 0.4f); - }); - - on("motion_window", [this](const ftl::config::Event &e) { - motion_window_ = value("motion_window", 3); - }); - - on("discontinuity_mask", [this](const ftl::config::Event &e) { - discon_mask_ = value("discontinuity_mask", 2); - }); - - on("use_Lab", [this](const ftl::config::Event &e) { - use_lab_ = value("use_Lab", false); - }); - - on("colour_smooth", [this](const ftl::config::Event &e) { - params_.colour_smooth = value("colour_smooth", 50.0f); - }); - - on("spatial_smooth", [this](const ftl::config::Event &e) { - params_.spatial_smooth = value("spatial_smooth", 0.04f); - }); - - on("cost_ratio", [this](const ftl::config::Event &e) { - params_.cost_ratio = value("cost_ratio", 0.2f); - }); - - on("cost_threshold", [this](const ftl::config::Event &e) { - params_.cost_threshold = value("cost_threshold", 1.0f); - }); - - params_.flags = 0; - if (value("ignore_bad", false)) params_.flags |= ftl::cuda::kILWFlag_IgnoreBad; - if (value("ignore_bad_colour", false)) params_.flags |= ftl::cuda::kILWFlag_SkipBadColour; - if (value("restrict_z", true)) params_.flags |= ftl::cuda::kILWFlag_RestrictZ; - if (value("colour_confidence_only", false)) params_.flags |= ftl::cuda::kILWFlag_ColourConfidenceOnly; - - on("ignore_bad", [this](const ftl::config::Event &e) { - if (value("ignore_bad", false)) params_.flags |= ftl::cuda::kILWFlag_IgnoreBad; - else params_.flags &= ~ftl::cuda::kILWFlag_IgnoreBad; - }); - - on("ignore_bad_colour", [this](const ftl::config::Event &e) { - if (value("ignore_bad_colour", false)) params_.flags |= ftl::cuda::kILWFlag_SkipBadColour; - else params_.flags &= ~ftl::cuda::kILWFlag_SkipBadColour; - }); - - on("restrict_z", [this](const ftl::config::Event &e) { - if (value("restrict_z", false)) params_.flags |= ftl::cuda::kILWFlag_RestrictZ; - else params_.flags &= ~ftl::cuda::kILWFlag_RestrictZ; - }); - - on("colour_confidence_only", [this](const ftl::config::Event &e) { - if (value("colour_confidence_only", false)) params_.flags |= ftl::cuda::kILWFlag_ColourConfidenceOnly; - else params_.flags &= ~ftl::cuda::kILWFlag_ColourConfidenceOnly; - }); - - if (config["clipping"].is_object()) { - auto &c = config["clipping"]; - float rx = c.value("pitch", 0.0f); - float ry = c.value("yaw", 0.0f); - float rz = c.value("roll", 0.0f); - float x = c.value("x", 0.0f); - float y = c.value("y", 0.0f); - float z = c.value("z", 0.0f); - float width = c.value("width", 1.0f); - float height = c.value("height", 1.0f); - float depth = c.value("depth", 1.0f); - - Eigen::Affine3f r = create_rotation_matrix(rx, ry, rz).cast<float>(); - Eigen::Translation3f trans(Eigen::Vector3f(x,y,z)); - Eigen::Affine3f t(trans); - - clip_.origin = MatrixConversion::toCUDA(r.matrix() * t.matrix()); - clip_.size = make_float3(width, height, depth); - clipping_ = value("clipping_enabled", true); - } else { - clipping_ = false; - } - - on("clipping_enabled", [this](const ftl::config::Event &e) { - clipping_ = value("clipping_enabled", true); - }); - - cudaSafeCall(cudaStreamCreate(&stream_)); -} - -ILW::~ILW() { - -} - -bool ILW::process(ftl::rgbd::FrameSet &fs) { - if (!enabled_) return false; - - //fs.upload(Channel::Colour + Channel::Depth, stream_); - _phase0(fs, stream_); - - params_.range = value("search_range", 0.05f); - params_.fill_match = value("fill_match", 50.0f); - params_.fill_threshold = value("fill_threshold", 0.0f); - params_.match_threshold = value("match_threshold", 0.3f); - - for (int i=0; i<iterations_; ++i) { - _phase1(fs, value("cost_function",3), stream_); - //for (int j=0; j<3; ++j) { - _phase2(fs, motion_rate_, stream_); - //} - - params_.range *= value("search_reduce", 0.9f); - // TODO: Break if no time left - - //cudaSafeCall(cudaStreamSynchronize(stream_)); - } - - for (size_t i=0; i<fs.frames.size(); ++i) { - auto &f = fs.frames[i]; - auto *s = fs.sources[i]; - - auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); - auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); - ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, discon_mask_, stream_); - } - - cudaSafeCall(cudaStreamSynchronize(stream_)); - return true; -} - -bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { - // Make points channel... - for (size_t i=0; i<fs.frames.size(); ++i) { - auto &f = fs.frames[i]; - auto *s = fs.sources[i]; - - if (f.empty(Channel::Depth + Channel::Colour)) { - LOG(ERROR) << "Missing required channel"; - continue; - } - - //auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); - auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); - //ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, discon_mask_, stream); - - // TODO: Create energy vector texture and clear it - // Create energy and clear it - - // Convert colour from BGR to BGRA if needed - if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) { - cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - // Convert to 4 channel colour - auto &col = f.get<GpuMat>(Channel::Colour); - GpuMat tmp(col.size(), CV_8UC4); - cv::cuda::swap(col, tmp); - if (use_lab_) cv::cuda::cvtColor(tmp,tmp, cv::COLOR_BGR2Lab, 0, cvstream); - cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA, 0, cvstream); - } - - // Clip first? - if (clipping_) { - auto clip = clip_; - clip.origin = clip.origin * pose; - ftl::cuda::clipping(f.createTexture<float>(Channel::Depth), s->parameters(), clip, stream); - } - - f.createTexture<float>(Channel::Depth2, Format<float>(f.get<GpuMat>(Channel::Colour).size())); - f.createTexture<float>(Channel::Confidence, Format<float>(f.get<GpuMat>(Channel::Colour).size())); - //f.createTexture<int>(Channel::Mask, Format<int>(f.get<GpuMat>(Channel::Colour).size())); - f.createTexture<uchar4>(Channel::Colour); - f.createTexture<float>(Channel::Depth); - - //f.get<GpuMat>(Channel::Mask).setTo(cv::Scalar(0)); - } - - //cudaSafeCall(cudaStreamSynchronize(stream_)); - - return true; -} - -bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) { - // Run correspondence kernel to create an energy vector - cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - - // Find discontinuity mask - /*for (size_t i=0; i<fs.frames.size(); ++i) { - auto &f = fs.frames[i]; - auto s = fs.sources[i]; - - ftl::cuda::discontinuity( - f.getTexture<int>(Channel::Mask), - f.getTexture<float>(Channel::Depth), - s->parameters(), - discon_mask_, - stream - ); - }*/ - - // First do any preprocessing - if (fill_depth_) { - for (size_t i=0; i<fs.frames.size(); ++i) { - auto &f = fs.frames[i]; - auto s = fs.sources[i]; - - ftl::cuda::preprocess_depth( - f.getTexture<float>(Channel::Depth), - f.getTexture<float>(Channel::Depth2), - f.getTexture<uchar4>(Channel::Colour), - f.getTexture<int>(Channel::Mask), - s->parameters(), - params_, - stream - ); - - //cv::cuda::swap(f.get<GpuMat>(Channel::Depth),f.get<GpuMat>(Channel::Depth2)); - f.swapChannels(Channel::Depth, Channel::Depth2); - } - } - - //cudaSafeCall(cudaStreamSynchronize(stream_)); - - // For each camera combination - for (size_t i=0; i<fs.frames.size(); ++i) { - auto &f1 = fs.frames[i]; - f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream); - f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream); - - Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0); - d1 = fs.sources[i]->getPose() * d1; - - for (size_t j=0; j<fs.frames.size(); ++j) { - if (i == j) continue; - - //LOG(INFO) << "Running phase1"; - - auto &f2 = fs.frames[j]; - auto s1 = fs.sources[i]; - auto s2 = fs.sources[j]; - - // Are cameras facing similar enough direction? - Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0); - d2 = fs.sources[j]->getPose() * d2; - // No, so skip this combination - if (d1.dot(d2) <= 0.0) continue; - - auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>()); - auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse()); - auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); - - try { - //Calculate energy vector to best correspondence - ftl::cuda::correspondence( - f1.getTexture<float>(Channel::Depth), - f2.getTexture<float>(Channel::Depth), - f1.getTexture<uchar4>(Channel::Colour), - f2.getTexture<uchar4>(Channel::Colour), - // TODO: Add normals and other things... - f1.getTexture<float>(Channel::Depth2), - f1.getTexture<float>(Channel::Confidence), - f1.getTexture<int>(Channel::Mask), - pose1, - pose1_inv, - pose2, - s1->parameters(), - s2->parameters(), - params_, - win, - stream - ); - } catch (ftl::exception &e) { - LOG(ERROR) << "Exception in correspondence: " << e.what(); - } - - //LOG(INFO) << "Correspondences done... " << i; - } - } - - //cudaSafeCall(cudaStreamSynchronize(stream_)); - - return true; -} - -bool ILW::_phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream) { - // Run energies and motion kernel - - // Smooth vectors across a window and iteratively - // strongly disagreeing vectors should cancel out - // A weak vector is overriden by a stronger one. - - for (size_t i=0; i<fs.frames.size(); ++i) { - auto &f = fs.frames[i]; - - auto pose = MatrixConversion::toCUDA(fs.sources[i]->getPose().cast<float>()); //.inverse()); - - ftl::cuda::move_points( - f.getTexture<float>(Channel::Depth), - f.getTexture<float>(Channel::Depth2), - f.getTexture<float>(Channel::Confidence), - fs.sources[i]->parameters(), - pose, - params_, - rate, - motion_window_, - stream - ); - - if (f.hasChannel(Channel::Mask)) { - ftl::cuda::mask_filter(f.getTexture<float>(Channel::Depth), - f.getTexture<int>(Channel::Mask), stream_); - } - } - - return true; -} diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu deleted file mode 100644 index 710f7521d4a508dd8fc968f4648a5402f8045fd7..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw/ilw.cu +++ /dev/null @@ -1,112 +0,0 @@ -#include "ilw_cuda.hpp" -#include <ftl/cuda/weighting.hpp> - -using ftl::cuda::TextureObject; -using ftl::rgbd::Camera; - -#define WARP_SIZE 32 -#define T_PER_BLOCK 8 -#define FULL_MASK 0xffffffff - -__device__ inline float warpMin(float e) { - for (int i = WARP_SIZE/2; i > 0; i /= 2) { - const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); - e = min(e, other); - } - return e; -} - -__device__ inline float warpSum(float e) { - for (int i = WARP_SIZE/2; i > 0; i /= 2) { - const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); - e += other; - } - return e; -} - -//============================================================================== - - - -//============================================================================== - - - -//============================================================================== - -//#define MOTION_RADIUS 9 - -template <int MOTION_RADIUS> -__global__ void move_points_kernel( - ftl::cuda::TextureObject<float> d_old, - ftl::cuda::TextureObject<float> d_new, - ftl::cuda::TextureObject<float> conf, - ftl::rgbd::Camera camera, - float4x4 pose, - ftl::cuda::ILWParams params, - float rate) { - - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - const float d0_new = d_new.tex2D((int)x,(int)y); - const float d0_old = d_old.tex2D((int)x,(int)y); - if (d0_new == 0.0f) return; // No correspondence found - - if (x < d_old.width() && y < d_old.height()) { - //const float4 world = p(x,y); - //if (world.x == MINF) return; - - float delta = 0.0f; //make_float4(0.0f, 0.0f, 0.0f, 0.0f); //ev.tex2D((int)x,(int)y); - float contrib = 0.0f; - - // Calculate screen space distortion with neighbours - for (int v=-MOTION_RADIUS; v<=MOTION_RADIUS; ++v) { - for (int u=-MOTION_RADIUS; u<=MOTION_RADIUS; ++u) { - const float dn_new = d_new.tex2D((int)x+u,(int)y+v); - const float dn_old = d_old.tex2D((int)x+u,(int)y+v); - const float confn = conf.tex2D((int)x+u,(int)y+v); - //const float3 pn = make_float3(p.tex2D((int)x+u,(int)y+v)); - //if (pn.x == MINF) continue; - if (dn_new == 0.0f) continue; // Neighbour has no new correspondence - - const float s = ftl::cuda::spatialWeighting(camera.screenToCam(x,y,d0_new), camera.screenToCam(x+u,y+v,dn_new), params.range); // ftl::cuda::weighting(fabs(d0_new - dn_new), params.range); - contrib += (confn+0.01f) * s; - delta += (confn+0.01f) * s * ((confn == 0.0f) ? dn_old : dn_new); - } - } - - if (contrib > 0.0f) { - //const float3 newworld = pose * camera.screenToCam(x, y, vec0.x + rate * ((delta / contrib) - vec0.x)); - //p(x,y) = make_float4(newworld, world.w); //world + rate * (vec / contrib); - - d_old(x,y) = d0_old + rate * ((delta / contrib) - d0_old); - } - } -} - - -void ftl::cuda::move_points( - ftl::cuda::TextureObject<float> &d_old, - ftl::cuda::TextureObject<float> &d_new, - ftl::cuda::TextureObject<float> &conf, - const ftl::rgbd::Camera &camera, - const float4x4 &pose, - const ftl::cuda::ILWParams ¶ms, - float rate, - int radius, - cudaStream_t stream) { - - const dim3 gridSize((d_old.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d_old.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - switch (radius) { - case 9 : move_points_kernel<9><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break; - case 5 : move_points_kernel<5><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break; - case 3 : move_points_kernel<3><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break; - case 1 : move_points_kernel<1><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break; - case 0 : move_points_kernel<0><<<gridSize, blockSize, 0, stream>>>(d_old,d_new,conf,camera, pose, params, rate); break; - } - - cudaSafeCall( cudaGetLastError() ); -} diff --git a/applications/reconstruct/src/ilw/ilw.hpp b/applications/reconstruct/src/ilw/ilw.hpp deleted file mode 100644 index 78251832d5647233082c8c59072084c6b1d60559..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw/ilw.hpp +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef _FTL_RECONSTRUCT_ILW_HPP_ -#define _FTL_RECONSTRUCT_ILW_HPP_ - -#include <ftl/cuda_common.hpp> -#include <ftl/rgbd/frameset.hpp> -#include <ftl/configurable.hpp> -#include <vector> - -#include "ilw_cuda.hpp" - -#include <ftl/cuda/points.hpp> - -namespace ftl { - -namespace detail { -struct ILWData{ - // x,y,z + confidence - ftl::cuda::TextureObject<float4> correspondence; - - ftl::cuda::TextureObject<float4> points; - - // Residual potential energy - ftl::cuda::TextureObject<float> residual; - - // Flow magnitude - ftl::cuda::TextureObject<float> flow; -}; -} - -/** - * For a set of sources, perform Iterative Lattice Warping to correct the - * location of points between the cameras. The algorithm finds possible - * correspondences and warps the original pixel lattice of points in each - * camera towards the correspondences, iterating the process as many times as - * possible. The result is that both local and global adjustment is made to the - * point clouds to improve micro alignment that may have been incorrect due to - * either inaccurate camera pose estimation or noise/errors in the depth maps. - */ -class ILW : public ftl::Configurable { - public: - explicit ILW(nlohmann::json &config); - ~ILW(); - - /** - * Take a frameset and perform the iterative lattice warping. - */ - bool process(ftl::rgbd::FrameSet &fs); - - inline bool isLabColour() const { return use_lab_; } - - private: - /* - * Initialise data. - */ - bool _phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream); - - /* - * Find possible correspondences and a confidence value. - */ - bool _phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream); - - /* - * Calculate energies and move the points. - */ - bool _phase2(ftl::rgbd::FrameSet &fs, float rate, cudaStream_t stream); - - std::vector<detail::ILWData> data_; - bool enabled_; - ftl::cuda::ILWParams params_; - int iterations_; - float motion_rate_; - int motion_window_; - bool use_lab_; - int discon_mask_; - bool fill_depth_; - ftl::cuda::ClipSpace clip_; - bool clipping_; - - cudaStream_t stream_; -}; - -} - -#endif // _FTL_RECONSTRUCT_ILW_HPP_ diff --git a/applications/reconstruct/src/ilw/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp deleted file mode 100644 index 94e522347bb64d9b0b091c3f0cd4914b8abd91c2..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw/ilw_cuda.hpp +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef _FTL_ILW_CUDA_HPP_ -#define _FTL_ILW_CUDA_HPP_ - -#include <ftl/cuda_common.hpp> -#include <ftl/rgbd/camera.hpp> -#include <ftl/cuda_matrix_util.hpp> -#include <ftl/cuda/mask.hpp> - -namespace ftl { -namespace cuda { - -struct ILWParams { - float spatial_smooth; - float colour_smooth; - float fill_match; - float fill_threshold; - float match_threshold; - float cost_ratio; - float cost_threshold; - float range; - uint flags; -}; - -static const uint kILWFlag_IgnoreBad = 0x0001; -static const uint kILWFlag_RestrictZ = 0x0002; -static const uint kILWFlag_SkipBadColour = 0x0004; -static const uint kILWFlag_ColourConfidenceOnly = 0x0008; - -void discontinuity( - ftl::cuda::TextureObject<int> &mask_out, - ftl::cuda::TextureObject<float> &depth, - const cv::Size size, - const double minDepth, - const double maxDepth, - uint discon, cudaStream_t stream -); - -void mask_filter( - ftl::cuda::TextureObject<float> &depth, - ftl::cuda::TextureObject<int> &mask, - cudaStream_t stream); - -void preprocess_depth( - ftl::cuda::TextureObject<float> &depth_in, - ftl::cuda::TextureObject<float> &depth_out, - ftl::cuda::TextureObject<uchar4> &colour, - ftl::cuda::TextureObject<int> &mask, - const ftl::rgbd::Camera &camera, - const ILWParams ¶ms, - cudaStream_t stream -); - -void correspondence( - ftl::cuda::TextureObject<float> &d1, - ftl::cuda::TextureObject<float> &d2, - ftl::cuda::TextureObject<uchar4> &c1, - ftl::cuda::TextureObject<uchar4> &c2, - ftl::cuda::TextureObject<float> &dout, - ftl::cuda::TextureObject<float> &conf, - ftl::cuda::TextureObject<int> &mask, - float4x4 &pose1, - float4x4 &pose1_inv, - float4x4 &pose2, - const ftl::rgbd::Camera &cam1, - const ftl::rgbd::Camera &cam2, - const ILWParams ¶ms, int win, - cudaStream_t stream -); - -void move_points( - ftl::cuda::TextureObject<float> &d_old, - ftl::cuda::TextureObject<float> &d_new, - ftl::cuda::TextureObject<float> &conf, - const ftl::rgbd::Camera &camera, - const float4x4 &pose, - const ILWParams ¶ms, - float rate, - int radius, - cudaStream_t stream -); - -} -} - -#endif // _FTL_ILW_CUDA_HPP_ diff --git a/applications/reconstruct/src/mls/mls.cu b/applications/reconstruct/src/mls/mls.cu deleted file mode 100644 index 4de8a07db023d21620f605cd3556913aeafe06cb..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/mls/mls.cu +++ /dev/null @@ -1,273 +0,0 @@ -#include <ftl/cuda_common.hpp> -#include <ftl/cuda_util.hpp> -#include <ftl/depth_camera.hpp> -#include "depth_camera_cuda.hpp" - -#include "mls_cuda.hpp" - -#define T_PER_BLOCK 16 -#define MINF __int_as_float(0xff800000) - -using ftl::voxhash::DepthCameraCUDA; -using ftl::voxhash::HashData; -using ftl::voxhash::HashParams; -using ftl::cuda::TextureObject; -using ftl::render::SplatParams; - -extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS]; -extern __constant__ HashParams c_hashParams; - -/// ===== MLS Smooth - -/* - * Kim, K., Chalidabhongse, T. H., Harwood, D., & Davis, L. (2005). - * Real-time foreground-background segmentation using codebook model. - * Real-Time Imaging. https://doi.org/10.1016/j.rti.2004.12.004 - */ - __device__ float colordiffFloat(const uchar4 &pa, const uchar4 &pb) { - const float x_2 = pb.x * pb.x + pb.y * pb.y + pb.z * pb.z; - const float v_2 = pa.x * pa.x + pa.y * pa.y + pa.z * pa.z; - const float xv_2 = powf(float(pb.x * pa.x + pb.y * pa.y + pb.z * pa.z), 2.0f); - const float p_2 = xv_2 / v_2; - return sqrt(x_2 - p_2); -} - -__device__ float colordiffFloat2(const uchar4 &pa, const uchar4 &pb) { - float3 delta = make_float3((float)pa.x - (float)pb.x, (float)pa.y - (float)pb.y, (float)pa.z - (float)pb.z); - return length(delta); -} - -/* - * Colour weighting as suggested in: - * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014. - * c = colour distance - */ - __device__ float colourWeighting(float c) { - const float h = c_hashParams.m_colourSmoothing; - if (c >= h) return 0.0f; - float ch = c / h; - ch = 1.0f - ch*ch; - return ch*ch*ch*ch; -} - -#define WINDOW_RADIUS 5 - -__device__ float mlsCamera(int cam, const float3 &mPos, uchar4 c1, float3 &wpos) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - const float3 pf = camera.poseInverse * mPos; - float3 pos = make_float3(0.0f, 0.0f, 0.0f); - const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf)); - float weights = 0.0f; - - //#pragma unroll - for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) { - for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) { - //if (screenPos.x+u < width && screenPos.y+v < height) { //on screen - float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v); - const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth); - float weight = ftl::cuda::spatialWeighting(length(pf - camPos), c_hashParams.m_spatialSmoothing); - - if (weight > 0.0f) { - uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v); - weight *= colourWeighting(colordiffFloat2(c1,c2)); - - if (weight > 0.0f) { - wpos += weight* (camera.pose * camPos); - weights += weight; - } - } - //} - } - } - - //wpos += (camera.pose * pos); - - return weights; -} - -__device__ float mlsCameraNoColour(int cam, const float3 &mPos, uchar4 c1, const float4 &norm, float3 &wpos, float h) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - const float3 pf = camera.poseInverse * mPos; - float3 pos = make_float3(0.0f, 0.0f, 0.0f); - const uint2 screenPos = make_uint2(camera.params.cameraToKinectScreenInt(pf)); - float weights = 0.0f; - - //#pragma unroll - for (int v=-WINDOW_RADIUS; v<=WINDOW_RADIUS; ++v) { - for (int u=-WINDOW_RADIUS; u<=WINDOW_RADIUS; ++u) { - //if (screenPos.x+u < width && screenPos.y+v < height) { //on creen - float depth = tex2D<float>(camera.depth, screenPos.x+u, screenPos.y+v); - const float3 camPos = camera.params.kinectDepthToSkeleton(screenPos.x+u, screenPos.y+v, depth); - - // TODO:(Nick) dot product of normals < 0 means the point - // should be ignored with a weight of 0 since it is facing the wrong direction - // May be good to simply weight using the dot product to give - // a stronger weight to those whose normals are closer - - float weight = ftl::cuda::spatialWeighting(length(pf - camPos), h); - - if (weight > 0.0f) { - float4 n2 = tex2D<float4>(camera.normal, screenPos.x+u, screenPos.y+v); - if (dot(make_float3(norm), make_float3(n2)) > 0.0f) { - - uchar4 c2 = tex2D<uchar4>(camera.colour, screenPos.x+u, screenPos.y+v); - - if (colourWeighting(colordiffFloat2(c1,c2)) > 0.0f) { - pos += weight*camPos; // (camera.pose * camPos); - weights += weight; - } - } - } - //} - } - } - - if (weights > 0.0f) wpos += (camera.pose * (pos / weights)) * weights; - - return weights; -} - - -__global__ void mls_smooth_kernel(ftl::cuda::TextureObject<float4> output, HashParams hashParams, int numcams, int cam) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - const int width = output.width(); - const int height = output.height(); - - const DepthCameraCUDA &mainCamera = c_cameras[cam]; - - if (x < width && y < height) { - - const float depth = tex2D<float>(mainCamera.depth, x, y); - const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y); - const float4 norm = tex2D<float4>(mainCamera.normal, x, y); - //if (x == 400 && y == 200) printf("NORMX: %f\n", norm.x); - - float3 wpos = make_float3(0.0f); - float3 wnorm = make_float3(0.0f); - float weights = 0.0f; - - if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) { - float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth); - - if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x && - mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y && - mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) { - - if (hashParams.m_flags & ftl::voxhash::kFlagMLS) { - for (uint cam2=0; cam2<numcams; ++cam2) { - //if (cam2 == cam) weights += mlsCameraNoColour(cam2, mPos, c1, wpos, c_hashParams.m_spatialSmoothing*0.1f); //weights += 0.5*mlsCamera(cam2, mPos, c1, wpos); - weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing); //*((cam == cam2)? 0.1f : 5.0f)); - - // Previous approach - //if (cam2 == cam) continue; - //weights += mlsCameraBest(cam2, mPos, c1, wpos); - } - wpos /= weights; - } else { - weights = 1000.0f; - wpos = mPos; - } - - //output(x,y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF); - - if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f); - - //const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(mainCamera.poseInverse * wpos)); - //if (screenPos.x < output.width() && screenPos.y < output.height()) { - // output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF); - //} - } - } - } -} - -void ftl::cuda::mls_smooth(TextureObject<float4> &output, const HashParams &hashParams, int numcams, int cam, cudaStream_t stream) { - const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - mls_smooth_kernel<<<gridSize, blockSize, 0, stream>>>(output, hashParams, numcams, cam); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif -} - - -// ===== Render Depth using MLS ================================================ - -#define MAX_UPSAMPLE 5 -#define SAMPLE_BUFFER ((2*MAX_UPSAMPLE+1)*(2*MAX_UPSAMPLE+1)) -#define WARP_SIZE 32 -#define BLOCK_WIDTH 4 -#define MLS_RADIUS 5 -#define MLS_WIDTH (2*MLS_RADIUS+1) -#define MLS_SAMPLES (MLS_WIDTH*MLS_WIDTH) - -__global__ void mls_render_depth_kernel(const TextureObject<int> input, TextureObject<int> output, SplatParams params, int numcams) { - /*const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - const int width = output.width(); - const int height = output.height(); - - if (x < width && y < height) { - - const float depth = tex2D<float>(mainCamera.depth, x, y); - const uchar4 c1 = tex2D<uchar4>(mainCamera.colour, x, y); - const float4 norm = tex2D<float4>(mainCamera.normal, x, y); - //if (x == 400 && y == 200) printf("NORMX: %f\n", norm.x); - - float3 wpos = make_float3(0.0f); - float3 wnorm = make_float3(0.0f); - float weights = 0.0f; - - if (depth >= mainCamera.params.m_sensorDepthWorldMin && depth <= mainCamera.params.m_sensorDepthWorldMax) { - float3 mPos = mainCamera.pose * mainCamera.params.kinectDepthToSkeleton(x, y, depth); - - if ((!(hashParams.m_flags & ftl::voxhash::kFlagClipping)) || (mPos.x > hashParams.m_minBounds.x && mPos.x < hashParams.m_maxBounds.x && - mPos.y > hashParams.m_minBounds.y && mPos.y < hashParams.m_maxBounds.y && - mPos.z > hashParams.m_minBounds.z && mPos.z < hashParams.m_maxBounds.z)) { - - if (hashParams.m_flags & ftl::voxhash::kFlagMLS) { - for (uint cam2=0; cam2<numcams; ++cam2) { - //if (cam2 == cam) weights += mlsCameraNoColour(cam2, mPos, c1, wpos, c_hashParams.m_spatialSmoothing*0.1f); //weights += 0.5*mlsCamera(cam2, mPos, c1, wpos); - weights += mlsCameraNoColour(cam2, mPos, c1, norm, wpos, c_hashParams.m_spatialSmoothing); //*((cam == cam2)? 0.1f : 5.0f)); - - // Previous approach - //if (cam2 == cam) continue; - //weights += mlsCameraBest(cam2, mPos, c1, wpos); - } - wpos /= weights; - } else { - weights = 1000.0f; - wpos = mPos; - } - - //output(x,y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF); - - //if (weights >= hashParams.m_confidenceThresh) output(x,y) = make_float4(wpos, 0.0f); - - const uint2 screenPos = make_uint2(mainCamera.params.cameraToKinectScreenInt(mainCamera.poseInverse * wpos)); - if (screenPos.x < output.width() && screenPos.y < output.height()) { - output(screenPos.x,screenPos.y) = (weights >= hashParams.m_confidenceThresh) ? make_float4(wpos, 0.0f) : make_float4(MINF,MINF,MINF,MINF); - } - } - } - }*/ -} - - -void ftl::cuda::mls_render_depth(const TextureObject<int> &input, TextureObject<int> &output, const SplatParams ¶ms, int numcams, cudaStream_t stream) { - const dim3 gridSize((output.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (output.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - mls_render_depth_kernel<<<gridSize, blockSize, 0, stream>>>(input, output, params, numcams); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif -} diff --git a/components/control/cpp/src/master.cpp b/components/control/cpp/src/master.cpp index af61c64c1ccf1f20522473c0feaee79cef2f2f31..44361b2d557c31e72f9b824911aaefbde1d4cc80 100644 --- a/components/control/cpp/src/master.cpp +++ b/components/control/cpp/src/master.cpp @@ -99,6 +99,7 @@ Master::Master(Configurable *root, Universe *net) delete nc; delete configuration; } + peerConfigurables_[peer].clear(); }); } diff --git a/components/operators/src/detectandtrack.cpp b/components/operators/src/detectandtrack.cpp index 0f2e0dd069a9277648a7ee87a537000ae66cb123..195ae5ada002aae9995519f421aae987e25c22b6 100644 --- a/components/operators/src/detectandtrack.cpp +++ b/components/operators/src/detectandtrack.cpp @@ -32,7 +32,7 @@ bool DetectAndTrack::init() { debug_ = config()->value<bool>("debug", false); }); - detect_n_frames_ = config()->value<int>("n_frames", 10); + detect_n_frames_ = config()->value<int>("n_frames", 20); detect_n_frames_ = detect_n_frames_ < 0.0 ? 0.0 : detect_n_frames_; max_distance_ = config()->value<double>("max_distance", 100.0); @@ -239,7 +239,7 @@ bool DetectAndTrack::apply(Frame &in, Frame &out, cudaStream_t stream) { track(im); - if (!detecting_) { // && tracked_.size() < max_tracked_) { + if ((n_frame_++ % detect_n_frames_ == 0) && !detecting_) { // && tracked_.size() < max_tracked_) { //if (detect_job_.valid()) detect_job_.get(); // To throw any exceptions detecting_ = true; diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt index 66a930c2c515c818010ddae5557ad976a9ca2d70..71d9ad3d5df8065d11c372e19bda462cb4036f1f 100644 --- a/components/renderers/cpp/CMakeLists.txt +++ b/components/renderers/cpp/CMakeLists.txt @@ -1,9 +1,9 @@ add_library(ftlrender - src/splatter.cu - src/points.cu src/normals.cu src/mask.cu src/screen.cu + src/clipping.cu + src/dibr.cu src/triangle_render.cu src/reprojection.cu src/CUDARender.cpp diff --git a/components/renderers/cpp/include/ftl/render/CUDARender.hpp b/components/renderers/cpp/include/ftl/render/CUDARender.hpp index 616f78a199a4fa0e36ced654e7c18b8a3452c450..3414f43ca244f50d2d2f3306f1ca4206dee382c3 100644 --- a/components/renderers/cpp/include/ftl/render/CUDARender.hpp +++ b/components/renderers/cpp/include/ftl/render/CUDARender.hpp @@ -3,7 +3,7 @@ #include <ftl/render/renderer.hpp> #include <ftl/rgbd/frameset.hpp> -#include <ftl/render/splat_params.hpp> +#include <ftl/render/render_params.hpp> #include <ftl/cuda/points.hpp> #include <ftl/codecs/channels.hpp> //#include <ftl/filters/filter.hpp> @@ -26,6 +26,11 @@ class CUDARender : public ftl::render::Renderer { bool submit(ftl::rgbd::FrameSet *in, ftl::codecs::Channels<0>, const Eigen::Matrix4d &t) override; //void setOutputDevice(int); + void setViewPort(ftl::render::ViewPortMode mode, const ftl::render::ViewPort &vp) { + params_.viewport = vp; + params_.viewPortMode = mode; + } + protected: void _renderChannel(ftl::rgbd::Frame &out, ftl::codecs::Channel channel_in, ftl::codecs::Channel channel_out, const Eigen::Matrix4d &t, cudaStream_t stream); @@ -44,10 +49,12 @@ class CUDARender : public ftl::render::Renderer { float3 light_dir_; uchar4 light_diffuse_; uchar4 light_ambient_; - ftl::render::SplatParams params_; + ftl::render::Parameters params_; cudaStream_t stream_; float3 light_pos_; Eigen::Matrix4d transform_; + float4x4 pose_; + float4x4 poseInverse_; float scale_; int64_t last_frame_; diff --git a/components/renderers/cpp/include/ftl/render/render_params.hpp b/components/renderers/cpp/include/ftl/render/render_params.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8a4da170e0d35f7a1cf1f2587fbdb77b0bd53e63 --- /dev/null +++ b/components/renderers/cpp/include/ftl/render/render_params.hpp @@ -0,0 +1,64 @@ +#ifndef _FTL_RENDER_PARAMS_HPP_ +#define _FTL_RENDER_PARAMS_HPP_ + +#include <ftl/cuda_util.hpp> +#include <ftl/cuda_matrix_util.hpp> +#include <ftl/rgbd/camera.hpp> + +namespace ftl { +namespace render { + +static const uint kShowDisconMask = 0x00000001; +static const uint kNormalWeightColours = 0x00000002; + +struct ViewPort { + short x; + short y; + short width; + short height; + + __device__ __host__ inline short x2() const { return x+width-1; } + __device__ __host__ inline short y2() const { return y+height-1; } + + __device__ __host__ inline bool inside(short px, short py) const { + return px >= x && px <= x2() && py >= y && py <= y2(); + } + + __device__ __host__ inline uint2 map(const ftl::rgbd::Camera &cam, const float2 &pt) const { + return make_uint2( + (pt.x - static_cast<float>(x)) * (static_cast<float>(cam.width) / static_cast<float>(width)), + (pt.y - static_cast<float>(y)) * (static_cast<float>(cam.height) / static_cast<float>(height)) + ); + } + + __device__ __host__ inline uint2 reverseMap(const ftl::rgbd::Camera &cam, const float2 &pt) const { + return make_uint2( + (pt.x * (static_cast<float>(width) / static_cast<float>(cam.width))) + static_cast<float>(x), + (pt.y * (static_cast<float>(height) / static_cast<float>(cam.height))) + static_cast<float>(y) + ); + } +}; + +/** + * Control how the viewport should be used. + */ +enum class ViewPortMode : uint8_t { + Disabled = 0, // Do not use the viewport data + Clipping = 1, // Clip the rendering to within the viewport for stencil like effect + Warping = 2 // Stretch viewport region over entire frame +}; + +struct Parameters { + uint m_flags; + float depthThreshold; + int triangle_limit; + + ftl::rgbd::Camera camera; // Virtual camera intrinsics + ftl::render::ViewPort viewport; + ftl::render::ViewPortMode viewPortMode; +}; + +} +} + +#endif diff --git a/components/renderers/cpp/include/ftl/render/splat_params.hpp b/components/renderers/cpp/include/ftl/render/splat_params.hpp deleted file mode 100644 index 7c6b8416896f10aae816565c52813108c80bf385..0000000000000000000000000000000000000000 --- a/components/renderers/cpp/include/ftl/render/splat_params.hpp +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _FTL_RENDER_SPLAT_PARAMS_HPP_ -#define _FTL_RENDER_SPLAT_PARAMS_HPP_ - -#include <ftl/cuda_util.hpp> -#include <ftl/cuda_matrix_util.hpp> -#include <ftl/rgbd/camera.hpp> - -namespace ftl { -namespace render { - -static const uint kShowDisconMask = 0x00000001; -static const uint kNormalWeightColours = 0x00000002; - -struct __align__(16) SplatParams { - float4x4 m_viewMatrix; - float4x4 m_viewMatrixInverse; - - uint m_flags; - //float voxelSize; - float depthThreshold; - int triangle_limit; - - ftl::rgbd::Camera camera; -}; - -} -} - -#endif diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp index a40c792251dfce60f88ee0f6883307b7fa727cc7..8bc1fbd87adac1d9e7a00d8eeaa662caffedfab3 100644 --- a/components/renderers/cpp/src/CUDARender.cpp +++ b/components/renderers/cpp/src/CUDARender.cpp @@ -96,6 +96,8 @@ CUDARender::CUDARender(nlohmann::json &config) : ftl::render::Renderer(config), clipping_ = false; //} + params_.viewPortMode = ftl::render::ViewPortMode::Disabled; + on("clipping_enabled", [this](const ftl::config::Event &e) { clipping_ = value("clipping_enabled", true); }); @@ -185,7 +187,7 @@ void CUDARender::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann return; } - auto transform = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * params_.m_viewMatrixInverse; + auto transform = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse() * t.cast<float>().inverse()) * poseInverse_; auto transformR = MatrixConversion::toCUDA(f.getPose().cast<float>().inverse()).getFloat3x3(); if (mesh_) { @@ -265,7 +267,7 @@ void CUDARender::_dibr(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre continue; } - auto transform = params_.m_viewMatrix * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); + auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); if (f.hasChannel(Channel::Depth)) { ftl::cuda::dibr_merge( @@ -312,7 +314,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre } //auto pose = MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); - auto transform = params_.m_viewMatrix * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); + auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>()); // Calculate and save virtual view screen position of each source pixel if (f.hasChannel(Channel::Depth)) { @@ -367,7 +369,7 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre temp_.createTexture<float4>(Channel::Normals), out.createTexture<float>(Channel::Depth), value("normal_radius", 1), value("normal_smoothing", 0.02f), - params_.camera, params_.m_viewMatrix.getFloat3x3(), params_.m_viewMatrixInverse.getFloat3x3(), stream_); + params_.camera, pose_.getFloat3x3(), poseInverse_.getFloat3x3(), stream_); } void CUDARender::_renderChannel( @@ -476,9 +478,10 @@ void CUDARender::_updateParameters(ftl::rgbd::Frame &out) { params_.depthThreshold = value("depth_threshold", 0.04f); params_.m_flags = 0; if (value("normal_weight_colours", true)) params_.m_flags |= ftl::render::kNormalWeightColours; - params_.m_viewMatrix = MatrixConversion::toCUDA(out.getPose().cast<float>().inverse()); - params_.m_viewMatrixInverse = MatrixConversion::toCUDA(out.getPose().cast<float>()); params_.camera = camera; + + poseInverse_ = MatrixConversion::toCUDA(out.getPose().cast<float>()); + pose_ = MatrixConversion::toCUDA(out.getPose().cast<float>().inverse()); } void CUDARender::_preprocessColours() { @@ -513,7 +516,7 @@ void CUDARender::_preprocessColours() { void CUDARender::_postprocessColours(ftl::rgbd::Frame &out) { if (value("cool_effect", false)) { - auto pose = params_.m_viewMatrixInverse.getFloat3x3(); + auto pose = poseInverse_.getFloat3x3(); auto col = parseCUDAColour(value("cool_effect_colour", std::string("#2222ff"))); ftl::cuda::cool_blue( @@ -573,8 +576,8 @@ void CUDARender::_renderRight(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t) { transform(0, 3) = baseline; Eigen::Matrix4f matrix = out.getPose().cast<float>() * transform.inverse(); - params_.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse()); - params_.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); + pose_ = MatrixConversion::toCUDA(matrix.inverse()); + poseInverse_ = MatrixConversion::toCUDA(matrix); params_.camera = out.getRightCamera(); out.create<GpuMat>(Channel::Right, Format<uchar4>(params_.camera.width, params_.camera.height)); @@ -656,7 +659,7 @@ void CUDARender::begin(ftl::rgbd::Frame &out) { if (env_image_.empty() || !value("environment_enabled", false)) { out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream); } else { - auto pose = params_.m_viewMatrixInverse.getFloat3x3(); + auto pose = poseInverse_.getFloat3x3(); ftl::cuda::equirectangular_reproject( env_tex_, out.createTexture<uchar4>(Channel::Colour, true), diff --git a/components/renderers/cpp/src/OLD_dibr.cu b/components/renderers/cpp/src/OLD_dibr.cu new file mode 100644 index 0000000000000000000000000000000000000000..f8d4490f6f6a3b2b186843a8c0a36871170508a2 --- /dev/null +++ b/components/renderers/cpp/src/OLD_dibr.cu @@ -0,0 +1,781 @@ +#include "splat_render_cuda.hpp" +#include "depth_camera_cuda.hpp" +//#include <cuda_runtime.h> + +#include <ftl/cuda_matrix_util.hpp> + +#include "splat_params.hpp" +#include "mls_cuda.hpp" +#include <ftl/depth_camera.hpp> + +#define T_PER_BLOCK 8 +#define UPSAMPLE_FACTOR 1.8f +#define WARP_SIZE 32 +#define DEPTH_THRESHOLD 0.05f +#define UPSAMPLE_MAX 60 +#define MAX_ITERATIONS 32 // Note: Must be multiple of 32 +#define SPATIAL_SMOOTHING 0.005f + +using ftl::cuda::TextureObject; +using ftl::render::Parameters; + +extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS]; + +__global__ void clearColourKernel(TextureObject<uchar4> colour) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < colour.width() && y < colour.height()) { + //depth(x,y) = 0x7f800000; //PINF; + colour(x,y) = make_uchar4(76,76,82,0); + } +} + +__device__ inline bool isStable(const float3 &previous, const float3 &estimate, const SplatParams ¶ms, float d) { + const float psize = 2.0f * d / params.camera.fx; + //printf("PSIZE %f\n", psize); + return fabs(previous.x - estimate.x) <= psize && + fabs(previous.y - estimate.y) <= psize && + fabs(previous.z - estimate.z) <= psize; +} + +// ===== PASS 1 : Gather & Upsample (Depth) ==================================== + +/* + * Pass 1: Directly render raw points from all cameras, but upsample the points + * if their spacing is within smoothing threshold but greater than their pixel + * size in the original image. + */ + __global__ void dibr_merge_upsample_kernel(TextureObject<int> depth, int cam, SplatParams params) { + const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; + + const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); + //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); + if (worldPos.x == MINF) return; + const float r = (camera.poseInverse * worldPos).z / camera.params.fx; + + // Get virtual camera ray for splat centre and backface cull if possible + //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); + //const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin); + //if (dot(rayDir, normal) > 0.0f) return; + + // Find the virtual screen position of current point + const float3 camPos = params.m_viewMatrix * worldPos; + if (camPos.z < params.camera.m_sensorDepthWorldMin) return; + if (camPos.z > params.camera.m_sensorDepthWorldMax) return; + + // TODO: Don't upsample so much that only minimum depth makes it through + // Consider also using some SDF style approach to accumulate and smooth a + // depth value between points + const int upsample = min(UPSAMPLE_MAX-2, int(0.01 * params.camera.fx / camPos.z))+3; + const float interval = 1.0f / float(upsample / 2); + + + // TODO:(Nick) Check depth buffer and don't do anything if already hidden? + + // Each thread in warp takes an upsample point and updates corresponding depth buffer. + const int lane = threadIdx.x % WARP_SIZE; + for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { + const float u = (i % upsample) - (upsample / 2); + const float v = (i / upsample) - (upsample / 2); + + // Make an initial estimate of the points location + // Use centroid depth as estimate...? + const float3 point = params.m_viewMatrix * ftl::cuda::upsampled_point(camera.points, make_float2(float(x)+float(u)*interval, float(y)+float(v)*interval)); + const float d = point.z; + + const uint2 screenPos = params.camera.cameraToKinectScreen(point); + const unsigned int cx = screenPos.x;//+u; + const unsigned int cy = screenPos.y;//+v; + if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), d * 1000.0f); + } + } +} + +/* + * Pass 1: Directly render each camera into virtual view but with no upsampling + * for sparse points. + */ + __global__ void dibr_merge_kernel(TextureObject<int> depth, int cam, SplatParams params) { + const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); + if (worldPos.x == MINF) return; + + // Find the virtual screen position of current point + const float3 camPos = params.m_viewMatrix * worldPos; + if (camPos.z < params.camera.m_sensorDepthWorldMin) return; + if (camPos.z > params.camera.m_sensorDepthWorldMax) return; + + const float d = camPos.z; + + const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); + const unsigned int cx = screenPos.x; + const unsigned int cy = screenPos.y; + if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), d * 1000.0f); + } +} + +// ===== PASS 2 : Splat Visible Surface ======================================== + +/* + * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2. + * These values are also used as the actual surface estimate during rendering so should + * at least be plane or sphere fitted if not MLS smoothed onto the actual surface. + */ +__global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, SplatParams params) { + const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; + + const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); + const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); + if (worldPos.x == MINF) return; + const float r = (camera.poseInverse * worldPos).z / camera.params.fx; + + // Get virtual camera ray for splat centre and backface cull if possible + //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); + //const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin); + //if (dot(rayDir, normal) > 0.0f) return; + + // Find the virtual screen position of current point + const float3 camPos = params.m_viewMatrix * worldPos; + if (camPos.z < params.camera.m_sensorDepthWorldMin) return; + if (camPos.z > params.camera.m_sensorDepthWorldMax) return; + const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); + + const int upsample = min(UPSAMPLE_MAX, int((r) * params.camera.fx / camPos.z)); + + // Not on screen so stop now... + if (screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return; + + // TODO:(Nick) Check depth buffer and don't do anything if already hidden? + + // Each thread in warp takes an upsample point and updates corresponding depth buffer. + const int lane = threadIdx.x % WARP_SIZE; + for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { + const float u = (i % upsample) - (upsample / 2); + const float v = (i / upsample) - (upsample / 2); + + // Make an initial estimate of the points location + // Use centroid depth as estimate...? + float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample); + + // Use current points z as estimate + //float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,camPos.z); + + // Or calculate upper and lower bounds for depth and do gradient + // descent until the gradient change is too small or max iter is reached + // and depth remains within the bounds. + // How to find min and max depths? + + //float ld = nearest.z; + + // TODO: (Nick) Estimate depth using points plane, but needs better normals. + //float t; + //if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) { + // Plane based estimate of surface at this pixel + //const float3 nearest = rayOrigin + rayDir * camPos.z; + float3 output; + + // Use MLS of camera neighbor points to get more exact estimate + // Iterate until pixel is stable on the surface. + for (int k=0; k<MAX_ITERATIONS; ++k) { + + // TODO:(Nick) Should perhaps use points from all cameras? + // Instead of doing each camera separately... + // If the depth already is close then it has already been done and can skip this point + if (ftl::cuda::mls_point_surface<1>(camera.points, make_int2(x,y), params.m_viewMatrixInverse * nearest, output, SPATIAL_SMOOTHING) <= 0.0f) { + /*const unsigned int cx = screenPos.x; + const unsigned int cy = screenPos.y; + if (cx < depth.width() && cy < depth.height()) { + atomicMax(&depth(cx,cy), 10000.0f); + }*/ + break; + } + + //ftl::cuda::render_depth(depth, params, output); + + output = params.m_viewMatrix * output; + + // This is essentially the SDF function f(x), only the normal should be estimated also from the weights + //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z); + + const float d = nearest.z + copysignf(0.5f*length(output - nearest), output.z - nearest.z); + nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d); + + const float2 sp = params.camera.cameraToKinectScreenFloat(output); + + //if (isStable(nearest, output, params, d)) { + //if (fabs(sp.x - float(screenPos.x+u)) < 2.0f && fabs(sp.y - float(screenPos.y+v)) < 2.0f) { + if (length(output - nearest) <= 2.0f * params.camera.fx / camPos.z) { + const unsigned int cx = screenPos.x+u; + const unsigned int cy = screenPos.y+v; + + if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), d * 1000.0f); + } + break; + } + + /*if (k >= MAX_ITERATIONS-1 && length(output - nearest) <= SPATIAL_SMOOTHING) { + const unsigned int cx = screenPos.x+u; + const unsigned int cy = screenPos.y+v; + if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + //atomicMin(&depth(cx,cy), d * 1000.0f); + printf("ERR = %f, %f\n", fabs(sp.x - float(screenPos.x+u)), fabs(sp.y - float(screenPos.y+v))); + } + }*/ + + //nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d); // ld + (d - ld)*0.8f + //ld = d; + } + //} + } +} + +// ------ Alternative for pass 2: principle surfaces --------------------------- + +#define NEIGHBOR_RADIUS 1 +#define MAX_NEIGHBORS ((NEIGHBOR_RADIUS*2+1)*(NEIGHBOR_RADIUS*2+1)) + +/* + * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2. + * These values are also used as the actual surface estimate during rendering so should + * at least be plane or sphere fitted if not MLS smoothed onto the actual surface. + */ + __global__ void dibr_visibility_principal_kernel(TextureObject<int> depth, int cam, SplatParams params) { + __shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS]; + __shared__ int minimum[2*T_PER_BLOCK]; + __shared__ int maximum[2*T_PER_BLOCK]; + + const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; + + const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2; + const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); + //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); + if (worldPos.x == MINF) return; + const float r = (camera.poseInverse * worldPos).z / camera.params.fx; + + // Get virtual camera ray for splat centre and backface cull if possible + //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); + //const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin); + //if (dot(rayDir, normal) > 0.0f) return; + + // Find the virtual screen position of current point + const float3 camPos = params.m_viewMatrix * worldPos; + if (camPos.z < params.camera.m_sensorDepthWorldMin) return; + if (camPos.z > params.camera.m_sensorDepthWorldMax) return; + const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); + + const int upsample = min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z)); + + // Not on screen so stop now... + if (screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return; + + // TODO:(Nick) Check depth buffer and don't do anything if already hidden? + + // TODO:(Nick) Preload point neighbors and transform to eye + const int lane = threadIdx.x % WARP_SIZE; + if (lane == 0) { + minimum[warp] = 100000000; + maximum[warp] = -100000000; + } + + __syncwarp(); + + for (int i=lane; i<MAX_NEIGHBORS; i+=WARP_SIZE) { + const int u = (i % (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS; + const int v = (i / (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS; + const float3 point = params.m_viewMatrix * make_float3(tex2D<float4>(camera.points, x+u, y+v)); + neighborhood_cache[warp][i] = point; + + if (length(point - camPos) <= 0.04f) { + atomicMin(&minimum[warp], point.z*1000.0f); + atomicMax(&maximum[warp], point.z*1000.0f); + } + } + + __syncwarp(); + + const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS); + //if (y == 200) printf("interval: %f\n", interval); + + // TODO:(Nick) Find min and max depths of neighbors to estimate z bounds + + // Each thread in warp takes an upsample point and updates corresponding depth buffer. + for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { + const float u = (i % upsample) - (upsample / 2); + const float v = (i / upsample) - (upsample / 2); + + // Make an initial estimate of the points location + // Use centroid depth as estimate...? + //float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample); + + // Use current points z as estimate + // TODO: Use min point as estimate + float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f); + + // Or calculate upper and lower bounds for depth and do gradient + // descent until the gradient change is too small or max iter is reached + // and depth remains within the bounds. + // How to find min and max depths? + + // TODO: (Nick) Estimate depth using points plane, but needs better normals. + //float t; + //if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) { + // Plane based estimate of surface at this pixel + //const float3 nearest = rayOrigin + rayDir * camPos.z; + + // Use MLS of camera neighbor points to get more exact estimate + // Iterate until pixel is stable on the surface. + for (int k=0; k<MAX_ITERATIONS; ++k) { + + // TODO:(Nick) Should perhaps use points from all cameras? + // Instead of doing each camera separately... + // If the depth already is close then it has already been done and can skip this point + const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS>(neighborhood_cache[warp], nearest, SPATIAL_SMOOTHING); + + if (energy <= 0.0f) break; + + //ftl::cuda::render_depth(depth, params, output); + + // This is essentially the SDF function f(x), only the normal should be estimated also from the weights + //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z); + + const float d = nearest.z; + nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval); + + if (energy >= 0.1f) { + const unsigned int cx = screenPos.x+u; + const unsigned int cy = screenPos.y+v; + if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), d * 1000.0f); + } + break; + } + } + //} + } +} + +#define NEIGHBOR_RADIUS_2 3 +#define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1)) +#define MAX_NEIGHBORS_2 32 + +#define FULL_MASK 0xffffffff + +__device__ inline float warpMax(float e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = max(e, other); + } + return e; +} + +__device__ inline float warpMin(float e) { + for (int i = WARP_SIZE/2; i > 0; i /= 2) { + const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); + e = min(e, other); + } + return e; +} + +#define ENERGY_THRESHOLD 0.1f +#define SMOOTHING_MULTIPLIER_A 10.0f // For surface search +#define SMOOTHING_MULTIPLIER_B 4.0f // For z contribution +#define SMOOTHING_MULTIPLIER_C 4.0f // For colour contribution + + +/* + * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2. + * These values are also used as the actual surface estimate during rendering so should + * at least be plane or sphere fitted if not MLS smoothed onto the actual surface. + * + * This version uses a previous point render as neighbour source. + */ + __global__ void dibr_visibility_principal_kernel2(TextureObject<int> point_in, TextureObject<int> depth, SplatParams params) { + __shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS_2]; + __shared__ int minimum[2*T_PER_BLOCK]; + __shared__ int maximum[2*T_PER_BLOCK]; + __shared__ unsigned int nidx[2*T_PER_BLOCK]; + + const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + const int warp = tid / WARP_SIZE; //threadIdx.x / WARP_SIZE + threadIdx.y*2; + const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + // Starting point for surface minimum + float clusterBase = params.camera.m_sensorDepthWorldMin; + + // Loop to a deeper surface if not on the first one selected... + while (clusterBase < params.camera.m_sensorDepthWorldMax) { + + const int lane = tid % WARP_SIZE; + if (lane == 0) { + minimum[warp] = 100000000; + maximum[warp] = -100000000; + nidx[warp] = 0; + } + + __syncwarp(); + + // Search for a valid minimum neighbour + // TODO: Should this really be minimum or the median of a depth cluster? + // cluster median seems very hard to calculate... + for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) { + const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; + const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; + const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f); + const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, point.z); + + // If it is close enough... + // TODO: smoothing / strength should be determined by a number of factors including: + // 1) Depth from original source + // 2) Colour contrast in underlying RGB + // 3) Estimated noise levels in depth values + if (point.z > clusterBase && point.z < params.camera.m_sensorDepthWorldMax && length(point - camPos) <= SMOOTHING_MULTIPLIER_A*(point.z / params.camera.fx)) { + atomicMin(&minimum[warp], point.z*1000.0f); + } + } + + __syncwarp(); + + const float minDepth = float(minimum[warp])/1000.0f; + + // Preload valid neighbour points from within a window. A point is valid + // if it is within a specific distance of the minimum. + // Also calculate the maximum at the same time. + // TODO: Could here do a small search in each camera? This would allow all + // points to be considered, even those masked in our depth input. + const float3 minPos = params.camera.kinectDepthToSkeleton(x, y, minDepth); + + for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) { + const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; + const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; + const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f); + + // If it is close enough... + if (point.z > params.camera.m_sensorDepthWorldMin && point.z < params.camera.m_sensorDepthWorldMax && length(point - minPos) <= SMOOTHING_MULTIPLIER_A*(point.z / params.camera.fx)) { + // Append to neighbour list + //unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1); + unsigned int idx = atomicAdd(&nidx[warp], 1); + if (idx >= MAX_NEIGHBORS_2) break; + neighborhood_cache[warp][idx] = point; + atomicMax(&maximum[warp], point.z*1000.0f); + } + } + + __syncwarp(); + + const float maxDepth = float(maximum[warp])/1000.0f; + const float interval = (maxDepth - minDepth) / float(MAX_ITERATIONS); + + if (minDepth >= params.camera.m_sensorDepthWorldMax) return; + if (maxDepth <= params.camera.m_sensorDepthWorldMin) return; + //if (y == 200) printf("interval: %f\n", maxDepth); + + // If all samples say same depth, then agree and return + // TODO: Check this is valid, since small energies should be removed... + /*if (fabs(minDepth - maxDepth) < 0.0001f) { + if (lane == 0) { + const unsigned int cx = x; + const unsigned int cy = y; + if (minDepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), minDepth * 1000.0f); + } + } + return; + }*/ + + + float maxenergy = -1.0f; + float bestdepth = 0.0f; + + // Search for best or threshold energy + for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) { + const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval); + const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, min(nidx[warp], MAX_NEIGHBORS_2), SMOOTHING_MULTIPLIER_B*(nearest.z/params.camera.fx)); + const float newenergy = warpMax(max(myenergy, maxenergy)); + bestdepth = (myenergy == newenergy) ? nearest.z : (newenergy > maxenergy) ? 0.0f : bestdepth; + maxenergy = newenergy; + } + + // If enough energy was found and this thread was the one that found the best + // then output the depth that this energy occured at. + if (bestdepth > 0.0f && maxenergy >= ENERGY_THRESHOLD) { + //printf("E D %f %f\n", maxenergy, bestdepth); + const unsigned int cx = x; + const unsigned int cy = y; + if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), bestdepth * 1000.0f); + //depth(cx,cy) = bestdepth * 1000.0f; + } + } + + // TODO: Could the threshold depend upon the number of points? Fewer points + // due to distance is incorrect since really there may not be fewer points + // Perhaps the best option is to make it depend on depth ... really close + // and really far both has lower thresholds due to point densities. Other + // option is smoothing factor and surface distances alter with distance to + // vary the number of points used ... smoothing factor could be a multiple + // of pixel size at given distance. Density from original source is also + // an influencer of smoothing factor and thresholds. Colour contrast also + // has a weighting influence, high contrast is high certainty in the + // disparity so such points should have a high influence over choice of + // surface location. + // + // Magnitude vs dispersion factor in the energy function ... + // * Mag is certainty of surface location + // * Dispersion is how far to propagate that certainty, + if (maxenergy >= ENERGY_THRESHOLD) return; + + // Move to next possible surface... + clusterBase = minDepth + SMOOTHING_MULTIPLIER_B*(minDepth / params.camera.fx); + + }; +} + +// ===== Pass 2 and 3 : Attribute contributions ================================ + +__device__ inline float4 make_float4(const uchar4 &c) { + return make_float4(c.x,c.y,c.z,c.w); +} + +/* + * Pass 2: Accumulate attribute contributions if the points pass a visibility test. + */ +__global__ void dibr_attribute_contrib_kernel( + TextureObject<int> depth_in, + TextureObject<float4> colour_out, + TextureObject<float4> normal_out, + TextureObject<float> contrib_out, int cam, + SplatParams params) { + + const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; + + const int tid = (threadIdx.x + threadIdx.y * blockDim.x); + //const int warp = tid / WARP_SIZE; + const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); + //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); + if (worldPos.x == MINF) return; + const float r = (camera.poseInverse * worldPos).z / camera.params.fx; + + const float3 camPos = params.m_viewMatrix * worldPos; + if (camPos.z < params.camera.m_sensorDepthWorldMin) return; + if (camPos.z > params.camera.m_sensorDepthWorldMax) return; + const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); + + const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z)); + + // Not on screen so stop now... + if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; + + // Is this point near the actual surface and therefore a contributor? + const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f); + //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return; + + // TODO:(Nick) Should just one thread load these to shared mem? + const float4 colour = make_float4(tex2D<uchar4>(camera.colour, x, y)); + const float4 normal = tex2D<float4>(camera.normal, x, y); + + // Each thread in warp takes an upsample point and updates corresponding depth buffer. + const int lane = tid % WARP_SIZE; + for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { + const float u = (i % upsample) - (upsample / 2); + const float v = (i / upsample) - (upsample / 2); + + // Use the depth buffer to determine this pixels 3D position in camera space + const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f); + const float3 nearest = params.camera.kinectDepthToSkeleton((int)(screenPos.x+u),(int)(screenPos.y+v),d); + + // What is contribution of our current point at this pixel? + const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); + if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) { // TODO: Use confidence threshold here + const float4 wcolour = colour * weight; + const float4 wnormal = normal * weight; + + //printf("Z %f\n", d); + + // Add this points contribution to the pixel buffer + atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x); + atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y); + atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z); + atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w); + atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x); + atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y); + atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z); + atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w); + atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight); + } + } +} + +/* + * Pass 2: Accumulate attribute contributions if the points pass a visibility test. + */ +/*__global__ void dibr_attribute_contrib_kernel( + TextureObject<int> depth_in, + TextureObject<uchar4> colour_out, + TextureObject<float4> normal_out, int numcams, SplatParams params) { + + const int i = threadIdx.y*blockDim.y + threadIdx.x; + const int bx = blockIdx.x*blockDim.x; + const int by = blockIdx.y*blockDim.y; + const int x = bx + threadIdx.x; + const int y = by + threadIdx.y; + + for (int j=0; j<numcams; ++j) { + const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[j]; + + float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); + float r = (camera.poseInverse * worldPos).z; + //if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), worldPos, 0.02f) < 0.001f) continue; + if (worldPos.x == MINF) continue; + + const float3 camPos = params.m_viewMatrix * worldPos; + + // Estimate upsample factor using ratio of source depth and output depth + + const int upsample = min(15, (int)(UPSAMPLE_FACTOR * (r / camPos.z))+1); + const float upfactor = 2.0f / (float)(upsample); + + for (int v=0; v<upsample; ++v) { + for (int u=0; u<upsample; ++u) { + float3 point; + const ftl::cuda::fragment nearest = ftl::cuda::upsampled_point(camera.points, camera.normal, camera.colour, + make_float2((float)x-1.0f+u*upfactor,(float)y-1.0f+v*upfactor)); + //if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), nearest, point, 0.02f) < 0.001f) continue; + ftl::cuda::render_fragment(depth_in, normal_out, colour_out, params, nearest); + } + } + } +}*/ + + + +__global__ void dibr_normalise_kernel( + TextureObject<float4> colour_in, + TextureObject<uchar4> colour_out, + TextureObject<float4> normals, + TextureObject<float> contribs) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < colour_in.width() && y < colour_in.height()) { + const float4 colour = colour_in.tex2D((int)x,(int)y); + const float4 normal = normals.tex2D((int)x,(int)y); + const float contrib = contribs.tex2D((int)x,(int)y); + + if (contrib > 0.0f) { + colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0); + normals(x,y) = normal / contrib; + } + } +} + +void ftl::cuda::dibr(const TextureObject<int> &depth_out, + const TextureObject<uchar4> &colour_out, + const TextureObject<float4> &normal_out, + const TextureObject<float> &confidence_out, + const TextureObject<float4> &tmp_colour, + const TextureObject<int> &tmp_depth, + int numcams, + const SplatParams ¶ms, + cudaStream_t stream) { + + const dim3 sgridSize((depth_out.width() + 2 - 1)/2, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 sblockSize(2*WARP_SIZE, T_PER_BLOCK); + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out); + ftl::cuda::clear_to_zero(confidence_out, stream); + ftl::cuda::clear_colour(tmp_colour, stream); + ftl::cuda::clear_colour(normal_out, stream); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif + + //int i=3; + + bool noSplatting = params.m_flags & ftl::render::kNoSplatting; + + // Pass 1, gather and upsample depth maps + if (params.m_flags & ftl::render::kNoUpsampling) { + for (int i=0; i<numcams; ++i) + dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params); + } else { + for (int i=0; i<numcams; ++i) + dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params); + } + + if (noSplatting) { + // Pass 3, accumulate all point contributions to pixels + for (int i=0; i<numcams; ++i) + dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params); + } else { + // Pass 2 + dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params); + + // Pass 3, accumulate all point contributions to pixels + for (int i=0; i<numcams; ++i) + dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params); + } + // Pass 2 + //dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params); + + // Pass 2, merge a depth map from each camera. + //for (int i=0; i<numcams; ++i) + // dibr_visibility_principal_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, i, params); + + // Pass 4, normalise contributions + dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_colour, colour_out, normal_out, confidence_out); + + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} + +void ftl::cuda::dibr_raw(const TextureObject<int> &depth_out, + int numcams, const SplatParams ¶ms, cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif + + //dibr_depthmap_direct_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, numcams, params); + cudaSafeCall( cudaGetLastError() ); + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} + diff --git a/components/renderers/cpp/src/clipping.cu b/components/renderers/cpp/src/clipping.cu new file mode 100644 index 0000000000000000000000000000000000000000..354376365c5334e8d4bf0671f76f7aec07c441bb --- /dev/null +++ b/components/renderers/cpp/src/clipping.cu @@ -0,0 +1,34 @@ +#include <ftl/cuda/points.hpp> + +#define T_PER_BLOCK 8 + +__device__ bool isClipped(const float4 &p, const ftl::cuda::ClipSpace &clip) { + const float3 tp = clip.origin * make_float3(p); + return fabs(tp.x) > clip.size.x || fabs(tp.y) > clip.size.y || fabs(tp.z) > clip.size.z; +} + +__global__ void clipping_kernel(ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera camera, ftl::cuda::ClipSpace clip) +{ + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth.width() && y < depth.height()) { + float d = depth(x,y); + float4 p = make_float4(camera.screenToCam(x,y,d), 0.0f); + + if (isClipped(p, clip)) { + depth(x,y) = MINF; + } + } +} + +void ftl::cuda::clipping(ftl::cuda::TextureObject<float> &depth, + const ftl::rgbd::Camera &camera, + const ClipSpace &clip, cudaStream_t stream) { + + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + clipping_kernel<<<gridSize, blockSize, 0, stream>>>(depth, camera, clip); + cudaSafeCall( cudaGetLastError() ); +} \ No newline at end of file diff --git a/components/renderers/cpp/src/dibr.cu b/components/renderers/cpp/src/dibr.cu index 66428ce3086e42b6a3e81c667ae126314d910c98..90e37ff4fcae5b94274aedb43dfb306bff03e957 100644 --- a/components/renderers/cpp/src/dibr.cu +++ b/components/renderers/cpp/src/dibr.cu @@ -1,781 +1,117 @@ -#include "splat_render_cuda.hpp" -#include "depth_camera_cuda.hpp" -//#include <cuda_runtime.h> +#include <ftl/render/render_params.hpp> +#include "splatter_cuda.hpp" +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> -#include <ftl/cuda_matrix_util.hpp> - -#include "splat_params.hpp" -#include "mls_cuda.hpp" -#include <ftl/depth_camera.hpp> +#include <ftl/cuda/makers.hpp> #define T_PER_BLOCK 8 -#define UPSAMPLE_FACTOR 1.8f -#define WARP_SIZE 32 -#define DEPTH_THRESHOLD 0.05f -#define UPSAMPLE_MAX 60 -#define MAX_ITERATIONS 32 // Note: Must be multiple of 32 -#define SPATIAL_SMOOTHING 0.005f using ftl::cuda::TextureObject; -using ftl::render::SplatParams; - -extern __constant__ ftl::voxhash::DepthCameraCUDA c_cameras[MAX_CAMERAS]; - -__global__ void clearColourKernel(TextureObject<uchar4> colour) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; - - if (x < colour.width() && y < colour.height()) { - //depth(x,y) = 0x7f800000; //PINF; - colour(x,y) = make_uchar4(76,76,82,0); - } -} - -__device__ inline bool isStable(const float3 &previous, const float3 &estimate, const SplatParams ¶ms, float d) { - const float psize = 2.0f * d / params.camera.fx; - //printf("PSIZE %f\n", psize); - return fabs(previous.x - estimate.x) <= psize && - fabs(previous.y - estimate.y) <= psize && - fabs(previous.z - estimate.z) <= psize; -} - -// ===== PASS 1 : Gather & Upsample (Depth) ==================================== - -/* - * Pass 1: Directly render raw points from all cameras, but upsample the points - * if their spacing is within smoothing threshold but greater than their pixel - * size in the original image. - */ - __global__ void dibr_merge_upsample_kernel(TextureObject<int> depth, int cam, SplatParams params) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); - //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); - if (worldPos.x == MINF) return; - const float r = (camera.poseInverse * worldPos).z / camera.params.fx; - - // Get virtual camera ray for splat centre and backface cull if possible - //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); - //const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin); - //if (dot(rayDir, normal) > 0.0f) return; - - // Find the virtual screen position of current point - const float3 camPos = params.m_viewMatrix * worldPos; - if (camPos.z < params.camera.m_sensorDepthWorldMin) return; - if (camPos.z > params.camera.m_sensorDepthWorldMax) return; - - // TODO: Don't upsample so much that only minimum depth makes it through - // Consider also using some SDF style approach to accumulate and smooth a - // depth value between points - const int upsample = min(UPSAMPLE_MAX-2, int(0.01 * params.camera.fx / camPos.z))+3; - const float interval = 1.0f / float(upsample / 2); - - - // TODO:(Nick) Check depth buffer and don't do anything if already hidden? - - // Each thread in warp takes an upsample point and updates corresponding depth buffer. - const int lane = threadIdx.x % WARP_SIZE; - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); - - // Make an initial estimate of the points location - // Use centroid depth as estimate...? - const float3 point = params.m_viewMatrix * ftl::cuda::upsampled_point(camera.points, make_float2(float(x)+float(u)*interval, float(y)+float(v)*interval)); - const float d = point.z; - - const uint2 screenPos = params.camera.cameraToKinectScreen(point); - const unsigned int cx = screenPos.x;//+u; - const unsigned int cy = screenPos.y;//+v; - if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { - // Transform estimated point to virtual cam space and output z - atomicMin(&depth(cx,cy), d * 1000.0f); - } - } -} +using ftl::render::Parameters; /* - * Pass 1: Directly render each camera into virtual view but with no upsampling - * for sparse points. + * DIBR point cloud with a depth check */ - __global__ void dibr_merge_kernel(TextureObject<int> depth, int cam, SplatParams params) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - + __global__ void dibr_merge_kernel(TextureObject<float> depth, + TextureObject<int> depth_out, + float4x4 transform, + ftl::rgbd::Camera cam, + Parameters params) { const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; - const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); - if (worldPos.x == MINF) return; + const float d0 = depth.tex2D(x, y); + if (d0 <= cam.minDepth || d0 >= cam.maxDepth) return; - // Find the virtual screen position of current point - const float3 camPos = params.m_viewMatrix * worldPos; - if (camPos.z < params.camera.m_sensorDepthWorldMin) return; - if (camPos.z > params.camera.m_sensorDepthWorldMax) return; + const float3 camPos = transform * cam.screenToCam(x,y,d0); const float d = camPos.z; - const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); + const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); const unsigned int cx = screenPos.x; const unsigned int cy = screenPos.y; - if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { + if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth_out.width() && cy < depth_out.height()) { // Transform estimated point to virtual cam space and output z - atomicMin(&depth(cx,cy), d * 1000.0f); + atomicMin(&depth_out(cx,cy), d * 100000.0f); } } -// ===== PASS 2 : Splat Visible Surface ======================================== - /* - * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2. - * These values are also used as the actual surface estimate during rendering so should - * at least be plane or sphere fitted if not MLS smoothed onto the actual surface. - */ -__global__ void OLD_dibr_visibility_kernel(TextureObject<int> depth, int cam, SplatParams params) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); - const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); - if (worldPos.x == MINF) return; - const float r = (camera.poseInverse * worldPos).z / camera.params.fx; - - // Get virtual camera ray for splat centre and backface cull if possible - //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); - //const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin); - //if (dot(rayDir, normal) > 0.0f) return; - - // Find the virtual screen position of current point - const float3 camPos = params.m_viewMatrix * worldPos; - if (camPos.z < params.camera.m_sensorDepthWorldMin) return; - if (camPos.z > params.camera.m_sensorDepthWorldMax) return; - const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); - - const int upsample = min(UPSAMPLE_MAX, int((r) * params.camera.fx / camPos.z)); - - // Not on screen so stop now... - if (screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return; - - // TODO:(Nick) Check depth buffer and don't do anything if already hidden? - - // Each thread in warp takes an upsample point and updates corresponding depth buffer. - const int lane = threadIdx.x % WARP_SIZE; - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); - - // Make an initial estimate of the points location - // Use centroid depth as estimate...? - float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample); - - // Use current points z as estimate - //float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,camPos.z); - - // Or calculate upper and lower bounds for depth and do gradient - // descent until the gradient change is too small or max iter is reached - // and depth remains within the bounds. - // How to find min and max depths? - - //float ld = nearest.z; - - // TODO: (Nick) Estimate depth using points plane, but needs better normals. - //float t; - //if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) { - // Plane based estimate of surface at this pixel - //const float3 nearest = rayOrigin + rayDir * camPos.z; - float3 output; - - // Use MLS of camera neighbor points to get more exact estimate - // Iterate until pixel is stable on the surface. - for (int k=0; k<MAX_ITERATIONS; ++k) { - - // TODO:(Nick) Should perhaps use points from all cameras? - // Instead of doing each camera separately... - // If the depth already is close then it has already been done and can skip this point - if (ftl::cuda::mls_point_surface<1>(camera.points, make_int2(x,y), params.m_viewMatrixInverse * nearest, output, SPATIAL_SMOOTHING) <= 0.0f) { - /*const unsigned int cx = screenPos.x; - const unsigned int cy = screenPos.y; - if (cx < depth.width() && cy < depth.height()) { - atomicMax(&depth(cx,cy), 10000.0f); - }*/ - break; - } - - //ftl::cuda::render_depth(depth, params, output); - - output = params.m_viewMatrix * output; - - // This is essentially the SDF function f(x), only the normal should be estimated also from the weights - //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z); - - const float d = nearest.z + copysignf(0.5f*length(output - nearest), output.z - nearest.z); - nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d); - - const float2 sp = params.camera.cameraToKinectScreenFloat(output); - - //if (isStable(nearest, output, params, d)) { - //if (fabs(sp.x - float(screenPos.x+u)) < 2.0f && fabs(sp.y - float(screenPos.y+v)) < 2.0f) { - if (length(output - nearest) <= 2.0f * params.camera.fx / camPos.z) { - const unsigned int cx = screenPos.x+u; - const unsigned int cy = screenPos.y+v; - - if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { - // Transform estimated point to virtual cam space and output z - atomicMin(&depth(cx,cy), d * 1000.0f); - } - break; - } - - /*if (k >= MAX_ITERATIONS-1 && length(output - nearest) <= SPATIAL_SMOOTHING) { - const unsigned int cx = screenPos.x+u; - const unsigned int cy = screenPos.y+v; - if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { - //atomicMin(&depth(cx,cy), d * 1000.0f); - printf("ERR = %f, %f\n", fabs(sp.x - float(screenPos.x+u)), fabs(sp.y - float(screenPos.y+v))); - } - }*/ - - //nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d); // ld + (d - ld)*0.8f - //ld = d; - } - //} - } -} - -// ------ Alternative for pass 2: principle surfaces --------------------------- - -#define NEIGHBOR_RADIUS 1 -#define MAX_NEIGHBORS ((NEIGHBOR_RADIUS*2+1)*(NEIGHBOR_RADIUS*2+1)) - -/* - * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2. - * These values are also used as the actual surface estimate during rendering so should - * at least be plane or sphere fitted if not MLS smoothed onto the actual surface. - */ - __global__ void dibr_visibility_principal_kernel(TextureObject<int> depth, int cam, SplatParams params) { - __shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS]; - __shared__ int minimum[2*T_PER_BLOCK]; - __shared__ int maximum[2*T_PER_BLOCK]; - - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - const int warp = threadIdx.x / WARP_SIZE + threadIdx.y*2; - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; +* DIBR Point cloud with a constant depth assumption +*/ +__global__ void dibr_merge_kernel( + TextureObject<int> depth_out, + float4x4 transform, + ftl::rgbd::Camera cam, + Parameters params) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; - const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); - //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); - if (worldPos.x == MINF) return; - const float r = (camera.poseInverse * worldPos).z / camera.params.fx; - - // Get virtual camera ray for splat centre and backface cull if possible - //const float3 rayOrigin = params.m_viewMatrixInverse * make_float3(0.0f,0.0f,0.0f); - //const float3 rayDir = normalize(params.m_viewMatrixInverse * params.camera.kinectDepthToSkeleton(x,y,1.0f) - rayOrigin); - //if (dot(rayDir, normal) > 0.0f) return; - - // Find the virtual screen position of current point - const float3 camPos = params.m_viewMatrix * worldPos; - if (camPos.z < params.camera.m_sensorDepthWorldMin) return; - if (camPos.z > params.camera.m_sensorDepthWorldMax) return; - const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); + const float d0 = 1.0f; - const int upsample = min(UPSAMPLE_MAX, int((4.0f*r) * params.camera.fx / camPos.z)); - - // Not on screen so stop now... - if (screenPos.x - upsample >= depth.width() || screenPos.y - upsample >= depth.height()) return; - - // TODO:(Nick) Check depth buffer and don't do anything if already hidden? - - // TODO:(Nick) Preload point neighbors and transform to eye - const int lane = threadIdx.x % WARP_SIZE; - if (lane == 0) { - minimum[warp] = 100000000; - maximum[warp] = -100000000; - } - - __syncwarp(); - - for (int i=lane; i<MAX_NEIGHBORS; i+=WARP_SIZE) { - const int u = (i % (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS; - const int v = (i / (2*NEIGHBOR_RADIUS+1)) - NEIGHBOR_RADIUS; - const float3 point = params.m_viewMatrix * make_float3(tex2D<float4>(camera.points, x+u, y+v)); - neighborhood_cache[warp][i] = point; - - if (length(point - camPos) <= 0.04f) { - atomicMin(&minimum[warp], point.z*1000.0f); - atomicMax(&maximum[warp], point.z*1000.0f); - } - } - - __syncwarp(); - - const float interval = (float(maximum[warp])/1000.0f - float(minimum[warp]) / 1000.0f) / float(MAX_ITERATIONS); - //if (y == 200) printf("interval: %f\n", interval); - - // TODO:(Nick) Find min and max depths of neighbors to estimate z bounds - - // Each thread in warp takes an upsample point and updates corresponding depth buffer. - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); - - // Make an initial estimate of the points location - // Use centroid depth as estimate...? - //float3 nearest = ftl::cuda::screen_centroid<1>(camera.points, make_float2(screenPos.x+u, screenPos.y+v), make_int2(x,y), params, upsample); - - // Use current points z as estimate - // TODO: Use min point as estimate - float3 nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,float(minimum[warp])/1000.0f); - - // Or calculate upper and lower bounds for depth and do gradient - // descent until the gradient change is too small or max iter is reached - // and depth remains within the bounds. - // How to find min and max depths? - - // TODO: (Nick) Estimate depth using points plane, but needs better normals. - //float t; - //if (ftl::cuda::intersectPlane(normal, worldPos, rayOrigin, rayDir, t)) { - // Plane based estimate of surface at this pixel - //const float3 nearest = rayOrigin + rayDir * camPos.z; - - // Use MLS of camera neighbor points to get more exact estimate - // Iterate until pixel is stable on the surface. - for (int k=0; k<MAX_ITERATIONS; ++k) { - - // TODO:(Nick) Should perhaps use points from all cameras? - // Instead of doing each camera separately... - // If the depth already is close then it has already been done and can skip this point - const float energy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS>(neighborhood_cache[warp], nearest, SPATIAL_SMOOTHING); - - if (energy <= 0.0f) break; - - //ftl::cuda::render_depth(depth, params, output); - - // This is essentially the SDF function f(x), only the normal should be estimated also from the weights - //const float d = nearest.z + (normal.x*output.x + normal.y*output.y + normal.z*output.z); - - const float d = nearest.z; - nearest = params.camera.kinectDepthToSkeleton(screenPos.x+u,screenPos.y+v,d+interval); - - if (energy >= 0.1f) { - const unsigned int cx = screenPos.x+u; - const unsigned int cy = screenPos.y+v; - if (d > params.camera.m_sensorDepthWorldMin && d < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { - // Transform estimated point to virtual cam space and output z - atomicMin(&depth(cx,cy), d * 1000.0f); - } - break; - } - } - //} - } -} - -#define NEIGHBOR_RADIUS_2 3 -#define NEIGHBOR_WINDOW ((NEIGHBOR_RADIUS_2*2+1)*(NEIGHBOR_RADIUS_2*2+1)) -#define MAX_NEIGHBORS_2 32 - -#define FULL_MASK 0xffffffff - -__device__ inline float warpMax(float e) { - for (int i = WARP_SIZE/2; i > 0; i /= 2) { - const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); - e = max(e, other); - } - return e; -} + const float3 camPos = transform * cam.screenToCam(x,y,d0); + const float d = camPos.z; -__device__ inline float warpMin(float e) { - for (int i = WARP_SIZE/2; i > 0; i /= 2) { - const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); - e = min(e, other); + const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); + const unsigned int cx = screenPos.x; + const unsigned int cy = screenPos.y; + if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth_out.width() && cy < depth_out.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth_out(cx,cy), d * 100000.0f); } - return e; } -#define ENERGY_THRESHOLD 0.1f -#define SMOOTHING_MULTIPLIER_A 10.0f // For surface search -#define SMOOTHING_MULTIPLIER_B 4.0f // For z contribution -#define SMOOTHING_MULTIPLIER_C 4.0f // For colour contribution - - -/* - * Pass 2: Determine depth buffer with enough accuracy for a visibility test in pass 2. - * These values are also used as the actual surface estimate during rendering so should - * at least be plane or sphere fitted if not MLS smoothed onto the actual surface. - * - * This version uses a previous point render as neighbour source. - */ - __global__ void dibr_visibility_principal_kernel2(TextureObject<int> point_in, TextureObject<int> depth, SplatParams params) { - __shared__ float3 neighborhood_cache[2*T_PER_BLOCK][MAX_NEIGHBORS_2]; - __shared__ int minimum[2*T_PER_BLOCK]; - __shared__ int maximum[2*T_PER_BLOCK]; - __shared__ unsigned int nidx[2*T_PER_BLOCK]; - - const int tid = (threadIdx.x + threadIdx.y * blockDim.x); - const int warp = tid / WARP_SIZE; //threadIdx.x / WARP_SIZE + threadIdx.y*2; - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - // Starting point for surface minimum - float clusterBase = params.camera.m_sensorDepthWorldMin; - - // Loop to a deeper surface if not on the first one selected... - while (clusterBase < params.camera.m_sensorDepthWorldMax) { - - const int lane = tid % WARP_SIZE; - if (lane == 0) { - minimum[warp] = 100000000; - maximum[warp] = -100000000; - nidx[warp] = 0; - } - - __syncwarp(); - - // Search for a valid minimum neighbour - // TODO: Should this really be minimum or the median of a depth cluster? - // cluster median seems very hard to calculate... - for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) { - const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; - const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; - const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f); - const float3 camPos = params.camera.kinectDepthToSkeleton(x, y, point.z); - - // If it is close enough... - // TODO: smoothing / strength should be determined by a number of factors including: - // 1) Depth from original source - // 2) Colour contrast in underlying RGB - // 3) Estimated noise levels in depth values - if (point.z > clusterBase && point.z < params.camera.m_sensorDepthWorldMax && length(point - camPos) <= SMOOTHING_MULTIPLIER_A*(point.z / params.camera.fx)) { - atomicMin(&minimum[warp], point.z*1000.0f); - } - } - - __syncwarp(); - - const float minDepth = float(minimum[warp])/1000.0f; - - // Preload valid neighbour points from within a window. A point is valid - // if it is within a specific distance of the minimum. - // Also calculate the maximum at the same time. - // TODO: Could here do a small search in each camera? This would allow all - // points to be considered, even those masked in our depth input. - const float3 minPos = params.camera.kinectDepthToSkeleton(x, y, minDepth); - for (int i=lane; i<NEIGHBOR_WINDOW; i+=WARP_SIZE) { - const int u = (i % (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; - const int v = (i / (2*NEIGHBOR_RADIUS_2+1)) - NEIGHBOR_RADIUS_2; - const float3 point = params.camera.kinectDepthToSkeleton(x+u, y+v, float(point_in.tex2D(x+u, y+v)) / 1000.0f); - - // If it is close enough... - if (point.z > params.camera.m_sensorDepthWorldMin && point.z < params.camera.m_sensorDepthWorldMax && length(point - minPos) <= SMOOTHING_MULTIPLIER_A*(point.z / params.camera.fx)) { - // Append to neighbour list - //unsigned int idx = atomicInc(&nidx[warp], MAX_NEIGHBORS_2-1); - unsigned int idx = atomicAdd(&nidx[warp], 1); - if (idx >= MAX_NEIGHBORS_2) break; - neighborhood_cache[warp][idx] = point; - atomicMax(&maximum[warp], point.z*1000.0f); - } - } - - __syncwarp(); - - const float maxDepth = float(maximum[warp])/1000.0f; - const float interval = (maxDepth - minDepth) / float(MAX_ITERATIONS); - - if (minDepth >= params.camera.m_sensorDepthWorldMax) return; - if (maxDepth <= params.camera.m_sensorDepthWorldMin) return; - //if (y == 200) printf("interval: %f\n", maxDepth); - - // If all samples say same depth, then agree and return - // TODO: Check this is valid, since small energies should be removed... - /*if (fabs(minDepth - maxDepth) < 0.0001f) { - if (lane == 0) { - const unsigned int cx = x; - const unsigned int cy = y; - if (minDepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { - // Transform estimated point to virtual cam space and output z - atomicMin(&depth(cx,cy), minDepth * 1000.0f); - } - } - return; - }*/ - - - float maxenergy = -1.0f; - float bestdepth = 0.0f; - - // Search for best or threshold energy - for (int k=lane; k<MAX_ITERATIONS; k+=WARP_SIZE) { - const float3 nearest = params.camera.kinectDepthToSkeleton(x,y,minDepth+float(k)*interval); - const float myenergy = ftl::cuda::mls_point_energy<MAX_NEIGHBORS_2>(neighborhood_cache[warp], nearest, min(nidx[warp], MAX_NEIGHBORS_2), SMOOTHING_MULTIPLIER_B*(nearest.z/params.camera.fx)); - const float newenergy = warpMax(max(myenergy, maxenergy)); - bestdepth = (myenergy == newenergy) ? nearest.z : (newenergy > maxenergy) ? 0.0f : bestdepth; - maxenergy = newenergy; - } - - // If enough energy was found and this thread was the one that found the best - // then output the depth that this energy occured at. - if (bestdepth > 0.0f && maxenergy >= ENERGY_THRESHOLD) { - //printf("E D %f %f\n", maxenergy, bestdepth); - const unsigned int cx = x; - const unsigned int cy = y; - if (bestdepth > params.camera.m_sensorDepthWorldMin && bestdepth < params.camera.m_sensorDepthWorldMax && cx < depth.width() && cy < depth.height()) { - // Transform estimated point to virtual cam space and output z - atomicMin(&depth(cx,cy), bestdepth * 1000.0f); - //depth(cx,cy) = bestdepth * 1000.0f; - } - } - - // TODO: Could the threshold depend upon the number of points? Fewer points - // due to distance is incorrect since really there may not be fewer points - // Perhaps the best option is to make it depend on depth ... really close - // and really far both has lower thresholds due to point densities. Other - // option is smoothing factor and surface distances alter with distance to - // vary the number of points used ... smoothing factor could be a multiple - // of pixel size at given distance. Density from original source is also - // an influencer of smoothing factor and thresholds. Colour contrast also - // has a weighting influence, high contrast is high certainty in the - // disparity so such points should have a high influence over choice of - // surface location. - // - // Magnitude vs dispersion factor in the energy function ... - // * Mag is certainty of surface location - // * Dispersion is how far to propagate that certainty, - if (maxenergy >= ENERGY_THRESHOLD) return; - - // Move to next possible surface... - clusterBase = minDepth + SMOOTHING_MULTIPLIER_B*(minDepth / params.camera.fx); - - }; -} - -// ===== Pass 2 and 3 : Attribute contributions ================================ +void ftl::cuda::dibr_merge(TextureObject<float> &depth, TextureObject<int> &depth_out, const float4x4 &transform, const ftl::rgbd::Camera &cam, Parameters params, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); -__device__ inline float4 make_float4(const uchar4 &c) { - return make_float4(c.x,c.y,c.z,c.w); + dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(depth, depth_out, transform, cam, params); + cudaSafeCall( cudaGetLastError() ); } -/* - * Pass 2: Accumulate attribute contributions if the points pass a visibility test. - */ -__global__ void dibr_attribute_contrib_kernel( - TextureObject<int> depth_in, - TextureObject<float4> colour_out, - TextureObject<float4> normal_out, - TextureObject<float> contrib_out, int cam, - SplatParams params) { - - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam]; - - const int tid = (threadIdx.x + threadIdx.y * blockDim.x); - //const int warp = tid / WARP_SIZE; - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - const float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); - //const float3 normal = make_float3(tex2D<float4>(camera.normal, x, y)); - if (worldPos.x == MINF) return; - const float r = (camera.poseInverse * worldPos).z / camera.params.fx; - - const float3 camPos = params.m_viewMatrix * worldPos; - if (camPos.z < params.camera.m_sensorDepthWorldMin) return; - if (camPos.z > params.camera.m_sensorDepthWorldMax) return; - const uint2 screenPos = params.camera.cameraToKinectScreen(camPos); - - const int upsample = 8; //min(UPSAMPLE_MAX, int((5.0f*r) * params.camera.fx / camPos.z)); - - // Not on screen so stop now... - if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return; - - // Is this point near the actual surface and therefore a contributor? - const float d = ((float)depth_in.tex2D((int)screenPos.x, (int)screenPos.y)/1000.0f); - //if (abs(d - camPos.z) > DEPTH_THRESHOLD) return; - - // TODO:(Nick) Should just one thread load these to shared mem? - const float4 colour = make_float4(tex2D<uchar4>(camera.colour, x, y)); - const float4 normal = tex2D<float4>(camera.normal, x, y); - - // Each thread in warp takes an upsample point and updates corresponding depth buffer. - const int lane = tid % WARP_SIZE; - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); - - // Use the depth buffer to determine this pixels 3D position in camera space - const float d = ((float)depth_in.tex2D(screenPos.x+u, screenPos.y+v)/1000.0f); - const float3 nearest = params.camera.kinectDepthToSkeleton((int)(screenPos.x+u),(int)(screenPos.y+v),d); - - // What is contribution of our current point at this pixel? - const float weight = ftl::cuda::spatialWeighting(length(nearest - camPos), SMOOTHING_MULTIPLIER_C*(nearest.z/params.camera.fx)); - if (screenPos.x+u < colour_out.width() && screenPos.y+v < colour_out.height() && weight > 0.0f) { // TODO: Use confidence threshold here - const float4 wcolour = colour * weight; - const float4 wnormal = normal * weight; - - //printf("Z %f\n", d); +void ftl::cuda::dibr_merge(TextureObject<int> &depth_out, const float4x4 &transform, const ftl::rgbd::Camera &cam, Parameters params, cudaStream_t stream) { + const dim3 gridSize((cam.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (cam.height + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - // Add this points contribution to the pixel buffer - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v), wcolour.x); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+1, wcolour.y); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+2, wcolour.z); - atomicAdd((float*)&colour_out(screenPos.x+u, screenPos.y+v)+3, wcolour.w); - atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v), wnormal.x); - atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+1, wnormal.y); - atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+2, wnormal.z); - atomicAdd((float*)&normal_out(screenPos.x+u, screenPos.y+v)+3, wnormal.w); - atomicAdd(&contrib_out(screenPos.x+u, screenPos.y+v), weight); - } - } + dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, transform, cam, params); + cudaSafeCall( cudaGetLastError() ); } -/* - * Pass 2: Accumulate attribute contributions if the points pass a visibility test. - */ -/*__global__ void dibr_attribute_contrib_kernel( - TextureObject<int> depth_in, - TextureObject<uchar4> colour_out, - TextureObject<float4> normal_out, int numcams, SplatParams params) { - - const int i = threadIdx.y*blockDim.y + threadIdx.x; - const int bx = blockIdx.x*blockDim.x; - const int by = blockIdx.y*blockDim.y; - const int x = bx + threadIdx.x; - const int y = by + threadIdx.y; - - for (int j=0; j<numcams; ++j) { - const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[j]; - - float3 worldPos = make_float3(tex2D<float4>(camera.points, x, y)); - float r = (camera.poseInverse * worldPos).z; - //if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), worldPos, 0.02f) < 0.001f) continue; - if (worldPos.x == MINF) continue; - - const float3 camPos = params.m_viewMatrix * worldPos; - - // Estimate upsample factor using ratio of source depth and output depth - - const int upsample = min(15, (int)(UPSAMPLE_FACTOR * (r / camPos.z))+1); - const float upfactor = 2.0f / (float)(upsample); - - for (int v=0; v<upsample; ++v) { - for (int u=0; u<upsample; ++u) { - float3 point; - const ftl::cuda::fragment nearest = ftl::cuda::upsampled_point(camera.points, camera.normal, camera.colour, - make_float2((float)x-1.0f+u*upfactor,(float)y-1.0f+v*upfactor)); - //if (ftl::cuda::mls_point_surface<3>(camera.points, make_int2(x,y), nearest, point, 0.02f) < 0.001f) continue; - ftl::cuda::render_fragment(depth_in, normal_out, colour_out, params, nearest); - } - } - } -}*/ - - +// ==== Normalize ============================================================== +template <typename A, typename B> __global__ void dibr_normalise_kernel( - TextureObject<float4> colour_in, - TextureObject<uchar4> colour_out, - TextureObject<float4> normals, + TextureObject<A> in, + TextureObject<B> out, TextureObject<float> contribs) { - const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; - const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < in.width() && y < in.height()) { + const float contrib = contribs.tex2D((int)x,(int)y); + const A a = in.tex2D((int)x,(int)y); + //const float4 normal = normals.tex2D((int)x,(int)y); - if (x < colour_in.width() && y < colour_in.height()) { - const float4 colour = colour_in.tex2D((int)x,(int)y); - const float4 normal = normals.tex2D((int)x,(int)y); - const float contrib = contribs.tex2D((int)x,(int)y); + //out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib); if (contrib > 0.0f) { - colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0); - normals(x,y) = normal / contrib; + out(x,y) = make<B>(a / contrib); + //normals(x,y) = normal / contrib; } - } -} - -void ftl::cuda::dibr(const TextureObject<int> &depth_out, - const TextureObject<uchar4> &colour_out, - const TextureObject<float4> &normal_out, - const TextureObject<float> &confidence_out, - const TextureObject<float4> &tmp_colour, - const TextureObject<int> &tmp_depth, - int numcams, - const SplatParams ¶ms, - cudaStream_t stream) { - - const dim3 sgridSize((depth_out.width() + 2 - 1)/2, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 sblockSize(2*WARP_SIZE, T_PER_BLOCK); - const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - clearColourKernel<<<gridSize, blockSize, 0, stream>>>(colour_out); - ftl::cuda::clear_to_zero(confidence_out, stream); - ftl::cuda::clear_colour(tmp_colour, stream); - ftl::cuda::clear_colour(normal_out, stream); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif - - //int i=3; - - bool noSplatting = params.m_flags & ftl::render::kNoSplatting; - - // Pass 1, gather and upsample depth maps - if (params.m_flags & ftl::render::kNoUpsampling) { - for (int i=0; i<numcams; ++i) - dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params); - } else { - for (int i=0; i<numcams; ++i) - dibr_merge_upsample_kernel<<<sgridSize, sblockSize, 0, stream>>>((noSplatting) ? depth_out : tmp_depth, i, params); - } - - if (noSplatting) { - // Pass 3, accumulate all point contributions to pixels - for (int i=0; i<numcams; ++i) - dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params); - } else { - // Pass 2 - dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params); - - // Pass 3, accumulate all point contributions to pixels - for (int i=0; i<numcams; ++i) - dibr_attribute_contrib_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, tmp_colour, normal_out, confidence_out, i, params); - } - // Pass 2 - //dibr_visibility_principal_kernel2<<<sgridSize, sblockSize, 0, stream>>>(tmp_depth, depth_out, params); - - // Pass 2, merge a depth map from each camera. - //for (int i=0; i<numcams; ++i) - // dibr_visibility_principal_kernel<<<sgridSize, sblockSize, 0, stream>>>(depth_out, i, params); - - // Pass 4, normalise contributions - dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(tmp_colour, colour_out, normal_out, confidence_out); - - cudaSafeCall( cudaGetLastError() ); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif + } } -void ftl::cuda::dibr_raw(const TextureObject<int> &depth_out, - int numcams, const SplatParams ¶ms, cudaStream_t stream) { - - const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); +template <typename A, typename B> +void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, TextureObject<float> &contribs, cudaStream_t stream) { + const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif - //dibr_depthmap_direct_kernel<<<gridSize, blockSize, 0, stream>>>(depth_out, numcams, params); - cudaSafeCall( cudaGetLastError() ); - -#ifdef _DEBUG - cudaSafeCall(cudaDeviceSynchronize()); -#endif + dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(in, out, contribs); + cudaSafeCall( cudaGetLastError() ); } +template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<float> &contribs, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<float> &contribs, cudaStream_t stream); +template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<float> &contribs, cudaStream_t stream); diff --git a/components/renderers/cpp/src/mask.cu b/components/renderers/cpp/src/mask.cu index 6e9621c8064359021de616c8662f3b4227ba1806..1adb4fb75c4c25d4ee70e8d1f378a43702142745 100644 --- a/components/renderers/cpp/src/mask.cu +++ b/components/renderers/cpp/src/mask.cu @@ -3,7 +3,6 @@ using ftl::cuda::TextureObject; using ftl::cuda::Mask; -using ftl::render::SplatParams; #define T_PER_BLOCK 16 diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu index 79b01ff534cf36d26c4d58c3e15d3b32fbd95020..808501a4ad66406e284aa7030a939e1e24038c85 100644 --- a/components/renderers/cpp/src/reprojection.cu +++ b/components/renderers/cpp/src/reprojection.cu @@ -1,4 +1,4 @@ -#include <ftl/render/splat_params.hpp> +#include <ftl/render/render_params.hpp> #include "splatter_cuda.hpp" #include <ftl/rgbd/camera.hpp> #include <ftl/cuda_common.hpp> @@ -10,8 +10,9 @@ #define ACCUM_DIAMETER 8 using ftl::cuda::TextureObject; -using ftl::render::SplatParams; +using ftl::render::Parameters; using ftl::rgbd::Camera; +using ftl::render::ViewPortMode; /*template <typename T> __device__ inline T generateInput(const T &in, const SplatParams ¶ms, const float4 &worldPos) { @@ -54,10 +55,20 @@ __device__ inline void accumulateOutput(TextureObject<float4> &out, TextureObjec atomicAdd(&contrib(pos.x, pos.y), w); } +template <ViewPortMode VPMODE> +__device__ inline uint2 convertScreen(const Parameters ¶ms, int x, int y) { + return make_uint2(x,y); +} + +template <> +__device__ inline uint2 convertScreen<ViewPortMode::Warping>(const Parameters ¶ms, int x, int y) { + return params.viewport.reverseMap(params.camera, make_float2(x,y)); +} + /* - * Pass 2: Accumulate attribute contributions if the points pass a visibility test. + * Full reprojection with normals and depth */ - template <typename A, typename B> + template <typename A, typename B, ViewPortMode VPMODE> __global__ void reprojection_kernel( TextureObject<A> in, // Attribute input TextureObject<float> depth_src, @@ -65,7 +76,7 @@ __global__ void reprojection_kernel( TextureObject<float4> normals, TextureObject<B> out, // Accumulated output TextureObject<float> contrib, - SplatParams params, + Parameters params, Camera camera, float4x4 transform, float3x3 transformR) { const int x = (blockIdx.x*blockDim.x + threadIdx.x); @@ -77,7 +88,9 @@ __global__ void reprojection_kernel( //const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; - const float3 camPos = transform * params.camera.screenToCam(x, y, d); + //const uint2 rpt = make_uint2(x,y); + const uint2 rpt = convertScreen<VPMODE>(params, x, y); + const float3 camPos = transform * params.camera.screenToCam(rpt.x, rpt.y, d); if (camPos.z < camera.minDepth) return; if (camPos.z > camera.maxDepth) return; const float2 screenPos = camera.camToScreen<float2>(camPos); @@ -101,6 +114,7 @@ __global__ void reprojection_kernel( // TODO: Z checks need to interpolate between neighbors if large triangles are used //float weight = ftl::cuda::weighting(fabs(camPos.z - d2), params.depthThreshold); + // TODO: Depth threshold should relate to pixel size float weight = (fabs(camPos.z - d2) <= params.depthThreshold) ? 1.0f : 0.0f; /* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */ @@ -128,24 +142,18 @@ void ftl::cuda::reproject( TextureObject<float4> &normals, TextureObject<B> &out, // Accumulated output TextureObject<float> &contrib, - const SplatParams ¶ms, - const Camera &camera, const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream) { + const Parameters ¶ms, + const Camera &camera, const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream) { const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - reprojection_kernel<<<gridSize, blockSize, 0, stream>>>( - in, - depth_src, - depth_in, - normals, - out, - contrib, - params, - camera, - transform, - transformR - ); - cudaSafeCall( cudaGetLastError() ); + switch (params.viewPortMode) { + case ViewPortMode::Disabled: reprojection_kernel<A,B,ViewPortMode::Disabled><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Clipping: reprojection_kernel<A,B,ViewPortMode::Clipping><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break; + case ViewPortMode::Warping: reprojection_kernel<A,B,ViewPortMode::Warping><<<gridSize, blockSize, 0, stream>>>(in, depth_src, depth_in, normals, out, contrib, params, camera, transform, transformR); break; + } + cudaSafeCall( cudaGetLastError() ); } template void ftl::cuda::reproject( @@ -155,9 +163,10 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float4> &normals, ftl::cuda::TextureObject<float4> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream); template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &in, // Original colour image @@ -166,9 +175,10 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float4> &normals, ftl::cuda::TextureObject<float> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream); template void ftl::cuda::reproject( ftl::cuda::TextureObject<float4> &in, // Original colour image @@ -177,9 +187,10 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float4> &normals, ftl::cuda::TextureObject<float4> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream); //============================================================================== // Without normals @@ -195,7 +206,7 @@ __global__ void reprojection_kernel( TextureObject<float> depth_in, // Virtual depth map TextureObject<B> out, // Accumulated output TextureObject<float> contrib, - SplatParams params, + Parameters params, Camera camera, float4x4 poseInv) { const int x = (blockIdx.x*blockDim.x + threadIdx.x); @@ -238,7 +249,7 @@ void ftl::cuda::reproject( TextureObject<float> &depth_in, // Virtual depth map TextureObject<B> &out, // Accumulated output TextureObject<float> &contrib, - const SplatParams ¶ms, + const Parameters ¶ms, const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -262,7 +273,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float4> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -272,7 +283,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -282,7 +293,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float4> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -299,7 +310,7 @@ __global__ void reprojection_kernel( TextureObject<float> depth_in, // Virtual depth map TextureObject<B> out, // Accumulated output TextureObject<float> contrib, - SplatParams params, + Parameters params, Camera camera, float4x4 poseInv) { const int x = (blockIdx.x*blockDim.x + threadIdx.x); @@ -339,7 +350,7 @@ void ftl::cuda::reproject( TextureObject<float> &depth_in, // Virtual depth map TextureObject<B> &out, // Accumulated output TextureObject<float> &contrib, - const SplatParams ¶ms, + const Parameters ¶ms, const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); @@ -361,7 +372,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float4> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -370,7 +381,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -379,7 +390,7 @@ template void ftl::cuda::reproject( ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float4> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -484,3 +495,38 @@ void ftl::cuda::fix_bad_colour( fix_colour_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth, out, contribs, bad_colour, cam); cudaSafeCall( cudaGetLastError() ); } + +// ===== Show bad colour normalise ============================================= + +__global__ void show_missing_colour_kernel( + TextureObject<float> depth, + TextureObject<uchar4> out, + TextureObject<float> contribs, + uchar4 bad_colour, + ftl::rgbd::Camera cam) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < out.width() && y < out.height()) { + const float contrib = contribs.tex2D((int)x,(int)y); + const float d = depth.tex2D(x,y); + + if (contrib < 0.0000001f && d > cam.minDepth && d < cam.maxDepth) { + out(x,y) = bad_colour; + } + } +} + +void ftl::cuda::show_missing_colour( + TextureObject<float> &depth, + TextureObject<uchar4> &out, + TextureObject<float> &contribs, + uchar4 bad_colour, + const ftl::rgbd::Camera &cam, + cudaStream_t stream) { + const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + show_missing_colour_kernel<<<gridSize, blockSize, 0, stream>>>(depth, out, contribs, bad_colour, cam); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/renderers/cpp/src/screen.cu b/components/renderers/cpp/src/screen.cu index f12f255f0b9a3fbdd5f418864567f5688e489ba2..e9cd06f4145c7cb2fd4947387f711af376c68912 100644 --- a/components/renderers/cpp/src/screen.cu +++ b/components/renderers/cpp/src/screen.cu @@ -1,51 +1,76 @@ -#include <ftl/render/splat_params.hpp> +#include <ftl/render/render_params.hpp> #include "splatter_cuda.hpp" #include <ftl/rgbd/camera.hpp> #include <ftl/cuda_common.hpp> using ftl::rgbd::Camera; using ftl::cuda::TextureObject; -using ftl::render::SplatParams; +using ftl::render::Parameters; +using ftl::render::ViewPortMode; #define T_PER_BLOCK 8 +template <ViewPortMode VPMODE> +__device__ inline uint2 convertToScreen(const Parameters ¶ms, const float3 &camPos); + +template <> +__device__ inline uint2 convertToScreen<ViewPortMode::Disabled>(const Parameters ¶ms, const float3 &camPos) { + return params.camera.camToScreen<uint2>(camPos); +} + +template <> +__device__ inline uint2 convertToScreen<ViewPortMode::Clipping>(const Parameters ¶ms, const float3 &camPos) { + const uint2 r = params.camera.camToScreen<uint2>(camPos); + return (params.viewport.inside(r.x, r.y)) ? r : make_uint2(30000, 30000); +} + +template <> +__device__ inline uint2 convertToScreen<ViewPortMode::Warping>(const Parameters ¶ms, const float3 &camPos) { + return params.viewport.map(params.camera, params.camera.camToScreen<float2>(camPos)); +} + /* * Convert source screen position to output screen coordinates. */ + template <ftl::render::ViewPortMode VPMODE> __global__ void screen_coord_kernel(TextureObject<float> depth, TextureObject<float> depth_out, - TextureObject<short2> screen_out, Camera vcamera, float4x4 pose, Camera camera) { + TextureObject<short2> screen_out, Parameters params, float4x4 pose, Camera camera) { const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; if (x >= 0 && y >= 0 && x < depth.width() && y < depth.height()) { uint2 screenPos = make_uint2(30000,30000); - //screen_out(x,y) = make_short2(screenPos.x, screenPos.y); const float d = depth.tex2D(x, y); - //const float3 worldPos = pose * camera.screenToCam(x,y,d); - //if (d < camera.minDepth || d > camera.maxDepth) return; // Find the virtual screen position of current point - const float3 camPos = (d >= camera.minDepth && d <= camera.maxDepth) ? pose * camera.screenToCam(x,y,d) : make_float3(0.0f,0.0f,0.0f); // worldPos; // params.m_viewMatrix * - screenPos = vcamera.camToScreen<uint2>(camPos); - - if ( camPos.z < vcamera.minDepth || - camPos.z > vcamera.maxDepth || - screenPos.x >= vcamera.width || - screenPos.y >= vcamera.height) + const float3 camPos = (d >= camera.minDepth && d <= camera.maxDepth) ? pose * camera.screenToCam(x,y,d) : make_float3(0.0f,0.0f,0.0f); + screenPos = convertToScreen<VPMODE>(params, camPos); + + if ( camPos.z < params.camera.minDepth || + camPos.z > params.camera.maxDepth || + //!vp.inside(screenPos.x, screenPos.y)) + screenPos.x >= params.camera.width || + screenPos.y >= params.camera.height) screenPos = make_uint2(30000,30000); screen_out(x,y) = make_short2(screenPos.x, screenPos.y); depth_out(x,y) = camPos.z; } } -void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> &depth_out, TextureObject<short2> &screen_out, const SplatParams ¶ms, const float4x4 &pose, const Camera &camera, cudaStream_t stream) { +void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> &depth_out, + TextureObject<short2> &screen_out, const Parameters ¶ms, + const float4x4 &pose, const Camera &camera, cudaStream_t stream) { const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - screen_coord_kernel<<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params.camera, pose, camera); - cudaSafeCall( cudaGetLastError() ); + switch (params.viewPortMode) { + case ViewPortMode::Disabled: screen_coord_kernel<ViewPortMode::Disabled><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; + case ViewPortMode::Clipping: screen_coord_kernel<ViewPortMode::Clipping><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; + case ViewPortMode::Warping: screen_coord_kernel<ViewPortMode::Warping><<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); break; + } + cudaSafeCall( cudaGetLastError() ); } @@ -61,12 +86,12 @@ void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> & const int y = blockIdx.y*blockDim.y + threadIdx.y; if (x >= 0 && y >= 0 && x < depth_out.width() && y < depth_out.height()) { - uint2 screenPos = make_uint2(30000,30000); + //uint2 screenPos = make_uint2(30000,30000); const float d = camera.maxDepth; // Find the virtual screen position of current point const float3 camPos = pose * camera.screenToCam(x,y,d); - screenPos = vcamera.camToScreen<uint2>(camPos); + uint2 screenPos = vcamera.camToScreen<uint2>(camPos); if ( camPos.z < vcamera.minDepth || camPos.z > vcamera.maxDepth || @@ -79,7 +104,7 @@ void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> & } } -void ftl::cuda::screen_coord(TextureObject<float> &depth_out, TextureObject<short2> &screen_out, const SplatParams ¶ms, const float4x4 &pose, const Camera &camera, cudaStream_t stream) { +void ftl::cuda::screen_coord(TextureObject<float> &depth_out, TextureObject<short2> &screen_out, const Parameters ¶ms, const float4x4 &pose, const Camera &camera, cudaStream_t stream) { const dim3 gridSize((screen_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index 33f0340f2a94007a87981dbf400e953869624392..fcdcaa121c6e52994f54ecf82603edeb88f50775 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -2,7 +2,7 @@ #define _FTL_RECONSTRUCTION_SPLAT_CUDA_HPP_ #include <ftl/cuda_common.hpp> -#include <ftl/render/splat_params.hpp> +#include <ftl/render/render_params.hpp> namespace ftl { namespace cuda { @@ -10,7 +10,7 @@ namespace cuda { ftl::cuda::TextureObject<float> &depth, ftl::cuda::TextureObject<float> &depth_out, ftl::cuda::TextureObject<short2> &screen_out, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const float4x4 &pose, const ftl::rgbd::Camera &camera, cudaStream_t stream); @@ -18,7 +18,7 @@ namespace cuda { void screen_coord( ftl::cuda::TextureObject<float> &depth_out, ftl::cuda::TextureObject<short2> &screen_out, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const float4x4 &pose, const ftl::rgbd::Camera &camera, cudaStream_t stream); @@ -27,7 +27,7 @@ namespace cuda { ftl::cuda::TextureObject<float> &depth_in, ftl::cuda::TextureObject<int> &depth_out, ftl::cuda::TextureObject<short2> &screen, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, cudaStream_t stream); void mesh_blender( @@ -41,14 +41,14 @@ namespace cuda { ftl::cuda::TextureObject<float4> &points, ftl::cuda::TextureObject<float4> &normals, ftl::cuda::TextureObject<int> &depth, - ftl::render::SplatParams params, + ftl::render::Parameters params, bool culling, cudaStream_t stream); void dibr_merge( ftl::cuda::TextureObject<float4> &points, ftl::cuda::TextureObject<int> &depth, - ftl::render::SplatParams params, + ftl::render::Parameters params, cudaStream_t stream); void dibr_merge( @@ -56,14 +56,14 @@ namespace cuda { ftl::cuda::TextureObject<int> &depth_out, const float4x4 &transform, const ftl::rgbd::Camera &cam, - ftl::render::SplatParams params, + ftl::render::Parameters params, cudaStream_t stream); void dibr_merge( ftl::cuda::TextureObject<int> &depth_out, const float4x4 &transform, const ftl::rgbd::Camera &cam, - ftl::render::SplatParams params, + ftl::render::Parameters params, cudaStream_t stream); template <typename T> @@ -74,7 +74,7 @@ namespace cuda { ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map ftl::cuda::TextureObject<float> &depth_out, ftl::cuda::TextureObject<T> &colour_out, - const ftl::render::SplatParams ¶ms, cudaStream_t stream); + const ftl::render::Parameters ¶ms, cudaStream_t stream); template <typename A, typename B> void dibr_attribute( @@ -83,7 +83,7 @@ namespace cuda { ftl::cuda::TextureObject<int> &depth_in, // Virtual depth map ftl::cuda::TextureObject<B> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - ftl::render::SplatParams ¶ms, cudaStream_t stream); + ftl::render::Parameters ¶ms, cudaStream_t stream); template <typename A, typename B> void reproject( @@ -93,9 +93,10 @@ namespace cuda { ftl::cuda::TextureObject<float4> &normals, ftl::cuda::TextureObject<B> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, - const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream); + const float4x4 &transform, const float3x3 &transformR, + cudaStream_t stream); template <typename A, typename B> void reproject( @@ -104,7 +105,7 @@ namespace cuda { ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<B> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); @@ -114,7 +115,7 @@ namespace cuda { ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map ftl::cuda::TextureObject<B> &out, // Accumulated output ftl::cuda::TextureObject<float> &contrib, - const ftl::render::SplatParams ¶ms, + const ftl::render::Parameters ¶ms, const ftl::rgbd::Camera &camera, const float4x4 &poseInv, cudaStream_t stream); diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu index d183777c65447443540733a2ae8ee97a3e293b35..df7b5dea99a5f88050016302e5df984f212bc7b9 100644 --- a/components/renderers/cpp/src/triangle_render.cu +++ b/components/renderers/cpp/src/triangle_render.cu @@ -1,4 +1,4 @@ -#include <ftl/render/splat_params.hpp> +#include <ftl/render/render_params.hpp> #include "splatter_cuda.hpp" #include <ftl/rgbd/camera.hpp> #include <ftl/cuda_common.hpp> @@ -7,7 +7,7 @@ using ftl::rgbd::Camera; using ftl::cuda::TextureObject; -using ftl::render::SplatParams; +using ftl::render::Parameters; #define T_PER_BLOCK 8 @@ -107,7 +107,7 @@ float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) { * being inside or outside (using bary centric coordinate method). If inside * then atomically write to the depth map. */ -__device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const SplatParams ¶ms, TextureObject<int> &depth_out) { +__device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const Parameters ¶ms, TextureObject<int> &depth_out) { const int minX = min(v[0].x, min(v[1].x, v[2].x)); const int minY = min(v[0].y, min(v[1].y, v[2].y)); const int maxX = max(v[0].x, max(v[1].x, v[2].x)); @@ -136,7 +136,7 @@ __device__ void drawTriangle(const float (&d)[3], const short2 (&v)[3], const Sp * hence that a triangle should not be draw between said verticies. * TODO: Use discontinuity mask or some better test here. */ -__device__ inline bool isValidTriangle(const float (&d)[3], const SplatParams ¶ms) { +__device__ inline bool isValidTriangle(const float (&d)[3], const Parameters ¶ms) { return !(fabs(d[0] - d[1]) > params.depthThreshold || fabs(d[0] - d[2]) > params.depthThreshold || d[0] < params.camera.minDepth || d[0] > params.camera.maxDepth); } @@ -145,7 +145,7 @@ __device__ inline bool isValidTriangle(const float (&d)[3], const SplatParams &p * which verticies to load. */ template <int A, int B> -__device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const SplatParams ¶ms, const TextureObject<float> &depth_in, const TextureObject<short2> &screen) { +__device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const Parameters ¶ms, const TextureObject<float> &depth_in, const TextureObject<short2> &screen) { d[1] = depth_in.tex2D(x+A,y); d[2] = depth_in.tex2D(x,y+B); v[1] = screen.tex2D(x+A,y); @@ -159,7 +159,7 @@ __device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const __global__ void triangle_render_kernel( TextureObject<float> depth_in, TextureObject<int> depth_out, - TextureObject<short2> screen, SplatParams params) { + TextureObject<short2> screen, Parameters params) { const int x = blockIdx.x*blockDim.x + threadIdx.x; const int y = blockIdx.y*blockDim.y + threadIdx.y; @@ -178,7 +178,7 @@ __device__ bool loadTriangle(int x, int y, float (&d)[3], short2 (&v)[3], const } } -void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<int> &depth_out, TextureObject<short2> &screen, const SplatParams ¶ms, cudaStream_t stream) { +void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<int> &depth_out, TextureObject<short2> &screen, const Parameters ¶ms, cudaStream_t stream) { const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp index 683c6a98f4146230a3d2bb90e55fec457138ba60..bf0b01c8229931f7f80f7da8fa739e3bf121d1e1 100644 --- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp +++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp @@ -106,10 +106,10 @@ void StereoVideoSource::init(const string &file) { // set use config file/set (some) default values cv::Mat K = cv::Mat::eye(cv::Size(3, 3), CV_64FC1); - K.at<double>(0,0) = host_->value("focal", 500.0); - K.at<double>(1,1) = host_->value("focal", 500.0); - K.at<double>(0,2) = host_->value("centre_x", -color_size_.width/2.0f); - K.at<double>(1,2) = host_->value("centre_y", -color_size_.height/2.0f); + K.at<double>(0,0) = host_->value("focal", 800.0); + K.at<double>(1,1) = host_->value("focal", 800.0); + K.at<double>(0,2) = host_->value("centre_x", color_size_.width/2.0f); + K.at<double>(1,2) = host_->value("centre_y", color_size_.height/2.0f); calib_->setIntrinsics(color_size_, {K, K}); } diff --git a/components/structures/include/ftl/data/frame.hpp b/components/structures/include/ftl/data/frame.hpp index 5e8ab1a075b5c5dff7d38fc7862f5744408dd986..3a33dc086114ccf8fa288761170d5943bc9bb5fa 100644 --- a/components/structures/include/ftl/data/frame.hpp +++ b/components/structures/include/ftl/data/frame.hpp @@ -304,6 +304,7 @@ void ftl::data::Frame<BASE,N,STATE,DATA>::swapTo(ftl::codecs::Channels<BASE> cha // Should we swap this channel? if (channels.has(c)) { f.channels_ += c; + // TODO: Make sure this does a move not copy std::swap(f.getData(c),getData(c)); } }