From 29185c0891b58a0af7fba571235e93aec5f7ed0e Mon Sep 17 00:00:00 2001 From: Nicolas Pope <nwpope@utu.fi> Date: Sun, 30 Aug 2020 18:18:43 +0300 Subject: [PATCH] Working min disp --- lib/libsgm/src/check_consistency.cu | 18 ++++++++++-------- lib/libsgm/src/internal.h | 4 ++-- lib/libsgm/src/stereo_sgm.cpp | 2 +- lib/libsgm/src/winner_takes_all.cu | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/lib/libsgm/src/check_consistency.cu b/lib/libsgm/src/check_consistency.cu index fb7282250..97cf953b3 100644 --- a/lib/libsgm/src/check_consistency.cu +++ b/lib/libsgm/src/check_consistency.cu @@ -19,7 +19,7 @@ limitations under the License. namespace { template<typename SRC_T, typename DST_T> - __global__ void check_consistency_kernel(DST_T* d_leftDisp, const DST_T* d_rightDisp, const uint8_t* d_mask, int width, int height, int src_pitch, int dst_pitch, bool subpixel) { + __global__ void check_consistency_kernel(DST_T* d_leftDisp, const DST_T* d_rightDisp, const uint8_t* d_mask, int width, int height, int src_pitch, int dst_pitch, bool subpixel, int min_disp) { const int j = blockIdx.x * blockDim.x + threadIdx.x; const int i = blockIdx.y * blockDim.y + threadIdx.y; @@ -28,6 +28,7 @@ namespace { uint8_t mask = d_mask[i * src_pitch + j]; int d = d_leftDisp[i * dst_pitch + j]; + int dout = d + (min_disp << sgm::StereoSGM::SUBPIXEL_SHIFT); if (subpixel) { d >>= sgm::StereoSGM::SUBPIXEL_SHIFT; } @@ -36,38 +37,39 @@ namespace { int diff = abs(d_rightDisp[i * dst_pitch + k] - d); if (mask != 0 || diff > 1) { // masked or left-right inconsistent pixel -> invalid - d_leftDisp[i * dst_pitch + j] = (256 << (sgm::StereoSGM::SUBPIXEL_SHIFT+1)); + dout = (256 << (sgm::StereoSGM::SUBPIXEL_SHIFT+1)); } } + d_leftDisp[i * dst_pitch + j] = dout; } } namespace sgm { namespace details { - void check_consistency(uint8_t* d_left_disp, const uint8_t* d_right_disp, const uint8_t* d_mask, int width, int height, int depth_bits, int src_pitch, int dst_pitch, bool subpixel, cudaStream_t stream) { + void check_consistency(uint8_t* d_left_disp, const uint8_t* d_right_disp, const uint8_t* d_mask, int width, int height, int depth_bits, int src_pitch, int dst_pitch, bool subpixel, int min_disp, cudaStream_t stream) { const dim3 blocks(width / 16, height / 16); const dim3 threads(16, 16); if (depth_bits == 16) { - check_consistency_kernel<uint16_t> << < blocks, threads, 0, stream >> > (d_left_disp, d_right_disp, d_mask, width, height, src_pitch, dst_pitch, subpixel); + check_consistency_kernel<uint16_t> << < blocks, threads, 0, stream >> > (d_left_disp, d_right_disp, d_mask, width, height, src_pitch, dst_pitch, subpixel, min_disp); } else if (depth_bits == 8) { - check_consistency_kernel<uint8_t> << < blocks, threads, 0, stream >> > (d_left_disp, d_right_disp, d_mask, width, height, src_pitch, dst_pitch, subpixel); + check_consistency_kernel<uint8_t> << < blocks, threads, 0, stream >> > (d_left_disp, d_right_disp, d_mask, width, height, src_pitch, dst_pitch, subpixel, min_disp); } CudaKernelCheck(); } - void check_consistency(uint16_t* d_left_disp, const uint16_t* d_right_disp, const uint8_t* d_mask, int width, int height, int depth_bits, int src_pitch, int dst_pitch, bool subpixel, cudaStream_t stream) { + void check_consistency(uint16_t* d_left_disp, const uint16_t* d_right_disp, const uint8_t* d_mask, int width, int height, int depth_bits, int src_pitch, int dst_pitch, bool subpixel, int min_disp, cudaStream_t stream) { const dim3 blocks(width / 16, height / 16); const dim3 threads(16, 16); if (depth_bits == 16) { - check_consistency_kernel<uint16_t> << < blocks, threads, 0, stream >> > (d_left_disp, d_right_disp, d_mask, width, height, src_pitch, dst_pitch, subpixel); + check_consistency_kernel<uint16_t> << < blocks, threads, 0, stream >> > (d_left_disp, d_right_disp, d_mask, width, height, src_pitch, dst_pitch, subpixel, min_disp); } else if (depth_bits == 8) { - check_consistency_kernel<uint8_t> << < blocks, threads, 0, stream >> > (d_left_disp, d_right_disp, d_mask, width, height, src_pitch, dst_pitch, subpixel); + check_consistency_kernel<uint8_t> << < blocks, threads, 0, stream >> > (d_left_disp, d_right_disp, d_mask, width, height, src_pitch, dst_pitch, subpixel, min_disp); } CudaKernelCheck(); diff --git a/lib/libsgm/src/internal.h b/lib/libsgm/src/internal.h index 7d836f5d8..c9c9ff259 100644 --- a/lib/libsgm/src/internal.h +++ b/lib/libsgm/src/internal.h @@ -36,8 +36,8 @@ namespace sgm { void median_filter(const uint8_t* d_src, uint8_t* d_dst, int width, int height, int pitch, cudaStream_t stream); void median_filter(const uint16_t* d_src, uint16_t* d_dst, int width, int height, int pitch, cudaStream_t stream); - void check_consistency(uint8_t* d_left_disp, const uint8_t* d_right_disp, const uint8_t* d_mask, int width, int height, int depth_bits, int src_pitch, int dst_pitch, bool subpixel, cudaStream_t stream); - void check_consistency(uint16_t* d_left_disp, const uint16_t* d_right_disp, const uint8_t* d_mask, int width, int height, int depth_bits, int src_pitch, int dst_pitch, bool subpixel, cudaStream_t stream); + void check_consistency(uint8_t* d_left_disp, const uint8_t* d_right_disp, const uint8_t* d_mask, int width, int height, int depth_bits, int src_pitch, int dst_pitch, bool subpixel, int min_disp, cudaStream_t stream); + void check_consistency(uint16_t* d_left_disp, const uint16_t* d_right_disp, const uint8_t* d_mask, int width, int height, int depth_bits, int src_pitch, int dst_pitch, bool subpixel, int min_disp, cudaStream_t stream); void cast_16bit_8bit_array(const uint16_t* arr16bits, uint8_t* arr8bits, int num_elements); void cast_8bit_16bit_array(const uint8_t* arr8bits, uint16_t* arr16bits, int num_elements); diff --git a/lib/libsgm/src/stereo_sgm.cpp b/lib/libsgm/src/stereo_sgm.cpp index 34f686fcb..58ba52284 100644 --- a/lib/libsgm/src/stereo_sgm.cpp +++ b/lib/libsgm/src/stereo_sgm.cpp @@ -178,7 +178,7 @@ namespace sgm { sgm::details::median_filter((uint16_t*)d_tmp_left_disp, (uint16_t*)d_left_disp, width, height, dst_pitch, stream); sgm::details::median_filter((uint16_t*)d_tmp_right_disp, (uint16_t*)d_right_disp, width, height, dst_pitch, stream); - sgm::details::check_consistency((uint16_t*)d_left_disp, (uint16_t*)d_right_disp, cu_res_->d_mask, width, height, input_depth_bits_, src_pitch, dst_pitch, param_.subpixel, stream); + sgm::details::check_consistency((uint16_t*)d_left_disp, (uint16_t*)d_right_disp, cu_res_->d_mask, width, height, input_depth_bits_, src_pitch, dst_pitch, param_.subpixel, min_disp, stream); if (!is_cuda_output(inout_type_) && output_depth_bits_ == 8) { sgm::details::cast_16bit_8bit_array((const uint16_t*)d_left_disp, (uint8_t*)d_tmp_left_disp, dst_pitch * height); diff --git a/lib/libsgm/src/winner_takes_all.cu b/lib/libsgm/src/winner_takes_all.cu index d06e2d895..1e82fcb65 100644 --- a/lib/libsgm/src/winner_takes_all.cu +++ b/lib/libsgm/src/winner_takes_all.cu @@ -51,13 +51,13 @@ using ComputeDisparity = uint32_t(*)(uint32_t, int, uint32_t, uint16_t*); __device__ inline uint32_t compute_disparity_normal(uint32_t disp, int min_disp, uint32_t cost = 0, uint16_t* smem = nullptr) { - return disp + min_disp; + return disp; // + min_disp; } template <size_t MAX_DISPARITY> __device__ inline uint32_t compute_disparity_subpixel(uint32_t disp, int min_disp, uint32_t cost, uint16_t* smem) { - int subp = disp + min_disp; + int subp = disp; // + min_disp; subp <<= sgm::StereoSGM::SUBPIXEL_SHIFT; if (disp > 0 && disp < MAX_DISPARITY - 1) { const int left = smem[disp - 1]; -- GitLab