diff --git a/components/operators/src/disparity/bilateral_filter.cpp b/components/operators/src/disparity/bilateral_filter.cpp index fe916c0979b90c0ff032a33245bcd7ee1542db07..694a23a9a0e9bab1b144ed9b279f4952dce022ff 100644 --- a/components/operators/src/disparity/bilateral_filter.cpp +++ b/components/operators/src/disparity/bilateral_filter.cpp @@ -16,7 +16,7 @@ DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) : scale_ = 16.0; n_disp_ = cfg->value("n_disp", 256); - radius_ = cfg->value("radius", 7); + radius_ = cfg->value("radius", 4); iter_ = cfg->value("iter", 13); filter_ = nullptr; } diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cpp b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cpp index 250a2abf0f17568bf29a34251f7491b151ccbdd8..c93d81331c80f0b451330cda3a4f558cafc589aa 100644 --- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cpp +++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cpp @@ -167,7 +167,11 @@ namespace if (dst.data != disp.data) disp.copyTo(dst, stream); - disp_bilateral_filter<T>(disp, dst, img, img.channels(), iters, table_color.ptr<float>(), (float *)table_space.data, table_space_step, radius, edge_disc, max_disc, StreamAccessor::getStream(stream)); + if (img.channels() == 4) { + disp_bilateral_filter<T,uchar4>(disp, dst, img, iters, table_color.ptr<float>(), table_space_step, radius, edge_disc, max_disc, StreamAccessor::getStream(stream)); + } else { + // TODO: If we need other channels... + } } void DispBilateralFilterImpl::apply(InputArray _disp, InputArray _image, OutputArray dst, Stream& stream) @@ -184,7 +188,8 @@ namespace GpuMat img = _image.getGpuMat(); CV_Assert( disp.type() == CV_8U || disp.type() == CV_16S ); - CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC3 || img.type() == CV_8UC4 ); + //CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC3 || img.type() == CV_8UC4 ); + CV_Assert( img.type() == CV_8UC4 ); // Nick: We only need/allow 4 channel CV_Assert( disp.size() == img.size() ); operators[disp.type()](ndisp_, radius_, iters_, edge_threshold_, max_disc_threshold_, diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu index 70485d875ff16054c4b571fac4d3cf3f696c2266..c1dd611c00e6830232a01d6f1eb86b643cd477cb 100644 --- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu +++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu @@ -266,13 +266,11 @@ namespace ftl { namespace cuda { namespace device } } - template <typename T> - void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *table_color, const float* table_space, size_t table_step, int radius, T edge_disc, T max_disc, cudaStream_t stream) + template <typename T, typename C> + void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSz<C> img, int iters, const float *table_color, size_t table_step, int radius, T edge_disc, T max_disc, cudaStream_t stream) { dim3 threads(32, 8, 1); dim3 grid(1, 1, 1); - //grid.x = divUp(disp.cols, threads.x << 3); - //grid.y = divUp(disp.rows, threads.y); grid.x = (disp.cols + 64 - 1) / 64; // 64*64 = 4096, max pixels in block grid.y = (disp.rows + 64 - 1) / 64; @@ -282,55 +280,71 @@ namespace ftl { namespace cuda { namespace device // Iters must be odd. if (iters & 0x1 == 0) iters += 1; - switch (channels) - { - case 1: - for (int i = 0; i < iters; ++i) - { - disp_bilateral_filter<uchar,4><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar*)img.data, img.step/sizeof(uchar), disp.rows, disp.cols, table_color, edge_disc, max_disc); - cudaSafeCall( cudaGetLastError() ); - - std::swap(in_ptr, out_ptr); - - //disp_bilateral_filter<1,7><<<grid, threads, 0, stream>>>(1, dispout.data, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc); - //cudaSafeCall( cudaGetLastError() ); - } - break; - case 3: - for (int i = 0; i < iters; ++i) - { - disp_bilateral_filter<uchar3,4><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar3*)img.data, img.step/sizeof(uchar3), disp.rows, disp.cols, table_color, edge_disc, max_disc); - cudaSafeCall( cudaGetLastError() ); - - std::swap(in_ptr, out_ptr); - - //disp_bilateral_filter<3,7><<<grid, threads, 0, stream>>>(1, dispout.data, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc); - //cudaSafeCall( cudaGetLastError() ); + switch (radius) { + case 1 : + for (int i = 0; i < iters; ++i) { + disp_bilateral_filter<C,1><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc); + cudaSafeCall( cudaGetLastError() ); + std::swap(in_ptr, out_ptr); + } break; + case 2 : + for (int i = 0; i < iters; ++i) { + disp_bilateral_filter<C,2><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc); + cudaSafeCall( cudaGetLastError() ); + std::swap(in_ptr, out_ptr); + } break; + case 3 : + for (int i = 0; i < iters; ++i) { + disp_bilateral_filter<C,3><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc); + cudaSafeCall( cudaGetLastError() ); + std::swap(in_ptr, out_ptr); + } break; + case 4 : + for (int i = 0; i < iters; ++i) { + disp_bilateral_filter<C,4><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc); + cudaSafeCall( cudaGetLastError() ); + std::swap(in_ptr, out_ptr); + } break; + case 5 : + for (int i = 0; i < iters; ++i) { + disp_bilateral_filter<C,5><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc); + cudaSafeCall( cudaGetLastError() ); + std::swap(in_ptr, out_ptr); + } break; + case 6 : + for (int i = 0; i < iters; ++i) { + disp_bilateral_filter<C,6><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc); + cudaSafeCall( cudaGetLastError() ); + std::swap(in_ptr, out_ptr); + } break; + case 7 : + for (int i = 0; i < iters; ++i) { + disp_bilateral_filter<C,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc); + cudaSafeCall( cudaGetLastError() ); + std::swap(in_ptr, out_ptr); + } break; + default: + CV_Error(cv::Error::BadTileSize, "Unsupported kernel radius"); } - break; - case 4: // Nick: Support 4 channel - for (int i = 0; i < iters; ++i) - { - disp_bilateral_filter<uchar4,4><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar4*)img.data, img.step/sizeof(uchar4), disp.rows, disp.cols, table_color, edge_disc, max_disc); - cudaSafeCall( cudaGetLastError() ); - - std::swap(in_ptr, out_ptr); - - //disp_bilateral_filter<4,7><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc); - //cudaSafeCall( cudaGetLastError() ); - } - break; - default: - CV_Error(cv::Error::BadNumChannels, "Unsupported channels count"); - } + if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - template void disp_bilateral_filter<uchar>(cv::cuda::PtrStepSz<uchar> disp, cv::cuda::PtrStepSz<uchar> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *table_color, const float *table_space, size_t table_step, int radius, uchar, uchar, cudaStream_t stream); - template void disp_bilateral_filter<short>(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<short> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *table_color, const float *table_space, size_t table_step, int radius, short, short, cudaStream_t stream); - template void disp_bilateral_filter<float>(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *table_color, const float *table_space, size_t table_step, int radius, float, float, cudaStream_t stream); + // These are commented out since we don't use them and it slows compile + //template void disp_bilateral_filter<uchar,uchar>(cv::cuda::PtrStepSz<uchar> disp, cv::cuda::PtrStepSz<uchar> dispout, cv::cuda::PtrStepSz<uchar> img, int iters, const float *table_color, size_t table_step, int radius, uchar, uchar, cudaStream_t stream); + //template void disp_bilateral_filter<short,uchar>(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<short> dispout, cv::cuda::PtrStepSz<uchar> img, int iters, const float *table_color, size_t table_step, int radius, short, short, cudaStream_t stream); + //template void disp_bilateral_filter<float,uchar>(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> dispout, cv::cuda::PtrStepSz<uchar> img, int iters, const float *table_color, size_t table_step, int radius, float, float, cudaStream_t stream); + + //template void disp_bilateral_filter<uchar,uchar3>(cv::cuda::PtrStepSz<uchar> disp, cv::cuda::PtrStepSz<uchar> dispout, cv::cuda::PtrStepSz<uchar3> img, int iters, const float *table_color, size_t table_step, int radius, uchar, uchar, cudaStream_t stream); + //template void disp_bilateral_filter<short,uchar3>(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<short> dispout, cv::cuda::PtrStepSz<uchar3> img, int iters, const float *table_color, size_t table_step, int radius, short, short, cudaStream_t stream); + //template void disp_bilateral_filter<float,uchar3>(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> dispout, cv::cuda::PtrStepSz<uchar3> img, int iters, const float *table_color, size_t table_step, int radius, float, float, cudaStream_t stream); + + template void disp_bilateral_filter<uchar,uchar4>(cv::cuda::PtrStepSz<uchar> disp, cv::cuda::PtrStepSz<uchar> dispout, cv::cuda::PtrStepSz<uchar4> img, int iters, const float *table_color, size_t table_step, int radius, uchar, uchar, cudaStream_t stream); + template void disp_bilateral_filter<short,uchar4>(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<short> dispout, cv::cuda::PtrStepSz<uchar4> img, int iters, const float *table_color, size_t table_step, int radius, short, short, cudaStream_t stream); + template void disp_bilateral_filter<float,uchar4>(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> dispout, cv::cuda::PtrStepSz<uchar4> img, int iters, const float *table_color, size_t table_step, int radius, float, float, cudaStream_t stream); + } // namespace bilateral_filter }}} // namespace ftl { namespace cuda { namespace cudev diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.hpp b/components/operators/src/disparity/opencv/disparity_bilateral_filter.hpp index f4147e8b3101d4c571937c70a0d4e4cc079db6e4..80d9799ad2c51977efa2fddfcb8eba6b7c97aac4 100644 --- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.hpp +++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.hpp @@ -2,7 +2,7 @@ namespace ftl { namespace cuda { namespace device { namespace disp_bilateral_filter { - template<typename T> - void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *, const float *, size_t, int radius, T edge_disc, T max_disc, cudaStream_t stream); + template<typename T, typename C> + void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSz<C> img, int iters, const float *, size_t, int radius, T edge_disc, T max_disc, cudaStream_t stream); } }}}