diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu index 6a924c72702274ae79600481e8e3317cef4fd1d5..e5716a649b6b6de22456eb48bd89212534e4843d 100644 --- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu +++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu @@ -196,7 +196,8 @@ namespace ftl { namespace cuda { namespace device const int counter = s_counter; - // Each half warp takes the same queue item (hence / 16) + // Stride the queue to reduce bank conflicts + // Each thread takes a pixel that needs processing for (int ix=(threadIdx.x + threadIdx.y*blockDim.x); ix<counter; ix+=(blockDim.x*blockDim.y)) { const short2 pt = s_queue[ix]; const int x = pt.x;