diff --git a/components/operators/src/disparity/bilateral_filter.cpp b/components/operators/src/disparity/bilateral_filter.cpp
index fe916c0979b90c0ff032a33245bcd7ee1542db07..694a23a9a0e9bab1b144ed9b279f4952dce022ff 100644
--- a/components/operators/src/disparity/bilateral_filter.cpp
+++ b/components/operators/src/disparity/bilateral_filter.cpp
@@ -16,7 +16,7 @@ DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) :
 	
 	scale_ = 16.0;
 	n_disp_ = cfg->value("n_disp", 256);
-	radius_ = cfg->value("radius", 7);
+	radius_ = cfg->value("radius", 4);
 	iter_ = cfg->value("iter", 13);
 	filter_ = nullptr;
 }
diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cpp b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cpp
index 250a2abf0f17568bf29a34251f7491b151ccbdd8..c93d81331c80f0b451330cda3a4f558cafc589aa 100644
--- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cpp
+++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cpp
@@ -167,7 +167,11 @@ namespace
         if (dst.data != disp.data)
             disp.copyTo(dst, stream);
 
-        disp_bilateral_filter<T>(disp, dst, img, img.channels(), iters, table_color.ptr<float>(), (float *)table_space.data, table_space_step, radius, edge_disc, max_disc, StreamAccessor::getStream(stream));
+        if (img.channels() == 4) {
+            disp_bilateral_filter<T,uchar4>(disp, dst, img, iters, table_color.ptr<float>(), table_space_step, radius, edge_disc, max_disc, StreamAccessor::getStream(stream));
+        } else {
+            // TODO: If we need other channels...
+        }
     }
 
     void DispBilateralFilterImpl::apply(InputArray _disp, InputArray _image, OutputArray dst, Stream& stream)
@@ -184,7 +188,8 @@ namespace
         GpuMat img = _image.getGpuMat();
 
         CV_Assert( disp.type() == CV_8U || disp.type() == CV_16S );
-        CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC3 || img.type() == CV_8UC4 );
+        //CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC3 || img.type() == CV_8UC4 );
+        CV_Assert( img.type() == CV_8UC4 );  // Nick: We only need/allow 4 channel
         CV_Assert( disp.size() == img.size() );
 
         operators[disp.type()](ndisp_, radius_, iters_, edge_threshold_, max_disc_threshold_,
diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu
index 70485d875ff16054c4b571fac4d3cf3f696c2266..c1dd611c00e6830232a01d6f1eb86b643cd477cb 100644
--- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu
+++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu
@@ -266,13 +266,11 @@ namespace ftl { namespace cuda { namespace device
 			}
         }
 
-        template <typename T>
-        void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *table_color, const float* table_space, size_t table_step, int radius, T edge_disc, T max_disc, cudaStream_t stream)
+        template <typename T, typename C>
+        void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSz<C> img, int iters, const float *table_color, size_t table_step, int radius, T edge_disc, T max_disc, cudaStream_t stream)
         {
             dim3 threads(32, 8, 1);
             dim3 grid(1, 1, 1);
-            //grid.x = divUp(disp.cols, threads.x << 3);
-			//grid.y = divUp(disp.rows, threads.y);
 			grid.x = (disp.cols + 64 - 1) / 64;  // 64*64 = 4096, max pixels in block
 			grid.y = (disp.rows + 64 - 1) / 64;
 
@@ -282,55 +280,71 @@ namespace ftl { namespace cuda { namespace device
             // Iters must be odd.
             if (iters & 0x1 == 0) iters += 1;
 
-            switch (channels)
-            {
-            case 1:
-                for (int i = 0; i < iters; ++i)
-                {
-                    disp_bilateral_filter<uchar,4><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar*)img.data, img.step/sizeof(uchar), disp.rows, disp.cols, table_color, edge_disc, max_disc);
-                    cudaSafeCall( cudaGetLastError() );
-
-                    std::swap(in_ptr, out_ptr);
-
-                    //disp_bilateral_filter<1,7><<<grid, threads, 0, stream>>>(1, dispout.data, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc);
-                    //cudaSafeCall( cudaGetLastError() );
-                }
-                break;
-            case 3:
-                for (int i = 0; i < iters; ++i)
-                {
-                    disp_bilateral_filter<uchar3,4><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar3*)img.data, img.step/sizeof(uchar3), disp.rows, disp.cols, table_color, edge_disc, max_disc);
-                    cudaSafeCall( cudaGetLastError() );
-
-                    std::swap(in_ptr, out_ptr);
-
-                    //disp_bilateral_filter<3,7><<<grid, threads, 0, stream>>>(1, dispout.data, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc);
-                    //cudaSafeCall( cudaGetLastError() );
+            switch (radius) {
+                case 1  :
+                    for (int i = 0; i < iters; ++i) {
+                        disp_bilateral_filter<C,1><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                        cudaSafeCall( cudaGetLastError() );
+                        std::swap(in_ptr, out_ptr);
+                    } break;
+                case 2  :
+                    for (int i = 0; i < iters; ++i) {
+                        disp_bilateral_filter<C,2><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                        cudaSafeCall( cudaGetLastError() );
+                        std::swap(in_ptr, out_ptr);
+                    } break;
+                case 3  :
+                    for (int i = 0; i < iters; ++i) {
+                        disp_bilateral_filter<C,3><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                        cudaSafeCall( cudaGetLastError() );
+                        std::swap(in_ptr, out_ptr);
+                    } break;
+                case 4  :
+                    for (int i = 0; i < iters; ++i) {
+                        disp_bilateral_filter<C,4><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                        cudaSafeCall( cudaGetLastError() );
+                        std::swap(in_ptr, out_ptr);
+                    } break;
+                case 5  :
+                    for (int i = 0; i < iters; ++i) {
+                        disp_bilateral_filter<C,5><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                        cudaSafeCall( cudaGetLastError() );
+                        std::swap(in_ptr, out_ptr);
+                    } break;
+                case 6  :
+                    for (int i = 0; i < iters; ++i) {
+                        disp_bilateral_filter<C,6><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                        cudaSafeCall( cudaGetLastError() );
+                        std::swap(in_ptr, out_ptr);
+                    } break;
+                case 7  :
+                    for (int i = 0; i < iters; ++i) {
+                        disp_bilateral_filter<C,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (C*)img.data, img.step/sizeof(C), disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                        cudaSafeCall( cudaGetLastError() );
+                        std::swap(in_ptr, out_ptr);
+                    } break;
+                default:
+                    CV_Error(cv::Error::BadTileSize, "Unsupported kernel radius");
                 }
-				break;
-			case 4:  // Nick: Support 4 channel
-                for (int i = 0; i < iters; ++i)
-                {
-                    disp_bilateral_filter<uchar4,4><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar4*)img.data, img.step/sizeof(uchar4), disp.rows, disp.cols, table_color, edge_disc, max_disc);
-                    cudaSafeCall( cudaGetLastError() );
-
-                    std::swap(in_ptr, out_ptr);
-
-                    //disp_bilateral_filter<4,7><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc);
-                    //cudaSafeCall( cudaGetLastError() );
-                }
-                break;
-            default:
-                CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
-            }
+                
 
             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
         }
 
-        template void disp_bilateral_filter<uchar>(cv::cuda::PtrStepSz<uchar> disp, cv::cuda::PtrStepSz<uchar> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *table_color, const float *table_space, size_t table_step, int radius, uchar, uchar, cudaStream_t stream);
-		template void disp_bilateral_filter<short>(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<short> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *table_color, const float *table_space, size_t table_step, int radius, short, short, cudaStream_t stream);
-		template void disp_bilateral_filter<float>(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *table_color, const float *table_space, size_t table_step, int radius, float, float, cudaStream_t stream);
+        // These are commented out since we don't use them and it slows compile
+        //template void disp_bilateral_filter<uchar,uchar>(cv::cuda::PtrStepSz<uchar> disp, cv::cuda::PtrStepSz<uchar> dispout, cv::cuda::PtrStepSz<uchar> img, int iters, const float *table_color, size_t table_step, int radius, uchar, uchar, cudaStream_t stream);
+		//template void disp_bilateral_filter<short,uchar>(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<short> dispout, cv::cuda::PtrStepSz<uchar> img, int iters, const float *table_color, size_t table_step, int radius, short, short, cudaStream_t stream);
+        //template void disp_bilateral_filter<float,uchar>(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> dispout, cv::cuda::PtrStepSz<uchar> img, int iters, const float *table_color, size_t table_step, int radius, float, float, cudaStream_t stream);
+
+        //template void disp_bilateral_filter<uchar,uchar3>(cv::cuda::PtrStepSz<uchar> disp, cv::cuda::PtrStepSz<uchar> dispout, cv::cuda::PtrStepSz<uchar3> img, int iters, const float *table_color, size_t table_step, int radius, uchar, uchar, cudaStream_t stream);
+		//template void disp_bilateral_filter<short,uchar3>(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<short> dispout, cv::cuda::PtrStepSz<uchar3> img, int iters, const float *table_color, size_t table_step, int radius, short, short, cudaStream_t stream);
+        //template void disp_bilateral_filter<float,uchar3>(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> dispout, cv::cuda::PtrStepSz<uchar3> img, int iters, const float *table_color, size_t table_step, int radius, float, float, cudaStream_t stream);
+
+        template void disp_bilateral_filter<uchar,uchar4>(cv::cuda::PtrStepSz<uchar> disp, cv::cuda::PtrStepSz<uchar> dispout, cv::cuda::PtrStepSz<uchar4> img, int iters, const float *table_color, size_t table_step, int radius, uchar, uchar, cudaStream_t stream);
+		template void disp_bilateral_filter<short,uchar4>(cv::cuda::PtrStepSz<short> disp, cv::cuda::PtrStepSz<short> dispout, cv::cuda::PtrStepSz<uchar4> img, int iters, const float *table_color, size_t table_step, int radius, short, short, cudaStream_t stream);
+        template void disp_bilateral_filter<float,uchar4>(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<float> dispout, cv::cuda::PtrStepSz<uchar4> img, int iters, const float *table_color, size_t table_step, int radius, float, float, cudaStream_t stream);
+        
     } // namespace bilateral_filter
 }}} // namespace ftl { namespace cuda { namespace cudev
 
diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.hpp b/components/operators/src/disparity/opencv/disparity_bilateral_filter.hpp
index f4147e8b3101d4c571937c70a0d4e4cc079db6e4..80d9799ad2c51977efa2fddfcb8eba6b7c97aac4 100644
--- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.hpp
+++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.hpp
@@ -2,7 +2,7 @@ namespace ftl { namespace cuda { namespace device
 {
     namespace disp_bilateral_filter
     {
-        template<typename T>
-        void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSzb img, int channels, int iters, const float *, const float *, size_t, int radius, T edge_disc, T max_disc, cudaStream_t stream);
+        template<typename T, typename C>
+        void disp_bilateral_filter(cv::cuda::PtrStepSz<T> disp, cv::cuda::PtrStepSz<T> dispout, cv::cuda::PtrStepSz<C> img, int iters, const float *, size_t, int radius, T edge_disc, T max_disc, cudaStream_t stream);
     }
 }}}