Tidy code

ae676a99 · Nicolas Pope · 4b2000a5 · ae676a99
Commit ae676a99 authored 5 years ago by Nicolas Pope
--- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu
+++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu
@@ -63,7 +63,39 @@ namespace ftl { namespace cuda { namespace device
 {
    namespace disp_bilateral_filter
    {
-        template <int channels>
+		template <typename C>  // C: pixel type whose channels are compared
+		__device__ inline uchar distance(C a, C b);  // declaration only; bodies are supplied by explicit specialisations
+		template <>  // uchar4 pixels: compares x, y, z only; the w component is never read
+		__device__ inline uchar distance(uchar4 a, uchar4 b) {
+			uchar x = ::abs(a.x - b.x);  // absolute difference of the x channels
+            uchar y = ::abs(a.y - b.y);  // absolute difference of the y channels
+			uchar z = ::abs(a.z - b.z);  // absolute difference of the z channels
+			return (::max(::max(x, y), z));  // largest of the three channel differences; NOTE(review): the disabled variant below calls the signed __vabsdiffs4 - presumably __vabsdiffu4 is wanted for unsigned channels, confirm before re-enabling
+			/*union {
+				unsigned int v;
+				uchar d[4];
+			};
+			v = __vabsdiffs4(*(unsigned int*)&a, *(unsigned int*)&b);
+            return (::max(::max(d[0], d[1]), d[2]));*/
+		}
+		template <>  // uchar3 pixels: same computation as the uchar4 specialisation
+		__device__ inline uchar distance(uchar3 a, uchar3 b) {
+			uchar x = ::abs(a.x - b.x);  // absolute difference of the x channels
+            uchar y = ::abs(a.y - b.y);  // absolute difference of the y channels
+            uchar z = ::abs(a.z - b.z);  // absolute difference of the z channels
+            return (::max(::max(x, y), z));  // largest of the three channel differences
+		}
+		template <>  // single-channel pixels
+		__device__ inline uchar distance(uchar a, uchar b) {
+			return abs(int(a)-int(b));  // both values are widened to int before subtracting; result is the absolute difference
+		}
+        /*template <int channels>
        struct DistRgbMax
        {
            static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
@@ -97,7 +129,7 @@ namespace ftl { namespace cuda { namespace device
            {
                return ::abs(a[0] - b[0]);
            }
-		};
+		};*/
 		__device__ inline float calc_colour_weight(int d) {
 			return exp(-float(d * d) / (2.0f * 10.0f * 10.0f));
@@ -109,9 +141,9 @@ namespace ftl { namespace cuda { namespace device
 		template <>  // float specialisation of Abs
 		__device__ inline float Abs<float>(float v) { return fabsf(v); }  // returns fabsf(v)
-        template <int channels, int CRADIUS, typename T>
+        template <typename C, int CRADIUS, typename T>
        __global__ void disp_bilateral_filter(int t, const T* __restrict__ disp, T* __restrict__ dispout, size_t disp_step,
-            const uchar* __restrict__ img, size_t img_step, int h, int w,
+            const C* __restrict__ img, size_t img_step, int h, int w,
            const float* __restrict__ ctable_color,
            T cedge_disc, T cmax_disc)
        {
@@ -179,7 +211,7 @@ namespace ftl { namespace cuda { namespace device
 				float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
-				const uchar* ic = img + y * img_step + channels * x;
+				const C ic = *(img + y * img_step + x);
 				//#pragma unroll
 				for(int yi = -CRADIUS; yi <= CRADIUS; ++yi)
@@ -192,9 +224,9 @@ namespace ftl { namespace cuda { namespace device
 					//for (int xi = (threadIdx.x%16) - CRADIUS; xi <= CRADIUS; xi+=16) {
 					//int xi = (threadIdx.x%16) - CRADIUS;
 					//if (xi < CRADIUS*2+1) {
-						const uchar* in = img + (y+yi) * img_step + channels * (xi+x);
+						const C in = *(img + (y+yi) * img_step + (xi+x));
-						uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
+						uchar dist_rgb = distance(ic,in);
 						const float weight = ctable_color[dist_rgb] * s_space[::abs(yi)*(CRADIUS+1) + ::abs(xi)];
@@ -267,7 +299,7 @@ namespace ftl { namespace cuda { namespace device
            case 1:
                for (int i = 0; i < iters; ++i)
                {
-                    disp_bilateral_filter<1,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                    disp_bilateral_filter<uchar,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar*)img.data, img.step/sizeof(uchar), disp.rows, disp.cols, table_color, edge_disc, max_disc);
                    cudaSafeCall( cudaGetLastError() );
                    std::swap(in_ptr, out_ptr);
@@ -279,7 +311,7 @@ namespace ftl { namespace cuda { namespace device
            case 3:
                for (int i = 0; i < iters; ++i)
                {
-                    disp_bilateral_filter<3,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                    disp_bilateral_filter<uchar3,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar3*)img.data, img.step/sizeof(uchar3), disp.rows, disp.cols, table_color, edge_disc, max_disc);
                    cudaSafeCall( cudaGetLastError() );
                    std::swap(in_ptr, out_ptr);
@@ -291,7 +323,7 @@ namespace ftl { namespace cuda { namespace device
 			case 4:  // Nick: Support 4 channel
                for (int i = 0; i < iters; ++i)
                {
-                    disp_bilateral_filter<4,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc);
+                    disp_bilateral_filter<uchar4,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar4*)img.data, img.step/sizeof(uchar4), disp.rows, disp.cols, table_color, edge_disc, max_disc);
                    cudaSafeCall( cudaGetLastError() );
                    std::swap(in_ptr, out_ptr);