Skip to content
Snippets Groups Projects
Commit ae676a99 authored by Nicolas Pope
Browse files

Tidy code

parent 4b2000a5
No related branches found
No related tags found
1 merge request: !287 Improved bilateral filter performance
...@@ -63,7 +63,39 @@ namespace ftl { namespace cuda { namespace device ...@@ -63,7 +63,39 @@ namespace ftl { namespace cuda { namespace device
{ {
namespace disp_bilateral_filter namespace disp_bilateral_filter
{ {
template <int channels>
// Colour distance between two pixel values of type C, returned as a uchar.
// Only the declaration lives here; the explicit specialisations that follow
// (uchar4, uchar3, uchar) supply the definitions.
template <typename C>
__device__ inline uchar distance(C a, C b);
// uchar4 specialisation: the largest per-channel absolute difference over
// the x, y and z components. The w component is not read.
template <>
__device__ inline uchar distance(uchar4 a, uchar4 b) {
    // uchar operands are promoted to int before the subtraction, so the
    // difference can go negative and ::abs brings it back into 0..255.
    const uchar dx = ::abs(a.x - b.x);
    const uchar dy = ::abs(a.y - b.y);
    const uchar dz = ::abs(a.z - b.z);

    /* Disabled alternative operating on the packed 32-bit value:
    union {
        unsigned int v;
        uchar d[4];
    };
    v = __vabsdiffs4(*(unsigned int*)&a, *(unsigned int*)&b);
    return (::max(::max(d[0], d[1]), d[2]));
    NOTE(review): __vabsdiffs4 is the signed per-byte variant; __vabsdiffu4
    looks like the unsigned counterpart - confirm before re-enabling. */

    const uchar dxy = ::max(dx, dy);
    return ::max(dxy, dz);
}
// uchar3 specialisation: the largest of the three per-channel absolute
// differences between a and b.
template <>
__device__ inline uchar distance(uchar3 a, uchar3 b) {
    // Each subtraction is carried out in int after integral promotion.
    const uchar dx = ::abs(a.x - b.x);
    const uchar dy = ::abs(a.y - b.y);
    const uchar dz = ::abs(a.z - b.z);

    const uchar dxy = ::max(dx, dy);
    return ::max(dxy, dz);
}
// Single-channel specialisation: absolute difference of the two values.
template <>
__device__ inline uchar distance(uchar a, uchar b) {
    // Widen to int first so the subtraction cannot wrap around.
    const int delta = static_cast<int>(a) - static_cast<int>(b);
    // delta lies in [-255, 255], so its magnitude always fits in a uchar.
    return static_cast<uchar>(delta < 0 ? -delta : delta);
}
/*template <int channels>
struct DistRgbMax struct DistRgbMax
{ {
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b) static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
...@@ -97,7 +129,7 @@ namespace ftl { namespace cuda { namespace device ...@@ -97,7 +129,7 @@ namespace ftl { namespace cuda { namespace device
{ {
return ::abs(a[0] - b[0]); return ::abs(a[0] - b[0]);
} }
}; };*/
__device__ inline float calc_colour_weight(int d) { __device__ inline float calc_colour_weight(int d) {
return exp(-float(d * d) / (2.0f * 10.0f * 10.0f)); return exp(-float(d * d) / (2.0f * 10.0f * 10.0f));
...@@ -109,9 +141,9 @@ namespace ftl { namespace cuda { namespace device ...@@ -109,9 +141,9 @@ namespace ftl { namespace cuda { namespace device
template <> template <>
__device__ inline float Abs<float>(float v) { return fabsf(v); } __device__ inline float Abs<float>(float v) { return fabsf(v); }
template <int channels, int CRADIUS, typename T> template <typename C, int CRADIUS, typename T>
__global__ void disp_bilateral_filter(int t, const T* __restrict__ disp, T* __restrict__ dispout, size_t disp_step, __global__ void disp_bilateral_filter(int t, const T* __restrict__ disp, T* __restrict__ dispout, size_t disp_step,
const uchar* __restrict__ img, size_t img_step, int h, int w, const C* __restrict__ img, size_t img_step, int h, int w,
const float* __restrict__ ctable_color, const float* __restrict__ ctable_color,
T cedge_disc, T cmax_disc) T cedge_disc, T cmax_disc)
{ {
...@@ -179,7 +211,7 @@ namespace ftl { namespace cuda { namespace device ...@@ -179,7 +211,7 @@ namespace ftl { namespace cuda { namespace device
float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
const uchar* ic = img + y * img_step + channels * x; const C ic = *(img + y * img_step + x);
//#pragma unroll //#pragma unroll
for(int yi = -CRADIUS; yi <= CRADIUS; ++yi) for(int yi = -CRADIUS; yi <= CRADIUS; ++yi)
...@@ -192,9 +224,9 @@ namespace ftl { namespace cuda { namespace device ...@@ -192,9 +224,9 @@ namespace ftl { namespace cuda { namespace device
//for (int xi = (threadIdx.x%16) - CRADIUS; xi <= CRADIUS; xi+=16) { //for (int xi = (threadIdx.x%16) - CRADIUS; xi <= CRADIUS; xi+=16) {
//int xi = (threadIdx.x%16) - CRADIUS; //int xi = (threadIdx.x%16) - CRADIUS;
//if (xi < CRADIUS*2+1) { //if (xi < CRADIUS*2+1) {
const uchar* in = img + (y+yi) * img_step + channels * (xi+x); const C in = *(img + (y+yi) * img_step + (xi+x));
uchar dist_rgb = DistRgbMax<channels>::calc(in, ic); uchar dist_rgb = distance(ic,in);
const float weight = ctable_color[dist_rgb] * s_space[::abs(yi)*(CRADIUS+1) + ::abs(xi)]; const float weight = ctable_color[dist_rgb] * s_space[::abs(yi)*(CRADIUS+1) + ::abs(xi)];
...@@ -267,7 +299,7 @@ namespace ftl { namespace cuda { namespace device ...@@ -267,7 +299,7 @@ namespace ftl { namespace cuda { namespace device
case 1: case 1:
for (int i = 0; i < iters; ++i) for (int i = 0; i < iters; ++i)
{ {
disp_bilateral_filter<1,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc); disp_bilateral_filter<uchar,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar*)img.data, img.step/sizeof(uchar), disp.rows, disp.cols, table_color, edge_disc, max_disc);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
std::swap(in_ptr, out_ptr); std::swap(in_ptr, out_ptr);
...@@ -279,7 +311,7 @@ namespace ftl { namespace cuda { namespace device ...@@ -279,7 +311,7 @@ namespace ftl { namespace cuda { namespace device
case 3: case 3:
for (int i = 0; i < iters; ++i) for (int i = 0; i < iters; ++i)
{ {
disp_bilateral_filter<3,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc); disp_bilateral_filter<uchar3,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar3*)img.data, img.step/sizeof(uchar3), disp.rows, disp.cols, table_color, edge_disc, max_disc);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
std::swap(in_ptr, out_ptr); std::swap(in_ptr, out_ptr);
...@@ -291,7 +323,7 @@ namespace ftl { namespace cuda { namespace device ...@@ -291,7 +323,7 @@ namespace ftl { namespace cuda { namespace device
case 4: // Nick: Support 4 channel case 4: // Nick: Support 4 channel
for (int i = 0; i < iters; ++i) for (int i = 0; i < iters; ++i)
{ {
disp_bilateral_filter<4,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols, table_color, edge_disc, max_disc); disp_bilateral_filter<uchar4,7><<<grid, threads, 0, stream>>>(0, in_ptr, out_ptr, disp.step/sizeof(T), (uchar4*)img.data, img.step/sizeof(uchar4), disp.rows, disp.cols, table_color, edge_disc, max_disc);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
std::swap(in_ptr, out_ptr); std::swap(in_ptr, out_ptr);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment