diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu index 98ce1bcc0cd7fee6e95a8d02f8e8735bf2142818..96fa70012adb59d992dda651018bef4239cca213 100644 --- a/components/renderers/cpp/src/triangle_render.cu +++ b/components/renderers/cpp/src/triangle_render.cu @@ -14,6 +14,7 @@ __device__ inline float length2(int dx, int dy) { return dx*dx + dy*dy; } /* * Convert source screen position to output screen coordinates. */ + template <int A, int B> __global__ void triangle_render_1_kernel( TextureObject<float> depth_in, TextureObject<int> depth_out, @@ -24,30 +25,33 @@ __device__ inline float length2(int dx, int dy) { return dx*dx + dy*dy; } if (x < 0 || x >= depth_in.width()-1 || y < 0 || y >= depth_in.height()-1) return; float d[3]; - d[0] = depth_in.tex2D(x,y); - d[1] = depth_in.tex2D(x+1,y); - d[2] = depth_in.tex2D(x,y+1); + d[0] = depth_in.tex2D(x+A,y+B); + d[1] = depth_in.tex2D(x+(1-A),y+B); + d[2] = depth_in.tex2D(x+A,y+(1-B)); // Is this triangle valid if (fabs(d[0] - d[1]) > 0.04f || fabs(d[0] - d[2]) > 0.04f) return; short2 s[3]; - s[0] = screen.tex2D(x,y); - s[1] = screen.tex2D(x+1,y); - s[2] = screen.tex2D(x,y+1); - - s[1].x -= s[0].x; + s[0] = screen.tex2D(x+A,y+B); + s[1] = screen.tex2D(x+(1-A),y+B); + s[2] = screen.tex2D(x+A,y+(1-B)); + + const int dx = (A) ? -1 : 1; + const int dy = (B) ? -1 : 1; + + s[1].x = (A) ? s[0].x - s[1].x : s[1].x - s[0].x; s[1].y -= s[0].y; s[2].x -= s[0].x; - s[2].y -= s[0].y; + s[2].y = (B) ? s[0].y - s[2].y : s[2].y - s[0].y; s[1].x = min(s[1].x,10); - s[2].y = min(s[2].y,10); + s[2].y = min(s[2].y,10); for (int sx=0; sx < s[1].x; ++sx) { for (int sy=0; sy < min(s[1].x - sx, s[2].y); ++sy) { //if (sx > s[2].y-sy) continue; - if (sx+s[0].x >= params.camera.width || sy+s[0].y >= params.camera.height) continue; + if (dx*sx+s[0].x >= params.camera.width || dy*sy+s[0].y >= params.camera.height) continue; float dist1 = length2(sx,sy); float dist2 = length2(s[1].x-sx, s[1].y-sy); @@ -55,7 +59,7 @@ __device__ inline float length2(int dx, int dy) { return dx*dx + dy*dy; } float new_depth = (d[0]*dist1 + d[1]*dist2 + d[2] * dist3) / (dist1+dist2+dist3); - atomicMin(&depth_out(sx+s[0].x,sy+s[0].y), int(new_depth*1000.0f)); + atomicMin(&depth_out(dx*sx+s[0].x,dy*sy+s[0].y), int(new_depth*1000.0f)); } } } @@ -64,6 +68,9 @@ void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<i const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - triangle_render_1_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + triangle_render_1_kernel<0,0><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + triangle_render_1_kernel<0,1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + triangle_render_1_kernel<1,0><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + triangle_render_1_kernel<1,1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); cudaSafeCall( cudaGetLastError() ); }