Newer
Older
#include <ftl/render/splat_params.hpp>
#include "splatter_cuda.hpp"
#include <ftl/rgbd/camera.hpp>
#include <ftl/cuda_common.hpp>
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
using ftl::rgbd::Camera;
using ftl::cuda::TextureObject;
using ftl::render::SplatParams;
// Edge length, in threads, of the square thread blocks used by the kernel
// launches in this file (each block is T_PER_BLOCK x T_PER_BLOCK threads).
#define T_PER_BLOCK 8
/**
 * Squared Euclidean length of the integer offset (dx, dy).
 * The sum is formed in integer arithmetic and converted to float on return.
 */
__device__ inline float length2(int dx, int dy) {
	const int squared = dx*dx + dy*dy;
	return static_cast<float>(squared);
}
/**
 * 2D cross product of a and b, i.e. a.x*b.y - a.y*b.x.
 * The sign tells on which side of a the vector b lies.
 */
__device__ inline float cross(const float2 &a, const float2 &b) {
	const float forward = a.x * b.y;
	const float backward = a.y * b.x;
	return forward - backward;
}
/**
 * Test whether x lies in the closed interval [0, 1].
 *
 * @param x Value to test.
 * @return true if 0 <= x <= 1, false otherwise (including when x is NaN).
 */
__device__ inline bool within(float x) {
	// The two bounds must be tested separately: C++ parses `0.0f <= x <= 1.0f`
	// as `(0.0f <= x) <= 1.0f`, which compares a bool against 1 and is
	// therefore true for every x.
	return 0.0f <= x && x <= 1.0f;
}
/**
 * Exact component-wise equality of two float2 values.
 * No tolerance is applied; a NaN component never compares equal.
 */
__device__ inline bool operator==(const float2 &a, const float2 &b) {
	if (a.x != b.x) return false;
	return a.y == b.y;
}
/**
 * Test whether point p lies inside, or on the boundary of, triangle a-b-c.
 *
 * The test converts p to barycentric coordinates (alpha, beta, gamma) with
 * respect to the triangle and accepts the point when all three lie in [0, 1].
 *
 * @param a First triangle vertex.
 * @param b Second triangle vertex.
 * @param c Third triangle vertex.
 * @param p Point to classify.
 * @return true if p coincides with a vertex or lies within the triangle;
 *         false otherwise, including for zero-area triangles.
 */
__device__ inline bool insideTriangle(const float2 &a, const float2 &b, const float2 &c, const float2 &p)
{
	// A point that is exactly a vertex is accepted up front, so rounding in
	// the barycentric computation below cannot reject it.
	if (p == a || p == b || p == c) return true;

	const float det = (b.y - c.y)*(a.x - c.x) + (c.x - b.x)*(a.y - c.y);

	// Zero determinant means the vertices are collinear: the triangle has no
	// interior and dividing by det would produce inf/NaN coordinates.
	if (det == 0.0f) return false;

	const float alpha = ((b.y - c.y)*(p.x - c.x) + (c.x - b.x)*(p.y - c.y)) / det;
	const float beta = ((c.y - a.y)*(p.x - c.x) + (a.x - c.x)*(p.y - c.y)) / det;
	// Float literal keeps the arithmetic in single precision.
	const float gamma = 1.0f - alpha - beta;

	// Each bound is tested explicitly; a chained `0 <= v <= 1` would always
	// evaluate to true in C++.
	return alpha >= 0.0f && alpha <= 1.0f
		&& beta >= 0.0f && beta <= 1.0f
		&& gamma >= 0.0f && gamma <= 1.0f;
}
/**
 * Exchange the contents of two short2 values in place.
 */
__device__ inline void swap(short2 &a, short2 &b) {
	const short2 previousB = b;
	b = a;
	a = previousB;
}
/**
 * Write a horizontal span of constant depth into an integer depth buffer.
 *
 * Every pixel (x, y) with x1 <= x <= x2 that lies inside the buffer receives
 * atomicMin(existing, int(d*1000)), so the smallest encoded depth wins.
 * Pixels outside the buffer are skipped.
 *
 * NOTE(review): the kernels in this file encode depth with a factor of
 * 100000, whereas this helper uses 1000 - confirm which scale callers expect.
 *
 * @param depth_out Integer depth buffer to update.
 * @param y         Row of the span.
 * @param x1        First column of the span (inclusive).
 * @param x2        Last column of the span (inclusive).
 * @param d         Depth value written along the whole span.
 */
__device__ void drawLine(TextureObject<int> &depth_out, int y, int x1, int x2, float d) {
	// A row outside the buffer has nothing to draw; without this check the
	// atomics below would touch memory outside the image.
	if (y < 0 || y >= depth_out.height()) return;

	// Clamp the span once instead of testing every pixel.
	const int first = max(x1, 0);
	const int last = min(x2, static_cast<int>(depth_out.width()) - 1);

	// The encoded depth is the same for every pixel of the span.
	const int encoded = int(d*1000.0f);

	for (int x = first; x <= last; ++x) {
		atomicMin(&depth_out(x,y), encoded);
	}
}
/* See: https://github.com/bcrusco/CUDA-Rasterizer */
/**
 * Signed area of the triangle (a, b, c) given in integer pixel coordinates.
 * The sign flips when the vertex order is reversed; collinear vertices give 0.
 */
__device__ static inline
float calculateSignedArea(const short2 &a, const short2 &b, const short2 &c) {
	// Edge vectors from a to c and from a to b.
	const float acX = float(c.x - a.x);
	const float acY = float(c.y - a.y);
	const float abX = float(b.x - a.x);
	const float abY = float(b.y - a.y);
	return 0.5f * (acX * abY - abX * acY);
}
/**
 * Ratio between the signed area of sub-triangle (a, b, c) and the signed area
 * of the full triangle tri. Used as one barycentric weight.
 * The ratio is not finite when tri has zero area.
 */
__device__ static inline
float calculateBarycentricCoordinateValue(const short2 &a, const short2 &b, const short2 &c, const short2 (&tri)[3]) {
	const float subArea = calculateSignedArea(a, b, c);
	const float fullArea = calculateSignedArea(tri[0], tri[1], tri[2]);
	return subArea / fullArea;
}
/**
 * Calculate the barycentric coordinates of a point with respect to a triangle.
 *
 * The returned float3 holds (alpha, beta, gamma), the weights of tri[0],
 * tri[1] and tri[2] respectively. beta and gamma are area ratios; alpha is
 * derived from them so the three always sum to one. For a zero-area triangle
 * the ratios divide by zero and the result is not finite.
 *
 * TODO: Update to handle triangles coming in and not the array
 *
 * @param tri   The three triangle vertices in pixel coordinates.
 * @param point The pixel to express in barycentric form.
 * @return (alpha, beta, gamma) packed into a float3.
 */
__device__ static
float3 calculateBarycentricCoordinate(const short2 (&tri)[3], const short2 &point) {
	const float beta = calculateBarycentricCoordinateValue(tri[0], point, tri[2], tri);
	const float gamma = calculateBarycentricCoordinateValue(tri[0], tri[1], point, tri);
	// Float literal: a bare 1.0 is a double and would promote this per-pixel
	// subtraction to double precision on the device.
	const float alpha = 1.0f - beta - gamma;
	return make_float3(alpha, beta, gamma);
}
/**
 * Report whether a barycentric coordinate describes a point inside (or on the
 * edge of) its triangle, i.e. whether all three weights lie in [0, 1].
 * A NaN weight fails the test.
 */
__host__ __device__ static
bool isBarycentricCoordInBounds(const float3 &barycentricCoord) {
	// Written as "not (inside range)" so that NaN is rejected as well.
	if (!(barycentricCoord.x >= 0.0f && barycentricCoord.x <= 1.0f)) return false;
	if (!(barycentricCoord.y >= 0.0f && barycentricCoord.y <= 1.0f)) return false;
	if (!(barycentricCoord.z >= 0.0f && barycentricCoord.z <= 1.0f)) return false;
	return true;
}
/**
 * Interpolate a per-vertex value (here: depth) at a barycentric coordinate.
 * tri[i] is the value at vertex i; the result is the weighted sum of the
 * three values using the x, y and z weights of barycentricCoord.
 */
__device__ static
float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) {
	float z = barycentricCoord.x * tri[0];
	z += barycentricCoord.y * tri[1];
	z += barycentricCoord.z * tri[2];
	return z;
}
/*
 * Rasterise one small triangle per thread into an integer depth buffer.
 *
 * Each thread takes source pixel (x,y) together with its neighbours (x+A,y)
 * and (x,y+B). Their depths come from depth_in and their positions from
 * screen. Every output pixel covered by the resulting triangle receives the
 * barycentrically interpolated depth, encoded as int(depth*100000) and merged
 * with atomicMin so the smallest encoded depth written to a pixel wins.
 *
 * Launch layout: 2D grid of 2D blocks with one thread per depth_in pixel.
 * A one pixel border of depth_in is skipped, which is only sufficient for
 * neighbour offsets A, B of +1 or -1.
 *
 * A triangle is discarded when:
 *  - either neighbour depth differs from the centre depth by more than 0.04,
 *  - the centre depth is outside [camera.minDepth, camera.maxDepth],
 *  - its screen-space bounding box area exceeds 200 pixels, or
 *  - it has zero area.
 *
 * NOTE(review): screen presumably holds, for each source pixel, its position
 * in the output image - confirm against the code that fills it.
 * NOTE(review): output bounds are taken from params.camera width/height, so
 * depth_out is assumed to be at least that large - confirm.
 */
template <int A, int B>
__global__ void triangle_render_1_kernel(
		TextureObject<float> depth_in,
		TextureObject<int> depth_out,
		TextureObject<short2> screen, SplatParams params) {

	const int x = blockIdx.x*blockDim.x + threadIdx.x;
	const int y = blockIdx.y*blockDim.y + threadIdx.y;

	// Keep a one pixel border so the +/-1 neighbour reads stay in range.
	if (x < 1 || x >= depth_in.width()-1 || y < 1 || y >= depth_in.height()-1) return;

	float d[3];
	d[0] = depth_in.tex2D(x,y);
	d[1] = depth_in.tex2D(x+A,y);
	d[2] = depth_in.tex2D(x,y+B);

	// Is this triangle valid
	if (fabsf(d[0] - d[1]) > 0.04f || fabsf(d[0] - d[2]) > 0.04f) return;
	if (d[0] < params.camera.minDepth || d[0] > params.camera.maxDepth) return;

	short2 v[3];
	v[0] = screen.tex2D(x,y);
	v[1] = screen.tex2D(x+A,y);
	v[2] = screen.tex2D(x,y+B);

	// Attempt to back face cull, but not great
	//if ((v[1].x - v[0].x) * A < 0 || (v[2].y - v[0].y) * B < 0) return;

	const int minX = min(v[0].x, min(v[1].x, v[2].x));
	const int minY = min(v[0].y, min(v[1].y, v[2].y));
	const int maxX = max(v[0].x, max(v[1].x, v[2].x));
	const int maxY = max(v[0].y, max(v[1].y, v[2].y));

	// Remove really large triangles. The extents can each be up to 65535, so
	// the product is formed in 64 bits to avoid signed 32-bit overflow.
	if (static_cast<long long>(maxX - minX) * static_cast<long long>(maxY - minY) > 200) return;

	// A zero-area triangle can never cover a pixel (its barycentric
	// coordinates are inf/NaN), so skip the pixel loop entirely.
	if (calculateSignedArea(v[0], v[1], v[2]) == 0.0f) return;

	// Clamp the bounding box to the output image once, instead of testing
	// every candidate pixel. The casts avoid mixed signed/unsigned min/max.
	const int firstX = max(minX, 0);
	const int firstY = max(minY, 0);
	const int lastX = min(maxX, static_cast<int>(params.camera.width) - 1);
	const int lastY = min(maxY, static_cast<int>(params.camera.height) - 1);

	for (int sy=firstY; sy <= lastY; ++sy) {
		for (int sx=firstX; sx <= lastX; ++sx) {
			float3 baryCentricCoordinate = calculateBarycentricCoordinate(v, make_short2(sx, sy));

			if (isBarycentricCoordInBounds(baryCentricCoordinate)) {
				float new_depth = getZAtCoordinate(baryCentricCoordinate, d);
				atomicMin(&depth_out(sx,sy), int(new_depth*100000.0f));
			}
		}
	}
}
/**
 * Rasterise the source depth map into depth_out as a triangle mesh.
 *
 * Launches triangle_render_1_kernel four times on the given stream, once for
 * each combination of neighbour offsets (+/-1, +/-1), so that every source
 * pixel contributes the four triangles it forms with its neighbours. The grid
 * covers depth_in with T_PER_BLOCK x T_PER_BLOCK thread blocks.
 *
 * Launch errors are checked once after the last launch; no stream
 * synchronisation is performed here.
 *
 * @param depth_in  Source depth map.
 * @param depth_out Integer depth buffer updated by the kernels.
 * @param screen    Per source pixel position used as triangle vertices.
 * @param params    Render parameters passed by value to the kernels.
 * @param stream    CUDA stream on which the kernels are enqueued.
 */
void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<int> &depth_out, TextureObject<short2> &screen, const SplatParams &params, cudaStream_t stream) {
	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);

	triangle_render_1_kernel<1,1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params);
	triangle_render_1_kernel<1,-1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params);
	triangle_render_1_kernel<-1,1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params);
	triangle_render_1_kernel<-1,-1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params);
	cudaSafeCall( cudaGetLastError() );
}
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
// ==== BLENDER ========
/*
 * Blend two integer depth maps pixel by pixel, storing the result in depth_out.
 *
 * Both maps hold depth encoded as int(depth*100000). For each pixel the
 * nearer and farther of the two values are decoded, the farther one is
 * weighted by ftl::cuda::weighting(difference, alpha), and the weighted
 * average of the two is encoded back into depth_out.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per depth_in pixel.
 * The camera argument is not used by this kernel.
 */
__global__ void mesh_blender_kernel(
		TextureObject<int> depth_in,
		TextureObject<int> depth_out,
		ftl::rgbd::Camera camera,
		float alpha) {

	const int px = blockIdx.x*blockDim.x + threadIdx.x;
	const int py = blockIdx.y*blockDim.y + threadIdx.y;

	// Threads of the partially filled edge blocks have nothing to do.
	const bool insideX = px >= 0 && px < depth_in.width();
	const bool insideY = py >= 0 && py < depth_in.height();
	if (!(insideX && insideY)) return;

	const int incoming = depth_in.tex2D(px,py);
	const int existing = depth_out.tex2D(px,py);

	// Decode the nearer and the farther of the two candidates.
	const float nearDepth = (float)min(incoming,existing) / 100000.0f;
	const float farDepth = (float)max(incoming,existing) / 100000.0f;

	// The nearer depth has weight 1; the farther one is weighted by how far
	// apart the two values are.
	const float farWeight = ftl::cuda::weighting(farDepth-nearDepth, alpha);

	const float blended = (nearDepth + farDepth*farWeight) / (1.0f + farWeight);
	depth_out(px,py) = (int)(blended * 100000.0f);
}
/**
 * Blend depth_in into depth_out on the given stream.
 *
 * Launches mesh_blender_kernel with one thread per depth_in pixel, grouped
 * into T_PER_BLOCK x T_PER_BLOCK blocks, then checks for launch errors.
 * No stream synchronisation is performed here.
 */
void ftl::cuda::mesh_blender(TextureObject<int> &depth_in, TextureObject<int> &depth_out, const ftl::rgbd::Camera &camera, float alpha, cudaStream_t stream) {
	const dim3 threads(T_PER_BLOCK, T_PER_BLOCK);
	// Round up so partially covered edge tiles still get a block.
	const dim3 blocks(
		(depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK,
		(depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);

	mesh_blender_kernel<<<blocks, threads, 0, stream>>>(depth_in, depth_out, camera, alpha);
	cudaSafeCall( cudaGetLastError() );
}