Newer
Older
#include <ftl/cuda/normals.hpp>
#include <ftl/cuda/weighting.hpp>
#define T_PER_BLOCK 16
#define MINF __int_as_float(0xff800000)
__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
ftl::cuda::TextureObject<float4> input) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if(x >= input.width() || y >= input.height()) return;
output(x,y) = make_float4(0, 0, 0, 0);
if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) {
const float3 CC = make_float3(input.tex2D((int)x+0, (int)y+0)); //[(y+0)*width+(x+0)];
const float3 PC = make_float3(input.tex2D((int)x+0, (int)y+1)); //[(y+1)*width+(x+0)];
const float3 CP = make_float3(input.tex2D((int)x+1, (int)y+0)); //[(y+0)*width+(x+1)];
const float3 MC = make_float3(input.tex2D((int)x+0, (int)y-1)); //[(y-1)*width+(x+0)];
const float3 CM = make_float3(input.tex2D((int)x-1, (int)y+0)); //[(y+0)*width+(x-1)];
if(CC.x != MINF && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) {
const float3 n = cross(PC-MC, CP-CM);
const float l = length(n);
if(l > 0.0f) {
output(x,y) = make_float4(n/-l, 1.0f);
}
__device__ inline bool isValid(const ftl::rgbd::Camera &camera, const float3 &d) {
return d.z >= camera.minDepth && d.z <= camera.maxDepth;
}
__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output,
ftl::cuda::TextureObject<int> input, ftl::rgbd::Camera camera, float3x3 pose) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if(x >= input.width() || y >= input.height()) return;
output(x,y) = make_float4(0, 0, 0, 0);
if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) {
const float3 CC = camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 1000.0f);
const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 1000.0f);
const float3 CP = camera.screenToCam(x+1, y+0, (float)input.tex2D((int)x+1, (int)y+0) / 1000.0f);
const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 1000.0f);
const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 1000.0f);
//if(CC.z < && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) {
if (isValid(camera,CC) && isValid(camera,PC) && isValid(camera,CP) && isValid(camera,MC) && isValid(camera,CM)) {
const float3 n = cross(PC-MC, CP-CM);
const float l = length(n);
if(l > 0.0f) {
output(x,y) = make_float4((n/-l), 1.0f);
}
}
}
}
template <int RADIUS>
__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
ftl::cuda::TextureObject<float4> output,
ftl::cuda::TextureObject<float4> points,
ftl::rgbd::Camera camera, float3x3 pose, float smoothing) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if(x >= points.width() || y >= points.height()) return;
const float3 p0 = make_float3(points.tex2D((int)x,(int)y));
float3 nsum = make_float3(0.0f);
float contrib = 0.0f;
if (p0.x == MINF) return;
for (int v=-RADIUS; v<=RADIUS; ++v) {
for (int u=-RADIUS; u<=RADIUS; ++u) {
const float3 p = make_float3(points.tex2D((int)x+u,(int)y+v));
if (p.x == MINF) continue;
const float s = ftl::cuda::spatialWeighting(p0, p, smoothing);
if (s > 0.0f) {
const float4 n = norms.tex2D((int)x+u,(int)y+v);
if (n.w > 0.0f) {
nsum += make_float3(n) * s;
contrib += s;
}
}
}
}
// Compute dot product of normal with camera to obtain measure of how
// well this point faces the source camera, a measure of confidence
float3 ray = pose * camera.screenToCam(x, y, 1.0f);
ray = ray / length(ray);
nsum /= contrib;
nsum /= length(nsum);
output(x,y) = (contrib > 0.0f) ? make_float4(nsum, dot(nsum, ray)) : make_float4(0.0f);
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
template <int RADIUS>
__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms,
ftl::cuda::TextureObject<float4> output,
ftl::cuda::TextureObject<int> depth,
ftl::rgbd::Camera camera, float3x3 pose, float smoothing) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if(x >= depth.width() || y >= depth.height()) return;
const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 1000.0f);
float3 nsum = make_float3(0.0f);
float contrib = 0.0f;
if (p0.z < camera.minDepth || p0.z > camera.maxDepth) return;
for (int v=-RADIUS; v<=RADIUS; ++v) {
for (int u=-RADIUS; u<=RADIUS; ++u) {
const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 1000.0f);
if (p.z < camera.minDepth || p.z > camera.maxDepth) continue;
const float s = ftl::cuda::spatialWeighting(p0, p, smoothing);
//const float s = 1.0f;
//if (s > 0.0f) {
const float4 n = norms.tex2D((int)x+u,(int)y+v);
if (n.w > 0.0f) {
nsum += make_float3(n) * s;
contrib += s;
}
//}
}
}
// Compute dot product of normal with camera to obtain measure of how
// well this point faces the source camera, a measure of confidence
float3 ray = camera.screenToCam(x, y, 1.0f);
ray = ray / length(ray);
nsum /= contrib;
nsum /= length(nsum);
output(x,y) = (contrib > 0.0f) ? make_float4(pose*nsum, 1.0f) : make_float4(0.0f);
}
void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
ftl::cuda::TextureObject<float4> &temp,
ftl::cuda::TextureObject<float4> &input,
int radius,
float smoothing,
const ftl::rgbd::Camera &camera,
const float3x3 &pose,cudaStream_t stream) {
const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input);
cudaSafeCall( cudaGetLastError() );
switch (radius) {
case 9: smooth_normals_kernel<9><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); break;
}
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
//cutilCheckMsg(__FUNCTION__);
#endif
}
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output,
ftl::cuda::TextureObject<float4> &temp,
ftl::cuda::TextureObject<int> &input,
int radius,
float smoothing,
const ftl::rgbd::Camera &camera,
const float3x3 &pose_inv, const float3x3 &pose,cudaStream_t stream) {
const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input, camera, pose);
cudaSafeCall( cudaGetLastError() );
switch (radius) {
case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing);
case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing);
case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing);
}
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
//cutilCheckMsg(__FUNCTION__);
#endif
}
//==============================================================================
__global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm,
ftl::cuda::TextureObject<uchar4> output,
float3 direction, uchar4 diffuse, uchar4 ambient) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if(x >= norm.width() || y >= norm.height()) return;
output(x,y) = make_uchar4(0,0,0,0);
float3 ray = direction;
ray = ray / length(ray);
float3 n = make_float3(norm.tex2D((int)x,(int)y));
float l = length(n);
if (l == 0) return;
n /= l;
const float d = max(dot(ray, n), 0.0f);
output(x,y) = make_uchar4(
min(255.0f, diffuse.x*d + ambient.x),
min(255.0f, diffuse.y*d + ambient.y),
min(255.0f, diffuse.z*d + ambient.z), 255);
}
void ftl::cuda::normal_visualise(ftl::cuda::TextureObject<float4> &norm,
ftl::cuda::TextureObject<uchar4> &output,
const float3 &light, const uchar4 &diffuse, const uchar4 &ambient,
cudaStream_t stream) {
const dim3 gridSize((norm.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norm.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
vis_normals_kernel<<<gridSize, blockSize, 0, stream>>>(norm, output, light, diffuse, ambient);
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
//cutilCheckMsg(__FUNCTION__);
#endif
}
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
//==============================================================================
__global__ void filter_normals_kernel(ftl::cuda::TextureObject<float4> norm,
ftl::cuda::TextureObject<float4> output,
ftl::rgbd::Camera camera, float4x4 pose, float thresh) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if(x >= norm.width() || y >= norm.height()) return;
float3 ray = pose.getFloat3x3() * camera.screenToCam(x,y,1.0f);
ray = ray / length(ray);
float3 n = make_float3(norm.tex2D((int)x,(int)y));
float l = length(n);
if (l == 0) {
output(x,y) = make_float4(MINF);
return;
}
n /= l;
const float d = dot(ray, n);
if (d <= thresh) {
output(x,y) = make_float4(MINF);
}
}
void ftl::cuda::normal_filter(ftl::cuda::TextureObject<float4> &norm,
ftl::cuda::TextureObject<float4> &output,
const ftl::rgbd::Camera &camera, const float4x4 &pose,
float thresh,
cudaStream_t stream) {
const dim3 gridSize((norm.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norm.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
filter_normals_kernel<<<gridSize, blockSize, 0, stream>>>(norm, output, camera, pose, thresh);
cudaSafeCall( cudaGetLastError() );
#ifdef _DEBUG
cudaSafeCall(cudaDeviceSynchronize());
//cutilCheckMsg(__FUNCTION__);
#endif
}