Skip to content
Snippets Groups Projects
Select Git revision
  • 477995e4f1b9107de8d59ed79d571513c4440d1e
  • master default protected
  • develop
3 results

theme_dc.plot.L.45.nolegend.R

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    splatter.cu 15.92 KiB
    #include <ftl/render/splat_params.hpp>
    #include "splatter_cuda.hpp"
    #include <ftl/rgbd/camera.hpp>
    #include <ftl/cuda_common.hpp>
    
    #include <ftl/cuda/weighting.hpp>
    #include <ftl/cuda/intersections.hpp>
    #include <ftl/cuda/warp.hpp>
    
    #define T_PER_BLOCK 8
    #define UPSAMPLE_FACTOR 1.8f
    #define WARP_SIZE 32
    #define DEPTH_THRESHOLD 0.05f
    #define UPSAMPLE_MAX 60
    #define MAX_ITERATIONS 32  // Note: Must be multiple of 32
    #define SPATIAL_SMOOTHING 0.005f
    
    #define ENERGY_THRESHOLD 0.1f
    #define SMOOTHING_MULTIPLIER_A 10.0f	// For surface search
    #define SMOOTHING_MULTIPLIER_B 4.0f		// For z contribution
    #define SMOOTHING_MULTIPLIER_C 2.0f		// For colour contribution
    
    #define ACCUM_DIAMETER 8
    
    using ftl::cuda::TextureObject;
    using ftl::render::SplatParams;
    using ftl::cuda::warpMin;
    using ftl::cuda::warpSum;
    
    /*
     * Pass 1: Directly render each camera into virtual view but with no upsampling
     * for sparse points.
     */
     template <bool CULLING>
     __global__ void dibr_merge_kernel(TextureObject<float4> points,
    		TextureObject<float4> normals,
    		TextureObject<int> depth, SplatParams params) {
    	const int x = blockIdx.x*blockDim.x + threadIdx.x;
    	const int y = blockIdx.y*blockDim.y + threadIdx.y;
    
    	const float4 worldPos = points.tex2D(x, y);
    	if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
    
    	// Compile time enable/disable of culling back facing points
    	if (CULLING) {
    		float3 ray = params.m_viewMatrixInverse.getFloat3x3() * params.camera.screenToCam(x,y,1.0f);
    		ray = ray / length(ray);
    		float3 n = make_float3(normals.tex2D((int)x,(int)y));
    		float l = length(n);
    		if (l == 0) {
    			return;
    		}
    		n /= l;
    
    		const float facing = dot(ray, n);
    		if (facing <= 0.0f) return;
    	}
    
        // Find the virtual screen position of current point
    	const float3 camPos = params.m_viewMatrix * make_float3(worldPos);
    	if (camPos.z < params.camera.minDepth) return;
    	if (camPos.z > params.camera.maxDepth) return;
    
    	const float d = camPos.z;
    
    	const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
    	const unsigned int cx = screenPos.x;
    	const unsigned int cy = screenPos.y;
    	if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth.width() && cy < depth.height()) {
    		// Transform estimated point to virtual cam space and output z
    		atomicMin(&depth(cx,cy), d * 1000.0f);
    	}
    }
    
    void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<float4> &normals, TextureObject<int> &depth, SplatParams params, bool culling, cudaStream_t stream) {
        const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
        const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
    
    	if (culling) dibr_merge_kernel<true><<<gridSize, blockSize, 0, stream>>>(points, normals, depth, params);
    	else dibr_merge_kernel<false><<<gridSize, blockSize, 0, stream>>>(points, normals, depth, params);
        cudaSafeCall( cudaGetLastError() );
    }
    
    //==============================================================================
    
    __device__ inline float4 make_float4(const uchar4 &c) {
        return make_float4(c.x,c.y,c.z,c.w);
    }
    
    /*
     * Pass 1b: Expand splats to full size and merge
     */
     template <int SEARCH_DIAMETER>
     __global__ void splat_kernel(
            //TextureObject<float4> points,       // Original 3D points
            TextureObject<float4> normals,
            TextureObject<uchar4> colour_in,
            TextureObject<int> depth_in,        // Virtual depth map
            TextureObject<float> depth_out,   // Accumulated output
            TextureObject<uchar4> colour_out,
            //ftl::rgbd::Camera camera,
            //float4x4 pose_inv,
            SplatParams params) {
            
        //const ftl::voxhash::DepthCameraCUDA &camera = c_cameras[cam];
    
        const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
        //const int warp = tid / WARP_SIZE;
        const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
        const int y = blockIdx.y*blockDim.y + threadIdx.y;
    
        if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
    
        const float3 origin = params.m_viewMatrixInverse * make_float3(0.0f);
        float3 ray = params.camera.screenToCam(x,y,1.0f);
        ray = ray / length(ray);
        const float scale = ray.z;
        ray = params.m_viewMatrixInverse.getFloat3x3() * ray;
    
        //float depth = 0.0f;
        //float contrib = 0.0f;
        float depth = 1000.0f;
    
        struct Result {
            float weight;
            float depth;
            uchar4 colour;
        };
    
        Result results[(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE];
    
        // Each thread in warp takes an upsample point and updates corresponding depth buffer.
        const int lane = tid % WARP_SIZE;
        for (int i=lane; i<SEARCH_DIAMETER*SEARCH_DIAMETER; i+=WARP_SIZE) {
            const float u = (i % SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2);
            const float v = (i / SEARCH_DIAMETER) - (SEARCH_DIAMETER / 2);
    
            results[i/WARP_SIZE] = {0.0f, 0.0f, make_uchar4(0,0,0,0)};
    
            // Use the depth buffer to determine this pixels 3D position in camera space
            const float d = ((float)depth_in.tex2D(x+u, y+v)/1000.0f);
    
            if (d < params.camera.minDepth || d > params.camera.maxDepth) continue;
    
            const float3 camPos = params.camera.screenToCam((int)(x+u),(int)(y+v),d);
            const float3 camPos2 = params.camera.screenToCam((int)(x),(int)(y),d);
            //if (length(camPos - camPos2) > 2.0f*(camPos.z/params.camera.fx)) continue;
            const float3 worldPos = params.m_viewMatrixInverse * camPos;
            //const float3 camPos2 = pose_inv * worldPos;
            //const uint2 screenPos = camera.camToScreen<uint2>(camPos2);
    
            //if (screenPos.x < points.width() && screenPos.y < points.height()) {
                // Can now read points, normals and colours from source cam
    
                // What is contribution of our current point at this pixel?
                //const float3 p = make_float3(points.tex2D((int)screenPos.x, (int)screenPos.y));
                //const float weight = ftl::cuda::spatialWeighting(worldPos, p, (camPos.z/params.camera.fx)); //*(camPos2.z/camera.fx));
                //if (weight <= 0.0f) continue;
    
                float3 n = make_float3(normals.tex2D((int)(x+u), (int)(y+v)));
                const float l = length(n);
                if (l == 0.0f) continue;
                n /= l;
    
                // Does the ray intersect plane of splat?
                float t = 1000.0f;
                if (ftl::cuda::intersectPlane(n, worldPos, origin, ray, t)) { //} && fabs(t-camPos.z) < 0.01f) {
                    //t *= (params.m_viewMatrix.getFloat3x3() * ray).z;
                    t *= scale;
                    const float3 camPos3 = params.camera.screenToCam((int)(x),(int)(y),t);
                    const float weight = ftl::cuda::spatialWeighting(camPos, camPos3, 2.0f*(camPos3.z/params.camera.fx)); //*(camPos2.z/camera.fx));
                    if (weight == 0.0f) continue;
                    //depth += t * weight;
                    //contrib += weight;
                    depth = min(depth, t);
                    results[i/WARP_SIZE] = {weight, t, colour_in.tex2D((int)x+u, (int)y+v)}; //make_float2(t, weight);
                    //atomicMin(&depth_out(x,y), (int)(depth * 1000.0f));
                }
            //}
        }
    
        depth = warpMin(depth);
    
        float adepth = 0.0f;
        float contrib = 0.0f;
        float4 colour = make_float4(0.0f);
    
        // Loop over results array
        for (int i=0; i<(SEARCH_DIAMETER*SEARCH_DIAMETER) / WARP_SIZE; ++i) {
            if (results[i].depth - depth < 0.04f) {
                adepth += results[i].depth * results[i].weight;
                colour += make_float4(results[i].colour) * results[i].weight;
                contrib += results[i].weight;
            }
        }
    
        // Sum all attributes and contributions
        adepth = warpSum(adepth);
        colour.x = warpSum(colour.x);
        colour.y = warpSum(colour.y);
        colour.z = warpSum(colour.z);
        contrib = warpSum(contrib);
    
        if (lane == 0 && contrib > 0.0f) {
            depth_out(x,y) = adepth / contrib;
            colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 255.0f);
        }
    }
    
    void ftl::cuda::splat(
            TextureObject<float4> &normals,
            TextureObject<uchar4> &colour_in,
            TextureObject<int> &depth_in,        // Virtual depth map
            TextureObject<float> &depth_out,
            TextureObject<uchar4> &colour_out,
            const SplatParams &params, cudaStream_t stream) {
        const dim3 gridSize((depth_in.width() + 2 - 1)/2, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
        const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
    
        splat_kernel<8><<<gridSize, blockSize, 0, stream>>>(
            normals,
            colour_in,
            depth_in,
            depth_out,
            colour_out,
            params
        );
        cudaSafeCall( cudaGetLastError() );
    }
    
    //==============================================================================
    
    template <typename T>
    __device__ inline T generateInput(const T &in, const SplatParams &params, const float4 &worldPos) {
    	return in;
    }
    
    template <>
    __device__ inline uchar4 generateInput(const uchar4 &in, const SplatParams &params, const float4 &worldPos) {
    	return (params.m_flags & ftl::render::kShowDisconMask && worldPos.w < 0.0f) ?
    		make_uchar4(0,0,255,255) :  // Show discontinuity mask in red
    		in;
    }
    
    /*
     * Pass 2: Accumulate attribute contributions if the points pass a visibility test.
     */
     template <typename T>
    __global__ void dibr_attribute_contrib_kernel(
            TextureObject<T> in,				// Attribute input
            TextureObject<float4> points,       // Original 3D points
            TextureObject<int> depth_in,        // Virtual depth map
            TextureObject<T> out,			// Accumulated output
            SplatParams params) {
            
    	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
    	const int y = blockIdx.y*blockDim.y + threadIdx.y;
    
    	const float4 worldPos = points.tex2D(x, y);
    	if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
    
    	const float3 camPos = params.m_viewMatrix * make_float3(worldPos);
    	if (camPos.z < params.camera.minDepth) return;
    	if (camPos.z > params.camera.maxDepth) return;
    	const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
    
    	// Not on screen so stop now...
    	if (screenPos.x >= depth_in.width() || screenPos.y >= depth_in.height()) return;
                
        // Is this point near the actual surface and therefore a contributor?
        const int d = depth_in.tex2D((int)screenPos.x, (int)screenPos.y);
    
    	const T input = generateInput(in.tex2D(x, y), params, worldPos);
    
    	//const float3 nearest = params.camera.screenToCam((int)(screenPos.x),(int)(screenPos.y),d);
    
    	//const float l = length(nearest - camPos);
    	if (d == (int)(camPos.z*1000.0f)) {
    		out(screenPos.x, screenPos.y) = input;
    	}
    }
    
    
    template <typename T>
    void ftl::cuda::dibr_attribute(
            TextureObject<T> &in,
            TextureObject<float4> &points,       // Original 3D points
            TextureObject<int> &depth_in,        // Virtual depth map
            TextureObject<T> &out,   // Accumulated output
            SplatParams &params, cudaStream_t stream) {
    	const dim3 gridSize((in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
    	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
    
        dibr_attribute_contrib_kernel<<<gridSize, blockSize, 0, stream>>>(
            in,
            points,
            depth_in,
            out,
            params
        );
        cudaSafeCall( cudaGetLastError() );
    }
    
    template void ftl::cuda::dibr_attribute<uchar4>(
    	ftl::cuda::TextureObject<uchar4> &in,	// Original colour image
    	ftl::cuda::TextureObject<float4> &points,		// Original 3D points
    	ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
    	ftl::cuda::TextureObject<uchar4> &out,	// Accumulated output
    	ftl::render::SplatParams &params, cudaStream_t stream);
    
    template void ftl::cuda::dibr_attribute<float>(
    	ftl::cuda::TextureObject<float> &in,	// Original colour image
    	ftl::cuda::TextureObject<float4> &points,		// Original 3D points
    	ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
    	ftl::cuda::TextureObject<float> &out,	// Accumulated output
    	ftl::render::SplatParams &params, cudaStream_t stream);
    
    template void ftl::cuda::dibr_attribute<float4>(
    	ftl::cuda::TextureObject<float4> &in,	// Original colour image
    	ftl::cuda::TextureObject<float4> &points,		// Original 3D points
    	ftl::cuda::TextureObject<int> &depth_in,		// Virtual depth map
    	ftl::cuda::TextureObject<float4> &out,	// Accumulated output
    	ftl::render::SplatParams &params, cudaStream_t stream);
    
    //==============================================================================
    
    /*__global__ void dibr_normalise_kernel(
            TextureObject<float4> colour_in,
            TextureObject<uchar4> colour_out,
            //TextureObject<float4> normals,
            TextureObject<float> contribs) {
        const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
        const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
    
        if (x < colour_in.width() && y < colour_in.height()) {
            const float4 colour = colour_in.tex2D((int)x,(int)y);
            //const float4 normal = normals.tex2D((int)x,(int)y);
            const float contrib = contribs.tex2D((int)x,(int)y);
    
            if (contrib > 0.0f) {
                colour_out(x,y) = make_uchar4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0);
                //normals(x,y) = normal / contrib;
            }
        }
    }
    
    __global__ void dibr_normalise_kernel(
            TextureObject<float4> colour_in,
            TextureObject<float> colour_out,
            //TextureObject<float4> normals,
            TextureObject<float> contribs) {
        const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
        const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
    
        if (x < colour_in.width() && y < colour_in.height()) {
            const float4 colour = colour_in.tex2D((int)x,(int)y);
            //const float4 normal = normals.tex2D((int)x,(int)y);
            const float contrib = contribs.tex2D((int)x,(int)y);
    
            if (contrib > 0.0f) {
                colour_out(x,y) = colour.x / contrib;
                //normals(x,y) = normal / contrib;
            }
        }
    }
    
    __global__ void dibr_normalise_kernel(
            TextureObject<float4> colour_in,
            TextureObject<float4> colour_out,
            //TextureObject<float4> normals,
            TextureObject<float> contribs) {
        const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
        const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
    
        if (x < colour_in.width() && y < colour_in.height()) {
            const float4 colour = colour_in.tex2D((int)x,(int)y);
            //const float4 normal = normals.tex2D((int)x,(int)y);
            const float contrib = contribs.tex2D((int)x,(int)y);
    
            if (contrib > 0.0f) {
                colour_out(x,y) = make_float4(colour.x / contrib, colour.y / contrib, colour.z / contrib, 0);
                //normals(x,y) = normal / contrib;
            }
        }
    }
    
    void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<uchar4> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) {
        const dim3 gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
        const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
    
        dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs);
        cudaSafeCall( cudaGetLastError() );
    }
    
    void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<float> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) {
        const dim3 gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
        const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
    
        dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs);
        cudaSafeCall( cudaGetLastError() );
    }
    
    void ftl::cuda::dibr_normalise(TextureObject<float4> &colour_in, TextureObject<float4> &colour_out, TextureObject<float> &contribs, cudaStream_t stream) {
        const dim3 gridSize((colour_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
        const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
    
        dibr_normalise_kernel<<<gridSize, blockSize, 0, stream>>>(colour_in, colour_out, contribs);
        cudaSafeCall( cudaGetLastError() );
    }*/