Skip to content
Snippets Groups Projects
Commit 40b0461f authored by Nicolas Pope's avatar Nicolas Pope
Browse files

WIP add lossy depth encode again

parent e14006b7
No related branches found
No related tags found
1 merge request!311Resolves #296 removal of NvPipe
...@@ -8,6 +8,8 @@ namespace cuda { ...@@ -8,6 +8,8 @@ namespace cuda {
void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream); void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream &stream);
void depth_to_nv12(const cv::cuda::PtrStepSz<float> &depth, uchar* luminance, uchar* chroma, int pitch, float maxdepth, cv::cuda::Stream &stream);
void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream); void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream &stream);
void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort> &luminance, const cv::cuda::PtrStepSz<ushort> &chroma, float maxdepth, cv::cuda::Stream &stream); void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort> &luminance, const cv::cuda::PtrStepSz<ushort> &chroma, float maxdepth, cv::cuda::Stream &stream);
......
...@@ -25,13 +25,8 @@ __device__ inline float clampC(float v, float t=255.0f) { ...@@ -25,13 +25,8 @@ __device__ inline float clampC(float v, float t=255.0f) {
// Assumes 8 bit output channels and 14bit depth // Assumes 8 bit output channels and 14bit depth
static constexpr float P = (2.0f * 256.0f) / 16384.0f; static constexpr float P = (2.0f * 256.0f) / 16384.0f;
__global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda::PtrStepSz<uchar4> rgba, float maxdepth) { __device__ inline float3 depth2yuv(float depth, float maxdepth) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; float d = max(0.0f,depth);
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < depth.cols && y < depth.rows) {
//float d = max(0.0f,min(maxdepth,depth(y,x)));
float d = max(0.0f,depth(y,x));
if (d >= maxdepth) d = 0.0f; if (d >= maxdepth) d = 0.0f;
float L = d / maxdepth; float L = d / maxdepth;
const float p = P; const float p = P;
...@@ -42,7 +37,16 @@ __global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda: ...@@ -42,7 +37,16 @@ __global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda:
float Hb1 = fmodf(((L - (p/4.0f)) / (p/2.0f)), 2.0f); float Hb1 = fmodf(((L - (p/4.0f)) / (p/2.0f)), 2.0f);
float Hb = (Hb1 <= 1.0f) ? Hb1 : 2.0f - Hb1; float Hb = (Hb1 <= 1.0f) ? Hb1 : 2.0f - Hb1;
rgba(y,x) = make_uchar4(Hb*255.0f,Ha*255.0f,L*255.0f, 0.0f); return {L, Ha, Hb};
}
__global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda::PtrStepSz<uchar4> rgba, float maxdepth) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < depth.cols && y < depth.rows) {
float3 yuv = depth2yuv(depth(y,x), maxdepth);
rgba(y,x) = make_uchar4(yuv.z*255.0f,yuv.y*255.0f,yuv.x*255.0f, 0.0f);
} }
} }
...@@ -54,6 +58,44 @@ void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv: ...@@ -54,6 +58,44 @@ void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv:
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
} }
// Planar 8bit version
__global__ void depth_to_nv12_kernel(cv::cuda::PtrStepSz<float> depth, uchar* luminance, uchar* chroma, int pitch, float maxdepth) {
const unsigned int x = (blockIdx.x*blockDim.x + threadIdx.x) * 2;
const unsigned int y = (blockIdx.y*blockDim.y + threadIdx.y) * 2;
if (x < depth.cols && y < depth.rows) {
float3 yuv1 = depth2yuv(depth(y,x), maxdepth);
float3 yuv2 = depth2yuv(depth(y,x+1), maxdepth);
float3 yuv3 = depth2yuv(depth(y+1,x), maxdepth);
float3 yuv4 = depth2yuv(depth(y+1,x+1), maxdepth);
// TODO: Something better than just average!
// Bad ones are discarded anyway...
float Ha = (yuv1.y+yuv2.y+yuv3.y+yuv4.y) / 4.0f * 255.0f;
float Hb = (yuv1.z+yuv2.z+yuv3.z+yuv4.z) / 4.0f * 255.0f;
luminance[y*pitch+x] = yuv1.x*255.0f;
luminance[y*pitch+x+1] = yuv2.x*255.0f;
luminance[(y+1)*pitch+x] = yuv3.x*255.0f;
luminance[(y+1)*pitch+x+1] = yuv4.x*255.0f;
chroma[(y/2)*pitch+x] = Hb;
chroma[(y/2)*pitch+x+1] = Ha;
}
}
void ftl::cuda::depth_to_nv12(const cv::cuda::PtrStepSz<float> &depth, uchar* luminance, uchar* chroma, int pitch, float maxdepth, cv::cuda::Stream &stream) {
const dim3 gridSize((depth.cols/2 + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows/2 + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
depth_to_nv12_kernel<<<gridSize, blockSize, 0, cv::cuda::StreamAccessor::getStream(stream)>>>(depth, luminance, chroma, pitch, maxdepth);
cudaSafeCall( cudaGetLastError() );
}
// =============================================================================
// Decoding // Decoding
/* /*
......
...@@ -257,11 +257,14 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, ftl::codecs::Packet &pkt) ...@@ -257,11 +257,14 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, ftl::codecs::Packet &pkt)
//pkt.flags = NvPipeEncoder::kFlagRGB | NvPipeEncoder::kFlagMappedDepth; //pkt.flags = NvPipeEncoder::kFlagRGB | NvPipeEncoder::kFlagMappedDepth;
const NvEncInputFrame* f = nvenc_->GetNextInputFrame(); const NvEncInputFrame* f = nvenc_->GetNextInputFrame();
bool islossless = pkt.codec == codec_t::H264_LOSSLESS || pkt.codec == codec_t::HEVC_LOSSLESS;
//auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream_);
if (!(pkt.flags & kFlagFloat)) { if (!(pkt.flags & kFlagFloat)) {
cudaSafeCall(cudaMemcpy2D(f->inputPtr, f->pitch, in.data, in.step, width * 4, height, cudaMemcpyDeviceToDevice)); cudaSafeCall(cudaMemcpy2D(f->inputPtr, f->pitch, in.data, in.step, width * 4, height, cudaMemcpyDeviceToDevice));
} else { } else if (!islossless) {
// Do colour conversion here. // Do colour conversion here.
ftl::cuda::depth_to_nv12(in, (uchar*)f->inputPtr, ((uchar*)f->inputPtr)+(height*f->pitch), f->pitch, 16.0f, cv::cuda::Stream::Null());
} }
// TODO: Use page locked memory? // TODO: Use page locked memory?
...@@ -338,18 +341,6 @@ bool NvPipeEncoder::_createEncoder(const ftl::codecs::Packet &pkt, format_t fmt) ...@@ -338,18 +341,6 @@ bool NvPipeEncoder::_createEncoder(const ftl::codecs::Packet &pkt, format_t fmt)
bool ish264 = pkt.codec == codec_t::H264 || pkt.codec == codec_t::H264_LOSSLESS; bool ish264 = pkt.codec == codec_t::H264 || pkt.codec == codec_t::H264_LOSSLESS;
bool ishevc = !ish264; bool ishevc = !ish264;
/*if (nvenc_) NvPipe_Destroy(nvenc_);
const int fps = 1000/ftl::timer::getInterval();
nvenc_ = NvPipe_CreateEncoder(
format_,
codec_,
compression_,
bitrate,
fps, // FPS
ftl::codecs::getWidth(pkt.definition), // Output Width
ftl::codecs::getHeight(pkt.definition) // Output Height
);*/
// Ensure we have a CUDA context // Ensure we have a CUDA context
cudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
CUcontext cudaContext; CUcontext cudaContext;
...@@ -436,6 +427,9 @@ bool NvPipeEncoder::_createEncoder(const ftl::codecs::Packet &pkt, format_t fmt) ...@@ -436,6 +427,9 @@ bool NvPipeEncoder::_createEncoder(const ftl::codecs::Packet &pkt, format_t fmt)
return false; return false;
} else { } else {
LOG(INFO) << "NvPipe encoder created"; LOG(INFO) << "NvPipe encoder created";
// TODO: SetIOCudaStreams
return true; return true;
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment