Commit 2f2c1ce1 authored by Nicolas Pope

Merge branch 'feature/271/depthcomp' into 'master'

Implements #271 depth compression

Closes #271

See merge request nicolas.pope/ftl!205
parents 7480cf9c ae494de0
1 merge request: !205 Implements #271 depth compression
Pipeline #17748 passed
Showing with 222 additions and 22 deletions
......@@ -8,6 +8,7 @@ set(CODECSRC
src/writer.cpp
src/reader.cpp
src/channels.cpp
src/depth_convert.cu
)
if (HAVE_NVPIPE)
......
......@@ -15,6 +15,8 @@ enum struct codec_t : uint8_t {
PNG,
H264,
HEVC, // H265
H264_LOSSLESS,
HEVC_LOSSLESS,
// TODO: Add audio codecs
WAV,
......
......@@ -22,6 +22,7 @@ class NvPipeDecoder : public ftl::codecs::Decoder {
NvPipe *nv_decoder_;
bool is_float_channel_;
ftl::codecs::definition_t last_definition_;
ftl::codecs::codec_t last_codec_;
MUTEX mutex_;
bool seen_iframe_;
cv::cuda::GpuMat tmp_;
......
......@@ -28,6 +28,7 @@ class NvPipeEncoder : public ftl::codecs::Encoder {
bool supports(ftl::codecs::codec_t codec) override;
static constexpr int kFlagRGB = 0x00000001;
static constexpr int kFlagMappedDepth = 0x00000002;
private:
NvPipe *nvenc_;
......@@ -35,6 +36,7 @@ class NvPipeEncoder : public ftl::codecs::Encoder {
bool is_float_channel_;
bool was_reset_;
ftl::codecs::codec_t preference_;
ftl::codecs::codec_t current_codec_;
cv::cuda::GpuMat tmp_;
cv::cuda::GpuMat tmp2_;
cv::cuda::Stream stream_;
......
......@@ -10,6 +10,8 @@ Decoder *ftl::codecs::allocateDecoder(const ftl::codecs::Packet &pkt) {
switch(pkt.codec) {
case codec_t::JPG :
case codec_t::PNG : return new ftl::codecs::OpenCVDecoder;
case codec_t::HEVC_LOSSLESS:
case codec_t::H264_LOSSLESS:
case codec_t::H264 :
case codec_t::HEVC : return new ftl::codecs::NvPipeDecoder;
}
......
#include "depth_convert_cuda.hpp"
#include <opencv2/core/cuda_stream_accessor.hpp>
#define T_PER_BLOCK 8
// Encoding
__device__ inline float clamp(float v) {
return max(0.0f, min(1.0f, v));
}
__device__ inline float clampC(float v, float t=255.0f) {
return max(0.0f, min(t, v));
}
/*
* See: Pece F., Kautz J., Weyrich T. 2011. Adapting standard video codecs for
* depth streaming. Joint Virtual Reality Conference of EGVE 2011 -
* The 17th Eurographics Symposium on Virtual Environments, EuroVR 2011 -
* The 8th EuroVR (INTUITION) Conference, pp. 59-66.
*
*/
// Assumes 8 bit output channels and 14bit depth
static constexpr float P = (2.0f * 256.0f) / 16384.0f;
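// P is the period of the Ha/Hb triangle waves in normalised-depth units:
// (2 * 256) / 16384 = 1/32, so one period covers 512 of the 16384 representable
// 14-bit depth levels. L carries the coarse depth; Ha and Hb are phase-shifted
// triangle functions of L that refine it within each period (Pece et al. 2011).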
__global__ void depth_to_vuya_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda::PtrStepSz<uchar4> rgba, float maxdepth) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < depth.cols && y < depth.rows) {
float d = max(0.0f,min(maxdepth,depth(y,x)));
float L = d / maxdepth;
const float p = P;
float Ha1 = fmodf((L / (p/2.0f)), 2.0f);
float Ha = (Ha1 <= 1.0f) ? Ha1 : 2.0f - Ha1;
float Hb1 = fmodf(((L - (p/4.0f)) / (p/2.0f)), 2.0f);
float Hb = (Hb1 <= 1.0f) ? Hb1 : 2.0f - Hb1;
rgba(y,x) = make_uchar4(Hb*255.0f,Ha*255.0f,L*255.0f, 0.0f);
}
}
void ftl::cuda::depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream) {
const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
depth_to_vuya_kernel<<<gridSize, blockSize, 0, cv::cuda::StreamAccessor::getStream(stream)>>>(depth, rgba, maxdepth);
cudaSafeCall( cudaGetLastError() );
}
// Decoding
/*
* See: Pece F., Kautz J., Weyrich T. 2011. Adapting standard video codecs for
* depth streaming. Joint Virtual Reality Conference of EGVE 2011 -
* The 17th Eurographics Symposium on Virtual Environments, EuroVR 2011 -
* The 8th EuroVR (INTUITION) Conference, pp. 59-66.
*
*/
// Video is assumed to be 10bit encoded, returning ushort instead of uchar.
__global__ void vuya_to_depth_kernel(cv::cuda::PtrStepSz<float> depth, cv::cuda::PtrStepSz<ushort4> rgba, float maxdepth) {
const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < depth.cols && y < depth.rows) {
ushort4 in = rgba(y,x);
// Only the top 8 bits contain any data
float L = float(in.z >> 8) / 255.0f;
float Ha = float(in.y >> 8) / 255.0f;
float Hb = float(in.x >> 8) / 255.0f;
const float p = P;
int m = int(floor(4.0f*(L/p) - 0.5f)) % 4;
float L0 = L - fmodf((L-(p/8.0f)), p) + (p/4.0f)*float(m) - (p/8.0f);
float s = 0.0f;
if (m == 0) s = (p/2.0f)*Ha;
if (m == 1) s = (p/2.0f)*Hb;
if (m == 2) s = (p/2.0f)*(1.0f - Ha);
if (m == 3) s = (p/2.0f)*(1.0f - Hb);
depth(y,x) = (L0+s) * maxdepth;
}
}
void ftl::cuda::vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream) {
const dim3 gridSize((depth.cols + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.rows + T_PER_BLOCK - 1)/T_PER_BLOCK);
const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
vuya_to_depth_kernel<<<gridSize, blockSize, 0, cv::cuda::StreamAccessor::getStream(stream)>>>(depth, rgba, maxdepth);
cudaSafeCall( cudaGetLastError() );
}
#ifndef _FTL_CODECS_DEPTH_CONVERT_HPP_
#define _FTL_CODECS_DEPTH_CONVERT_HPP_
#include <ftl/cuda_common.hpp>
namespace ftl {
namespace cuda {
void depth_to_vuya(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<uchar4> &rgba, float maxdepth, cv::cuda::Stream stream);
void vuya_to_depth(const cv::cuda::PtrStepSz<float> &depth, const cv::cuda::PtrStepSz<ushort4> &rgba, float maxdepth, cv::cuda::Stream stream);
}
}
#endif // _FTL_CODECS_DEPTH_CONVERT_HPP_
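For reference, a minimal host-side sketch of how these two helpers are driven, mirroring the encoder and decoder call sites further down in this merge request. The 16.0f depth range, the buffer types and the GpuMat/Stream arguments are taken from those call sites; the function and variable names here are purely illustrative.
#include <opencv2/core/cuda.hpp>
#include "depth_convert_cuda.hpp"
// Encode side: pack float depth into an 8-bit VUYA surface before NvPipe_Encode.
void encode_depth_sketch(const cv::cuda::GpuMat &depth32f, cv::cuda::GpuMat &vuya8,
                         cv::cuda::Stream &stream) {
    vuya8.create(depth32f.size(), CV_8UC4);
    ftl::cuda::depth_to_vuya(depth32f, vuya8, 16.0f, stream);
}
// Decode side: recover float depth from the decoded 10-bit (CV_16UC4) surface.
void decode_depth_sketch(const cv::cuda::GpuMat &decoded16, cv::cuda::GpuMat &depth32f,
                         cv::cuda::Stream &stream) {
    depth32f.create(decoded16.size(), CV_32F);
    ftl::cuda::vuya_to_depth(depth32f, decoded16, 16.0f, stream);
    stream.waitForCompletion();
}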
......@@ -9,6 +9,8 @@
#include <opencv2/core/cuda/common.hpp>
#include "depth_convert_cuda.hpp"
using ftl::codecs::NvPipeDecoder;
NvPipeDecoder::NvPipeDecoder() {
......@@ -25,17 +27,20 @@ NvPipeDecoder::~NvPipeDecoder() {
bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) {
cudaSetDevice(0);
UNIQUE_LOCK(mutex_,lk);
if (pkt.codec != codec_t::HEVC && pkt.codec != codec_t::H264) return false;
if (pkt.codec != codec_t::HEVC && pkt.codec != codec_t::H264 && pkt.codec != codec_t::HEVC_LOSSLESS && pkt.codec != codec_t::H264_LOSSLESS) return false;
bool is_float_frame = out.type() == CV_32F;
// Is the previous decoder still valid for current resolution and type?
if (nv_decoder_ != nullptr && (last_definition_ != pkt.definition || is_float_channel_ != is_float_frame)) {
if (nv_decoder_ != nullptr && (last_definition_ != pkt.definition || last_codec_ != pkt.codec || is_float_channel_ != is_float_frame)) {
NvPipe_Destroy(nv_decoder_);
nv_decoder_ = nullptr;
}
is_float_channel_ = is_float_frame;
last_definition_ = pkt.definition;
last_codec_ = pkt.codec;
// Lossy HEVC/H264 depth packets are tagged with kFlagMappedDepth (0x2); without that
// flag, or with the *_LOSSLESS codecs, the stream carries 16-bit millimetre depth.
bool islossless = ((pkt.codec == ftl::codecs::codec_t::HEVC || pkt.codec == ftl::codecs::codec_t::H264) && is_float_frame && !(pkt.flags & 0x2)) || pkt.codec == ftl::codecs::codec_t::HEVC_LOSSLESS || pkt.codec == ftl::codecs::codec_t::H264_LOSSLESS;
//LOG(INFO) << "DECODE OUT: " << out.rows << ", " << out.type();
//LOG(INFO) << "DECODE RESOLUTION: (" << (int)pkt.definition << ") " << ftl::codecs::getWidth(pkt.definition) << "x" << ftl::codecs::getHeight(pkt.definition);
......@@ -43,8 +48,8 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out
// Build a decoder instance of the correct kind
if (nv_decoder_ == nullptr) {
nv_decoder_ = NvPipe_CreateDecoder(
(is_float_frame) ? NVPIPE_UINT16 : NVPIPE_RGBA32,
(pkt.codec == codec_t::HEVC) ? NVPIPE_HEVC : NVPIPE_H264,
(is_float_frame) ? (islossless) ? NVPIPE_UINT16 : NVPIPE_YUV64 : NVPIPE_RGBA32,
(pkt.codec == codec_t::HEVC || pkt.codec == ftl::codecs::codec_t::HEVC_LOSSLESS) ? NVPIPE_HEVC : NVPIPE_H264,
ftl::codecs::getWidth(pkt.definition),
ftl::codecs::getHeight(pkt.definition));
if (!nv_decoder_) {
......@@ -55,14 +60,13 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out
seen_iframe_ = false;
}
// TODO: (Nick) Move to member variable to prevent re-creation
tmp_.create(cv::Size(ftl::codecs::getWidth(pkt.definition),ftl::codecs::getHeight(pkt.definition)), (is_float_frame) ? CV_16U : CV_8UC4);
tmp_.create(cv::Size(ftl::codecs::getWidth(pkt.definition),ftl::codecs::getHeight(pkt.definition)), (!is_float_frame) ? CV_8UC4 : (islossless) ? CV_16U : CV_16UC4);
// Check for an I-Frame
if (!seen_iframe_) {
if (pkt.codec == ftl::codecs::codec_t::HEVC) {
if (pkt.codec == ftl::codecs::codec_t::HEVC || pkt.codec == ftl::codecs::codec_t::HEVC_LOSSLESS) {
if (ftl::codecs::hevc::isIFrame(pkt.data)) seen_iframe_ = true;
} else if (pkt.codec == ftl::codecs::codec_t::H264) {
} else if (pkt.codec == ftl::codecs::codec_t::H264 || pkt.codec == ftl::codecs::codec_t::H264_LOSSLESS) {
if (ftl::codecs::h264::isIFrame(pkt.data)) seen_iframe_ = true;
}
}
......@@ -82,7 +86,21 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out
if (is_float_frame) {
// Is the received frame the same size as requested output?
//if (out.rows == ftl::codecs::getHeight(pkt.definition)) {
tmp_.convertTo(out, CV_32FC1, 1.0f/1000.0f, stream_);
//tmp_.convertTo(out, CV_32FC1, 1.0f/1000.0f, stream_);
if (!islossless) {
//cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_RGB2YUV, 4, stream_);
/*cv::Mat tmpHost;
tmp_.download(tmpHost);
cv::imshow("DEPTH", tmpHost);
cv::waitKey(1);*/
ftl::cuda::vuya_to_depth(out, tmp_, 16.0f, stream_);
} else {
tmp_.convertTo(out, CV_32FC1, 1.0f/1000.0f, stream_);
}
/*} else {
LOG(WARNING) << "Resizing decoded frame from " << tmp_.size() << " to " << out.size();
// FIXME: This won't work on GPU
......@@ -117,5 +135,5 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out
}
bool NvPipeDecoder::accepts(const ftl::codecs::Packet &pkt) {
return pkt.codec == codec_t::HEVC || pkt.codec == codec_t::H264;
return pkt.codec == codec_t::HEVC || pkt.codec == codec_t::H264 || pkt.codec == codec_t::H264_LOSSLESS || pkt.codec == codec_t::HEVC_LOSSLESS;
}
......@@ -6,6 +6,8 @@
#include <opencv2/core/cuda/common.hpp>
#include "depth_convert_cuda.hpp"
using ftl::codecs::NvPipeEncoder;
using ftl::codecs::bitrate_t;
using ftl::codecs::codec_t;
......@@ -21,6 +23,7 @@ NvPipeEncoder::NvPipeEncoder(definition_t maxdef,
is_float_channel_ = false;
was_reset_ = false;
preference_ = codec_t::Any;
current_codec_ = codec_t::HEVC;
}
NvPipeEncoder::~NvPipeEncoder() {
......@@ -33,6 +36,8 @@ void NvPipeEncoder::reset() {
bool NvPipeEncoder::supports(ftl::codecs::codec_t codec) {
switch (codec) {
case codec_t::H264_LOSSLESS:
case codec_t::HEVC_LOSSLESS:
case codec_t::H264:
case codec_t::HEVC: preference_ = codec; return true;
default: return false;
......@@ -52,6 +57,10 @@ definition_t NvPipeEncoder::_verifiedDefinition(definition_t def, const cv::cuda
return def;
}
static bool isLossy(codec_t c) {
return !(c == codec_t::HEVC_LOSSLESS || c == codec_t::H264_LOSSLESS);
}
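// Float (depth) input is remapped with ftl::cuda::depth_to_vuya when the codec is lossy;
// the *_LOSSLESS codecs keep the previous 16-bit millimetre conversion path.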
bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition, bitrate_t bitrate, const std::function<void(const ftl::codecs::Packet&)> &cb) {
cudaSetDevice(0);
auto definition = odefinition; //_verifiedDefinition(odefinition, in);
......@@ -83,13 +92,22 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition,
LOG(ERROR) << "Missing data for Nvidia encoder";
return false;
}
if (preference_ == codec_t::Any) preference_ = codec_t::HEVC;
if (!_createEncoder(tmp, definition, bitrate)) return false;
//LOG(INFO) << "NvPipe Encode: " << int(definition) << " " << in.cols;
//cv::Mat tmp;
if (tmp.type() == CV_32F) {
tmp.convertTo(tmp2_, CV_16UC1, 1000, stream_);
if (isLossy(preference_)) {
// Use special encoding transform
tmp2_.create(tmp.size(), CV_8UC4);
ftl::cuda::depth_to_vuya(tmp, tmp2_, 16.0f, stream_);
} else {
tmp.convertTo(tmp2_, CV_16UC1, 1000, stream_);
}
} else if (tmp.type() == CV_8UC3) {
cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGR2RGBA, 0, stream_);
} else if (tmp.type() == CV_8UC4) {
......@@ -103,11 +121,11 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition,
stream_.waitForCompletion();
Packet pkt;
pkt.codec = (preference_ == codec_t::Any) ? codec_t::HEVC : preference_;
pkt.codec = preference_;
pkt.definition = definition;
pkt.block_total = 1;
pkt.block_number = 0;
pkt.flags = NvPipeEncoder::kFlagRGB;
pkt.flags = NvPipeEncoder::kFlagRGB | NvPipeEncoder::kFlagMappedDepth;
pkt.data.resize(ftl::codecs::kVideoBufferSize);
uint64_t cs = NvPipe_Encode(
......@@ -134,7 +152,7 @@ bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition,
bool NvPipeEncoder::_encoderMatch(const cv::cuda::GpuMat &in, definition_t def) {
return ((in.type() == CV_32F && is_float_channel_) ||
((in.type() == CV_8UC3 || in.type() == CV_8UC4) && !is_float_channel_)) && current_definition_ == def;
((in.type() == CV_8UC3 || in.type() == CV_8UC4) && !is_float_channel_)) && current_definition_ == def && current_codec_ == preference_;
}
static uint64_t calculateBitrate(definition_t def, bitrate_t rate) {
......@@ -163,19 +181,42 @@ static uint64_t calculateBitrate(definition_t def, bitrate_t rate) {
bool NvPipeEncoder::_createEncoder(const cv::cuda::GpuMat &in, definition_t def, bitrate_t rate) {
if (_encoderMatch(in, def) && nvenc_) return true;
uint64_t bitrate = calculateBitrate(def, rate);
LOG(INFO) << "Calculated bitrate: " << bitrate;
if (in.type() == CV_32F) is_float_channel_ = true;
else is_float_channel_ = false;
current_definition_ = def;
current_codec_ = preference_;
uint64_t bitrate = calculateBitrate(def, rate);
if (is_float_channel_) bitrate *= 2.0f;
//LOG(INFO) << "Calculated bitrate: " << bitrate;
NvPipe_Codec codec;
NvPipe_Format format;
NvPipe_Compression compression;
if (is_float_channel_) {
if (isLossy(preference_)) {
format = NVPIPE_YUV32;
compression = NVPIPE_LOSSY_10BIT_420;
codec = (preference_ == codec_t::HEVC) ? NVPIPE_HEVC : NVPIPE_H264;
} else {
format = NVPIPE_UINT16;
compression = NVPIPE_LOSSLESS;
codec = (preference_ == codec_t::HEVC_LOSSLESS) ? NVPIPE_HEVC : NVPIPE_H264;
}
} else {
format = NVPIPE_RGBA32;
compression = NVPIPE_LOSSY;
codec = (preference_ == codec_t::HEVC || preference_ == codec_t::HEVC_LOSSLESS) ? NVPIPE_HEVC : NVPIPE_H264;
}
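// i.e. mapped (lossy) depth is encoded as 10-bit 4:2:0 YUV, lossless depth as 16-bit
// monochrome, and colour frames as lossy RGBA as before.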
if (nvenc_) NvPipe_Destroy(nvenc_);
const int fps = 1000/ftl::timer::getInterval();
nvenc_ = NvPipe_CreateEncoder(
(is_float_channel_) ? NVPIPE_UINT16 : NVPIPE_RGBA32,
(preference_ == codec_t::Any || preference_ == codec_t::HEVC) ? NVPIPE_HEVC : NVPIPE_H264,
(is_float_channel_) ? NVPIPE_LOSSLESS : NVPIPE_LOSSY,
format,
codec,
compression,
bitrate,
fps, // FPS
ftl::codecs::getWidth(def), // Output Width
......
......@@ -22,6 +22,7 @@ add_executable(nvpipe_codec_unit
../src/encoder.cpp
../src/nvpipe_encoder.cpp
../src/nvpipe_decoder.cpp
../src/depth_convert.cu
./nvpipe_codec_unit.cpp
)
target_include_directories(nvpipe_codec_unit PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../include")
......
......@@ -149,6 +149,7 @@ class Streamer : public ftl::Configurable {
ftl::UUID time_peer_;
int64_t last_frame_;
int64_t frame_no_;
bool insert_iframes_;
encoder_t encode_mode_;
......
......@@ -136,12 +136,28 @@ Streamer::Streamer(nlohmann::json &config, Universe *net)
}
});
on("video_codec", [this](const ftl::config::Event &e) {
UNIQUE_LOCK(mutex_,ulk);
hq_codec_ = value("video_codec", ftl::codecs::codec_t::Any);
for (auto &s : sources_) {
if (s.second->hq_encoder_c1) ftl::codecs::free(s.second->hq_encoder_c1);
if (s.second->hq_encoder_c2) ftl::codecs::free(s.second->hq_encoder_c2);
s.second->hq_encoder_c1 = nullptr;
s.second->hq_encoder_c2 = nullptr;
}
});
on("lq_bitrate", [this](const ftl::config::Event &e) {
UNIQUE_LOCK(mutex_,ulk);
for (auto &s : sources_) {
s.second->lq_bitrate = value("lq_bitrate", ftl::codecs::kPresetWorst);
}
});
insert_iframes_ = value("insert_iframes", false);
on("insert_iframes", [this](const ftl::config::Event &e) {
insert_iframes_ = value("insert_iframes", false);
});
}
Streamer::~Streamer() {
......@@ -484,7 +500,7 @@ void Streamer::_process(ftl::rgbd::FrameSet &fs) {
if (enc) {
ftl::pool.push([this,&fs,enc,src,hasChan2,&cv,j,&chan2done](int id) {
// TODO: Stagger the reset between nodes... random phasing
if (fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc->reset();
if (insert_iframes_ && fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc->reset();
auto chan = fs.sources[j]->getChannel();
......@@ -527,7 +543,7 @@ void Streamer::_process(ftl::rgbd::FrameSet &fs) {
if (enc) {
// TODO: Stagger the reset between nodes... random phasing
if (fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc->reset();
if (insert_iframes_ && fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc->reset();
enc->encode(fs.frames[j].get<cv::cuda::GpuMat>(Channel::Colour), src->hq_bitrate, [this,src,hasChan2](const ftl::codecs::Packet &blk){
_transmitPacket(src, blk, Channel::Colour, hasChan2, Quality::High);
});
......