#include <ftl/codecs/nvidia_encoder.hpp> //#include <loguru.hpp> #include <ftl/timer.hpp> #include <ftl/codecs/codecs.hpp> #include <ftl/cuda_util.hpp> #include <ftl/exception.hpp> #include <opencv2/core/cuda/common.hpp> #include <opencv2/core/cuda_stream_accessor.hpp> #include <opencv2/cudaimgproc.hpp> #include <ftl/codecs/depth_convert_cuda.hpp> #include "NvEncoder/NvEncoderCuda.h" using ftl::codecs::NvidiaEncoder; using ftl::codecs::bitrate_t; using ftl::codecs::codec_t; using ftl::codecs::definition_t; using ftl::codecs::format_t; using ftl::codecs::Packet; using ftl::codecs::kFlagFloat; using ftl::codecs::kFlagFlipRGB; using ftl::codecs::kFlagMappedDepth; static inline std::string EncErrorCodeToString(NVENCSTATUS code) { std::vector<std::string> errors = { "NV_ENC_SUCCESS", "NV_ENC_ERR_NO_ENCODE_DEVICE", "NV_ENC_ERR_UNSUPPORTED_DEVICE", "NV_ENC_ERR_INVALID_ENCODERDEVICE", "NV_ENC_ERR_INVALID_DEVICE", "NV_ENC_ERR_DEVICE_NOT_EXIST", "NV_ENC_ERR_INVALID_PTR", "NV_ENC_ERR_INVALID_EVENT", "NV_ENC_ERR_INVALID_PARAM", "NV_ENC_ERR_INVALID_CALL", "NV_ENC_ERR_OUT_OF_MEMORY", "NV_ENC_ERR_ENCODER_NOT_INITIALIZED", "NV_ENC_ERR_UNSUPPORTED_PARAM", "NV_ENC_ERR_LOCK_BUSY", "NV_ENC_ERR_NOT_ENOUGH_BUFFER", "NV_ENC_ERR_INVALID_VERSION", "NV_ENC_ERR_MAP_FAILED", "NV_ENC_ERR_NEED_MORE_INPUT", "NV_ENC_ERR_ENCODER_BUSY", "NV_ENC_ERR_EVENT_NOT_REGISTERD", "NV_ENC_ERR_GENERIC", "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY", "NV_ENC_ERR_UNIMPLEMENTED", "NV_ENC_ERR_RESOURCE_REGISTER_FAILED", "NV_ENC_ERR_RESOURCE_NOT_REGISTERED", "NV_ENC_ERR_RESOURCE_NOT_MAPPED" }; if (code >= 0 && code < errors.size()) return errors[code]; return "Unknown error code"; } NvidiaEncoder::NvidiaEncoder(definition_t maxdef, definition_t mindef) : Encoder(maxdef, mindef, ftl::codecs::device_t::NVIDIA) { nvenc_ = nullptr; was_reset_ = false; } NvidiaEncoder::~NvidiaEncoder() { if (nvenc_) { std::vector<std::vector<uint8_t>> tmp; nvenc_->EndEncode(tmp); nvenc_->DestroyEncoder(); delete nvenc_; nvenc_ = nullptr; } } void NvidiaEncoder::reset() { was_reset_ = true; } bool NvidiaEncoder::supports(ftl::codecs::codec_t codec) { switch (codec) { case codec_t::H264_LOSSLESS: case codec_t::HEVC_LOSSLESS: case codec_t::H264: case codec_t::HEVC: return true; default: return false; } } /* * Create encoder params structure from packet and surface. Changes to these * require a recreation of the encoder. */ static ftl::codecs::NvidiaEncoder::Parameters generateParams(const cv::cuda::GpuMat &in, const ftl::codecs::Packet &pkt) { ftl::codecs::NvidiaEncoder::Parameters params; params.bitrate = pkt.bitrate; params.codec = pkt.codec; params.width = in.cols; params.height = in.rows; params.is_float = in.type() == CV_32F; return params; } static uint64_t calculateBitrate(int64_t pixels, float ratescale) { /*float bitrate = 1.0f; // Megabits switch (def) { case definition_t::UHD4k : bitrate = 40.0f; break; case definition_t::HTC_VIVE : bitrate = 32.0f; break; case definition_t::HD1080 : bitrate = 12.0f; break; case definition_t::HD720 : bitrate = 8.0f; break; case definition_t::SD576 : case definition_t::SD480 : bitrate = 4.0f; break; case definition_t::LD360 : bitrate = 2.0f; break; default : bitrate = 16.0f; }*/ float bitrate = 16.0f * float(pixels); //bitrate *= 1000.0f*1000.0f; float minrate = 0.05f * bitrate; return uint64_t((bitrate - minrate)*ratescale + minrate); } /* * Check that codec configuration and surface data are all valid. */ static bool validate(const cv::cuda::GpuMat &in, ftl::codecs::Packet &pkt) { if (in.type() == CV_32F) pkt.flags |= kFlagFloat; else pkt.flags |= kFlagFlipRGB; // Remove unwanted flags if (in.type() == CV_32F && (pkt.flags & kFlagFlipRGB)) pkt.flags &= ~kFlagFlipRGB; if (in.type() == CV_8UC4 && (pkt.flags & kFlagFloat)) pkt.flags &= ~kFlagFloat; if (pkt.codec == codec_t::HEVC_LOSSLESS && (pkt.flags & kFlagMappedDepth)) pkt.flags &= ~kFlagMappedDepth; if (pkt.codec == codec_t::Any) pkt.codec = codec_t::HEVC; // Correct for mising flag if (pkt.codec == codec_t::HEVC && in.type() == CV_32F) { pkt.flags |= ftl::codecs::kFlagMappedDepth; } if (pkt.codec == codec_t::H264 && in.type() == CV_32F) { //LOG(ERROR) << "Lossy compression not supported with H264 currently"; return false; } if (pkt.frame_count == 0) { return false; } if (in.empty()) { //LOG(WARNING) << "No data"; return false; } if (in.type() != CV_32F && in.type() != CV_8UC4) { //LOG(ERROR) << "Input type does not match given format"; pkt.flags = 0; return false; } return true; } bool NvidiaEncoder::encode(const cv::cuda::GpuMat &in, ftl::codecs::Packet &pkt) { //cudaSetDevice(0); if (pkt.codec != codec_t::Any && !supports(pkt.codec)) { pkt.codec = codec_t::Invalid; return false; } if (!validate(in, pkt)) return false; if (!_createEncoder(in, pkt)) return false; const NvEncInputFrame* f = nvenc_->GetNextInputFrame(); if (!params_.is_float) { cv::cuda::GpuMat surface(nvenc_->GetEncodeHeight(), nvenc_->GetEncodeWidth(), CV_8UC4, f->inputPtr, f->pitch); cv::cuda::cvtColor(in, surface, cv::COLOR_BGRA2RGBA, 0, stream_); } else if (params_.isLossy()) { ftl::cuda::depth_to_nv12_10(in, (ushort*)f->inputPtr, (ushort*)(((uchar*)f->inputPtr)+(nvenc_->GetEncodeHeight()*f->pitch)), f->pitch/2, 16.0f, stream_); } else { ftl::cuda::float_to_nv12_16bit((float*)in.data, static_cast<uint32_t>(in.step1()), (uchar*)f->inputPtr, f->pitch, nvenc_->GetEncodeWidth()/2, nvenc_->GetEncodeHeight(), cv::cuda::StreamAccessor::getStream(stream_)); } // TODO: Use page locked memory? pkt.data.resize(ftl::codecs::kVideoBufferSize); // Make sure conversions complete... stream_.waitForCompletion(); uint64_t cs = _encode(pkt.data.data(), pkt.data.size(), was_reset_); pkt.data.resize(cs); was_reset_ = false; if (cs == 0 || cs >= ftl::codecs::kVideoBufferSize) { //LOG(ERROR) << "Could not encode video frame"; return false; } else { return true; } } bool NvidiaEncoder::_createEncoder(const cv::cuda::GpuMat &in, const ftl::codecs::Packet &pkt) { Parameters params = generateParams(in, pkt); if (nvenc_ && (params == params_)) return true; uint64_t bitrate = calculateBitrate(in.cols*in.rows, float(pkt.bitrate)/255.0f) * pkt.frame_count; //LOG(INFO) << "Calculated bitrate " << ((params.is_float) ? "(float)" : "(rgb)") << ": " << bitrate; params_ = params; const int fps = 1000/ftl::timer::getInterval(); bool ish264 = pkt.codec == codec_t::H264 || pkt.codec == codec_t::H264_LOSSLESS; bool ishevc = !ish264; // Ensure we have a CUDA context cudaSafeCall(cudaDeviceSynchronize()); CUcontext cudaContext; cuCtxGetCurrent(&cudaContext); if (nvenc_) { //LOG(INFO) << "Destroying old NVENC encoder"; std::vector<std::vector<uint8_t>> tmp; nvenc_->EndEncode(tmp); nvenc_->DestroyEncoder(); delete nvenc_; nvenc_ = nullptr; } // Create encoder try { NV_ENC_BUFFER_FORMAT bufferFormat; if (!params.is_float) bufferFormat = NV_ENC_BUFFER_FORMAT_ABGR; else if (!params.isLossy()) bufferFormat = NV_ENC_BUFFER_FORMAT_NV12; else bufferFormat = NV_ENC_BUFFER_FORMAT_YUV420_10BIT; nvenc_ = new NvEncoderCuda(cudaContext, params_.encodeWidth(), params_.encodeHeight(), bufferFormat, 0); NV_ENC_INITIALIZE_PARAMS initializeParams = { NV_ENC_INITIALIZE_PARAMS_VER }; NV_ENC_CONFIG encodeConfig = { NV_ENC_CONFIG_VER }; initializeParams.encodeConfig = &encodeConfig; GUID codecGUID = (ishevc) ? NV_ENC_CODEC_HEVC_GUID : NV_ENC_CODEC_H264_GUID; GUID presetGUID = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID; if (!params.isLossy()) presetGUID = NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID; // NV_ENC_PRESET_LOSSLESS_HP_GUID nvenc_->CreateDefaultEncoderParams(&initializeParams, codecGUID, presetGUID); initializeParams.encodeWidth = params.encodeWidth(); initializeParams.encodeHeight = params.encodeHeight(); initializeParams.frameRateNum = fps; initializeParams.frameRateDen = 1; initializeParams.enablePTD = 1; encodeConfig.gopLength = NVENC_INFINITE_GOPLENGTH; // No B-frames encodeConfig.frameIntervalP = 1; if (ish264) encodeConfig.encodeCodecConfig.h264Config.idrPeriod = NVENC_INFINITE_GOPLENGTH; else { encodeConfig.encodeCodecConfig.hevcConfig.idrPeriod = NVENC_INFINITE_GOPLENGTH; if (params.is_float && params.isLossy()) { encodeConfig.encodeCodecConfig.hevcConfig.pixelBitDepthMinus8 = 2; // For 10-bit colour } //if (this->compression == NVPIPE_LOSSY_10BIT_444 || this->compression == NVPIPE_LOSSY_8BIT_444) { // encodeConfig.encodeCodecConfig.hevcConfig.chromaFormatIDC = 3; // For Yuv444 (1 for 420) //} } if (params.isLossy()) { encodeConfig.rcParams.averageBitRate = static_cast<uint32_t>(bitrate); encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ; encodeConfig.rcParams.vbvBufferSize = encodeConfig.rcParams.averageBitRate * initializeParams.frameRateDen / initializeParams.frameRateNum; // bitrate / framerate = one frame encodeConfig.rcParams.maxBitRate = encodeConfig.rcParams.averageBitRate; encodeConfig.rcParams.vbvInitialDelay = encodeConfig.rcParams.vbvBufferSize; } nvenc_->CreateEncoder(&initializeParams); } catch (NVENCException& e) { throw FTL_Error("Failed to create encoder (" << e.getErrorString() << ", error " + std::to_string(e.getErrorCode()) << " = " + EncErrorCodeToString(e.getErrorCode()) << ")"); } if (!nvenc_) { //LOG(ERROR) << "Could not create video encoder"; return false; } else { //LOG(INFO) << "NVENC encoder created"; //nvenc_->SetIOCudaStreams(cv::cuda::StreamAccessor::getStream(stream_), cv::cuda::StreamAccessor::getStream(stream_)); return true; } } uint64_t NvidiaEncoder::_encode(uint8_t* dst, uint64_t dstSize, bool forceIFrame) { std::vector<std::vector<uint8_t>> packets; try { if (forceIFrame) { NV_ENC_PIC_PARAMS params = {}; params.encodePicFlags = NV_ENC_PIC_FLAG_FORCEIDR | NV_ENC_PIC_FLAG_OUTPUT_SPSPPS; nvenc_->EncodeFrame(packets, ¶ms); } else { nvenc_->EncodeFrame(packets); } } catch (NVENCException& e) { throw FTL_Error("Encode failed (" << e.getErrorString() << ", error " << std::to_string(e.getErrorCode()) << " = " << EncErrorCodeToString(e.getErrorCode()) << ")"); } // Copy output uint64_t size = 0; for (auto& p : packets) { if (size + p.size() <= dstSize) { memcpy(dst + size, p.data(), p.size()); size += p.size(); } else { throw FTL_Error("Encode output buffer overflow"); } } return size; }