Commit 414b169e authored by Nicolas Pope

Merge branch 'feature/92/secondgpu' into 'master'

Implements #92 Use of second GPU

Closes #92

See merge request nicolas.pope/ftl!224
parents 4028afaf 82e4aade
Pipeline #18787 passed
@@ -125,7 +125,25 @@ static ftl::rgbd::Generator *createFileGenerator(ftl::Configurable *root, const
 	return gen;
 }
 
+static void threadSetCUDADevice() {
+	// Ensure all threads have correct cuda device
+	std::atomic<int> ijobs = 0;
+	for (int i=0; i<ftl::pool.size(); ++i) {
+		ftl::pool.push([&ijobs](int id) {
+			ftl::cuda::setDevice();
+			++ijobs;
+			while (ijobs < ftl::pool.size()) std::this_thread::sleep_for(std::chrono::milliseconds(10));
+		});
+	}
+	while (ijobs < ftl::pool.size()) std::this_thread::sleep_for(std::chrono::milliseconds(10));
+}
+
 static void run(ftl::Configurable *root) {
+	// Use other GPU if available.
+	ftl::cuda::setDevice(ftl::cuda::deviceCount()-1);
+	threadSetCUDADevice();
+
 	Universe *net = ftl::create<Universe>(root, "net");
 	ftl::ctrl::Master ctrl(root, net);
@@ -188,6 +206,7 @@ static void run(ftl::Configurable *root) {
 		LOG(INFO) << "Found " << (max_stream+1) << " sources in " << path;
 		auto *gen = createFileGenerator(root, path);
 		auto reconstr = ftl::create<ftl::Reconstruction>(root, std::string("recon")+std::to_string(i), std::to_string(i));
 		reconstr->setGenerator(gen);
 		reconstr->onFrameSet([sender,i](ftl::rgbd::FrameSet &fs) {
...
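cudaSetDevice() only affects the calling thread, so threadSetCUDADevice() above pushes one blocking job per pool worker to make every worker select the chosen device exactly once. Below is a standalone sketch of the same barrier pattern using plain std::thread instead of ftl::pool; the worker count and target device index are illustrative assumptions.

#include <algorithm>
#include <atomic>
#include <chrono>
#include <thread>
#include <vector>
#include <cuda_runtime.h>

int main() {
	const int device = 0;  // illustrative target device
	const unsigned n = std::max(1u, std::thread::hardware_concurrency());
	std::atomic<unsigned> ready{0};
	std::vector<std::thread> workers;
	for (unsigned i = 0; i < n; ++i) {
		workers.emplace_back([&ready, n, device]() {
			cudaSetDevice(device);  // device selection is per-thread state
			++ready;
			// Crude barrier: wait until every worker has set its device, so no
			// thread can pick up a second pinning job (mirrors ijobs above).
			while (ready < n) std::this_thread::sleep_for(std::chrono::milliseconds(10));
		});
	}
	for (auto &w : workers) w.join();
	return 0;
}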
@@ -3,6 +3,7 @@
 #include <opencv2/opencv.hpp>
 #include <opencv2/core/cuda.hpp>
+#include <opencv2/core/cuda_stream_accessor.hpp>
 
 #include <ftl/codecs/packet.hpp>
@@ -32,17 +33,17 @@ void free(Decoder *&e);
 */
 class Decoder {
 	public:
-	Decoder() {};
-	virtual ~Decoder() {};
+	Decoder() { cudaStreamCreate(&stream_); };
+	virtual ~Decoder() { cudaStreamDestroy(stream_); };
 
 	virtual bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out)=0;
 	virtual bool accepts(const ftl::codecs::Packet &)=0;
 
-	cv::cuda::Stream &stream() { return stream_; }
+	cudaStream_t stream() { return stream_; }
 
 	protected:
-	cv::cuda::Stream stream_;
+	cudaStream_t stream_;
 };
 
 }
...
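The Decoder base class now owns a raw cudaStream_t, created in its constructor and destroyed in its destructor, so each decoder queues and synchronises its GPU work on its own stream. A minimal sketch of that ownership pattern in isolation (the class name is illustrative); deleting the copy operations avoids destroying the same stream handle twice.

#include <cuda_runtime.h>

class StreamOwner {
	public:
	StreamOwner() { cudaStreamCreate(&stream_); }
	virtual ~StreamOwner() { cudaStreamDestroy(stream_); }

	// Copying would destroy the same stream twice, so forbid it.
	StreamOwner(const StreamOwner &) = delete;
	StreamOwner &operator=(const StreamOwner &) = delete;

	// Expose the raw handle so callers can queue and synchronise work on it.
	cudaStream_t stream() const { return stream_; }

	private:
	cudaStream_t stream_;
};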
@@ -44,7 +44,7 @@ bool NvPipeDecoder::_checkIFrame(ftl::codecs::codec_t codec, const unsigned char
 }
 
 bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) {
-	cudaSetDevice(0);
+	//cudaSetDevice(0);
 	UNIQUE_LOCK(mutex_,lk);
 	if (pkt.codec != codec_t::HEVC && pkt.codec != codec_t::H264 && pkt.codec != codec_t::HEVC_LOSSLESS && pkt.codec != codec_t::H264_LOSSLESS) return false;
...
@@ -93,7 +93,7 @@ static uint64_t calculateBitrate(definition_t def, float ratescale) {
 }
 
 bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, ftl::codecs::Packet &pkt) {
-	cudaSetDevice(0);
+	//cudaSetDevice(0);
 	if (pkt.codec != codec_t::Any && !supports(pkt.codec)) {
 		pkt.codec = codec_t::Invalid;
...
@@ -28,6 +28,12 @@ bool hasCompute(int major, int minor);
 int deviceCount();
 
+int getDevice();
+
+void setDevice(int);
+
+void setDevice();
+
 template <typename T>
 struct Float;
...
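The new getDevice()/setDevice() helpers record the chosen device index so any thread can re-apply it later; run() uses them to pick the last enumerated device, i.e. the second GPU when two are present. A short usage sketch follows; the header path is an assumption, only the declarations above come from the change.

#include <ftl/cuda_common.hpp>  // assumed location of the declarations above

void selectSecondGpu() {
	// Prefer the last enumerated device; with a single GPU this is device 0.
	ftl::cuda::setDevice(ftl::cuda::deviceCount() - 1);
}

void workerThreadEntry() {
	// Re-apply the recorded device on this thread before any CUDA work.
	ftl::cuda::setDevice();
}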
@@ -2,6 +2,7 @@
 using ftl::cuda::TextureObjectBase;
 
+static int dev_to_use = 0;
 static int dev_count = 0;
 static std::vector<cudaDeviceProp> properties;
@@ -36,6 +37,19 @@ int ftl::cuda::deviceCount() {
 	return dev_count;
 }
 
+int ftl::cuda::getDevice() {
+	return dev_to_use;
+}
+
+void ftl::cuda::setDevice(int id) {
+	dev_to_use = id;
+	ftl::cuda::setDevice();
+}
+
+void ftl::cuda::setDevice() {
+	cudaSafeCall(cudaSetDevice(dev_to_use));
+}
+
 TextureObjectBase::~TextureObjectBase() {
 	free();
 }
...
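setDevice() forwards the stored index through cudaSafeCall(), whose definition is not part of this diff. An illustrative stand-in (macro name and abort-on-error behaviour are assumptions) would simply check the returned cudaError_t and report failures:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

#define CUDA_SAFE_CALL(call) \
	do { \
		cudaError_t err = (call); \
		if (err != cudaSuccess) { \
			std::fprintf(stderr, "CUDA error '%s' at %s:%d\n", \
			             cudaGetErrorString(err), __FILE__, __LINE__); \
			std::abort(); \
		} \
	} while (0)

// Example: CUDA_SAFE_CALL(cudaSetDevice(1));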
@@ -140,6 +140,11 @@ void Receiver::_processAudio(const StreamPacket &spkt, const Packet &pkt) {
 }
 
 void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
+	//ftl::cuda::setDevice();
+	//int dev;
+	//cudaSafeCall(cudaGetDevice(&dev));
+	//LOG(INFO) << "Cuda device = " << dev;
+
 	const ftl::codecs::Channel rchan = spkt.channel;
 	const unsigned int channum = (unsigned int)spkt.channel;
 	InternalVideoStates &iframe = _getVideoFrame(spkt);
@@ -179,6 +184,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 	//LOG(INFO) << "Decode surface: " << (width*tx) << "x" << (height*ty);
 	auto &surface = iframe.surface[static_cast<int>(spkt.channel)];
 	surface.create(height*ty, width*tx, ((isFloatChannel(spkt.channel)) ? ((pkt.flags & 0x2) ? CV_16UC4 : CV_16U) : CV_8UC4));
 
 	// Do the actual decode
@@ -200,6 +206,8 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 		//return;
 	}
 
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(decoder->stream());
+
 	/*if (spkt.channel == Channel::Depth && (pkt.flags & 0x2)) {
 		cv::Mat tmp;
 		surface.download(tmp);
@@ -212,19 +220,19 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 		cv::Rect roi((i % tx)*width, (i / tx)*height, width, height);
 		cv::cuda::GpuMat sroi = surface(roi);
 
 		// Do colour conversion
 		if (isFloatChannel(rchan) && (pkt.flags & 0x2)) {
 			//LOG(INFO) << "VUYA Convert";
-			ftl::cuda::vuya_to_depth(frame.frame.get<cv::cuda::GpuMat>(spkt.channel), sroi, 16.0f, decoder->stream());
+			ftl::cuda::vuya_to_depth(frame.frame.get<cv::cuda::GpuMat>(spkt.channel), sroi, 16.0f, cvstream);
 		} else if (isFloatChannel(rchan)) {
-			sroi.convertTo(frame.frame.get<cv::cuda::GpuMat>(spkt.channel), CV_32FC1, 1.0f/1000.0f, decoder->stream());
+			sroi.convertTo(frame.frame.get<cv::cuda::GpuMat>(spkt.channel), CV_32FC1, 1.0f/1000.0f, cvstream);
 		} else {
-			cv::cuda::cvtColor(sroi, frame.frame.get<cv::cuda::GpuMat>(spkt.channel), cv::COLOR_RGBA2BGRA, 0, decoder->stream());
+			cv::cuda::cvtColor(sroi, frame.frame.get<cv::cuda::GpuMat>(spkt.channel), cv::COLOR_RGBA2BGRA, 0, cvstream);
 		}
 	}
 
-	decoder->stream().waitForCompletion();
+	cudaSafeCall(cudaStreamSynchronize(decoder->stream()));
 
 	for (int i=0; i<pkt.frame_count; ++i) {
 		InternalVideoStates &frame = _getVideoFrame(spkt,i);
...
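Because the decoder now exposes a raw cudaStream_t, the receiver wraps it with cv::cuda::StreamAccessor::wrapStream() so OpenCV CUDA calls queue onto the same stream, then synchronises on the raw handle. A condensed sketch of that interop; the function name is illustrative and error handling is omitted.

#include <cuda_runtime.h>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/cuda_stream_accessor.hpp>
#include <opencv2/cudaimgproc.hpp>

void convertOnRawStream(const cv::cuda::GpuMat &src, cv::cuda::GpuMat &dst, cudaStream_t raw) {
	// Non-owning wrapper: work submitted through cvstream is queued on `raw`.
	cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(raw);
	cv::cuda::cvtColor(src, dst, cv::COLOR_RGBA2BGRA, 0, cvstream);
	// Wait on the raw handle, as the receiver does after all conversions.
	cudaStreamSynchronize(raw);
}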