diff --git a/components/codecs/include/ftl/codecs/decoder.hpp b/components/codecs/include/ftl/codecs/decoder.hpp
index e6883680f34085f8b39fbcd02959b19ea4a2ca9e..7684990e204a1231af0cd8c114eabbf4ef9290d5 100644
--- a/components/codecs/include/ftl/codecs/decoder.hpp
+++ b/components/codecs/include/ftl/codecs/decoder.hpp
@@ -35,7 +35,7 @@ class Decoder {
 	Decoder() {};
 	virtual ~Decoder() {};
 
-	virtual bool decode(const ftl::codecs::Packet &pkt, cv::Mat &out)=0;
+	virtual bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out)=0;
 
 	virtual bool accepts(const ftl::codecs::Packet &)=0;
 };
diff --git a/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp b/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp
index 75807f05ee45c66d7a8b231c5d755190754f63df..a6ae2e36e41f40415d3f24daa4cc4029bf07a645 100644
--- a/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp
+++ b/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp
@@ -14,7 +14,7 @@ class NvPipeDecoder : public ftl::codecs::Decoder {
 	NvPipeDecoder();
 	~NvPipeDecoder();
 
-	bool decode(const ftl::codecs::Packet &pkt, cv::Mat &out);
+	bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) override;
 
 	bool accepts(const ftl::codecs::Packet &pkt);
 
diff --git a/components/codecs/include/ftl/codecs/opencv_decoder.hpp b/components/codecs/include/ftl/codecs/opencv_decoder.hpp
index c4ef5ab057b1e60da12f36fd603642ea16026888..0f085b90b9fdca3bc899aee2899c40e423bb1086 100644
--- a/components/codecs/include/ftl/codecs/opencv_decoder.hpp
+++ b/components/codecs/include/ftl/codecs/opencv_decoder.hpp
@@ -11,7 +11,7 @@ class OpenCVDecoder : public ftl::codecs::Decoder {
 	OpenCVDecoder();
 	~OpenCVDecoder();
 
-	bool decode(const ftl::codecs::Packet &pkt, cv::Mat &out);
+	bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) override;
 
 	bool accepts(const ftl::codecs::Packet &pkt);
 };
diff --git a/components/codecs/src/nvpipe_decoder.cpp b/components/codecs/src/nvpipe_decoder.cpp
index 4c9f515066870f4519b7148c6acc31e32cb84b11..54540be1cd72e7a53c88b8faae8f9754b45783e9 100644
--- a/components/codecs/src/nvpipe_decoder.cpp
+++ b/components/codecs/src/nvpipe_decoder.cpp
@@ -22,7 +22,7 @@ NvPipeDecoder::~NvPipeDecoder() {
 	}
 }
 
-bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) {
+bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) {
 	cudaSetDevice(0);
 	UNIQUE_LOCK(mutex_,lk);
 	if (pkt.codec != codec_t::HEVC && pkt.codec != codec_t::H264) return false;
@@ -57,7 +57,7 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) {
 	}
 	
 	// TODO: (Nick) Move to member variable to prevent re-creation
-	cv::Mat tmp(cv::Size(ftl::codecs::getWidth(pkt.definition),ftl::codecs::getHeight(pkt.definition)), (is_float_frame) ? CV_16U : CV_8UC4);
+	cv::cuda::GpuMat tmp(cv::Size(ftl::codecs::getWidth(pkt.definition),ftl::codecs::getHeight(pkt.definition)), (is_float_frame) ? CV_16U : CV_8UC4); // local: the code below still refers to 'tmp'
 
 	// Check for an I-Frame
 	if (pkt.codec == ftl::codecs::codec_t::HEVC) {
@@ -78,8 +78,9 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) {
 			tmp.convertTo(out, CV_32FC1, 1.0f/1000.0f);
 		} else {
 			LOG(WARNING) << "Resizing decoded frame from " << tmp.size() << " to " << out.size();
+			// FIXME: This won't work on GPU
 			tmp.convertTo(tmp, CV_32FC1, 1.0f/1000.0f);
-			cv::resize(tmp, out, out.size(), 0, 0, cv::INTER_NEAREST);
+			cv::cuda::resize(tmp, out, out.size(), 0, 0, cv::INTER_NEAREST);
 		}
 	} else {
 		// Is the received frame the same size as requested output?
@@ -92,13 +93,14 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) {
 			}
 		} else {
 			LOG(WARNING) << "Resizing decoded frame from " << tmp.size() << " to " << out.size();
+			// FIXME: This won't work on GPU, plus it allocates extra memory...
 			// Flag 0x1 means frame is in RGB so needs conversion to BGR
 			if (pkt.flags & 0x1) {
-				cv::cvtColor(tmp, tmp, cv::COLOR_RGBA2BGR);
+				cv::cuda::cvtColor(tmp, tmp, cv::COLOR_RGBA2BGR);
 			} else {
-				cv::cvtColor(tmp, tmp, cv::COLOR_BGRA2BGR);
+				cv::cuda::cvtColor(tmp, tmp, cv::COLOR_BGRA2BGR);
 			}
-			cv::resize(tmp, out, out.size());
+			cv::cuda::resize(tmp, out, out.size());
 		}
 	}
 
diff --git a/components/rgbd-sources/include/ftl/rgbd/detail/netframe.hpp b/components/rgbd-sources/include/ftl/rgbd/detail/netframe.hpp
index eb9c64b99a03c66f331381a595d308f1345fa100..995848ff01fdcd87b1e0d1e01e0b9cde61e267f3 100644
--- a/components/rgbd-sources/include/ftl/rgbd/detail/netframe.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/detail/netframe.hpp
@@ -14,7 +14,7 @@ namespace detail {
  * Also maintains statistics about the frame transmission for later analysis.
  */
 struct NetFrame {
-	cv::Mat channel[2];
+	cv::cuda::GpuMat channel[2];
 	volatile int64_t timestamp;
 	std::atomic<int> chunk_count[2];
 	std::atomic<int> channel_count;
diff --git a/components/rgbd-sources/include/ftl/rgbd/source.hpp b/components/rgbd-sources/include/ftl/rgbd/source.hpp
index 6892e9cc7e77a52ede955d6fc7a398af13da790f..7484788796dba398c525a208ad44708deadf3b70 100644
--- a/components/rgbd-sources/include/ftl/rgbd/source.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/source.hpp
@@ -174,14 +174,14 @@ class Source : public ftl::Configurable {
 
 	SHARED_MUTEX &mutex() { return mutex_; }
 
-	std::function<void(int64_t, cv::Mat &, cv::Mat &)> &callback() { return callback_; }
+	std::function<void(int64_t, cv::cuda::GpuMat &, cv::cuda::GpuMat &)> &callback() { return callback_; }
 
 	/**
 	 * Set the callback that receives decoded frames as they are generated.
 	 * There can be only a single such callback as the buffers can be swapped
 	 * by the callback.
 	 */
-	void setCallback(std::function<void(int64_t, cv::Mat &, cv::Mat &)> cb);
+	void setCallback(std::function<void(int64_t, cv::cuda::GpuMat &, cv::cuda::GpuMat &)> cb);
 	void removeCallback() { callback_ = nullptr; }
 
 	/**
@@ -205,7 +205,7 @@ class Source : public ftl::Configurable {
 	 * Notify of a decoded or available pair of frames. This calls the source
 	 * callback after having verified the correct resolution of the frames.
 	 */
-	void notify(int64_t ts, cv::Mat &c1, cv::Mat &c2);
+	void notify(int64_t ts, cv::cuda::GpuMat &c1, cv::cuda::GpuMat &c2);
 
 	// ==== Inject Data into stream ============================================
 
@@ -225,7 +225,7 @@ class Source : public ftl::Configurable {
 	SHARED_MUTEX mutex_;
 	ftl::codecs::Channel channel_;
 	cudaStream_t stream_;
-	std::function<void(int64_t, cv::Mat &, cv::Mat &)> callback_;
+	std::function<void(int64_t, cv::cuda::GpuMat &, cv::cuda::GpuMat &)> callback_;
 	std::list<std::function<void(ftl::rgbd::Source*, const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt)>> rawcallbacks_;
 
 	detail::Source *_createImplementation();
diff --git a/components/rgbd-sources/src/group.cpp b/components/rgbd-sources/src/group.cpp
index 4e178c0245e815da6b836fbd2de44df4dcc84e1b..1eae2837a31746df611c324882699ae7a04b20f7 100644
--- a/components/rgbd-sources/src/group.cpp
+++ b/components/rgbd-sources/src/group.cpp
@@ -50,7 +50,7 @@ void Group::addSource(ftl::rgbd::Source *src) {
 	size_t ix = sources_.size();
 	sources_.push_back(src);
 
-	src->setCallback([this,ix,src](int64_t timestamp, cv::Mat &rgb, cv::Mat &depth) {
+	src->setCallback([this,ix,src](int64_t timestamp, cv::cuda::GpuMat &rgb, cv::cuda::GpuMat &depth) {
 		if (timestamp == 0) return;
 
 		auto chan = src->getChannel();
@@ -78,13 +78,13 @@ void Group::addSource(ftl::rgbd::Source *src) {
 				// Ensure channels match source mat format
 				//fs.channel1[ix].create(rgb.size(), rgb.type());
 				//fs.channel2[ix].create(depth.size(), depth.type());
-				fs.frames[ix].create<cv::Mat>(Channel::Colour, Format<uchar3>(rgb.size())); //.create(rgb.size(), rgb.type());
-				if (chan != Channel::None) fs.frames[ix].create<cv::Mat>(chan, ftl::rgbd::FormatBase(depth.cols, depth.rows, depth.type())); //.create(depth.size(), depth.type());
+				fs.frames[ix].create<cv::cuda::GpuMat>(Channel::Colour, Format<uchar3>(rgb.size())); //.create(rgb.size(), rgb.type());
+				if (chan != Channel::None) fs.frames[ix].create<cv::cuda::GpuMat>(chan, ftl::rgbd::FormatBase(depth.cols, depth.rows, depth.type())); //.create(depth.size(), depth.type());
 
 				//cv::swap(rgb, fs.channel1[ix]);
 				//cv::swap(depth, fs.channel2[ix]);
-				cv::swap(rgb, fs.frames[ix].get<cv::Mat>(Channel::Colour));
-				if (chan != Channel::None) cv::swap(depth, fs.frames[ix].get<cv::Mat>(chan));
+				cv::cuda::swap(rgb, fs.frames[ix].get<cv::cuda::GpuMat>(Channel::Colour));
+				if (chan != Channel::None) cv::cuda::swap(depth, fs.frames[ix].get<cv::cuda::GpuMat>(chan));
 
 				++fs.count;
 				fs.mask |= (1 << ix);
diff --git a/components/rgbd-sources/src/source.cpp b/components/rgbd-sources/src/source.cpp
index 52dae351ed27d134119cfbd3bc572c4ae819f355..089ec65686438144a1aec5b3b314eb259005254b 100644
--- a/components/rgbd-sources/src/source.cpp
+++ b/components/rgbd-sources/src/source.cpp
@@ -247,7 +247,7 @@ const ftl::rgbd::Camera Source::parameters(ftl::codecs::Channel chan) const {
 	return (impl_) ? impl_->parameters(chan) : parameters();
 }
 
-void Source::setCallback(std::function<void(int64_t, cv::Mat &, cv::Mat &)> cb) {
+void Source::setCallback(std::function<void(int64_t, cv::cuda::GpuMat &, cv::cuda::GpuMat &)> cb) {
 	if (bool(callback_)) LOG(ERROR) << "Source already has a callback: " << getURI();
 	callback_ = cb;
 }
@@ -297,7 +297,7 @@ static Camera scaled(Camera &cam, int width, int height) {
 	return newcam;
 }
 
-void Source::notify(int64_t ts, cv::Mat &c1, cv::Mat &c2) {
+void Source::notify(int64_t ts, cv::cuda::GpuMat &c1, cv::cuda::GpuMat &c2) {
 	// Ensure correct scaling of images and parameters.
 	int max_width = max(impl_->params_.width, max(c1.cols, c2.cols));
 	int max_height = max(impl_->params_.height, max(c1.rows, c2.rows));
@@ -309,15 +309,15 @@ void Source::notify(int64_t ts, cv::Mat &c1, cv::Mat &c2) {
 
 	// Should channel 1 be scaled?
 	if (c1.cols < max_width || c1.rows < max_height) {
-		cv::resize(c1, c1, cv::Size(max_width, max_height));
+		cv::cuda::resize(c1, c1, cv::Size(max_width, max_height));
 	}
 
 	// Should channel 2 be scaled?
 	if (c2.cols < max_width || c2.rows < max_height) {
 		if (c2.type() == CV_32F) {
-			cv::resize(c2, c2, cv::Size(max_width, max_height), 0.0, 0.0, cv::INTER_NEAREST);
+			cv::cuda::resize(c2, c2, cv::Size(max_width, max_height), 0.0, 0.0, cv::INTER_NEAREST);
 		} else {
-			cv::resize(c2, c2, cv::Size(max_width, max_height));
+			cv::cuda::resize(c2, c2, cv::Size(max_width, max_height));
 		}
 	}