diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index bbaae1509de10793bf8e6cea527ec92ef0622021..28e09eeb330a07a43e139e475f0350fcc6cfac9f 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -32,7 +32,7 @@ linux:
 windows:
   stage: all
   variables:
-    CMAKE_ARGS: '-DWITH_PCL=FALSE -DCMAKE_GENERATOR_PLATFORM=x64 -DNVPIPE_DIR="D:/Build/NvPipe" -DEigen3_DIR="C:/Program Files (x86)/Eigen3/share/eigen3/cmake" -DOpenCV_DIR="D:/Build/opencv-4.1.1" -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1"'
+    CMAKE_ARGS: '-DWITH_OPTFLOW=TRUE -DWITH_PCL=FALSE -DCMAKE_GENERATOR_PLATFORM=x64 -DNVPIPE_DIR="D:/Build/NvPipe" -DEigen3_DIR="C:/Program Files (x86)/Eigen3/share/eigen3/cmake" -DOpenCV_DIR="D:/Build/opencv-4.1.1" -DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1"'
     DEPLOY_DIR: 'D:/Shared/AutoDeploy'
   tags:
     - win
diff --git a/components/rgbd-sources/CMakeLists.txt b/components/rgbd-sources/CMakeLists.txt
index 5d2008bf0d85ef6867c7b1580aa98cc714884a49..ff7d290981c1a84c5ee211219ae0651f85c58c77 100644
--- a/components/rgbd-sources/CMakeLists.txt
+++ b/components/rgbd-sources/CMakeLists.txt
@@ -3,6 +3,7 @@ set(RGBDSRC
 	src/local.cpp
 	src/disparity.cpp
 	src/source.cpp
+	src/frame.cpp
 	src/stereovideo.cpp
 	src/middlebury_source.cpp
 	src/net.cpp
diff --git a/components/rgbd-sources/include/ftl/rgbd/detail/source.hpp b/components/rgbd-sources/include/ftl/rgbd/detail/source.hpp
index 25e19e39641b1643eb9ba8cdc3240a4a719834b3..29edf722fe4a3eaac36c76f7c861797f8ff71a49 100644
--- a/components/rgbd-sources/include/ftl/rgbd/detail/source.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/detail/source.hpp
@@ -4,32 +4,13 @@
 #include <Eigen/Eigen>
 #include <opencv2/opencv.hpp>
 #include <ftl/rgbd/camera.hpp>
+#include <ftl/rgbd/frame.hpp>
 
 namespace ftl{
 namespace rgbd {
 
 class Source;
 
-typedef unsigned int channel_t;
-
-static const channel_t kChanNone = 0;
-static const channel_t kChanLeft = 0x0001;
-static const channel_t kChanDepth = 0x0002;
-static const channel_t kChanRight = 0x0004;
-static const channel_t kChanDisparity = 0x0008;
-static const channel_t kChanDeviation = 0x0010;
-static const channel_t kChanNormals = 0x0020;
-static const channel_t kChanConfidence = 0x0040;
-static const channel_t kChanFlow = 0x0080;
-static const channel_t kChanEnergy = 0x0100;
-
-static const channel_t kChanOverlay1 = 0x1000;
-
-inline bool isFloatChannel(ftl::rgbd::channel_t chan) {
-	return (chan == ftl::rgbd::kChanDepth || chan == ftl::rgbd::kChanEnergy);
-}
-
-
 typedef unsigned int capability_t;
 
 static const capability_t kCapMovable	= 0x0001;	// A movable virtual cam
diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..227913e5de056f15380390201da96b66ec23e27f
--- /dev/null
+++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
@@ -0,0 +1,137 @@
+#pragma once
+#ifndef _FTL_RGBD_FRAME_HPP_
+#define _FTL_RGBD_FRAME_HPP_
+
+#include <ftl/configuration.hpp>
+#include <opencv2/core.hpp>
+#include <opencv2/core/cuda.hpp>
+
+namespace ftl {
+namespace rgbd {
+
+typedef unsigned int channel_t;
+
+static const channel_t kChanNone = 0;
+static const channel_t kChanLeft = 0x0001;		// CV_8UC3
+static const channel_t kChanDepth = 0x0002;		// CV_32FC1
+static const channel_t kChanRight = 0x0004;		// CV_8UC3
+static const channel_t kChanDisparity = 0x0008; // CV_32FC1
+static const channel_t kChanDeviation = 0x0010;
+static const channel_t kChanNormals = 0x0020;
+static const channel_t kChanConfidence = 0x0040;
+static const channel_t kChanFlow = 0x0080;		// CV_16SC2 (format 10.5) from NVOF
+static const channel_t kChanEnergy = 0x0100;
+
+// should l/r gray be removed (not that expensive to re-calculate if needed)?
+static const channel_t kChanLeftGray = 0x0200;	// CV_8UC1
+static const channel_t kChanRightGray = 0x0400;	// CV_8UC1
+
+static const channel_t kChanOverlay1 = 0x1000;
+
+// maximum number of available channels
+static const unsigned int n_channels = 13;
+
+inline bool isFloatChannel(ftl::rgbd::channel_t chan) {
+	return chan == kChanDepth || chan == kChanEnergy;	// only these two are reported as float channels
+}
+
+// TODO:	interpolation for scaling depends on channel type;
+//			NN for depth/disparity/optflow, linear/cubic/etc. for RGB
+
+/** @brief	Set of per-channel image buffers for one frame. Every channel has a
+ *			host (cv::Mat) and a GPU (cv::cuda::GpuMat) slot plus validity flags. */
+class Frame {
+public:
+	Frame() :	channels_host_(n_channels),
+				channels_gpu_(n_channels),
+				available_(n_channels, 0)
+	{}
+
+	/** @brief	Reset all channels without releasing memory.
+	 */
+	void reset()
+	{
+		std::fill(available_.begin(), available_.end(), 0);
+	}
+
+	/** @brief	Is there valid data in channel (either host or gpu).
+	 */
+	bool hasChannel(const ftl::rgbd::channel_t& channel) const
+	{
+		return available_[_channelIdx(channel)] != 0;
+	}
+
+	/** @brief	Method to get reference to the channel content
+	 * @param	Channel type
+	 * @param	CUDA stream
+	 * @returns	Const reference to channel data
+	 *
+	 * Result is valid only if hasChannel(channel) is true. Host/Gpu transfer
+	 * is performed, if necessary, but only once unless channel contents is
+	 * changed by calling setChannel(). The overload without a stream uses
+	 * the default stream and waits for it before returning.
+	 */
+	template <typename T> const T& getChannel(const ftl::rgbd::channel_t& channel, cv::cuda::Stream& stream);
+	template <typename T> const T& getChannel(const ftl::rgbd::channel_t& channel);
+
+	/** @brief	Method to set/modify channel content
+	 * @param	Channel type
+	 * @returns	Reference to channel data
+	 *
+	 * Returns non-const reference to channel memory. Invalidates other copies
+	 * of the data (host/gpu) for the specified channel, next time getChannel()
+	 * is called a memory transfer may occur.
+	 *
+	 * NOTE:	If user of setChannel<T>() wants to modify contents instead of
+	 * 			replacing them, getChannel<T>() needs to be called first to
+	 * 			ensure there is valid contents in the returned reference!
+	 * 			(TODO: interface could be improved)
+	 */
+	template <typename T> T& setChannel(const ftl::rgbd::channel_t& channel);
+
+private:
+
+	static size_t _channelIdx(const ftl::rgbd::channel_t& channel)
+	{
+		switch(channel)
+		{
+			case kChanNone:				return 0;
+			case kChanLeft:				return 1;
+			case kChanDepth:			return 2;
+			case kChanRight:			return 3;
+			case kChanDisparity:		return 4;
+			case kChanDeviation:		return 5;
+			case kChanNormals:			return 6;
+			case kChanConfidence:		return 7;
+			case kChanFlow:				return 8;
+			case kChanEnergy:			return 9;
+			case kChanOverlay1:			return 10;	// previously fell through to slot 0
+			case kChanLeftGray:			return 11;
+			case kChanRightGray:		return 12;
+			default:					return 0;	// should not happen (error); returned index is kChanNone
+		}
+	}
+
+	std::vector<cv::Mat> channels_host_;
+	std::vector<cv::cuda::GpuMat> channels_gpu_;
+
+	// bitmasks for each channel stored in available_
+	static const uint mask_host = 1;
+	static const uint mask_gpu = 2;
+
+	std::vector<uint> available_;
+};
+
+template<> const cv::Mat& Frame::getChannel(const ftl::rgbd::channel_t& channel, cv::cuda::Stream& stream);
+template<> const cv::cuda::GpuMat& Frame::getChannel(const ftl::rgbd::channel_t& channel, cv::cuda::Stream& stream);
+
+template<> const cv::Mat& Frame::getChannel(const ftl::rgbd::channel_t& channel);
+template<> const cv::cuda::GpuMat& Frame::getChannel(const ftl::rgbd::channel_t& channel);
+
+template<> cv::Mat& Frame::setChannel(const ftl::rgbd::channel_t& channel);
+template<> cv::cuda::GpuMat& Frame::setChannel(const ftl::rgbd::channel_t& channel);
+
+}
+}
+
+#endif // _FTL_RGBD_FRAME_HPP_
\ No newline at end of file
diff --git a/components/rgbd-sources/include/ftl/rgbd/source.hpp b/components/rgbd-sources/include/ftl/rgbd/source.hpp
index 4ddf48cdb50e463619113e31edfe79d2ec7a6807..1106d76220e676a3e7e36c5db18db9596a0f91d9 100644
--- a/components/rgbd-sources/include/ftl/rgbd/source.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/source.hpp
@@ -12,6 +12,7 @@
 #include <string>
 
 #include <ftl/cuda_common.hpp>
+#include <ftl/rgbd/frame.hpp>
 
 namespace ftl {
 
diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
index 71f714c3afd06c7c0b362d7b0f635efcf4dd808b..782338fc2be41d56a4d6fcd4f4920f6d920e663f 100644
--- a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
+++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
@@ -12,11 +12,9 @@ using cv::cuda::GpuMat;
 
 FixstarsSGM::FixstarsSGM(nlohmann::json &config) : Disparity(config) {
 	ssgm_ = nullptr;
+	const int width = size_.width;
+	const int height = size_.height;
 
-	int width = value("width", 1280);
-	int height = value("height", 720);
-	
-	size_ = cv::Size(width, height);
 	CHECK((width >= 480) && (height >= 360));
 
 	uniqueness_ = value("uniqueness", 0.95f);
@@ -64,54 +62,79 @@ void FixstarsSGM::init(const cv::Size size) {
 	);
 }
 
-void FixstarsSGM::compute(const cv::cuda::GpuMat &l, const cv::cuda::GpuMat &r,
-	cv::cuda::GpuMat &disp, cv::cuda::Stream &stream)
+void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream)
 {
-	if (l.size() != size_) {
-		// re-use same buffer for l/r
-		cv::cuda::resize(r, l_downscaled_, size_, 0.0, 0.0, cv::INTER_CUBIC, stream);
-		cv::cuda::cvtColor(l_downscaled_, rbw_, cv::COLOR_BGR2GRAY, 0, stream);
-		cv::cuda::resize(l, l_downscaled_, size_, 0.0, 0.0, cv::INTER_CUBIC, stream);
-		cv::cuda::cvtColor(l_downscaled_, lbw_, cv::COLOR_BGR2GRAY, 0, stream);
+	/*if (!frame.hasChannel(ftl::rgbd::kChanLeftGray))
+	{
+		auto &rgb = frame.getChannel<GpuMat>(ftl::rgbd::kChanLeft, stream);
+		auto &gray = frame.setChannel<GpuMat>(ftl::rgbd::kChanLeftGray);
+		cv::cuda::cvtColor(rgb, gray, cv::COLOR_BGR2GRAY, 0, stream);
+	}
+
+	if (!frame.hasChannel(ftl::rgbd::kChanRightGray))
+	{
+		auto &rgb = frame.getChannel<GpuMat>(ftl::rgbd::kChanRight, stream);
+		auto &gray = frame.setChannel<GpuMat>(ftl::rgbd::kChanRightGray);
+		cv::cuda::cvtColor(rgb, gray, cv::COLOR_BGR2GRAY, 0, stream);
+	}*/
+	// Inputs are read as GPU matrices (uploaded on `stream` if only a host copy exists); the result goes to the disparity channel.
+	const auto &l = frame.getChannel<GpuMat>(ftl::rgbd::kChanLeft, stream);
+	const auto &r = frame.getChannel<GpuMat>(ftl::rgbd::kChanRight, stream);
+	auto &disp = frame.setChannel<GpuMat>(ftl::rgbd::kChanDisparity);
+	// (Re)allocate the output whenever its size differs from the left input image.
+	if (disp.size() != l.size())
+	{
+		disp = GpuMat(l.size(), CV_32FC1);
+	}
+	// Bring the inputs to the configured size_ when they differ, then convert both to grayscale.
+	GpuMat l_scaled;
+	if (l.size() != size_)
+	{
+		GpuMat _r;	// scaled right image, only needed for the grayscale conversion
+		scaleInput(l, r, l_scaled, _r, stream);
+		cv::cuda::cvtColor(l_scaled, lbw_, cv::COLOR_BGR2GRAY, 0, stream);
+		cv::cuda::cvtColor(_r, rbw_, cv::COLOR_BGR2GRAY, 0, stream);
 	}
-	else {
+	else
+	{
 		cv::cuda::cvtColor(l, lbw_, cv::COLOR_BGR2GRAY, 0, stream);
 		cv::cuda::cvtColor(r, rbw_, cv::COLOR_BGR2GRAY, 0, stream);
 	}
 
 	stream.waitForCompletion();
-
 	ssgm_->execute(lbw_.data, rbw_.data, dispt_.data);
-
 	GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows));
 	left_pixels.setTo(0);
 	cv::cuda::threshold(dispt_, dispt_, 4096.0f, 0.0f, cv::THRESH_TOZERO_INV, stream);
 
 	// TODO: filter could be applied after upscaling (to the upscaled disparity image)
-	if (use_filter_) {
-		filter_->apply(dispt_,
-			l.size() != dispt_.size() ? l_downscaled_ : l,
+	if (use_filter_)
+	{
+		filter_->apply(
+			dispt_,
+			(l.size() == size_) ? l : l_scaled,	// guide image must match the size the disparity was computed at
 			dispt_,
 			stream
 		);
 	}
 
-	if (l.size() != size_) {
-		cv::cuda::multiply(dispt_, (double)l.cols / (double)size_.width, dispt_);
-		// invalid areas (bad values) have to be taken into account in interpolation
-		cv::cuda::resize(dispt_, dispt_full_res_, l.size(), 0.0, 0.0, cv::INTER_NEAREST, stream);
+	GpuMat dispt_scaled;	// disparity at the input image size, still in the solver's fixed-point format
+	if (l.size() != size_)
+	{
+		scaleDisparity(l.size(), dispt_, dispt_scaled, stream);
 	}
-	else {
-		dispt_full_res_ = dispt_;
+	else
+	{
+		dispt_scaled = dispt_;
 	}
 
-	dispt_full_res_.convertTo(disp, CV_32F, 1.0f / 16.0f, stream);
+	dispt_scaled.convertTo(disp, CV_32F, 1.0f / 16.0f, stream);	// to float, dividing the raw values by 16
 
 #ifdef HAVE_OPTFLOW
-	if (use_off_) {
-		Mat disp_host(disp);
-		off_.filter(disp_host, Mat(lbw_));
-		disp.upload(disp_host);
+	if (use_off_)
+	{
+		frame.getChannel<Mat>(ftl::rgbd::kChanDisparity);	// download first so the host copy edited below is current; NOTE(review): uses the default stream, not `stream` - confirm ordering
+		off_.filter(frame.setChannel<Mat>(ftl::rgbd::kChanDisparity), Mat(lbw_));
 	}
 #endif
 }
@@ -119,9 +142,7 @@ void FixstarsSGM::compute(const cv::cuda::GpuMat &l, const cv::cuda::GpuMat &r,
 void FixstarsSGM::setMask(Mat &mask) {
 	return; // TODO(Nick) Not needed, but also code below does not work with new GPU pipeline
 	CHECK(mask.type() == CV_8UC1) << "mask type must be CV_8U";
-
 	if (!ssgm_) { init(size_); }
-
-	mask_l_ = mask;
+	mask_l_ = GpuMat(mask);	// unreachable while the early return above stays; mask_l_ is a GpuMat, so build one from the host mask
 	ssgm_->setMask((uint8_t*)mask.data, mask.cols);
 }
\ No newline at end of file
diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
index d773446d784761af8fe75fd7c98db75f3d4fd10f..039db0c874d8d816b91d447a6254c79fd5a43304 100644
--- a/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
+++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
@@ -30,7 +30,7 @@ namespace ftl {
 		public:
 			explicit FixstarsSGM(nlohmann::json &config);
 
-			void compute(const cv::cuda::GpuMat &l, const cv::cuda::GpuMat &r, cv::cuda::GpuMat &disp, cv::cuda::Stream &stream) override;
+			void compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream) override;
 			void setMask(cv::Mat &mask) override;
 
 			/* Factory creator */
@@ -44,7 +44,6 @@ namespace ftl {
 			float uniqueness_;
 			int P1_;
 			int P2_;
-			cv::Size size_;
 			bool use_filter_;
 			bool use_off_;
 			cv::Ptr<cv::cuda::DisparityBilateralFilter> filter_;
@@ -53,9 +52,6 @@ namespace ftl {
 			cv::cuda::GpuMat rbw_;
 			cv::cuda::GpuMat dispt_;
 
-			cv::cuda::GpuMat l_downscaled_;
-			cv::cuda::GpuMat dispt_full_res_;
-
 			#ifdef HAVE_OPTFLOW
 			ftl::rgbd::OFDisparityFilter off_;
 			#endif
diff --git a/components/rgbd-sources/src/disparity.cpp b/components/rgbd-sources/src/disparity.cpp
index e92c72de765c7dcd69da1aa279de9181e3170087..7d9089c1ab5a2e47bd26ed87591ddb411dabf7f8 100644
--- a/components/rgbd-sources/src/disparity.cpp
+++ b/components/rgbd-sources/src/disparity.cpp
@@ -15,7 +15,11 @@ std::map<std::string, std::function<Disparity*(ftl::Configurable *, const std::s
 Disparity::Disparity(nlohmann::json &config)
 	: 	ftl::Configurable(config),
 		min_disp_(value("minimum",0)),
-		max_disp_(value("maximum", 256)) {}
+		max_disp_(value("maximum", 256)),
+		size_(value("width", 1280), value("height", 720))	// target size used by scaleInput(); defaults to 1280x720
+	{
+		// nothing else to do: every member is set in the initialiser list
+	}
 
 Disparity *Disparity::create(ftl::Configurable *parent, const std::string &name) {
 	nlohmann::json &config = ftl::config::resolve((!parent->getConfig()[name].is_null()) ? parent->getConfig()[name] : ftl::config::resolve(parent->getConfig())[name]); // ftl::config::resolve(parent->getConfig()[name]);
@@ -37,6 +41,28 @@ void Disparity::_register(const std::string &n,
 	(*algorithms__)[n] = f;
 }
 
+void Disparity::scaleInput(	const cv::cuda::GpuMat& left_in,
+							const cv::cuda::GpuMat& right_in,
+							cv::cuda::GpuMat& left_out,
+							cv::cuda::GpuMat& right_out,
+							cv::cuda::Stream &stream)
+{
+	cv::cuda::resize(left_in, left_scaled_, size_, 0.0, 0.0, cv::INTER_CUBIC, stream);	// bicubic resize to size_, queued on stream
+	left_out = left_scaled_;	// output refers to the member buffer, which the next call overwrites
+	cv::cuda::resize(right_in, right_scaled_, size_, 0.0, 0.0, cv::INTER_CUBIC, stream);	// same for the right image
+	right_out = right_scaled_;
+}
+
+void Disparity::scaleDisparity(	const cv::Size&		new_size,
+								cv::cuda::GpuMat&	in,
+								cv::cuda::GpuMat&	out,
+								cv::cuda::Stream&	stream)
+{	// Rescales the disparity values in `in` (in place) by the width ratio, then resizes to new_size; all work is queued on `stream`.
+	cv::cuda::multiply(in, static_cast<double>(new_size.width) / static_cast<double>(in.cols), in, 1.0, -1, stream);
+	cv::cuda::resize(in, dispt_scaled_, new_size, 0.0, 0.0, cv::INTER_NEAREST, stream);	// nearest neighbour: no interpolation between values
+	out = dispt_scaled_;	// output refers to the member buffer, which the next call overwrites
+}
+
 // TODO:(Nick) Add remaining algorithms
 /*
 #include "algorithms/rtcensus.hpp"
diff --git a/components/rgbd-sources/src/disparity.hpp b/components/rgbd-sources/src/disparity.hpp
index e7e78b277544afd87870e62ca5b833b15d9cfd6d..2ab8223ca97f14752265be86ccf0ace0554eb20e 100644
--- a/components/rgbd-sources/src/disparity.hpp
+++ b/components/rgbd-sources/src/disparity.hpp
@@ -8,6 +8,7 @@
 #include <opencv2/opencv.hpp>
 #include <nlohmann/json.hpp>
 #include <ftl/configurable.hpp>
+#include <ftl/rgbd/frame.hpp>
 
 namespace ftl {
 namespace rgbd {
@@ -26,13 +27,32 @@ class Disparity : public ftl::Configurable {
 	virtual void setMinDisparity(size_t min) { min_disp_ = min; }
 	virtual void setMaxDisparity(size_t max) { max_disp_ = max; }
 	
-	virtual void setMask(cv::Mat &mask) { mask_l_ = mask; }
+	virtual void setMask(cv::Mat &mask) { mask_l_ = cv::cuda::GpuMat(mask); }
+	virtual void setMask(cv::cuda::GpuMat &mask) { mask_l_ = mask; }
 	
+	void scaleInput(const cv::cuda::GpuMat& left_in,
+					const cv::cuda::GpuMat& right_in,
+					cv::cuda::GpuMat& left_out,
+					cv::cuda::GpuMat& right_out,
+					cv::cuda::Stream &stream);
+	
+	void scaleDisparity(const cv::Size &new_size,
+						cv::cuda::GpuMat& in,
+						cv::cuda::GpuMat& out,
+						cv::cuda::Stream &stream);
+
 	/**
 	 * Pure virtual function representing the actual computation of
 	 * disparity from left and right images to be implemented.
 	 */
-	virtual void compute(const cv::cuda::GpuMat &l, const cv::cuda::GpuMat &r, cv::cuda::GpuMat &disp, cv::cuda::Stream &stream)=0;
+	virtual void compute(Frame &frame, cv::cuda::Stream &stream)=0;
+	virtual void compute(cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::GpuMat &disp, cv::cuda::Stream &stream) {
+		ftl::rgbd::Frame frame;	// legacy adapter: wrap the raw buffers and run the Frame-based overload
+		frame.setChannel<cv::cuda::GpuMat>(kChanLeft) = l;
+		frame.setChannel<cv::cuda::GpuMat>(kChanRight) = r;
+		compute(frame, stream);
+		disp = frame.getChannel<cv::cuda::GpuMat>(kChanDisparity, stream);	// hand the computed disparity back to the caller
+	}
 
 	/**
 	 * Factory registration class.
@@ -54,11 +74,15 @@ class Disparity : public ftl::Configurable {
 	protected:
 	static void _register(const std::string &n, std::function<Disparity*(ftl::Configurable *, const std::string &)> f);
 	
-	protected:
-	//nlohmann::json &config_;
+protected:
 	int min_disp_;
 	int max_disp_;
-	cv::Mat mask_l_;
+	cv::Size size_;
+	
+	cv::cuda::GpuMat left_scaled_;
+	cv::cuda::GpuMat right_scaled_;
+	cv::cuda::GpuMat dispt_scaled_;
+	cv::cuda::GpuMat mask_l_;
 	
 	private:
 	static std::map<std::string,std::function<Disparity*(ftl::Configurable *, const std::string &)>> *algorithms__;
@@ -69,4 +93,3 @@ class Disparity : public ftl::Configurable {
 }
 
 #endif // _FTL_DISPARITY_HPP_
-
diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8145280cc83d24347d9bb2fc07a5a9546bbe5dd7
--- /dev/null
+++ b/components/rgbd-sources/src/frame.cpp
@@ -0,0 +1,68 @@
+
+#include <ftl/rgbd/frame.hpp>
+
+namespace ftl {
+namespace rgbd {
+
+template<> const cv::Mat& Frame::getChannel(const ftl::rgbd::channel_t& channel, cv::cuda::Stream &stream)
+{
+	// Host view of a channel: when only the GPU copy is flagged valid, queue a
+	// download on `stream` and flag the host copy as well.
+	const size_t slot = _channelIdx(channel);
+	const bool host_valid = (available_[slot] & mask_host) != 0;
+	const bool gpu_valid = (available_[slot] & mask_gpu) != 0;
+	if (!host_valid && gpu_valid)
+	{
+		channels_gpu_[slot].download(channels_host_[slot], stream);
+		available_[slot] |= mask_host;
+	}
+	return channels_host_[slot];
+}
+
+template<> const cv::Mat& Frame::getChannel(const ftl::rgbd::channel_t& channel)
+{
+	cv::cuda::Stream &null_stream = cv::cuda::Stream::Null();
+	const cv::Mat &host_data = getChannel<cv::Mat>(channel, null_stream);
+	null_stream.waitForCompletion();	// blocking variant: wait for any queued transfer before returning
+	return host_data;
+}
+
+template<> cv::Mat& Frame::setChannel(const ftl::rgbd::channel_t& channel)
+{
+	const size_t slot = _channelIdx(channel);
+	available_[slot] = mask_host;	// the host copy becomes the only one flagged valid
+	return channels_host_[slot];
+}
+
+template<> const cv::cuda::GpuMat& Frame::getChannel(const ftl::rgbd::channel_t& channel, cv::cuda::Stream &stream)
+{
+	// GPU view of a channel: when only the host copy is flagged valid, queue an
+	// upload on `stream` and flag the GPU copy as well.
+	const size_t slot = _channelIdx(channel);
+	const bool gpu_valid = (available_[slot] & mask_gpu) != 0;
+	const bool host_valid = (available_[slot] & mask_host) != 0;
+	if (!gpu_valid && host_valid)
+	{
+		channels_gpu_[slot].upload(channels_host_[slot], stream);
+		available_[slot] |= mask_gpu;
+	}
+	return channels_gpu_[slot];
+}
+
+template<> const cv::cuda::GpuMat& Frame::getChannel(const ftl::rgbd::channel_t& channel)
+{
+	cv::cuda::Stream &null_stream = cv::cuda::Stream::Null();
+	const cv::cuda::GpuMat &gpu_data = getChannel<cv::cuda::GpuMat>(channel, null_stream);
+	null_stream.waitForCompletion();	// blocking variant: wait for any queued transfer before returning
+	return gpu_data;
+}
+
+template<> cv::cuda::GpuMat& Frame::setChannel(const ftl::rgbd::channel_t& channel)
+{
+	const size_t slot = _channelIdx(channel);
+	available_[slot] = mask_gpu;	// the GPU copy becomes the only one flagged valid
+	return channels_gpu_[slot];
+}
+
+}
+}
\ No newline at end of file
diff --git a/components/rgbd-sources/src/offilter.cpp b/components/rgbd-sources/src/offilter.cpp
index 913dc5ec506b794d8bc0cd35e35f4a74bbf1cd8e..03db4807a7f9fa045708d3cf17bf32556b7bf5b5 100644
--- a/components/rgbd-sources/src/offilter.cpp
+++ b/components/rgbd-sources/src/offilter.cpp
@@ -16,7 +16,8 @@ template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d)
 OFDisparityFilter::OFDisparityFilter(Size size, int n_frames, float threshold) :
 	n_(0), n_max_(n_frames), threshold_(threshold), size_(size)
 {
-	disp_ = Mat::zeros(size, CV_64FC(n_frames));
+	
+	disp_ = Mat::zeros(cv::Size(size.width * n_frames, size.height), CV_64FC1);
 	gray_ = Mat::zeros(size, CV_8UC1);
 
 	nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height,
diff --git a/components/rgbd-sources/src/stereovideo.cpp b/components/rgbd-sources/src/stereovideo.cpp
index 57e57fbbd245ee9e8f4e7038d4b24b85b4259145..ebc72d854e8d35a480f3d4ef5873e8b58d5bb086 100644
--- a/components/rgbd-sources/src/stereovideo.cpp
+++ b/components/rgbd-sources/src/stereovideo.cpp
@@ -31,7 +31,8 @@ StereoVideoSource::~StereoVideoSource() {
 	delete lsrc_;
 }
 
-void StereoVideoSource::init(const string &file) {
+void StereoVideoSource::init(const string &file)
+{
 	capabilities_ = kCapVideo | kCapStereo;
 
 	if (ftl::is_video(file)) {
@@ -57,6 +58,18 @@ void StereoVideoSource::init(const string &file) {
 	}
 
 	cv::Size size = cv::Size(lsrc_->width(), lsrc_->height());
+	frames_ = std::vector<Frame>(2);
+
+#ifdef HAVE_OPTFLOW
+	use_optflow_ =  host_->value("use_optflow", false);
+	LOG(INFO) << "Using optical flow: " << (use_optflow_ ? "true" : "false");
+
+	nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height,
+													cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW,
+													true, false, false, 0);
+
+#endif
+
 	calib_ = ftl::create<Calibrate>(host_, "calibration", size, stream_);
 
 	if (!calib_->isCalibrated()) LOG(WARNING) << "Cameras are not calibrated!";
@@ -160,49 +173,71 @@ bool StereoVideoSource::capture(int64_t ts) {
 }
 
 bool StereoVideoSource::retrieve() {
-	lsrc_->get(cap_left_, cap_right_, calib_, stream2_);
+	auto &frame = frames_[0];	// frames_[0] receives the new capture; frames_[1] still holds the previous one
+	frame.reset();
+	auto &left = frame.setChannel<cv::cuda::GpuMat>(ftl::rgbd::kChanLeft);
+	auto &right = frame.setChannel<cv::cuda::GpuMat>(ftl::rgbd::kChanRight);
+	lsrc_->get(left, right, calib_, stream2_);
+
+#ifdef HAVE_OPTFLOW
+	// see comments in https://gitlab.utu.fi/nicolas.pope/ftl/issues/155
+	// Optional: grayscale copies of both views plus optical flow against the previous left image.
+	if (use_optflow_)
+	{
+		auto &left_gray = frame.setChannel<cv::cuda::GpuMat>(kChanLeftGray);
+		auto &right_gray = frame.setChannel<cv::cuda::GpuMat>(kChanRightGray);
+
+		cv::cuda::cvtColor(left, left_gray, cv::COLOR_BGR2GRAY, 0, stream2_);
+		cv::cuda::cvtColor(right, right_gray, cv::COLOR_BGR2GRAY, 0, stream2_);
+
+		if (frames_[1].hasChannel(kChanLeftGray))
+		{
+			auto &left_gray_prev = frames_[1].getChannel<cv::cuda::GpuMat>(kChanLeftGray, stream2_);	// previous frame, not the current one
+			auto &optflow = frame.setChannel<cv::cuda::GpuMat>(kChanFlow);
+			nvof_->calc(left_gray, left_gray_prev, optflow, stream2_);	// write into the channel that was just flagged valid
+			// Flow is kept as produced by NVOF: nvof_->upSampler() isn't implemented
+			// with CUDA and cv::cuda::resize() does not work with 2-channel input,
+			// so no resize to left.size() is attempted here.
+		}
+	}
+#endif
+
 	stream2_.waitForCompletion();
 	return true;
 }
 
 void StereoVideoSource::swap() {
-	cv::cuda::GpuMat tmp;
-	tmp = left_;
-	left_ = cap_left_;
-	cap_left_ = tmp;
-	tmp = right_;
-	right_ = cap_right_;
-	cap_right_ = tmp;
+	const auto previous = frames_[1];	// exchange the capture frame [0] with the processing frame [1]
+	frames_[1] = frames_[0];
+	frames_[0] = previous;
 }
 
-bool StereoVideoSource::compute(int n, int b) {	
-	const ftl::rgbd::channel_t chan = host_->getChannel();
+bool StereoVideoSource::compute(int n, int b) {
+	auto &frame = frames_[1];
+	auto &left = frame.getChannel<cv::cuda::GpuMat>(ftl::rgbd::kChanLeft);
+	auto &right = frame.getChannel<cv::cuda::GpuMat>(ftl::rgbd::kChanRight);
 
-	if (left_.empty() || right_.empty()) return false;
+	const ftl::rgbd::channel_t chan = host_->getChannel();
+	if (left.empty() || right.empty()) return false;
 
 	if (chan == ftl::rgbd::kChanDepth) {
-		//lsrc_->get(left_, right_, stream_);
-		if (depth_tmp_.empty()) depth_tmp_ = cv::cuda::GpuMat(left_.size(), CV_32FC1);
-		if (disp_tmp_.empty()) disp_tmp_ = cv::cuda::GpuMat(left_.size(), CV_32FC1);
-		//calib_->rectifyStereo(left_, right_, stream_);
-		disp_->compute(left_, right_, disp_tmp_, stream_);
-		ftl::cuda::disparity_to_depth(disp_tmp_, depth_tmp_, params_, stream_);
-		left_.download(rgb_, stream_);
-		//rgb_ = lsrc_->cachedLeft();
-		depth_tmp_.download(depth_, stream_);
-
+		disp_->compute(frame, stream_);
+		
+		auto &disp = frame.getChannel<cv::cuda::GpuMat>(ftl::rgbd::kChanDisparity);
+		auto &depth = frame.setChannel<cv::cuda::GpuMat>(ftl::rgbd::kChanDepth);
+		if (depth.empty()) depth = cv::cuda::GpuMat(left.size(), CV_32FC1);
+
+		ftl::cuda::disparity_to_depth(disp, depth, params_, stream_);
+		
+		left.download(rgb_, stream_);
+		depth.download(depth_, stream_);
 		stream_.waitForCompletion();  // TODO:(Nick) Move to getFrames
 	} else if (chan == ftl::rgbd::kChanRight) {
-		//lsrc_->get(left_, right_, stream_);
-		//calib_->rectifyStereo(left_, right_, stream_);
-		left_.download(rgb_, stream_);
-		right_.download(depth_, stream_);
+		left.download(rgb_, stream_);
+		right.download(depth_, stream_);
 		stream_.waitForCompletion();  // TODO:(Nick) Move to getFrames
 	} else {
-		//lsrc_->get(left_, right_, stream_);
-		//calib_->rectifyStereo(left_, right_, stream_);
-		//rgb_ = lsrc_->cachedLeft();
-		left_.download(rgb_, stream_);
+		left.download(rgb_, stream_);
 		stream_.waitForCompletion();  // TODO:(Nick) Move to getFrames
 	}
 
diff --git a/components/rgbd-sources/src/stereovideo.hpp b/components/rgbd-sources/src/stereovideo.hpp
index 88b71a068dfe9d596697afd0b93ded1cb7c034e1..9fe3ca529f2f32300cb85aa47bd958b78f78984c 100644
--- a/components/rgbd-sources/src/stereovideo.hpp
+++ b/components/rgbd-sources/src/stereovideo.hpp
@@ -41,19 +41,21 @@ class StereoVideoSource : public detail::Source {
 	Disparity *disp_;
 	
 	bool ready_;
+	bool use_optflow_;
 	
 	cv::cuda::Stream stream_;
 	cv::cuda::Stream stream2_;
 
-	cv::cuda::GpuMat left_;
-	cv::cuda::GpuMat right_;
-	cv::cuda::GpuMat cap_left_;
-	cv::cuda::GpuMat cap_right_;
-	cv::cuda::GpuMat disp_tmp_;
-	cv::cuda::GpuMat depth_tmp_;
-	
+	std::vector<Frame> frames_;
+
 	cv::Mat mask_l_;
 
+#ifdef HAVE_OPTFLOW
+	// see comments in https://gitlab.utu.fi/nicolas.pope/ftl/issues/155
+	cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> nvof_;
+	cv::cuda::GpuMat optflow_;
+#endif
+
 	void init(const std::string &);
 };