diff --git a/applications/gui2/src/modules/calibration/extrinsic.cpp b/applications/gui2/src/modules/calibration/extrinsic.cpp
index f5b35eed4fd85100e89adf31d1555cfb894ec7b5..916ce681f835d3fad75a865fd56681b0ed63cf2f 100644
--- a/applications/gui2/src/modules/calibration/extrinsic.cpp
+++ b/applications/gui2/src/modules/calibration/extrinsic.cpp
@@ -73,16 +73,17 @@ void ExtrinsicCalibration::start(unsigned int fsid, std::vector<FrameID> sources
 		// stereo calibration
 		auto cl = CameraID(id.frameset(), id.source(), Channel::Left);
 		auto cr = CameraID(id.frameset(), id.source(), Channel::Right);
-		const auto& frame = (*fs_current_)[id.source()].cast<ftl::rgbd::Frame>();
-		auto sz = cv::Size((int) frame.getLeftCamera().width, (int) frame.getLeftCamera().height);
 		state_.cameras.push_back(cl);
 		state_.cameras.push_back(cr);
+
+		const auto& frame = (*fs_current_)[id.source()].cast<ftl::rgbd::Frame>();
+		// NOTE: assumes left size is the same as right size!
+		auto sz = frame.getSize();
 		auto calibl = getCalibration(cl);
 		calibl.intrinsic = CalibrationData::Intrinsic(calibl.intrinsic, sz);
+
 		auto calibr = getCalibration(cr);
 		calibr.intrinsic = CalibrationData::Intrinsic(calibr.intrinsic, sz);
-
-		// Scale intrinsics
 		state_.calib.addStereoCamera(calibl, calibr);
 
 		// Update rectification
diff --git a/applications/gui2/src/modules/calibration/intrinsic.cpp b/applications/gui2/src/modules/calibration/intrinsic.cpp
index 2fa2e9c54fcf996b95994d07a8b055fd9010473f..b91f74f56cf6eacc273368b18bc3332c9f625f3d 100644
--- a/applications/gui2/src/modules/calibration/intrinsic.cpp
+++ b/applications/gui2/src/modules/calibration/intrinsic.cpp
@@ -112,22 +112,10 @@ void IntrinsicCalibration::setChannel_(FrameSetPtr fs) {
 	cv::Size size;
 
 	if (state_->channel== Channel::Left) {
-		if(frame.has(Channel::LeftHighRes)) {
-			state_->channel_alt = Channel::LeftHighRes;
-			size = frame.get<cv::Mat>(state_->channel_alt).size();
-		}
-		else {
-			size = frame.get<cv::Mat>(state_->channel_alt).size();
-		}
+		size = frame.get<cv::Mat>(state_->channel_alt).size();
 	}
 	else if (state_->channel== Channel::Right) {
-		if (frame.has(Channel::RightHighRes)) {
-			state_->channel_alt = Channel::RightHighRes;
-			size = frame.get<cv::Mat>(Channel::LeftHighRes).size();
-		}
-		else {
-			size = frame.get<cv::Mat>(Channel::Left).size();
-		}
+		size = frame.get<cv::Mat>(Channel::Left).size();
 	}
 
 	try {
diff --git a/applications/gui2/src/modules/calibration/stereo.cpp b/applications/gui2/src/modules/calibration/stereo.cpp
index 052bf15572b0e97790cfbfed8ac7c4cf90f96230..148f8abcaa4d484201ec238a4440d01b59ca7700 100644
--- a/applications/gui2/src/modules/calibration/stereo.cpp
+++ b/applications/gui2/src/modules/calibration/stereo.cpp
@@ -110,7 +110,7 @@ void StereoCalibration::start(ftl::data::FrameID id) {
 		try {
 			auto& frame = (*fs)[state_->id.source()];
 			state_->calib = frame.get<CalibrationData>(Channel::CalibrationData);
-			state_->highres = frame.hasAll({Channel::LeftHighRes, Channel::RightHighRes});
+			state_->highres = false; // TODO: Remove
 			auto sizel = frame.get<cv::cuda::GpuMat>(channelLeft_()).size();
 			auto sizer = frame.get<cv::cuda::GpuMat>(channelLeft_()).size();
 			if (sizel != sizer) {
@@ -251,18 +251,18 @@ ftl::rgbd::Frame& StereoCalibration::frame_() {
 }
 
 bool StereoCalibration::hasFrame() {
-	auto cleft = state_->highres ? Channel::LeftHighRes : Channel::Left;
-	auto cright = state_->highres ? Channel::RightHighRes : Channel::Right;
+	auto cleft = Channel::Left;
+	auto cright = Channel::Right;
 	return (std::atomic_load(&fs_update_).get() != nullptr)
 		&& fs_update_->frames[state_->id.source()].hasAll({cleft, cright});
 };
 
 Channel StereoCalibration::channelLeft_() {
-	return (state_->highres ? Channel::LeftHighRes : Channel::Left);
+	return Channel::Left;
 }
 
 Channel StereoCalibration::channelRight_() {
-	return (state_->highres ? Channel::RightHighRes : Channel::Right);
+	return Channel::Right;
 }
 
 cv::cuda::GpuMat StereoCalibration::getLeft() {
diff --git a/applications/gui2/src/modules/camera.cpp b/applications/gui2/src/modules/camera.cpp
index bbe19415bead60124c157f4c21a20f7d33864b13..4219d7004e08fd895de8a75111b2ea262cf5e9e9 100644
--- a/applications/gui2/src/modules/camera.cpp
+++ b/applications/gui2/src/modules/camera.cpp
@@ -77,9 +77,12 @@ void Camera::update(double delta) {
 
 			if (frame.has(Channel::Calibration)) {
 				const auto &cam = rgbdf.getLeft();
+				cv::Size s = rgbdf.getSize();
 				auto &jcam = mod->getJSON(StatisticsPanel::CAMERA_DETAILS);
-				jcam["Resolution"] = std::to_string(cam.width) + std::string("x") + std::to_string(cam.height);
+				jcam["D-Resolution"] = std::to_string(cam.width) + std::string("x") + std::to_string(cam.height);
+				jcam["C-Resolution"] = std::to_string(s.width) + std::string("x") + std::to_string(s.height);
 				jcam["Focal"] = cam.fx;
+				jcam["Baseline"] = cam.baseline;
 				jcam["Principle"] = std::to_string(int(cam.cx)) + std::string(",") + std::to_string(int(cam.cy));
 			}
 
diff --git a/applications/gui2/src/views/camera.cpp b/applications/gui2/src/views/camera.cpp
index 016ce424c9c6849620be0ed7a57637645f638da7..f63455ab930911d8634be7b649e7fffe3bd42ddc 100644
--- a/applications/gui2/src/views/camera.cpp
+++ b/applications/gui2/src/views/camera.cpp
@@ -83,9 +83,7 @@ RecordOptions::RecordOptions(nanogui::Widget *parent, Camera* ctrl)
 			case Channel::Colour	:
 			case Channel::Colour2	:
 			case Channel::Depth		:
-			case Channel::Depth2	:
-			case Channel::ColourHighRes :
-			case Channel::Colour2HighRes : break;
+			case Channel::Depth2	: break;
 			default: continue;
 			}
 		}
diff --git a/applications/reconstruct/src/reconstruction.cpp b/applications/reconstruct/src/reconstruction.cpp
index c55f1f5f1676a22cf1153078de48144e0d0a4b13..a0c69b754f5bdd5b7e3e6d463e42eaa94ed87d0d 100644
--- a/applications/reconstruct/src/reconstruction.cpp
+++ b/applications/reconstruct/src/reconstruction.cpp
@@ -28,8 +28,6 @@ Reconstruction::Reconstruction(nlohmann::json &config, const std::string name) :
 	pipeline_->append<ftl::operators::DisparityToDepth>("calculate_depth")->value("enabled", false);
 	pipeline_->append<ftl::operators::ColourChannels>("colour");  // Convert BGR to BGRA
 	pipeline_->append<ftl::operators::ClipScene>("clipping")->value("enabled", false);
-	pipeline_->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
-	pipeline_->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 	//pipeline_->append<ftl::operators::HFSmoother>("hfnoise");  // Remove high-frequency noise
 	pipeline_->append<ftl::operators::Normals>("normals");  // Estimate surface normals
 	//pipeline_->append<ftl::operators::SmoothChannel>("smoothing");  // Generate a smoothing channel
@@ -43,7 +41,8 @@ Reconstruction::Reconstruction(nlohmann::json &config, const std::string name) :
 	pipeline_->append<ftl::operators::VisCrossSupport>("viscross")->value("enabled", false);
 	pipeline_->append<ftl::operators::MultiViewMLS>("mvmls");
 	pipeline_->append<ftl::operators::Poser>("poser")->value("enabled", false);
-
+	pipeline_->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
+	pipeline_->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 	//pipeline_->set("enabled", false);
 }
 
@@ -75,7 +74,7 @@ bool Reconstruction::post(ftl::rgbd::FrameSet &fs) {
 	/*for (size_t i=0; i<fs.frames.size(); ++i) {
 		fs.frames[i].create<cv::cuda::GpuMat>(Channel::Depth);
 	}*/
-		
+
 	{
 		//UNIQUE_LOCK(exchange_mtx_, lk);
 		//if (new_frame_ == true) LOG(WARNING) << "Frame lost";
diff --git a/applications/reconstruct2/src/main.cpp b/applications/reconstruct2/src/main.cpp
index 9e8961f8b1450c8a1f0fd1df827ce9e38776e7df..4fe250a905eb0105a5bd1a1e94fe7fbe67785697 100644
--- a/applications/reconstruct2/src/main.cpp
+++ b/applications/reconstruct2/src/main.cpp
@@ -66,8 +66,6 @@ static void run(ftl::Configurable *root) {
 		pipeline->append<ftl::operators::DisparityToDepth>("calculate_depth")->value("enabled", false);
 		pipeline->append<ftl::operators::ColourChannels>("colour");  // Convert BGR to BGRA
 		pipeline->append<ftl::operators::ClipScene>("clipping")->value("enabled", false);
-		pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
-		pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 		//pipeline_->append<ftl::operators::HFSmoother>("hfnoise");  // Remove high-frequency noise
 		pipeline->append<ftl::operators::Normals>("normals");  // Estimate surface normals
 		//pipeline_->append<ftl::operators::SmoothChannel>("smoothing");  // Generate a smoothing channel
@@ -81,6 +79,8 @@ static void run(ftl::Configurable *root) {
 		pipeline->append<ftl::operators::VisCrossSupport>("viscross")->value("enabled", false);
 		pipeline->append<ftl::operators::MultiViewMLS>("mvmls");
 		pipeline->append<ftl::operators::Poser>("poser")->value("enabled", false);
+		pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
+		pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 	});
 
 	// Add sources here
@@ -118,7 +118,7 @@ static void run(ftl::Configurable *root) {
 	});*/
 
 	auto *filter = feed->filter({Channel::Colour, Channel::Depth, Channel::AudioStereo});
-	
+
 	//feed->lowLatencyMode();
 	feed->startStreaming(filter);
 
diff --git a/applications/tools/middlebury_gen/src/main.cpp b/applications/tools/middlebury_gen/src/main.cpp
index c1b71e5680f6034dcc0d98274217ca5197508bc4..db7e380ba9aa89af9ad3561bb6420e4460cff4b1 100644
--- a/applications/tools/middlebury_gen/src/main.cpp
+++ b/applications/tools/middlebury_gen/src/main.cpp
@@ -219,7 +219,7 @@ int main(int argc, char **argv) {
 	ftl::rgbd::Frame &frame = dframe.cast<ftl::rgbd::Frame>();
 	frame.store();
 
-	ftl::operators::DisparityToDepth disp2depth(ftl::create<ftl::Configurable>(root, "disparity"));
+	ftl::operators::DisparityToDepth disp2depth(nullptr, ftl::create<ftl::Configurable>(root, "disparity"));
 
 	ftl::codecs::OpenCVEncoder encoder(ftl::codecs::definition_t::Any, ftl::codecs::definition_t::Any);
 
diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp
index 93c7043e40785e0445a06ed2d28f78b32e6e6f28..a8c8d9006ea622cad8463c3ed0c45e986fe2f1c9 100644
--- a/applications/vision/src/main.cpp
+++ b/applications/vision/src/main.cpp
@@ -106,9 +106,9 @@ static void run(ftl::Configurable *root) {
 					//LOG(INFO) << "LATENCY: " << float(latency)/1000.0f << "ms";
 
 					if (clock_adjust != 0) {
-						LOG(INFO) << "Clock adjustment: " << clock_adjust << ", latency=" << float(latency)/1000.0f << "ms";
+						LOG(INFO) << "Clock adjustment: " << clock_adjust << ", latency=" << float(latency)/2000.0f << "ms";
 						ftl::timer::setClockAdjustment(clock_adjust);
-					}		
+					}
 				});
 			} catch (const std::exception &e) {
 				LOG(ERROR) << "Ping failed, could not time sync: " << e.what();
@@ -142,7 +142,7 @@ static void run(ftl::Configurable *root) {
 	}
 	Source *source = nullptr;
 	source = ftl::create<Source>(root, "source");
-	
+
 	ftl::stream::Sender *sender = ftl::create<ftl::stream::Sender>(root, "sender");
 	ftl::stream::Net *outstream = ftl::create<ftl::stream::Net>(root, "stream", net);
 	outstream->set("uri", root->value("uri", outstream->getID()));
@@ -189,18 +189,12 @@ static void run(ftl::Configurable *root) {
 
 	auto *pipeline = ftl::config::create<ftl::operators::Graph>(root, "pipeline");
 	pipeline->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
-	pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 	pipeline->append<ftl::operators::DepthChannel>("depth");  // Ensure there is a depth channel
-	pipeline->append<ftl::operators::ClipScene>("clipping")->value("enabled", false);
-
-	pipeline->restore("vision_pipeline", {
-		"clipping"
-	});
-
-	std::atomic_flag busy;
-	busy.clear();
+	//pipeline->append<ftl::operators::ClipScene>("clipping")->value("enabled", false);
+	pipeline->restore("vision_pipeline", { "clipping" });
+	pipeline->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 
-	auto h = creator->onFrameSet([sender,outstream,&stats_count,&latency,&frames,&stats_time,pipeline,&busy,&encodable,&previous_encodable](const ftl::data::FrameSetPtr &fs) {
+	auto h = creator->onFrameSet([sender,outstream,&stats_count,&latency,&frames,&stats_time,pipeline,&encodable,&previous_encodable](const ftl::data::FrameSetPtr &fs) {
 
 		// Decide what to encode here, based upon what remote users select
 		const auto sel = outstream->selectedNoExcept(fs->frameset());
@@ -222,21 +216,30 @@ static void run(ftl::Configurable *root) {
 
 		fs->set(ftl::data::FSFlag::AUTO_SEND);
 
-		// Do all processing in another thread...
-		ftl::pool.push([sender,&stats_count,&latency,&frames,&stats_time,pipeline,&busy,fs](int id) mutable {
-			if (busy.test_and_set()) {
-				LOG(WARNING) << "Depth pipeline drop: " << fs->timestamp();
-				fs->firstFrame().message(ftl::data::Message::Warning_PIPELINE_DROP, "Depth pipeline drop");
-				return;
-			}
-			pipeline->apply(*fs, *fs);
-			busy.clear();
-
+		bool did_pipe = pipeline->apply(*fs, *fs, [fs,&frames,&latency]() {
+			if (fs->hasAnyChanged(Channel::Depth)) fs->flush(Channel::Depth);
 			++frames;
 			latency += float(ftl::timer::get_time() - fs->timestamp());
+			const_cast<ftl::data::FrameSetPtr&>(fs).reset();
+		});
+
+		if (!did_pipe) {
+			LOG(WARNING) << "Depth pipeline drop: " << fs->timestamp();
+			fs->firstFrame().message(ftl::data::Message::Warning_PIPELINE_DROP, "Depth pipeline drop");
+		}
+
+
+		// Do some encoding (eg. colour) whilst pipeline runs
+		ftl::pool.push([fs,&stats_count,&latency,&frames,&stats_time](int id){
+			if (fs->hasAnyChanged(Channel::Audio)) {
+				fs->flush(ftl::codecs::Channel::Audio);
+			}
+
+			// Make sure upload has completed.
+			cudaSafeCall(cudaEventSynchronize(fs->frames[0].uploadEvent()));
+			// TODO: Try depth pipeline again here if failed first time.
+			fs->flush(ftl::codecs::Channel::Colour);
 
-			// Destruct frameset as soon as possible to send the data...
-			if (fs->hasAnyChanged(Channel::Depth)) fs->flush(Channel::Depth);
 			const_cast<ftl::data::FrameSetPtr&>(fs).reset();
 
 			if (!quiet && --stats_count <= 0) {
@@ -252,19 +255,14 @@ static void run(ftl::Configurable *root) {
 			}
 		});
 
-		// Lock colour right now to encode in parallel, same for audio
-		ftl::pool.push([fs](int id){ fs->flush(ftl::codecs::Channel::Colour); });
-
-		if (fs->hasAnyChanged(Channel::Audio)) {
-			ftl::pool.push([fs](int id){ fs->flush(ftl::codecs::Channel::Audio); });
-		}
+		const_cast<ftl::data::FrameSetPtr&>(fs).reset();
 
 		return true;
 	});
 
 	// Start the timed generation of frames
 	creator->start();
-	
+
 	// Only now start listening for connections
 	net->start();
 
@@ -274,7 +272,7 @@ static void run(ftl::Configurable *root) {
 	ctrl.stop();
 
 	ftl::config::save();
-	
+
 	net->shutdown();
 
 	ftl::pool.stop();
@@ -321,7 +319,7 @@ int main(int argc, char **argv) {
 
 		// Use other GPU if available.
 		//ftl::cuda::setDevice(ftl::cuda::deviceCount()-1);
-		
+
 		std::cout << "Loading..." << std::endl;
 		run(root);
 
diff --git a/components/calibration/include/ftl/calibration/optimize.hpp b/components/calibration/include/ftl/calibration/optimize.hpp
index a92f0354d471ae0df6192c8ecc92879af2b4b54a..e6bb0b01173bc00e8ae6a44eac0057868c329491 100644
--- a/components/calibration/include/ftl/calibration/optimize.hpp
+++ b/components/calibration/include/ftl/calibration/optimize.hpp
@@ -219,6 +219,8 @@ public:
 	/**/
 	int removeObservations(double threshold);
 
+	std::vector<cv::Point3d> points();
+
 protected:
 	double* getCameraPtr(int i) { return cameras_.at(i)->data; }
 
diff --git a/components/calibration/src/extrinsic.cpp b/components/calibration/src/extrinsic.cpp
index 25c79e4ff7a7303a1986a96f87eed5f1c6c63613..5d5148b67732185ccf11580a16d12a38e8657043 100644
--- a/components/calibration/src/extrinsic.cpp
+++ b/components/calibration/src/extrinsic.cpp
@@ -462,7 +462,16 @@ void ExtrinsicCalibration::triangulate(unsigned int c1, unsigned int c2) {
 	// implements least squares method described in H&Z p312
 	cv::triangulatePoints(P1, P2, pts1u, pts2u, out);
 	// scalePoints() converts to non-homogenous coordinates and estimates scale
-	scalePoints(points().getObject(0), out, pointsw);
+	LOG(INFO) << "new scale: " << scalePoints(points().getObject(0), out, pointsw);
+
+	/*for (int col = 0; col < out.cols; col++) {
+		CHECK_NE(out.at<double>(3, col), 0);
+		cv::Point3d p = cv::Point3d(out.at<double>(0, col),
+							out.at<double>(1, col),
+							out.at<double>(2, col))
+							/ out.at<double>(3, col);
+		pointsw.push_back(p);
+	}*/
 	points().setTriangulatedPoints(c1, c2, pointsw);
 }
 
@@ -723,7 +732,7 @@ double ExtrinsicCalibration::optimize() {
 			// TODO: desgin better check
 			if (cv::norm(absdiff(px, py, pz)) > threshold_bad_) {
 				n_points_bad++;
-				continue;
+				//continue;
 			}
 
 			ba.addPoint(vis, obs, p);
@@ -760,7 +769,8 @@ double ExtrinsicCalibration::optimize() {
 	for (const auto& t : prune_observations_) {
 		n_removed +=  ba.removeObservations(t);
 		if (float(n_removed)/float(n_points) > threhsold_warning_) {
-			LOG(WARNING) << "significant number of observations removed";
+			LOG(WARNING) << "significant number (" << n_removed << " of "
+						 <<  n_points << ") of observations removed";
 			break;
 		}
 		else {
@@ -772,12 +782,17 @@ double ExtrinsicCalibration::optimize() {
 	calib_optimized_.resize(calib_.size());
 	rmse_.resize(calib_.size());
 
+	auto points_optimized = ba.points();
+	double scale = optimizeScale(points_.getObject(0), points_optimized);
+	LOG(INFO) << "scale: " << scale;
+
 	for (unsigned int i = 0; i < cameras.size(); i++) {
 		rmse_[i] = ba.reprojectionError(i);
 		auto intr = cameras[i].intrinsic();
 		calib_optimized_[i] = calib_[i];
 		calib_optimized_[i].intrinsic.set(intr.matrix(), intr.distCoeffs.Mat(), intr.resolution);
 		calib_optimized_[i].extrinsic.set(cameras[i].rvec(), cameras[i].tvec());
+		calib_optimized_[i].extrinsic.tvec *= scale;
 	}
 
 	rmse_total_ = ba.reprojectionError();
diff --git a/components/calibration/src/optimize.cpp b/components/calibration/src/optimize.cpp
index 73788380cb14d09f0eaa50d4791a5e52b6b25a6e..751fd546f67f1ece6d63497adffa9466f54a4e71 100644
--- a/components/calibration/src/optimize.cpp
+++ b/components/calibration/src/optimize.cpp
@@ -334,7 +334,7 @@ struct ScaleError {
 
 double ftl::calibration::optimizeScale(const vector<Point3d> &object_points, vector<Point3d> &points) {
 
-	// use exceptions instead
+	// throw exception instead
 	CHECK_EQ(points.size() % object_points.size(), 0);
 	CHECK_EQ(points.size() % 2, 0);
 
@@ -684,6 +684,13 @@ int BundleAdjustment::removeObservations(double threshold) {
 	return removed;
 }
 
+std::vector<cv::Point3d> BundleAdjustment::points() {
+	std::vector<cv::Point3d> pts;
+	pts.reserve(points_.size());
+	for (const auto& p : points_) { pts.push_back(p.point); }
+	return pts;
+}
+
 void BundleAdjustment::_reprojectionErrorSE(const int camera, double &error, double &npoints) const {
 	error = 0.0;
 	npoints = 0.0;
diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp
index 90e3d3cec00f903844b658ef32d424c596069c0b..db0db3b679e96fd5993240b535ba1703e5ef6b04 100644
--- a/components/codecs/include/ftl/codecs/channels.hpp
+++ b/components/codecs/include/ftl/codecs/channels.hpp
@@ -31,12 +31,10 @@ enum struct Channel : int {
 	Support2		= 14,	// 8UC4 (currently)
 	Segmentation	= 15,	// 32S?
 	Normals2		= 16,	// 16FC4
-	ColourHighRes	= 17,	// 8UC3 or 8UC4
-	LeftHighRes		= 17,	// 8UC3 or 8UC4
+	UNUSED1			= 17,	
 	Disparity		= 18,
 	Smoothing		= 19,	// 32F
-	RightHighRes	= 20,	// 8UC3 or 8UC4
-	Colour2HighRes	= 20,
+	UNUSED2			= 20,
 	Overlay			= 21,   // 8UC4
 	GroundTruth		= 22,	// 32F
 
diff --git a/components/codecs/include/ftl/codecs/decoder.hpp b/components/codecs/include/ftl/codecs/decoder.hpp
index 4af10e1186855da9b19be7bd8022405c8ac841bb..b649f63a8e67c76a6882bd489b4e2eba9d2970ec 100644
--- a/components/codecs/include/ftl/codecs/decoder.hpp
+++ b/components/codecs/include/ftl/codecs/decoder.hpp
@@ -33,17 +33,19 @@ void free(Decoder *&e);
  */
 class Decoder {
 	public:
-	Decoder() { cudaStreamCreate(&stream_); };
-	virtual ~Decoder() { cudaStreamDestroy(stream_); };
+	Decoder() { cudaStreamCreate(&stream_); cudaEventCreate(&event_); };
+	virtual ~Decoder() { cudaStreamDestroy(stream_); cudaEventDestroy(event_); };
 
 	virtual bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out)=0;
 
 	virtual bool accepts(const ftl::codecs::Packet &)=0;
 
 	cudaStream_t stream() { return stream_; }
+	cudaEvent_t event() { return event_; }
 
 	protected:
 	cudaStream_t stream_;
+	cudaEvent_t event_;
 };
 
 }
diff --git a/components/codecs/src/channels.cpp b/components/codecs/src/channels.cpp
index cc30c51683d4947507dc9bec81bbca3159dcef95..458541d5f1b2ffe94166faf828efa9f827e2c983 100644
--- a/components/codecs/src/channels.cpp
+++ b/components/codecs/src/channels.cpp
@@ -27,10 +27,10 @@ static const std::unordered_map<Channel,ChannelInfo> info = {
 	{Channel::Support2, {"Support2", CV_8UC4}},
 	{Channel::Segmentation, {"Segmentation", CV_8U}},
 	{Channel::Normals2, {"Normals Right", CV_32FC4}},
-	{Channel::ColourHighRes, {"Left High-res", CV_8UC4}},
+	{Channel::UNUSED1, {"Unused", CV_8UC4}},
 	{Channel::Disparity, {"Disparity", CV_16S}},
 	{Channel::Smoothing, {"Smoothing", CV_32F}},
-	{Channel::Colour2HighRes, {"Right High-res", CV_8UC4}},
+	{Channel::UNUSED2, {"Unused", CV_8UC4}},
 	{Channel::Overlay, {"Overlay", CV_8UC4}},
 	{Channel::GroundTruth, {"Ground Truth", CV_32F}},
 
diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp
index 5c3cc9f484ecbaf615cf44469e3c37b636a4f7e6..59053e4b2196779bfa4bb16e84431b00fd36594e 100644
--- a/components/common/cpp/include/ftl/cuda_common.hpp
+++ b/components/common/cpp/include/ftl/cuda_common.hpp
@@ -21,6 +21,8 @@
 #define STRIDE_Y(I,N) int I = blockIdx.y * blockDim.y + threadIdx.y; I < N; I += blockDim.y * gridDim.y
 #define STRIDE_X(I,N) int I = blockIdx.x * blockDim.x + threadIdx.x; I < N; I += blockDim.x * gridDim.x
 
+void cudaCallback(cudaStream_t stream, const std::function<void()> &cb);
+
 namespace ftl {
 namespace cuda {
 
diff --git a/components/common/cpp/include/ftl/threads.hpp b/components/common/cpp/include/ftl/threads.hpp
index c40ed095b5075afe0b4df7409c48ace45b8328cc..1e60a20077da11dedc9fc026f6f9932a2c2d9609 100644
--- a/components/common/cpp/include/ftl/threads.hpp
+++ b/components/common/cpp/include/ftl/threads.hpp
@@ -7,7 +7,7 @@
 
 #define POOL_SIZE 10
 
-//#define DEBUG_MUTEX
+#define DEBUG_MUTEX
 #define MUTEX_TIMEOUT 2
 
 #if defined DEBUG_MUTEX
diff --git a/components/common/cpp/include/ftl/timer.hpp b/components/common/cpp/include/ftl/timer.hpp
index 9ccb0b31fbfb282d5f1d072ffd5f73bec57d92c8..6530aeaddb843e7f56dce9dc334b5fa0e9f8d0d6 100644
--- a/components/common/cpp/include/ftl/timer.hpp
+++ b/components/common/cpp/include/ftl/timer.hpp
@@ -84,6 +84,19 @@ void setClockSlave(bool);
  */
 ftl::Handle add(timerlevel_t, const std::function<bool(int64_t ts)> &);
 
+/**
+ * Same as other add function except that a multiplier is given to indicate
+ * how often this should be triggered in numbers of ticks.
+ */
+ftl::Handle add(timerlevel_t, size_t multiplier, const std::function<bool(int64_t ts)> &);
+
+/**
+ * Same as other add function except that a period in seconds is given. Note that
+ * the period should be a multiple of frames otherwise it will not be accurate
+ * but will still work.
+ */
+ftl::Handle add(timerlevel_t, double seconds, const std::function<bool(int64_t ts)> &);
+
 /**
  * Initiate the timer and optionally block the current process.
  */
diff --git a/components/common/cpp/src/cuda_common.cpp b/components/common/cpp/src/cuda_common.cpp
index 273b540c4e2a640c86255b8ed040edde9d8cc40a..949a22704fa1e7454f5094d92296c93f7731190a 100644
--- a/components/common/cpp/src/cuda_common.cpp
+++ b/components/common/cpp/src/cuda_common.cpp
@@ -56,6 +56,18 @@ void ftl::cuda::setDevice() {
 	cudaSafeCall(cudaSetDevice(dev_to_use));
 }
 
+static void _cudaCallback(void *ud) {
+	auto *cb = (std::function<void()>*)ud;
+	(*cb)();
+	delete cb;
+}
+
+// TODO: Move this to a common location
+void cudaCallback(cudaStream_t stream, const std::function<void()> &cb) {
+	cudaSafeCall(cudaLaunchHostFunc(stream, _cudaCallback, (void*)(new std::function<void()>(cb))));
+}
+
+
 TextureObjectBase::~TextureObjectBase() {
 	free();
 }
diff --git a/components/common/cpp/src/timer.cpp b/components/common/cpp/src/timer.cpp
index ec947317836ed3eb9b26f0ea38625240b8db7e87..e49a955cf1af3acae8e70d7bcb448832146b7269 100644
--- a/components/common/cpp/src/timer.cpp
+++ b/components/common/cpp/src/timer.cpp
@@ -35,8 +35,8 @@ struct TimerJob {
 	std::atomic_bool active=false;
 	// TODO: (Nick) Implement richer forms of timer
 	//bool paused;
-	//int multiplier;
-	//int countdown;
+	int multiplier=0;		// Number of ticks before trigger
+	int counter=0;		// Current tick counter
 	std::string name;
 };
 
@@ -76,14 +76,21 @@ static void waitTimePoint() {
 		UNIQUE_LOCK(mtx, lk);
 		auto idle_job = jobs[kTimerIdle10].begin();
 		while (idle_job != jobs[kTimerIdle10].end() && msdelay >= 10 && sincelast != mspf) {
-			(*idle_job).active = true;
-			bool doremove = !(*idle_job).job.trigger(now);
-
-			if (doremove) {
-				idle_job = jobs[kTimerIdle10].erase(idle_job);
-				LOG(INFO) << "Timer job removed";
+			auto &job = *idle_job;
+
+			if (++job.counter >= job.multiplier) {
+				job.counter = 0;
+				job.active = true;
+				bool doremove = !job.job.trigger(now);
+
+				if (doremove) {
+					idle_job = jobs[kTimerIdle10].erase(idle_job);
+					LOG(INFO) << "Timer job removed";
+				} else {
+					(*idle_job++).active = false;
+				}
 			} else {
-				(*idle_job++).active = false;
+				++idle_job;
 			}
 			now = get_time();
 			msdelay = mspf - (now % mspf);
@@ -100,14 +107,21 @@ static void waitTimePoint() {
 		UNIQUE_LOCK(mtx, lk);
 		auto idle_job = jobs[kTimerIdle1].begin();
 		while (idle_job != jobs[kTimerIdle1].end() && msdelay >= 2 && sincelast != mspf) {
-			(*idle_job).active = true;
-			bool doremove = !(*idle_job).job.trigger(now);
-
-			if (doremove) {
-				idle_job = jobs[kTimerIdle1].erase(idle_job);
-				LOG(INFO) << "Timer job removed";
+			auto &job = *idle_job;
+
+			if (++job.counter >= job.multiplier) {
+				job.counter = 0;
+				job.active = true;
+				bool doremove = !job.job.trigger(now);
+
+				if (doremove) {
+					idle_job = jobs[kTimerIdle1].erase(idle_job);
+					LOG(INFO) << "Timer job removed";
+				} else {
+					(*idle_job++).active = false;
+				}
 			} else {
-				(*idle_job++).active = false;
+				++idle_job;
 			}
 			now = get_time();
 			msdelay = mspf - (now % mspf);
@@ -171,6 +185,32 @@ ftl::Handle ftl::timer::add(timerlevel_t l, const std::function<bool(int64_t ts)
 	return h;
 }
 
+ftl::Handle ftl::timer::add(timerlevel_t l, size_t multiplier, const std::function<bool(int64_t ts)> &f) {
+	if (l < 0 || l >= kTimerMAXLEVEL) return {};
+
+	UNIQUE_LOCK(mtx, lk);
+	int newid = last_id++;
+	auto &j = jobs[l].emplace_back();
+	j.id = newid;
+	j.name = "NoName";
+	j.multiplier = multiplier;
+	ftl::Handle h = j.job.on(f);
+	return h;
+}
+
+ftl::Handle ftl::timer::add(timerlevel_t l, double seconds, const std::function<bool(int64_t ts)> &f) {
+	if (l < 0 || l >= kTimerMAXLEVEL) return {};
+
+	UNIQUE_LOCK(mtx, lk);
+	int newid = last_id++;
+	auto &j = jobs[l].emplace_back();
+	j.id = newid;
+	j.name = "NoName";
+	j.multiplier = int(seconds*1000.0 / double(getInterval()));
+	ftl::Handle h = j.job.on(f);
+	return h;
+}
+
 static void removeJob(int id) {
 	UNIQUE_LOCK(mtx, lk);
 	if (id < 0) return;
diff --git a/components/common/cpp/test/timer_unit.cpp b/components/common/cpp/test/timer_unit.cpp
index 2c7602646eced4f792e818214f6424fc71752a16..1f8f63b693b40f6cfbe60a80c50f51463456e1cb 100644
--- a/components/common/cpp/test/timer_unit.cpp
+++ b/components/common/cpp/test/timer_unit.cpp
@@ -133,6 +133,58 @@ TEST_CASE( "Timer::add() Idle10 job" ) {
 	}
 }
 
+TEST_CASE( "Timer::add() Idle10 job periodic" ) {
+	SECTION( "Quick idle job" ) {
+		bool didrun = false;
+
+		ftl::timer::reset();
+
+		int count = 0;
+		auto rcc = ftl::timer::add(ftl::timer::kTimerIdle10, [&count](int64_t ts) {
+			++count;
+			return true;
+		});
+
+		auto rc = ftl::timer::add(ftl::timer::kTimerIdle10, size_t(20), [&didrun](int64_t ts) {
+			didrun = true;
+			ftl::timer::stop(false);
+			return true;
+		});
+
+		REQUIRE( (rc.id() >= 0) );
+
+		ftl::timer::start(true);
+		REQUIRE( didrun == true );
+		REQUIRE( count == 20 );
+	}
+}
+
+TEST_CASE( "Timer::add() Idle1 job periodic" ) {
+	SECTION( "Quick idle job" ) {
+		bool didrun = false;
+
+		ftl::timer::reset();
+
+		int count = 0;
+		auto rcc = ftl::timer::add(ftl::timer::kTimerIdle1, [&count](int64_t ts) {
+			++count;
+			return true;
+		});
+
+		auto rc = ftl::timer::add(ftl::timer::kTimerIdle1, size_t(20), [&didrun](int64_t ts) {
+			didrun = true;
+			ftl::timer::stop(false);
+			return true;
+		});
+
+		REQUIRE( (rc.id() >= 0) );
+
+		ftl::timer::start(true);
+		REQUIRE( didrun == true );
+		REQUIRE( count == 20 );
+	}
+}
+
 TEST_CASE( "Timer::add() Main job" ) {
 	SECTION( "Quick main job" ) {
 		bool didrun = false;
diff --git a/components/net/cpp/include/ftl/net/universe.hpp b/components/net/cpp/include/ftl/net/universe.hpp
index 3d3a4b3ffe175c048958219e1fa1477e0043e73b..8ee642fe615b1044c44535ee8f17d0911d90e669 100644
--- a/components/net/cpp/include/ftl/net/universe.hpp
+++ b/components/net/cpp/include/ftl/net/universe.hpp
@@ -212,8 +212,8 @@ class Universe : public ftl::Configurable {
 
 	void removeCallback(ftl::net::callback_t cbid);
 
-	size_t getSendBufferSize() const { return send_size_; }
-	size_t getRecvBufferSize() const { return recv_size_; }
+	size_t getSendBufferSize(ftl::URI::scheme_t s);
+	size_t getRecvBufferSize(ftl::URI::scheme_t s);
 	
 	private:
 	void _run();
diff --git a/components/net/cpp/src/peer.cpp b/components/net/cpp/src/peer.cpp
index db544841e77cfd6629b2ffddd2745ff9949e0188..57fdab3a9540f01ee32bdb0ebc016dd6ffb5e53d 100644
--- a/components/net/cpp/src/peer.cpp
+++ b/components/net/cpp/src/peer.cpp
@@ -183,11 +183,11 @@ Peer::Peer(SOCKET s, Universe *u, Dispatcher *d) : sock_(s), can_reconnect_(fals
 	#ifndef TEST_MOCKS
 	int flags =1; 
     if (setsockopt(s, IPPROTO_TCP, TCP_NODELAY, (const char *)&flags, sizeof(flags))) { LOG(ERROR) << "ERROR: setsocketopt(), TCP_NODELAY"; };
-	int a = static_cast<int>(u->getRecvBufferSize());
+	int a = static_cast<int>(u->getRecvBufferSize(scheme_));
 	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, (const char *)&a, sizeof(int)) == -1) {
 		fprintf(stderr, "Error setting socket opts: %s\n", strerror(errno));
 	}
-	a = static_cast<int>(u->getSendBufferSize());
+	a = static_cast<int>(u->getSendBufferSize(scheme_));
 	if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, (const char *)&a, sizeof(int)) == -1) {
 		fprintf(stderr, "Error setting socket opts: %s\n", strerror(errno));
 	}
@@ -244,12 +244,12 @@ Peer::Peer(const char *pUri, Universe *u, Dispatcher *d) : can_reconnect_(true),
 
 	scheme_ = uri.getProtocol();
 	if (uri.getProtocol() == URI::SCHEME_TCP) {
-		sock_ = tcpConnect(uri, u->getSendBufferSize(), u->getRecvBufferSize());
+		sock_ = tcpConnect(uri, u->getSendBufferSize(scheme_), u->getRecvBufferSize(scheme_));
 		if (sock_ != INVALID_SOCKET) status_ = kConnecting;
 		else status_ = kReconnecting;
 	} else if (uri.getProtocol() == URI::SCHEME_WS) {
 		LOG(INFO) << "Websocket connect " << uri.getPath();
-		sock_ = tcpConnect(uri, u->getSendBufferSize(), u->getRecvBufferSize());
+		sock_ = tcpConnect(uri, u->getSendBufferSize(scheme_), u->getRecvBufferSize(scheme_));
 		if (sock_ != INVALID_SOCKET) {
 			if (!ws_connect(sock_, uri)) {
 				LOG(ERROR) << "Websocket connection failed";
@@ -313,7 +313,7 @@ bool Peer::reconnect() {
 	LOG(INFO) << "Reconnecting to " << uri_ << " ...";
 
 	if (scheme_ == URI::SCHEME_TCP) {
-		sock_ = tcpConnect(uri, universe_->getSendBufferSize(), universe_->getRecvBufferSize());
+		sock_ = tcpConnect(uri, universe_->getSendBufferSize(scheme_), universe_->getRecvBufferSize(scheme_));
 		if (sock_ != INVALID_SOCKET) {
 			status_ = kConnecting;
 			is_waiting_ = true;
@@ -322,7 +322,7 @@ bool Peer::reconnect() {
 			return false;
 		}
 	} else if (scheme_ == URI::SCHEME_WS) {
-		sock_ = tcpConnect(uri, universe_->getSendBufferSize(), universe_->getRecvBufferSize());
+		sock_ = tcpConnect(uri, universe_->getSendBufferSize(scheme_), universe_->getRecvBufferSize(scheme_));
 		if (sock_ != INVALID_SOCKET) {
 			if (!ws_connect(sock_, uri)) {
 				return false;
diff --git a/components/net/cpp/src/universe.cpp b/components/net/cpp/src/universe.cpp
index a36bf29ceb84d9dd178cefcfe96287e2c16cc9a9..809bba4af8f1ec7becf35f93904b2d52e5ac14e8 100644
--- a/components/net/cpp/src/universe.cpp
+++ b/components/net/cpp/src/universe.cpp
@@ -41,8 +41,10 @@ struct NetImplDetail {
 //#define TCP_SEND_BUFFER_SIZE	(512*1024)
 //#define TCP_RECEIVE_BUFFER_SIZE	(1024*1024*1)
 
-#define TCP_SEND_BUFFER_SIZE	(128*1024)  // Was 256
-#define TCP_RECEIVE_BUFFER_SIZE	(128*1024)  // Was 256
+#define TCP_SEND_BUFFER_SIZE	(512*1024)
+#define TCP_RECEIVE_BUFFER_SIZE	(1024*1024)
+#define WS_SEND_BUFFER_SIZE	(512*1024)
+#define WS_RECEIVE_BUFFER_SIZE	(512*1024)
 
 callback_t ftl::net::Universe::cbid__ = 0;
 
@@ -68,8 +70,8 @@ Universe::Universe(nlohmann::json &config) :
 		this_peer(ftl::net::this_peer),
 		impl_(new ftl::net::NetImplDetail),
 		phase_(0),
-		send_size_(value("tcp_send_buffer",TCP_SEND_BUFFER_SIZE)),
-		recv_size_(value("tcp_recv_buffer",TCP_RECEIVE_BUFFER_SIZE)),
+		//send_size_(value("tcp_send_buffer",TCP_SEND_BUFFER_SIZE)),
+		//recv_size_(value("tcp_recv_buffer",TCP_RECEIVE_BUFFER_SIZE)),
 		periodic_time_(value("periodics", 1.0)),
 		reconnect_attempts_(value("reconnect_attempts",50)),
 		thread_(Universe::__start, this) {
@@ -99,6 +101,18 @@ Universe::~Universe() {
 	delete impl_;
 }
 
+size_t Universe::getSendBufferSize(ftl::URI::scheme_t s) {
+	return (s == ftl::URI::scheme_t::SCHEME_WS) ?
+			value("ws_send_buffer",WS_SEND_BUFFER_SIZE) :
+			value("tcp_send_buffer",TCP_SEND_BUFFER_SIZE);
+}
+
+size_t Universe::getRecvBufferSize(ftl::URI::scheme_t s) {
+	return (s == ftl::URI::scheme_t::SCHEME_WS) ?
+			value("ws_recv_buffer",WS_SEND_BUFFER_SIZE) :
+			value("tcp_recv_buffer",TCP_SEND_BUFFER_SIZE);
+}
+
 void Universe::start() {
 	/*cpu_set_t cpus;
     CPU_ZERO(&cpus);
diff --git a/components/net/cpp/test/peer_unit.cpp b/components/net/cpp/test/peer_unit.cpp
index 6b2cd2bd2ca9606e24c0bb0a2851dff7bb78e1e8..57828f57c78260068227f6044fc5d7d65dabfe96 100644
--- a/components/net/cpp/test/peer_unit.cpp
+++ b/components/net/cpp/test/peer_unit.cpp
@@ -51,8 +51,8 @@ class Universe {
 	callback_t onConnect(const std::function<void(Peer*)> &f) { return 0; }
 	callback_t onDisconnect(const std::function<void(Peer*)> &f) { return 0; }
 
-	size_t getSendBufferSize() const { return 10*1024; }
-	size_t getRecvBufferSize() const { return 10*1024; }
+	size_t getSendBufferSize(ftl::URI::scheme_t s) const { return 10*1024; }
+	size_t getRecvBufferSize(ftl::URI::scheme_t s) const { return 10*1024; }
 };
 }
 }
diff --git a/components/operators/include/ftl/operators/antialiasing.hpp b/components/operators/include/ftl/operators/antialiasing.hpp
index 295729bd361e6a17287b845460068a59e4d7555e..5548c08578e96879f36f2c3a11b281b78ca4bf43 100644
--- a/components/operators/include/ftl/operators/antialiasing.hpp
+++ b/components/operators/include/ftl/operators/antialiasing.hpp
@@ -12,7 +12,7 @@ namespace operators {
  */
 class FXAA : public ftl::operators::Operator {
 	public:
-    explicit FXAA(ftl::Configurable*);
+    explicit FXAA(ftl::operators::Graph *g, ftl::Configurable*);
     ~FXAA();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/buffer.hpp b/components/operators/include/ftl/operators/buffer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..424e2dea08e853416b8ca673a0f577eaf1c119a7
--- /dev/null
+++ b/components/operators/include/ftl/operators/buffer.hpp
@@ -0,0 +1,37 @@
+#ifndef _FTL_OPERATORS_BUFFERS_HPP_
+#define _FTL_OPERATORS_BUFFERS_HPP_
+
+namespace ftl {
+namespace operators {
+
+/**
+ * Similar to frame channels, but these are pipeline buffers that can be
+ * used from one operator to the next.
+ */
+enum class Buffer {
+	LowLeft			= 0,	// 8UC4
+	Screen			= 1,
+	Weights			= 2,	// short
+	Confidence		= 3,	// 32F
+	Contribution	= 4,	// 32F
+	Flow			= 5,	// 16SC2
+	Flow2			= 6,	// 16SC2
+	Energy			= 7,	// 32F
+	Mask			= 8,	// 32U
+	Density			= 9,	// 32F
+	Support1		= 10,	// 8UC4 (currently)
+	Support2		= 11,	// 8UC4 (currently)
+	Segmentation	= 12,	// 32S?	
+	Disparity		= 13,
+	Smoothing		= 14,	// 32F
+	LowGrayLeft		= 15,
+	LowGrayRight	= 16,
+	GrayLeft		= 17,
+	GrayRight		= 18,
+	LowRight		= 19
+};
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/components/operators/include/ftl/operators/clipping.hpp b/components/operators/include/ftl/operators/clipping.hpp
index 590e714c9eeca9713e56139aa874f7c0db0a472d..25d8b76ff7bdbfc99717b01652497395e5ff7ea1 100644
--- a/components/operators/include/ftl/operators/clipping.hpp
+++ b/components/operators/include/ftl/operators/clipping.hpp
@@ -12,7 +12,7 @@ namespace operators {
  */
 class ClipScene : public ftl::operators::Operator {
 	public:
-    explicit ClipScene(ftl::Configurable*);
+    explicit ClipScene(ftl::operators::Graph *g, ftl::Configurable*);
     ~ClipScene();
 
 	inline Operator::Type type() const override { return Operator::Type::ManyToMany; }
diff --git a/components/operators/include/ftl/operators/colours.hpp b/components/operators/include/ftl/operators/colours.hpp
index 788f7b4f50dec2472453d30ebf4351616de228b2..a54539a67f829a7d1e9aa6c6306bb76a707081fa 100644
--- a/components/operators/include/ftl/operators/colours.hpp
+++ b/components/operators/include/ftl/operators/colours.hpp
@@ -8,7 +8,7 @@ namespace operators {
 
 class ColourChannels : public ftl::operators::Operator {
     public:
-    explicit ColourChannels(ftl::Configurable *cfg);
+    explicit ColourChannels(ftl::operators::Graph *g, ftl::Configurable *cfg);
     ~ColourChannels();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/cuda/mask.hpp b/components/operators/include/ftl/operators/cuda/mask.hpp
index 8e136371879c451a9b03997998c46e7b50a8c943..54a1522b628e14e5b99b1fb823416347374e00fc 100644
--- a/components/operators/include/ftl/operators/cuda/mask.hpp
+++ b/components/operators/include/ftl/operators/cuda/mask.hpp
@@ -83,6 +83,11 @@ void cull_mask(
 		unsigned int radius,
 		cudaStream_t stream);
 
+void show_mask(
+        ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<uint8_t> &mask,
+        int id, uchar4 style, cudaStream_t stream);
+
 }
 }
 
diff --git a/components/operators/include/ftl/operators/depth.hpp b/components/operators/include/ftl/operators/depth.hpp
index b7ceba8dab56c1f168c6f6fc3bf29e005ff07b33..14f46b12d12b3adf933bd3ea7bd67463b36f19f2 100644
--- a/components/operators/include/ftl/operators/depth.hpp
+++ b/components/operators/include/ftl/operators/depth.hpp
@@ -10,8 +10,8 @@ namespace operators {
 
 class DepthBilateralFilter : public::ftl::operators::Operator {
 	public:
-	explicit DepthBilateralFilter(ftl::Configurable*);
-	DepthBilateralFilter(ftl::Configurable*, const std::tuple<ftl::codecs::Channel> &);
+	explicit DepthBilateralFilter(ftl::operators::Graph *g, ftl::Configurable*);
+	DepthBilateralFilter(ftl::operators::Graph *g, ftl::Configurable*, const std::tuple<ftl::codecs::Channel> &);
 
 	~DepthBilateralFilter() {};
 
diff --git a/components/operators/include/ftl/operators/detectandtrack.hpp b/components/operators/include/ftl/operators/detectandtrack.hpp
index f6c5c869fc9583e5f1b2948806d57237be550ea3..2e5ce171359c6a6f6bc5b8d693b64677a602be38 100644
--- a/components/operators/include/ftl/operators/detectandtrack.hpp
+++ b/components/operators/include/ftl/operators/detectandtrack.hpp
@@ -39,7 +39,7 @@ namespace operators {
  */
 class DetectAndTrack : public ftl::operators::Operator {
 	public:
-	explicit DetectAndTrack(ftl::Configurable*);
+	explicit DetectAndTrack(ftl::operators::Graph *g, ftl::Configurable*);
 	~DetectAndTrack() {};
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -116,22 +116,19 @@ class DetectAndTrack : public ftl::operators::Operator {
  */
 class ArUco : public ftl::operators::Operator {
 	public:
-	explicit ArUco(ftl::Configurable*);
+	explicit ArUco(ftl::operators::Graph *g, ftl::Configurable*);
 	~ArUco() {};
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
 	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override;
 
-	void wait(cudaStream_t) override;
-
 	ftl::codecs::Channel channel_in_;
 	ftl::codecs::Channel channel_out_;
 
 	private:
-	std::future<bool> job_;
-	bool debug_;
 	bool estimate_pose_;
 	float marker_size_;
+	cv::Mat tmp_;
 
 	cv::Ptr<cv::aruco::Dictionary> dictionary_;
 	cv::Ptr<cv::aruco::DetectorParameters> params_;
diff --git a/components/operators/include/ftl/operators/disparity.hpp b/components/operators/include/ftl/operators/disparity.hpp
index 62b1f00857a1d8971f61248cda857814d49fc24e..4aba9bbd8e179c86ddc0d6305fb14b3579a6fb23 100644
--- a/components/operators/include/ftl/operators/disparity.hpp
+++ b/components/operators/include/ftl/operators/disparity.hpp
@@ -8,6 +8,7 @@
 #endif
 
 #include <opencv2/cudastereo.hpp>
+#include <opencv2/cudafilters.hpp>
 
 #ifdef HAVE_LIBSGM
 #include <libsgm.h>
@@ -19,7 +20,7 @@ namespace operators {
 
 class StereoDisparity : public ftl::operators::Operator {
 public:
-	explicit StereoDisparity(ftl::Configurable* cfg);
+	StereoDisparity(ftl::operators::Graph *g, ftl::Configurable* cfg);
 
 	~StereoDisparity();
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -44,7 +45,7 @@ private:
  */
 class FixstarsSGM : public ftl::operators::Operator {
 	public:
-	explicit FixstarsSGM(ftl::Configurable* cfg);
+	FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg);
 
 	~FixstarsSGM();
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -57,11 +58,14 @@ class FixstarsSGM : public ftl::operators::Operator {
 	bool updateParameters();
 	bool updateP2Parameters();
 	void computeP2(cudaStream_t &stream);
+	void _variance_mask(cv::InputArray in, cv::OutputArray out, int wsize, cv::cuda::Stream &cvstream);
 
 	sgm::StereoSGM *ssgm_;
 	cv::Size size_;
 	cv::cuda::GpuMat lbw_;
 	cv::cuda::GpuMat rbw_;
+	cv::cuda::GpuMat lbw_full_;
+	cv::cuda::GpuMat rbw_full_;
 	cv::cuda::GpuMat disp_int_;
 
 	cv::cuda::GpuMat P2_map_;
@@ -69,6 +73,12 @@ class FixstarsSGM : public ftl::operators::Operator {
 	cv::cuda::GpuMat weightsF_;
 	cv::cuda::GpuMat edges_;
 	cv::Ptr<cv::cuda::CannyEdgeDetector> canny_;
+	cv::Ptr<cv::cuda::Filter> filter_;
+
+	cv::cuda::GpuMat im_;
+	cv::cuda::GpuMat im2_;
+	cv::cuda::GpuMat mean_;
+	cv::cuda::GpuMat mean2_;
 
 	int P1_;
 	int P2_;
@@ -80,7 +90,7 @@ class FixstarsSGM : public ftl::operators::Operator {
 
 class DisparityBilateralFilter : public::ftl::operators::Operator {
 	public:
-	explicit DisparityBilateralFilter(ftl::Configurable*);
+	DisparityBilateralFilter(ftl::operators::Graph *g, ftl::Configurable*);
 
 	~DisparityBilateralFilter() {};
 
@@ -91,6 +101,7 @@ class DisparityBilateralFilter : public::ftl::operators::Operator {
 	cv::Ptr<cv::cuda::DisparityBilateralFilter> filter_;
 	cv::cuda::GpuMat disp_int_;
 	cv::cuda::GpuMat disp_int_result_;
+	cv::cuda::GpuMat rgb_;
 	double scale_;
 	int radius_;
 	int iter_;
@@ -102,8 +113,8 @@ class DisparityBilateralFilter : public::ftl::operators::Operator {
  */
 class DisparityToDepth : public ftl::operators::Operator {
 	public:
-	explicit DisparityToDepth(ftl::Configurable* cfg) :
-		ftl::operators::Operator(cfg) {}
+	DisparityToDepth(ftl::operators::Graph *g, ftl::Configurable* cfg) :
+		ftl::operators::Operator(g, cfg) {}
 
 	~DisparityToDepth() {};
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -117,7 +128,7 @@ class DisparityToDepth : public ftl::operators::Operator {
  */
 class DepthChannel : public ftl::operators::Operator {
 	public:
-	explicit DepthChannel(ftl::Configurable *cfg);
+	DepthChannel(ftl::operators::Graph *g, ftl::Configurable *cfg);
 	~DepthChannel();
 
 	inline Operator::Type type() const override { return Operator::Type::ManyToMany; }
@@ -139,8 +150,8 @@ class DepthChannel : public ftl::operators::Operator {
 #ifdef HAVE_OPTFLOW
 class OpticalFlowTemporalSmoothing : public ftl::operators::Operator {
 	public:
-	explicit OpticalFlowTemporalSmoothing(ftl::Configurable*);
-	OpticalFlowTemporalSmoothing(ftl::Configurable*, const std::tuple<ftl::codecs::Channel> &params);
+	OpticalFlowTemporalSmoothing(ftl::operators::Graph *g, ftl::Configurable*);
+	OpticalFlowTemporalSmoothing(ftl::operators::Graph *g, ftl::Configurable*, const std::tuple<ftl::codecs::Channel> &params);
 	~OpticalFlowTemporalSmoothing();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/filling.hpp b/components/operators/include/ftl/operators/filling.hpp
index 6de380213ba4fb69bbf966a87db11a7de81f0946..ed2b3f39a20a949fa28dfe61232d44d15f56affe 100644
--- a/components/operators/include/ftl/operators/filling.hpp
+++ b/components/operators/include/ftl/operators/filling.hpp
@@ -12,7 +12,7 @@ namespace operators {
  */
 class ScanFieldFill : public ftl::operators::Operator {
 	public:
-    explicit ScanFieldFill(ftl::Configurable*);
+    ScanFieldFill(ftl::operators::Graph *g, ftl::Configurable*);
     ~ScanFieldFill();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -23,7 +23,7 @@ class ScanFieldFill : public ftl::operators::Operator {
 
 class CrossSupportFill : public ftl::operators::Operator {
 	public:
-    explicit CrossSupportFill(ftl::Configurable*);
+    CrossSupportFill(ftl::operators::Graph *g, ftl::Configurable*);
     ~CrossSupportFill();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/gt_analysis.hpp b/components/operators/include/ftl/operators/gt_analysis.hpp
index a89230caa12e42fc05027506c6484be1bf348d91..cea34c9dba5477012bd2a620ebbbe0e079fdea51 100644
--- a/components/operators/include/ftl/operators/gt_analysis.hpp
+++ b/components/operators/include/ftl/operators/gt_analysis.hpp
@@ -14,7 +14,7 @@ namespace operators {
  */
 class GTAnalysis : public ftl::operators::Operator {
 	public:
-    explicit GTAnalysis(ftl::Configurable*);
+    GTAnalysis(ftl::operators::Graph *g, ftl::Configurable*);
     ~GTAnalysis();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/mask.hpp b/components/operators/include/ftl/operators/mask.hpp
index 294caf54f31d12fce9674dfcf51e204f3b2ade5d..7d877d6579c329f7af513840b7140f187dbf7eed 100644
--- a/components/operators/include/ftl/operators/mask.hpp
+++ b/components/operators/include/ftl/operators/mask.hpp
@@ -14,7 +14,7 @@ namespace operators {
  */
 class DiscontinuityMask : public ftl::operators::Operator {
 	public:
-	explicit DiscontinuityMask(ftl::Configurable*);
+	DiscontinuityMask(ftl::operators::Graph *g, ftl::Configurable*);
 	~DiscontinuityMask();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -28,7 +28,7 @@ class DiscontinuityMask : public ftl::operators::Operator {
  */
 class BorderMask : public ftl::operators::Operator {
 	public:
-	explicit BorderMask(ftl::Configurable*);
+	BorderMask(ftl::operators::Graph *g, ftl::Configurable*);
 	~BorderMask();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -37,12 +37,26 @@ class BorderMask : public ftl::operators::Operator {
 
 };
 
+/**
+ * Visualise a mask value
+ */
+class DisplayMask : public ftl::operators::Operator {
+	public:
+	DisplayMask(ftl::operators::Graph *g, ftl::Configurable*);
+	~DisplayMask();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) override;
+
+};
+
 /**
  * Remove depth values marked with the discontinuity mask.
  */
 class CullDiscontinuity : public ftl::operators::Operator {
 	public:
-	explicit CullDiscontinuity(ftl::Configurable*);
+	CullDiscontinuity(ftl::operators::Graph *g, ftl::Configurable*);
 	~CullDiscontinuity();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/mvmls.hpp b/components/operators/include/ftl/operators/mvmls.hpp
index 3e5c476d60728bea288f0e43a95a6bd00776ce1e..0c0d6f4afb9715814199304ca0b430ac094071c4 100644
--- a/components/operators/include/ftl/operators/mvmls.hpp
+++ b/components/operators/include/ftl/operators/mvmls.hpp
@@ -8,7 +8,7 @@ namespace operators {
 
 class MultiViewMLS : public ftl::operators::Operator {
 	public:
-	explicit MultiViewMLS(ftl::Configurable*);
+	MultiViewMLS(ftl::operators::Graph *g, ftl::Configurable*);
 	~MultiViewMLS();
 
 	inline Operator::Type type() const override { return Operator::Type::ManyToMany; }
diff --git a/components/operators/include/ftl/operators/normals.hpp b/components/operators/include/ftl/operators/normals.hpp
index a5faaa17645612bce3ab9601638f92058fd4ba57..c4d1a0190d27e9f307474eeed3f2234ac99e2e23 100644
--- a/components/operators/include/ftl/operators/normals.hpp
+++ b/components/operators/include/ftl/operators/normals.hpp
@@ -12,7 +12,7 @@ namespace operators {
  */
 class Normals : public ftl::operators::Operator {
 	public:
-    explicit Normals(ftl::Configurable*);
+    Normals(ftl::operators::Graph *g, ftl::Configurable*);
     ~Normals();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -29,7 +29,7 @@ class Normals : public ftl::operators::Operator {
  */
 class NormalDot : public ftl::operators::Operator {
 	public:
-    explicit NormalDot(ftl::Configurable*);
+    NormalDot(ftl::operators::Graph *g, ftl::Configurable*);
     ~NormalDot();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -44,7 +44,7 @@ class NormalDot : public ftl::operators::Operator {
  */
 class SmoothNormals : public ftl::operators::Operator {
 	public:
-    explicit SmoothNormals(ftl::Configurable*);
+    SmoothNormals(ftl::operators::Graph *g, ftl::Configurable*);
     ~SmoothNormals();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/operator.hpp b/components/operators/include/ftl/operators/operator.hpp
index 566e2715b99041eed861f96e5f4a43499ebf1580..86c406caa2da23eb88f2c2493c7b2e6864fa0b74 100644
--- a/components/operators/include/ftl/operators/operator.hpp
+++ b/components/operators/include/ftl/operators/operator.hpp
@@ -8,10 +8,13 @@
 #include <ftl/rgbd/frameset.hpp>
 #include <ftl/rgbd/source.hpp>
 #include <ftl/cuda_common.hpp>
+#include <ftl/operators/buffer.hpp>
 
 namespace ftl {
 namespace operators {
 
+class Graph;
+
 /**
  * An abstract frame operator interface. Any kind of filter that operates on a
  * single frame should use this as a base class. An example of a filter would
@@ -22,7 +25,7 @@ namespace operators {
  */
 class Operator {
 	public:
-	explicit Operator(ftl::Configurable *cfg);
+	Operator(Graph *pgraph, ftl::Configurable *cfg);
 	virtual ~Operator();
 
 	enum class Type {
@@ -58,9 +61,12 @@ class Operator {
 
 	inline ftl::Configurable *config() const { return config_; }
 
+	inline Graph *graph() const { return graph_; }
+
 	private:
 	bool enabled_;
 	ftl::Configurable *config_;
+	Graph *graph_;
 };
 
 namespace detail {
@@ -68,7 +74,7 @@ namespace detail {
 struct ConstructionHelperBase {
 	explicit ConstructionHelperBase(ftl::Configurable *cfg) : config(cfg) {}
 	virtual ~ConstructionHelperBase() {}
-	virtual ftl::operators::Operator *make()=0;
+	virtual ftl::operators::Operator *make(Graph *g)=0;
 
 	ftl::Configurable *config;
 };
@@ -77,8 +83,8 @@ template <typename T>
 struct ConstructionHelper : public ConstructionHelperBase {
 	explicit ConstructionHelper(ftl::Configurable *cfg) : ConstructionHelperBase(cfg) {}
 	~ConstructionHelper() {}
-	ftl::operators::Operator *make() override {
-		return new T(config);
+	ftl::operators::Operator *make(Graph *g) override {
+		return new T(g, config);
 	}
 };
 
@@ -88,8 +94,8 @@ struct ConstructionHelper2 : public ConstructionHelperBase {
 		arguments_ = std::make_tuple(args...);
 	}
 	~ConstructionHelper2() {}
-	ftl::operators::Operator *make() override {
-		return new T(config, arguments_);
+	ftl::operators::Operator *make(Graph *g) override {
+		return new T(g, config, arguments_);
 	}
 
 	private:
@@ -119,11 +125,9 @@ class Graph : public ftl::Configurable {
 	template <typename T, typename... ARGS>
 	ftl::Configurable *append(const std::string &name, ARGS...);
 
-	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream=0);
-	bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream=0);
-	bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::Frame &out, cudaStream_t stream=0);
-
-	cudaStream_t getStream() const { return stream_; }
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, const std::function<void()> &cb=nullptr);
+	bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, const std::function<void()> &cb=nullptr);
+	bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::Frame &out, const std::function<void()> &cb=nullptr);
 
 	/**
 	 * Make sure all async operators have also completed. This is automatically
@@ -134,11 +138,20 @@ class Graph : public ftl::Configurable {
 	 */
 	bool waitAll(cudaStream_t);
 
+	inline cv::cuda::GpuMat &createBuffer(ftl::operators::Buffer b) { return createBuffer(b, 0); }
+	cv::cuda::GpuMat &createBuffer(ftl::operators::Buffer b, uint32_t fid);
+
+	cv::cuda::GpuMat &getBuffer(ftl::operators::Buffer b, uint32_t fid);
+
+	bool hasBuffer(ftl::operators::Buffer b, uint32_t fid) const;
+
 	private:
 	std::list<ftl::operators::detail::OperatorNode> operators_;
 	std::map<std::string, ftl::Configurable*> configs_;
-	cudaStream_t stream_;
 	std::atomic_flag busy_;
+	std::unordered_map<uint32_t,cv::cuda::GpuMat> buffers_;
+	std::unordered_set<uint32_t> valid_buffers_;
+	std::function<void()> callback_;
 
 	ftl::Configurable *_append(ftl::operators::detail::ConstructionHelperBase*);
 };
diff --git a/components/operators/include/ftl/operators/opticalflow.hpp b/components/operators/include/ftl/operators/opticalflow.hpp
index 8ee77736dcba7d95becf381759266dfeb543cba3..21d8bbb200383b85a4f0c99a1f35e5e2e7b51385 100644
--- a/components/operators/include/ftl/operators/opticalflow.hpp
+++ b/components/operators/include/ftl/operators/opticalflow.hpp
@@ -12,8 +12,8 @@ namespace operators {
  */
 class NVOpticalFlow : public ftl::operators::Operator {
 	public:
-	explicit NVOpticalFlow(ftl::Configurable*);
-	NVOpticalFlow(ftl::Configurable*, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel> &channels);
+	NVOpticalFlow(ftl::operators::Graph *g, ftl::Configurable*);
+	NVOpticalFlow(ftl::operators::Graph *g, ftl::Configurable*, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel> &channels);
 	~NVOpticalFlow();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/poser.hpp b/components/operators/include/ftl/operators/poser.hpp
index 9c642e47730e3f6833087b1a484e1f16476e3017..8d762f8ba2ee371342adb8e20b07d359211d081e 100644
--- a/components/operators/include/ftl/operators/poser.hpp
+++ b/components/operators/include/ftl/operators/poser.hpp
@@ -15,7 +15,7 @@ namespace operators {
  */
 class Poser : public ftl::operators::Operator {
 	public:
-	explicit Poser(ftl::Configurable*);
+	Poser(ftl::operators::Graph *g, ftl::Configurable*);
 	~Poser();
 
 	inline Operator::Type type() const override { return Operator::Type::ManyToMany; }
diff --git a/components/operators/include/ftl/operators/segmentation.hpp b/components/operators/include/ftl/operators/segmentation.hpp
index d7447615c6d5230f997f9dab857b1ba824b22ce2..ec2f9e9a6d3ea8d683a6139a237849c82b8fe44a 100644
--- a/components/operators/include/ftl/operators/segmentation.hpp
+++ b/components/operators/include/ftl/operators/segmentation.hpp
@@ -11,7 +11,7 @@ namespace operators {
  */
 class CrossSupport : public ftl::operators::Operator {
 	public:
-    explicit CrossSupport(ftl::Configurable*);
+    CrossSupport(ftl::operators::Graph *g, ftl::Configurable*);
     ~CrossSupport();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -25,7 +25,7 @@ class CrossSupport : public ftl::operators::Operator {
  */
 class VisCrossSupport : public ftl::operators::Operator {
 	public:
-    explicit VisCrossSupport(ftl::Configurable*);
+    VisCrossSupport(ftl::operators::Graph *g, ftl::Configurable*);
     ~VisCrossSupport();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/smoothing.hpp b/components/operators/include/ftl/operators/smoothing.hpp
index c42f801ed8ed19a34162b270ed9b36fbf3cdd1d8..db231cdd714a786df1dedf7ca54de06d2005ae4e 100644
--- a/components/operators/include/ftl/operators/smoothing.hpp
+++ b/components/operators/include/ftl/operators/smoothing.hpp
@@ -14,7 +14,7 @@ namespace operators {
  */
 class HFSmoother : public ftl::operators::Operator {
 	public:
-	explicit HFSmoother(ftl::Configurable*);
+	HFSmoother(ftl::operators::Graph *g, ftl::Configurable*);
 	~HFSmoother();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -35,7 +35,7 @@ class HFSmoother : public ftl::operators::Operator {
  */
 class SmoothChannel : public ftl::operators::Operator {
 	public:
-	explicit SmoothChannel(ftl::Configurable*);
+	SmoothChannel(ftl::operators::Graph *g, ftl::Configurable*);
 	~SmoothChannel();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -53,7 +53,7 @@ class SmoothChannel : public ftl::operators::Operator {
  */
 class SimpleMLS : public ftl::operators::Operator {
 	public:
-	explicit SimpleMLS(ftl::Configurable*);
+	SimpleMLS(ftl::operators::Graph *g, ftl::Configurable*);
 	~SimpleMLS();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -70,7 +70,7 @@ class SimpleMLS : public ftl::operators::Operator {
  */
 class ColourMLS : public ftl::operators::Operator {
 	public:
-	explicit ColourMLS(ftl::Configurable*);
+	ColourMLS(ftl::operators::Graph *g, ftl::Configurable*);
 	~ColourMLS();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -113,7 +113,7 @@ class ColourMLS : public ftl::operators::Operator {
  */
 class AggreMLS : public ftl::operators::Operator {
 	public:
-	explicit AggreMLS(ftl::Configurable*);
+	AggreMLS(ftl::operators::Graph *g, ftl::Configurable*);
 	~AggreMLS();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -137,7 +137,7 @@ class AggreMLS : public ftl::operators::Operator {
  */
 class AdaptiveMLS : public ftl::operators::Operator {
 	public:
-	explicit AdaptiveMLS(ftl::Configurable*);
+	AdaptiveMLS(ftl::operators::Graph *g, ftl::Configurable*);
 	~AdaptiveMLS();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/include/ftl/operators/weighting.hpp b/components/operators/include/ftl/operators/weighting.hpp
index 545b4c3b8380311aaccc29190a83626da28b8aa6..8256a08cb23d03d42b9ef18a7107a858a34ebb8b 100644
--- a/components/operators/include/ftl/operators/weighting.hpp
+++ b/components/operators/include/ftl/operators/weighting.hpp
@@ -22,7 +22,7 @@ namespace operators {
  */
 class PixelWeights : public ftl::operators::Operator {
 	public:
-	explicit PixelWeights(ftl::Configurable*);
+	PixelWeights(ftl::operators::Graph *g, ftl::Configurable*);
 	~PixelWeights();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -33,7 +33,7 @@ class PixelWeights : public ftl::operators::Operator {
 
 class CullWeight : public ftl::operators::Operator {
 	public:
-	explicit CullWeight(ftl::Configurable*);
+	CullWeight(ftl::operators::Graph *g, ftl::Configurable*);
 	~CullWeight();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
@@ -44,7 +44,7 @@ class CullWeight : public ftl::operators::Operator {
 
 class DegradeWeight : public ftl::operators::Operator {
 	public:
-	explicit DegradeWeight(ftl::Configurable*);
+	DegradeWeight(ftl::operators::Graph *g, ftl::Configurable*);
 	~DegradeWeight();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
diff --git a/components/operators/src/antialiasing.cpp b/components/operators/src/antialiasing.cpp
index 1e6ef8f6c2f2f33de83fb7d113f2bd60b6f1d1f8..bbd1760657e8a7a497c1af5d12a9a92415e990ac 100644
--- a/components/operators/src/antialiasing.cpp
+++ b/components/operators/src/antialiasing.cpp
@@ -4,7 +4,7 @@
 using ftl::operators::FXAA;
 using ftl::codecs::Channel;
 
-FXAA::FXAA(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+FXAA::FXAA(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
diff --git a/components/operators/src/aruco.cpp b/components/operators/src/aruco.cpp
index 02ed251e2e29ec26bbed50ae892dec489b0a368e..0d287356748e7eebd287e18d3e53639f3e6778d2 100644
--- a/components/operators/src/aruco.cpp
+++ b/components/operators/src/aruco.cpp
@@ -6,6 +6,7 @@
 #include <opencv2/calib3d.hpp>
 
 #define LOGURU_REPLACE_GLOG 1
+#include <ftl/profiler.hpp>
 #include <loguru.hpp>
 
 using ftl::operators::ArUco;
@@ -38,20 +39,12 @@ static Eigen::Matrix4d matrix(cv::Vec3d &rvec, cv::Vec3d &tvec) {
 	return r;
 }
 
-ArUco::ArUco(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+ArUco::ArUco(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 	dictionary_ = cv::aruco::getPredefinedDictionary(cfg->value("dictionary", 0));
 	params_ = cv::aruco::DetectorParameters::create();
-
-	debug_ = cfg->value("debug", false);
-	//estimate_pose_ = cfg->value("estimate_pose", false);
-	//auto marker_size = cfg->get<float>("marker_size");
-	//if (!marker_size || (*marker_size <= 0.0f)) {
-	//	marker_size_ = 0.1f;
-	//	estimate_pose_ = false;
-	//}
-	//else {
-	//	marker_size_ = *marker_size;
-	//}
+	params_->cornerRefinementMethod = cv::aruco::CORNER_REFINE_CONTOUR;
+	params_->cornerRefinementMinAccuracy = 0.01;
+	params_->cornerRefinementMaxIterations = 20;
 
 	channel_in_ = Channel::Colour;
 	channel_out_ = Channel::Shapes3D;
@@ -61,88 +54,48 @@ ArUco::ArUco(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
 	});
 }
 
-bool ArUco::apply(Frame &in, Frame &out, cudaStream_t stream) {
+bool ArUco::apply(Frame &in, Frame &out, cudaStream_t) {
 	if (!in.hasChannel(channel_in_)) { return false; }
 
-	Frame *inptr = &in;
-	Frame *outptr = &out;
-
 	estimate_pose_ = config()->value("estimate_pose", true);
-	debug_ = config()->value("debug", false);
-	marker_size_ = config()->value("marker_size",0.1f);
-
-	job_ = std::move(ftl::pool.push([this,inptr,outptr,stream](int id) {
-		Frame &in = *inptr;
-		Frame &out = *outptr;
-
-		auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
-		//in.download(channel_in_);
-
-		//Mat im = in.get<Mat>(channel_in_);
-		// FIXME: Use internal stream here.
-		Mat im; // = in.fastDownload(channel_in_, cv::cuda::Stream::Null());
-		cv::cvtColor(in.get<cv::Mat>(channel_in_), im, cv::COLOR_BGRA2BGR);
+	marker_size_ = config()->value("marker_size", 0.1f);
 
-		Mat K = in.getLeftCamera().getCameraMatrix();
-		Mat dist = cv::Mat::zeros(cv::Size(5, 1), CV_64FC1);
+	std::vector<Vec3d> rvecs;
+	std::vector<Vec3d> tvecs;
+	std::vector<std::vector<cv::Point2f>> corners;
+	std::vector<int> ids;
 
-		std::vector<std::vector<cv::Point2f>> corners;
-		std::vector<int> ids;
+	{
+		FTL_Profile("ArUco", 0.02);
+		cv::cvtColor(in.get<cv::Mat>(channel_in_), tmp_, cv::COLOR_BGRA2GRAY);
 
-		cv::aruco::detectMarkers(	im, dictionary_,
-									corners, ids, params_, cv::noArray(), K);
+		const Mat K = in.getLeftCamera().getCameraMatrix();
+		const Mat dist;
 
-		std::vector<Vec3d> rvecs;
-		std::vector<Vec3d> tvecs;
+		cv::aruco::detectMarkers(tmp_, dictionary_,
+								corners, ids, params_, cv::noArray(), K, dist);
 
 		if (estimate_pose_) {
 			cv::aruco::estimatePoseSingleMarkers(corners, marker_size_, K, dist, rvecs, tvecs);
 		}
+	}
 
-		list<Shape3D> result;
-		if (out.hasChannel(channel_out_)) {
-			result = out.get<list<Shape3D>>(channel_out_);
-		}
-
-		for (size_t i = 0; i < rvecs.size(); i++) {
-			if (estimate_pose_) {
-				auto &t = result.emplace_back();
-				t.id = ids[i];
-				t.type = ftl::codecs::Shape3DType::ARUCO;
-				t.pose = (in.getPose() * matrix(rvecs[i], tvecs[i])).cast<float>();
-				t.size = Eigen::Vector3f(0.1f,0.1f,0.1f);
-				t.label = "Aruco";
-			}
-		}
-
-		out.create<list<Shape3D>>(channel_out_).list = result;
-
-		if (debug_) {
-			cv::aruco::drawDetectedMarkers(im, corners, ids);
-			if (estimate_pose_) {
-				for (size_t i = 0; i < rvecs.size(); i++) {
-						cv::aruco::drawAxis(im, K, dist, rvecs[i], tvecs[i], marker_size_);
-				}
-			}
-		}
+	list<Shape3D> result;
+	if (out.hasChannel(channel_out_)) {
+		result = out.get<list<Shape3D>>(channel_out_);
+	}
 
-		// TODO: should be uploaded by operator which requires data on GPU
-		//in.upload(channel_in_);
-		if (debug_) {
-			//if (in.isGPU(channel_in_)) {
-				cv::cvtColor(im, im, cv::COLOR_BGR2BGRA);
-				out.set<cv::cuda::GpuMat>(channel_in_).upload(im);
-			//} else cv::cvtColor(im, in.get<cv::Mat>(channel_in_), cv::COLOR_BGR2BGRA);
+	for (size_t i = 0; i < rvecs.size(); i++) {
+		if (estimate_pose_) {
+			auto &t = result.emplace_back();
+			t.id = ids[i];
+			t.type = ftl::codecs::Shape3DType::ARUCO;
+			t.pose = (in.getPose() * matrix(rvecs[i], tvecs[i])).cast<float>();
+			t.size = Eigen::Vector3f(1.0f, 1.0f, 0.0f)*marker_size_;
+			t.label = "Aruco-" + std::to_string(ids[i]);
 		}
-		return true;
-	}));
+	}
 
+	out.create<list<Shape3D>>(channel_out_).list = result;
 	return true;
 }
-
-void ArUco::wait(cudaStream_t s) {
-	if (job_.valid()) {
-		job_.wait();
-		job_.get();
-	}
-}
diff --git a/components/operators/src/clipping.cpp b/components/operators/src/clipping.cpp
index fa0a5c7efebc2d7de8e705d639d8c6c1bb1aa48d..3cfce7d554dc81ec253f5248192d61a525205e5e 100644
--- a/components/operators/src/clipping.cpp
+++ b/components/operators/src/clipping.cpp
@@ -10,7 +10,7 @@ using ftl::operators::ClipScene;
 using ftl::codecs::Channel;
 using ftl::rgbd::Format;
 
-ClipScene::ClipScene(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+ClipScene::ClipScene(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
diff --git a/components/operators/src/colours.cpp b/components/operators/src/colours.cpp
index ad48acd1fc8d08b05a778aa983400ca2cebebb39..2704d5763d496de808716c827ccd86ce1efc9c15 100644
--- a/components/operators/src/colours.cpp
+++ b/components/operators/src/colours.cpp
@@ -6,7 +6,7 @@
 using ftl::operators::ColourChannels;
 using ftl::codecs::Channel;
 
-ColourChannels::ColourChannels(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+ColourChannels::ColourChannels(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
diff --git a/components/operators/src/depth.cpp b/components/operators/src/depth.cpp
index df127ef9f360bf40f4e4a757e11b9095902dbce7..11a8d22430c436aa2002ee4afa9ecd6a27c0d4be 100644
--- a/components/operators/src/depth.cpp
+++ b/components/operators/src/depth.cpp
@@ -49,8 +49,8 @@ static void calc_space_weighted_filter(GpuMat& table_space, int win_size, float
 
 // ==== Depth Bilateral Filter =================================================
 
-DepthBilateralFilter::DepthBilateralFilter(ftl::Configurable* cfg) :
-		ftl::operators::Operator(cfg) {
+DepthBilateralFilter::DepthBilateralFilter(ftl::operators::Graph *g, ftl::Configurable* cfg) :
+		ftl::operators::Operator(g, cfg) {
 
 	scale_ = 16.0;
 	radius_ = cfg->value("radius", 7);
@@ -72,8 +72,8 @@ DepthBilateralFilter::DepthBilateralFilter(ftl::Configurable* cfg) :
     calc_space_weighted_filter(table_space_, radius_ * 2 + 1, radius_ + 1.0f);
 }
 
-DepthBilateralFilter::DepthBilateralFilter(ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> &p) :
-		ftl::operators::Operator(cfg) {
+DepthBilateralFilter::DepthBilateralFilter(ftl::operators::Graph *g, ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> &p) :
+		ftl::operators::Operator(g, cfg) {
 
 	scale_ = 16.0;
 	radius_ = cfg->value("radius", 7);
@@ -124,7 +124,7 @@ bool DepthBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out,
 
 // =============================================================================
 
-DepthChannel::DepthChannel(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+DepthChannel::DepthChannel(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 	pipe_ = nullptr;
 }
 
@@ -144,7 +144,8 @@ void DepthChannel::_createPipeline(size_t size) {
 	pipe_->append<ftl::operators::ColourChannels>("colour");  // Convert BGR to BGRA
 	pipe_->append<ftl::operators::CrossSupport>("cross");
 	#ifdef HAVE_OPTFLOW
-	pipe_->append<ftl::operators::NVOpticalFlow>("optflow", Channel::Colour, Channel::Flow, Channel::Colour2, Channel::Flow2);
+	// FIXME: OpenCV Nvidia OptFlow has a horrible implementation that causes device syncs
+	//pipe_->append<ftl::operators::NVOpticalFlow>("optflow", Channel::Colour, Channel::Flow, Channel::Colour2, Channel::Flow2);
 	//if (size == 1) pipe_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter", Channel::Disparity);
 	#endif
 	#ifdef HAVE_LIBSGM
@@ -171,11 +172,7 @@ bool DepthChannel::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
 
 	rbuf_.resize(in.frames.size());
 
-	if (in.frames.size() > 0) {
-		if (depth_size_.width == 0) {
-			depth_size_ = in.firstFrame().get<cv::cuda::GpuMat>(Channel::Colour).size();
-		}
-	}
+	int valid_count = 0;
 
 	for (size_t i=0; i<in.frames.size(); ++i) {
 		if (!in.hasFrame(i)) continue;
@@ -188,18 +185,23 @@ bool DepthChannel::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
 				if (!cdata.enabled) continue;
 			}
 
-			_createPipeline(in.frames.size());
-
 			const cv::cuda::GpuMat& left = f.get<cv::cuda::GpuMat>(Channel::Left);
 			const cv::cuda::GpuMat& right = f.get<cv::cuda::GpuMat>(Channel::Right);
+			if (left.empty() || right.empty()) continue;
+
 			cv::cuda::GpuMat& depth = f.create<cv::cuda::GpuMat>(Channel::Depth);
-			depth.create(left.size(), CV_32FC1);
 
-			if (left.empty() || right.empty()) continue;
-			pipe_->apply(f, f, stream);
+			const auto &intrin = f.getLeft();
+			depth.create(intrin.height, intrin.width, CV_32FC1);
+			++valid_count;
 		}
 	}
 
+	if (valid_count > 0) {
+		_createPipeline(in.frames.size());
+		pipe_->apply(in, out);
+	}
+
 	return true;
 }
 
@@ -210,28 +212,21 @@ bool DepthChannel::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream
 
 	auto &f = in;
 	if (!f.hasChannel(Channel::Depth) && f.hasChannel(Channel::Right)) {
-		_createPipeline(1);
+		if (f.hasChannel(Channel::CalibrationData)) {
+			auto &cdata = f.get<ftl::calibration::CalibrationData>(Channel::CalibrationData);
+			if (!cdata.enabled) return true;
+		}
 
 		const cv::cuda::GpuMat& left = f.get<cv::cuda::GpuMat>(Channel::Left);
 		const cv::cuda::GpuMat& right = f.get<cv::cuda::GpuMat>(Channel::Right);
-		cv::cuda::GpuMat& depth = f.create<cv::cuda::GpuMat>(Channel::Depth);
-		depth.create(depth_size_, CV_32FC1);
-
 		if (left.empty() || right.empty()) return false;
+		
+		_createPipeline(1);
 
-		/*if (depth_size_ != left.size()) {
-			auto &col2 = f.create<cv::cuda::GpuMat>(Channel::ColourHighRes);
-			cv::cuda::resize(left, col2, depth_size_, 0.0, 0.0, cv::INTER_CUBIC, cvstream);
-			f.createTexture<uchar4>(Channel::ColourHighRes, true);
-			f.swapChannels(Channel::Colour, Channel::ColourHighRes);
-		}
-
-		if (depth_size_ != right.size()) {
-			cv::cuda::resize(right, rbuf_[i], depth_size_, 0.0, 0.0, cv::INTER_CUBIC, cvstream);
-			cv::cuda::swap(right, rbuf_[i]);
-		}*/
+		cv::cuda::GpuMat& depth = f.create<cv::cuda::GpuMat>(Channel::Depth);
+		depth.create(depth_size_, CV_32FC1);
 
-		pipe_->apply(f, f, stream);
+		pipe_->apply(f, f);
 	}
 
 	return true;
diff --git a/components/operators/src/detectandtrack.cpp b/components/operators/src/detectandtrack.cpp
index 4e9b1b7294bc168957bc63206d4601ef424f45e6..0b9e783e0419256b357a27970beb6fb6eb0e6604 100644
--- a/components/operators/src/detectandtrack.cpp
+++ b/components/operators/src/detectandtrack.cpp
@@ -20,7 +20,7 @@ using ftl::codecs::Channel;
 using ftl::rgbd::Frame;
 using ftl::operators::DetectAndTrack;
 
-DetectAndTrack::DetectAndTrack(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), detecting_(false) {
+DetectAndTrack::DetectAndTrack(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), detecting_(false) {
 	init();
 }
 
diff --git a/components/operators/src/disparity/bilateral_filter.cpp b/components/operators/src/disparity/bilateral_filter.cpp
index 7ffce560c635e884dbaad6aa128ebba5f353817e..425b8378a2c8bfc13ebb52f1c11c0d50efbe8449 100644
--- a/components/operators/src/disparity/bilateral_filter.cpp
+++ b/components/operators/src/disparity/bilateral_filter.cpp
@@ -4,15 +4,17 @@
 #include <ftl/operators/cuda/disparity.hpp>
 
 #include <opencv2/cudaimgproc.hpp>
+#include <opencv2/cudawarping.hpp>
 
 using cv::cuda::GpuMat;
 using cv::Size;
 
 using ftl::codecs::Channel;
 using ftl::operators::DisparityBilateralFilter;
+using ftl::operators::Buffer;
 
-DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) :
-		ftl::operators::Operator(cfg) {
+DisparityBilateralFilter::DisparityBilateralFilter(ftl::operators::Graph *g, ftl::Configurable* cfg) :
+		ftl::operators::Operator(g, cfg) {
 
 	scale_ = 16.0;
 	n_disp_ = cfg->value("n_disp", 256);
@@ -27,14 +29,16 @@ bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out
 	if (!in.hasChannel(Channel::Colour)) {
 		throw FTL_Error("Joint Bilateral Filter is missing Colour");
 		return false;
-	} else if (!in.hasChannel(Channel::Disparity)) {
+	}
+	
+	if (!graph()->hasBuffer(Buffer::Disparity, in.source())) {
 		// Have depth, so calculate disparity...
 		if (in.hasChannel(Channel::Depth)) {
 			// No disparity, so create it.
 			const auto params = in.getLeftCamera();
 			const GpuMat &depth = in.get<GpuMat>(Channel::Depth);
 
-			GpuMat &disp = out.create<GpuMat>(Channel::Disparity);
+			GpuMat &disp = graph()->createBuffer(Buffer::Disparity, in.source());
 			disp.create(depth.size(), CV_32FC1);
 
 			//LOG(ERROR) << "Calculated disparity from depth";
@@ -56,14 +60,31 @@ bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out
 
 	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 	const GpuMat &rgb = in.get<GpuMat>(Channel::Colour);
-	const GpuMat &disp_in = in.get<GpuMat>(Channel::Disparity);
-	GpuMat &disp_out = out.create<GpuMat>(Channel::Disparity);
+	//const GpuMat &disp_in = in.get<GpuMat>(Channel::Disparity);
+	//GpuMat &disp_out = out.create<GpuMat>(Channel::Disparity);
+
+	GpuMat disp_in = graph()->getBuffer(Buffer::Disparity, in.source());
 	disp_int_.create(disp_in.size(), disp_in.type());
 
+	GpuMat rgb_buf;
+	if (rgb.size() != disp_in.size()) {
+		if (graph()->hasBuffer(Buffer::LowLeft, in.source())) {
+			rgb_buf = graph()->getBuffer(Buffer::LowLeft, in.source());
+		} else {
+			auto &t = graph()->createBuffer(Buffer::LowLeft, in.source());
+			cv::cuda::resize(rgb, t, disp_in.size(), 0, 0, cv::INTER_LINEAR, cvstream);
+			rgb_buf = t;
+		}
+	} else {
+		rgb_buf = rgb;
+	}
+
+	//LOG(INFO) << "DISP = " << disp_in.size() << "," << disp_in.type() << " - RGBBUF = " << rgb_buf.size() << "," << rgb_buf.type() << " - RGB = " << rgb.size() << "," << rgb.type();
+
 	//disp_in.convertTo(disp_int_, CV_16SC1, scale_, cvstream);
 	//cv::cuda::cvtColor(rgb, bw_, cv::COLOR_BGRA2GRAY, 0, cvstream);
-	filter_->apply(disp_in, rgb, disp_int_, cvstream);
-	cv::cuda::swap(disp_out, disp_int_);
+	filter_->apply(disp_in, rgb_buf, disp_int_, cvstream);
+	cv::cuda::swap(disp_in, disp_int_);
 	//disp_int_result_.convertTo(disp_out, disp_in.type(), 1.0/scale_, cvstream);
 	return true;
 }
\ No newline at end of file
diff --git a/components/operators/src/disparity/disparity_to_depth.cpp b/components/operators/src/disparity/disparity_to_depth.cpp
index 1ffd157dead77ef4fcf8859903c4439955a70f0b..07146ccfc95705186ccf5706c071bac3aff9a3a2 100644
--- a/components/operators/src/disparity/disparity_to_depth.cpp
+++ b/components/operators/src/disparity/disparity_to_depth.cpp
@@ -3,17 +3,18 @@
 
 using ftl::operators::DisparityToDepth;
 using ftl::codecs::Channel;
+using ftl::operators::Buffer;
 
 using cv::cuda::GpuMat;
 
 bool DisparityToDepth::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out,
 							cudaStream_t stream) {
 
-	if (!in.hasChannel(Channel::Disparity)) {
+	if (!graph()->hasBuffer(Buffer::Disparity, in.source())) {
 		throw FTL_Error("Missing disparity before convert to depth");
 	}
 
-	const GpuMat &disp = in.get<GpuMat>(Channel::Disparity);
+	const GpuMat &disp = graph()->getBuffer(Buffer::Disparity, in.source());
 	const auto params = in.getLeftCamera().scaled(disp.cols, disp.rows);
 
 	GpuMat &depth = out.create<GpuMat>(Channel::Depth);
diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp
index 0274022737726a8d6a94c5bcc8124c82a8d4dd17..176dc8d10bf23ba91ebc3cf1976b7e2e08a96766 100644
--- a/components/operators/src/disparity/fixstars_sgm.cpp
+++ b/components/operators/src/disparity/fixstars_sgm.cpp
@@ -6,6 +6,7 @@
 #include <opencv2/cudaimgproc.hpp>
 #include <opencv2/cudaarithm.hpp>
 #include <opencv2/cudafilters.hpp>
+#include <opencv2/cudawarping.hpp>
 
 using cv::Size;
 using cv::cuda::GpuMat;
@@ -15,29 +16,25 @@ using ftl::codecs::Channel;
 using ftl::rgbd::Frame;
 using ftl::rgbd::Source;
 using ftl::operators::FixstarsSGM;
+using ftl::operators::Buffer;
 
 
-static void variance_mask(cv::InputArray in, cv::OutputArray out, int wsize, cv::cuda::Stream &cvstream) {
+void FixstarsSGM::_variance_mask(cv::InputArray in, cv::OutputArray out, int wsize, cv::cuda::Stream &cvstream) {
 	if (in.isGpuMat() && out.isGpuMat()) {
-		cv::cuda::GpuMat im;
-		cv::cuda::GpuMat im2;
-		cv::cuda::GpuMat mean;
-		cv::cuda::GpuMat mean2;
-
-		mean.create(in.size(), CV_32FC1);
-		mean2.create(in.size(), CV_32FC1);
-		im2.create(in.size(), CV_32FC1);
-		in.getGpuMat().convertTo(im, CV_32FC1, cvstream);
-
-		cv::cuda::multiply(im, im, im2, 1.0, CV_32FC1, cvstream);
-		auto filter = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize));
-		filter->apply(im, mean, cvstream);   // E[X]
-		filter->apply(im2, mean2, cvstream); // E[X^2]
-		cv::cuda::multiply(mean, mean, mean, 1.0, -1, cvstream); // (E[X])^2
+		mean_.create(in.size(), CV_32FC1);
+		mean2_.create(in.size(), CV_32FC1);
+		im2_.create(in.size(), CV_32FC1);
+		in.getGpuMat().convertTo(im_, CV_32FC1, cvstream);
+
+		cv::cuda::multiply(im_, im_, im2_, 1.0, CV_32FC1, cvstream);
+		if (!filter_) filter_ = cv::cuda::createBoxFilter(CV_32FC1, CV_32FC1, cv::Size(wsize,wsize));
+		filter_->apply(im_, mean_, cvstream);   // E[X]
+		filter_->apply(im2_, mean2_, cvstream); // E[X^2]
+		cv::cuda::multiply(mean_, mean_, mean_, 1.0, -1, cvstream); // (E[X])^2
 
 		// NOTE: floating point accuracy in subtraction
 		// (cv::cuda::createBoxFilter only supports float and 8 bit integer types)
-		cv::cuda::subtract(mean2, mean, out.getGpuMatRef(), cv::noArray(), -1, cvstream); // E[X^2] - (E[X])^2
+		cv::cuda::subtract(mean2_, mean_, out.getGpuMatRef(), cv::noArray(), -1, cvstream); // E[X^2] - (E[X])^2
 	}
 	else { throw std::exception(); /* todo CPU version */ }
 }
@@ -56,8 +53,8 @@ void FixstarsSGM::computeP2(cudaStream_t &stream) {
 	}
 }
 
-FixstarsSGM::FixstarsSGM(ftl::Configurable* cfg) :
-		ftl::operators::Operator(cfg) {
+FixstarsSGM::FixstarsSGM(ftl::operators::Graph *g, ftl::Configurable* cfg) :
+		ftl::operators::Operator(g, cfg) {
 
 	ssgm_ = nullptr;
 	size_ = Size(0, 0);
@@ -180,27 +177,43 @@ bool FixstarsSGM::apply(Frame &in, Frame &out, cudaStream_t stream) {
 	auto &l = in.get<GpuMat>(Channel::Left);
 	const auto &r = in.get<GpuMat>(Channel::Right);
 
-	if (l.size() != size_) {
-		size_ = l.size();
+	const auto &intrin = in.getLeft();
+
+	if (l.empty() || r.empty() || intrin.width == 0) {
+		LOG(ERROR) << "Missing data for Fixstars";
+		return false;
+	}
+
+	if (size_.width != intrin.width) {
+		size_ = cv::Size(intrin.width, intrin.height);
 		if (!init()) { return false; }
 	}
 
-	bool has_estimate = in.hasChannel(Channel::Disparity);
-	const auto &disp = (!has_estimate) ? out.create<ftl::rgbd::VideoFrame>(Channel::Disparity).createGPU(Format<short>(l.size())) : in.get<GpuMat>(Channel::Disparity);
+	bool has_estimate = graph()->hasBuffer(Buffer::Disparity, in.source()); //in.hasChannel(Channel::Disparity);
+	auto &disp = graph()->createBuffer(Buffer::Disparity, in.source());
+	disp.create(size_, CV_16SC1);
 
 	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
-	cv::cuda::cvtColor(l, lbw_, cv::COLOR_BGRA2GRAY, 0, cvstream);
-	cv::cuda::cvtColor(r, rbw_, cv::COLOR_BGRA2GRAY, 0, cvstream);
+	cv::cuda::cvtColor(l, lbw_full_, cv::COLOR_BGRA2GRAY, 0, cvstream);
+	cv::cuda::cvtColor(r, rbw_full_, cv::COLOR_BGRA2GRAY, 0, cvstream);
+
+	if (l.size() != size_) {
+		cv::cuda::resize(lbw_full_, lbw_, size_, 0, 0, cv::INTER_CUBIC, cvstream);
+		cv::cuda::resize(rbw_full_, rbw_, size_, 0, 0, cv::INTER_CUBIC, cvstream);
+	} else {
+		lbw_ = lbw_full_;
+		rbw_ = rbw_full_;
+	}
 
 	//cvstream.waitForCompletion();
 	computeP2(stream);
 
 	bool use_variance = config()->value("use_variance", true);
 	if (use_variance) {
-		variance_mask(lbw_, weightsF_, config()->value("var_wsize", 11), cvstream);
+		_variance_mask(lbw_, weightsF_, config()->value("var_wsize", 11), cvstream);
 		float minweight = std::min(1.0f, std::max(0.0f, config()->value("var_minweight", 0.5f)));
 		cv::cuda::normalize(weightsF_, weightsF_, minweight, 1.0, cv::NORM_MINMAX, -1, cv::noArray(), cvstream);
-		weightsF_.convertTo(weights_, CV_8UC1, 255.0f);
+		weightsF_.convertTo(weights_, CV_8UC1, 255.0f, cvstream);
 
 		//if ((int)P2_map_.step != P2_map_.cols) LOG(ERROR) << "P2 map step error: " << P2_map_.cols << "," << P2_map_.step;
 		ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data, P2_map_.data, (uint8_t*) weights_.data, weights_.step1(), stream);
@@ -224,7 +237,7 @@ bool FixstarsSGM::apply(Frame &in, Frame &out, cudaStream_t stream) {
 	}
 
 	if (config()->value("show_P2_map", false)) {
-		cv::cuda::cvtColor(P2_map_, out.get<GpuMat>(Channel::Colour), cv::COLOR_GRAY2BGRA);
+		cv::cuda::cvtColor(P2_map_, out.get<GpuMat>(Channel::Colour), cv::COLOR_GRAY2BGRA, 0, cvstream);
 	}
 	if (config()->value("show_rpe", false)) {
 		ftl::cuda::show_rpe(disp, in.set<GpuMat>(Channel::Left), r, 100.0f, stream);
diff --git a/components/operators/src/disparity/libstereo.cpp b/components/operators/src/disparity/libstereo.cpp
index 8531dd98f0650abb4b8b83198999616dc4eb31f9..8229c845e59a6dbe0e963cae66599fb0c405280f 100644
--- a/components/operators/src/disparity/libstereo.cpp
+++ b/components/operators/src/disparity/libstereo.cpp
@@ -21,8 +21,8 @@ struct StereoDisparity::Impl {
 	StereoCensusSgm sgm;
 };
 
-StereoDisparity::StereoDisparity(ftl::Configurable* cfg) :
-		ftl::operators::Operator(cfg), impl_(nullptr) {
+StereoDisparity::StereoDisparity(ftl::operators::Graph *g, ftl::Configurable* cfg) :
+		ftl::operators::Operator(g, cfg), impl_(nullptr) {
 
 	init();
 }
diff --git a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu
index c1dd611c00e6830232a01d6f1eb86b643cd477cb..9ed95f6eab0831f12b502a36858c837f37edd8ad 100644
--- a/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu
+++ b/components/operators/src/disparity/opencv/disparity_bilateral_filter.cu
@@ -328,8 +328,8 @@ namespace ftl { namespace cuda { namespace device
                 }
                 
 
-            if (stream == 0)
-                cudaSafeCall( cudaDeviceSynchronize() );
+            //if (stream == 0)
+            //    cudaSafeCall( cudaDeviceSynchronize() );
         }
 
         // These are commented out since we don't use them and it slows compile
diff --git a/components/operators/src/disparity/optflow_smoothing.cpp b/components/operators/src/disparity/optflow_smoothing.cpp
index c69b0727beda8ceb22580609ed8f7181a22d40f9..d1205854ea385c0b8be40167926523c572c4af96 100644
--- a/components/operators/src/disparity/optflow_smoothing.cpp
+++ b/components/operators/src/disparity/optflow_smoothing.cpp
@@ -18,14 +18,14 @@ using std::vector;
 
 template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO
 
-OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> &params) :
-		ftl::operators::Operator(cfg) {
+OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::operators::Graph *g, ftl::Configurable* cfg, const std::tuple<ftl::codecs::Channel> &params) :
+		ftl::operators::Operator(g, cfg) {
 	channel_ = std::get<0>(params);
 	_init(cfg);
 }
 
-OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::Configurable* cfg) :
-		ftl::operators::Operator(cfg) {
+OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::operators::Graph *g, ftl::Configurable* cfg) :
+		ftl::operators::Operator(g, cfg) {
 	
 	_init(cfg);
 }
diff --git a/components/operators/src/filling.cpp b/components/operators/src/filling.cpp
index 87298cc052cb8d48673a6c2ce13943c35a68caee..8b6b369b787f1e1ceab1c404af46629f4b146185 100644
--- a/components/operators/src/filling.cpp
+++ b/components/operators/src/filling.cpp
@@ -6,7 +6,7 @@ using ftl::operators::ScanFieldFill;
 using ftl::operators::CrossSupportFill;
 using ftl::codecs::Channel;
 
-ScanFieldFill::ScanFieldFill(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+ScanFieldFill::ScanFieldFill(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -29,7 +29,7 @@ bool ScanFieldFill::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStrea
 }
 
 
-CrossSupportFill::CrossSupportFill(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+CrossSupportFill::CrossSupportFill(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
diff --git a/components/operators/src/fusion/correspondence_depth.cu b/components/operators/src/fusion/correspondence_depth.cu
index f7303cfc413f03b49bdc75510906a5cbd409b626..95d11b890c98d53c643fed9946b855bf0b6f3ca8 100644
--- a/components/operators/src/fusion/correspondence_depth.cu
+++ b/components/operators/src/fusion/correspondence_depth.cu
@@ -117,9 +117,16 @@ __global__ void corresponding_depth_kernel(
 		if (depth1 > cam1.minDepth && depth1 < cam1.maxDepth && bestcost < 1.0f) {
 			// Delay making the depth change until later.
 			conf(pt) = bestadjust;
-			mask(pt) = mask(pt) | Mask::kMask_Correspondence;
+			auto m = mask(pt);
+			m &= ~Mask::kMask_Bad;
+			mask(pt) = m | Mask::kMask_Correspondence;
 			screenOut(pt) = bestScreen;
 		}
+
+		if (depth1 > cam1.minDepth && depth1 < cam1.maxDepth && bestcost > 2.0f) {
+			auto m = mask(pt);
+			mask(pt) = (m & Mask::kMask_Correspondence) ? m : m | Mask::kMask_Bad;
+		}
     }
 }
 
diff --git a/components/operators/src/fusion/correspondence_util.cu b/components/operators/src/fusion/correspondence_util.cu
index 5145f69767866e66ed01518b6e492328f022f595..1887ee88fa7e182e9b6f4b725092b5a5e833c1ef 100644
--- a/components/operators/src/fusion/correspondence_util.cu
+++ b/components/operators/src/fusion/correspondence_util.cu
@@ -53,6 +53,8 @@ __global__ void show_cor_error_kernel(
 	if (x < colour.width() && y < colour.height()) {
 		short2 s1 = screen1.tex2D(x,y);
 
+		//colour(x,y) = make_uchar4(0,0,0,0);
+
 		if (s1.x >= 0 && s1.x < screen2.width() && s1.y < screen2.height()) {
 			short2 s2 = screen2.tex2D(s1.x, s1.y);
 
@@ -120,6 +122,8 @@ __global__ void show_depth_adjust_kernel(
 		float a = adjust.tex2D(x,y);
 		short2 s = screen.tex2D(x,y);
 
+		//colour(x,y) = make_uchar4(0,0,0,0);
+
 		if (s.x >= 0) {
 			float ncG = min(1.0f, fabsf(a)/scale);
 			float ncB = -max(-1.0f, min(0.0f, a/scale));
diff --git a/components/operators/src/fusion/mvmls.cpp b/components/operators/src/fusion/mvmls.cpp
index bdb69b6e6fd53777ec38e248414d72fd05ad2e52..38328f33ea724a1b687163bc961c2ccec1d6b4fa 100644
--- a/components/operators/src/fusion/mvmls.cpp
+++ b/components/operators/src/fusion/mvmls.cpp
@@ -5,13 +5,14 @@
 #include <ftl/cuda/normals.hpp>
 
 #include <opencv2/cudaarithm.hpp>
+#include <opencv2/cudawarping.hpp>
 
 using ftl::operators::MultiViewMLS;
 using ftl::codecs::Channel;
 using cv::cuda::GpuMat;
 using ftl::rgbd::Format;
 
-MultiViewMLS::MultiViewMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+MultiViewMLS::MultiViewMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -94,6 +95,13 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
         f.createTexture<short2>(Channel::Screen);
 
         f.set<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
+
+		if (show_adjustment || show_consistency) {
+			if (!f.hasChannel(Channel::Overlay)) {
+				auto &t = f.createTexture<uchar4>(Channel::Overlay, ftl::rgbd::Format<uchar4>(size));
+				cudaMemset2DAsync(t.devicePtr(), t.pitch(), 0, t.width()*4, t.height(), stream);
+			}
+		}
     }
 
     //for (int iter=0; iter<iters; ++iter) {
@@ -272,8 +280,8 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
                         );*/
 
 						if (show_consistency) {
-							ftl::cuda::show_cor_error(f1.getTexture<uchar4>(Channel::Colour), f1.getTexture<short2>(Channel::Screen), f2.getTexture<short2>(Channel::Screen), 5.0f, stream);
-							ftl::cuda::show_cor_error(f2.getTexture<uchar4>(Channel::Colour), f2.getTexture<short2>(Channel::Screen), f1.getTexture<short2>(Channel::Screen), 5.0f, stream);
+							ftl::cuda::show_cor_error(f1.getTexture<uchar4>(Channel::Overlay), f1.getTexture<short2>(Channel::Screen), f2.getTexture<short2>(Channel::Screen), 5.0f, stream);
+							ftl::cuda::show_cor_error(f2.getTexture<uchar4>(Channel::Overlay), f2.getTexture<short2>(Channel::Screen), f1.getTexture<short2>(Channel::Screen), 5.0f, stream);
 						}
 
 						/*ftl::cuda::remove_cor_error(
@@ -297,8 +305,8 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
                         //}
 
 						if (show_adjustment) {
-							ftl::cuda::show_depth_adjustment(f1.getTexture<uchar4>(Channel::Colour), f1.getTexture<short2>(Channel::Screen), f1.getTexture<float>(Channel::Confidence), 0.04f, stream);
-							ftl::cuda::show_depth_adjustment(f2.getTexture<uchar4>(Channel::Colour), f2.getTexture<short2>(Channel::Screen), f2.getTexture<float>(Channel::Confidence), 0.04f, stream);
+							ftl::cuda::show_depth_adjustment(f1.getTexture<uchar4>(Channel::Overlay), f1.getTexture<short2>(Channel::Screen), f1.getTexture<float>(Channel::Confidence), 0.04f, stream);
+							ftl::cuda::show_depth_adjustment(f2.getTexture<uchar4>(Channel::Overlay), f2.getTexture<short2>(Channel::Screen), f2.getTexture<float>(Channel::Confidence), 0.04f, stream);
 						}
                     //} //else {
                         /*ftl::cuda::correspondence(
@@ -439,13 +447,27 @@ bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cuda
 
 				float thresh = (1.0f / f.getLeft().fx) * disconPixels;
 
+				const GpuMat &rgb = f.get<GpuMat>(Channel::Colour);
+				GpuMat rgb_buf;
+				if (rgb.size() != size) {
+					if (graph()->hasBuffer(Buffer::LowLeft, f.source())) {
+						rgb_buf = graph()->getBuffer(Buffer::LowLeft, f.source());
+					} else {
+						rgb_buf = graph()->createBuffer(Buffer::LowLeft, f.source());
+						cv::cuda::resize(rgb, rgb_buf, size, 0, 0, cv::INTER_LINEAR, cvstream);
+					}
+				} else {
+					rgb_buf = rgb;
+				}
+
 				ftl::cuda::mls_aggr_horiz(
 					f.createTexture<uchar4>((f.hasChannel(Channel::Support2)) ? Channel::Support2 : Channel::Support1),
 					f.createTexture<half4>(Channel::Normals),
 					*normals_horiz_[i],
 					f.createTexture<float>(Channel::Depth),
 					*centroid_horiz_[i],
-					f.createTexture<uchar4>(Channel::Colour),
+					//f.createTexture<uchar4>(Channel::Colour),
+					rgb_buf,
 					thresh,
 					col_smooth,
 					radius,
diff --git a/components/operators/src/gt_analysis.cpp b/components/operators/src/gt_analysis.cpp
index c977a814b0c0e5fd0c6881f80b7d43379c440712..d7f94d39946d58a877050c73371c178436a9e6f9 100644
--- a/components/operators/src/gt_analysis.cpp
+++ b/components/operators/src/gt_analysis.cpp
@@ -5,7 +5,7 @@ using ftl::operators::GTAnalysis;
 using ftl::codecs::Channel;
 using std::string;
 
-GTAnalysis::GTAnalysis(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+GTAnalysis::GTAnalysis(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 	cudaMalloc(&output_, sizeof(ftl::cuda::GTAnalysisData));
 }
 
diff --git a/components/operators/src/mask.cpp b/components/operators/src/mask.cpp
index 274f2de718dfae78f9b148848a663e16a3bd1313..3a2e7d8331f7fa27aad4c06973cab46f9fe1d23d 100644
--- a/components/operators/src/mask.cpp
+++ b/components/operators/src/mask.cpp
@@ -4,10 +4,11 @@
 using ftl::operators::DiscontinuityMask;
 using ftl::operators::BorderMask;
 using ftl::operators::CullDiscontinuity;
+using ftl::operators::DisplayMask;
 using ftl::codecs::Channel;
 using ftl::rgbd::Format;
 
-DiscontinuityMask::DiscontinuityMask(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+DiscontinuityMask::DiscontinuityMask(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -31,9 +32,10 @@ bool DiscontinuityMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaS
 	}
 
 	if (!out.hasChannel(Channel::Mask)) {
+		cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 		auto &m = out.create<cv::cuda::GpuMat>(Channel::Mask);
 		m.create(in.get<cv::cuda::GpuMat>(Channel::Depth).size(), CV_8UC1);
-		m.setTo(cv::Scalar(0));
+		m.setTo(cv::Scalar(0), cvstream);
 	}
 
 	/*ftl::cuda::discontinuity(
@@ -59,7 +61,7 @@ bool DiscontinuityMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaS
 
 
 
-BorderMask::BorderMask(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+BorderMask::BorderMask(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -87,7 +89,7 @@ bool BorderMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 
 
 
-CullDiscontinuity::CullDiscontinuity(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+CullDiscontinuity::CullDiscontinuity(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -113,4 +115,35 @@ bool CullDiscontinuity::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaS
 	);
 
 	return true;
-}
\ No newline at end of file
+}
+
+
+
+DisplayMask::DisplayMask(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
+
+}
+
+DisplayMask::~DisplayMask() {
+
+}
+
+bool DisplayMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t stream) {
+
+	if (!in.hasChannel(Channel::Mask)) {
+		return true;
+	}
+
+	uint8_t mask = config()->value("mask", 0);
+	bool invert = config()->value("invert", false);
+
+	auto &masktex = in.getTexture<uint8_t>(Channel::Mask);
+
+	if (!in.hasChannel(Channel::Overlay)) {
+		auto &t = in.createTexture<uchar4>(Channel::Overlay, ftl::rgbd::Format<uchar4>(masktex.width(), masktex.height()));
+		cudaMemset2DAsync(t.devicePtr(), t.pitch(), 0, t.width()*4, t.height(), stream);
+	}
+
+	ftl::cuda::show_mask(in.getTexture<uchar4>(Channel::Overlay), masktex, mask, make_uchar4(255,0,255,255), stream);
+
+	return true;
+}
diff --git a/components/operators/src/mls.cu b/components/operators/src/mls.cu
index ee3ce41255e1f24d3f6066e648f26e7caa501f0e..55813446ab7a62d0913b96d107187b2bb62f0543 100644
--- a/components/operators/src/mls.cu
+++ b/components/operators/src/mls.cu
@@ -223,6 +223,10 @@ __device__ inline int segmentID(int u, int v) {
 	return 0;
 }
 
+__device__ inline float4 make_float4(const uchar4 &v) {
+	return make_float4(float(v.x), float(v.y), float(v.z), float(v.w));
+}
+
 /*
  * Smooth depth map using Moving Least Squares. This version uses colour
  * similarity weights to adjust the spatial smoothing factor. It is naive in
@@ -237,7 +241,9 @@ __device__ inline int segmentID(int u, int v) {
 		TextureObject<half4> normals_out,
         TextureObject<float> depth_in,        // Virtual depth map
 		TextureObject<float> depth_out,   // Accumulated output
-		TextureObject<uchar4> colour_in,
+		//TextureObject<uchar4> colour_in,
+		const uchar4* __restrict__ colour_in,
+		int colour_pitch,
 		float smoothing,
 		float colour_smoothing,
         ftl::rgbd::Camera camera) {
@@ -260,7 +266,8 @@ __device__ inline int segmentID(int u, int v) {
 	}
 	float3 X = camera.screenToCam((int)(x),(int)(y),d0);
 
-	float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f);
+	//float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f);
+	float4 c0 = make_float4(colour_in[x+y*colour_pitch]);
 
     // Neighbourhood
 	uchar4 base = region.tex2D(x,y);
@@ -274,7 +281,8 @@ __device__ inline int segmentID(int u, int v) {
 
 		#pragma unroll
 		for (int u=-RADIUS; u<=RADIUS; ++u) {
-			const float d = depth_in.tex2D(x+u, y+v);
+			if (x+u >= 0 && x+u < depth_in.width() && y+v >= 0 && y+v < depth_in.height()) {
+				const float d = depth_in.tex2D(x+u, y+v);
 			//if (d > camera.minDepth && d < camera.maxDepth) {
 
 				float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -baseY.x || u > baseY.y || v < -base.z || v > base.z) ? 0.0f : 1.0f;
@@ -286,7 +294,8 @@ __device__ inline int segmentID(int u, int v) {
 				// FIXME: Ensure bad normals are removed by setting depth invalid
 				//if (Ni.x+Ni.y+Ni.z == 0.0f) continue;
 
-				const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f);
+				//const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f);
+				const float4 c = make_float4(colour_in[x+u+(y+v)*colour_pitch]);
 				w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing);
 
 				// Allow missing point to borrow z value
@@ -300,7 +309,7 @@ __device__ inline int segmentID(int u, int v) {
 				nX += Ni*w;
 				contrib += w;
 				//if (FILLING && w > 0.0f && v > -base.z+1 && v < base.w-1 && u > -baseY.x+1 && u < baseY.y-1) segment_check |= segmentID(u,v);
-			//}
+			}
 		}
 	}
 
@@ -335,7 +344,8 @@ void ftl::cuda::colour_mls_smooth_csr(
 		ftl::cuda::TextureObject<half4> &normals_out,
 		ftl::cuda::TextureObject<float> &depth_in,
 		ftl::cuda::TextureObject<float> &depth_out,
-		ftl::cuda::TextureObject<uchar4> &colour_in,
+		//ftl::cuda::TextureObject<uchar4> &colour_in,
+		const cv::cuda::GpuMat &colour_in,
 		float smoothing,
 		float colour_smoothing,
 		bool filling,
@@ -346,9 +356,9 @@ void ftl::cuda::colour_mls_smooth_csr(
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
 	if (filling) {
-		colour_mls_smooth_csr_kernel<true,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera);
+		colour_mls_smooth_csr_kernel<true,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera);
 	} else {
-		colour_mls_smooth_csr_kernel<false,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera);
+		colour_mls_smooth_csr_kernel<false,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera);
 	}
 		
 	cudaSafeCall( cudaGetLastError() );
@@ -593,7 +603,8 @@ void ftl::cuda::mls_aggr_horiz(
 		ftl::cuda::TextureObject<half4> &normals_out,
 		ftl::cuda::TextureObject<float> &depth_in,
 		ftl::cuda::TextureObject<float4> &centroid_out,
-		ftl::cuda::TextureObject<uchar4> &colour_in,
+		//ftl::cuda::TextureObject<uchar4> &colour_in,
+		const cv::cuda::GpuMat &colour_in,
 		float smoothing,
 		float colour_smoothing,
 		int radius,
@@ -607,13 +618,13 @@ void ftl::cuda::mls_aggr_horiz(
 	const dim3 blockSize(THREADS_X, THREADS_Y);
 
 	switch(radius) {
-	case 1: mls_aggr_horiz_kernel<1><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break;
-	case 2: mls_aggr_horiz_kernel<2><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break;
-	case 3: mls_aggr_horiz_kernel<3><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break;
-	case 5: mls_aggr_horiz_kernel<5><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break;
-	case 10: mls_aggr_horiz_kernel<10><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break;
-	case 15: mls_aggr_horiz_kernel<15><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break;
-	case 20: mls_aggr_horiz_kernel<20><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, colour_in.devicePtr(), colour_in.pixelPitch(), smoothing, colour_smoothing, camera); break;
+	case 1: mls_aggr_horiz_kernel<1><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break;
+	case 2: mls_aggr_horiz_kernel<2><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break;
+	case 3: mls_aggr_horiz_kernel<3><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break;
+	case 5: mls_aggr_horiz_kernel<5><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break;
+	case 10: mls_aggr_horiz_kernel<10><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break;
+	case 15: mls_aggr_horiz_kernel<15><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break;
+	case 20: mls_aggr_horiz_kernel<20><<<gridSize, blockSize, 0, stream>>>(region.devicePtr(), region.pixelPitch(), normals_in.devicePtr(), normals_in.pixelPitch(), normals_out, depth_in.devicePtr(), depth_in.pixelPitch(), centroid_out, (uchar4*)colour_in.data, colour_in.step/4, smoothing, colour_smoothing, camera); break;
 	default: return;
 	}
 	cudaSafeCall( cudaGetLastError() );
diff --git a/components/operators/src/normals.cpp b/components/operators/src/normals.cpp
index 09618fa4805a4e171c59c32f37b54df3298d5daa..d4831c56204598cb8537ab984caf806c6830c628 100644
--- a/components/operators/src/normals.cpp
+++ b/components/operators/src/normals.cpp
@@ -8,7 +8,7 @@ using ftl::operators::SmoothNormals;
 using ftl::codecs::Channel;
 using ftl::rgbd::Format;
 
-Normals::Normals(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+Normals::Normals(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -39,7 +39,7 @@ bool Normals::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t st
 
 // =============================================================================
 
-NormalDot::NormalDot(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+NormalDot::NormalDot(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -71,7 +71,7 @@ bool NormalDot::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 // =============================================================================
 
 
-SmoothNormals::SmoothNormals(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+SmoothNormals::SmoothNormals(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
diff --git a/components/operators/src/nvopticalflow.cpp b/components/operators/src/nvopticalflow.cpp
index 27a623b39485f0ac29ad1c914980db62f324d560..8c0d38a6937e08b1efba9f61601029b4bd9cad15 100644
--- a/components/operators/src/nvopticalflow.cpp
+++ b/components/operators/src/nvopticalflow.cpp
@@ -18,13 +18,13 @@ using ftl::operators::NVOpticalFlow;
 using cv::Size;
 using cv::cuda::GpuMat;
 
-NVOpticalFlow::NVOpticalFlow(ftl::Configurable* cfg) :
-		ftl::operators::Operator(cfg), channel_in_{ftl::codecs::Channel::Colour,ftl::codecs::Channel::Colour2}, channel_out_{ftl::codecs::Channel::Flow,ftl::codecs::Channel::Flow2} {
+NVOpticalFlow::NVOpticalFlow(ftl::operators::Graph *g, ftl::Configurable* cfg) :
+		ftl::operators::Operator(g, cfg), channel_in_{ftl::codecs::Channel::Colour,ftl::codecs::Channel::Colour2}, channel_out_{ftl::codecs::Channel::Flow,ftl::codecs::Channel::Flow2} {
 	size_ = Size(0, 0);
 
 }
 
-NVOpticalFlow::NVOpticalFlow(ftl::Configurable*cfg, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel> &channels) : ftl::operators::Operator(cfg) {
+NVOpticalFlow::NVOpticalFlow(ftl::operators::Graph *g, ftl::Configurable*cfg, const std::tuple<ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel,ftl::codecs::Channel> &channels) : ftl::operators::Operator(g, cfg) {
 	channel_in_[0] = std::get<0>(channels);
 	channel_out_[0] = std::get<1>(channels);
 	channel_in_[1] = std::get<2>(channels);
diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp
index cdbeb0c00e9753bd8cb6d8ce21ba285b062505b5..4cc0f90598d49fc70eb6b48eec773a5733b6a459 100644
--- a/components/operators/src/operator.cpp
+++ b/components/operators/src/operator.cpp
@@ -10,7 +10,7 @@ using ftl::rgbd::FrameSet;
 using ftl::rgbd::Source;
 using ftl::codecs::Channel;
 
-Operator::Operator(ftl::Configurable *config) : config_(config) {
+Operator::Operator(ftl::operators::Graph *g, ftl::Configurable *config) : config_(config), graph_(g) {
 	enabled_ = config_->value("enabled", true);
 
 	config_->on("enabled", [this]() {
@@ -35,7 +35,6 @@ bool Operator::apply(FrameSet &in, Frame &out, cudaStream_t stream) {
 
 
 Graph::Graph(nlohmann::json &config) : ftl::Configurable(config) {
-	cudaSafeCall( cudaStreamCreate(&stream_) );
 	busy_.clear();
 }
 
@@ -49,22 +48,43 @@ Graph::~Graph() {
 			delete i;
 		}
 	}
-	cudaStreamDestroy(stream_);
 }
 
-bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
-	if (!value("enabled", true)) return false;
+cv::cuda::GpuMat &Graph::createBuffer(ftl::operators::Buffer b, uint32_t fid) {
+	if (fid > 32) throw FTL_Error("Too many frames for buffer");
+	auto &v = buffers_[(uint32_t(b) << 8) + fid];
+	valid_buffers_.insert((uint32_t(b) << 8) + fid);
+	return v;
+}
+
+cv::cuda::GpuMat &Graph::getBuffer(ftl::operators::Buffer b, uint32_t fid) {
+	if (fid > 32) throw FTL_Error("Too many frames for buffer");
+	if (!hasBuffer(b, fid)) throw FTL_Error("Buffer does not exist: " << int(b));
+	auto &v = buffers_.at((uint32_t(b) << 8) + fid);
+	return v;
+}
+
+bool Graph::hasBuffer(ftl::operators::Buffer b, uint32_t fid) const {
+	return valid_buffers_.count((uint32_t(b) << 8) + fid) > 0;
+}
+
+bool Graph::apply(FrameSet &in, FrameSet &out, const std::function<void()> &cb) {
+	if (!value("enabled", true)) return true;
+	if (in.frames.size() < 1) return true;
 
-	auto stream_actual = (stream == 0) ? stream_ : stream;
+	auto stream_actual = in.frames[0].stream();
 	bool success = true;
 
-	if (in.frames.size() != out.frames.size()) return false;
+	if (in.frames.size() != out.frames.size()) return true;
 
 	if (busy_.test_and_set()) {
 		LOG(ERROR) << "Pipeline already in use: " << in.timestamp();
+		//if (cb) cb();
 		return false;
 	}
 
+	valid_buffers_.clear();
+
 	for (auto &f : out.frames) {
 		if (!f.hasOwn(Channel::Pipelines)) f.create<std::list<std::string>>(Channel::Pipelines);
 		auto pls = f.set<std::list<std::string>>(Channel::Pipelines);
@@ -73,7 +93,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 
 	for (auto &i : operators_) {
 		if (i.instances.size() < 1) {
-			i.instances.push_back(i.maker->make());
+			i.instances.push_back(i.maker->make(this));
 		}
 
 		if (i.instances[0]->type() == Operator::Type::OneToOne) {
@@ -82,7 +102,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 				//i.instances.push_back(i.maker->make());
 			//}
 			if (in.frames.size() > 1 && i.instances.size() < 2 && !i.instances[0]->isMemoryHeavy()) {
-				i.instances.push_back(i.maker->make());
+				i.instances.push_back(i.maker->make(this));
 			}
 
 			for (size_t j=0; j<in.frames.size(); ++j) {
@@ -94,6 +114,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 				if (instance->enabled()) {
 					try {
 						instance->apply(in.frames[j].cast<ftl::rgbd::Frame>(), out.frames[j].cast<ftl::rgbd::Frame>(), stream_actual);
+						//cudaSafeCall(cudaStreamSynchronize(stream_actual));
 					} catch (const std::exception &e) {
 						LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what();
 						in.frames[j].message(ftl::data::Message::Error_OPERATOR_EXCEPTION, "Operator exception");
@@ -109,6 +130,7 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 			if (instance->enabled()) {
 				try {
 					instance->apply(in, out, stream_actual);
+					//cudaSafeCall(cudaStreamSynchronize(stream_actual));
 				} catch (const std::exception &e) {
 					LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what();
 					if (in.frames.size() > 0) in.frames[0].message(ftl::data::Message::Error_OPERATOR_EXCEPTION, "Operator exception");
@@ -121,12 +143,17 @@ bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 
 	success = waitAll(stream_actual) && success;
 
-	if (stream == 0) {
-		cudaSafeCall(cudaStreamSynchronize(stream_actual));
+	if (cb) {
+		cudaCallback(stream_actual, [this,cb]() {
+			busy_.clear();
+			ftl::pool.push([cb](int id) { cb(); });
+		});
+	} else {
+		//cudaSafeCall(cudaStreamSynchronize(stream_actual));
+		busy_.clear();
 	}
 
-	busy_.clear();
-	return success;
+	return true;
 }
 
 bool Graph::waitAll(cudaStream_t stream) {
@@ -143,17 +170,20 @@ bool Graph::waitAll(cudaStream_t stream) {
 	return true;
 }
 
-bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) {
-	if (!value("enabled", true)) return false;
+bool Graph::apply(Frame &in, Frame &out, const std::function<void()> &cb) {
+	if (!value("enabled", true)) return true;
 
-	auto stream_actual = (stream == 0) ? stream_ : stream;
+	auto stream_actual = in.stream();
 	bool success = true;
 
 	if (busy_.test_and_set()) {
 		LOG(ERROR) << "Pipeline already in use: " << in.timestamp();
+		//if (cb) cb();
 		return false;
 	}
 
+	valid_buffers_.clear();
+
 	if (!out.hasOwn(Channel::Pipelines)) out.create<std::list<std::string>>(Channel::Pipelines);
 	auto pls = out.set<std::list<std::string>>(Channel::Pipelines);
 	pls = getID();
@@ -161,7 +191,7 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) {
 	for (auto &i : operators_) {
 		// Make sure there are enough instances
 		if (i.instances.size() < 1) {
-			i.instances.push_back(i.maker->make());
+			i.instances.push_back(i.maker->make(this));
 		}
 
 		auto *instance = i.instances[0];
@@ -169,6 +199,7 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) {
 		if (instance->enabled()) {
 			try {
 				instance->apply(in, out, stream_actual);
+				//cudaSafeCall(cudaStreamSynchronize(stream_actual));
 			} catch (const std::exception &e) {
 				LOG(ERROR) << "Operator exception for '" << instance->config()->getID() << "': " << e.what();
 				success = false;
@@ -180,12 +211,18 @@ bool Graph::apply(Frame &in, Frame &out, cudaStream_t stream) {
 
 	success = waitAll(stream_actual) && success;
 
-	if (stream == 0) {
-		cudaSafeCall(cudaStreamSynchronize(stream_actual));
+	if (cb) {
+		cudaCallback(stream_actual, [this,cb]() {
+			busy_.clear();
+			ftl::pool.push([cb](int id) { cb(); });
+		});
+	} else {
+		//cudaSafeCall(cudaStreamSynchronize(stream_actual));
+		busy_.clear();
 	}
 
-	busy_.clear();
-	return success;
+	//busy_.clear();
+	return true;
 }
 
 ftl::Configurable *Graph::_append(ftl::operators::detail::ConstructionHelperBase *m) {
diff --git a/components/operators/src/poser.cpp b/components/operators/src/poser.cpp
index a03682767163c03b089ec985342ca1f846e19996..8c748e041651947ab4e2ce39fb1efabcaa4a02c5 100644
--- a/components/operators/src/poser.cpp
+++ b/components/operators/src/poser.cpp
@@ -12,7 +12,7 @@ static SHARED_MUTEX smtx;
 std::unordered_map<std::string,ftl::operators::Poser::PoseState> Poser::pose_db__;
 std::unordered_map<int,std::list<ftl::codecs::Shape3D*>> Poser::fs_shapes__;
 
-Poser::Poser(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+Poser::Poser(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
diff --git a/components/operators/src/segmentation.cpp b/components/operators/src/segmentation.cpp
index 91a314da2a534acf43a7d5d007872c5064a2c98c..e256dc091a5c7c701729ce005aaf4e11f3f76591 100644
--- a/components/operators/src/segmentation.cpp
+++ b/components/operators/src/segmentation.cpp
@@ -1,11 +1,15 @@
 #include <ftl/operators/segmentation.hpp>
 #include "segmentation_cuda.hpp"
+#include <opencv2/cudawarping.hpp>
+
+#include <loguru.hpp>
 
 using ftl::operators::CrossSupport;
 using ftl::operators::VisCrossSupport;
 using ftl::codecs::Channel;
+using cv::cuda::GpuMat;
 
-CrossSupport::CrossSupport(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+CrossSupport::CrossSupport(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -21,6 +25,27 @@ bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream
 		return false;
 	}
 
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
+	const auto &intrin = in.getLeft();
+	cv::Size size(intrin.width, intrin.height);
+
+	const GpuMat &rgb = in.get<GpuMat>(Channel::Colour);
+	if (rgb.empty()) return false;
+
+	GpuMat rgb_buf;
+	if (rgb.size() != size) {
+		if (graph()->hasBuffer(Buffer::LowLeft, in.source())) {
+			rgb_buf = graph()->getBuffer(Buffer::LowLeft, in.source());
+		} else {
+			auto &t = graph()->createBuffer(Buffer::LowLeft, in.source());
+			cv::cuda::resize(rgb, t, size, 0, 0, cv::INTER_LINEAR, cvstream);
+			rgb_buf = t;
+		}
+	} else {
+		rgb_buf = rgb;
+	}
+
 	if (use_mask && !in.hasChannel(Channel::Support2)) {
 		if (!in.hasChannel(Channel::Mask)) {
 			out.message(ftl::data::Message::Warning_MISSING_CHANNEL, "Missing Mask channel in Support operator");
@@ -28,15 +53,15 @@ bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream
 		}
 		ftl::cuda::support_region(
 			in.createTexture<uint8_t>(Channel::Mask),
-			out.createTexture<uchar4>(Channel::Support2, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())),
+			out.createTexture<uchar4>(Channel::Support2, ftl::rgbd::Format<uchar4>(rgb_buf.size())),
 			config()->value("v_max", 5),
 			config()->value("h_max", 5),
 			config()->value("symmetric", false), stream
 		);
 	} else if (!in.hasChannel(Channel::Support1)) {
 		ftl::cuda::support_region(
-			in.createTexture<uchar4>(Channel::Colour),
-			out.createTexture<uchar4>(Channel::Support1, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())),
+			rgb_buf,
+			out.createTexture<uchar4>(Channel::Support1, ftl::rgbd::Format<uchar4>(rgb_buf.size())),
 			config()->value("tau", 10.0f),
 			config()->value("v_max", 5),
 			config()->value("h_max", 5),
@@ -50,7 +75,7 @@ bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream
 
 
 
-VisCrossSupport::VisCrossSupport(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+VisCrossSupport::VisCrossSupport(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
diff --git a/components/operators/src/segmentation.cu b/components/operators/src/segmentation.cu
index de1268931a7b91bb09f0e3dc6385a8d9c7a4a9d8..c8f780363622c00292aebea375e05eb98fb722c3 100644
--- a/components/operators/src/segmentation.cu
+++ b/components/operators/src/segmentation.cu
@@ -25,20 +25,20 @@ __device__ inline float cross<float>(float p1, float p2) {
 }
 
 template <typename T, bool SYM>
-__device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, int y, float tau, int v_max, int h_max) {
+__device__ uchar4 calculate_support_region(const T* __restrict__ img, int width, int height, int pitch, int x, int y, float tau, int v_max, int h_max) {
     int x_min = max(0, x - h_max);
-    int x_max = min(img.width()-1, static_cast<unsigned int>(x + h_max));
+    int x_max = min(width-1, static_cast<unsigned int>(x + h_max));
     int y_min = max(0, y - v_max);
-    int y_max = min(img.height()-1, static_cast<unsigned int>(y + v_max));
+    int y_max = min(height-1, static_cast<unsigned int>(y + v_max));
 
 	uchar4 result = make_uchar4(0, 0, 0, 0);
 
-	auto colour = img.tex2D((float)x+0.5f,(float)y+0.5f);
+	auto colour = img[x+y*pitch];
 	auto prev_colour = colour;
 
 	int u;
     for (u=x-1; u >= x_min; --u) {
-		auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f);
+		auto next_colour = img[u+y*pitch];
         if (cross(prev_colour, next_colour) > tau) {
             result.x = x - u - 1;
             break;
@@ -49,7 +49,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i
 	
 	prev_colour = colour;
     for (u=x+1; u <= x_max; ++u) {
-		auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f);
+		auto next_colour = img[u+y*pitch];
         if (cross(prev_colour, next_colour) > tau) {
             result.y = u - x - 1;
             break;
@@ -61,7 +61,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i
 	int v;
 	prev_colour = colour;
     for (v=y-1; v >= y_min; --v) {
-		auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f);
+		auto next_colour = img[x+v*pitch];
         if (cross(prev_colour, next_colour) > tau) {
             result.z = y - v - 1;
             break;
@@ -72,7 +72,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i
 
 	prev_colour = colour;
     for (v=y+1; v <= y_max; ++v) {
-		auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f);
+		auto next_colour = img[x+v*pitch];
         if (cross(prev_colour, next_colour) > tau) {
             result.w = v - y - 1;
             break;
@@ -91,19 +91,19 @@ __device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, i
     return result;
 }
 
-__device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, int x, int y, int v_max, int h_max) {
+__device__ uchar4 calculate_support_region(const uint8_t* __restrict__ img, int width, int height, int pitch, int x, int y, int v_max, int h_max) {
     int x_min = max(0, x - h_max);
-    int x_max = min(img.width()-1, static_cast<unsigned int>(x + h_max));
+    int x_max = min(width-1, static_cast<unsigned int>(x + h_max));
     int y_min = max(0, y - v_max);
-    int y_max = min(img.height()-1, static_cast<unsigned int>(y + v_max));
+    int y_max = min(height-1, static_cast<unsigned int>(y + v_max));
 
 	uchar4 result = make_uchar4(0, 0, 0, 0);
 
-	Mask m1(img.tex2D(x,y));
+	Mask m1(img[x+y*pitch]);
 
 	int u;
     for (u=x-1; u >= x_min; --u) {
-		Mask m2(img.tex2D(u,y));
+		Mask m2(img[u+y*pitch]);
         if (m2.isDiscontinuity()) {
             result.x = x - u - 1;
             break;
@@ -112,7 +112,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, in
 	if (u < x_min) result.x = x - x_min;
 	
     for (u=x+1; u <= x_max; ++u) {
-		Mask m2(img.tex2D(u,y));
+		Mask m2(img[u+y*pitch]);
         if (m2.isDiscontinuity()) {
             result.y = u - x - 1;
             break;
@@ -122,7 +122,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, in
 
 	int v;
     for (v=y-1; v >= y_min; --v) {
-		Mask m2(img.tex2D(x,v));
+		Mask m2(img[x+v*pitch]);
         if (m2.isDiscontinuity()) {
             result.z = y - v - 1;
             break;
@@ -131,7 +131,7 @@ __device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, in
 	if (v < y_min) result.z = y - y_min;
 
     for (v=y+1; v <= y_max; ++v) {
-		Mask m2(img.tex2D(x,v));
+		Mask m2(img[x+v*pitch]);
         if (m2.isDiscontinuity()) {
             result.w = v - y - 1;
             break;
@@ -150,26 +150,26 @@ __device__ uchar4 calculate_support_region(const TextureObject<uint8_t> &img, in
 }
 
 template <typename T, bool SYM>
-__global__ void support_region_kernel(TextureObject<T> img, TextureObject<uchar4> region, float tau, int v_max, int h_max) {
+__global__ void support_region_kernel(const T* __restrict__ img, int width, int height, int pitch, TextureObject<uchar4> region, float tau, int v_max, int h_max) {
     const int x = blockIdx.x*blockDim.x + threadIdx.x;
     const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-    if (x < 0 || y < 0 || x >= img.width() || y >= img.height()) return;
+    if (x < 0 || y < 0 || x >= width || y >= height) return;
 
-    region(x,y) = calculate_support_region<T,SYM>(img, x, y, tau, v_max, h_max);
+    region(x,y) = calculate_support_region<T,SYM>(img, width, height, pitch, x, y, tau, v_max, h_max);
 }
 
-__global__ void support_region_kernel(TextureObject<uint8_t> img, TextureObject<uchar4> region, int v_max, int h_max) {
+__global__ void support_region_kernel(const uint8_t* __restrict__ img, int width, int height, int pitch, TextureObject<uchar4> region, int v_max, int h_max) {
     const int x = blockIdx.x*blockDim.x + threadIdx.x;
     const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-    if (x < 0 || y < 0 || x >= img.width() || y >= img.height()) return;
+    if (x < 0 || y < 0 || x >= width || y >= height) return;
 
-    region(x,y) = calculate_support_region(img, x, y, v_max, h_max);
+    region(x,y) = calculate_support_region(img, width, height, pitch, x, y, v_max, h_max);
 }
 
 void ftl::cuda::support_region(
-        ftl::cuda::TextureObject<uchar4> &colour,
+        const cv::cuda::GpuMat &colour,
         ftl::cuda::TextureObject<uchar4> &region,
         float tau,
         int v_max,
@@ -180,8 +180,8 @@ void ftl::cuda::support_region(
     const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	if (sym) support_region_kernel<uchar4, true><<<gridSize, blockSize, 0, stream>>>(colour, region, tau, v_max, h_max);
-	else support_region_kernel<uchar4, false><<<gridSize, blockSize, 0, stream>>>(colour, region, tau, v_max, h_max);
+	if (sym) support_region_kernel<uchar4, true><<<gridSize, blockSize, 0, stream>>>((uchar4*)colour.data, colour.cols, colour.rows, colour.step/4, region, tau, v_max, h_max);
+	else support_region_kernel<uchar4, false><<<gridSize, blockSize, 0, stream>>>((uchar4*)colour.data, colour.cols, colour.rows, colour.step/4, region, tau, v_max, h_max);
     cudaSafeCall( cudaGetLastError() );
 
 
@@ -202,7 +202,7 @@ void ftl::cuda::support_region(
 	const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	support_region_kernel<float, true><<<gridSize, blockSize, 0, stream>>>(depth, region, tau, v_max, h_max);
+	support_region_kernel<float, true><<<gridSize, blockSize, 0, stream>>>(depth.devicePtr(), depth.width(), depth.height(), depth.pixelPitch(), region, tau, v_max, h_max);
 	cudaSafeCall( cudaGetLastError() );
 
 
@@ -222,7 +222,7 @@ void ftl::cuda::support_region(
 	const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
-	support_region_kernel<<<gridSize, blockSize, 0, stream>>>(mask, region, v_max, h_max);
+	support_region_kernel<<<gridSize, blockSize, 0, stream>>>(mask.devicePtr(), mask.width(), mask.height(), mask.pixelPitch(), region, v_max, h_max);
 	cudaSafeCall( cudaGetLastError() );
 
 
diff --git a/components/operators/src/segmentation_cuda.hpp b/components/operators/src/segmentation_cuda.hpp
index 445ed7e6d0f2624808232e66d834a360c6d798e8..49faa4e87fb58df03da7f5332c5313e7f35121c1 100644
--- a/components/operators/src/segmentation_cuda.hpp
+++ b/components/operators/src/segmentation_cuda.hpp
@@ -7,7 +7,7 @@ namespace ftl {
 namespace cuda {
 
 void support_region(
-		ftl::cuda::TextureObject<uchar4> &colour,
+		const cv::cuda::GpuMat &colour,
 		ftl::cuda::TextureObject<uchar4> &region,
 		float tau, int v_max, int h_max, bool sym,
 		cudaStream_t stream);
diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp
index 0690744f84a508520b6e02089d8d2e19a08b14fd..347b6b7754b49363be98578a94e1d049b6ffec1a 100644
--- a/components/operators/src/smoothing.cpp
+++ b/components/operators/src/smoothing.cpp
@@ -18,7 +18,7 @@ using ftl::codecs::Channel;
 using ftl::rgbd::Format;
 using cv::cuda::GpuMat;
 
-HFSmoother::HFSmoother(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+HFSmoother::HFSmoother(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -71,7 +71,7 @@ bool HFSmoother::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 
 // ====== Smoothing Channel ====================================================
 
-SmoothChannel::SmoothChannel(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+SmoothChannel::SmoothChannel(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -135,7 +135,7 @@ bool SmoothChannel::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStrea
 
 // ===== MLS ===================================================================
 
-SimpleMLS::SimpleMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), temp_(ftl::data::Frame::make_standalone()) {
+SimpleMLS::SimpleMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), temp_(ftl::data::Frame::make_standalone()) {
 
 }
 
@@ -184,7 +184,7 @@ bool SimpleMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 
 
 
-ColourMLS::ColourMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), temp_(ftl::data::Frame::make_standalone()) {
+ColourMLS::ColourMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), temp_(ftl::data::Frame::make_standalone()) {
 
 }
 
@@ -206,6 +206,23 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 	}
 
 	auto &temp = temp_.cast<ftl::rgbd::Frame>();
+	auto size = in.get<GpuMat>(Channel::Depth).size();
+
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
+	const GpuMat &rgb = in.get<GpuMat>(Channel::Colour);
+	GpuMat rgb_buf;
+	if (rgb.size() != size) {
+		if (graph()->hasBuffer(Buffer::LowLeft, in.source())) {
+			rgb_buf = graph()->getBuffer(Buffer::LowLeft, in.source());
+		} else {
+			auto &t = graph()->createBuffer(Buffer::LowLeft, in.source());
+			cv::cuda::resize(rgb, t, size, 0, 0, cv::INTER_LINEAR, cvstream);
+			rgb_buf = t;
+		}
+	} else {
+		rgb_buf = rgb;
+	}
 
 	// FIXME: Assume in and out are the same frame.
 	for (int i=0; i<iters; ++i) {
@@ -229,7 +246,8 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 				temp.createTexture<half4>(Channel::Normals, ftl::rgbd::Format<half4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
 				in.createTexture<float>(Channel::Depth),
 				temp.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
-				in.createTexture<uchar4>(Channel::Colour),
+				//in.createTexture<uchar4>(Channel::Colour),
+				rgb_buf,
 				thresh,
 				col_smooth,
 				filling,
@@ -250,7 +268,7 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 
 // ====== Aggregating MLS ======================================================
 
-AggreMLS::AggreMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), temp_(ftl::data::Frame::make_standalone()) {
+AggreMLS::AggreMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), temp_(ftl::data::Frame::make_standalone()) {
 	temp_.store();
 }
 
@@ -282,6 +300,22 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s
 	normals_horiz_.create(size.height, size.width);
 	centroid_vert_.create(size.width, size.height);
 
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
+	const GpuMat &rgb = in.get<GpuMat>(Channel::Colour);
+	GpuMat rgb_buf;
+	if (rgb.size() != size) {
+		if (graph()->hasBuffer(Buffer::LowLeft, in.source())) {
+			rgb_buf = graph()->getBuffer(Buffer::LowLeft, in.source());
+		} else {
+			auto &t = graph()->createBuffer(Buffer::LowLeft, in.source());
+			cv::cuda::resize(rgb, t, size, 0, 0, cv::INTER_LINEAR, cvstream);
+			rgb_buf = t;
+		}
+	} else {
+		rgb_buf = rgb;
+	}
+
 	// FIXME: Assume in and out are the same frame.
 	for (int i=0; i<iters; ++i) {
 
@@ -292,7 +326,8 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s
 				normals_horiz_,
 				in.createTexture<float>(Channel::Depth),
 				centroid_horiz_,
-				in.createTexture<uchar4>(Channel::Colour),
+				//in.createTexture<uchar4>(Channel::Colour),
+				rgb_buf,
 				thresh,
 				col_smooth,
 				radius,
@@ -333,7 +368,8 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s
 				temp.createTexture<half4>(Channel::Normals, ftl::rgbd::Format<half4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
 				in.createTexture<float>(Channel::Depth),
 				temp.createTexture<float>(Channel::Depth, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
-				in.createTexture<uchar4>(Channel::Colour),
+				//in.createTexture<uchar4>(Channel::Colour),
+				rgb_buf,
 				thresh,
 				col_smooth,
 				false,
@@ -352,7 +388,7 @@ bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t s
 
 // ====== Adaptive MLS =========================================================
 
-AdaptiveMLS::AdaptiveMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg), temp_(ftl::data::Frame::make_standalone()) {
+AdaptiveMLS::AdaptiveMLS(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), temp_(ftl::data::Frame::make_standalone()) {
 
 }
 
diff --git a/components/operators/src/smoothing_cuda.hpp b/components/operators/src/smoothing_cuda.hpp
index 5454d91e2aea43c49ae806524a98bc0cc85ef086..c44c7787b9fe8dd18d6e148d96c8d71803ba368c 100644
--- a/components/operators/src/smoothing_cuda.hpp
+++ b/components/operators/src/smoothing_cuda.hpp
@@ -35,7 +35,8 @@ void colour_mls_smooth_csr(
 		ftl::cuda::TextureObject<half4> &normals_out,
 		ftl::cuda::TextureObject<float> &depth_in,
 		ftl::cuda::TextureObject<float> &depth_out,
-		ftl::cuda::TextureObject<uchar4> &colour_in,
+		//ftl::cuda::TextureObject<uchar4> &colour_in,
+		const cv::cuda::GpuMat &colour_in,
 		float smoothing,
 		float colour_smoothing,
 		bool filling,
@@ -56,7 +57,8 @@ void mls_aggr_horiz(
 		ftl::cuda::TextureObject<half4> &normals_out,
 		ftl::cuda::TextureObject<float> &depth_in,
 		ftl::cuda::TextureObject<float4> &centroid_out,
-		ftl::cuda::TextureObject<uchar4> &colour_in,
+		//ftl::cuda::TextureObject<uchar4> &colour_in,
+		const cv::cuda::GpuMat &colour_in,
 		float smoothing,
 		float colour_smoothing,
 		int radius,
diff --git a/components/operators/src/weighting.cpp b/components/operators/src/weighting.cpp
index 5864f8caf87ee241b690e23072e26e0e2f3ecb26..33f1a922d7ec8306899db071c43fd5eeac94f4e8 100644
--- a/components/operators/src/weighting.cpp
+++ b/components/operators/src/weighting.cpp
@@ -9,7 +9,7 @@ using ftl::operators::CullWeight;
 using ftl::operators::DegradeWeight;
 using ftl::codecs::Channel;
 
-PixelWeights::PixelWeights(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+PixelWeights::PixelWeights(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -45,9 +45,10 @@ bool PixelWeights::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream
 	Channel dchan = (in.hasChannel(Channel::Depth)) ? Channel::Depth : Channel::GroundTruth;
 
 	if (!out.hasChannel(Channel::Mask)) {
+		cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 		auto &m = out.create<cv::cuda::GpuMat>(Channel::Mask);
 		m.create(in.get<cv::cuda::GpuMat>(dchan).size(), CV_8UC1);
-		m.setTo(cv::Scalar(0));
+		m.setTo(cv::Scalar(0), cvstream);
 	}
 
 	if (output_normals) {
@@ -76,7 +77,7 @@ bool PixelWeights::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream
 	return true;
 }
 
-CullWeight::CullWeight(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+CullWeight::CullWeight(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
@@ -102,7 +103,7 @@ bool CullWeight::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 
 
 
-DegradeWeight::DegradeWeight(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+DegradeWeight::DegradeWeight(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg) {
 
 }
 
diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp
index 1ed37cca6563ad40729bf81d68a985ce3e832ee0..dab897d5696f78ebffbec097485778bebd57c03b 100644
--- a/components/renderers/cpp/src/CUDARender.cpp
+++ b/components/renderers/cpp/src/CUDARender.cpp
@@ -99,7 +99,8 @@ CUDARender::CUDARender(nlohmann::json &config) : ftl::render::FSRenderer(config)
 		}
 	}
 
-	cudaSafeCall(cudaStreamCreate(&stream_));
+	//cudaSafeCall(cudaStreamCreate(&stream_));
+	stream_ = 0;
 	last_frame_ = -1;
 
 	temp_.store();
@@ -249,6 +250,9 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 
 	int valid_count = 0;
 
+	// FIXME: Is it possible to remember previously if there should be depth?
+	bool use_depth = scene_->anyHasChannel(Channel::Depth) || scene_->anyHasChannel(Channel::GroundTruth);
+
 	// For each source depth map
 	for (size_t i=0; i < scene_->frames.size(); ++i) {
 		//if (!scene_->hasFrame(i)) continue;
@@ -260,6 +264,11 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 			continue;
 		}
 
+		// We have the needed depth data?
+		if (use_depth && !f.hasOwn(Channel::Depth) && !f.hasOwn(Channel::GroundTruth)) {
+			continue;
+		}
+
 		++valid_count;
 
 		//auto pose = MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
@@ -272,20 +281,22 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 		auto &screenbuffer = _getScreenBuffer(bufsize);
 
 		// Calculate and save virtual view screen position of each source pixel
-		if (f.hasChannel(Channel::Depth)) {
-			ftl::cuda::screen_coord(
-				f.createTexture<float>(Channel::Depth),
-				depthbuffer,
-				screenbuffer,
-				params_, transform, f.getLeftCamera(), stream
-			);
-		} else if (f.hasChannel(Channel::GroundTruth)) {
-			ftl::cuda::screen_coord(
-				f.createTexture<float>(Channel::GroundTruth),
-				depthbuffer,
-				screenbuffer,
-				params_, transform, f.getLeftCamera(), stream
-			);
+		if (use_depth) {
+			if (f.hasChannel(Channel::Depth)) {
+				ftl::cuda::screen_coord(
+					f.createTexture<float>(Channel::Depth),
+					depthbuffer,
+					screenbuffer,
+					params_, transform, f.getLeftCamera(), stream
+				);
+			} else if (f.hasChannel(Channel::GroundTruth)) {
+				ftl::cuda::screen_coord(
+					f.createTexture<float>(Channel::GroundTruth),
+					depthbuffer,
+					screenbuffer,
+					params_, transform, f.getLeftCamera(), stream
+				);
+			}
 		} else {
 			// Constant depth version
 			ftl::cuda::screen_coord(
@@ -510,6 +521,8 @@ void CUDARender::begin(ftl::rgbd::Frame &out, ftl::codecs::Channel chan) {
 		throw FTL_Error("Already rendering");
 	}
 
+	stream_ = out.stream();
+
 	out_ = &out;
 	const auto &camera = out.getLeftCamera();
 	cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream_);
diff --git a/components/renderers/cpp/src/clipping.cu b/components/renderers/cpp/src/clipping.cu
index 016e08855e6e042c90662a98d675d9e38ce344bb..d73e7a50bf2000ae6e2ba074280a21a0c7e880da 100644
--- a/components/renderers/cpp/src/clipping.cu
+++ b/components/renderers/cpp/src/clipping.cu
@@ -27,12 +27,15 @@ __global__ void clipping_kernel(ftl::cuda::TextureObject<float> depth, ftl::cuda
 	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-	if (x < depth.width() && y < depth.height()) {
-		float d = depth(x,y);
-		float4 p = make_float4(camera.screenToCam(x,y,d), 0.0f);
+	const float sx = float(x) / float(colour.width()) * float(depth.width());
+	const float sy = float(y) / float(colour.height()) * float(depth.height());
+
+	if (sx >= 0.0f && sx < depth.width() && sy < depth.height() && sy >= 0.0f) {
+		float d = depth(sx,sy);
+		float4 p = make_float4(camera.screenToCam(sx,sy,d), 0.0f);
 
 		if (d <= camera.minDepth || d >= camera.maxDepth || isClipped(p, clip)) {
-			depth(x,y) = 0.0f;
+			depth(sx,sy) = 0.0f;
 			colour(x,y) = make_uchar4(0,0,0,0);
 		}
 	}
@@ -54,7 +57,7 @@ void ftl::cuda::clipping(ftl::cuda::TextureObject<float> &depth,
 		const ftl::rgbd::Camera &camera,
 		const ClipSpace &clip, cudaStream_t stream) {
 
-	const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
 	clipping_kernel<<<gridSize, blockSize, 0, stream>>>(depth, colour, camera, clip);
diff --git a/components/renderers/cpp/src/colouriser.cpp b/components/renderers/cpp/src/colouriser.cpp
index 574073d378414068adf66143c6beb95926e65b54..a8399d016b3229fe04d009d49ca9b902f4e6e68e 100644
--- a/components/renderers/cpp/src/colouriser.cpp
+++ b/components/renderers/cpp/src/colouriser.cpp
@@ -2,6 +2,7 @@
 #include "splatter_cuda.hpp"
 #include <ftl/cuda/colour_cuda.hpp>
 #include <ftl/cuda/normals.hpp>
+#include <ftl/operators/cuda/mask.hpp>
 
 #include <opencv2/cudaarithm.hpp>
 #include <opencv2/cudaimgproc.hpp>
@@ -119,9 +120,7 @@ TextureObject<uchar4> &Colouriser::colourise(ftl::rgbd::Frame &f, Channel c, cud
 	}
 
 	switch (c) {
-	case Channel::Overlay		: return f.createTexture<uchar4>(c);
-	case Channel::ColourHighRes	:
-	case Channel::RightHighRes	:
+	case Channel::Overlay		:
 	case Channel::Colour		:
 	case Channel::Colour2		: return _processColour(f,c,stream);
 	case Channel::GroundTruth	:
diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index e1021651ce1989cbf06b0a4b00fc04a1d457b844..41088cc226e34f4916c6919d8edc465906dbd499 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -165,11 +165,6 @@ namespace cuda {
 		uchar4 bad_colour,
 		cudaStream_t stream);
 
-	void show_mask(
-        ftl::cuda::TextureObject<uchar4> &colour,
-		ftl::cuda::TextureObject<uint8_t> &mask,
-        int id, uchar4 style, cudaStream_t stream);
-
 	void merge_convert_depth(
         ftl::cuda::TextureObject<int> &d1,
 		ftl::cuda::TextureObject<float> &d2,
diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
index e3a77ce2fab9af14b2b3b1f6ebb0c34a348bf9a4..99cbd5a1a9e8eeb207bc57d9b161d9dc514971d5 100644
--- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
@@ -65,6 +65,7 @@ class VideoFrame {
 	const cv::Mat &getCPU() const;
 	const cv::cuda::GpuMat &getGPU() const;
 
+	/// gets cv::Mat for
 	cv::Mat &setCPU();
 	cv::cuda::GpuMat &setGPU();
 
@@ -95,6 +96,8 @@ class Frame : public ftl::data::Frame {
 	ftl::rgbd::Camera &setRight();
 	Eigen::Matrix4d &setPose();
 
+	cv::Size getSize(ftl::codecs::Channel c=ftl::codecs::Channel::Left) const;
+
 	ftl::calibration::CalibrationData& setCalibration();
 	const ftl::calibration::CalibrationData& getCalibration() const;
 
diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp
index a4ace9e46eca90c70fa2910645d5513496cf4fe9..f6379f977a1fd936c2910dbc0a0c8d03b8fdedd1 100644
--- a/components/rgbd-sources/src/frame.cpp
+++ b/components/rgbd-sources/src/frame.cpp
@@ -85,6 +85,7 @@ const cv::cuda::GpuMat &VideoFrame::getGPU() const {
 }
 
 cv::Mat &VideoFrame::setCPU() {
+	validhost = true;
 	return host;
 }
 
@@ -96,6 +97,7 @@ cv::cuda::GpuMat &VideoFrame::setGPU() {
 void ftl::rgbd::Frame::upload(ftl::codecs::Channel c) {
 	auto &vframe = set<VideoFrame>(c);
 	const auto &cpumat = vframe.getCPU();
+	LOG(WARNING) << "Sync Upload: " << int(c);
 	vframe.createGPU().upload(cpumat);
 }
 
@@ -114,6 +116,18 @@ unsigned int ftl::rgbd::Frame::getOpenGL(ftl::codecs::Channel c) const {
 	return vframe.getOpenGL();
 }
 
+cv::Size ftl::rgbd::Frame::getSize(ftl::codecs::Channel c) const {
+	if (hasChannel(c)) {
+		const auto &f = get<VideoFrame>(c);
+		if (f.isGPU()) {
+			return f.getGPU().size();
+		} else {
+			return f.getCPU().size();
+		}
+	} else {
+		throw FTL_Error("Channel does not exists: " << int(c));
+	}
+}
 
 const ftl::rgbd::Camera &ftl::rgbd::Frame::getLeftCamera() const {
 	return std::get<0>(this->get<std::tuple<ftl::rgbd::Camera, ftl::codecs::Channel, int>>(ftl::codecs::Channel::Calibration));
diff --git a/components/rgbd-sources/src/sources/stereovideo/device.hpp b/components/rgbd-sources/src/sources/stereovideo/device.hpp
index f575ea3aec6b23ccbfe755690e3e6bc4a94e7b6d..0b7d4effb35643ae0fddc90db3240da6d938775b 100644
--- a/components/rgbd-sources/src/sources/stereovideo/device.hpp
+++ b/components/rgbd-sources/src/sources/stereovideo/device.hpp
@@ -35,16 +35,11 @@ class Device : public Configurable {
 	//virtual const std::vector<DeviceDetails> &listDevices()=0;
 
 	virtual bool grab()=0;
-	virtual bool get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::GpuMat &h_l, cv::Mat &h_r, StereoRectification *c, cv::cuda::Stream &stream)=0;
+	virtual bool get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream)=0;
 
 	virtual unsigned int width() const =0;
 	virtual unsigned int height() const =0;
 
-	virtual unsigned int fullWidth() const =0;
-	virtual unsigned int fullHeight() const =0;
-
-	inline bool hasHigherRes() const { return fullWidth() != width(); }
-
 	virtual double getTimestamp() const =0;
 
 	virtual bool isStereo() const =0;
diff --git a/components/rgbd-sources/src/sources/stereovideo/opencv.cpp b/components/rgbd-sources/src/sources/stereovideo/opencv.cpp
index 4ce893c6b0b6e1b9ab25de43c3789ad8e3b40cf6..a781765a821df181ff8e34381d7ffd70d7ede103 100644
--- a/components/rgbd-sources/src/sources/stereovideo/opencv.cpp
+++ b/components/rgbd-sources/src/sources/stereovideo/opencv.cpp
@@ -140,14 +140,9 @@ OpenCVDevice::OpenCVDevice(nlohmann::json &config, bool stereo)
 	width_ = frame.cols;
 	height_ = frame.rows;
 
-	dwidth_ = value("depth_width", width_);
-	float aspect = float(height_) / float(width_);
-	dheight_ = value("depth_height", std::min(uint32_t(aspect*float(dwidth_)), height_)) & 0xFFFe;
-
 	// Allocate page locked host memory for fast GPU transfer
-	left_hm_ = cv::cuda::HostMem(dheight_, dwidth_, CV_8UC4);
-	right_hm_ = cv::cuda::HostMem(dheight_, dwidth_, CV_8UC4);
-	hres_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4);
+	left_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4);
+	right_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4);
 
 	interpolation_ = value("inter_cubic", true) ? cv::INTER_CUBIC : cv::INTER_LINEAR;
 	on("inter_cubic", [this](){
@@ -332,47 +327,34 @@ bool OpenCVDevice::grab() {
 	return true;
 }
 
-bool OpenCVDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out,
-	cv::cuda::GpuMat &l_hres_out, cv::Mat &r_hres_out, StereoRectification *c, cv::cuda::Stream &stream) {
+bool OpenCVDevice::get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream) {
 
 	Mat l, r ,hres;
 
 	// Use page locked memory
 	l = left_hm_.createMatHeader();
 	r = right_hm_.createMatHeader();
-	hres = hres_hm_.createMatHeader();
-
-	Mat &lfull = (!hasHigherRes()) ? l : hres;
-	Mat &rfull = (!hasHigherRes()) ? r : rtmp_;
 
 	if (!camera_a_) return false;
 
-	//std::future<bool> future_b;
 	if (camera_b_) {
-		//future_b = std::move(ftl::pool.push([this,&rfull,&r,c,&r_out,&r_hres_out,&stream](int id) {
-			if (!camera_b_->retrieve(frame_r_)) {
-				LOG(ERROR) << "Unable to read frame from camera B";
-				return false;
-			} else {
-				cv::cvtColor(frame_r_, rtmp2_, cv::COLOR_BGR2BGRA);
-
-				//if (stereo_) {
-					c->rectify(rtmp2_, rfull, Channel::Right);
-
-					if (hasHigherRes()) {
-						// TODO: Use threads?
-						cv::resize(rfull, r, r.size(), 0.0, 0.0, interpolation_);
-						r_hres_out = rfull;
-					}
-					else {
-						r_hres_out = Mat();
-					}
-				//}
-
-				r_out.upload(r, stream);
-			}
-			//return true;
-		//}));
+		if (!camera_b_->retrieve(frame_r_)) {
+			LOG(ERROR) << "Unable to read frame from camera B";
+			return false;
+		}
+		else {
+			cv::cvtColor(frame_r_, rtmp_, cv::COLOR_BGR2BGRA);
+
+			//if (stereo_) {
+				c->rectify(rtmp_, r, Channel::Right);
+			//}
+
+			auto& f_right = frame.create<ftl::rgbd::VideoFrame>(Channel::Right);
+			cv::cuda::GpuMat& r_out = f_right.createGPU();
+			cv::Mat &r_host = f_right.setCPU();
+			r_out.upload(r, stream);
+			r.copyTo(r_host);
+		}
 	}
 
 	if (camera_b_) {
@@ -383,11 +365,8 @@ bool OpenCVDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cud
 			return false;
 		}
 
-		/*if (camera_b_ && !camera_b_->retrieve(rfull)) {
-			LOG(ERROR) << "Unable to read frame from camera B";
-			return false;
-		}*/
-	} else {
+	}
+	else {
 		if (!camera_a_->read(frame_l_)) {
 			LOG(ERROR) << "Unable to read frame from camera A";
 			return false;
@@ -396,32 +375,19 @@ bool OpenCVDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cud
 
 	if (stereo_) {
 		cv::cvtColor(frame_l_, ltmp_, cv::COLOR_BGR2BGRA);
-		//FTL_Profile("Rectification", 0.01);
-		//c->rectifyStereo(lfull, rfull);
-		c->rectify(ltmp_, lfull, Channel::Left);
-
-		// Need to resize
-		//if (hasHigherRes()) {
-			// TODO: Use threads?
-		//	cv::resize(rfull, r, r.size(), 0.0, 0.0, interpolation_);
-		//}
-	} else {
-		cv::cvtColor(frame_l_, lfull, cv::COLOR_BGR2BGRA);
+		c->rectify(ltmp_, l, Channel::Left);
 	}
-
-	if (hasHigherRes()) {
-		//FTL_Profile("Frame Resize", 0.01);
-		cv::resize(lfull, l, l.size(), 0.0, 0.0, interpolation_);
-		l_hres_out.upload(hres, stream);
-	} else {
-		l_hres_out = cv::cuda::GpuMat();
+	else {
+		cv::cvtColor(frame_l_, l, cv::COLOR_BGR2BGRA);
 	}
 
 	{
-		//FTL_Profile("Upload", 0.05);
+		auto& f_left = frame.create<ftl::rgbd::VideoFrame>(Channel::Left);
+		cv::cuda::GpuMat& l_out = f_left.createGPU();
+		cv::Mat &l_host = f_left.setCPU();
 		l_out.upload(l, stream);
+		l.copyTo(l_host);
 	}
-	//r_out.upload(r, stream);
 
 	if (!frame.hasChannel(Channel::Thumbnail)) {
 		cv::Mat thumb;
@@ -431,11 +397,6 @@ bool OpenCVDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cud
 		cv::imencode(".jpg", thumb, thumbdata, params);
 	}
 
-	//if (camera_b_) {
-		//FTL_Profile("WaitCamB", 0.05);
-		//future_b.wait();
-	//}
-
 	return true;
 }
 
diff --git a/components/rgbd-sources/src/sources/stereovideo/opencv.hpp b/components/rgbd-sources/src/sources/stereovideo/opencv.hpp
index 37389e63f286458a1eeb4f8180c534a98e556941..607360591b168b15c3d1b452f1c039b3827cb2f6 100644
--- a/components/rgbd-sources/src/sources/stereovideo/opencv.hpp
+++ b/components/rgbd-sources/src/sources/stereovideo/opencv.hpp
@@ -20,13 +20,10 @@ class OpenCVDevice : public ftl::rgbd::detail::Device {
 	static std::vector<DeviceDetails> listDevices();
 
 	bool grab() override;
-	bool get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::GpuMat &h_l, cv::Mat &h_r, StereoRectification *c, cv::cuda::Stream &stream) override;
+	bool get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream) override;
 
-	unsigned int width() const override { return dwidth_; }
-	unsigned int height() const override { return dheight_; }
-
-	unsigned int fullWidth() const override { return width_; }
-	unsigned int fullHeight() const override { return height_; }
+	unsigned int width() const override { return width_; }
+	unsigned int height() const override { return height_; }
 
 	double getTimestamp() const override;
 
diff --git a/components/rgbd-sources/src/sources/stereovideo/pylon.cpp b/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
index 57fea9555d845c29bd1c87b5f5a041c7012f31e4..b9690e2d8fed449b57552d08e147d7c3fa1e717d 100644
--- a/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
+++ b/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
@@ -94,17 +94,17 @@ PylonDevice::PylonDevice(nlohmann::json &config)
 	}
 
 	// Choose a good default depth res
-	width_ = value("depth_width", std::min(1280u,fullwidth_)) & 0xFFFe;
-	float aspect = float(fullheight_) / float(fullwidth_);
-	height_ = value("depth_height", std::min(uint32_t(aspect*float(width_)), fullheight_)) & 0xFFFe;
+	//width_ = value("depth_width", std::min(1280u,fullwidth_)) & 0xFFFe;
+	//float aspect = float(fullheight_) / float(fullwidth_);
+	//height_ = value("depth_height", std::min(uint32_t(aspect*float(width_)), fullheight_)) & 0xFFFe;
 
-	LOG(INFO) << "Depth resolution: " << width_ << "x" << height_;
+	//LOG(INFO) << "Depth resolution: " << width_ << "x" << height_;
 
 	// Allocate page locked host memory for fast GPU transfer
-	left_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4);
-	right_hm_ = cv::cuda::HostMem(height_, width_, CV_8UC4);
-	hres_hm_ = cv::cuda::HostMem(fullheight_, fullwidth_, CV_8UC4);
-	rtmp_.create(fullheight_, fullwidth_, CV_8UC4);
+	left_hm_ = cv::cuda::HostMem(fullheight_, fullwidth_, CV_8UC4);
+	right_hm_ = cv::cuda::HostMem(fullheight_, fullwidth_, CV_8UC4);
+	//hres_hm_ = cv::cuda::HostMem(fullheight_, fullwidth_, CV_8UC4);
+	//rtmp_.create(fullheight_, fullwidth_, CV_8UC4);
 
 	on("exposure", [this]() {
 		if (lcam_->GetDeviceInfo().GetModelName() != "Emulation") {
@@ -122,10 +122,29 @@ PylonDevice::PylonDevice(nlohmann::json &config)
 		interpolation_ = value("inter_cubic", true) ?
 			cv::INTER_CUBIC : cv::INTER_LINEAR;
 	});
+
+	monitor_ = true;
+	temperature_monitor_ = ftl::timer::add(ftl::timer::timerlevel_t::kTimerIdle1, 10.0, [this](int64_t ts) {
+		float temperature = (rcam_) ? std::max(lcam_->DeviceTemperature(), rcam_->DeviceTemperature()) : lcam_->DeviceTemperature();
+
+		LOG_IF(WARNING, temperature > 53.0)
+			<< "Camera temperature over 50C (value: " << temperature << ")";
+
+		// TODO: check actual temperature status.
+		if (temperature > 65.0) {
+			LOG(FATAL) << "Cameras are overheating";
+		}
+
+		return true;
+	});
 }
 
 PylonDevice::~PylonDevice() {
+	monitor_ = false;
+	temperature_monitor_.cancel();
 
+	lcam_->Close();
+	rcam_->Close();
 }
 
 static std::vector<ftl::rgbd::detail::DeviceDetails> pylon_devices;
@@ -226,24 +245,14 @@ bool PylonDevice::_retrieveFrames(Pylon::CGrabResultPtr &result, Pylon::CBaslerU
 	return true;
 }
 
-bool PylonDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda::GpuMat &h_l, cv::Mat &h_r, StereoRectification *c, cv::cuda::Stream &stream) {
+bool PylonDevice::get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream) {
 	if (!isReady()) return false;
 
-	Mat l, r ,hres;
+	Mat l, r;
 
 	// Use page locked memory
 	l = left_hm_.createMatHeader();
 	r = right_hm_.createMatHeader();
-	hres = hres_hm_.createMatHeader();
-
-	Mat &lfull = (!hasHigherRes()) ? l : hres;
-	Mat &rfull = (!hasHigherRes()) ? r : rtmp_;
-
-	//ftl::cuda::setDevice();
-
-	//int dev;
-	//cudaGetDevice(&dev);
-	//LOG(INFO) << "Current cuda device = " << dev;
 
 	if (isStereo()) {
 		auto lcount = lcam_->NumReadyBuffers.GetValue();
@@ -283,50 +292,42 @@ bool PylonDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cuda
 
 	try {
 		FTL_Profile("Frame Retrieve", 0.005);
-		//std::future<bool> future_b;
 		bool res_r = false;
+
 		if (rcam_) {
-			//future_b = std::move(ftl::pool.push([this,&rfull,&r,&l,c,&r_out,&h_r,&stream](int id) {
-				Pylon::CGrabResultPtr result_right;
-
-				if (_retrieveFrames(result_right, rcam_)) {
-
-					cv::Mat wrap_right(
-					result_right->GetHeight(),
-					result_right->GetWidth(),
-					CV_8UC1,
-					(uint8_t*)result_right->GetBuffer());
-
-					{
-						FTL_Profile("Bayer Colour (R)", 0.005);
-						cv::cvtColor(wrap_right, rtmp2_, cv::COLOR_BayerRG2BGRA);
-					}
-
-					//if (isStereo()) {
-						FTL_Profile("Rectify and Resize (R)", 0.005);
-						c->rectify(rtmp2_, rfull, Channel::Right);
-
-						if (hasHigherRes()) {
-							cv::resize(rfull, r, r.size(), 0.0, 0.0, interpolation_);
-							h_r = rfull;
-						}
-						else {
-							h_r = Mat();
-						}
-					//}
-
-					r_out.upload(r, stream);
-					res_r = true;
+			Pylon::CGrabResultPtr result_right;
+
+			if (_retrieveFrames(result_right, rcam_)) {
+
+				cv::Mat wrap_right(
+				result_right->GetHeight(),
+				result_right->GetWidth(),
+				CV_8UC1,
+				(uint8_t*)result_right->GetBuffer());
+
+				{
+					FTL_Profile("Bayer Colour (R)", 0.005);
+					cv::cvtColor(wrap_right, rtmp_, cv::COLOR_BayerRG2BGRA);
 				}
-			//}));
+
+				{
+					FTL_Profile("Rectify (R)", 0.005);
+					c->rectify(rtmp_, r, Channel::Right);
+				}
+
+				auto& f_right = frame.create<ftl::rgbd::VideoFrame>(Channel::Right);
+				cv::cuda::GpuMat& r_out = f_right.createGPU();
+				cv::Mat &r_host = f_right.setCPU();
+
+				r_out.upload(r, stream);
+				r.copyTo(r_host);
+				res_r = true;
+			}
 		}
 
 		Pylon::CGrabResultPtr result_left;
 
 		if (!_retrieveFrames(result_left, lcam_)) {
-			if (rcam_) {
-				//future_b.wait();
-			}
 			return false;
 		}
 
@@ -339,28 +340,24 @@ bool PylonDevice::get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l_out, cv::cuda
 		{
 			FTL_Profile("Bayer Colour (L)", 0.005);
 			if (isStereo()) cv::cvtColor(wrap_left, ltmp_, cv::COLOR_BayerRG2BGRA);
-			else cv::cvtColor(wrap_left, lfull, cv::COLOR_BayerRG2BGRA);
+			else cv::cvtColor(wrap_left, l, cv::COLOR_BayerRG2BGRA);
 		}
 
 		{
-			FTL_Profile("Rectify and Resize (L)", 0.005);
+			FTL_Profile("Rectify (L)", 0.005);
 			if (isStereo()) {
-				c->rectify(ltmp_, lfull, Channel::Left);
-			}
-
-			if (hasHigherRes()) {
-				cv::resize(lfull, l, l.size(), 0.0, 0.0, interpolation_);
-				h_l.upload(hres, stream);
-			} else {
-				h_l = cv::cuda::GpuMat();
+				c->rectify(ltmp_, l, Channel::Left);
 			}
 		}
 
+		auto& f_left = frame.create<ftl::rgbd::VideoFrame>(Channel::Left);
+		cv::cuda::GpuMat& l_out = f_left.createGPU();
+		cv::Mat &l_host = f_left.setCPU();
+
 		l_out.upload(l, stream);
+		l.copyTo(l_host);
 
 		if (rcam_) {
-			//future_b.wait();
-			//if (!future_b.get()) return false;
 			if (!res_r) return false;
 		}
 
diff --git a/components/rgbd-sources/src/sources/stereovideo/pylon.hpp b/components/rgbd-sources/src/sources/stereovideo/pylon.hpp
index 707e8f437dd971d71ed5ec95ed28592b2f9145be..096d504acf71c97e7600ed86b5ea9e811bd54a00 100644
--- a/components/rgbd-sources/src/sources/stereovideo/pylon.hpp
+++ b/components/rgbd-sources/src/sources/stereovideo/pylon.hpp
@@ -22,13 +22,10 @@ class PylonDevice : public ftl::rgbd::detail::Device {
 	static std::vector<DeviceDetails> listDevices();
 
 	bool grab() override;
-	bool get(ftl::rgbd::Frame &frame, cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::GpuMat &h_l, cv::Mat &h_r, StereoRectification *c, cv::cuda::Stream &stream) override;
+	bool get(ftl::rgbd::Frame &frame, StereoRectification *c, cv::cuda::Stream &stream) override;
 
-	unsigned int width() const override { return width_; }
-	unsigned int height() const override { return height_; };
-
-	unsigned int fullWidth() const override { return fullwidth_; }
-	unsigned int fullHeight() const override { return fullheight_; }
+	unsigned int width() const override { return fullwidth_; }
+	unsigned int height() const override { return fullheight_; }
 
 	double getTimestamp() const override { return 0.0; }
 
@@ -45,8 +42,6 @@ class PylonDevice : public ftl::rgbd::detail::Device {
 	cv::Mat tmp_;
 	uint32_t fullwidth_;
 	uint32_t fullheight_;
-	uint32_t width_;
-	uint32_t height_;
 	std::string name_;
 	std::string serial_;
 	int left_fail_=0;
@@ -55,12 +50,13 @@ class PylonDevice : public ftl::rgbd::detail::Device {
 
 	cv::cuda::HostMem left_hm_;
 	cv::cuda::HostMem right_hm_;
-	cv::cuda::HostMem hres_hm_;
 	cv::Mat rtmp_;
-	cv::Mat rtmp2_;
 	cv::Mat ltmp_;
 	int interpolation_;
 
+	std::atomic_bool monitor_;
+	ftl::Handle temperature_monitor_;
+
 	void _configureCamera(Pylon::CBaslerUniversalInstantCamera *cam);
 	bool _retrieveFrames(Pylon::CGrabResultPtr &result, Pylon::CBaslerUniversalInstantCamera *cam);
 };
diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
index aade34c89124470862ed3bc7cd7a72c506f777f2..192697d2ac240ad434e3a21b9ad35ff816cfffe4 100644
--- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
+++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
@@ -74,6 +74,9 @@ ftl::rgbd::detail::Device::~Device() {
 
 StereoVideoSource::StereoVideoSource(ftl::rgbd::Source *host)
 		: ftl::rgbd::BaseSourceImpl(host), ready_(false) {
+
+	cudaSafeCall( cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) );
+
 	auto uri = host->get<std::string>("uri");
 	if (uri) {
 		init(*uri);
@@ -86,12 +89,15 @@ StereoVideoSource::StereoVideoSource(ftl::rgbd::Source *host)
 StereoVideoSource::StereoVideoSource(ftl::rgbd::Source *host, const string &file)
 		: ftl::rgbd::BaseSourceImpl(host), ready_(false) {
 
+	cudaSafeCall( cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) );
 	init(file);
 }
 
 StereoVideoSource::~StereoVideoSource() {
+	cudaStreamDestroy(stream_);
+
 	delete lsrc_;
-	if (pipeline_input_) delete pipeline_input_;
+	//if (pipeline_input_) delete pipeline_input_;
 }
 
 bool StereoVideoSource::supported(const std::string &dev) {
@@ -143,15 +149,21 @@ void StereoVideoSource::init(const string &file) {
 
 	if (!lsrc_) return;  // throw?
 
-	color_size_ = cv::Size(lsrc_->width(), lsrc_->height());
+	cv::Size size_full = cv::Size(lsrc_->width(), lsrc_->height());
 
-	pipeline_input_ = ftl::config::create<ftl::operators::Graph>(host_, "input");
-	#ifdef HAVE_OPTFLOW
-	pipeline_input_->append<ftl::operators::NVOpticalFlow>("optflow");
-	#endif
-	pipeline_input_->append<ftl::operators::ColourChannels>("colour");
+	// Choose a good default depth res
+	int w = lsrc_->value("depth_width", std::min(1280,size_full.width)) & 0xFFFe;
+	float aspect = float(size_full.height) / float(size_full.width);
+	int h = lsrc_->value("depth_height", std::min(int(aspect*float(w)), size_full.height)) & 0xFFFe;
+
+	depth_size_ = cv::Size(w, h);
+
+	//pipeline_input_ = ftl::config::create<ftl::operators::Graph>(host_, "input");
+	//#ifdef HAVE_OPTFLOW
+	//pipeline_input_->append<ftl::operators::NVOpticalFlow>("optflow");
+	//#endif
+	//pipeline_input_->append<ftl::operators::ColourChannels>("colour");
 
-	cv::Size size_full = cv::Size(lsrc_->fullWidth(), lsrc_->fullHeight());
 	rectification_ = std::unique_ptr<StereoRectification>
 		(ftl::create<StereoRectification>(host_, "rectification", size_full));
 
@@ -248,11 +260,11 @@ void StereoVideoSource::updateParameters(ftl::rgbd::Frame &frame) {
 		cv::cv2eigen(calibration_.origin * rectification_->getPose(Channel::Left), pose);
 		frame.setPose() = pose;
 
-		cv::Mat K = rectification_->cameraMatrix(color_size_);
+		cv::Mat K = rectification_->cameraMatrix(depth_size_);
 		float fx = static_cast<float>(K.at<double>(0,0));
 
 		float baseline = static_cast<float>(rectification_->baseline());
-		float doff = rectification_->doff(color_size_);
+		float doff = rectification_->doff(depth_size_);
 
 		double d_resolution = this->host_->getConfig().value<double>("depth_resolution", 0.0);
 		float min_depth = this->host_->getConfig().value<double>("min_depth", 0.45);
@@ -270,8 +282,8 @@ void StereoVideoSource::updateParameters(ftl::rgbd::Frame &frame) {
 			static_cast<float>(K.at<double>(1,1)),	// Fy
 			static_cast<float>(-K.at<double>(0,2)),	// Cx
 			static_cast<float>(-K.at<double>(1,2)),	// Cy
-			(unsigned int) color_size_.width,
-			(unsigned int) color_size_.height,
+			(unsigned int) depth_size_.width,
+			(unsigned int) depth_size_.height,
 			min_depth,
 			max_depth,
 			baseline,
@@ -288,16 +300,16 @@ void StereoVideoSource::updateParameters(ftl::rgbd::Frame &frame) {
 		Eigen::Matrix4d pose;
 		auto& params = frame.setLeft();
 
-		params.cx = -(color_size_.width / 2.0);
-		params.cy = -(color_size_.height / 2.0);
+		params.cx = -(depth_size_.width / 2.0);
+		params.cy = -(depth_size_.height / 2.0);
 		params.fx = 700.0;
 		params.fy = 700.0;
 		params.maxDepth = host_->value("size", 1.0f);
 		params.minDepth = 0.0f;
 		params.doffs = 0.0;
 		params.baseline = 0.1f;
-		params.width = color_size_.width;
-		params.height = color_size_.height;;
+		params.width = depth_size_.width;
+		params.height = depth_size_.height;;
 
 		float offsetz = host_->value("offset_z", 0.0f);
 		//state_.setPose(matrix(cv::Vec3d(0.0, 3.14159, 0.0), cv::Vec3d(0.0,0.0,params_.maxDepth+offsetz)));
@@ -326,6 +338,7 @@ void StereoVideoSource::updateParameters(ftl::rgbd::Frame &frame) {
 
 bool StereoVideoSource::capture(int64_t ts) {
 	cap_status_ = lsrc_->grab();
+	if (!cap_status_) LOG(WARNING) << "Capture failed";
 	return cap_status_;
 }
 
@@ -353,31 +366,12 @@ bool StereoVideoSource::retrieve(ftl::rgbd::Frame &frame) {
 		do_update_params_ = false;
 	}
 
-	cv::cuda::GpuMat gpu_dummy;
-	cv::Mat dummy;
-	auto &hres = (lsrc_->hasHigherRes()) ? frame.create<cv::cuda::GpuMat>(Channel::ColourHighRes) : gpu_dummy;
-	auto &hres_r = (lsrc_->hasHigherRes()) ? frame.create<cv::Mat>(Channel::RightHighRes) : dummy;
-
-	if (lsrc_->isStereo()) {
-		cv::cuda::GpuMat &left = frame.create<cv::cuda::GpuMat>(Channel::Left);
-		cv::cuda::GpuMat &right = frame.create<cv::cuda::GpuMat>(Channel::Right);
-		if (!lsrc_->get(frame, left, right, hres, hres_r, rectification_.get(), stream2_)) {
-			frame.remove(Channel::Left);
-			frame.remove(Channel::Right);
-		}
-	}
-	else {
-		cv::cuda::GpuMat &left = frame.create<cv::cuda::GpuMat>(Channel::Left);
-		cv::cuda::GpuMat right;
-		if (!lsrc_->get(frame, left, right, hres, hres_r, rectification_.get(), stream2_)) {
-			frame.remove(Channel::Left);
-		}
-	}
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(frame.stream());
 
-	//LOG(INFO) << "Channel size: " << hres.size();
+	lsrc_->get(frame, rectification_.get(), cvstream);
 
-	pipeline_input_->apply(frame, frame, cv::cuda::StreamAccessor::getStream(stream2_));
-	stream2_.waitForCompletion();
+	cudaSafeCall(cudaEventRecord(frame.uploadEvent(), frame.stream()));
+	// FIXME: Currently possible that previous upload not finished
 
 	return true;
 }
diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp
index dd08f4f68baf5809a7c0eecf3911c58e658d452d..abd8652432eb6b9dbd8e876ef3fd03f86a791495 100644
--- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp
+++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp
@@ -42,7 +42,7 @@ private:
 
 	int64_t capts_;
 
-	cv::Size color_size_;
+	//cv::Size color_size_;
 	cv::Size depth_size_;
 
 	ftl::operators::Graph *pipeline_input_=nullptr;
@@ -55,10 +55,8 @@ private:
 	bool do_update_params_ = false;
 	bool cap_status_ = false;
 
-	cv::cuda::Stream stream_;
-	cv::cuda::Stream stream2_;
-
 	cv::Mat mask_l_;
+	cudaStream_t stream_;
 
 	ftl::Handle calibration_change_;
 	std::string fname_calib_;
diff --git a/components/streams/include/ftl/streams/feed.hpp b/components/streams/include/ftl/streams/feed.hpp
index ac141a045950c5e266199fe8a1145641d82314cb..4aab46ca29d8106d955022529b7699e152511c22 100644
--- a/components/streams/include/ftl/streams/feed.hpp
+++ b/components/streams/include/ftl/streams/feed.hpp
@@ -235,6 +235,7 @@ private:
 
 	void _createPipeline(uint32_t fsid);
 	ftl::operators::Graph* _addPipeline(uint32_t fsid);
+	void _dispatch(const ftl::data::FrameSetPtr &fs);
 
 	void _beginRecord(Filter *f);
 	void _stopRecording();
diff --git a/components/streams/include/ftl/streams/receiver.hpp b/components/streams/include/ftl/streams/receiver.hpp
index 7febc9e416794b762f7df9a02b0f385840c071a3..07f90efeec27fb76a24f43aa09532038e4c19b67 100644
--- a/components/streams/include/ftl/streams/receiver.hpp
+++ b/components/streams/include/ftl/streams/receiver.hpp
@@ -93,6 +93,7 @@ class Receiver : public ftl::Configurable, public ftl::data::Generator {
 	InternalVideoStates &_getVideoFrame(const ftl::codecs::StreamPacket &spkt, int ix=0);
 	InternalAudioStates &_getAudioFrame(const ftl::codecs::StreamPacket &spkt, int ix=0);
 	void _finishPacket(ftl::streams::LockedFrameSet &fs, size_t fix);
+	void _terminateVideoPacket(const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt);
 };
 
 }
diff --git a/components/streams/src/feed.cpp b/components/streams/src/feed.cpp
index 878497ac584577d53bfde1ecf9f3cf26eeac50f6..6822813cb30c7bddb12c39c6384ad89ac130913f 100644
--- a/components/streams/src/feed.cpp
+++ b/components/streams/src/feed.cpp
@@ -179,44 +179,26 @@ Feed::Feed(nlohmann::json &config, ftl::net::Universe*net) :
 
 			SHARED_LOCK(mtx_, lk);
 			if (pre_pipelines_.count(fs->frameset()) == 1) {
-				pipeline = pre_pipelines_[fs->frameset()]; //->apply(*fs, *fs, 0);
+				pipeline = pre_pipelines_[fs->frameset()];
 			}
 
 			lk.unlock();
 
-			if (pipeline) pipeline->apply(*fs, *fs, 0);
+			bool did_pipe = false;
 
-			lk.lock();
-
-			std::atomic_store(&latest_.at(fs->frameset()), fs);
-
-			if (fs->hasAnyChanged(Channel::Thumbnail)) {
-				_saveThumbnail(fs);
-			}
+			if (pipeline) {
+				bool did_pipe = pipeline->apply(*fs, *fs, [this,fs]() {
+					_dispatch(fs);
+				});
 
-			for (auto* filter : filters_) {
-				// TODO: smarter update (update only when changed) instead of
-				// filter->channels_available_ = fs->channels();
-
-				if (filter->sources().empty()) {
-					//filter->channels_available_ = fs->channels();
-					filter->handler_.triggerParallel(fs);
-				}
-				else {
-					// TODO: process partial/complete sets here (drop), that is
-					//       intersection filter->sources() and fs->sources() is
-					//       same as filter->sources().
-
-					// TODO: reverse map source ids required here?
-					for (const auto& src : filter->sources()) {
-						//if (fs->hasFrame(src)) {
-						if (fs->frameset() == src) {
-							//filter->channels_available_ = fs->channels();
-							filter->handler_.triggerParallel(fs);
-							break;
-						}
-					}
+				if (!did_pipe) {
+					LOG(WARNING) << "Feed Pipeline dropped";
+					ftl::pool.push([this,fs](int id) {
+						_dispatch(fs);
+					});
 				}
+			} else {
+				_dispatch(fs);
 			}
 
 			return true;
@@ -270,6 +252,41 @@ Feed::~Feed() {
 	}
 }
 
+void Feed::_dispatch(const ftl::data::FrameSetPtr &fs) {
+	SHARED_LOCK(mtx_, lk);
+
+	std::atomic_store(&latest_.at(fs->frameset()), fs);
+
+	if (fs->hasAnyChanged(Channel::Thumbnail)) {
+		_saveThumbnail(fs);
+	}
+
+	for (auto* filter : filters_) {
+		// TODO: smarter update (update only when changed) instead of
+		// filter->channels_available_ = fs->channels();
+
+		if (filter->sources().empty()) {
+			//filter->channels_available_ = fs->channels();
+			filter->handler_.triggerParallel(fs);
+		}
+		else {
+			// TODO: process partial/complete sets here (drop), that is
+			//       intersection filter->sources() and fs->sources() is
+			//       same as filter->sources().
+
+			// TODO: reverse map source ids required here?
+			for (const auto& src : filter->sources()) {
+				//if (fs->hasFrame(src)) {
+				if (fs->frameset() == src) {
+					//filter->channels_available_ = fs->channels();
+					filter->handler_.triggerParallel(fs);
+					break;
+				}
+			}
+		}
+	}
+}
+
 void Feed::_saveThumbnail(const ftl::data::FrameSetPtr& fs) {
 	// TODO: Put thumb somewhere here...
 }
@@ -457,8 +474,6 @@ void Feed::_createPipeline(uint32_t fsid) {
 		p->append<ftl::operators::DepthChannel>("depth")->value("enabled", false);
 		p->append<ftl::operators::ClipScene>("clipping")->value("enabled", false);
 		p->append<ftl::operators::ColourChannels>("colour");  // Convert BGR to BGRA
-		p->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
-		p->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 		//p->append<ftl::operators::HFSmoother>("hfnoise");
 		p->append<ftl::operators::CrossSupport>("cross");
 		p->append<ftl::operators::PixelWeights>("weights");
@@ -468,8 +483,11 @@ void Feed::_createPipeline(uint32_t fsid) {
 		p->append<ftl::operators::BorderMask>("border_mask");
 		p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity");
 		p->append<ftl::operators::MultiViewMLS>("mvmls")->value("enabled", false);
+		p->append<ftl::operators::DisplayMask>("display_mask")->value("enabled", false);
 		p->append<ftl::operators::Poser>("poser")->value("enabled", true);
 		p->append<ftl::operators::GTAnalysis>("gtanalyse");
+		p->append<ftl::operators::DetectAndTrack>("facedetection")->value("enabled", false);
+		p->append<ftl::operators::ArUco>("aruco")->value("enabled", false);
 	}
 }
 
@@ -632,7 +650,7 @@ std::set<ftl::stream::SourceInfo> Feed::recentSources() {
 std::vector<std::string> Feed::availableDeviceSources() {
 	std::vector<std::string> results;
 
-	if (ftl::rgbd::Source::supports("device:pylon")) results.emplace_back("device:pylon");
+	//if (ftl::rgbd::Source::supports("device:pylon")) results.emplace_back("device:pylon");
 	if (ftl::rgbd::Source::supports("device:camera")) results.emplace_back("device:camera");
 	if (ftl::rgbd::Source::supports("device:stereo")) results.emplace_back("device:stereo");
 	if (ftl::rgbd::Source::supports("device:screen")) results.emplace_back("device:screen");
diff --git a/components/streams/src/filestream.cpp b/components/streams/src/filestream.cpp
index 3d71fffb3508c9b3ed192a03059e5beba2cfe67b..35b48d217d4fceb7c289646c90af376e78365d14 100644
--- a/components/streams/src/filestream.cpp
+++ b/components/streams/src/filestream.cpp
@@ -235,7 +235,7 @@ bool File::tick(int64_t ts) {
 
 	// Check buffer first for frames already read
 	{
-		UNIQUE_LOCK(data_mutex_, dlk);
+		//UNIQUE_LOCK(data_mutex_, dlk);
 		if (data_.size() > 0) has_data = true;
 		
 		if (needs_endframe_) {
@@ -245,7 +245,7 @@ bool File::tick(int64_t ts) {
 
 		size_t frame_count = 0;
 
-		for (auto i = data_.begin(); i != data_.end(); ++i) {
+		for (auto i = data_.begin(); i != data_.end(); ) {
 			if (std::get<0>(*i).timestamp <= timestamp_) {
 				auto &spkt = std::get<0>(*i);
 				auto &pkt = std::get<1>(*i);
@@ -268,7 +268,10 @@ bool File::tick(int64_t ts) {
 					}
 				}
 
-				ftl::pool.push([this,i](int id) {
+				auto j = i;
+				++i;
+
+				ftl::pool.push([this,i=j](int id) {
 					auto &spkt = std::get<0>(*i);
 					auto &pkt = std::get<1>(*i);
 
diff --git a/components/streams/src/netstream.cpp b/components/streams/src/netstream.cpp
index 62864077276916106d17c53bb3bfc5ce051b03b2..fcce08aecabd2db49c86e69652483a68e7b34b24 100644
--- a/components/streams/src/netstream.cpp
+++ b/components/streams/src/netstream.cpp
@@ -69,9 +69,9 @@ Net::Net(nlohmann::json &config, ftl::net::Universe *net) : Stream(config), acti
 		abr_->setMaxRate(static_cast<uint8_t>(std::max(0, std::min(255, value("max_bitrate", 200)))));
 	});
 
-	abr_enabled_ = value("abr_enabled", true);
+	abr_enabled_ = value("abr_enabled", false);
 	on("abr_enabled", [this]() {
-		abr_enabled_ = value("abr_enabled", true);
+		abr_enabled_ = value("abr_enabled", false);
 		bitrate_ = (abr_enabled_) ?
 			abr_->current() :
 			static_cast<uint8_t>(std::max(0, std::min(255, value("bitrate", 64))));
diff --git a/components/streams/src/receiver.cpp b/components/streams/src/receiver.cpp
index c0cdbf04f890d5beea81b1873059a1257cb7e610..b53fc49a7beea2f4521dbc60757c02fbdf1cafde 100644
--- a/components/streams/src/receiver.cpp
+++ b/components/streams/src/receiver.cpp
@@ -247,6 +247,15 @@ namespace sgm {
 	}
 }
 
+void Receiver::_terminateVideoPacket(const StreamPacket &spkt, const Packet &pkt) {
+	auto &build = builder(spkt.streamID);
+	auto fs = build.get(spkt.timestamp, spkt.frame_number+pkt.frame_count-1);
+	if (fs) {
+		fs->localTimestamp = spkt.localTimestamp;
+		_finishPacket(fs, spkt.frame_number);
+	}
+}
+
 void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 	FTL_Profile("VideoPacket", 0.02);
 
@@ -257,6 +266,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 
 	if (tx == 0 || ty == 0) {
 		LOG(ERROR) << "No Packets";
+		_terminateVideoPacket(spkt, pkt);
 		return;
 	}
 
@@ -268,6 +278,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 	auto *decoder = ividstate.decoders[channum];
 	if (!decoder) {
 		LOG(ERROR) << "No frame decoder available";
+		_terminateVideoPacket(spkt, pkt);
 		return;
 	}
 
@@ -276,10 +287,12 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 		FTL_Profile("Decode", 0.015);
 		if (!decoder->decode(pkt, surface)) {
 			LOG(ERROR) << "Decode failed on channel " << (int)spkt.channel;
+			_terminateVideoPacket(spkt, pkt);
 			return;
 		}
 	} catch (std::exception &e) {
 		LOG(ERROR) << "Decode failed for " << spkt.timestamp << ": " << e.what();
+		_terminateVideoPacket(spkt, pkt);
 		return;
 	}
 
@@ -288,6 +301,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 
 	if (width == 0 || height == 0) {
 		LOG(ERROR) << "Invalid decoded size: " << surface.cols << "x" << surface.rows << " (" << tx << "," << ty << ")";
+		_terminateVideoPacket(spkt, pkt);
 		return;
 	}
 
@@ -295,6 +309,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 
 	if (surface.type() != cvtype) {
 		LOG(ERROR) << "Invalid video format received";
+		_terminateVideoPacket(spkt, pkt);
 		return;
 	}
 
@@ -339,7 +354,15 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 	}
 
 	// Must ensure all processing is finished before completing a frame.
-	cudaSafeCall(cudaStreamSynchronize(decoder->stream()));
+	//cudaSafeCall(cudaStreamSynchronize(decoder->stream()));
+
+	cudaSafeCall(cudaEventRecord(decoder->event(), decoder->stream()));
+	//for (int i=0; i<pkt.frame_count; ++i) {
+	//	cudaSafeCall(cudaStreamWaitEvent(fs->frames[spkt.frame_number+i].stream(), decoder->event(), 0));
+	//}
+
+	// For now, always add to frame 0 stream
+	cudaSafeCall(cudaStreamWaitEvent(fs->frames[0].stream(), decoder->event(), 0));
 
 	fs->localTimestamp = spkt.localTimestamp;
 
diff --git a/components/streams/src/renderers/openvr_render.cpp b/components/streams/src/renderers/openvr_render.cpp
index b4a190c652ce20c94a735f9c0513477ac2a7db61..a448206e2812f07bb8359dfd040e55b2f9577bec 100644
--- a/components/streams/src/renderers/openvr_render.cpp
+++ b/components/streams/src/renderers/openvr_render.cpp
@@ -361,8 +361,8 @@ bool OpenVRRender::retrieve(ftl::data::Frame &frame_out) {
 		texture1_.make(width, height, ftl::utility::GLTexture::Type::BGRA);
 		texture2_.make(width, height, ftl::utility::GLTexture::Type::BGRA);
 
-		rgbdframe.create<cv::cuda::GpuMat>(Channel::Colour) = texture1_.map(renderer_->getCUDAStream());
-		rgbdframe.create<cv::cuda::GpuMat>(Channel::Colour2) = texture2_.map(renderer_->getCUDAStream());
+		rgbdframe.create<cv::cuda::GpuMat>(Channel::Colour) = texture1_.map(rgbdframe.stream());
+		rgbdframe.create<cv::cuda::GpuMat>(Channel::Colour2) = texture2_.map(rgbdframe.stream());
 		rgbdframe.create<cv::cuda::GpuMat>(Channel::Depth).create(height, width, CV_32F);
 		rgbdframe.createTexture<float>(Channel::Depth);
 
@@ -521,7 +521,7 @@ bool OpenVRRender::retrieve(ftl::data::Frame &frame_out) {
 			post_pipe_->append<ftl::operators::GTAnalysis>("gtanalyse");
 		}
 
-		post_pipe_->apply(rgbdframe, rgbdframe, renderer_->getCUDAStream());
+		post_pipe_->apply(rgbdframe, rgbdframe);
 
 		if (host_->value("enable_touch", false)) {
 			ftl::render::collision2touch(rgbdframe, renderer_->getCollisions(), sets, my_id_, host_->value("touch_min", 0.01f), host_->value("touch_max", 0.05f));
diff --git a/components/streams/src/renderers/screen_render.cpp b/components/streams/src/renderers/screen_render.cpp
index 3bd7d8c927cf93fee9fb3a724c9d0406300df5f2..4556cfaec9505e9924b3d49c0013e2630920a2e3 100644
--- a/components/streams/src/renderers/screen_render.cpp
+++ b/components/streams/src/renderers/screen_render.cpp
@@ -88,6 +88,7 @@ bool ScreenRender::retrieve(ftl::data::Frame &frame_out) {
 	my_id_ = frame_out.frameset();
 	auto sets = filter_->getLatestFrameSets();
 	bool data_only = host_->value("data_only", false);
+	bool blend_overlay = host_->value("blend_overlay", false);
 
 	if (sets.size() > 0) {
 		ftl::rgbd::Frame &rgbdframe = frame_out.cast<ftl::rgbd::Frame>();
@@ -140,10 +141,13 @@ bool ScreenRender::retrieve(ftl::data::Frame &frame_out) {
 				pose.setIdentity();
 				if (s->hasChannel(Channel::Pose)) pose = s->cast<ftl::rgbd::Frame>().getPose();
 
-				if (!data_only) renderer_->submit(
-					s.get(),
-					ftl::codecs::Channels<0>(ftl::codecs::Channel::Colour),
-					pose);
+				if (!data_only) {
+					if (blend_overlay) {
+						renderer_->submit(s.get(), ftl::codecs::Channels<0>(Channel::Colour) + Channel::Overlay, pose);
+					} else {
+						renderer_->submit(s.get(), ftl::codecs::Channels<0>(Channel::Colour), pose);
+					}
+				}
 			}
 
 			if (!data_only) renderer_->render();
@@ -221,7 +225,8 @@ bool ScreenRender::retrieve(ftl::data::Frame &frame_out) {
 				post_pipe_->append<ftl::operators::GTAnalysis>("gtanalyse");
 			}
 
-			post_pipe_->apply(rgbdframe, rgbdframe, 0);
+			post_pipe_->apply(rgbdframe, rgbdframe);
+			cudaSafeCall(cudaStreamSynchronize(rgbdframe.stream()));
 
 			if (host_->value("enable_touch", false)) {
 				ftl::render::collision2touch(rgbdframe, renderer_->getCollisions(), sets, my_id_, host_->value("touch_min", 0.01f), host_->value("touch_max", 0.05f));
diff --git a/components/streams/src/sender.cpp b/components/streams/src/sender.cpp
index a47e9854a38784802e0b26ee344a20c790b77ac9..b4d9d409323cb7a1e8d66d46c4e0e8458398eaf5 100644
--- a/components/streams/src/sender.cpp
+++ b/components/streams/src/sender.cpp
@@ -490,19 +490,12 @@ void Sender::_encodeVideoChannel(ftl::data::FrameSet &fs, Channel c, bool reset)
 	int encoder_number = 0;
 	while (offset < fs.frames.size()) {
 		Channel cc = c;
-		//if ((cc == Channel::Colour) && fs.firstFrame().hasChannel(Channel::ColourHighRes)) {
-		//	cc = Channel::ColourHighRes;
-		//}
-		
-		//if ((cc == Channel::Right) && fs.firstFrame().hasChannel(Channel::RightHighRes)) {
-		//	cc = Channel::RightHighRes;
-		//	fs.frames[offset].upload(cc);
-		//}
 
-		if (!fs.frames[offset].hasChannel(cc)) {
-			offset++;
-			continue;
-		}
+		// FIXME: Don't change tile layout when channel temporarily drops.
+		//if (!fs.frames[offset].hasChannel(cc)) {
+		//	offset++;
+		//	continue;
+		//}
 
 		StreamPacket spkt;
 		spkt.version = 5;
@@ -539,15 +532,12 @@ void Sender::_encodeVideoChannel(ftl::data::FrameSet &fs, Channel c, bool reset)
 			//}
 		}
 
-		int count = _generateTiles(fs, offset, cc, enc->stream(), is_stereo);
+		int count = (fs.frames.size() == 1) ? 1 : _generateTiles(fs, offset, cc, enc->stream(), is_stereo);
 		if (count <= 0) {
 			LOG(ERROR) << "Could not generate tiles.";
 			break;
 		}
 
-		//cudaSafeCall(cudaStreamSynchronize(enc->stream()));
-		//enc->stream().waitForCompletion();
-
 		if (enc) {
 			if (reset) enc->reset();
 
@@ -562,8 +552,15 @@ void Sender::_encodeVideoChannel(ftl::data::FrameSet &fs, Channel c, bool reset)
 				//if (static_cast<size_t>(fs.count) < fs.frames.size()) pkt.flags |= ftl::codecs::kFlagPartial;
 
 				// Choose correct region of interest into the surface.
-				cv::Rect roi = _generateROI(fs, cc, offset, is_stereo);
-				cv::cuda::GpuMat sroi = tile.surface(roi);
+				//cv::Rect roi = _generateROI(fs, cc, offset, is_stereo);
+				cv::cuda::GpuMat sroi;
+				
+				if (fs.frames.size() > 1) {
+					cv::Rect roi = _generateROI(fs, cc, offset, is_stereo);
+					sroi = tile.surface(roi);
+				} else {
+					sroi = fs.frames[0].get<cv::cuda::GpuMat>(cc);
+				}
 
 				FTL_Profile("Encoder",0.02);
 
@@ -637,8 +634,6 @@ void Sender::_encodeAudioChannel(ftl::data::FrameSet &fs, Channel c, bool reset)
 }
 
 void Sender::_encodeDataChannel(ftl::data::FrameSet &fs, Channel c, bool reset) {
-	int i=0;
-
 	// TODO: Pack all frames into a single packet
 	for (auto &f : fs.frames) {
 		StreamPacket spkt;
@@ -786,7 +781,7 @@ int Sender::_generateTiles(const ftl::rgbd::FrameSet &fs, int offset, Channel c,
 		} else {
 			cv::Rect roi((count % tx)*rwidth, (count / tx)*rheight, rwidth, rheight);
 			cv::cuda::GpuMat sroi = surface.surface(roi);
-			sroi.setTo(cv::Scalar(0));
+			sroi.setTo(cv::Scalar(0), stream);
 		}
 
 		++count;
diff --git a/components/streams/test/receiver_unit.cpp b/components/streams/test/receiver_unit.cpp
index fcdeafc1a819319b35d777498c31b2a7438bad7a..29bf0b241f1f4563202987cc3656e377cf889acd 100644
--- a/components/streams/test/receiver_unit.cpp
+++ b/components/streams/test/receiver_unit.cpp
@@ -364,7 +364,7 @@ TEST_CASE( "Receiver sync bugs" ) {
 		auto h = receiver->onFrameSet([&count,&ts,&haswrongchan](const ftl::data::FrameSetPtr& fs) {
 
 			ts = fs->timestamp();
-			haswrongchan = fs->frames[0].hasChannel(Channel::ColourHighRes);
+			haswrongchan = fs->frames[0].hasChannel(Channel::Overlay);
 
 			++count;
 
@@ -373,7 +373,7 @@ TEST_CASE( "Receiver sync bugs" ) {
 
 		try { stream.post(spkt, pkt); } catch(...) {}
 		spkt.timestamp = 10;
-		spkt.channel = Channel::ColourHighRes;
+		spkt.channel = Channel::Overlay;
 		try { stream.postEnd(spkt, pkt, 3); } catch(...) {}
 		spkt.timestamp = 20;
 		spkt.channel = Channel::Colour2;
diff --git a/components/streams/test/sender_unit.cpp b/components/streams/test/sender_unit.cpp
index 753fde010b94071152cc646a90d3f745a2e8b461..11c44ae33bad5ac61bbb01f90a45c6002372868f 100644
--- a/components/streams/test/sender_unit.cpp
+++ b/components/streams/test/sender_unit.cpp
@@ -210,13 +210,13 @@ TEST_CASE( "Sender::post() video frames" ) {
 		REQUIRE( count == 1 );
 		REQUIRE( spkt.version == 5 );
 		REQUIRE( spkt.timestamp == 1000 );
-		REQUIRE( (int)spkt.frame_number == 1 );
+		REQUIRE( (int)spkt.frame_number == 0 );
 		REQUIRE( spkt.streamID == 0 );
 		REQUIRE( spkt.channel == Channel::Depth );
 		REQUIRE( pkt.codec == codec_t::HEVC );
 		REQUIRE( pkt.data.size() > 0 );
 		REQUIRE( pkt.flags == (ftl::codecs::kFlagFloat | ftl::codecs::kFlagMappedDepth) );
-		REQUIRE( pkt.frame_count == 3 );
+		REQUIRE( pkt.frame_count == 4 );
 		REQUIRE( ftl::codecs::hevc::validNAL(pkt.data.data(), pkt.data.size()) );
 	}
 
diff --git a/components/structures/include/ftl/data/new_frame.hpp b/components/structures/include/ftl/data/new_frame.hpp
index 94c539086e6a24802cb4ca980345a36987c8be47..ebbceed9061a380e4ade9c5b6b011a163c3887b3 100644
--- a/components/structures/include/ftl/data/new_frame.hpp
+++ b/components/structures/include/ftl/data/new_frame.hpp
@@ -20,6 +20,8 @@
 #include <ftl/handle.hpp>
 #include <ftl/data/messages.hpp>
 
+#include <cuda_runtime.h>
+
 template<typename T> struct is_list : public std::false_type {};
 
 template<typename T>
@@ -630,6 +632,14 @@ class Frame {
 
 	inline FrameMode mode() const { return mode_; }
 
+	// ==== CUDA Functions =====================================================
+
+	cudaStream_t stream();
+
+	cudaEvent_t uploadEvent();
+
+	cudaEvent_t pipeEvent();
+
 	// ==== Wrapper functions ==================================================
 
 	void message(ftl::data::Message code, const std::string &msg);
@@ -681,6 +691,9 @@ class Frame {
 	FrameStatus status_;
 	FrameMode mode_ = FrameMode::PRIMARY;
 	uint64_t available_ = 0;
+	cudaStream_t stream_=0;
+	cudaEvent_t upload_event_=0;
+	cudaEvent_t pipe_event_=0;
 
 	inline void restart(int64_t ts) {
 		timestamp_ = ts;
diff --git a/components/structures/include/ftl/data/new_frameset.hpp b/components/structures/include/ftl/data/new_frameset.hpp
index 8dcc4b3b27f332139e5c18b9d42312cc2af9ccba..6eafe9cb59a4c7d0f53033ba8e903c6ed9e55207 100644
--- a/components/structures/include/ftl/data/new_frameset.hpp
+++ b/components/structures/include/ftl/data/new_frameset.hpp
@@ -134,6 +134,8 @@ class FrameSet : public ftl::data::Frame {
 	 */
 	bool hasAnyChanged(ftl::codecs::Channel) const;
 
+	bool anyHasChannel(ftl::codecs::Channel) const;
+
 	private:
 	std::atomic<int> flags_;
 };
diff --git a/components/structures/src/frameset.cpp b/components/structures/src/frameset.cpp
index cba77a2136cb6222f6fea9f2d6f77a2debb5eb2a..e8cfd9404535e1fdfe410596fc92cf35a83e39ec 100644
--- a/components/structures/src/frameset.cpp
+++ b/components/structures/src/frameset.cpp
@@ -78,6 +78,13 @@ bool ftl::data::FrameSet::hasAnyChanged(ftl::codecs::Channel c) const {
 	return false;
 }
 
+bool ftl::data::FrameSet::anyHasChannel(ftl::codecs::Channel c) const {
+	for (size_t i=0; i<frames.size(); ++i) {
+		if (frames[i].hasOwn(c)) return true;
+	}
+	return false;
+}
+
 void FrameSet::store() {
 	if (status() != ftl::data::FrameStatus::CREATED) throw FTL_Error("Cannot store frameset multiple times");
 
diff --git a/components/structures/src/new_frame.cpp b/components/structures/src/new_frame.cpp
index 576896702e3e524639d59f903341fce1bc3c3916..36a1c7d06cc59ea9a710bb6a0035fdb6951a9c70 100644
--- a/components/structures/src/new_frame.cpp
+++ b/components/structures/src/new_frame.cpp
@@ -1,6 +1,7 @@
 #include <ftl/data/new_frame.hpp>
 #include <ftl/data/framepool.hpp>
 #include <ftl/timer.hpp>
+#include <ftl/cuda_common.hpp>
 
 using ftl::data::Frame;
 using ftl::data::Session;
@@ -84,6 +85,27 @@ Frame::~Frame() {
 	}
 };
 
+cudaStream_t Frame::stream() {
+	if (stream_ == 0) {
+		cudaSafeCall( cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking) );
+	}
+	return stream_;
+}
+
+cudaEvent_t Frame::uploadEvent() {
+	if (upload_event_ == 0) {
+		cudaSafeCall( cudaEventCreate(&upload_event_) );
+	}
+	return upload_event_;
+}
+
+cudaEvent_t Frame::pipeEvent() {
+	if (pipe_event_ == 0) {
+		cudaSafeCall( cudaEventCreate(&pipe_event_) );
+	}
+	return pipe_event_;
+}
+
 bool ftl::data::Frame::hasAll(const std::unordered_set<ftl::codecs::Channel> &cs) {
 	for (auto &a : cs) {
 		if (!has(a)) return false;
@@ -346,6 +368,12 @@ void Frame::moveTo(Frame &f) {
 	f.changed_ = std::move(changed_);
 	f.packet_rx = (int)packet_rx;
 	f.packet_tx = (int)packet_tx;
+	f.stream_ = stream_;
+	f.upload_event_ = upload_event_;
+	f.pipe_event_ = pipe_event_;
+	stream_ = 0;
+	pipe_event_ = 0;
+	upload_event_ = 0;
 	status_ = FrameStatus::RELEASED;
 }
 
diff --git a/lib/libsgm/src/path_aggregation.cu b/lib/libsgm/src/path_aggregation.cu
index 16567de556520b10a8fcbb3560bcd667ab1fb7b6..e5019ec3a67ddb1113ce2a7f1a873983359e5fb4 100644
--- a/lib/libsgm/src/path_aggregation.cu
+++ b/lib/libsgm/src/path_aggregation.cu
@@ -29,6 +29,7 @@ PathAggregation<MAX_DISPARITY>::PathAggregation()
 		cudaStreamCreate(&m_streams[i]);
 		cudaEventCreate(&m_events[i]);
 	}
+	cudaEventCreate(&m_event);
 }
 
 template <size_t MAX_DISPARITY>
@@ -38,6 +39,7 @@ PathAggregation<MAX_DISPARITY>::~PathAggregation(){
 		cudaStreamDestroy(m_streams[i]);
 		cudaEventDestroy(m_events[i]);
 	}
+	cudaEventDestroy(m_event);
 }
 
 template <size_t MAX_DISPARITY>
@@ -58,7 +60,13 @@ void PathAggregation<MAX_DISPARITY>::enqueue(
 		m_cost_buffer = DeviceBuffer<cost_type>(buffer_size);
 	}
 	const size_t buffer_step = width * height * MAX_DISPARITY;
-	cudaStreamSynchronize(stream);
+	//cudaStreamSynchronize(stream);
+	cudaEventRecord(m_event, stream);
+
+	for(unsigned int i = 0; i < NUM_PATHS; ++i){
+		cudaStreamWaitEvent(m_streams[i], m_event, 0);
+	}
+
 	path_aggregation::enqueue_aggregate_up2down_path<MAX_DISPARITY>(
 		m_cost_buffer.data() + 0 * buffer_step,
 		left, right, width, height, p1, p2, p2_pitch, w, w_pitch, m_streams[0]);
diff --git a/lib/libsgm/src/path_aggregation.hpp b/lib/libsgm/src/path_aggregation.hpp
index 0b019a3b556fb92969acff843ad7431bc3d57b0e..7df96996da46711f7ddcb76dcbc44132ca9f194c 100644
--- a/lib/libsgm/src/path_aggregation.hpp
+++ b/lib/libsgm/src/path_aggregation.hpp
@@ -31,6 +31,7 @@ private:
 	DeviceBuffer<cost_type> m_cost_buffer;
 	cudaStream_t m_streams[NUM_PATHS];
 	cudaEvent_t m_events[NUM_PATHS];
+	cudaEvent_t m_event;
 
 public:
 	PathAggregation();