diff --git a/applications/calibration-multi/src/main.cpp b/applications/calibration-multi/src/main.cpp
index dbea22842d4b0a70546187f48e161def746af207..7c92ea9eaffcf68f1af97c47e1255d95b5a533cc 100644
--- a/applications/calibration-multi/src/main.cpp
+++ b/applications/calibration-multi/src/main.cpp
@@ -395,37 +395,6 @@ void runCameraCalibration(	ftl::Configurable* root,
 	const size_t n_cameras = n_sources * 2;
 	size_t reference_camera = 0;
 	
-	{
-		auto camera = sources[0]->parameters();
-		params.size = Size(camera.width, camera.height);
-		LOG(INFO) << "Camera resolution: " << params.size;
-	}
-
-	params.idx_cameras.resize(n_cameras);
-	std::iota(params.idx_cameras.begin(), params.idx_cameras.end(), 0);
-
-	// TODO: parameter for calibration target type
-	auto calib = MultiCameraCalibrationNew(	n_cameras, reference_camera,
-											params.size, CalibrationTarget(0.250)
-	);
-
-	for (size_t i = 0; i < n_sources; i++) {
-		auto camera_r = sources[i]->parameters(Channel::Right);
-		auto camera_l = sources[i]->parameters(Channel::Left);
-
-		CHECK(params.size == Size(camera_r.width, camera_r.height));
-		CHECK(params.size == Size(camera_l.width, camera_l.height));
-		
-		Mat K;
-		K = createCameraMatrix(camera_r);
-		LOG(INFO) << "K[" << 2 * i + 1 << "] = \n" << K;
-		calib.setCameraParameters(2 * i + 1, K);
-
-		K = createCameraMatrix(camera_l);
-		LOG(INFO) << "K[" << 2 * i << "] = \n" << K;
-		calib.setCameraParameters(2 * i, K);
-	}
-
 	ftl::rgbd::Group group;
 	for (Source* src : sources) {
 		src->setChannel(Channel::Right);
@@ -442,6 +411,8 @@ void runCameraCalibration(	ftl::Configurable* root,
 		mutex.lock();
 		bool good = true;
 		for (size_t i = 0; i < frames.sources.size(); i ++) {
+			frames.frames[i].download(Channel::Left+Channel::Right);
+
 			if (frames.frames[i].get<cv::Mat>(Channel::Left).empty()) good = false;
 			if (frames.frames[i].get<cv::Mat>(Channel::Right).empty()) good = false;
 			if (frames.frames[i].get<cv::Mat>(Channel::Left).channels() != 3) good = false; // ASSERT
@@ -455,6 +426,43 @@ void runCameraCalibration(	ftl::Configurable* root,
 		mutex.unlock();
 		return true;
 	});
+
+	for (auto &source : sources) {
+		while (!source->isReady()) {
+			std::this_thread::sleep_for(std::chrono::milliseconds(100));
+		}
+	}
+
+	{
+		auto camera = sources[0]->parameters();
+		params.size = Size(camera.width, camera.height);
+		LOG(INFO) << "Camera resolution: " << params.size;
+	}
+
+	params.idx_cameras.resize(n_cameras);
+	std::iota(params.idx_cameras.begin(), params.idx_cameras.end(), 0);
+
+	// TODO: parameter for calibration target type
+	auto calib = MultiCameraCalibrationNew(	n_cameras, reference_camera,
+											params.size, CalibrationTarget(0.250)
+	);
+
+	for (size_t i = 0; i < n_sources; i++) {
+		auto camera_r = sources[i]->parameters(Channel::Right);
+		auto camera_l = sources[i]->parameters();
+
+		CHECK(params.size == Size(camera_r.width, camera_r.height));
+		CHECK(params.size == Size(camera_l.width, camera_l.height));
+		
+		Mat K;
+		K = createCameraMatrix(camera_r);
+		LOG(INFO) << "K[" << 2 * i + 1 << "] = \n" << K;
+		calib.setCameraParameters(2 * i + 1, K);
+
+		K = createCameraMatrix(camera_l);
+		LOG(INFO) << "K[" << 2 * i << "] = \n" << K;
+		calib.setCameraParameters(2 * i, K);
+	}
 	
 	int iter = 0;
 	Mat show;
diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp
index debb8e73d5cf6f1f1bc1d685bb7298350014614c..ae0d5627a94a84afb819a9b1daf62b6bddacccae 100644
--- a/applications/gui/src/camera.cpp
+++ b/applications/gui/src/camera.cpp
@@ -293,6 +293,7 @@ void ftl::gui::Camera::setChannel(Channel c) {
 	case Channel::Flow:
 	case Channel::Confidence:
 	case Channel::Normals:
+	case Channel::ColourNormals:
 	case Channel::Right:
 		src_->setChannel(c);
 		break;
@@ -464,7 +465,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() {
 				break;
 
 		//case Channel::Flow:
-		case Channel::Normals:
+		case Channel::ColourNormals:
 		case Channel::Right:
 				if (im2_.rows == 0 || im2_.type() != CV_8UC3) { break; }
 				texture2_.update(im2_);
diff --git a/applications/gui/src/media_panel.cpp b/applications/gui/src/media_panel.cpp
index b3ea1a6afbbc8f2a31bf294e85a85564fdffe1cb..800940199e2488ca1b11bd723498059efd63d374 100644
--- a/applications/gui/src/media_panel.cpp
+++ b/applications/gui/src/media_panel.cpp
@@ -192,7 +192,7 @@ MediaPanel::MediaPanel(ftl::gui::Screen *screen) : nanogui::Window(screen, ""),
 	button->setCallback([this]() {
 		ftl::gui::Camera *cam = screen_->activeCamera();
 		if (cam) {
-			cam->setChannel(Channel::Normals);
+			cam->setChannel(Channel::ColourNormals);
 		}
 	});
 
diff --git a/applications/gui/src/screen.cpp b/applications/gui/src/screen.cpp
index a9f4cc023265ba427a0be451ee3337fe834528bc..c359e227d9be39ab98df2cd8e816323b6a0ecbef 100644
--- a/applications/gui/src/screen.cpp
+++ b/applications/gui/src/screen.cpp
@@ -48,11 +48,13 @@ namespace {
 
 	constexpr char const *const defaultImageViewFragmentShader =
 		R"(#version 330
-		uniform sampler2D image;
+		uniform sampler2D image1;
+		uniform sampler2D image2;
+		uniform float blendAmount;
 		out vec4 color;
 		in vec2 uv;
 		void main() {
-			color = texture(image, uv);
+			color = blendAmount * texture(image1, uv) + (1.0 - blendAmount) * texture(image2, uv);
 		})";
 }
 
@@ -70,6 +72,20 @@ ftl::gui::Screen::Screen(ftl::Configurable *proot, ftl::net::Universe *pnet, ftl
 	has_vr_ = vr::VR_IsHmdPresent();
 	#endif
 
+	zoom_ = root_->value("zoom", 1.0f);
+	root_->on("zoom", [this](const ftl::config::Event &e) {
+		zoom_ = root_->value("zoom", 1.0f);
+	});
+
+	pos_x_ = root_->value("position_x", 0.0f);
+	root_->on("position_x", [this](const ftl::config::Event &e) {
+		pos_x_ = root_->value("position_x", 0.0f);
+	});
+	pos_y_ = root_->value("position_y", 0.0f);
+	root_->on("position_y", [this](const ftl::config::Event &e) {
+		pos_y_ = root_->value("position_y", 0.0f);
+	});
+
 	setSize(Vector2i(1280,720));
 
 	toolbuttheme = new Theme(*theme());
@@ -352,11 +368,27 @@ void ftl::gui::Screen::setActiveCamera(ftl::gui::Camera *cam) {
 	}
 }
 
+bool ftl::gui::Screen::scrollEvent(const Eigen::Vector2i &p, const Eigen::Vector2f &rel) {
+	if (nanogui::Screen::scrollEvent(p, rel)) {
+		return true;
+	} else {
+		zoom_ += zoom_ * 0.1f * rel[1];
+		return true;
+	}
+}
+
 bool ftl::gui::Screen::mouseMotionEvent(const Eigen::Vector2i &p, const Eigen::Vector2i &rel, int button, int modifiers) {
 	if (nanogui::Screen::mouseMotionEvent(p, rel, button, modifiers)) {
 		return true;
 	} else {
-		if (camera_) camera_->mouseMovement(rel[0], rel[1], button);
+		if (camera_) {
+			if (button == 1) {
+				camera_->mouseMovement(rel[0], rel[1], button);
+			} else if (button == 2) {
+				pos_x_ += rel[0];
+				pos_y_ += -rel[1];
+			}
+		}
 	}
 	return true; // TODO: return statement was missing; is true correct?
 }
@@ -365,7 +397,10 @@ bool ftl::gui::Screen::mouseButtonEvent(const nanogui::Vector2i &p, int button,
 	if (nanogui::Screen::mouseButtonEvent(p, button, down, modifiers)) {
 		return true;
 	} else {
-		if (camera_ && down) {
+		if (!camera_) return false;
+		
+		//ol movable = camera_->source()->hasCapabilities(ftl::rgbd::kCapMovable);
+		if (button == 0 && down) {
 			Eigen::Vector2f screenSize = size().cast<float>();
 			auto mScale = (screenSize.cwiseQuotient(imageSize).minCoeff());
 			Eigen::Vector2f scaleFactor = mScale * imageSize.cwiseQuotient(screenSize);
@@ -389,8 +424,10 @@ bool ftl::gui::Screen::mouseButtonEvent(const nanogui::Vector2i &p, int button,
 			//lookPoint_ = Eigen::Vector3f(worldPos[0],worldPos[1],worldPos[2]);
 			//LOG(INFO) << "Depth at click = " << -camPos[2];
 			return true;
+		} else {
+			
 		}
-	return false;
+		return false;
 	}
 }
 
@@ -444,9 +481,9 @@ void ftl::gui::Screen::draw(NVGcontext *ctx) {
 		#endif
 
 		if (mImageID < std::numeric_limits<unsigned int>::max() && imageSize[0] > 0) {
-			auto mScale = (screenSize.cwiseQuotient(imageSize).minCoeff());
+			auto mScale = (screenSize.cwiseQuotient(imageSize).minCoeff()) * zoom_;
 			Vector2f scaleFactor = mScale * imageSize.cwiseQuotient(screenSize);
-			Vector2f positionInScreen(0.0f, 0.0f);
+			Vector2f positionInScreen(pos_x_, pos_y_);
 			auto mOffset = (screenSize - (screenSize.cwiseProduct(scaleFactor))) / 2;
 			Vector2f positionAfterOffset = positionInScreen + mOffset;
 			Vector2f imagePosition = positionAfterOffset.cwiseQuotient(screenSize);
@@ -457,8 +494,12 @@ void ftl::gui::Screen::draw(NVGcontext *ctx) {
 					size().x() * r, size().y() * r);*/
 			mShader.bind();
 			glActiveTexture(GL_TEXTURE0);
-			glBindTexture(GL_TEXTURE_2D, mImageID);
-			mShader.setUniform("image", 0);
+			glBindTexture(GL_TEXTURE_2D, leftEye_);
+			glActiveTexture(GL_TEXTURE1);
+			glBindTexture(GL_TEXTURE_2D, (camera_->getRight().isValid()) ? rightEye_ : leftEye_);
+			mShader.setUniform("image1", 0);
+			mShader.setUniform("image2", 1);
+			mShader.setUniform("blendAmount", (camera_->getChannel() != ftl::codecs::Channel::Left) ? root_->value("blending", 0.5f) : 1.0f);
 			mShader.setUniform("scaleFactor", scaleFactor);
 			mShader.setUniform("position", imagePosition);
 			mShader.drawIndexed(GL_TRIANGLES, 0, 2);
diff --git a/applications/gui/src/screen.hpp b/applications/gui/src/screen.hpp
index a0ac39d064875bdc5be81e526a95ac9269617c25..a195f4ccdd45500c534039709ba6762fdd640b22 100644
--- a/applications/gui/src/screen.hpp
+++ b/applications/gui/src/screen.hpp
@@ -29,6 +29,7 @@ class Screen : public nanogui::Screen {
 	~Screen();
 
 	bool mouseMotionEvent(const Eigen::Vector2i &p, const Eigen::Vector2i &rel, int button, int modifiers);
+	bool scrollEvent(const Eigen::Vector2i &p, const Eigen::Vector2f &rel);
 	bool mouseButtonEvent(const nanogui::Vector2i &p, int button, bool down, int modifiers);
 	bool keyboardEvent(int key, int scancode, int action, int modifiers);
 
@@ -92,6 +93,9 @@ class Screen : public nanogui::Screen {
 	ftl::Configurable *root_;
 	std::string status_;
 	ftl::gui::Camera *camera_;
+	float zoom_;
+	float pos_x_;
+	float pos_y_;
 
 	GLuint leftEye_;
 	GLuint rightEye_;
diff --git a/applications/reconstruct/src/ilw/correspondence.cu b/applications/reconstruct/src/ilw/correspondence.cu
index 8fb9d94b75df5a5d2687fa81da01d999f2bcfeeb..9fb440ae41bf6e1515d72759207154fa8224a588 100644
--- a/applications/reconstruct/src/ilw/correspondence.cu
+++ b/applications/reconstruct/src/ilw/correspondence.cu
@@ -65,7 +65,7 @@ __global__ void correspondence_energy_vector_kernel(
 	
 	const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1);
 
-    const uchar4 colour1 = c1.tex2D(x, y);
+    const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f);
 
 	float bestdepth = 0.0f;
 	float bestweight = 0.0f;
@@ -91,17 +91,17 @@ __global__ void correspondence_energy_vector_kernel(
         // Calculate adjusted depth 3D point in camera 2 space
         const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust));
         const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos;
-        const uint2 screen = cam2.camToScreen<uint2>(camPos);
+        const float2 screen = cam2.camToScreen<float2>(camPos);
 
         if (screen.x >= cam2.width || screen.y >= cam2.height) continue;
 
 		// Generate a depth correspondence value
-		const float depth2 = d2.tex2D((int)screen.x, (int)screen.y);
+		const float depth2 = d2.tex2D(int(screen.x+0.5f), int(screen.y+0.5f));
 		const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
 		//const float dweight = ftl::cuda::weighting(fabs(depth_adjust - depth1), 2.0f*params.range);
 		
 		// Generate a colour correspondence value
-		const uchar4 colour2 = c2.tex2D((int)screen.x, (int)screen.y);
+		const auto colour2 = c2.tex2D(screen.x, screen.y);
 		const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
 
 		const float weight = weightFunction<FUNCTION>(params, dweight, cweight);
diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp
index dfa43267ed5d3ac09309a466b150c0498fb72967..a686f35c337573a04415b0f13a598ec8a3538057 100644
--- a/applications/reconstruct/src/ilw/ilw.cpp
+++ b/applications/reconstruct/src/ilw/ilw.cpp
@@ -25,7 +25,7 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) {
 }
 
 ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
-    enabled_ = value("ilw_align", true);
+    enabled_ = value("ilw_align", false);
     iterations_ = value("iterations", 4);
     motion_rate_ = value("motion_rate", 0.8f);
     motion_window_ = value("motion_window", 3);
@@ -42,7 +42,7 @@ ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
     });
 
 	on("ilw_align", [this](const ftl::config::Event &e) {
-        enabled_ = value("ilw_align", true);
+        enabled_ = value("ilw_align", false);
     });
 
     on("iterations", [this](const ftl::config::Event &e) {
@@ -215,11 +215,11 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
 
         f.createTexture<float>(Channel::Depth2, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
         f.createTexture<float>(Channel::Confidence, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
-		f.createTexture<int>(Channel::Mask, Format<int>(f.get<GpuMat>(Channel::Colour).size()));
+		//f.createTexture<int>(Channel::Mask, Format<int>(f.get<GpuMat>(Channel::Colour).size()));
         f.createTexture<uchar4>(Channel::Colour);
 		f.createTexture<float>(Channel::Depth);
 
-		f.get<GpuMat>(Channel::Mask).setTo(cv::Scalar(0));
+		//f.get<GpuMat>(Channel::Mask).setTo(cv::Scalar(0));
     }
 
 	//cudaSafeCall(cudaStreamSynchronize(stream_));
@@ -232,7 +232,7 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
     cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 
     // Find discontinuity mask
-    for (size_t i=0; i<fs.frames.size(); ++i) {
+    /*for (size_t i=0; i<fs.frames.size(); ++i) {
         auto &f = fs.frames[i];
         auto s = fs.sources[i];
 
@@ -243,7 +243,7 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
             discon_mask_,
             stream
         );
-    }
+    }*/
 
 	// First do any preprocessing
 	if (fill_depth_) {
diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp
index bcbe3e52e655e600ea6e654fbeddf6248221e3b4..0603dad4028d69223b0ce5ddc31d36eafc900c14 100644
--- a/applications/reconstruct/src/main.cpp
+++ b/applications/reconstruct/src/main.cpp
@@ -33,6 +33,12 @@
 #include <ftl/operators/smoothing.hpp>
 #include <ftl/operators/colours.hpp>
 #include <ftl/operators/normals.hpp>
+#include <ftl/operators/filling.hpp>
+#include <ftl/operators/segmentation.hpp>
+#include <ftl/operators/mask.hpp>
+#include <ftl/operators/antialiasing.hpp>
+#include <ftl/operators/mvmls.hpp>
+#include <ftl/operators/clipping.hpp>
 
 #include <ftl/cuda/normals.hpp>
 #include <ftl/registration.hpp>
@@ -62,15 +68,64 @@ using ftl::registration::loadTransformations;
 using ftl::registration::saveTransformations;
 
 static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) {
-  Eigen::Affine3d rx =
-      Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0)));
-  Eigen::Affine3d ry =
-      Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0)));
-  Eigen::Affine3d rz =
-      Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1)));
-  return rz * rx * ry;
+	Eigen::Affine3d rx =
+		Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0)));
+	Eigen::Affine3d ry =
+		Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0)));
+	Eigen::Affine3d rz =
+		Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1)));
+	return rz * rx * ry;
 }
 
+// TODO:	*	Remove this class (requires more general solution). Also does not
+//				process disconnections/reconnections/types etc. correctly.
+//			*	Update when new options become available.
+
+class ConfigProxy {
+	private:
+	vector<ftl::UUID> peers_;
+	vector<std::string> uris_;
+	ftl::net::Universe *net_;
+	
+	public:
+	ConfigProxy(ftl::net::Universe *net) {
+		net_ = net;
+
+		auto response = net_->findAll<std::string>("node_details");
+		for (auto &r : response) {
+			auto r_json = json_t::parse(r);
+			peers_.push_back(ftl::UUID(r_json["id"].get<std::string>()));
+			uris_.push_back(r_json["title"].get<std::string>());
+		}
+	}
+
+	void add(ftl::Configurable *root, const std::string &uri, const std::string &name) {
+		auto config = json_t::parse(net_->call<string>(peers_[0], "get_cfg", uris_[0] + "/" + uri));
+		auto *proxy = ftl::create<ftl::Configurable>(root, name);
+		
+		try {
+			for (auto &itm : config.get<json::object_t>()) {
+				auto key = itm.first;
+				auto value = itm.second;
+				if (*key.begin() == '$') { continue; }
+
+				proxy->set(key, value);
+				proxy->on(key, [this, uri, key, value, proxy](const ftl::config::Event&) {
+					for (size_t i = 0; i < uris_.size(); i++) {
+						// TODO: check that config exists?
+						auto peer = peers_[i];
+						std::string name = uris_[i] + "/" + uri + "/" + key;
+						net_->send(peer, "update_cfg", name, proxy->getConfig()[key].dump());
+					}
+				});
+			}
+		}
+		catch (nlohmann::detail::type_error) {
+			LOG(ERROR) << "Failed to add config proxy for: " << uri << "/" << name;
+		}
+	}
+};
+
 static void run(ftl::Configurable *root) {
 	Universe *net = ftl::create<Universe>(root, "net");
 	ftl::ctrl::Slave slave(net, root);
@@ -80,7 +135,7 @@ static void run(ftl::Configurable *root) {
 	
 	net->start();
 	net->waitConnections();
-
+	
 	// Check paths for an FTL file to load...
 	auto paths = (*root->get<nlohmann::json>("paths"));
 	for (auto &x : paths.items()) {
@@ -120,6 +175,17 @@ static void run(ftl::Configurable *root) {
 		LOG(ERROR) << "No sources configured!";
 		return;
 	}
+	
+	ConfigProxy *configproxy = nullptr;
+	if (net->numberOfPeers() > 0) {
+		configproxy = new ConfigProxy(net); // TODO delete
+		auto *disparity = ftl::create<ftl::Configurable>(root, "disparity");
+		configproxy->add(disparity, "source/disparity/algorithm", "algorithm");
+		configproxy->add(disparity, "source/disparity/bilateral_filter", "bilateral_filter");
+		configproxy->add(disparity, "source/disparity/optflow_filter", "optflow_filter");
+		configproxy->add(disparity, "source/disparity/mls", "mls");
+		configproxy->add(disparity, "source/disparity/cross", "cross");
+	}
 
 	// Create scene transform, intended for axis aligning the walls and floor
 	Eigen::Matrix4d transform;
@@ -182,8 +248,12 @@ static void run(ftl::Configurable *root) {
 	int o = root->value("origin_pose", 0) % sources.size();
 	virt->setPose(sources[o]->getPose());
 
+	auto *renderpipe = ftl::config::create<ftl::operators::Graph>(root, "render_pipe");
+	renderpipe->append<ftl::operators::ColourChannels>("colour");  // Generate interpolation texture...
+	renderpipe->append<ftl::operators::FXAA>("antialiasing"); 
+
 	// Generate virtual camera render when requested by streamer
-	virt->onRender([splat,virt,&scene_B,align](ftl::rgbd::Frame &out) {
+	virt->onRender([splat,virt,&scene_B,align,renderpipe](ftl::rgbd::Frame &out) {
 		//virt->setTimestamp(scene_B.timestamp);
 		// Do we need to convert Lab to BGR?
 		if (align->isLabColour()) {
@@ -193,6 +263,7 @@ static void run(ftl::Configurable *root) {
 			}
 		}
 		splat->render(virt, out);
+		renderpipe->apply(out, out, virt, 0);
 	});
 	stream->add(virt);
 
@@ -229,7 +300,7 @@ static void run(ftl::Configurable *root) {
 
 			// TODO: Write pose+calibration+config packets
 			auto sources = group->sources();
-			for (int i=0; i<sources.size(); ++i) {
+			for (size_t i=0; i<sources.size(); ++i) {
 				//writeSourceProperties(writer, i, sources[i]);
 				sources[i]->inject(Channel::Calibration, sources[i]->parameters(), Channel::Left, sources[i]->getCapabilities());
 				sources[i]->inject(sources[i]->getPose()); 
@@ -251,11 +322,18 @@ static void run(ftl::Configurable *root) {
 
 	// Create the source depth map pipeline
 	auto *pipeline1 = ftl::config::create<ftl::operators::Graph>(root, "pre_filters");
+	pipeline1->append<ftl::operators::ClipScene>("clipping");
 	pipeline1->append<ftl::operators::ColourChannels>("colour");  // Convert BGR to BGRA
-	pipeline1->append<ftl::operators::HFSmoother>("hfnoise");  // Remove high-frequency noise
+	//pipeline1->append<ftl::operators::HFSmoother>("hfnoise");  // Remove high-frequency noise
 	pipeline1->append<ftl::operators::Normals>("normals");  // Estimate surface normals
-	pipeline1->append<ftl::operators::SmoothChannel>("smoothing");  // Generate a smoothing channel
-	pipeline1->append<ftl::operators::AdaptiveMLS>("mls");  // Perform MLS (using smoothing channel)
+	//pipeline1->append<ftl::operators::SmoothChannel>("smoothing");  // Generate a smoothing channel
+	//pipeline1->append<ftl::operators::ScanFieldFill>("filling");  // Generate a smoothing channel
+	pipeline1->append<ftl::operators::CrossSupport>("cross");
+	pipeline1->append<ftl::operators::DiscontinuityMask>("discontinuity");
+	pipeline1->append<ftl::operators::CullDiscontinuity>("remove_discontinuity");
+	//pipeline1->append<ftl::operators::AggreMLS>("mls");  // Perform MLS (using smoothing channel)
+	pipeline1->append<ftl::operators::VisCrossSupport>("viscross")->set("enabled", false);
+	pipeline1->append<ftl::operators::MultiViewMLS>("mvmls");
 	// Alignment
 
 
diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp
index ba07e90424db91c5cc36e18d0aa40a68109bddbd..f99754cbf558bb3d0f53fde37763855fac7af128 100644
--- a/applications/vision/src/main.cpp
+++ b/applications/vision/src/main.cpp
@@ -48,32 +48,6 @@ using std::chrono::milliseconds;
 using cv::Mat;
 using json = nlohmann::json;
 
-namespace ftl {
-void disparityToDepth(const cv::Mat &disparity, cv::Mat &depth, const cv::Mat &q) {
-	cv::Matx44d _Q;
-    q.convertTo(_Q, CV_64F);
-
-	if (depth.empty()) depth = cv::Mat(disparity.size(), CV_32F);
-
-	for( int y = 0; y < disparity.rows; y++ ) {
-		const float *sptr = disparity.ptr<float>(y);
-		float *dptr = depth.ptr<float>(y);
-
-		for( int x = 0; x < disparity.cols; x++ ) {
-			double d = sptr[x];
-			cv::Vec4d homg_pt = _Q*cv::Vec4d(x, y, d, 1.0);
-			//dptr[x] = Vec3d(homg_pt.val);
-			//dptr[x] /= homg_pt[3];
-			dptr[x] = (homg_pt[2] / homg_pt[3]); // Depth in meters
-
-			if( fabs(d) <= FLT_EPSILON )
-				dptr[x] = 1000.0f;
-		}
-	}
-}
-};
-
-
 static void run(ftl::Configurable *root) {
 	Universe *net = ftl::create<Universe>(root, "net");
 	ftl::ctrl::Slave slave(net, root);
@@ -107,6 +81,8 @@ static void run(ftl::Configurable *root) {
 		stream->run(true);
 	//}
 
+	ftl::timer::start(true);
+
 	LOG(INFO) << "Stopping...";
 	slave.stop();
 	stream->stop();
diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp
index 85cd9fffc45ddcda1b6cc13a160c74c30797b804..6673275fe7b4f874b0807c48e690e24b4aaddb51 100644
--- a/components/codecs/include/ftl/codecs/channels.hpp
+++ b/components/codecs/include/ftl/codecs/channels.hpp
@@ -19,7 +19,7 @@ enum struct Channel : int {
     Deviation		= 4,
     Screen          = 4,
     Normals			= 5,	// 32FC4
-    Points			= 6,	// 32FC4
+    Points			= 6,	// 32FC4 (should be deprecated)
     Confidence		= 7,	// 32F
     Contribution	= 7,	// 32F
     EnergyVector	= 8,	// 32FC4
@@ -28,9 +28,10 @@ enum struct Channel : int {
     Energy			= 10,	// 32F
 	Mask			= 11,	// 32U
 	Density			= 12,	// 32F
-    LeftGray		= 13,	// Deprecated
-    RightGray		= 14,	// Deprecated
-    Overlay1		= 15,	// Unused
+	Support1		= 13,	// 8UC4 (currently)
+	Support2		= 14,	// 8UC4 (currently)
+    Segmentation	= 15,	// 32S?
+    ColourNormals   = 16,   // 8UC4
 
 	AudioLeft		= 32,
 	AudioRight		= 33,
diff --git a/components/codecs/src/channels.cpp b/components/codecs/src/channels.cpp
index 25b06b533b5c95d16bd60365ccbcc4373e09bd45..effaf9eb261dfe460f2d6f5bf6f08849e4f93538 100644
--- a/components/codecs/src/channels.cpp
+++ b/components/codecs/src/channels.cpp
@@ -21,9 +21,9 @@ static ChannelInfo info[] = {
     "Energy", CV_32F,
 	"Mask", CV_32S,
 	"Density", CV_32F,
-    "LeftGray", CV_8U,
-    "RightGray", CV_8U,
-    "Overlay1", CV_8UC3,
+    "Support1", CV_8UC4,
+    "Support2", CV_8UC4,
+    "Segmentation", CV_32S,
 
 	"NoName", 0,
 	"NoName", 0,
diff --git a/components/common/cpp/include/ftl/configurable.hpp b/components/common/cpp/include/ftl/configurable.hpp
index 244b548285b92902e645436891b04acb182ffd08..219bd171edf4b7997cc35f97fa7787a0c4bb4559 100644
--- a/components/common/cpp/include/ftl/configurable.hpp
+++ b/components/common/cpp/include/ftl/configurable.hpp
@@ -104,7 +104,7 @@ class Configurable {
 	protected:
 	nlohmann::json *config_;
 
-	virtual void inject(const std::string name, nlohmann::json &value) {}
+	virtual void inject(const std::string &name, nlohmann::json &value) {}
 
 	private:
 	std::map<std::string, std::list<std::function<void(const config::Event&)>>> observers_; 
diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp
index 116e26ec74ff4b469e8d1214cafa2897264f80ad..1b4f3a47afb000a9d1e05d264a5dd79a9d1c3e4a 100644
--- a/components/common/cpp/include/ftl/cuda_common.hpp
+++ b/components/common/cpp/include/ftl/cuda_common.hpp
@@ -28,6 +28,22 @@ bool hasCompute(int major, int minor);
 
 int deviceCount();
 
+template <typename T>
+struct Float;
+
+template <> struct Float<float> { typedef float type; };
+template <> struct Float<int> { typedef float type; };
+template <> struct Float<float4> { typedef float4 type; };
+template <> struct Float<uchar4> { typedef float4 type; };
+template <> struct Float<short2> { typedef float2 type; };
+
+template <typename T>
+struct ScaleValue;
+
+template <> struct ScaleValue<uchar4> { static constexpr float value = 255.0f; };
+template <> struct ScaleValue<float> { static constexpr float value = 1.0f; };
+template <> struct ScaleValue<float4> { static constexpr float value = 1.0f; };
+
 /**
  * Represent a CUDA texture object. Instances of this class can be used on both
  * host and device. A texture object base cannot be constructed directly, it
@@ -89,7 +105,7 @@ class TextureObject : public TextureObjectBase {
 	static_assert((16u % sizeof(T)) == 0, "Channel format must be aligned with 16 bytes");
 
 	__host__ __device__ TextureObject() : TextureObjectBase() {};
-	explicit TextureObject(const cv::cuda::GpuMat &d);
+	explicit TextureObject(const cv::cuda::GpuMat &d, bool interpolated=false);
 	explicit TextureObject(const cv::cuda::PtrStepSz<T> &d);
 	TextureObject(T *ptr, int pitch, int width, int height);
 	TextureObject(size_t width, size_t height);
@@ -110,7 +126,8 @@ class TextureObject : public TextureObjectBase {
 
 	#ifdef __CUDACC__
 	__device__ inline T tex2D(int u, int v) const { return ::tex2D<T>(texobj_, u, v); }
-	__device__ inline T tex2D(float u, float v) const { return ::tex2D<T>(texobj_, u, v); }
+	__device__ inline T tex2D(unsigned int u, unsigned int v) const { return ::tex2D<T>(texobj_, (int)u, (int)v); }
+	__device__ inline typename Float<T>::type tex2D(float u, float v) const { return ::tex2D<typename Float<T>::type>(texobj_, u, v) * ScaleValue<T>::value; }
 	#endif
 
 	__host__ __device__ inline const T &operator()(int u, int v) const { return reinterpret_cast<T*>(ptr_)[u+v*pitch2_]; }
@@ -137,7 +154,7 @@ TextureObject<T> &TextureObject<T>::cast(TextureObjectBase &b) {
  * Create a 2D array texture from an OpenCV GpuMat object.
  */
 template <typename T>
-TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d) {
+TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d, bool interpolated) {
 	// GpuMat must have correct data type
 	CHECK(d.type() == ftl::traits::OpenCVType<T>::value);
 
@@ -153,7 +170,8 @@ TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d) {
 	cudaTextureDesc texDesc;
 	// cppcheck-suppress memsetClassFloat
 	memset(&texDesc, 0, sizeof(texDesc));
-	texDesc.readMode = cudaReadModeElementType;
+	texDesc.readMode = (interpolated) ? cudaReadModeNormalizedFloat : cudaReadModeElementType;
+	if (interpolated) texDesc.filterMode = cudaFilterModeLinear;
 
 	cudaTextureObject_t tex = 0;
 	cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
@@ -165,7 +183,7 @@ TextureObject<T>::TextureObject(const cv::cuda::GpuMat &d) {
 	height_ = d.rows;
 	needsfree_ = false;
 	cvType_ = ftl::traits::OpenCVType<T>::value;
-	//needsdestroy_ = true;
+	needsdestroy_ = true;
 }
 
 #endif  // __CUDACC__
@@ -188,6 +206,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) {
 	// cppcheck-suppress memsetClassFloat
 	memset(&texDesc, 0, sizeof(texDesc));
 	texDesc.readMode = cudaReadModeElementType;
+	//if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear;
 
 	cudaTextureObject_t tex = 0;
 	cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
@@ -199,7 +218,7 @@ TextureObject<T>::TextureObject(const cv::cuda::PtrStepSz<T> &d) {
 	height_ = d.rows;
 	needsfree_ = false;
 	cvType_ = ftl::traits::OpenCVType<T>::value;
-	//needsdestroy_ = true;
+	needsdestroy_ = true;
 }
 
 /**
@@ -221,9 +240,10 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) {
 	// cppcheck-suppress memsetClassFloat
 	memset(&texDesc, 0, sizeof(texDesc));
 	texDesc.readMode = cudaReadModeElementType;
+	//if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear;
 
 	cudaTextureObject_t tex = 0;
-	cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
+	cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
 	texobj_ = tex;
 	pitch_ = pitch;
 	pitch2_ = pitch_ / sizeof(T);
@@ -232,7 +252,7 @@ TextureObject<T>::TextureObject(T *ptr, int pitch, int width, int height) {
 	height_ = height;
 	needsfree_ = false;
 	cvType_ = ftl::traits::OpenCVType<T>::value;
-	//needsdestroy_ = true;
+	needsdestroy_ = true;
 }
 
 template <typename T>
@@ -255,7 +275,8 @@ TextureObject<T>::TextureObject(size_t width, size_t height) {
 		// cppcheck-suppress memsetClassFloat
 		memset(&texDesc, 0, sizeof(texDesc));
 		texDesc.readMode = cudaReadModeElementType;
-		cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
+		//if (std::is_same<T,uchar4>::value) texDesc.filterMode = cudaFilterModeLinear;
+		cudaSafeCall(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
 	//}
 
 	texobj_ = tex;
@@ -264,7 +285,7 @@ TextureObject<T>::TextureObject(size_t width, size_t height) {
 	needsfree_ = true;
 	pitch2_ = pitch_ / sizeof(T);
 	cvType_ = ftl::traits::OpenCVType<T>::value;
-	//needsdestroy_ = true;
+	needsdestroy_ = true;
 }
 
 #ifndef __CUDACC__
@@ -291,6 +312,7 @@ TextureObject<T>::TextureObject(const TextureObject<T> &p) {
 	pitch2_ = pitch_ / sizeof(T);
 	cvType_ = ftl::traits::OpenCVType<T>::value;
 	needsfree_ = false;
+	needsdestroy_ = false;
 }
 
 template <typename T>
@@ -302,14 +324,17 @@ TextureObject<T>::TextureObject(TextureObject<T> &&p) {
 	pitch_ = p.pitch_;
 	pitch2_ = pitch_ / sizeof(T);
 	needsfree_ = p.needsfree_;
+	needsdestroy_ = p.needsdestroy_;
 	p.texobj_ = 0;
 	p.needsfree_ = false;
+	p.needsdestroy_ = false;
 	p.ptr_ = nullptr;
 	cvType_ = ftl::traits::OpenCVType<T>::value;
 }
 
 template <typename T>
 TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) {
+	free();
 	texobj_ = p.texobj_;
 	ptr_ = p.ptr_;
 	width_ = p.width_;
@@ -318,11 +343,13 @@ TextureObject<T> &TextureObject<T>::operator=(const TextureObject<T> &p) {
 	pitch2_ = pitch_ / sizeof(T);
 	cvType_ = ftl::traits::OpenCVType<T>::value;
 	needsfree_ = false;
+	needsdestroy_ = false;
 	return *this;
 }
 
 template <typename T>
 TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) {
+	free();
 	texobj_ = p.texobj_;
 	ptr_ = p.ptr_;
 	width_ = p.width_;
@@ -330,8 +357,10 @@ TextureObject<T> &TextureObject<T>::operator=(TextureObject<T> &&p) {
 	pitch_ = p.pitch_;
 	pitch2_ = pitch_ / sizeof(T);
 	needsfree_ = p.needsfree_;
+	needsdestroy_ = p.needsdestroy_;
 	p.texobj_ = 0;
 	p.needsfree_ = false;
+	p.needsdestroy_ = false;
 	p.ptr_ = nullptr;
 	cvType_ = ftl::traits::OpenCVType<T>::value;
 	return *this;
diff --git a/components/common/cpp/src/configuration.cpp b/components/common/cpp/src/configuration.cpp
index bd9d9feec20538cebc210cafba72a98f651b243c..89d539cbec5270b7d7a5bc57a0a113eb57456553 100644
--- a/components/common/cpp/src/configuration.cpp
+++ b/components/common/cpp/src/configuration.cpp
@@ -290,7 +290,8 @@ json_t &ftl::config::resolve(const std::string &puri, bool eager) {
 		std::string u = uri.getBaseURI();
 		auto ix = config_index.find(u);
 		if (ix == config_index.end()) {
-			LOG(FATAL) << "Cannot find resource: " << u;
+			LOG(WARNING) << "Cannot find resource: " << u;
+			return null_json;
 		}
 
 		auto ptr = nlohmann::json::json_pointer("/"+uri.getFragment());
diff --git a/components/common/cpp/src/timer.cpp b/components/common/cpp/src/timer.cpp
index 328006ab82041d9613ab9ef7847d74727e3aa7f2..f13255e7359747406feb5c3d7da3b1ba779c617f 100644
--- a/components/common/cpp/src/timer.cpp
+++ b/components/common/cpp/src/timer.cpp
@@ -169,6 +169,8 @@ namespace ftl {
 	extern bool running;
 }
 
+//static MUTEX mtx;
+
 void ftl::timer::start(bool block) {
 	if (active) return;
 	active = true;
diff --git a/components/net/cpp/include/ftl/net_configurable.hpp b/components/net/cpp/include/ftl/net_configurable.hpp
index 3a31ecc27675f2e37b72bc42ee83da0dee5b7e4c..bdd21c4700e7664cff996672585054c30b6e8e6a 100644
--- a/components/net/cpp/include/ftl/net_configurable.hpp
+++ b/components/net/cpp/include/ftl/net_configurable.hpp
@@ -13,7 +13,7 @@ namespace ftl {
 	~NetConfigurable();
 
 	protected:
-	void inject(const std::string name, nlohmann::json &value);
+	void inject(const std::string &name, nlohmann::json &value);
 
     private:
 	ftl::UUID peer;
diff --git a/components/net/cpp/src/net_configurable.cpp b/components/net/cpp/src/net_configurable.cpp
index edcec65b8c4ba452472e30efd29b672b7a6e2c55..be98cf7edbb221c61e9732a9e59d8546f1185644 100644
--- a/components/net/cpp/src/net_configurable.cpp
+++ b/components/net/cpp/src/net_configurable.cpp
@@ -7,6 +7,6 @@ ftl::NetConfigurable::NetConfigurable(ftl::UUID peer, const std::string &suri, f
 
 ftl::NetConfigurable::~NetConfigurable(){}
 
-void ftl::NetConfigurable::inject(const std::string name, nlohmann::json &value) {
+void ftl::NetConfigurable::inject(const std::string &name, nlohmann::json &value) {
     ctrl.set(peer, suri + std::string("/") + name, value);
 }
diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt
index c93a87f6693334c30e28039387db45554ebdb0d2..b025a39b357a628ee84d2f6d3d7b82c8aa6fcc3e 100644
--- a/components/operators/CMakeLists.txt
+++ b/components/operators/CMakeLists.txt
@@ -1,13 +1,41 @@
-add_library(ftloperators
-    src/smoothing.cpp
+set(OPERSRC
+	src/smoothing.cpp
 	src/smoothing.cu
 	src/mls.cu
 	src/smoothchan.cu
 	src/operator.cpp
 	src/colours.cpp
 	src/normals.cpp
+	src/filling.cpp
+	src/filling.cu
+	src/disparity/disp2depth.cu
+	src/disparity/disparity_to_depth.cpp
+	src/disparity/bilateral_filter.cpp
+	src/segmentation.cu
+	src/segmentation.cpp
+	src/mask.cu
+	src/mask.cpp
+	src/antialiasing.cpp
+	src/antialiasing.cu
+	src/mvmls.cpp
+	src/correspondence.cu
+	src/clipping.cpp
 )
 
+
+if (LIBSGM_FOUND)
+	list(APPEND OPERSRC src/disparity/fixstars_sgm.cpp)
+endif (LIBSGM_FOUND)
+
+if (HAVE_OPTFLOW)
+	list(APPEND OPERSRC
+		src/nvopticalflow.cpp
+		src/disparity/optflow_smoothing.cu
+		src/disparity/optflow_smoothing.cpp)
+endif()
+
+add_library(ftloperators ${OPERSRC})
+
 # These cause errors in CI build and are being removed from PCL in newer versions
 # target_compile_options(ftlrender PUBLIC ${PCL_DEFINITIONS})
 
diff --git a/components/operators/include/ftl/operators/antialiasing.hpp b/components/operators/include/ftl/operators/antialiasing.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..302631253f0d1aab6e758cf48c0af213bcf3f002
--- /dev/null
+++ b/components/operators/include/ftl/operators/antialiasing.hpp
@@ -0,0 +1,27 @@
+#ifndef _FTL_OPERATORS_ANTIALIASING_HPP_
+#define _FTL_OPERATORS_ANTIALIASING_HPP_
+
+#include <ftl/operators/operator.hpp>
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace operators {
+
+/**
+ * Fast Approximate Anti-Aliasing by NVIDIA (2010)
+ */
+class FXAA : public ftl::operators::Operator {
+	public:
+    explicit FXAA(ftl::Configurable*);
+    ~FXAA();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+};
+
+}
+}
+
+#endif  // _FTL_OPERATORS_ANTIALIASING_HPP_
diff --git a/components/operators/include/ftl/operators/clipping.hpp b/components/operators/include/ftl/operators/clipping.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..590e714c9eeca9713e56139aa874f7c0db0a472d
--- /dev/null
+++ b/components/operators/include/ftl/operators/clipping.hpp
@@ -0,0 +1,27 @@
+#ifndef _FTL_OPERATORS_CLIPPING_HPP_
+#define _FTL_OPERATORS_CLIPPING_HPP_
+
+#include <ftl/operators/operator.hpp>
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace operators {
+
+/**
+ * Calculate rough normals from local depth gradients.
+ */
+class ClipScene : public ftl::operators::Operator {
+	public:
+    explicit ClipScene(ftl::Configurable*);
+    ~ClipScene();
+
+	inline Operator::Type type() const override { return Operator::Type::ManyToMany; }
+
+    bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) override;
+
+};
+
+}
+}
+
+#endif  // _FTL_OPERATORS_CLIPPING_HPP_
diff --git a/components/operators/include/ftl/operators/disparity.hpp b/components/operators/include/ftl/operators/disparity.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0c26c904fbfa8e2dce0d209347b612f6bb719b63
--- /dev/null
+++ b/components/operators/include/ftl/operators/disparity.hpp
@@ -0,0 +1,105 @@
+#pragma once
+
+#include <ftl/config.h>
+#include <ftl/operators/operator.hpp>
+
+#ifdef HAVE_OPTFLOW
+#include <opencv2/cudaoptflow.hpp>
+#endif
+
+#ifdef HAVE_LIBSGM
+#include <libsgm.h>
+#endif
+
+namespace ftl {
+namespace operators {
+
+#ifdef HAVE_LIBSGM
+/*
+ * FixstarsSGM https://github.com/fixstars/libSGM
+ * 
+ * Requires modified version https://gitlab.utu.fi/joseha/libsgm
+ */
+class FixstarsSGM : public ftl::operators::Operator {
+	public:
+	explicit FixstarsSGM(ftl::Configurable* cfg);
+
+	~FixstarsSGM();
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+	
+	private:
+	bool init();
+	bool updateParameters();
+	
+	sgm::StereoSGM *ssgm_;
+	cv::Size size_;
+	cv::cuda::GpuMat lbw_;
+	cv::cuda::GpuMat rbw_;
+	cv::cuda::GpuMat disp_int_;
+
+	int P1_;
+	int P2_;
+	int max_disp_;
+	float uniqueness_;
+};
+#endif
+
+class DisparityBilateralFilter : public::ftl::operators::Operator {
+	public:
+	explicit DisparityBilateralFilter(ftl::Configurable*);
+
+	~DisparityBilateralFilter() {};
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+	private:
+	cv::Ptr<cv::cuda::DisparityBilateralFilter> filter_;
+	cv::cuda::GpuMat disp_int_;
+	cv::cuda::GpuMat disp_int_result_;
+	double scale_;
+	int radius_;
+	int iter_;
+	int n_disp_;
+};
+
+/*
+ * Calculate depth from disparity
+ */
+class DisparityToDepth : public ftl::operators::Operator {
+	public:
+	explicit DisparityToDepth(ftl::Configurable* cfg) :
+		ftl::operators::Operator(cfg) {}
+
+	~DisparityToDepth() {};
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+};
+
+/*
+ * Optical flow smoothing for depth
+ */
+#ifdef HAVE_OPTFLOW
+class OpticalFlowTemporalSmoothing : public ftl::operators::Operator {
+	public:
+	explicit OpticalFlowTemporalSmoothing(ftl::Configurable*);
+	~OpticalFlowTemporalSmoothing();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+	private:
+	bool init();
+
+	const ftl::codecs::Channel channel_ = ftl::codecs::Channel::Disparity;
+	cv::cuda::GpuMat history_;
+	cv::Size size_;
+	int n_max_;
+	float threshold_;
+};
+#endif
+
+}
+}
diff --git a/components/operators/include/ftl/operators/filling.hpp b/components/operators/include/ftl/operators/filling.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2825dde5f6840658d7c49dd374792e80952e9391
--- /dev/null
+++ b/components/operators/include/ftl/operators/filling.hpp
@@ -0,0 +1,38 @@
+#ifndef _FTL_OPERATORS_FILLING_HPP_
+#define _FTL_OPERATORS_FILLING_HPP_
+
+#include <ftl/operators/operator.hpp>
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace operators {
+
+/**
+ * Use colour distance field to scanline correct depth edges and fill holes.
+ */
+class ScanFieldFill : public ftl::operators::Operator {
+	public:
+    explicit ScanFieldFill(ftl::Configurable*);
+    ~ScanFieldFill();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+};
+
+class CrossSupportFill : public ftl::operators::Operator {
+	public:
+    explicit CrossSupportFill(ftl::Configurable*);
+    ~CrossSupportFill();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+};
+
+}
+}
+
+#endif  // _FTL_OPERATORS_FILLING_HPP_
diff --git a/components/operators/include/ftl/operators/mask.hpp b/components/operators/include/ftl/operators/mask.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..579b1f6fe83daf7402041fe139116abe09ee3234
--- /dev/null
+++ b/components/operators/include/ftl/operators/mask.hpp
@@ -0,0 +1,43 @@
+#ifndef _FTL_OPERATORS_MASK_HPP_
+#define _FTL_OPERATORS_MASK_HPP_
+
+#include <ftl/operators/operator.hpp>
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace operators {
+
+/**
+ * Generate a masking channel that indicates depth discontinuities within a
+ * specified radius from a pixel. This is useful for culling potentially bad
+ * depth and colour values when merging and smoothing.
+ */
+class DiscontinuityMask : public ftl::operators::Operator {
+	public:
+	explicit DiscontinuityMask(ftl::Configurable*);
+	~DiscontinuityMask();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+};
+
+/**
+ * Remove depth values marked with the discontinuity mask.
+ */
+class CullDiscontinuity : public ftl::operators::Operator {
+	public:
+	explicit CullDiscontinuity(ftl::Configurable*);
+	~CullDiscontinuity();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+};
+
+}
+}
+
+#endif  // _FTL_OPERATORS_MASK_HPP_
diff --git a/components/operators/include/ftl/operators/mvmls.hpp b/components/operators/include/ftl/operators/mvmls.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6b8eff1be304693cfe7dee96152f9aa4cd3452b4
--- /dev/null
+++ b/components/operators/include/ftl/operators/mvmls.hpp
@@ -0,0 +1,28 @@
+#ifndef _FTL_OPERATORS_MVMLS_HPP_
+#define _FTL_OPERATORS_MVMLS_HPP_
+
+#include <ftl/operators/operator.hpp>
+
+namespace ftl {
+namespace operators {
+
+class MultiViewMLS : public ftl::operators::Operator {
+	public:
+	explicit MultiViewMLS(ftl::Configurable*);
+	~MultiViewMLS();
+
+	inline Operator::Type type() const override { return Operator::Type::ManyToMany; }
+
+	bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) override;
+
+	private:
+	std::vector<ftl::cuda::TextureObject<float4>> centroid_horiz_;
+	std::vector<ftl::cuda::TextureObject<float4>> centroid_vert_;
+	std::vector<ftl::cuda::TextureObject<float4>> normals_horiz_;
+    std::vector<ftl::cuda::TextureObject<float>> contributions_;
+};
+
+}
+}
+
+#endif
diff --git a/components/operators/include/ftl/operators/operator.hpp b/components/operators/include/ftl/operators/operator.hpp
index fe9b52f2c6b7d6d3364c1d1722f70bd691602170..bc47afef9a976d21e1cb934a4d491a1575f7022d 100644
--- a/components/operators/include/ftl/operators/operator.hpp
+++ b/components/operators/include/ftl/operators/operator.hpp
@@ -23,7 +23,7 @@ namespace operators {
 class Operator {
 	public:
 	explicit Operator(ftl::Configurable *cfg);
-    virtual ~Operator();
+	virtual ~Operator();
 
 	enum class Type {
 		OneToOne,		// Frame to Frame (Filter or generator)
@@ -56,7 +56,7 @@ class Operator {
 namespace detail {
 
 struct ConstructionHelperBase {
-	ConstructionHelperBase(ftl::Configurable *cfg) : config(cfg) {}
+	explicit ConstructionHelperBase(ftl::Configurable *cfg) : config(cfg) {}
 	virtual ~ConstructionHelperBase() {}
 	virtual ftl::operators::Operator *make()=0;
 
@@ -65,7 +65,7 @@ struct ConstructionHelperBase {
 
 template <typename T>
 struct ConstructionHelper : public ConstructionHelperBase {
-	ConstructionHelper(ftl::Configurable *cfg) : ConstructionHelperBase(cfg) {}
+	explicit ConstructionHelper(ftl::Configurable *cfg) : ConstructionHelperBase(cfg) {}
 	~ConstructionHelper() {}
 	ftl::operators::Operator *make() override {
 		return new T(config);
@@ -87,7 +87,7 @@ struct OperatorNode {
 class Graph : public ftl::Configurable {
 	public:
 	explicit Graph(nlohmann::json &config);
-    ~Graph();
+	~Graph();
 
 	template <typename T>
 	ftl::Configurable *append(const std::string &name);
@@ -99,6 +99,7 @@ class Graph : public ftl::Configurable {
 	private:
 	std::list<ftl::operators::detail::OperatorNode> operators_;
 	std::map<std::string, ftl::Configurable*> configs_;
+	cudaStream_t stream_;
 
 	ftl::Configurable *_append(ftl::operators::detail::ConstructionHelperBase*);
 };
diff --git a/components/operators/include/ftl/operators/opticalflow.hpp b/components/operators/include/ftl/operators/opticalflow.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..81afc3914b3a3d31975deca14383f319588209d5
--- /dev/null
+++ b/components/operators/include/ftl/operators/opticalflow.hpp
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <ftl/operators/operator.hpp>
+#include <opencv2/cudaoptflow.hpp>
+
+namespace ftl {
+namespace operators {
+
+/*
+ * Compute Optical flow from Channel::Colour (Left) and save the result in
+ * Channel::Flow using NVidia Optical Flow 1.0 (via OpenCV wrapper).
+ */
+class NVOpticalFlow : public ftl::operators::Operator {
+	public:
+	explicit NVOpticalFlow(ftl::Configurable*);
+	~NVOpticalFlow();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+	protected:
+	bool init();
+
+	private:
+	cv::Size size_;
+	
+	// TODO: Colour to Flow always assumed, could also calculate something else?
+	const ftl::codecs::Channel channel_in_ = ftl::codecs::Channel::Colour;
+	const ftl::codecs::Channel channel_out_ = ftl::codecs::Channel::Flow;
+
+	cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> nvof_;
+	cv::cuda::GpuMat left_gray_;
+	cv::cuda::GpuMat left_gray_prev_;
+};
+
+}
+}
diff --git a/components/operators/include/ftl/operators/segmentation.hpp b/components/operators/include/ftl/operators/segmentation.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f905ce96966d0f68171ee94d209b6d888581005a
--- /dev/null
+++ b/components/operators/include/ftl/operators/segmentation.hpp
@@ -0,0 +1,40 @@
+#ifndef _FTL_OPERATORS_SEGMENTATION_HPP_
+#define _FTL_OPERATORS_SEGMENTATION_HPP_
+
+#include <ftl/operators/operator.hpp>
+
+namespace ftl {
+namespace operators {
+
+/**
+ * Generate the cross support regions channel.
+ */
+class CrossSupport : public ftl::operators::Operator {
+	public:
+    explicit CrossSupport(ftl::Configurable*);
+    ~CrossSupport();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+};
+
+/**
+ * Visualise the cross support regions channel.
+ */
+class VisCrossSupport : public ftl::operators::Operator {
+	public:
+    explicit VisCrossSupport(ftl::Configurable*);
+    ~VisCrossSupport();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+};
+
+}
+}
+
+#endif  // _FTL_OPERATORS_SEGMENTATION_HPP_
diff --git a/components/operators/include/ftl/operators/smoothing.hpp b/components/operators/include/ftl/operators/smoothing.hpp
index b5362ea6f1a6c16de81cdb7c55339058476da09d..6e0c6ff75e382f8a54d99587f545195c70d59e6b 100644
--- a/components/operators/include/ftl/operators/smoothing.hpp
+++ b/components/operators/include/ftl/operators/smoothing.hpp
@@ -13,17 +13,17 @@ namespace operators {
  * first derivative. Only the depth channel is used and modified.
  */
 class HFSmoother : public ftl::operators::Operator {
-    public:
-    explicit HFSmoother(ftl::Configurable*);
-    ~HFSmoother();
+	public:
+	explicit HFSmoother(ftl::Configurable*);
+	~HFSmoother();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
 
-    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
 
-    private:
-    cv::cuda::GpuMat temp_;
-    ftl::rgbd::Frame frames_[4];
+	private:
+	cv::cuda::GpuMat temp_;
+	ftl::rgbd::Frame frames_[4];
 };
 
 /**
@@ -34,13 +34,13 @@ class HFSmoother : public ftl::operators::Operator {
  * no smoothing.
  */
 class SmoothChannel : public ftl::operators::Operator {
-    public:
-    explicit SmoothChannel(ftl::Configurable*);
-    ~SmoothChannel();
+	public:
+	explicit SmoothChannel(ftl::Configurable*);
+	~SmoothChannel();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
 
-    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
 
 	private:
 	ftl::rgbd::Frame temp_[6];
@@ -52,15 +52,15 @@ class SmoothChannel : public ftl::operators::Operator {
  * Also outputs Depth + Normals.
  */
 class SimpleMLS : public ftl::operators::Operator {
-    public:
-    explicit SimpleMLS(ftl::Configurable*);
-    ~SimpleMLS();
+	public:
+	explicit SimpleMLS(ftl::Configurable*);
+	~SimpleMLS();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
 
-    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
 
-    private:
+	private:
 };
 
 /**
@@ -68,15 +68,60 @@ class SimpleMLS : public ftl::operators::Operator {
  * by a simple colour similarity weighting. In practice this is too naive.
  */
 class ColourMLS : public ftl::operators::Operator {
-    public:
-    explicit ColourMLS(ftl::Configurable*);
-    ~ColourMLS();
+	public:
+	explicit ColourMLS(ftl::Configurable*);
+	~ColourMLS();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
 
-    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
 
-    private:
+	private:
+};
+
+/**
+ * Use cross aggregation of neighbors for faster MLS smoothing. Utilises the
+ * cross support regions and colour weighting. This algorithm is based upon the
+ * cross aggregation method in the papers below. MLS was adapted into an
+ * approximate form which is not identical to real MLS as the weights are not
+ * perfectly correct for each point. The errors are visually minimal and it
+ * has linear performance wrt radius, rather than quadratic.
+ * 
+ * Zhang K, Lu J, Lafruit G. Cross-based local stereo matching using orthogonal
+ * integral images. (2009).
+ * 
+ * Better explained in:
+ * X. Mei, X. Sun, M. Zhou et al. On building an accurate stereo matching system
+ * on graphics hardware. (2011).
+ * 
+ * The use of colour weighting is done as in:
+ * C. Kuster et al. Spatio-temporal geometry fusion for multiple hybrid cameras
+ * using moving least squares surfaces. (2014)
+ * 
+ * The above paper also indicates the importance of stopping MLS at depth
+ * boundaries. They use k-means clustering (K=2) of depths, but we are using
+ * an approach (discontinuity mask) indicated in the following paper combined
+ * with the cross support regions to define the extent of the MLS neighbourhood.
+ * 
+ * S. Orts-Escolano et al. Holoportation: Virtual 3D teleportation in real-time.
+ * (2016).
+ * 
+ * The use of all these approaches in this combination is novel I believe.
+ * 
+ */
+class AggreMLS : public ftl::operators::Operator {
+	public:
+	explicit AggreMLS(ftl::Configurable*);
+	~AggreMLS();
+
+	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
+
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+
+	private:
+	ftl::cuda::TextureObject<float4> centroid_horiz_;
+	ftl::cuda::TextureObject<float4> centroid_vert_;
+	ftl::cuda::TextureObject<float4> normals_horiz_;
 };
 
 /**
@@ -87,15 +132,15 @@ class ColourMLS : public ftl::operators::Operator {
  * it can be only a few or even no pixels with a zero smoothing factor.
  */
 class AdaptiveMLS : public ftl::operators::Operator {
-    public:
-    explicit AdaptiveMLS(ftl::Configurable*);
-    ~AdaptiveMLS();
+	public:
+	explicit AdaptiveMLS(ftl::Configurable*);
+	~AdaptiveMLS();
 
 	inline Operator::Type type() const override { return Operator::Type::OneToOne; }
 
-    bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
+	bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override;
 
-    private:
+	private:
 };
 
 }
diff --git a/components/operators/src/antialiasing.cpp b/components/operators/src/antialiasing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..575cc26a5183f38dbaa0c861bac5a0cbe70ace18
--- /dev/null
+++ b/components/operators/src/antialiasing.cpp
@@ -0,0 +1,29 @@
+#include <ftl/operators/antialiasing.hpp>
+#include "antialiasing_cuda.hpp"
+
+using ftl::operators::FXAA;
+using ftl::codecs::Channel;
+
+FXAA::FXAA(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+FXAA::~FXAA() {
+
+}
+
+bool FXAA::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	ftl::cuda::fxaa(
+		in.getTexture<uchar4>(Channel::Colour),
+		stream
+	);
+
+	if (in.hasChannel(Channel::Right)) {
+		ftl::cuda::fxaa(
+			in.getTexture<uchar4>(Channel::Right),
+			stream
+		);
+	}
+
+	return true;
+}
diff --git a/components/operators/src/antialiasing.cu b/components/operators/src/antialiasing.cu
new file mode 100644
index 0000000000000000000000000000000000000000..7ad851a2af3508b6a0728b0081fc00ec879ac088
--- /dev/null
+++ b/components/operators/src/antialiasing.cu
@@ -0,0 +1,88 @@
+#include "antialiasing_cuda.hpp"
+
+#define T_PER_BLOCK 8
+
+__device__ inline uchar4 toChar(const float4 rgba) {
+    return make_uchar4(rgba.x*255.0f, rgba.y*255.0f, rgba.z*255.0f, 255);
+}
+
+__global__ void filter_fxaa2(ftl::cuda::TextureObject<uchar4> data) {
+
+    int x = blockIdx.x*blockDim.x + threadIdx.x;
+    int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if(x >= data.width() || y >= data.height())
+    {
+        return;
+    }
+
+    uchar4 out_color;
+    cudaTextureObject_t texRef = data.cudaTexture();
+
+    const float FXAA_SPAN_MAX = 8.0f;
+    const float FXAA_REDUCE_MUL = 1.0f/8.0f;
+    const float FXAA_REDUCE_MIN = (1.0f/128.0f);
+
+    float u = x + 0.5f;
+    float v = y + 0.5f;
+
+    float4 rgbNW = tex2D<float4>( texRef, u-1.0f,v-1.0f);
+    float4 rgbNE = tex2D<float4>( texRef, u+1.0f,v-1.0f);
+    float4 rgbSW = tex2D<float4>( texRef, u-1.0f,v+1.0f);
+    float4 rgbSE = tex2D<float4>( texRef, u+1.0f,v+1.0f);
+    float4 rgbM = tex2D<float4>( texRef, u,v);
+
+    const float4 luma = make_float4(0.299f, 0.587f, 0.114f,0.0f);
+    float lumaNW = dot(rgbNW, luma);
+    float lumaNE = dot(rgbNE, luma);
+    float lumaSW = dot(rgbSW, luma);
+    float lumaSE = dot(rgbSE, luma);
+    float lumaM = dot( rgbM, luma);
+
+    float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE)));
+    float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE)));
+
+    float2 dir;
+    dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE));
+    dir.y = ((lumaNW + lumaSW) - (lumaNE + lumaSE));
+
+    float dirReduce = max((lumaNW + lumaNE + lumaSW + lumaSE) * (0.25f * FXAA_REDUCE_MUL), FXAA_REDUCE_MIN);
+
+    float rcpDirMin = 1.0f/(min(abs(dir.x), abs(dir.y)) + dirReduce);
+
+
+    float2 test = dir * rcpDirMin;
+    dir = clamp(test,-FXAA_SPAN_MAX,FXAA_SPAN_MAX);
+
+    float4 rgbA = (1.0f/2.0f) * (
+                tex2D<float4>( texRef,u+ dir.x * (1.0f/3.0f - 0.5f),v+ dir.y * (1.0f/3.0f - 0.5f))+
+                tex2D<float4>( texRef,u+ dir.x * (2.0f/3.0f - 0.5f),v+ dir.y * (2.0f/3.0f - 0.5f)));
+    float4 rgbB = rgbA * (1.0f/2.0f) + (1.0f/4.0f) * (
+                tex2D<float4>( texRef,u+ dir.x * (0.0f/3.0f - 0.5f),v+ dir.y * (0.0f/3.0f - 0.5f))+
+                tex2D<float4>( texRef,u+ dir.x * (3.0f/3.0f - 0.5f),v+ dir.y * (3.0f/3.0f - 0.5f)));
+    float lumaB = dot(rgbB, luma);
+
+    if((lumaB < lumaMin) || (lumaB > lumaMax)){
+        out_color=toChar(rgbA);
+    } else {
+        out_color=toChar(rgbB);
+    }
+
+
+    //surf2Dwrite<uchar4>(out_color, surfaceWrite, x*sizeof(uchar4), y);
+
+	// FIXME: Should not output to same texture object.
+    data(x,y) = out_color;
+}
+
+void ftl::cuda::fxaa(ftl::cuda::TextureObject<uchar4> &colour, cudaStream_t stream) {
+	const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    filter_fxaa2<<<gridSize, blockSize, 0, stream>>>(colour);
+	cudaSafeCall( cudaGetLastError() );
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
diff --git a/components/operators/src/antialiasing_cuda.hpp b/components/operators/src/antialiasing_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..afe2c5246f43f1027c2cecfe25b368bbb4cf3e20
--- /dev/null
+++ b/components/operators/src/antialiasing_cuda.hpp
@@ -0,0 +1,14 @@
+#ifndef _FTL_CUDA_ANTIALIASING_HPP_
+#define _FTL_CUDA_ANTIALIASING_HPP_
+
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void fxaa(ftl::cuda::TextureObject<uchar4> &colour, cudaStream_t stream);
+
+}
+}
+
+#endif
diff --git a/components/operators/src/clipping.cpp b/components/operators/src/clipping.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b37abe2c33b55b86b697565d1565b523c073d80f
--- /dev/null
+++ b/components/operators/src/clipping.cpp
@@ -0,0 +1,60 @@
+#include <ftl/operators/clipping.hpp>
+#include <ftl/cuda/points.hpp>
+#include <ftl/utility/matrix_conversion.hpp>
+
+using ftl::operators::ClipScene;
+using ftl::codecs::Channel;
+using ftl::rgbd::Format;
+
+ClipScene::ClipScene(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+ClipScene::~ClipScene() {
+
+}
+
+// TODO: Put in common place
+static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) {
+  Eigen::Affine3d rx =
+      Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0)));
+  Eigen::Affine3d ry =
+      Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0)));
+  Eigen::Affine3d rz =
+      Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1)));
+  return rz * rx * ry;
+}
+
+bool ClipScene::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) {
+	auto &c = *config();
+	float rx = c.value("pitch", 0.0f);
+	float ry = c.value("yaw", 0.0f);
+	float rz = c.value("roll", 0.0f);
+	float x = c.value("x", 0.0f);
+	float y = c.value("y", 0.0f);
+	float z = c.value("z", 0.0f);
+	float width = c.value("width", 1.0f);
+	float height = c.value("height", 1.0f);
+	float depth = c.value("depth", 1.0f);
+
+	Eigen::Affine3f r = create_rotation_matrix(rx, ry, rz).cast<float>();
+	Eigen::Translation3f trans(Eigen::Vector3f(x,y,z));
+	Eigen::Affine3f t(trans);
+
+	ftl::cuda::ClipSpace clip;
+	clip.origin = MatrixConversion::toCUDA(r.matrix() * t.matrix());
+	clip.size = make_float3(width, height, depth);
+		
+	for (size_t i=0; i<in.frames.size(); ++i) {	
+		auto &f = in.frames[i];
+		auto *s = in.sources[i];
+
+		auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>());
+
+		auto sclip = clip;
+		sclip.origin = sclip.origin * pose;
+		ftl::cuda::clipping(f.createTexture<float>(Channel::Depth), s->parameters(), sclip, stream);
+	}
+
+	return true;
+}
diff --git a/components/operators/src/colours.cpp b/components/operators/src/colours.cpp
index 3174c42e41de75e30f852c5bb187266ad26b61aa..9c6fff8b887fc645da71fbc99e018a6dc99b6313 100644
--- a/components/operators/src/colours.cpp
+++ b/components/operators/src/colours.cpp
@@ -12,6 +12,8 @@ ColourChannels::~ColourChannels() {
 }
 
 bool ColourChannels::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
 	// Convert colour from BGR to BGRA if needed
 	if (in.get<cv::cuda::GpuMat>(Channel::Colour).type() == CV_8UC3) {
 		//cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
@@ -19,8 +21,11 @@ bool ColourChannels::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgb
 		auto &col = in.get<cv::cuda::GpuMat>(Channel::Colour);
 		temp_.create(col.size(), CV_8UC4);
 		cv::cuda::swap(col, temp_);
-		cv::cuda::cvtColor(temp_,col, cv::COLOR_BGR2BGRA, 0);
+		cv::cuda::cvtColor(temp_,col, cv::COLOR_BGR2BGRA, 0, cvstream);
 	}
 
+	//in.resetTexture(Channel::Colour);
+	in.createTexture<uchar4>(Channel::Colour, true);
+
 	return true;
 }
diff --git a/components/operators/src/correspondence.cu b/components/operators/src/correspondence.cu
new file mode 100644
index 0000000000000000000000000000000000000000..36e91d9f483d62164b284f43b82443641eadd664
--- /dev/null
+++ b/components/operators/src/correspondence.cu
@@ -0,0 +1,548 @@
+#include "mvmls_cuda.hpp"
+#include <ftl/cuda/weighting.hpp>
+#include <ftl/cuda/mask.hpp>
+
+using ftl::cuda::TextureObject;
+using ftl::rgbd::Camera;
+using ftl::cuda::Mask;
+using ftl::cuda::MvMLSParams;
+
+#define T_PER_BLOCK 8
+
+template<int FUNCTION>
+__device__ float weightFunction(const ftl::cuda::MvMLSParams &params, float dweight, float cweight);
+
+template <>
+__device__ inline float weightFunction<0>(const ftl::cuda::MvMLSParams &params, float dweight, float cweight) {
+	return (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
+}
+
+template <>
+__device__ inline float weightFunction<1>(const ftl::cuda::MvMLSParams &param, float dweight, float cweight) {
+	return (cweight * cweight * dweight);
+}
+
+template <>
+__device__ inline float weightFunction<2>(const ftl::cuda::MvMLSParams &param, float dweight, float cweight) {
+	return (dweight * dweight * cweight);
+}
+
+template <>
+__device__ inline float weightFunction<3>(const ftl::cuda::MvMLSParams &params, float dweight, float cweight) {
+	return (dweight == 0.0f) ? 0.0f : (params.cost_ratio * (cweight) + (1.0f - params.cost_ratio) * dweight);
+}
+
+template <>
+__device__ inline float weightFunction<4>(const ftl::cuda::MvMLSParams &params, float dweight, float cweight) {
+	return cweight;
+}
+
+template <>
+__device__ inline float weightFunction<5>(const ftl::cuda::MvMLSParams &params, float dweight, float cweight) {
+	return (cweight > 0.0f) ? dweight : 0.0f;
+}
+
+template<int COR_STEPS, int FUNCTION> 
+__global__ void corresponding_point_kernel(
+        TextureObject<float> d1,
+        TextureObject<float> d2,
+        TextureObject<uchar4> c1,
+        TextureObject<uchar4> c2,
+        TextureObject<short2> screenOut,
+		TextureObject<float> conf,
+		TextureObject<int> mask,
+        float4x4 pose1,
+        float4x4 pose1_inv,
+        float4x4 pose2,  // Inverse
+        Camera cam1,
+        Camera cam2, ftl::cuda::MvMLSParams params) {
+
+    // Each warp picks point in p1
+    //const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x); // / WARP_SIZE;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x >= 0 && y >=0 && x < screenOut.width() && y < screenOut.height()) {
+        screenOut(x,y) = make_short2(-1,-1);
+    
+        //const float3 world1 = make_float3(p1.tex2D(x, y));
+        const float depth1 = d1.tex2D(x,y); //(pose1_inv * world1).z;  // Initial starting depth
+        if (depth1 < cam1.minDepth || depth1 > cam1.maxDepth) return;
+
+        // TODO: Temporary hack to ensure depth1 is present
+        //const float4 temp = vout.tex2D(x,y);
+        //vout(x,y) =  make_float4(depth1, 0.0f, temp.z, temp.w);
+        
+        const float3 world1 = pose1 * cam1.screenToCam(x,y,depth1);
+
+        const auto colour1 = c1.tex2D((float)x+0.5f, (float)y+0.5f);
+
+        //float bestdepth = 0.0f;
+        short2 bestScreen = make_short2(-1,-1);
+		float bestdepth = 0.0f;
+		float bestdepth2 = 0.0f;
+        float bestweight = 0.0f;
+        float bestcolour = 0.0f;
+        float bestdweight = 0.0f;
+        float totalcolour = 0.0f;
+        int count = 0;
+        float contrib = 0.0f;
+        
+        const float step_interval = params.range / (COR_STEPS / 2);
+        
+        const float3 rayStep_world = pose1.getFloat3x3() * cam1.screenToCam(x,y,step_interval);
+        const float3 rayStart_2 = pose2 * world1;
+        const float3 rayStep_2 = pose2.getFloat3x3() * rayStep_world;
+
+        // Project to p2 using cam2
+        // Each thread takes a possible correspondence and calculates a weighting
+        //const int lane = tid % WARP_SIZE;
+        for (int i=0; i<COR_STEPS; ++i) {
+            const int j = i - (COR_STEPS/2);
+            const float depth_adjust = (float)j * step_interval;
+
+            // Calculate adjusted depth 3D point in camera 2 space
+            const float3 worldPos = world1 + j * rayStep_world; //(pose1 * cam1.screenToCam(x, y, depth_adjust));
+            const float3 camPos = rayStart_2 + j * rayStep_2; //pose2 * worldPos;
+			const float2 screen = cam2.camToScreen<float2>(camPos);
+			
+			float weight = (screen.x >= cam2.width || screen.y >= cam2.height) ? 0.0f : 1.0f;
+
+			// Generate a colour correspondence value
+            const auto colour2 = c2.tex2D(screen.x, screen.y);
+            const float cweight = ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
+
+            // Generate a depth correspondence value
+			const float depth2 = d2.tex2D(int(screen.x+0.5f), int(screen.y+0.5f));
+			
+			if (FUNCTION == 1) {
+				weight *= ftl::cuda::weighting(fabs(depth2 - camPos.z), cweight*params.spatial_smooth);
+			} else {
+				const float dweight = ftl::cuda::weighting(fabs(depth2 - camPos.z), params.spatial_smooth);
+            	weight *= weightFunction<FUNCTION>(params, dweight, cweight);
+			}
+            //const float dweight = ftl::cuda::weighting(fabs(depth_adjust), 10.0f*params.range);
+
+            //weight *= weightFunction<FUNCTION>(params, dweight, cweight);
+
+            ++count;
+            contrib += weight;
+            bestcolour = max(cweight, bestcolour);
+            //bestdweight = max(dweight, bestdweight);
+            totalcolour += cweight;
+			bestdepth = (weight > bestweight) ? depth_adjust : bestdepth;
+			//bestdepth2 = (weight > bestweight) ? camPos.z : bestdepth2;
+			//bestScreen = (weight > bestweight) ? make_short2(screen.x+0.5f, screen.y+0.5f) : bestScreen;
+			bestweight = max(bestweight, weight);
+                //bestweight = weight;
+                //bestdepth = depth_adjust;
+                //bestScreen = make_short2(screen.x+0.5f, screen.y+0.5f);
+            //}
+        }
+
+        const float avgcolour = totalcolour/(float)count;
+        const float confidence = bestcolour / totalcolour; //bestcolour - avgcolour;
+        
+        //Mask m(mask.tex2D(x,y));
+
+        //if (bestweight > 0.0f) {
+            float old = conf.tex2D(x,y);
+
+            if (bestweight * confidence > old) {
+				d1(x,y) = 0.4f*bestdepth + depth1;
+				//d2(bestScreen.x, bestScreen.y) = bestdepth2;
+                //screenOut(x,y) = bestScreen;
+                conf(x,y) = bestweight * confidence;
+            }
+        //}
+        
+        // If a good enough match is found, mark dodgy depth as solid
+        //if ((m.isFilled() || m.isDiscontinuity()) && (bestweight > params.match_threshold)) mask(x,y) = 0;
+    }
+}
+
+void ftl::cuda::correspondence(
+        TextureObject<float> &d1,
+        TextureObject<float> &d2,
+        TextureObject<uchar4> &c1,
+        TextureObject<uchar4> &c2,
+        TextureObject<short2> &screen,
+		TextureObject<float> &conf,
+		TextureObject<int> &mask,
+        float4x4 &pose1,
+        float4x4 &pose1_inv,
+        float4x4 &pose2,
+        const Camera &cam1,
+        const Camera &cam2, const MvMLSParams &params, int func,
+        cudaStream_t stream) {
+
+	const dim3 gridSize((d1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (d1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    //printf("COR SIZE %d,%d\n", p1.width(), p1.height());
+
+	switch (func) {
+    case 0: corresponding_point_kernel<16,0><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 1: corresponding_point_kernel<16,1><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 2: corresponding_point_kernel<16,2><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 3: corresponding_point_kernel<16,3><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 4: corresponding_point_kernel<16,4><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	case 5: corresponding_point_kernel<16,5><<<gridSize, blockSize, 0, stream>>>(d1, d2, c1, c2, screen, conf, mask, pose1, pose1_inv, pose2, cam1, cam2, params); break;
+	}
+
+    cudaSafeCall( cudaGetLastError() );
+}
+
+// ==== Remove zero-confidence =================================================
+
+__global__ void zero_confidence_kernel(
+		TextureObject<float> conf,
+		TextureObject<float> depth) {
+
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth.width() && y < depth.height()) {
+		const float c = conf.tex2D((int)x,(int)y);
+
+		if (c == 0.0f) {
+			depth(x,y) = 1000.0f;	
+		}
+	}
+}
+
+void ftl::cuda::zero_confidence(TextureObject<float> &conf, TextureObject<float> &depth, cudaStream_t stream) {
+	const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	zero_confidence_kernel<<<gridSize, blockSize, 0, stream>>>(conf, depth);
+	cudaSafeCall( cudaGetLastError() );
+}
+
+
+// ==== MultiViewMLS Aggregate =================================================
+
+__device__ inline short3 getScreenPos(int x, int y, float d, const Camera &cam1, const Camera &cam2, const float4x4 &transform) {
+    const float3 campos = transform * cam1.screenToCam(x,y,d);
+    const int2 screen = cam2.camToScreen<int2>(campos);
+    return make_short3(screen.x, screen.y, campos.z);
+}
+
+__device__ inline short2 packScreen(int x, int y, int id) {
+    return make_short2((id << 12) + x, y);
+}
+
+__device__ inline short2 packScreen(const short3 &p, int id) {
+    return make_short2((id << 12) + p.x, p.y);
+}
+
+__device__ inline int supportSize(uchar4 support) {
+    return (support.x+support.y) * (support.z+support.w);
+}
+
+__device__ inline short2 choosePoint(uchar4 sup1, uchar4 sup2, float dot1, float dot2, short2 screen1, short2 screen2) {
+    //return (float(supportSize(sup2))*dot1 > float(supportSize(sup1))*dot2) ? screen2 : screen1;
+    return (dot1 > dot2) ? screen2 : screen1;
+}
+
+__device__ inline int unpackCameraID(short2 p) {
+    return p.x >> 12;
+}
+
+/**
+ * Identify which source has the best support region for a given pixel.
+ */
+__global__ void best_sources_kernel(
+        TextureObject<float4> normals1,
+        TextureObject<float4> normals2,
+        TextureObject<uchar4> support1,
+        TextureObject<uchar4> support2,
+        TextureObject<float> depth1,
+        TextureObject<float> depth2,
+        TextureObject<short2> screen,
+        float4x4 transform,
+        //float3x3 transformR,
+        ftl::rgbd::Camera cam1,
+        ftl::rgbd::Camera cam2,
+        int id1,
+        int id2) {
+
+    const int x = (blockIdx.x*blockDim.x + threadIdx.x);
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x >= 0 && y >= 0 && x < screen.width() && y < screen.height()) {
+        const float d1 = depth1.tex2D(x,y);
+
+        const short3 scr2 = getScreenPos(x, y, d1, cam1, cam2, transform);
+        short2 bestPoint = packScreen(x,y,0);
+
+        if (scr2.x >= 0 && scr2.y >= 0 && scr2.x < cam2.width && scr2.y < cam2.height) {
+            uchar4 sup1 = support1.tex2D(x,y);
+            uchar4 sup2 = support2.tex2D(scr2.x,scr2.y);
+            const float d2 = depth2.tex2D(scr2.x,scr2.y);
+            float3 n1 = transform.getFloat3x3() * make_float3(normals1.tex2D(x,y));
+            float3 n2 = make_float3(normals2.tex2D(scr2.x,scr2.y));
+
+            float3 camray = cam2.screenToCam(scr2.x,scr2.y,1.0f);
+            camray /= length(camray);
+            const float dot1 = dot(camray, n1);
+            const float dot2 = dot(camray, n2);
+
+            bestPoint = (fabs(scr2.z - d2) < 0.04f) ? choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2)) : packScreen(x,y,6);
+            //bestPoint = choosePoint(sup1, sup2, dot1, dot2, packScreen(x,y,id1), packScreen(scr2,id2));
+			//bestPoint = (d1 < d2) ? packScreen(x,y,id1) : packScreen(x,y,id2);
+			
+			bestPoint = (fabs(scr2.z - d2) < 0.04f) ? packScreen(scr2,id2) : packScreen(scr2,id1);
+        }
+
+        screen(x,y) = bestPoint;
+
+        /*if (s.x >= 0 && s.y >= 0) {
+            auto norm1 = make_float3(n1.tex2D(x,y));
+            const auto norm2 = make_float3(n2.tex2D(s.x,s.y));
+            //n2(s.x,s.y) = norm1;
+
+            float3 cent1 = make_float3(c1.tex2D(x,y));
+            const auto cent2 = make_float3(c2.tex2D(s.x,s.y));
+
+            if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f) {
+                norm1 += poseInv1.getFloat3x3() * (pose2.getFloat3x3() * norm2);
+                n1(x,y) = make_float4(norm1, 0.0f);
+				cent1 +=  poseInv1 * (pose2 * cent2);  // FIXME: Transform between camera spaces
+				cent1 /= 2.0f;
+                c1(x,y) = make_float4(cent1, 0.0f);
+                //c2(s.x,s.y) = cent1;
+
+				//contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f;
+            }
+           // contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f;
+        }*/
+    }
+}
+
+void ftl::cuda::best_sources(
+        ftl::cuda::TextureObject<float4> &normals1,
+        ftl::cuda::TextureObject<float4> &normals2,
+        ftl::cuda::TextureObject<uchar4> &support1,
+        ftl::cuda::TextureObject<uchar4> &support2,
+        ftl::cuda::TextureObject<float> &depth1,
+        ftl::cuda::TextureObject<float> &depth2,
+        ftl::cuda::TextureObject<short2> &screen,
+        const float4x4 &transform,
+        const ftl::rgbd::Camera &cam1,
+        const ftl::rgbd::Camera &cam2,
+        int id1,
+        int id2,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(normals1, normals2, support1, support2, depth1, depth2, screen, transform, cam1, cam2, id1, id2);
+    cudaSafeCall( cudaGetLastError() );
+}
+
+/**
+ * Identify which source has the best support region for a given pixel.
+ */
+ __global__ void aggregate_sources_kernel(
+		TextureObject<float4> n1,
+		TextureObject<float4> n2,
+		TextureObject<float4> c1,
+		TextureObject<float4> c2,
+		TextureObject<float> depth1,
+		//TextureObject<float> depth2,
+		//TextureObject<short2> screen,
+		float4x4 transform,
+		//float3x3 transformR,
+		ftl::rgbd::Camera cam1,
+		ftl::rgbd::Camera cam2) {
+
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x >= 0 && y >= 0 && x < n1.width() && y < n1.height()) {
+		const float d1 = depth1.tex2D(x,y);
+
+		if (d1 > cam1.minDepth && d1 < cam1.maxDepth) {
+			//const short3 s = getScreenPos(x, y, d1, cam1, cam2, transform);
+			const float3 camPos = transform * cam1.screenToCam(x, y, d1);
+			const int2 s = cam2.camToScreen<int2>(camPos);
+
+			if (s.x >= 0 && s.y >= 0 && s.x < n2.width() && s.y < n2.height()) {
+				auto norm1 = make_float3(n1.tex2D(x,y));
+				const auto norm2 = make_float3(n2.tex2D(s.x,s.y));
+				//n2(s.x,s.y) = norm1;
+
+				float3 cent1 = make_float3(c1.tex2D(x,y));
+				const auto cent2 = transform.getInverse() * make_float3(c2.tex2D(s.x,s.y));
+
+				//printf("MERGING %f\n", length(cent2-cent1));
+
+				if (cent2.x+cent2.y+cent2.z > 0.0f && norm2.x+norm2.y+norm2.z > 0.0f && length(cent2-cent1) < 0.04f) {
+					norm1 += norm2;
+					norm1 /= 2.0f;
+					n1(x,y) = make_float4(norm1, 0.0f);
+					cent1 += cent2;
+					cent1 /= 2.0f;
+					c1(x,y) = make_float4(cent1, 0.0f);
+					//c2(s.x,s.y) = cent1;
+
+					//contribs1(x,y) = contribs1.tex2D(x,y) + 1.0f;
+				}
+			// contribs2(s.x,s.y) = contribs2.tex2D(s.x,s.y) + 1.0f;
+			}
+		}
+	}
+}
+
+void ftl::cuda::aggregate_sources(
+		ftl::cuda::TextureObject<float4> &n1,
+		ftl::cuda::TextureObject<float4> &n2,
+		ftl::cuda::TextureObject<float4> &c1,
+		ftl::cuda::TextureObject<float4> &c2,
+		ftl::cuda::TextureObject<float> &depth1,
+		//ftl::cuda::TextureObject<float> &depth2,
+		//ftl::cuda::TextureObject<short2> &screen,
+		const float4x4 &transform,
+		const ftl::rgbd::Camera &cam1,
+		const ftl::rgbd::Camera &cam2,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((n1.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (n1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, depth1, transform, cam1, cam2);
+	cudaSafeCall( cudaGetLastError() );
+}
+
+__device__ static uchar4 HSVtoRGB(int H, float S, float V) {
+	const float C = S * V;
+	const float X = C * (1 - fabs(fmodf(H / 60.0f, 2) - 1));
+	const float m = V - C;
+	float Rs, Gs, Bs;
+
+	if(H >= 0 && H < 60) {
+		Rs = C;
+		Gs = X;
+		Bs = 0;	
+	}
+	else if(H >= 60 && H < 120) {	
+		Rs = X;
+		Gs = C;
+		Bs = 0;	
+	}
+	else if(H >= 120 && H < 180) {
+		Rs = 0;
+		Gs = C;
+		Bs = X;	
+	}
+	else if(H >= 180 && H < 240) {
+		Rs = 0;
+		Gs = X;
+		Bs = C;	
+	}
+	else if(H >= 240 && H < 300) {
+		Rs = X;
+		Gs = 0;
+		Bs = C;	
+	}
+	else {
+		Rs = C;
+		Gs = 0;
+		Bs = X;	
+	}
+
+	return make_uchar4((Bs + m) * 255, (Gs + m) * 255, (Rs + m) * 255, 0);
+}
+
+/**
+ * Render each pixel is a colour corresponding to the source camera with the
+ * best support window.
+ */
+ __global__ void vis_best_sources_kernel(
+        TextureObject<short2> screen,
+        TextureObject<uchar4> colour,
+        int myid,
+        int count) {
+
+    const int x = (blockIdx.x*blockDim.x + threadIdx.x);
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x >= 0 && y >= 0 && x < colour.width() && y < colour.height()) {
+        short2 s = screen.tex2D(x,y);
+        int id = unpackCameraID(s);
+
+        uchar4 c = HSVtoRGB((360 / count) * id, 0.6f, 0.85f);
+        if (myid != id) colour(x,y) = c;
+        //colour(x,y) = c;
+    }
+}
+
+void ftl::cuda::vis_best_sources(
+        ftl::cuda::TextureObject<short2> &screen,
+        ftl::cuda::TextureObject<uchar4> &colour,
+        int myid,
+        int count,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((colour.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (colour.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    vis_best_sources_kernel<<<gridSize, blockSize, 0, stream>>>(screen, colour, myid, count);
+    cudaSafeCall( cudaGetLastError() );
+}
+
+/*void ftl::cuda::aggregate_sources(
+        ftl::cuda::TextureObject<float4> &n1,
+        ftl::cuda::TextureObject<float4> &n2,
+        ftl::cuda::TextureObject<float4> &c1,
+        ftl::cuda::TextureObject<float4> &c2,
+        ftl::cuda::TextureObject<float> &contribs1,
+        ftl::cuda::TextureObject<float> &contribs2,
+		ftl::cuda::TextureObject<short2> &screen,
+		const float4x4 &poseInv1,
+		const float4x4 &pose2,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((screen.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (screen.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    aggregate_sources_kernel<<<gridSize, blockSize, 0, stream>>>(n1, n2, c1, c2, contribs1, contribs2, screen, poseInv1, pose2);
+    cudaSafeCall( cudaGetLastError() );
+}*/
+
+// ==== Normalise aggregations =================================================
+
+__global__ void normalise_aggregations_kernel(
+        TextureObject<float4> norms,
+        TextureObject<float4> cents,
+        TextureObject<float> contribs) {
+
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x < norms.width() && y < norms.height()) {
+        const float contrib = contribs.tex2D((int)x,(int)y);
+
+        const auto a = norms.tex2D((int)x,(int)y);
+        const auto b = cents.tex2D(x,y);
+        //const float4 normal = normals.tex2D((int)x,(int)y);
+
+		//out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib);
+
+        if (contrib > 0.0f) {
+            norms(x,y) = a / (contrib+1.0f);
+            cents(x,y) = b / (contrib+1.0f);
+        }
+    }
+}
+
+void ftl::cuda::normalise_aggregations(TextureObject<float4> &norms, TextureObject<float4> &cents, TextureObject<float> &contribs, cudaStream_t stream) {
+    const dim3 gridSize((norms.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (norms.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    normalise_aggregations_kernel<<<gridSize, blockSize, 0, stream>>>(norms, cents, contribs);
+    cudaSafeCall( cudaGetLastError() );
+}
+
diff --git a/components/operators/src/disparity/bilateral_filter.cpp b/components/operators/src/disparity/bilateral_filter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8c91fac6f317250f7a8af9b85a0c975e217bdfe7
--- /dev/null
+++ b/components/operators/src/disparity/bilateral_filter.cpp
@@ -0,0 +1,35 @@
+#include <ftl/operators/disparity.hpp>
+
+using cv::cuda::GpuMat;
+using cv::Size;
+
+using ftl::codecs::Channel;
+using ftl::operators::DisparityBilateralFilter;
+
+DisparityBilateralFilter::DisparityBilateralFilter(ftl::Configurable* cfg) :
+		ftl::operators::Operator(cfg) {
+	
+	scale_ = 16.0;
+	n_disp_ = cfg->value("n_disp", 256);
+	radius_ = cfg->value("radius", 7);
+	iter_ = cfg->value("iter", 13);
+	filter_ = cv::cuda::createDisparityBilateralFilter(n_disp_ * scale_, radius_, iter_);
+}
+
+bool DisparityBilateralFilter::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out,
+									 ftl::rgbd::Source *src, cudaStream_t stream) {
+
+	if (!in.hasChannel(Channel::Disparity) || !in.hasChannel(Channel::Colour)) {
+		return false;
+	}
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+	const GpuMat &rgb = in.get<GpuMat>(Channel::Colour);
+	GpuMat &disp_in = in.get<GpuMat>(Channel::Disparity);
+	GpuMat &disp_out = out.create<GpuMat>(Channel::Disparity);
+	disp_out.create(disp_in.size(), disp_in.type());
+
+	disp_in.convertTo(disp_int_, CV_16SC1, scale_, cvstream);
+	filter_->apply(disp_int_, rgb, disp_int_result_, cvstream);
+	disp_int_result_.convertTo(disp_out, disp_in.type(), 1.0/scale_, cvstream);
+	return true;
+}
\ No newline at end of file
diff --git a/components/operators/src/disparity/cuda.hpp b/components/operators/src/disparity/cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d4cf06aaec772869ee3e8f2e8fafab9157115f57
--- /dev/null
+++ b/components/operators/src/disparity/cuda.hpp
@@ -0,0 +1,15 @@
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth,
+						const ftl::rgbd::Camera &c, cudaStream_t &stream);
+
+
+void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow,
+					cv::cuda::GpuMat &history, int n_max, float threshold,
+					cv::cuda::Stream &stream);
+
+}
+}
diff --git a/components/rgbd-sources/src/algorithms/disp2depth.cu b/components/operators/src/disparity/disp2depth.cu
similarity index 74%
rename from components/rgbd-sources/src/algorithms/disp2depth.cu
rename to components/operators/src/disparity/disp2depth.cu
index 4b707c4795017cdf38999cd6606d53ff4870ccb5..86a3fc290967fc0a64dd1a61df61badaef0aa833 100644
--- a/components/rgbd-sources/src/algorithms/disp2depth.cu
+++ b/components/operators/src/disparity/disp2depth.cu
@@ -16,12 +16,12 @@ __global__ void d2d_kernel(cv::cuda::PtrStepSz<float> disp, cv::cuda::PtrStepSz<
 namespace ftl {
 namespace cuda {
 	void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth,
-				const ftl::rgbd::Camera &c, cv::cuda::Stream &stream) {
+				const ftl::rgbd::Camera &c, cudaStream_t &stream) {
 		dim3 grid(1,1,1);
-    	dim3 threads(128, 1, 1);
-    	grid.x = cv::cuda::device::divUp(disparity.cols, 128);
+		dim3 threads(128, 1, 1);
+		grid.x = cv::cuda::device::divUp(disparity.cols, 128);
 		grid.y = cv::cuda::device::divUp(disparity.rows, 1);
-		d2d_kernel<<<grid, threads, 0, cv::cuda::StreamAccessor::getStream(stream)>>>(
+		d2d_kernel<<<grid, threads, 0, stream>>>(
 			disparity, depth, c);
 		cudaSafeCall( cudaGetLastError() );
 	}
diff --git a/components/operators/src/disparity/disparity_to_depth.cpp b/components/operators/src/disparity/disparity_to_depth.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5951802f0adc960d387a20a4f7f77abd264fb6e8
--- /dev/null
+++ b/components/operators/src/disparity/disparity_to_depth.cpp
@@ -0,0 +1,22 @@
+#include "ftl/operators/disparity.hpp"
+#include "disparity/cuda.hpp"
+
+using ftl::operators::DisparityToDepth;
+using ftl::codecs::Channel;
+
+using cv::cuda::GpuMat;
+
+bool DisparityToDepth::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out,
+							 ftl::rgbd::Source *src, cudaStream_t stream) {
+	
+	if (!in.hasChannel(Channel::Disparity)) { return false;  }
+
+	const auto params = src->parameters();
+	const GpuMat &disp = in.get<GpuMat>(Channel::Disparity);
+
+	GpuMat &depth = out.create<GpuMat>(Channel::Depth);
+	depth.create(disp.size(), CV_32FC1);
+
+	ftl::cuda::disparity_to_depth(disp, depth, params, stream);
+	return true;
+}
\ No newline at end of file
diff --git a/components/operators/src/disparity/fixstars_sgm.cpp b/components/operators/src/disparity/fixstars_sgm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..79a18685ee3ac8732f6721358f8f9d7dc5be4da9
--- /dev/null
+++ b/components/operators/src/disparity/fixstars_sgm.cpp
@@ -0,0 +1,136 @@
+#include <loguru.hpp>
+
+#include "ftl/operators/disparity.hpp"
+
+using cv::Size;
+using cv::cuda::GpuMat;
+
+using ftl::rgbd::Format;
+using ftl::codecs::Channel;
+using ftl::rgbd::Frame;
+using ftl::rgbd::Source;
+using ftl::operators::FixstarsSGM;
+
+FixstarsSGM::FixstarsSGM(ftl::Configurable* cfg) :
+		ftl::operators::Operator(cfg) {
+	
+	ssgm_ = nullptr;
+	size_ = Size(0, 0);
+
+	uniqueness_ = cfg->value("uniqueness", 0.95f);
+	P1_ = cfg->value("P1", 10);
+	P2_ = cfg->value("P2", 120);
+	max_disp_ = cfg->value("max_disp", 256);
+
+	if (uniqueness_ < 0.0 || uniqueness_ > 1.0) {
+		uniqueness_ = 1.0;
+		LOG(WARNING) << "Invalid uniqueness, using default (1.0)";
+	}
+
+	if (P1_ <= 0 ) {
+		P1_ = 10;
+		LOG(WARNING) << "Invalid value for P1, using default (10)";
+	}
+
+	if (P2_ < P1_) {
+		P2_ = P1_;
+		LOG(WARNING) << "Invalid value for P2, using value of P1 (" << P1_ << ")";
+	}
+
+	if (!(max_disp_ == 256 || max_disp_ == 128)) {
+		max_disp_ = 256;
+		LOG(WARNING) << "Invalid value for max_disp, using default value (256)";
+	}
+
+	cfg->on("P1", [this, cfg](const ftl::config::Event&) {
+		int P1 = cfg->value("P1", 0);
+		if (P1 <= 0) {
+			LOG(WARNING) << "Invalid value for P1 (" << P1 << ")";
+		}
+		else {
+			P1_ = P1;
+			updateParameters();
+		}
+	});
+
+	cfg->on("P2", [this, cfg](const ftl::config::Event&) {
+		int P2 = cfg->value("P2", 0);
+		if (P2 < P1_) {
+			LOG(WARNING) << "Invalid value for P2 (" << P2 << ")";
+		}
+		else {
+			P2_ = P2;
+			updateParameters();
+		}
+	});
+
+	cfg->on("uniqueness", [this, cfg](const ftl::config::Event&) {
+		double uniqueness = cfg->value("uniqueness", 0.0);
+		if (uniqueness < 0.0 || uniqueness > 1.0) {
+			LOG(WARNING) << "Invalid value for uniqueness (" << uniqueness << ")";
+		}
+		else {
+			uniqueness_ = uniqueness;
+			updateParameters();
+		}
+	});
+}
+
+FixstarsSGM::~FixstarsSGM() {
+	if (ssgm_) {
+		delete ssgm_;
+	}
+}
+
+bool FixstarsSGM::init() {
+	if (size_ == Size(0, 0)) { return false; }
+	if (ssgm_) { delete ssgm_; }
+	lbw_.create(size_, CV_8UC1);
+	rbw_.create(size_, CV_8UC1);
+	disp_int_.create(size_, CV_16SC1);
+
+	ssgm_ = new sgm::StereoSGM(size_.width, size_.height, max_disp_, 8, 16,
+		lbw_.step, disp_int_.step / sizeof(short),
+		sgm::EXECUTE_INOUT_CUDA2CUDA,
+		sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true)
+	);
+
+	return true;
+}
+
+bool FixstarsSGM::updateParameters() {
+	if (ssgm_ == nullptr) { return false; }
+	return this->ssgm_->updateParameters(
+		sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true));
+}
+
+bool FixstarsSGM::apply(Frame &in, Frame &out, Source *src, cudaStream_t stream) {
+	if (!in.hasChannel(Channel::Left) || !in.hasChannel(Channel::Right)) {
+		return false;
+	}
+
+	const auto &l = in.get<GpuMat>(Channel::Left);
+	const auto &r = in.get<GpuMat>(Channel::Right);
+	
+	if (l.size() != size_) {
+		size_ = l.size();
+		if (!init()) { return false; }
+	}
+
+	auto &disp = out.create<GpuMat>(Channel::Disparity, Format<float>(l.size()));
+
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+	cv::cuda::cvtColor(l, lbw_, cv::COLOR_BGR2GRAY, 0, cvstream);
+	cv::cuda::cvtColor(r, rbw_, cv::COLOR_BGR2GRAY, 0, cvstream);
+
+	cvstream.waitForCompletion();
+	ssgm_->execute(lbw_.data, rbw_.data, disp_int_.data);
+
+	// GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows));
+	// left_pixels.setTo(0);
+
+	cv::cuda::threshold(disp_int_, disp_int_, 4096.0f, 0.0f, cv::THRESH_TOZERO_INV, cvstream);
+	
+	disp_int_.convertTo(disp, CV_32F, 1.0f / 16.0f, cvstream);
+	return true;
+}
diff --git a/components/operators/src/disparity/optflow_smoothing.cpp b/components/operators/src/disparity/optflow_smoothing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5304da7dc7fc4d7c5e0e4531c90851c3f41a37a4
--- /dev/null
+++ b/components/operators/src/disparity/optflow_smoothing.cpp
@@ -0,0 +1,95 @@
+#include <loguru.hpp>
+
+#include "ftl/operators/disparity.hpp"
+#include "ftl/offilter.hpp"
+#include "disparity/cuda.hpp"
+
+#ifdef HAVE_OPTFLOW
+
+using ftl::operators::OpticalFlowTemporalSmoothing;
+
+using ftl::codecs::Channel;
+using ftl::rgbd::Frame;
+using ftl::rgbd::Source;
+
+using cv::Mat;
+using cv::Size;
+
+using std::vector;
+
+template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO
+
+OpticalFlowTemporalSmoothing::OpticalFlowTemporalSmoothing(ftl::Configurable* cfg) :
+		ftl::operators::Operator(cfg) {
+	
+	size_ = Size(0, 0);
+
+	n_max_ = cfg->value("history_size", 7);
+	if (n_max_ < 1) {
+		LOG(WARNING) << "History length must be larger than 0, using default (0)";
+		n_max_ = 7;
+	}
+
+	if (n_max_ > 32) {
+		// TODO: cuda kernel uses fixed size buffer
+		LOG(WARNING) << "History length can't be larger than 32 (TODO)";
+		n_max_ = 32;
+	}
+
+	threshold_ = cfg->value("threshold", 1.0);
+
+	cfg->on("threshold", [this, &cfg](const ftl::config::Event&) {
+		float threshold = cfg->value("threshold", 1.0);
+		if (threshold < 0.0) {
+			LOG(WARNING) << "invalid threshold " << threshold << ", value must be positive";
+		}
+		else {
+			threshold_ = threshold;
+			init();
+		}
+	});
+
+	cfg->on("history_size", [this, &cfg](const ftl::config::Event&) {
+		int n_max = cfg->value("history_size", 1.0);
+
+		if (n_max < 1) {
+			LOG(WARNING) << "History length must be larger than 0";
+		}
+		else if (n_max_ > 32) {
+			// TODO: cuda kernel uses fixed size buffer
+			LOG(WARNING) << "History length can't be larger than 32 (TODO)";
+		}
+		else {
+			n_max_ = n_max;
+			init();
+		}
+	});
+}
+
+OpticalFlowTemporalSmoothing::~OpticalFlowTemporalSmoothing() {}
+
+bool OpticalFlowTemporalSmoothing::init() {
+	if (size_ == Size(0, 0)) { return false; }
+	history_.create(cv::Size(size_.width * n_max_, size_.height), CV_32FC1);
+	history_.setTo(0.0);
+	return true;
+}
+
+bool OpticalFlowTemporalSmoothing::apply(Frame &in, Frame &out, Source *src, cudaStream_t stream) {
+	if (!out.hasChannel(channel_) || !in.hasChannel(Channel::Flow)) { return false; }
+
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+	const cv::cuda::GpuMat &optflow = in.get<cv::cuda::GpuMat>(Channel::Flow);
+	cv::cuda::GpuMat &data = out.get<cv::cuda::GpuMat>(channel_);
+	
+	if (data.size() != size_) {
+		size_ = data.size();
+		if (!init()) { return false; }
+	}
+
+	ftl::cuda::optflow_filter(data, optflow, history_, n_max_, threshold_, cvstream);
+	
+	return true;
+}
+
+#endif  // HAVE_OPTFLOW
diff --git a/components/rgbd-sources/src/algorithms/offilter.cu b/components/operators/src/disparity/optflow_smoothing.cu
similarity index 97%
rename from components/rgbd-sources/src/algorithms/offilter.cu
rename to components/operators/src/disparity/optflow_smoothing.cu
index 6feee5ae1daf9fc8b4b165370dbb8646b1d7b8a1..a7633036fae1e9d20c6a52ae5617466f1b234202 100644
--- a/components/rgbd-sources/src/algorithms/offilter.cu
+++ b/components/operators/src/disparity/optflow_smoothing.cu
@@ -1,7 +1,9 @@
 #include <ftl/cuda_common.hpp>
 #include <ftl/rgbd/camera.hpp>
 #include <opencv2/core/cuda_stream_accessor.hpp>
-#include <qsort.h>
+
+#include "disparity/qsort.h"
+#include "disparity/cuda.hpp"
 
 __device__ void quicksort(float A[], size_t n)
 {
diff --git a/components/rgbd-sources/include/qsort.h b/components/operators/src/disparity/qsort.h
similarity index 100%
rename from components/rgbd-sources/include/qsort.h
rename to components/operators/src/disparity/qsort.h
diff --git a/components/operators/src/filling.cpp b/components/operators/src/filling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..da7efea6bdf085eb797bc56500f4e783e30f6162
--- /dev/null
+++ b/components/operators/src/filling.cpp
@@ -0,0 +1,52 @@
+#include <ftl/operators/filling.hpp>
+
+#include "filling_cuda.hpp"
+
+using ftl::operators::ScanFieldFill;
+using ftl::operators::CrossSupportFill;
+using ftl::codecs::Channel;
+
+ScanFieldFill::ScanFieldFill(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+ScanFieldFill::~ScanFieldFill() {
+
+}
+
+bool ScanFieldFill::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	float thresh = config()->value("threshold", 0.1f);
+
+	ftl::cuda::scan_field_fill(
+		in.createTexture<float>(Channel::Depth),
+		in.createTexture<float>(Channel::Depth),
+		in.createTexture<float>(Channel::Smoothing),
+		thresh,
+		s->parameters(), stream
+	);
+
+	return true;
+}
+
+
+CrossSupportFill::CrossSupportFill(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+CrossSupportFill::~CrossSupportFill() {
+
+}
+
+bool CrossSupportFill::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+
+	/*ftl::cuda::filling_csr(
+		in.createTexture<uchar4>(Channel::Colour2),
+		in.createTexture<float4>(Channel::Normals),
+		in.createTexture<float>(Channel::Depth),
+		in.createTexture<float>(Channel::Depth),
+		in.createTexture<uchar4>(Channel::Colour),
+		s->parameters(), 0
+	);*/
+
+	return true;
+}
diff --git a/components/operators/src/filling.cu b/components/operators/src/filling.cu
new file mode 100644
index 0000000000000000000000000000000000000000..7780530ff69ab178c46803a38f05de85d037f033
--- /dev/null
+++ b/components/operators/src/filling.cu
@@ -0,0 +1,201 @@
+#include "filling_cuda.hpp"
+
+#define T_PER_BLOCK 8
+#define WARP_SIZE 32
+#define MAX_EDGES 128
+
+using ftl::cuda::TextureObject;
+
+__device__ inline bool isValid(const ftl::rgbd::Camera &camera, float d) {
+	return d > camera.minDepth && d < camera.maxDepth; 
+}
+
+__global__ void scan_field_fill_kernel(
+		TextureObject<float> depth_in,        // Virtual depth map
+		TextureObject<float> depth_out,   // Accumulated output
+		TextureObject<float> smoothing,
+		float thresh,
+		ftl::rgbd::Camera camera) {
+
+	__shared__ int counter;
+	__shared__ int offsets[MAX_EDGES];
+		
+	//const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (y >= depth_in.height()) return;
+	if (threadIdx.x == 0) counter = 0;
+	__syncthreads();
+
+	for (STRIDE_X(x,depth_in.width()-1)) {
+
+		// Use entire block to find depth edges and make a list of them
+		float d0 = depth_in.tex2D(x,y);
+		float d1 = depth_in.tex2D(x+1,y);
+
+		if (isValid(camera, d0) != isValid(camera, d1)) {
+			int ix = atomicAdd(&counter, 1);
+			if (ix >= MAX_EDGES) break;
+			offsets[ix] = x;
+		}
+	}
+
+	__syncthreads();
+
+	// For each identified pixel, a thread takes on a filling job
+	for (int i=threadIdx.x; i<counter; i += blockDim.x) {
+		// Get smoothing gradient to decide action
+		int x = offsets[i];
+
+		float s0 = smoothing.tex2D(x,y);
+		float d0 = depth_in.tex2D(x,y);
+		float d1 = depth_in.tex2D(x+1,y);
+
+		// More than 8 pixels from edge
+		if (s0 < thresh) continue;
+		//if (s0 < 2.0f / 100.0f) continue;
+
+		/*bool increase = false;
+		for (int u=1; u<20; ++u) {
+			float s1 = smoothing.tex2D(x+u,y);
+			if (s1 != s0) {
+				increase = s1 > s0;
+				break;
+			}
+		}*/
+
+		// TODO: Smoothing channel has discrete steps meaning gradient can't be determined? CHECK
+		// The above would be why some slices are not filled
+
+		//bool fill_right = (isValid(camera, d0) && !increase);
+		//bool clear_right = (isValid(camera, d1) && !increase);
+		//bool fill_left = (isValid(camera, d1) && increase);
+		//bool clear_left = (isValid(camera, d0) && increase);
+		//bool clear = clear_left || clear_right;
+
+		bool fill_right = isValid(camera, d0);
+
+		//if (s1 == s0 || max(s1,s0) > 0.1f) continue;
+
+		// Set up fill value and direction
+		//float value = ((clear_left || clear_right) && s0 < thresh) ? 1000.0f : (isValid(camera, d0) ? d0 : d1);
+		//int dir = (fill_right || clear_right) ? 1 : -1;
+
+		float value = (isValid(camera, d0)) ? d0 : d1;
+		int dir = (fill_right) ? 1 : -1;
+
+		// TODO: The minimum needs to be search for and not crossed into, the filling
+		// must stop before the minimum. Requires lookahead.
+
+		/*int end_x = 0;
+		for (int u=1; u<1000; ++u) {
+			float s = smoothing(x+u*dir, y);
+			if (s < thresh) break;
+			end_x = u;
+		}
+
+		float gradient = */
+
+		// Fill
+		int end_x = 0;
+		float end_d;
+		for (int u=1; u<1000; ++u) {
+			end_d = depth_in(x+u*dir, y);
+			float s = smoothing(x+u*dir, y);
+			if ((s < thresh) || (isValid(camera, end_d))) break;
+			end_x = u;
+		}
+
+		float gradient = (end_d - value) / (float)end_x;
+
+		for (int u=1; u<end_x; ++u) {
+			depth_out(x+u*dir,y) = value;
+			value += gradient;
+		}
+	}
+}
+
+void ftl::cuda::scan_field_fill(
+		TextureObject<float> &depth_in,        // Virtual depth map
+		TextureObject<float> &depth_out,   // Accumulated output
+		TextureObject<float> &smoothing,
+		float thresh,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+
+	const dim3 gridSize(1, smoothing.height());
+	const dim3 blockSize(128, 1);
+
+	scan_field_fill_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, smoothing, thresh, camera);
+	cudaSafeCall( cudaGetLastError() );
+}
+
+
+//===== Cross Support Region Filling ===========================================
+
+__global__ void filling_csr_kernel(
+		TextureObject<uchar4> region,
+		TextureObject<float4> normals_in,
+		TextureObject<float> depth_in,        // Virtual depth map
+		TextureObject<float> depth_out,   // Accumulated output
+		TextureObject<uchar4> colour_in,
+		ftl::rgbd::Camera camera) {
+	
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
+
+	float3 aX = make_float3(0.0f,0.0f,0.0f);
+	float3 nX = make_float3(0.0f,0.0f,0.0f);
+	float contrib = 0.0f;
+
+	float d0 = depth_in.tex2D(x, y);
+	//depth_out(x,y) = d0;
+	//normals_out(x,y) = normals_in(x,y);
+	if (d0 < camera.minDepth || d0 > camera.maxDepth) return;
+	float3 X = camera.screenToCam((int)(x),(int)(y),d0);
+
+	uchar4 c0 = colour_in.tex2D(x, y);
+
+	// Neighbourhood
+	uchar4 base = region.tex2D(x,y);
+
+	for (int v=-base.z; v<=base.w; ++v) {
+	uchar4 baseY = region.tex2D(x,y+v);
+
+	for (int u=-baseY.x; u<=baseY.y; ++u) {
+		const float d = depth_in.tex2D(x+u, y+v);
+		if (d < camera.minDepth || d > camera.maxDepth) continue;
+
+		// Point and normal of neighbour
+		const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d);
+		const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v));
+
+		
+	}
+	}
+
+	
+}
+
+void ftl::cuda::filling_csr(
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	filling_csr_kernel<<<gridSize, blockSize, 0, stream>>>(region, normals_in, depth_in, depth_out, colour_in, camera);
+	cudaSafeCall( cudaGetLastError() );
+
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
diff --git a/components/operators/src/filling_cuda.hpp b/components/operators/src/filling_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e47781ac7fdd3656809de496dad21d1bb3a48862
--- /dev/null
+++ b/components/operators/src/filling_cuda.hpp
@@ -0,0 +1,30 @@
+#ifndef _FTL_CUDA_FILLING_HPP_
+#define _FTL_CUDA_FILLING_HPP_
+
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void scan_field_fill(
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		ftl::cuda::TextureObject<float> &smoothing,
+		float thresh,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream);
+
+void filling_csr(
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream);
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/components/operators/src/mask.cpp b/components/operators/src/mask.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f923f11d06a39df882eaf83289b37296aba0ada5
--- /dev/null
+++ b/components/operators/src/mask.cpp
@@ -0,0 +1,49 @@
+#include <ftl/operators/mask.hpp>
+#include "mask_cuda.hpp"
+
+using ftl::operators::DiscontinuityMask;
+using ftl::operators::CullDiscontinuity;
+using ftl::codecs::Channel;
+using ftl::rgbd::Format;
+
+DiscontinuityMask::DiscontinuityMask(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+DiscontinuityMask::~DiscontinuityMask() {
+
+}
+
+bool DiscontinuityMask::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	int radius = config()->value("radius", 2);
+	float threshold = config()->value("threshold", 0.1f);
+
+	ftl::cuda::discontinuity(
+		out.createTexture<int>(Channel::Mask, ftl::rgbd::Format<int>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+		in.createTexture<uchar4>(Channel::Support1),
+		in.createTexture<float>(Channel::Depth),
+		s->parameters(), radius, threshold, stream
+	);
+
+	return true;
+}
+
+
+
+CullDiscontinuity::CullDiscontinuity(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+CullDiscontinuity::~CullDiscontinuity() {
+
+}
+
+bool CullDiscontinuity::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	ftl::cuda::cull_discontinuity(
+		in.createTexture<int>(Channel::Mask),
+		out.createTexture<float>(Channel::Depth),
+		stream
+	);
+
+	return true;
+}
\ No newline at end of file
diff --git a/components/operators/src/mask.cu b/components/operators/src/mask.cu
new file mode 100644
index 0000000000000000000000000000000000000000..e385f41b14459802dbf52ef85aef2f891eceff08
--- /dev/null
+++ b/components/operators/src/mask.cu
@@ -0,0 +1,80 @@
+#include "mask_cuda.hpp"
+
+#define T_PER_BLOCK 8
+
+using ftl::cuda::Mask;
+
+__global__ void discontinuity_kernel(ftl::cuda::TextureObject<int> mask_out, ftl::cuda::TextureObject<uchar4> support, ftl::cuda::TextureObject<float> depth, ftl::rgbd::Camera params, float threshold, int radius) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < params.width && y < params.height) {
+		Mask mask(0);
+
+		const float d = depth.tex2D((int)x, (int)y);
+
+		if (d >= params.minDepth && d <= params.maxDepth) {
+			/* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. */
+
+			// If colour cross support region terminates within the requested
+			// radius, and the absolute depth difference on the other side is
+			// greater than threshold, then is is a discontinuity.
+			// Repeat for left, right, up and down.
+			const uchar4 sup = support.tex2D((int)x, (int)y);
+			if (sup.x <= radius) {
+				float dS = depth.tex2D((int)x - sup.x - radius, (int)y);
+				if (fabs(dS - d) > threshold) mask.isDiscontinuity(true);
+			}
+			if (sup.y <= radius) {
+				float dS = depth.tex2D((int)x + sup.y + radius, (int)y);
+				if (fabs(dS - d) > threshold) mask.isDiscontinuity(true);
+			}
+			if (sup.z <= radius) {
+				float dS = depth.tex2D((int)x, (int)y - sup.z - radius);
+				if (fabs(dS - d) > threshold) mask.isDiscontinuity(true);
+			}
+			if (sup.w <= radius) {
+				float dS = depth.tex2D((int)x, (int)y + sup.w + radius);
+				if (fabs(dS - d) > threshold) mask.isDiscontinuity(true);
+			}
+        }
+        
+        mask_out(x,y) = (int)mask;
+	}
+}
+
+void ftl::cuda::discontinuity(ftl::cuda::TextureObject<int> &mask_out, ftl::cuda::TextureObject<uchar4> &support, ftl::cuda::TextureObject<float> &depth, const ftl::rgbd::Camera &params, int discon, float thresh, cudaStream_t stream) {
+	const dim3 gridSize((params.width + T_PER_BLOCK - 1)/T_PER_BLOCK, (params.height + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask_out, support, depth, params, thresh, discon);
+	cudaSafeCall( cudaGetLastError() );
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
+
+
+
+__global__ void cull_discontinuity_kernel(ftl::cuda::TextureObject<int> mask, ftl::cuda::TextureObject<float> depth) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < depth.width() && y < depth.height()) {
+		Mask m(mask.tex2D((int)x,(int)y));
+		if (m.isDiscontinuity()) depth(x,y) = 1000.0f;
+	}
+}
+
+void ftl::cuda::cull_discontinuity(ftl::cuda::TextureObject<int> &mask, ftl::cuda::TextureObject<float> &depth, cudaStream_t stream) {
+	const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    cull_discontinuity_kernel<<<gridSize, blockSize, 0, stream>>>(mask, depth);
+	cudaSafeCall( cudaGetLastError() );
+
+#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+#endif
+}
diff --git a/components/operators/src/mask_cuda.hpp b/components/operators/src/mask_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6a02aafdbbdfbbbd200355bddc3c7ba33a605483
--- /dev/null
+++ b/components/operators/src/mask_cuda.hpp
@@ -0,0 +1,59 @@
+#ifndef _FTL_CUDA_MASK_HPP_
+#define _FTL_CUDA_MASK_HPP_
+
+#include <ftl/cuda_common.hpp>
+#include <ftl/rgbd/camera.hpp>
+
+namespace ftl {
+namespace cuda {
+
+/**
+ * Wrap an int mask value used to flag individual depth pixels.
+ */
+class Mask {
+	public:
+	__device__ inline Mask() : v_(0) {}
+	__device__ explicit inline Mask(int v) : v_(v) {}
+	#ifdef __CUDACC__
+	__device__ inline Mask(const ftl::cuda::TextureObject<int> &m, int x, int y) : v_(m.tex2D(x,y)) {}
+	#endif
+	__device__ inline operator int() const { return v_; }
+
+    __device__ inline bool is(int m) const { return v_ & m; }
+
+	__device__ inline bool isFilled() const { return v_ & kMask_Filled; }
+	__device__ inline bool isDiscontinuity() const { return v_ & kMask_Discontinuity; }
+	__device__ inline bool hasCorrespondence() const { return v_ & kMask_Correspondence; }
+	__device__ inline bool isBad() const { return v_ & kMask_Bad; }
+
+	__device__ inline void isFilled(bool v) { v_ = (v) ? v_ | kMask_Filled : v_ & (~kMask_Filled); }
+	__device__ inline void isDiscontinuity(bool v) { v_ = (v) ? v_ | kMask_Discontinuity : v_ & (~kMask_Discontinuity); }
+	__device__ inline void hasCorrespondence(bool v) { v_ = (v) ? v_ | kMask_Correspondence : v_ & (~kMask_Correspondence); }
+	__device__ inline void isBad(bool v) { v_ = (v) ? v_ | kMask_Bad : v_ & (~kMask_Bad); }
+
+    static constexpr int kMask_Filled = 0x0001;
+	static constexpr int kMask_Discontinuity = 0x0002;
+	static constexpr int kMask_Correspondence = 0x0004;
+	static constexpr int kMask_Bad = 0x0008;
+
+	private:
+	int v_;
+};
+
+void discontinuity(
+		ftl::cuda::TextureObject<int> &mask,
+		ftl::cuda::TextureObject<uchar4> &support,
+		ftl::cuda::TextureObject<float> &depth,
+		const ftl::rgbd::Camera &params,
+		int radius, float threshold,
+		cudaStream_t stream);
+
+void cull_discontinuity(
+		ftl::cuda::TextureObject<int> &mask,
+		ftl::cuda::TextureObject<float> &depth,
+		cudaStream_t stream);
+
+}
+}
+
+#endif
diff --git a/components/operators/src/mls.cu b/components/operators/src/mls.cu
index e030d1967695bd71760c0bc2af26706d21f0f2e3..e07178cdf9606444c307f59a2adbb8818b049ed4 100644
--- a/components/operators/src/mls.cu
+++ b/components/operators/src/mls.cu
@@ -5,6 +5,7 @@
 using ftl::cuda::TextureObject;
 
 #define T_PER_BLOCK 8
+#define WARP_SIZE 32
 
 // ===== MLS Smooth ============================================================
 
@@ -131,10 +132,11 @@ void ftl::cuda::mls_smooth(
 
 	float d0 = depth_in.tex2D(x, y);
 	depth_out(x,y) = d0;
+	normals_out(x,y) = normals_in(x,y);
 	if (d0 < camera.minDepth || d0 > camera.maxDepth) return;
 	float3 X = camera.screenToCam((int)(x),(int)(y),d0);
 
-	uchar4 c0 = colour_in.tex2D(x, y);
+	float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f);
 
     // Neighbourhood
     for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) {
@@ -146,7 +148,9 @@ void ftl::cuda::mls_smooth(
 		const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d);
 		const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v));
 
-		const uchar4 c = colour_in.tex2D(x+u, y+v);
+		if (Ni.x+Ni.y+Ni.z == 0.0f) continue;
+
+		const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f);
 		const float cw = ftl::cuda::colourWeighting(c0,c,colour_smoothing);
 
 		// Gauss approx weighting function using point distance
@@ -157,6 +161,8 @@ void ftl::cuda::mls_smooth(
 		contrib += w;
     }
 	}
+
+	if (contrib == 0.0f) return;
 	
 	nX /= contrib;  // Weighted average normal
 	aX /= contrib;  // Weighted average point (centroid)
@@ -207,6 +213,436 @@ void ftl::cuda::colour_mls_smooth(
 }
 
 
+// ===== Colour MLS using cross support region =================================
+
+__device__ inline int segmentID(int u, int v) {
+	if (u < 0 && v < 0) return 0x001;
+	if (u > 0 && v < 0) return 0x002;
+	if (u > 0 && v > 0) return 0x004;
+	if (u < 0 && v > 0) return 0x008;
+	return 0;
+}
+
+/*
+ * Smooth depth map using Moving Least Squares. This version uses colour
+ * similarity weights to adjust the spatial smoothing factor. It is naive in
+ * that similar colours may exist on both sides of an edge and colours at the
+ * level of single pixels can be subject to noise. Colour noise should first
+ * be removed from the image.
+ */
+ template <bool FILLING, int RADIUS>
+ __global__ void colour_mls_smooth_csr_kernel(
+	 	TextureObject<uchar4> region,
+		TextureObject<float4> normals_in,
+		TextureObject<float4> normals_out,
+        TextureObject<float> depth_in,        // Virtual depth map
+		TextureObject<float> depth_out,   // Accumulated output
+		TextureObject<uchar4> colour_in,
+		float smoothing,
+		float colour_smoothing,
+        ftl::rgbd::Camera camera) {
+        
+    const int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
+
+	float3 aX = make_float3(0.0f,0.0f,0.0f);
+	float3 nX = make_float3(0.0f,0.0f,0.0f);
+    float contrib = 0.0f;
+
+	float d0 = depth_in.tex2D(x, y);
+	depth_out(x,y) = d0;
+	normals_out(x,y) = normals_in(x,y);
+	if (d0 < camera.minDepth || d0 > camera.maxDepth) {
+		if(FILLING) d0 = 0.0f;
+		else return;
+	}
+	float3 X = camera.screenToCam((int)(x),(int)(y),d0);
+
+	float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f);
+
+    // Neighbourhood
+	uchar4 base = region.tex2D(x,y);
+	int segment_check = 0;
+
+	// TODO: Does using a fixed loop size with range test work better?
+	// Or with warp per pixel version, this would be less of a problem...
+	// TODO: Do a separate vote fill step?
+	for (int v=-RADIUS; v<=RADIUS; ++v) {
+		uchar4 baseY = region.tex2D(x,y+v);
+
+		#pragma unroll
+		for (int u=-RADIUS; u<=RADIUS; ++u) {
+			const float d = depth_in.tex2D(x+u, y+v);
+			//if (d > camera.minDepth && d < camera.maxDepth) {
+
+				float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -baseY.x || u > baseY.y || v < -base.z || v > base.z) ? 0.0f : 1.0f;
+
+				// Point and normal of neighbour
+				const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d);
+				const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v));
+
+				// FIXME: Ensure bad normals are removed by setting depth invalid
+				//if (Ni.x+Ni.y+Ni.z == 0.0f) continue;
+
+				const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y+v) + 0.5f);
+				w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing);
+
+				// Allow missing point to borrow z value
+				// TODO: This is a bad choice of Z! Perhaps try histogram vote approach
+				//if (FILLING && d0 == 0.0f) X = camera.screenToCam((int)(x),(int)(y),Xi.z);
+
+				// Gauss approx weighting function using point distance
+				w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w);
+
+				aX += Xi*w;
+				nX += Ni*w;
+				contrib += w;
+				//if (FILLING && w > 0.0f && v > -base.z+1 && v < base.w-1 && u > -baseY.x+1 && u < baseY.y-1) segment_check |= segmentID(u,v);
+			//}
+		}
+	}
+
+	if (contrib > 0.0f) {
+		nX /= contrib;  // Weighted average normal
+		aX /= contrib;  // Weighted average point (centroid)
+
+		if (FILLING && d0 == 0.0f) {
+			if (__popc(segment_check) < 3) return;
+			X = camera.screenToCam((int)(x),(int)(y),aX.z);
+		}
+
+		// Signed-Distance Field function
+		float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z);
+
+		// Calculate new point using SDF function to adjust depth (and position)
+		X = X - nX * fX;
+		
+		//uint2 screen = camera.camToScreen<uint2>(X);
+
+		//if (screen.x < depth_out.width() && screen.y < depth_out.height()) {
+		//    depth_out(screen.x,screen.y) = X.z;
+		//}
+		depth_out(x,y) = X.z;
+		normals_out(x,y) = make_float4(nX / length(nX), 0.0f);
+	}
+}
+
+void ftl::cuda::colour_mls_smooth_csr(
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float4> &normals_out,
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		float smoothing,
+		float colour_smoothing,
+		bool filling,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	if (filling) {
+		colour_mls_smooth_csr_kernel<true,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera);
+	} else {
+		colour_mls_smooth_csr_kernel<false,5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera);
+	}
+		
+	cudaSafeCall( cudaGetLastError() );
+
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
+
+
+// ===== Cross Aggregate MLS ===================================================
+
+/*
+ * Smooth depth map using Moving Least Squares. This version uses colour
+ * similarity weights to adjust the spatial smoothing factor. It also uses
+ * cross support windows to prevent unwanted edge crossing. This function does
+ * the weighted aggregation of centroids and normals in the horizontal
+ * direction.
+ */
+ template <int RADIUS>
+ __global__ void mls_aggr_horiz_kernel(
+	 	TextureObject<uchar4> region,
+		TextureObject<float4> normals_in,
+		TextureObject<float4> normals_out,
+        TextureObject<float> depth_in,        // Virtual depth map
+		TextureObject<float4> centroid_out,   // Accumulated output
+		TextureObject<uchar4> colour_in,
+		float smoothing,
+		float colour_smoothing,
+        ftl::rgbd::Camera camera) {
+        
+    const int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
+
+	float3 aX = make_float3(0.0f,0.0f,0.0f);
+	float3 nX = make_float3(0.0f,0.0f,0.0f);
+    float contrib = 0.0f;
+
+	float d0 = depth_in.tex2D(x, y);
+
+	// Note: x and y flipped as output is rotated.
+	centroid_out(y,x) = make_float4(0.0f);
+	normals_out(y,x) = normals_in(x,y);
+
+	if (d0 <= camera.minDepth || d0 >= camera.maxDepth) return;
+	
+	float3 X = camera.screenToCam((int)(x),(int)(y),d0);
+	float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f);
+
+    // Cross-Support Neighbourhood
+	uchar4 base = region.tex2D(x,y);
+
+	#pragma unroll
+	for (int u=-RADIUS; u<=RADIUS; ++u) {
+		const float d = depth_in.tex2D(x+u, y);
+
+		// If outside of cross support range, set weight to 0 to ignore
+		float w = (d <= camera.minDepth || d >= camera.maxDepth || u < -base.x || u > base.y) ? 0.0f : 1.0f;
+
+		// Point and normal of neighbour
+		const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y),d);
+		const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)));
+
+		// Bad or missing normals should be ignored
+		if (Ni.x+Ni.y+Ni.z == 0.0f) w = 0.0f;
+
+		// Gauss approx colour weighting.
+		const float4 c = colour_in.tex2D(float(x+u) + 0.5f, float(y) + 0.5f);
+		w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing);
+
+		// Gauss approx weighting function using point distance
+		w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w);
+
+		aX += Xi*w;
+		nX += Ni*w;
+		contrib += w;
+	}
+
+	if (contrib > 0.0f) {
+		nX /= contrib;  // Weighted average normal
+		aX /= contrib;  // Weighted average point (centroid)
+
+		// Note: x and y flipped since output is rotated 90 degrees.
+		centroid_out(y,x) = make_float4(aX, 0.0f);
+		normals_out(y,x) = make_float4(nX / length(nX), 0.0f);
+	}
+}
+
+/**
+ * This function does a vertical weighted aggregation of the centroids and
+ * normals generated by the horizontal aggregation.
+ */
+template <int RADIUS>
+ __global__ void mls_aggr_vert_kernel(
+	 	TextureObject<uchar4> region,
+		TextureObject<float4> normals_in,
+		TextureObject<float4> normals_out,
+        TextureObject<float4> centroid_in,        // Virtual depth map
+		TextureObject<float4> centroid_out,   // Accumulated output
+		TextureObject<uchar4> colour_in,
+		TextureObject<float> depth_in,
+		float smoothing,
+		float colour_smoothing,
+        ftl::rgbd::Camera camera) {
+        
+    const int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return;
+
+	float3 aX = make_float3(0.0f,0.0f,0.0f);
+	float3 nX = make_float3(0.0f,0.0f,0.0f);
+	float contrib = 0.0f;
+	
+	float d0 = depth_in.tex2D(x, y);
+	if (d0 <= camera.minDepth || d0 >= camera.maxDepth) return;
+	
+	float3 X = camera.screenToCam((int)(x),(int)(y),d0);
+
+	centroid_out(x,y) = make_float4(0.0f);
+	normals_out(x,y) = make_float4(0.0f);
+	
+	float4 c0 = colour_in.tex2D((float)x+0.5f, (float)y+0.5f);
+
+    // Cross-Support Neighbourhood
+	uchar4 base = region.tex2D(x,y);
+
+	#pragma unroll
+	for (int v=-RADIUS; v<=RADIUS; ++v) {
+		const float d = depth_in.tex2D(x, y+v);
+		const float3 Xi = camera.screenToCam(x,y+v,d);
+
+		// Note: x and y flipped, input image is rotated.
+		float3 Ai = make_float3(centroid_in.tex2D(y+v, x));
+
+		// If outside the cross support range, set weight to 0 to ignore
+		float w = (Ai.z <= camera.minDepth || Ai.z >= camera.maxDepth || v < -base.z || v > base.w) ? 0.0f : 1.0f;
+
+		// Note: x and y flipped, input image is rotated.
+		const float3 Ni = make_float3(normals_in.tex2D(y+v, x));
+
+		// Bad normals should be ignored.
+		if (Ni.x+Ni.y+Ni.z == 0.0f) w = 0.0f;
+
+		// Gauss approx colour weighting.
+		const float4 c = colour_in.tex2D(float(x) + 0.5f, float(y+v) + 0.5f);
+		w *= ftl::cuda::colourWeighting(c0,c,colour_smoothing);
+
+		// Gauss approx weighting function using point distance
+		w = ftl::cuda::spatialWeighting(X,Xi,smoothing*w);
+
+		aX += Ai*w;
+		nX += Ni*w;
+		contrib += w;
+	}
+
+	// Normalise the summed points and normals
+	if (contrib > 0.0f) {
+		nX /= contrib;  // Weighted average normal
+		aX /= contrib;  // Weighted average point (centroid)
+		centroid_out(x,y) = make_float4(aX, 0.0f);
+		normals_out(x,y) = make_float4(nX / length(nX), 0.0f);
+	}
+}
+
+/**
+ * Using the aggregated centroids and normals, calculate the signed-distance-
+ * field and move the depth value accordingly using the calculated normal.
+ */
+__global__ void mls_adjust_depth_kernel(
+		TextureObject<float4> normals_in,
+		TextureObject<float4> centroid_in,        // Virtual depth map
+		TextureObject<float> depth_in,
+		TextureObject<float> depth_out,
+		ftl::rgbd::Camera camera) {
+	
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < 0 || y < 0 || x >= depth_out.width() || y >= depth_out.height()) return;
+
+	float3 aX = make_float3(centroid_in(x,y));
+	float3 nX = make_float3(normals_in(x,y));
+
+	float d0 = depth_in.tex2D(x, y);
+	depth_out(x,y) = d0;
+
+	if (d0 > camera.minDepth && d0 < camera.maxDepth && aX.z > camera.minDepth && aX.z < camera.maxDepth) {
+		float3 X = camera.screenToCam((int)(x),(int)(y),d0);
+
+		// Signed-Distance Field function
+		float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z);
+
+		// Calculate new point using SDF function to adjust depth (and position)
+		X = X - nX * fX;
+		
+		depth_out(x,y) = X.z;
+	}
+}
+
+
+void ftl::cuda::mls_aggr_horiz(
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float4> &normals_out,
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float4> &centroid_out,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		float smoothing,
+		float colour_smoothing,
+		int radius,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((normals_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (normals_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	switch(radius) {
+	case 1: mls_aggr_horiz_kernel<1><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
+	case 2: mls_aggr_horiz_kernel<2><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
+	case 3: mls_aggr_horiz_kernel<3><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
+	case 5: mls_aggr_horiz_kernel<5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
+	case 10: mls_aggr_horiz_kernel<10><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
+	case 15: mls_aggr_horiz_kernel<15><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
+	case 20: mls_aggr_horiz_kernel<20><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, depth_in, centroid_out, colour_in, smoothing, colour_smoothing, camera); break;
+	default: return;
+	}
+	cudaSafeCall( cudaGetLastError() );
+
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
+
+void ftl::cuda::mls_aggr_vert(
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float4> &normals_out,
+		ftl::cuda::TextureObject<float4> &centroid_in,
+		ftl::cuda::TextureObject<float4> &centroid_out,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		ftl::cuda::TextureObject<float> &depth_in,
+		float smoothing,
+		float colour_smoothing,
+		int radius,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((normals_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (normals_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	switch(radius) {
+	case 1: mls_aggr_vert_kernel<1><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
+	case 2: mls_aggr_vert_kernel<2><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
+	case 3: mls_aggr_vert_kernel<3><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
+	case 5: mls_aggr_vert_kernel<5><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
+	case 10: mls_aggr_vert_kernel<10><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
+	case 15: mls_aggr_vert_kernel<15><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
+	case 20: mls_aggr_vert_kernel<20><<<gridSize, blockSize, 0, stream>>>(region, normals_in, normals_out, centroid_in, centroid_out, colour_in, depth_in, smoothing, colour_smoothing, camera); break;
+	default: return;
+	}
+	cudaSafeCall( cudaGetLastError() );
+
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
+
+void ftl::cuda::mls_adjust_depth(
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float4> &centroid_in,
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	mls_adjust_depth_kernel<<<gridSize, blockSize, 0, stream>>>(normals_in, centroid_in, depth_in, depth_out, camera);
+	cudaSafeCall( cudaGetLastError() );
+
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
+
+
 // ===== Adaptive MLS Smooth =====================================================
 
 /*
diff --git a/components/operators/src/mvmls.cpp b/components/operators/src/mvmls.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c02a7607f1d6a506c1f60cbd1f25eabd2cb5732
--- /dev/null
+++ b/components/operators/src/mvmls.cpp
@@ -0,0 +1,340 @@
+#include <ftl/operators/mvmls.hpp>
+#include "smoothing_cuda.hpp"
+#include <ftl/utility/matrix_conversion.hpp>
+#include "mvmls_cuda.hpp"
+
+using ftl::operators::MultiViewMLS;
+using ftl::codecs::Channel;
+using cv::cuda::GpuMat;
+using ftl::rgbd::Format;
+
+MultiViewMLS::MultiViewMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+MultiViewMLS::~MultiViewMLS() {
+
+}
+
+bool MultiViewMLS::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) {
+    cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
+    float thresh = config()->value("mls_threshold", 0.4f);
+	float col_smooth = config()->value("mls_colour_smoothing", 30.0f);
+	int iters = config()->value("mls_iterations", 3);
+	int radius = config()->value("mls_radius",5);
+	//bool aggre = config()->value("aggregation", true);
+    int win = config()->value("cost_function",1);
+    bool do_corr = config()->value("merge_corresponding", true);
+	bool do_aggr = config()->value("merge_mls", false);
+	bool cull_zero = config()->value("cull_no_confidence", false);
+    //bool show_best_source = config()->value("show_pixel_source", false);
+
+    ftl::cuda::MvMLSParams params;
+    params.range = config()->value("search_range", 0.05f);
+    params.fill_match = config()->value("fill_match", 50.0f);
+    params.fill_threshold = config()->value("fill_threshold", 0.0f);
+	params.match_threshold = config()->value("match_threshold", 0.3f);
+    params.colour_smooth = config()->value("colour_smooth", 150.0f);
+    params.spatial_smooth = config()->value("spatial_smooth", 0.04f);
+    params.cost_ratio = config()->value("cost_ratio", 0.2f);
+	params.cost_threshold = config()->value("cost_threshold", 1.0f);
+
+    // Make sure we have enough buffers
+    if (normals_horiz_.size() < in.frames.size()) {
+        normals_horiz_.resize(in.frames.size());
+        centroid_horiz_.resize(in.frames.size());
+        centroid_vert_.resize(in.frames.size());
+        contributions_.resize(in.frames.size());
+    }
+
+    // Make sure all buffers are at correct resolution and are allocated
+    for (size_t i=0; i<in.frames.size(); ++i) {
+        auto &f = in.frames[i];
+	    auto size = f.get<GpuMat>(Channel::Depth).size();
+	    centroid_horiz_[i].create(size.height, size.width);
+	    normals_horiz_[i].create(size.height, size.width);
+	    centroid_vert_[i].create(size.width, size.height);
+        contributions_[i].create(size.width, size.height);
+
+        if (!f.hasChannel(Channel::Normals)) {
+            LOG(ERROR) << "Required normals channel missing for MLS";
+            return false;
+        }
+        if (!f.hasChannel(Channel::Support1)) {
+            LOG(ERROR) << "Required cross support channel missing for MLS";
+            return false;
+        }
+
+        // Create required channels
+        f.create<GpuMat>(Channel::Confidence, Format<float>(size));
+        f.createTexture<float>(Channel::Confidence);
+        f.create<GpuMat>(Channel::Screen, Format<short2>(size));
+        f.createTexture<short2>(Channel::Screen);
+    }
+
+    for (int iter=0; iter<iters; ++iter) {
+		// Step 1:
+		// Calculate correspondences and adjust depth values
+		// Step 2:
+        // Find corresponding points and perform aggregation of any correspondences
+        // For each camera combination
+        if (do_corr) {
+            for (size_t i=0; i<in.frames.size(); ++i) {
+                auto &f1 = in.frames[i];
+                //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream);
+                f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
+
+                Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0);
+                d1 = in.sources[i]->getPose() * d1;
+
+                for (size_t j=0; j<in.frames.size(); ++j) {
+                    if (i == j) continue;
+
+                    //LOG(INFO) << "Running phase1";
+
+                    auto &f2 = in.frames[j];
+                    auto s1 = in.sources[i];
+                    auto s2 = in.sources[j];
+
+                    // Are cameras facing similar enough direction?
+                    Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0);
+                    d2 = in.sources[j]->getPose() * d2;
+                    // No, so skip this combination
+                    if (d1.dot(d2) <= 0.0) continue;
+
+                    auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>());
+                    auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse());
+                    auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
+					auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>());
+
+                    auto transform = pose2 * pose1;
+
+                    //Calculate screen positions of estimated corresponding points
+                    ftl::cuda::correspondence(
+                        f1.getTexture<float>(Channel::Depth),
+                        f2.getTexture<float>(Channel::Depth),
+                        f1.getTexture<uchar4>(Channel::Colour),
+                        f2.getTexture<uchar4>(Channel::Colour),
+                        // TODO: Add normals and other things...
+                        f1.getTexture<short2>(Channel::Screen),
+                        f1.getTexture<float>(Channel::Confidence),
+                        f1.getTexture<int>(Channel::Mask),
+                        pose1,
+                        pose1_inv,
+                        pose2,
+                        s1->parameters(),
+                        s2->parameters(),
+                        params,
+                        win,
+                        stream
+                    );
+
+                    // Also calculate best source for each point
+                    /*ftl::cuda::best_sources(
+                        f1.getTexture<uchar4>(Channel::Support1),
+                        f2.getTexture<uchar4>(Channel::Support1),
+                        f1.getTexture<float>(Channel::Depth),
+                        f2.getTexture<float>(Channel::Depth),
+                        f1.getTexture<short2>(Channel::Screen),
+                        transform,
+                        s1->parameters(),
+                        s2->parameters(),
+                        i, j, stream
+                    );*/
+				}
+			}
+		}
+
+        // Find best source for every pixel
+        /*for (size_t i=0; i<in.frames.size(); ++i) {
+            auto &f1 = in.frames[i];
+            //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream);
+            //f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
+
+            Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0);
+            d1 = in.sources[i]->getPose() * d1;
+
+            for (size_t j=0; j<in.frames.size(); ++j) {
+                if (i == j) continue;
+
+                //LOG(INFO) << "Running phase1";
+
+                auto &f2 = in.frames[j];
+                auto s1 = in.sources[i];
+                auto s2 = in.sources[j];
+
+                // Are cameras facing similar enough direction?
+                Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0);
+                d2 = in.sources[j]->getPose() * d2;
+                // No, so skip this combination
+                if (d1.dot(d2) <= 0.0) continue;
+
+                auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>());
+                auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse());
+                auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
+                auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>());
+
+                auto transform = pose2 * pose1;
+
+                // Also calculate best source for each point
+                ftl::cuda::best_sources(
+                    f1.getTexture<float4>(Channel::Normals),
+                    f2.getTexture<float4>(Channel::Normals),
+                    f1.getTexture<uchar4>(Channel::Support1),
+                    f2.getTexture<uchar4>(Channel::Support1),
+                    f1.getTexture<float>(Channel::Depth),
+                    f2.getTexture<float>(Channel::Depth),
+                    f1.getTexture<short2>(Channel::Screen),
+                    transform,
+                    s1->parameters(),
+                    s2->parameters(),
+                    i, j, stream
+                );
+            }
+        }*/
+
+        // Step 2:
+        // Do the horizontal and vertical MLS aggregations for each source
+        // But don't do the final move step.
+        for (size_t i=0; i<in.frames.size(); ++i) {
+            auto &f = in.frames[i];
+            auto *s = in.sources[i];
+
+            // Clear data
+            cv::cuda::GpuMat data(contributions_[i].height(), contributions_[i].width(), CV_32F, contributions_[i].pixelPitch());
+            data.setTo(cv::Scalar(0.0f), cvstream);
+
+			if (cull_zero && iter == iters-1) {
+				ftl::cuda::zero_confidence(
+					f.getTexture<float>(Channel::Confidence),
+					f.getTexture<float>(Channel::Depth),
+					stream
+				);
+			}
+
+            ftl::cuda::mls_aggr_horiz(
+                f.createTexture<uchar4>(Channel::Support1),
+                f.createTexture<float4>(Channel::Normals),
+                normals_horiz_[i],
+                f.createTexture<float>(Channel::Depth),
+                centroid_horiz_[i],
+                f.createTexture<uchar4>(Channel::Colour),
+                thresh,
+                col_smooth,
+                radius,
+                s->parameters(),
+                stream
+            );
+
+            ftl::cuda::mls_aggr_vert(
+                f.getTexture<uchar4>(Channel::Support1),
+                normals_horiz_[i],
+                f.getTexture<float4>(Channel::Normals),
+                centroid_horiz_[i],
+                centroid_vert_[i],
+                f.getTexture<uchar4>(Channel::Colour),
+                f.getTexture<float>(Channel::Depth),
+                thresh,
+                col_smooth,
+                radius,
+                s->parameters(),
+                stream
+            );
+        }
+
+        // Step 3:
+        // Find corresponding points and perform aggregation of any correspondences
+        // For each camera combination
+        if (do_aggr) {
+            for (size_t i=0; i<in.frames.size(); ++i) {
+                auto &f1 = in.frames[i];
+                //f1.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0.0f), cvstream);
+                f1.get<GpuMat>(Channel::Confidence).setTo(cv::Scalar(0.0f), cvstream);
+
+                Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0);
+                d1 = in.sources[i]->getPose() * d1;
+
+                for (size_t j=0; j<in.frames.size(); ++j) {
+                    if (i == j) continue;
+
+                    //LOG(INFO) << "Running phase1";
+
+                    auto &f2 = in.frames[j];
+                    auto s1 = in.sources[i];
+                    auto s2 = in.sources[j];
+
+                    // Are cameras facing similar enough direction?
+                    Eigen::Vector4d d2(0.0, 0.0, 1.0, 0.0);
+                    d2 = in.sources[j]->getPose() * d2;
+                    // No, so skip this combination
+                    if (d1.dot(d2) <= 0.0) continue;
+
+                    auto pose1 = MatrixConversion::toCUDA(s1->getPose().cast<float>());
+					auto pose1_inv = MatrixConversion::toCUDA(s1->getPose().cast<float>().inverse());
+					auto pose2 = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
+					auto pose2_inv = MatrixConversion::toCUDA(s2->getPose().cast<float>());
+
+					auto transform = pose2 * pose1;
+
+                    // For the corresponding points, combine normals and centroids
+                    ftl::cuda::aggregate_sources(
+                        f1.getTexture<float4>(Channel::Normals),
+                        f2.getTexture<float4>(Channel::Normals),
+                        centroid_vert_[i],
+                        centroid_vert_[j],
+						f1.getTexture<float>(Channel::Depth),
+                        //contributions_[i],
+                        //contributions_[j],
+                        //f1.getTexture<short2>(Channel::Screen),
+						transform,
+						s1->parameters(),
+						s2->parameters(),
+                        stream
+                    );
+
+                    //LOG(INFO) << "Correspondences done... " << i;
+                }
+            }
+        }
+
+        // Step 3:
+        // Normalise aggregations and move the points
+        for (size_t i=0; i<in.frames.size(); ++i) {
+            auto &f = in.frames[i];
+            auto *s = in.sources[i];
+            auto size = f.get<GpuMat>(Channel::Depth).size();
+
+            /*if (do_corr) {
+                ftl::cuda::normalise_aggregations(
+                    f.getTexture<float4>(Channel::Normals),
+                    centroid_vert_[i],
+                    contributions_[i],
+                    stream
+                );
+            }*/
+
+            ftl::cuda::mls_adjust_depth(
+                f.getTexture<float4>(Channel::Normals),
+                centroid_vert_[i],
+                f.getTexture<float>(Channel::Depth),
+                f.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)),
+                s->parameters(),
+                stream
+            );
+
+            f.swapChannels(Channel::Depth, Channel::Depth2);
+
+            /*if (show_best_source) {
+                ftl::cuda::vis_best_sources(
+                    f.getTexture<short2>(Channel::Screen),
+                    f.getTexture<uchar4>(Channel::Colour),
+                    i,
+                    7, stream
+                );
+            }*/
+        }
+    }
+
+    return true;
+}
diff --git a/components/operators/src/mvmls_cuda.hpp b/components/operators/src/mvmls_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..93b1e8d882848490aec96a291d58805c2d2dbf03
--- /dev/null
+++ b/components/operators/src/mvmls_cuda.hpp
@@ -0,0 +1,101 @@
+#ifndef _FTL_CUDA_MVMLS_HPP_
+#define _FTL_CUDA_MVMLS_HPP_
+
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_common.hpp>
+#include <ftl/cuda_matrix_util.hpp>
+
+namespace ftl {
+namespace cuda {
+
+struct MvMLSParams {
+    float spatial_smooth;
+    float colour_smooth;
+	float fill_match;
+	float fill_threshold;
+	float match_threshold;
+    float cost_ratio;
+    float cost_threshold;
+	float range;
+    uint flags;
+};
+
+void correspondence(
+        ftl::cuda::TextureObject<float> &d1,
+        ftl::cuda::TextureObject<float> &d2,
+        ftl::cuda::TextureObject<uchar4> &c1,
+        ftl::cuda::TextureObject<uchar4> &c2,
+        ftl::cuda::TextureObject<short2> &screen,
+		ftl::cuda::TextureObject<float> &conf,
+		ftl::cuda::TextureObject<int> &mask,
+        float4x4 &pose1,
+        float4x4 &pose1_inv,
+        float4x4 &pose2,
+        const ftl::rgbd::Camera &cam1,
+        const ftl::rgbd::Camera &cam2, const ftl::cuda::MvMLSParams &params, int func,
+        cudaStream_t stream);
+
+void zero_confidence(
+		ftl::cuda::TextureObject<float> &conf,
+		ftl::cuda::TextureObject<float> &depth,
+		cudaStream_t stream);
+
+/*void aggregate_sources(
+    ftl::cuda::TextureObject<float4> &n1,
+    ftl::cuda::TextureObject<float4> &n2,
+    ftl::cuda::TextureObject<float4> &c1,
+    ftl::cuda::TextureObject<float4> &c2,
+    ftl::cuda::TextureObject<float> &contribs1,
+    ftl::cuda::TextureObject<float> &contribs2,
+    ftl::cuda::TextureObject<short2> &screen,
+	//const float4x4 &pose1,
+	//const float4x4 &poseInv2,
+	const float4x4 &poseInv1,
+	const float4x4 &pose2,
+    cudaStream_t stream);*/
+
+void aggregate_sources(
+		ftl::cuda::TextureObject<float4> &n1,
+		ftl::cuda::TextureObject<float4> &n2,
+		ftl::cuda::TextureObject<float4> &c1,
+		ftl::cuda::TextureObject<float4> &c2,
+		ftl::cuda::TextureObject<float> &depth1,
+		//ftl::cuda::TextureObject<float> &depth2,
+		//ftl::cuda::TextureObject<short2> &screen,
+		const float4x4 &transform,
+		const ftl::rgbd::Camera &cam1,
+		const ftl::rgbd::Camera &cam2,
+		cudaStream_t stream);
+
+void best_sources(
+        ftl::cuda::TextureObject<float4> &normals1,
+        ftl::cuda::TextureObject<float4> &normals2,
+        ftl::cuda::TextureObject<uchar4> &support1,
+        ftl::cuda::TextureObject<uchar4> &suppor2,
+        ftl::cuda::TextureObject<float> &depth1,
+        ftl::cuda::TextureObject<float> &depth2,
+        ftl::cuda::TextureObject<short2> &screen,
+        const float4x4 &transform,
+        const ftl::rgbd::Camera &cam1,
+        const ftl::rgbd::Camera &cam2,
+        int id1,
+        int id2,
+        cudaStream_t stream);
+
+void vis_best_sources(
+        ftl::cuda::TextureObject<short2> &screen,
+        ftl::cuda::TextureObject<uchar4> &colour,
+        int myid,
+        int count,
+        cudaStream_t stream);
+
+void normalise_aggregations(
+    ftl::cuda::TextureObject<float4> &norms,
+    ftl::cuda::TextureObject<float4> &cents,
+    ftl::cuda::TextureObject<float> &contribs,
+    cudaStream_t stream);
+
+}
+}
+
+#endif
diff --git a/components/operators/src/normals.cpp b/components/operators/src/normals.cpp
index 5f8554c2986f27de3fe5bd0ab2fb03fbf2226c35..57903aa1db741c0e99f51aedf5c162b694f011cb 100644
--- a/components/operators/src/normals.cpp
+++ b/components/operators/src/normals.cpp
@@ -28,7 +28,7 @@ bool Normals::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Sour
 	ftl::cuda::normals(
 		out.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
 		in.createTexture<float>(Channel::Depth),
-		s->parameters(), 0
+		s->parameters(), stream
 	);
 
 	return true;
diff --git a/components/operators/src/nvopticalflow.cpp b/components/operators/src/nvopticalflow.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a8b6d406bc7421d48d0cfb15de40d0eb48cafebe
--- /dev/null
+++ b/components/operators/src/nvopticalflow.cpp
@@ -0,0 +1,48 @@
+#include <ftl/operators/opticalflow.hpp>
+
+using ftl::rgbd::Frame;
+using ftl::rgbd::Source;
+using ftl::codecs::Channel;
+
+using ftl::operators::NVOpticalFlow;
+
+using cv::Size;
+using cv::cuda::GpuMat;
+
+NVOpticalFlow::NVOpticalFlow(ftl::Configurable* cfg) :
+		ftl::operators::Operator(cfg) {
+	size_ = Size(0, 0);
+}
+
+NVOpticalFlow::~NVOpticalFlow() {
+}
+
+bool NVOpticalFlow::init() {
+	nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(
+				size_.width, size_.height, 
+				cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW,
+				true, false, false, 0);
+	
+	left_gray_.create(size_, CV_8UC1);
+	left_gray_prev_.create(size_, CV_8UC1);
+	return true;
+}
+
+bool NVOpticalFlow::apply(Frame &in, Frame &out, Source *src, cudaStream_t stream) {
+	if (!in.hasChannel(channel_in_)) { return false; }
+
+	if (in.get<GpuMat>(channel_in_).size() != size_) {
+		size_ = in.get<GpuMat>(channel_in_).size();
+		init();
+	}
+	
+	auto cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+	auto &flow = out.create<GpuMat>(channel_out_);
+
+	cv::cuda::cvtColor(in.get<GpuMat>(channel_in_), left_gray_, cv::COLOR_BGR2GRAY, 0, cvstream);
+
+	nvof_->calc(left_gray_, left_gray_prev_, flow, cvstream);
+	std::swap(left_gray_, left_gray_prev_);
+
+	return true;
+}
\ No newline at end of file
diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp
index 91dada28b6adddfcdb3e71861aca07591c57fa9c..dd821ee6fdd2b967e5ac46f3a4f77c57d59aed6b 100644
--- a/components/operators/src/operator.cpp
+++ b/components/operators/src/operator.cpp
@@ -31,33 +31,56 @@ bool Operator::apply(FrameSet &in, Frame &out, Source *os, cudaStream_t stream)
 
 
 Graph::Graph(nlohmann::json &config) : ftl::Configurable(config) {
-
+	cudaSafeCall( cudaStreamCreate(&stream_) );
 }
 
 Graph::~Graph() {
-
+	cudaStreamDestroy(stream_);
 }
 
 bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) {
 	if (!value("enabled", true)) return false;
 
+	auto stream_actual = (stream == 0) ? stream_ : stream;
+
 	if (in.frames.size() != out.frames.size()) return false;
 
 	for (auto &i : operators_) {
-		// Make sure there are enough instances
-		while (i.instances.size() < in.frames.size()) {
+		if (i.instances.size() < 1) {
 			i.instances.push_back(i.maker->make());
 		}
 
-		for (int j=0; j<in.frames.size(); ++j) {
-			auto *instance = i.instances[j];
+		if (i.instances[0]->type() == Operator::Type::OneToOne) {
+			// Make sure there are enough instances
+			while (i.instances.size() < in.frames.size()) {
+				i.instances.push_back(i.maker->make());
+			}
+
+			for (int j=0; j<in.frames.size(); ++j) {
+				auto *instance = i.instances[j];
+
+				if (instance->enabled()) {
+					instance->apply(in.frames[j], out.frames[j], in.sources[j], stream_actual);
+				}
+			}
+		} else if (i.instances[0]->type() == Operator::Type::ManyToMany) {
+			auto *instance = i.instances[0];
 
 			if (instance->enabled()) {
-				instance->apply(in.frames[j], out.frames[j], in.sources[j], stream);
+				try {
+					instance->apply(in, out, stream_actual);
+				} catch (const std::exception &e) {
+					LOG(ERROR) << "Operator exception: " << e.what();
+				}
 			}
 		}
 	}
 
+	if (stream == 0) {
+		cudaStreamSynchronize(stream_actual);
+		cudaSafeCall( cudaGetLastError() );
+	}
+
 	return true;
 }
 
diff --git a/components/operators/src/segmentation.cpp b/components/operators/src/segmentation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fb015b6b16e1aa7e1dcac9735103c8c6af41567e
--- /dev/null
+++ b/components/operators/src/segmentation.cpp
@@ -0,0 +1,98 @@
+#include <ftl/operators/segmentation.hpp>
+#include "segmentation_cuda.hpp"
+
+using ftl::operators::CrossSupport;
+using ftl::operators::VisCrossSupport;
+using ftl::codecs::Channel;
+
+CrossSupport::CrossSupport(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+CrossSupport::~CrossSupport() {
+
+}
+
+bool CrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	bool use_depth = config()->value("depth_region", false);
+
+	if (use_depth) {
+		ftl::cuda::support_region(
+			in.createTexture<float>(Channel::Depth),
+			out.createTexture<uchar4>(Channel::Support2, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())),
+			config()->value("depth_tau", 0.04f),
+			config()->value("v_max", 5),
+			config()->value("h_max", 5),
+			config()->value("symmetric", true), stream
+		);
+	} //else {
+		ftl::cuda::support_region(
+			in.createTexture<uchar4>(Channel::Colour),
+			out.createTexture<uchar4>(Channel::Support1, ftl::rgbd::Format<uchar4>(in.get<cv::cuda::GpuMat>(Channel::Colour).size())),
+			config()->value("tau", 5.0f),
+			config()->value("v_max", 5),
+			config()->value("h_max", 5),
+			config()->value("symmetric", true), stream
+		);
+	//}
+
+	return true;
+}
+
+
+
+
+VisCrossSupport::VisCrossSupport(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+VisCrossSupport::~VisCrossSupport() {
+
+}
+
+bool VisCrossSupport::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	bool show_depth = false;
+	if (in.hasChannel(Channel::Support2) && config()->value("show_depth_support", false)) {
+		show_depth = true;
+	}
+
+	bool show_bad = config()->value("show_bad", false) && in.hasChannel(Channel::Support2);
+
+	if (show_bad) {
+		ftl::cuda::vis_bad_region(
+			in.createTexture<uchar4>(Channel::Colour),
+			in.createTexture<float>(Channel::Depth),
+			in.createTexture<uchar4>(Channel::Support1),
+			in.createTexture<uchar4>(Channel::Support2),
+			stream
+		);
+	} else {
+		ftl::cuda::vis_support_region(
+			in.createTexture<uchar4>(Channel::Colour),
+			in.createTexture<uchar4>(Channel::Support1),
+			make_uchar4(0,0,255,0),
+			make_uchar4(255,0,0,0),
+			config()->value("offset_x", 0),
+			config()->value("offset_y", 0),
+			config()->value("spacing_x", 50),
+			config()->value("spacing_y", 50),
+			stream
+		);
+
+		if (show_depth) {
+			ftl::cuda::vis_support_region(
+				in.createTexture<uchar4>(Channel::Colour),
+				in.createTexture<uchar4>(Channel::Support2),
+				make_uchar4(0,0,255,0),
+				make_uchar4(0,255,0,0),
+				config()->value("offset_x", 0),
+				config()->value("offset_y", 0),
+				config()->value("spacing_x", 50),
+				config()->value("spacing_y", 50),
+				stream
+			);
+		}
+	}
+
+	return true;
+}
\ No newline at end of file
diff --git a/components/operators/src/segmentation.cu b/components/operators/src/segmentation.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c16e647931f7d4c4d92551f44f6242fc415bd91d
--- /dev/null
+++ b/components/operators/src/segmentation.cu
@@ -0,0 +1,253 @@
+#include "segmentation_cuda.hpp"
+
+#define T_PER_BLOCK 8
+
+using ftl::cuda::TextureObject;
+
+template <typename T>
+__device__ inline float cross(T p1, T p2);
+
+template <>
+__device__ inline float cross<uchar4>(uchar4 p1, uchar4 p2) {
+    return max(max(__sad(p1.x,p2.x,0),__sad(p1.y,p2.y,0)), __sad(p1.z,p2.z,0));
+}
+
+template <>
+__device__ inline float cross<float4>(float4 p1, float4 p2) {
+    return max(max(fabsf(p1.x - p2.x),fabsf(p1.y - p2.y)), fabsf(p1.z - p2.z));
+}
+
+template <>
+__device__ inline float cross<float>(float p1, float p2) {
+    return fabs(p1-p2);
+}
+
+template <typename T, bool SYM>
+__device__ uchar4 calculate_support_region(const TextureObject<T> &img, int x, int y, float tau, int v_max, int h_max) {
+    int x_min = max(0, x - h_max);
+    int x_max = min(img.width()-1, x + h_max);
+    int y_min = max(0, y - v_max);
+    int y_max = min(img.height()-1, y + v_max);
+
+	uchar4 result = make_uchar4(0, 0, 0, 0);
+
+	auto colour = img.tex2D((float)x+0.5f,(float)y+0.5f);
+	auto prev_colour = colour;
+
+	int u;
+    for (u=x-1; u >= x_min; --u) {
+		auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f);
+        if (cross(prev_colour, next_colour) > tau) {
+            result.x = x - u - 1;
+            break;
+		}
+		prev_colour = next_colour;
+	}
+	if (u < x_min) result.x = x - x_min;
+	
+	prev_colour = colour;
+    for (u=x+1; u <= x_max; ++u) {
+		auto next_colour = img.tex2D((float)u+0.5f,(float)y+0.5f);
+        if (cross(prev_colour, next_colour) > tau) {
+            result.y = u - x - 1;
+            break;
+		}
+		prev_colour = next_colour;
+	}
+	if (u > x_max) result.y = x_max - x;
+
+	int v;
+	prev_colour = colour;
+    for (v=y-1; v >= y_min; --v) {
+		auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f);
+        if (cross(prev_colour, next_colour) > tau) {
+            result.z = y - v - 1;
+            break;
+		}
+		prev_colour = next_colour;
+	}
+	if (v < y_min) result.z = y - y_min;
+
+	prev_colour = colour;
+    for (v=y+1; v <= y_max; ++v) {
+		auto next_colour = img.tex2D((float)x+0.5f,(float)v+0.5f);
+        if (cross(prev_colour, next_colour) > tau) {
+            result.w = v - y - 1;
+            break;
+		}
+		prev_colour = next_colour;
+	}
+	if (v > y_max) result.w = y_max - y;
+
+	// Make symetric left/right and up/down
+	if (SYM) {
+		result.x = min(result.x, result.y);
+		result.y = result.x;
+		result.z = min(result.z, result.w);
+		result.w = result.z;
+	}
+    return result;
+}
+
+template <typename T, bool SYM>
+__global__ void support_region_kernel(TextureObject<T> img, TextureObject<uchar4> region, float tau, int v_max, int h_max) {
+    const int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x < 0 || y < 0 || x >= img.width() || y >= img.height()) return;
+
+    region(x,y) = calculate_support_region<T,SYM>(img, x, y, tau, v_max, h_max);
+}
+
+void ftl::cuda::support_region(
+        ftl::cuda::TextureObject<uchar4> &colour,
+        ftl::cuda::TextureObject<uchar4> &region,
+        float tau,
+        int v_max,
+		int h_max,
+		bool sym,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	if (sym) support_region_kernel<uchar4, true><<<gridSize, blockSize, 0, stream>>>(colour, region, tau, v_max, h_max);
+	else support_region_kernel<uchar4, false><<<gridSize, blockSize, 0, stream>>>(colour, region, tau, v_max, h_max);
+    cudaSafeCall( cudaGetLastError() );
+
+
+    #ifdef _DEBUG
+    cudaSafeCall(cudaDeviceSynchronize());
+    #endif
+}
+
+void ftl::cuda::support_region(
+		ftl::cuda::TextureObject<float> &depth,
+		ftl::cuda::TextureObject<uchar4> &region,
+		float tau,
+		int v_max,
+		int h_max,
+		bool sym,
+		cudaStream_t stream) {
+
+	const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	support_region_kernel<float, true><<<gridSize, blockSize, 0, stream>>>(depth, region, tau, v_max, h_max);
+	cudaSafeCall( cudaGetLastError() );
+
+
+	#ifdef _DEBUG
+	cudaSafeCall(cudaDeviceSynchronize());
+	#endif
+}
+
+__global__ void vis_support_region_kernel(TextureObject<uchar4> colour, TextureObject<uchar4> region, uchar4 bcolour, uchar4 acolour,
+		int ox, int oy, int dx, int dy) {
+    const int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < 0 || y < 0 || x >= colour.width() || y >= colour.height()) return;
+	
+	// Grid pattern
+	if (x % dx != ox || y % dy != oy) return;
+
+	uchar4 base = region.tex2D(x,y);
+
+	// Edge pattern
+	//if (base.x != 1) return;
+	
+	for (int v=-base.z; v<=base.w; ++v) {
+		uchar4 baseY = region.tex2D(x,y+v);
+
+		for (int u=-baseY.x; u<=baseY.y; ++u) {
+			if (x+u < 0 || y+v < 0 || x+u >= colour.width() || y+v >= colour.height()) continue;
+			auto col = colour.tex2D(float(x+u)+0.5f, float(y+v)+0.5f);
+			colour(x+u, y+v) = (u==0 || v == 0) ?
+					make_uchar4(max(bcolour.x, (unsigned char)col.x), max(bcolour.y, (unsigned char)col.y), max(bcolour.z, (unsigned char)col.z), 0) :
+					make_uchar4(max(acolour.x, (unsigned char)col.x), max(acolour.y, (unsigned char)col.y), max(acolour.z, (unsigned char)col.z), 0);
+		}
+	}
+}
+
+void ftl::cuda::vis_support_region(
+        ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<uchar4> &region,
+		uchar4 bar_colour,
+		uchar4 area_colour,
+		int ox, int oy, int dx, int dy,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    vis_support_region_kernel<<<gridSize, blockSize, 0, stream>>>(
+		colour,
+		region,
+		bar_colour,
+		area_colour,
+		ox,oy,dx,dy
+	);
+    cudaSafeCall( cudaGetLastError() );
+
+
+    #ifdef _DEBUG
+    cudaSafeCall(cudaDeviceSynchronize());
+    #endif
+}
+
+// ===== Vis bad edges =========================================================
+
+__global__ void vis_bad_region_kernel(
+		TextureObject<uchar4> colour,
+		TextureObject<float> depth,
+		TextureObject<uchar4> region,
+		TextureObject<uchar4> dregion) {
+    const int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x < 0 || y < 0 || x >= colour.width() || y >= colour.height()) return;
+	
+	// Grid pattern
+	//if (x % 50 != 0 || y % 50 != 0) return;
+
+	uchar4 base = region.tex2D(x,y);
+	uchar4 baseD = dregion.tex2D(x,y);
+	auto col = colour.tex2D((float)x+0.5f,(float)y+0.5f);
+	float d = depth.tex2D(x,y);
+
+	if (baseD.x > base.x && baseD.y < base.y) {
+		uchar4 baseR = region.tex2D(x+baseD.y+1, y);
+		float dR = depth.tex2D(x+baseD.y+1, y);
+		//if (x+baseD.y+1-baseR.x <= x) {
+			if (d > 0.0f && d < 30.0f && (dR <= 0.0f || dR >= 30.0f)) {
+				colour(x,y) = make_uchar4(col.x, col.y, 255, 0);
+				depth(x,y) = 0.0f;
+			}
+		//}
+	}
+}
+
+void ftl::cuda::vis_bad_region(
+		ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<float> &depth,
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<uchar4> &dregion,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((region.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (region.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    vis_bad_region_kernel<<<gridSize, blockSize, 0, stream>>>(
+		colour,
+		depth,
+		region,
+		dregion
+	);
+    cudaSafeCall( cudaGetLastError() );
+
+
+    #ifdef _DEBUG
+    cudaSafeCall(cudaDeviceSynchronize());
+    #endif
+}
diff --git a/components/operators/src/segmentation_cuda.hpp b/components/operators/src/segmentation_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c2cb390d9c0ee62a33127eec1720cf0d6fee8cae
--- /dev/null
+++ b/components/operators/src/segmentation_cuda.hpp
@@ -0,0 +1,40 @@
+#ifndef _FTL_CUDA_SEGMENTATION_HPP_
+#define _FTL_CUDA_SEGMENTATION_HPP_
+
+#include <ftl/cuda_common.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void support_region(
+		ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<uchar4> &region,
+		float tau, int v_max, int h_max, bool sym,
+		cudaStream_t stream);
+
+void support_region(
+		ftl::cuda::TextureObject<float> &depth,
+		ftl::cuda::TextureObject<uchar4> &region,
+		float tau, int v_max, int h_max, bool sym,
+		cudaStream_t stream);
+
+void vis_support_region(
+        ftl::cuda::TextureObject<uchar4> &colour,
+        ftl::cuda::TextureObject<uchar4> &region,
+		uchar4 bar_colour,
+		uchar4 area_colour,
+		int ox, int oy, int dx, int dy,
+        cudaStream_t stream);
+
+void vis_bad_region(
+		ftl::cuda::TextureObject<uchar4> &colour,
+		ftl::cuda::TextureObject<float> &depth,
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<uchar4> &dregion,
+        cudaStream_t stream);
+
+
+}
+}
+
+#endif 
diff --git a/components/operators/src/smoothchan.cu b/components/operators/src/smoothchan.cu
index af9ec35243ed6304f859ab16c2177aa761c8eb14..ea40854fccd7579fc3973a6acb41543ea7bc75d5 100644
--- a/components/operators/src/smoothchan.cu
+++ b/components/operators/src/smoothchan.cu
@@ -32,6 +32,7 @@ __global__ void smooth_chan_kernel(
 
 		//const float3 pos = camera.screenToCam(ix, iy, d0);
 
+		//int v=0;
 		for (int v=-RADIUS; v<=RADIUS; ++v) {
 			#pragma unroll
 			for (int u=-RADIUS; u<=RADIUS; ++u) {
diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp
index 8e4f458bf9f996a6d38bef816ab7bcec75b69b11..2eafe7977a827cd0a1e90017f749fd1245ce1b68 100644
--- a/components/operators/src/smoothing.cpp
+++ b/components/operators/src/smoothing.cpp
@@ -6,6 +6,7 @@
 using ftl::operators::HFSmoother;
 using ftl::operators::SimpleMLS;
 using ftl::operators::ColourMLS;
+using ftl::operators::AggreMLS;
 using ftl::operators::AdaptiveMLS;
 using ftl::operators::SmoothChannel;
 using ftl::codecs::Channel;
@@ -33,7 +34,7 @@ bool HFSmoother::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::S
             in.createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
             in.createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
             var_thresh,
-            s->parameters(), 0
+            s->parameters(), stream
         );
     }
 
@@ -158,7 +159,7 @@ bool SimpleMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So
 			thresh,
 			radius,
 			s->parameters(),
-			0
+			stream
 		);
 
 		in.swapChannels(Channel::Depth, Channel::Depth2);
@@ -179,10 +180,12 @@ ColourMLS::~ColourMLS() {
 }
 
 bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
-	float thresh = config()->value("mls_threshold", 0.04f);
+	float thresh = config()->value("mls_threshold", 0.4f);
 	float col_smooth = config()->value("mls_colour_smoothing", 30.0f);
-	int iters = config()->value("mls_iterations", 1);
-	int radius = config()->value("mls_radius",2);
+	int iters = config()->value("mls_iterations", 3);
+	int radius = config()->value("mls_radius",3);
+	bool crosssup = config()->value("cross_support", true);
+	bool filling = config()->value("filling", false);
 
 	if (!in.hasChannel(Channel::Normals)) {
 		LOG(ERROR) << "Required normals channel missing for MLS";
@@ -191,18 +194,34 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So
 
 	// FIXME: Assume in and out are the same frame.
 	for (int i=0; i<iters; ++i) {
-		ftl::cuda::colour_mls_smooth(
-			in.createTexture<float4>(Channel::Normals),
-			in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
-			in.createTexture<float>(Channel::Depth),
-			in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
-			in.createTexture<uchar4>(Channel::Colour),
-			thresh,
-			col_smooth,
-			radius,
-			s->parameters(),
-			0
-		);
+		if (!crosssup) {
+			ftl::cuda::colour_mls_smooth(
+				in.createTexture<float4>(Channel::Normals),
+				in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+				in.createTexture<float>(Channel::Depth),
+				in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+				in.createTexture<uchar4>(Channel::Colour),
+				thresh,
+				col_smooth,
+				radius,
+				s->parameters(),
+				stream
+			);
+		} else {
+			ftl::cuda::colour_mls_smooth_csr(
+				in.createTexture<uchar4>(Channel::Support1),
+				in.createTexture<float4>(Channel::Normals),
+				in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+				in.createTexture<float>(Channel::Depth),
+				in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+				in.createTexture<uchar4>(Channel::Colour),
+				thresh,
+				col_smooth,
+				filling,
+				s->parameters(),
+				stream
+			);
+		}
 
 		in.swapChannels(Channel::Depth, Channel::Depth2);
 		in.swapChannels(Channel::Normals, Channel::Points);
@@ -212,6 +231,107 @@ bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::So
 }
 
 
+// ====== Aggregating MLS ======================================================
+
+AggreMLS::AggreMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
+
+}
+
+AggreMLS::~AggreMLS() {
+
+}
+
+bool AggreMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) {
+	float thresh = config()->value("mls_threshold", 0.4f);
+	float col_smooth = config()->value("mls_colour_smoothing", 30.0f);
+	int iters = config()->value("mls_iterations", 3);
+	int radius = config()->value("mls_radius",5);
+	bool aggre = config()->value("aggregation", true);
+
+	if (!in.hasChannel(Channel::Normals)) {
+		LOG(ERROR) << "Required normals channel missing for MLS";
+		return false;
+	}
+
+	if (!in.hasChannel(Channel::Support1)) {
+		LOG(ERROR) << "Required support channel missing for MLS";
+		return false;
+	}
+
+	auto size = in.get<GpuMat>(Channel::Depth).size();
+	centroid_horiz_.create(size.height, size.width);
+	normals_horiz_.create(size.height, size.width);
+	centroid_vert_.create(size.width, size.height);
+
+	// FIXME: Assume in and out are the same frame.
+	for (int i=0; i<iters; ++i) {
+
+		if (aggre) {
+			ftl::cuda::mls_aggr_horiz(
+				in.createTexture<uchar4>(Channel::Support1),
+				in.createTexture<float4>(Channel::Normals),
+				normals_horiz_,
+				in.createTexture<float>(Channel::Depth),
+				centroid_horiz_,
+				in.createTexture<uchar4>(Channel::Colour),
+				thresh,
+				col_smooth,
+				radius,
+				s->parameters(),
+				stream
+			);
+
+			ftl::cuda::mls_aggr_vert(
+				in.createTexture<uchar4>(Channel::Support1),
+				normals_horiz_,
+				in.createTexture<float4>(Channel::Normals),
+				centroid_horiz_,
+				centroid_vert_,
+				in.createTexture<uchar4>(Channel::Colour),
+				in.createTexture<float>(Channel::Depth),
+				thresh,
+				col_smooth,
+				radius,
+				s->parameters(),
+				stream
+			);
+
+			ftl::cuda::mls_adjust_depth(
+				in.createTexture<float4>(Channel::Normals),
+				centroid_vert_,
+				in.createTexture<float>(Channel::Depth),
+				in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(size)),
+				s->parameters(),
+				stream
+			);
+
+			in.swapChannels(Channel::Depth, Channel::Depth2);
+			//in.swapChannels(Channel::Normals, Channel::Points);
+
+		} else {
+			ftl::cuda::colour_mls_smooth_csr(
+				in.createTexture<uchar4>(Channel::Support1),
+				in.createTexture<float4>(Channel::Normals),
+				in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+				in.createTexture<float>(Channel::Depth),
+				in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())),
+				in.createTexture<uchar4>(Channel::Colour),
+				thresh,
+				col_smooth,
+				false,
+				s->parameters(),
+				stream
+			);
+
+			in.swapChannels(Channel::Depth, Channel::Depth2);
+			in.swapChannels(Channel::Normals, Channel::Points);
+		}
+	}
+
+	return true;
+}
+
+
 // ====== Adaptive MLS =========================================================
 
 AdaptiveMLS::AdaptiveMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) {
@@ -241,7 +361,7 @@ bool AdaptiveMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::
 			in.createTexture<float>(Channel::Smoothing),
 			radius,
 			s->parameters(),
-			0
+			stream
 		);
 
 		in.swapChannels(Channel::Depth, Channel::Depth2);
diff --git a/components/operators/src/smoothing_cuda.hpp b/components/operators/src/smoothing_cuda.hpp
index 53e8cf898c4e99875dc4a9881df58700ee31c127..6681c1a2a0383fdf69c7a62ab0b78346fede5f3a 100644
--- a/components/operators/src/smoothing_cuda.hpp
+++ b/components/operators/src/smoothing_cuda.hpp
@@ -29,6 +29,54 @@ void colour_mls_smooth(
 		const ftl::rgbd::Camera &camera,
 		cudaStream_t stream);
 
+void colour_mls_smooth_csr(
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float4> &normals_out,
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		float smoothing,
+		float colour_smoothing,
+		bool filling,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream);
+
+void mls_adjust_depth(
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float4> &centroid_in,
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float> &depth_out,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream);
+
+void mls_aggr_horiz(
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float4> &normals_out,
+		ftl::cuda::TextureObject<float> &depth_in,
+		ftl::cuda::TextureObject<float4> &centroid_out,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		float smoothing,
+		float colour_smoothing,
+		int radius,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream);
+
+void mls_aggr_vert(
+		ftl::cuda::TextureObject<uchar4> &region,
+		ftl::cuda::TextureObject<float4> &normals_in,
+		ftl::cuda::TextureObject<float4> &normals_out,
+		ftl::cuda::TextureObject<float4> &centroid_in,
+		ftl::cuda::TextureObject<float4> &centroid_out,
+		ftl::cuda::TextureObject<uchar4> &colour_in,
+		ftl::cuda::TextureObject<float> &depth_in,
+		float smoothing,
+		float colour_smoothing,
+		int radius,
+		const ftl::rgbd::Camera &camera,
+		cudaStream_t stream);
+
 void adaptive_mls_smooth(
 		ftl::cuda::TextureObject<float4> &normals_in,
 		ftl::cuda::TextureObject<float4> &normals_out,
diff --git a/components/renderers/cpp/include/ftl/cuda/weighting.hpp b/components/renderers/cpp/include/ftl/cuda/weighting.hpp
index bffff673d2009ac698c3c604ec565a2aa1a89901..b0c3f58b9c5a959c58fb9b0cba43bf27823aac4f 100644
--- a/components/renderers/cpp/include/ftl/cuda/weighting.hpp
+++ b/components/renderers/cpp/include/ftl/cuda/weighting.hpp
@@ -44,6 +44,20 @@ __device__ inline float colourDistance(uchar4 a, uchar4 b) {
 	return ch*ch*ch*ch;
 }
 
+/*
+ * Colour weighting as suggested in:
+ * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014.
+ * c = colour distance
+ */
+ __device__ inline float colourWeighting(const float4 &a, const float4 &b, float h) {
+	const float3 delta = make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
+	const float c = length(delta);
+	if (c >= h) return 0.0f;
+	float ch = c / h;
+	ch = 1.0f - ch*ch;
+	return ch*ch*ch*ch;
+}
+
 }
 }
 
diff --git a/components/renderers/cpp/include/ftl/render/tri_render.hpp b/components/renderers/cpp/include/ftl/render/tri_render.hpp
index 60d3dcbf6a68576981c1d72194dae731e8592fec..3d9183643e9f2fe183499cf8bbcc97699328ef60 100644
--- a/components/renderers/cpp/include/ftl/render/tri_render.hpp
+++ b/components/renderers/cpp/include/ftl/render/tri_render.hpp
@@ -43,6 +43,9 @@ class Triangular : public ftl::render::Renderer {
 	cudaStream_t stream_;
 	float3 light_pos_;
 
+	cv::cuda::GpuMat env_image_;
+	ftl::cuda::TextureObject<uchar4> env_tex_;
+
 	//ftl::Filters *filters_;
 
 	template <typename T>
diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu
index e74b4bb60963ab7d7d1798a96b22c90916a7ffc3..e6490c47eaf9033c2077cdd06457b3ff4f59103e 100644
--- a/components/renderers/cpp/src/reprojection.cu
+++ b/components/renderers/cpp/src/reprojection.cu
@@ -66,7 +66,7 @@ __global__ void reprojection_kernel(
 		TextureObject<B> out,			// Accumulated output
 		TextureObject<float> contrib,
 		SplatParams params,
-		Camera camera, float4x4 poseInv) {
+		Camera camera, float4x4 transform, float3x3 transformR) {
         
 	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
 	const int y = blockIdx.y*blockDim.y + threadIdx.y;
@@ -74,25 +74,25 @@ __global__ void reprojection_kernel(
 	const float d = depth_in.tex2D((int)x, (int)y);
 	if (d < params.camera.minDepth || d > params.camera.maxDepth) return;
 
-	const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d);
+	//const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d);
 	//if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
 
-	const float3 camPos = poseInv * worldPos;
+	const float3 camPos = transform * params.camera.screenToCam(x, y, d);
 	if (camPos.z < camera.minDepth) return;
 	if (camPos.z > camera.maxDepth) return;
-	const uint2 screenPos = camera.camToScreen<uint2>(camPos);
+	const float2 screenPos = camera.camToScreen<float2>(camPos);
 
 	// Not on screen so stop now...
 	if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return;
             
 	// Calculate the dot product of surface normal and camera ray
-	const float3 n = poseInv.getFloat3x3() * make_float3(normals.tex2D((int)x, (int)y));
+	const float3 n = transformR * make_float3(normals.tex2D((int)x, (int)y));
 	float3 ray = camera.screenToCam(screenPos.x, screenPos.y, 1.0f);
 	ray = ray / length(ray);
 	const float dotproduct = max(dot(ray,n),0.0f);
     
-	const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y);
-	const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos);
+	const float d2 = depth_src.tex2D(int(screenPos.x+0.5f), int(screenPos.y+0.5f));
+	const auto input = in.tex2D(screenPos.x, screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos);
 
 	// TODO: Z checks need to interpolate between neighbors if large triangles are used
 	float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f);
@@ -123,7 +123,7 @@ void ftl::cuda::reproject(
 		TextureObject<B> &out,   // Accumulated output
 		TextureObject<float> &contrib,
 		const SplatParams &params,
-		const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) {
+		const Camera &camera, const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream) {
 	const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
 	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
 
@@ -136,7 +136,8 @@ void ftl::cuda::reproject(
 		contrib,
 		params,
 		camera,
-		poseInv
+		transform,
+		transformR
     );
     cudaSafeCall( cudaGetLastError() );
 }
@@ -150,7 +151,7 @@ template void ftl::cuda::reproject(
 	ftl::cuda::TextureObject<float> &contrib,
 	const ftl::render::SplatParams &params,
 	const ftl::rgbd::Camera &camera,
-	const float4x4 &poseInv, cudaStream_t stream);
+	const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream);
 
 template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float> &in,	// Original colour image
@@ -161,7 +162,7 @@ template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float> &contrib,
 		const ftl::render::SplatParams &params,
 		const ftl::rgbd::Camera &camera,
-		const float4x4 &poseInv, cudaStream_t stream);
+		const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream);
 
 template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float4> &in,	// Original colour image
@@ -172,7 +173,7 @@ template void ftl::cuda::reproject(
 		ftl::cuda::TextureObject<float> &contrib,
 		const ftl::render::SplatParams &params,
 		const ftl::rgbd::Camera &camera,
-		const float4x4 &poseInv, cudaStream_t stream);
+		const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream);
 
 //==============================================================================
 //  Without normals
@@ -197,19 +198,19 @@ __global__ void reprojection_kernel(
 	const float d = depth_in.tex2D((int)x, (int)y);
 	if (d < params.camera.minDepth || d > params.camera.maxDepth) return;
 
-	const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d);
+	//const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d);
 	//if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
 
-	const float3 camPos = poseInv * worldPos;
+	const float3 camPos = poseInv * params.camera.screenToCam(x, y, d);
 	if (camPos.z < camera.minDepth) return;
 	if (camPos.z > camera.maxDepth) return;
-	const uint2 screenPos = camera.camToScreen<uint2>(camPos);
+	const float2 screenPos = camera.camToScreen<float2>(camPos);
 
 	// Not on screen so stop now...
 	if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return;
     
-	const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y);
-	const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos);
+	const float d2 = depth_src.tex2D((int)(screenPos.x+0.5f), (int)(screenPos.y+0.5f));
+	const auto input = in.tex2D(screenPos.x, screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos);
 	float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f);
 	const B weighted = make<B>(input) * weight;
 
@@ -274,3 +275,51 @@ template void ftl::cuda::reproject(
 		const ftl::render::SplatParams &params,
 		const ftl::rgbd::Camera &camera,
 		const float4x4 &poseInv, cudaStream_t stream);
+
+
+// ===== Equirectangular Reprojection ==========================================
+
+__device__ inline float2 equirect_reprojection(int x_img, int y_img, double f, const float3x3 &rot, int w1, int h1, const ftl::rgbd::Camera &cam) {
+	float3 ray3d = cam.screenToCam(x_img, y_img, 1.0f);
+	ray3d /= length(ray3d);
+	ray3d = rot * ray3d;
+
+    //inverse formula for spherical projection, reference Szeliski book "Computer Vision: Algorithms and Applications" p439.
+    float theta = atan2(ray3d.y,sqrt(ray3d.x*ray3d.x+ray3d.z*ray3d.z));
+	float phi = atan2(ray3d.x, ray3d.z);
+	
+	const float pi = 3.14f;
+
+    //get 2D point on equirectangular map
+    float x_sphere = (((phi*w1)/pi+w1)/2); 
+    float y_sphere = (theta+ pi/2)*h1/pi;
+
+    return make_float2(x_sphere,y_sphere);
+};
+
+__global__ void equirectangular_kernel(
+		TextureObject<uchar4> image_in,
+		TextureObject<uchar4> image_out,
+		Camera camera, float3x3 pose) {
+		
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x);
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x >= 0 && y >= 0 && x < image_out.width() && y < image_out.height()) {
+		const float2 p = equirect_reprojection(x,y, camera.fx, pose, image_in.width(), image_in.height(), camera);
+		const float4 colour = image_in.tex2D(p.x, p.y);
+		image_out(x,y) = make_uchar4(colour.x, colour.y, colour.z, 0);
+	}
+}
+
+void ftl::cuda::equirectangular_reproject(
+		ftl::cuda::TextureObject<uchar4> &image_in,
+		ftl::cuda::TextureObject<uchar4> &image_out,
+		const ftl::rgbd::Camera &camera, const float3x3 &pose, cudaStream_t stream) {
+
+	const dim3 gridSize((image_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (image_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	equirectangular_kernel<<<gridSize, blockSize, 0, stream>>>(image_in, image_out, camera, pose);
+	cudaSafeCall( cudaGetLastError() );
+}
diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu
index 6346daa883019453bef1d2f80be36ba5afa0aaf2..2986234bb3bbc24f762ff5ba0103ba173f4a0093 100644
--- a/components/renderers/cpp/src/splatter.cu
+++ b/components/renderers/cpp/src/splatter.cu
@@ -103,6 +103,40 @@ using ftl::cuda::warpSum;
 	}
 }
 
+/*
+ * Pass 1: Directly render each camera into virtual view but with no upsampling
+ * for sparse points.
+ */
+ __global__ void dibr_merge_kernel(TextureObject<float> depth,
+		TextureObject<int> depth_out,
+		float4x4 transform,
+		ftl::rgbd::Camera cam,
+		SplatParams params) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	const float d0 = depth.tex2D(x, y);
+	if (d0 <= cam.minDepth || d0 >= cam.maxDepth) return;
+
+	const float3 camPos = transform * cam.screenToCam(x,y,d0);
+	//if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return;
+
+	// Find the virtual screen position of current point
+	//const float3 camPos = params.m_viewMatrix * make_float3(worldPos);
+	//if (camPos.z < params.camera.minDepth) return;
+	//if (camPos.z > params.camera.maxDepth) return;
+
+	const float d = camPos.z;
+
+	const uint2 screenPos = params.camera.camToScreen<uint2>(camPos);
+	const unsigned int cx = screenPos.x;
+	const unsigned int cy = screenPos.y;
+	if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth.width() && cy < depth.height()) {
+		// Transform estimated point to virtual cam space and output z
+		atomicMin(&depth_out(cx,cy), d * 100000.0f);
+	}
+}
+
 void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<float4> &normals, TextureObject<int> &depth, SplatParams params, bool culling, cudaStream_t stream) {
     const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
     const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
@@ -120,6 +154,14 @@ void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<int> &de
     cudaSafeCall( cudaGetLastError() );
 }
 
+void ftl::cuda::dibr_merge(TextureObject<float> &depth, TextureObject<int> &depth_out, const float4x4 &transform, const ftl::rgbd::Camera &cam, SplatParams params, cudaStream_t stream) {
+    const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+	dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(depth, depth_out, transform, cam, params);
+    cudaSafeCall( cudaGetLastError() );
+}
+
 //==============================================================================
 
 
@@ -441,9 +483,11 @@ __global__ void dibr_normalise_kernel(
     const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
 
     if (x < in.width() && y < in.height()) {
+		const float contrib = contribs.tex2D((int)x,(int)y);
         const A a = in.tex2D((int)x,(int)y);
         //const float4 normal = normals.tex2D((int)x,(int)y);
-        const float contrib = contribs.tex2D((int)x,(int)y);
+
+		//out(x,y) = (contrib == 0.0f) ? make<B>(a) : make<B>(a / contrib);
 
         if (contrib > 0.0f) {
             out(x,y) = make<B>(a / contrib);
@@ -464,3 +508,39 @@ void ftl::cuda::dibr_normalise(TextureObject<A> &in, TextureObject<B> &out, Text
 template void ftl::cuda::dibr_normalise<float4,uchar4>(TextureObject<float4> &in, TextureObject<uchar4> &out, TextureObject<float> &contribs, cudaStream_t stream);
 template void ftl::cuda::dibr_normalise<float,float>(TextureObject<float> &in, TextureObject<float> &out, TextureObject<float> &contribs, cudaStream_t stream);
 template void ftl::cuda::dibr_normalise<float4,float4>(TextureObject<float4> &in, TextureObject<float4> &out, TextureObject<float> &contribs, cudaStream_t stream);
+
+
+// ===== Show bad colour normalise =============================================
+
+__global__ void show_missing_colour_kernel(
+        TextureObject<float> depth,
+        TextureObject<uchar4> out,
+		TextureObject<float> contribs,
+		uchar4 bad_colour,
+		ftl::rgbd::Camera cam) {
+    const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+    if (x < out.width() && y < out.height()) {
+		const float contrib = contribs.tex2D((int)x,(int)y);
+		const float d = depth.tex2D(x,y);
+
+		if (contrib < 0.0000001f && d > cam.minDepth && d < cam.maxDepth) {
+			out(x,y) = bad_colour;
+		}
+    }
+}
+
+void ftl::cuda::show_missing_colour(
+		TextureObject<float> &depth,
+		TextureObject<uchar4> &out,
+		TextureObject<float> &contribs,
+		uchar4 bad_colour,
+		const ftl::rgbd::Camera &cam,
+		cudaStream_t stream) {
+    const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+
+    show_missing_colour_kernel<<<gridSize, blockSize, 0, stream>>>(depth, out, contribs, bad_colour, cam);
+    cudaSafeCall( cudaGetLastError() );
+}
diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index 838eda409761c7420a35944a45900ace0912f124..c81977cf38a4380df4143d2cd8d0609c69de8897 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -43,6 +43,14 @@ namespace cuda {
 		ftl::render::SplatParams params,
 		cudaStream_t stream);
 
+	void dibr_merge(
+		ftl::cuda::TextureObject<float> &depth,
+		ftl::cuda::TextureObject<int> &depth_out,
+		const float4x4 &transform,
+		const ftl::rgbd::Camera &cam,
+		ftl::render::SplatParams params,
+		cudaStream_t stream);
+
 	template <typename T>
 	void splat(
         ftl::cuda::TextureObject<float4> &normals,
@@ -72,7 +80,7 @@ namespace cuda {
 		ftl::cuda::TextureObject<float> &contrib,
 		const ftl::render::SplatParams &params,
 		const ftl::rgbd::Camera &camera,
-		const float4x4 &poseInv, cudaStream_t stream);
+		const float4x4 &transform, const float3x3 &transformR, cudaStream_t stream);
 
 	template <typename A, typename B>
 	void reproject(
@@ -85,6 +93,11 @@ namespace cuda {
 		const ftl::rgbd::Camera &camera,
 		const float4x4 &poseInv, cudaStream_t stream);
 
+	void equirectangular_reproject(
+		ftl::cuda::TextureObject<uchar4> &image_in,
+		ftl::cuda::TextureObject<uchar4> &image_out,
+		const ftl::rgbd::Camera &camera, const float3x3 &pose, cudaStream_t stream);
+
 	template <typename A, typename B>
 	void dibr_normalise(
 		ftl::cuda::TextureObject<A> &in,
@@ -92,6 +105,14 @@ namespace cuda {
 		ftl::cuda::TextureObject<float> &contribs,
 		cudaStream_t stream);
 
+	void show_missing_colour(
+		ftl::cuda::TextureObject<float> &depth,
+		ftl::cuda::TextureObject<uchar4> &out,
+		ftl::cuda::TextureObject<float> &contribs,
+		uchar4 bad_colour,
+		const ftl::rgbd::Camera &cam,
+		cudaStream_t stream);
+
 	void show_mask(
         ftl::cuda::TextureObject<uchar4> &colour,
 		ftl::cuda::TextureObject<int> &mask,
diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp
index 6d60a5f6685ac7d292d4a67b0d23df8b231f3b21..5912b6480b109db2c3cce960ae5bca6abe6531f8 100644
--- a/components/renderers/cpp/src/tri_render.cpp
+++ b/components/renderers/cpp/src/tri_render.cpp
@@ -130,6 +130,19 @@ Triangular::Triangular(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::r
 	on("light_y", [this](const ftl::config::Event &e) { light_pos_.y = value("light_y", 0.3f); });
 	on("light_z", [this](const ftl::config::Event &e) { light_pos_.z = value("light_z", 0.3f); });
 
+	// Load any environment texture
+	std::string envimage = value("environment", std::string(""));
+	if (envimage.size() > 0) {
+		cv::Mat envim = cv::imread(envimage);
+		if (!envim.empty()) {
+			if (envim.type() == CV_8UC3) {
+				cv::cvtColor(envim,envim, cv::COLOR_BGR2BGRA);
+			}
+			env_image_.upload(envim);
+			env_tex_ = std::move(ftl::cuda::TextureObject<uchar4>(env_image_, true));
+		}
+	}
+
 	cudaSafeCall(cudaStreamCreate(&stream_));
 
 	//filters_ = ftl::create<ftl::Filters>(this, "filters");
@@ -215,7 +228,8 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann
 			cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA);
 		}
 
-		auto poseInv = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse());
+		auto transform = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()) * params_.m_viewMatrixInverse;
+		auto transformR = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()).getFloat3x3();
 
 		if (mesh_) {
 			ftl::cuda::reproject(
@@ -227,7 +241,7 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann
 				temp_.getTexture<float>(Channel::Contribution),
 				params_,
 				s->parameters(),
-				poseInv, stream
+				transform, transformR, stream
 			);
 		} else {
 			// Can't use normals with point cloud version
@@ -239,7 +253,7 @@ void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Chann
 				temp_.getTexture<float>(Channel::Contribution),
 				params_,
 				s->parameters(),
-				poseInv, stream
+				transform, stream
 			);
 		}
 	}
@@ -287,9 +301,13 @@ void Triangular::_dibr(ftl::rgbd::Frame &out, cudaStream_t stream) {
 			continue;
 		}
 
+		auto transform = params_.m_viewMatrix * MatrixConversion::toCUDA(s->getPose().cast<float>());
+
 		ftl::cuda::dibr_merge(
-			f.createTexture<float4>(Channel::Points),
+			f.createTexture<float>(Channel::Depth),
 			temp_.createTexture<int>(Channel::Depth2),
+			transform,
+			s->parameters(),
 			params_, stream
 		);
 	}
@@ -398,6 +416,53 @@ void Triangular::_renderChannel(
 	_reprojectChannel(out, channel_in, channel_out, stream);
 }
 
+/*
+ * H(Hue): 0 - 360 degree (integer)
+ * S(Saturation): 0 - 1.00 (double)
+ * V(Value): 0 - 1.00 (double)
+ * 
+ * output[3]: Output, array size 3, int
+ */
+static cv::Scalar HSVtoRGB(int H, double S, double V) {
+	double C = S * V;
+	double X = C * (1 - abs(fmod(H / 60.0, 2) - 1));
+	double m = V - C;
+	double Rs, Gs, Bs;
+
+	if(H >= 0 && H < 60) {
+		Rs = C;
+		Gs = X;
+		Bs = 0;	
+	}
+	else if(H >= 60 && H < 120) {	
+		Rs = X;
+		Gs = C;
+		Bs = 0;	
+	}
+	else if(H >= 120 && H < 180) {
+		Rs = 0;
+		Gs = C;
+		Bs = X;	
+	}
+	else if(H >= 180 && H < 240) {
+		Rs = 0;
+		Gs = X;
+		Bs = C;	
+	}
+	else if(H >= 240 && H < 300) {
+		Rs = X;
+		Gs = 0;
+		Bs = C;	
+	}
+	else {
+		Rs = C;
+		Gs = 0;
+		Bs = X;	
+	}
+
+	return cv::Scalar((Bs + m) * 255, (Gs + m) * 255, (Rs + m) * 255, 0);
+}
+
 bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 	SHARED_LOCK(scene_->mtx, lk);
 	if (!src->isReady()) return false;
@@ -411,6 +476,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 	
 	out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height));
 	out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height));
+	out.createTexture<uchar4>(Channel::Colour, true);  // Force interpolated colour
 
 
 	if (scene_->frames.size() == 0) return false;
@@ -437,21 +503,36 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 	// Clear all channels to 0 or max depth
 
 	out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream);
-	out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream);
+
+	if (env_image_.empty() || !value("environment_enabled", false)) {
+		out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream);
+	} else {
+		auto pose = params.m_viewMatrixInverse.getFloat3x3();
+		ftl::cuda::equirectangular_reproject(
+			env_tex_,
+			out.createTexture<uchar4>(Channel::Colour, true),
+			camera, pose, stream_);
+	}
 
 	//LOG(INFO) << "Render ready: " << camera.width << "," << camera.height;
 
 	bool show_discon = value("show_discontinuity_mask", false);
 	bool show_fill = value("show_filled", false);
+	bool colour_sources = value("colour_sources", false);
 
 	temp_.createTexture<int>(Channel::Depth);
 	//temp_.get<GpuMat>(Channel::Normals).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
 
-	// Display mask values
+	// Display mask values or otherwise alter colour image
 	for (int i=0; i<scene_->frames.size(); ++i) {
 		auto &f = scene_->frames[i];
 		auto s = scene_->sources[i];
 
+		if (colour_sources) {
+			auto colour = HSVtoRGB(360 / scene_->frames.size() * i, 0.6, 0.85); //(i == 0) ? cv::Scalar(255,0,0,0) : cv::Scalar(0,255,0,0);
+			f.get<GpuMat>(Channel::Colour).setTo(colour, cvstream);
+		}
+
 		if (f.hasChannel(Channel::Mask)) {
 			if (show_discon) {
 				ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<int>(Channel::Mask), Mask::kMask_Discontinuity, make_uchar4(0,0,255,255), stream_);
@@ -500,19 +581,19 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 	if (aligned_source >= 0 && aligned_source < scene_->frames.size()) {
 		// FIXME: Output may not be same resolution as source!
 		cudaSafeCall(cudaStreamSynchronize(stream_));
-		scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing, out);
+		scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing + Channel::Confidence, out);
 
-		if (chan == Channel::Normals) {
+		if (chan == Channel::ColourNormals) {
 			// Convert normal to single float value
-			temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream);
-			ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour),
+			out.create<GpuMat>(Channel::ColourNormals, Format<uchar4>(out.get<GpuMat>(Channel::Colour).size())).setTo(cv::Scalar(0,0,0,0), cvstream);
+			ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), out.createTexture<uchar4>(Channel::ColourNormals),
 					light_pos_,
 					light_diffuse_,
 					light_ambient_, stream_);
 
 			// Put in output as single float
-			cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals));
-			out.resetTexture(Channel::Normals);
+			//cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals));
+			//out.resetTexture(Channel::Normals);
 		}
 
 		return true;
@@ -527,20 +608,35 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 
 	// Reprojection of colours onto surface
 	_renderChannel(out, Channel::Colour, Channel::Colour, stream_);
+
+	if (value("show_bad_colour", false)) {
+		ftl::cuda::show_missing_colour(
+			out.getTexture<float>(Channel::Depth),
+			out.getTexture<uchar4>(Channel::Colour),
+			temp_.getTexture<float>(Channel::Contribution),
+			make_uchar4(255,0,0,0),
+			camera,
+			stream_
+		);
+	}
 	
 	if (chan == Channel::Depth)
 	{
 		// Just convert int depth to float depth
 		//temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream);
-	} else if (chan == Channel::Normals) {
+	} else if (chan == Channel::ColourNormals) {
 		// Visualise normals to RGBA
-		accum_.create<GpuMat>(Channel::Normals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream);
-		ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), accum_.createTexture<uchar4>(Channel::Normals),
+		out.create<GpuMat>(Channel::ColourNormals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream);
+
+		ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), out.createTexture<uchar4>(Channel::ColourNormals),
 				light_pos_,
 				light_diffuse_,
 				light_ambient_, stream_);
 
-		accum_.swapTo(Channels(Channel::Normals), out);
+		//accum_.swapTo(Channels(Channel::Normals), out);
+		//cv::cuda::swap(accum_.get<GpuMat>(Channel::Normals), out.get<GpuMat>(Channel::Normals));
+		//out.resetTexture(Channel::Normals);
+		//accum_.resetTexture(Channel::Normals);
 	}
 	//else if (chan == Channel::Contribution)
 	//{
@@ -561,7 +657,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 
 		Eigen::Matrix4f transform = Eigen::Matrix4f::Identity();
 		transform(0, 3) = baseline;
-		Eigen::Matrix4f matrix = transform.inverse() * src->getPose().cast<float>();
+		Eigen::Matrix4f matrix = src->getPose().cast<float>() * transform.inverse();
 		
 		params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse());
 		params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix);
@@ -570,6 +666,7 @@ bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) {
 		
 		out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height));
 		out.get<GpuMat>(Channel::Right).setTo(background_, cvstream);
+		out.createTexture<uchar4>(Channel::Right, true);
 
 		// Need to re-dibr due to pose change
 		if (mesh_) {
diff --git a/components/rgbd-sources/CMakeLists.txt b/components/rgbd-sources/CMakeLists.txt
index 0dbd5ccf8cca5aecea9b270c6fc28d4b53b3b4ac..6027ab17a7d64110a72dd51e6cfde098c0b0abc7 100644
--- a/components/rgbd-sources/CMakeLists.txt
+++ b/components/rgbd-sources/CMakeLists.txt
@@ -1,12 +1,11 @@
 set(RGBDSRC
 	src/sources/stereovideo/calibrate.cpp
 	src/sources/stereovideo/local.cpp
-	src/disparity.cpp
 	src/source.cpp
 	src/frame.cpp
 	src/frameset.cpp
 	src/sources/stereovideo/stereovideo.cpp
-	src/sources/middlebury/middlebury_source.cpp
+#	src/sources/middlebury/middlebury_source.cpp
 	src/sources/net/net.cpp
 	src/streamer.cpp
 	src/colour.cpp
@@ -17,7 +16,6 @@ set(RGBDSRC
 #	src/algorithms/opencv_bm.cpp
 	src/cb_segmentation.cpp
 	src/abr.cpp
-	src/offilter.cpp
 	src/sources/virtual/virtual.cpp
 	src/sources/ftlfile/file_source.cpp
 	src/sources/ftlfile/player.cpp
@@ -34,14 +32,12 @@ if (LibArchive_FOUND)
 	)
 endif (LibArchive_FOUND)
 
-if (LIBSGM_FOUND)
-	list(APPEND RGBDSRC "src/algorithms/fixstars_sgm.cpp")
-endif (LIBSGM_FOUND)
+#if (LIBSGM_FOUND)
+#	list(APPEND RGBDSRC "src/algorithms/fixstars_sgm.cpp")
+#endif (LIBSGM_FOUND)
 
 if (CUDA_FOUND)
 	list(APPEND RGBDSRC
-		src/algorithms/disp2depth.cu
-		src/algorithms/offilter.cu
 #		"src/algorithms/opencv_cuda_bm.cpp"
 #		"src/algorithms/opencv_cuda_bp.cpp"
 #		"src/algorithms/rtcensus.cu"
@@ -69,7 +65,7 @@ set_property(TARGET ftlrgbd PROPERTY CUDA_SEPARABLE_COMPILATION OFF)
 endif()
 
 #target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include)
-target_link_libraries(ftlrgbd ftlcommon ${OpenCV_LIBS} ${LIBSGM_LIBRARIES} ${CUDA_LIBRARIES} Eigen3::Eigen ${REALSENSE_LIBRARY} ftlnet ${LibArchive_LIBRARIES} ftlcodecs)
+target_link_libraries(ftlrgbd ftlcommon ${OpenCV_LIBS} ${LIBSGM_LIBRARIES} ${CUDA_LIBRARIES} Eigen3::Eigen ${REALSENSE_LIBRARY} ftlnet ${LibArchive_LIBRARIES} ftlcodecs ftloperators)
 
 add_subdirectory(test)
 
diff --git a/components/rgbd-sources/include/ftl/offilter.hpp b/components/rgbd-sources/include/ftl/offilter.hpp
index 6aece39aab241dfc7605dfb208d7d30ba6135509..0e273d80d0382dd1a805bb50f658e8180ded567e 100644
--- a/components/rgbd-sources/include/ftl/offilter.hpp
+++ b/components/rgbd-sources/include/ftl/offilter.hpp
@@ -17,6 +17,7 @@ public:
 	OFDisparityFilter() : n_max_(0), threshold_(0.0) {}
 	OFDisparityFilter(cv::Size size, int n_frames, float threshold);
 	void filter(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream);
+	void filter(cv::cuda::GpuMat &disp, cv::cuda::GpuMat &optflow, cv::cuda::Stream &stream);
 
 private:
 	int n_max_;
diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
index 52bbe9022987a85c3bf4398e9e3287011943ecac..e7a949600e6ba097aeda54460e83a1529851371e 100644
--- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp
@@ -81,14 +81,14 @@ public:
 	 * argument to also create (or recreate) the associated GpuMat.
 	 */
 	template <typename T>
-	ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f);
+	ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f, bool interpolated=false);
 
 	/**
 	 * Create a CUDA texture object for a channel. With this version the GpuMat
 	 * must already exist and be of the correct type.
 	 */
 	template <typename T>
-	ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c);
+	ftl::cuda::TextureObject<T> &createTexture(ftl::codecs::Channel c, bool interpolated=false);
 
 	void resetTexture(ftl::codecs::Channel c);
 
@@ -200,7 +200,7 @@ ftl::cuda::TextureObject<T> &Frame::getTexture(ftl::codecs::Channel c) {
 }
 
 template <typename T>
-ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f) {
+ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const ftl::rgbd::Format<T> &f, bool interpolated) {
 	if (!channels_.has(c)) channels_ += c;
 	if (!gpu_.has(c)) gpu_ += c;
 
@@ -221,19 +221,19 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, const
 
 	if (m.tex.devicePtr() == nullptr) {
 		//LOG(INFO) << "Creating texture object";
-		m.tex = ftl::cuda::TextureObject<T>(m.gpu);
+		m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated);
 	} else if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows) {
 		LOG(INFO) << "Recreating texture object for '" << ftl::codecs::name(c) << "'";
 		m.tex.free();
-		m.tex = ftl::cuda::TextureObject<T>(m.gpu);
+		m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated);
 	}
 
 	return ftl::cuda::TextureObject<T>::cast(m.tex);
 }
 
 template <typename T>
-ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c) {
-	if (!channels_.has(c)) throw ftl::exception("createTexture needs a format if the channel does not exist");
+ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c, bool interpolated) {
+	if (!channels_.has(c)) throw ftl::exception(ftl::Formatter() << "createTexture needs a format if the channel does not exist: " << (int)c);
 
 	auto &m = _get(c);
 
@@ -254,11 +254,11 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c) {
 
 	if (m.tex.devicePtr() == nullptr) {
 		//LOG(INFO) << "Creating texture object";
-		m.tex = ftl::cuda::TextureObject<T>(m.gpu);
+		m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated);
 	} else if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows || m.tex.devicePtr() != m.gpu.data) {
 		LOG(INFO) << "Recreating texture object for '" << ftl::codecs::name(c) << "'.";
 		m.tex.free();
-		m.tex = ftl::cuda::TextureObject<T>(m.gpu);
+		m.tex = ftl::cuda::TextureObject<T>(m.gpu, interpolated);
 	}
 
 	return ftl::cuda::TextureObject<T>::cast(m.tex);
diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
index 732c4ffb56a65a2ac8f53841b5dcb45e52ed5280..bbcad8c63f923bd92cbda56cb009e88d2ec50eb0 100644
--- a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
+++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
@@ -27,28 +27,68 @@ FixstarsSGM::FixstarsSGM(nlohmann::json &config) : Disparity(config) {
 	CHECK(P1_ >= 0);
 	CHECK(P2_ > P1_);
 
-	use_filter_ = value("use_filter", false);
-	if (use_filter_) {
-		int radius = value("filter_radius", 25);
-		int iter = value("filter_iter", 1);
-		CHECK(radius > 0) << "filter_radius must be greater than 0";
-		CHECK(iter > 0) << "filter_iter must be greater than 0";
+	updateBilateralFilter();
 
-		filter_ = cv::cuda::createDisparityBilateralFilter(max_disp_ << 4, radius, iter);
-	}
+	on("use_filter", [this](const ftl::config::Event&) {
+		updateBilateralFilter();
+	});
+
+	on("filter_radius", [this](const ftl::config::Event&) {
+		updateBilateralFilter();
+	});
+
+	on("filter_iter", [this](const ftl::config::Event&) {
+		updateBilateralFilter();
+	});
 	
 #ifdef HAVE_OPTFLOW
-	use_off_ = value("use_off", false);
+/*
+	updateOFDisparityFilter();
 
-	if (use_off_)
-	{
-		int off_size = value("off_size", 9);
-		double off_threshold = value("off_threshold", 0.9);
-		off_ = ftl::rgbd::OFDisparityFilter(size_, off_size, off_threshold);
-	}
+	on("use_off", [this](const ftl::config::Event&) {
+		updateOFDisparityFilter();
+	});
+
+	on("use_off", [this](const ftl::config::Event&) {
+		updateOFDisparityFilter();
+	});
+*/
 #endif
 
 	init(size_);
+
+	on("uniqueness", [this](const ftl::config::Event&) {
+		float uniqueness = value("uniqueness", uniqueness_);
+		if ((uniqueness >= 0.0) && (uniqueness <= 1.0)) {
+			uniqueness_ = uniqueness;
+			updateParameters();
+		}
+		else {
+			LOG(WARNING) << "invalid uniquness: " << uniqueness;
+		}
+	});
+
+	on("P1", [this](const ftl::config::Event&) {
+		int P1 = value("P1", P1_);
+		if (P1 <= P2_) {
+			P1_ = P1;
+			updateParameters();
+		}
+		else {
+			LOG(WARNING) << "invalid P1: " << P1 << ", (P1 <= P2), P2 is " << P2_;
+		}
+	});
+
+	on("P2", [this](const ftl::config::Event&) {
+		int P2 = value("P2", P2_);
+		if (P2 >= P1_) {
+			P2_ = P2;
+			updateParameters();
+		}
+		else {
+			LOG(WARNING) << "invalid P2: " << P2 << ", (P1 <= P2), P1 is " << P1_;
+		}
+	});
 }
 
 void FixstarsSGM::init(const cv::Size size) {
@@ -64,22 +104,73 @@ void FixstarsSGM::init(const cv::Size size) {
 	);
 }
 
-void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream)
-{
-	/*if (!frame.hasChannel(ftl::rgbd::kChanLeftGray))
-	{
-		auto &rgb = frame.getChannel<GpuMat>(ftl::rgbd::kChanLeft, stream);
-		auto &gray = frame.setChannel<GpuMat>(ftl::rgbd::kChanLeftGray);
-		cv::cuda::cvtColor(rgb, gray, cv::COLOR_BGR2GRAY, 0, stream);
+bool FixstarsSGM::updateParameters() {
+	if (ssgm_ == nullptr) { return false; }
+	return this->ssgm_->updateParameters(
+		sgm::StereoSGM::Parameters(P1_, P2_, uniqueness_, true));
+}
+
+bool FixstarsSGM::updateBilateralFilter() {
+	bool enable = value("use_filter", false);
+	int radius = value("filter_radius", 25);
+	int iter = value("filter_iter", 1);
+
+	if (enable) {
+		if (radius <= 0) {
+			LOG(WARNING) << "filter_radius must be greater than 0";
+			enable = false;
+		}
+		if (iter <= 0) {
+			LOG(WARNING) << "filter_iter must be greater than 0";
+			enable = false;
+		}
+	}
+	
+	if (enable) {
+		filter_ = cv::cuda::createDisparityBilateralFilter(max_disp_ << 4, radius, iter);
+		use_filter_ = true;
+	}
+	else {
+		use_filter_ = false;
 	}
 
-	if (!frame.hasChannel(ftl::rgbd::kChanRightGray))
-	{
-		auto &rgb = frame.getChannel<GpuMat>(ftl::rgbd::kChanRight, stream);
-		auto &gray = frame.setChannel<GpuMat>(ftl::rgbd::kChanRightGray);
-		cv::cuda::cvtColor(rgb, gray, cv::COLOR_BGR2GRAY, 0, stream);
-	}*/
+	return use_filter_;
+}
+
+#ifdef HAVE_OPTFLOW
+/*
+bool FixstarsSGM::updateOFDisparityFilter() {
+	bool enable = value("use_off", false);
+	int off_size = value("off_size", 9);
+	double off_threshold = value("off_threshold", 0.9);
+
+	if (enable) {
+		if (off_size <= 0) {
+			LOG(WARNING) << "bad off_size: " << off_size;
+			enable = false;
+		}
+
+		if (off_threshold <= 0.0) {
+			LOG(WARNING) << "bad off_threshold: " << off_threshold;
+			enable = false;
+		}
+	}
+	
+	if (enable) {
+		LOG(INFO) << "Optical flow filter, size: " << off_size << ", threshold: " << off_threshold;
+		off_ = ftl::rgbd::OFDisparityFilter(size_, off_size, off_threshold);
+		use_off_ = true;
+	}
+	else {
+		use_off_ = false;
+	}
+	
+	return use_off_;
+}*/
+#endif
 
+void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream)
+{
 	const auto &l = frame.get<GpuMat>(Channel::Left);
 	const auto &r = frame.get<GpuMat>(Channel::Right);
 	auto &disp = frame.create<GpuMat>(Channel::Disparity, Format<float>(l.size()));
@@ -100,21 +191,10 @@ void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream)
 
 	stream.waitForCompletion();
 	ssgm_->execute(lbw_.data, rbw_.data, dispt_.data);
-	GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows));
-	left_pixels.setTo(0);
+	// GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows));
+	// left_pixels.setTo(0);
 	cv::cuda::threshold(dispt_, dispt_, 4096.0f, 0.0f, cv::THRESH_TOZERO_INV, stream);
 
-	// TODO: filter could be applied after upscaling (to the upscaled disparity image)
-	if (use_filter_)
-	{
-		filter_->apply(
-			dispt_,
-			(l.size() == size_) ? l : l_scaled,
-			dispt_,
-			stream
-		);
-	}
-
 	GpuMat dispt_scaled;
 	if (l.size() != size_)
 	{
@@ -125,10 +205,28 @@ void FixstarsSGM::compute(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream)
 		dispt_scaled = dispt_;
 	}
 
+	// TODO: filter could be applied after upscaling (to the upscaled disparity image)
+	if (use_filter_)
+	{
+		filter_->apply(
+			dispt_,
+			l,
+			dispt_,
+			stream
+		);
+	}
+	
 	dispt_scaled.convertTo(disp, CV_32F, 1.0f / 16.0f, stream);
 
 #ifdef HAVE_OPTFLOW
-	if (use_off_) { off_.filter(frame, stream); }
+/*
+	// TODO: Optical flow filter expects CV_32F
+	if (use_off_) {
+		frame.upload(Channel::Flow, stream);
+		stream.waitForCompletion();
+		off_.filter(disp, frame.get<GpuMat>(Channel::Flow), stream);
+	}
+*/
 #endif
 }
 
diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
index d2c0c1d2a8fd459695bb02c9f66f17e423dc9fa5..2de1c41ef377570915397ea2758d265fa875b179 100644
--- a/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
+++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
@@ -41,6 +41,11 @@ namespace ftl {
 
 		private:
 			void init(const cv::Size size);
+			bool updateParameters();
+			bool updateBilateralFilter();
+			#ifdef HAVE_OPTFLOW
+			bool updateOFDisparityFilter();
+			#endif
 
 			float uniqueness_;
 			int P1_;
@@ -54,7 +59,7 @@ namespace ftl {
 			cv::cuda::GpuMat dispt_;
 
 			#ifdef HAVE_OPTFLOW
-			ftl::rgbd::OFDisparityFilter off_;
+			//ftl::rgbd::OFDisparityFilter off_;
 			#endif
 		};
 	};
diff --git a/components/rgbd-sources/src/cuda_algorithms.hpp b/components/rgbd-sources/src/cuda_algorithms.hpp
index 439c16cfc21fef08086bb69b7e84b1c8b49fec74..43fff52d0c1d00aa2ff7960456db1fb827ad5cc6 100644
--- a/components/rgbd-sources/src/cuda_algorithms.hpp
+++ b/components/rgbd-sources/src/cuda_algorithms.hpp
@@ -38,13 +38,6 @@ namespace cuda {
 	void texture_map(const TextureObject<uchar4> &t,
 			TextureObject<float> &f);
 
-	void disparity_to_depth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth,
-				const ftl::rgbd::Camera &c, cv::cuda::Stream &stream);
-
-	void optflow_filter(cv::cuda::GpuMat &disp, const cv::cuda::GpuMat &optflow,
-						cv::cuda::GpuMat &history, int n_max, float threshold,
-						cv::cuda::Stream &stream);
-
 }
 }
 
diff --git a/components/rgbd-sources/src/offilter.cpp b/components/rgbd-sources/src/offilter.cpp
deleted file mode 100644
index 34c01c1feb73b8171b0890f8d94405ee22a0433b..0000000000000000000000000000000000000000
--- a/components/rgbd-sources/src/offilter.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-#include "ftl/offilter.hpp"
-#include "cuda_algorithms.hpp"
-
-#ifdef HAVE_OPTFLOW
-
-#include <loguru.hpp>
-
-using namespace ftl::rgbd;
-using namespace ftl::codecs;
-
-using cv::Mat;
-using cv::Size;
-
-using std::vector;
-
-template<typename T> static bool inline isValidDisparity(T d) { return (0.0 < d) && (d < 256.0); } // TODO
-
-OFDisparityFilter::OFDisparityFilter(Size size, int n_frames, float threshold) :
-	n_max_(n_frames + 1), threshold_(threshold)
-{
-	CHECK((n_max_ > 1) && (n_max_ <= 32)) << "History length must be between 0 and 31!";
-	disp_old_ = cv::cuda::GpuMat(cv::Size(size.width * n_max_, size.height), CV_32FC1);
-	
-	/*nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height,
-													cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW,
-													true, false, false, 0);*/
-	
-}
-
-void OFDisparityFilter::filter(ftl::rgbd::Frame &frame, cv::cuda::Stream &stream)
-{
-	frame.upload(Channel::Flow, stream);
-	const cv::cuda::GpuMat &optflow = frame.get<cv::cuda::GpuMat>(Channel::Flow);
-	//frame.get<cv::cuda::GpuMat>(Channel::Disparity);
-	stream.waitForCompletion();
-	if (optflow.empty()) { return; }
-
-	cv::cuda::GpuMat &disp = frame.create<cv::cuda::GpuMat>(Channel::Disparity);
-	ftl::cuda::optflow_filter(disp, optflow, disp_old_, n_max_, threshold_, stream);
-}
-
-#endif  // HAVE_OPTFLOW
diff --git a/components/rgbd-sources/src/source.cpp b/components/rgbd-sources/src/source.cpp
index b0ee77a449e33b82641c18aa0c7a039ed42e8626..13cdd5487edf0b7cbc99f7cd9dd7032b43d31185 100644
--- a/components/rgbd-sources/src/source.cpp
+++ b/components/rgbd-sources/src/source.cpp
@@ -104,8 +104,8 @@ ftl::rgbd::detail::Source *Source::_createFileImpl(const ftl::URI &uri) {
 		if (ftl::is_directory(path)) {
 			if (ftl::is_file(path + "/video.mp4")) {
 				return new StereoVideoSource(this, path);
-			} else if (ftl::is_file(path + "/im0.png")) {
-				return new MiddleburySource(this, path);
+//			} else if (ftl::is_file(path + "/im0.png")) {
+//				return new MiddleburySource(this, path);
 			} else {
 				LOG(ERROR) << "Directory is not a valid RGBD source: " << path;
 			}
diff --git a/components/rgbd-sources/src/sources/ftlfile/file_source.cpp b/components/rgbd-sources/src/sources/ftlfile/file_source.cpp
index 9597bde796f80ac67f38a5a569190001e13d7065..0962c1886dc199e50530343c0d01edf4e74e37f0 100644
--- a/components/rgbd-sources/src/sources/ftlfile/file_source.cpp
+++ b/components/rgbd-sources/src/sources/ftlfile/file_source.cpp
@@ -30,6 +30,13 @@ FileSource::FileSource(ftl::rgbd::Source *s, ftl::rgbd::Player *r, int sid) : ft
 	realtime_ = host_->value("realtime", true);
 	timestamp_ = r->getStartTime();
 	sourceid_ = sid;
+	freeze_ = host_->value("freeze", false);
+	have_frozen_ = false;
+
+	host_->on("freeze", [this](const ftl::config::Event &e) {
+		have_frozen_ = false;
+		freeze_ = host_->value("freeze", false);
+	});
 
     r->onPacket(sid, [this](const ftl::codecs::StreamPacket &spkt, ftl::codecs::Packet &pkt) {
 		host_->notifyRaw(spkt, pkt);
@@ -133,18 +140,28 @@ bool FileSource::capture(int64_t ts) {
 }
 
 bool FileSource::retrieve() {
-	if (!reader_->read(timestamp_)) {
+	if (!have_frozen_ && !reader_->read(timestamp_)) {
 		cache_write_ = -1;
 	}
     return true;
 }
 
 void FileSource::swap() {
+	if (have_frozen_) return;
 	cache_read_ = cache_write_;
 	cache_write_ = (cache_write_ == 0) ? 1 : 0;
 }
 
 bool FileSource::compute(int n, int b) {
+	// Freeze frame requires a copy to be made each time...
+	if (have_frozen_) {
+		cv::cuda::GpuMat t1, t2;
+		if (!rgb_.empty()) rgb_.copyTo(t1);
+		if (!depth_.empty()) depth_.copyTo(t2);
+		host_->notify(timestamp_, t1, t2);
+		return true;
+	}
+
 	if (cache_read_ < 0) return false;
 	if (cache_[cache_read_].size() == 0) return false;
 
@@ -191,7 +208,11 @@ bool FileSource::compute(int n, int b) {
 	if (rgb_.empty() || depth_.empty()) return false;
 
 	// Inform about a decoded frame pair
-	host_->notify(timestamp_, rgb_, depth_);
+	if (freeze_ && !have_frozen_) {
+		have_frozen_ = true;
+	} else {
+		host_->notify(timestamp_, rgb_, depth_);
+	}
     return true;
 }
 
diff --git a/components/rgbd-sources/src/sources/ftlfile/file_source.hpp b/components/rgbd-sources/src/sources/ftlfile/file_source.hpp
index 2d59b68cb1e07d10aade07b33780af2497736a12..f8a26da37802e0aa2aae4681b420e6061c230fd2 100644
--- a/components/rgbd-sources/src/sources/ftlfile/file_source.hpp
+++ b/components/rgbd-sources/src/sources/ftlfile/file_source.hpp
@@ -43,6 +43,8 @@ class FileSource : public detail::Source {
 	ftl::codecs::Decoder *decoders_[2];
 
 	bool realtime_;
+	bool freeze_;
+	bool have_frozen_;
 
 	void _processCalibration(ftl::codecs::Packet &pkt);
 	void _processPose(ftl::codecs::Packet &pkt);
diff --git a/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp b/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp
index d152d25e5a7d8830e8b555851f94d97b70463e29..229895049061f2308f1a982eab46005a26940548 100644
--- a/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp
+++ b/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp
@@ -149,7 +149,7 @@ void MiddleburySource::_performDisparity() {
 	//calib_->rectifyStereo(left_, right_, stream_);
 	disp_->compute(rgb_, right_, disp_tmp_, stream_);
 	//disparityToDepth(disp_tmp_, depth_tmp_, params_, stream_);
-	ftl::cuda::disparity_to_depth(disp_tmp_, depth_, params_, stream_);
+	//ftl::cuda::disparity_to_depth(disp_tmp_, depth_, params_, stream_);
 	//left_.download(rgb_, stream_);
 	//rgb_ = lsrc_->cachedLeft();
 	//depth_tmp_.download(depth_, stream_);
diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
index f8e08a9f6d1f7670adeb8be9310129e8353daf1b..84d1e574b8e7aff91c774a1035c75280cd8ac03c 100644
--- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
+++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp
@@ -1,13 +1,26 @@
 #include <loguru.hpp>
+
 #include "stereovideo.hpp"
-#include <ftl/configuration.hpp>
-#include <ftl/threads.hpp>
+
+#include "ftl/configuration.hpp"
+
+#ifdef HAVE_OPTFLOW
+#include "ftl/operators/opticalflow.hpp"
+#endif
+
+
+#include "ftl/operators/smoothing.hpp"
+#include "ftl/operators/colours.hpp"
+#include "ftl/operators/normals.hpp"
+#include "ftl/operators/filling.hpp"
+#include "ftl/operators/segmentation.hpp"
+#include "ftl/operators/disparity.hpp"
+#include "ftl/operators/mask.hpp"
+
+#include "ftl/threads.hpp"
 #include "calibrate.hpp"
 #include "local.hpp"
 #include "disparity.hpp"
-#include "cuda_algorithms.hpp"
-
-#include "cuda_algorithms.hpp"
 
 using ftl::rgbd::detail::Calibrate;
 using ftl::rgbd::detail::LocalSource;
@@ -27,13 +40,11 @@ StereoVideoSource::StereoVideoSource(ftl::rgbd::Source *host, const string &file
 }
 
 StereoVideoSource::~StereoVideoSource() {
-	delete disp_;
 	delete calib_;
 	delete lsrc_;
 }
 
-void StereoVideoSource::init(const string &file)
-{
+void StereoVideoSource::init(const string &file) {
 	capabilities_ = kCapVideo | kCapStereo;
 
 	if (ftl::is_video(file)) {
@@ -61,18 +72,7 @@ void StereoVideoSource::init(const string &file)
 	cv::Size size = cv::Size(lsrc_->width(), lsrc_->height());
 	frames_ = std::vector<Frame>(2);
 
-#ifdef HAVE_OPTFLOW
-	use_optflow_ =  host_->value("use_optflow", false);
-	LOG(INFO) << "Using optical flow: " << (use_optflow_ ? "true" : "false");
-
-	nvof_ = cv::cuda::NvidiaOpticalFlow_1_0::create(size.width, size.height,
-													cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW,
-													true, false, false, 0);
-
-#endif
-
 	calib_ = ftl::create<Calibrate>(host_, "calibration", size, stream_);
-
 	if (!calib_->isCalibrated()) LOG(WARNING) << "Cameras are not calibrated!";
 
 	// Generate camera parameters from camera matrix
@@ -117,20 +117,33 @@ void StereoVideoSource::init(const string &file)
 	});
 	
 	// left and right masks (areas outside rectified images)
-	// only left mask used
+	// only left mask used (not used)
 	cv::cuda::GpuMat mask_r_gpu(lsrc_->height(), lsrc_->width(), CV_8U, 255);
 	cv::cuda::GpuMat mask_l_gpu(lsrc_->height(), lsrc_->width(), CV_8U, 255);
-	
 	calib_->rectifyStereo(mask_l_gpu, mask_r_gpu, stream_);
 	stream_.waitForCompletion();
-
 	cv::Mat mask_l;
 	mask_l_gpu.download(mask_l);
 	mask_l_ = (mask_l == 0);
 	
-	disp_ = Disparity::create(host_, "disparity");
-	if (!disp_) LOG(FATAL) << "Unknown disparity algorithm : " << *host_->get<ftl::config::json_t>("disparity");
-	disp_->setMask(mask_l_);
+	pipeline_input_ = ftl::config::create<ftl::operators::Graph>(host_, "input");
+	#ifdef HAVE_OPTFLOW
+	pipeline_input_->append<ftl::operators::NVOpticalFlow>("optflow");
+	#endif
+
+	pipeline_depth_ = ftl::config::create<ftl::operators::Graph>(host_, "disparity");
+	pipeline_depth_->append<ftl::operators::FixstarsSGM>("algorithm");
+
+	#ifdef HAVE_OPTFLOW
+	pipeline_depth_->append<ftl::operators::OpticalFlowTemporalSmoothing>("optflow_filter");
+	#endif
+	pipeline_depth_->append<ftl::operators::DisparityBilateralFilter>("bilateral_filter");
+	pipeline_depth_->append<ftl::operators::DisparityToDepth>("calculate_depth");
+	pipeline_depth_->append<ftl::operators::ColourChannels>("colour");  // Convert BGR to BGRA
+	pipeline_depth_->append<ftl::operators::Normals>("normals");  // Estimate surface normals
+	pipeline_depth_->append<ftl::operators::CrossSupport>("cross");
+	pipeline_depth_->append<ftl::operators::DiscontinuityMask>("discontinuity_mask");
+	pipeline_depth_->append<ftl::operators::AggreMLS>("mls");  // Perform MLS (using smoothing channel)
 
 	LOG(INFO) << "StereoVideo source ready...";
 	ready_ = true;
@@ -158,15 +171,6 @@ ftl::rgbd::Camera StereoVideoSource::parameters(Channel chan) {
 	}
 }
 
-static void disparityToDepth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth,
-							 const cv::Mat &Q, cv::cuda::Stream &stream) {
-	// Q(3, 2) = -1/Tx
-	// Q(2, 3) = f
-
-	double val = (1.0f / Q.at<double>(3, 2)) * Q.at<double>(2, 3);
-	cv::cuda::divide(val, disparity, depth, 1.0f / 1000.0f, -1, stream);
-}
-
 bool StereoVideoSource::capture(int64_t ts) {
 	timestamp_ = ts;
 	lsrc_->grab();
@@ -180,31 +184,9 @@ bool StereoVideoSource::retrieve() {
 	auto &right = frame.create<cv::cuda::GpuMat>(Channel::Right);
 	lsrc_->get(left, right, calib_, stream2_);
 
-#ifdef HAVE_OPTFLOW
-	// see comments in https://gitlab.utu.fi/nicolas.pope/ftl/issues/155
-	
-	if (use_optflow_)
-	{
-		auto &left_gray = frame.create<cv::cuda::GpuMat>(Channel::LeftGray);
-		auto &right_gray = frame.create<cv::cuda::GpuMat>(Channel::RightGray);
-
-		cv::cuda::cvtColor(left, left_gray, cv::COLOR_BGR2GRAY, 0, stream2_);
-		cv::cuda::cvtColor(right, right_gray, cv::COLOR_BGR2GRAY, 0, stream2_);
-
-		if (frames_[1].hasChannel(Channel::LeftGray))
-		{
-			//frames_[1].download(Channel::LeftGray);
-			auto &left_gray_prev = frames_[1].get<cv::cuda::GpuMat>(Channel::LeftGray);
-			auto &optflow = frame.create<cv::cuda::GpuMat>(Channel::Flow);
-			nvof_->calc(left_gray, left_gray_prev, optflow, stream2_);
-			// nvof_->upSampler() isn't implemented with CUDA
-			// cv::cuda::resize() does not work wiht 2-channel input
-			// cv::cuda::resize(optflow_, optflow, left.size(), 0.0, 0.0, cv::INTER_NEAREST, stream2_);
-		}
-	}
-#endif
-
+	pipeline_input_->apply(frame, frame, host_, cv::cuda::StreamAccessor::getStream(stream2_));
 	stream2_.waitForCompletion();
+	
 	return true;
 }
 
@@ -216,37 +198,32 @@ void StereoVideoSource::swap() {
 
 bool StereoVideoSource::compute(int n, int b) {
 	auto &frame = frames_[1];
-	auto &left = frame.get<cv::cuda::GpuMat>(Channel::Left);
-	auto &right = frame.get<cv::cuda::GpuMat>(Channel::Right);
-
+	
 	const ftl::codecs::Channel chan = host_->getChannel();
-	if (left.empty() || right.empty()) return false;
+	if (!frame.hasChannel(Channel::Left) || !frame.hasChannel(Channel::Right)) {
+		return false;
+	}
 
 	if (chan == Channel::Depth) {
-		disp_->compute(frame, stream_);
-		
-		auto &disp = frame.get<cv::cuda::GpuMat>(Channel::Disparity);
-		auto &depth = frame.create<cv::cuda::GpuMat>(Channel::Depth);
-		if (depth.empty()) depth = cv::cuda::GpuMat(left.size(), CV_32FC1);
-
-		ftl::cuda::disparity_to_depth(disp, depth, params_, stream_);
-		
-		//left.download(rgb_, stream_);
-		//depth.download(depth_, stream_);
-		//frame.download(Channel::Left + Channel::Depth);
+		pipeline_depth_->apply(frame, frame, host_, cv::cuda::StreamAccessor::getStream(stream_));	
 		stream_.waitForCompletion();
-		host_->notify(timestamp_, left, depth);
+		host_->notify(timestamp_,
+						frame.get<cv::cuda::GpuMat>(Channel::Left),
+						frame.get<cv::cuda::GpuMat>(Channel::Depth));
+
 	} else if (chan == Channel::Right) {
-		//left.download(rgb_, stream_);
-		//right.download(depth_, stream_);
-		stream_.waitForCompletion();  // TODO:(Nick) Move to getFrames
-		host_->notify(timestamp_, left, right);
+		stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames
+		host_->notify(timestamp_,
+						frame.get<cv::cuda::GpuMat>(Channel::Left),
+						frame.get<cv::cuda::GpuMat>(Channel::Right));
+	
 	} else {
-		//left.download(rgb_, stream_);
-		stream_.waitForCompletion();  // TODO:(Nick) Move to getFrames
-		//LOG(INFO) << "NO SECOND CHANNEL: " << (bool)depth_.empty();
+		stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames
+		auto &left = frame.get<cv::cuda::GpuMat>(Channel::Left);
 		depth_.create(left.size(), left.type());
-		host_->notify(timestamp_, left, depth_);
+		host_->notify(timestamp_,
+						frame.get<cv::cuda::GpuMat>(Channel::Left),
+						depth_);
 	}
 
 	return true;
diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp
index dfea7937f99777c23f753a7ab2b5bad8c68bb4af..78fcdbcf809eeae0d5de6da30b99ce3dc07f9214 100644
--- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp
+++ b/components/rgbd-sources/src/sources/stereovideo/stereovideo.hpp
@@ -3,6 +3,7 @@
 #define _FTL_RGBD_STEREOVIDEO_HPP_
 
 #include <ftl/rgbd/source.hpp>
+#include <ftl/operators/operator.hpp>
 #include <string>
 
 namespace ftl {
@@ -33,15 +34,14 @@ class StereoVideoSource : public detail::Source {
 	bool isReady();
 	Camera parameters(ftl::codecs::Channel chan);
 
-	//const cv::Mat &getRight() const { return right_; }
-
 	private:
 	LocalSource *lsrc_;
 	Calibrate *calib_;
-	Disparity *disp_;
-	
+
+	ftl::operators::Graph *pipeline_input_;
+	ftl::operators::Graph *pipeline_depth_;
+
 	bool ready_;
-	bool use_optflow_;
 	
 	cv::cuda::Stream stream_;
 	cv::cuda::Stream stream2_;
@@ -50,12 +50,6 @@ class StereoVideoSource : public detail::Source {
 
 	cv::Mat mask_l_;
 
-#ifdef HAVE_OPTFLOW
-	// see comments in https://gitlab.utu.fi/nicolas.pope/ftl/issues/155
-	cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> nvof_;
-	cv::cuda::GpuMat optflow_;
-#endif
-
 	void init(const std::string &);
 };
 
diff --git a/config/config_vision.jsonc b/config/config_vision.jsonc
index b73446cefe06aaaf3663b8fe2823822878b5a1a8..cea950e0c21bde19d369ad0a4f53c255657e3724 100644
--- a/config/config_vision.jsonc
+++ b/config/config_vision.jsonc
@@ -9,53 +9,6 @@
 		}
 	},
 	
-	"disparity": {
-		"libsgm": {
-			"algorithm": "libsgm",
-			"width": 1280,
-			"height": 720,
-			"use_cuda": true,
-			"minimum": 0,
-			"maximum": 256,
-			"tau": 0.0,
-			"gamma": 0.0,
-			"window_size": 5,
-			"sigma": 1.5,
-			"lambda": 8000.0,
-			"uniqueness":  0.65,
-			"use_filter": true,
-			"P1": 8,
-			"P2": 130,
-			"filter_radius": 11,
-			"filter_iter": 2,
-			"use_off": true,
-			"off_size": 24,
-			"off_threshold": 0.75,
-			"T": 60,
-			"T_add": 0,
-			"T_del": 25,
-			"T_x" : 3.0,
-			"alpha" : 0.6,
-			"beta" : 1.7,
-			"epsilon" : 15.0
-		},
-		
-		"rtcensus": {
-			"algorithm": "rtcensus",
-			"use_cuda": true,
-			"minimum": 0,
-			"maximum": 256,
-			"tau": 0.0,
-			"gamma": 0.0,
-			"window_size": 5,
-			"sigma": 1.5,
-			"lambda": 8000.0,
-			"use_filter": true,
-			"filter_radius": 3,
-			"filter_iter": 4	
-		}
-	},
-	
 	"sources": {
 		"stereocam": {
 			"uri": "device:video",
@@ -71,7 +24,23 @@
 			},
 			"use_optflow" : true,
 			"calibration": { "$ref": "#calibrations/default" },
-			"disparity": { "$ref": "#disparity/libsgm" }
+			"disparity": {
+				"algorithm" : {
+					"P1" : 10,
+					"P2" : 120,
+					"enabled" : true
+				},
+				"bilateral_filter" : {
+					"radius" : 17,
+					"iter" : 21,
+					"enabled" : true
+				},
+				"optflow_filter" : {
+					"threshold" : 0.8,
+					"history_size" : 10,
+					"enabled": true
+				}
+			}
 		},
 		"stereovid": {},
 		"localhost": {}