diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f866eda014e6b6353824435cbacbc500fe2c683..fb73e5cf9d35ad7de7e1a2cbf66826d802c0b749 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -225,6 +225,7 @@ add_subdirectory(components/codecs) add_subdirectory(components/net) add_subdirectory(components/rgbd-sources) add_subdirectory(components/control/cpp) +add_subdirectory(components/operators) add_subdirectory(applications/calibration) add_subdirectory(applications/groupview) add_subdirectory(applications/player) diff --git a/applications/gui/src/camera.cpp b/applications/gui/src/camera.cpp index f35b4e539e8216b32106762b69ea7e2b028706eb..debb8e73d5cf6f1f1bc1d685bb7298350014614c 100644 --- a/applications/gui/src/camera.cpp +++ b/applications/gui/src/camera.cpp @@ -148,17 +148,20 @@ ftl::gui::Camera::Camera(ftl::gui::Screen *screen, ftl::rgbd::Source *src) : scr posewin_->setTheme(screen->windowtheme); posewin_->setVisible(false); - src->setCallback([this](int64_t ts, cv::Mat &channel1, cv::Mat &channel2) { + src->setCallback([this](int64_t ts, cv::cuda::GpuMat &channel1, cv::cuda::GpuMat &channel2) { UNIQUE_LOCK(mutex_, lk); im1_.create(channel1.size(), channel1.type()); im2_.create(channel2.size(), channel2.type()); //cv::swap(channel1, im1_); //cv::swap(channel2, im2_); + + channel1.download(im1_); + channel2.download(im2_); // OpenGL (0,0) bottom left - cv::flip(channel1, im1_, 0); - cv::flip(channel2, im2_, 0); + cv::flip(im1_, im1_, 0); + cv::flip(im2_, im2_, 0); }); } @@ -430,6 +433,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() { cv::Mat tmp; switch(channel_) { + case Channel::Smoothing: case Channel::Confidence: if (im2_.rows == 0) { break; } visualizeEnergy(im2_, tmp, 1.0); @@ -459,7 +463,7 @@ const GLTexture &ftl::gui::Camera::captureFrame() { texture2_.update(tmp);*/ break; - case Channel::Flow: + //case Channel::Flow: case Channel::Normals: case Channel::Right: if (im2_.rows == 0 || im2_.type() != CV_8UC3) { break; } diff --git 
a/applications/gui/src/config_window.cpp b/applications/gui/src/config_window.cpp index a9db26b466c2b9ae5f41808ebebc30726240b8ab..fcba8ff7ad2e033ab790790b03aed4a3af25af15 100644 --- a/applications/gui/src/config_window.cpp +++ b/applications/gui/src/config_window.cpp @@ -15,8 +15,11 @@ using std::string; using std::vector; using ftl::config::json_t; +ConfigWindow::ConfigWindow(nanogui::Widget *parent, ftl::ctrl::Master *ctrl, const ftl::UUID &peer) : ConfigWindow(parent, ctrl, std::optional<ftl::UUID>(peer)) { -ConfigWindow::ConfigWindow(nanogui::Widget *parent, ftl::ctrl::Master *ctrl, const ftl::UUID &peer) +} + +ConfigWindow::ConfigWindow(nanogui::Widget *parent, ftl::ctrl::Master *ctrl, const std::optional<ftl::UUID> &peer) : nanogui::Window(parent, "Settings"), ctrl_(ctrl), peer_(peer) { using namespace nanogui; @@ -24,78 +27,115 @@ ConfigWindow::ConfigWindow(nanogui::Widget *parent, ftl::ctrl::Master *ctrl, con setPosition(Vector2i(parent->width()/2.0f - 100.0f, parent->height()/2.0f - 100.0f)); //setModal(true); - configurables_ = ctrl->getConfigurables(peer); + if (peer) { + configurables_ = ctrl->getConfigurables(peer.value()); + } else { + configurables_ = ftl::config::list(); + } new Label(this, "Select Configurable","sans-bold"); - auto select = new ComboBox(this, configurables_); - select->setCallback([this](int ix) { - LOG(INFO) << "Change configurable: " << ix; - _buildForm(configurables_[ix], ctrl_->get(peer_, configurables_[ix])); - - setVisible(false); - //this->parent()->removeChild(this); - //delete this; - //screen()->removeChild(this); - }); + for (auto c : configurables_) { + auto itembutton = new Button(this, c); + itembutton->setCallback([this,c]() { + LOG(INFO) << "Change configurable: " << c; + _buildForm(c); + setVisible(false); + //this->parent()->removeChild(this); + //delete this; + //screen()->removeChild(this); + }); + } } ConfigWindow::~ConfigWindow() { } -void ConfigWindow::_addElements(nanogui::FormHelper *form, const std::string 
&suri, const ftl::config::json_t &data) { +class ConfigWindow::References { + public: + References(ftl::NetConfigurable* nc, ftl::config::json_t* config, const std::string* suri) : nc(nc), config(config), suri(suri) { + } + + ~References() { + delete nc; + delete config; + delete suri; + } + + private: + ftl::NetConfigurable* nc; + ftl::config::json_t* config; + const std::string* suri; +}; + +std::vector<ftl::gui::ConfigWindow::References *> ConfigWindow::_addElements(nanogui::FormHelper *form, ftl::Configurable &nc, const std::string &suri, std::function<ftl::Configurable*(const std::string*, std::vector<References *>&)> construct) { using namespace nanogui; + std::vector<References *> references; + + auto data = nc.getConfig(); + for (auto i=data.begin(); i!=data.end(); ++i) { if (i.key() == "$id") continue; if (i.key() == "$ref" && i.value().is_string()) { LOG(INFO) << "Follow $ref: " << i.value(); - _addElements(form, suri, ctrl_->get(peer_, i.value().get<string>())); + const std::string* suri = new std::string(i.value().get<string>()); + ftl::Configurable* rc = construct(suri, references); + auto new_references = _addElements(form, *rc, *suri, construct); + references.insert(references.end(), new_references.begin(), new_references.end()); continue; } if (i.value().is_boolean()) { string key = i.key(); - form->addVariable<bool>(i.key(), [this,data,key,suri](const bool &b){ - ctrl_->set(peer_, suri + string("/") + key, json_t(b)); + form->addVariable<bool>(i.key(), [this,data,key,&nc](const bool &b){ + nc.set(key, b); }, [data,key]() -> bool { return data[key].get<bool>(); }); } else if (i.value().is_number_integer()) { string key = i.key(); - form->addVariable<int>(i.key(), [this,data,key,suri](const int &f){ - ctrl_->set(peer_, suri + string("/") + key, json_t(f)); + form->addVariable<int>(i.key(), [this,data,key,&nc](const int &f){ + nc.set(key, f); }, [data,key]() -> int { return data[key].get<int>(); }); } else if (i.value().is_number_float()) { string key 
= i.key(); - form->addVariable<float>(i.key(), [this,data,key,suri](const float &f){ - ctrl_->set(peer_, suri + string("/") + key, json_t(f)); + form->addVariable<float>(i.key(), [this,data,key,&nc](const float &f){ + nc.set(key, f); }, [data,key]() -> float { return data[key].get<float>(); }); } else if (i.value().is_string()) { string key = i.key(); - form->addVariable<string>(i.key(), [this,data,key,suri](const string &f){ - ctrl_->set(peer_, suri + string("/") + key, json_t(f)); + form->addVariable<string>(i.key(), [this,data,key,&nc](const string &f){ + nc.set(key, f); }, [data,key]() -> string { return data[key].get<string>(); }); } else if (i.value().is_object()) { string key = i.key(); - const ftl::config::json_t &v = i.value(); - form->addButton(i.key(), [this,form,suri,key,v]() { - _buildForm(suri+string("/")+key, v); - })->setIcon(ENTYPO_ICON_FOLDER); + // Checking the URI with exists() prevents unloaded local configurations from being shown. + if (suri.find('#') != string::npos && exists(suri+string("/")+key)) { + form->addButton(key, [this,suri,key]() { + _buildForm(suri+string("/")+key); + })->setIcon(ENTYPO_ICON_FOLDER); + } else if (exists(suri+string("#")+key)) { + form->addButton(key, [this,suri,key]() { + _buildForm(suri+string("#")+key); + })->setIcon(ENTYPO_ICON_FOLDER); + } } } + + return references; } -void ConfigWindow::_buildForm(const std::string &suri, ftl::config::json_t data) { +void ConfigWindow::_buildForm(const std::string &suri) { using namespace nanogui; ftl::URI uri(suri); @@ -105,11 +145,50 @@ void ConfigWindow::_buildForm(const std::string &suri, ftl::config::json_t data) form->addWindow(Vector2i(100,50), uri.getFragment()); form->window()->setTheme(theme()); - _addElements(form, suri, data); + ftl::config::json_t* config; + config = new ftl::config::json_t; + const std::string* allocated_suri = new std::string(suri); + std::vector<ftl::gui::ConfigWindow::References *> references; + + ftl::Configurable* nc; + + if (peer_) { + 
*config = ctrl_->get(peer_.value(), suri); + nc = new ftl::NetConfigurable(peer_.value(), *allocated_suri, *ctrl_, *config); + + references = _addElements(form, *nc, *allocated_suri, [this](auto suri, auto &references) { + ftl::config::json_t* config = new ftl::config::json_t; + *config = ctrl_->get(peer_.value(), *suri); + auto nc = new ftl::NetConfigurable(peer_.value(), *suri, *ctrl_, *config); + auto r = new References(nc, config, suri); + references.push_back(r); + return nc; + }); + } else { + nc = ftl::config::find(suri); + if (nc) { + references = _addElements(form, *nc, *allocated_suri, [this](auto suri, auto &references) { + return ftl::config::find(*suri); + }); + } + } - auto closebutton = form->addButton("Close", [form]() { + auto closebutton = form->addButton("Close", [this,form,config,allocated_suri,nc,references]() { form->window()->setVisible(false); + for(auto r : references) { + delete r; + } + if (peer_) { + delete nc; + } + delete config; + delete allocated_suri; delete form; }); closebutton->setIcon(ENTYPO_ICON_CROSS); } + +bool ConfigWindow::exists(const std::string &uri) { + // If the Configurable is a NetConfigurable, the URI is not checked. 
+ return peer_ || ftl::config::find(uri); +} \ No newline at end of file diff --git a/applications/gui/src/config_window.hpp b/applications/gui/src/config_window.hpp index 87170b1d30741a3d94eff5a54fcaa10d67718e86..23279d93cee1d601e53e0147b4edee8d5309a483 100644 --- a/applications/gui/src/config_window.hpp +++ b/applications/gui/src/config_window.hpp @@ -6,6 +6,7 @@ #include <ftl/uuid.hpp> #include <nanogui/formhelper.h> +#include <ftl/net_configurable.hpp> namespace ftl { namespace gui { @@ -16,15 +17,23 @@ namespace gui { class ConfigWindow : public nanogui::Window { public: ConfigWindow(nanogui::Widget *parent, ftl::ctrl::Master *ctrl, const ftl::UUID &peer); + ConfigWindow(nanogui::Widget *parent, ftl::ctrl::Master *ctrl, const std::optional<ftl::UUID> &peer = std::nullopt); ~ConfigWindow(); private: + /* + References holds the pointers to a NetConfigurable and all its members so that + they can all be returned from _addElements() and then simultaneously deleted + as the form is closed. 
+ */ + class References; ftl::ctrl::Master *ctrl_; - ftl::UUID peer_; + std::optional<ftl::UUID> peer_; std::vector<std::string> configurables_; - void _buildForm(const std::string &uri, ftl::config::json_t data); - void _addElements(nanogui::FormHelper *form, const std::string &suri, const ftl::config::json_t &data); + void _buildForm(const std::string &uri); + std::vector<References *> _addElements(nanogui::FormHelper *form, ftl::Configurable &nc, const std::string &suri, std::function<ftl::Configurable*(const std::string*, std::vector<References *>&)> construct); + bool exists(const std::string &uri); }; } diff --git a/applications/gui/src/main.cpp b/applications/gui/src/main.cpp index 974afff1473bd4aad7ecf59575f8735808c35e72..50bc3638c2e26197fbcf5b74dadacd4b6a49d0ad 100644 --- a/applications/gui/src/main.cpp +++ b/applications/gui/src/main.cpp @@ -13,7 +13,7 @@ int main(int argc, char **argv) { ftl::net::Universe *net = ftl::create<ftl::net::Universe>(root, "net"); net->start(); - //net->waitConnections(); + net->waitConnections(); ftl::ctrl::Master *controller = new ftl::ctrl::Master(root, net); controller->onLog([](const ftl::ctrl::LogEvent &e){ diff --git a/applications/gui/src/screen.cpp b/applications/gui/src/screen.cpp index b0a74d37a2e39856901006504093c29c708e8189..a9f4cc023265ba427a0be451ee3337fe834528bc 100644 --- a/applications/gui/src/screen.cpp +++ b/applications/gui/src/screen.cpp @@ -17,6 +17,7 @@ #include "ctrl_window.hpp" #include "src_window.hpp" +#include "config_window.hpp" #include "camera.hpp" #include "media_panel.hpp" @@ -212,12 +213,41 @@ ftl::gui::Screen::Screen(ftl::Configurable *proot, ftl::net::Universe *pnet, ftl popup->setVisible(false); }); - button = new ToolButton(toolbar, ENTYPO_ICON_COG); - button->setIconExtraScale(1.5f); - button->setTheme(toolbuttheme); - button->setTooltip("Settings"); - button->setFixedSize(Vector2i(40,40)); - button->setPosition(Vector2i(5,height()-50)); + popbutton = new PopupButton(innertool, "", 
ENTYPO_ICON_COG); + popbutton->setIconExtraScale(1.5f); + popbutton->setTheme(toolbuttheme); + popbutton->setTooltip("Settings"); + popbutton->setFixedSize(Vector2i(40,40)); + popbutton->setSide(Popup::Side::Right); + popbutton->setChevronIcon(0); + // popbutton->setPosition(Vector2i(5,height()-50)); + popup = popbutton->popup(); + popup->setLayout(new GroupLayout()); + popup->setTheme(toolbuttheme); + + //net_->onConnect([this,popup](ftl::net::Peer *p) { + { + LOG(INFO) << "NET CONNECT"; + auto node_details = ctrl_->getSlaves(); + std::vector<std::string> node_titles; + + for (auto &d : node_details) { + LOG(INFO) << "ADDING TITLE: " << d.dump(); + auto peer = ftl::UUID(d["id"].get<std::string>()); + auto itembutton = new Button(popup, d["title"].get<std::string>()); + itembutton->setCallback([this,popup,peer]() { + auto config_window = new ConfigWindow(this, ctrl_, peer); + config_window->setTheme(windowtheme); + }); + } + } + //}); + + itembutton = new Button(popup, "Local"); + itembutton->setCallback([this,popup]() { + auto config_window = new ConfigWindow(this, ctrl_); + config_window->setTheme(windowtheme); + }); //configwindow_ = new ConfigWindow(parent, ctrl_); cwindow_ = new ftl::gui::ControlWindow(this, controller); @@ -268,12 +298,9 @@ bool ftl::gui::Screen::initVR() { { HMD_ = nullptr; LOG(ERROR) << "Unable to init VR runtime: " << vr::VR_GetVRInitErrorAsEnglishDescription(eError); + return false; } - uint32_t size_x, size_y; - HMD_->GetRecommendedRenderTargetSize(&size_x, &size_y); - LOG(INFO) << size_x << ", " << size_y; - LOG(INFO) << "\n" << getCameraMatrix(HMD_, vr::Eye_Left); return true; } @@ -349,18 +376,18 @@ bool ftl::gui::Screen::mouseButtonEvent(const nanogui::Vector2i &p, int button, float sx = ((float)p[0] - positionAfterOffset[0]) / mScale; float sy = ((float)p[1] - positionAfterOffset[1]) / mScale; - Eigen::Vector4f camPos; + //Eigen::Vector4f camPos; - try { - camPos = camera_->source()->point(sx,sy).cast<float>(); - } catch(...) 
{ - return true; - } + //try { + //camPos = camera_->source()->point(sx,sy).cast<float>(); + //} catch(...) { + // return true; + //} - camPos *= -1.0f; + //camPos *= -1.0f; //Eigen::Vector4f worldPos = camera_->source()->getPose().cast<float>() * camPos; //lookPoint_ = Eigen::Vector3f(worldPos[0],worldPos[1],worldPos[2]); - LOG(INFO) << "Depth at click = " << -camPos[2]; + //LOG(INFO) << "Depth at click = " << -camPos[2]; return true; } return false; diff --git a/applications/gui/src/screen.hpp b/applications/gui/src/screen.hpp index b78ad8b690017d84950f0e135473731c631492bc..a0ac39d064875bdc5be81e526a95ac9269617c25 100644 --- a/applications/gui/src/screen.hpp +++ b/applications/gui/src/screen.hpp @@ -62,9 +62,6 @@ class Screen : public nanogui::Screen { bool hasVR() const { return false; } #endif - void setDualView(bool v) { show_two_images_ = v; LOG(INFO) << "CLICK"; } - bool getDualView() { return show_two_images_; } - nanogui::Theme *windowtheme; nanogui::Theme *specialtheme; nanogui::Theme *mediatheme; diff --git a/applications/player/src/main.cpp b/applications/player/src/main.cpp index 60d2793c1c4b0484dbb226bff12deebfb6e1fc2e..7eeb6e6bf5f96fd8f32360949c566c93860518db 100644 --- a/applications/player/src/main.cpp +++ b/applications/player/src/main.cpp @@ -77,11 +77,13 @@ int main(int argc, char **argv) { //LOG(INFO) << "Reading packet: (" << (int)spkt.streamID << "," << (int)spkt.channel << ") " << (int)pkt.codec << ", " << (int)pkt.definition; - cv::Mat frame(cv::Size(ftl::codecs::getWidth(pkt.definition),ftl::codecs::getHeight(pkt.definition)), (spkt.channel == Channel::Depth) ? CV_32F : CV_8UC3); + cv::cuda::GpuMat gframe(cv::Size(ftl::codecs::getWidth(pkt.definition),ftl::codecs::getHeight(pkt.definition)), (spkt.channel == Channel::Depth) ? 
CV_32F : CV_8UC3); + cv::Mat frame; createDecoder(pkt); try { - decoder->decode(pkt, frame); + decoder->decode(pkt, gframe); + gframe.download(frame); } catch (std::exception &e) { LOG(INFO) << "Decoder exception: " << e.what(); } diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt index dce81e19c91a41f7e8bba90a2149a6602d73e5aa..b089d4a1aeae9654b50865c7400ca0e381f874e6 100644 --- a/applications/reconstruct/CMakeLists.txt +++ b/applications/reconstruct/CMakeLists.txt @@ -36,6 +36,6 @@ set_property(TARGET ftl-reconstruct PROPERTY CUDA_SEPARABLE_COMPILATION ON) endif() #target_include_directories(cv-node PUBLIC ${PROJECT_SOURCE_DIR}/include) -target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} ftlctrl ftlnet ftlrender) +target_link_libraries(ftl-reconstruct ftlcommon ftlrgbd Threads::Threads ${OpenCV_LIBS} ftlctrl ftlnet ftlrender ftloperators) diff --git a/applications/reconstruct/src/ilw.cpp b/applications/reconstruct/src/ilw.cpp deleted file mode 100644 index 435cd886eba1b83d7530f19f28ccfb09f7e37f3a..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include "ilw.hpp" -#include <ftl/utility/matrix_conversion.hpp> -#include <ftl/rgbd/source.hpp> -#include <ftl/cuda/points.hpp> -#include <loguru.hpp> - -#include "ilw_cuda.hpp" - -using ftl::ILW; -using ftl::detail::ILWData; -using ftl::codecs::Channel; -using ftl::codecs::Channels; -using ftl::rgbd::Format; -using cv::cuda::GpuMat; - -ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) { - -} - -ILW::~ILW() { - -} - -bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { - _phase0(fs, stream); - - //for (int i=0; i<2; ++i) { - _phase1(fs, stream); - //for (int j=0; j<3; ++j) { - // _phase2(fs); - //} - - // TODO: Break if no time left - //} - - return true; -} - -bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { - // Make points 
channel... - for (size_t i=0; i<fs.frames.size(); ++i) { - auto &f = fs.frames[i]; - auto *s = fs.sources[i]; - - if (f.empty(Channel::Depth + Channel::Colour)) { - LOG(ERROR) << "Missing required channel"; - continue; - } - - auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); - auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); - ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream); - - // TODO: Create energy vector texture and clear it - // Create energy and clear it - - // Convert colour from BGR to BGRA if needed - if (f.get<GpuMat>(Channel::Colour).type() == CV_8UC3) { - // Convert to 4 channel colour - auto &col = f.get<GpuMat>(Channel::Colour); - GpuMat tmp(col.size(), CV_8UC4); - cv::cuda::swap(col, tmp); - cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); - } - - f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); - f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size())); - f.createTexture<uchar4>(Channel::Colour); - - cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); - - f.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); - f.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream); - } - - return true; -} - -bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) { - // Run correspondence kernel to create an energy vector - - // For each camera combination - for (size_t i=0; i<fs.frames.size(); ++i) { - for (size_t j=0; j<fs.frames.size(); ++j) { - if (i == j) continue; - - LOG(INFO) << "Running phase1"; - - auto &f1 = fs.frames[i]; - auto &f2 = fs.frames[j]; - //auto s1 = fs.frames[i]; - auto s2 = fs.sources[j]; - - auto pose = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse()); - - try { - //Calculate energy vector to best correspondence - 
ftl::cuda::correspondence_energy_vector( - f1.getTexture<float4>(Channel::Points), - f2.getTexture<float4>(Channel::Points), - f1.getTexture<uchar4>(Channel::Colour), - f2.getTexture<uchar4>(Channel::Colour), - // TODO: Add normals and other things... - f1.getTexture<float4>(Channel::EnergyVector), - f1.getTexture<float>(Channel::Energy), - pose, - s2->parameters(), - stream - ); - } catch (ftl::exception &e) { - LOG(ERROR) << "Exception in correspondence: " << e.what(); - } - - LOG(INFO) << "Correspondences done... " << i; - } - } - - return true; -} - -bool ILW::_phase2(ftl::rgbd::FrameSet &fs) { - // Run energies and motion kernel - - return true; -} diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu deleted file mode 100644 index 999b5ec9031eed08fc4bc527471961c3236d7445..0000000000000000000000000000000000000000 --- a/applications/reconstruct/src/ilw.cu +++ /dev/null @@ -1,90 +0,0 @@ -#include "ilw_cuda.hpp" - -using ftl::cuda::TextureObject; -using ftl::rgbd::Camera; - -#define WARP_SIZE 32 -#define T_PER_BLOCK 8 -#define FULL_MASK 0xffffffff - -__device__ inline float warpMax(float e) { - for (int i = WARP_SIZE/2; i > 0; i /= 2) { - const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE); - e = max(e, other); - } - return e; -} - -__global__ void correspondence_energy_vector_kernel( - TextureObject<float4> p1, - TextureObject<float4> p2, - TextureObject<uchar4> c1, - TextureObject<uchar4> c2, - TextureObject<float4> vout, - TextureObject<float> eout, - float4x4 pose2, // Inverse - Camera cam2) { - - // Each warp picks point in p1 - const int tid = (threadIdx.x + threadIdx.y * blockDim.x); - const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE; - const int y = blockIdx.y*blockDim.y + threadIdx.y; - - const float3 world1 = make_float3(p1.tex2D(x, y)); - if (world1.x == MINF) { - vout(x,y) = make_float4(0.0f); - eout(x,y) = 0.0f; - return; - } - const float3 camPos2 = pose2 * world1; - const uint2 screen2 = 
cam2.camToScreen<uint2>(camPos2); - - const int upsample = 8; - - // Project to p2 using cam2 - // Each thread takes a possible correspondence and calculates a weighting - const int lane = tid % WARP_SIZE; - for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) { - const float u = (i % upsample) - (upsample / 2); - const float v = (i / upsample) - (upsample / 2); - - const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v)); - if (world2.x == MINF) continue; - - // Determine degree of correspondence - const float confidence = 1.0f / length(world1 - world2); - const float maxconf = warpMax(confidence); - - // This thread has best confidence value - if (maxconf == confidence) { - vout(x,y) = vout.tex2D(x, y) + make_float4( - (world1.x - world2.x) * maxconf, - (world1.y - world2.y) * maxconf, - (world1.z - world2.z) * maxconf, - maxconf); - eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf; - } - } -} - -void ftl::cuda::correspondence_energy_vector( - TextureObject<float4> &p1, - TextureObject<float4> &p2, - TextureObject<uchar4> &c1, - TextureObject<uchar4> &c2, - TextureObject<float4> &vout, - TextureObject<float> &eout, - float4x4 &pose2, - const Camera &cam2, - cudaStream_t stream) { - - const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK); - - printf("COR SIZE %d,%d\n", p1.width(), p1.height()); - - correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>( - p1, p2, c1, c2, vout, eout, pose2, cam2 - ); - cudaSafeCall( cudaGetLastError() ); -} diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp index 93fd75a188b5e48e48e387c5ddf96ea43bcb1eec..dfa43267ed5d3ac09309a466b150c0498fb72967 100644 --- a/applications/reconstruct/src/ilw/ilw.cpp +++ b/applications/reconstruct/src/ilw/ilw.cpp @@ -144,7 +144,7 @@ ILW::~ILW() { bool ILW::process(ftl::rgbd::FrameSet &fs) { if (!enabled_) return false; - 
fs.upload(Channel::Colour + Channel::Depth, stream_); + //fs.upload(Channel::Colour + Channel::Depth, stream_); _phase0(fs, stream_); params_.range = value("search_range", 0.05f); diff --git a/applications/reconstruct/src/main.cpp b/applications/reconstruct/src/main.cpp index 4d239758b19401e3d749ae34829401e298aba332..d05453f9410ee52e8c5fed610656a42f662bed93 100644 --- a/applications/reconstruct/src/main.cpp +++ b/applications/reconstruct/src/main.cpp @@ -16,9 +16,10 @@ #include <ftl/rgbd/group.hpp> #include <ftl/threads.hpp> #include <ftl/codecs/writer.hpp> +#include <ftl/codecs/reader.hpp> #include "ilw/ilw.hpp" -#include <ftl/render/splat_render.hpp> +#include <ftl/render/tri_render.hpp> #include <fstream> #include <string> @@ -29,6 +30,11 @@ #include <opencv2/opencv.hpp> #include <ftl/net/universe.hpp> +#include <ftl/operators/smoothing.hpp> +#include <ftl/operators/colours.hpp> +#include <ftl/operators/normals.hpp> + +#include <ftl/cuda/normals.hpp> #include <ftl/registration.hpp> #include <cuda_profiler_api.h> @@ -65,34 +71,6 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { return rz * rx * ry; } -static void writeSourceProperties(ftl::codecs::Writer &writer, int id, ftl::rgbd::Source *src) { - ftl::codecs::StreamPacket spkt; - ftl::codecs::Packet pkt; - - spkt.timestamp = 0; - spkt.streamID = id; - spkt.channel = Channel::Calibration; - spkt.channel_count = 1; - pkt.codec = ftl::codecs::codec_t::CALIBRATION; - pkt.definition = ftl::codecs::definition_t::Any; - pkt.block_number = 0; - pkt.block_total = 1; - pkt.flags = 0; - pkt.data = std::move(std::vector<uint8_t>((uint8_t*)&src->parameters(), (uint8_t*)&src->parameters() + sizeof(ftl::rgbd::Camera))); - - writer.write(spkt, pkt); - - spkt.channel = Channel::Pose; - pkt.codec = ftl::codecs::codec_t::POSE; - pkt.definition = ftl::codecs::definition_t::Any; - pkt.block_number = 0; - pkt.block_total = 1; - pkt.flags = 0; - pkt.data = 
std::move(std::vector<uint8_t>((uint8_t*)src->getPose().data(), (uint8_t*)src->getPose().data() + 4*4*sizeof(double))); - - writer.write(spkt, pkt); -} - static void run(ftl::Configurable *root) { Universe *net = ftl::create<Universe>(root, "net"); ftl::ctrl::Slave slave(net, root); @@ -102,6 +80,38 @@ static void run(ftl::Configurable *root) { net->start(); net->waitConnections(); + + // Check paths for an FTL file to load... + auto paths = (*root->get<nlohmann::json>("paths")); + for (auto &x : paths.items()) { + std::string path = x.value().get<std::string>(); + auto eix = path.find_last_of('.'); + auto ext = path.substr(eix+1); + + // Command line path is ftl file + if (ext == "ftl") { + // Create temp reader to count number of sources found in file + std::ifstream file; + file.open(path); + ftl::codecs::Reader reader(file); + reader.begin(); + + int max_stream = 0; + reader.read(reader.getStartTime()+100, [&max_stream](const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt) { + max_stream = max(max_stream, spkt.streamID); + }); + reader.end(); + + LOG(INFO) << "Found " << (max_stream+1) << " sources in " << path; + + int N = root->value("N", 100); + + // For each stream found, add a source object + for (int i=0; i<=min(max_stream,N-1); ++i) { + root->getConfig()["sources"].push_back(nlohmann::json{{"uri",std::string("file://") + path + std::string("#") + std::to_string(i)}}); + } + } + } // Create a vector of all input RGB-Depth sources auto sources = ftl::createArray<Source>(root, "sources", net); @@ -165,7 +175,7 @@ static void run(ftl::Configurable *root) { //ftl::voxhash::SceneRep *scene = ftl::create<ftl::voxhash::SceneRep>(root, "voxelhash"); ftl::rgbd::Streamer *stream = ftl::create<ftl::rgbd::Streamer>(root, "stream", net); ftl::rgbd::VirtualSource *virt = ftl::create<ftl::rgbd::VirtualSource>(root, "virtual"); - ftl::render::Splatter *splat = ftl::create<ftl::render::Splatter>(root, "renderer", &scene_B); + ftl::render::Triangular 
*splat = ftl::create<ftl::render::Triangular>(root, "renderer", &scene_B); ftl::rgbd::Group *group = new ftl::rgbd::Group; ftl::ILW *align = ftl::create<ftl::ILW>(root, "merge"); @@ -174,7 +184,7 @@ static void run(ftl::Configurable *root) { // Generate virtual camera render when requested by streamer virt->onRender([splat,virt,&scene_B,align](ftl::rgbd::Frame &out) { - virt->setTimestamp(scene_B.timestamp); + //virt->setTimestamp(scene_B.timestamp); // Do we need to convert Lab to BGR? if (align->isLabColour()) { for (auto &f : scene_B.frames) { @@ -215,14 +225,15 @@ static void run(ftl::Configurable *root) { fileout.open(std::string(timestamp) + ".ftl"); writer.begin(); + group->addRawCallback(std::function(recorder)); // TODO: Write pose+calibration+config packets auto sources = group->sources(); for (int i=0; i<sources.size(); ++i) { - writeSourceProperties(writer, i, sources[i]); + //writeSourceProperties(writer, i, sources[i]); + sources[i]->inject(Channel::Calibration, sources[i]->parameters(), Channel::Left, sources[i]->getCapabilities()); + sources[i]->inject(sources[i]->getPose()); } - - group->addRawCallback(std::function(recorder)); } else { group->removeRawCallback(recorder); writer.end(); @@ -238,9 +249,19 @@ static void run(ftl::Configurable *root) { bool busy = false; + // Create the source depth map pipeline + auto *pipeline1 = ftl::config::create<ftl::operators::Graph>(root, "pre_filters"); + pipeline1->append<ftl::operators::ColourChannels>("colour"); // Convert BGR to BGRA + pipeline1->append<ftl::operators::HFSmoother>("hfnoise"); // Remove high-frequency noise + pipeline1->append<ftl::operators::Normals>("normals"); // Estimate surface normals + pipeline1->append<ftl::operators::SmoothChannel>("smoothing"); // Generate a smoothing channel + pipeline1->append<ftl::operators::AdaptiveMLS>("mls"); // Perform MLS (using smoothing channel) + // Alignment + + group->setLatency(4); group->setName("ReconGroup"); - 
group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls](ftl::rgbd::FrameSet &fs) -> bool { + group->sync([splat,virt,&busy,&slave,&scene_A,&scene_B,&align,controls,pipeline1](ftl::rgbd::FrameSet &fs) -> bool { //cudaSetDevice(scene->getCUDADevice()); //if (slave.isPaused()) return true; @@ -255,17 +276,17 @@ static void run(ftl::Configurable *root) { // Swap the entire frameset to allow rapid return fs.swapTo(scene_A); - ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align](int id) { + ftl::pool.push([&scene_B,&scene_A,&busy,&slave,&align, pipeline1](int id) { //cudaSetDevice(scene->getCUDADevice()); // TODO: Release frameset here... //cudaSafeCall(cudaStreamSynchronize(scene->getIntegrationStream())); UNIQUE_LOCK(scene_A.mtx, lk); - // Send all frames to GPU, block until done? - //scene_A.upload(Channel::Colour + Channel::Depth); // TODO: (Nick) Add scene stream. + pipeline1->apply(scene_A, scene_A, 0); align->process(scene_A); + // TODO: To use second GPU, could do a download, swap, device change, // then upload to other device. Or some direct device-2-device copy. 
scene_A.swapTo(scene_B); diff --git a/applications/recorder/src/main.cpp b/applications/recorder/src/main.cpp index a6f8926a5afa5d02112ca943c6a48af89a17c5fa..c00daa9755a931dfc894e191746c8bfe52558cc3 100644 --- a/applications/recorder/src/main.cpp +++ b/applications/recorder/src/main.cpp @@ -62,34 +62,6 @@ static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { return rz * rx * ry; } -static void writeSourceProperties(ftl::codecs::Writer &writer, int id, ftl::rgbd::Source *src) { - ftl::codecs::StreamPacket spkt; - ftl::codecs::Packet pkt; - - spkt.timestamp = 0; - spkt.streamID = id; - spkt.channel = Channel::Calibration; - spkt.channel_count = 1; - pkt.codec = ftl::codecs::codec_t::CALIBRATION; - pkt.definition = ftl::codecs::definition_t::Any; - pkt.block_number = 0; - pkt.block_total = 1; - pkt.flags = 0; - pkt.data = std::move(std::vector<uint8_t>((uint8_t*)&src->parameters(), (uint8_t*)&src->parameters() + sizeof(ftl::rgbd::Camera))); - - writer.write(spkt, pkt); - - spkt.channel = Channel::Pose; - pkt.codec = ftl::codecs::codec_t::POSE; - pkt.definition = ftl::codecs::definition_t::Any; - pkt.block_number = 0; - pkt.block_total = 1; - pkt.flags = 0; - pkt.data = std::move(std::vector<uint8_t>((uint8_t*)src->getPose().data(), (uint8_t*)src->getPose().data() + 4*4*sizeof(double))); - - writer.write(spkt, pkt); -} - static void run(ftl::Configurable *root) { Universe *net = ftl::create<Universe>(root, "net"); ftl::ctrl::Slave slave(net, root); @@ -199,7 +171,9 @@ static void run(ftl::Configurable *root) { // TODO: Write pose+calibration+config packets auto sources = group->sources(); for (int i=0; i<sources.size(); ++i) { - writeSourceProperties(writer, i, sources[i]); + //writeSourceProperties(writer, i, sources[i]); + sources[i]->inject(Channel::Calibration, sources[i]->parameters(), Channel::Left, sources[i]->getCapabilities()); + sources[i]->inject(sources[i]->getPose()); } } else { //group->removeRawCallback(recorder); diff --git 
a/components/codecs/include/ftl/codecs/bitrates.hpp b/components/codecs/include/ftl/codecs/bitrates.hpp index 976e039fc07b3dd15e7f4ede219ed06c27cff0fe..d34ede8adb88c647949051853dde33f81221a8d2 100644 --- a/components/codecs/include/ftl/codecs/bitrates.hpp +++ b/components/codecs/include/ftl/codecs/bitrates.hpp @@ -22,7 +22,11 @@ enum struct codec_t : uint8_t { JSON = 100, // A JSON string CALIBRATION, // Camera parameters object POSE, // 4x4 eigen matrix - RAW // Some unknown binary format (msgpack?) + MSGPACK, + STRING, // Null terminated string + RAW, // Some unknown binary format + + Any = 255 }; /** @@ -38,9 +42,11 @@ enum struct definition_t : uint8_t { LD360 = 6, Any = 7, - HTC_VIVE = 8 + HTC_VIVE = 8, // TODO: Add audio definitions + + Invalid }; /** @@ -85,6 +91,8 @@ static const preset_t kPresetLQThreshold = 4; static const preset_t kPresetHTCVive = -1; +static const preset_t kPresetMinimum = -1; + /** * Represents the details of each preset codec configuration. */ @@ -123,6 +131,8 @@ preset_t findPreset(definition_t, definition_t); */ preset_t findPreset(definition_t); +preset_t findPreset(size_t width, size_t height); + } } diff --git a/components/codecs/include/ftl/codecs/channels.hpp b/components/codecs/include/ftl/codecs/channels.hpp index d1a0e265b0548adc1fc510947f12d408ed3b2840..85cd9fffc45ddcda1b6cc13a160c74c30797b804 100644 --- a/components/codecs/include/ftl/codecs/channels.hpp +++ b/components/codecs/include/ftl/codecs/channels.hpp @@ -17,12 +17,14 @@ enum struct Channel : int { Disparity = 3, Depth2 = 3, Deviation = 4, + Screen = 4, Normals = 5, // 32FC4 Points = 6, // 32FC4 Confidence = 7, // 32F Contribution = 7, // 32F EnergyVector = 8, // 32FC4 Flow = 9, // 32F + Smoothing = 9, // 32F Energy = 10, // 32F Mask = 11, // 32U Density = 12, // 32F @@ -36,7 +38,8 @@ enum struct Channel : int { Configuration = 64, // JSON Data Calibration = 65, // Camera Parameters Object Pose = 66, // Eigen::Matrix4d - Data = 67 // Custom data, any codec. 
+ Index = 67, + Data = 2048 // Custom data, any codec. }; inline bool isVideo(Channel c) { return (int)c < 32; }; @@ -122,6 +125,7 @@ inline bool isFloatChannel(ftl::codecs::Channel chan) { case Channel::Depth : //case Channel::Normals : case Channel::Confidence: + case Channel::Flow : case Channel::Density: case Channel::Energy : return true; default : return false; diff --git a/components/codecs/include/ftl/codecs/decoder.hpp b/components/codecs/include/ftl/codecs/decoder.hpp index e6883680f34085f8b39fbcd02959b19ea4a2ca9e..7684990e204a1231af0cd8c114eabbf4ef9290d5 100644 --- a/components/codecs/include/ftl/codecs/decoder.hpp +++ b/components/codecs/include/ftl/codecs/decoder.hpp @@ -35,7 +35,7 @@ class Decoder { Decoder() {}; virtual ~Decoder() {}; - virtual bool decode(const ftl::codecs::Packet &pkt, cv::Mat &out)=0; + virtual bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out)=0; virtual bool accepts(const ftl::codecs::Packet &)=0; }; diff --git a/components/codecs/include/ftl/codecs/encoder.hpp b/components/codecs/include/ftl/codecs/encoder.hpp index 560810b84060b3b062b426614da40836634fdee5..9c3aa8fefc64810bf7660e323b44a3c4440d5098 100644 --- a/components/codecs/include/ftl/codecs/encoder.hpp +++ b/components/codecs/include/ftl/codecs/encoder.hpp @@ -33,7 +33,8 @@ enum class device_t { */ Encoder *allocateEncoder( ftl::codecs::definition_t maxdef=ftl::codecs::definition_t::HD1080, - ftl::codecs::device_t dev=ftl::codecs::device_t::Any); + ftl::codecs::device_t dev=ftl::codecs::device_t::Any, + ftl::codecs::codec_t codec=ftl::codecs::codec_t::Any); /** * Release an encoder to be reused by some other stream. @@ -47,7 +48,7 @@ void free(Encoder *&e); class Encoder { public: friend Encoder *allocateEncoder(ftl::codecs::definition_t, - ftl::codecs::device_t); + ftl::codecs::device_t, ftl::codecs::codec_t); friend void free(Encoder *&); public: @@ -59,7 +60,7 @@ class Encoder { /** * Wrapper encode to allow use of presets. 
*/ - virtual bool encode(const cv::Mat &in, ftl::codecs::preset_t preset, + virtual bool encode(const cv::cuda::GpuMat &in, ftl::codecs::preset_t preset, const std::function<void(const ftl::codecs::Packet&)> &cb); /** @@ -76,7 +77,7 @@ class Encoder { * @return True if succeeded with encoding. */ virtual bool encode( - const cv::Mat &in, + const cv::cuda::GpuMat &in, ftl::codecs::definition_t definition, ftl::codecs::bitrate_t bitrate, const std::function<void(const ftl::codecs::Packet&)> &cb)=0; @@ -86,6 +87,8 @@ class Encoder { virtual void reset() {} + virtual bool supports(ftl::codecs::codec_t codec)=0; + protected: bool available; const ftl::codecs::definition_t max_definition; diff --git a/components/codecs/include/ftl/codecs/h264.hpp b/components/codecs/include/ftl/codecs/h264.hpp new file mode 100644 index 0000000000000000000000000000000000000000..17f649c52220f7ac8b00e6d4ed9b78e18f2847f8 --- /dev/null +++ b/components/codecs/include/ftl/codecs/h264.hpp @@ -0,0 +1,70 @@ +#ifndef _FTL_CODECS_H264_HPP_ +#define _FTL_CODECS_H264_HPP_ + +namespace ftl { +namespace codecs { + +/** + * H.264 codec utility functions. + */ +namespace h264 { + +/** + * H264 Network Abstraction Layer Unit types. + */ +enum class NALType : int { + UNSPECIFIED_0 = 0, + CODED_SLICE_NON_IDR = 1, + CODED_SLICE_PART_A = 2, + CODED_SLICE_PART_B = 3, + CODED_SLICE_PART_C = 4, + CODED_SLICE_IDR = 5, + SEI = 6, + SPS = 7, + PPS = 8, + ACCESS_DELIMITER = 9, + EO_SEQ = 10, + EO_STREAM = 11, + FILTER_DATA = 12, + SPS_EXT = 13, + PREFIX_NAL_UNIT = 14, + SUBSET_SPS = 15, + RESERVED_16 = 16, + RESERVED_17 = 17, + RESERVED_18 = 18, + CODED_SLICE_AUX = 19, + CODED_SLICE_EXT = 20, + CODED_SLICE_DEPTH = 21, + RESERVED_22 = 22, + RESERVED_23 = 23, + UNSPECIFIED_24 = 24, + UNSPECIFIED_25, + UNSPECIFIED_26, + UNSPECIFIED_27, + UNSPECIFIED_28, + UNSPECIFIED_29, + UNSPECIFIED_30, + UNSPECIFIED_31 +}; + +/** + * Extract the NAL unit type from the first NAL header. 
+ * With NvPipe, the 5th byte contains the NAL Unit header. + */ +inline NALType getNALType(const std::vector<uint8_t> &data) { + return static_cast<NALType>(data[4] & 0x1F); +} + +/** + * Check the H264 bitstream for an I-Frame. With NvPipe, all I-Frames start + * with a SPS NAL unit so just check for this. + */ +inline bool isIFrame(const std::vector<uint8_t> &data) { + return getNALType(data) == NALType::SPS; +} + +} +} +} + +#endif // _FTL_CODECS_H264_HPP_ diff --git a/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp b/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp index 75807f05ee45c66d7a8b231c5d755190754f63df..987915ea2595c6b1e88cf8959b239cb7eb7a3f09 100644 --- a/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp +++ b/components/codecs/include/ftl/codecs/nvpipe_decoder.hpp @@ -14,7 +14,7 @@ class NvPipeDecoder : public ftl::codecs::Decoder { NvPipeDecoder(); ~NvPipeDecoder(); - bool decode(const ftl::codecs::Packet &pkt, cv::Mat &out); + bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) override; bool accepts(const ftl::codecs::Packet &pkt); @@ -24,6 +24,8 @@ class NvPipeDecoder : public ftl::codecs::Decoder { ftl::codecs::definition_t last_definition_; MUTEX mutex_; bool seen_iframe_; + cv::cuda::GpuMat tmp_; + cv::cuda::Stream stream_; }; } diff --git a/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp b/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp index 3f8de66cf7b77265e245a30da4522936058426c0..5d04068c53cf3b46dee73c63cf8e2fcf674f148d 100644 --- a/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp +++ b/components/codecs/include/ftl/codecs/nvpipe_encoder.hpp @@ -13,27 +13,35 @@ class NvPipeEncoder : public ftl::codecs::Encoder { ftl::codecs::definition_t mindef); ~NvPipeEncoder(); - bool encode(const cv::Mat &in, ftl::codecs::preset_t preset, + bool encode(const cv::cuda::GpuMat &in, ftl::codecs::preset_t preset, const std::function<void(const ftl::codecs::Packet&)> &cb) { return 
Encoder::encode(in, preset, cb); } - bool encode(const cv::Mat &in, ftl::codecs::definition_t definition, ftl::codecs::bitrate_t bitrate, + bool encode(const cv::cuda::GpuMat &in, ftl::codecs::definition_t definition, ftl::codecs::bitrate_t bitrate, const std::function<void(const ftl::codecs::Packet&)>&) override; //bool encode(const cv::cuda::GpuMat &in, std::vector<uint8_t> &out, bitrate_t bix, bool); void reset(); + bool supports(ftl::codecs::codec_t codec) override; + + static constexpr int kFlagRGB = 0x00000001; + private: NvPipe *nvenc_; definition_t current_definition_; bool is_float_channel_; bool was_reset_; - - bool _encoderMatch(const cv::Mat &in, definition_t def); - bool _createEncoder(const cv::Mat &in, definition_t def, bitrate_t rate); - ftl::codecs::definition_t _verifiedDefinition(ftl::codecs::definition_t def, const cv::Mat &in); + ftl::codecs::codec_t preference_; + cv::cuda::GpuMat tmp_; + cv::cuda::GpuMat tmp2_; + cv::cuda::Stream stream_; + + bool _encoderMatch(const cv::cuda::GpuMat &in, definition_t def); + bool _createEncoder(const cv::cuda::GpuMat &in, definition_t def, bitrate_t rate); + ftl::codecs::definition_t _verifiedDefinition(ftl::codecs::definition_t def, const cv::cuda::GpuMat &in); }; } diff --git a/components/codecs/include/ftl/codecs/opencv_decoder.hpp b/components/codecs/include/ftl/codecs/opencv_decoder.hpp index c4ef5ab057b1e60da12f36fd603642ea16026888..53b61f683bc1d0a93a9c21049e0d312c133c83f7 100644 --- a/components/codecs/include/ftl/codecs/opencv_decoder.hpp +++ b/components/codecs/include/ftl/codecs/opencv_decoder.hpp @@ -11,9 +11,12 @@ class OpenCVDecoder : public ftl::codecs::Decoder { OpenCVDecoder(); ~OpenCVDecoder(); - bool decode(const ftl::codecs::Packet &pkt, cv::Mat &out); + bool decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) override; bool accepts(const ftl::codecs::Packet &pkt); + + private: + cv::Mat tmp_; }; } diff --git a/components/codecs/include/ftl/codecs/opencv_encoder.hpp 
b/components/codecs/include/ftl/codecs/opencv_encoder.hpp index a34f8811de62277b5be53dd329517deb3ed06af3..82b0a5cccf32398ed6e94d9a7a84d0c0ee1b9f25 100644 --- a/components/codecs/include/ftl/codecs/opencv_encoder.hpp +++ b/components/codecs/include/ftl/codecs/opencv_encoder.hpp @@ -20,14 +20,16 @@ class OpenCVEncoder : public ftl::codecs::Encoder { ftl::codecs::definition_t mindef); ~OpenCVEncoder(); - bool encode(const cv::Mat &in, ftl::codecs::preset_t preset, + bool encode(const cv::cuda::GpuMat &in, ftl::codecs::preset_t preset, const std::function<void(const ftl::codecs::Packet&)> &cb) { return Encoder::encode(in, preset, cb); } - bool encode(const cv::Mat &in, ftl::codecs::definition_t definition, ftl::codecs::bitrate_t bitrate, + bool encode(const cv::cuda::GpuMat &in, ftl::codecs::definition_t definition, ftl::codecs::bitrate_t bitrate, const std::function<void(const ftl::codecs::Packet&)>&) override; + bool supports(ftl::codecs::codec_t codec) override; + //bool encode(const cv::cuda::GpuMat &in, std::vector<uint8_t> &out, bitrate_t bix, bool); private: @@ -38,6 +40,7 @@ class OpenCVEncoder : public ftl::codecs::Encoder { std::atomic<int> jobs_; std::mutex job_mtx_; std::condition_variable job_cv_; + cv::Mat tmp_; bool _encodeBlock(const cv::Mat &in, ftl::codecs::Packet &pkt, ftl::codecs::bitrate_t); }; diff --git a/components/codecs/include/ftl/codecs/packet.hpp b/components/codecs/include/ftl/codecs/packet.hpp index edddd205a728e66943e6362b0d24b865272de48e..f2fa53c8fa860527f1939b61c4962b306da89043 100644 --- a/components/codecs/include/ftl/codecs/packet.hpp +++ b/components/codecs/include/ftl/codecs/packet.hpp @@ -16,7 +16,14 @@ namespace codecs { */ struct Header { const char magic[4] = {'F','T','L','F'}; - uint8_t version = 1; + uint8_t version = 3; +}; + +/** + * Version 2 header padding for potential indexing use. 
+ */ +struct IndexHeader { + int64_t reserved[8]; }; /** @@ -50,6 +57,16 @@ struct StreamPacket { MSGPACK_DEFINE(timestamp, streamID, channel_count, channel); }; +/** + * Combine both packet types into a single packet unit. This pair is always + * saved or transmitted in a stream together. + */ +struct PacketPair { + PacketPair(const StreamPacket &s, const Packet &p) : spkt(s), pkt(p) {} + const StreamPacket &spkt; + const Packet &pkt; +}; + } } diff --git a/components/codecs/include/ftl/codecs/reader.hpp b/components/codecs/include/ftl/codecs/reader.hpp index cdc50cad30bc07fcc7793b24fb4daf1b308b03a8..c3ea0adc601ec0dd01d0afa4af12154d7d5458b8 100644 --- a/components/codecs/include/ftl/codecs/reader.hpp +++ b/components/codecs/include/ftl/codecs/reader.hpp @@ -41,6 +41,7 @@ class Reader { bool end(); inline int64_t getStartTime() const { return timestart_; }; + inline int version() const { return version_; } private: std::istream *stream_; @@ -49,6 +50,7 @@ class Reader { bool has_data_; int64_t timestart_; bool playing_; + int version_; MUTEX mtx_; diff --git a/components/codecs/include/ftl/codecs/writer.hpp b/components/codecs/include/ftl/codecs/writer.hpp index 044624ba837f17c83e4a22a16787ec86edd4ad67..94d82f1ecbfb697bdbad35acc5a4cf29dad22410 100644 --- a/components/codecs/include/ftl/codecs/writer.hpp +++ b/components/codecs/include/ftl/codecs/writer.hpp @@ -5,6 +5,7 @@ #include <msgpack.hpp> //#include <Eigen/Eigen> +#include <ftl/threads.hpp> #include <ftl/codecs/packet.hpp> namespace ftl { @@ -24,6 +25,7 @@ class Writer { msgpack::sbuffer buffer_; int64_t timestart_; bool active_; + MUTEX mutex_; }; } diff --git a/components/codecs/src/bitrates.cpp b/components/codecs/src/bitrates.cpp index 035e850d1ff20f5e113db2f7004b85c8bbea3040..45a5057687f8add5cbfdaf02718e880a3361bd40 100644 --- a/components/codecs/src/bitrates.cpp +++ b/components/codecs/src/bitrates.cpp @@ -41,7 +41,8 @@ static const Resolution resolutions[] = { 854, 480, // SD480 640, 360, // LD360 0, 
0, // ANY - 1852, 2056 // HTC_VIVE + 1852, 2056, // HTC_VIVE + 0, 0 }; int ftl::codecs::getWidth(definition_t d) { @@ -57,4 +58,32 @@ const CodecPreset &ftl::codecs::getPreset(preset_t p) { if (p > kPresetWorst) return presets[kPresetWorst]; if (p < kPresetBest) return presets[kPresetBest]; return presets[p]; -}; +} + +preset_t ftl::codecs::findPreset(size_t width, size_t height) { + int min_error = std::numeric_limits<int>::max(); + + // Find best definition + int best_def = (int)definition_t::Invalid; + + for (int i=0; i<(int)definition_t::Invalid; ++i) { + int dw = resolutions[i].width - width; + int dh = resolutions[i].height - height; + int error = dw*dw + dh*dh; + if (error < min_error) { + min_error = error; + best_def = i; + } + } + + // Find preset that matches this best definition + for (preset_t i=kPresetMinimum; i<=kPresetWorst; ++i) { + const auto &preset = getPreset(i); + + if ((int)preset.colour_res == best_def && (int)preset.depth_res == best_def) { + return i; + } + } + + return kPresetWorst; +} diff --git a/components/codecs/src/decoder.cpp b/components/codecs/src/decoder.cpp index 4cd5437d0cb6ea84251b55bdd139a7c0b799f411..a1809b6157d783782a20e32f46f39f89cd26e8c5 100644 --- a/components/codecs/src/decoder.cpp +++ b/components/codecs/src/decoder.cpp @@ -10,6 +10,7 @@ Decoder *ftl::codecs::allocateDecoder(const ftl::codecs::Packet &pkt) { switch(pkt.codec) { case codec_t::JPG : case codec_t::PNG : return new ftl::codecs::OpenCVDecoder; + case codec_t::H264 : case codec_t::HEVC : return new ftl::codecs::NvPipeDecoder; } diff --git a/components/codecs/src/encoder.cpp b/components/codecs/src/encoder.cpp index fdacc89596668937f8f465995f52f0955bbded01..9a7eac72def3a20b966e4332d45d8073f57c47f6 100644 --- a/components/codecs/src/encoder.cpp +++ b/components/codecs/src/encoder.cpp @@ -11,6 +11,7 @@ using ftl::codecs::bitrate_t; using ftl::codecs::definition_t; using ftl::codecs::preset_t; using ftl::codecs::device_t; +using ftl::codecs::codec_t; using 
ftl::codecs::kPresetBest; using ftl::codecs::kPresetWorst; using ftl::codecs::kPresetLQThreshold; @@ -34,7 +35,7 @@ using namespace ftl::codecs::internal; static MUTEX mutex; Encoder *ftl::codecs::allocateEncoder(ftl::codecs::definition_t maxdef, - ftl::codecs::device_t dev) { + ftl::codecs::device_t dev, ftl::codecs::codec_t codec) { UNIQUE_LOCK(mutex, lk); if (!has_been_init) init_encoders(); @@ -43,6 +44,7 @@ Encoder *ftl::codecs::allocateEncoder(ftl::codecs::definition_t maxdef, if (!e->available) continue; if (dev != device_t::Any && dev != e->device) continue; if (maxdef != definition_t::Any && (maxdef < e->max_definition || maxdef > e->min_definition)) continue; + if (codec != codec_t::Any && !e->supports(codec)) continue; e->available = false; return e; @@ -68,11 +70,11 @@ Encoder::~Encoder() { } -bool Encoder::encode(const cv::Mat &in, preset_t preset, +bool Encoder::encode(const cv::cuda::GpuMat &in, preset_t preset, const std::function<void(const ftl::codecs::Packet&)> &cb) { const auto &settings = ftl::codecs::getPreset(preset); const definition_t definition = (in.type() == CV_32F) ? settings.depth_res : settings.colour_res; const bitrate_t bitrate = (in.type() == CV_32F) ? 
settings.depth_qual : settings.colour_qual; - LOG(INFO) << "Encode definition: " << (int)definition; + return encode(in, definition, bitrate, cb); } diff --git a/components/codecs/src/nvpipe_decoder.cpp b/components/codecs/src/nvpipe_decoder.cpp index 74519a8f2f3b1b5c8e4eb13ea23ff89778402825..77a3105f88b84f2b9c00f5dba152bbc9814c70db 100644 --- a/components/codecs/src/nvpipe_decoder.cpp +++ b/components/codecs/src/nvpipe_decoder.cpp @@ -4,6 +4,7 @@ #include <ftl/cuda_util.hpp> #include <ftl/codecs/hevc.hpp> +#include <ftl/codecs/h264.hpp> //#include <cuda_runtime.h> #include <opencv2/core/cuda/common.hpp> @@ -21,36 +22,10 @@ NvPipeDecoder::~NvPipeDecoder() { } } -void cropAndScaleUp(cv::Mat &in, cv::Mat &out) { - CHECK(in.type() == out.type()); - - auto isize = in.size(); - auto osize = out.size(); - cv::Mat tmp; - - if (isize != osize) { - double x_scale = ((double) isize.width) / osize.width; - double y_scale = ((double) isize.height) / osize.height; - double x_scalei = 1.0 / x_scale; - double y_scalei = 1.0 / y_scale; - cv::Size sz_crop; - - // assume downscaled image - if (x_scalei > y_scalei) { - sz_crop = cv::Size(isize.width, isize.height * x_scale); - } else { - sz_crop = cv::Size(isize.width * y_scale, isize.height); - } - - tmp = in(cv::Rect(cv::Point2i(0, 0), sz_crop)); - cv::resize(tmp, out, osize); - } -} - -bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) { +bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) { cudaSetDevice(0); UNIQUE_LOCK(mutex_,lk); - if (pkt.codec != codec_t::HEVC) return false; + if (pkt.codec != codec_t::HEVC && pkt.codec != codec_t::H264) return false; bool is_float_frame = out.type() == CV_32F; // Is the previous decoder still valid for current resolution and type? @@ -68,7 +43,7 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) { if (nv_decoder_ == nullptr) { nv_decoder_ = NvPipe_CreateDecoder( (is_float_frame) ? 
NVPIPE_UINT16 : NVPIPE_RGBA32, - NVPIPE_HEVC, + (pkt.codec == codec_t::HEVC) ? NVPIPE_HEVC : NVPIPE_H264, ftl::codecs::getWidth(pkt.definition), ftl::codecs::getHeight(pkt.definition)); if (!nv_decoder_) { @@ -82,45 +57,58 @@ bool NvPipeDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) { } // TODO: (Nick) Move to member variable to prevent re-creation - cv::Mat tmp(cv::Size(ftl::codecs::getWidth(pkt.definition),ftl::codecs::getHeight(pkt.definition)), (is_float_frame) ? CV_16U : CV_8UC4); + tmp_.create(cv::Size(ftl::codecs::getWidth(pkt.definition),ftl::codecs::getHeight(pkt.definition)), (is_float_frame) ? CV_16U : CV_8UC4); + // Check for an I-Frame if (pkt.codec == ftl::codecs::codec_t::HEVC) { - // Obtain NAL unit type if (ftl::codecs::hevc::isIFrame(pkt.data)) seen_iframe_ = true; + } else if (pkt.codec == ftl::codecs::codec_t::H264) { + if (ftl::codecs::h264::isIFrame(pkt.data)) seen_iframe_ = true; } + // No I-Frame yet so don't attempt to decode P-Frames. if (!seen_iframe_) return false; - int rc = NvPipe_Decode(nv_decoder_, pkt.data.data(), pkt.data.size(), tmp.data, tmp.cols, tmp.rows); + int rc = NvPipe_Decode(nv_decoder_, pkt.data.data(), pkt.data.size(), tmp_.data, tmp_.cols, tmp_.rows, tmp_.step); if (rc == 0) LOG(ERROR) << "NvPipe decode error: " << NvPipe_GetError(nv_decoder_); if (is_float_frame) { // Is the received frame the same size as requested output? 
if (out.rows == ftl::codecs::getHeight(pkt.definition)) { - tmp.convertTo(out, CV_32FC1, 1.0f/1000.0f); + tmp_.convertTo(out, CV_32FC1, 1.0f/1000.0f, stream_); } else { - cv::cvtColor(tmp, tmp, cv::COLOR_BGRA2BGR); - cv::resize(tmp, out, out.size()); - //cv::Mat tmp2; - //tmp.convertTo(tmp2, CV_32FC1, 1.0f/1000.0f); - //cropAndScaleUp(tmp2, out); + LOG(WARNING) << "Resizing decoded frame from " << tmp_.size() << " to " << out.size(); + // FIXME: This won't work on GPU + tmp_.convertTo(tmp_, CV_32FC1, 1.0f/1000.0f, stream_); + cv::cuda::resize(tmp_, out, out.size(), 0, 0, cv::INTER_NEAREST, stream_); } } else { // Is the received frame the same size as requested output? if (out.rows == ftl::codecs::getHeight(pkt.definition)) { - cv::cvtColor(tmp, out, cv::COLOR_BGRA2BGR); + // Flag 0x1 means frame is in RGB so needs conversion to BGR + if (pkt.flags & 0x1) { + cv::cuda::cvtColor(tmp_, out, cv::COLOR_RGBA2BGR, 0, stream_); + } else { + cv::cuda::cvtColor(tmp_, out, cv::COLOR_BGRA2BGR, 0, stream_); + } } else { - cv::cvtColor(tmp, tmp, cv::COLOR_BGRA2BGR); - cv::resize(tmp, out, out.size()); - //cv::Mat tmp2; - //cv::cvtColor(tmp, tmp2, cv::COLOR_BGRA2BGR); - //cropAndScaleUp(tmp2, out); + LOG(WARNING) << "Resizing decoded frame from " << tmp_.size() << " to " << out.size(); + // FIXME: This won't work on GPU, plus it allocates extra memory... 
+ // Flag 0x1 means frame is in RGB so needs conversion to BGR + if (pkt.flags & 0x1) { + cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_RGBA2BGR, 0, stream_); + } else { + cv::cuda::cvtColor(tmp_, tmp_, cv::COLOR_BGRA2BGR, 0, stream_); + } + cv::cuda::resize(tmp_, out, out.size(), 0.0, 0.0, cv::INTER_LINEAR, stream_); } } + stream_.waitForCompletion(); + return rc > 0; } bool NvPipeDecoder::accepts(const ftl::codecs::Packet &pkt) { - return pkt.codec == codec_t::HEVC; + return pkt.codec == codec_t::HEVC || pkt.codec == codec_t::H264; } diff --git a/components/codecs/src/nvpipe_encoder.cpp b/components/codecs/src/nvpipe_encoder.cpp index 80bde1f9fac0dc63fc68db845b7e53084f6c59ec..132a3209ad0849dd76f1a5f7438eba8f5655b854 100644 --- a/components/codecs/src/nvpipe_encoder.cpp +++ b/components/codecs/src/nvpipe_encoder.cpp @@ -20,6 +20,7 @@ NvPipeEncoder::NvPipeEncoder(definition_t maxdef, current_definition_ = definition_t::HD1080; is_float_channel_ = false; was_reset_ = false; + preference_ = codec_t::Any; } NvPipeEncoder::~NvPipeEncoder() { @@ -30,8 +31,16 @@ void NvPipeEncoder::reset() { was_reset_ = true; } +bool NvPipeEncoder::supports(ftl::codecs::codec_t codec) { + switch (codec) { + case codec_t::H264: + case codec_t::HEVC: preference_ = codec; return true; + default: return false; + } +} + /* Check preset resolution is not better than actual resolution. 
*/ -definition_t NvPipeEncoder::_verifiedDefinition(definition_t def, const cv::Mat &in) { +definition_t NvPipeEncoder::_verifiedDefinition(definition_t def, const cv::cuda::GpuMat &in) { int height = ftl::codecs::getHeight(def); // FIXME: Make sure this can't go forever @@ -43,48 +52,27 @@ definition_t NvPipeEncoder::_verifiedDefinition(definition_t def, const cv::Mat return def; } -void scaleDownAndPad(cv::Mat &in, cv::Mat &out) { - const auto isize = in.size(); - const auto osize = out.size(); - cv::Mat tmp; - - if (isize != osize) { - double x_scale = ((double) isize.width) / osize.width; - double y_scale = ((double) isize.height) / osize.height; - double x_scalei = 1.0 / x_scale; - double y_scalei = 1.0 / y_scale; - - if (x_scale > 1.0 || y_scale > 1.0) { - if (x_scale > y_scale) { - cv::resize(in, tmp, cv::Size(osize.width, osize.height * x_scalei)); - } else { - cv::resize(in, tmp, cv::Size(osize.width * y_scalei, osize.height)); - } - } - else { tmp = in; } - - if (tmp.size().width < osize.width || tmp.size().height < osize.height) { - tmp.copyTo(out(cv::Rect(cv::Point2i(0, 0), tmp.size()))); - } - else { out = tmp; } - } -} - -bool NvPipeEncoder::encode(const cv::Mat &in, definition_t odefinition, bitrate_t bitrate, const std::function<void(const ftl::codecs::Packet&)> &cb) { +bool NvPipeEncoder::encode(const cv::cuda::GpuMat &in, definition_t odefinition, bitrate_t bitrate, const std::function<void(const ftl::codecs::Packet&)> &cb) { cudaSetDevice(0); auto definition = odefinition; //_verifiedDefinition(odefinition, in); auto width = ftl::codecs::getWidth(definition); auto height = ftl::codecs::getHeight(definition); - cv::Mat tmp; + if (in.empty()) { + LOG(WARNING) << "No data"; + return false; + } + + cv::cuda::GpuMat tmp; if (width != in.cols || height != in.rows) { LOG(WARNING) << "Mismatch resolution with encoding resolution"; if (in.type() == CV_32F) { - cv::resize(in, tmp, cv::Size(width,height), 0.0, 0.0, cv::INTER_NEAREST); + cv::cuda::resize(in, 
tmp_, cv::Size(width,height), 0.0, 0.0, cv::INTER_NEAREST, stream_); } else { - cv::resize(in, tmp, cv::Size(width,height)); + cv::cuda::resize(in, tmp_, cv::Size(width,height), 0.0, 0.0, cv::INTER_LINEAR, stream_); } + tmp = tmp_; } else { tmp = in; } @@ -101,33 +89,35 @@ bool NvPipeEncoder::encode(const cv::Mat &in, definition_t odefinition, bitrate_ //cv::Mat tmp; if (tmp.type() == CV_32F) { - tmp.convertTo(tmp, CV_16UC1, 1000); + tmp.convertTo(tmp2_, CV_16UC1, 1000, stream_); } else if (tmp.type() == CV_8UC3) { - cv::cvtColor(tmp, tmp, cv::COLOR_BGR2BGRA); + cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGR2RGBA, 0, stream_); + } else if (tmp.type() == CV_8UC4) { + cv::cuda::cvtColor(tmp, tmp2_, cv::COLOR_BGRA2RGBA, 0, stream_); } else { - //in.copyTo(tmp); + LOG(ERROR) << "Unsupported cv::Mat type in Nvidia encoder"; + return false; } - // scale/pad to fit output format - //cv::Mat tmp2 = cv::Mat::zeros(getHeight(odefinition), getWidth(odefinition), tmp.type()); - //scaleDownAndPad(tmp, tmp2); - //std::swap(tmp, tmp2); + // Make sure conversions complete... + stream_.waitForCompletion(); Packet pkt; - pkt.codec = codec_t::HEVC; + pkt.codec = (preference_ == codec_t::Any) ? codec_t::HEVC : preference_; pkt.definition = definition; pkt.block_total = 1; pkt.block_number = 0; + pkt.flags = NvPipeEncoder::kFlagRGB; pkt.data.resize(ftl::codecs::kVideoBufferSize); uint64_t cs = NvPipe_Encode( nvenc_, - tmp.data, - tmp.step, + tmp2_.data, + tmp2_.step, pkt.data.data(), ftl::codecs::kVideoBufferSize, - tmp.cols, - tmp.rows, + tmp2_.cols, + tmp2_.rows, was_reset_ // Force IFrame! 
); pkt.data.resize(cs); @@ -142,14 +132,40 @@ bool NvPipeEncoder::encode(const cv::Mat &in, definition_t odefinition, bitrate_ } } -bool NvPipeEncoder::_encoderMatch(const cv::Mat &in, definition_t def) { +bool NvPipeEncoder::_encoderMatch(const cv::cuda::GpuMat &in, definition_t def) { return ((in.type() == CV_32F && is_float_channel_) || ((in.type() == CV_8UC3 || in.type() == CV_8UC4) && !is_float_channel_)) && current_definition_ == def; } -bool NvPipeEncoder::_createEncoder(const cv::Mat &in, definition_t def, bitrate_t rate) { +static uint64_t calculateBitrate(definition_t def, bitrate_t rate) { + float scale = 1.0f; + switch (rate) { + case bitrate_t::High : break; + case bitrate_t::Standard : scale = 0.5f; break; + case bitrate_t::Low : scale = 0.25f; break; + } + + float bitrate = 1.0f; // Megabits + switch (def) { + case definition_t::UHD4k : bitrate = 24.0f; break; + case definition_t::HTC_VIVE : bitrate = 16.0f; break; + case definition_t::HD1080 : bitrate = 16.0f; break; + case definition_t::HD720 : bitrate = 8.0f; break; + case definition_t::SD576 : + case definition_t::SD480 : bitrate = 4.0f; break; + case definition_t::LD360 : bitrate = 2.0f; break; + default : bitrate = 8.0f; + } + + return uint64_t(bitrate * 1000.0f * 1000.0f * scale); +} + +bool NvPipeEncoder::_createEncoder(const cv::cuda::GpuMat &in, definition_t def, bitrate_t rate) { if (_encoderMatch(in, def) && nvenc_) return true; + uint64_t bitrate = calculateBitrate(def, rate); + LOG(INFO) << "Calculated bitrate: " << bitrate; + if (in.type() == CV_32F) is_float_channel_ = true; else is_float_channel_ = false; current_definition_ = def; @@ -158,9 +174,9 @@ bool NvPipeEncoder::_createEncoder(const cv::Mat &in, definition_t def, bitrate_ const int fps = 1000/ftl::timer::getInterval(); nvenc_ = NvPipe_CreateEncoder( (is_float_channel_) ? NVPIPE_UINT16 : NVPIPE_RGBA32, - NVPIPE_HEVC, + (preference_ == codec_t::Any || preference_ == codec_t::HEVC) ? 
NVPIPE_HEVC : NVPIPE_H264, (is_float_channel_) ? NVPIPE_LOSSLESS : NVPIPE_LOSSY, - 16*1000*1000, + bitrate, fps, // FPS ftl::codecs::getWidth(def), // Output Width ftl::codecs::getHeight(def) // Output Height diff --git a/components/codecs/src/opencv_decoder.cpp b/components/codecs/src/opencv_decoder.cpp index 3bbd82fa21ae4b60fef00b81c409ae9c59311d39..0b9feea46e5925f16ce5ab323747d94d8bdb1d2a 100644 --- a/components/codecs/src/opencv_decoder.cpp +++ b/components/codecs/src/opencv_decoder.cpp @@ -17,8 +17,7 @@ bool OpenCVDecoder::accepts(const ftl::codecs::Packet &pkt) { return (pkt.codec == codec_t::JPG || pkt.codec == codec_t::PNG); } -bool OpenCVDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) { - cv::Mat tmp; +bool OpenCVDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out) { int chunk_dim = std::sqrt(pkt.block_total); int chunk_width = out.cols / chunk_dim; @@ -28,10 +27,12 @@ bool OpenCVDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) { int cx = (pkt.block_number % chunk_dim) * chunk_width; int cy = (pkt.block_number / chunk_dim) * chunk_height; cv::Rect roi(cx,cy,chunk_width,chunk_height); - cv::Mat chunkHead = out(roi); + cv::cuda::GpuMat chunkHead = out(roi); + + //LOG(INFO) << "DECODE JPEG " << (int)pkt.block_number << "/" << chunk_dim; // Decode in temporary buffers to prevent long locks - cv::imdecode(pkt.data, cv::IMREAD_UNCHANGED, &tmp); + cv::imdecode(pkt.data, cv::IMREAD_UNCHANGED, &tmp_); // Apply colour correction to chunk //ftl::rgbd::colourCorrection(tmp_rgb, gamma_, temperature_); @@ -41,21 +42,25 @@ bool OpenCVDecoder::decode(const ftl::codecs::Packet &pkt, cv::Mat &out) { // Can either check JPG/PNG headers or just use pkt definition. 
// Original size so just copy - if (tmp.cols == chunkHead.cols) { - if (!tmp.empty() && tmp.type() == CV_16U && chunkHead.type() == CV_32F) { - tmp.convertTo(chunkHead, CV_32FC1, 1.0f/1000.0f); //(16.0f*10.0f)); - } else if (!tmp.empty() && tmp.type() == CV_8UC3 && chunkHead.type() == CV_8UC3) { - tmp.copyTo(chunkHead); + if (tmp_.cols == chunkHead.cols) { + if (!tmp_.empty() && tmp_.type() == CV_16U && chunkHead.type() == CV_32F) { + tmp_.convertTo(tmp_, CV_32FC1, 1.0f/1000.0f); + chunkHead.upload(tmp_); + } else if (!tmp_.empty() && tmp_.type() == CV_8UC3 && chunkHead.type() == CV_8UC3) { + //tmp_.copyTo(chunkHead); + chunkHead.upload(tmp_); } else { // Silent ignore? } // Downsized so needs a scale up } else { - if (!tmp.empty() && tmp.type() == CV_16U && chunkHead.type() == CV_32F) { - tmp.convertTo(tmp, CV_32FC1, 1.0f/1000.0f); //(16.0f*10.0f)); - cv::resize(tmp, chunkHead, chunkHead.size(), 0, 0, cv::INTER_NEAREST); - } else if (!tmp.empty() && tmp.type() == CV_8UC3 && chunkHead.type() == CV_8UC3) { - cv::resize(tmp, chunkHead, chunkHead.size()); + if (!tmp_.empty() && tmp_.type() == CV_16U && chunkHead.type() == CV_32F) { + tmp_.convertTo(tmp_, CV_32FC1, 1.0f/1000.0f); //(16.0f*10.0f)); + cv::resize(tmp_, tmp_, chunkHead.size(), 0, 0, cv::INTER_NEAREST); + chunkHead.upload(tmp_); + } else if (!tmp_.empty() && tmp_.type() == CV_8UC3 && chunkHead.type() == CV_8UC3) { + cv::resize(tmp_, tmp_, chunkHead.size()); + chunkHead.upload(tmp_); } else { // Silent ignore? 
} diff --git a/components/codecs/src/opencv_encoder.cpp b/components/codecs/src/opencv_encoder.cpp index 028395b9e32865adb7a907d148308c9085588fa3..5dc1995a82e6147184572d6e45d20a8a49561ddc 100644 --- a/components/codecs/src/opencv_encoder.cpp +++ b/components/codecs/src/opencv_encoder.cpp @@ -20,21 +20,30 @@ OpenCVEncoder::~OpenCVEncoder() { } -bool OpenCVEncoder::encode(const cv::Mat &in, definition_t definition, bitrate_t bitrate, const std::function<void(const ftl::codecs::Packet&)> &cb) { - cv::Mat tmp; +bool OpenCVEncoder::supports(ftl::codecs::codec_t codec) { + switch (codec) { + case codec_t::JPG: + case codec_t::PNG: return true; + default: return false; + } +} + +bool OpenCVEncoder::encode(const cv::cuda::GpuMat &in, definition_t definition, bitrate_t bitrate, const std::function<void(const ftl::codecs::Packet&)> &cb) { bool is_colour = in.type() != CV_32F; current_definition_ = definition; + in.download(tmp_); + // Scale down image to match requested definition... if (ftl::codecs::getHeight(current_definition_) < in.rows) { - cv::resize(in, tmp, cv::Size(ftl::codecs::getWidth(current_definition_), ftl::codecs::getHeight(current_definition_)), 0, 0, (is_colour) ? 1 : cv::INTER_NEAREST); + cv::resize(tmp_, tmp_, cv::Size(ftl::codecs::getWidth(current_definition_), ftl::codecs::getHeight(current_definition_)), 0, 0, (is_colour) ? 1 : cv::INTER_NEAREST); } else { - tmp = in; + } // Represent float at 16bit int if (!is_colour) { - tmp.convertTo(tmp, CV_16UC1, 1000); + tmp_.convertTo(tmp_, CV_16UC1, 1000); } chunk_dim_ = (definition == definition_t::LD360) ? 
1 : 4; @@ -43,7 +52,7 @@ bool OpenCVEncoder::encode(const cv::Mat &in, definition_t definition, bitrate_t for (int i=0; i<chunk_count_; ++i) { // Add chunk job to thread pool - ftl::pool.push([this,i,&tmp,cb,is_colour,bitrate](int id) { + ftl::pool.push([this,i,cb,is_colour,bitrate](int id) { ftl::codecs::Packet pkt; pkt.block_number = i; pkt.block_total = chunk_count_; @@ -51,7 +60,7 @@ bool OpenCVEncoder::encode(const cv::Mat &in, definition_t definition, bitrate_t pkt.codec = (is_colour) ? codec_t::JPG : codec_t::PNG; try { - _encodeBlock(tmp, pkt, bitrate); + _encodeBlock(tmp_, pkt, bitrate); } catch(...) { LOG(ERROR) << "OpenCV encode block exception: " << i; } diff --git a/components/codecs/src/reader.cpp b/components/codecs/src/reader.cpp index 41d0697f5c23192a405ac9d13286cb38047a7ec5..ba68f26c9666da9a84ea86632caa1035bb6c5c88 100644 --- a/components/codecs/src/reader.cpp +++ b/components/codecs/src/reader.cpp @@ -22,6 +22,14 @@ bool Reader::begin() { (*stream_).read((char*)&h, sizeof(h)); if (h.magic[0] != 'F' || h.magic[1] != 'T' || h.magic[2] != 'L' || h.magic[3] != 'F') return false; + if (h.version >= 2) { + ftl::codecs::IndexHeader ih; + (*stream_).read((char*)&ih, sizeof(ih)); + } + + version_ = h.version; + LOG(INFO) << "FTL format version " << version_; + // Capture current time to adjust timestamps timestart_ = (ftl::timer::get_time() / ftl::timer::getInterval()) * ftl::timer::getInterval(); playing_ = true; @@ -29,6 +37,22 @@ bool Reader::begin() { return true; } +/*static void printMsgpack(msgpack::object &obj) { + switch (obj.type) { + case msgpack::type::NIL: return; + case msgpack::type::BOOLEAN: LOG(INFO) << "BOOL " << obj.as<bool>(); return; + case msgpack::type::POSITIVE_INTEGER: + case msgpack::type::NEGATIVE_INTEGER: LOG(INFO) << "INT " << obj.as<int>(); return; + case msgpack::type::FLOAT32: LOG(INFO) << "FLOAT " << obj.as<float>(); return; + case msgpack::type::FLOAT64: LOG(INFO) << "DOUBLE " << obj.as<double>(); return; + case 
msgpack::type::STR: LOG(INFO) << "STRING " << obj.as<std::string>(); return; + case msgpack::type::BIN: return; + case msgpack::type::EXT: return; + case msgpack::type::ARRAY: LOG(INFO) << "ARRAY: "; return; + case msgpack::type::MAP: LOG(INFO) << "MAP: "; return; + } +}*/ + bool Reader::read(int64_t ts, const std::function<void(const ftl::codecs::StreamPacket &, ftl::codecs::Packet &)> &f) { //UNIQUE_LOCK(mtx_, lk); std::unique_lock<std::mutex> lk(mtx_, std::defer_lock); @@ -67,10 +91,13 @@ bool Reader::read(int64_t ts, const std::function<void(const ftl::codecs::Stream //std::tuple<StreamPacket,Packet> data; msgpack::object obj = msg.get(); + + //printMsgpack(obj); + try { obj.convert(data_.emplace_back()); } catch (std::exception &e) { - LOG(INFO) << "Corrupt message: " << buffer_.nonparsed_size(); + LOG(INFO) << "Corrupt message: " << buffer_.nonparsed_size() << " - " << e.what(); //partial = true; //continue; return false; @@ -81,6 +108,11 @@ bool Reader::read(int64_t ts, const std::function<void(const ftl::codecs::Stream // Adjust timestamp get<0>(data).timestamp += timestart_; + // Fix to clear flags for version 2. + if (version_ <= 2) { + get<1>(data).flags = 0; + } + // TODO: Need to read ahead a few frames because there may be a // smaller timestamp after this one... requires a buffer. Ideally this // should be resolved during the write process. 
diff --git a/components/codecs/src/writer.cpp b/components/codecs/src/writer.cpp index 6561549f6f2e681b7678b328f4357762eae468a1..1e3841a8e05f840f808aaddf127833392212af05 100644 --- a/components/codecs/src/writer.cpp +++ b/components/codecs/src/writer.cpp @@ -14,9 +14,13 @@ Writer::~Writer() { bool Writer::begin() { ftl::codecs::Header h; - h.version = 0; + //h.version = 2; (*stream_).write((const char*)&h, sizeof(h)); + ftl::codecs::IndexHeader ih; + ih.reserved[0] = -1; + (*stream_).write((const char*)&ih, sizeof(ih)); + // Capture current time to adjust timestamps timestart_ = ftl::timer::get_time(); active_ = true; @@ -37,7 +41,8 @@ bool Writer::write(const ftl::codecs::StreamPacket &s, const ftl::codecs::Packet auto data = std::make_tuple(s2,p); msgpack::sbuffer buffer; msgpack::pack(buffer, data); + + UNIQUE_LOCK(mutex_, lk); (*stream_).write(buffer.data(), buffer.size()); - //buffer_.clear(); return true; } diff --git a/components/codecs/test/nvpipe_codec_unit.cpp b/components/codecs/test/nvpipe_codec_unit.cpp index 86f773df64f8fcdd55d493305ffa2505d356a22c..609ce56a50059978af931b718de57098201a0c1a 100644 --- a/components/codecs/test/nvpipe_codec_unit.cpp +++ b/components/codecs/test/nvpipe_codec_unit.cpp @@ -24,7 +24,7 @@ namespace ftl { TEST_CASE( "NvPipeEncoder::encode() - A colour test image at preset 0" ) { ftl::codecs::NvPipeEncoder encoder(definition_t::HD1080, definition_t::SD480); - cv::Mat m(cv::Size(1920,1080), CV_8UC3, cv::Scalar(0,0,0)); + cv::cuda::GpuMat m(cv::Size(1920,1080), CV_8UC3, cv::Scalar(0,0,0)); int block_total = 0; std::atomic<int> block_count = 0; @@ -46,7 +46,7 @@ TEST_CASE( "NvPipeEncoder::encode() - A colour test image at preset 0" ) { TEST_CASE( "NvPipeEncoder::encode() - A depth test image at preset 0" ) { ftl::codecs::NvPipeEncoder encoder(definition_t::HD1080, definition_t::SD480); - cv::Mat m(cv::Size(1920,1080), CV_32F, cv::Scalar(0.0f)); + cv::cuda::GpuMat m(cv::Size(1920,1080), CV_32F, cv::Scalar(0.0f)); int block_total = 
0; std::atomic<int> block_count = 0; @@ -70,13 +70,13 @@ TEST_CASE( "NvPipeDecoder::decode() - A colour test image" ) { ftl::codecs::NvPipeEncoder encoder(definition_t::HD1080, definition_t::SD480); ftl::codecs::NvPipeDecoder decoder; - cv::Mat in; - cv::Mat out; + cv::cuda::GpuMat in; + cv::cuda::GpuMat out; bool r = false; SECTION("FHD in and out, FHD encoding") { - in = cv::Mat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(255,0,0)); - out = cv::Mat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(0,0,0)); + in = cv::cuda::GpuMat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(255,0,0)); + out = cv::cuda::GpuMat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(0,0,0)); r = encoder.encode(in, ftl::codecs::kPreset0, [&out,&decoder](const ftl::codecs::Packet &pkt) { REQUIRE( decoder.decode(pkt, out) ); @@ -84,8 +84,8 @@ TEST_CASE( "NvPipeDecoder::decode() - A colour test image" ) { } SECTION("Full HD in, 720 out, FHD encoding") { - in = cv::Mat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(255,0,0)); - out = cv::Mat(cv::Size(1280,720), CV_8UC3, cv::Scalar(0,0,0)); + in = cv::cuda::GpuMat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(255,0,0)); + out = cv::cuda::GpuMat(cv::Size(1280,720), CV_8UC3, cv::Scalar(0,0,0)); r = encoder.encode(in, ftl::codecs::kPreset0, [&out,&decoder](const ftl::codecs::Packet &pkt) { REQUIRE( decoder.decode(pkt, out) ); @@ -95,8 +95,8 @@ TEST_CASE( "NvPipeDecoder::decode() - A colour test image" ) { } SECTION("HHD in, FHD out, FHD encoding") { - in = cv::Mat(cv::Size(1280,720), CV_8UC3, cv::Scalar(255,0,0)); - out = cv::Mat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(0,0,0)); + in = cv::cuda::GpuMat(cv::Size(1280,720), CV_8UC3, cv::Scalar(255,0,0)); + out = cv::cuda::GpuMat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(0,0,0)); r = encoder.encode(in, ftl::codecs::kPreset0, [&out,&decoder](const ftl::codecs::Packet &pkt) { REQUIRE( decoder.decode(pkt, out) ); @@ -106,8 +106,8 @@ TEST_CASE( "NvPipeDecoder::decode() - A colour test image" ) { } SECTION("FHD in, HHD out, SD encoding") { 
- in = cv::Mat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(255,0,0)); - out = cv::Mat(cv::Size(1280,720), CV_8UC3, cv::Scalar(0,0,0)); + in = cv::cuda::GpuMat(cv::Size(1920,1080), CV_8UC3, cv::Scalar(255,0,0)); + out = cv::cuda::GpuMat(cv::Size(1280,720), CV_8UC3, cv::Scalar(0,0,0)); r = encoder.encode(in, ftl::codecs::kPreset4, [&out,&decoder](const ftl::codecs::Packet &pkt) { REQUIRE( decoder.decode(pkt, out) ); @@ -117,5 +117,5 @@ TEST_CASE( "NvPipeDecoder::decode() - A colour test image" ) { } REQUIRE( r ); - REQUIRE( (cv::sum(out) != cv::Scalar(0,0,0)) ); + REQUIRE( (cv::cuda::sum(out) != cv::Scalar(0,0,0)) ); } diff --git a/components/codecs/test/opencv_codec_unit.cpp b/components/codecs/test/opencv_codec_unit.cpp index dd85140dbfcddbd9c48f0c83795a84b0d4914882..2505eeb8994e1397bc19175f7f0903bdb3000c9d 100644 --- a/components/codecs/test/opencv_codec_unit.cpp +++ b/components/codecs/test/opencv_codec_unit.cpp @@ -24,7 +24,7 @@ namespace ftl { TEST_CASE( "OpenCVEncoder::encode() - A colour test image at preset 0" ) { ftl::codecs::OpenCVEncoder encoder(definition_t::HD1080, definition_t::SD480); - cv::Mat m(cv::Size(1024,576), CV_8UC3, cv::Scalar(0,0,0)); + cv::cuda::GpuMat m(cv::Size(1024,576), CV_8UC3, cv::Scalar(0,0,0)); int block_total = 0; std::atomic<int> block_count = 0; @@ -53,7 +53,7 @@ TEST_CASE( "OpenCVEncoder::encode() - A colour test image at preset 0" ) { TEST_CASE( "OpenCVEncoder::encode() - A depth test image at preset 0" ) { ftl::codecs::OpenCVEncoder encoder(definition_t::HD1080, definition_t::SD480); - cv::Mat m(cv::Size(1024,576), CV_32F, cv::Scalar(0.0f)); + cv::cuda::GpuMat m(cv::Size(1024,576), CV_32F, cv::Scalar(0.0f)); int block_total = 0; std::atomic<int> block_count = 0; @@ -82,8 +82,8 @@ TEST_CASE( "OpenCVEncoder::encode() - A depth test image at preset 0" ) { TEST_CASE( "OpenCVDecoder::decode() - A colour test image no resolution change" ) { ftl::codecs::OpenCVEncoder encoder(definition_t::HD1080, definition_t::SD480); 
ftl::codecs::OpenCVDecoder decoder; - cv::Mat in(cv::Size(1024,576), CV_8UC3, cv::Scalar(255,0,0)); - cv::Mat out(cv::Size(1024,576), CV_8UC3, cv::Scalar(0,0,0)); + cv::cuda::GpuMat in(cv::Size(1024,576), CV_8UC3, cv::Scalar(255,0,0)); + cv::cuda::GpuMat out(cv::Size(1024,576), CV_8UC3, cv::Scalar(0,0,0)); std::mutex mtx; @@ -92,5 +92,5 @@ TEST_CASE( "OpenCVDecoder::decode() - A colour test image no resolution change" REQUIRE( decoder.decode(pkt, out) ); }); - REQUIRE( (cv::sum(out) != cv::Scalar(0,0,0)) ); + REQUIRE( (cv::cuda::sum(out) != cv::Scalar(0,0,0)) ); } diff --git a/components/common/cpp/include/ftl/configurable.hpp b/components/common/cpp/include/ftl/configurable.hpp index 6593119c794205e0bf901814437f79974bb81e00..244b548285b92902e645436891b04acb182ffd08 100644 --- a/components/common/cpp/include/ftl/configurable.hpp +++ b/components/common/cpp/include/ftl/configurable.hpp @@ -80,6 +80,7 @@ class Configurable { template <typename T> void set(const std::string &name, T value) { (*config_)[name] = value; + inject(name, (*config_)[name]); _trigger(name); } @@ -103,12 +104,12 @@ class Configurable { protected: nlohmann::json *config_; + virtual void inject(const std::string name, nlohmann::json &value) {} + private: std::map<std::string, std::list<std::function<void(const config::Event&)>>> observers_; void _trigger(const std::string &name); - - static void __changeURI(const std::string &uri, Configurable *cfg); }; /*template <> diff --git a/components/common/cpp/include/ftl/cuda_common.hpp b/components/common/cpp/include/ftl/cuda_common.hpp index 70a6a4ad6d4dc0def715979f9eaf348e621d103e..116e26ec74ff4b469e8d1214cafa2897264f80ad 100644 --- a/components/common/cpp/include/ftl/cuda_common.hpp +++ b/components/common/cpp/include/ftl/cuda_common.hpp @@ -102,6 +102,9 @@ class TextureObject : public TextureObjectBase { operator cv::cuda::GpuMat(); + void create(const cv::Size &); + void create(int w, int h); + __host__ __device__ T *devicePtr() const { return 
(T*)(ptr_); }; __host__ __device__ T *devicePtr(int v) const { return &(T*)(ptr_)[v*pitch2_]; } @@ -264,6 +267,20 @@ TextureObject<T>::TextureObject(size_t width, size_t height) { //needsdestroy_ = true; } +#ifndef __CUDACC__ +template <typename T> +void TextureObject<T>::create(const cv::Size &s) { + create(s.width, s.height); +} + +template <typename T> +void TextureObject<T>::create(int w, int h) { + if (width_ != w || height_ != h) { + *this = std::move(TextureObject<T>(w, h)); + } +} +#endif + template <typename T> TextureObject<T>::TextureObject(const TextureObject<T> &p) { texobj_ = p.texobj_; diff --git a/components/common/cpp/src/configurable.cpp b/components/common/cpp/src/configurable.cpp index f47e12195e00d6db0bce1ee65ec70339e062448f..5116292adab92672da3adb239ecb2ed3f4630011 100644 --- a/components/common/cpp/src/configurable.cpp +++ b/components/common/cpp/src/configurable.cpp @@ -59,8 +59,4 @@ void Configurable::on(const string &prop, function<void(const ftl::config::Event } else { (*ix).second.push_back(f); } -} - -void Configurable::__changeURI(const string &uri, Configurable *cfg) { - } \ No newline at end of file diff --git a/components/control/cpp/src/master.cpp b/components/control/cpp/src/master.cpp index a1248fac771dcd58a4654244296f4f0c608a5a34..a13a67a90b4bc7ccc1bd2c893580fc286dfb330a 100644 --- a/components/control/cpp/src/master.cpp +++ b/components/control/cpp/src/master.cpp @@ -86,6 +86,7 @@ vector<string> Master::getConfigurables() { vector<string> Master::getConfigurables(const ftl::UUID &peer) { try { + LOG(INFO) << "LISTING CONFIGS"; return net_->call<vector<string>>(peer, "list_configurables"); } catch (...) 
{ return {}; diff --git a/components/net/cpp/CMakeLists.txt b/components/net/cpp/CMakeLists.txt index c7e8bb34119a6b2f70b5a34b6e4b7e4911fe465f..9df16c0b27dda27cc00b75c33c340f1474afd63d 100644 --- a/components/net/cpp/CMakeLists.txt +++ b/components/net/cpp/CMakeLists.txt @@ -9,13 +9,14 @@ add_library(ftlnet src/dispatcher.cpp src/universe.cpp src/ws_internal.cpp + src/net_configurable.cpp ) target_include_directories(ftlnet PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:include> PRIVATE src) -target_link_libraries(ftlnet ftlcommon Threads::Threads glog::glog ${UUID_LIBRARIES}) +target_link_libraries(ftlnet ftlctrl ftlcommon Threads::Threads glog::glog ${UUID_LIBRARIES}) install(TARGETS ftlnet EXPORT ftlnet-config ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/components/net/cpp/include/ftl/net_configurable.hpp b/components/net/cpp/include/ftl/net_configurable.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3a31ecc27675f2e37b72bc42ee83da0dee5b7e4c --- /dev/null +++ b/components/net/cpp/include/ftl/net_configurable.hpp @@ -0,0 +1,26 @@ +#pragma once +#ifndef _FTL_NETCONFIGURABLE_HPP_ +#define _FTL_NETCONFIGURABLE_HPP_ + +#include <ftl/configurable.hpp> +#include <ftl/master.hpp> + +namespace ftl { + + class NetConfigurable : public ftl::Configurable { + public: + NetConfigurable(ftl::UUID peer, const std::string &suri, ftl::ctrl::Master &ctrl, ftl::config::json_t &config); + ~NetConfigurable(); + + protected: + void inject(const std::string name, nlohmann::json &value); + + private: + ftl::UUID peer; + const std::string &suri; + ftl::ctrl::Master &ctrl; + }; + +} + +#endif // _FTL_NETCONFIGURABLE_HPP_ diff --git a/components/net/cpp/src/net_configurable.cpp b/components/net/cpp/src/net_configurable.cpp new file mode 100644 index 0000000000000000000000000000000000000000..edcec65b8c4ba452472e30efd29b672b7a6e2c55 --- /dev/null +++ b/components/net/cpp/src/net_configurable.cpp @@ -0,0 +1,12 @@ 
+#include <ftl/net_configurable.hpp> + +#include <string> + +ftl::NetConfigurable::NetConfigurable(ftl::UUID peer, const std::string &suri, ftl::ctrl::Master &ctrl, ftl::config::json_t &config) : ftl::Configurable(config), peer(peer), suri(suri), ctrl(ctrl) { +} + +ftl::NetConfigurable::~NetConfigurable(){} + +void ftl::NetConfigurable::inject(const std::string name, nlohmann::json &value) { + ctrl.set(peer, suri + std::string("/") + name, value); +} diff --git a/components/net/cpp/test/CMakeLists.txt b/components/net/cpp/test/CMakeLists.txt index 34a642fa00e2019d0f47ce6dbb0451be2a2a2773..c786f73d8f8d73a534cae849a3ff9d80331f3267 100644 --- a/components/net/cpp/test/CMakeLists.txt +++ b/components/net/cpp/test/CMakeLists.txt @@ -27,11 +27,20 @@ target_link_libraries(net_integration Threads::Threads ${UUID_LIBRARIES}) +### NetConfigurable Unit ####################################################### +add_executable(net_configurable_unit + ./tests.cpp + ./net_configurable_unit.cpp) +target_link_libraries(net_configurable_unit + ftlnet) + #add_test(ProtocolUnitTest protocol_unit) add_test(PeerUnitTest peer_unit) add_test(NetIntegrationTest net_integration) +# Testing of NetConfigurable is disabled. 
+#add_test(NetConfigurableUnitTest net_configurable_unit) add_custom_target(tests) add_dependencies(tests peer_unit uri_unit) diff --git a/components/net/cpp/test/net_configurable_unit.cpp b/components/net/cpp/test/net_configurable_unit.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c8bf42247ef3f23023b25c159a968deda1fd419b --- /dev/null +++ b/components/net/cpp/test/net_configurable_unit.cpp @@ -0,0 +1,43 @@ +#include "catch.hpp" +#include <ftl/net_configurable.hpp> +#include <ftl/slave.hpp> + +using ftl::NetConfigurable; + +SCENARIO( "NetConfigurable::set()" ) { + GIVEN( "valid peer UUID, URI and Master" ) { + // Set up Master + nlohmann::json json = {{"$id", "root"}, {"test", {{"listen", "tcp://localhost:7077"}}}}; // Check what values are needed + ftl::Configurable *root; + root = new ftl::Configurable(json); + ftl::net::Universe *net = ftl::config::create<ftl::net::Universe>(root, std::string("test")); + net->start(); + ftl::ctrl::Master *controller = new ftl::ctrl::Master(root, net); + + // Set up a slave, then call getSlaves() to get the UUID string + nlohmann::json jsonSlave = {{"$id", "slave"}, {"test", {{"peers", {"tcp://localhost:7077"}}}}}; + ftl::Configurable *rootSlave; + rootSlave = new ftl::Configurable(jsonSlave); + ftl::net::Universe *netSlave = ftl::config::create<ftl::net::Universe>(rootSlave, std::string("test")); + ftl::ctrl::Slave slave(netSlave, rootSlave); + netSlave->start(); + netSlave->waitConnections(); + net->waitConnections(); + + auto slaves = controller->getSlaves(); + REQUIRE( slaves.size() == 1 ); + + ftl::UUID peer = ftl::UUID(slaves[0]["id"].get<std::string>()); + const std::string suri = "slave_test"; + nlohmann::json jsonTest = {{"$id", "slave_test"}, {"test", {{"peers", {"tcp://localhost:7077"}}}}}; + NetConfigurable nc(peer, suri, *controller, jsonTest); + nc.set("test_value", 5); + REQUIRE( nc.get<int>("test_value") == 5 ); + } + + // invalid peer UUID + + // invalid URI + + // null Master +} \ No 
newline at end of file diff --git a/components/operators/CMakeLists.txt b/components/operators/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c93a87f6693334c30e28039387db45554ebdb0d2 --- /dev/null +++ b/components/operators/CMakeLists.txt @@ -0,0 +1,21 @@ +add_library(ftloperators + src/smoothing.cpp + src/smoothing.cu + src/mls.cu + src/smoothchan.cu + src/operator.cpp + src/colours.cpp + src/normals.cpp +) + +# These cause errors in CI build and are being removed from PCL in newer versions +# target_compile_options(ftlrender PUBLIC ${PCL_DEFINITIONS}) + +target_include_directories(ftloperators PUBLIC + ${PCL_INCLUDE_DIRS} + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> + $<INSTALL_INTERFACE:include> + PRIVATE src) +target_link_libraries(ftloperators ftlrender ftlrgbd ftlcommon Eigen3::Eigen Threads::Threads ${OpenCV_LIBS}) + +#ADD_SUBDIRECTORY(test) diff --git a/components/operators/include/ftl/operators/colours.hpp b/components/operators/include/ftl/operators/colours.hpp new file mode 100644 index 0000000000000000000000000000000000000000..afad434d49e170ba6f48d4b875d5641bdff61a54 --- /dev/null +++ b/components/operators/include/ftl/operators/colours.hpp @@ -0,0 +1,25 @@ +#ifndef _FTL_OPERATORS_COLOURS_HPP_ +#define _FTL_OPERATORS_COLOURS_HPP_ + +#include <ftl/operators/operator.hpp> + +namespace ftl { +namespace operators { + +class ColourChannels : public ftl::operators::Operator { + public: + explicit ColourChannels(ftl::Configurable *cfg); + ~ColourChannels(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + cv::cuda::GpuMat temp_; +}; + +} +} + +#endif // _FTL_OPERATORS_COLOURS_HPP_ diff --git a/components/operators/include/ftl/operators/normals.hpp b/components/operators/include/ftl/operators/normals.hpp new file mode 100644 index 
0000000000000000000000000000000000000000..5aff09e4da3b5867a77631c81750c42a6c23fdbd --- /dev/null +++ b/components/operators/include/ftl/operators/normals.hpp @@ -0,0 +1,45 @@ +#ifndef _FTL_OPERATORS_NORMALS_HPP_ +#define _FTL_OPERATORS_NORMALS_HPP_ + +#include <ftl/operators/operator.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace operators { + +/** + * Calculate rough normals from local depth gradients. + */ +class Normals : public ftl::operators::Operator { + public: + explicit Normals(ftl::Configurable*); + ~Normals(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + +}; + +/** + * Calculate rough normals from local depth gradients and perform a weighted + * smoothing over the neighbourhood. + */ +class SmoothNormals : public ftl::operators::Operator { + public: + explicit SmoothNormals(ftl::Configurable*); + ~SmoothNormals(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + ftl::cuda::TextureObject<float4> temp_; + +}; + +} +} + +#endif // _FTL_OPERATORS_NORMALS_HPP_ diff --git a/components/operators/include/ftl/operators/operator.hpp b/components/operators/include/ftl/operators/operator.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fe9b52f2c6b7d6d3364c1d1722f70bd691602170 --- /dev/null +++ b/components/operators/include/ftl/operators/operator.hpp @@ -0,0 +1,117 @@ +#ifndef _FTL_OPERATORS_OPERATOR_HPP_ +#define _FTL_OPERATORS_OPERATOR_HPP_ + +#include <list> +#include <ftl/configurable.hpp> +#include <ftl/configuration.hpp> +#include <ftl/rgbd/frame.hpp> +#include <ftl/rgbd/frameset.hpp> +#include <ftl/rgbd/source.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace operators { + +/** + * 
An abstract frame operator interface. Any kind of filter that operates on a + * single frame should use this as a base class. An example of a filter would + * be MLS smoothing, or optical flow temporal smoothing. Some 'filters' might + * simply generate additional channels, such as a 'Normals' filter that + * generates a normals channel. Filters may also have internal data buffers, + * these may also persist between frames in some cases. + */ +class Operator { + public: + explicit Operator(ftl::Configurable *cfg); + virtual ~Operator(); + + enum class Type { + OneToOne, // Frame to Frame (Filter or generator) + ManyToOne, // FrameSet to Frame (Rendering or Compositing) + ManyToMany, // FrameSet to FrameSet (alignment) + OneToMany // Currently not used or supported + }; + + /** + * Must be implemented and return an operator structural type. + */ + virtual Type type() const =0; + + virtual bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream); + virtual bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream); + virtual bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *os, cudaStream_t stream); + + inline void enable() { enabled_ = true; } + inline void disable() { enabled_ = false; } + inline bool enabled() const { return enabled_; } + inline void enabled(bool e) { enabled_ = e; } + + inline ftl::Configurable *config() const { return config_; } + + private: + bool enabled_; + ftl::Configurable *config_; +}; + +namespace detail { + +struct ConstructionHelperBase { + ConstructionHelperBase(ftl::Configurable *cfg) : config(cfg) {} + virtual ~ConstructionHelperBase() {} + virtual ftl::operators::Operator *make()=0; + + ftl::Configurable *config; +}; + +template <typename T> +struct ConstructionHelper : public ConstructionHelperBase { + ConstructionHelper(ftl::Configurable *cfg) : ConstructionHelperBase(cfg) {} + ~ConstructionHelper() {} + ftl::operators::Operator *make() 
override { + return new T(config); + } +}; + +struct OperatorNode { + ConstructionHelperBase *maker; + std::vector<ftl::operators::Operator*> instances; +}; + +} + +/** + * Represent a sequential collection of operators. Each operator created is + * added to an internal list and then applied to a frame in the order they were + * created. A directed acyclic graph can also be formed. + */ +class Graph : public ftl::Configurable { + public: + explicit Graph(nlohmann::json &config); + ~Graph(); + + template <typename T> + ftl::Configurable *append(const std::string &name); + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream=0); + bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream=0); + bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream=0); + + private: + std::list<ftl::operators::detail::OperatorNode> operators_; + std::map<std::string, ftl::Configurable*> configs_; + + ftl::Configurable *_append(ftl::operators::detail::ConstructionHelperBase*); +}; + +} +} + +template <typename T> +ftl::Configurable *ftl::operators::Graph::append(const std::string &name) { + if (configs_.find(name) == configs_.end()) { + configs_[name] = ftl::create<ftl::Configurable>(this, name); + } + return _append(new ftl::operators::detail::ConstructionHelper<T>(configs_[name])); +} + +#endif // _FTL_OPERATORS_OPERATOR_HPP_ diff --git a/components/operators/include/ftl/operators/smoothing.hpp b/components/operators/include/ftl/operators/smoothing.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b5362ea6f1a6c16de81cdb7c55339058476da09d --- /dev/null +++ b/components/operators/include/ftl/operators/smoothing.hpp @@ -0,0 +1,104 @@ +#ifndef _FTL_OPERATORS_SMOOTHING_HPP_ +#define _FTL_OPERATORS_SMOOTHING_HPP_ + +#include <ftl/operators/operator.hpp> + +namespace ftl { +namespace operators { + +/** + * Remove high frequency noise whilst attempting to 
retain sharp edges and curves. + * It uses the minimum error in the second derivative of the local surface as + * a smoothing factor, with the depth error itself taken from the minimum + * first derivative. Only the depth channel is used and modified. + */ +class HFSmoother : public ftl::operators::Operator { + public: + explicit HFSmoother(ftl::Configurable*); + ~HFSmoother(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + cv::cuda::GpuMat temp_; + ftl::rgbd::Frame frames_[4]; +}; + +/** + * Generate a smoothing channel from the colour image that provides a smoothing + * factor for each pixel. It uses colour gradient at multiple resolutions to + * decide on how much a given pixel needs smoothing, large single colour areas + * will generate a large smoothing value, whilst sharp colour edges will have + * no smoothing. + */ +class SmoothChannel : public ftl::operators::Operator { + public: + explicit SmoothChannel(ftl::Configurable*); + ~SmoothChannel(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: + ftl::rgbd::Frame temp_[6]; +}; + +/** + * Perform Moving Least Squares smoothing with a constant smoothing amount and + * neighbourhood size. Requires channels: Depth + Normals. + * Also outputs Depth + Normals. 
+ */ +class SimpleMLS : public ftl::operators::Operator { + public: + explicit SimpleMLS(ftl::Configurable*); + ~SimpleMLS(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: +}; + +/** + * Perform Moving Least Squares smoothing with a smoothing amount determined + * by a simple colour similarity weighting. In practice this is too naive. + */ +class ColourMLS : public ftl::operators::Operator { + public: + explicit ColourMLS(ftl::Configurable*); + ~ColourMLS(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: +}; + +/** + * A version of Moving Least Squares where both the smoothing factor and + * neighbourhood size are adapted over an extra large range using the colour + * channel as a guide. Requires Depth+Normals+Colour. Neighbourhood can + * encompass hundreds of pixels with a smoothing factor approaching a meter, or + * it can be only a few or even no pixels with a zero smoothing factor. 
+ */ +class AdaptiveMLS : public ftl::operators::Operator { + public: + explicit AdaptiveMLS(ftl::Configurable*); + ~AdaptiveMLS(); + + inline Operator::Type type() const override { return Operator::Type::OneToOne; } + + bool apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) override; + + private: +}; + +} +} + +#endif // _FTL_OPERATORS_SMOOTHING_HPP_ diff --git a/components/operators/src/colours.cpp b/components/operators/src/colours.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3174c42e41de75e30f852c5bb187266ad26b61aa --- /dev/null +++ b/components/operators/src/colours.cpp @@ -0,0 +1,26 @@ +#include <ftl/operators/colours.hpp> + +using ftl::operators::ColourChannels; +using ftl::codecs::Channel; + +ColourChannels::ColourChannels(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +ColourChannels::~ColourChannels() { + +} + +bool ColourChannels::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + // Convert colour from BGR to BGRA if needed + if (in.get<cv::cuda::GpuMat>(Channel::Colour).type() == CV_8UC3) { + //cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + // Convert to 4 channel colour + auto &col = in.get<cv::cuda::GpuMat>(Channel::Colour); + temp_.create(col.size(), CV_8UC4); + cv::cuda::swap(col, temp_); + cv::cuda::cvtColor(temp_,col, cv::COLOR_BGR2BGRA, 0); + } + + return true; +} diff --git a/components/operators/src/mls.cu b/components/operators/src/mls.cu new file mode 100644 index 0000000000000000000000000000000000000000..e030d1967695bd71760c0bc2af26706d21f0f2e3 --- /dev/null +++ b/components/operators/src/mls.cu @@ -0,0 +1,314 @@ +#include "smoothing_cuda.hpp" + +#include <ftl/cuda/weighting.hpp> + +using ftl::cuda::TextureObject; + +#define T_PER_BLOCK 8 + +// ===== MLS Smooth ============================================================ + +/* + * Smooth depth map using Moving Least Squares + 
*/ + template <int SEARCH_RADIUS> + __global__ void mls_smooth_kernel( + TextureObject<float4> normals_in, + TextureObject<float4> normals_out, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float> depth_out, // Accumulated output + float smoothing, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + depth_out(x,y) = d0; + if (d0 < camera.minDepth || d0 > camera.maxDepth) return; + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + // Neighbourhood + for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { + for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { + const float d = depth_in.tex2D(x+u, y+v); + if (d < camera.minDepth || d > camera.maxDepth) continue; + + // Point and normal of neighbour + const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); + const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); + + // Gauss approx weighting function using point distance + const float w = ftl::cuda::spatialWeighting(X,Xi,smoothing); + + aX += Xi*w; + nX += Ni*w; + contrib += w; + } + } + + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + // Signed-Distance Field function + float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + + //uint2 screen = camera.camToScreen<uint2>(X); + + //if (screen.x < depth_out.width() && screen.y < depth_out.height()) { + // depth_out(screen.x,screen.y) = X.z; + //} + depth_out(x,y) = X.z; + normals_out(x,y) = make_float4(nX / length(nX), 0.0f); +} + +void ftl::cuda::mls_smooth( + 
ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + float smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + switch (radius) { + case 5: mls_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + case 4: mls_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + case 3: mls_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + case 2: mls_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + case 1: mls_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + } + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + + +// ===== Colour MLS Smooth ===================================================== + +/* + * Smooth depth map using Moving Least Squares. This version uses colour + * similarity weights to adjust the spatial smoothing factor. It is naive in + * that similar colours may exist on both sides of an edge and colours at the + * level of single pixels can be subject to noise. Colour noise should first + * be removed from the image. 
+ */ + template <int SEARCH_RADIUS> + __global__ void colour_mls_smooth_kernel( + TextureObject<float4> normals_in, + TextureObject<float4> normals_out, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float> depth_out, // Accumulated output + TextureObject<uchar4> colour_in, + float smoothing, + float colour_smoothing, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + depth_out(x,y) = d0; + if (d0 < camera.minDepth || d0 > camera.maxDepth) return; + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + uchar4 c0 = colour_in.tex2D(x, y); + + // Neighbourhood + for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { + for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { + const float d = depth_in.tex2D(x+u, y+v); + if (d < camera.minDepth || d > camera.maxDepth) continue; + + // Point and normal of neighbour + const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); + const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); + + const uchar4 c = colour_in.tex2D(x+u, y+v); + const float cw = ftl::cuda::colourWeighting(c0,c,colour_smoothing); + + // Gauss approx weighting function using point distance + const float w = ftl::cuda::spatialWeighting(X,Xi,smoothing*cw); + + aX += Xi*w; + nX += Ni*w; + contrib += w; + } + } + + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + // Signed-Distance Field function + float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + + //uint2 screen = camera.camToScreen<uint2>(X); + + //if (screen.x < depth_out.width() 
&& screen.y < depth_out.height()) { + //	depth_out(screen.x,screen.y) = X.z; + //} + depth_out(x,y) = X.z; + normals_out(x,y) = make_float4(nX / length(nX), 0.0f); +} + +void ftl::cuda::colour_mls_smooth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + switch (radius) { + case 5: colour_mls_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); break; + case 4: colour_mls_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); break; + case 3: colour_mls_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); break; + case 2: colour_mls_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); break; + case 1: colour_mls_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, colour_in, smoothing, colour_smoothing, camera); break; + } + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + + +// ===== Adaptive MLS Smooth ===================================================== + +/* + * Smooth depth map using Moving Least Squares. Unlike the colour MLS
 + * variant above, this kernel applies no colour weighting itself: the + * per-pixel spatial smoothing factor is read from a precomputed smoothing + * channel (see SmoothChannel), so the neighbourhood influence can adapt + * over a very large range across the image. + */ + template <int SEARCH_RADIUS> + __global__ void adaptive_mls_smooth_kernel( + TextureObject<float4> normals_in, + TextureObject<float4> normals_out, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float> depth_out, // Accumulated output + TextureObject<float> smoothing, + ftl::rgbd::Camera camera) { + + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || y < 0 || x >= depth_in.width() || y >= depth_in.height()) return; + + float3 aX = make_float3(0.0f,0.0f,0.0f); + float3 nX = make_float3(0.0f,0.0f,0.0f); + float contrib = 0.0f; + + float d0 = depth_in.tex2D(x, y); + depth_out(x,y) = d0; + normals_out(x,y) = normals_in(x,y); + if (d0 < camera.minDepth || d0 > camera.maxDepth) return; + float3 X = camera.screenToCam((int)(x),(int)(y),d0); + + //uchar4 c0 = colour_in.tex2D(x, y); + float smooth = smoothing(x,y); + + // Neighbourhood + for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) { + for (int u=-SEARCH_RADIUS; u<=SEARCH_RADIUS; ++u) { + const float d = depth_in.tex2D(x+u, y+v); + if (d < camera.minDepth || d > camera.maxDepth) continue; + + // Point and normal of neighbour + const float3 Xi = camera.screenToCam((int)(x)+u,(int)(y)+v,d); + const float3 Ni = make_float3(normals_in.tex2D((int)(x)+u, (int)(y)+v)); + + if (Ni.x+Ni.y+Ni.z == 0.0f) continue; + + // Gauss approx weighting function using point distance + const float w = ftl::cuda::spatialWeighting(X,Xi,smooth*0.5f); + + aX += Xi*w; + nX += Ni*w; + contrib += w; + } + } + + if (contrib <= 0.0f) return; + + nX /= contrib; // Weighted average normal + aX /= contrib; // Weighted average point (centroid) + + // Signed-Distance Field function + float fX = nX.x * (X.x - aX.x) + nX.y * (X.y - aX.y) + nX.z * (X.z - 
aX.z); + + // Calculate new point using SDF function to adjust depth (and position) + X = X - nX * fX; + + //uint2 screen = camera.camToScreen<uint2>(X); + + //if (screen.x < depth_out.width() && screen.y < depth_out.height()) { + // depth_out(screen.x,screen.y) = X.z; + //} + depth_out(x,y) = X.z; + normals_out(x,y) = make_float4(nX / length(nX), 0.0f); +} + +void ftl::cuda::adaptive_mls_smooth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<float> &smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + switch (radius) { + case 5: adaptive_mls_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + case 4: adaptive_mls_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + case 3: adaptive_mls_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + case 2: adaptive_mls_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + case 1: adaptive_mls_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(normals_in, normals_out, depth_in, depth_out, smoothing, camera); break; + } + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} + diff --git a/components/operators/src/normals.cpp b/components/operators/src/normals.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f8554c2986f27de3fe5bd0ab2fb03fbf2226c35 --- /dev/null +++ 
b/components/operators/src/normals.cpp @@ -0,0 +1,76 @@ +#include <ftl/operators/normals.hpp> +#include <ftl/cuda/normals.hpp> +#include <ftl/utility/matrix_conversion.hpp> + +using ftl::operators::Normals; +using ftl::operators::SmoothNormals; +using ftl::codecs::Channel; +using ftl::rgbd::Format; + +Normals::Normals(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +Normals::~Normals() { + +} + +bool Normals::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + if (!in.hasChannel(Channel::Depth)) { + LOG(ERROR) << "Missing depth channel in Normals operator"; + return false; + } + + if (out.hasChannel(Channel::Normals)) { + LOG(WARNING) << "Output already has normals"; + } + + ftl::cuda::normals( + out.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + s->parameters(), 0 + ); + + return true; +} + + +SmoothNormals::SmoothNormals(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +SmoothNormals::~SmoothNormals() { + +} + +bool SmoothNormals::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + float smoothing = config()->value("normal_smoothing", 0.02f); + int radius = max(0, min(config()->value("radius",1), 5)); + + if (!in.hasChannel(Channel::Depth)) { + LOG(ERROR) << "Missing depth channel in SmoothNormals operator"; + return false; + } + + if (out.hasChannel(Channel::Normals)) { + LOG(WARNING) << "Output already has normals"; + } + + auto &depth = in.get<cv::cuda::GpuMat>(Channel::Depth); + + temp_.create(depth.size()); + + ftl::cuda::normals( + out.createTexture<float4>(Channel::Normals, Format<float4>(depth.size())), + temp_, + in.createTexture<float>(Channel::Depth), + radius, smoothing, + s->parameters(), + MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()).getFloat3x3(), + 
MatrixConversion::toCUDA(s->getPose().cast<float>()).getFloat3x3(), + stream + ); + + + return true; +} diff --git a/components/operators/src/operator.cpp b/components/operators/src/operator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..91dada28b6adddfcdb3e71861aca07591c57fa9c --- /dev/null +++ b/components/operators/src/operator.cpp @@ -0,0 +1,87 @@ +#include <ftl/operators/operator.hpp> + +using ftl::operators::Operator; +using ftl::operators::Graph; +using ftl::rgbd::Frame; +using ftl::rgbd::FrameSet; +using ftl::rgbd::Source; + +Operator::Operator(ftl::Configurable *config) : config_(config) { + enabled_ = config_->value("enabled", true); + + config_->on("enabled", [this](const ftl::config::Event &e) { + enabled_ = config_->value("enabled", true); + }); +} + +Operator::~Operator() {} + +bool Operator::apply(Frame &in, Frame &out, Source *s, cudaStream_t stream) { + return false; +} + +bool Operator::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { + return false; +} + +bool Operator::apply(FrameSet &in, Frame &out, Source *os, cudaStream_t stream) { + return false; +} + + + +Graph::Graph(nlohmann::json &config) : ftl::Configurable(config) { + +} + +Graph::~Graph() { + +} + +bool Graph::apply(FrameSet &in, FrameSet &out, cudaStream_t stream) { + if (!value("enabled", true)) return false; + + if (in.frames.size() != out.frames.size()) return false; + + for (auto &i : operators_) { + // Make sure there are enough instances + while (i.instances.size() < in.frames.size()) { + i.instances.push_back(i.maker->make()); + } + + for (int j=0; j<in.frames.size(); ++j) { + auto *instance = i.instances[j]; + + if (instance->enabled()) { + instance->apply(in.frames[j], out.frames[j], in.sources[j], stream); + } + } + } + + return true; +} + +bool Graph::apply(Frame &in, Frame &out, Source *s, cudaStream_t stream) { + if (!value("enabled", true)) return false; + + for (auto &i : operators_) { + // Make sure there are enough instances + if 
(i.instances.size() < 1) { + i.instances.push_back(i.maker->make()); + } + + auto *instance = i.instances[0]; + + if (instance->enabled()) { + instance->apply(in, out, s, stream); + } + } + + return true; +} + +ftl::Configurable *Graph::_append(ftl::operators::detail::ConstructionHelperBase *m) { + auto &o = operators_.emplace_back(); + o.maker = m; + return m->config; +} diff --git a/components/operators/src/smoothchan.cu b/components/operators/src/smoothchan.cu new file mode 100644 index 0000000000000000000000000000000000000000..af9ec35243ed6304f859ab16c2177aa761c8eb14 --- /dev/null +++ b/components/operators/src/smoothchan.cu @@ -0,0 +1,82 @@ +#include "smoothing_cuda.hpp" + +#include <ftl/cuda/weighting.hpp> + +using ftl::cuda::TextureObject; + +#define T_PER_BLOCK 8 + +template <int RADIUS> +__global__ void smooth_chan_kernel( + ftl::cuda::TextureObject<uchar4> colour_in, + //ftl::cuda::TextureObject<float> depth_in, + ftl::cuda::TextureObject<float> smoothing_out, + ftl::rgbd::Camera camera, + float alpha, float scale) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < smoothing_out.width() && y < smoothing_out.height()) { + const int ix = int(((float)x / (float)smoothing_out.width()) * (float)colour_in.width()); + const int iy = int(((float)y / (float)smoothing_out.height()) * (float)colour_in.height()); + + // A distance has already been found + if (smoothing_out(x,y) < 1.0f) return; + + float mindist = 100.0f; + + const uchar4 c0 = colour_in.tex2D(ix, iy); + //const float d0 = depth_in.tex2D(ix, iy); + //if (d0 < camera.minDepth || d0 > camera.maxDepth) return; + + //const float3 pos = camera.screenToCam(ix, iy, d0); + + for (int v=-RADIUS; v<=RADIUS; ++v) { + #pragma unroll + for (int u=-RADIUS; u<=RADIUS; ++u) { + const uchar4 c = colour_in.tex2D(ix+u, iy+v); + //const float d = depth_in.tex2D(ix+u, iy+v); + //if (d < camera.minDepth || d > camera.maxDepth) continue; 
+ //const float3 posN = camera.screenToCam(ix+u, iy+v, d); + + float d = sqrt((float)u*(float)u + (float)v*(float)v) * scale; + + if (ftl::cuda::colourDistance(c, c0) >= alpha) mindist = min(mindist, (float)d); + } + } + + smoothing_out(x,y) = min(mindist / 100.0f, 1.0f); + } +} + +void ftl::cuda::smooth_channel( + ftl::cuda::TextureObject<uchar4> &colour_in, + //ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &smoothing_out, + const ftl::rgbd::Camera &camera, + float alpha, + float scale, + int radius, + cudaStream_t stream) { + + const dim3 gridSize((smoothing_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (smoothing_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + + switch (radius) { + case 5: smooth_chan_kernel<5><<<gridSize, blockSize, 0, stream>>>(colour_in, smoothing_out, camera, alpha, scale); break; + case 4: smooth_chan_kernel<4><<<gridSize, blockSize, 0, stream>>>(colour_in, smoothing_out, camera, alpha, scale); break; + case 3: smooth_chan_kernel<3><<<gridSize, blockSize, 0, stream>>>(colour_in, smoothing_out, camera, alpha, scale); break; + case 2: smooth_chan_kernel<2><<<gridSize, blockSize, 0, stream>>>(colour_in, smoothing_out, camera, alpha, scale); break; + case 1: smooth_chan_kernel<1><<<gridSize, blockSize, 0, stream>>>(colour_in, smoothing_out, camera, alpha, scale); break; + } + + + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} diff --git a/components/operators/src/smoothing.cpp b/components/operators/src/smoothing.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8e4f458bf9f996a6d38bef816ab7bcec75b69b11 --- /dev/null +++ b/components/operators/src/smoothing.cpp @@ -0,0 +1,253 @@ +#include <ftl/operators/smoothing.hpp> +#include "smoothing_cuda.hpp" + +#include <ftl/cuda/normals.hpp> + +using ftl::operators::HFSmoother; +using ftl::operators::SimpleMLS; +using 
ftl::operators::ColourMLS; +using ftl::operators::AdaptiveMLS; +using ftl::operators::SmoothChannel; +using ftl::codecs::Channel; +using ftl::rgbd::Format; +using cv::cuda::GpuMat; + +HFSmoother::HFSmoother(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +HFSmoother::~HFSmoother() { + +} + +bool HFSmoother::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + float var_thresh = config()->value("variance_threshold", 0.0002f); + int levels = max(0, min(config()->value("levels",0), 4)); + int iters = config()->value("iterations",5); + + // FIXME: in and out are assumed to be the same + + for (int i=0; i<iters; ++i) { + ftl::cuda::smoothing_factor( + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + var_thresh, + s->parameters(), 0 + ); + } + + //LOG(INFO) << "PARAMS DEPTHS " << s->parameters().minDepth << "," << s->parameters().maxDepth; + + /*for (int i=0; i<levels; ++i) { + var_thresh *= 2.0f; + auto &dmat = f.get<GpuMat>(Channel::Depth); + cv::cuda::resize(dmat, frames_[i].create<GpuMat>(Channel::Depth), cv::Size(dmat.cols / (2*(i+1)), dmat.rows / (2*(i+1))), 0.0, 0.0, cv::INTER_NEAREST); + + ftl::cuda::smoothing_factor( + frames_[i].createTexture<float>(Channel::Depth), + frames_[i].createTexture<float>(Channel::Energy, ftl::rgbd::Format<float>(frames_[i].get<GpuMat>(Channel::Depth).size())), + frames_[i].createTexture<float>(Channel::Smoothing, ftl::rgbd::Format<float>(frames_[i].get<GpuMat>(Channel::Depth).size())), + var_thresh, + s->parameters(), 0 + ); + + cv::cuda::resize(frames_[i].get<GpuMat>(Channel::Smoothing), temp_, f.get<cv::cuda::GpuMat>(Channel::Depth).size(), 0.0, 0.0, cv::INTER_LINEAR); + cv::cuda::add(temp_, f.get<GpuMat>(Channel::Smoothing), 
f.get<GpuMat>(Channel::Smoothing)); + }*/ + + //cv::cuda::subtract(f.get<GpuMat>(Channel::Depth), f.get<GpuMat>(Channel::Smoothing), f.get<GpuMat>(Channel::Depth)); + + return true; +} + +// ====== Smoothing Channel ==================================================== + +SmoothChannel::SmoothChannel(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +SmoothChannel::~SmoothChannel() { + +} + +bool SmoothChannel::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + int radius = config()->value("radius", 3); + float threshold = config()->value("threshold", 30.0f); + int iters = max(0, min(6, config()->value("levels", 4))); + + int width = s->parameters().width; + int height = s->parameters().height; + float scale = 1.0f; + + // Clear to max smoothing + out.create<GpuMat>(Channel::Smoothing, Format<float>(width, height)).setTo(cv::Scalar(1.0f)); + + // Reduce to nearest + ftl::cuda::smooth_channel( + in.createTexture<uchar4>(Channel::Colour), + //in.createTexture<float>(Channel::Depth), + out.createTexture<float>(Channel::Smoothing), + s->parameters(), + threshold, + scale, + radius, + stream + ); + + for (int i=0; i<iters; ++i) { + width /= 2; + height /= 2; + scale *= 2.0f; + + ftl::rgbd::Camera scaledCam = s->parameters().scaled(width, height); + + // Downscale images for next pass + cv::cuda::resize(in.get<GpuMat>(Channel::Colour), temp_[i].create<GpuMat>(Channel::Colour), cv::Size(width, height), 0.0, 0.0, cv::INTER_LINEAR); + //cv::cuda::resize(in.get<GpuMat>(Channel::Depth), temp_[i].create<GpuMat>(Channel::Depth), cv::Size(width, height), 0.0, 0.0, cv::INTER_NEAREST); + + ftl::cuda::smooth_channel( + temp_[i].createTexture<uchar4>(Channel::Colour), + //temp_[i].createTexture<float>(Channel::Depth), + out.getTexture<float>(Channel::Smoothing), + scaledCam, + threshold, + scale, + radius, + stream + ); + } + + return true; +} + + + +// ===== MLS 
=================================================================== + +SimpleMLS::SimpleMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +SimpleMLS::~SimpleMLS() { + +} + +bool SimpleMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + float thresh = config()->value("mls_threshold", 0.04f); + int iters = config()->value("mls_iterations", 1); + int radius = config()->value("mls_radius",2); + + if (!in.hasChannel(Channel::Normals)) { + /*ftl::cuda::normals( + in.createTexture<float4>(Channel::Normals, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + s->parameters(), 0 + );*/ + LOG(ERROR) << "Required normals channel missing for MLS"; + return false; + } + + // FIXME: Assume in and out are the same frame. + for (int i=0; i<iters; ++i) { + ftl::cuda::mls_smooth( + in.createTexture<float4>(Channel::Normals), + in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + thresh, + radius, + s->parameters(), + 0 + ); + + in.swapChannels(Channel::Depth, Channel::Depth2); + in.swapChannels(Channel::Normals, Channel::Points); + } + + return true; +} + + + +ColourMLS::ColourMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +ColourMLS::~ColourMLS() { + +} + +bool ColourMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + float thresh = config()->value("mls_threshold", 0.04f); + float col_smooth = config()->value("mls_colour_smoothing", 30.0f); + int iters = config()->value("mls_iterations", 1); + int radius = config()->value("mls_radius",2); + + if (!in.hasChannel(Channel::Normals)) { + LOG(ERROR) << "Required normals channel missing for MLS"; + 
return false; + } + + // FIXME: Assume in and out are the same frame. + for (int i=0; i<iters; ++i) { + ftl::cuda::colour_mls_smooth( + in.createTexture<float4>(Channel::Normals), + in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<uchar4>(Channel::Colour), + thresh, + col_smooth, + radius, + s->parameters(), + 0 + ); + + in.swapChannels(Channel::Depth, Channel::Depth2); + in.swapChannels(Channel::Normals, Channel::Points); + } + + return true; +} + + +// ====== Adaptive MLS ========================================================= + +AdaptiveMLS::AdaptiveMLS(ftl::Configurable *cfg) : ftl::operators::Operator(cfg) { + +} + +AdaptiveMLS::~AdaptiveMLS() { + +} + +bool AdaptiveMLS::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, ftl::rgbd::Source *s, cudaStream_t stream) { + int iters = config()->value("mls_iterations", 1); + int radius = config()->value("mls_radius",2); + + if (!in.hasChannel(Channel::Normals)) { + LOG(ERROR) << "Required normals channel missing for MLS"; + return false; + } + + // FIXME: Assume in and out are the same frame. 
+ for (int i=0; i<iters; ++i) { + ftl::cuda::adaptive_mls_smooth( + in.createTexture<float4>(Channel::Normals), + in.createTexture<float4>(Channel::Points, ftl::rgbd::Format<float4>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Depth), + in.createTexture<float>(Channel::Depth2, ftl::rgbd::Format<float>(in.get<cv::cuda::GpuMat>(Channel::Depth).size())), + in.createTexture<float>(Channel::Smoothing), + radius, + s->parameters(), + 0 + ); + + in.swapChannels(Channel::Depth, Channel::Depth2); + in.swapChannels(Channel::Normals, Channel::Points); + } + + return true; +} + diff --git a/components/operators/src/smoothing.cu b/components/operators/src/smoothing.cu new file mode 100644 index 0000000000000000000000000000000000000000..a2b7377fba011243c08cf5347f3dea4800b8ab51 --- /dev/null +++ b/components/operators/src/smoothing.cu @@ -0,0 +1,260 @@ +#include "smoothing_cuda.hpp" + +#include <ftl/cuda/weighting.hpp> + +using ftl::cuda::TextureObject; + +#define T_PER_BLOCK 8 + +// ===== Colour-based Smooth =================================================== + +template <int RADIUS> +__global__ void depth_smooth_kernel( + ftl::cuda::TextureObject<float> depth_in, + ftl::cuda::TextureObject<uchar4> colour_in, + ftl::cuda::TextureObject<float> depth_out, + ftl::rgbd::Camera camera, + float factor, float thresh) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth_in.width() && y < depth_in.height()) { + float d = depth_in.tex2D((int)x,(int)y); + depth_out(x,y) = 0.0f; + + if (d < camera.minDepth || d > camera.maxDepth) return; + + uchar4 c = colour_in.tex2D((int)x, (int)y); + float3 pos = camera.screenToCam(x,y,d); + + float contrib = 0.0f; + float new_depth = 0.0f; + + for (int v=-RADIUS; v<=RADIUS; ++v) { + for (int u=-RADIUS; u<=RADIUS; ++u) { + // Get colour difference to center + const uchar4 cN = colour_in.tex2D((int)x+u, (int)y+v); + const 
float colourWeight = ftl::cuda::colourWeighting(c, cN, thresh); + const float dN = depth_in.tex2D((int)x + u, (int)y + v); + const float3 posN = camera.screenToCam(x+u, y+v, dN); + const float weight = ftl::cuda::spatialWeighting(posN, pos, factor * colourWeight); + + contrib += weight; + new_depth += dN * weight; + } + } + + if (contrib > 0.0f) { + depth_out(x,y) = new_depth / contrib; + } + } +} + +void ftl::cuda::depth_smooth( + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<uchar4> &colour_in, + ftl::cuda::TextureObject<float> &depth_out, + const ftl::rgbd::Camera &camera, + int radius, float factor, float thresh, int iters, cudaStream_t stream) { + + const dim3 gridSize((depth_out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + for (int n=0; n<iters; ++n) { + switch (radius) { + case 5 : depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; + case 4 : depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; + case 3 : depth_smooth_kernel<3><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; + case 2 : depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; + case 1 : depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth_in, colour_in, depth_out, camera, factor, thresh); break; + default: break; + } + cudaSafeCall( cudaGetLastError() ); + + switch (radius) { + case 5 : depth_smooth_kernel<5><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; + case 4 : depth_smooth_kernel<4><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; + case 3 : depth_smooth_kernel<3><<<gridSize, blockSize, 0, 
stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; + case 2 : depth_smooth_kernel<2><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; + case 1 : depth_smooth_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth_out, colour_in, depth_in, camera, factor, thresh); break; + default: break; + } + cudaSafeCall( cudaGetLastError() ); + } + +#ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); +#endif +} + +// ==== SMOOTHING FACTOR ========== + +template <bool DERIV> +__device__ inline float getAverage(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2); + +template <> +__device__ inline float getAverage<false>(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2) { + float a = d.tex2D(x+x1,y+y1); + float b = d.tex2D(x+x2,y+y2); + return (a <= cam.minDepth || a > cam.maxDepth || b <= cam.minDepth || b > cam.maxDepth) ? 
dd : (a+b) / 2.0f; +} + +template <> +__device__ inline float getAverage<true>(const ftl::rgbd::Camera &cam, float dd, const TextureObject<float> &d, int x, int y, int x1, int y1, int x2, int y2) { + float a = d.tex2D(x+x1,y+y1); + float b = d.tex2D(x+x2,y+y2); + return (a+b) / 2.0f; +} + +__device__ inline void absmin(float &minvar, float v) { + if (fabs(minvar) > fabs(v)) minvar = v; +} + +template <bool DERIV> +__global__ void smoothing_factor_kernel( + ftl::cuda::TextureObject<float> depth_in, + //ftl::cuda::TextureObject<uchar4> colour_in, + ftl::cuda::TextureObject<float> smoothing, + //float thresh, + ftl::rgbd::Camera camera) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth_in.width() && y < depth_in.height()) { + float d = depth_in.tex2D((int)x,(int)y); + + //if (d < camera.minDepth || d > camera.maxDepth) return; + + float min_var = 10.0f; + float max_var = 0.0f; + + float avg = 0.0f; + float var; + + var = (d - getAverage<DERIV>(camera, d, depth_in, x, y, -1, -1, 1, 1)); + //avg += var; + absmin(min_var, var); + var = (d - getAverage<DERIV>(camera, d, depth_in, x, y, 0, -1, 0, 1)); + //avg += var; + absmin(min_var, var); + var = (d - getAverage<DERIV>(camera, d, depth_in, x, y, 1, -1, -1, 1)); + //avg += var; + absmin(min_var, var); + var = (d - getAverage<DERIV>(camera, d, depth_in, x, y, -1, 0, 1, 0)); + //avg += var; + absmin(min_var, var); + + // Clamp to threshold + //min_var = min(min_var, thresh); + //float s = 1.0f - (min_var / thresh); + smoothing(x,y) = min_var; + } +} + +__global__ void norm_thresh_kernel( + ftl::cuda::TextureObject<float> in, + ftl::cuda::TextureObject<float> error_value, + ftl::cuda::TextureObject<float> out, + float thresh) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < in.width() && y < in.height()) { + // Clamp to threshold + float 
min_var = min(in.tex2D((int)x,(int)y), thresh); + float s = min(1.0f, (fabs(min_var) / thresh)); + out(x,y) = s * error_value(x,y); + } +} + +__global__ void do_smooth_kernel( + ftl::cuda::TextureObject<float> smooth_strength, + //ftl::cuda::TextureObject<float> error_value, + ftl::cuda::TextureObject<float> depth) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth.width() && y < depth.height()) { + depth(x,y) = depth(x,y) - smooth_strength(x,y); + } +} + +template <int RADIUS> +__global__ void sum_neighbors_kernel( + ftl::cuda::TextureObject<float> depth_in, + ftl::cuda::TextureObject<float> depth_out, + ftl::rgbd::Camera camera, float alpha) { + + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < depth_out.width() && y < depth_out.height()) { + float avg = 0.0f; + float contrib = 0.0f; + + float d0 = depth_in.tex2D((int)x, (int)y); + float3 pos0 = camera.screenToCam(x,y,d0); + + for (int v=-RADIUS; v<=RADIUS; ++v) { + #pragma unroll + for (int u=-RADIUS; u<=RADIUS; ++u) { + float dN = depth_in.tex2D((int)x + u, (int)y + v); + float3 posN = camera.screenToCam(x+u,y+v,dN); + float weight = ftl::cuda::spatialWeighting(pos0, posN, alpha); + avg += weight * dN; + contrib += weight; + } + } + + depth_out(x,y) = avg / contrib; + } +} + +void ftl::cuda::smoothing_factor( + ftl::cuda::TextureObject<float> &depth_in, + //ftl::cuda::TextureObject<float> &depth_tmp, + ftl::cuda::TextureObject<float> &temp, + //ftl::cuda::TextureObject<uchar4> &colour_in, + ftl::cuda::TextureObject<float> &smoothing, + float thresh, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + + const dim3 gridSize((smoothing.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (smoothing.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + //smoothing_factor_kernel<<<gridSize, 
blockSize, 0, stream>>>(depth_in, temp, camera); + + //float thresh2 = thresh; + //float alpha = 0.04f; + + //for (int i=0; i<10; ++i) { + + smoothing_factor_kernel<false><<<gridSize, blockSize, 0, stream>>>(depth_in, temp, camera); + smoothing_factor_kernel<true><<<gridSize, blockSize, 0, stream>>>(temp, smoothing, camera); + norm_thresh_kernel<<<gridSize, blockSize, 0, stream>>>(smoothing, temp, smoothing, thresh); + do_smooth_kernel<<<gridSize, blockSize, 0, stream>>>(smoothing, depth_in); + + //do_smooth_kernel<<<gridSize, blockSize, 0, stream>>>(smoothing, bufs[(ix+1)%2], bufs[ix%2]); + //if (i == 0) sum_neighbors_kernel<1><<<gridSize, blockSize, 0, stream>>>(depth_in, bufs[ix%2], camera, alpha); + //else { + // sum_neighbors_kernel<2><<<gridSize, blockSize, 0, stream>>>(bufs[ix%2], bufs[(ix+1)%2], camera, alpha); + // ++ix; + //} + //thresh2 *= 2.0f; + //alpha *= 3.0f; + //} + + //sum_neighbors_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, temp); + + cudaSafeCall( cudaGetLastError() ); + + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + #endif +} diff --git a/components/operators/src/smoothing_cuda.hpp b/components/operators/src/smoothing_cuda.hpp new file mode 100644 index 0000000000000000000000000000000000000000..53e8cf898c4e99875dc4a9881df58700ee31c127 --- /dev/null +++ b/components/operators/src/smoothing_cuda.hpp @@ -0,0 +1,73 @@ +#ifndef _FTL_CUDA_SMOOTHING_HPP_ +#define _FTL_CUDA_SMOOTHING_HPP_ + +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> + +namespace ftl { +namespace cuda { + +void mls_smooth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + float smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void colour_mls_smooth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + 
ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<uchar4> &colour_in, + float smoothing, + float colour_smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void adaptive_mls_smooth( + ftl::cuda::TextureObject<float4> &normals_in, + ftl::cuda::TextureObject<float4> &normals_out, + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<float> &smoothing, + int radius, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +void smooth_channel( + ftl::cuda::TextureObject<uchar4> &colour_in, + //ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<float> &smoothing_out, + const ftl::rgbd::Camera &camera, + float alpha, + float scale, + int radius, + cudaStream_t stream); + +void depth_smooth( + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<uchar4> &colour_in, + ftl::cuda::TextureObject<float> &depth_out, + const ftl::rgbd::Camera &camera, + int radius, float factor, float thresh, int iters, + cudaStream_t stream); + +void smoothing_factor( + ftl::cuda::TextureObject<float> &depth_in, + //ftl::cuda::TextureObject<float> &depth_tmp, + ftl::cuda::TextureObject<float> &temp, + //ftl::cuda::TextureObject<uchar4> &colour_in, + ftl::cuda::TextureObject<float> &smoothing, + float thresh, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + +} +} + +#endif // _FTL_CUDA_SMOOTHING_HPP_ diff --git a/components/renderers/cpp/CMakeLists.txt b/components/renderers/cpp/CMakeLists.txt index 8fc7996efcbad8502bcc3dce0fef4fee8ffd327d..14bacda712522b930e632f7a4c4e5e4790091037 100644 --- a/components/renderers/cpp/CMakeLists.txt +++ b/components/renderers/cpp/CMakeLists.txt @@ -1,9 +1,13 @@ add_library(ftlrender - src/splat_render.cpp + src/splatter.cpp src/splatter.cu src/points.cu src/normals.cu src/mask.cu + src/screen.cu + src/triangle_render.cu + src/reprojection.cu + 
src/tri_render.cpp ) # These cause errors in CI build and are being removed from PCL in newer versions diff --git a/components/renderers/cpp/include/ftl/cuda/makers.hpp b/components/renderers/cpp/include/ftl/cuda/makers.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7994caaca7f01cf68b9daae91019c75b7b0691c1 --- /dev/null +++ b/components/renderers/cpp/include/ftl/cuda/makers.hpp @@ -0,0 +1,66 @@ +#ifndef _FTL_CUDA_MAKERS_HPP_ +#define _FTL_CUDA_MAKERS_HPP_ + +#include <ftl/cuda_common.hpp> + +__device__ inline float4 make_float4(const uchar4 &c) { + return make_float4(c.x,c.y,c.z,c.w); +} + +__device__ inline float4 make_float4(const float4 &v) { + return v; +} + +template <typename T> +__device__ inline T make(); + +template <> +__device__ inline uchar4 make() { + return make_uchar4(0,0,0,0); +} + +template <> +__device__ inline float4 make() { + return make_float4(0.0f,0.0f,0.0f,0.0f); +} + +template <> +__device__ inline float make() { + return 0.0f; +} + +template <typename T> +__device__ inline T make(const float4 &); + +template <> +__device__ inline uchar4 make(const float4 &v) { + return make_uchar4((int)v.x, (int)v.y, (int)v.z, (int)v.w); +} + +template <> +__device__ inline float4 make(const float4 &v) { + return v; +} + +template <> +__device__ inline float make(const float4 &v) { + return v.x; +} + +template <typename T> +__device__ inline T make(const uchar4 &v); + +template <> +__device__ inline float4 make(const uchar4 &v) { + return make_float4((float)v.x, (float)v.y, (float)v.z, (float)v.w); +} + +template <typename T> +__device__ inline T make(float v); + +template <> +__device__ inline float make(float v) { + return v; +} + +#endif // _FTL_CUDA_MAKERS_HPP_ diff --git a/components/renderers/cpp/include/ftl/cuda/normals.hpp b/components/renderers/cpp/include/ftl/cuda/normals.hpp index da2247723206cc9a1167ffbb0bc659ec847208c2..dc3d0265ce4c142861bb0ca0b2e841dc81887449 100644 --- 
a/components/renderers/cpp/include/ftl/cuda/normals.hpp +++ b/components/renderers/cpp/include/ftl/cuda/normals.hpp @@ -24,6 +24,19 @@ void normals(ftl::cuda::TextureObject<float4> &output, const ftl::rgbd::Camera &camera, const float3x3 &pose_inv, const float3x3 &pose, cudaStream_t stream); +void normals(ftl::cuda::TextureObject<float4> &output, + ftl::cuda::TextureObject<float4> &temp, + ftl::cuda::TextureObject<float> &input, + int radius, + float smoothing, + const ftl::rgbd::Camera &camera, + const float3x3 &pose_inv, const float3x3 &pose, cudaStream_t stream); + +void normals(ftl::cuda::TextureObject<float4> &output, + ftl::cuda::TextureObject<float> &input, // Integer depth values + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + void normal_visualise(ftl::cuda::TextureObject<float4> &norm, ftl::cuda::TextureObject<uchar4> &output, const float3 &light, const uchar4 &diffuse, const uchar4 &ambient, diff --git a/components/renderers/cpp/include/ftl/cuda/weighting.hpp b/components/renderers/cpp/include/ftl/cuda/weighting.hpp index 15d3dbcec387f97d8ffe60690bdb5c1fda2c098c..bffff673d2009ac698c3c604ec565a2aa1a89901 100644 --- a/components/renderers/cpp/include/ftl/cuda/weighting.hpp +++ b/components/renderers/cpp/include/ftl/cuda/weighting.hpp @@ -25,6 +25,11 @@ __device__ inline float spatialWeighting(const float3 &a, const float3 &b, float return rh*rh*rh*rh; } +__device__ inline float colourDistance(uchar4 a, uchar4 b) { + const float3 delta = make_float3((float)a.x - (float)b.x, (float)a.y - (float)b.y, (float)a.z - (float)b.z); + return length(delta); +} + /* * Colour weighting as suggested in: * C. Kuster et al. Spatio-Temporal Geometry Fusion for Multiple Hybrid Cameras using Moving Least Squares Surfaces. 2014. 
diff --git a/components/renderers/cpp/include/ftl/render/splat_params.hpp b/components/renderers/cpp/include/ftl/render/splat_params.hpp index 0509ee37f0d85163fde2b462ad0871f8a824070b..7c6b8416896f10aae816565c52813108c80bf385 100644 --- a/components/renderers/cpp/include/ftl/render/splat_params.hpp +++ b/components/renderers/cpp/include/ftl/render/splat_params.hpp @@ -18,6 +18,7 @@ struct __align__(16) SplatParams { uint m_flags; //float voxelSize; float depthThreshold; + int triangle_limit; ftl::rgbd::Camera camera; }; diff --git a/components/renderers/cpp/include/ftl/render/tri_render.hpp b/components/renderers/cpp/include/ftl/render/tri_render.hpp new file mode 100644 index 0000000000000000000000000000000000000000..60d3dcbf6a68576981c1d72194dae731e8592fec --- /dev/null +++ b/components/renderers/cpp/include/ftl/render/tri_render.hpp @@ -0,0 +1,58 @@ +#ifndef _FTL_RECONSTRUCTION_TRI_HPP_ +#define _FTL_RECONSTRUCTION_TRI_HPP_ + +#include <ftl/render/renderer.hpp> +#include <ftl/rgbd/frameset.hpp> +#include <ftl/render/splat_params.hpp> +#include <ftl/cuda/points.hpp> +//#include <ftl/filters/filter.hpp> + +namespace ftl { +namespace render { + +/** + * Generate triangles between connected points and render those. Colour is done + * by weighted reprojection to the original source images. 
+ */ +class Triangular : public ftl::render::Renderer { + public: + explicit Triangular(nlohmann::json &config, ftl::rgbd::FrameSet *fs); + ~Triangular(); + + bool render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) override; + //void setOutputDevice(int); + + protected: + void _renderChannel(ftl::rgbd::Frame &out, ftl::codecs::Channel channel_in, ftl::codecs::Channel channel_out, cudaStream_t stream); + + private: + int device_; + ftl::rgbd::Frame temp_; + ftl::rgbd::Frame accum_; + ftl::rgbd::FrameSet *scene_; + ftl::cuda::ClipSpace clip_; + bool clipping_; + float norm_filter_; + bool backcull_; + cv::Scalar background_; + bool mesh_; + float3 light_dir_; + uchar4 light_diffuse_; + uchar4 light_ambient_; + ftl::render::SplatParams params_; + cudaStream_t stream_; + float3 light_pos_; + + //ftl::Filters *filters_; + + template <typename T> + void __reprojectChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t); + void _reprojectChannel(ftl::rgbd::Frame &, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t); + void _dibr(ftl::rgbd::Frame &, cudaStream_t); + void _mesh(ftl::rgbd::Frame &, ftl::rgbd::Source *, cudaStream_t); +}; + +} +} + +#endif // _FTL_RECONSTRUCTION_TRI_HPP_ diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu index 14357d02ed00f212577e463051d315e6e5a90078..97452555dfb61fd5015aa6a1745b39e2ddcca409 100644 --- a/components/renderers/cpp/src/normals.cu +++ b/components/renderers/cpp/src/normals.cu @@ -45,11 +45,11 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, output(x,y) = make_float4(0, 0, 0, 0); if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) { - const float3 CC = camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 1000.0f); - const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 1000.0f); - const float3 CP = camera.screenToCam(x+1, y+0, 
(float)input.tex2D((int)x+1, (int)y+0) / 1000.0f); - const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 1000.0f); - const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 1000.0f); + const float3 CC = camera.screenToCam(x+0, y+0, (float)input.tex2D((int)x+0, (int)y+0) / 100000.0f); + const float3 PC = camera.screenToCam(x+0, y+1, (float)input.tex2D((int)x+0, (int)y+1) / 100000.0f); + const float3 CP = camera.screenToCam(x+1, y+0, (float)input.tex2D((int)x+1, (int)y+0) / 100000.0f); + const float3 MC = camera.screenToCam(x+0, y-1, (float)input.tex2D((int)x+0, (int)y-1) / 100000.0f); + const float3 CM = camera.screenToCam(x-1, y+0, (float)input.tex2D((int)x-1, (int)y+0) / 100000.0f); //if(CC.z < && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { if (isValid(camera,CC) && isValid(camera,PC) && isValid(camera,CP) && isValid(camera,MC) && isValid(camera,CM)) { @@ -63,6 +63,34 @@ __global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, } } +__global__ void computeNormals_kernel(ftl::cuda::TextureObject<float4> output, + ftl::cuda::TextureObject<float> input, ftl::rgbd::Camera camera) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if(x >= input.width() || y >= input.height()) return; + + output(x,y) = make_float4(0, 0, 0, 0); + + if(x > 0 && x < input.width()-1 && y > 0 && y < input.height()-1) { + const float3 CC = camera.screenToCam(x+0, y+0, input.tex2D((int)x+0, (int)y+0)); + const float3 PC = camera.screenToCam(x+0, y+1, input.tex2D((int)x+0, (int)y+1)); + const float3 CP = camera.screenToCam(x+1, y+0, input.tex2D((int)x+1, (int)y+0)); + const float3 MC = camera.screenToCam(x+0, y-1, input.tex2D((int)x+0, (int)y-1)); + const float3 CM = camera.screenToCam(x-1, y+0, input.tex2D((int)x-1, (int)y+0)); + + //if(CC.z < && PC.x != MINF && CP.x != MINF && MC.x != MINF && CM.x != MINF) { 
+ if (isValid(camera,CC) && isValid(camera,PC) && isValid(camera,CP) && isValid(camera,MC) && isValid(camera,CM)) { + const float3 n = cross(PC-MC, CP-CM); + const float l = length(n); + + if(l > 0.0f) { + output(x,y) = make_float4((n/-l), 1.0f); + } + } + } +} + template <int RADIUS> __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, ftl::cuda::TextureObject<float4> output, @@ -118,7 +146,7 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, if(x >= depth.width() || y >= depth.height()) return; - const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 1000.0f); + const float3 p0 = camera.screenToCam(x,y, (float)depth.tex2D((int)x,(int)y) / 100000.0f); float3 nsum = make_float3(0.0f); float contrib = 0.0f; @@ -128,7 +156,7 @@ __global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, for (int v=-RADIUS; v<=RADIUS; ++v) { for (int u=-RADIUS; u<=RADIUS; ++u) { - const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 1000.0f); + const float3 p = camera.screenToCam(x+u,y+v, (float)depth.tex2D((int)x+u,(int)y+v) / 100000.0f); if (p.z < camera.minDepth || p.z > camera.maxDepth) continue; const float s = ftl::cuda::spatialWeighting(p0, p, smoothing); //const float s = 1.0f; @@ -180,6 +208,51 @@ __global__ void smooth_normals_kernel<0>(ftl::cuda::TextureObject<float4> norms, output(x,y) = n; } +template <int RADIUS> +__global__ void smooth_normals_kernel(ftl::cuda::TextureObject<float4> norms, + ftl::cuda::TextureObject<float4> output, + ftl::cuda::TextureObject<float> depth, + ftl::rgbd::Camera camera, float3x3 pose, float smoothing) { + const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; + const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; + + if(x >= depth.width() || y >= depth.height()) return; + + const float3 p0 = camera.screenToCam(x,y, depth.tex2D((int)x,(int)y)); + float3 nsum = make_float3(0.0f); + float contrib = 0.0f; + 
+ output(x,y) = make_float4(0.0f,0.0f,0.0f,0.0f); + + if (p0.z < camera.minDepth || p0.z > camera.maxDepth) return; + + for (int v=-RADIUS; v<=RADIUS; ++v) { + for (int u=-RADIUS; u<=RADIUS; ++u) { + const float3 p = camera.screenToCam(x+u,y+v, depth.tex2D((int)x+u,(int)y+v)); + if (p.z < camera.minDepth || p.z > camera.maxDepth) continue; + const float s = ftl::cuda::spatialWeighting(p0, p, smoothing); + //const float s = 1.0f; + + //if (s > 0.0f) { + const float4 n = norms.tex2D((int)x+u,(int)y+v); + if (n.w > 0.0f) { + nsum += make_float3(n) * s; + contrib += s; + } + //} + } + } + + // Compute dot product of normal with camera to obtain measure of how + // well this point faces the source camera, a measure of confidence + float3 ray = camera.screenToCam(x, y, 1.0f); + ray = ray / length(ray); + nsum /= contrib; + nsum /= length(nsum); + + output(x,y) = (contrib > 0.0f) ? make_float4(pose*nsum, 1.0f) : make_float4(0.0f); +} + void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, ftl::cuda::TextureObject<float4> &temp, ftl::cuda::TextureObject<float4> &input, @@ -223,10 +296,40 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input, camera, pose); cudaSafeCall( cudaGetLastError() ); + switch (radius) { + case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + case 2: smooth_normals_kernel<2><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + case 1: smooth_normals_kernel<1><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + } + cudaSafeCall( cudaGetLastError() ); + + #ifdef _DEBUG + 
cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); + #endif +} + +void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, + ftl::cuda::TextureObject<float4> &temp, + ftl::cuda::TextureObject<float> &input, + int radius, + float smoothing, + const ftl::rgbd::Camera &camera, + const float3x3 &pose_inv, const float3x3 &pose,cudaStream_t stream) { + const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(temp, input, camera); + cudaSafeCall( cudaGetLastError() ); + switch (radius) { case 7: smooth_normals_kernel<7><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); case 5: smooth_normals_kernel<5><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); case 3: smooth_normals_kernel<3><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + case 2: smooth_normals_kernel<2><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); + case 1: smooth_normals_kernel<1><<<gridSize, blockSize, 0, stream>>>(temp, output, input, camera, pose, smoothing); } cudaSafeCall( cudaGetLastError() ); @@ -236,6 +339,22 @@ void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, #endif } +void ftl::cuda::normals(ftl::cuda::TextureObject<float4> &output, + ftl::cuda::TextureObject<float> &input, + const ftl::rgbd::Camera &camera, + cudaStream_t stream) { + const dim3 gridSize((input.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (input.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + computeNormals_kernel<<<gridSize, blockSize, 0, stream>>>(output, input, camera); + cudaSafeCall( cudaGetLastError() ); + + #ifdef _DEBUG + cudaSafeCall(cudaDeviceSynchronize()); + //cutilCheckMsg(__FUNCTION__); + #endif +} + 
//============================================================================== __global__ void vis_normals_kernel(ftl::cuda::TextureObject<float4> norm, diff --git a/components/renderers/cpp/src/reprojection.cu b/components/renderers/cpp/src/reprojection.cu new file mode 100644 index 0000000000000000000000000000000000000000..e74b4bb60963ab7d7d1798a96b22c90916a7ffc3 --- /dev/null +++ b/components/renderers/cpp/src/reprojection.cu @@ -0,0 +1,276 @@ +#include <ftl/render/splat_params.hpp> +#include "splatter_cuda.hpp" +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> + +#include <ftl/cuda/weighting.hpp> +#include <ftl/cuda/makers.hpp> + +#define T_PER_BLOCK 8 +#define ACCUM_DIAMETER 8 + +using ftl::cuda::TextureObject; +using ftl::render::SplatParams; +using ftl::rgbd::Camera; + +/*template <typename T> +__device__ inline T generateInput(const T &in, const SplatParams ¶ms, const float4 &worldPos) { + return in; +} + +template <> +__device__ inline uchar4 generateInput(const uchar4 &in, const SplatParams ¶ms, const float4 &worldPos) { + return (params.m_flags & ftl::render::kShowDisconMask && worldPos.w < 0.0f) ? 
+ make_uchar4(0,0,255,255) : // Show discontinuity mask in red + in; +}*/ + +template <typename A, typename B> +__device__ inline B weightInput(const A &in, float weight) { + return in * weight; +} + +template <> +__device__ inline float4 weightInput(const uchar4 &in, float weight) { + return make_float4( + (float)in.x * weight, + (float)in.y * weight, + (float)in.z * weight, + (float)in.w * weight); +} + +template <typename T> +__device__ inline void accumulateOutput(TextureObject<T> &out, TextureObject<float> &contrib, const uint2 &pos, const T &in, float w) { + atomicAdd(&out(pos.x, pos.y), in); + atomicAdd(&contrib(pos.x, pos.y), w); +} + +template <> +__device__ inline void accumulateOutput(TextureObject<float4> &out, TextureObject<float> &contrib, const uint2 &pos, const float4 &in, float w) { + atomicAdd((float*)&out(pos.x, pos.y), in.x); + atomicAdd(((float*)&out(pos.x, pos.y))+1, in.y); + atomicAdd(((float*)&out(pos.x, pos.y))+2, in.z); + atomicAdd(((float*)&out(pos.x, pos.y))+3, in.w); + atomicAdd(&contrib(pos.x, pos.y), w); +} + +/* + * Pass 2: Accumulate attribute contributions if the points pass a visibility test. 
+ */ + template <typename A, typename B> +__global__ void reprojection_kernel( + TextureObject<A> in, // Attribute input + TextureObject<float> depth_src, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<float4> normals, + TextureObject<B> out, // Accumulated output + TextureObject<float> contrib, + SplatParams params, + Camera camera, float4x4 poseInv) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float d = depth_in.tex2D((int)x, (int)y); + if (d < params.camera.minDepth || d > params.camera.maxDepth) return; + + const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); + //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; + + const float3 camPos = poseInv * worldPos; + if (camPos.z < camera.minDepth) return; + if (camPos.z > camera.maxDepth) return; + const uint2 screenPos = camera.camToScreen<uint2>(camPos); + + // Not on screen so stop now... + if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return; + + // Calculate the dot product of surface normal and camera ray + const float3 n = poseInv.getFloat3x3() * make_float3(normals.tex2D((int)x, (int)y)); + float3 ray = camera.screenToCam(screenPos.x, screenPos.y, 1.0f); + ray = ray / length(ray); + const float dotproduct = max(dot(ray,n),0.0f); + + const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y); + const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); + + // TODO: Z checks need to interpolate between neighbors if large triangles are used + float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f); + + /* Buehler C. et al. 2001. Unstructured Lumigraph Rendering. */ + /* Orts-Escolano S. et al. 2016. Holoportation: Virtual 3D teleportation in real-time. 
*/ + // This is the simple naive colour weighting. It might be good + // enough for our purposes if the alignment step prevents ghosting + // TODO: Use depth and perhaps the neighbourhood consistency in: + // Kuster C. et al. 2011. FreeCam: A hybrid camera system for interactive free-viewpoint video + if (params.m_flags & ftl::render::kNormalWeightColours) weight *= dotproduct; + + const B weighted = make<B>(input) * weight; //weightInput(input, weight); + + if (weight > 0.0f) { + accumulateOutput(out, contrib, make_uint2(x,y), weighted, weight); + //out(screenPos.x, screenPos.y) = input; + } +} + + +template <typename A, typename B> +void ftl::cuda::reproject( + TextureObject<A> &in, + TextureObject<float> &depth_src, // Original 3D points + TextureObject<float> &depth_in, // Virtual depth map + TextureObject<float4> &normals, + TextureObject<B> &out, // Accumulated output + TextureObject<float> &contrib, + const SplatParams ¶ms, + const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { + const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + reprojection_kernel<<<gridSize, blockSize, 0, stream>>>( + in, + depth_src, + depth_in, + normals, + out, + contrib, + params, + camera, + poseInv + ); + cudaSafeCall( cudaGetLastError() ); +} + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<uchar4> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &normals, + ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<float> &in, // Original colour image + 
ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &normals, + ftl::cuda::TextureObject<float> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<float4> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &normals, + ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + +//============================================================================== +// Without normals +//============================================================================== + +/* + * Pass 2: Accumulate attribute contributions if the points pass a visibility test. 
+ */ + template <typename A, typename B> +__global__ void reprojection_kernel( + TextureObject<A> in, // Attribute input + TextureObject<float> depth_src, + TextureObject<float> depth_in, // Virtual depth map + TextureObject<B> out, // Accumulated output + TextureObject<float> contrib, + SplatParams params, + Camera camera, float4x4 poseInv) { + + const int x = (blockIdx.x*blockDim.x + threadIdx.x); + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + const float d = depth_in.tex2D((int)x, (int)y); + if (d < params.camera.minDepth || d > params.camera.maxDepth) return; + + const float3 worldPos = params.m_viewMatrixInverse * params.camera.screenToCam(x, y, d); + //if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; + + const float3 camPos = poseInv * worldPos; + if (camPos.z < camera.minDepth) return; + if (camPos.z > camera.maxDepth) return; + const uint2 screenPos = camera.camToScreen<uint2>(camPos); + + // Not on screen so stop now... 
+ if (screenPos.x >= depth_src.width() || screenPos.y >= depth_src.height()) return; + + const float d2 = depth_src.tex2D((int)screenPos.x, (int)screenPos.y); + const A input = in.tex2D((int)screenPos.x, (int)screenPos.y); //generateInput(in.tex2D((int)screenPos.x, (int)screenPos.y), params, worldPos); + float weight = ftl::cuda::weighting(fabs(camPos.z - d2), 0.02f); + const B weighted = make<B>(input) * weight; + + if (weight > 0.0f) { + accumulateOutput(out, contrib, make_uint2(x,y), weighted, weight); + //out(screenPos.x, screenPos.y) = input; + } +} + + +template <typename A, typename B> +void ftl::cuda::reproject( + TextureObject<A> &in, + TextureObject<float> &depth_src, // Original 3D points + TextureObject<float> &depth_in, // Virtual depth map + TextureObject<B> &out, // Accumulated output + TextureObject<float> &contrib, + const SplatParams ¶ms, + const Camera &camera, const float4x4 &poseInv, cudaStream_t stream) { + const dim3 gridSize((out.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (out.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + reprojection_kernel<<<gridSize, blockSize, 0, stream>>>( + in, + depth_src, + depth_in, + out, + contrib, + params, + camera, + poseInv + ); + cudaSafeCall( cudaGetLastError() ); +} + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<uchar4> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<float> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + 
ftl::cuda::TextureObject<float> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + +template void ftl::cuda::reproject( + ftl::cuda::TextureObject<float4> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); diff --git a/components/renderers/cpp/src/screen.cu b/components/renderers/cpp/src/screen.cu new file mode 100644 index 0000000000000000000000000000000000000000..8610359d2bbec8eaa6f6972116a5f58cf6433b7a --- /dev/null +++ b/components/renderers/cpp/src/screen.cu @@ -0,0 +1,44 @@ +#include <ftl/render/splat_params.hpp> +#include "splatter_cuda.hpp" +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> + +using ftl::rgbd::Camera; +using ftl::cuda::TextureObject; +using ftl::render::SplatParams; + +#define T_PER_BLOCK 8 + +/* + * Convert source screen position to output screen coordinates. 
+ */ + __global__ void screen_coord_kernel(TextureObject<float> depth, + TextureObject<float> depth_out, + TextureObject<short2> screen_out, SplatParams params, float4x4 pose, Camera camera) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + uint2 screenPos = make_uint2(30000,30000); + screen_out(x,y) = make_short2(screenPos.x, screenPos.y); + + const float d = depth.tex2D(x, y); + const float3 worldPos = pose * camera.screenToCam(x,y,d); + if (d < camera.minDepth || d > camera.maxDepth) return; + + // Find the virtual screen position of current point + const float3 camPos = params.m_viewMatrix * worldPos; + screenPos = params.camera.camToScreen<uint2>(camPos); + + if (camPos.z < params.camera.minDepth || camPos.z > params.camera.maxDepth || screenPos.x >= params.camera.width || screenPos.y >= params.camera.height) + screenPos = make_uint2(30000,30000); + screen_out(x,y) = make_short2(screenPos.x, screenPos.y); + depth_out(x,y) = camPos.z; +} + +void ftl::cuda::screen_coord(TextureObject<float> &depth, TextureObject<float> &depth_out, TextureObject<short2> &screen_out, const SplatParams ¶ms, const float4x4 &pose, const Camera &camera, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + screen_coord_kernel<<<gridSize, blockSize, 0, stream>>>(depth, depth_out, screen_out, params, pose, camera); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splatter.cpp similarity index 100% rename from components/renderers/cpp/src/splat_render.cpp rename to components/renderers/cpp/src/splatter.cpp diff --git a/components/renderers/cpp/src/splatter.cu b/components/renderers/cpp/src/splatter.cu index 3a9270e542ee58e836410e0de598cbd4ab9e269c..6346daa883019453bef1d2f80be36ba5afa0aaf2 100644 --- 
a/components/renderers/cpp/src/splatter.cu +++ b/components/renderers/cpp/src/splatter.cu @@ -6,6 +6,7 @@ #include <ftl/cuda/weighting.hpp> #include <ftl/cuda/intersections.hpp> #include <ftl/cuda/warp.hpp> +#include <ftl/cuda/makers.hpp> #define T_PER_BLOCK 8 #define UPSAMPLE_FACTOR 1.8f @@ -74,76 +75,53 @@ using ftl::cuda::warpSum; } } -void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<float4> &normals, TextureObject<int> &depth, SplatParams params, bool culling, cudaStream_t stream) { - const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); - const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); - - if (culling) dibr_merge_kernel<true><<<gridSize, blockSize, 0, stream>>>(points, normals, depth, params); - else dibr_merge_kernel<false><<<gridSize, blockSize, 0, stream>>>(points, normals, depth, params); - cudaSafeCall( cudaGetLastError() ); -} - -//============================================================================== - -__device__ inline float4 make_float4(const uchar4 &c) { - return make_float4(c.x,c.y,c.z,c.w); -} - -__device__ inline float4 make_float4(const float4 &v) { - return v; -} - -template <typename T> -__device__ inline T make(); - -template <> -__device__ inline uchar4 make() { - return make_uchar4(0,0,0,0); -} +/* + * Pass 1: Directly render each camera into virtual view but with no upsampling + * for sparse points. 
+ */ + __global__ void dibr_merge_kernel(TextureObject<float4> points, + TextureObject<int> depth, SplatParams params) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; -template <> -__device__ inline float4 make() { - return make_float4(0.0f,0.0f,0.0f,0.0f); -} + const float4 worldPos = points.tex2D(x, y); + if (worldPos.x == MINF || (!(params.m_flags & ftl::render::kShowDisconMask) && worldPos.w < 0.0f)) return; -template <> -__device__ inline float make() { - return 0.0f; -} + // Find the virtual screen position of current point + const float3 camPos = params.m_viewMatrix * make_float3(worldPos); + if (camPos.z < params.camera.minDepth) return; + if (camPos.z > params.camera.maxDepth) return; -template <typename T> -__device__ inline T make(const float4 &); + const float d = camPos.z; -template <> -__device__ inline uchar4 make(const float4 &v) { - return make_uchar4((int)v.x, (int)v.y, (int)v.z, (int)v.w); + const uint2 screenPos = params.camera.camToScreen<uint2>(camPos); + const unsigned int cx = screenPos.x; + const unsigned int cy = screenPos.y; + if (d > params.camera.minDepth && d < params.camera.maxDepth && cx < depth.width() && cy < depth.height()) { + // Transform estimated point to virtual cam space and output z + atomicMin(&depth(cx,cy), d * 1000.0f); + } } -template <> -__device__ inline float4 make(const float4 &v) { - return v; -} +void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<float4> &normals, TextureObject<int> &depth, SplatParams params, bool culling, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); -template <> -__device__ inline float make(const float4 &v) { - return v.x; + if (culling) dibr_merge_kernel<true><<<gridSize, blockSize, 0, stream>>>(points, normals, depth, params); + else dibr_merge_kernel<false><<<gridSize, 
blockSize, 0, stream>>>(points, normals, depth, params); + cudaSafeCall( cudaGetLastError() ); } -template <typename T> -__device__ inline T make(const uchar4 &v); +void ftl::cuda::dibr_merge(TextureObject<float4> &points, TextureObject<int> &depth, SplatParams params, cudaStream_t stream) { + const dim3 gridSize((depth.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); -template <> -__device__ inline float4 make(const uchar4 &v) { - return make_float4((float)v.x, (float)v.y, (float)v.z, (float)v.w); + dibr_merge_kernel<<<gridSize, blockSize, 0, stream>>>(points, depth, params); + cudaSafeCall( cudaGetLastError() ); } -template <typename T> -__device__ inline T make(float v); +//============================================================================== -template <> -__device__ inline float make(float v) { - return v; -} /* * Pass 1b: Expand splats to full size and merge diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp index 42ec8a0c7dbca51b1c3fe7e22ba2c38eb2447169..838eda409761c7420a35944a45900ace0912f124 100644 --- a/components/renderers/cpp/src/splatter_cuda.hpp +++ b/components/renderers/cpp/src/splatter_cuda.hpp @@ -6,6 +6,29 @@ namespace ftl { namespace cuda { + void screen_coord( + ftl::cuda::TextureObject<float> &depth, + ftl::cuda::TextureObject<float> &depth_out, + ftl::cuda::TextureObject<short2> &screen_out, + const ftl::render::SplatParams ¶ms, + const float4x4 &pose, + const ftl::rgbd::Camera &camera, + cudaStream_t stream); + + void triangle_render1( + ftl::cuda::TextureObject<float> &depth_in, + ftl::cuda::TextureObject<int> &depth_out, + ftl::cuda::TextureObject<short2> &screen, + const ftl::render::SplatParams ¶ms, + cudaStream_t stream); + + void mesh_blender( + ftl::cuda::TextureObject<int> &depth_in, + ftl::cuda::TextureObject<int> &depth_out, + const ftl::rgbd::Camera &camera, + float alpha, + 
cudaStream_t stream); + void dibr_merge( ftl::cuda::TextureObject<float4> &points, ftl::cuda::TextureObject<float4> &normals, @@ -14,6 +37,12 @@ namespace cuda { bool culling, cudaStream_t stream); + void dibr_merge( + ftl::cuda::TextureObject<float4> &points, + ftl::cuda::TextureObject<int> &depth, + ftl::render::SplatParams params, + cudaStream_t stream); + template <typename T> void splat( ftl::cuda::TextureObject<float4> &normals, @@ -33,6 +62,29 @@ namespace cuda { ftl::cuda::TextureObject<float> &contrib, ftl::render::SplatParams ¶ms, cudaStream_t stream); + template <typename A, typename B> + void reproject( + ftl::cuda::TextureObject<A> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<float4> &normals, + ftl::cuda::TextureObject<B> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + + template <typename A, typename B> + void reproject( + ftl::cuda::TextureObject<A> &in, // Original colour image + ftl::cuda::TextureObject<float> &depth_src, // Original 3D points + ftl::cuda::TextureObject<float> &depth_in, // Virtual depth map + ftl::cuda::TextureObject<B> &out, // Accumulated output + ftl::cuda::TextureObject<float> &contrib, + const ftl::render::SplatParams ¶ms, + const ftl::rgbd::Camera &camera, + const float4x4 &poseInv, cudaStream_t stream); + template <typename A, typename B> void dibr_normalise( ftl::cuda::TextureObject<A> &in, diff --git a/components/renderers/cpp/src/tri_render.cpp b/components/renderers/cpp/src/tri_render.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d60a5f6685ac7d292d4a67b0d23df8b231f3b21 --- /dev/null +++ b/components/renderers/cpp/src/tri_render.cpp @@ -0,0 +1,595 @@ +#include <ftl/render/tri_render.hpp> +#include 
<ftl/utility/matrix_conversion.hpp> +#include "splatter_cuda.hpp" +#include <ftl/cuda/points.hpp> +#include <ftl/cuda/normals.hpp> +#include <ftl/cuda/mask.hpp> + +#include <opencv2/core/cuda_stream_accessor.hpp> + +//#include <ftl/filters/smoothing.hpp> + +#include <string> + +using ftl::render::Triangular; +using ftl::codecs::Channel; +using ftl::codecs::Channels; +using ftl::rgbd::Format; +using cv::cuda::GpuMat; +using std::stoul; +using ftl::cuda::Mask; + +static Eigen::Affine3d create_rotation_matrix(float ax, float ay, float az) { + Eigen::Affine3d rx = + Eigen::Affine3d(Eigen::AngleAxisd(ax, Eigen::Vector3d(1, 0, 0))); + Eigen::Affine3d ry = + Eigen::Affine3d(Eigen::AngleAxisd(ay, Eigen::Vector3d(0, 1, 0))); + Eigen::Affine3d rz = + Eigen::Affine3d(Eigen::AngleAxisd(az, Eigen::Vector3d(0, 0, 1))); + return rz * rx * ry; +} + +/* + * Parse a CSS style colour string into a scalar. + */ +static cv::Scalar parseCVColour(const std::string &colour) { + std::string c = colour; + if (c[0] == '#') { + c.erase(0, 1); + unsigned long value = stoul(c.c_str(), nullptr, 16); + return cv::Scalar( + (value >> 0) & 0xff, + (value >> 8) & 0xff, + (value >> 16) & 0xff, + (value >> 24) & 0xff + ); + } + + return cv::Scalar(0,0,0,0); +} + +/* + * Parse a CSS style colour string into a scalar. 
+ */ +static uchar4 parseCUDAColour(const std::string &colour) { + std::string c = colour; + if (c[0] == '#') { + c.erase(0, 1); + unsigned long value = stoul(c.c_str(), nullptr, 16); + return make_uchar4( + (value >> 0) & 0xff, + (value >> 8) & 0xff, + (value >> 16) & 0xff, + (value >> 24) & 0xff + ); + } + + return make_uchar4(0,0,0,0); +} + +Triangular::Triangular(nlohmann::json &config, ftl::rgbd::FrameSet *fs) : ftl::render::Renderer(config), scene_(fs) { + if (config["clipping"].is_object()) { + auto &c = config["clipping"]; + float rx = c.value("pitch", 0.0f); + float ry = c.value("yaw", 0.0f); + float rz = c.value("roll", 0.0f); + float x = c.value("x", 0.0f); + float y = c.value("y", 0.0f); + float z = c.value("z", 0.0f); + float width = c.value("width", 1.0f); + float height = c.value("height", 1.0f); + float depth = c.value("depth", 1.0f); + + Eigen::Affine3f r = create_rotation_matrix(rx, ry, rz).cast<float>(); + Eigen::Translation3f trans(Eigen::Vector3f(x,y,z)); + Eigen::Affine3f t(trans); + + clip_.origin = MatrixConversion::toCUDA(r.matrix() * t.matrix()); + clip_.size = make_float3(width, height, depth); + clipping_ = value("clipping_enabled", true); + } else { + clipping_ = false; + } + + on("clipping_enabled", [this](const ftl::config::Event &e) { + clipping_ = value("clipping_enabled", true); + }); + + norm_filter_ = value("normal_filter", -1.0f); + on("normal_filter", [this](const ftl::config::Event &e) { + norm_filter_ = value("normal_filter", -1.0f); + }); + + backcull_ = value("back_cull", true); + on("back_cull", [this](const ftl::config::Event &e) { + backcull_ = value("back_cull", true); + }); + + mesh_ = value("meshing", true); + on("meshing", [this](const ftl::config::Event &e) { + mesh_ = value("meshing", true); + }); + + background_ = parseCVColour(value("background", std::string("#4c4c4c"))); + on("background", [this](const ftl::config::Event &e) { + background_ = parseCVColour(value("background", std::string("#4c4c4c"))); + }); + + 
light_diffuse_ = parseCUDAColour(value("diffuse", std::string("#e0e0e0"))); + on("diffuse", [this](const ftl::config::Event &e) { + light_diffuse_ = parseCUDAColour(value("diffuse", std::string("#e0e0e0"))); + }); + + light_ambient_ = parseCUDAColour(value("ambient", std::string("#0e0e0e"))); + on("ambient", [this](const ftl::config::Event &e) { + light_ambient_ = parseCUDAColour(value("ambient", std::string("#0e0e0e"))); + }); + + light_pos_ = make_float3(value("light_x", 0.3f), value("light_y", 0.2f), value("light_z", 1.0f)); + on("light_x", [this](const ftl::config::Event &e) { light_pos_.x = value("light_x", 0.3f); }); + on("light_y", [this](const ftl::config::Event &e) { light_pos_.y = value("light_y", 0.3f); }); + on("light_z", [this](const ftl::config::Event &e) { light_pos_.z = value("light_z", 0.3f); }); + + cudaSafeCall(cudaStreamCreate(&stream_)); + + //filters_ = ftl::create<ftl::Filters>(this, "filters"); + //filters_->create<ftl::filters::DepthSmoother>("hfnoise"); +} + +Triangular::~Triangular() { + +} + +template <typename T> +struct AccumSelector { + typedef float4 type; + static constexpr Channel channel = Channel::Colour; + //static constexpr cv::Scalar value = cv::Scalar(0.0f,0.0f,0.0f,0.0f); +}; + +template <> +struct AccumSelector<float> { + typedef float type; + static constexpr Channel channel = Channel::Colour2; + //static constexpr cv::Scalar value = cv::Scalar(0.0f); +}; + +/*template <typename T> +void Triangular::__blendChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + temp_.create<GpuMat>( + AccumSelector<T>::channel, + Format<typename AccumSelector<T>::type>(params_.camera.width, params_.camera.height) + ).setTo(cv::Scalar(0.0f), cvstream); + temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + + temp_.createTexture<float>(Channel::Contribution); + + for (auto &f : 
scene_->frames) { + if (f.get<GpuMat>(in).type() == CV_8UC3) { + // Convert to 4 channel colour + auto &col = f.get<GpuMat>(in); + GpuMat tmp(col.size(), CV_8UC4); + cv::cuda::swap(col, tmp); + cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); + } + + ftl::cuda::dibr_attribute( + f.createTexture<T>(in), + f.createTexture<float4>(Channel::Points), + temp_.getTexture<int>(Channel::Depth2), + temp_.createTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), + temp_.getTexture<float>(Channel::Contribution), + params_, stream + ); + } + + ftl::cuda::dibr_normalise( + temp_.getTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), + output.createTexture<T>(out), + temp_.getTexture<float>(Channel::Contribution), + stream + ); +}*/ + +template <typename T> +void Triangular::__reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + temp_.create<GpuMat>( + AccumSelector<T>::channel, + Format<typename AccumSelector<T>::type>(params_.camera.width, params_.camera.height) + ).setTo(cv::Scalar(0.0f), cvstream); + temp_.get<GpuMat>(Channel::Contribution).setTo(cv::Scalar(0.0f), cvstream); + + temp_.createTexture<float>(Channel::Contribution); + + for (size_t i=0; i < scene_->frames.size(); ++i) { + auto &f = scene_->frames[i]; + auto *s = scene_->sources[i]; + + if (f.get<GpuMat>(in).type() == CV_8UC3) { + // Convert to 4 channel colour + auto &col = f.get<GpuMat>(in); + GpuMat tmp(col.size(), CV_8UC4); + cv::cuda::swap(col, tmp); + cv::cuda::cvtColor(tmp,col, cv::COLOR_BGR2BGRA); + } + + auto poseInv = MatrixConversion::toCUDA(s->getPose().cast<float>().inverse()); + + if (mesh_) { + ftl::cuda::reproject( + f.createTexture<T>(in), + f.createTexture<float>(Channel::Depth), + output.getTexture<float>(Channel::Depth), + output.getTexture<float4>(Channel::Normals), + temp_.createTexture<typename 
AccumSelector<T>::type>(AccumSelector<T>::channel), + temp_.getTexture<float>(Channel::Contribution), + params_, + s->parameters(), + poseInv, stream + ); + } else { + // Can't use normals with point cloud version + ftl::cuda::reproject( + f.createTexture<T>(in), + f.createTexture<float>(Channel::Depth), + output.getTexture<float>(Channel::Depth), + temp_.createTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), + temp_.getTexture<float>(Channel::Contribution), + params_, + s->parameters(), + poseInv, stream + ); + } + } + + ftl::cuda::dibr_normalise( + temp_.getTexture<typename AccumSelector<T>::type>(AccumSelector<T>::channel), + output.createTexture<T>(out), + temp_.getTexture<float>(Channel::Contribution), + stream + ); +} + +/*void Triangular::_blendChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { + int type = output.get<GpuMat>(out).type(); // == CV_32F; //ftl::rgbd::isFloatChannel(channel); + + switch (type) { + case CV_32F : __blendChannel<float>(output, in, out, stream); break; + case CV_32FC4 : __blendChannel<float4>(output, in, out, stream); break; + case CV_8UC4 : __blendChannel<uchar4>(output, in, out, stream); break; + default : LOG(ERROR) << "Invalid output channel format"; + } +}*/ + +void Triangular::_reprojectChannel(ftl::rgbd::Frame &output, ftl::codecs::Channel in, ftl::codecs::Channel out, cudaStream_t stream) { + int type = output.get<GpuMat>(out).type(); // == CV_32F; //ftl::rgbd::isFloatChannel(channel); + + switch (type) { + case CV_32F : __reprojectChannel<float>(output, in, out, stream); break; + case CV_32FC4 : __reprojectChannel<float4>(output, in, out, stream); break; + case CV_8UC4 : __reprojectChannel<uchar4>(output, in, out, stream); break; + default : LOG(ERROR) << "Invalid output channel format"; + } +} + +void Triangular::_dibr(ftl::rgbd::Frame &out, cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + 
temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); + + for (size_t i=0; i < scene_->frames.size(); ++i) { + auto &f = scene_->frames[i]; + auto *s = scene_->sources[i]; + + if (f.empty(Channel::Depth + Channel::Colour)) { + LOG(ERROR) << "Missing required channel"; + continue; + } + + ftl::cuda::dibr_merge( + f.createTexture<float4>(Channel::Points), + temp_.createTexture<int>(Channel::Depth2), + params_, stream + ); + } + + // Convert from int depth to float depth + temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); +} + +void Triangular::_mesh(ftl::rgbd::Frame &out, ftl::rgbd::Source *src, cudaStream_t stream) { + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + bool do_blend = value("mesh_blend", true); + float blend_alpha = value("blend_alpha", 0.02f); + if (do_blend) { + temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), cvstream); + temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); + } else { + temp_.get<GpuMat>(Channel::Depth2).setTo(cv::Scalar(0x7FFFFFFF), cvstream); + } + + // For each source depth map + for (size_t i=0; i < scene_->frames.size(); ++i) { + auto &f = scene_->frames[i]; + auto *s = scene_->sources[i]; + + if (f.empty(Channel::Depth + Channel::Colour)) { + LOG(ERROR) << "Missing required channel"; + continue; + } + + auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); + + // Calculate and save virtual view screen position of each source pixel + ftl::cuda::screen_coord( + f.createTexture<float>(Channel::Depth), + f.createTexture<float>(Channel::Depth2, Format<float>(f.get<GpuMat>(Channel::Depth).size())), + f.createTexture<short2>(Channel::Screen, Format<short2>(f.get<GpuMat>(Channel::Depth).size())), + params_, pose, s->parameters(), stream + ); + + // Must reset depth channel if blending + if (do_blend) { + temp_.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(0x7FFFFFFF), 
cvstream); + } + + // Decide on and render triangles around each point + ftl::cuda::triangle_render1( + f.getTexture<float>(Channel::Depth2), + temp_.createTexture<int>((do_blend) ? Channel::Depth : Channel::Depth2), + f.getTexture<short2>(Channel::Screen), + params_, stream + ); + + if (do_blend) { + // Blend this sources mesh with previous meshes + ftl::cuda::mesh_blender( + temp_.getTexture<int>(Channel::Depth), + temp_.createTexture<int>(Channel::Depth2), + params_.camera, + blend_alpha, + stream + ); + } + } + + // Convert from int depth to float depth + temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); + + //filters_->filter(out, src, stream); + + // Generate normals for final virtual image + ftl::cuda::normals(out.createTexture<float4>(Channel::Normals, Format<float4>(params_.camera.width, params_.camera.height)), + temp_.createTexture<float4>(Channel::Normals), + out.createTexture<float>(Channel::Depth), + value("normal_radius", 1), value("normal_smoothing", 0.02f), + params_.camera, params_.m_viewMatrix.getFloat3x3(), params_.m_viewMatrixInverse.getFloat3x3(), stream_); +} + +void Triangular::_renderChannel( + ftl::rgbd::Frame &out, + Channel channel_in, Channel channel_out, cudaStream_t stream) +{ + if (channel_out == Channel::None || channel_in == Channel::None) return; + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream); + + if (scene_->frames.size() < 1) return; + bool is_float = out.get<GpuMat>(channel_out).type() == CV_32F; //ftl::rgbd::isFloatChannel(channel); + bool is_4chan = out.get<GpuMat>(channel_out).type() == CV_32FC4; + + + temp_.createTexture<float4>(Channel::Colour); + temp_.createTexture<float>(Channel::Contribution); + + // FIXME: Using colour 2 in this way seems broken since it is already used + if (is_4chan) { + accum_.create<GpuMat>(channel_out, Format<float4>(params_.camera.width, params_.camera.height)); + 
accum_.get<GpuMat>(channel_out).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + } else if (is_float) { + accum_.create<GpuMat>(channel_out, Format<float>(params_.camera.width, params_.camera.height)); + accum_.get<GpuMat>(channel_out).setTo(cv::Scalar(0.0f), cvstream); + } else { + accum_.create<GpuMat>(channel_out, Format<uchar4>(params_.camera.width, params_.camera.height)); + accum_.get<GpuMat>(channel_out).setTo(cv::Scalar(0,0,0,0), cvstream); + } + + _reprojectChannel(out, channel_in, channel_out, stream); +} + +bool Triangular::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out) { + SHARED_LOCK(scene_->mtx, lk); + if (!src->isReady()) return false; + + //scene_->upload(Channel::Colour + Channel::Depth, stream_); + + const auto &camera = src->parameters(); + //cudaSafeCall(cudaSetDevice(scene_->getCUDADevice())); + + // Create all the required channels + + out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height)); + out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)); + + + if (scene_->frames.size() == 0) return false; + auto &g = scene_->frames[0].get<GpuMat>(Channel::Colour); + + temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Depth, Format<int>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Depth2, Format<int>(camera.width, camera.height)); + temp_.create<GpuMat>(Channel::Normals, Format<float4>(camera.width, camera.height)); //g.cols, g.rows)); + + cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream_); + + // Parameters object to pass to CUDA describing the camera + SplatParams ¶ms = params_; + params.triangle_limit = value("triangle_limit", 200); + params.depthThreshold = value("depth_threshold", 0.04f); + params.m_flags = 0; + //if () params.m_flags |= ftl::render::kShowDisconMask; + if 
(value("normal_weight_colours", true)) params.m_flags |= ftl::render::kNormalWeightColours; + params.m_viewMatrix = MatrixConversion::toCUDA(src->getPose().cast<float>().inverse()); + params.m_viewMatrixInverse = MatrixConversion::toCUDA(src->getPose().cast<float>()); + params.camera = camera; + // Clear all channels to 0 or max depth + + out.get<GpuMat>(Channel::Depth).setTo(cv::Scalar(1000.0f), cvstream); + out.get<GpuMat>(Channel::Colour).setTo(background_, cvstream); + + //LOG(INFO) << "Render ready: " << camera.width << "," << camera.height; + + bool show_discon = value("show_discontinuity_mask", false); + bool show_fill = value("show_filled", false); + + temp_.createTexture<int>(Channel::Depth); + //temp_.get<GpuMat>(Channel::Normals).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream); + + // Display mask values + for (int i=0; i<scene_->frames.size(); ++i) { + auto &f = scene_->frames[i]; + auto s = scene_->sources[i]; + + if (f.hasChannel(Channel::Mask)) { + if (show_discon) { + ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<int>(Channel::Mask), Mask::kMask_Discontinuity, make_uchar4(0,0,255,255), stream_); + } + if (show_fill) { + ftl::cuda::show_mask(f.getTexture<uchar4>(Channel::Colour), f.getTexture<int>(Channel::Mask), Mask::kMask_Filled, make_uchar4(0,255,0,255), stream_); + } + } + + /*// Needs to create points channel first? + if (!f.hasChannel(Channel::Points)) { + //LOG(INFO) << "Creating points... " << s->parameters().width; + + auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size())); + auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse()); + ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, 0, stream_); + + //LOG(INFO) << "POINTS Added"; + } + + // Clip first? 
+ if (clipping_) { + ftl::cuda::clipping(f.createTexture<float4>(Channel::Points), clip_, stream_); + } + + if (!f.hasChannel(Channel::Normals)) { + Eigen::Matrix4f matrix = s->getPose().cast<float>().transpose(); + auto pose = MatrixConversion::toCUDA(matrix); + + auto &g = f.get<GpuMat>(Channel::Colour); + ftl::cuda::normals(f.createTexture<float4>(Channel::Normals, Format<float4>(g.cols, g.rows)), + temp_.getTexture<float4>(Channel::Normals), + f.getTexture<float4>(Channel::Points), + 1, 0.02f, + s->parameters(), pose.getFloat3x3(), stream_); + + if (norm_filter_ > -0.1f) { + ftl::cuda::normal_filter(f.getTexture<float4>(Channel::Normals), f.getTexture<float4>(Channel::Points), s->parameters(), pose, norm_filter_, stream_); + } + }*/ + } + + Channel chan = src->getChannel(); + + int aligned_source = value("aligned_source",-1); + if (aligned_source >= 0 && aligned_source < scene_->frames.size()) { + // FIXME: Output may not be same resolution as source! + cudaSafeCall(cudaStreamSynchronize(stream_)); + scene_->frames[aligned_source].copyTo(Channel::Depth + Channel::Colour + Channel::Smoothing, out); + + if (chan == Channel::Normals) { + // Convert normal to single float value + temp_.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); + ftl::cuda::normal_visualise(scene_->frames[aligned_source].getTexture<float4>(Channel::Normals), temp_.createTexture<uchar4>(Channel::Colour), + light_pos_, + light_diffuse_, + light_ambient_, stream_); + + // Put in output as single float + cv::cuda::swap(temp_.get<GpuMat>(Channel::Colour), out.create<GpuMat>(Channel::Normals)); + out.resetTexture(Channel::Normals); + } + + return true; + } + + // Create and render triangles for depth + if (mesh_) { + _mesh(out, src, stream_); + } else { + _dibr(out, stream_); + } + + // Reprojection of colours onto surface + _renderChannel(out, Channel::Colour, Channel::Colour, stream_); + + if (chan == Channel::Depth) + { + // Just 
convert int depth to float depth + //temp_.get<GpuMat>(Channel::Depth2).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 100000.0f, cvstream); + } else if (chan == Channel::Normals) { + // Visualise normals to RGBA + accum_.create<GpuMat>(Channel::Normals, Format<uchar4>(camera.width, camera.height)).setTo(cv::Scalar(0,0,0,0), cvstream); + ftl::cuda::normal_visualise(out.getTexture<float4>(Channel::Normals), accum_.createTexture<uchar4>(Channel::Normals), + light_pos_, + light_diffuse_, + light_ambient_, stream_); + + accum_.swapTo(Channels(Channel::Normals), out); + } + //else if (chan == Channel::Contribution) + //{ + // cv::cuda::swap(temp_.get<GpuMat>(Channel::Contribution), out.create<GpuMat>(Channel::Contribution)); + //} + else if (chan == Channel::Density) { + out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); + out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream); + _renderChannel(out, Channel::Depth, Channel::Density, stream_); + } + else if (chan == Channel::Right) + { + float baseline = camera.baseline; + + //Eigen::Translation3f translation(baseline, 0.0f, 0.0f); + //Eigen::Affine3f transform(translation); + //Eigen::Matrix4f matrix = transform.matrix() * src->getPose().cast<float>(); + + Eigen::Matrix4f transform = Eigen::Matrix4f::Identity(); + transform(0, 3) = baseline; + Eigen::Matrix4f matrix = transform.inverse() * src->getPose().cast<float>(); + + params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse()); + params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix); + + params.camera = src->parameters(Channel::Right); + + out.create<GpuMat>(Channel::Right, Format<uchar4>(camera.width, camera.height)); + out.get<GpuMat>(Channel::Right).setTo(background_, cvstream); + + // Need to re-dibr due to pose change + if (mesh_) { + _mesh(out, src, stream_); + } else { + _dibr(out, stream_); + } + _renderChannel(out, Channel::Left, Channel::Right, stream_); + + } else if (chan != Channel::None) { + if 
(ftl::codecs::isFloatChannel(chan)) { + out.create<GpuMat>(chan, Format<float>(camera.width, camera.height)); + out.get<GpuMat>(chan).setTo(cv::Scalar(0.0f), cvstream); + } else { + out.create<GpuMat>(chan, Format<uchar4>(camera.width, camera.height)); + out.get<GpuMat>(chan).setTo(background_, cvstream); + } + _renderChannel(out, chan, chan, stream_); + } + + cudaSafeCall(cudaStreamSynchronize(stream_)); + return true; +} diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu new file mode 100644 index 0000000000000000000000000000000000000000..7311e50b9bbbd7b7ba78f106e360998b937646d8 --- /dev/null +++ b/components/renderers/cpp/src/triangle_render.cu @@ -0,0 +1,202 @@ +#include <ftl/render/splat_params.hpp> +#include "splatter_cuda.hpp" +#include <ftl/rgbd/camera.hpp> +#include <ftl/cuda_common.hpp> + +#include <ftl/cuda/weighting.hpp> + +using ftl::rgbd::Camera; +using ftl::cuda::TextureObject; +using ftl::render::SplatParams; + +#define T_PER_BLOCK 8 + +__device__ inline float length2(int dx, int dy) { return dx*dx + dy*dy; } + +__device__ inline float cross(const float2 &a, const float2 &b) { + return a.x*b.y - a.y*b.x; +} + +__device__ inline bool within(float x) { + return 0.0f <= x <= 1.0f; +} + +__device__ inline bool operator==(const float2 &a, const float2 &b) { + return a.x == b.x && a.y == b.y; +} + +__device__ inline bool insideTriangle(const float2 &a, const float2 &b, const float2 &c, const float2 &p) +{ + float det = (b.y - c.y)*(a.x - c.x) + (c.x - b.x)*(a.y - c.y); + float factor_alpha = (b.y - c.y)*(p.x - c.x) + (c.x - b.x)*(p.y - c.y); + float factor_beta = (c.y - a.y)*(p.x - c.x) + (a.x - c.x)*(p.y - c.y); + float alpha = factor_alpha / det; + float beta = factor_beta / det; + float gamma = 1.0 - alpha - beta; + + return p == a || p == b || p == c || (within(alpha) && within(beta) && within(gamma)); +} + +__device__ inline void swap(short2 &a, short2 &b) { + short2 t = a; + a = b; + b = t; 
+} + +__device__ void drawLine(TextureObject<int> &depth_out, int y, int x1, int x2, float d) { + for (int x=x1; x<=x2; ++x) { + if (x < 0) continue; + if (x >= depth_out.width()) return; + atomicMin(&depth_out(x,y), int(d*1000.0f)); + } +} + +/* See: https://github.com/bcrusco/CUDA-Rasterizer */ + +/** + * Calculate the signed area of a given triangle. + */ +__device__ static inline + float calculateSignedArea(const short2 &a, const short2 &b, const short2 &c) { + return 0.5f * (float(c.x - a.x) * float(b.y - a.y) - float(b.x - a.x) * float(c.y - a.y)); + } + +/** + * Helper function for calculating barycentric coordinates. + */ + __device__ static inline + float calculateBarycentricCoordinateValue(const short2 &a, const short2 &b, const short2 &c, const short2 (&tri)[3]) { + return calculateSignedArea(a,b,c) / calculateSignedArea(tri[0], tri[1], tri[2]); + } + + /** + * Calculate barycentric coordinates. + * TODO: Update to handle triangles coming in and not the array + */ +__device__ static + float3 calculateBarycentricCoordinate(const short2 (&tri)[3], const short2 &point) { + float beta = calculateBarycentricCoordinateValue(tri[0], point, tri[2], tri); + float gamma = calculateBarycentricCoordinateValue(tri[0], tri[1], point, tri); + float alpha = 1.0 - beta - gamma; + return make_float3(alpha, beta, gamma); + } + + /** + * Check if a barycentric coordinate is within the boundaries of a triangle. + */ + __host__ __device__ static + bool isBarycentricCoordInBounds(const float3 &barycentricCoord) { + return barycentricCoord.x >= 0.0 && barycentricCoord.x <= 1.0 && + barycentricCoord.y >= 0.0 && barycentricCoord.y <= 1.0 && + barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0; + } + + /** + * For a given barycentric coordinate, compute the corresponding z position + * (i.e. depth) on the triangle. 
+ */ +__device__ static +float getZAtCoordinate(const float3 &barycentricCoord, const float (&tri)[3]) { + return (barycentricCoord.x * tri[0] + + barycentricCoord.y * tri[1] + + barycentricCoord.z * tri[2]); +} + +/* + * Convert source screen position to output screen coordinates. + */ + template <int A, int B> + __global__ void triangle_render_1_kernel( + TextureObject<float> depth_in, + TextureObject<int> depth_out, + TextureObject<short2> screen, SplatParams params) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 1 || x >= depth_in.width()-1 || y < 1 || y >= depth_in.height()-1) return; + + float d[3]; + d[0] = depth_in.tex2D(x,y); + d[1] = depth_in.tex2D(x+A,y); + d[2] = depth_in.tex2D(x,y+B); + + // Is this triangle valid + if (fabs(d[0] - d[1]) > params.depthThreshold || fabs(d[0] - d[2]) > params.depthThreshold) return; + if (d[0] < params.camera.minDepth || d[0] > params.camera.maxDepth) return; + + short2 v[3]; + v[0] = screen.tex2D(x,y); + v[1] = screen.tex2D(x+A,y); + v[2] = screen.tex2D(x,y+B); + + // Attempt to back face cull, but not great + //if ((v[1].x - v[0].x) * A < 0 || (v[2].y - v[0].y) * B < 0) return; + + const int minX = min(v[0].x, min(v[1].x, v[2].x)); + const int minY = min(v[0].y, min(v[1].y, v[2].y)); + const int maxX = max(v[0].x, max(v[1].x, v[2].x)); + const int maxY = max(v[0].y, max(v[1].y, v[2].y)); + + // Remove really large triangles + if ((maxX - minX) * (maxY - minY) > params.triangle_limit) return; + + for (int sy=minY; sy <= maxY; ++sy) { + for (int sx=minX; sx <= maxX; ++sx) { + if (sx >= params.camera.width || sx < 0 || sy >= params.camera.height || sy < 0) continue; + + float3 baryCentricCoordinate = calculateBarycentricCoordinate(v, make_short2(sx, sy)); + + if (isBarycentricCoordInBounds(baryCentricCoordinate)) { + float new_depth = getZAtCoordinate(baryCentricCoordinate, d); + atomicMin(&depth_out(sx,sy), int(new_depth*100000.0f)); + } + } + } +} 
+ +void ftl::cuda::triangle_render1(TextureObject<float> &depth_in, TextureObject<int> &depth_out, TextureObject<short2> &screen, const SplatParams ¶ms, cudaStream_t stream) { + const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK); + + triangle_render_1_kernel<1,1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + triangle_render_1_kernel<1,-1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + triangle_render_1_kernel<-1,1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + triangle_render_1_kernel<-1,-1><<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, screen, params); + cudaSafeCall( cudaGetLastError() ); +} + +// ==== BLENDER ======== + +/* + * Merge two depth maps together + */ + __global__ void mesh_blender_kernel( + TextureObject<int> depth_in, + TextureObject<int> depth_out, + ftl::rgbd::Camera camera, + float alpha) { + const int x = blockIdx.x*blockDim.x + threadIdx.x; + const int y = blockIdx.y*blockDim.y + threadIdx.y; + + if (x < 0 || x >= depth_in.width() || y < 0 || y >= depth_in.height()) return; + + int a = depth_in.tex2D(x,y); + int b = depth_out.tex2D(x,y); + + float mindepth = (float)min(a,b) / 100000.0f; + float maxdepth = (float)max(a,b) / 100000.0f; + float weight = ftl::cuda::weighting(maxdepth-mindepth, alpha); + + //depth_out(x,y) = (int)(((float)mindepth + (float)maxdepth*weight) / (1.0f + weight) * 100000.0f); + + float depth = (mindepth + maxdepth*weight) / (1.0f + weight); + depth_out(x,y) = (int)(depth * 100000.0f); +} + +void ftl::cuda::mesh_blender(TextureObject<int> &depth_in, TextureObject<int> &depth_out, const ftl::rgbd::Camera &camera, float alpha, cudaStream_t stream) { + const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK); + const dim3 blockSize(T_PER_BLOCK, 
T_PER_BLOCK); + + mesh_blender_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_out, camera, alpha); + cudaSafeCall( cudaGetLastError() ); +} diff --git a/components/rgbd-sources/include/ftl/rgbd/camera.hpp b/components/rgbd-sources/include/ftl/rgbd/camera.hpp index e245be1ea44f3e7e8503f585bf2c387fef821a38..2d36b2f7565620efe9962297dddf2a0e3a45393e 100644 --- a/components/rgbd-sources/include/ftl/rgbd/camera.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/camera.hpp @@ -6,6 +6,10 @@ #include <cuda_runtime.h> #include <ftl/cuda_util.hpp> +#ifndef __CUDACC__ +#include <msgpack.hpp> +#endif + namespace ftl{ namespace rgbd { @@ -25,6 +29,8 @@ struct __align__(16) Camera { double baseline; // For stereo pair double doffs; // Disparity offset + Camera scaled(int width, int height) const; + /** * Convert camera coordinates into screen coordinates. */ @@ -34,6 +40,10 @@ struct __align__(16) Camera { * Convert screen plus depth into camera coordinates. */ __device__ float3 screenToCam(uint ux, uint uy, float depth) const; + + #ifndef __CUDACC__ + MSGPACK_DEFINE(fx,fy,cx,cy,width,height,minDepth,maxDepth,baseline,doffs); + #endif }; }; diff --git a/components/rgbd-sources/include/ftl/rgbd/detail/netframe.hpp b/components/rgbd-sources/include/ftl/rgbd/detail/netframe.hpp index 1abb624c96e6a6871c2058b2e69574331183e7b4..995848ff01fdcd87b1e0d1e01e0b9cde61e267f3 100644 --- a/components/rgbd-sources/include/ftl/rgbd/detail/netframe.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/detail/netframe.hpp @@ -14,8 +14,7 @@ namespace detail { * Also maintains statistics about the frame transmission for later analysis. 
*/ struct NetFrame { - cv::Mat channel1; - cv::Mat channel2; + cv::cuda::GpuMat channel[2]; volatile int64_t timestamp; std::atomic<int> chunk_count[2]; std::atomic<int> channel_count; diff --git a/components/rgbd-sources/include/ftl/rgbd/detail/source.hpp b/components/rgbd-sources/include/ftl/rgbd/detail/source.hpp index 0d188c0b163777e842f3bd8c31f91ddd51435269..712d29170739be2ee966208439df6ba342df752e 100644 --- a/components/rgbd-sources/include/ftl/rgbd/detail/source.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/detail/source.hpp @@ -62,8 +62,8 @@ class Source { capability_t capabilities_; ftl::rgbd::Source *host_; ftl::rgbd::Camera params_; - cv::Mat rgb_; - cv::Mat depth_; + cv::cuda::GpuMat rgb_; + cv::cuda::GpuMat depth_; int64_t timestamp_; //Eigen::Matrix4f pose_; }; diff --git a/components/rgbd-sources/include/ftl/rgbd/frame.hpp b/components/rgbd-sources/include/ftl/rgbd/frame.hpp index b08673c973f42253670b51a3da8e28735406c952..52bbe9022987a85c3bf4398e9e3287011943ecac 100644 --- a/components/rgbd-sources/include/ftl/rgbd/frame.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/frame.hpp @@ -256,7 +256,7 @@ ftl::cuda::TextureObject<T> &Frame::createTexture(ftl::codecs::Channel c) { //LOG(INFO) << "Creating texture object"; m.tex = ftl::cuda::TextureObject<T>(m.gpu); } else if (m.tex.cvType() != ftl::traits::OpenCVType<T>::value || m.tex.width() != m.gpu.cols || m.tex.height() != m.gpu.rows || m.tex.devicePtr() != m.gpu.data) { - LOG(INFO) << "Recreating texture object for '" << ftl::codecs::name(c) << "'"; + LOG(INFO) << "Recreating texture object for '" << ftl::codecs::name(c) << "'."; m.tex.free(); m.tex = ftl::cuda::TextureObject<T>(m.gpu); } diff --git a/components/rgbd-sources/include/ftl/rgbd/source.hpp b/components/rgbd-sources/include/ftl/rgbd/source.hpp index 862d472f16b170fc8de28974462b2a0f9df63e54..7484788796dba398c525a208ad44708deadf3b70 100644 --- a/components/rgbd-sources/include/ftl/rgbd/source.hpp +++ 
b/components/rgbd-sources/include/ftl/rgbd/source.hpp @@ -3,10 +3,10 @@ #include <ftl/cuda_util.hpp> #include <ftl/configuration.hpp> -#include <ftl/rgbd/camera.hpp> #include <ftl/threads.hpp> #include <ftl/net/universe.hpp> #include <ftl/uri.hpp> +#include <ftl/rgbd/camera.hpp> #include <ftl/rgbd/detail/source.hpp> #include <ftl/codecs/packet.hpp> #include <opencv2/opencv.hpp> @@ -46,9 +46,6 @@ class Source : public ftl::Configurable { friend T *ftl::config::create(ftl::config::json_t &, ARGS ...); friend class VirtualSource; - //template <typename T, typename... ARGS> - //friend T *ftl::config::create(ftl::Configurable *, const std::string &, ARGS ...); - // This class cannot be constructed directly, use ftl::create Source()=delete; @@ -66,13 +63,16 @@ class Source : public ftl::Configurable { /** * Is this source valid and ready to grab?. */ - bool isReady() { return (impl_) ? impl_->isReady() : params_.width != 0; } + bool isReady() { return (impl_) ? impl_->isReady() : false; } /** * Change the second channel source. */ bool setChannel(ftl::codecs::Channel c); + /** + * Get the channel allocated to the second source. + */ ftl::codecs::Channel getChannel() const { return channel_; } /** @@ -114,19 +114,7 @@ class Source : public ftl::Configurable { * swap rather than a copy, so the parameters should be persistent buffers for * best performance. */ - void getFrames(cv::Mat &c, cv::Mat &d); - - /** - * Get a copy of the colour frame only. - */ - void getColour(cv::Mat &c); - - /** - * Get a copy of the depth frame only. - */ - void getDepth(cv::Mat &d); - - int64_t timestamp() const { return timestamp_; } + [[deprecated]] void getFrames(cv::Mat &c, cv::Mat &d); /** * Directly upload source RGB and Depth to GPU. 
@@ -136,16 +124,20 @@ class Source : public ftl::Configurable { void uploadColour(cv::cuda::GpuMat&); void uploadDepth(cv::cuda::GpuMat&); - bool isVirtual() const { return impl_ == nullptr; } + //bool isVirtual() const { return impl_ == nullptr; } /** * Get the source's camera intrinsics. */ const Camera ¶meters() const { if (impl_) return impl_->params_; - else return params_; + else throw ftl::exception("Cannot get parameters for bad source"); } + /** + * Get camera intrinsics for another channel. For example the right camera + * in a stereo pair. + */ const Camera parameters(ftl::codecs::Channel) const; cv::Mat cameraMatrix() const; @@ -169,46 +161,27 @@ class Source : public ftl::Configurable { capability_t getCapabilities() const; - /** - * Get a point in camera coordinates at specified pixel location. - */ - Eigen::Vector4d point(uint x, uint y); - - /** - * Get a point in camera coordinates at specified pixel location, with a - * given depth value. - */ - Eigen::Vector4d point(uint x, uint y, double d); - - Eigen::Vector2i point(const Eigen::Vector4d &p); - /** * Force the internal implementation to be reconstructed. */ void reset(); - void pause(bool); - bool isPaused() { return paused_; } - - void bullet(bool); - bool isBullet() { return bullet_; } - - bool thumbnail(cv::Mat &t); - ftl::net::Universe *getNet() const { return net_; } std::string getURI() { return value("uri", std::string("")); } - void customImplementation(detail::Source *); + //void customImplementation(detail::Source *); SHARED_MUTEX &mutex() { return mutex_; } - std::function<void(int64_t, cv::Mat &, cv::Mat &)> &callback() { return callback_; } + std::function<void(int64_t, cv::cuda::GpuMat &, cv::cuda::GpuMat &)> &callback() { return callback_; } /** * Set the callback that receives decoded frames as they are generated. + * There can be only a single such callback as the buffers can be swapped + * by the callback. 
*/ - void setCallback(std::function<void(int64_t, cv::Mat &, cv::Mat &)> cb); + void setCallback(std::function<void(int64_t, cv::cuda::GpuMat &, cv::cuda::GpuMat &)> cb); void removeCallback() { callback_ = nullptr; } /** @@ -218,6 +191,9 @@ class Source : public ftl::Configurable { */ void addRawCallback(const std::function<void(ftl::rgbd::Source*, const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt)> &); + /** + * THIS DOES NOT WORK CURRENTLY. + */ void removeRawCallback(const std::function<void(ftl::rgbd::Source*, const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt)> &); /** @@ -225,21 +201,31 @@ class Source : public ftl::Configurable { */ void notifyRaw(const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt); + /** + * Notify of a decoded or available pair of frames. This calls the source + * callback after having verified the correct resolution of the frames. + */ + void notify(int64_t ts, cv::cuda::GpuMat &c1, cv::cuda::GpuMat &c2); + + // ==== Inject Data into stream ============================================ + + /** + * Generate a stream packet with arbitrary data. The data is packed using + * msgpack and is given the timestamp of the most recent frame. + */ + template <typename... ARGS> + void inject(ftl::codecs::Channel c, ARGS... 
args); + + void inject(const Eigen::Matrix4d &pose); + protected: detail::Source *impl_; - cv::Mat rgb_; - cv::Mat depth_; - cv::Mat thumb_; - Camera params_; // TODO Find better solution Eigen::Matrix4d pose_; ftl::net::Universe *net_; SHARED_MUTEX mutex_; - bool paused_; - bool bullet_; ftl::codecs::Channel channel_; cudaStream_t stream_; - int64_t timestamp_; - std::function<void(int64_t, cv::Mat &, cv::Mat &)> callback_; + std::function<void(int64_t, cv::cuda::GpuMat &, cv::cuda::GpuMat &)> callback_; std::list<std::function<void(ftl::rgbd::Source*, const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt)>> rawcallbacks_; detail::Source *_createImplementation(); @@ -255,4 +241,40 @@ class Source : public ftl::Configurable { } } +class VectorBuffer { + public: + inline explicit VectorBuffer(std::vector<unsigned char> &v) : vector_(v) {} + + inline void write(const char *data, std::size_t size) { + vector_.insert(vector_.end(), (const unsigned char*)data, (const unsigned char*)data+size); + } + + private: + std::vector<unsigned char> &vector_; +}; + +template <typename... ARGS> +void ftl::rgbd::Source::inject(ftl::codecs::Channel c, ARGS... 
args) { + if (!impl_) return; + auto data = std::make_tuple(args...); + + ftl::codecs::StreamPacket spkt; + ftl::codecs::Packet pkt; + + spkt.timestamp = impl_->timestamp_; + spkt.channel = c; + spkt.channel_count = 0; + spkt.streamID = 0; + pkt.codec = ftl::codecs::codec_t::MSGPACK; + pkt.block_number = 0; + pkt.block_total = 1; + pkt.definition = ftl::codecs::definition_t::Any; + pkt.flags = 0; + + VectorBuffer buf(pkt.data); + msgpack::pack(buf, data); + + notifyRaw(spkt, pkt); +} + #endif // _FTL_RGBD_SOURCE_HPP_ diff --git a/components/rgbd-sources/include/ftl/rgbd/streamer.hpp b/components/rgbd-sources/include/ftl/rgbd/streamer.hpp index cfa552a862e1d46b9600ce523815eb8fa5efa001..1fa1cb48663b1ecaa47966df6d68cfe4663f8b9b 100644 --- a/components/rgbd-sources/include/ftl/rgbd/streamer.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/streamer.hpp @@ -158,6 +158,7 @@ class Streamer : public ftl::Configurable { ftl::timer::TimerHandle timer_job_; ftl::codecs::device_t hq_devices_; + ftl::codecs::codec_t hq_codec_; enum class Quality { High, diff --git a/components/rgbd-sources/include/ftl/rgbd/virtual.hpp b/components/rgbd-sources/include/ftl/rgbd/virtual.hpp index ed3530258d64b14427802f8e56375635ab26a24a..f0ab3a93bdb0c7bbf81fe61b4724000f3b020b31 100644 --- a/components/rgbd-sources/include/ftl/rgbd/virtual.hpp +++ b/components/rgbd-sources/include/ftl/rgbd/virtual.hpp @@ -13,8 +13,6 @@ class VirtualSource : public ftl::rgbd::Source { void onRender(const std::function<void(ftl::rgbd::Frame &)> &); - void setTimestamp(int64_t ts) { timestamp_ = ts; } - /** * Write frames into source buffers from an external renderer. 
Virtual * sources do not have an internal generator of frames but instead have diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp index 212ca55375993789bb832e678fe4f1a8f22d5c48..3c9fe54d8c1177fd8ea6f2dedce87aec5c6b71b4 100644 --- a/components/rgbd-sources/src/frame.cpp +++ b/components/rgbd-sources/src/frame.cpp @@ -112,7 +112,7 @@ template<> cv::Mat& Frame::get(ftl::codecs::Channel channel) { // Add channel if not already there if (!channels_.has(channel)) { - throw ftl::exception("Frame channel does not exist"); + throw ftl::exception(ftl::Formatter() << "Frame channel does not exist: " << (int)channel); } return _get(channel).host; @@ -132,7 +132,7 @@ template<> cv::cuda::GpuMat& Frame::get(ftl::codecs::Channel channel) { // Add channel if not already there if (!channels_.has(channel)) { - throw ftl::exception("Frame channel does not exist"); + throw ftl::exception(ftl::Formatter() << "Frame channel does not exist: " << (int)channel); } return _get(channel).gpu; @@ -147,7 +147,7 @@ template<> const cv::Mat& Frame::get(ftl::codecs::Channel channel) const { LOG(FATAL) << "Getting GPU channel on CPU without explicit 'download'"; } - if (!channels_.has(channel)) throw ftl::exception("Frame channel does not exist"); + if (!channels_.has(channel)) throw ftl::exception(ftl::Formatter() << "Frame channel does not exist: " << (int)channel); return _get(channel).host; } @@ -163,7 +163,7 @@ template<> const cv::cuda::GpuMat& Frame::get(ftl::codecs::Channel channel) cons // Add channel if not already there if (!channels_.has(channel)) { - throw ftl::exception("Frame channel does not exist"); + throw ftl::exception(ftl::Formatter() << "Frame channel does not exist: " << (int)channel); } return _get(channel).gpu; diff --git a/components/rgbd-sources/src/group.cpp b/components/rgbd-sources/src/group.cpp index 4e178c0245e815da6b836fbd2de44df4dcc84e1b..b2236ef3c83821d38ff16e8132748a635dd2712f 100644 --- 
a/components/rgbd-sources/src/group.cpp +++ b/components/rgbd-sources/src/group.cpp @@ -50,7 +50,7 @@ void Group::addSource(ftl::rgbd::Source *src) { size_t ix = sources_.size(); sources_.push_back(src); - src->setCallback([this,ix,src](int64_t timestamp, cv::Mat &rgb, cv::Mat &depth) { + src->setCallback([this,ix,src](int64_t timestamp, cv::cuda::GpuMat &rgb, cv::cuda::GpuMat &depth) { if (timestamp == 0) return; auto chan = src->getChannel(); @@ -78,13 +78,13 @@ void Group::addSource(ftl::rgbd::Source *src) { // Ensure channels match source mat format //fs.channel1[ix].create(rgb.size(), rgb.type()); //fs.channel2[ix].create(depth.size(), depth.type()); - fs.frames[ix].create<cv::Mat>(Channel::Colour, Format<uchar3>(rgb.size())); //.create(rgb.size(), rgb.type()); - if (chan != Channel::None) fs.frames[ix].create<cv::Mat>(chan, ftl::rgbd::FormatBase(depth.cols, depth.rows, depth.type())); //.create(depth.size(), depth.type()); + fs.frames[ix].create<cv::cuda::GpuMat>(Channel::Colour, Format<uchar3>(rgb.size())); //.create(rgb.size(), rgb.type()); + if (chan != Channel::None) fs.frames[ix].create<cv::cuda::GpuMat>(chan, ftl::rgbd::FormatBase(depth.cols, depth.rows, depth.type())); //.create(depth.size(), depth.type()); //cv::swap(rgb, fs.channel1[ix]); //cv::swap(depth, fs.channel2[ix]); - cv::swap(rgb, fs.frames[ix].get<cv::Mat>(Channel::Colour)); - if (chan != Channel::None) cv::swap(depth, fs.frames[ix].get<cv::Mat>(chan)); + cv::cuda::swap(rgb, fs.frames[ix].get<cv::cuda::GpuMat>(Channel::Colour)); + if (chan != Channel::None) cv::cuda::swap(depth, fs.frames[ix].get<cv::cuda::GpuMat>(chan)); ++fs.count; fs.mask |= (1 << ix); diff --git a/components/rgbd-sources/src/source.cpp b/components/rgbd-sources/src/source.cpp index 7e2cc10ed8971cf2c0b29d2a07b345121f92cb9e..b0ee77a449e33b82641c18aa0c7a039ed42e8626 100644 --- a/components/rgbd-sources/src/source.cpp +++ b/components/rgbd-sources/src/source.cpp @@ -31,14 +31,14 @@ using ftl::rgbd::detail::MiddleburySource; 
using ftl::rgbd::capability_t; using ftl::codecs::Channel; using ftl::rgbd::detail::FileSource; +using ftl::rgbd::Camera; std::map<std::string, ftl::rgbd::Player*> Source::readers__; Source::Source(ftl::config::json_t &cfg) : Configurable(cfg), pose_(Eigen::Matrix4d::Identity()), net_(nullptr) { impl_ = nullptr; - params_ = {}; + //params_ = {}; stream_ = 0; - timestamp_ = 0; reset(); on("uri", [this](const ftl::config::Event &e) { @@ -49,9 +49,8 @@ Source::Source(ftl::config::json_t &cfg) : Configurable(cfg), pose_(Eigen::Matri Source::Source(ftl::config::json_t &cfg, ftl::net::Universe *net) : Configurable(cfg), pose_(Eigen::Matrix4d::Identity()), net_(net) { impl_ = nullptr; - params_ = {}; + //params_ = {}; stream_ = 0; - timestamp_ = 0; reset(); on("uri", [this](const ftl::config::Event &e) { @@ -69,11 +68,6 @@ cv::Mat Source::cameraMatrix() const { return m; } -/*void Source::customImplementation(ftl::rgbd::detail::Source *impl) { - if (impl_) delete impl_; - impl_ = impl; -}*/ - ftl::rgbd::detail::Source *Source::_createImplementation() { auto uristr = get<string>("uri"); if (!uristr) { @@ -178,7 +172,7 @@ ftl::rgbd::detail::Source *Source::_createDeviceImpl(const ftl::URI &uri) { LOG(ERROR) << "You do not have 'librealsense2' installed"; #endif } else { - params_.width = value("width", 1280); + /*params_.width = value("width", 1280); params_.height = value("height", 720); params_.fx = value("focal", 700.0f); params_.fy = params_.fx; @@ -187,7 +181,7 @@ ftl::rgbd::detail::Source *Source::_createDeviceImpl(const ftl::URI &uri) { params_.minDepth = value("minDepth", 0.1f); params_.maxDepth = value("maxDepth", 20.0f); params_.doffs = 0; - params_.baseline = value("baseline", 0.0f); + params_.baseline = value("baseline", 0.0f);*/ } return nullptr; } @@ -195,45 +189,12 @@ ftl::rgbd::detail::Source *Source::_createDeviceImpl(const ftl::URI &uri) { void Source::getFrames(cv::Mat &rgb, cv::Mat &depth) { if (bool(callback_)) LOG(WARNING) << "Cannot use getFrames and 
callback in source"; SHARED_LOCK(mutex_,lk); - rgb_.copyTo(rgb); - depth_.copyTo(depth); + //rgb_.copyTo(rgb); + //depth_.copyTo(depth); //rgb = rgb_; //depth = depth_; - - /*cv::Mat tmp; - tmp = rgb; - rgb = rgb_; - rgb_ = tmp; - tmp = depth; - depth = depth_; - depth_ = tmp;*/ -} - -Eigen::Vector4d Source::point(uint ux, uint uy) { - const auto ¶ms = parameters(); - const double x = ((double)ux+params.cx) / params.fx; - const double y = ((double)uy+params.cy) / params.fy; - - SHARED_LOCK(mutex_,lk); - const double depth = depth_.at<float>(uy,ux); - return Eigen::Vector4d(x*depth,y*depth,depth,1.0); -} - -Eigen::Vector4d Source::point(uint ux, uint uy, double d) { - const auto ¶ms = parameters(); - const double x = ((double)ux+params.cx) / params.fx; - const double y = ((double)uy+params.cy) / params.fy; - return Eigen::Vector4d(x*d,y*d,d,1.0); } -Eigen::Vector2i Source::point(const Eigen::Vector4d &p) { - const auto ¶ms = parameters(); - double x = p[0] / p[2]; - double y = p[1] / p[2]; - x *= params.fx; - y *= params.fy; - return Eigen::Vector2i((int)(x - params.cx), (int)(y - params.cy)); -} void Source::setPose(const Eigen::Matrix4d &pose) { pose_ = pose; @@ -273,54 +234,7 @@ bool Source::retrieve() { bool Source::compute(int N, int B) { UNIQUE_LOCK(mutex_,lk); - if (!impl_ && stream_ != 0) { - cudaSafeCall(cudaStreamSynchronize(stream_)); - if (depth_.type() == CV_32SC1) depth_.convertTo(depth_, CV_32F, 1.0f / 1000.0f); - stream_ = 0; - return true; - } else if (impl_ && impl_->compute(N,B)) { - timestamp_ = impl_->timestamp_; - /*cv::Mat tmp; - rgb_.create(impl_->rgb_.size(), impl_->rgb_.type()); - depth_.create(impl_->depth_.size(), impl_->depth_.type()); - tmp = rgb_; - rgb_ = impl_->rgb_; - impl_->rgb_ = tmp; - tmp = depth_; - depth_ = impl_->depth_; - impl_->depth_ = tmp;*/ - - // TODO:(Nick) Reduce buffer copies - impl_->rgb_.copyTo(rgb_); - impl_->depth_.copyTo(depth_); - //rgb_ = impl_->rgb_; - //depth_ = impl_->depth_; - return true; - } - return 
false; -} - -bool Source::thumbnail(cv::Mat &t) { - if (!impl_ && stream_ != 0) { - cudaSafeCall(cudaStreamSynchronize(stream_)); - if (depth_.type() == CV_32SC1) depth_.convertTo(depth_, CV_32F, 1.0f / 1000.0f); - stream_ = 0; - return true; - } else if (impl_) { - UNIQUE_LOCK(mutex_,lk); - impl_->capture(0); - impl_->swap(); - impl_->compute(1, 9); - impl_->rgb_.copyTo(rgb_); - impl_->depth_.copyTo(depth_); - } - if (!rgb_.empty()) { - SHARED_LOCK(mutex_,lk); - // Downsize and square the rgb_ image - cv::resize(rgb_, thumb_, cv::Size(320,180)); - } - t = thumb_; - return !thumb_.empty(); + return impl_ && impl_->compute(N,B); } bool Source::setChannel(ftl::codecs::Channel c) { @@ -333,14 +247,13 @@ const ftl::rgbd::Camera Source::parameters(ftl::codecs::Channel chan) const { return (impl_) ? impl_->parameters(chan) : parameters(); } -void Source::setCallback(std::function<void(int64_t, cv::Mat &, cv::Mat &)> cb) { +void Source::setCallback(std::function<void(int64_t, cv::cuda::GpuMat &, cv::cuda::GpuMat &)> cb) { if (bool(callback_)) LOG(ERROR) << "Source already has a callback: " << getURI(); callback_ = cb; } void Source::addRawCallback(const std::function<void(ftl::rgbd::Source*, const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt)> &f) { UNIQUE_LOCK(mutex_,lk); - LOG(INFO) << "ADD RAW"; rawcallbacks_.push_back(f); } @@ -358,7 +271,76 @@ void Source::removeRawCallback(const std::function<void(ftl::rgbd::Source*, cons void Source::notifyRaw(const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt) { SHARED_LOCK(mutex_,lk); + for (auto &i : rawcallbacks_) { i(this, spkt, pkt); } } + +/* + * Scale camera parameters to match resolution. 
+ */ +Camera Camera::scaled(int width, int height) const { + const auto &cam = *this; + float scaleX = (float)width / (float)cam.width; + float scaleY = (float)height / (float)cam.height; + + CHECK( abs(scaleX - scaleY) < 0.00000001f ); + + Camera newcam = cam; + newcam.width = width; + newcam.height = height; + newcam.fx *= scaleX; + newcam.fy *= scaleY; + newcam.cx *= scaleX; + newcam.cy *= scaleY; + + return newcam; +} + +void Source::notify(int64_t ts, cv::cuda::GpuMat &c1, cv::cuda::GpuMat &c2) { + // Ensure correct scaling of images and parameters. + int max_width = max(impl_->params_.width, max(c1.cols, c2.cols)); + int max_height = max(impl_->params_.height, max(c1.rows, c2.rows)); + + // Do we need to scale camera parameters + if (impl_->params_.width < max_width || impl_->params_.height < max_height) { + impl_->params_ = impl_->params_.scaled(max_width, max_height); + } + + // Should channel 1 be scaled? + if (c1.cols < max_width || c1.rows < max_height) { + LOG(WARNING) << "Resizing on GPU"; + cv::cuda::resize(c1, c1, cv::Size(max_width, max_height)); + } + + // Should channel 2 be scaled? 
+ if (!c2.empty() && (c2.cols < max_width || c2.rows < max_height)) { + LOG(WARNING) << "Resizing on GPU"; + if (c2.type() == CV_32F) { + cv::cuda::resize(c2, c2, cv::Size(max_width, max_height), 0.0, 0.0, cv::INTER_NEAREST); + } else { + cv::cuda::resize(c2, c2, cv::Size(max_width, max_height)); + } + } + + if (callback_) callback_(ts, c1, c2); +} + +void Source::inject(const Eigen::Matrix4d &pose) { + ftl::codecs::StreamPacket spkt; + ftl::codecs::Packet pkt; + + spkt.timestamp = impl_->timestamp_; + spkt.channel_count = 0; + spkt.channel = Channel::Pose; + spkt.streamID = 0; + pkt.codec = ftl::codecs::codec_t::POSE; + pkt.definition = ftl::codecs::definition_t::Any; + pkt.block_number = 0; + pkt.block_total = 1; + pkt.flags = 0; + pkt.data = std::move(std::vector<uint8_t>((uint8_t*)pose.data(), (uint8_t*)pose.data() + 4*4*sizeof(double))); + + notifyRaw(spkt, pkt); +} diff --git a/components/rgbd-sources/src/sources/ftlfile/file_source.cpp b/components/rgbd-sources/src/sources/ftlfile/file_source.cpp index 323f1ae72179c46ad816ec653ffc940d87cad0f7..9597bde796f80ac67f38a5a569190001e13d7065 100644 --- a/components/rgbd-sources/src/sources/ftlfile/file_source.cpp +++ b/components/rgbd-sources/src/sources/ftlfile/file_source.cpp @@ -33,25 +33,46 @@ FileSource::FileSource(ftl::rgbd::Source *s, ftl::rgbd::Player *r, int sid) : ft r->onPacket(sid, [this](const ftl::codecs::StreamPacket &spkt, ftl::codecs::Packet &pkt) { host_->notifyRaw(spkt, pkt); + + // Some channels are to be directly handled by the source object and + // do not proceed to any subsequent step. 
+ // FIXME: Potential problem, these get processed at wrong time + if (spkt.channel == Channel::Configuration) { + std::tuple<std::string, std::string> cfg; + auto unpacked = msgpack::unpack((const char*)pkt.data.data(), pkt.data.size()); + unpacked.get().convert(cfg); + + LOG(INFO) << "Config Received: " << std::get<1>(cfg); + return; + } + else if (spkt.channel == Channel::Calibration) { + _processCalibration(pkt); + return; + } else if (spkt.channel == Channel::Pose) { + _processPose(pkt); + return; + } + + // FIXME: For bad and old FTL files where wrong channel is used if (pkt.codec == codec_t::POSE) { - Eigen::Matrix4d p = Eigen::Map<Eigen::Matrix4d>((double*)pkt.data.data()); - host_->setPose(p); + _processPose(pkt); + return; } else if (pkt.codec == codec_t::CALIBRATION) { - ftl::rgbd::Camera *camera = (ftl::rgbd::Camera*)pkt.data.data(); - LOG(INFO) << "Have calibration: " << camera->fx; - params_ = *camera; - has_calibration_ = true; - } else { - if (pkt.codec == codec_t::HEVC) { - if (ftl::codecs::hevc::isIFrame(pkt.data)) _removeChannel(spkt.channel); - } - cache_[cache_write_].emplace_back(); - auto &c = cache_[cache_write_].back(); - - // TODO: Attempt to avoid this copy operation - c.spkt = spkt; - c.pkt = pkt; + _processCalibration(pkt); + return; } + + + // TODO: Check I-Frames for H264 + if (pkt.codec == codec_t::HEVC) { + if (ftl::codecs::hevc::isIFrame(pkt.data)) _removeChannel(spkt.channel); + } + cache_[cache_write_].emplace_back(); + auto &c = cache_[cache_write_].back(); + + // TODO: Attempt to avoid this copy operation + c.spkt = spkt; + c.pkt = pkt; }); } @@ -59,6 +80,38 @@ FileSource::~FileSource() { } +void FileSource::_processPose(ftl::codecs::Packet &pkt) { + LOG(INFO) << "Got POSE channel"; + if (pkt.codec == codec_t::POSE) { + Eigen::Matrix4d p = Eigen::Map<Eigen::Matrix4d>((double*)pkt.data.data()); + host_->setPose(p); + } else if (pkt.codec == codec_t::MSGPACK) { + + } +} + +void FileSource::_processCalibration(ftl::codecs::Packet 
&pkt) { + if (pkt.codec == codec_t::CALIBRATION) { + ftl::rgbd::Camera *camera = (ftl::rgbd::Camera*)pkt.data.data(); + params_ = *camera; + has_calibration_ = true; + } else if (pkt.codec == codec_t::MSGPACK) { + std::tuple<ftl::rgbd::Camera, ftl::codecs::Channel, ftl::rgbd::capability_t> params; + auto unpacked = msgpack::unpack((const char*)pkt.data.data(), pkt.data.size()); + unpacked.get().convert(params); + + if (std::get<1>(params) == Channel::Left) { + params_ = std::get<0>(params); + capabilities_ = std::get<2>(params); + has_calibration_ = true; + + LOG(INFO) << "Got Calibration channel: " << params_.width << "x" << params_.height; + } else { + //params_right_ = std::get<0>(params); + } + } +} + void FileSource::_removeChannel(ftl::codecs::Channel channel) { int c = 0; for (auto i=cache_[cache_write_].begin(); i != cache_[cache_write_].end(); ++i) { @@ -98,9 +151,13 @@ bool FileSource::compute(int n, int b) { int64_t lastts = 0; int lastc = 0; + // Go through previously read and cached frames in sequence + // needs to be done due to P-Frames for (auto i=cache_[cache_read_].begin(); i!=cache_[cache_read_].end(); ++i) { auto &c = *i; + // Check for verifying that both channels are received, ie. two frames + // with the same timestamp. 
if (c.spkt.timestamp > lastts) { lastts = c.spkt.timestamp; lastc = 1; @@ -123,6 +180,7 @@ bool FileSource::compute(int n, int b) { } } + // FIXME: Consider case of Channel::None if (lastc != 2) { LOG(ERROR) << "Channels not in sync (" << sourceid_ << "): " << lastts; return false; @@ -132,8 +190,8 @@ bool FileSource::compute(int n, int b) { if (rgb_.empty() || depth_.empty()) return false; - auto cb = host_->callback(); - if (cb) cb(timestamp_, rgb_, depth_); + // Inform about a decoded frame pair + host_->notify(timestamp_, rgb_, depth_); return true; } diff --git a/components/rgbd-sources/src/sources/ftlfile/file_source.hpp b/components/rgbd-sources/src/sources/ftlfile/file_source.hpp index 80f3b368b1ec30f5e5b1ebd9ac2f051ceba20901..2d59b68cb1e07d10aade07b33780af2497736a12 100644 --- a/components/rgbd-sources/src/sources/ftlfile/file_source.hpp +++ b/components/rgbd-sources/src/sources/ftlfile/file_source.hpp @@ -44,6 +44,8 @@ class FileSource : public detail::Source { bool realtime_; + void _processCalibration(ftl::codecs::Packet &pkt); + void _processPose(ftl::codecs::Packet &pkt); void _removeChannel(ftl::codecs::Channel channel); void _createDecoder(int ix, const ftl::codecs::Packet &pkt); }; diff --git a/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp b/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp index e82167fdcf6b4e2bfd1a38a00b50e3cdceeb2aef..d152d25e5a7d8830e8b555851f94d97b70463e29 100644 --- a/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp +++ b/components/rgbd-sources/src/sources/middlebury/middlebury_source.cpp @@ -129,30 +129,30 @@ MiddleburySource::MiddleburySource(ftl::rgbd::Source *host, const string &dir) disp_->setMask(mask_l_); // Load image files... 
- cv::Mat right_tmp; - rgb_ = cv::imread(dir+"/im0.png", cv::IMREAD_COLOR); + cv::Mat left_tmp, right_tmp; + left_tmp = cv::imread(dir+"/im0.png", cv::IMREAD_COLOR); right_tmp = cv::imread(dir+"/im1.png", cv::IMREAD_COLOR); - cv::resize(rgb_, rgb_, cv::Size(params_.width, params_.height)); + cv::resize(left_tmp, left_tmp, cv::Size(params_.width, params_.height)); cv::resize(right_tmp, right_tmp, cv::Size(params_.width, params_.height)); - left_.upload(rgb_); - right_.upload(right_tmp); + rgb_.upload(left_tmp, stream_); + right_.upload(right_tmp, stream_); _performDisparity(); ready_ = true; } void MiddleburySource::_performDisparity() { - if (depth_tmp_.empty()) depth_tmp_ = cv::cuda::GpuMat(left_.size(), CV_32FC1); - if (disp_tmp_.empty()) disp_tmp_ = cv::cuda::GpuMat(left_.size(), CV_32FC1); + depth_.create(left_.size(), CV_32FC1); + disp_tmp_.create(left_.size(), CV_32FC1); //calib_->rectifyStereo(left_, right_, stream_); - disp_->compute(left_, right_, disp_tmp_, stream_); + disp_->compute(rgb_, right_, disp_tmp_, stream_); //disparityToDepth(disp_tmp_, depth_tmp_, params_, stream_); - ftl::cuda::disparity_to_depth(disp_tmp_, depth_tmp_, params_, stream_); + ftl::cuda::disparity_to_depth(disp_tmp_, depth_, params_, stream_); //left_.download(rgb_, stream_); //rgb_ = lsrc_->cachedLeft(); - depth_tmp_.download(depth_, stream_); + //depth_tmp_.download(depth_, stream_); stream_.waitForCompletion(); diff --git a/components/rgbd-sources/src/sources/net/net.cpp b/components/rgbd-sources/src/sources/net/net.cpp index 5956114d57bc2b9bc0a0b10bee48fd7a29d6b470..e4073536a574255965de81ab5f2294e008695032 100644 --- a/components/rgbd-sources/src/sources/net/net.cpp +++ b/components/rgbd-sources/src/sources/net/net.cpp @@ -52,8 +52,8 @@ NetFrame &NetFrameQueue::getFrame(int64_t ts, const cv::Size &s, int c1type, int f.chunk_total[1] = 0; f.channel_count = 0; f.tx_size = 0; - f.channel1.create(s, c1type); - f.channel2.create(s, c2type); + f.channel[0].create(s, c1type); + 
f.channel[1].create(s, c2type); return f; } oldest = (f.timestamp < oldest) ? f.timestamp : oldest; @@ -72,8 +72,8 @@ NetFrame &NetFrameQueue::getFrame(int64_t ts, const cv::Size &s, int c1type, int f.chunk_total[1] = 0; f.channel_count = 0; f.tx_size = 0; - f.channel1.create(s, c1type); - f.channel2.create(s, c2type); + f.channel[0].create(s, c1type); + f.channel[1].create(s, c2type); return f; } } @@ -90,55 +90,6 @@ void NetFrameQueue::freeFrame(NetFrame &f) { // ===== NetSource ============================================================= -bool NetSource::_getCalibration(Universe &net, const UUID &peer, const string &src, ftl::rgbd::Camera &p, ftl::codecs::Channel chan) { - try { - while(true) { - auto [cap,buf] = net.call<tuple<unsigned int,vector<unsigned char>>>(peer_, "source_details", src, chan); - - capabilities_ = cap; - - if (buf.size() > 0) { - memcpy((char*)&p, buf.data(), buf.size()); - - if (sizeof(p) != buf.size()) { - LOG(ERROR) << "Corrupted calibration"; - return false; - } - - LOG(INFO) << "Calibration received: " << p.cx << ", " << p.cy << ", " << p.fx << ", " << p.fy; - - if (chan == Channel::Left) { - // Put calibration into config manually - host_->getConfig()["focal"] = p.fx; - host_->getConfig()["centre_x"] = p.cx; - host_->getConfig()["centre_y"] = p.cy; - host_->getConfig()["baseline"] = p.baseline; - host_->getConfig()["doffs"] = p.doffs; - } else { - host_->getConfig()["focal_right"] = p.fx; - host_->getConfig()["centre_x_right"] = p.cx; - host_->getConfig()["centre_y_right"] = p.cy; - host_->getConfig()["baseline_right"] = p.baseline; - host_->getConfig()["doffs_right"] = p.doffs; - } - - return true; - } else { - LOG(INFO) << "Could not get calibration, retrying"; - sleep_for(milliseconds(500)); - } - } - - } catch (const std::exception& ex) { - LOG(ERROR) << "Exception: " << ex.what(); - return false; - - } catch (...) 
{ - LOG(ERROR) << "Unknown exception"; - return false; - } -} - NetSource::NetSource(ftl::rgbd::Source *host) : ftl::rgbd::detail::Source(host), active_(false), minB_(9), maxN_(1), adaptive_(0), queue_(3) { @@ -147,9 +98,10 @@ NetSource::NetSource(ftl::rgbd::Source *host) default_quality_ = host->value("quality", 0); last_bitrate_ = 0; params_right_.width = 0; + has_calibration_ = false; - decoder_c1_ = nullptr; - decoder_c2_ = nullptr; + decoder_[0] = nullptr; + decoder_[1] = nullptr; host->on("gamma", [this,host](const ftl::config::Event&) { gamma_ = host->value("gamma", 1.0f); @@ -221,8 +173,8 @@ NetSource::NetSource(ftl::rgbd::Source *host) } NetSource::~NetSource() { - if (decoder_c1_) ftl::codecs::free(decoder_c1_); - if (decoder_c2_) ftl::codecs::free(decoder_c2_); + if (decoder_[0]) ftl::codecs::free(decoder_[0]); + if (decoder_[1]) ftl::codecs::free(decoder_[1]); if (uri_.size() > 0) { host_->getNet()->unbind(uri_); @@ -260,22 +212,47 @@ NetSource::~NetSource() { void NetSource::_createDecoder(int chan, const ftl::codecs::Packet &pkt) { UNIQUE_LOCK(mutex_,lk); - auto *decoder = (chan == 0) ? decoder_c1_ : decoder_c2_; + auto *decoder = decoder_[chan]; if (decoder) { if (!decoder->accepts(pkt)) { - ftl::codecs::free((chan == 0) ? 
decoder_c1_ : decoder_c2_); + ftl::codecs::free(decoder_[chan]); } else { return; } } - if (chan == 0) { - decoder_c1_ = ftl::codecs::allocateDecoder(pkt); + decoder_[chan] = ftl::codecs::allocateDecoder(pkt); +} + +void NetSource::_processConfig(const ftl::codecs::Packet &pkt) { + std::tuple<std::string, std::string> cfg; + auto unpacked = msgpack::unpack((const char*)pkt.data.data(), pkt.data.size()); + unpacked.get().convert(cfg); + + //LOG(INFO) << "Config Received: " << std::get<1>(cfg); + // TODO: This needs to be put in safer / better location + host_->set(std::get<0>(cfg), nlohmann::json::parse(std::get<1>(cfg))); +} + +void NetSource::_processCalibration(const ftl::codecs::Packet &pkt) { + std::tuple<ftl::rgbd::Camera, ftl::codecs::Channel, ftl::rgbd::capability_t> params; + auto unpacked = msgpack::unpack((const char*)pkt.data.data(), pkt.data.size()); + unpacked.get().convert(params); + + if (std::get<1>(params) == Channel::Left) { + params_ = std::get<0>(params); + capabilities_ = std::get<2>(params); + has_calibration_ = true; + LOG(INFO) << "Got Calibration channel: " << params_.width << "x" << params_.height; } else { - decoder_c2_ = ftl::codecs::allocateDecoder(pkt); + params_right_ = std::get<0>(params); } } +void NetSource::_processPose(const ftl::codecs::Packet &pkt) { + LOG(INFO) << "Got POSE channel"; +} + void NetSource::_recvPacket(short ttimeoff, const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt) { // Capture time here for better net latency estimate int64_t now = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now()).time_since_epoch().count(); @@ -288,6 +265,18 @@ void NetSource::_recvPacket(short ttimeoff, const ftl::codecs::StreamPacket &spk const ftl::codecs::Channel rchan = spkt.channel; const int channum = (rchan == Channel::Colour) ? 0 : 1; + // Deal with the special channels... 
+ switch (rchan) { + case Channel::Configuration : _processConfig(pkt); return; + case Channel::Calibration : _processCalibration(pkt); return; + case Channel::Pose : _processPose(pkt); return; + } + + if (!has_calibration_) { + LOG(WARNING) << "Missing calibration, skipping frame"; + return; + } + NetFrame &frame = queue_.getFrame(spkt.timestamp, cv::Size(params_.width, params_.height), CV_8UC3, (isFloatChannel(chan) ? CV_32FC1 : CV_8UC3)); // Update frame statistics @@ -296,19 +285,19 @@ void NetSource::_recvPacket(short ttimeoff, const ftl::codecs::StreamPacket &spk // Only decode if this channel is wanted. if (rchan == Channel::Colour || rchan == chan) { _createDecoder(channum, pkt); - auto *decoder = (rchan == Channel::Colour) ? decoder_c1_ : decoder_c2_; + auto *decoder = decoder_[channum]; if (!decoder) { LOG(ERROR) << "No frame decoder available"; return; } - decoder->decode(pkt, (rchan == Channel::Colour) ? frame.channel1 : frame.channel2); + decoder->decode(pkt, frame.channel[channum]); } else if (chan != Channel::None && rchan != Channel::Colour) { // Didn't receive correct second channel so just clear the images if (isFloatChannel(chan)) { - frame.channel2.setTo(cv::Scalar(0.0f)); + frame.channel[1].setTo(cv::Scalar(0.0f)); } else { - frame.channel2.setTo(cv::Scalar(0,0,0)); + frame.channel[1].setTo(cv::Scalar(0,0,0)); } } @@ -328,48 +317,39 @@ void NetSource::_recvPacket(short ttimeoff, const ftl::codecs::StreamPacket &spk } ++frame.chunk_count[channum]; - ++frame.channel_count; - + if (frame.chunk_count[channum] == frame.chunk_total[channum]) ++frame.channel_count; if (frame.chunk_count[channum] > frame.chunk_total[channum]) LOG(FATAL) << "TOO MANY CHUNKS"; // Capture tx time of first received chunk - if (frame.channel_count == 1 && frame.chunk_count[channum] == 1) { + // FIXME: This seems broken + if (channum == 1 && frame.chunk_count[channum] == 1) { UNIQUE_LOCK(frame.mtx, flk); if (frame.chunk_count[channum] == 1) { frame.tx_latency = 
int64_t(ttimeoff); } } - // Last chunk of both channels now received - if (frame.channel_count == spkt.channel_count && - frame.chunk_count[0] == frame.chunk_total[0] && - frame.chunk_count[1] == frame.chunk_total[1]) { - UNIQUE_LOCK(frame.mtx, flk); + // Last chunk of both channels now received, so we are done. + if (frame.channel_count == spkt.channel_count) { + _completeFrame(frame, now-(spkt.timestamp+frame.tx_latency)); + } +} - if (frame.timestamp >= 0 && frame.chunk_count[0] == frame.chunk_total[0] && frame.chunk_count[1] == frame.chunk_total[1]) { - timestamp_ = frame.timestamp; - frame.tx_latency = now-(spkt.timestamp+frame.tx_latency); - - adaptive_ = abr_.selectBitrate(frame); - //LOG(INFO) << "Frame finished: " << frame.timestamp; - auto cb = host_->callback(); - if (cb) { - try { - cb(frame.timestamp, frame.channel1, frame.channel2); - } catch (...) { - LOG(ERROR) << "Exception in net frame callback"; - } - } else { - LOG(ERROR) << "NO FRAME CALLBACK"; - } +void NetSource::_completeFrame(NetFrame &frame, int64_t latency) { + UNIQUE_LOCK(frame.mtx, flk); - queue_.freeFrame(frame); + // Frame must not have already been freed. 
+ if (frame.timestamp >= 0) { + timestamp_ = frame.timestamp; + frame.tx_latency = latency; - { - // Decrement expected frame counter - N_--; - } - } + // Note: Not used currently + adaptive_ = abr_.selectBitrate(frame); + + host_->notify(frame.timestamp, frame.channel[0], frame.channel[1]); + + queue_.freeFrame(frame); + N_--; } } @@ -389,12 +369,6 @@ void NetSource::setPose(const Eigen::Matrix4d &pose) { ftl::rgbd::Camera NetSource::parameters(ftl::codecs::Channel chan) { if (chan == ftl::codecs::Channel::Right) { - if (params_right_.width == 0) { - auto uri = host_->get<string>("uri"); - if (!uri) return params_; - - _getCalibration(*host_->getNet(), peer_, *uri, params_right_, chan); - } return params_right_; } else { return params_; @@ -419,29 +393,11 @@ void NetSource::_updateURI() { } peer_ = *p; - has_calibration_ = _getCalibration(*host_->getNet(), peer_, *uri, params_, ftl::codecs::Channel::Left); - _getCalibration(*host_->getNet(), peer_, *uri, params_right_, ftl::codecs::Channel::Right); - host_->getNet()->bind(*uri, [this](short ttimeoff, const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt) { - //if (chunk == -1) { - //#ifdef HAVE_NVPIPE - //_recvVideo(frame, ttimeoff, bitrate, jpg, d); - //#else - //LOG(ERROR) << "Cannot receive HEVC, no NvPipe support"; - //#endif - //} else { - //_recvChunk(frame, ttimeoff, bitrate, chunk, jpg, d); - _recvPacket(ttimeoff, spkt, pkt); - //} + _recvPacket(ttimeoff, spkt, pkt); }); N_ = 0; - - rgb_ = cv::Mat(cv::Size(params_.width, params_.height), CV_8UC3, cv::Scalar(0,0,0)); - depth_ = cv::Mat(cv::Size(params_.width, params_.height), CV_32FC1, 0.0f); - //d_rgb_ = cv::Mat(cv::Size(params_.width, params_.height), CV_8UC3, cv::Scalar(0,0,0)); - //d_depth_ = cv::Mat(cv::Size(params_.width, params_.height), CV_32FC1, 0.0f); - uri_ = *uri; active_ = true; } else { @@ -466,9 +422,9 @@ bool NetSource::compute(int n, int b) { // Verify depth destination is of required type if (isFloatChannel(chan) && 
depth_.type() != CV_32F) { - depth_ = cv::Mat(cv::Size(params_.width, params_.height), CV_32FC1, 0.0f); + depth_.create(cv::Size(params_.width, params_.height), CV_32FC1); // 0.0f } else if (!isFloatChannel(chan) && depth_.type() != CV_8UC3) { - depth_ = cv::Mat(cv::Size(params_.width, params_.height), CV_8UC3, cv::Scalar(0,0,0)); + depth_.create(cv::Size(params_.width, params_.height), CV_8UC3); // cv::Scalar(0,0,0) } if (prev_chan_ != chan) { @@ -491,5 +447,5 @@ bool NetSource::compute(int n, int b) { } bool NetSource::isReady() { - return has_calibration_ && !rgb_.empty(); + return has_calibration_; } diff --git a/components/rgbd-sources/src/sources/net/net.hpp b/components/rgbd-sources/src/sources/net/net.hpp index 469902a5709f1ce9674d4fcafb0b36b957b5f7a4..5cef2726d2cdc5c34c161a74b25d45234f55ce48 100644 --- a/components/rgbd-sources/src/sources/net/net.hpp +++ b/components/rgbd-sources/src/sources/net/net.hpp @@ -68,8 +68,7 @@ class NetSource : public detail::Source { //NvPipe *nv_channel2_decoder_; //#endif - ftl::codecs::Decoder *decoder_c1_; - ftl::codecs::Decoder *decoder_c2_; + ftl::codecs::Decoder *decoder_[2]; // Adaptive bitrate functionality ftl::rgbd::detail::bitrate_t adaptive_; // Current adapted bitrate @@ -86,6 +85,10 @@ class NetSource : public detail::Source { void _updateURI(); //void _checkAdaptive(int64_t); void _createDecoder(int chan, const ftl::codecs::Packet &); + void _completeFrame(NetFrame &frame, int64_t now); + void _processCalibration(const ftl::codecs::Packet &pkt); + void _processConfig(const ftl::codecs::Packet &pkt); + void _processPose(const ftl::codecs::Packet &pkt); }; } diff --git a/components/rgbd-sources/src/sources/realsense/realsense_source.cpp b/components/rgbd-sources/src/sources/realsense/realsense_source.cpp index b458aa3e77c8557ee828a8f71431bb5e4e665066..2b55356c853cdafc2d6aa3be523e07376e7ac0f8 100644 --- a/components/rgbd-sources/src/sources/realsense/realsense_source.cpp +++ 
b/components/rgbd-sources/src/sources/realsense/realsense_source.cpp @@ -54,9 +54,11 @@ bool RealsenseSource::compute(int n, int b) { float h = depth.get_height(); rscolour_ = frames.first(RS2_STREAM_COLOR); //.get_color_frame(); - cv::Mat tmp(cv::Size((int)w, (int)h), CV_16UC1, (void*)depth.get_data(), depth.get_stride_in_bytes()); - tmp.convertTo(depth_, CV_32FC1, scale_); - rgb_ = cv::Mat(cv::Size(w, h), CV_8UC4, (void*)rscolour_.get_data(), cv::Mat::AUTO_STEP); + cv::Mat tmp_depth(cv::Size((int)w, (int)h), CV_16UC1, (void*)depth.get_data(), depth.get_stride_in_bytes()); + tmp_depth.convertTo(tmp_depth, CV_32FC1, scale_); + depth_.upload(tmp_depth); + cv::Mat tmp_rgb(cv::Size(w, h), CV_8UC4, (void*)rscolour_.get_data(), cv::Mat::AUTO_STEP); + rgb_.upload(tmp_rgb); auto cb = host_->callback(); if (cb) cb(timestamp_, rgb_, depth_); diff --git a/components/rgbd-sources/src/sources/snapshot/snapshot_source.cpp b/components/rgbd-sources/src/sources/snapshot/snapshot_source.cpp index 73db6b86c3861cd0aa1105f4d9ff7dcd9cbe9fe3..136a2e7dfcc7b279228cdd5c6efc0bfb8d303baa 100644 --- a/components/rgbd-sources/src/sources/snapshot/snapshot_source.cpp +++ b/components/rgbd-sources/src/sources/snapshot/snapshot_source.cpp @@ -59,8 +59,10 @@ bool SnapshotSource::compute(int n, int b) { snapshot_.getLeftRGB(camera_idx_, frame_idx_, snap_rgb_); snapshot_.getLeftDepth(camera_idx_, frame_idx_, snap_depth_); - snap_rgb_.copyTo(rgb_); - snap_depth_.copyTo(depth_); + //snap_rgb_.copyTo(rgb_); + //snap_depth_.copyTo(depth_); + rgb_.upload(snap_rgb_); + depth_.upload(snap_depth_); auto cb = host_->callback(); if (cb) cb(timestamp_, rgb_, depth_); diff --git a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp index b84385fcecb3a8c00a94398d998d872c68929951..f8e08a9f6d1f7670adeb8be9310129e8353daf1b 100644 --- a/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp +++ 
b/components/rgbd-sources/src/sources/stereovideo/stereovideo.cpp @@ -231,21 +231,24 @@ bool StereoVideoSource::compute(int n, int b) { ftl::cuda::disparity_to_depth(disp, depth, params_, stream_); - left.download(rgb_, stream_); - depth.download(depth_, stream_); + //left.download(rgb_, stream_); + //depth.download(depth_, stream_); //frame.download(Channel::Left + Channel::Depth); - stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames + stream_.waitForCompletion(); + host_->notify(timestamp_, left, depth); } else if (chan == Channel::Right) { - left.download(rgb_, stream_); - right.download(depth_, stream_); + //left.download(rgb_, stream_); + //right.download(depth_, stream_); stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames + host_->notify(timestamp_, left, right); } else { - left.download(rgb_, stream_); + //left.download(rgb_, stream_); stream_.waitForCompletion(); // TODO:(Nick) Move to getFrames + //LOG(INFO) << "NO SECOND CHANNEL: " << (bool)depth_.empty(); + depth_.create(left.size(), left.type()); + host_->notify(timestamp_, left, depth_); } - auto cb = host_->callback(); - if (cb) cb(timestamp_, rgb_, depth_); return true; } diff --git a/components/rgbd-sources/src/sources/virtual/virtual.cpp b/components/rgbd-sources/src/sources/virtual/virtual.cpp index 17ce33638fbc1cfc934beb93243399cf93151118..a1d1040c65caac8833397d39ef3949c7778cdebf 100644 --- a/components/rgbd-sources/src/sources/virtual/virtual.cpp +++ b/components/rgbd-sources/src/sources/virtual/virtual.cpp @@ -7,8 +7,17 @@ using ftl::rgbd::Camera; class VirtualImpl : public ftl::rgbd::detail::Source { public: - explicit VirtualImpl(ftl::rgbd::Source *host, const ftl::rgbd::Camera ¶ms) : ftl::rgbd::detail::Source(host) { - params_ = params; + explicit VirtualImpl(ftl::rgbd::Source *host) : ftl::rgbd::detail::Source(host) { + params_.width = host->value("width", 1280); + params_.height = host->value("height", 720); + params_.fx = host->value("focal", 700.0f); + params_.fy = 
params_.fx; + params_.cx = -(double)params_.width / 2.0; + params_.cy = -(double)params_.height / 2.0; + params_.minDepth = host->value("minDepth", 0.1f); + params_.maxDepth = host->value("maxDepth", 20.0f); + params_.doffs = 0; + params_.baseline = host->value("baseline", 0.0f); params_right_.width = host->value("width", 1280); params_right_.height = host->value("height", 720); @@ -82,20 +91,19 @@ class VirtualImpl : public ftl::rgbd::detail::Source { } if (frame.hasChannel(Channel::Colour)) { - frame.download(Channel::Colour); - cv::swap(frame.get<cv::Mat>(Channel::Colour), rgb_); + //frame.download(Channel::Colour); + cv::cuda::swap(frame.get<cv::cuda::GpuMat>(Channel::Colour), rgb_); } else { LOG(ERROR) << "Channel 1 frame in rendering"; } if ((host_->getChannel() != Channel::None) && frame.hasChannel(host_->getChannel())) { - frame.download(host_->getChannel()); - cv::swap(frame.get<cv::Mat>(host_->getChannel()), depth_); + //frame.download(host_->getChannel()); + cv::cuda::swap(frame.get<cv::cuda::GpuMat>(host_->getChannel()), depth_); } - auto cb = host_->callback(); - if (cb) cb(timestamp_, rgb_, depth_); + host_->notify(timestamp_, rgb_, depth_); } return true; } @@ -113,20 +121,7 @@ class VirtualImpl : public ftl::rgbd::detail::Source { }; VirtualSource::VirtualSource(ftl::config::json_t &cfg) : Source(cfg) { - auto params = params_; - - params_.width = value("width", 1280); - params_.height = value("height", 720); - params_.fx = value("focal", 700.0f); - params_.fy = params_.fx; - params_.cx = value("centre_x", -(double)params_.width / 2.0); - params_.cy = value("centre_y", -(double)params_.height / 2.0); - params_.minDepth = value("minDepth", 0.1f); - params_.maxDepth = value("maxDepth", 20.0f); - params_.doffs = 0; - params_.baseline = value("baseline", 0.0f); - - impl_ = new VirtualImpl(this, params); + impl_ = new VirtualImpl(this); } VirtualSource::~VirtualSource() { diff --git a/components/rgbd-sources/src/streamer.cpp 
b/components/rgbd-sources/src/streamer.cpp index 939252d77a6704bf7c9c5a2202ec72a6c45562a8..e05e7faf9305ac0a3ede6dade21596bfe8251db7 100644 --- a/components/rgbd-sources/src/streamer.cpp +++ b/components/rgbd-sources/src/streamer.cpp @@ -45,6 +45,7 @@ Streamer::Streamer(nlohmann::json &config, Universe *net) encode_mode_ = ftl::rgbd::kEncodeVideo; hq_devices_ = (value("disable_hardware_encode", false)) ? device_t::Software : device_t::Any; + hq_codec_ = value("video_codec", ftl::codecs::codec_t::Any); //group_.setFPS(value("fps", 20)); group_.setLatency(4); @@ -187,6 +188,17 @@ void Streamer::add(Source *src) { sources_[src->getID()] = s; group_.addSource(src); + + src->addRawCallback([this,s](Source *src, const ftl::codecs::StreamPacket &spkt, const ftl::codecs::Packet &pkt) { + if (spkt.channel == Channel::Calibration) { + // Calibration changed, so lets re-check the bitrate presets + const auto ¶ms = src->parameters(); + s->hq_bitrate = ftl::codecs::findPreset(params.width, params.height); + } + + //LOG(INFO) << "RAW CALLBACK"; + _transmitPacket(s, spkt, pkt, Quality::Any); + }); } LOG(INFO) << "Streaming: " << src->getID(); @@ -322,6 +334,14 @@ void Streamer::_addClient(const string &source, int N, int rate, const ftl::UUID } ++s->clientCount; + + // Finally, inject calibration and config data + s->src->inject(Channel::Calibration, s->src->parameters(Channel::Left), Channel::Left, s->src->getCapabilities()); + s->src->inject(Channel::Calibration, s->src->parameters(Channel::Right), Channel::Right, s->src->getCapabilities()); + //s->src->inject(s->src->getPose()); + //if (!(*s->src->get<nlohmann::json>("meta")).is_null()) { + s->src->inject(Channel::Configuration, "/original", s->src->getConfig().dump()); + //} } void Streamer::remove(Source *) { @@ -430,9 +450,9 @@ void Streamer::_process(ftl::rgbd::FrameSet &fs) { // Do we need to do high quality encoding? 
if (src->hq_count > 0) { if (!src->hq_encoder_c1) src->hq_encoder_c1 = ftl::codecs::allocateEncoder( - definition_t::HD1080, hq_devices_); + definition_t::HD1080, hq_devices_, hq_codec_); if (!src->hq_encoder_c2 && hasChan2) src->hq_encoder_c2 = ftl::codecs::allocateEncoder( - definition_t::HD1080, hq_devices_); + definition_t::HD1080, hq_devices_, hq_codec_); // Do we have the resources to do a HQ encoding? if (src->hq_encoder_c1 && (!hasChan2 || src->hq_encoder_c2)) { @@ -448,7 +468,7 @@ void Streamer::_process(ftl::rgbd::FrameSet &fs) { auto chan = fs.sources[j]->getChannel(); - enc2->encode(fs.frames[j].get<cv::Mat>(chan), src->hq_bitrate, [this,src,hasChan2,chan](const ftl::codecs::Packet &blk){ + enc2->encode(fs.frames[j].get<cv::cuda::GpuMat>(chan), src->hq_bitrate, [this,src,hasChan2,chan](const ftl::codecs::Packet &blk){ _transmitPacket(src, blk, chan, hasChan2, Quality::High); }); } else { @@ -457,7 +477,7 @@ void Streamer::_process(ftl::rgbd::FrameSet &fs) { // TODO: Stagger the reset between nodes... 
random phasing if (fs.timestamp % (10*ftl::timer::getInterval()) == 0) enc1->reset(); - enc1->encode(fs.frames[j].get<cv::Mat>(Channel::Colour), src->hq_bitrate, [this,src,hasChan2](const ftl::codecs::Packet &blk){ + enc1->encode(fs.frames[j].get<cv::cuda::GpuMat>(Channel::Colour), src->hq_bitrate, [this,src,hasChan2](const ftl::codecs::Packet &blk){ _transmitPacket(src, blk, Channel::Colour, hasChan2, Quality::High); }); } @@ -480,14 +500,14 @@ void Streamer::_process(ftl::rgbd::FrameSet &fs) { if (hasChan2) { auto chan = fs.sources[j]->getChannel(); - enc2->encode(fs.frames[j].get<cv::Mat>(chan), src->lq_bitrate, [this,src,hasChan2,chan](const ftl::codecs::Packet &blk){ + enc2->encode(fs.frames[j].get<cv::cuda::GpuMat>(chan), src->lq_bitrate, [this,src,hasChan2,chan](const ftl::codecs::Packet &blk){ _transmitPacket(src, blk, chan, hasChan2, Quality::Low); }); } else { if (enc2) enc2->reset(); } - enc1->encode(fs.frames[j].get<cv::Mat>(Channel::Colour), src->lq_bitrate, [this,src,hasChan2](const ftl::codecs::Packet &blk){ + enc1->encode(fs.frames[j].get<cv::cuda::GpuMat>(Channel::Colour), src->lq_bitrate, [this,src,hasChan2](const ftl::codecs::Packet &blk){ _transmitPacket(src, blk, Channel::Colour, hasChan2, Quality::Low); }); } diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..feae5c1921703133caa7d3cb82a79e2be6e85ab6 --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,2 @@ +*.pyc +__pycache__ \ No newline at end of file diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a2918492f1622494b35cf1c965cb306efa0b2524 --- /dev/null +++ b/python/README.md @@ -0,0 +1,89 @@ +Python support for `.ftl` files. At the moment, only reading RGB channels +(left/right) supported. Non-parallel decoding of 8 streams has a frame rate +of ~15 fps on i7-9700K. 
+ +Required **Python** modules: + + * msgpack + * numpy + * skimage **or** OpenCV + +Required libraries + + * libde265 (available on most Linux distributions) + +## Example + +Example reads from `input.ftl` and writes to `output.ftl`. Calibration and +pose are copied directly (same method can be used for other channels as well). +The program copies left and right frames of source 0 to new file (and re-encodes +them in JPG) when both frames are available. + +```python +import ftl +from ftl import types + +reader = ftl.FTLStreamReader("./input.ftl") +writer = ftl.FTLStreamWriter("./output") + +source_id = 0 +fps = 25 +frame_t = int(1000.0/fps) +timestamp_out = 0 +timestamp_in = 0 + +im_left = None +im_right = None + +while reader.read(): + channel = reader.get_channel_type() + timestamp = reader.get_timestamp() + frame = reader.get_frame() + + if reader.get_source_id() != source_id: + # not interested in this source, skip + continue + + if channel in (types.Channel.Calibration, types.Channel.Configuration): + # copy calibration and pose (but replace timestamp with new value) + + sp, p = reader.get_raw() + sp = sp._replace(timestamp=timestamp_out) + writer.add_raw(sp, p) + continue + + if channel not in (types.Channel.Left, types.Channel.Right): + # only interested in left and right frame + continue + + if frame is None: + # no frame if decoding failed + continue + + if timestamp_in != timestamp: + # new timestamp, process available frames + + if not (im_left is None or im_right is None): + # save frames only if both of them were found for this timestamp + + # Note: In this expample channel is re-encoded. If channel content + # is not modified, lossy channels should be passed directly + # (using add_raw() in same way as for calibration/pose) instead of + # re-encoding them. 
+ + writer.add_frame(timestamp_out, 0, types.Channel.Left, 2, + types.codec_t.JPG, im_left) + writer.add_frame(timestamp_out, 0, types.Channel.Right, 2, + types.codec_t.JPG, im_right) + + + timestamp_out += frame_t + timestamp_in = timestamp + im_left, im_right = None, None + + if channel is types.Channel.Left: + im_left = frame + else: + im_right = frame + +``` diff --git a/python/ftl/.gitignore b/python/ftl/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a295864e354f25c935ea5453ab7ec97813b11bd2 --- /dev/null +++ b/python/ftl/.gitignore @@ -0,0 +1,2 @@ +*.pyc +__pycache__ diff --git a/python/ftl/__init__.py b/python/ftl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..38e017612ea53ec9e65417aa9a56729c8f10baf0 --- /dev/null +++ b/python/ftl/__init__.py @@ -0,0 +1,4 @@ +from . ftlstream import FTLStreamReader, FTLStreamWriter +from . misc import disparity_to_depth + +from . import ftltypes as types diff --git a/python/ftl/ftlstream.py b/python/ftl/ftlstream.py new file mode 100644 index 0000000000000000000000000000000000000000..6031e757d39b915f76db997952f17a3d77ffebd0 --- /dev/null +++ b/python/ftl/ftlstream.py @@ -0,0 +1,356 @@ +import msgpack + +import numpy as np + +import sys +import struct +from warnings import warn +from enum import IntEnum +from collections import namedtuple + +from . misc import is_iframe +from . import ftltypes as ftl +from . import libde265 + +_calib_fmt = "@ddddIIdddd" + +try: + import cv2 as cv + + def _ycrcb2rgb(img): + return cv.cvtColor(img, cv.COLOR_YCrCb2RGB) + +except ImportError: + warn("OpenCV not available. OpenCV required for full functionality.") + + def _ycrcb2rgb(img): + ''' YCrCb to RGB, based on OpenCV documentation definition. 
+ + Note: It seems this implementation is not perfectly equivalent to + OpenCV's + ''' + + rgb = np.zeros(img.shape, np.float) + + Y = img[:,:,0].astype(np.float) + Cr = img[:,:,1].astype(np.float) + Cb = img[:,:,2].astype(np.float) + delta = 128.0 + + rgb[:,:,0] = Y + 1.403 * (Cr - delta) + rgb[:,:,1] = Y - 0.714 * (Cr - delta) - 0.344 * (Cb - delta) + rgb[:,:,2] = Y + 1.773 * (Cb - delta) + + return rgb.round().astype(np.uint8) + +class FTLStreamWriter: + def __init__(self, file): + self._file = open(file, "wb") + self._file.write(bytes(ord(c) for c in "FTLF")) # magic + self._file.write(bytes([2])) # version + self._file.write(bytes([0]*64)) # reserved + + self._packer = msgpack.Packer(strict_types=False, use_bin_type=True) + + def __del__(self): + self.close() + + def close(self): + self._file.close() + + def add_raw(self, sp, p): + if len(sp) != len(ftl.StreamPacket._fields) or len(p) != len(ftl.Packet._fields): + raise ValueError("invalid input") + + self._file.write(self._packer.pack((sp, p))) + self._file.flush() + + def add_frame(self, timestamp, source, channel, channel_count, codec, data, + definition=None, flags=0, encode=True): + ''' Write frame to file. If encode is False (data already encoded), + definition needs to be specified. + ''' + + if source < 0: + raise ValueError("invalid source id") + + if channel not in ftl.Channel: + raise ValueError("invalid channel") + + if codec not in ftl.codec_t: + raise ValueError("invalid codec") + + if encode: + if definition is None: + definition = ftl.get_definition(data.shape) + + if definition is None: + raise ValueError("unsupported resolution") + + if definition != ftl.get_definition(data.shape): + # todo: could replace definition or scale + raise ValueError("definition does not match frame resolution") + + if codec == ftl.codec_t.PNG: + if ftl.is_float_channel(channel): + # scaling always same (???) 
+ data = data.astype(np.float) / 1000.0 + + params = [cv.IMWRITE_PNG_COMPRESSION, 9] + retval, data = cv.imencode(".png", data, params) + + if not retval: + raise Exception("encoding error (PNG)") + + elif codec == ftl.codec_t.JPG: + params = [] + retval, data = cv.imencode(".jpg", data, params) + + if not retval: + raise Exception("encoding error (JPG)") + + else: + raise ValueError("unsupported codec") + + data = data.tobytes() + + if definition is None: + raise ValueError("definition required") + + if not isinstance(data, bytes): + raise ValueError("expected bytes") + + sp = ftl.StreamPacket(int(timestamp), int(source), + int(channel_count), int(channel)) + p = ftl.Packet(int(codec), int(definition), 1, 0, int(flags), data) + + self.add_raw(sp, p) + + def add_pose(self, timestamp, source, data): + if data.shape != (4, 4): + raise ValueError("invalid pose") + + data.astype(np.float64).tobytes(order='F') + raise NotImplementedError("todo") + + def add_calibration(self, timestamp, source, data): + struct.pack(_calib_fmt, *data) + raise NotImplementedError("todo") + +class FTLStreamReader: + ''' FTL file reader. ''' + + def __init__(self, file): + self._file = open(file, "br") + self._version = 0 + + self._decoders_hevc = {} + self._seen_iframe = set() + + self._frame = None + + # calibration and pose are cached + self._calibration = {} + self._pose = {} + + try: + magic = self._file.read(5) + self._version = int(magic[4]) + if magic[:4] != bytes(ord(c) for c in "FTLF"): + raise Exception("wrong magic") + + if self._version >= 2: + # first 64 bytes reserved + self._file.read(8*8) + + self._unpacker = msgpack.Unpacker(self._file, raw=True, use_list=False) + + except Exception as ex: + self._file.close() + raise ex + + self._packets_read = 0 + + def __del__(self): + self._file.close() + + def _read_next(self): + v1, v2 = self._unpacker.unpack() + return ftl.StreamPacket._make(v1), ftl.Packet._make(v2) + + def _update_calib(self, sp, p): + ''' Update calibration. 
'''
+        calibration = struct.unpack(_calib_fmt, p.data[:(4*8+2*4+4*8)])
+        self._calibration[sp.streamID] = ftl.Camera._make(calibration)
+
+    def _update_pose(self, sp, p):
+        ''' Update pose '''
+        pose = np.asarray(struct.unpack("@16d", p.data[:(16*8)]),
+                          dtype=np.float64)
+        pose = pose.reshape((4, 4), order='F') # Eigen
+        self._pose[sp.streamID] = pose
+
+    def _process_json(self, sp, p):
+        raise NotImplementedError("json decoding not implemented")
+
+    def _decode_hevc(self, sp, p):
+        ''' Decode HEVC frame '''
+
+        k = (sp.streamID, sp.channel)
+
+        if k not in self._decoders_hevc:
+            self._decoders_hevc[k] = libde265.Decoder(ftl.definition_t[p.definition])
+
+        decoder = self._decoders_hevc[k]
+
+        if k not in self._seen_iframe:
+            if not is_iframe(p.data):
+                # can't decode before first I-frame has been received
+                warn("received P-frame before I-frame")
+                return
+
+            self._seen_iframe.add(k)
+
+        decoder.push_data(p.data)
+        decoder.push_end_of_frame()
+
+        while decoder.get_number_of_input_bytes_pending() > 0:
+            decoder.decode()
+
+        img = decoder.get_next_picture()
+        if img is None:
+            # if this happens, does get_next_picture() in loop help?
+            warn("frame expected, no image from decoder")
+
+        if ftl.is_float_channel(self._sp.channel):
+            raise NotImplementedError("non-color channel decoding not available")
+
+        else:
+            self._frame = _ycrcb2rgb(img) if img is not None else None
+
+    def _decode_opencv(self, sp, p):
+        try:
+            cv
+        except NameError:
+            raise Exception("OpenCV required for OpenCV (png/jpeg) decoding")
+
+        self._frame = cv.imdecode(np.frombuffer(p.data, dtype=np.uint8),
+                                  cv.IMREAD_UNCHANGED)
+
+        if ftl.is_float_channel(self._sp.channel):
+            self._frame = self._frame.astype(np.float) / 1000.0
+
+    def seek(self, ts):
+        ''' Read until timestamp reached '''
+        if self.get_timestamp() >= ts:
+            raise Exception("trying to seek to earlier timestamp")
+
+        while self.read():
+            if self.get_timestamp() >= ts:
+                break
+
+    def read(self):
+        '''
+        Reads data until the next timestamp. 
Returns False if there is no
+        more data to read, otherwise returns True.
+
+        todo: make decoding optional
+        '''
+        self._frame = None
+
+        try:
+            self._sp, self._p = self._read_next()
+            self._packets_read += 1
+
+        except msgpack.OutOfData:
+            return False
+
+        if self._p.block_total != 1 or self._p.block_number != 0:
+            raise Exception("Unsupported block format (todo)")
+
+        if self._p.codec == ftl.codec_t.JSON:
+            self._process_json(self._sp, self._p)
+
+        elif self._p.codec == ftl.codec_t.CALIBRATION:
+            self._update_calib(self._sp, self._p)
+
+        elif self._p.codec == ftl.codec_t.POSE:
+            self._update_pose(self._sp, self._p)
+
+        elif self._p.codec == ftl.codec_t.HEVC:
+            self._decode_hevc(self._sp, self._p)
+
+        elif self._p.codec == ftl.codec_t.PNG:
+            self._decode_opencv(self._sp, self._p)
+
+        elif self._p.codec == ftl.codec_t.JPG:
+            self._decode_opencv(self._sp, self._p)
+
+        else:
+            raise Exception("unknown codec %i" % self._p.codec)
+
+        return True
+
+    def get_packet_count(self):
+        return self._packets_read
+
+    def get_raw(self):
+        ''' Returns previously received StreamPacket and Packet '''
+        return self._sp, self._p
+
+    def get_channel_type(self):
+        return ftl.Channel(self._sp.channel)
+
+    def get_source_id(self):
+        return self._sp.streamID
+
+    def get_timestamp(self):
+        return self._sp.timestamp
+
+    def get_frame(self):
+        ''' Return decoded frame from previous packet. Returns None if previous
+        packet did not contain a (valid) frame. 
''' + return self._frame + + def get_pose(self, source): + try: + return self._pose[source] + except KeyError: + raise ValueError("source id %i not found" % source) + + def get_camera_matrix(self, source): + ''' Camera intrinsic parameters ''' + + calib = self.get_calibration(source) + K = np.identity(3, dtype=np.float64) + K[0,0] = calib.fx + K[1,1] = calib.fy + K[0,2] = calib.cx + K[1,2] = calib.cy + return K + + def get_calibration(self, source): + try: + return self._calibration[source] + except KeyError: + raise ValueError("source id %i not found" % source) + + def get_Q(self, source): + ''' Disparity to depth matrix in OpenCV format ''' + + calib = self.get_calibration(source) + Q = np.identity(4, dtype=np.float64) + Q[0,3] = calib.cx + Q[1,3] = calib.cy + Q[2,2] = 0.0 + Q[2,3] = calib.fx + Q[3,2] = -1 / calib.baseline + Q[3,3] = calib.doff + return Q + + def get_sources(self): + ''' Get list of sources ''' + return list(self._calibration.keys()) + + def get_version(self): + return self._version diff --git a/python/ftl/ftltypes.py b/python/ftl/ftltypes.py new file mode 100644 index 0000000000000000000000000000000000000000..17980b808fbadd68c1adff9f17a037a92878c27e --- /dev/null +++ b/python/ftl/ftltypes.py @@ -0,0 +1,88 @@ + +from collections import namedtuple +from enum import IntEnum + +# components/rgbd-sources/include/ftl/rgbd/camera.hpp +Camera = namedtuple("Camera", ["fx", "fy", "cx", "cy", "width", "height", + "min_depth", "max_depth", "baseline", "doff"]) + +# components/codecs/include/ftl/codecs/packet.hpp +Packet = namedtuple("Packet", ["codec", "definition", "block_total", + "block_number", "flags", "data"]) + +StreamPacket = namedtuple("StreamPacket", ["timestamp", "streamID", + "channel_count", "channel"]) + +# components/codecs/include/ftl/codecs/channels.hpp +class Channel(IntEnum): + None_ = -1 + Colour = 0 + Left = 0 + Depth = 1 + Right = 2 + Colour2 = 2 + Disparity = 3 + Depth2 = 3 + Deviation = 4 + Normals = 5 + Points = 6 + Confidence = 7 + 
Contribution = 7 + EnergyVector = 8 + Flow = 9 + Energy = 10 + Mask = 11 + Density = 12 + LeftGray = 13 + RightGray = 14 + Overlay1 = 15 + + AudioLeft = 32 + AudioRight = 33 + + Configuration = 64 + Calibration = 65 + Pose = 66 + Data = 67 + +_float_channels = [ + Channel.Depth, + Channel.Confidence, + Channel.Density, + Channel.Energy +] + +def is_float_channel(channel): + return channel in _float_channels + +# components/codecs/include/ftl/codecs/bitrates.hpp +class codec_t(IntEnum): + JPG = 0 + PNG = 1 + H264 = 2 + HEVC = 3 + WAV = 4 + JSON = 100 + CALIBRATION = 101 + POSE = 102 + RAW = 103 + +definition_t = { + 0 : (7680, 4320), + 1 : (2160, 3840), + 2 : (1080, 1920), + 3 : (720, 1280), + 4 : (576, 1024), + 5 : (480, 854), + 6 : (360, 640), + 7 : (0, 0), + 8 : (2056, 1852) +} + +def get_definition(shape): + for k, v in definition_t.items(): + if shape[:2] == v: + return k + + return 7 # (None) + diff --git a/python/ftl/libde265.py b/python/ftl/libde265.py new file mode 100644 index 0000000000000000000000000000000000000000..20f2f250c0c7344bc81682e15ab6bf08965318b2 --- /dev/null +++ b/python/ftl/libde265.py @@ -0,0 +1,289 @@ +''' +Python wrapper for libde265. Only decoding is (partly) implemented. 
+ +Requirements: + * libde265 library (libde265.so.0) + * numpy + * opencv or skimage + +''' + +try: + import cv2 as cv + def _resize(img, size): + return cv.resize(img, dsize=tuple(reversed(size)), interpolation=cv.INTER_CUBIC) + +except ImportError: + from skimage.transform import resize as resize_skimage + def _resize(img, size): + # skimage resize() return dtype float64, convert back to uint8 + # order: 0 nn, 1 bilinear, 3 bicubic + return (resize_skimage(img, size, order=3, mode="constant", cval=0) * 255).astype(np.uint8) + +from warnings import warn + +import ctypes +from enum import IntEnum + +import numpy as np + +import os + +''' +# default number of worker threads for decoder: half of os.cpu_count() + +_threads = os.cpu_count() // 2 +if _threads is None: + _threads = 1 +''' + +_threads = 1 + +# error codes copied from header (de265.h) + +class _libde265error(IntEnum): + DE265_OK = 0 + DE265_ERROR_NO_SUCH_FILE=1 + DE265_ERROR_COEFFICIENT_OUT_OF_IMAGE_BOUNDS=4 + DE265_ERROR_CHECKSUM_MISMATCH=5 + DE265_ERROR_CTB_OUTSIDE_IMAGE_AREA=6 + DE265_ERROR_OUT_OF_MEMORY=7 + DE265_ERROR_CODED_PARAMETER_OUT_OF_RANGE=8 + DE265_ERROR_IMAGE_BUFFER_FULL=9 + DE265_ERROR_CANNOT_START_THREADPOOL=10 + DE265_ERROR_LIBRARY_INITIALIZATION_FAILED=11 + DE265_ERROR_LIBRARY_NOT_INITIALIZED=12 + DE265_ERROR_WAITING_FOR_INPUT_DATA=13 + DE265_ERROR_CANNOT_PROCESS_SEI=14 + DE265_ERROR_PARAMETER_PARSING=15 + DE265_ERROR_NO_INITIAL_SLICE_HEADER=16 + DE265_ERROR_PREMATURE_END_OF_SLICE=17 + DE265_ERROR_UNSPECIFIED_DECODING_ERROR=18 + DE265_ERROR_NOT_IMPLEMENTED_YET = 502 + DE265_WARNING_NO_WPP_CANNOT_USE_MULTITHREADING = 1000 + DE265_WARNING_WARNING_BUFFER_FULL=1001 + DE265_WARNING_PREMATURE_END_OF_SLICE_SEGMENT=1002 + DE265_WARNING_INCORRECT_ENTRY_POINT_OFFSET=1003 + DE265_WARNING_CTB_OUTSIDE_IMAGE_AREA=1004 + DE265_WARNING_SPS_HEADER_INVALID=1005 + DE265_WARNING_PPS_HEADER_INVALID=1006 + DE265_WARNING_SLICEHEADER_INVALID=1007 + DE265_WARNING_INCORRECT_MOTION_VECTOR_SCALING=1008 + 
DE265_WARNING_NONEXISTING_PPS_REFERENCED=1009 + DE265_WARNING_NONEXISTING_SPS_REFERENCED=1010 + DE265_WARNING_BOTH_PREDFLAGS_ZERO=1011 + DE265_WARNING_NONEXISTING_REFERENCE_PICTURE_ACCESSED=1012 + DE265_WARNING_NUMMVP_NOT_EQUAL_TO_NUMMVQ=1013 + DE265_WARNING_NUMBER_OF_SHORT_TERM_REF_PIC_SETS_OUT_OF_RANGE=1014 + DE265_WARNING_SHORT_TERM_REF_PIC_SET_OUT_OF_RANGE=1015 + DE265_WARNING_FAULTY_REFERENCE_PICTURE_LIST=1016 + DE265_WARNING_EOSS_BIT_NOT_SET=1017 + DE265_WARNING_MAX_NUM_REF_PICS_EXCEEDED=1018 + DE265_WARNING_INVALID_CHROMA_FORMAT=1019 + DE265_WARNING_SLICE_SEGMENT_ADDRESS_INVALID=1020 + DE265_WARNING_DEPENDENT_SLICE_WITH_ADDRESS_ZERO=1021 + DE265_WARNING_NUMBER_OF_THREADS_LIMITED_TO_MAXIMUM=1022 + DE265_NON_EXISTING_LT_REFERENCE_CANDIDATE_IN_SLICE_HEADER=1023 + DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY=1024 + DE265_WARNING_SPS_MISSING_CANNOT_DECODE_SEI=1025 + DE265_WARNING_COLLOCATED_MOTION_VECTOR_OUTSIDE_IMAGE_AREA=1026 + +class de265_chroma(IntEnum): + de265_chroma_mono = 0 + de265_chroma_420 = 1 + de265_chroma_422 = 2 + de265_chroma_444 = 3 + +libde265 = ctypes.cdll.LoadLibrary("libde265.so.0") + +libde265.de265_get_error_text.argtypes = [ctypes.c_void_p] +libde265.de265_get_error_text.restype = ctypes.c_char_p + +libde265.de265_get_warning.argtypes = [ctypes.c_void_p] +libde265.de265_get_warning.restype = ctypes.c_int + +libde265.de265_get_version_number_major.restype = ctypes.c_uint32 +libde265.de265_get_version_number_minor.restype = ctypes.c_uint32 + +libde265.de265_new_decoder.restype = ctypes.c_void_p + +libde265.de265_free_decoder.argtypes = [ctypes.c_void_p] +libde265.de265_free_decoder.restype = ctypes.c_int + +libde265.de265_start_worker_threads.argtypes = [ctypes.c_void_p, ctypes.c_int] +libde265.de265_start_worker_threads.restype = ctypes.c_int + +libde265.de265_push_data.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_void_p] +libde265.de265_push_data.restype = ctypes.c_int + 
+libde265.de265_push_NAL.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_void_p]
+libde265.de265_push_NAL.restype = ctypes.c_int
+
+libde265.de265_push_end_of_frame.argtypes = [ctypes.c_void_p]
+libde265.de265_push_end_of_frame.restype = None
+
+libde265.de265_flush_data.argtypes = [ctypes.c_void_p]
+libde265.de265_flush_data.restype = ctypes.c_int
+
+libde265.de265_decode.argtypes = [ctypes.c_void_p, ctypes.POINTER(ctypes.c_int)]
+libde265.de265_decode.restype = ctypes.c_int
+
+libde265.de265_get_next_picture.argtypes = [ctypes.c_void_p]
+libde265.de265_get_next_picture.restype = ctypes.c_void_p
+
+libde265.de265_peek_next_picture.argtypes = [ctypes.c_void_p]
+libde265.de265_peek_next_picture.restype = ctypes.c_void_p
+
+libde265.de265_release_next_picture.argtypes = [ctypes.c_void_p]
+libde265.de265_release_next_picture.restype = None
+
+libde265.de265_get_chroma_format.argtypes = [ctypes.c_void_p]
+libde265.de265_get_chroma_format.restype = ctypes.c_int
+
+libde265.de265_get_image_width.argtypes = [ctypes.c_void_p, ctypes.c_int]
+libde265.de265_get_image_width.restype = ctypes.c_int
+
+libde265.de265_get_image_height.argtypes = [ctypes.c_void_p, ctypes.c_int]
+libde265.de265_get_image_height.restype = ctypes.c_int
+
+libde265.de265_get_bits_per_pixel.argtypes = [ctypes.c_void_p, ctypes.c_int]
+libde265.de265_get_bits_per_pixel.restype = ctypes.c_int
+
+libde265.de265_get_image_plane.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.POINTER(ctypes.c_int)]
+libde265.de265_get_image_plane.restype = ctypes.POINTER(ctypes.c_char)
+
+libde265.de265_get_number_of_input_bytes_pending.argtypes = [ctypes.c_void_p]
+libde265.de265_get_number_of_input_bytes_pending.restype = ctypes.c_int
+
+class libde265Error(Exception):
+    def __init__(self, code):
+        super(libde265Error, self).__init__(
+            libde265.de265_get_error_text(code).decode("ascii"))
+
+class WaitingForInput(libde265Error):
+    pass
+
+class Decoder:
+    def __init__(self, size, 
threads=_threads): + self._size = size + self._more = ctypes.c_int() + self._out_stride = ctypes.c_int() + self._ctx = libde265.de265_new_decoder() + self._supress_warnings = False + + err = libde265.de265_start_worker_threads(self._ctx, threads) + + if err: + raise libde265Error(err) + + def __del__(self): + libde265.de265_free_decoder(self._ctx) + + def _copy_image(self, de265_image): + res = np.zeros((self._size[0], self._size[1], 3), dtype=np.uint8) + + # libde265: always 420 (???) + # chroma_format = libde265.de265_get_chroma_format(de265_image) + + for c in range(0, 3): + size = (libde265.de265_get_image_height(de265_image, c), + libde265.de265_get_image_width(de265_image, c)) + + bpp = libde265.de265_get_bits_per_pixel(de265_image, c) + if bpp != 8: + raise NotImplementedError("unsupported bits per pixel %i" % bpp) + + img_ptr = libde265.de265_get_image_plane(de265_image, c, self._out_stride) + + # for frombuffer() no copy assumed + ch = np.frombuffer(img_ptr[:size[0] * size[1]], dtype=np.uint8) + ch.shape = size + + res[:,:,c] = _resize(ch, self._size) + + return res + + def _warning(self): + if self._supress_warnings: + return + + code = libde265.de265_get_warning(self._ctx) + + if code != _libde265error.DE265_OK: + msg = libde265.de265_get_error_text(code).decode("ascii") + warn(msg) + + def decode(self): + err = libde265.de265_decode(self._ctx, self._more) + + if err: + if err == _libde265error.DE265_ERROR_WAITING_FOR_INPUT_DATA: + raise WaitingForInput(err) + + raise libde265Error(err) + + self._warning() + + return self._more.value != 0 + + def flush_data(self): + err = libde265.de265_flush_data(self._ctx) + + if err: + raise libde265Error(err) + + def push_data(self, data): + if not isinstance(data, bytes): + raise ValueError("expected bytes") + + err = libde265.de265_push_data(self._ctx, data, len(data), None, None) + + if err: + raise libde265Error(err) + + def push_end_of_frame(self): + err = libde265.de265_push_end_of_frame(self._ctx) + + if err: 
+ raise libde265Error(err) + + def push_NAL(self, data): + if not isinstance(data, bytes): + raise ValueError("expected bytes") + + err = libde265.de265_push_NAL(self._ctx, data, len(data), None, None) + + if err: + raise libde265Error(err) + + def get_next_picture(self): + ''' + Returns next decoded frame. Image in YCbCr format. If no frame available + returns None. + ''' + + de265_image = libde265.de265_get_next_picture(self._ctx) + + if not de265_image: + return None + + res = self._copy_image(de265_image) + + libde265.de265_release_next_picture(self._ctx) + + return res + + def get_number_of_input_bytes_pending(self): + return libde265.de265_get_number_of_input_bytes_pending(self._ctx) + + def peek_next_picture(self): + de265_image = libde265.de265_peek_next_picture(self._ctx) + + if not de265_image: + return None + + res = self._copy_image(de265_image) + + libde265.de265_release_next_picture(self._ctx) + + return res diff --git a/python/ftl/misc.py b/python/ftl/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..5494382ca1376c8bf96a9d30d7d863d8bfc9eff9 --- /dev/null +++ b/python/ftl/misc.py @@ -0,0 +1,97 @@ + +def disparity_to_depth(disparity, camera, max_depth=10.0, invalid_value=0.0): + ''' Calculate depth map from disparity map. Depth values smaller than 0.0 + and larger than max_depth are set to invalid_value. 
+ ''' + depth = (camera.fx * camera.baseline) / (disparity - camera.doff) + depth[depth < 0] = invalid_value + depth[depth > max_depth] = invalid_value + return depth + +from enum import IntEnum + +# components/codecs/include/ftl/codecs/hevc.hpp +class NALType(IntEnum): + CODED_SLICE_TRAIL_N = 0 + CODED_SLICE_TRAIL_R = 1 + + CODED_SLICE_TSA_N = 2 + CODED_SLICE_TSA_R = 3 + + CODED_SLICE_STSA_N = 4 + CODED_SLICE_STSA_R = 5 + + CODED_SLICE_RADL_N = 6 + CODED_SLICE_RADL_R = 7 + + CODED_SLICE_RASL_N = 8 + CODED_SLICE_RASL_R = 9 + + RESERVED_VCL_N10 = 10 + RESERVED_VCL_R11 = 11 + RESERVED_VCL_N12 = 12 + RESERVED_VCL_R13 = 13 + RESERVED_VCL_N14 = 14 + RESERVED_VCL_R15 = 15 + + CODED_SLICE_BLA_W_LP = 16 + CODED_SLICE_BLA_W_RADL = 17 + CODED_SLICE_BLA_N_LP = 18 + CODED_SLICE_IDR_W_RADL = 19 + CODED_SLICE_IDR_N_LP = 20 + CODED_SLICE_CRA = 21 + RESERVED_IRAP_VCL22 = 22 + RESERVED_IRAP_VCL23 = 23 + + RESERVED_VCL24 = 24 + RESERVED_VCL25 = 25 + RESERVED_VCL26 = 26 + RESERVED_VCL27 = 27 + RESERVED_VCL28 = 28 + RESERVED_VCL29 = 29 + RESERVED_VCL30 = 30 + RESERVED_VCL31 = 31 + + VPS = 32 + SPS = 33 + PPS = 34 + ACCESS_UNIT_DELIMITER = 35 + EOS = 36 + EOB = 37 + FILLER_DATA = 38 + PREFIX_SEI = 39 + SUFFIX_SEI = 40 + + RESERVED_NVCL41 = 41 + RESERVED_NVCL42 = 42 + RESERVED_NVCL43 = 43 + RESERVED_NVCL44 = 44 + RESERVED_NVCL45 = 45 + RESERVED_NVCL46 = 46 + RESERVED_NVCL47 = 47 + UNSPECIFIED_48 = 48 + UNSPECIFIED_49 = 49 + UNSPECIFIED_50 = 50 + UNSPECIFIED_51 = 51 + UNSPECIFIED_52 = 52 + UNSPECIFIED_53 = 53 + UNSPECIFIED_54 = 54 + UNSPECIFIED_55 = 55 + UNSPECIFIED_56 = 56 + UNSPECIFIED_57 = 57 + UNSPECIFIED_58 = 58 + UNSPECIFIED_59 = 59 + UNSPECIFIED_60 = 60 + UNSPECIFIED_61 = 61 + UNSPECIFIED_62 = 62 + UNSPECIFIED_63 = 63 + INVALID = 64 + +def get_NAL_type(data): + if not isinstance(data, bytes): + raise ValueError("expected bytes") + + return NALType((data[4] >> 1) & 0x3f) + +def is_iframe(data): + return get_NAL_type(data) == NALType.VPS