diff --git a/SDK/CPP/private/frame_impl.cpp b/SDK/CPP/private/frame_impl.cpp
index 0b0779b6a2a2d23a3250a0cb917e8486e88af4b9..059b508fff3e7e7e3b40877685411ad890f0887f 100644
--- a/SDK/CPP/private/frame_impl.cpp
+++ b/SDK/CPP/private/frame_impl.cpp
@@ -15,9 +15,9 @@ FrameImpl::~FrameImpl()
 
 }
 
-std::list<voltu::ImagePtr> FrameImpl::getImageSet(voltu::Channel c)
+std::vector<voltu::ImagePtr> FrameImpl::getImageSet(voltu::Channel c)
 {
-	std::list<voltu::ImagePtr> result;
+	std::vector<voltu::ImagePtr> result;
 	ftl::codecs::Channel channel = ftl::codecs::Channel::Colour;
 
 	switch (c)
@@ -50,9 +50,9 @@ voltu::PointCloudPtr FrameImpl::getPointCloud(voltu::PointCloudFormat cloudfmt,
 	return nullptr;
 }
 
-std::vector<std::string> FrameImpl::getMessages()
+std::vector<std::vector<std::string>> FrameImpl::getMessages()
 {
-	std::vector<std::string> msgs;
+	std::vector<std::vector<std::string>> allmsgs;
 
 	for (const auto &fs : framesets_)
 	{
@@ -61,12 +61,13 @@ std::vector<std::string> FrameImpl::getMessages()
 			if (f.hasChannel(ftl::codecs::Channel::Messages))
 			{
 				const auto &m = f.get<std::vector<std::string>>(ftl::codecs::Channel::Messages);
+				auto &msgs = allmsgs.emplace_back();
 				msgs.insert(msgs.end(), m.begin(), m.end());
 			}
 		}
 	}
 
-	return msgs;
+	return allmsgs;
 }
 
 void FrameImpl::pushFrameSet(const std::shared_ptr<ftl::data::FrameSet> &fs)
diff --git a/SDK/CPP/private/frame_impl.hpp b/SDK/CPP/private/frame_impl.hpp
index 1b6074a8a6cc1c23dd9e317c09cf1a5596bd467c..38750ce4269b9bdaf67df41ba76f4f07a3d15233 100644
--- a/SDK/CPP/private/frame_impl.hpp
+++ b/SDK/CPP/private/frame_impl.hpp
@@ -16,11 +16,11 @@ public:
 	FrameImpl();
 	~FrameImpl() override;
 
-	std::list<voltu::ImagePtr> getImageSet(voltu::Channel) override;
+	std::vector<voltu::ImagePtr> getImageSet(voltu::Channel) override;
 
 	voltu::PointCloudPtr getPointCloud(voltu::PointCloudFormat cloudfmt, voltu::PointFormat pointfmt) override;
 
-	std::vector<std::string> getMessages() override;
+	std::vector<std::vector<std::string>> getMessages() override;
 
 	int64_t getTimestamp() override;
 
diff --git a/SDK/CPP/private/pipeline_impl.cpp b/SDK/CPP/private/pipeline_impl.cpp
index 114f73ad4b19a0bf8db3053bdc25b6260e63351c..7fbbc5131cc0f3d6edc67a5f413bac31ca2a9ba7 100644
--- a/SDK/CPP/private/pipeline_impl.cpp
+++ b/SDK/CPP/private/pipeline_impl.cpp
@@ -6,6 +6,8 @@
 #include <ftl/operators/fusion.hpp>
 #include <ftl/operators/gt_analysis.hpp>
 
+#include <loguru.hpp>
+
 using voltu::internal::PipelineImpl;
 
 PipelineImpl::PipelineImpl(ftl::Configurable *root)
@@ -28,7 +30,7 @@ void PipelineImpl::submit(const voltu::FramePtr &frame)
 
 	const auto &sets = fimp->getInternalFrameSets();
 
-	if (sets.size() > 1) throw voltu::exceptions::IncompatibleOperation();
+	if (sets.size() != 1) throw voltu::exceptions::IncompatibleOperation();
 
 	for (const auto &fs : sets)
 	{
@@ -40,10 +42,12 @@ void PipelineImpl::submit(const voltu::FramePtr &frame)
 	}
 }
 
-bool PipelineImpl::waitCompletion(int timeout)
+bool PipelineImpl::waitCompletion(int timeout, bool except)
 {
 	int count = timeout / 5;
 	while (!ready_ && --count >= 0) std::this_thread::sleep_for(std::chrono::milliseconds(5));
+
+	if (!ready_ && except) throw voltu::exceptions::Timeout();
 	return ready_;
 }
 
diff --git a/SDK/CPP/private/pipeline_impl.hpp b/SDK/CPP/private/pipeline_impl.hpp
index 98fe41c9018fe328c1cb296e3819d052a8808a73..3413f067a20bfe06bd55c9373b9bc8eb4ac73026 100644
--- a/SDK/CPP/private/pipeline_impl.hpp
+++ b/SDK/CPP/private/pipeline_impl.hpp
@@ -16,7 +16,7 @@ public:
 
 	void submit(const voltu::FramePtr &frame) override;
 
-	bool waitCompletion(int timeout) override;
+	bool waitCompletion(int timeout, bool except) override;
 
 	voltu::OperatorPtr appendOperator(voltu::OperatorId id) override;
 
diff --git a/SDK/CPP/private/room_impl.cpp b/SDK/CPP/private/room_impl.cpp
index 65825d5152c5bad4f01340e52896db2808880c2b..9fd0d2d437ca39ceeef3355dfb486a2dd270367d 100644
--- a/SDK/CPP/private/room_impl.cpp
+++ b/SDK/CPP/private/room_impl.cpp
@@ -20,7 +20,7 @@ bool RoomImpl::waitNextFrame(int64_t timeout, bool except)
 {
 	if (!filter_)
 	{
-		filter_ = feed_->filter(fsids_, {ftl::codecs::Channel::Colour, ftl::codecs::Channel::Depth});
+		filter_ = feed_->filter(fsids_, {ftl::codecs::Channel::Colour, ftl::codecs::Channel::Depth, ftl::codecs::Channel::GroundTruth});
 		filter_->on([this](const std::shared_ptr<ftl::data::FrameSet> &fs)
 		{
 			UNIQUE_LOCK(mutex_, lk);
@@ -36,7 +36,7 @@ bool RoomImpl::waitNextFrame(int64_t timeout, bool except)
 	{
 		if (timeout > 0)
 		{
-			cv_.wait_for(lk, std::chrono::seconds(timeout), [this] {
+			cv_.wait_for(lk, std::chrono::milliseconds(timeout), [this] {
 				return last_read_ < last_seen_;
 			});
 
diff --git a/SDK/CPP/public/CMakeLists.txt b/SDK/CPP/public/CMakeLists.txt
index 1128913e54a4350b1863f62849cca251e3aa6ed3..94b928b5ff111abc47def326150c7d5c4aa51b73 100644
--- a/SDK/CPP/public/CMakeLists.txt
+++ b/SDK/CPP/public/CMakeLists.txt
@@ -44,6 +44,10 @@ endif()
 
 add_library(voltu_sdk STATIC ${VOLTU_SRCS})
 
+if (WITH_OPENCV)
+	target_compile_definitions(voltu_sdk PUBLIC WITH_OPENCV)
+endif()
+
 target_include_directories(voltu_sdk
 	PUBLIC include)
 target_link_libraries(voltu_sdk ${OS_LIBS} Threads::Threads ${OPTIONAL_DEPENDENCIES} Eigen3::Eigen)
@@ -65,6 +69,7 @@ target_link_libraries(voltu_basic_virtual_cam voltu_sdk)
 
 add_executable(voltu_fusion_evaluator
 	samples/fusion_evaluator/main.cpp
+	samples/common/cmd_args.cpp
 )
 target_link_libraries(voltu_fusion_evaluator voltu_sdk)
 
diff --git a/SDK/CPP/public/include/voltu/cuda.hpp b/SDK/CPP/public/include/voltu/cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..972f4964b839db3db3793d117e5fb80c0c7702ae
--- /dev/null
+++ b/SDK/CPP/public/include/voltu/cuda.hpp
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <voltu/types/image.hpp>
+#include 
+#include <memory>
+
+namespace voltu
+{
+
+/**
+ * @brief CUDA Processing Stream.
+ * 
+ * An instance of this class is mapped to a single CUDA stream, so all of the
+ * available operations occur within that stream. It is therefore necessary
+ * to call `waitCompletion` once all steps have been issued.
+ */
+class CUDAProc
+{
+public:
+	virtual bool waitCompletion(int timeout, bool except=false) = 0;
+
+	virtual void* getInternalStream() = 0;
+
+	virtual void visualiseDepthEnhancement(const voltu::ImagePtr &gt, const voltu::ImagePtr &depth_old, const voltu::ImagePtr &depth_new, const voltu::ImagePtr &colour) = 0;
+};
+
+}
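A minimal usage sketch for the new `CUDAProc` interface, assuming the caller has already obtained an instance from the runtime (this header does not define how one is created) and holds the four images; the queue-then-wait pattern follows the class comment, and the behaviour of `except=true` is assumed to mirror `Pipeline::waitCompletion`.

```cpp
// Sketch only: how 'proc' is obtained is not specified by this header.
void show_enhancement(voltu::CUDAProc &proc,
                      const voltu::ImagePtr &gt,
                      const voltu::ImagePtr &depth_old,
                      const voltu::ImagePtr &depth_new,
                      const voltu::ImagePtr &colour)
{
	// Queue the visualisation on the processor's CUDA stream...
	proc.visualiseDepthEnhancement(gt, depth_old, depth_new, colour);

	// ...then block until the stream has drained; with except=true this is
	// assumed to throw on expiry rather than return false.
	proc.waitCompletion(1000, true);
}
```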
diff --git a/SDK/CPP/public/include/voltu/initialise.hpp b/SDK/CPP/public/include/voltu/initialise.hpp
index 5d34a1e691797b8c4922aa274775d7f84ef401ac..7c72189dcf669982d48abd5a797b52c9c3832e58 100644
--- a/SDK/CPP/public/include/voltu/initialise.hpp
+++ b/SDK/CPP/public/include/voltu/initialise.hpp
@@ -43,4 +43,6 @@ namespace voltu
 	 * @return Singleton VolTu runtime instance.
 	 */
 	std::shared_ptr<voltu::System> instance();
+
+	void release();
 }
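For callers not using the `Voltu` RAII helper added in `voltu.hpp` below, a sketch of the intended pairing of `instance()` with the new `release()`: the last `shared_ptr` reference should be dropped before the runtime library is unloaded.

```cpp
#include <voltu/initialise.hpp>

int main()
{
	{
		auto vtu = voltu::instance();  // loads and binds the runtime library
		// ... use vtu ...
	}                                  // last shared_ptr reference dropped here
	voltu::release();                  // now the runtime library can be unloaded
	return 0;
}
```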
diff --git a/SDK/CPP/public/include/voltu/opencv.hpp b/SDK/CPP/public/include/voltu/opencv.hpp
index 7119cbc87097ce1609a7e93696059db38b7097e3..1e8e858ab0eef22dfccc892f0a1b2124056f482e 100644
--- a/SDK/CPP/public/include/voltu/opencv.hpp
+++ b/SDK/CPP/public/include/voltu/opencv.hpp
@@ -7,19 +7,29 @@
 #pragma once
 
 #include <opencv2/core/mat.hpp>
-#include <opencv2/core/cuda_types.hpp>
+#include <opencv2/core/cuda.hpp>
 #include <voltu/types/image.hpp>
 
 namespace voltu
 {
-namespace cv
+namespace opencv
 {
 
 void convert(voltu::ImagePtr img, ::cv::Mat &mat);
 
 void convert(voltu::ImagePtr img, ::cv::cuda::GpuMat &mat);
 
+::cv::cuda::GpuMat toGpuMat(voltu::ImagePtr img);
+
 void visualise(voltu::ImagePtr img, ::cv::Mat &mat);
 
 }
+
+struct GpuUtilities
+{
+	void (*visualiseDepthEnhancement)(const voltu::ImagePtr &gt, const voltu::ImagePtr &depth_old, const voltu::ImagePtr &depth_new, const voltu::ImagePtr &colour) = nullptr;
+};
+
+extern GpuUtilities gpu;
+
 }
\ No newline at end of file
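A short sketch of the renamed `voltu::opencv` helpers, assuming a valid `voltu::ImagePtr`. Per the `voltu_cv.cpp` changes below, `toGpuMat` wraps the device buffer without copying (currently only for `kFloat32`), and `visualise` maps depth to 0-255 over an assumed 8 m range.

```cpp
#include <voltu/opencv.hpp>
#include <opencv2/highgui.hpp>

void show(const voltu::ImagePtr &img)
{
	// Zero-copy wrap of the device buffer (kFloat32 only at present).
	::cv::cuda::GpuMat gpu = voltu::opencv::toGpuMat(img);

	::cv::Mat host;
	gpu.download(host);                  // explicit device-to-host copy

	::cv::Mat vis;
	voltu::opencv::visualise(img, vis);  // host-side visualisation
	::cv::imshow(img->getName(), vis);
}
```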
diff --git a/SDK/CPP/public/include/voltu/pipeline.hpp b/SDK/CPP/public/include/voltu/pipeline.hpp
index f8a201bac088192866886505050b2bce07b51acd..4e2c4ee89d512d22f7e7e675323d09ec3598d9c4 100644
--- a/SDK/CPP/public/include/voltu/pipeline.hpp
+++ b/SDK/CPP/public/include/voltu/pipeline.hpp
@@ -64,7 +64,7 @@ public:
 	 * @param timeout Millisecond timeout, or 0 for non-blocking check.
 	 * @return True if completed
 	 */
-	PY_API virtual bool waitCompletion(int timeout) = 0;
+	PY_API virtual bool waitCompletion(int timeout, bool except=false) = 0;
 
 	/**
 	 * @brief Add an operator to this pipeline.
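With the new `except` flag, callers can choose between polling and exception-based failure; a sketch assuming `pipe` and `frame` are obtained as in the samples (`voltu::PipelinePtr` and `voltu::FramePtr` are the presumed smart-pointer aliases).

```cpp
void runOnce(const voltu::PipelinePtr &pipe, const voltu::FramePtr &frame)
{
	pipe->submit(frame);

	// Polling style: false simply means the work has not completed yet.
	if (!pipe->waitCompletion(1000))
	{
		// handle the timeout locally
	}

	// Exception style (alternative): assumed to throw
	// voltu::exceptions::Timeout on expiry.
	pipe->waitCompletion(3000, true);
}
```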
diff --git a/SDK/CPP/public/include/voltu/system.hpp b/SDK/CPP/public/include/voltu/system.hpp
index 8fb9c744cdf3c7646531803f349aeb7455962c12..df7b4938ebd8103903b852f962fa6c23c1f5d255 100644
--- a/SDK/CPP/public/include/voltu/system.hpp
+++ b/SDK/CPP/public/include/voltu/system.hpp
@@ -78,7 +78,7 @@ public:
 	 * Identifiers (URIs), with some non-standard additions. A few examples
 	 * are:
 	 * * `file:///home/user/file.ftl`
-	 * * `tcp://localhost:9001/*`
+	 * * `tcp://localhost:9001/`
 	 * * `ftl://my.stream.name/room1`
 	 * * `ws://ftlab.utu.fi/lab/`
 	 * * `./file.ftl`
diff --git a/SDK/CPP/public/include/voltu/types/frame.hpp b/SDK/CPP/public/include/voltu/types/frame.hpp
index 4209cc80c2d1af4e46c4c1d7227ad0e3a9918c79..7f03fc6045ec8a378fb9feb174e956c59cc4adff 100644
--- a/SDK/CPP/public/include/voltu/types/frame.hpp
+++ b/SDK/CPP/public/include/voltu/types/frame.hpp
@@ -22,11 +22,11 @@ class Frame
 public:
 	virtual ~Frame() = default;
 	
-	PY_API PY_RV_LIFETIME_PARENT virtual std::list<voltu::ImagePtr> getImageSet(voltu::Channel channel) = 0;
+	PY_API PY_RV_LIFETIME_PARENT virtual std::vector<voltu::ImagePtr> getImageSet(voltu::Channel channel) = 0;
 
 	PY_API PY_RV_LIFETIME_PARENT virtual voltu::PointCloudPtr getPointCloud(voltu::PointCloudFormat cloudfmt, voltu::PointFormat pointfmt) = 0;
 
-	PY_API virtual std::vector<std::string> getMessages() = 0;
+	PY_API virtual std::vector<std::vector<std::string>> getMessages() = 0;
 
 	PY_API virtual int64_t getTimestamp() = 0;
 };
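`getMessages()` now returns one vector of strings per source frame instead of a single flattened list; a sketch of consuming the grouped result, assuming a `voltu::FramePtr`.

```cpp
#include <voltu/types/frame.hpp>
#include <iostream>

void printMessages(const voltu::FramePtr &frame)
{
	// One message list per source frame, in frameset order.
	const auto allmsgs = frame->getMessages();
	for (size_t src = 0; src < allmsgs.size(); ++src)
	{
		for (const auto &msg : allmsgs[src])
		{
			std::cout << "[source " << src << "] " << msg << std::endl;
		}
	}
}
```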
diff --git a/SDK/CPP/public/include/voltu/voltu.hpp b/SDK/CPP/public/include/voltu/voltu.hpp
index d3e1413b4052c065709aaa7591a9c93d37e7550d..2bd0ff0a07651a9b562f8921869b729ca30b9233 100644
--- a/SDK/CPP/public/include/voltu/voltu.hpp
+++ b/SDK/CPP/public/include/voltu/voltu.hpp
@@ -8,10 +8,30 @@
 
 // Bump these for each release
 #define VOLTU_VERSION_MAJOR 0    // For API incompatible changes
-#define VOLTU_VERSION_MINOR 3    // For binary compatibility and extensions
+#define VOLTU_VERSION_MINOR 4    // For binary compatibility and extensions
 #define VOLTU_VERSION_PATCH 0    // Binary compatible internal fixes
 
 #define VOLTU_VERSION ((VOLTU_VERSION_MAJOR*10000) + (VOLTU_VERSION_MINOR*100) + VOLTU_VERSION_PATCH)
 
 #include <voltu/system.hpp>
 #include <voltu/initialise.hpp>
+
+namespace voltu
+{
+
+class Voltu
+{
+public:
+	inline Voltu() : instance_(voltu::instance()) {}
+	inline ~Voltu() { instance_.reset(); voltu::release(); }
+
+	inline voltu::System* operator->()
+	{
+		return instance_.get();
+	}
+
+private:
+	std::shared_ptr<voltu::System> instance_;
+};
+
+}
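The new `Voltu` class is an RAII wrapper over `instance()`/`release()`; a minimal usage sketch mirroring the updated fusion_evaluator sample.

```cpp
#include <voltu/voltu.hpp>
#include <chrono>
#include <thread>

int main(int argc, char **argv)
{
	voltu::Voltu vtu;  // acquires the singleton runtime instance

	if (argc > 1) vtu->open(argv[1]);

	while (vtu->listRooms().size() == 0)
	{
		std::this_thread::sleep_for(std::chrono::milliseconds(100));
	}

	return 0;
}  // ~Voltu drops the instance and unloads the runtime library
```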
diff --git a/SDK/CPP/public/samples/basic_file/main.cpp b/SDK/CPP/public/samples/basic_file/main.cpp
index c983864275a217fc1d2cc6628012dc698f8ad19e..9af26c1c0859e609aaabf687dfeb119e9f625f80 100644
--- a/SDK/CPP/public/samples/basic_file/main.cpp
+++ b/SDK/CPP/public/samples/basic_file/main.cpp
@@ -45,7 +45,7 @@ int main(int argc, char **argv)
 		for (auto img : imgset)
 		{
 			cv::Mat m;
-			voltu::cv::visualise(img, m);
+			voltu::opencv::visualise(img, m);
 			cv::imshow(string("Image-") + img->getName(), m);
 		}
 
diff --git a/SDK/CPP/public/samples/basic_virtual_cam/main.cpp b/SDK/CPP/public/samples/basic_virtual_cam/main.cpp
index f1b2634b7135bfb3c303fdd6fd54e81da6a69a38..47dea0583709dbf339e32ca7e96f2eee8e7e78db 100644
--- a/SDK/CPP/public/samples/basic_virtual_cam/main.cpp
+++ b/SDK/CPP/public/samples/basic_virtual_cam/main.cpp
@@ -69,7 +69,7 @@ int main(int argc, char **argv)
 		for (auto img : imgset)
 		{
 			cv::Mat m;
-			voltu::cv::convert(img, m);
+			voltu::opencv::convert(img, m);
 			cv::imshow(string("Camera-") + img->getName(), m);
 		}
 
diff --git a/SDK/CPP/public/samples/common/cmd_args.cpp b/SDK/CPP/public/samples/common/cmd_args.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..06812875cbb13919d65363d1c0d1ffc04f1860ea
--- /dev/null
+++ b/SDK/CPP/public/samples/common/cmd_args.cpp
@@ -0,0 +1,35 @@
+#include "cmd_args.hpp"
+
+std::map<std::string, std::string> read_options(char ***argv, int *argc)
+{
+	std::map<std::string, std::string> opts;
+
+	(*argc)--;  // Remove application path
+	(*argv)++;
+
+	while (*argc > 0) {
+		std::string cmd((*argv)[0]);
+
+		size_t p;
+		if (cmd[0] != '-' || (p = cmd.find("=")) == std::string::npos) {
+			opts[cmd.substr(0)] = "true";
+		} else {
+			auto val = cmd.substr(p+1);
+#ifdef WIN32
+			if ((val[0] >= 48 && val[0] <= 57) || val == "true" || val == "false" || val == "null") {
+#else
+			if (std::isdigit(val[0]) || val == "true" || val == "false" || val == "null") {
+#endif
+				opts[cmd.substr(0, p)] = val;
+			} else {
+				if (val[0] == '\\') opts[cmd.substr(2, p-2)] = val;
+				else opts[cmd.substr(0, p)] = "\""+val+"\"";
+			}
+		}
+
+		(*argc)--;
+		(*argv)++;
+	}
+
+	return opts;
+}
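A sketch of how `read_options` is consumed (as in the fusion_evaluator sample below). Note that string values come back wrapped in quotes, while numeric or boolean-like values and bare arguments do not.

```cpp
#include "cmd_args.hpp"
#include <iostream>

int main(int argc, char **argv)
{
	// e.g.  ./tool --frame=2 --display=normals ./file.ftl
	auto opts = read_options(&argv, &argc);

	for (const auto &opt : opts)
	{
		// "--frame" -> "2", "--display" -> "\"normals\"", "./file.ftl" -> "true"
		std::cout << opt.first << " = " << opt.second << std::endl;
	}
	return 0;
}
```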
diff --git a/SDK/CPP/public/samples/common/cmd_args.hpp b/SDK/CPP/public/samples/common/cmd_args.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a42486b421cd4a59e7727926ba25dbdd7a1d87e6
--- /dev/null
+++ b/SDK/CPP/public/samples/common/cmd_args.hpp
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <map>
+#include <string>
+
+std::map<std::string, std::string> read_options(char ***argv, int *argc);
diff --git a/SDK/CPP/public/samples/fusion_evaluator/main.cpp b/SDK/CPP/public/samples/fusion_evaluator/main.cpp
index 908d0925ed80bf5d4d125d6f184e6dc0d7a9f906..40db29fe5024e1e994a02a732dea9cb9f56b4ccc 100644
--- a/SDK/CPP/public/samples/fusion_evaluator/main.cpp
+++ b/SDK/CPP/public/samples/fusion_evaluator/main.cpp
@@ -4,6 +4,8 @@
 #include <thread>
 #include <chrono>
 
+#include "../common/cmd_args.hpp"
+
 #include <opencv2/highgui.hpp>
 
 using std::cout;
@@ -12,53 +14,131 @@ using std::string;
 
 int main(int argc, char **argv)
 {
-	if (argc != 2) return -1;
+	bool do_fusion = true;
+	bool do_eval = true;
+	bool do_carving = false;
+	bool show_changes = false;
+	int frameno = 0;
+	int sourceno = 0;
+	int iters = 2;
+	voltu::Channel display_channel = voltu::Channel::kColour;
+	std::list<std::string> paths;
+
+	auto opts = read_options(&argv, &argc);
+
+	for (const auto &s : opts)
+	{
+		if (s.first == "--no-fusion")
+		{
+			do_fusion = false;
+		}
+		else if (s.first == "--display")
+		{
+			if (s.second == "\"normals\"")
+			{
+				display_channel = voltu::Channel::kNormals;
+			}
+			else if (s.second == "\"depth\"")
+			{
+				display_channel = voltu::Channel::kDepth;
+			}
+			else if (s.second == "\"corrections\"")
+			{
+				display_channel = voltu::Channel::kColour;
+				show_changes = true;
+			}
+		}
+		else if (s.first == "--no-eval")
+		{
+			do_eval = false;
+		}
+		else if (s.first == "--carving")
+		{
+			do_carving = true;
+		}
+		else if (s.first == "--frame")
+		{
+			frameno = std::stoi(s.second);
+		}
+		else if (s.first == "--source")
+		{
+			sourceno = std::stoi(s.second);
+		}
+		else if (s.first == "--iterations")
+		{
+			iters = std::stoi(s.second);
+		}
+		else if (s.first[0] != '-')
+		{
+			paths.push_back(s.first);
+		}
+	}
 
-	auto vtu = voltu::instance();
+	voltu::Voltu vtu;
 
-	if (!vtu->open(argv[1]))
+	for (const auto &p : paths)
 	{
-		cout << "Could not open source" << endl;
-		return -1;
+		vtu->open(p);
 	}
 
 	while (vtu->listRooms().size() == 0)
 	{
 		std::this_thread::sleep_for(std::chrono::milliseconds(100));
+		cout << "Waiting for room..." << endl;
 	}
 
 	auto room = vtu->getRoom(vtu->listRooms().front());
-	if (!room)
+
+	for (int i=0; i<frameno; ++i)
 	{
-		cout << "Could not get room" << endl;
-		return -1;
+		room->waitNextFrame(5000, true);
+		room->getFrame();
 	}
-
 	auto frame = room->getFrame();
 
 	auto pipe = vtu->createPipeline();
 	auto op1 = pipe->appendOperator(voltu::OperatorId::kFusion);
 	auto op2 = pipe->appendOperator(voltu::OperatorId::kGTEvaluator);
 
-	op2->property("show_colour")->setBool(true);
+	op1->property("enabled")->setBool(do_fusion);
+	op2->property("enabled")->setBool(do_eval);
+	op2->property("show_colour")->setBool(!show_changes);
+	op1->property("show_changes")->setBool(show_changes);
+	op1->property("visibility_carving")->setBool(do_carving);
+	op1->property("mls_iterations")->setInt(iters);
+
+	cv::Mat old_depth;
+	auto oldimgset = frame->getImageSet(voltu::Channel::kDepth);
+	voltu::opencv::toGpuMat(oldimgset[sourceno]).download(old_depth);
 
 	pipe->submit(frame);
-	pipe->waitCompletion(1000);
+	pipe->waitCompletion(3000, true);
+
+	auto imgset = frame->getImageSet(display_channel);
 
-	auto imgset = frame->getImageSet(voltu::Channel::kColour);
+	if (imgset.size() == 0)
+	{
+		cout << "No images!" << endl;
+		return -1;
+	}
 
+	int srccount = 0;
 	for (auto img : imgset)
 	{
+		if (srccount++ < sourceno) continue;
 		cv::Mat m;
-		voltu::cv::visualise(img, m);
+		voltu::opencv::toGpuMat(img).download(m);
+		voltu::opencv::visualise(img, m);
 		cv::imshow(string("Image-") + img->getName(), m);
 		break;
 	}
 
-	std::vector<std::string> msgs = frame->getMessages();
-	for (const auto &s : msgs)
-	{
-		cout << s << endl;
+	std::vector<std::vector<std::string>> msgs = frame->getMessages();
+	if (size_t(sourceno) < msgs.size()) {
+		for (const auto &s : msgs[sourceno])
+		{
+			cout << s << endl;
+		}
 	}
 
 	cv::waitKey(-1);
diff --git a/SDK/CPP/public/src/voltu.cpp b/SDK/CPP/public/src/voltu.cpp
index 2c4d19712428be34af7d8fd04c93a7c615fdec7b..9c9767b306d4be7311983c65b731fbc54a5c8a7b 100644
--- a/SDK/CPP/public/src/voltu.cpp
+++ b/SDK/CPP/public/src/voltu.cpp
@@ -8,6 +8,10 @@
 #include <voltu/types/errors.hpp>
 #include <voltu/voltu.hpp>
 
+#ifdef WITH_OPENCV
+#include <voltu/opencv.hpp>
+#endif
+
 #if defined(WIN32)
 #include <windows.h>
 #pragma comment(lib, "User32.lib")
@@ -21,9 +25,14 @@
 #include <cstdlib>
 #include <iostream>
 
+typedef void* Library;
+
 static bool g_init = false;
+static Library handle = nullptr;
 
-typedef void* Library;
+#ifdef WITH_OPENCV
+voltu::GpuUtilities voltu::gpu;
+#endif
 
 static Library loadLibrary(const char *file)
 {
@@ -108,7 +117,7 @@ std::shared_ptr<voltu::System> voltu::instance()
 	
 	std::string name = locateLibrary();
 	std::cout << "Loading VolTu Runtime: " << name << std::endl;
-	Library handle = loadLibrary(name.c_str());
+	handle = loadLibrary(name.c_str());
 
 	if (handle)
 	{
@@ -132,6 +141,18 @@ std::shared_ptr<voltu::System> voltu::instance()
 				throw voltu::exceptions::RuntimeVersionMismatch();
 			}
 
+#ifdef WITH_OPENCV
+			auto gpuinit = (voltu::GpuUtilities* (*)())getFunction(handle, "voltu_utilities_gpu");
+			if (gpuinit)
+			{
+				gpu = *gpuinit();
+			}
+			else
+			{
+				//throw voltu::exceptions::LibraryLoadFailed();	
+			}
+#endif
+
 			return instance;
 		}
 		else
@@ -146,3 +167,9 @@ std::shared_ptr<voltu::System> voltu::instance()
 
 	return nullptr;
 }
+
+void voltu::release()
+{
+	// TODO: Call a finalise function
+	if (handle) unloadLibrary(handle);
+}
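For the GPU utilities to be populated, the runtime library must export a `voltu_utilities_gpu` symbol matching the cast above. A hedged sketch of the runtime side (only the symbol name and the `voltu::GpuUtilities* ()` signature are dictated by this file; the implementation names are illustrative):

```cpp
#include <voltu/opencv.hpp>

// Hypothetical runtime-side implementation of the visualisation hook.
static void runtime_visualiseDepthEnhancement(
	const voltu::ImagePtr &gt, const voltu::ImagePtr &depth_old,
	const voltu::ImagePtr &depth_new, const voltu::ImagePtr &colour)
{
	// ... launch the CUDA visualisation kernels ...
}

static voltu::GpuUtilities g_gpu_utils;

// Unmangled name so getFunction()/dlsym can locate it.
extern "C" voltu::GpuUtilities* voltu_utilities_gpu()
{
	g_gpu_utils.visualiseDepthEnhancement = &runtime_visualiseDepthEnhancement;
	return &g_gpu_utils;
}
```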
diff --git a/SDK/CPP/public/src/voltu_cv.cpp b/SDK/CPP/public/src/voltu_cv.cpp
index 31cd29aa6f85a77ee3baf23541d918ea5046b01a..5e55e79086a96dc599d209944f5c3ccff9170776 100644
--- a/SDK/CPP/public/src/voltu_cv.cpp
+++ b/SDK/CPP/public/src/voltu_cv.cpp
@@ -9,7 +9,7 @@
 
 #include <opencv2/imgproc.hpp>
 
-void voltu::cv::convert(voltu::ImagePtr img, ::cv::Mat &mat)
+void voltu::opencv::convert(voltu::ImagePtr img, ::cv::Mat &mat)
 {
 	voltu::ImageData data = img->getHost();
 
@@ -31,26 +31,51 @@ void voltu::cv::convert(voltu::ImagePtr img, ::cv::Mat &mat)
 	}
 }
 
-void voltu::cv::convert(voltu::ImagePtr img, ::cv::cuda::GpuMat &mat)
+void voltu::opencv::convert(voltu::ImagePtr img, ::cv::cuda::GpuMat &mat)
 {
+	mat = voltu::opencv::toGpuMat(img);
+}
+
+cv::cuda::GpuMat voltu::opencv::toGpuMat(voltu::ImagePtr img)
+{
+	voltu::ImageData data = img->getDevice();
+
+	if (data.format == voltu::ImageFormat::kBGRA8)
+	{
+
+	}
+	else if (data.format == voltu::ImageFormat::kFloat32)
+	{
+		return ::cv::cuda::GpuMat(
+			data.height,
+			data.width,
+			CV_32F,
+			data.data
+		);
+	}
+
 	throw voltu::exceptions::NotImplemented();
 }
 
-void voltu::cv::visualise(voltu::ImagePtr img, ::cv::Mat &mat)
+void voltu::opencv::visualise(voltu::ImagePtr img, ::cv::Mat &mat)
 {
 	voltu::ImageData data = img->getHost();
 
 	if (data.format == voltu::ImageFormat::kBGRA8)
 	{
-		voltu::cv::convert(img, mat);
+		voltu::opencv::convert(img, mat);
 	}
 	else if (data.format == voltu::ImageFormat::kFloat32)
 	{
 		::cv::Mat tmp;
-		voltu::cv::convert(img, tmp);
+		voltu::opencv::convert(img, tmp);
+
+		float maxdepth = 8.0f;  // TODO: Get from intrinsics
 
-		::cv::normalize(tmp, tmp, 0, 255, ::cv::NORM_MINMAX);
-		tmp.convertTo(tmp, CV_8U);
+		//::cv::normalize(tmp, tmp, 0, 255, ::cv::NORM_MINMAX);
+		tmp.convertTo(tmp, CV_8U, 255.0f / maxdepth);
+		::cv::Mat mask = tmp > 0;
+		::cv::subtract(::cv::Scalar(255), tmp, tmp, mask);
 		
 		//#if OPENCV_VERSION >= 40102
 		//cv::applyColorMap(tmp, mat, cv::COLORMAP_TURBO);
@@ -61,7 +86,7 @@ void voltu::cv::visualise(voltu::ImagePtr img, ::cv::Mat &mat)
 	else if (data.format == voltu::ImageFormat::kFloat16_4)
 	{
 		::cv::Mat tmp;
-		voltu::cv::convert(img, tmp);
+		voltu::opencv::convert(img, tmp);
 		tmp.convertTo(tmp, CV_32FC4);
 		tmp += 1.0f;
 		tmp *= 127.0f;
diff --git a/components/codecs/src/opencv_decoder.cpp b/components/codecs/src/opencv_decoder.cpp
index 6e094a0e5b1fe35af04eb651b327cf3c79d5bc3e..9133d394478cc8f9f709020833b0f6d74bf1f539 100644
--- a/components/codecs/src/opencv_decoder.cpp
+++ b/components/codecs/src/opencv_decoder.cpp
@@ -36,6 +36,8 @@ bool OpenCVDecoder::decode(const ftl::codecs::Packet &pkt, cv::cuda::GpuMat &out
 
 	if (tmp2_.type() == CV_8UC3) {
 		cv::cvtColor(tmp2_, tmp_, cv::COLOR_RGB2BGRA);
+	} else if (tmp2_.type() == CV_8U) {
+		tmp_ = tmp2_;
 	} else {
 		if (pkt.flags & ftl::codecs::kFlagFlipRGB) {
 			cv::cvtColor(tmp2_, tmp_, cv::COLOR_RGBA2BGRA);
diff --git a/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp b/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp
index 884fd0da46db2d61e68aba175603d7a04717f407..bbd5cfffffae45a25510dc2d0a2b0ed8134633fb 100644
--- a/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp
+++ b/components/operators/include/ftl/operators/cuda/mls/multi_intensity.hpp
@@ -49,6 +49,7 @@ public:
 		const ftl::rgbd::Camera &cam_src,
 		const float4x4 &pose_src,
 		float smoothing,
+		float fsmoothing,
 		cudaStream_t stream
 	);
 
@@ -58,6 +59,13 @@ public:
 		cudaStream_t stream
 	);
 
+	void adjust(
+		cv::cuda::GpuMat &depth_out,
+		cv::cuda::GpuMat &normals_out,
+		cv::cuda::GpuMat &colour_out,
+		cudaStream_t stream
+	);
+
 private:
 	cv::cuda::GpuMat depth_prime_;
 	cv::cuda::GpuMat intensity_prime_;
@@ -70,6 +78,13 @@ private:
 	cv::cuda::GpuMat weight_accum_;
 };
 
+void mean_subtract(
+	const cv::cuda::GpuMat &intensity,
+	cv::cuda::GpuMat &contrast,
+	int radius,
+	cudaStream_t stream
+);
+
 }
 }
 
diff --git a/components/operators/include/ftl/operators/fusion.hpp b/components/operators/include/ftl/operators/fusion.hpp
index d1e72ecfed516545a109f4530ecde9f330bb0205..fb0dec87e712636c4c98fdb6690ad247dfbbdc8a 100644
--- a/components/operators/include/ftl/operators/fusion.hpp
+++ b/components/operators/include/ftl/operators/fusion.hpp
@@ -17,12 +17,13 @@ class Fusion : public ftl::operators::Operator {
 
 	bool apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) override;
 
-	static void configuration(ftl::Configurable*) {}
+	static void configuration(ftl::Configurable*);
 
 	private:
 	ftl::cuda::MLSMultiIntensity mls_;
 	std::vector<cv::cuda::GpuMat> weights_;
 	cv::cuda::GpuMat temp_;
+	cv::cuda::GpuMat temp2_;
 };
 
 }
diff --git a/components/operators/src/analysis/evaluation/gt_analysis.cpp b/components/operators/src/analysis/evaluation/gt_analysis.cpp
index c9559084eb53da55e13c8c4b22ccb221fcd9cb3f..23c8b25baf3cbf8e66ddbc820c725643ae98c86b 100644
--- a/components/operators/src/analysis/evaluation/gt_analysis.cpp
+++ b/components/operators/src/analysis/evaluation/gt_analysis.cpp
@@ -1,6 +1,8 @@
 #include <ftl/operators/gt_analysis.hpp>
 #include <ftl/operators/cuda/gt.hpp>
 
+#include <opencv2/core/cuda_stream_accessor.hpp>
+
 using ftl::operators::GTAnalysis;
 using ftl::codecs::Channel;
 using std::string;
@@ -10,6 +12,7 @@ GTAnalysis::GTAnalysis(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::
 }
 
 void GTAnalysis::configuration(ftl::Configurable *cfg) {
+	cfg->value("enabled", true);
 	cfg->value("use_disparity", true);
 	cfg->value("show_colour", false);
 }
@@ -72,6 +75,14 @@ bool GTAnalysis::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 	const float npixels = dmat.rows * dmat.cols;
 	ftl::cuda::GTAnalysisData err;
 
+	if (!in.hasChannel(Channel::Mask)) {
+		cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
+
+		auto &m = in.create<cv::cuda::GpuMat>(Channel::Mask);
+		m.create(dmat.size(), CV_8UC1);
+		m.setTo(cv::Scalar(0), cvstream);
+	}
+
 	for (const auto &o : (use_disp ? options_disparity : options_depth)) {
 		if (config()->value("show_colour", false)) {
 			ftl::cuda::gt_analysis(
@@ -102,6 +113,8 @@ bool GTAnalysis::apply(ftl::rgbd::Frame &in, ftl::rgbd::Frame &out, cudaStream_t
 			);
 		}
 
+		cudaStreamSynchronize(stream);
+
 		cudaMemcpy(&err, output_, sizeof(err), cudaMemcpyDeviceToHost);
 		msgs.push_back(" ");
 		if (use_disp) 	{ report(msgs, err, o, npixels, "px", 1.0); }
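The added `cudaStreamSynchronize` makes sure the analysis kernels have written `output_` before the synchronous `cudaMemcpy` on the default stream reads it. An equivalent alternative (sketch, not the change itself) keeps the copy on the analysis stream:

```cpp
// Issue the copy on the same stream as the kernels, then synchronise once.
cudaMemcpyAsync(&err, output_, sizeof(err), cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
```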
diff --git a/components/operators/src/fusion/carving/carver.cu b/components/operators/src/fusion/carving/carver.cu
index d1dd480ef0cbbf2ea4a75595fd6af4fee712297d..497cf848e4fa8a7c4e17e6bc54692d2ce8fa71cb 100644
--- a/components/operators/src/fusion/carving/carver.cu
+++ b/components/operators/src/fusion/carving/carver.cu
@@ -74,8 +74,7 @@ __global__ void reverse_check_kernel(
 
 	float d = depth_in[y*pitch4+x];
 
-	// TODO: Externally provide the error coefficient
-	const float err_coef = 0.0005f; //depthErrorCoef(ointrin);
+	const float err_coef = depthErrorCoef(ointrin);
 
 	int ox = 0;
 	int oy = 0;
@@ -95,6 +94,7 @@ __global__ void reverse_check_kernel(
 			// TODO: Threshold comes from depth error characteristics
 			// If the value is significantly further then carve. Depth error
 			// is not always easy to calculate, depends on source.
+			// FIXME: Use length between 3D points, not depth?
 			if (!(d2 < ointrin.maxDepth && d2 - campos.z > d2*d2*err_coef)) {
 				match = fabsf(campos.z - d2) < d2*d2*err_coef; break;
 			}
diff --git a/components/operators/src/fusion/fusion.cpp b/components/operators/src/fusion/fusion.cpp
index 334820a3a12d590bbfcdee51390b2fef1f6b41a7..857198c7aa5888d0050273c47749e9e77b0ff1a0 100644
--- a/components/operators/src/fusion/fusion.cpp
+++ b/components/operators/src/fusion/fusion.cpp
@@ -3,6 +3,8 @@
 #include <ftl/utility/matrix_conversion.hpp>
 #include <opencv2/core/cuda_stream_accessor.hpp>
 
+#include <ftl/utility/image_debug.hpp>
+
 #include <opencv2/cudaimgproc.hpp>
 #include <opencv2/cudawarping.hpp>
 
@@ -10,6 +12,14 @@ using ftl::operators::Fusion;
 using ftl::codecs::Channel;
 using cv::cuda::GpuMat;
 
+void Fusion::configuration(ftl::Configurable *cfg) {
+	cfg->value("enabled", true);
+	cfg->value("mls_smoothing", 2.0f);
+	cfg->value("mls_iterations", 2);
+	cfg->value("visibility_carving", true);
+	cfg->value("show_changes", false);
+}
+
 Fusion::Fusion(ftl::operators::Graph *g, ftl::Configurable *cfg) : ftl::operators::Operator(g, cfg), mls_(3) {
 
 }
@@ -19,8 +29,10 @@ Fusion::~Fusion() {
 }
 
 bool Fusion::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream_t stream) {
-	float mls_smoothing = config()->value("mls_smoothing", 0.01f);
+	float mls_smoothing = config()->value("mls_smoothing", 2.0f);
+	//float mls_feature = config()->value("mls_feature", 20.0f);
 	int mls_iters = config()->value("mls_iterations", 2);
+	bool show_changes = config()->value("show_changes", false);
 
 	if (weights_.size() != in.frames.size()) weights_.resize(in.frames.size());
 
@@ -28,23 +40,52 @@ bool Fusion::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream
 
 	for (size_t i=0; i<in.frames.size(); ++i) {
 		if (!in.hasFrame(i)) continue;
+
+		if (!in.frames[i].hasChannel(Channel::Colour) || !in.frames[i].hasChannel(Channel::Depth)) continue;
+
 		const GpuMat &col = in.frames[i].get<GpuMat>(Channel::Colour);
 		const GpuMat &d = in.frames[i].get<GpuMat>(Channel::Depth);
 
 		cv::cuda::cvtColor(col, temp_, cv::COLOR_BGRA2GRAY, 0, cvstream);
-		cv::cuda::resize(temp_, weights_[i], d.size(), 0, 0, cv::INTER_LINEAR, cvstream);
+		if (temp_.size() != d.size()) {
+			cv::cuda::resize(temp_, temp2_, d.size(), 0, 0, cv::INTER_LINEAR, cvstream);
+		} else {
+			temp2_ = temp_;
+		}
+
+		// TODO: Not ideal, since the mean is entirely lost here.
+		// Perhaps also check the mean, using a greater smoothing value.
+		ftl::cuda::mean_subtract(temp2_, weights_[i], 3, stream);
 	}
 
+	//if (weights_.size() > 0) ftl::utility::show_image(weights_[0], "MeanSub", 1.0f, ftl::utility::ImageVisualisation::RAW_GRAY);
+
+	// 1) Optical flow of colour
+	// 2) Flow depth from model,
+	//    a) check local depth change consistency, generate a weighting
+	// 3) Generate smooth motion field
+	//    a) Remove outliers (median filter?)
+	//    b) Smooth outputs, perhaps using change consistency as weight?
+	// 4) Merge past with present using motion field
+	//    a) Visibility cull both directions
+	//    b) Local view feature weighted MLS
+	// 5) Now merge all view points
+	// 6) Store as a new model
+
 	if (config()->value("visibility_carving", true)) {
 		for (size_t i=0; i < in.frames.size(); ++i) {
 			if (!in.hasFrame(i)) continue;
 
+			if (!in.frames[i].hasChannel(Channel::Colour) || !in.frames[i].hasChannel(Channel::Depth)) continue;
+
 			auto &f = in.frames[i].cast<ftl::rgbd::Frame>();
 
 			for (size_t j=0; j < in.frames.size(); ++j) {
 				if (i == j) continue;
 				if (!in.hasFrame(j)) continue;
 
+				if (!in.frames[j].hasChannel(Channel::Colour) || !in.frames[j].hasChannel(Channel::Depth)) continue;
+
 				auto &ref = in.frames[j].cast<ftl::rgbd::Frame>();
 
 				auto transformR = MatrixConversion::toCUDA(ref.getPose().cast<float>().inverse() * f.getPose().cast<float>());
@@ -69,6 +110,7 @@ bool Fusion::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream
 	for (int iters=0; iters < mls_iters; ++iters) {
 	for (size_t i=0; i<in.frames.size(); ++i) {
 		if (!in.hasFrame(i)) continue;
+		if (!in.frames[i].hasChannel(Channel::Normals) || !in.frames[i].hasChannel(Channel::Depth)) continue;
 
 		auto &f1 = in.frames[i].cast<ftl::rgbd::Frame>();
 
@@ -88,6 +130,7 @@ bool Fusion::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream
 		for (size_t j=0; j<in.frames.size(); ++j) {
 			if (!in.hasFrame(j)) continue;
 			//if (i == j) continue;
+			if (!in.frames[j].hasChannel(Channel::Normals) || !in.frames[j].hasChannel(Channel::Depth)) continue;
 
 			//LOG(INFO) << "Running phase1";
 
@@ -108,15 +151,25 @@ bool Fusion::apply(ftl::rgbd::FrameSet &in, ftl::rgbd::FrameSet &out, cudaStream
 				f2.getLeft(),
 				pose2,
 				mls_smoothing,
+				mls_smoothing,
 				stream
 			);
 		}
 
-		mls_.adjust(
-			f1.create<GpuMat>(Channel::Depth),
-			f1.create<GpuMat>(Channel::Normals),
-			stream
-		);
+		if (show_changes) {
+			mls_.adjust(
+				f1.create<GpuMat>(Channel::Depth),
+				f1.create<GpuMat>(Channel::Normals),
+				f1.create<GpuMat>(Channel::Colour),
+				stream
+			);
+		} else {
+			mls_.adjust(
+				f1.create<GpuMat>(Channel::Depth),
+				f1.create<GpuMat>(Channel::Normals),
+				stream
+			);
+		}
 	}
 	}
 
diff --git a/components/operators/src/fusion/smoothing/mls_multi_weighted.cu b/components/operators/src/fusion/smoothing/mls_multi_weighted.cu
index a5e5ded31f193bda1c5b3d24b58994d5717ead32..489dc0cf58420cc59f2b87c151b182ef024ad9a4 100644
--- a/components/operators/src/fusion/smoothing/mls_multi_weighted.cu
+++ b/components/operators/src/fusion/smoothing/mls_multi_weighted.cu
@@ -9,7 +9,18 @@ using cv::cuda::GpuMat;
 
 __device__ inline float featureWeight(int f1, int f2) {
 	const float w = (1.0f-(float(abs(f1 - f2)) / 255.0f));
-	return w*w;
+	return w*w*w;
+}
+
+__device__ inline float biasedLength(const float3 &Xi, const float3 &X) {
+	float l = 0.0f;
+	const float dx = Xi.x-X.x;
+	l += 2.0f*dx*dx;
+	const float dy = Xi.y-X.y;
+	l += 2.0f*dy*dy;
+	const float dz = Xi.z-X.z;
+	l += dz*dz;
+	return sqrt(l);
 }
 
 /*
@@ -26,6 +37,7 @@ __device__ inline float featureWeight(int f1, int f2) {
 	float4* __restrict__ centroid_out,
 	float* __restrict__ contrib_out,
 	float smoothing,
+	float fsmoothing,
 	float4x4 o_2_in,
 	float4x4 in_2_o,
 	float3x3 in_2_o33,
@@ -45,18 +57,70 @@ __device__ inline float featureWeight(int f1, int f2) {
 
     if (x < 0 || y < 0 || x >= camera_origin.width || y >= camera_origin.height) return;
 
-	float3 nX = make_float3(normals_out[y*npitch_out+x]);
-	float3 aX = make_float3(centroid_out[y*cpitch_out+x]);
-    float contrib = contrib_out[y*wpitch_out+x];
+	//float3 nX = make_float3(normals_out[y*npitch_out+x]);
+	//float3 aX = make_float3(centroid_out[y*cpitch_out+x]);
+	//float contrib = contrib_out[y*wpitch_out+x];
+	
+	float3 nX = make_float3(0.0f, 0.0f, 0.0f);
+	float3 aX = make_float3(0.0f, 0.0f, 0.0f);
+	float contrib = 0.0f;
 
 	float d0 = depth_origin[x+y*dpitch_o];
 	if (d0 <= camera_origin.minDepth || d0 >= camera_origin.maxDepth) return;
 
-	const int feature1 = feature_origin[x+y*fpitch_o];
+	const uchar2 feature1 = feature_origin[x+y*fpitch_o];
+
+	// TODO: Could the origin depth actually be averaged with depth in other
+	// image? So as to not bias towards the original view?
 
 	float3 X = camera_origin.screenToCam((int)(x),(int)(y),d0);
 
-	int2 s = camera_in.camToScreen<int2>(o_2_in * X);
+	const float3 camPos = o_2_in * X;
+	const int2 s = camera_in.camToScreen<int2>(camPos);
+
+	// Move point off of original surface
+	//X = camera_origin.screenToCam((int)(x),(int)(y),d0-0.005f);
+
+	// TODO: Could dynamically adjust the smoothing factors depending upon the
+	// number of matches. If there are many good local and feature matches,
+	// be less likely to include poorer matches; conversely, if only poor
+	// non-local or feature-distance matches exist, increase the search range.
+
+	// Could also adapt smoothing parameters using variance or some other local
+	// image measures. Or by just considering distance of the central projected
+	// points as an indication of misalignment. Both spatial distance and
+	// feature distance could be used to adjust parameters.
+
+	// FIXME: For own image, need to do something different than the below.
+	// Otherwise smoothing factors become 0.
+
+	float spatial_smoothing = (depth_origin == depth_in) ? 0.005f : 0.03f; // 3cm default
+	float hf_intensity_smoothing = (depth_origin == depth_in) ? 100.0f : 50.0f;
+	float mean_smoothing = (depth_origin == depth_in) ? 100.0f : 100.0f;
+	if (depth_origin != depth_in && s.x >= 0 && s.x < camera_in.width && s.y >= 0 && s.y < camera_in.height) {
+		// Get depth at exact reprojection point
+		const float d = depth_in[s.x+s.y*dpitch_i];
+		// Get feature at exact reprojection point
+		const uchar2 feature2 = feature_in[s.x+s.y*fpitch_i];
+		if (d > camera_in.minDepth && d < camera_in.maxDepth) {
+			spatial_smoothing = min(spatial_smoothing, smoothing * fabsf(camPos.z - d));
+		}
+		hf_intensity_smoothing = smoothing * fabsf(float(feature2.x) - float(feature1.x));
+		//mean_smoothing = smoothing * fabsf(float(feature2.y) - float(feature1.y));
+
+		// Make start point the average of the two sources...
+		const float3 reversePos = in_2_o * camera_in.screenToCam(s.x, s.y, d);
+		X = (X + reversePos) / 2.0f;
+	}
+
+	// Make sure there is a minimum smoothing value
+	spatial_smoothing = max(0.05f, spatial_smoothing);
+	hf_intensity_smoothing = max(50.0f, hf_intensity_smoothing);
+	//mean_smoothing = max(10.0f, mean_smoothing);
+
+	// Check for neighbourhood symmetry and use to weight overall contribution
+	float symx = 0.0f;
+	float symy = 0.0f;
 
     // Neighbourhood
     for (int v=-SEARCH_RADIUS; v<=SEARCH_RADIUS; ++v) {
@@ -68,22 +132,47 @@ __device__ inline float featureWeight(int f1, int f2) {
 		const float3 Xi = in_2_o * camera_in.screenToCam(s.x+u, s.y+v, d);
 		const float3 Ni = make_float3(normals_in[s.x+u+(s.y+v)*npitch_in]);
 
-		const int feature2 = feature_in[s.x+y+(s.y+v)*fpitch_i];
-
-		// Gauss approx weighting function using point distance
+		const uchar2 feature2 = feature_in[s.x+u+(s.y+v)*fpitch_i];
+
+		// Gauss approx weighting functions.
+		// Rule: spatially close and feature-close samples are strong, while
+		// spatially far or feature-far ones are poor. Take the minimum, so a
+		// sample must be both close and feature-close to get a good value.
+		const float w_high_int = ftl::cuda::weighting(float(abs(int(feature1.x)-int(feature2.x))), hf_intensity_smoothing);
+		const float w_mean_int = ftl::cuda::weighting(float(abs(int(feature1.y)-int(feature2.y))), mean_smoothing);
+		const float w_space = ftl::cuda::spatialWeighting(X,Xi,spatial_smoothing);
+		//const float w_space = ftl::cuda::weighting(biasedLength(Xi,X),spatial_smoothing);
+		// TODO: Distance from cam squared
+		// TODO: Angle from cam (dot of normal and ray)
+		//const float w_lateral = ftl::cuda::weighting(sqrt(Xi.x*X.x + Xi.y*X.y), float(SEARCH_RADIUS)*camera_origin.fx/Xi.z);
 		const float w = (length(Ni) > 0.0f)
-			? ftl::cuda::spatialWeighting(X,Xi,smoothing) * featureWeight(feature1, feature2)
+			? min(w_space, min(w_high_int, w_mean_int))  //w_space * w_high_int * w_mean_int //
 			: 0.0f;
 
+		// Mark as a symmetry contribution
+		if (w > 0.0f) {
+			if (u < 0) symx -= 1.0f;
+			else if (u > 0) symx += 1.0f;
+			if (v < 0) symy -= 1.0f;
+			else if (v > 0) symy += 1.0f;
+		}
+
 		aX += Xi*w;
 		nX += (in_2_o33 * Ni)*w;
 		contrib += w;
     }
 	}
 
-	normals_out[y*npitch_out+x] = make_half4(nX, 0.0f);
-	centroid_out[y*cpitch_out+x] = make_float4(aX, 0.0f);
-	contrib_out[y*wpitch_out+x] = contrib;
+	// Perfect symmetry means symx and symy == 0, so the resulting length is a
+	// measure of asymmetry; when inverted it can be used to weight the result.
+	symx = fabsf(symx) / float(SEARCH_RADIUS);
+	symy = fabsf(symy) / float(SEARCH_RADIUS);
+	float l = 1.0f - sqrt(symx*symx+symy*symy);
+	l = l*l;
+
+	normals_out[y*npitch_out+x] = make_half4(make_float3(normals_out[y*npitch_out+x]) + nX*l, 0.0f);
+	centroid_out[y*cpitch_out+x] = make_float4(make_float3(centroid_out[y*cpitch_out+x]) + aX*l, 0.0f);
+	contrib_out[y*wpitch_out+x] = contrib_out[y*wpitch_out+x] + contrib*l;
 }
 
 /**
@@ -95,12 +184,14 @@ __device__ inline float featureWeight(int f1, int f2) {
 	const float* __restrict__ contrib_out,
 	half4* __restrict__ normals_out,
 	float* __restrict__ depth,
+	uchar4* __restrict__ colour,
 	ftl::rgbd::Camera camera,
 	int npitch_in,
 	int cpitch_in,
 	int wpitch,
 	int npitch,
-	int dpitch
+	int dpitch,
+	int cpitch
 ) {
 	const int x = blockIdx.x*blockDim.x + threadIdx.x;
     const int y = blockIdx.y*blockDim.y + threadIdx.y;
@@ -111,7 +202,7 @@ __device__ inline float featureWeight(int f1, int f2) {
 		float contrib = contrib_out[y*wpitch+x];
 
 		//depth[x+y*dpitch] = X.z;
-		normals_out[x+y*npitch] = make_half4(0.0f, 0.0f, 0.0f, 0.0f);
+		//normals_out[x+y*npitch] = make_half4(0.0f, 0.0f, 0.0f, 0.0f);
 
 		float d0 = depth[x+y*dpitch];
 		//depth[x+y*dpitch] = 0.0f;
@@ -129,6 +220,20 @@ __device__ inline float featureWeight(int f1, int f2) {
 
 		depth[x+y*dpitch] = X.z;
 		normals_out[x+y*npitch] = make_half4(nX / length(nX), 0.0f);
+
+		if (colour) {
+			int2 s = camera.camToScreen<int2>(X);
+			float pd = min(1.0f, max(0.0f, X.z-d0) / 0.002f);
+			float nd = min(1.0f, -min(0.0f, X.z-d0) / 0.002f);
+			colour[x+y*cpitch] = (abs(s.x - x) > 1 || abs(s.y - y) > 1)
+			? make_uchar4(0,255,0,255)
+			: make_uchar4(
+				255.0f - pd*255.0f,
+				255.0f - pd*255.0f - nd*255.0f,
+				255.0f - nd*255.0f,
+				255.0f
+			);
+		}
 	}
 }
 
@@ -180,6 +285,7 @@ void MLSMultiIntensity::gather(
 	const ftl::rgbd::Camera &cam_src,
 	const float4x4 &pose_src,
 	float smoothing,
+	float fsmoothing,
 	cudaStream_t stream)
 {
 	static constexpr int THREADS_X = 8;
@@ -201,11 +307,12 @@ void MLSMultiIntensity::gather(
 		normal_accum_.ptr<half4>(),
 		depth_prime_.ptr<float>(),
 		depth_src.ptr<float>(),
-		intensity_prime_.ptr<uchar>(),
-		intensity_src.ptr<uchar>(),
+		intensity_prime_.ptr<uchar2>(),
+		intensity_src.ptr<uchar2>(),
 		centroid_accum_.ptr<float4>(),
 		weight_accum_.ptr<float>(),
 		smoothing,
+		fsmoothing,
 		o_2_in,
 		in_2_o,
 		in_2_o33,
@@ -217,8 +324,8 @@ void MLSMultiIntensity::gather(
 		depth_prime_.step1(),
 		depth_src.step1(),
 		normals_src.step1()/4,
-		intensity_prime_.step1(),
-		intensity_src.step1()
+		intensity_prime_.step1()/2,
+		intensity_src.step1()/2
 	);
 	cudaSafeCall( cudaGetLastError() );
 }
@@ -245,12 +352,104 @@ void MLSMultiIntensity::adjust(
 		weight_accum_.ptr<float>(),
 		normals_out.ptr<half4>(),
 		depth_prime_.ptr<float>(),
+		nullptr,
 		cam_prime_,
 		normal_accum_.step1()/4,
 		centroid_accum_.step1()/4,
 		weight_accum_.step1(),
 		normals_out.step1()/4,
-		depth_prime_.step1()
+		depth_prime_.step1(),
+		0
+	);
+	cudaSafeCall( cudaGetLastError() );
+}
+
+void MLSMultiIntensity::adjust(
+	GpuMat &depth_out,
+	GpuMat &normals_out,
+	GpuMat &colour_out,
+	cudaStream_t stream)
+{
+	static constexpr int THREADS_X = 8;
+	static constexpr int THREADS_Y = 8;
+
+	const dim3 gridSize((depth_prime_.cols + THREADS_X - 1)/THREADS_X, (depth_prime_.rows + THREADS_Y - 1)/THREADS_Y);
+	const dim3 blockSize(THREADS_X, THREADS_Y);
+
+	normals_out.create(depth_prime_.size(), CV_16FC4);
+	depth_out.create(depth_prime_.size(), CV_32F);
+
+	// FIXME: Depth prime assumed to be same as depth out
+
+	mls_reduce_kernel_2<<<gridSize, blockSize, 0, stream>>>(
+		centroid_accum_.ptr<float4>(),
+		normal_accum_.ptr<half4>(),
+		weight_accum_.ptr<float>(),
+		normals_out.ptr<half4>(),
+		depth_prime_.ptr<float>(),
+		colour_out.ptr<uchar4>(),
+		cam_prime_,
+		normal_accum_.step1()/4,
+		centroid_accum_.step1()/4,
+		weight_accum_.step1(),
+		normals_out.step1()/4,
+		depth_prime_.step1(),
+		colour_out.step1()/4
+	);
+	cudaSafeCall( cudaGetLastError() );
+}
+
+// =============================================================================
+
+template <int RADIUS>
+__global__ void mean_subtract_kernel(
+	const uchar* __restrict__ intensity,
+	uchar2* __restrict__ contrast,
+	int pitch,
+	int cpitch,
+	int width,
+	int height
+) {
+	const int x = blockIdx.x*blockDim.x + threadIdx.x;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if (x >= RADIUS && y >= RADIUS && x < width-RADIUS && y < height-RADIUS) {
+		float mean = 0.0f;
+
+		for (int v=-RADIUS; v<=RADIUS; ++v) {
+		for (int u=-RADIUS; u<=RADIUS; ++u) {
+			mean += float(intensity[x+u+(y+v)*pitch]);
+		}
+		}
+
+		mean /= float((2*RADIUS+1)*(2*RADIUS+1));
+
+		float diff = float(intensity[x+y*pitch]) - mean;
+		contrast[x+y*cpitch] = make_uchar2(max(0, min(254, int(diff)+127)), int(mean));
+	}
+}
+
+void ftl::cuda::mean_subtract(
+	const cv::cuda::GpuMat &intensity,
+	cv::cuda::GpuMat &contrast,
+	int radius,
+	cudaStream_t stream
+) {
+	static constexpr int THREADS_X = 8;
+	static constexpr int THREADS_Y = 8;
+
+	const dim3 gridSize((intensity.cols + THREADS_X - 1)/THREADS_X, (intensity.rows + THREADS_Y - 1)/THREADS_Y);
+	const dim3 blockSize(THREADS_X, THREADS_Y);
+
+	contrast.create(intensity.size(), CV_8UC2);
+
+	mean_subtract_kernel<3><<<gridSize, blockSize, 0, stream>>>(
+		intensity.ptr<uchar>(),
+		contrast.ptr<uchar2>(),
+		intensity.step1(),
+		contrast.step1()/2,
+		intensity.cols,
+		intensity.rows
 	);
 	cudaSafeCall( cudaGetLastError() );
 }
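A host-side sketch of driving `ftl::cuda::mean_subtract` (as `Fusion::apply` does above): the input is a CV_8UC1 intensity image and the output is CV_8UC2, with the mean-subtracted contrast in `.x` and the local mean in `.y`. Note the `radius` argument is currently ignored; the kernel is instantiated with a fixed radius of 3 (a 7x7 window).

```cpp
// Assumes 'grey' is a CV_8UC1 GpuMat and 'stream' is a valid cudaStream_t.
cv::cuda::GpuMat contrast;  // allocated as CV_8UC2 by mean_subtract
ftl::cuda::mean_subtract(grey, contrast, 3, stream);

// After synchronising the stream, each uchar2 element holds:
//   .x : intensity minus the local mean, offset by 127 and clamped
//   .y : the local mean itself
```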
diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp
index 0f888dec2b9b75b1de77c791f9295d1563c113b5..52472c0b98ce32367657288e51453c5cea3b50c3 100644
--- a/components/renderers/cpp/src/CUDARender.cpp
+++ b/components/renderers/cpp/src/CUDARender.cpp
@@ -417,6 +417,8 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 		);*/
 	}
 
+	//out.createTexture<half4>(_getNormalsChannel());
+
 	ftl::cuda::transform_normals(
 		out.createTexture<half4>(_getNormalsChannel()),
 		poseInverse_.getFloat3x3(),
diff --git a/components/renderers/cpp/src/normals.cu b/components/renderers/cpp/src/normals.cu
index ddeb6294e20aa681302932a44ade664f512272f4..3afe06c7097e47104625cbab5f5c40d9135d2614 100644
--- a/components/renderers/cpp/src/normals.cu
+++ b/components/renderers/cpp/src/normals.cu
@@ -409,7 +409,7 @@ void ftl::cuda::normals_dot(ftl::cuda::TextureObject<float> &output,
 
 //==============================================================================
 
-__global__ void vis_normals_kernel(ftl::cuda::TextureObject<half4> norm,
+/*__global__ void vis_normals_kernel(ftl::cuda::TextureObject<half4> norm,
         ftl::cuda::TextureObject<uchar4> output,
         float3 direction, uchar4 diffuse, uchar4 ambient) {
     const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
@@ -433,6 +433,42 @@ __global__ void vis_normals_kernel(ftl::cuda::TextureObject<half4> norm,
 				min(255.0f, diffuse.z*d + ambient.z), ambient.w);
 		}
 	}
+}*/
+
+__global__ void vis_normals_kernel(ftl::cuda::TextureObject<half4> norm,
+	ftl::cuda::TextureObject<uchar4> output,
+	float3 direction, uchar4 diffuse, uchar4 ambient
+) {
+	const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
+	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
+
+	if(x < norm.width() && y < norm.height()) {
+		output(x,y) = make_uchar4(ambient.x,ambient.y,ambient.z,0);
+
+		//float3 ray = direction;
+		//ray = ray / length(ray);
+		float3 n = make_float3(norm.tex2D((int)x,(int)y));
+		float l = length(n);
+
+		if (l != 0) {
+			n /= l;
+
+			const float b = (n.z + 1.0f) * 0.5f * 255.0f;
+			const float g = (n.y + 1.0f) * 0.5f * 255.0f;
+			const float r = (n.x + 1.0f) * 0.5f * 255.0f;
+
+			//const float d = max(dot(ray, n), 0.0f);
+			/*output(x,y) = make_uchar4(
+				min(255.0f, diffuse.x*d + ambient.x),
+				min(255.0f, diffuse.y*d + ambient.y),
+				min(255.0f, diffuse.z*d + ambient.z), ambient.w);*/
+
+			output(x,y) = make_uchar4(
+				min(255.0f, b + ambient.x),
+				min(255.0f, g + ambient.y),
+				min(255.0f, r + ambient.z), ambient.w);
+		}
+	}
 }
 
 void ftl::cuda::normal_visualise(ftl::cuda::TextureObject<half4> &norm,
diff --git a/components/rgbd-sources/src/frame.cpp b/components/rgbd-sources/src/frame.cpp
index 496c48807a693e44409b4e59f4de1bdc5ca7c80f..255d438c089a5a7dc0cf27db21c9f6dba96bf408 100644
--- a/components/rgbd-sources/src/frame.cpp
+++ b/components/rgbd-sources/src/frame.cpp
@@ -71,7 +71,7 @@ cv::cuda::GpuMat &VideoFrame::createGPU(const ftl::rgbd::FormatBase &f) {
 }
 
 const cv::Mat &VideoFrame::getCPU() const {
-	if (!validhost) {
+	if (!validhost && !gpu.empty()) {
 		// TODO: Use stream and page locked mem.
 		gpu.download(host);
 		validhost = true;
diff --git a/components/streams/src/feed.cpp b/components/streams/src/feed.cpp
index f9b468605bcc336e3bf6d89f137eae7fe252a087..b51cc2ba9620fa5dadcf150a70000d4eada9a8b8 100644
--- a/components/streams/src/feed.cpp
+++ b/components/streams/src/feed.cpp
@@ -204,9 +204,9 @@ Feed::Feed(nlohmann::json &config, ftl::net::Universe*net) :
 
 				if (!did_pipe) {
 					LOG(WARNING) << "Feed Pipeline dropped (" << fs->frameset() << ")";
-					ftl::pool.push([this,fs](int id) {
-						_dispatch(fs);
-					});
+					//ftl::pool.push([this,fs](int id) {
+					//	_dispatch(fs);
+					//});
 				}
 
 				_processAudio(fs);
@@ -534,7 +534,7 @@ void Feed::_createPipeline(uint32_t fsid) {
 		p->append<ftl::operators::BorderMask>("border_mask");
 		p->append<ftl::operators::CullDiscontinuity>("remove_discontinuity");
 		p->append<ftl::operators::MultiViewMLS>("mvmls")->value("enabled", false);
-		p->append<ftl::operators::Fusion>("fusion")->value("enabled", false);
+		p->append<ftl::operators::Fusion>("fusion")->set("enabled", false);
 		p->append<ftl::operators::DisplayMask>("display_mask")->value("enabled", false);
 		p->append<ftl::operators::Poser>("poser")->value("enabled", true);
 		p->append<ftl::operators::GTAnalysis>("gtanalyse");
@@ -1115,8 +1115,12 @@ std::string Feed::getSourceURI(ftl::data::FrameID id) {
 
 std::vector<unsigned int> Feed::listFrameSets() {
 	SHARED_LOCK(mtx_, lk);
+
+	cudaDeviceSynchronize();
+	cudaSafeCall( cudaGetLastError() );
+
 	std::vector<unsigned int> result;
-	result.reserve(fsid_lookup_.size());
+	result.reserve(latest_.size());
 	for (const auto [k, fs] : latest_) {
 		if (fs) {
 			result.push_back(k);
diff --git a/components/streams/src/receiver.cpp b/components/streams/src/receiver.cpp
index 97c58c7dcf8f727a8b8dd58321caf15e33170b88..be3d01d853498d3068c75805018a972c6e9c55fd 100644
--- a/components/streams/src/receiver.cpp
+++ b/components/streams/src/receiver.cpp
@@ -343,7 +343,7 @@ void Receiver::_processVideo(const StreamPacket &spkt, const Packet &pkt) {
 	int cvtype = ftl::codecs::type(spkt.channel);
 
 	if (surface.type() != cvtype) {
-		LOG(ERROR) << "Invalid video format received";
+		LOG(ERROR) << "Invalid video format received: " << cvtype << " for " << (int)spkt.channel;
 		_terminateVideoPacket(spkt, pkt);
 		return;
 	}