diff --git a/applications/reconstruct/CMakeLists.txt b/applications/reconstruct/CMakeLists.txt
index de3aca69c4da32f0145f2d57ee3ac439209d1071..dcee7afa0147378ffaabd91263e200f8fe284c98 100644
--- a/applications/reconstruct/CMakeLists.txt
+++ b/applications/reconstruct/CMakeLists.txt
@@ -16,6 +16,7 @@ set(REPSRC
 	#src/depth_camera.cu
 	#src/depth_camera.cpp
 	src/ilw.cpp
+	src/ilw.cu
 )
 
 add_executable(ftl-reconstruct ${REPSRC})
diff --git a/applications/reconstruct/src/ilw.cpp b/applications/reconstruct/src/ilw.cpp
index 90b3a1bee8f08f8a0973cd068c3ff959343da6b3..57ad8634c990e7c5fb05b98fc5e2b8a1773ce008 100644
--- a/applications/reconstruct/src/ilw.cpp
+++ b/applications/reconstruct/src/ilw.cpp
@@ -2,6 +2,9 @@
 #include <ftl/utility/matrix_conversion.hpp>
 #include <ftl/rgbd/source.hpp>
 #include <ftl/cuda/points.hpp>
+#include <loguru.hpp>
+
+#include "ilw_cuda.hpp"
 
 using ftl::ILW;
 using ftl::detail::ILWData;
@@ -21,14 +24,14 @@ ILW::~ILW() {
 bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
     _phase0(fs, stream);
 
-    for (int i=0; i<2; ++i) {
-        _phase1(fs);
-        for (int j=0; j<3; ++j) {
-            _phase2(fs);
-        }
+    //for (int i=0; i<2; ++i) {
+        _phase1(fs, stream);
+        //for (int j=0; j<3; ++j) {
+        //    _phase2(fs);
+        //}
 
 		// TODO: Break if no time left
-    }
+    //}
 
     return true;
 }
@@ -47,15 +50,48 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
         auto &t = f.createTexture<float4>(Channel::Points, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
         auto pose = MatrixConversion::toCUDA(s->getPose().cast<float>()); //.inverse());
         ftl::cuda::point_cloud(t, f.createTexture<float>(Channel::Depth), s->parameters(), pose, stream);
+
+        // TODO: Create energy vector texture and clear it
+        // Create energy and clear it
     }
 
     return true;
 }
 
-bool ILW::_phase1(ftl::rgbd::FrameSet &fs) {
-    // Run correspondence kernel to find points
+bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
+    // Run correspondence kernel to create an energy vector
 
 	// For each camera combination
+    for (size_t i=0; i<fs.frames.size(); ++i) {
+        for (size_t j=0; j<fs.frames.size(); ++j) {
+            if (i == j) continue;
+
+            LOG(INFO) << "Running phase1";
+
+            auto &f1 = fs.frames[i];
+            auto &f2 = fs.frames[j];
+            //auto s1 = fs.frames[i];
+            auto s2 = fs.sources[j];
+
+            auto pose = MatrixConversion::toCUDA(s2->getPose().cast<float>().inverse());
+
+            //Calculate energy vector to best correspondence
+            ftl::cuda::correspondence_energy_vector(
+                f1.getTexture<float4>(Channel::Points),
+                f2.getTexture<float4>(Channel::Points),
+                f1.getTexture<uchar4>(Channel::Colour),
+                f2.getTexture<uchar4>(Channel::Colour),
+                // TODO: Add normals and other things...
+                f1.getTexture<float4>(Channel::EnergyVector),
+                f1.getTexture<float>(Channel::Energy),
+                pose,
+                s2->parameters(),
+                stream
+            );
+
+            LOG(INFO) << "Correspondences done... " << i;
+        }
+    }
 
     return true;
 }
diff --git a/applications/reconstruct/src/ilw.cu b/applications/reconstruct/src/ilw.cu
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..90133a3a57800ee87a91fd50902deea5f701258a 100644
--- a/applications/reconstruct/src/ilw.cu
+++ b/applications/reconstruct/src/ilw.cu
@@ -0,0 +1,86 @@
+#include "ilw_cuda.hpp"
+
+using ftl::cuda::TextureObject;
+using ftl::rgbd::Camera;
+
+#define WARP_SIZE 32
+#define T_PER_BLOCK 8
+#define FULL_MASK 0xffffffff
+
+__device__ inline float warpMax(float e) {
+	for (int i = WARP_SIZE/2; i > 0; i /= 2) {
+		const float other = __shfl_xor_sync(FULL_MASK, e, i, WARP_SIZE);
+		e = max(e, other);
+	}
+	return e;
+}
+
+__global__ void correspondence_energy_vector_kernel(
+        TextureObject<float4> p1,
+        TextureObject<float4> p2,
+        TextureObject<uchar4> c1,
+        TextureObject<uchar4> c2,
+        TextureObject<float4> vout,
+        TextureObject<float> eout,
+        float4x4 pose2,  // Inverse
+        Camera cam2) {
+
+    // Each warp picks point in p1
+    const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
+	const int x = (blockIdx.x*blockDim.x + threadIdx.x) / WARP_SIZE;
+    const int y = blockIdx.y*blockDim.y + threadIdx.y;
+    
+    const float3 world1 = make_float3(p1.tex2D(x, y));
+    const float3 camPos2 = pose2 * world1;
+    const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);
+
+    const int upsample = 8;
+
+    // Project to p2 using cam2
+    // Each thread takes a possible correspondence and calculates a weighting
+    const int lane = tid % WARP_SIZE;
+	for (int i=lane; i<upsample*upsample; i+=WARP_SIZE) {
+		const float u = (i % upsample) - (upsample / 2);
+        const float v = (i / upsample) - (upsample / 2);
+        
+        const float3 world2 = make_float3(p2.tex2D(screen2.x+u, screen2.y+v));
+
+        // Determine degree of correspondence
+        const float confidence = 1.0f / length(world1 - world2);
+
+        printf("conf %f\n", confidence);
+        const float maxconf = warpMax(confidence);
+
+        // This thread has best confidence value
+        if (maxconf == confidence) {
+            vout(x,y) = vout.tex2D(x, y) + make_float4(
+                (world1.x - world2.x) * maxconf,
+                (world1.y - world2.y) * maxconf,
+                (world1.z - world2.z) * maxconf,
+                maxconf);
+            eout(x,y) = eout.tex2D(x,y) + length(world1 - world2)*maxconf;
+        }
+    }
+}
+
+void ftl::cuda::correspondence_energy_vector(
+        TextureObject<float4> &p1,
+        TextureObject<float4> &p2,
+        TextureObject<uchar4> &c1,
+        TextureObject<uchar4> &c2,
+        TextureObject<float4> &vout,
+        TextureObject<float> &eout,
+        float4x4 &pose2,
+        const Camera &cam2,
+        cudaStream_t stream) {
+
+    const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
+    const dim3 blockSize(2*WARP_SIZE, T_PER_BLOCK);
+
+    printf("COR SIZE %d,%d\n", p1.width(), p1.height());
+
+    correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>(
+        p1, p2, c1, c2, vout, eout, pose2, cam2
+    );
+    cudaSafeCall( cudaGetLastError() );
+}
diff --git a/applications/reconstruct/src/ilw.hpp b/applications/reconstruct/src/ilw.hpp
index 5bab3387316c47c3f1489acde7990d583364f523..0be45d015e976b540263a2c16cc5605376092a43 100644
--- a/applications/reconstruct/src/ilw.hpp
+++ b/applications/reconstruct/src/ilw.hpp
@@ -51,7 +51,7 @@ class ILW : public ftl::Configurable {
     /*
      * Find possible correspondences and a confidence value.
      */
-    bool _phase1(ftl::rgbd::FrameSet &fs);
+    bool _phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream);
 
     /*
      * Calculate energies and move the points.
diff --git a/applications/reconstruct/src/ilw_cuda.hpp b/applications/reconstruct/src/ilw_cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a01af75149409fe033ba39ffb0170489ee926be9
--- /dev/null
+++ b/applications/reconstruct/src/ilw_cuda.hpp
@@ -0,0 +1,26 @@
+#ifndef _FTL_ILW_CUDA_HPP_
+#define _FTL_ILW_CUDA_HPP_
+
+#include <ftl/cuda_common.hpp>
+#include <ftl/rgbd/camera.hpp>
+#include <ftl/cuda_matrix_util.hpp>
+
+namespace ftl {
+namespace cuda {
+
+void correspondence_energy_vector(
+    ftl::cuda::TextureObject<float4> &p1,
+    ftl::cuda::TextureObject<float4> &p2,
+    ftl::cuda::TextureObject<uchar4> &c1,
+    ftl::cuda::TextureObject<uchar4> &c2,
+    ftl::cuda::TextureObject<float4> &vout,
+    ftl::cuda::TextureObject<float> &eout,
+    float4x4 &pose2,
+    const ftl::rgbd::Camera &cam2,
+    cudaStream_t stream
+);
+
+}
+}
+
+#endif  // _FTL_ILW_CUDA_HPP_
diff --git a/components/renderers/cpp/src/splat_render.cpp b/components/renderers/cpp/src/splat_render.cpp
index c878a4913dfd37d70a4aeddf16397f7aaab509fd..daf9f5f64c019d24ae9afb9f2e4540c59b722922 100644
--- a/components/renderers/cpp/src/splat_render.cpp
+++ b/components/renderers/cpp/src/splat_render.cpp
@@ -31,6 +31,7 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 	out.create<GpuMat>(Channel::Depth, Format<float>(camera.width, camera.height));
 	out.create<GpuMat>(Channel::Colour, Format<uchar4>(camera.width, camera.height));
 
+	// FIXME: Use source resolutions, not virtual resolution
 	temp_.create<GpuMat>(Channel::Colour, Format<float4>(camera.width, camera.height));
 	temp_.create<GpuMat>(Channel::Colour2, Format<uchar4>(camera.width, camera.height));
 	temp_.create<GpuMat>(Channel::Contribution, Format<float>(camera.width, camera.height));
@@ -115,6 +116,8 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 		//LOG(INFO) << "DIBR DONE";
 	}
 
+	// TODO: Add the depth splatting step..
+
 	temp_.createTexture<float4>(Channel::Colour);
 	temp_.createTexture<float>(Channel::Contribution);
 
@@ -157,74 +160,9 @@ bool Splatter::render(ftl::rgbd::VirtualSource *src, ftl::rgbd::Frame &out, cuda
 		Eigen::Matrix4f matrix =  src->getPose().cast<float>() * transform.matrix();
 		params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse());
 		params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix);
-	}
-
-	/*
-		//ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream);
-
-		// Step 1: Put all points into virtual view to gather them
-		//ftl::cuda::dibr_raw(depth1_, scene_->cameraCount(), params, stream);
-
-		// Step 2: For each point, use a warp to do MLS and up sample
-		//ftl::cuda::mls_render_depth(depth1_, depth3_, params, scene_->cameraCount(), stream);
-
-		if (src->getChannel() == Channel::Depth) {
-			//LOG(INFO) << "Rendering depth";
-			//ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
-			if (value("splatting",  false)) {
-				//ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream);
-				//ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
-
-				temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
-			} else {
-				temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
-				//ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
-				//src->writeFrames(ts, colour1_, depth2_, stream);
-				//src->write(scene_.timestamp, output_, stream);
-			}
-		} else if (src->getChannel() == Channel::Energy) {
-			//ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
-			//if (src->value("splatting",  false)) {
-				//ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream);
-				//ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
-				//src->writeFrames(ts, colour1_, depth2_, stream);
-				//src->write(scene_.timestamp, output_, stream);
-			//} else {
-				//ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
-			//	src->writeFrames(colour1_, depth2_, stream);
-			//}
-		} else if (src->getChannel() == Channel::Right) {
-			//LOG(INFO) << "Rendering right";
-			// Adjust pose to right eye position
-			Eigen::Affine3f transform(Eigen::Translation3f(camera.baseline,0.0f,0.0f));
-			Eigen::Matrix4f matrix =  src->getPose().cast<float>() * transform.matrix();
-			params.m_viewMatrix = MatrixConversion::toCUDA(matrix.inverse());
-			params.m_viewMatrixInverse = MatrixConversion::toCUDA(matrix);
-
-			//ftl::cuda::clear_depth(depth1_, stream);
-			//ftl::cuda::dibr(depth1_, colour1_, normal1_, depth2_, colour_tmp_, depth3_, scene_->cameraCount(), params, stream);
-			//src->writeFrames(ts, colour1_, colour2_, stream);
-			//src->write(scene_.timestamp, output_, stream);
-		} else {
-			//LOG(INFO) << "No second rendering";
-			//if (value("splatting",  false)) {
-				//ftl::cuda::splat_points(depth1_, colour1_, normal1_, depth2_, colour2_, params, stream);
-				//src->writeFrames(ts, colour1_, depth2_, stream);
-				//src->write(scene_.timestamp, out, stream);
-			//} else {
-				//ftl::cuda::int_to_float(depth1_, depth2_, 1.0f / 1000.0f, stream);
-				temp_.get<GpuMat>(Channel::Depth).convertTo(out.get<GpuMat>(Channel::Depth), CV_32F, 1.0f / 1000.0f, cvstream);
-				//src->writeFrames(ts, colour1_, depth2_, stream);
-				//src->write(scene_.timestamp, output_, stream);
-			//}
-		}
-	//}
-	*/
-
-	//ftl::cuda::median_filter(depth1_, depth2_, stream);
-	//ftl::cuda::splat_points(depth1_, depth2_, params, stream);
 
-	// TODO: Second pass
+		// TODO: Repeat rendering process...
+	}
 
 	return true;
 }
diff --git a/components/rgbd-sources/include/ftl/rgbd/channels.hpp b/components/rgbd-sources/include/ftl/rgbd/channels.hpp
index a87d03e803523da7ad9370866b27061ea64e6044..9bf731a5319fa47c501a91e09f1e2acc48c5a4a8 100644
--- a/components/rgbd-sources/include/ftl/rgbd/channels.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/channels.hpp
@@ -21,6 +21,7 @@ enum struct Channel : int {
     Points = 6,         // 32FC4
     Confidence = 7,     // 32F
     Contribution = 7,   // 32F
+    EnergyVector,       // 32FC4
     Flow,               // 32F
     Energy,             // 32F
     LeftGray,