diff --git a/applications/reconstruct/src/ilw/ilw.cpp b/applications/reconstruct/src/ilw/ilw.cpp
index 833f47ad32bd3d7e24dc0c12cc7c6d224b9effcc..464e1f57e688d328ccd345ee604156d96dbe4791 100644
--- a/applications/reconstruct/src/ilw/ilw.cpp
+++ b/applications/reconstruct/src/ilw/ilw.cpp
@@ -15,11 +15,16 @@ using cv::cuda::GpuMat;
 
 ILW::ILW(nlohmann::json &config) : ftl::Configurable(config) {
     enabled_ = value("ilw_align", true);
+    iterations_ = value("iterations", 1);  // number of phase1/phase2 passes process() runs per call; 1 is passed as the default
 
     on("ilw_align", [this](const ftl::config::Event &e) {
         enabled_ = value("ilw_align", true);
     });
 
+    on("iterations", [this](const ftl::config::Event &e) {  // presumably fires when the "iterations" config entry changes - confirm against ftl::Configurable
+        iterations_ = value("iterations", 1);  // re-read the setting with the same default as the constructor
+    });
+
     flags_ = 0;
     if (value("ignore_bad", false)) flags_ |= ftl::cuda::kILWFlag_IgnoreBad;
     if (value("restrict_z", true)) flags_ |= ftl::cuda::kILWFlag_RestrictZ;
@@ -44,14 +49,20 @@ bool ILW::process(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
 
     _phase0(fs, stream);
 
-    //for (int i=0; i<2; ++i) {
-        _phase1(fs, stream);
+    for (int i=0; i<iterations_; ++i) {  // one phase1 + phase2 pass per configured iteration
+        int win;  // correspondence window size handed to _phase1 for this pass
+        switch (i) {
+        case 0: win = 17; break;  // first pass uses the largest window
+        case 1: win = 9; break;  // second pass uses a smaller window
+        default: win = 5; break;  // every later pass uses the smallest window
+        }
+        _phase1(fs, win, stream);
         //for (int j=0; j<3; ++j) {
             _phase2(fs, 0.5f, stream);
         //}
 
 		// TODO: Break if no time left
-    //}
+    }
 
     return true;
 }
@@ -86,21 +97,21 @@ bool ILW::_phase0(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
         f.createTexture<float4>(Channel::EnergyVector, Format<float4>(f.get<GpuMat>(Channel::Colour).size()));
         f.createTexture<float>(Channel::Energy, Format<float>(f.get<GpuMat>(Channel::Colour).size()));
         f.createTexture<uchar4>(Channel::Colour);
-
-		cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
-
-		f.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);
-		f.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream);
     }
 
     return true;
 }
 
-bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
+bool ILW::_phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream) {
     // Run correspondence kernel to create an energy vector
+    cv::cuda::Stream cvstream = cv::cuda::StreamAccessor::wrapStream(stream);
 
 	// For each camera combination
     for (size_t i=0; i<fs.frames.size(); ++i) {
+        auto &f1 = fs.frames[i];
+        f1.get<GpuMat>(Channel::EnergyVector).setTo(cv::Scalar(0.0f,0.0f,0.0f,0.0f), cvstream);  // zero this frame's energy vectors at the start of every _phase1 call
+		f1.get<GpuMat>(Channel::Energy).setTo(cv::Scalar(0.0f), cvstream);  // likewise zero this frame's scalar energy
+
 		Eigen::Vector4d d1(0.0, 0.0, 1.0, 0.0);
 		d1 = fs.sources[i]->getPose() * d1;
 
@@ -109,7 +120,6 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
 
             //LOG(INFO) << "Running phase1";
 
-            auto &f1 = fs.frames[i];
             auto &f2 = fs.frames[j];
             auto s1 = fs.sources[i];
             auto s2 = fs.sources[j];
@@ -137,6 +147,7 @@ bool ILW::_phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream) {
                 pose2,
                 s2->parameters(),
                 flags_,
+                win,
                 stream
             );
             } catch (ftl::exception &e) {
diff --git a/applications/reconstruct/src/ilw/ilw.cu b/applications/reconstruct/src/ilw/ilw.cu
index 1c495f3e09c056006527c94d40a5906e2de74ffb..488ed7cb1ff894f8037514430e5d7291194865c8 100644
--- a/applications/reconstruct/src/ilw/ilw.cu
+++ b/applications/reconstruct/src/ilw/ilw.cu
@@ -24,9 +24,10 @@ __device__ inline float warpSum(float e) {
 	return e;
 }
 
-#define COR_WIN_RADIUS 17
-#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS)
+//#define COR_WIN_RADIUS 17
+//#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS)
 
+template<int COR_WIN_RADIUS> // despite the name this is the window's side length: the kernel loops over COR_WIN_RADIUS*COR_WIN_RADIUS offsets
 __global__ void correspondence_energy_vector_kernel(
         TextureObject<float4> p1,
         TextureObject<float4> p2,
@@ -57,7 +58,7 @@ __global__ void correspondence_energy_vector_kernel(
     // Project to p2 using cam2
     // Each thread takes a possible correspondence and calculates a weighting
     const int lane = tid % WARP_SIZE;
-	for (int i=lane; i<COR_WIN_SIZE; i+=WARP_SIZE) {
+	for (int i=lane; i<COR_WIN_RADIUS*COR_WIN_RADIUS; i+=WARP_SIZE) {
 		const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
         const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
         
@@ -122,7 +123,7 @@ void ftl::cuda::correspondence_energy_vector(
         TextureObject<float> &eout,
         float4x4 &pose1,
         float4x4 &pose2,
-        const Camera &cam2, uint flags,
+        const Camera &cam2, uint flags, int win,
         cudaStream_t stream) {
 
     const dim3 gridSize((p1.width() + 2 - 1)/2, (p1.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
@@ -130,9 +131,11 @@ void ftl::cuda::correspondence_energy_vector(
 
     //printf("COR SIZE %d,%d\n", p1.width(), p1.height());
 
-    correspondence_energy_vector_kernel<<<gridSize, blockSize, 0, stream>>>(
-        p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, flags
-    );
+    switch (win) {  // the window size is a template argument of the kernel, so the runtime value is dispatched onto a compiled instantiation
+    case 17     : correspondence_energy_vector_kernel<17><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, flags); break;
+    case 9      : correspondence_energy_vector_kernel<9><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, flags); break;
+    default     : correspondence_energy_vector_kernel<5><<<gridSize, blockSize, 0, stream>>>(p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, flags); break;  // 5, and any size without its own instantiation, so a launch is never silently skipped
+    }
     cudaSafeCall( cudaGetLastError() );
 }
 
diff --git a/applications/reconstruct/src/ilw/ilw.hpp b/applications/reconstruct/src/ilw/ilw.hpp
index 5e10cc7cf6f2ff76143369848d7d6ee7618b194d..36c2cbbc0e7c62d234dcc7e9db746a7ed7a1b66f 100644
--- a/applications/reconstruct/src/ilw/ilw.hpp
+++ b/applications/reconstruct/src/ilw/ilw.hpp
@@ -51,7 +51,7 @@ class ILW : public ftl::Configurable {
     /*
      * Find possible correspondences and a confidence value.
      */
-    bool _phase1(ftl::rgbd::FrameSet &fs, cudaStream_t stream);
+    bool _phase1(ftl::rgbd::FrameSet &fs, int win, cudaStream_t stream);  // win: correspondence window size forwarded to ftl::cuda::correspondence_energy_vector
 
     /*
      * Calculate energies and move the points.
@@ -61,6 +61,7 @@ class ILW : public ftl::Configurable {
     std::vector<detail::ILWData> data_;
     bool enabled_;
     unsigned int flags_;
+    int iterations_;  // number of phase1/phase2 passes process() runs per call
 };
 
 }
diff --git a/applications/reconstruct/src/ilw/ilw_cuda.hpp b/applications/reconstruct/src/ilw/ilw_cuda.hpp
index 5e399606209ce66e6d48ac9e0be483d135e34ca5..b9b9f89889825b61ff153d9f0c27f32c69f14e1c 100644
--- a/applications/reconstruct/src/ilw/ilw_cuda.hpp
+++ b/applications/reconstruct/src/ilw/ilw_cuda.hpp
@@ -21,7 +21,7 @@ void correspondence_energy_vector(
     float4x4 &pose1,
     float4x4 &pose2,
     const ftl::rgbd::Camera &cam2,
-    uint flags,
+    uint flags, int win,  // win: correspondence window size; kernel instantiations exist for 17, 9 and 5
     cudaStream_t stream
 );