diff --git a/components/renderers/cpp/src/CUDARender.cpp b/components/renderers/cpp/src/CUDARender.cpp
index c573d23b1fdcf0f035be865883213b8c842f9a27..d44a8cfd102b4a1bf90d250ded75a543f09e01b2 100644
--- a/components/renderers/cpp/src/CUDARender.cpp
+++ b/components/renderers/cpp/src/CUDARender.cpp
@@ -386,8 +386,8 @@ void CUDARender::_mesh(ftl::rgbd::Frame &out, const Eigen::Matrix4d &t, cudaStre
 			auto transform = pose_ * MatrixConversion::toCUDA(t.cast<float>() * f.getPose().cast<float>());
 
 			ftl::cuda::reverse_verify(
-				out.getTexture<float>(_getDepthChannel()),
-				f.getTexture<float>(Channel::Depth),
+				out.create<cv::cuda::GpuMat>(_getDepthChannel()),
+				f.get<cv::cuda::GpuMat>(Channel::Depth),
 				transformR,
 				transform,
 				params_.camera,
diff --git a/components/renderers/cpp/src/splatter_cuda.hpp b/components/renderers/cpp/src/splatter_cuda.hpp
index e8be62b96f28baa390823575ad8bcd3572a6a278..aee5d0bc7dab2357c34713ec4eb628d141b0f248 100644
--- a/components/renderers/cpp/src/splatter_cuda.hpp
+++ b/components/renderers/cpp/src/splatter_cuda.hpp
@@ -171,8 +171,8 @@ namespace cuda {
         float factor, cudaStream_t stream);
 
 	void reverse_verify(
-		ftl::cuda::TextureObject<float> &depth_in,
-		ftl::cuda::TextureObject<float> &depth_original,
+		cv::cuda::GpuMat &depth_in,
+		const cv::cuda::GpuMat &depth_original,
 		const float4x4 &transformR,
 		const float4x4 &transform,
 		const ftl::rgbd::Camera &vintrin,
diff --git a/components/renderers/cpp/src/triangle_render.cu b/components/renderers/cpp/src/triangle_render.cu
index 6bc257aa01a347f642249234c6503da9b69ffa87..ff20436a95fe10f27fe9b881146a7ffe6f3e4d91 100644
--- a/components/renderers/cpp/src/triangle_render.cu
+++ b/components/renderers/cpp/src/triangle_render.cu
@@ -227,8 +227,10 @@ void ftl::cuda::merge_convert_depth(TextureObject<int> &depth_in, TextureObject<
 // ==== Reverse Verify Result ==================================================
 
 __global__ void reverse_check_kernel(
-	TextureObject<float> depth_in,
-	TextureObject<float> depth_original,
+	float* __restrict__ depth_in,
+	const float* __restrict__ depth_original,
+	int pitch4,
+	int opitch4,
 	float4x4 transformR,
 	float4x4 transform,
 	ftl::rgbd::Camera vintrin,
@@ -237,9 +239,9 @@ __global__ void reverse_check_kernel(
 	const int x = blockIdx.x*blockDim.x + threadIdx.x;
 	const int y = blockIdx.y*blockDim.y + threadIdx.y;
 
-	if (x < 0 || x >= depth_in.width() || y < 0 || y >= depth_in.height()) return;
+	if (x < 0 || x >= vintrin.width || y < 0 || y >= vintrin.height) return;
 
-	float d = depth_in.tex2D(x,y);
+	float d = depth_in[y*pitch4+x];
 	
 	// FIXME: This is dangerous, need to check through alternates instead
 	while (true) {
@@ -249,26 +251,23 @@ __global__ void reverse_check_kernel(
 		int oy = spos.y;
 
 		if (campos.z > 0.0f && ox >= 0 && ox < ointrin.width && oy >= 0 && oy < ointrin.height) {
-			float d2 = depth_original.tex2D(ox,oy);
-
-			if (!(d2 < ointrin.maxDepth && d2 - campos.z > d2*0.001f)) {
-				//printf("Original %f, %f\n", d2, campos.z);
-				//depth_in(x,y) = 1.5f; //(transform * ointrin.screenToCam(ox,oy,d2)).z;
-				//d = 0.0f;
-				break;
-			}
-			d += 0.002f;
+			float d2 = depth_original[oy*opitch4+ox];
+			if (!(d2 < ointrin.maxDepth && d2 - campos.z > d2*0.001f)) break;
+			d += 0.001f;
 		} else break;
 	}
 
-	depth_in(x,y) = d;
+	depth_in[y*pitch4+x] = d;
 }
 
-void ftl::cuda::reverse_verify(TextureObject<float> &depth_in, TextureObject<float> &depth_original, const float4x4 &transformR, const float4x4 &transform, const ftl::rgbd::Camera &vintrin, const ftl::rgbd::Camera &ointrin, cudaStream_t stream) {
-	const dim3 gridSize((depth_in.width() + T_PER_BLOCK - 1)/T_PER_BLOCK, (depth_in.height() + T_PER_BLOCK - 1)/T_PER_BLOCK);
-	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
+void ftl::cuda::reverse_verify(cv::cuda::GpuMat &depth_in, const cv::cuda::GpuMat &depth_original, const float4x4 &transformR, const float4x4 &transform, const ftl::rgbd::Camera &vintrin, const ftl::rgbd::Camera &ointrin, cudaStream_t stream) {
+	static constexpr int THREADS_X = 8;
+	static constexpr int THREADS_Y = 8;
+
+	const dim3 gridSize((depth_in.cols + THREADS_X - 1)/THREADS_X, (depth_in.rows + THREADS_Y - 1)/THREADS_Y);
+	const dim3 blockSize(THREADS_X, THREADS_Y);
 
-	reverse_check_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in, depth_original, transformR, transform, vintrin, ointrin);
+	reverse_check_kernel<<<gridSize, blockSize, 0, stream>>>(depth_in.ptr<float>(), depth_original.ptr<float>(), depth_in.step1(), depth_original.step1(), transformR, transform, vintrin, ointrin);
 	cudaSafeCall( cudaGetLastError() );
 }