diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp
index d855dd93500b79f66a5cd2cf92a48ef2adb13930..57606034b0ef8c6d67fcc92397563946b64c1d9c 100644
--- a/applications/vision/src/main.cpp
+++ b/applications/vision/src/main.cpp
@@ -197,7 +197,21 @@ static void run(ftl::Configurable *root) {
 	delete net;
 }
 
+static void threadSetCUDADevice() {
+	// Call ftl::cuda::setDevice() on every pool thread: queue pool.size() jobs (presumably one per worker) and hold each until all have checked in, so no thread can take two.
+	std::atomic<int> ijobs{0}, done{0};  // ijobs: jobs that have set their device; done: jobs that will no longer touch these locals
+	for (int i=0; i<ftl::pool.size(); ++i) {
+		ftl::pool.push([&ijobs,&done](int id) {
+			ftl::cuda::setDevice();
+			for (++ijobs; ijobs < ftl::pool.size();) std::this_thread::sleep_for(std::chrono::milliseconds(10));  // check in, then wait for the rest
+			++done;  // must remain the final statement: after this the captured references may dangle
+		});
+	}
+	while (done < ftl::pool.size()) std::this_thread::sleep_for(std::chrono::milliseconds(10));  // wait for every job to leave its wait loop, not merely check in, before the counters are destroyed
+}
+
 int main(int argc, char **argv) {
+
 #ifdef HAVE_PYLON
 	Pylon::PylonAutoInitTerm autoInitTerm;
 #endif
@@ -207,6 +221,9 @@ int main(int argc, char **argv) {
 #endif
 	std::cout << "FTL Vision Node " << FTL_VERSION_LONG << std::endl;
 	auto root = ftl::configure(argc, argv, "vision_default");
+
+	// Use other GPU if available.
+	//ftl::cuda::setDevice(ftl::cuda::deviceCount()-1);
 	
 	std::cout << "Loading..." << std::endl;
 	run(root);
diff --git a/components/common/cpp/src/cuda_common.cpp b/components/common/cpp/src/cuda_common.cpp
index 2eb5d19f829c999c04cf4ec1e5c28bbd699f0521..ede4a998a4a104539d1b13ce32f6f278c2f3a8b2 100644
--- a/components/common/cpp/src/cuda_common.cpp
+++ b/components/common/cpp/src/cuda_common.cpp
@@ -10,6 +10,8 @@ static int dev_count = 0;
 static std::vector<cudaDeviceProp> properties;
 
 bool ftl::cuda::initialise() {
+	if (dev_count > 0) return true;
+	
 	// Do an initial CUDA check
 	cudaSafeCall(cudaGetDeviceCount(&dev_count));
 	CHECK_GE(dev_count, 1) << "No CUDA devices found";
@@ -50,6 +52,7 @@ void ftl::cuda::setDevice(int id) {
 }
 
 void ftl::cuda::setDevice() {
+	LOG(INFO) << "Using CUDA Device " << dev_to_use;  // emits one INFO line per call, naming the index passed to cudaSetDevice below
 	cudaSafeCall(cudaSetDevice(dev_to_use));
 }
 
diff --git a/components/rgbd-sources/src/group.cpp b/components/rgbd-sources/src/group.cpp
index b050db96f9720110f7e5f506829b97e532388635..55fd3c411fcbd0f51f8bdfc319a9fa4e7e4804f6 100644
--- a/components/rgbd-sources/src/group.cpp
+++ b/components/rgbd-sources/src/group.cpp
@@ -120,7 +120,7 @@ void Group::onFrameSet(const ftl::rgbd::VideoCallback &cb) {
 
 			//ftl::pool.push([this,s,ts](int id) {
 				_retrieveJob(s);
-				LOG(INFO) << "Retrieve latency: " << ftl::timer::get_time()-ts;
+				//LOG(INFO) << "Retrieve latency: " << ftl::timer::get_time()-ts;
 				--jobs_;
 				_dispatchJob(s, ts);
 			//});
diff --git a/components/rgbd-sources/src/source.cpp b/components/rgbd-sources/src/source.cpp
index 37cff385a3cc29e5fedb69632d206f4b22e4ea53..2a9af58891810e3434416406b3fded6aef35c80c 100644
--- a/components/rgbd-sources/src/source.cpp
+++ b/components/rgbd-sources/src/source.cpp
@@ -195,7 +195,10 @@ bool Source::retrieve() {
 
 bool Source::dispatch(int64_t ts) {
 	if (!callback_) return false;
-	if (is_dispatching || is_retrieving) return false;
+	if (is_dispatching || is_retrieving) {
+		LOG(WARNING) << "Previous distance not completed";
+		return false;
+	}
 	is_dispatching = true;
 	_swap();
 	ftl::pool.push([this,ts](int id) {
diff --git a/components/rgbd-sources/src/sources/stereovideo/pylon.cpp b/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
index 43975d149410a8b8d8f6c43cbb475b198bacd65a..b8155489ed9f1a32aa1e7abbc297357398da2070 100644
--- a/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
+++ b/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
@@ -124,6 +124,10 @@ void PylonDevice::_configureCamera(CBaslerUniversalInstantCamera *cam) {
 bool PylonDevice::grab() {
 	if (!isReady()) return false;
 
+	//int dev;
+	//cudaGetDevice(&dev);
+	//LOG(INFO) << "Current cuda device = " << dev;
+
 	try {
 		FTL_Profile("Frame Capture", 0.001);
 		if (rcam_) rcam_->WaitForFrameTriggerReady( 30, Pylon::TimeoutHandling_ThrowException);
@@ -152,6 +156,12 @@ bool PylonDevice::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda
 	Mat &lfull = (!hasHigherRes()) ? l : hres;
 	Mat &rfull = (!hasHigherRes()) ? r : rtmp_;
 
+	//ftl::cuda::setDevice();
+
+	//int dev;
+	//cudaGetDevice(&dev);
+	//LOG(INFO) << "Current cuda device = " << dev;
+
 	try {
 		FTL_Profile("Frame Retrieve", 0.005);
 		std::future<bool> future_b;