diff --git a/applications/calibration/src/stereo.cpp b/applications/calibration/src/stereo.cpp
index aedb197152c10f7c7d033bb9b0e62c71d9150cf0..59b37bc1f70601a37365099bd1cc750db9c3df5c 100644
--- a/applications/calibration/src/stereo.cpp
+++ b/applications/calibration/src/stereo.cpp
@@ -228,6 +228,8 @@ void ftl::calibration::stereo(map<string, string> &opt) {
 	cv::Rect validRoi[2];
 
 	// calculate extrinsic parameters
+	// NOTE: 	Other code assumes CALIB_ZERO_DISPARITY is used (for Cy == Cx). 
+	//			Depth map map calculation disparityToDepth() could be incorrect otherwise.
 	stereoRectify(
 		camera_matrices[0], dist_coeffs[0],
 		camera_matrices[1], dist_coeffs[1],
diff --git a/applications/reconstruct/src/scene_rep_hash_sdf.cu b/applications/reconstruct/src/scene_rep_hash_sdf.cu
index ba982e7c701745c528928de26ec74e07083535c2..cbeef06906daa1a1eada0f385e5b1b4a3d560b02 100644
--- a/applications/reconstruct/src/scene_rep_hash_sdf.cu
+++ b/applications/reconstruct/src/scene_rep_hash_sdf.cu
@@ -525,7 +525,7 @@ __global__ void integrateDepthMapKernel(HashData hashData, DepthCameraData camer
 		//return;
 
 		// Depth is within accepted max distance from camera
-		if (depth > 0 && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check)
+		if (depth > 0.01f && depth < hashParams.m_maxIntegrationDistance) { // valid depth and color (Nick: removed colour check)
 			float depthZeroOne = cameraData.cameraToKinectProjZ(depth);
 
 			// Calculate SDF of this voxel wrt the depth map value
@@ -584,7 +584,7 @@ __global__ void integrateDepthMapKernel(HashData hashData, DepthCameraData camer
 		} else {
 			uint idx = entry.ptr + i;
 			float coldist = colourDistance(color, hashData.d_SDFBlocks[idx].color);
-			if (depth > 40.0f && coldist > 100.0f) {
+			if ((depth > 39.99f || depth < 0.01f) && coldist > 100.0f) {
 				//hashData.d_SDFBlocks[idx].color = make_uchar3(0,0,(uchar)(coldist));
 				hashData.d_SDFBlocks[idx].weight = hashData.d_SDFBlocks[idx].weight >> 1;
 			}
diff --git a/applications/registration/src/correspondances.cpp b/applications/registration/src/correspondances.cpp
index 6d4ac2b03993748adc5f48ecc3f06a026e5c810a..4dbda754ab201ad807b17c9b542bcacabbe1f1cd 100644
--- a/applications/registration/src/correspondances.cpp
+++ b/applications/registration/src/correspondances.cpp
@@ -49,7 +49,7 @@ static void averageDepth(vector<Mat> &in, Mat &out, float varThresh) {
 		// Calculate mean
 		for (int i_in = 0; i_in < in.size(); ++i_in) {
 			double d = in[i_in].at<float>(i);
-			if (d < 40.0) {
+			if (ftl::rgbd::isValidDepth(d)) {
 				good_values++;
 				sum += d;
 			}
@@ -75,10 +75,10 @@ static void averageDepth(vector<Mat> &in, Mat &out, float varThresh) {
 			if (var < varThresh2) {
 				out.at<float>(i) = (float)sum;
 			} else {
-				out.at<float>(i) = 41.0f;
+				out.at<float>(i) = 0.0f;
 			}
 		} else {
-			out.at<float>(i) = 41.0f;
+			out.at<float>(i) = 0.0f;
 		}
 	}
 }
diff --git a/components/rgbd-sources/CMakeLists.txt b/components/rgbd-sources/CMakeLists.txt
index 9953eb32746e74caace9aa4805fb58d5fdf996ad..19aaf74583d2f4335ef29e2107a0198a528d6a21 100644
--- a/components/rgbd-sources/CMakeLists.txt
+++ b/components/rgbd-sources/CMakeLists.txt
@@ -6,7 +6,7 @@ set(RGBDSRC
 	src/stereovideo.cpp
 	src/net.cpp
 	src/streamer.cpp
-	src/algorithms/rtcensus.cpp
+#	src/algorithms/rtcensus.cpp
 #	src/algorithms/rtcensus_sgm.cpp
 #	src/algorithms/opencv_sgbm.cpp
 #	src/algorithms/opencv_bm.cpp
@@ -31,11 +31,11 @@ if (CUDA_FOUND)
 	list(APPEND RGBDSRC
 #		"src/algorithms/opencv_cuda_bm.cpp"
 #		"src/algorithms/opencv_cuda_bp.cpp"
-		"src/algorithms/rtcensus.cu"
+#		"src/algorithms/rtcensus.cu"
 #		"src/algorithms/rtcensus_sgm.cu"
-		"src/algorithms/consistency.cu"
-		"src/algorithms/sparse_census.cu"
-		"src/algorithms/tex_filter.cu"
+#		"src/algorithms/consistency.cu"
+#		"src/algorithms/sparse_census.cu"
+#		"src/algorithms/tex_filter.cu"
 #		"src/algorithms/nick1.cu"
 #		"src/algorithms/nick.cpp")
 )
diff --git a/components/rgbd-sources/include/ftl/rgbd/source.hpp b/components/rgbd-sources/include/ftl/rgbd/source.hpp
index fd74466071286b706de3580d6f1101cbfa6e13f3..d5b57a72c34f145c83dfa6acb09d00aef6b4e826 100644
--- a/components/rgbd-sources/include/ftl/rgbd/source.hpp
+++ b/components/rgbd-sources/include/ftl/rgbd/source.hpp
@@ -19,7 +19,7 @@ class Universe;
 
 namespace rgbd {
 
-static inline bool isValidDepth(float d) { return (d > 0.0f) && (d <= 40.0f); }
+static inline bool isValidDepth(float d) { return (d > 0.01f) && (d < 39.99f); }
 
 class SnapshotReader;
 
diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
index fc68f24a91e3c130c78d7b44b182b1fff3b4110a..daa1a335bfe1611b8870f643ed38b17672e1332d 100644
--- a/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
+++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.cpp
@@ -6,6 +6,7 @@
 
 using ftl::algorithms::FixstarsSGM;
 using cv::Mat;
+using cv::cuda::GpuMat;
 
 //static ftl::Disparity::Register fixstarssgm("libsgm", FixstarsSGM::create);
 
@@ -17,51 +18,45 @@ FixstarsSGM::FixstarsSGM(nlohmann::json &config) : Disparity(config) {
 	filter_ = cv::cuda::createDisparityBilateralFilter(max_disp_ << 4, value("filter_radius", 25), value("filter_iter", 1));
 }
 
-void FixstarsSGM::compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) {
-	Mat left_disp;
-	Mat right_disp;
-
-	Mat lbw, rbw;
-	cv::cvtColor(l, lbw, cv::COLOR_BGR2GRAY);
-	cv::cvtColor(r, rbw, cv::COLOR_BGR2GRAY);
+void FixstarsSGM::compute(const cv::cuda::GpuMat &l, const cv::cuda::GpuMat &r, cv::cuda::GpuMat &disp, cv::cuda::Stream &stream) {
+	cv::cuda::cvtColor(l, lbw_, cv::COLOR_BGR2GRAY, 0, stream);
+	cv::cuda::cvtColor(r, rbw_, cv::COLOR_BGR2GRAY, 0, stream);
 
+	stream.waitForCompletion();
 	if (!ssgm_) { // todo: move to constructor
-		ssgm_ = new sgm::StereoSGM(l.cols, l.rows, max_disp_, 8, 16,
-			sgm::EXECUTE_INOUT_HOST2HOST,
-			sgm::StereoSGM::Parameters(10,120,0.95f,true));
+		dispt_ = GpuMat(l.rows, l.cols, CV_16SC1);
+		ssgm_ = new sgm::StereoSGM(l.cols, l.rows, max_disp_, 8, 16, lbw_.step, dispt_.step / sizeof(short),
+			sgm::EXECUTE_INOUT_CUDA2CUDA, sgm::StereoSGM::Parameters(10,120,0.95f,true));
 	}
 
-	disp = Mat(cv::Size(l.cols, l.rows), CV_16SC1);
-
 	//auto start = std::chrono::high_resolution_clock::now();
-	ssgm_->execute(lbw.data, rbw.data, disp.data);
+	ssgm_->execute(lbw_.data, rbw_.data, dispt_.data);
 	//std::chrono::duration<double> elapsed =
 	//		std::chrono::high_resolution_clock::now() - start;
 	//LOG(INFO) << "CUDA sgm in " << elapsed.count() << "s";
 	
 	// todo: fix libSGM (return float data or provide mask separately)
 	// disparity values set to (256 << 5) in libSGM consistency check 
-	Mat bad_pixels = (disp == (256 << 5)); 
+	//Mat bad_pixels = (disp == (256 << 5)); 
 	
-	disp.setTo(0, bad_pixels);
-	Mat left_pixels(disp, cv::Rect(0, 0, max_disp_, disp.rows));
-	left_pixels = 0;
+	//disp.setTo(0, bad_pixels, stream_);
+	GpuMat left_pixels(dispt_, cv::Rect(0, 0, max_disp_, dispt_.rows));
+	left_pixels.setTo(0);
+
+	cv::cuda::threshold(dispt_, dispt_, 4096.0f, 0.0f, cv::THRESH_TOZERO_INV, stream);
 
 	if (use_filter_) {
-		cv::cuda::GpuMat l_gpu, disp_gpu, disp_gpu_out;
 		// parameters need benchmarking, impact of image
 		// quick tests show with parameters (max_disp_, 25, 3)
-		// roughly 50% in disparity calculation and 50% in filter
-		disp_gpu.upload(disp);
-		l_gpu.upload(l);
-		filter_->apply(disp_gpu, l_gpu, disp_gpu_out);
-		disp_gpu_out.download(disp);
+		// roughly 50% in disparity calculation and 50% in filter;
+		filter_->apply(dispt_, l, dispt_, stream);
 	}
 	
-	disp.convertTo(disp, CV_32F, 1.0f/16.0f);
+	dispt_.convertTo(disp, CV_32F, 1.0f/16.0f, stream);
 }
 
 void FixstarsSGM::setMask(Mat &mask) {
+	return; // TODO(Nick) Not needed, but also code below does not work with new GPU pipeline
 	CHECK(mask.type() == CV_8UC1) << "mask type must be CV_8U";
 	
 	if (!ssgm_) { // todo: move to constructor
diff --git a/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp b/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
index 18492c6f14576fabbe913a25c04f1ddd45359686..dfbae841150ca0443c9f67acb7e871dacc08cf2e 100644
--- a/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
+++ b/components/rgbd-sources/src/algorithms/fixstars_sgm.hpp
@@ -26,7 +26,7 @@ class FixstarsSGM : public ftl::rgbd::detail::Disparity {
 	public:
 	explicit FixstarsSGM(nlohmann::json &config);
 
-	void compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp) override;
+	void compute(const cv::cuda::GpuMat &l, const cv::cuda::GpuMat &r, cv::cuda::GpuMat &disp, cv::cuda::Stream &stream) override;
 	void setMask(cv::Mat &mask) override;
 	
 	/* Factory creator */
@@ -38,6 +38,9 @@ class FixstarsSGM : public ftl::rgbd::detail::Disparity {
 	bool use_filter_;
 	cv::Ptr<cv::cuda::DisparityBilateralFilter> filter_;
 	sgm::StereoSGM *ssgm_;
+	cv::cuda::GpuMat lbw_;
+	cv::cuda::GpuMat rbw_;
+	cv::cuda::GpuMat dispt_;
 };
 };
 };
diff --git a/components/rgbd-sources/src/calibrate.cpp b/components/rgbd-sources/src/calibrate.cpp
index 4addc38e33b3dba8afaf86952818f66cb0ea1895..51767128eca13de622e4113ff8c53390cd9ef9fa 100644
--- a/components/rgbd-sources/src/calibrate.cpp
+++ b/components/rgbd-sources/src/calibrate.cpp
@@ -10,7 +10,6 @@
 #include <sstream>
 #include <string>
 #include <ctime>
-#include <cstdio>
 
 #include "calibrate.hpp"
 
@@ -18,229 +17,68 @@
 #include <opencv2/core/utility.hpp>
 #include <opencv2/imgproc.hpp>
 #include <opencv2/calib3d.hpp>
-#include <opencv2/imgcodecs.hpp>
-#include <opencv2/videoio.hpp>
-#include <opencv2/highgui.hpp>
 
 using ftl::rgbd::detail::Calibrate;
-using ftl::rgbd::detail::LocalSource;
+
 using cv::FileStorage;
-using cv::CALIB_FIX_PRINCIPAL_POINT;
-using cv::CALIB_ZERO_TANGENT_DIST;
-using cv::CALIB_FIX_ASPECT_RATIO;
-using cv::CALIB_FIX_K1;
-using cv::CALIB_FIX_K2;
-using cv::CALIB_FIX_K3;
-using cv::CALIB_FIX_K4;
-using cv::CALIB_FIX_K5;
-using cv::CALIB_ZERO_DISPARITY;
-using cv::CALIB_USE_INTRINSIC_GUESS;
-using cv::CALIB_SAME_FOCAL_LENGTH;
-using cv::CALIB_RATIONAL_MODEL;
-using cv::CALIB_CB_ADAPTIVE_THRESH;
-using cv::CALIB_CB_NORMALIZE_IMAGE;
-using cv::CALIB_CB_FAST_CHECK;
-using cv::CALIB_USE_LU;
-using cv::NORM_L2;
+
 using cv::INTER_LINEAR;
-using cv::COLOR_BGR2GRAY;
+
 using cv::FileNode;
 using cv::FileNodeIterator;
+
 using cv::Mat;
+using cv::cuda::GpuMat;
+using cv::cuda::Stream;
+
 using cv::Size;
+
 using cv::Point2f;
 using cv::Point3f;
 using cv::Matx33d;
 using cv::Scalar;
-using cv::Rect;
-using cv::TermCriteria;
-using cv::waitKey;
+
 using std::string;
 using std::vector;
 
-void Calibrate::Settings::write(FileStorage& fs) const {
-    fs << "{"
-		<< "BoardSize_Width"  << boardSize.width
-		<< "BoardSize_Height" << boardSize.height
-		<< "Square_Size"         << squareSize
-		<< "Calibrate_Pattern" << patternToUse
-		<< "Calibrate_NrOfFrameToUse" << nrFrames
-		<< "Calibrate_FixAspectRatio" << aspectRatio
-		<< "Calibrate_AssumeZeroTangentialDistortion" << calibZeroTangentDist
-		<< "Calibrate_FixPrincipalPointAtTheCenter" << calibFixPrincipalPoint
-		<< "Write_DetectedFeaturePoints" << writePoints
-		<< "Write_extrinsicParameters"   << writeExtrinsics
-		<< "Write_gridPoints" << writeGrid
-		<< "Input_FlipAroundHorizontalAxis" << flipVertical
-		<< "Input_Delay" << delay
-		<< "}";
-}
-
-void Calibrate::Settings::read(ftl::Configurable *node) {
-    boardSize.width = node->value<vector<int>>("board_size", {10,10})[0];
-    boardSize.height = node->value<vector<int>>("board_size", {10,10})[1];
-    squareSize = node->value("square_size", 50.0f);
-    nrFrames = node->value("num_frames", 20);
-    aspectRatio = node->value("fix_aspect_ratio", false);
-    calibZeroTangentDist = node->value("assume_zero_tangential_distortion", false);
-    calibFixPrincipalPoint = node->value("fix_principal_point_at_center", false);
-    useFisheye =  node->value("use_fisheye_model", false);
-    flipVertical = node->value("flip_vertical", false);
-    delay = node->value("frame_delay", 1.0f);
-    fixK1 = node->value("fix_k1", false);
-    fixK2 = node->value("fix_k2", false);
-    fixK3 = node->value("fix_k3", false);
-    fixK4 = node->value("fix_k4", true);
-    fixK5 = node->value("fix_k5", true);
-
-    validate();
-}
-
-void Calibrate::Settings::validate() {
-    goodInput = true;
-    if (boardSize.width <= 0 || boardSize.height <= 0) {
-        LOG(ERROR) << "Invalid Board size: " << boardSize.width << " " <<
-        		boardSize.height;
-        goodInput = false;
-    }
-
-    if (squareSize <= 10e-6) {
-        LOG(ERROR) << "Invalid square size " << squareSize;
-        goodInput = false;
-    }
-
-    if (nrFrames <= 0) {
-        LOG(ERROR) << "Invalid number of frames " << nrFrames;
-        goodInput = false;
-    }
-
-    flag = 0;
-    if (calibFixPrincipalPoint) flag |= CALIB_FIX_PRINCIPAL_POINT;
-    if (calibZeroTangentDist)   flag |= CALIB_ZERO_TANGENT_DIST;
-    if (aspectRatio)            flag |= CALIB_FIX_ASPECT_RATIO;
-    if (fixK1)                  flag |= CALIB_FIX_K1;
-    if (fixK2)                  flag |= CALIB_FIX_K2;
-    if (fixK3)                  flag |= CALIB_FIX_K3;
-    if (fixK4)                  flag |= CALIB_FIX_K4;
-    if (fixK5)                  flag |= CALIB_FIX_K5;
-
-    if (useFisheye) {
-        // the fisheye model has its own enum, so overwrite the flags
-        flag = cv::fisheye::CALIB_FIX_SKEW |
-        		cv::fisheye::CALIB_RECOMPUTE_EXTRINSIC;
-        if (fixK1)                   flag |= cv::fisheye::CALIB_FIX_K1;
-        if (fixK2)                   flag |= cv::fisheye::CALIB_FIX_K2;
-        if (fixK3)                   flag |= cv::fisheye::CALIB_FIX_K3;
-        if (fixK4)                   flag |= cv::fisheye::CALIB_FIX_K4;
-        if (calibFixPrincipalPoint)
-        	flag |= cv::fisheye::CALIB_FIX_PRINCIPAL_POINT;
-    }
-
-    atImageList = 0;
-}
-
-Mat Calibrate::_nextImage(size_t cam) {
-    Mat result;
-    if (cam == 0) {
-    	local_->left(result);
-    } else if (cam == 1 && local_->isStereo()) {
-    	local_->right(result);
-    }
-    return result;
-}
-
-bool Calibrate::Settings::readStringList(const string& filename,
-		vector<string>& l) {
-    l.clear();
-    FileStorage fs(filename, FileStorage::READ);
-    if ( !fs.isOpened() )
-        return false;
-    FileNode n = fs.getFirstTopLevelNode();
-    if ( n.type() != FileNode::SEQ )
-        return false;
-    FileNodeIterator it = n.begin(), it_end = n.end();
-    for ( ; it != it_end; ++it )
-        l.push_back((string)*it);
-    return true;
-}
-
-bool Calibrate::Settings::isListOfImages(const string& filename) {
-    string s(filename);
-    // Look for file extension
-    if ( s.find(".xml") == string::npos && s.find(".yaml") == string::npos &&
-    		s.find(".yml") == string::npos )
-        return false;
-    else
-        return true;
-}
-
-enum { DETECTION = 0, CAPTURING = 1, CALIBRATED = 2 };
-
-bool runCalibration(const Calibrate::Settings& s, Size imageSize,
-		Mat&  cameraMatrix, Mat& distCoeffs,
-		vector<vector<Point2f> > imagePoints, float grid_width,
-		bool release_object);
-
-
-Calibrate::Calibrate(nlohmann::json &config, LocalSource *s) : ftl::Configurable(config), local_(s) {
-    /*FileStorage fs(cal, FileStorage::READ); // Read the settings
-    if (!fs.isOpened())
-    {
-        LOG(ERROR) << "Could not open the configuration file: \"" << cal << "\"";
-        return;
-    }*/
-    // fs["Settings"] >> settings_;
-    settings_.read(this);
-    // fs.release();
-
-    if (!settings_.goodInput) {
-        LOG(ERROR) << "Invalid input detected. Application stopping.";
-        return;
-    }
-
-    map1_.resize(2);
-    map2_.resize(2);
+Calibrate::Calibrate(nlohmann::json &config, cv::Size image_size, cv::cuda::Stream &stream) : ftl::Configurable(config) {
+	std::pair<Mat, Mat> map1, map2;
+	calibrated_ = _loadCalibration(image_size, map1, map2);
 
-    calibrated_ = _loadCalibration();
-
-    if (calibrated_) {
-    	LOG(INFO) << "Calibration loaded from file";
-    }
+	if (calibrated_) {
+		LOG(INFO) << "Calibration loaded from file";
+		map1_gpu_.first.upload(map1.first, stream);
+		map1_gpu_.second.upload(map1.second, stream);
+		map2_gpu_.first.upload(map2.first, stream);
+		map2_gpu_.second.upload(map2.second, stream);
+	}
+	else {
+		LOG(WARNING) << "Calibration not loaded";
+	}
 }
 
-bool Calibrate::_loadCalibration() {
-	// Capture one frame to get size;
-	Mat l, r;
-	local_->get(l, r);
-	Size img_size = l.size();
-	float scale = 1.0f;
-
-    Rect roi1, roi2;
-    FileStorage fs;
+bool Calibrate::_loadCalibration(cv::Size img_size, std::pair<Mat, Mat> &map1, std::pair<Mat, Mat> &map2) {
+	FileStorage fs;
 
-    // reading intrinsic parameters
-    auto ifile = ftl::locateFile(value("intrinsics", std::string("intrinsics.yml")));
-    if (ifile) {
+	// reading intrinsic parameters
+	auto ifile = ftl::locateFile(value("intrinsics", std::string("intrinsics.yml")));
+	if (ifile) {
 		fs.open((*ifile).c_str(), FileStorage::READ);
 		if (!fs.isOpened()) {
-		    LOG(WARNING) << "Could not open intrinsics file";
-		    return false;
+			LOG(WARNING) << "Could not open intrinsics file";
+			return false;
 		}
 		
 		LOG(INFO) << "Intrinsics from: " << *ifile;
-    } else {
-    	LOG(WARNING) << "Calibration intrinsics file not found";
+	}
+	else {
+		LOG(WARNING) << "Calibration intrinsics file not found";
 		return false;
-    }
-
-    Mat M1, D1, M2, D2;
-    fs["M"] >> M1;
-    fs["D"] >> D1;
-    //fs["M2"] >> M2;
-    //fs["D2"] >> D2;
+	}
 
-    M1 *= scale;
-    //M2 *= scale;
+	Mat M1, D1, M2, D2;
+	fs["M"] >> M1;
+	fs["D"] >> D1;
 
 	M2 = M1;
 	D2 = D1;
@@ -249,411 +87,46 @@ bool Calibrate::_loadCalibration() {
 	if (efile) {
 		fs.open((*efile).c_str(), FileStorage::READ);
 		if (!fs.isOpened()) {
-		    LOG(WARNING) << "Could not open extrinsics file";
-		    return false;
+			LOG(WARNING) << "Could not open extrinsics file";
+			return false;
 		}
 		
 		LOG(INFO) << "Extrinsics from: " << *efile;
-    } else {
-    	LOG(WARNING) << "Calibration extrinsics file not found";
-		return false;
-    }
-
-    Mat R, T, R1, P1, R2, P2;
-    fs["R"] >> R;
-    fs["T"] >> T;
-    fs["R1"] >> R1;
-    fs["R2"] >> R2;
-    fs["P1"] >> P1;
-    fs["P2"] >> P2;
-    fs["Q"] >> Q_;
-
-    /*stereoRectify(M1, D1, M2, D2, img_size, R, T, R1, R2, P1, P2, Q_,
-    		CALIB_ZERO_DISPARITY, -1, img_size, &roi1, &roi2);*/
-
-    Mat map11, map12, map21, map22;
-    initUndistortRectifyMap(M1, D1, R1, P1, img_size, CV_16SC2,
-    		map1_[0], map2_[0]);
-    initUndistortRectifyMap(M2, D2, R2, P2, img_size, CV_16SC2,
-    		map1_[1], map2_[1]);
-
-	// Re-distort
-	Mat P1_cam = (cv::Mat_<double>(3,3) << P1.at<double>(0, 0), P1.at<double>(0, 1) , P1.at<double>(0, 2),
-		P1.at<double>(1, 0), P1.at<double>(1, 1), P1.at<double>(1, 2),
-		P1.at<double>(2, 0), P1.at<double>(2, 1), P1.at<double>(2, 2));
-	Mat M1_trans = (cv::Mat_<double>(3, 4) << M1.at<double>(0, 0), M1.at<double>(0, 1), M1.at<double>(0, 2), -P1.at<double>(0, 3),
-		M1.at<double>(1, 0), M1.at<double>(1, 1), M1.at<double>(1, 2), -P1.at<double>(1, 3),
-		M1.at<double>(2, 0), M1.at<double>(2, 1), M1.at<double>(2, 2), -P1.at<double>(2, 3));
-	initUndistortRectifyMap(P1_cam, Mat(), R1.t(), P1,
-			img_size, CV_16SC2, imap1_, imap2_);
-	r1_ = P1;
-    return true;
-}
-
-bool Calibrate::recalibrate() {
-	vector<vector<Point2f>> imagePoints[2];
-    Mat cameraMatrix[2], distCoeffs[2];
-    Size imageSize[2];
-
-	bool r = _recalibrate(imagePoints, cameraMatrix, distCoeffs, imageSize);
-
-	if (r) calibrated_ = true;
-
-	if (r && local_->isStereo()) {
-		int nimages = static_cast<int>(imagePoints[0].size());
-		auto squareSize = settings_.squareSize;
-		vector<vector<Point3f>> objectPoints;
-		objectPoints.resize(nimages);
-
-		for (auto i = 0; i < nimages; i++) {
-		    for (auto j = 0; j < settings_.boardSize.height; j++)
-		        for (auto  k = 0; k < settings_.boardSize.width; k++)
-		            objectPoints[i].push_back(Point3f(k*squareSize,
-		            		j*squareSize, 0));
-		}
-
-		Mat R, T, E, F;
-
-		LOG(INFO) << "Running stereo calibration...";
-
-		double rms = stereoCalibrate(objectPoints, imagePoints[0], imagePoints[1],
-                cameraMatrix[0], distCoeffs[0],
-                cameraMatrix[1], distCoeffs[1],
-                imageSize[0], R, T, E, F,
-                CALIB_FIX_ASPECT_RATIO +
-                CALIB_ZERO_TANGENT_DIST +
-                CALIB_USE_INTRINSIC_GUESS +
-                CALIB_SAME_FOCAL_LENGTH +
-                CALIB_RATIONAL_MODEL +
-                CALIB_FIX_K3 + CALIB_FIX_K4 + CALIB_FIX_K5,
-                TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 100, 1e-5));
-		LOG(INFO) << "... done with RMS error=" << rms;
-
-		// save intrinsic parameters
-		FileStorage fs(FTL_LOCAL_CONFIG_ROOT "/intrinsics.yml", FileStorage::WRITE);
-		if (fs.isOpened()) {
-		    fs << "M1" << cameraMatrix[0] << "D1" << distCoeffs[0] <<
-		        "M2" << cameraMatrix[1] << "D2" << distCoeffs[1];
-		    fs.release();
-		} else {
-		    LOG(ERROR) << "Error: can not save the intrinsic parameters";
-		}
-
-		Mat R1, R2, P1, P2;
-		Rect validRoi[2];
-
-		stereoRectify(cameraMatrix[0], distCoeffs[0],
-		              cameraMatrix[1], distCoeffs[1],
-		              imageSize[0], R, T, R1, R2, P1, P2, Q_,
-		              CALIB_ZERO_DISPARITY, 1, imageSize[0],
-		              &validRoi[0], &validRoi[1]);
-
-		fs.open(FTL_LOCAL_CONFIG_ROOT "/extrinsics.yml", FileStorage::WRITE);
-		if (fs.isOpened()) {
-		    fs << "R" << R << "T" << T << "R1" << R1 << "R2" << R2 << "P1" <<
-		    		P1 << "P2" << P2 << "Q" << Q_;
-		    fs.release();
-		} else {
-		    LOG(ERROR) << "Error: can not save the extrinsic parameters";
-		}
-
-		// Precompute maps for cv::remap()
-    	initUndistortRectifyMap(cameraMatrix[0], distCoeffs[0], R1, P1,
-    			imageSize[0], CV_16SC2, map1_[0], map2_[0]);
-    	initUndistortRectifyMap(cameraMatrix[1], distCoeffs[1], R2, P2,
-    			imageSize[0], CV_16SC2, map1_[1], map2_[1]);
-
 	} else {
-		int cam = 0;
-		if (settings_.useFisheye) {
-		    Mat newCamMat;
-		    cv::fisheye::estimateNewCameraMatrixForUndistortRectify(
-		    		cameraMatrix[cam], distCoeffs[cam], imageSize[cam],
-		            Matx33d::eye(), newCamMat, 1);
-		    cv::fisheye::initUndistortRectifyMap(
-		    		cameraMatrix[cam], distCoeffs[cam], Matx33d::eye(),
-		    		newCamMat, imageSize[cam],
-		            CV_16SC2, map1_[cam], map2_[cam]);
-		} else {
-		    initUndistortRectifyMap(
-		        cameraMatrix[cam], distCoeffs[cam], Mat(),
-		        getOptimalNewCameraMatrix(cameraMatrix[cam], distCoeffs[cam],
-		        		imageSize[cam], 1, imageSize[cam], 0),
-		        imageSize[cam], CV_16SC2, map1_[cam], map2_[cam]);
-		}
-	}
-
-	return r;
-}
-
-bool Calibrate::_recalibrate(vector<vector<Point2f>> *imagePoints,
-		Mat *cameraMatrix, Mat *distCoeffs, Size *imageSize) {
-	//int winSize = 11;  // parser.get<int>("winSize");
-
-	float grid_width = settings_.squareSize * (settings_.boardSize.width - 1);
-	bool release_object = false;
-	double prevTimestamp = 0.0;
-	const Scalar RED(0, 0, 255), GREEN(0, 255, 0);
-
-#if CV_VERSION_MAJOR >= 4
-	int chessBoardFlags = CALIB_CB_NORMALIZE_IMAGE;
-#else
-	int chessBoardFlags = CALIB_CB_ADAPTIVE_THRESH | CALIB_CB_NORMALIZE_IMAGE;
-
-	if (!settings_.useFisheye) {
-		// fast check erroneously fails with high distortions like fisheye
-		chessBoardFlags |= CALIB_CB_FAST_CHECK;
+		LOG(WARNING) << "Calibration extrinsics file not found";
+		return false;
 	}
-#endif
-
-	for (;;) {
-		Mat view[2];
-		local_->get(view[0], view[1]);
-		LOG(INFO) << "Grabbing calibration image...";
-
-		if (view[0].empty() || (local_->isStereo() && view[1].empty()) ||
-				imagePoints[0].size() >= (size_t)settings_.nrFrames) {
-			settings_.outputFileName = FTL_LOCAL_CONFIG_ROOT "/cam0.xml";
-			bool r = runCalibration(settings_, imageSize[0],
-							cameraMatrix[0], distCoeffs[0], imagePoints[0],
-							grid_width, release_object);
-
-			if (local_->isStereo()) {
-				settings_.outputFileName = FTL_LOCAL_CONFIG_ROOT "/cam1.xml";
-				r &= runCalibration(settings_, imageSize[1],
-								cameraMatrix[1], distCoeffs[1], imagePoints[1],
-								grid_width, release_object);
-			}
-
-			if (!r && view[0].empty()) {
-				LOG(ERROR) << "Not enough frames to calibrate";
-				return false;
-			} else if (r) {
-				LOG(INFO) << "Calibration successful";
-				break;
-			}
-		}
 
-		imageSize[0] = view[0].size();  // Format input image.
-		imageSize[1] = view[1].size();
-		// if( settings_.flipVertical )    flip( view[cam], view[cam], 0 );
+	Mat R, T, R1, P1, R2, P2;
+	fs["R"] >> R;
+	fs["T"] >> T;
+	fs["R1"] >> R1;
+	fs["R2"] >> R2;
+	fs["P1"] >> P1;
+	fs["P2"] >> P2;
+	fs["Q"] >> Q_;
 
-		vector<Point2f> pointBuf[2];
-		bool found1, found2;
-#if CV_VERSION_MAJOR >= 4
-		LOG(INFO) << "Using findChessboardCornersSB()";
-		found1 = findChessboardCornersSB(view[0], settings_.boardSize,
-				pointBuf[0], chessBoardFlags);
-		found2 = !local_->isStereo() || findChessboardCornersSB(view[1],
-			settings_.boardSize, pointBuf[1], chessBoardFlags);
-#else
-		LOG(INFO) << "Using findChessboardCorners()";
-		found1 = findChessboardCorners(view[0], settings_.boardSize,
-				pointBuf[0], chessBoardFlags);
-		found2 = !local_->isStereo() || findChessboardCorners(view[1],
-				settings_.boardSize, pointBuf[1], chessBoardFlags);
-#endif
-		if (found1 && found2 &&
-				local_->getTimestamp()-prevTimestamp > settings_.delay) {
-			prevTimestamp = local_->getTimestamp();
-#if CV_VERSION_MAJOR >=4
-			// findChessboardCornersSB() does not require subpixel step.
-#else
-			// improve the found corners' coordinate accuracy for chessboard.
-			Mat viewGray;
-			cvtColor(view[0], viewGray, COLOR_BGR2GRAY);
-			cornerSubPix(viewGray, pointBuf[0], Size(winSize, winSize),
-				Size(-1, -1), TermCriteria(
-					TermCriteria::EPS+TermCriteria::COUNT, 30, 0.0001));
-			
-			if (local_->isStereo()) {
-				cvtColor(view[1], viewGray, COLOR_BGR2GRAY);
-				cornerSubPix(viewGray, pointBuf[1], Size(winSize, winSize),
-					Size(-1, -1), TermCriteria(
-						TermCriteria::EPS+TermCriteria::COUNT, 30, 0.0001));
-			}
-#endif
-			imagePoints[0].push_back(pointBuf[0]);
-			if (local_->isStereo()) { imagePoints[1].push_back(pointBuf[1]); }
-			// Draw the corners.
-			
-			drawChessboardCorners(view[0], settings_.boardSize,
-					Mat(pointBuf[0]), found1);
-			if (local_->isStereo())
-				drawChessboardCorners(view[1],settings_.boardSize, Mat(pointBuf[1]), found2);
-		} else {
-			LOG(WARNING) << "No calibration pattern found";
-		}
-
-		imshow("Left", view[0]);
-		if (local_->isStereo()) imshow("Right", view[1]);
-		char key = static_cast<char>(waitKey(50));
+	P_ = P1;
 
-		if (key  == 27)
-			break;
-	}
+	// cv::cuda::remap() works only with CV_32FC1
+	initUndistortRectifyMap(M1, D1, R1, P1, img_size, CV_32FC1, map1.first, map2.first);
+	initUndistortRectifyMap(M2, D2, R2, P2, img_size, CV_32FC1, map1.second, map2.second);
 
 	return true;
 }
 
-void Calibrate::rectifyStereo(cv::Mat &l, cv::Mat &r) {
-	remap(l, l, map1_[0], map2_[0], INTER_LINEAR);
-	remap(r, r, map1_[1], map2_[1], INTER_LINEAR);
-}
-
-bool Calibrate::undistort(cv::Mat &l, cv::Mat &r) {
-	Mat l_tmp, r_tmp;
-	local_->get(l_tmp, r_tmp);
-	if (!calibrated_) return false;
-	if (l_tmp.empty() || r_tmp.empty()) {
-		LOG(ERROR) << "l/r empty in undistort()!!!";
-		return false;
-	}
-	remap(l_tmp, l, map1_[0], map2_[0], INTER_LINEAR);
-	if (local_->isStereo()) remap(r_tmp, r, map1_[1], map2_[1], INTER_LINEAR);
-	return true;
-}
-
-void Calibrate::distort(cv::Mat &rgb, cv::Mat &depth) {
-	remap(rgb, rgb, imap1_, imap2_, INTER_LINEAR);
-	remap(depth, depth, imap1_, imap2_, INTER_LINEAR);
-}
-
-bool Calibrate::rectified(cv::Mat &l, cv::Mat &r) {
-	return undistort(l, r);
+void Calibrate::rectifyStereo(GpuMat &l, GpuMat &r, Stream &stream) {
+	// cv::cuda::remap() can not use same Mat for input and output
+	
+	GpuMat l_tmp(l.size(), l.type());
+	GpuMat r_tmp(r.size(), r.type());
+	cv::cuda::remap(l, l_tmp, map1_gpu_.first, map2_gpu_.first, cv::INTER_LINEAR, 0, cv::Scalar(), stream);
+	cv::cuda::remap(r, r_tmp, map1_gpu_.second, map2_gpu_.second, cv::INTER_LINEAR, 0, cv::Scalar(), stream);
+	stream.waitForCompletion();
+	l = l_tmp;
+	r = r_tmp;
 }
 
 bool Calibrate::isCalibrated() {
 	return calibrated_;
-}
-
-//! [compute_errors]
-static double computeReprojectionErrors(
-		const vector<vector<Point3f> >& objectPoints,
-		const vector<vector<Point2f> >& imagePoints,
-		const vector<Mat>& rvecs, const vector<Mat>& tvecs,
-		const Mat& cameraMatrix , const Mat& distCoeffs,
-		vector<float>& perViewErrors, bool fisheye) {
-	vector<Point2f> imagePoints2;
-	size_t totalPoints = 0;
-	double totalErr = 0;
-	perViewErrors.resize(objectPoints.size());
-
-	for (size_t i = 0; i < objectPoints.size(); ++i) {
-		if (fisheye) {
-			cv::fisheye::projectPoints(objectPoints[i], imagePoints2, rvecs[i],
-					tvecs[i], cameraMatrix, distCoeffs);
-		} else {
-			projectPoints(objectPoints[i], rvecs[i], tvecs[i], cameraMatrix,
-					distCoeffs, imagePoints2);
-		}
-		double err = norm(imagePoints[i], imagePoints2, NORM_L2);
-
-		size_t n = objectPoints[i].size();
-		perViewErrors[i] = static_cast<float>(std::sqrt(err*err/n));
-		totalErr        += err*err;
-		totalPoints     += n;
-	}
-
-	return std::sqrt(totalErr/totalPoints);
-}
-//! [compute_errors]
-//! [board_corners]
-static void calcBoardCornerPositions(Size boardSize, float squareSize,
-		vector<Point3f>& corners, Calibrate::Settings::Pattern patternType) {
-	corners.clear();
-
-	switch (patternType) {
-	case Calibrate::Settings::CHESSBOARD:
-	case Calibrate::Settings::CIRCLES_GRID:
-		for (int i = 0; i < boardSize.height; ++i)
-			for (int j = 0; j < boardSize.width; ++j)
-				corners.push_back(Point3f(j*squareSize, i*squareSize, 0));
-		break;
-
-	case Calibrate::Settings::ASYMMETRIC_CIRCLES_GRID:
-		for (int i = 0; i < boardSize.height; i++)
-			for (int j = 0; j < boardSize.width; j++)
-				corners.push_back(Point3f((2*j + i % 2)*squareSize,
-						i*squareSize, 0));
-		break;
-	default:
-		break;
-	}
-}
-//! [board_corners]
-static bool _runCalibration(const Calibrate::Settings& s, Size& imageSize,
-		Mat& cameraMatrix, Mat& distCoeffs,
-		vector<vector<Point2f> > imagePoints, vector<Mat>& rvecs,
-		vector<Mat>& tvecs, vector<float>& reprojErrs,  double& totalAvgErr,
-		vector<Point3f>& newObjPoints, float grid_width, bool release_object) {
-	cameraMatrix = Mat::eye(3, 3, CV_64F);
-	if (s.flag & CALIB_FIX_ASPECT_RATIO)
-		cameraMatrix.at<double>(0, 0) = s.aspectRatio;
-
-	if (s.useFisheye) {
-		distCoeffs = Mat::zeros(4, 1, CV_64F);
-	} else {
-		distCoeffs = Mat::zeros(8, 1, CV_64F);
-	}
-
-	vector<vector<Point3f> > objectPoints(1);
-	calcBoardCornerPositions(s.boardSize, s.squareSize, objectPoints[0],
-			Calibrate::Settings::CHESSBOARD);
-	objectPoints[0][s.boardSize.width - 1].x = objectPoints[0][0].x +
-			grid_width;
-	newObjPoints = objectPoints[0];
-
-	objectPoints.resize(imagePoints.size(), objectPoints[0]);
-
-	// Find intrinsic and extrinsic camera parameters
-	double rms;
-
-	if (s.useFisheye) {
-		Mat _rvecs, _tvecs;
-		rms = cv::fisheye::calibrate(objectPoints, imagePoints, imageSize,
-				cameraMatrix, distCoeffs, _rvecs, _tvecs, s.flag);
-
-		rvecs.reserve(_rvecs.rows);
-		tvecs.reserve(_tvecs.rows);
-		for (int i = 0; i < static_cast<int>(objectPoints.size()); i++) {
-			rvecs.push_back(_rvecs.row(i));
-			tvecs.push_back(_tvecs.row(i));
-		}
-	} else {
-		rms = calibrateCamera(objectPoints, imagePoints, imageSize,
-								cameraMatrix, distCoeffs, rvecs, tvecs,
-								s.flag | CALIB_USE_LU);
-	}
-
-	LOG(INFO) << "Re-projection error reported by calibrateCamera: "<< rms;
-
-	bool ok = checkRange(cameraMatrix) && checkRange(distCoeffs);
-
-	objectPoints.clear();
-	objectPoints.resize(imagePoints.size(), newObjPoints);
-	totalAvgErr = computeReprojectionErrors(objectPoints, imagePoints, rvecs,
-			tvecs, cameraMatrix, distCoeffs, reprojErrs, s.useFisheye);
-
-	return ok;
-}
-
-//! [run_and_save]
-bool runCalibration(const Calibrate::Settings& s, Size imageSize,
-		Mat& cameraMatrix, Mat& distCoeffs,
-		vector<vector<Point2f> > imagePoints, float grid_width,
-		bool release_object) {
-	vector<Mat> rvecs, tvecs;
-	vector<float> reprojErrs;
-	double totalAvgErr = 0;
-	vector<Point3f> newObjPoints;
-
-	bool ok = _runCalibration(s, imageSize, cameraMatrix, distCoeffs,
-			imagePoints, rvecs, tvecs, reprojErrs, totalAvgErr, newObjPoints,
-			grid_width, release_object);
-	LOG(INFO) << (ok ? "Calibration succeeded" : "Calibration failed")
-		 << ". avg re projection error = " << totalAvgErr;
-
-	return ok;
-}
-//! [run_and_save]
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/components/rgbd-sources/src/calibrate.hpp b/components/rgbd-sources/src/calibrate.hpp
index c3a163a03fb48d6f275246cb6708ab265831ff9c..c967de2e50f1a24560119b0f15b587e1213c8441 100644
--- a/components/rgbd-sources/src/calibrate.hpp
+++ b/components/rgbd-sources/src/calibrate.hpp
@@ -21,117 +21,43 @@ namespace rgbd {
 namespace detail {
 
 /**
- * Manage both performing and applying camera calibration. It will attempt to
- * load any existing cached camera calibration unless explicitely told to
- * redo the calibration.
+ * Manage local calibration details: undistortion, rectification and camera parameters.
+ * Calibration parameters can be acquired using ftl-calibrate app.
  */
 class Calibrate : public ftl::Configurable {
 	public:
-	
-	// TODO(nick) replace or remove this class.
-	class Settings {
-		public:
-		Settings() : calibrationPattern(CHESSBOARD), squareSize(50.0f),
-			nrFrames(30), aspectRatio(1.0f), delay(100), writePoints(false), writeExtrinsics(true),
-			writeGrid(false), calibZeroTangentDist(false), calibFixPrincipalPoint(true),
-			flipVertical(false), showUndistorsed(true), useFisheye(false), fixK1(false),
-			fixK2(false), fixK3(false), fixK4(false), fixK5(false), cameraID(0), atImageList(0),
-			inputType(INVALID), goodInput(false), flag(0) {}
-		enum Pattern { NOT_EXISTING, CHESSBOARD, CIRCLES_GRID, ASYMMETRIC_CIRCLES_GRID };
-		enum InputType { INVALID, CAMERA, VIDEO_FILE, IMAGE_LIST };
-
-		void write(cv::FileStorage& fs) const;
-		void read(ftl::Configurable *node);
-		void validate();
-		//Mat nextImage();
-
-		static bool readStringList( const std::string& filename, std::vector<std::string>& l );
-
-		static bool isListOfImages( const std::string& filename);
-		public:
-		cv::Size boardSize;              // The size of the board -> Number of items by width and height
-		Pattern calibrationPattern;  // One of the Chessboard, circles, or asymmetric circle pattern
-		float squareSize;            // The size of a square in your defined unit (point, millimeter,etc).
-		int nrFrames;                // The number of frames to use from the input for calibration
-		float aspectRatio;           // The aspect ratio
-		float delay;                   // In case of a video input
-		bool writePoints;            // Write detected feature points
-		bool writeExtrinsics;        // Write extrinsic parameters
-		bool writeGrid;              // Write refined 3D target grid points
-		bool calibZeroTangentDist;   // Assume zero tangential distortion
-		bool calibFixPrincipalPoint; // Fix the principal point at the center
-		bool flipVertical;           // Flip the captured images around the horizontal axis
-		std::string outputFileName;       // The name of the file where to write
-		bool showUndistorsed;        // Show undistorted images after calibration
-		std::string input;                // The input ->
-		bool useFisheye;             // use fisheye camera model for calibration
-		bool fixK1;                  // fix K1 distortion coefficient
-		bool fixK2;                  // fix K2 distortion coefficient
-		bool fixK3;                  // fix K3 distortion coefficient
-		bool fixK4;                  // fix K4 distortion coefficient
-		bool fixK5;                  // fix K5 distortion coefficient
-
-		int cameraID;
-		std::vector<std::string> imageList;
-		size_t atImageList;
-		//cv::VideoCapture inputCapture;
-		InputType inputType;
-		bool goodInput;
-		int flag;
-
-		private:
-		std::string patternToUse;
-
-
-	};
-	public:
-	Calibrate(nlohmann::json &config, LocalSource *s);
-	
-	/**
-	 * Perform a new camera calibration. Ignore and replace any existing
-	 * cached calibration.
-	 */
-	bool recalibrate();
-	
-	bool undistort(cv::Mat &l, cv::Mat &r);
-
-	void distort(cv::Mat &rgb, cv::Mat &depth);
+	Calibrate(nlohmann::json &config, cv::Size image_size, cv::cuda::Stream &stream);
 	
 	/**
 	 * Get both left and right images from local source, but with intrinsic
 	 * and extrinsic stereo calibration already applied.
 	 */
-	bool rectified(cv::Mat &l, cv::Mat &r);
+	bool rectified(cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::Stream &stream);
 
 	/**
 	 * Rectify and remove distortions from from images l and r using cv::remap()
 	 */
-	void rectifyStereo(cv::Mat &l, cv::Mat &r);
+	void rectifyStereo(cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::Stream &stream);
 
-	bool isCalibrated();
+	bool isCalibrated(); // TODO ???
 
 	/**
 	 * Get the camera matrix. Used to convert disparity map back to depth and
 	 * a 3D point cloud.
 	 */
 	const cv::Mat &getQ() const { return Q_; }
-	const cv::Mat &getCameraMatrix() const { return r1_; }
+	const cv::Mat &getCameraMatrix() const { return P_; }
 
-	private:
-	bool _recalibrate(std::vector<std::vector<cv::Point2f>> *imagePoints,
-		cv::Mat *cameraMatrix, cv::Mat *distCoeffs, cv::Size *imageSize);
-	cv::Mat _nextImage(size_t cam);
-	bool _loadCalibration();
+private:
+	bool _loadCalibration(cv::Size img_size, std::pair<cv::Mat, cv::Mat> &map1, std::pair<cv::Mat, cv::Mat> &map2);
 	
 	private:
-	LocalSource *local_;
-	Settings settings_;
 	bool calibrated_;
-	std::vector<cv::Mat> map1_;
-	std::vector<cv::Mat> map2_;
-	cv::Mat imap1_;
-	cv::Mat imap2_;
-	cv::Mat r1_;
+
+	std::pair<cv::cuda::GpuMat, cv::cuda::GpuMat> map1_gpu_;
+	std::pair<cv::cuda::GpuMat, cv::cuda::GpuMat> map2_gpu_;
+
+	cv::Mat P_;
 	cv::Mat Q_;
 };
 
diff --git a/components/rgbd-sources/src/disparity.cpp b/components/rgbd-sources/src/disparity.cpp
index 6f413bc2ebf358b6ea0c9cd9ec72f20511e4e9fa..9341a37af6b93201c9a2abad9f1d21ef4847b24f 100644
--- a/components/rgbd-sources/src/disparity.cpp
+++ b/components/rgbd-sources/src/disparity.cpp
@@ -38,9 +38,10 @@ void Disparity::_register(const std::string &n,
 }
 
 // TODO(Nick) Add remaining algorithms
-
+/*
 #include "algorithms/rtcensus.hpp"
 static ftl::rgbd::detail::Disparity::Register rtcensus("rtcensus", ftl::algorithms::RTCensus::create);
+*/
 
 #ifdef HAVE_LIBSGM
 #include "algorithms/fixstars_sgm.hpp"
diff --git a/components/rgbd-sources/src/disparity.hpp b/components/rgbd-sources/src/disparity.hpp
index 838a4a16463aee3c6796d0d7dd2e28180da60616..e7e78b277544afd87870e62ca5b833b15d9cfd6d 100644
--- a/components/rgbd-sources/src/disparity.hpp
+++ b/components/rgbd-sources/src/disparity.hpp
@@ -32,7 +32,7 @@ class Disparity : public ftl::Configurable {
 	 * Pure virtual function representing the actual computation of
 	 * disparity from left and right images to be implemented.
 	 */
-	virtual void compute(const cv::Mat &l, const cv::Mat &r, cv::Mat &disp)=0;
+	virtual void compute(const cv::cuda::GpuMat &l, const cv::cuda::GpuMat &r, cv::cuda::GpuMat &disp, cv::cuda::Stream &stream)=0;
 
 	/**
 	 * Factory registration class.
diff --git a/components/rgbd-sources/src/local.cpp b/components/rgbd-sources/src/local.cpp
index 02bf0d36ef9c1635633d8596c4bc48cf398a3540..410cb9f96c0a1c1ca48cadd52300e25b260c3aad 100644
--- a/components/rgbd-sources/src/local.cpp
+++ b/components/rgbd-sources/src/local.cpp
@@ -214,7 +214,8 @@ bool LocalSource::right(cv::Mat &r) {
 	return true;
 }
 
-bool LocalSource::get(cv::Mat &l, cv::Mat &r) {
+bool LocalSource::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda::Stream &stream) {
+	Mat l, r;
 	if (!camera_a_) return false;
 
 	if (!camera_a_->grab()) {
@@ -231,14 +232,14 @@ bool LocalSource::get(cv::Mat &l, cv::Mat &r) {
 			high_resolution_clock::now().time_since_epoch()).count();
 	
 	// Limit max framerate
-	if (timestamp - timestamp_ < tps_) {
-		sleep_for(milliseconds((int)std::round((tps_ - (timestamp - timestamp_))*1000)));
-	}
+	//if (timestamp - timestamp_ < tps_) {
+	//	sleep_for(milliseconds((int)std::round((tps_ - (timestamp - timestamp_))*1000)));
+	//}
 
 	timestamp_ = timestamp;
 
 	if (camera_b_ || !stereo_) {
-		if (!camera_a_->retrieve(l)) {
+		if (!camera_a_->retrieve(left_)) {
 			LOG(ERROR) << "Unable to read frame from camera A";
 			return false;
 		}
@@ -256,15 +257,17 @@ bool LocalSource::get(cv::Mat &l, cv::Mat &r) {
 		int resx = frame.cols / 2;
 		if (flip_) {
 			r = Mat(frame, Rect(0, 0, resx, frame.rows));
-			l = Mat(frame, Rect(resx, 0, frame.cols-resx, frame.rows));
+			left_ = Mat(frame, Rect(resx, 0, frame.cols-resx, frame.rows));
 		} else {
-			l = Mat(frame, Rect(0, 0, resx, frame.rows));
+			left_ = Mat(frame, Rect(0, 0, resx, frame.rows));
 			r = Mat(frame, Rect(resx, 0, frame.cols-resx, frame.rows));
 		}
 	}
 
 	if (downsize_ != 1.0f) {
-		cv::resize(l, l, cv::Size((int)(l.cols * downsize_), (int)(l.rows * downsize_)),
+		// cv::cuda::resize()
+
+		cv::resize(left_, left_, cv::Size((int)(left_.cols * downsize_), (int)(left_.rows * downsize_)),
 				0, 0, cv::INTER_LINEAR);
 		cv::resize(r, r, cv::Size((int)(r.cols * downsize_), (int)(r.rows * downsize_)),
 				0, 0, cv::INTER_LINEAR);
@@ -278,12 +281,15 @@ bool LocalSource::get(cv::Mat &l, cv::Mat &r) {
 
 	if (flip_v_) {
 		Mat tl, tr;
-		cv::flip(l, tl, 0);
+		cv::flip(left_, tl, 0);
 		cv::flip(r, tr, 0);
-		l = tl;
+		left_ = tl;
 		r = tr;
 	}
 
+	l_out.upload(left_, stream);
+	r_out.upload(r, stream);
+
 	return true;
 }
 
diff --git a/components/rgbd-sources/src/local.hpp b/components/rgbd-sources/src/local.hpp
index 92437b8c42742d11d5a9e05c782816dd0db65b9a..e3fcb91bd585d8090cfb50a8ee83bf49dd78f98d 100644
--- a/components/rgbd-sources/src/local.hpp
+++ b/components/rgbd-sources/src/local.hpp
@@ -2,6 +2,7 @@
 #define _FTL_LOCAL_HPP_
 
 #include <ftl/configurable.hpp>
+#include <ftl/cuda_common.hpp>
 #include <string>
 #include <nlohmann/json.hpp>
 
@@ -21,10 +22,12 @@ class LocalSource : public Configurable {
 	
 	bool left(cv::Mat &m);
 	bool right(cv::Mat &m);
-	bool get(cv::Mat &l, cv::Mat &r);
+	bool get(cv::cuda::GpuMat &l, cv::cuda::GpuMat &r, cv::cuda::Stream &stream);
 
 	unsigned int width() const { return width_; }
 	unsigned int height() const { return height_; }
+
+	cv::Mat &cachedLeft() { return left_; }
 	
 	//void setFramerate(float fps);
 	//float getFramerate() const;
@@ -46,6 +49,7 @@ class LocalSource : public Configurable {
 	cv::VideoCapture *camera_b_;
 	unsigned int width_;
 	unsigned int height_;
+	cv::Mat left_;
 };
 
 }
diff --git a/components/rgbd-sources/src/stereovideo.cpp b/components/rgbd-sources/src/stereovideo.cpp
index 5180dddd7c44710198b15b2a81ab517c1d768570..b75380746942b3af2af1c5fca11b43afa848cf6e 100644
--- a/components/rgbd-sources/src/stereovideo.cpp
+++ b/components/rgbd-sources/src/stereovideo.cpp
@@ -34,32 +34,29 @@ void StereoVideoSource::init(const string &file) {
 	if (ftl::is_video(file)) {
 		// Load video file
 		LOG(INFO) << "Using video file...";
-		//lsrc_ = new LocalSource(file, config["source"]);
 		lsrc_ = ftl::create<LocalSource>(host_, "feed", file);
-	} else if (file != "") {
+	}
+	else if (file != "") {
 		auto vid = ftl::locateFile("video.mp4");
 		if (!vid) {
 			LOG(FATAL) << "No video.mp4 file found in provided paths (" << file << ")";
 		} else {
 			LOG(INFO) << "Using test directory...";
-			//lsrc_ = new LocalSource(*vid, config["source"]);
 			lsrc_ = ftl::create<LocalSource>(host_, "feed", *vid);
 		}
-	} else {
+	}
+	else {
 		// Use cameras
 		LOG(INFO) << "Using cameras...";
-		//lsrc_ = new LocalSource(config["source"]);
 		lsrc_ = ftl::create<LocalSource>(host_, "feed");
 	}
 
-	//calib_ = new Calibrate(lsrc_, ftl::resolve(config["calibration"]));
-	calib_ = ftl::create<Calibrate>(host_, "calibration", lsrc_);
+	cv::Size size = cv::Size(lsrc_->width(), lsrc_->height());
+	calib_ = ftl::create<Calibrate>(host_, "calibration", size, stream_);
 
-	if (host_->value("calibrate", false)) calib_->recalibrate();
 	if (!calib_->isCalibrated()) LOG(WARNING) << "Cameras are not calibrated!";
-	else LOG(INFO) << "Calibration initiated.";
 
-	// Generate camera parameters from Q matrix
+	// Generate camera parameters from camera matrix
 	cv::Mat q = calib_->getCameraMatrix();
 	params_ = {
 		q.at<double>(0,0),	// Fx
@@ -74,9 +71,14 @@ void StereoVideoSource::init(const string &file) {
 	
 	// left and right masks (areas outside rectified images)
 	// only left mask used
-	cv::Mat mask_r(lsrc_->height(), lsrc_->width(), CV_8U, 255);
-	cv::Mat mask_l(lsrc_->height(), lsrc_->width(), CV_8U, 255);
-	calib_->rectifyStereo(mask_l, mask_r);
+	cv::cuda::GpuMat mask_r_gpu(lsrc_->height(), lsrc_->width(), CV_8U, 255);
+	cv::cuda::GpuMat mask_l_gpu(lsrc_->height(), lsrc_->width(), CV_8U, 255);
+	
+	calib_->rectifyStereo(mask_l_gpu, mask_r_gpu, stream_);
+	stream_.waitForCompletion();
+
+	cv::Mat mask_l;
+	mask_l_gpu.download(mask_l);
 	mask_l_ = (mask_l == 0);
 	
 	disp_ = Disparity::create(host_, "disparity");
@@ -87,38 +89,27 @@ void StereoVideoSource::init(const string &file) {
 	ready_ = true;
 }
 
-static void disparityToDepth(const cv::Mat &disparity, cv::Mat &depth, const cv::Mat &q) {
-	cv::Matx44d _Q;
-    q.convertTo(_Q, CV_64F);
-
-	if (depth.empty()) depth = cv::Mat(disparity.size(), CV_32F);
-
-	for( int y = 0; y < disparity.rows; y++ ) {
-		const float *sptr = disparity.ptr<float>(y);
-		float *dptr = depth.ptr<float>(y);
+static void disparityToDepth(const cv::cuda::GpuMat &disparity, cv::cuda::GpuMat &depth,
+							 const cv::Mat &Q, cv::cuda::Stream &stream) {
+	// Q(3, 2) = -1/Tx
+	// Q(2, 3) = f
 
-		for( int x = 0; x < disparity.cols; x++ ) {
-			double d = sptr[x];
-			cv::Vec4d homg_pt = _Q*cv::Vec4d(x, y, d, 1.0);
-			//dptr[x] = Vec3d(homg_pt.val);
-			//dptr[x] /= homg_pt[3];
-			dptr[x] = (float)(homg_pt[2] / homg_pt[3]) / 1000.0f; // Depth in meters
-
-			if( fabs(d) <= FLT_EPSILON )
-				dptr[x] = 1000.0f;
-		}
-	}
+	double val = (1.0f / Q.at<double>(3, 2)) * Q.at<double>(2, 3);
+	cv::cuda::divide(val, disparity, depth, 1.0f / 1000.0f, -1, stream);
 }
 
 bool StereoVideoSource::grab() {
-	calib_->rectified(left_, right_);
-
-	cv::Mat disp;
-	disp_->compute(left_, right_, disp);
-
-	//unique_lock<mutex> lk(mutex_);
-	left_.copyTo(rgb_);
-	disparityToDepth(disp, depth_, calib_->getQ());
+	lsrc_->get(left_, right_, stream_);
+	if (depth_tmp_.empty()) depth_tmp_ = cv::cuda::GpuMat(left_.size(), CV_32FC1);
+	if (disp_tmp_.empty()) disp_tmp_ = cv::cuda::GpuMat(left_.size(), CV_32FC1);
+	calib_->rectifyStereo(left_, right_, stream_);
+	disp_->compute(left_, right_, disp_tmp_, stream_);
+	disparityToDepth(disp_tmp_, depth_tmp_, calib_->getQ(), stream_);
+	//left_.download(rgb_, stream_);
+	rgb_ = lsrc_->cachedLeft();
+	depth_tmp_.download(depth_, stream_);
+
+	stream_.waitForCompletion();	
 	return true;
 }
 
diff --git a/components/rgbd-sources/src/stereovideo.hpp b/components/rgbd-sources/src/stereovideo.hpp
index 7979b25bc781a19ad650206b259a2575d07e4d7c..a5db20fb99ac3d01c5d36302666d9b0cd51d4101 100644
--- a/components/rgbd-sources/src/stereovideo.hpp
+++ b/components/rgbd-sources/src/stereovideo.hpp
@@ -35,9 +35,16 @@ class StereoVideoSource : public detail::Source {
 	LocalSource *lsrc_;
 	Calibrate *calib_;
 	Disparity *disp_;
+	
 	bool ready_;
-	cv::Mat left_;
-	cv::Mat right_;
+	
+	cv::cuda::Stream stream_;
+
+	cv::cuda::GpuMat left_;
+	cv::cuda::GpuMat right_;
+	cv::cuda::GpuMat disp_tmp_;
+	cv::cuda::GpuMat depth_tmp_;
+	
 	cv::Mat mask_l_;
 
 	void init(const std::string &);