diff --git a/cv-node/CMakeLists.txt b/cv-node/CMakeLists.txt index c4cb1e38dae435560cc4a564c4071f98cd802993..2b2d24cd36476860793585e05deb82d42a2ba884 100644 --- a/cv-node/CMakeLists.txt +++ b/cv-node/CMakeLists.txt @@ -76,6 +76,7 @@ set(CVNODESRC src/local.cpp src/sync.cpp src/disparity.cpp + src/middlebury.cpp src/algorithms/rtcensus.cpp src/algorithms/opencv_sgbm.cpp src/algorithms/opencv_bm.cpp diff --git a/cv-node/config/config.json b/cv-node/config/config.json index a634c12ff4b77010ad380e4a322c77d8a7b10317..a0bf49e5024c2287fa407c6981de0fa4cfaebae1 100644 --- a/cv-node/config/config.json +++ b/cv-node/config/config.json @@ -1,4 +1,9 @@ { + "middlebury": { + "dataset": "", + "threshold": 10.0, + "scale": 0.25 + }, "source": { "flip": false, "nostereo": false, diff --git a/cv-node/include/ftl/middlebury.hpp b/cv-node/include/ftl/middlebury.hpp index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..5caa89f6a388b127d639f0995f500b8a7a2bb87e 100644 --- a/cv-node/include/ftl/middlebury.hpp +++ b/cv-node/include/ftl/middlebury.hpp @@ -0,0 +1,21 @@ +#ifndef _FTL_MIDDLEBURY_HPP_ +#define _FTL_MIDDLEBURY_HPP_ + +#include <opencv2/opencv.hpp> +#include <nlohmann/json.hpp> +#include <string> + +namespace ftl { +namespace middlebury { + void test(nlohmann::json &config); + + void evaldisp(const cv::Mat &disp, const cv::Mat &gtdisp, + const cv::Mat &mask, float badthresh, int maxdisp, int rounddisp); + + void readFilePFM(cv::Mat &img, const std::string &filename); + void writeFilePFM(const cv::Mat &img, const char* filename, float scalefactor=1/255.0); +} +} + +#endif // _FTL_MIDDLEBURY_HPP_ + diff --git a/cv-node/src/algorithms/rtcensus.cu b/cv-node/src/algorithms/rtcensus.cu index 0b888631d1ddc81bee13546bf82e0b34748ec6a7..3151655aefca4901c9850b72f9f92c72754103ff 100644 --- a/cv-node/src/algorithms/rtcensus.cu +++ b/cv-node/src/algorithms/rtcensus.cu @@ -32,16 +32,16 @@ namespace gpu { * Sparse 16x16 census (so 8x8) creating a 64bit mask * (14) & (15), based upon (9) */ -__device__ 
uint64_t sparse_census(unsigned char *arr, size_t u, size_t v, size_t w) { +__device__ uint64_t sparse_census(cudaTextureObject_t tex, int u, int v) { uint64_t r = 0; - unsigned char t = arr[v*w+u]; + unsigned char t = tex2D<unsigned char>(tex, u,v); for (int m=-7; m<=7; m+=2) { - auto start_ix = (v + m)*w + u; + //auto start_ix = (v + m)*w + u; for (int n=-7; n<=7; n+=2) { r <<= 1; - r |= XHI(t, arr[start_ix+n]); + r |= XHI(t, tex2D<unsigned char>(tex, u+n, v+m)); } } @@ -63,22 +63,20 @@ __device__ float fit_parabola(size_t pi, uint16_t p, uint16_t pl, uint16_t pr) { /* * Calculate census mask for left and right images together. */ -__global__ void census_kernel(PtrStepSzb l, PtrStepSzb r, uint64_t *census) { +__global__ void census_kernel(cudaTextureObject_t l, cudaTextureObject_t r, int w, int h, uint64_t *census) { //extern __shared__ uint64_t census[]; - size_t u = (blockIdx.x * BLOCK_W + threadIdx.x + RADIUS); - size_t v_start = blockIdx.y * ROWSperTHREAD + RADIUS; - size_t v_end = v_start + ROWSperTHREAD; - - if (v_end >= l.rows) v_end = l.rows; - if (u >= l.cols) return; + int u = (blockIdx.x * BLOCK_W + threadIdx.x + RADIUS); + int v_start = blockIdx.y * ROWSperTHREAD + RADIUS; + int v_end = v_start + ROWSperTHREAD; - size_t width = l.cols; + if (v_end >= h) v_end = h; + if (u >= w) return; - for (size_t v=v_start; v<v_end; v++) { - size_t ix = (u + v*width) * 2; - uint64_t cenL = sparse_census(l.data, u, v, l.step); - uint64_t cenR = sparse_census(r.data, u, v, r.step); + for (int v=v_start; v<v_end; v++) { + int ix = (u + v*w) * 2; + uint64_t cenL = sparse_census(l, u, v); + uint64_t cenR = sparse_census(r, u, v); census[ix] = cenL; census[ix + 1] = cenR; @@ -91,7 +89,7 @@ __global__ void census_kernel(PtrStepSzb l, PtrStepSzb r, uint64_t *census) { __global__ void disp_kernel(float *disp_l, float *disp_r, size_t width, size_t height, uint64_t *census, size_t ds) { //extern __shared__ uint64_t cache[]; - const int gamma = 100; + const int gamma = 5; 
size_t u = (blockIdx.x * BLOCK_W) + threadIdx.x + RADIUS2; size_t v_start = (blockIdx.y * ROWSperTHREAD) + RADIUS2; @@ -235,9 +233,37 @@ void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<flo cudaMalloc(&disp_l, sizeof(float)*l.cols*l.rows); cudaMalloc(&disp_r, sizeof(float)*l.cols*l.rows); + // Make textures + cudaResourceDesc resDescL; + memset(&resDescL, 0, sizeof(resDescL)); + resDescL.resType = cudaResourceTypePitch2D; + resDescL.res.pitch2D.devPtr = l.data; + resDescL.res.pitch2D.pitchInBytes = l.step; + resDescL.res.pitch2D.desc = cudaCreateChannelDesc<unsigned char>(); + resDescL.res.pitch2D.width = l.cols; + resDescL.res.pitch2D.height = l.rows; + + cudaResourceDesc resDescR; + memset(&resDescR, 0, sizeof(resDescR)); + resDescR.resType = cudaResourceTypePitch2D; + resDescR.res.pitch2D.devPtr = r.data; + resDescR.res.pitch2D.pitchInBytes = r.step; + resDescR.res.pitch2D.desc = cudaCreateChannelDesc<unsigned char>(); + resDescR.res.pitch2D.width = r.cols; + resDescR.res.pitch2D.height = r.rows; + + cudaTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.readMode = cudaReadModeElementType; + + cudaTextureObject_t texLeft = 0; + cudaCreateTextureObject(&texLeft, &resDescL, &texDesc, NULL); + cudaTextureObject_t texRight = 0; + cudaCreateTextureObject(&texRight, &resDescR, &texDesc, NULL); + //size_t smem_size = (2 * l.cols * l.rows) * sizeof(uint64_t); - census_kernel<<<grid, threads>>>(l, r, census); + census_kernel<<<grid, threads>>>(texLeft, texRight, l.cols, l.rows, census); cudaSafeCall( cudaGetLastError() ); grid.x = cv::cuda::device::divUp(l.cols - 2 * RADIUS2, BLOCK_W); @@ -250,12 +276,14 @@ void rtcensus_call(const PtrStepSzb &l, const PtrStepSzb &r, const PtrStepSz<flo consistency_kernel<<<grid, threads>>>(disp_l, disp_r, disp); cudaSafeCall( cudaGetLastError() ); + //if (&stream == Stream::Null()) + cudaSafeCall( cudaDeviceSynchronize() ); + + cudaSafeCall( cudaDestroyTextureObject (texLeft) ); + 
cudaSafeCall( cudaDestroyTextureObject (texRight) ); cudaFree(disp_r); cudaFree(disp_l); cudaFree(census); - - //if (&stream == Stream::Null()) - cudaSafeCall( cudaDeviceSynchronize() ); } }; diff --git a/cv-node/src/main.cpp b/cv-node/src/main.cpp index a5498bdc888d433121157c9bdaa8ec03d08d0589..4791afc7e7216a4d8983093b365b7ab38d2c5fd7 100644 --- a/cv-node/src/main.cpp +++ b/cv-node/src/main.cpp @@ -3,6 +3,7 @@ #include <ftl/synched.hpp> #include <ftl/calibrate.hpp> #include <ftl/disparity.hpp> +#include <ftl/middlebury.hpp> #include <nlohmann/json.hpp> #include "opencv2/imgproc.hpp" @@ -85,23 +86,13 @@ static void process_options(const map<string,string> &opts) { } } -int main(int argc, char **argv) { - argc--; - argv++; - - // Process Arguments - auto options = read_options(&argv, &argc); - if (!findConfiguration(options["config"])) { - LOG(FATAL) << "Could not find any configuration!"; - } - process_options(options); - +static void run(const string &file) { // TODO Initiate the network LocalSource *lsrc; - if (argc) { + if (file != "") { // Load video file - lsrc = new LocalSource(argv[0], config["source"]); + lsrc = new LocalSource(file, config["source"]); } else { // Use cameras lsrc = new LocalSource(config["source"]); @@ -221,3 +212,21 @@ int main(int argc, char **argv) { } } +int main(int argc, char **argv) { + argc--; + argv++; + + // Process Arguments + auto options = read_options(&argv, &argc); + if (!findConfiguration(options["config"])) { + LOG(FATAL) << "Could not find any configuration!"; + } + process_options(options); + + if (config["middlebury"]["dataset"] == "") { + run((argc > 0) ? 
argv[0] : ""); + } else { + ftl::middlebury::test(config); + } +} + diff --git a/cv-node/src/middlebury.cpp b/cv-node/src/middlebury.cpp index 596d7a9a3258e95fee7f57959039a9ba9150d351..b8b01e267b02e9ce02583ade5bbca96707cc1e2f 100644 --- a/cv-node/src/middlebury.cpp +++ b/cv-node/src/middlebury.cpp @@ -1,7 +1,15 @@ #include <ftl/middlebury.hpp> +#include <glog/logging.h> +#include <ftl/disparity.hpp> + +#include <string> +#include <algorithm> using cv::Mat; using cv::Size; +using std::string; +using std::min; +using std::max; static void skip_comment(FILE *fp) { // skip comment lines in the headers of pnm files @@ -30,7 +38,7 @@ static void read_header(FILE *fp, const char *imtype, char c1, char c2, char c; if (getc(fp) != c1 || getc(fp) != c2) - throw CError("ReadFilePGM: wrong magic code for %s file", imtype); + LOG(FATAL) << "ReadFilePGM: wrong magic code for " << imtype << " file"; skip_space(fp); skip_comment(fp); skip_space(fp); @@ -47,9 +55,9 @@ static void read_header(FILE *fp, const char *imtype, char c1, char c2, c = getc(fp); if (c != '\n') { if (c == ' ' || c == '\t' || c == '\r') - throw CError("newline expected in file after image height"); + LOG(FATAL) << "newline expected in file after image height"; else - throw CError("whitespace expected in file after image height"); + LOG(FATAL) << "whitespace expected in file after image height"; } } @@ -62,12 +70,12 @@ static int littleendian() { // 1-band PFM image, see http://netpbm.sourceforge.net/doc/pfm.html // 3-band not yet supported -void ftl::middlebury::readFilePFM(Mat &img, const char* filename) +void ftl::middlebury::readFilePFM(Mat &img, const string &filename) { // Open the file and read the header - FILE *fp = fopen(filename, "rb"); + FILE *fp = fopen(filename.c_str(), "rb"); if (fp == 0) - throw CError("ReadFilePFM: could not open %s", filename); + LOG(FATAL) << "ReadFilePFM: could not open " << filename; int width, height, nBands; read_header(fp, "PFM", 'P', 'f', &width, &height, &nBands, 0); 
@@ -83,16 +91,15 @@ void ftl::middlebury::readFilePFM(Mat &img, const char* filename) c = getc(fp); if (c != '\n') { if (c == ' ' || c == '\t' || c == '\r') - throw CError("newline expected in file after scale factor"); + LOG(FATAL) << "newline expected in file after scale factor"; else - throw CError("whitespace expected in file after scale factor"); + LOG(FATAL) << "whitespace expected in file after scale factor"; } - - // Set the image shape - Size sh(width, height, 1); // Allocate the image if necessary - img.ReAllocate(sh); + img = Mat(height, width, CV_32FC1); + // Set the image shape + //Size sh = img.size(); int littleEndianFile = (scalef < 0); int littleEndianMachine = littleendian(); @@ -102,12 +109,12 @@ void ftl::middlebury::readFilePFM(Mat &img, const char* filename) for (int y = height-1; y >= 0; y--) { // PFM stores rows top-to-bottom!!!! int n = width; - float* ptr = (float *) img.PixelAddress(0, y, 0); + float* ptr = &img.at<float>(y, 0, 0); if ((int)fread(ptr, sizeof(float), n, fp) != n) - throw CError("ReadFilePFM(%s): file is too short", filename); + LOG(FATAL) << "ReadFilePFM(" << filename << "): file is too short"; if (needSwap) { // if endianness doesn't agree, swap bytes - uchar* ptr = (uchar *) img.PixelAddress(0, y, 0); + uchar* ptr = (uchar *)&img.at<uchar>(y, 0, 0); int x = 0; uchar tmp = 0; while (x < n) { @@ -119,23 +126,23 @@ void ftl::middlebury::readFilePFM(Mat &img, const char* filename) } } if (fclose(fp)) - throw CError("ReadFilePGM(%s): error closing file", filename); + LOG(FATAL) << "ReadFilePGM(" << filename << "): error closing file"; } // 1-band PFM image, see http://netpbm.sourceforge.net/doc/pfm.html // 3-band not yet supported -void ftl::middlebury::writeFilePFM(const Mat &img, const char* filename, float scalefactor=1/255.0) +void ftl::middlebury::writeFilePFM(const Mat &img, const char* filename, float scalefactor) { // Write a PFM file - CShape sh = img.Shape(); - int nBands = sh.nBands; + Size sh = img.size(); + int 
nBands = img.channels(); if (nBands != 1) - throw CError("WriteFilePFM(%s): can only write 1-band image as pfm for now", filename); + LOG(FATAL) << "WriteFilePFM(" << filename << "): can only write 1-band image as pfm for now"; // Open the file FILE *stream = fopen(filename, "wb"); if (stream == 0) - throw CError("WriteFilePFM: could not open %s", filename); + LOG(FATAL) << "WriteFilePFM: could not open " << filename; // sign of scalefact indicates endianness, see pfms specs if (littleendian()) @@ -147,14 +154,14 @@ void ftl::middlebury::writeFilePFM(const Mat &img, const char* filename, float s int n = sh.width; // write rows -- pfm stores rows in inverse order! for (int y = sh.height-1; y >= 0; y--) { - float* ptr = (float *)img.PixelAddress(0, y, 0); + const float* ptr = &img.at<float>(0, y, 0); if ((int)fwrite(ptr, sizeof(float), n, stream) != n) - throw CError("WriteFilePFM(%s): file is too short", filename); + LOG(FATAL) << "WriteFilePFM(" << filename << "): file is too short"; } // close file if (fclose(stream)) - throw CError("WriteFilePFM(%s): error closing file", filename); + LOG(FATAL) << "WriteFilePFM(" << filename << "): error closing file"; } void ftl::middlebury::evaldisp(const Mat &disp, const Mat &gtdisp, const Mat &mask, float badthresh, int maxdisp, int rounddisp) @@ -184,19 +191,19 @@ void ftl::middlebury::evaldisp(const Mat &disp, const Mat &gtdisp, const Mat &ma float serr = 0; for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { - float gt = gtdisp.at(x, y, 0); + float gt = gtdisp.at<float>(y, x, 0); if (gt == INFINITY) // unknown continue; - float d = scale * disp.at(x / scale, y / scale, 0); + float d = scale * disp.at<float>(y / scale, x / scale, 0); int valid = (d != INFINITY); if (valid) { float maxd = scale * maxdisp; // max disp range - d = __max(0, __min(maxd, d)); // clip disps to max disp range + d = max(0.0f, min(maxd, d)); // clip disps to max disp range } if (valid && rounddisp) d = round(d); float err = fabs(d - gt); 
- if (usemask && mask.at(x, y, 0) != 255) { // don't evaluate pixel + if (usemask && mask.at<float>(y, x, 0) != 255) { // don't evaluate pixel } else { n++; if (valid) { @@ -214,24 +221,31 @@ void ftl::middlebury::evaldisp(const Mat &disp, const Mat &gtdisp, const Mat &ma float invalidpercent = 100.0*invalid/n; float totalbadpercent = 100.0*(bad+invalid)/n; float avgErr = serr / (n - invalid); // CHANGED 10/14/2014 -- was: serr / n - //printf("mask bad%.1f invalid totbad avgErr\n", badthresh); + printf("mask bad%.1f invalid totbad avgErr\n", badthresh); printf("%4.1f %6.2f %6.2f %6.2f %6.2f\n", 100.0*n/(width * height), badpercent, invalidpercent, totalbadpercent, avgErr); } void ftl::middlebury::test(nlohmann::json &config) { // Load dataset images - Mat l = imread((string)config["middlebury"]["dataset"] + "/im0.png"); - Mat r = imread((string)config["middlebury"]["dataset"] + "/im1.png"); + Mat l = cv::imread((string)config["middlebury"]["dataset"] + "/im0.png"); + Mat r = cv::imread((string)config["middlebury"]["dataset"] + "/im1.png"); // Load ground truth Mat gt; - readFilePFM(gt, (string)config["middlebury"]["dataset"] + "disp0.pfm"); + readFilePFM(gt, (string)config["middlebury"]["dataset"] + "/disp0.pfm"); + + if ((float)config["middlebury"]["scale"] != 1.0f) { + float scale = (float)config["middlebury"]["scale"]; + //cv::resize(gt, gt, cv::Size(gt.cols * scale,gt.rows * scale), 0, 0, cv::INTER_LINEAR); + cv::resize(l, l, cv::Size(l.cols * scale,l.rows * scale), 0, 0, cv::INTER_LINEAR); + cv::resize(r, r, cv::Size(r.cols * scale,r.rows * scale), 0, 0, cv::INTER_LINEAR); + } // Run algorithm - auto disparity = Disparity::create(config["disparity"]); - cvtColor(l, l, COLOR_BGR2GRAY); - cvtColor(r, r, COLOR_BGR2GRAY); + auto disparity = ftl::Disparity::create(config["disparity"]); + cvtColor(l, l, cv::COLOR_BGR2GRAY); + cvtColor(r, r, cv::COLOR_BGR2GRAY); Mat disp; disparity->compute(l,r,disp); @@ -240,9 +254,27 @@ void ftl::middlebury::test(nlohmann::json 
&config) { // Display results evaldisp(disp, gt, Mat(), (float)config["middlebury"]["threshold"], (int)config["disparity"]["maximum"], 0); + if (gt.cols > 1600) { + cv::resize(gt, gt, cv::Size(gt.cols * 0.25,gt.rows * 0.25), 0, 0, cv::INTER_LINEAR); + } + if (disp.cols > 1600) { + cv::resize(disp, disp, cv::Size(disp.cols * 0.25,disp.rows * 0.25), 0, 0, cv::INTER_LINEAR); + } + + double mindisp; + double maxdisp; + Mat mask; + threshold(disp,mask,10000.0, 255, cv::THRESH_BINARY_INV); + normalize(mask, mask, 0, 255, cv::NORM_MINMAX, CV_8U); + cv::minMaxLoc(disp, &mindisp, &maxdisp, 0, 0, mask); + + gt = gt / 330.0; // TODO Read from calib.txt + disp = disp / maxdisp; imshow("Ground Truth", gt); imshow("Disparity", disp); + while (cv::waitKey(10) != 27); + /*cv::putText(yourImageMat, "Here is some text", cv::Point(5,5), // Coordinates