diff --git a/applications/vision/src/main.cpp b/applications/vision/src/main.cpp
index d855dd93500b79f66a5cd2cf92a48ef2adb13930..57606034b0ef8c6d67fcc92397563946b64c1d9c 100644
--- a/applications/vision/src/main.cpp
+++ b/applications/vision/src/main.cpp
@@ -197,7 +197,40 @@ static void run(ftl::Configurable *root) {
 	delete net;
 }
 
+/**
+ * Bind every worker thread in ftl::pool to the configured CUDA device.
+ *
+ * One job is queued per pool thread. Each job calls ftl::cuda::setDevice()
+ * and then waits until all jobs have started, which keeps a thread that has
+ * already run one job from picking up another (assuming ftl::pool runs each
+ * job on one of its worker threads). Returns once every job has finished
+ * using the shared counters.
+ *
+ * NOTE(review): no call site is visible in this patch; confirm it is invoked.
+ */
+static void threadSetCUDADevice() {
+	const int pool_size = static_cast<int>(ftl::pool.size());
+	std::atomic<int> arrived{0};   // jobs that have set their device
+	std::atomic<int> released{0};  // jobs that have left the wait loop
+
+	for (int i=0; i<pool_size; ++i) {
+		ftl::pool.push([&arrived, &released, pool_size](int id) {
+			ftl::cuda::setDevice();
+			++arrived;
+			// Hold this worker until every job has started.
+			while (arrived < pool_size) std::this_thread::sleep_for(std::chrono::milliseconds(10));
+			// Must be this job's last access to the captured counters.
+			++released;
+		});
+	}
+
+	// The counters live in this stack frame: waiting on 'arrived' alone would
+	// let us return while jobs are still polling it through a dangling reference.
+	while (released < pool_size) std::this_thread::sleep_for(std::chrono::milliseconds(10));
+}
+
 int main(int argc, char **argv) {
+
 #ifdef HAVE_PYLON
 	Pylon::PylonAutoInitTerm autoInitTerm;
 #endif
@@ -207,6 +240,9 @@ int main(int argc, char **argv) {
 #endif
 	std::cout << "FTL Vision Node " << FTL_VERSION_LONG << std::endl;
 	auto root = ftl::configure(argc, argv, "vision_default");
+
+	// Use other GPU if available.
+	//ftl::cuda::setDevice(ftl::cuda::deviceCount()-1);
 
 	std::cout << "Loading..." << std::endl;
 	run(root);
diff --git a/components/common/cpp/src/cuda_common.cpp b/components/common/cpp/src/cuda_common.cpp
index 2eb5d19f829c999c04cf4ec1e5c28bbd699f0521..ede4a998a4a104539d1b13ce32f6f278c2f3a8b2 100644
--- a/components/common/cpp/src/cuda_common.cpp
+++ b/components/common/cpp/src/cuda_common.cpp
@@ -10,6 +10,8 @@ static int dev_count = 0;
 static std::vector<cudaDeviceProp> properties;
 
 bool ftl::cuda::initialise() {
+	if (dev_count > 0) return true;
+
 	// Do an initial CUDA check
 	cudaSafeCall(cudaGetDeviceCount(&dev_count));
 	CHECK_GE(dev_count, 1) << "No CUDA devices found";
@@ -50,6 +52,7 @@ void ftl::cuda::setDevice(int id) {
 }
 
 void ftl::cuda::setDevice() {
+	LOG(INFO) << "Using CUDA Device " << dev_to_use;
 	cudaSafeCall(cudaSetDevice(dev_to_use));
 }
 
diff --git a/components/rgbd-sources/src/group.cpp b/components/rgbd-sources/src/group.cpp
index b050db96f9720110f7e5f506829b97e532388635..55fd3c411fcbd0f51f8bdfc319a9fa4e7e4804f6 100644
--- a/components/rgbd-sources/src/group.cpp
+++ b/components/rgbd-sources/src/group.cpp
@@ -120,7 +120,7 @@ void Group::onFrameSet(const ftl::rgbd::VideoCallback &cb) {
 
 				//ftl::pool.push([this,s,ts](int id) {
 					_retrieveJob(s);
-					LOG(INFO) << "Retrieve latency: " << ftl::timer::get_time()-ts;
+					//LOG(INFO) << "Retrieve latency: " << ftl::timer::get_time()-ts;
 					--jobs_;
 					_dispatchJob(s, ts);
 				//});
diff --git a/components/rgbd-sources/src/source.cpp b/components/rgbd-sources/src/source.cpp
index 37cff385a3cc29e5fedb69632d206f4b22e4ea53..2a9af58891810e3434416406b3fded6aef35c80c 100644
--- a/components/rgbd-sources/src/source.cpp
+++ b/components/rgbd-sources/src/source.cpp
@@ -195,7 +195,10 @@ bool Source::retrieve() {
 
 bool Source::dispatch(int64_t ts) {
 	if (!callback_) return false;
-	if (is_dispatching || is_retrieving) return false;
+	if (is_dispatching || is_retrieving) {
+		LOG(WARNING) << "Previous dispatch not completed";
+		return false;
+	}
 	is_dispatching = true;
 	_swap();
 	ftl::pool.push([this,ts](int id) {
diff --git a/components/rgbd-sources/src/sources/stereovideo/pylon.cpp b/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
index 43975d149410a8b8d8f6c43cbb475b198bacd65a..b8155489ed9f1a32aa1e7abbc297357398da2070 100644
--- a/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
+++ b/components/rgbd-sources/src/sources/stereovideo/pylon.cpp
@@ -124,6 +124,10 @@ void PylonDevice::_configureCamera(CBaslerUniversalInstantCamera *cam) {
 bool PylonDevice::grab() {
 	if (!isReady()) return false;
 
+	//int dev;
+	//cudaGetDevice(&dev);
+	//LOG(INFO) << "Current cuda device = " << dev;
+
 	try {
 		FTL_Profile("Frame Capture", 0.001);
 		if (rcam_) rcam_->WaitForFrameTriggerReady( 30, Pylon::TimeoutHandling_ThrowException);
@@ -152,6 +156,12 @@ bool PylonDevice::get(cv::cuda::GpuMat &l_out, cv::cuda::GpuMat &r_out, cv::cuda
 	Mat &lfull = (!hasHigherRes()) ? l : hres;
 	Mat &rfull = (!hasHigherRes()) ? r : rtmp_;
 
+	//ftl::cuda::setDevice();
+
+	//int dev;
+	//cudaGetDevice(&dev);
+	//LOG(INFO) << "Current cuda device = " << dev;
+
 	try {
 		FTL_Profile("Frame Retrieve", 0.005);
 		std::future<bool> future_b;