Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
ftl
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Nicolas Pope
ftl
Merge requests
!114
Ongoing
#133
improvements
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Ongoing
#133
improvements
feature/133/ilw
into
master
Overview
0
Commits
36
Pipelines
1
Changes
1
Merged
Nicolas Pope
requested to merge
feature/133/ilw
into
master
5 years ago
Overview
0
Commits
36
Pipelines
1
Changes
1
Expand
0
0
Merge request reports
Viewing commit
2a7a44a2
Prev
Next
Show latest version
1 file
+
12
−
7
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
2a7a44a2
Add cost ratio
· 2a7a44a2
Nicolas Pope
authored
5 years ago
applications/reconstruct/src/ilw/ilw.cu
0 → 100644
+
205
−
0
Options
#include
"ilw_cuda.hpp"
#include
<ftl/cuda/weighting.hpp>
using
ftl
::
cuda
::
TextureObject
;
using
ftl
::
rgbd
::
Camera
;
#define WARP_SIZE 32
#define T_PER_BLOCK 8
#define FULL_MASK 0xffffffff
/**
 * Warp-wide minimum using an XOR (butterfly) shuffle reduction.
 *
 * The shuffles are issued with FULL_MASK, so all WARP_SIZE lanes of the warp
 * must call this function together. After the final step every lane holds
 * the same result.
 *
 * @param e This lane's candidate value.
 * @return The smallest `e` supplied by any lane of the warp.
 */
__device__ inline float warpMin(float e) {
	// Halve the exchange distance each round: 16, 8, 4, 2, 1.
	for (int offset = WARP_SIZE >> 1; offset > 0; offset >>= 1) {
		e = min(e, __shfl_xor_sync(FULL_MASK, e, offset, WARP_SIZE));
	}
	return e;
}
/**
 * Warp-wide sum using an XOR (butterfly) shuffle reduction.
 *
 * The shuffles are issued with FULL_MASK, so all WARP_SIZE lanes of the warp
 * must call this function together. After the final step every lane holds
 * the same total.
 *
 * @param e This lane's contribution.
 * @return The sum of `e` over all lanes of the warp.
 */
__device__ inline float warpSum(float e) {
	// Halve the exchange distance each round: 16, 8, 4, 2, 1.
	for (int offset = WARP_SIZE >> 1; offset > 0; offset >>= 1) {
		e += __shfl_xor_sync(FULL_MASK, e, offset, WARP_SIZE);
	}
	return e;
}
//#define COR_WIN_RADIUS 17
//#define COR_WIN_SIZE (COR_WIN_RADIUS * COR_WIN_RADIUS)
/**
 * For each valid point in `p1`, search a COR_WIN_RADIUS x COR_WIN_RADIUS
 * window of `p2` (centred on the projection of the point through `pose2` and
 * `cam2`) for the lowest-cost correspondence, then accumulate a motion vector
 * into `vout` and an energy value into `eout`.
 *
 * Launch layout: one warp per pixel of `p1`. The pixel x coordinate is
 * (global thread x) / WARP_SIZE and the pixel y coordinate is the global
 * thread y, so blockDim.x must be a multiple of WARP_SIZE (the launcher in
 * this file uses 2 * WARP_SIZE). Each lane evaluates a strided subset of the
 * window candidates and the per-lane results are combined with warpMin /
 * warpSum. No shared memory is used.
 *
 * Per-candidate cost is
 *   cost_ratio * colourCost + (1 - cost_ratio) * spatialCost
 * where each cost is 1 minus the corresponding weighting function.
 *
 * @tparam COR_WIN_RADIUS Side length, in pixels, of the square search window.
 * @param p1     Points of the first view (x == MINF marks an invalid point).
 * @param p2     Points of the second view.
 * @param c1     Colours of the first view.
 * @param c2     Colours of the second view.
 * @param vout   Accumulated motion vectors; xyz receives the displacement and
 *               w receives (1 - mincost) * confidence.
 * @param eout   Energy output; updated with a running maximum.
 * @param pose1  Transform applied to points before the optional x/y
 *               restriction, and inverted afterwards.
 * @param pose2  Transform applied to `p1` points before projecting with cam2.
 * @param cam2   Camera used to project into the second view.
 * @param params Flags, smoothing parameters and colour/spatial cost ratio.
 */
template <int COR_WIN_RADIUS>
__global__ void correspondence_energy_vector_kernel(
		TextureObject<float4> p1,
		TextureObject<float4> p2,
		TextureObject<uchar4> c1,
		TextureObject<uchar4> c2,
		TextureObject<float4> vout,
		TextureObject<float> eout,
		float4x4 pose1,  // Inverse
		float4x4 pose2,  // Inverse
		Camera cam2,
		ftl::cuda::ILWParams params) {

	// Each warp picks point in p1
	const int tid = (threadIdx.x + threadIdx.y * blockDim.x);
	const int x = (blockIdx.x * blockDim.x + threadIdx.x) / WARP_SIZE;
	const int y = blockIdx.y * blockDim.y + threadIdx.y;

	// The grid is rounded up to whole blocks, so warps beyond the image edge
	// must not touch vout/eout. x and y are identical for every lane of a
	// warp, so this exit is warp-uniform and safe before the shuffles below.
	if (static_cast<unsigned int>(x) >= p1.width() ||
			static_cast<unsigned int>(y) >= p1.height()) return;

	const float3 world1 = make_float3(p1.tex2D(x, y));
	const uchar4 colour1 = c1.tex2D(x, y);
	if (world1.x == MINF) return;  // Warp-uniform: no source point here

	const float3 camPos2 = pose2 * world1;
	const uint2 screen2 = cam2.camToScreen<uint2>(camPos2);

	float bestcost = 1.1f;  // Sentinel above any achievable cost
	float avgcost = 0.0f;
	float3 bestpoint;       // Only read when bestcost was lowered below 1.0f
	int count = 0;

	// Project to p2 using cam2
	// Each thread takes a possible correspondence and calculates a weighting
	const int lane = tid % WARP_SIZE;
	for (int i = lane; i < COR_WIN_RADIUS * COR_WIN_RADIUS; i += WARP_SIZE) {
		// Window offsets relative to the projected centre
		const float u = (i % COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);
		const float v = (i / COR_WIN_RADIUS) - (COR_WIN_RADIUS / 2);

		const float3 world2 = make_float3(p2.tex2D(screen2.x + u, screen2.y + v));
		if ((params.flags & ftl::cuda::kILWFlag_IgnoreBad) && world2.x == MINF) continue;
		const uchar4 colour2 = c2.tex2D(screen2.x + u, screen2.y + v);

		// Determine degree of correspondence
		float cost = 1.0f - ftl::cuda::spatialWeighting(world1, world2, params.spatial_smooth);
		// Point is too far away to even count
		if (world2.x != MINF && cost == 1.0f) continue;

		// Mix ratio of colour and distance costs
		const float ccost = 1.0f - ftl::cuda::colourWeighting(colour1, colour2, params.colour_smooth);
		if ((params.flags & ftl::cuda::kILWFlag_SkipBadColour) && ccost == 1.0f) continue;
		cost = params.cost_ratio * (ccost) + (1.0f - params.cost_ratio) * cost;
		//cost /= 2.0f;

		++count;
		avgcost += cost;
		if (world2.x != MINF && cost < bestcost) {
			bestpoint = world2;
			bestcost = cost;
		}
	}

	// All lanes of the warp reach this point together (the loop has finished
	// for every lane), so the full-mask warp reductions are well defined.
	count = warpSum(count);
	const float mincost = warpMin(bestcost);

	// Several lanes may hold exactly the minimum cost. Elect the lowest such
	// lane so that only one thread performs the read-modify-write on
	// vout/eout; otherwise tied lanes would race on the same pixel.
	const unsigned int tiemask = __ballot_sync(FULL_MASK, bestcost == mincost);
	const bool best = (lane == __ffs(tiemask) - 1);

	// If no candidate was counted, mincost stays at the 1.1f sentinel and the
	// (undefined) average is never used below.
	avgcost = warpSum(avgcost) / count;
	const float confidence = (avgcost - mincost);

	if (best && mincost < 1.0f) {
		float3 tvecA = pose1 * bestpoint;
		float3 tvecB = pose1 * world1;
		if (params.flags & ftl::cuda::kILWFlag_RestrictZ) {
			// Keep only the z difference in the pose1-transformed frame
			tvecA.x = tvecB.x;
			tvecA.y = tvecB.y;
		}
		tvecA = (pose1.getInverse() * tvecA) - world1;

		vout(x, y) = vout.tex2D(x, y) + make_float4(
			tvecA.x,  // * (1.0f - mincost) * confidence,
			tvecA.y,  // * (1.0f - mincost) * confidence,
			tvecA.z,  // * (1.0f - mincost) * confidence,
			(1.0f - mincost) * confidence);

		//eout(x,y) = max(eout(x,y), (length(bestpoint-world1) / 0.04f) * 7.0f);
		//eout(x,y) = max(eout(x,y), (1.0f - mincost) * 7.0f);
		//eout(x,y) = max(eout(x, y), (1.0f - mincost) * confidence * (length(bestpoint-world1) / 0.04f) * 12.0f);
		eout(x, y) = max(eout(x, y), (1.0f - mincost) * confidence * 12.0f);
		//eout(x,y) = max(eout(x, y), confidence * 12.0f);
	}
	// When mincost >= 1.0f no correspondence was found; vout and eout are
	// deliberately left unchanged.
}
/**
 * Host-side launcher for correspondence_energy_vector_kernel.
 *
 * Launches with blocks of (2 * WARP_SIZE) x T_PER_BLOCK threads, i.e. two
 * warps side by side in x and T_PER_BLOCK rows, with the grid rounded up to
 * cover p1.width() x p1.height() at one warp per pixel. The launch is
 * asynchronous on `stream`; only launch errors are checked here.
 *
 * @param p1,p2   Point images of the first and second view.
 * @param c1,c2   Colour images of the first and second view.
 * @param vout    Motion vector accumulation image.
 * @param eout    Energy output image.
 * @param pose1   Forwarded unchanged to the kernel.
 * @param pose2   Forwarded unchanged to the kernel.
 * @param cam2    Camera of the second view.
 * @param params  ILW parameters forwarded to the kernel.
 * @param win     Search window size; only 17, 9 and 5 are instantiated. Any
 *                other value launches nothing.
 * @param stream  CUDA stream to launch on.
 */
void ftl::cuda::correspondence_energy_vector(
		TextureObject<float4> &p1,
		TextureObject<float4> &p2,
		TextureObject<uchar4> &c1,
		TextureObject<uchar4> &c2,
		TextureObject<float4> &vout,
		TextureObject<float> &eout,
		float4x4 &pose1,
		float4x4 &pose2,
		const Camera &cam2,
		const ILWParams &params,
		int win,
		cudaStream_t stream) {

	// Number of warps (and therefore pixels) covered by one block in x.
	const int warpsPerRow = 2;

	const dim3 blockSize(warpsPerRow * WARP_SIZE, T_PER_BLOCK);
	const dim3 gridSize(
		(p1.width() + warpsPerRow - 1) / warpsPerRow,
		(p1.height() + T_PER_BLOCK - 1) / T_PER_BLOCK);

	//printf("COR SIZE %d,%d\n", p1.width(), p1.height());

	// The window size is a template argument, so dispatch at runtime to the
	// matching instantiation.
	if (win == 17) {
		correspondence_energy_vector_kernel<17><<<gridSize, blockSize, 0, stream>>>(
			p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params);
	} else if (win == 9) {
		correspondence_energy_vector_kernel<9><<<gridSize, blockSize, 0, stream>>>(
			p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params);
	} else if (win == 5) {
		correspondence_energy_vector_kernel<5><<<gridSize, blockSize, 0, stream>>>(
			p1, p2, c1, c2, vout, eout, pose1, pose2, cam2, params);
	}

	cudaSafeCall( cudaGetLastError() );
}
//==============================================================================
//#define MOTION_RADIUS 9
/**
 * Move each valid point of `p` along a weighted average of the motion
 * vectors in `ev` taken over a (2 * MOTION_RADIUS + 1)^2 pixel neighbourhood.
 *
 * Launch layout: one thread per pixel of `p` on a 2D grid; threads outside
 * the image do nothing. No shared memory is used.
 *
 * Each neighbour contributes with weight `ev.w * s`, where `s` is the
 * spatial weighting between the neighbour's point and this pixel's point
 * using a fixed smoothing value of 0.01f. The point is only updated when the
 * accumulated w component is positive.
 *
 * NOTE(review): `p` is read at neighbouring pixels and written at (x,y)
 * within the same launch - confirm whether seeing already-moved neighbours
 * is acceptable.
 *
 * @tparam MOTION_RADIUS Neighbourhood radius in pixels.
 * @param p      Point image, updated in place.
 * @param ev     Motion vectors (xyz) with a per-pixel weight in w.
 * @param camera Not used by the kernel body.
 * @param rate   Scale applied to the averaged vector before adding it.
 */
template <int MOTION_RADIUS>
__global__ void move_points_kernel(
		ftl::cuda::TextureObject<float4> p,
		ftl::cuda::TextureObject<float4> ev,
		ftl::rgbd::Camera camera,
		float rate) {

	const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
	const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

	// Threads that fall outside the image have nothing to do.
	if (x >= p.width() || y >= p.height()) return;

	const float4 world = p(x, y);
	if (world.x == MINF) return;  // No valid point at this pixel

	float4 vec = make_float4(0.0f, 0.0f, 0.0f, 0.0f);  //ev.tex2D((int)x,(int)y);
	float contrib = 0.0f;

	// Calculate screen space distortion with neighbours
	for (int dy = -MOTION_RADIUS; dy <= MOTION_RADIUS; ++dy) {
		for (int dx = -MOTION_RADIUS; dx <= MOTION_RADIUS; ++dx) {
			const int nx = (int)x + dx;
			const int ny = (int)y + dy;

			const float4 vecn = ev.tex2D(nx, ny);
			const float3 pn = make_float3(p.tex2D(nx, ny));
			if (pn.x == MINF) continue;  // Neighbour has no valid point

			const float s = ftl::cuda::spatialWeighting(pn, make_float3(world), 0.01f);
			contrib += vecn.w * s;
			vec += vecn.w * s * vecn;
		}
	}

	if (vec.w > 0.0f) {
		p(x, y) = world + rate * (vec / contrib);
	}
}
/**
 * Host-side launcher for move_points_kernel.
 *
 * Launches one thread per pixel of `p` using T_PER_BLOCK x T_PER_BLOCK
 * blocks, with the grid rounded up to cover the whole image. The launch is
 * asynchronous on `stream`; only launch errors are checked here.
 *
 * @param p      Point image, updated in place by the kernel.
 * @param v      Motion vector image produced by the correspondence step.
 * @param camera Forwarded to the kernel.
 * @param rate   Scale applied to the averaged motion vector.
 * @param radius Neighbourhood radius; only 0, 1, 3, 5 and 9 are
 *               instantiated. Any other value launches nothing.
 * @param stream CUDA stream to launch on.
 */
void ftl::cuda::move_points(
		ftl::cuda::TextureObject<float4> &p,
		ftl::cuda::TextureObject<float4> &v,
		const ftl::rgbd::Camera &camera,
		float rate,
		int radius,
		cudaStream_t stream) {

	const dim3 blockSize(T_PER_BLOCK, T_PER_BLOCK);
	const dim3 gridSize(
		(p.width() + T_PER_BLOCK - 1) / T_PER_BLOCK,
		(p.height() + T_PER_BLOCK - 1) / T_PER_BLOCK);

	// The radius is a template argument, so select the matching
	// instantiation at runtime.
	switch (radius) {
	case 0:
		move_points_kernel<0><<<gridSize, blockSize, 0, stream>>>(p, v, camera, rate);
		break;
	case 1:
		move_points_kernel<1><<<gridSize, blockSize, 0, stream>>>(p, v, camera, rate);
		break;
	case 3:
		move_points_kernel<3><<<gridSize, blockSize, 0, stream>>>(p, v, camera, rate);
		break;
	case 5:
		move_points_kernel<5><<<gridSize, blockSize, 0, stream>>>(p, v, camera, rate);
		break;
	case 9:
		move_points_kernel<9><<<gridSize, blockSize, 0, stream>>>(p, v, camera, rate);
		break;
	}

	cudaSafeCall( cudaGetLastError() );
}
Loading