Skip to content
Snippets Groups Projects
Commit 8f9dbdfd authored by Nicolas Pope's avatar Nicolas Pope
Browse files

WIP Activation fields

parent 66fd837e
No related branches found
No related tags found
No related merge requests found
......@@ -68,112 +68,104 @@ void ftl::cuda::edge_filter(
#endif
}
__global__ void bdmseg_kernel(
TextureObject<uchar4> colour_in,
TextureObject<float> gradient_in,
TextureObject<uchar4> seg_out) {
__shared__ uchar4 colours[WARPS_PER_BLOCK][WIDTH_PER_WARP][WIDTH_PER_WARP];
//__shared__ float gradientY[WARPS_PER_BLOCK][WIDTH_PER_WARP][WIDTH_PER_WARP];
//__shared__ int flags[WARPS_PER_BLOCK][WIDTH_PER_WARP+2][WIDTH_PER_WARP+2];
const int bx = blockIdx.x*WIDTH_PER_WARP*BLOCK_WIDTH;
const int by = blockIdx.y*WIDTH_PER_WARP*BLOCK_WIDTH;
const int warp = threadIdx.y;
__device__ float findMaxEnergy(const float (&gradients)[WIDTH_PER_WARP][WIDTH_PER_WARP], const float (&field)[WIDTH_PER_WARP][WIDTH_PER_WARP]) {
const int lane = threadIdx.x;
const int wx = (warp % BLOCK_WIDTH)*WIDTH_PER_WARP + bx;
const int wy = (warp / BLOCK_WIDTH)*WIDTH_PER_WARP + by;
float max_e = 0.0f;
for (int i=lane; i<BLOCK_COUNT; i+=WARP_SIZE) {
int x = i % WIDTH_PER_WARP;
int y = i / WIDTH_PER_WARP;
if (wx+x >= colour_in.width() || wy+y >= colour_in.height()) continue;
//if (x == WIDTH_PER_WARP-1 || y == WIDTH_PER_WARP-1) continue;
// Iteratively merge pixels until division is approx 2. Reduce threshold
// over the entire block to achieve merge, don't just take local min diff
// to merge?
// Start at max gradient? Or some percentage of it and divide.
// Merge if gradient is less than max. Merge left, merge up?
// Iterate until no further merges take place.
float f = field[y][x];
float e = f * gradients[y][x];
max_e = (f < 1.0f) ? max(max_e, e) : max_e;
}
max_e = warpMax(max_e);
return max_e;
}
// Succeed if 3 or less segments and each segment has a certain number of
// pixels touching a boundary.
__device__ void updateField(const float (&gradients)[WIDTH_PER_WARP][WIDTH_PER_WARP], float (&field)[WIDTH_PER_WARP][WIDTH_PER_WARP], float me) {
const int lane = threadIdx.x;
// Perhaps a 9x9 is better? If merge left and up is used.
for (int i=lane; i<BLOCK_COUNT; i+=WARP_SIZE) {
int x = i % WIDTH_PER_WARP;
int y = i / WIDTH_PER_WARP;
// Segment count starts at max and is subtracted when a merge occurs.
float e = field[y][x] * gradients[y][x];
if (e >= me*0.99f) field[y][x] = 1.0f;
}
//float pgrad = gradient_in(wx+x,wy+y);
//gradient[warp][y][x] = pgrad;
//gradientY[warp][y][x] = pgrady;
__syncwarp();
// Find max border gradient
//if (x == 0 || y == 0 || x == WIDTH_PER_WARP-1 || y == WIDTH_PER_WARP-1) grad = max(grad, pgrad);
float newfields[PIXELS_PER_THREAD];
int j=0;
for (int i=lane; i<BLOCK_COUNT; i+=WARP_SIZE) {
int x = i % WIDTH_PER_WARP;
int y = i / WIDTH_PER_WARP;
colours[warp][y][x] = colour_in(wx+x, wy+y);
float oldfield = field[y][x];
float newfield = 0.0f;
seg_out(wx+x,wy+y) = make_uchar4(0,0,0,0);
for (int v=max(y-1,0); v<=min(y+1,WIDTH_PER_WARP-1); ++v) {
for (int u=max(x-1,0); u<=min(x+1,WIDTH_PER_WARP-1); ++u) {
float f = oldfield * field[v][u];
newfield = max(newfield, f);
}
}
//grad = warpMax(grad);
//if (grad < 20.0f) return;
float alpha = 0.0f;
float max_sum = 0.0f;
float best_alpha = -1.0f;
float grad_total = 0.0f;
newfields[j++] = newfield;
}
while (alpha < 3.14f) {
float grad_sum = 0.0f;
__syncwarp();
j=0;
for (int i=lane; i<BLOCK_COUNT; i+=WARP_SIZE) {
int x = i % (WIDTH_PER_WARP);
int y = i / (WIDTH_PER_WARP);
if (wx+x >= colour_in.width() || wy+y >= colour_in.height()) continue;
int x = i % WIDTH_PER_WARP;
int y = i / WIDTH_PER_WARP;
field[y][x] = newfields[j++];
}
uchar4 mycolour = colours[warp][y][x];
__syncwarp();
}
float xf = (float)x - 4.0f;
float yf = (float)y - 4.0f;
__global__ void bdmseg_kernel(
TextureObject<uchar4> colour_in,
TextureObject<float> gradient_in,
TextureObject<uchar4> seg_out) {
float base = (xf*xf + yf*yf);
float G_alpha_ij = (base < 0.00001f) ? 0.0f : (xf*__cosf(alpha) + yf*__sinf(alpha));
float r = ((float)mycolour.x / 256.0f) * G_alpha_ij;
__shared__ float gradients[WARPS_PER_BLOCK][WIDTH_PER_WARP][WIDTH_PER_WARP];
__shared__ float field[WARPS_PER_BLOCK][WIDTH_PER_WARP][WIDTH_PER_WARP];
//__shared__ int flags[WARPS_PER_BLOCK][WIDTH_PER_WARP+2][WIDTH_PER_WARP+2];
//float g = (float)mycolour.y * G_alpha_ij;
//float b = (float)mycolour.z * G_alpha_ij;
const int bx = blockIdx.x*WIDTH_PER_WARP*BLOCK_WIDTH;
const int by = blockIdx.y*WIDTH_PER_WARP*BLOCK_WIDTH;
const int warp = threadIdx.y;
const int lane = threadIdx.x;
const int wx = (warp % BLOCK_WIDTH)*WIDTH_PER_WARP + bx;
const int wy = (warp / BLOCK_WIDTH)*WIDTH_PER_WARP + by;
grad_sum += r;
}
for (int i=lane; i<BLOCK_COUNT; i+=WARP_SIZE) {
int x = i % WIDTH_PER_WARP;
int y = i / WIDTH_PER_WARP;
grad_sum = warpSum(grad_sum);
grad_total += grad_sum*grad_sum;
if (wx+x >= colour_in.width() || wy+y >= colour_in.height()) continue;
if (fabs(grad_sum) > max_sum) {
max_sum = fabs(grad_sum);
best_alpha = alpha;
gradients[warp][y][x] = gradient_in.tex2D(wx+x, wy+y);
field[warp][y][x] = 0.1f; // All locations have equal chance
seg_out(wx+x,wy+y) = make_uchar4(0,0,0,0);
}
alpha += 0.3925f;
for (int j=0; j<8; ++j) {
float e = findMaxEnergy(gradients[warp], field[warp]);
updateField(gradients[warp], field[warp], e);
}
grad_total = sqrt(grad_total);
for (int i=lane; i<BLOCK_COUNT; i+=WARP_SIZE) {
int x = i % WIDTH_PER_WARP;
int y = i / WIDTH_PER_WARP;
if (wx+x >= colour_in.width() || wy+y >= colour_in.height()) continue;
float xf = (float)x - 4.0f;
float yf = (float)y - 4.0f;
float base = (xf*xf + yf*yf);
float G_alpha_ij = (base < 0.00001f) ? 0.0f : (xf*__cosf(best_alpha) + yf*__sinf(best_alpha));
seg_out(wx+x,wy+y) = make_uchar4(0,G_alpha_ij,0,0);
//seg_out(wx+x, wy+y) = make_uchar4(gradient[warp][y][x], 0, 0, 0);
//seg_out(wx+x,wy+y) = make_uchar4(0,G_alpha_ij,0,0);
seg_out(wx+x, wy+y) = make_uchar4(field[warp][y][x]*255.0f, 0, 0, 0);
}
}
......
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment