about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Thomas White <taw@physics.org> 2016-07-29 14:54:58 +0200
committer: Thomas White <taw@physics.org> 2016-07-29 17:42:23 +0200
commit: 001705c484e08d1fd8b412f997ba014d4e9adfe3 (patch)
tree: 716118b1fa6857c7ef391b150d91c4763006fd4a
parent: 9b1374999c98d5eb15e7a93fbeb2357bc907dd26 (diff)
Use z component in GPU simulation
-rw-r--r-- data/diffraction.cl 27
-rw-r--r-- src/diffraction-gpu.c 24
-rw-r--r-- tests/gpu_sim_check.c 36
3 files changed, 46 insertions, 41 deletions
diff --git a/data/diffraction.cl b/data/diffraction.cl
index 994e27db..3b0779b5 100644
--- a/data/diffraction.cl
+++ b/data/diffraction.cl
@@ -51,29 +51,27 @@ const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE
float4 get_q(float fs, float ss, float res, float clen, float k,
float corner_x, float corner_y,
- float fsx, float fsy, float ssx, float ssy)
+ float fsx, float fsy, float fsz, float ssx, float ssy, float ssz)
{
- float rx, ry, r;
+ float rx, ry, rz;
float az, tt;
float4 q;
float xs, ys;
float kx, ky, kz;
+ float ctt;
- xs = fs*fsx + ss*ssx;
- ys = fs*fsy + ss*ssy;
-
- rx = (xs + corner_x) / res;
- ry = (ys + corner_y) / res;
-
- r = sqrt(pow(rx, 2.0f) + pow(ry, 2.0f));
-
- tt = atan2(r, clen);
+ /* Calculate 3D position of given position, in m */
+ rx = (corner_x + fs*fsx + ss*ssx) / res;
+ ry = (corner_y + fs*fsy + ss*ssy) / res;
+ rz = clen + (fs*fsz + ss*ssz)/res;
+ ctt = rz / sqrt(rx*rx + ry*ry + rz*rz); /* cos(2theta) */
+ tt = acos(ctt);
az = atan2(ry, rx);
kx = k*native_sin(tt)*native_cos(az);
ky = k*native_sin(tt)*native_sin(az);
- kz = k*(native_cos(tt)-1.0);
+ kz = k*(ctt - 1.0);
q = (float4)(kx, ky, kz, 0.0);
@@ -159,7 +157,8 @@ float molecule_factor(global float *intensities, global float *flags,
kernel void diffraction(global float *diff, float k, float weight,
int w, float corner_x, float corner_y,
- float fsx, float fsy, float ssx, float ssy,
+ float fsx, float fsy, float fsz,
+ float ssx, float ssy, float ssz,
float res, float clen, float16 cell,
global float *intensities, global float *flags,
read_only image2d_t func_a,
@@ -183,7 +182,7 @@ kernel void diffraction(global float *diff, float k, float weight,
/* Get the scattering vector */
q = get_q(fs, ss, res, clen, k,
- corner_x, corner_y, fsx, fsy, ssx, ssy);
+ corner_x, corner_y, fsx, fsy, fsz, ssx, ssy, ssz);
/* Calculate the diffraction */
f_lattice = lattice_factor(cell, q, func_a, func_b, func_c);
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index 70f3ff58..331170ae 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -212,10 +212,12 @@ static int do_panels(struct gpu_context *gctx, struct image *image,
if ( set_arg_float(gctx, 5, p->cny) ) return 1;
if ( set_arg_float(gctx, 6, p->fsx) ) return 1;
if ( set_arg_float(gctx, 7, p->fsy) ) return 1;
- if ( set_arg_float(gctx, 8, p->ssx) ) return 1;
- if ( set_arg_float(gctx, 9, p->ssy) ) return 1;
- if ( set_arg_float(gctx, 10, p->res) ) return 1;
- if ( set_arg_float(gctx, 11, p->clen) ) return 1;
+ if ( set_arg_float(gctx, 8, p->fsz) ) return 1;
+ if ( set_arg_float(gctx, 9, p->ssx) ) return 1;
+ if ( set_arg_float(gctx, 10, p->ssy) ) return 1;
+ if ( set_arg_float(gctx, 11, p->ssz) ) return 1;
+ if ( set_arg_float(gctx, 12, p->res) ) return 1;
+ if ( set_arg_float(gctx, 13, p->clen) ) return 1;
dims[0] = p->w * sampling;
dims[1] = p->h * sampling;
@@ -223,7 +225,7 @@ static int do_panels(struct gpu_context *gctx, struct image *image,
ldims[0] = sampling;
ldims[1] = sampling;
- err = clSetKernelArg(gctx->kern, 18,
+ err = clSetKernelArg(gctx->kern, 20,
sampling*sampling*sizeof(cl_float), NULL);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set local memory: %s\n", clError(err));
@@ -305,17 +307,17 @@ int get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
cell.s[3] = bx; cell.s[4] = by; cell.s[5] = bz;
cell.s[6] = cx; cell.s[7] = cy; cell.s[8] = cz;
- err = clSetKernelArg(gctx->kern, 12, sizeof(cl_float16), &cell);
+ err = clSetKernelArg(gctx->kern, 14, sizeof(cl_float16), &cell);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set unit cell: %s\n", clError(err));
return 1;
}
- if ( set_arg_mem(gctx, 13, gctx->intensities) ) return 1;
- if ( set_arg_mem(gctx, 14, gctx->flags) ) return 1;
- if ( set_arg_mem(gctx, 15, gctx->sinc_luts[na-1]) ) return 1;
- if ( set_arg_mem(gctx, 16, gctx->sinc_luts[nb-1]) ) return 1;
- if ( set_arg_mem(gctx, 17, gctx->sinc_luts[nc-1]) ) return 1;
+ if ( set_arg_mem(gctx, 15, gctx->intensities) ) return 1;
+ if ( set_arg_mem(gctx, 16, gctx->flags) ) return 1;
+ if ( set_arg_mem(gctx, 17, gctx->sinc_luts[na-1]) ) return 1;
+ if ( set_arg_mem(gctx, 18, gctx->sinc_luts[nb-1]) ) return 1;
+ if ( set_arg_mem(gctx, 19, gctx->sinc_luts[nc-1]) ) return 1;
/* Allocate memory for the result */
image->dp = malloc(image->det->n_panels * sizeof(float *));
diff --git a/tests/gpu_sim_check.c b/tests/gpu_sim_check.c
index c471fb0f..ba51b8db 100644
--- a/tests/gpu_sim_check.c
+++ b/tests/gpu_sim_check.c
@@ -113,14 +113,16 @@ int main(int argc, char *argv[])
det->panels[0].orig_max_ss = 511;
det->panels[0].w = 1024;
det->panels[0].h = 512;
- det->panels[0].fsx = 1;
- det->panels[0].fsy = 0;
- det->panels[0].ssx = 0;
- det->panels[0].ssy = 1;
- det->panels[0].xfs = 1;
- det->panels[0].yfs = 0;
- det->panels[0].xss = 0;
- det->panels[0].yss = 1;
+ det->panels[0].fsx = 1.0;
+ det->panels[0].fsy = 0.0;
+ det->panels[0].fsz = 0.4;
+ det->panels[0].ssx = 0.0;
+ det->panels[0].ssy = 1.0;
+ det->panels[0].ssz = 0.0;
+ det->panels[0].xfs = 1.0;
+ det->panels[0].yfs = 0.0;
+ det->panels[0].xss = 0.0;
+ det->panels[0].yss = 1.0;
det->panels[0].cnx = -512.0;
det->panels[0].cny = -512.0-sep;
det->panels[0].clen = 100.0e-3;
@@ -134,14 +136,16 @@ int main(int argc, char *argv[])
det->panels[1].orig_max_ss = 1023;
det->panels[1].w = 1024;
det->panels[1].h = 512;
- det->panels[1].fsx = 1;
- det->panels[1].fsy = 0;
- det->panels[1].ssx = 0;
- det->panels[1].ssy = 1;
- det->panels[1].xfs = 1;
- det->panels[1].yfs = 0;
- det->panels[1].xss = 0;
- det->panels[1].yss = 1;
+ det->panels[1].fsx = 1.0;
+ det->panels[1].fsy = 0.0;
+ det->panels[1].fsz = 0.0;
+ det->panels[1].ssx = 0.0;
+ det->panels[1].ssy = 1.0;
+ det->panels[1].ssz = 1.4;
+ det->panels[1].xfs = 1.0;
+ det->panels[1].yfs = 0.0;
+ det->panels[1].xss = 0.0;
+ det->panels[1].yss = 1.0;
det->panels[1].cnx = -512.0;
det->panels[1].cny = sep;
det->panels[1].clen = 100.0e-3;