diff options
-rw-r--r-- | data/diffraction.cl | 15 | ||||
-rw-r--r-- | src/diffraction-gpu.c | 216 | ||||
-rw-r--r-- | src/pattern_sim.c | 2 |
3 files changed, 141 insertions, 92 deletions
diff --git a/data/diffraction.cl b/data/diffraction.cl index f6318378..d128446e 100644 --- a/data/diffraction.cl +++ b/data/diffraction.cl @@ -33,7 +33,7 @@ const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE float4 get_q(float fs, float ss, float res, float clen, float k, - float *ttp, float corner_x, float corner_y, + float corner_x, float corner_y, float fsx, float fsy, float ssx, float ssy) { float rx, ry, r; @@ -51,7 +51,6 @@ float4 get_q(float fs, float ss, float res, float clen, float k, r = sqrt(pow(rx, 2.0f) + pow(ry, 2.0f)); tt = atan2(r, clen); - *ttp = tt; az = atan2(ry, rx); @@ -211,7 +210,7 @@ float molecule_factor(global float *intensities, global float *flags, } -kernel void diffraction(global float *diff, global float *ttp, float k, +kernel void diffraction(global float *diff, float k, int w, float corner_x, float corner_y, float res, float clen, float16 cell, global float *intensities, @@ -219,7 +218,8 @@ kernel void diffraction(global float *diff, global float *ttp, float k, read_only image2d_t func_b, read_only image2d_t func_c, global float *flags, - float fsx, float fsy, float ssx, float ssy) + float fsx, float fsy, float ssx, float ssy, + float xo, float yo) { float tt; float fs, ss; @@ -230,11 +230,11 @@ kernel void diffraction(global float *diff, global float *ttp, float k, int idx; /* Calculate fractional coordinates in fs/ss */ - fs = convert_float(get_global_id(0)); - ss = convert_float(get_global_id(1)); + fs = convert_float(get_global_id(0)) + xo; + ss = convert_float(get_global_id(1)) + yo; /* Get the scattering vector */ - q = get_q(fs, ss, res, clen, k, &tt, + q = get_q(fs, ss, res, clen, k, corner_x, corner_y, fsx, fsy, ssx, ssy); /* Calculate the diffraction */ @@ -245,5 +245,4 @@ kernel void diffraction(global float *diff, global float *ttp, float k, /* Write the value to memory */ idx = convert_int_rtz(fs) + w*convert_int_rtz(ss); diff[idx] = I_molecule * I_lattice; - ttp[idx] = tt; } diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index 2b8e97e5..15ff39cc 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -167,57 +167,13 @@ static int set_arg_mem(struct gpu_context *gctx, int idx, cl_mem val) } -void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, - int na, int nb, int nc, UnitCell *ucell) +static void do_panels(struct gpu_context *gctx, struct image *image, + int *n_inf, int *n_neg, int *n_nan, double k, double frac, + double xo, double yo) { - cl_int err; - double ax, ay, az; - double bx, by, bz; - double cx, cy, cz; int i; - cl_float16 cell; - cl_int4 ncells; - int n_inf = 0; - int n_neg = 0; - int n_nan = 0; - if ( gctx == NULL ) { - ERROR("GPU setup failed.\n"); - return; - } - - cell_get_cartesian(ucell, &ax, &ay, &az, &bx, &by, &bz, &cx, &cy, &cz); - cell.s[0] = ax; cell.s[1] = ay; cell.s[2] = az; - cell.s[3] = bx; cell.s[4] = by; cell.s[5] = bz; - cell.s[6] = cx; cell.s[7] = cy; cell.s[8] = cz; - - ncells.s[0] = na; - ncells.s[1] = nb; - ncells.s[2] = nc; - ncells.s[3] = 0; /* unused */ - - /* Ensure all required LUTs are available */ - check_sinc_lut(gctx, na); - check_sinc_lut(gctx, nb); - check_sinc_lut(gctx, nc); - - if ( set_arg_float(gctx, 2, 1.0/image->lambda) ) return; - if ( set_arg_mem(gctx, 9, gctx->intensities) ) return; - if ( set_arg_mem(gctx, 10, gctx->sinc_luts[na-1]) ) return; - if ( set_arg_mem(gctx, 11, gctx->sinc_luts[nb-1]) ) return; - if ( set_arg_mem(gctx, 12, gctx->sinc_luts[nc-1]) ) return; - if ( set_arg_mem(gctx, 13, gctx->flags) ) return; - - /* Unit cell */ - err = clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set unit cell: %s\n", clError(err)); - return; - } - - /* Allocate memory for the result */ - image->data = calloc(image->width * image->height, sizeof(float)); - image->twotheta = calloc(image->width * image->height, sizeof(double)); + if ( set_arg_float(gctx, 1, k) ) return; /* Iterate over panels */ for ( i=0; i<image->det->n_panels; i++ ) { @@ -225,14 +181,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, size_t dims[2]; size_t ldims[2] = {1, 1}; struct panel *p; - cl_mem tt; - size_t tt_size; cl_mem diff; size_t diff_size; float *diff_ptr; - float *tt_ptr; int pan_width, pan_height; int fs, ss; + cl_int err; p = &image->det->panels[i]; @@ -247,25 +201,19 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, ERROR("Couldn't allocate diffraction memory\n"); return; } - tt_size = pan_width * pan_height * sizeof(cl_float); - tt = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY, tt_size, - NULL, &err); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't allocate twotheta memory\n"); - return; - } if ( set_arg_mem(gctx, 0, diff) ) return; - if ( set_arg_mem(gctx, 1, tt) ) return; - if ( set_arg_int(gctx, 3, pan_width) ) return; - if ( set_arg_float(gctx, 4, p->cnx) ) return; - if ( set_arg_float(gctx, 5, p->cny) ) return; - if ( set_arg_float(gctx, 6, p->res) ) return; - if ( set_arg_float(gctx, 7, p->clen) ) return; - if ( set_arg_float(gctx, 14, p->fsx) ) return; - if ( set_arg_float(gctx, 15, p->fsy) ) return; - if ( set_arg_float(gctx, 16, p->ssx) ) return; - if ( set_arg_float(gctx, 17, p->ssy) ) return; + if ( set_arg_int(gctx, 2, pan_width) ) return; + if ( set_arg_float(gctx, 3, p->cnx) ) return; + if ( set_arg_float(gctx, 4, p->cny) ) return; + if ( set_arg_float(gctx, 5, p->res) ) return; + if ( set_arg_float(gctx, 6, p->clen) ) return; + if ( set_arg_float(gctx, 13, p->fsx) ) return; + if ( set_arg_float(gctx, 14, p->fsy) ) return; + if ( set_arg_float(gctx, 15, p->ssx) ) return; + if ( set_arg_float(gctx, 16, p->ssy) ) return; + if ( set_arg_float(gctx, 17, xo) ) return; + if ( set_arg_float(gctx, 18, yo) ) return; dims[0] = pan_width; dims[1] = pan_height; @@ -288,43 +236,124 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, clError(err)); return; } - tt_ptr = clEnqueueMapBuffer(gctx->cq, tt, CL_TRUE, CL_MAP_READ, - 0, tt_size, 0, NULL, NULL, &err); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't map tt buffer\n"); - return; - } for ( fs=0; fs<pan_width; fs++ ) { for ( ss=0; ss<pan_height; ss++ ) { - float val, tt; + float val; int tfs, tss; val = diff_ptr[fs + pan_width*ss]; - if ( isinf(val) ) n_inf++; - if ( val < 0.0 ) n_neg++; - if ( isnan(val) ) n_nan++; - tt = tt_ptr[fs + pan_width*ss]; + if ( isinf(val) ) (*n_inf)++; + if ( val < 0.0 ) (*n_neg)++; + if ( isnan(val) ) (*n_nan)++; tfs = p->min_fs + fs; tss = p->min_ss + ss; - image->data[tfs + image->width*tss] = val; - image->twotheta[tfs + image->width*tss] = tt; + image->data[tfs + image->width*tss] += frac*val; } } clEnqueueUnmapMemObject(gctx->cq, diff, diff_ptr, 0, NULL, NULL); - clEnqueueUnmapMemObject(gctx->cq, tt, tt_ptr, - 0, NULL, NULL); clReleaseMemObject(diff); - clReleaseMemObject(tt); } +} + + +void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, + int na, int nb, int nc, UnitCell *ucell) +{ + double ax, ay, az; + double bx, by, bz; + double cx, cy, cz; + cl_float16 cell; + cl_int4 ncells; + cl_int err; + int n_inf = 0; + int n_neg = 0; + int n_nan = 0; + int fs, ss; + const int nkstep = 10; + const int nxs = 4; + const int nys = 4; + int kstep; + double klow, khigh, kinc, w; + double frac; + int xs, ys; + int n, ntot; + + if ( gctx == NULL ) { + ERROR("GPU setup failed.\n"); + return; + } + cell_get_cartesian(ucell, &ax, &ay, &az, &bx, &by, &bz, &cx, &cy, &cz); + cell.s[0] = ax; cell.s[1] = ay; cell.s[2] = az; + cell.s[3] = bx; cell.s[4] = by; cell.s[5] = bz; + cell.s[6] = cx; cell.s[7] = cy; cell.s[8] = cz; + + ncells.s[0] = na; + ncells.s[1] = nb; + ncells.s[2] = nc; + ncells.s[3] = 0; /* unused */ + + /* Ensure all required LUTs are available */ + check_sinc_lut(gctx, na); + check_sinc_lut(gctx, nb); + check_sinc_lut(gctx, nc); + + if ( set_arg_mem(gctx, 8, gctx->intensities) ) return; + if ( set_arg_mem(gctx, 9, gctx->sinc_luts[na-1]) ) return; + if ( set_arg_mem(gctx, 10, gctx->sinc_luts[nb-1]) ) return; + if ( set_arg_mem(gctx, 11, gctx->sinc_luts[nc-1]) ) return; + if ( set_arg_mem(gctx, 12, gctx->flags) ) return; + + /* Unit cell */ + err = clSetKernelArg(gctx->kern, 7, sizeof(cl_float16), &cell); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set unit cell: %s\n", clError(err)); + return; + } + + /* Allocate memory for the result */ + image->data = calloc(image->width * image->height, sizeof(float)); + + w = image->lambda * image->bw; + klow = 1.0 / (image->lambda + (w/2.0)); /* Smallest k */ + khigh = 1.0 / (image->lambda - (w/2.0)); /* Largest k */ + kinc = (khigh-klow) / (nkstep+1); + + ntot = nkstep * nxs * nys; + frac = 1.0 / ntot; + + n = 0; + for ( xs=0; xs<nxs; xs++ ) { + for ( ys=0; ys<nys; ys++ ) { + + double xo, yo; + + xo = (1.0/nxs) * xs; + yo = (1.0/nys) * ys; + + for ( kstep=0; kstep<nkstep; kstep++ ) { + + double k; + + k = klow + kstep*kinc; + + do_panels(gctx, image, &n_inf, &n_neg, &n_nan, k, frac, + xo, yo); + + n++; + progress_bar(n, ntot, "Simulating"); + + } + } + } if ( n_neg + n_inf + n_nan ) { ERROR("WARNING: The GPU calculation produced %i negative" @@ -332,6 +361,25 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, n_neg, n_inf, n_nan); } + /* Calculate "2theta" values for detector geometry */ + image->twotheta = calloc(image->width * image->height, sizeof(double)); + for ( fs=0; fs<image->width; fs++ ) { + for ( ss=0; ss<image->height; ss++ ) { + + struct rvec q; + double twotheta, k; + int idx; + + /* Calculate k this time round */ + k = 1.0/image->lambda; + + q = get_q(image, fs, ss, &twotheta, k); + + idx = fs + image->width*ss; + image->twotheta[idx] = twotheta; + + } + } } diff --git a/src/pattern_sim.c b/src/pattern_sim.c index 03b009b4..ea8a317f 100644 --- a/src/pattern_sim.c +++ b/src/pattern_sim.c @@ -497,6 +497,8 @@ int main(int argc, char *argv[]) image.width = image.det->max_fs + 1; image.height = image.det->max_ss + 1; image.lambda = ph_en_to_lambda(eV_to_J(image.beam->photon_energy)); + image.bw = image.beam->bandwidth; + image.div = image.beam->divergence; /* Load unit cell */ input_cell = load_cell_from_pdb(filename); |