aboutsummaryrefslogtreecommitdiff
path: root/src/diffraction-gpu.c
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2011-11-15 12:17:59 +0100
committerThomas White <taw@physics.org>2012-02-22 15:27:40 +0100
commit469efb904b59f137ac9e85e5ff23edd0c113de5c (patch)
tree71fab5f5715ec9f88984450cdabb592cd49dd46d /src/diffraction-gpu.c
parent38089071300b8e04ed42236dd08d9055094fb3b8 (diff)
Move a load more stuff into libcrystfel
Diffstat (limited to 'src/diffraction-gpu.c')
-rw-r--r--src/diffraction-gpu.c529
1 files changed, 0 insertions, 529 deletions
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
deleted file mode 100644
index 605b1514..00000000
--- a/src/diffraction-gpu.c
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * diffraction-gpu.c
- *
- * Calculate diffraction patterns by Fourier methods (GPU version)
- *
- * (c) 2006-2011 Thomas White <taw@physics.org>
- *
- * Part of CrystFEL - crystallography with a FEL
- *
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-#include <complex.h>
-
-#ifdef HAVE_CL_CL_H
-#include <CL/cl.h>
-#else
-#include <cl.h>
-#endif
-
-#include "image.h"
-#include "utils.h"
-#include "cell.h"
-#include "diffraction.h"
-#include "cl-utils.h"
-#include "beam-parameters.h"
-
-
-#define SAMPLING (4)
-#define BWSAMPLING (10)
-#define DIVSAMPLING (1)
-#define SINC_LUT_ELEMENTS (4096)
-
-
-struct gpu_context
-{
- cl_context ctx;
- cl_command_queue cq;
- cl_program prog;
- cl_kernel kern;
- cl_mem intensities;
- cl_mem flags;
-
- /* Array of sinc LUTs */
- cl_mem *sinc_luts;
- cl_float **sinc_lut_ptrs;
- int max_sinc_lut; /* Number of LUTs, i.e. one greater than the maximum
- * index. This equals the highest allowable "n". */
-};
-
-
-static void check_sinc_lut(struct gpu_context *gctx, int n)
-{
- cl_int err;
- cl_image_format fmt;
- int i;
-
- if ( n > gctx->max_sinc_lut ) {
-
- gctx->sinc_luts = realloc(gctx->sinc_luts,
- n*sizeof(*gctx->sinc_luts));
- gctx->sinc_lut_ptrs = realloc(gctx->sinc_lut_ptrs,
- n*sizeof(*gctx->sinc_lut_ptrs));
-
- for ( i=gctx->max_sinc_lut; i<n; i++ ) {
- gctx->sinc_lut_ptrs[i] = NULL;
- }
-
- gctx->max_sinc_lut = n;
- }
-
- if ( gctx->sinc_lut_ptrs[n-1] != NULL ) return;
-
- /* Create a new sinc LUT */
- gctx->sinc_lut_ptrs[n-1] = malloc(SINC_LUT_ELEMENTS*sizeof(cl_float));
- gctx->sinc_lut_ptrs[n-1][0] = n;
- if ( n == 1 ) {
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- gctx->sinc_lut_ptrs[n-1][i] = 1.0;
- }
- } else {
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- double x, val;
- x = (double)i/SINC_LUT_ELEMENTS;
- val = fabs(sin(M_PI*n*x)/sin(M_PI*x));
- gctx->sinc_lut_ptrs[n-1][i] = val;
- }
- }
-
- fmt.image_channel_order = CL_INTENSITY;
- fmt.image_channel_data_type = CL_FLOAT;
-
- gctx->sinc_luts[n-1] = clCreateImage2D(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- &fmt, SINC_LUT_ELEMENTS, 1, 0,
- gctx->sinc_lut_ptrs[n-1], &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't create LUT for %i\n", n);
- return;
- }
-}
-
-
-static int set_arg_float(struct gpu_context *gctx, int idx, float val)
-{
- cl_int err;
- err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set kernel argument %i: %s\n",
- idx, clError(err));
- return 1;
- }
-
- return 0;
-}
-
-
-static int set_arg_int(struct gpu_context *gctx, int idx, int val)
-{
- cl_int err;
-
- err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set kernel argument %i: %s\n",
- idx, clError(err));
- return 1;
- }
-
- return 0;
-}
-
-
-static int set_arg_mem(struct gpu_context *gctx, int idx, cl_mem val)
-{
- cl_int err;
-
- err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set kernel argument %i: %s\n",
- idx, clError(err));
- return 1;
- }
-
- return 0;
-}
-
-
-void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
- int na, int nb, int nc, UnitCell *ucell)
-{
- cl_int err;
- double ax, ay, az;
- double bx, by, bz;
- double cx, cy, cz;
- float klow, khigh;
- int i;
- cl_float16 cell;
- cl_int4 ncells;
- const int sampling = SAMPLING;
- cl_float bwstep;
- int n_inf = 0;
- int n_neg = 0;
- cl_float divxlow, divxstep;
- cl_float divylow, divystep;
- int n_nan = 0;
- int sprod;
-
- if ( gctx == NULL ) {
- ERROR("GPU setup failed.\n");
- return;
- }
-
- cell_get_cartesian(ucell, &ax, &ay, &az, &bx, &by, &bz, &cx, &cy, &cz);
- cell.s[0] = ax; cell.s[1] = ay; cell.s[2] = az;
- cell.s[3] = bx; cell.s[4] = by; cell.s[5] = bz;
- cell.s[6] = cx; cell.s[7] = cy; cell.s[8] = cz;
-
- /* Calculate wavelength */
- klow = 1.0/(image->lambda*(1.0 + image->beam->bandwidth/2.0));
- khigh = 1.0/(image->lambda*(1.0 - image->beam->bandwidth/2.0));
- bwstep = (khigh-klow) / BWSAMPLING;
-
- /* Calculate divergence stuff */
- divxlow = -image->beam->divergence/2.0;
- divylow = -image->beam->divergence/2.0;
- divxstep = image->beam->divergence / DIVSAMPLING;
- divystep = image->beam->divergence / DIVSAMPLING;
-
- ncells.s[0] = na;
- ncells.s[1] = nb;
- ncells.s[2] = nc;
- ncells.s[3] = 0; /* unused */
-
- /* Ensure all required LUTs are available */
- check_sinc_lut(gctx, na);
- check_sinc_lut(gctx, nb);
- check_sinc_lut(gctx, nc);
-
- if ( set_arg_float(gctx, 2, klow) ) return;
- if ( set_arg_mem(gctx, 9, gctx->intensities) ) return;
- if ( set_arg_int(gctx, 12, sampling) ) return;
- if ( set_arg_float(gctx, 14, bwstep) ) return;
- if ( set_arg_mem(gctx, 15, gctx->sinc_luts[na-1]) ) return;
- if ( set_arg_mem(gctx, 16, gctx->sinc_luts[nb-1]) ) return;
- if ( set_arg_mem(gctx, 17, gctx->sinc_luts[nc-1]) ) return;
- if ( set_arg_mem(gctx, 18, gctx->flags) ) return;
- if ( set_arg_float(gctx, 23, divxlow) ) return;
- if ( set_arg_float(gctx, 24, divxstep) ) return;
- if ( set_arg_int(gctx, 25, DIVSAMPLING) ) return;
- if ( set_arg_float(gctx, 26, divylow) ) return;
- if ( set_arg_float(gctx, 27, divystep) ) return;
- if ( set_arg_int(gctx, 28, DIVSAMPLING) ) return;
-
- /* Unit cell */
- err = clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set unit cell: %s\n", clError(err));
- return;
- }
-
- /* Local memory for reduction */
- sprod = BWSAMPLING*SAMPLING*SAMPLING*DIVSAMPLING*DIVSAMPLING;
- err = clSetKernelArg(gctx->kern, 13, sprod*sizeof(cl_float), NULL);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set local memory: %s\n", clError(err));
- return;
- }
-
- /* Allocate memory for the result */
- image->data = calloc(image->width * image->height, sizeof(float));
- image->twotheta = calloc(image->width * image->height, sizeof(double));
-
- /* Iterate over panels */
- for ( i=0; i<image->det->n_panels; i++ ) {
-
- size_t dims[3];
- size_t ldims[3] = {SAMPLING, SAMPLING,
- BWSAMPLING * DIVSAMPLING * DIVSAMPLING};
- struct panel *p;
- cl_mem tt;
- size_t tt_size;
- cl_mem diff;
- size_t diff_size;
- float *diff_ptr;
- float *tt_ptr;
- int pan_width, pan_height;
- int fs, ss;
-
- p = &image->det->panels[i];
-
- pan_width = 1 + p->max_fs - p->min_fs;
- pan_height = 1 + p->max_ss - p->min_ss;
-
- /* Buffer for the results of this panel */
- diff_size = pan_width * pan_height * sizeof(cl_float);
- diff = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY,
- diff_size, NULL, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't allocate diffraction memory\n");
- return;
- }
- tt_size = pan_width * pan_height * sizeof(cl_float);
- tt = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY, tt_size,
- NULL, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't allocate twotheta memory\n");
- return;
- }
-
- if ( set_arg_mem(gctx, 0, diff) ) return;
- if ( set_arg_mem(gctx, 1, tt) ) return;
- if ( set_arg_int(gctx, 3, pan_width) ) return;
- if ( set_arg_float(gctx, 4, p->cnx) ) return;
- if ( set_arg_float(gctx, 5, p->cny) ) return;
- if ( set_arg_float(gctx, 6, p->res) ) return;
- if ( set_arg_float(gctx, 7, p->clen) ) return;
- if ( set_arg_int(gctx, 10, p->min_fs) ) return;
- if ( set_arg_int(gctx, 11, p->min_ss) ) return;
- if ( set_arg_float(gctx, 19, p->fsx) ) return;
- if ( set_arg_float(gctx, 20, p->fsy) ) return;
- if ( set_arg_float(gctx, 21, p->ssx) ) return;
- if ( set_arg_float(gctx, 22, p->ssy) ) return;
-
- dims[0] = pan_width * SAMPLING;
- dims[1] = pan_height * SAMPLING;
- dims[2] = BWSAMPLING * DIVSAMPLING * DIVSAMPLING;
-
- err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL,
- dims, ldims, 0, NULL, NULL);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't enqueue diffraction kernel: %s\n",
- clError(err));
- return;
- }
-
- clFinish(gctx->cq);
-
- diff_ptr = clEnqueueMapBuffer(gctx->cq, diff, CL_TRUE,
- CL_MAP_READ, 0, diff_size,
- 0, NULL, NULL, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't map diffraction buffer: %s\n",
- clError(err));
- return;
- }
- tt_ptr = clEnqueueMapBuffer(gctx->cq, tt, CL_TRUE, CL_MAP_READ,
- 0, tt_size, 0, NULL, NULL, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't map tt buffer\n");
- return;
- }
-
- for ( fs=0; fs<pan_width; fs++ ) {
- for ( ss=0; ss<pan_height; ss++ ) {
-
- float val, tt;
- int tfs, tss;
-
- val = diff_ptr[fs + pan_width*ss];
- if ( isinf(val) ) n_inf++;
- if ( val < 0.0 ) n_neg++;
- if ( isnan(val) ) n_nan++;
- tt = tt_ptr[fs + pan_width*ss];
-
- tfs = p->min_fs + fs;
- tss = p->min_ss + ss;
- image->data[tfs + image->width*tss] = val;
- image->twotheta[tfs + image->width*tss] = tt;
-
- }
- }
-
- clEnqueueUnmapMemObject(gctx->cq, diff, diff_ptr,
- 0, NULL, NULL);
- clEnqueueUnmapMemObject(gctx->cq, tt, tt_ptr,
- 0, NULL, NULL);
-
- clReleaseMemObject(diff);
- clReleaseMemObject(tt);
-
- }
-
-
- if ( n_neg + n_inf + n_nan ) {
- ERROR("WARNING: The GPU calculation produced %i negative"
- " values, %i infinities and %i NaNs.\n",
- n_neg, n_inf, n_nan);
- }
-
-}
-
-
-/* Setup the OpenCL stuff, create buffers, load the structure factor table */
-struct gpu_context *setup_gpu(int no_sfac,
- const double *intensities, unsigned char *flags,
- const char *sym, int dev_num)
-{
- struct gpu_context *gctx;
- cl_uint nplat;
- cl_platform_id platforms[8];
- cl_context_properties prop[3];
- cl_int err;
- cl_device_id dev;
- size_t intensities_size;
- float *intensities_ptr;
- size_t flags_size;
- float *flags_ptr;
- size_t maxwgsize;
- int i;
- char cflags[512] = "";
-
- STATUS("Setting up GPU...\n");
-
- err = clGetPlatformIDs(8, platforms, &nplat);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't get platform IDs: %i\n", err);
- return NULL;
- }
- if ( nplat == 0 ) {
- ERROR("Couldn't find at least one platform!\n");
- return NULL;
- }
- prop[0] = CL_CONTEXT_PLATFORM;
- prop[1] = (cl_context_properties)platforms[0];
- prop[2] = 0;
-
- gctx = malloc(sizeof(*gctx));
- gctx->ctx = clCreateContextFromType(prop, CL_DEVICE_TYPE_GPU,
- NULL, NULL, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't create OpenCL context: %i\n", err);
- free(gctx);
- return NULL;
- }
-
- dev = get_cl_dev(gctx->ctx, dev_num);
-
- gctx->cq = clCreateCommandQueue(gctx->ctx, dev, 0, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't create OpenCL command queue\n");
- free(gctx);
- return NULL;
- }
-
- /* Create a single-precision version of the scattering factors */
- intensities_size = IDIM*IDIM*IDIM*sizeof(cl_float);
- intensities_ptr = malloc(intensities_size);
- if ( intensities != NULL ) {
- for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- intensities_ptr[i] = intensities[i];
- }
- } else {
- for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- intensities_ptr[i] = 1e5;
- }
- strncat(cflags, "-DFLAT_INTENSITIES ", 511-strlen(cflags));
- }
- gctx->intensities = clCreateBuffer(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- intensities_size, intensities_ptr, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't allocate intensities memory\n");
- free(gctx);
- return NULL;
- }
- free(intensities_ptr);
-
- if ( sym != NULL ) {
- if ( strcmp(sym, "1") == 0 ) {
- strncat(cflags, "-DPG1 ", 511-strlen(cflags));
- } else if ( strcmp(sym, "-1") == 0 ) {
- strncat(cflags, "-DPG1BAR ", 511-strlen(cflags));
- } else if ( strcmp(sym, "6/mmm") == 0 ) {
- strncat(cflags, "-DPG6MMM ", 511-strlen(cflags));
- } else if ( strcmp(sym, "6") == 0 ) {
- strncat(cflags, "-DPG6 ", 511-strlen(cflags));
- } else if ( strcmp(sym, "6/m") == 0 ) {
- strncat(cflags, "-DPG6M ", 511-strlen(cflags));
- } else {
- ERROR("Sorry! Point group '%s' is not currently"
- " supported on the GPU."
- " I'm using '1' instead.\n", sym);
- strncat(cflags, "-DPG1 ", 511-strlen(cflags));
- }
- } else {
- if ( intensities != NULL ) {
- ERROR("You gave me an intensities file but no point"
- " group. I'm assuming '1'.\n");
- strncat(cflags, "-DPG1 ", 511-strlen(cflags));
- }
- }
-
- /* Create a flag array */
- flags_size = IDIM*IDIM*IDIM*sizeof(cl_float);
- flags_ptr = malloc(flags_size);
- if ( flags != NULL ) {
- for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- flags_ptr[i] = flags[i];
- }
- } else {
- for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- flags_ptr[i] = 1.0;
- }
- }
- gctx->flags = clCreateBuffer(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- flags_size, flags_ptr, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't allocate flag buffer\n");
- free(gctx);
- return NULL;
- }
- free(flags_ptr);
-
- gctx->prog = load_program(DATADIR"/crystfel/diffraction.cl", gctx->ctx,
- dev, &err, cflags);
- if ( err != CL_SUCCESS ) {
- free(gctx);
- return NULL;
- }
-
- gctx->kern = clCreateKernel(gctx->prog, "diffraction", &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't create kernel\n");
- free(gctx);
- return NULL;
- }
-
- gctx->max_sinc_lut = 0;
- gctx->sinc_lut_ptrs = NULL;
- gctx->sinc_luts = NULL;
-
- clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE,
- sizeof(size_t), &maxwgsize, NULL);
- STATUS("Maximum work group size = %lli\n", (long long int)maxwgsize);
-
- return gctx;
-}
-
-
-void cleanup_gpu(struct gpu_context *gctx)
-{
- int i;
-
- clReleaseProgram(gctx->prog);
- clReleaseMemObject(gctx->intensities);
-
- /* Release LUTs */
- for ( i=1; i<=gctx->max_sinc_lut; i++ ) {
- if ( gctx->sinc_lut_ptrs[i-1] != NULL ) {
- clReleaseMemObject(gctx->sinc_luts[i-1]);
- free(gctx->sinc_lut_ptrs[i-1]);
- }
- }
-
- free(gctx->sinc_luts);
- free(gctx->sinc_lut_ptrs);
-
- clReleaseCommandQueue(gctx->cq);
- clReleaseContext(gctx->ctx);
- free(gctx);
-}