aboutsummaryrefslogtreecommitdiff
path: root/src/diffraction-gpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/diffraction-gpu.c')
-rw-r--r--src/diffraction-gpu.c596
1 files changed, 0 insertions, 596 deletions
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
deleted file mode 100644
index 97badc36..00000000
--- a/src/diffraction-gpu.c
+++ /dev/null
@@ -1,596 +0,0 @@
-/*
- * diffraction-gpu.c
- *
- * Calculate diffraction patterns by Fourier methods (GPU version)
- *
- * Copyright © 2012-2021 Deutsches Elektronen-Synchrotron DESY,
- * a research centre of the Helmholtz Association.
- *
- * Authors:
- * 2009-2020 Thomas White <taw@physics.org>
- * 2013 Alexandra Tolstikova
- * 2013-2014 Chun Hong Yoon <chun.hong.yoon@desy.de>
- *
- * This file is part of CrystFEL.
- *
- * CrystFEL is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * CrystFEL is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with CrystFEL. If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-#include <complex.h>
-
-#define CL_TARGET_OPENCL_VERSION 220
-#ifdef HAVE_CL_CL_H
-#include <CL/cl.h>
-#else
-#include <cl.h>
-#endif
-
-#include "image.h"
-#include "utils.h"
-#include "cell.h"
-#include "diffraction.h"
-#include "cl-utils.h"
-#include "pattern_sim.h"
-
-#include "diffraction.cl.h"
-
-#define SINC_LUT_ELEMENTS (4096)
-
-
-struct gpu_context
-{
- cl_context ctx;
- cl_command_queue cq;
- cl_program prog;
- cl_kernel kern;
- cl_mem intensities;
- cl_mem flags;
-
- /* Array of sinc LUTs */
- cl_mem *sinc_luts;
- cl_float **sinc_lut_ptrs;
- int max_sinc_lut; /* Number of LUTs, i.e. one greater than the maximum
- * index. This equals the highest allowable "n". */
-};
-
-
-static void check_sinc_lut(struct gpu_context *gctx, int n,
- int no_fringes, int flat)
-{
- cl_int err;
- cl_image_format fmt;
- int i;
-
- if ( n > gctx->max_sinc_lut ) {
-
- gctx->sinc_luts = realloc(gctx->sinc_luts,
- n*sizeof(*gctx->sinc_luts));
- gctx->sinc_lut_ptrs = realloc(gctx->sinc_lut_ptrs,
- n*sizeof(*gctx->sinc_lut_ptrs));
-
- for ( i=gctx->max_sinc_lut; i<n; i++ ) {
- gctx->sinc_lut_ptrs[i] = NULL;
- }
-
- gctx->max_sinc_lut = n;
- }
-
- if ( gctx->sinc_lut_ptrs[n-1] != NULL ) return;
-
- /* Create a new sinc LUT */
- gctx->sinc_lut_ptrs[n-1] = malloc(SINC_LUT_ELEMENTS*sizeof(cl_float));
- gctx->sinc_lut_ptrs[n-1][0] = n;
- if ( n == 1 ) {
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- gctx->sinc_lut_ptrs[n-1][i] = 1.0;
- }
- } else {
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- double x, val;
- x = (double)i/SINC_LUT_ELEMENTS;
- if ( (flat || no_fringes) && (x > 1.0/n) && (1.0-x > 1.0/n) ) {
- val = 0.0;
- } else if ( flat ) {
- val = n;
- } else {
- val = fabs(sin(M_PI*n*x)/sin(M_PI*x));
- }
- gctx->sinc_lut_ptrs[n-1][i] = val;
- }
- }
-
- fmt.image_channel_order = CL_INTENSITY;
- fmt.image_channel_data_type = CL_FLOAT;
-
- gctx->sinc_luts[n-1] = clCreateImage2D(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- &fmt, SINC_LUT_ELEMENTS, 1, 0,
- gctx->sinc_lut_ptrs[n-1], &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't create LUT for %i\n", n);
- return;
- }
-}
-
-
-static int set_arg_float(struct gpu_context *gctx, int idx, float val)
-{
- cl_int err;
- err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set kernel argument %i: %s\n",
- idx, clError(err));
- return 1;
- }
-
- return 0;
-}
-
-
-static int set_arg_int(struct gpu_context *gctx, int idx, int val)
-{
- cl_int err;
-
- err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set kernel argument %i: %s\n",
- idx, clError(err));
- return 1;
- }
-
- return 0;
-}
-
-
-static int set_arg_mem(struct gpu_context *gctx, int idx, cl_mem val)
-{
- cl_int err;
-
- err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set kernel argument %i: %s\n",
- idx, clError(err));
- return 1;
- }
-
- return 0;
-}
-
-
-static int do_panels(struct gpu_context *gctx, struct image *image,
- double k, double weight,
- int *n_inf, int *n_neg, int *n_nan)
-{
- int i;
- const int sampling = 4; /* This, squared, number of samples / pixel */
-
- if ( set_arg_float(gctx, 1, k) ) return 1;
- if ( set_arg_float(gctx, 2, weight) ) return 1;
-
- /* Iterate over panels */
- for ( i=0; i<image->detgeom->n_panels; i++ ) {
-
- size_t dims[2];
- size_t ldims[2];
- struct detgeom_panel *p;
- cl_mem diff;
- size_t diff_size;
- float *diff_ptr;
- int fs, ss;
- cl_int err;
-
- p = &image->detgeom->panels[i];
-
- /* Buffer for the results of this panel */
- diff_size = p->w * p->h * sizeof(cl_float);
- diff = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY,
- diff_size, NULL, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't allocate diffraction memory\n");
- return 1;
- }
-
- if ( set_arg_mem(gctx, 0, diff) ) return 1;
-
- if ( set_arg_int(gctx, 3, p->w) ) return 1;
- if ( set_arg_float(gctx, 4, p->cnx) ) return 1;
- if ( set_arg_float(gctx, 5, p->cny) ) return 1;
- if ( set_arg_float(gctx, 6, p->fsx) ) return 1;
- if ( set_arg_float(gctx, 7, p->fsy) ) return 1;
- if ( set_arg_float(gctx, 8, p->fsz) ) return 1;
- if ( set_arg_float(gctx, 9, p->ssx) ) return 1;
- if ( set_arg_float(gctx, 10, p->ssy) ) return 1;
- if ( set_arg_float(gctx, 11, p->ssz) ) return 1;
- if ( set_arg_float(gctx, 12, 1.0/p->pixel_pitch) ) return 1;
- if ( set_arg_float(gctx, 13, p->cnz*p->pixel_pitch) ) return 1;
-
- dims[0] = p->w * sampling;
- dims[1] = p->h * sampling;
-
- ldims[0] = sampling;
- ldims[1] = sampling;
-
- err = clSetKernelArg(gctx->kern, 20,
- sampling*sampling*sizeof(cl_float), NULL);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set local memory: %s\n", clError(err));
- return 1;
- }
-
- err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 2, NULL,
- dims, ldims, 0, NULL, NULL);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't enqueue diffraction kernel: %s\n",
- clError(err));
- return 1;
- }
-
- clFinish(gctx->cq);
-
- diff_ptr = clEnqueueMapBuffer(gctx->cq, diff, CL_TRUE,
- CL_MAP_READ, 0, diff_size,
- 0, NULL, NULL, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't map diffraction buffer: %s\n",
- clError(err));
- return 1;
- }
-
- for ( ss=0; ss<p->h; ss++ ) {
- for ( fs=0; fs<p->w; fs++ ) {
-
- float val;
-
- val = diff_ptr[fs + p->w*ss];
- if ( isinf(val) ) (*n_inf)++;
- if ( val < 0.0 ) (*n_neg)++;
- if ( isnan(val) ) (*n_nan)++;
-
- image->dp[i][fs + p->w*ss] += val;
-
- }
- }
-
- clEnqueueUnmapMemObject(gctx->cq, diff, diff_ptr,
- 0, NULL, NULL);
-
- clReleaseMemObject(diff);
-
- }
-
- return 0;
-}
-
-
-int get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
- int na, int nb, int nc, UnitCell *ucell,
- int no_fringes, int flat, int n_samples)
-{
- double ax, ay, az;
- double bx, by, bz;
- double cx, cy, cz;
- cl_float16 cell;
- cl_int err;
- int n_inf = 0;
- int n_neg = 0;
- int n_nan = 0;
- int i;
- double kmin, kmax, step, norm;
-
- if ( gctx == NULL ) {
- ERROR("GPU setup failed.\n");
- return 1;
- }
-
- /* Ensure all required LUTs are available */
- check_sinc_lut(gctx, na, no_fringes, flat);
- check_sinc_lut(gctx, nb, no_fringes, flat);
- check_sinc_lut(gctx, nc, no_fringes, flat);
-
- /* Unit cell */
- cell_get_cartesian(ucell, &ax, &ay, &az, &bx, &by, &bz, &cx, &cy, &cz);
- cell.s[0] = ax; cell.s[1] = ay; cell.s[2] = az;
- cell.s[3] = bx; cell.s[4] = by; cell.s[5] = bz;
- cell.s[6] = cx; cell.s[7] = cy; cell.s[8] = cz;
-
- err = clSetKernelArg(gctx->kern, 14, sizeof(cl_float16), &cell);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set unit cell: %s\n", clError(err));
- return 1;
- }
-
- if ( set_arg_mem(gctx, 15, gctx->intensities) ) return 1;
- if ( set_arg_mem(gctx, 16, gctx->flags) ) return 1;
- if ( set_arg_mem(gctx, 17, gctx->sinc_luts[na-1]) ) return 1;
- if ( set_arg_mem(gctx, 18, gctx->sinc_luts[nb-1]) ) return 1;
- if ( set_arg_mem(gctx, 19, gctx->sinc_luts[nc-1]) ) return 1;
-
- /* Allocate memory for the result */
- image->dp = malloc(image->detgeom->n_panels * sizeof(float *));
- if ( image->dp == NULL ) {
- ERROR("Couldn't allocate memory for result.\n");
- return 1;
- }
- for ( i=0; i<image->detgeom->n_panels; i++ ) {
- struct detgeom_panel *p = &image->detgeom->panels[i];
- image->dp[i] = calloc(p->w * p->h, sizeof(float));
- if ( image->dp[i] == NULL ) {
- ERROR("Couldn't allocate memory for panel %i\n", i);
- return 1;
- }
- }
-
- spectrum_get_range(image->spectrum, &kmin, &kmax);
- step = (kmax-kmin)/(n_samples+1);
-
- /* Determine normalisation factor such that weights add up to 1 after
- * sampling (bins must have constant width) */
- norm = 0.0;
- for ( i=1; i<=n_samples; i++ ) {
- double k = kmin + i*step;
- norm += spectrum_get_density_at_k(image->spectrum, k);
- }
- for ( i=1; i<=n_samples; i++ ) {
-
- double k = kmin + i*step;
- double prob;
-
- /* Probability = p.d.f. times step width */
- prob = spectrum_get_density_at_k(image->spectrum, k)/norm;
-
- STATUS("Wavelength: %e m, weight = %.5f\n", 1.0/k, prob);
-
- err = do_panels(gctx, image, k, prob, &n_inf, &n_neg, &n_nan);
- if ( err ) return 1;
-
- }
-
- if ( n_neg + n_inf + n_nan ) {
- ERROR("WARNING: The GPU calculation produced %i negative"
- " values, %i infinities and %i NaNs.\n",
- n_neg, n_inf, n_nan);
- }
-
- return 0;
-}
-
-
-/* Setup the OpenCL stuff, create buffers, load the structure factor table */
-struct gpu_context *setup_gpu(int no_sfac,
- const double *intensities, unsigned char *flags,
- const char *sym, int dev_num)
-{
- struct gpu_context *gctx;
- cl_uint nplat;
- cl_platform_id platforms[8];
- cl_context_properties prop[3];
- cl_int err;
- cl_device_id dev;
- size_t intensities_size;
- float *intensities_ptr;
- size_t flags_size;
- float *flags_ptr;
- size_t maxwgsize;
- int iplat;
- int have_ctx = 0;
- char cflags[512] = "";
- char *insert_stuff = NULL;
-
- STATUS("Setting up GPU...\n");
-
- err = clGetPlatformIDs(8, platforms, &nplat);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't get platform IDs: %i\n", err);
- return NULL;
- }
- if ( nplat == 0 ) {
- ERROR("Couldn't find at least one platform!\n");
- return NULL;
- }
-
- /* Find a GPU platform in the list */
- for ( iplat=0; iplat<nplat; iplat++ ) {
-
- prop[0] = CL_CONTEXT_PLATFORM;
- prop[1] = (cl_context_properties)platforms[iplat];
- prop[2] = 0;
-
- gctx = malloc(sizeof(*gctx));
- gctx->ctx = clCreateContextFromType(prop, CL_DEVICE_TYPE_GPU,
- NULL, NULL, &err);
-
- if ( err != CL_SUCCESS ) {
- if ( err == CL_DEVICE_NOT_FOUND ) {
- /* No GPU device in this platform */
- continue; /* Try next platform */
- } else {
- ERROR("Couldn't create OpenCL context: %s (%i)\n",
- clError(err), err);
- free(gctx);
- return NULL;
- }
- } else {
- STATUS("Using OpenCL platform %i (%i total)\n",
- iplat, nplat);
- have_ctx = 1;
- break;
- }
- }
-
- if ( !have_ctx ) {
- ERROR("Couldn't find a GPU device in any platform.\n");
- return NULL;
- }
-
- dev = get_cl_dev(gctx->ctx, dev_num);
-
- gctx->cq = clCreateCommandQueue(gctx->ctx, dev, 0, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't create OpenCL command queue\n");
- free(gctx);
- return NULL;
- }
-
- /* Create a single-precision version of the scattering factors */
- intensities_size = IDIM*IDIM*IDIM*sizeof(cl_float);
- intensities_ptr = malloc(intensities_size);
- if ( intensities != NULL ) {
- int i;
- for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- intensities_ptr[i] = intensities[i];
- }
- } else {
- int i;
- for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- intensities_ptr[i] = 100.0; /* Does nothing */
- }
- strncat(cflags, "-DFLAT_INTENSITIES ", 511-strlen(cflags));
- }
- gctx->intensities = clCreateBuffer(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- intensities_size, intensities_ptr, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't allocate intensities memory\n");
- free(gctx);
- return NULL;
- }
- free(intensities_ptr);
-
- if ( sym != NULL ) {
-
- int i, n;
- SymOpList *pg;
- size_t islen = 0;
-
- insert_stuff = malloc(16384);
- if ( insert_stuff == NULL ) return NULL;
- insert_stuff[0] = '\0';
-
- pg = get_pointgroup(sym);
- n = num_equivs(pg, NULL);
- for ( i=0; i<n; i++ ) {
-
- IntegerMatrix *op = get_symop(pg, NULL, i);
- char line[1024];
-
- snprintf(line, 1023,
- "val += lookup_flagged_intensity(intensities, "
- "flags, %s, %s, %s);\n\t",
- get_matrix_name(op, 0),
- get_matrix_name(op, 1),
- get_matrix_name(op, 2));
-
- islen += strlen(line);
- if ( islen > 16383 ) {
- ERROR("Too many symmetry operators.\n");
- return NULL;
- }
- strcat(insert_stuff, line);
-
- }
-
- free_symoplist(pg);
-
- printf("Inserting --->%s<---\n", insert_stuff);
-
- } else {
- if ( intensities != NULL ) {
- ERROR("You gave me an intensities file but no point"
- " group. I'm assuming '1'.\n");
- strncat(cflags, "-DPG1 ", 511-strlen(cflags));
- }
- }
-
- /* Create a flag array */
- flags_size = IDIM*IDIM*IDIM*sizeof(cl_float);
- flags_ptr = malloc(flags_size);
- if ( flags != NULL ) {
- int i;
- for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- flags_ptr[i] = flags[i];
- }
- } else {
- int i;
- for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
- flags_ptr[i] = 1.0;
- }
- }
- gctx->flags = clCreateBuffer(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- flags_size, flags_ptr, &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't allocate flag buffer\n");
- free(gctx);
- return NULL;
- }
- free(flags_ptr);
-
- gctx->prog = load_program_from_string((char *)data_diffraction_cl,
- data_diffraction_cl_len, gctx->ctx,
- dev, &err, cflags, insert_stuff);
- if ( err != CL_SUCCESS ) {
- free(gctx);
- return NULL;
- }
-
- gctx->kern = clCreateKernel(gctx->prog, "diffraction", &err);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't create kernel\n");
- free(gctx);
- return NULL;
- }
-
- gctx->max_sinc_lut = 0;
- gctx->sinc_lut_ptrs = NULL;
- gctx->sinc_luts = NULL;
-
- clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE,
- sizeof(size_t), &maxwgsize, NULL);
- STATUS("Maximum work group size = %lli\n", (long long int)maxwgsize);
-
- return gctx;
-}
-
-
-void cleanup_gpu(struct gpu_context *gctx)
-{
- int i;
-
- clReleaseProgram(gctx->prog);
- clReleaseMemObject(gctx->intensities);
-
- /* Release LUTs */
- for ( i=1; i<=gctx->max_sinc_lut; i++ ) {
- if ( gctx->sinc_lut_ptrs[i-1] != NULL ) {
- clReleaseMemObject(gctx->sinc_luts[i-1]);
- free(gctx->sinc_lut_ptrs[i-1]);
- }
- }
-
- free(gctx->sinc_luts);
- free(gctx->sinc_lut_ptrs);
-
- clReleaseCommandQueue(gctx->cq);
- clReleaseContext(gctx->ctx);
- free(gctx);
-}