From 0f6ebe5649902dea9456c6598bc28e694c422336 Mon Sep 17 00:00:00 2001 From: Thomas White Date: Tue, 9 Feb 2010 17:24:16 +0100 Subject: Use OpenCL for much faster diffraction calculation --- configure.ac | 15 ++- data/Makefile.am | 2 +- data/diffraction.cl | 106 +++++++++++++++++++ src/Makefile.am | 4 + src/diffraction-gpu.c | 286 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/diffraction-gpu.h | 34 ++++++ src/diffraction.c | 6 +- src/image.h | 4 +- src/pattern_sim.c | 16 ++- 9 files changed, 464 insertions(+), 9 deletions(-) create mode 100644 data/diffraction.cl create mode 100644 src/diffraction-gpu.c create mode 100644 src/diffraction-gpu.h diff --git a/configure.ac b/configure.ac index 49a90a8a..fe58ccbd 100644 --- a/configure.ac +++ b/configure.ac @@ -30,13 +30,24 @@ AC_ARG_WITH(gsl, GSL_LIBS="-L$withval/lib -lgsl -lgslcblas"], [GSL_LIBS="-lgsl -lgslcblas"]) +AC_MSG_CHECKING([whether to use OpenCL]) +AC_ARG_ENABLE(opencl, +[AS_HELP_STRING([--enable-opencl], [Enable the use of OpenCL])], +[OPENCL_CFLAGS="" + OPENCL_LIBS="-lOpenCL" + AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_OPENCL], [1], [Define to 1 if OpenCL is available]) + have_opencl=true], +[AC_MSG_RESULT([no])]) + havegtk=false AM_PATH_GTK_2_0(2.0.0, havegtk=true, AC_MSG_WARN([GTK not found. hdfsee will not be built.])) AM_CONDITIONAL([HAVE_GTK], test x$havegtk = xtrue) +AM_CONDITIONAL([HAVE_OPENCL], test x$have_opencl = xtrue) -CFLAGS="$CFLAGS $HDF5_CFLAGS $GTK_CFLAGS $GSL_CFLAGS" -LIBS="$LIBS $HDF5_LIBS -lm -lz $GSL_LIBS $GTK_LIBS -lgthread-2.0 -lutil" +CFLAGS="$CFLAGS $HDF5_CFLAGS $GTK_CFLAGS $GSL_CFLAGS $OPENCL_CFLAGS" +LIBS="$LIBS $HDF5_LIBS -lm -lz $GSL_LIBS $GTK_LIBS $OPENCL_LIBS -lgthread-2.0 -lutil" AC_OUTPUT(Makefile src/Makefile data/Makefile) diff --git a/data/Makefile.am b/data/Makefile.am index b4cebb2d..b55597f0 100644 --- a/data/Makefile.am +++ b/data/Makefile.am @@ -2,4 +2,4 @@ hdfseedir = $(datadir)/hdfsee hdfsee_DATA = displaywindow.ui crystfeldir = $(datadir)/crystfel -crystfel_DATA = sfac/* +crystfel_DATA = sfac/* diffraction.cl diff --git a/data/diffraction.cl b/data/diffraction.cl new file mode 100644 index 00000000..5c75d70b --- /dev/null +++ b/data/diffraction.cl @@ -0,0 +1,106 @@ +/* + * diffraction.cl + * + * GPU calculation kernel for truncated lattice diffraction + * + * (c) 2006-2010 Thomas White + * + * Part of CrystFEL - crystallography with a FEL + * + */ + + +#define INDMAX 30 +#define IDIM (INDMAX*2 +1) + + +float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k, + float *ttp) +{ + float rx, ry, r; + float ttx, tty, tt; + + rx = ((float)x - cx)/res; + ry = ((float)y - cy)/res; + + r = sqrt(pow(rx, 2.0) + pow(ry, 2.0)); + + ttx = atan2(rx, clen); + tty = atan2(ry, clen); + tt = atan2(r, clen); + + *ttp = tt; + + return (float4)(k*sin(ttx), k*sin(tty), k-k*cos(tt), 0.0); +} + + +float lattice_factor(float16 cell, float4 q) +{ + float f1, f2, f3; + float4 Udotq; + const int na = 8; + const int nb = 8; + const int nc = 8; + + Udotq.x = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z; + Udotq.y = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z; + Udotq.z = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z; + + /* At exact Bragg condition, f1 = na */ + f1 = sin(M_PI*(float)na*Udotq.x) / sin(M_PI*Udotq.x); + + /* At exact Bragg condition, f2 = nb */ + f2 = sin(M_PI*(float)nb*Udotq.y) / sin(M_PI*Udotq.y); + + /* At exact Bragg condition, f3 = nc */ + f3 = sin(M_PI*(float)nc*Udotq.z) / sin(M_PI*Udotq.z); + + /* At exact Bragg condition, this will multiply the molecular + * part of the structure factor by the number of unit cells, + * as desired (more scattering from bigger crystal!) */ + return f1 * f2 * f3; +} + + +float2 get_sfac(global float2 *sfacs, float16 cell, float4 q) +{ + signed int h, k, l; + int idx; + + h = rint(cell.s0*q.x + cell.s1*q.y + cell.s2*q.z); /* h */ + k = rint(cell.s3*q.x + cell.s4*q.y + cell.s5*q.z); /* k */ + l = rint(cell.s6*q.x + cell.s7*q.y + cell.s8*q.z); /* l */ + + if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) { + return 100.0; + } + + if ( h < 0 ) h += IDIM; + if ( k < 0 ) k += IDIM; + if ( l < 0 ) l += IDIM; + + idx = h + (IDIM*k) + (IDIM*IDIM*l); + return sfacs[idx]; +} + + +kernel void diffraction(global float2 *diff, global float *tt, float k, + int w, float cx, float cy, + float res, float clen, float16 cell, + global float2 *sfacs) +{ + float ttv; + const int x = get_global_id(0); + const int y = get_global_id(1); + float f_lattice; + float2 f_molecule; + + float4 q = get_q(x, y, cx, cy, res, clen, k, &ttv); + + f_lattice = lattice_factor(cell, q); + f_molecule = get_sfac(sfacs, cell, q); + + diff[x+w*y] = f_molecule*f_lattice; + tt[x+w*y] = ttv; +} diff --git a/src/Makefile.am b/src/Makefile.am index 6a40c9a0..824211f8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -10,6 +10,10 @@ AM_CPPFLAGS = -DDATADIR=\""$(datadir)"\" pattern_sim_SOURCES = pattern_sim.c diffraction.c utils.c image.c cell.c \ hdf5-file.c detector.c sfac.c intensities.c \ reflections.c +if HAVE_OPENCL +pattern_sim_SOURCES += diffraction-gpu.c +endif + pattern_sim_LDADD = @LIBS@ process_hkl_SOURCES = process_hkl.c sfac.c statistics.c cell.c utils.c \ diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c new file mode 100644 index 00000000..4172da3f --- /dev/null +++ b/src/diffraction-gpu.c @@ -0,0 +1,286 @@ +/* + * diffraction-gpu.c + * + * Calculate diffraction patterns by Fourier methods (GPU version) + * + * (c) 2006-2010 Thomas White + * + * Part of CrystFEL - crystallography with a FEL + * + */ + + +#include +#include +#include +#include +#include +#include + +#include "image.h" +#include "utils.h" +#include "cell.h" +#include "diffraction.h" +#include "sfac.h" + + +#define SAMPLING (5) +#define BWSAMPLING (10) +#define BANDWIDTH (1.0 / 100.0) + + +static cl_device_id get_first_dev(cl_context ctx) +{ + cl_device_id dev; + cl_int r; + + r = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, sizeof(dev), &dev, NULL); + if ( r != CL_SUCCESS ) { + ERROR("Couldn't get device\n"); + return 0; + } + + return dev; +} + + +static void show_build_log(cl_program prog, cl_device_id dev) +{ + cl_int r; + char log[4096]; + size_t s; + + r = clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 4096, log, + &s); + + STATUS("%s\n", log); +} + + +static cl_program load_program(const char *filename, cl_context ctx, + cl_device_id dev, cl_int *err) +{ + FILE *fh; + cl_program prog; + char *source; + size_t len; + cl_int r; + + fh = fopen(filename, "r"); + if ( fh == NULL ) { + ERROR("Couldn't open '%s'\n", filename); + *err = CL_INVALID_PROGRAM; + return 0; + } + source = malloc(16384); + len = fread(source, 1, 16383, fh); + fclose(fh); + source[len] = '\0'; + + prog = clCreateProgramWithSource(ctx, 1, (const char **)&source, + NULL, err); + if ( *err != CL_SUCCESS ) { + ERROR("Couldn't load source\n"); + return 0; + } + + r = clBuildProgram(prog, 0, NULL, "-Werror", NULL, NULL); + if ( r != CL_SUCCESS ) { + ERROR("Couldn't build program\n"); + show_build_log(prog, dev); + *err = r; + return 0; + } + + *err = CL_SUCCESS; + return prog; +} + + +void get_diffraction_gpu(struct image *image, int na, int nb, int nc, + int no_sfac) +{ + cl_uint nplat; + cl_platform_id platforms[8]; + cl_context_properties prop[3]; + cl_context ctx; + cl_int err; + cl_command_queue cq; + cl_program prog; + cl_device_id dev; + cl_kernel kern; + double ax, ay, az; + double bx, by, bz; + double cx, cy, cz; + double a, b, c, d; + float kc; + const size_t dims[2] = {1024, 1024}; + + cl_mem sfacs; + size_t sfac_size; + float *sfac_ptr; + cl_mem tt; + size_t tt_size; + float *tt_ptr; + int x, y; + cl_float16 cell; + cl_mem diff; + size_t diff_size; + float *diff_ptr; + int i; + + if ( image->molecule == NULL ) return; + + /* Generate structure factors if required */ + if ( !no_sfac ) { + if ( image->molecule->reflections == NULL ) { + get_reflections_cached(image->molecule, + ph_lambda_to_en(image->lambda)); + } + } + + cell_get_cartesian(image->molecule->cell, &ax, &ay, &az, + &bx, &by, &bz, + &cx, &cy, &cz); + cell[0] = ax; cell[1] = ay; cell[2] = az; + cell[3] = bx; cell[4] = by; cell[5] = bz; + cell[6] = cx; cell[7] = cy; cell[8] = cz; + + cell_get_parameters(image->molecule->cell, + &a, &b, &c, &d, &d, &d); + STATUS("Particle size = %i x %i x %i (=%5.2f x %5.2f x %5.2f nm)\n", + na, nb, nc, na*a/1.0e-9, nb*b/1.0e-9, nc*c/1.0e-9); + + err = clGetPlatformIDs(8, platforms, &nplat); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't get platform IDs: %i\n", err); + return; + } + STATUS("%i platforms\n", nplat); + prop[0] = CL_CONTEXT_PLATFORM; + prop[1] = (cl_context_properties)platforms[0]; + prop[2] = 0; + + ctx = clCreateContextFromType(prop, CL_DEVICE_TYPE_GPU, NULL, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't create OpenCL context: %i\n", err); + switch ( err ) { + case CL_INVALID_PLATFORM : + ERROR("Invalid platform\n"); + break; + case CL_OUT_OF_HOST_MEMORY : + ERROR("Out of memory\n"); + break; + } + return; + } + + dev = get_first_dev(ctx); + + cq = clCreateCommandQueue(ctx, dev, 0, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't create OpenCL command queue\n"); + return; + } + + /* Create buffer for the picture */ + diff_size = image->width*image->height*sizeof(cl_float)*2; /* complex */ + diff = clCreateBuffer(ctx, CL_MEM_READ_WRITE, diff_size, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't allocate diffraction memory\n"); + return; + } + + /* Create a single-precision version of the scattering factors */ + sfac_size = IDIM*IDIM*sizeof(cl_float)*2; /* complex */ + sfac_ptr = malloc(IDIM*IDIM*sizeof(cl_float)*2); + for ( i=0; imolecule->reflections[i]); + sfac_ptr[2*i+1] = cimag(image->molecule->reflections[i]); + } + sfacs = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + sfac_size, sfac_ptr, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't allocate sfac memory\n"); + return; + } + + tt_size = image->width*image->height*sizeof(cl_float); + tt = clCreateBuffer(ctx, CL_MEM_READ_WRITE, tt_size, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't allocate twotheta memory\n"); + return; + } + + prog = load_program(DATADIR"/crystfel/diffraction.cl", ctx, dev, &err); + if ( err != CL_SUCCESS ) { + return; + } + + kern = clCreateKernel(prog, "diffraction", &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't create kernel\n"); + return; + } + + /* Calculate wavelength */ + kc = 1.0/image->lambda; /* Centre value */ + + clSetKernelArg(kern, 0, sizeof(cl_mem), &diff); + clSetKernelArg(kern, 1, sizeof(cl_mem), &tt); + clSetKernelArg(kern, 2, sizeof(cl_float), &kc); + clSetKernelArg(kern, 3, sizeof(cl_int), &image->width); + clSetKernelArg(kern, 4, sizeof(cl_float), &image->det.panels[0].cx); + clSetKernelArg(kern, 5, sizeof(cl_float), &image->det.panels[0].cy); + clSetKernelArg(kern, 6, sizeof(cl_float), &image->resolution); + clSetKernelArg(kern, 7, sizeof(cl_float), &image->camera_len); + clSetKernelArg(kern, 8, sizeof(cl_float16), &cell); + clSetKernelArg(kern, 9, sizeof(cl_mem), &sfacs); + + STATUS("Running...\n"); + err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL, + 0, NULL, NULL); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't enqueue kernel\n"); + return; + } + + diff_ptr = clEnqueueMapBuffer(cq, diff, CL_TRUE, CL_MAP_READ, 0, + diff_size, 0, NULL, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't map sfac buffer\n"); + return; + } + tt_ptr = clEnqueueMapBuffer(cq, tt, CL_TRUE, CL_MAP_READ, 0, + tt_size, 0, NULL, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't map tt buffer\n"); + return; + } + STATUS("Done!\n"); + + image->sfacs = calloc(image->width * image->height, + sizeof(double complex)); + image->twotheta = calloc(image->width * image->height, sizeof(double)); + + for ( x=0; xwidth; x++ ) { + for ( y=0; yheight; y++ ) { + + float re, im, tt; + + re = diff_ptr[2*(x + image->width*y)+0]; + im = diff_ptr[2*(x + image->width*y)+1]; + tt = tt_ptr[x + image->width*y]; + + image->sfacs[x + image->width*y] = re + I*im; + image->twotheta[x + image->width*y] = tt; + + } + } + + clReleaseProgram(prog); + clReleaseMemObject(sfacs); + clReleaseCommandQueue(cq); + clReleaseContext(ctx); + +} diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h new file mode 100644 index 00000000..687fecf3 --- /dev/null +++ b/src/diffraction-gpu.h @@ -0,0 +1,34 @@ +/* + * diffraction-gpu.h + * + * Calculate diffraction patterns by Fourier methods (GPU version) + * + * (c) 2006-2010 Thomas White + * + * Part of CrystFEL - crystallography with a FEL + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifndef DIFFRACTION_GPU_H +#define DIFFRACTION_GPU_H + +#include "image.h" +#include "cell.h" + +#if HAVE_OPENCL +extern void get_diffraction_gpu(struct image *image, int na, int nb, int nc, + int nosfac); +#else +static void get_diffraction_gpu(struct image *image, int na, int nb, int nc, + int nosfac) +{ + /* Do nothing */ + ERROR("This copy of CrystFEL was not compiled with OpenCL support.\n"); +} +#endif + +#endif /* DIFFRACTION_GPU_H */ diff --git a/src/diffraction.c b/src/diffraction.c index 88b50d1f..fb993bf6 100644 --- a/src/diffraction.c +++ b/src/diffraction.c @@ -23,9 +23,9 @@ #include "sfac.h" -#define SAMPLING (5) -#define BWSAMPLING (10) -#define BANDWIDTH (1.0 / 100.0) +#define SAMPLING (1) +#define BWSAMPLING (1) +#define BANDWIDTH (0.0 / 100.0) static double lattice_factor(struct rvec q, double ax, double ay, double az, diff --git a/src/image.h b/src/image.h index 144ebe42..78194433 100644 --- a/src/image.h +++ b/src/image.h @@ -88,8 +88,8 @@ struct image { * If FORMULATION_PIXELSIZE, then pixel_size only is needed.*/ FormulationMode fmode; double pixel_size; - double camera_len; - double resolution; /* pixels per metre */ + float camera_len; + float resolution; /* pixels per metre */ /* Wavelength must always be given */ double lambda; /* Wavelength in m */ diff --git a/src/pattern_sim.c b/src/pattern_sim.c index d0aad29b..e3d675d2 100644 --- a/src/pattern_sim.c +++ b/src/pattern_sim.c @@ -21,6 +21,7 @@ #include "image.h" #include "diffraction.h" +#include "diffraction-gpu.h" #include "cell.h" #include "utils.h" #include "hdf5-file.h" @@ -38,6 +39,7 @@ static void show_help(const char *s) "\n" " -h, --help Display this help message.\n" " --simulation-details Show technical details of the simulation.\n" +" --gpu Use the GPU to speed up the calculation.\n" "\n" " --near-bragg Output h,k,l,I near Bragg conditions.\n" " -n, --number= Generate N images. Default 1.\n" @@ -157,6 +159,7 @@ int main(int argc, char *argv[]) int config_nonoise = 0; int config_nobloom = 0; int config_nosfac = 0; + int config_gpu = 0; int ndone = 0; /* Number of simulations done (images or not) */ int number = 1; /* Number used for filename of image */ int n_images = 1; /* Generate one image by default */ @@ -166,6 +169,7 @@ int main(int argc, char *argv[]) const struct option longopts[] = { {"help", 0, NULL, 'h'}, {"simulation-details", 0, &config_simdetails, 1}, + {"gpu", 0, &config_gpu, 1}, {"near-bragg", 0, &config_nearbragg, 1}, {"random-orientation", 0, NULL, 'r'}, {"number", 1, NULL, 'n'}, @@ -274,11 +278,20 @@ int main(int argc, char *argv[]) image.twotheta = NULL; image.hdr = NULL; - get_diffraction(&image, na, nb, nc, config_nosfac); + if ( config_gpu ) { + get_diffraction_gpu(&image, na, nb, nc, config_nosfac); + } else { + get_diffraction(&image, na, nb, nc, config_nosfac); + } if ( image.molecule == NULL ) { ERROR("Couldn't open molecule.pdb\n"); return 1; } + if ( image.sfacs == NULL ) { + ERROR("Diffraction calculation failed.\n"); + goto skip; + } + record_image(&image, !config_nowater, !config_nonoise, !config_nobloom); @@ -305,6 +318,7 @@ int main(int argc, char *argv[]) free(image.sfacs); free(image.twotheta); +skip: ndone++; if ( n_images && (ndone >= n_images) ) done = 1; -- cgit v1.2.3