aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2010-02-09 17:24:16 +0100
committerThomas White <taw@physics.org>2010-02-17 10:33:13 +0100
commit0f6ebe5649902dea9456c6598bc28e694c422336 (patch)
tree3ce031ca3c8ecf4fb132fa90aeae7373e0d5b0b6
parent89bd4cc23c5ac40dd0060d362f663819a5f4ae22 (diff)
Use OpenCL for much faster diffraction calculation
-rw-r--r--configure.ac15
-rw-r--r--data/Makefile.am2
-rw-r--r--data/diffraction.cl106
-rw-r--r--src/Makefile.am4
-rw-r--r--src/diffraction-gpu.c286
-rw-r--r--src/diffraction-gpu.h34
-rw-r--r--src/diffraction.c6
-rw-r--r--src/image.h4
-rw-r--r--src/pattern_sim.c16
9 files changed, 464 insertions, 9 deletions
diff --git a/configure.ac b/configure.ac
index 49a90a8a..fe58ccbd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -30,13 +30,24 @@ AC_ARG_WITH(gsl,
GSL_LIBS="-L$withval/lib -lgsl -lgslcblas"],
[GSL_LIBS="-lgsl -lgslcblas"])
+AC_MSG_CHECKING([whether to use OpenCL])
+AC_ARG_ENABLE(opencl,
+[AS_HELP_STRING([--enable-opencl], [Enable the use of OpenCL])],
+[OPENCL_CFLAGS=""
+ OPENCL_LIBS="-lOpenCL"
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_OPENCL], [1], [Define to 1 if OpenCL is available])
+ have_opencl=true],
+[AC_MSG_RESULT([no])])
+
havegtk=false
AM_PATH_GTK_2_0(2.0.0, havegtk=true,
AC_MSG_WARN([GTK not found. hdfsee will not be built.]))
AM_CONDITIONAL([HAVE_GTK], test x$havegtk = xtrue)
+AM_CONDITIONAL([HAVE_OPENCL], test x$have_opencl = xtrue)
-CFLAGS="$CFLAGS $HDF5_CFLAGS $GTK_CFLAGS $GSL_CFLAGS"
-LIBS="$LIBS $HDF5_LIBS -lm -lz $GSL_LIBS $GTK_LIBS -lgthread-2.0 -lutil"
+CFLAGS="$CFLAGS $HDF5_CFLAGS $GTK_CFLAGS $GSL_CFLAGS $OPENCL_CFLAGS"
+LIBS="$LIBS $HDF5_LIBS -lm -lz $GSL_LIBS $GTK_LIBS $OPENCL_LIBS -lgthread-2.0 -lutil"
AC_OUTPUT(Makefile src/Makefile data/Makefile)
diff --git a/data/Makefile.am b/data/Makefile.am
index b4cebb2d..b55597f0 100644
--- a/data/Makefile.am
+++ b/data/Makefile.am
@@ -2,4 +2,4 @@ hdfseedir = $(datadir)/hdfsee
hdfsee_DATA = displaywindow.ui
crystfeldir = $(datadir)/crystfel
-crystfel_DATA = sfac/*
+crystfel_DATA = sfac/* diffraction.cl
diff --git a/data/diffraction.cl b/data/diffraction.cl
new file mode 100644
index 00000000..5c75d70b
--- /dev/null
+++ b/data/diffraction.cl
@@ -0,0 +1,106 @@
+/*
+ * diffraction.cl
+ *
+ * GPU calculation kernel for truncated lattice diffraction
+ *
+ * (c) 2006-2010 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+
+#define INDMAX 30
+#define IDIM (INDMAX*2 +1)
+
+
+float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k,
+ float *ttp)
+{
+ float rx, ry, r;
+ float ttx, tty, tt;
+
+ rx = ((float)x - cx)/res;
+ ry = ((float)y - cy)/res;
+
+ r = sqrt(pow(rx, 2.0) + pow(ry, 2.0));
+
+ ttx = atan2(rx, clen);
+ tty = atan2(ry, clen);
+ tt = atan2(r, clen);
+
+ *ttp = tt;
+
+ return (float4)(k*sin(ttx), k*sin(tty), k-k*cos(tt), 0.0);
+}
+
+
+float lattice_factor(float16 cell, float4 q)
+{
+ float f1, f2, f3;
+ float4 Udotq;
+ const int na = 8;
+ const int nb = 8;
+ const int nc = 8;
+
+ Udotq.x = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z;
+ Udotq.y = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z;
+ Udotq.z = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z;
+
+ /* At exact Bragg condition, f1 = na */
+ f1 = sin(M_PI*(float)na*Udotq.x) / sin(M_PI*Udotq.x);
+
+ /* At exact Bragg condition, f2 = nb */
+ f2 = sin(M_PI*(float)nb*Udotq.y) / sin(M_PI*Udotq.y);
+
+ /* At exact Bragg condition, f3 = nc */
+ f3 = sin(M_PI*(float)nc*Udotq.z) / sin(M_PI*Udotq.z);
+
+ /* At exact Bragg condition, this will multiply the molecular
+ * part of the structure factor by the number of unit cells,
+ * as desired (more scattering from bigger crystal!) */
+ return f1 * f2 * f3;
+}
+
+
+float2 get_sfac(global float2 *sfacs, float16 cell, float4 q)
+{
+ signed int h, k, l;
+ int idx;
+
+ h = rint(cell.s0*q.x + cell.s1*q.y + cell.s2*q.z); /* h */
+ k = rint(cell.s3*q.x + cell.s4*q.y + cell.s5*q.z); /* k */
+ l = rint(cell.s6*q.x + cell.s7*q.y + cell.s8*q.z); /* l */
+
+ if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) {
+ return 100.0;
+ }
+
+ if ( h < 0 ) h += IDIM;
+ if ( k < 0 ) k += IDIM;
+ if ( l < 0 ) l += IDIM;
+
+ idx = h + (IDIM*k) + (IDIM*IDIM*l);
+ return sfacs[idx];
+}
+
+
+kernel void diffraction(global float2 *diff, global float *tt, float k,
+ int w, float cx, float cy,
+ float res, float clen, float16 cell,
+ global float2 *sfacs)
+{
+ float ttv;
+ const int x = get_global_id(0);
+ const int y = get_global_id(1);
+ float f_lattice;
+ float2 f_molecule;
+
+ float4 q = get_q(x, y, cx, cy, res, clen, k, &ttv);
+
+ f_lattice = lattice_factor(cell, q);
+ f_molecule = get_sfac(sfacs, cell, q);
+
+ diff[x+w*y] = f_molecule*f_lattice;
+ tt[x+w*y] = ttv;
+}
diff --git a/src/Makefile.am b/src/Makefile.am
index 6a40c9a0..824211f8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -10,6 +10,10 @@ AM_CPPFLAGS = -DDATADIR=\""$(datadir)"\"
pattern_sim_SOURCES = pattern_sim.c diffraction.c utils.c image.c cell.c \
hdf5-file.c detector.c sfac.c intensities.c \
reflections.c
+if HAVE_OPENCL
+pattern_sim_SOURCES += diffraction-gpu.c
+endif
+
pattern_sim_LDADD = @LIBS@
process_hkl_SOURCES = process_hkl.c sfac.c statistics.c cell.c utils.c \
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
new file mode 100644
index 00000000..4172da3f
--- /dev/null
+++ b/src/diffraction-gpu.c
@@ -0,0 +1,286 @@
+/*
+ * diffraction-gpu.c
+ *
+ * Calculate diffraction patterns by Fourier methods (GPU version)
+ *
+ * (c) 2006-2010 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <complex.h>
+#include <CL/cl.h>
+
+#include "image.h"
+#include "utils.h"
+#include "cell.h"
+#include "diffraction.h"
+#include "sfac.h"
+
+
+#define SAMPLING (5)
+#define BWSAMPLING (10)
+#define BANDWIDTH (1.0 / 100.0)
+
+
+static cl_device_id get_first_dev(cl_context ctx)
+{
+ cl_device_id dev;
+ cl_int r;
+
+ r = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, sizeof(dev), &dev, NULL);
+ if ( r != CL_SUCCESS ) {
+ ERROR("Couldn't get device\n");
+ return 0;
+ }
+
+ return dev;
+}
+
+
+static void show_build_log(cl_program prog, cl_device_id dev)
+{
+ cl_int r;
+ char log[4096];
+ size_t s;
+
+ r = clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 4096, log,
+ &s);
+
+ STATUS("%s\n", log);
+}
+
+
+static cl_program load_program(const char *filename, cl_context ctx,
+ cl_device_id dev, cl_int *err)
+{
+ FILE *fh;
+ cl_program prog;
+ char *source;
+ size_t len;
+ cl_int r;
+
+ fh = fopen(filename, "r");
+ if ( fh == NULL ) {
+ ERROR("Couldn't open '%s'\n", filename);
+ *err = CL_INVALID_PROGRAM;
+ return 0;
+ }
+ source = malloc(16384);
+ len = fread(source, 1, 16383, fh);
+ fclose(fh);
+ source[len] = '\0';
+
+ prog = clCreateProgramWithSource(ctx, 1, (const char **)&source,
+ NULL, err);
+ if ( *err != CL_SUCCESS ) {
+ ERROR("Couldn't load source\n");
+ return 0;
+ }
+
+ r = clBuildProgram(prog, 0, NULL, "-Werror", NULL, NULL);
+ if ( r != CL_SUCCESS ) {
+ ERROR("Couldn't build program\n");
+ show_build_log(prog, dev);
+ *err = r;
+ return 0;
+ }
+
+ *err = CL_SUCCESS;
+ return prog;
+}
+
+
+void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
+ int no_sfac)
+{
+ cl_uint nplat;
+ cl_platform_id platforms[8];
+ cl_context_properties prop[3];
+ cl_context ctx;
+ cl_int err;
+ cl_command_queue cq;
+ cl_program prog;
+ cl_device_id dev;
+ cl_kernel kern;
+ double ax, ay, az;
+ double bx, by, bz;
+ double cx, cy, cz;
+ double a, b, c, d;
+ float kc;
+ const size_t dims[2] = {1024, 1024};
+
+ cl_mem sfacs;
+ size_t sfac_size;
+ float *sfac_ptr;
+ cl_mem tt;
+ size_t tt_size;
+ float *tt_ptr;
+ int x, y;
+ cl_float16 cell;
+ cl_mem diff;
+ size_t diff_size;
+ float *diff_ptr;
+ int i;
+
+ if ( image->molecule == NULL ) return;
+
+ /* Generate structure factors if required */
+ if ( !no_sfac ) {
+ if ( image->molecule->reflections == NULL ) {
+ get_reflections_cached(image->molecule,
+ ph_lambda_to_en(image->lambda));
+ }
+ }
+
+ cell_get_cartesian(image->molecule->cell, &ax, &ay, &az,
+ &bx, &by, &bz,
+ &cx, &cy, &cz);
+ cell[0] = ax; cell[1] = ay; cell[2] = az;
+ cell[3] = bx; cell[4] = by; cell[5] = bz;
+ cell[6] = cx; cell[7] = cy; cell[8] = cz;
+
+ cell_get_parameters(image->molecule->cell,
+ &a, &b, &c, &d, &d, &d);
+ STATUS("Particle size = %i x %i x %i (=%5.2f x %5.2f x %5.2f nm)\n",
+ na, nb, nc, na*a/1.0e-9, nb*b/1.0e-9, nc*c/1.0e-9);
+
+ err = clGetPlatformIDs(8, platforms, &nplat);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't get platform IDs: %i\n", err);
+ return;
+ }
+ STATUS("%i platforms\n", nplat);
+ prop[0] = CL_CONTEXT_PLATFORM;
+ prop[1] = (cl_context_properties)platforms[0];
+ prop[2] = 0;
+
+ ctx = clCreateContextFromType(prop, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't create OpenCL context: %i\n", err);
+ switch ( err ) {
+ case CL_INVALID_PLATFORM :
+ ERROR("Invalid platform\n");
+ break;
+ case CL_OUT_OF_HOST_MEMORY :
+ ERROR("Out of memory\n");
+ break;
+ }
+ return;
+ }
+
+ dev = get_first_dev(ctx);
+
+ cq = clCreateCommandQueue(ctx, dev, 0, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't create OpenCL command queue\n");
+ return;
+ }
+
+ /* Create buffer for the picture */
+ diff_size = image->width*image->height*sizeof(cl_float)*2; /* complex */
+ diff = clCreateBuffer(ctx, CL_MEM_READ_WRITE, diff_size, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't allocate diffraction memory\n");
+ return;
+ }
+
+ /* Create a single-precision version of the scattering factors */
+ sfac_size = IDIM*IDIM*sizeof(cl_float)*2; /* complex */
+ sfac_ptr = malloc(IDIM*IDIM*sizeof(cl_float)*2);
+ for ( i=0; i<IDIM*IDIM; i++ ) {
+ sfac_ptr[2*i+0] = creal(image->molecule->reflections[i]);
+ sfac_ptr[2*i+1] = cimag(image->molecule->reflections[i]);
+ }
+ sfacs = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+ sfac_size, sfac_ptr, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't allocate sfac memory\n");
+ return;
+ }
+
+ tt_size = image->width*image->height*sizeof(cl_float);
+ tt = clCreateBuffer(ctx, CL_MEM_READ_WRITE, tt_size, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't allocate twotheta memory\n");
+ return;
+ }
+
+ prog = load_program(DATADIR"/crystfel/diffraction.cl", ctx, dev, &err);
+ if ( err != CL_SUCCESS ) {
+ return;
+ }
+
+ kern = clCreateKernel(prog, "diffraction", &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't create kernel\n");
+ return;
+ }
+
+ /* Calculate wavelength */
+ kc = 1.0/image->lambda; /* Centre value */
+
+ clSetKernelArg(kern, 0, sizeof(cl_mem), &diff);
+ clSetKernelArg(kern, 1, sizeof(cl_mem), &tt);
+ clSetKernelArg(kern, 2, sizeof(cl_float), &kc);
+ clSetKernelArg(kern, 3, sizeof(cl_int), &image->width);
+ clSetKernelArg(kern, 4, sizeof(cl_float), &image->det.panels[0].cx);
+ clSetKernelArg(kern, 5, sizeof(cl_float), &image->det.panels[0].cy);
+ clSetKernelArg(kern, 6, sizeof(cl_float), &image->resolution);
+ clSetKernelArg(kern, 7, sizeof(cl_float), &image->camera_len);
+ clSetKernelArg(kern, 8, sizeof(cl_float16), &cell);
+ clSetKernelArg(kern, 9, sizeof(cl_mem), &sfacs);
+
+ STATUS("Running...\n");
+ err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL,
+ 0, NULL, NULL);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't enqueue kernel\n");
+ return;
+ }
+
+ diff_ptr = clEnqueueMapBuffer(cq, diff, CL_TRUE, CL_MAP_READ, 0,
+ diff_size, 0, NULL, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't map sfac buffer\n");
+ return;
+ }
+ tt_ptr = clEnqueueMapBuffer(cq, tt, CL_TRUE, CL_MAP_READ, 0,
+ tt_size, 0, NULL, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't map tt buffer\n");
+ return;
+ }
+ STATUS("Done!\n");
+
+ image->sfacs = calloc(image->width * image->height,
+ sizeof(double complex));
+ image->twotheta = calloc(image->width * image->height, sizeof(double));
+
+ for ( x=0; x<image->width; x++ ) {
+ for ( y=0; y<image->height; y++ ) {
+
+ float re, im, tt;
+
+ re = diff_ptr[2*(x + image->width*y)+0];
+ im = diff_ptr[2*(x + image->width*y)+1];
+ tt = tt_ptr[x + image->width*y];
+
+ image->sfacs[x + image->width*y] = re + I*im;
+ image->twotheta[x + image->width*y] = tt;
+
+ }
+ }
+
+ clReleaseProgram(prog);
+ clReleaseMemObject(sfacs);
+ clReleaseCommandQueue(cq);
+ clReleaseContext(ctx);
+
+}
diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h
new file mode 100644
index 00000000..687fecf3
--- /dev/null
+++ b/src/diffraction-gpu.h
@@ -0,0 +1,34 @@
+/*
+ * diffraction-gpu.h
+ *
+ * Calculate diffraction patterns by Fourier methods (GPU version)
+ *
+ * (c) 2006-2010 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifndef DIFFRACTION_GPU_H
+#define DIFFRACTION_GPU_H
+
+#include "image.h"
+#include "cell.h"
+
+#if HAVE_OPENCL
+extern void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
+ int nosfac);
+#else
+static void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
+ int nosfac)
+{
+ /* Do nothing */
+ ERROR("This copy of CrystFEL was not compiled with OpenCL support.\n");
+}
+#endif
+
+#endif /* DIFFRACTION_GPU_H */
diff --git a/src/diffraction.c b/src/diffraction.c
index 88b50d1f..fb993bf6 100644
--- a/src/diffraction.c
+++ b/src/diffraction.c
@@ -23,9 +23,9 @@
#include "sfac.h"
-#define SAMPLING (5)
-#define BWSAMPLING (10)
-#define BANDWIDTH (1.0 / 100.0)
+#define SAMPLING (1)
+#define BWSAMPLING (1)
+#define BANDWIDTH (0.0 / 100.0)
static double lattice_factor(struct rvec q, double ax, double ay, double az,
diff --git a/src/image.h b/src/image.h
index 144ebe42..78194433 100644
--- a/src/image.h
+++ b/src/image.h
@@ -88,8 +88,8 @@ struct image {
* If FORMULATION_PIXELSIZE, then pixel_size only is needed.*/
FormulationMode fmode;
double pixel_size;
- double camera_len;
- double resolution; /* pixels per metre */
+ float camera_len;
+ float resolution; /* pixels per metre */
/* Wavelength must always be given */
double lambda; /* Wavelength in m */
diff --git a/src/pattern_sim.c b/src/pattern_sim.c
index d0aad29b..e3d675d2 100644
--- a/src/pattern_sim.c
+++ b/src/pattern_sim.c
@@ -21,6 +21,7 @@
#include "image.h"
#include "diffraction.h"
+#include "diffraction-gpu.h"
#include "cell.h"
#include "utils.h"
#include "hdf5-file.h"
@@ -38,6 +39,7 @@ static void show_help(const char *s)
"\n"
" -h, --help Display this help message.\n"
" --simulation-details Show technical details of the simulation.\n"
+" --gpu Use the GPU to speed up the calculation.\n"
"\n"
" --near-bragg Output h,k,l,I near Bragg conditions.\n"
" -n, --number=<N> Generate N images. Default 1.\n"
@@ -157,6 +159,7 @@ int main(int argc, char *argv[])
int config_nonoise = 0;
int config_nobloom = 0;
int config_nosfac = 0;
+ int config_gpu = 0;
int ndone = 0; /* Number of simulations done (images or not) */
int number = 1; /* Number used for filename of image */
int n_images = 1; /* Generate one image by default */
@@ -166,6 +169,7 @@ int main(int argc, char *argv[])
const struct option longopts[] = {
{"help", 0, NULL, 'h'},
{"simulation-details", 0, &config_simdetails, 1},
+ {"gpu", 0, &config_gpu, 1},
{"near-bragg", 0, &config_nearbragg, 1},
{"random-orientation", 0, NULL, 'r'},
{"number", 1, NULL, 'n'},
@@ -274,11 +278,20 @@ int main(int argc, char *argv[])
image.twotheta = NULL;
image.hdr = NULL;
- get_diffraction(&image, na, nb, nc, config_nosfac);
+ if ( config_gpu ) {
+ get_diffraction_gpu(&image, na, nb, nc, config_nosfac);
+ } else {
+ get_diffraction(&image, na, nb, nc, config_nosfac);
+ }
if ( image.molecule == NULL ) {
ERROR("Couldn't open molecule.pdb\n");
return 1;
}
+ if ( image.sfacs == NULL ) {
+ ERROR("Diffraction calculation failed.\n");
+ goto skip;
+ }
+
record_image(&image, !config_nowater, !config_nonoise,
!config_nobloom);
@@ -305,6 +318,7 @@ int main(int argc, char *argv[])
free(image.sfacs);
free(image.twotheta);
+skip:
ndone++;
if ( n_images && (ndone >= n_images) ) done = 1;