aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2010-02-09 17:24:16 +0100
committerThomas White <taw@physics.org>2010-02-17 10:33:13 +0100
commit0f6ebe5649902dea9456c6598bc28e694c422336 (patch)
tree3ce031ca3c8ecf4fb132fa90aeae7373e0d5b0b6 /src
parent89bd4cc23c5ac40dd0060d362f663819a5f4ae22 (diff)
Use OpenCL for much faster diffraction calculation
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am4
-rw-r--r--src/diffraction-gpu.c286
-rw-r--r--src/diffraction-gpu.h34
-rw-r--r--src/diffraction.c6
-rw-r--r--src/image.h4
-rw-r--r--src/pattern_sim.c16
6 files changed, 344 insertions, 6 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 6a40c9a0..824211f8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -10,6 +10,10 @@ AM_CPPFLAGS = -DDATADIR=\""$(datadir)"\"
pattern_sim_SOURCES = pattern_sim.c diffraction.c utils.c image.c cell.c \
hdf5-file.c detector.c sfac.c intensities.c \
reflections.c
+if HAVE_OPENCL
+pattern_sim_SOURCES += diffraction-gpu.c
+endif
+
pattern_sim_LDADD = @LIBS@
process_hkl_SOURCES = process_hkl.c sfac.c statistics.c cell.c utils.c \
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
new file mode 100644
index 00000000..4172da3f
--- /dev/null
+++ b/src/diffraction-gpu.c
@@ -0,0 +1,286 @@
+/*
+ * diffraction-gpu.c
+ *
+ * Calculate diffraction patterns by Fourier methods (GPU version)
+ *
+ * (c) 2006-2010 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <complex.h>
+#include <CL/cl.h>
+
+#include "image.h"
+#include "utils.h"
+#include "cell.h"
+#include "diffraction.h"
+#include "sfac.h"
+
+
+#define SAMPLING (5)
+#define BWSAMPLING (10)
+#define BANDWIDTH (1.0 / 100.0)
+
+
+static cl_device_id get_first_dev(cl_context ctx)
+{
+ cl_device_id dev;
+ cl_int r;
+
+ r = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, sizeof(dev), &dev, NULL);
+ if ( r != CL_SUCCESS ) {
+ ERROR("Couldn't get device\n");
+ return 0;
+ }
+
+ return dev;
+}
+
+
+static void show_build_log(cl_program prog, cl_device_id dev)
+{
+ cl_int r;
+ char log[4096];
+ size_t s;
+
+ r = clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 4096, log,
+ &s);
+
+ STATUS("%s\n", log);
+}
+
+
+static cl_program load_program(const char *filename, cl_context ctx,
+ cl_device_id dev, cl_int *err)
+{
+ FILE *fh;
+ cl_program prog;
+ char *source;
+ size_t len;
+ cl_int r;
+
+ fh = fopen(filename, "r");
+ if ( fh == NULL ) {
+ ERROR("Couldn't open '%s'\n", filename);
+ *err = CL_INVALID_PROGRAM;
+ return 0;
+ }
+ source = malloc(16384);
+ len = fread(source, 1, 16383, fh);
+ fclose(fh);
+ source[len] = '\0';
+
+ prog = clCreateProgramWithSource(ctx, 1, (const char **)&source,
+ NULL, err);
+ if ( *err != CL_SUCCESS ) {
+ ERROR("Couldn't load source\n");
+ return 0;
+ }
+
+ r = clBuildProgram(prog, 0, NULL, "-Werror", NULL, NULL);
+ if ( r != CL_SUCCESS ) {
+ ERROR("Couldn't build program\n");
+ show_build_log(prog, dev);
+ *err = r;
+ return 0;
+ }
+
+ *err = CL_SUCCESS;
+ return prog;
+}
+
+
+void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
+ int no_sfac)
+{
+ cl_uint nplat;
+ cl_platform_id platforms[8];
+ cl_context_properties prop[3];
+ cl_context ctx;
+ cl_int err;
+ cl_command_queue cq;
+ cl_program prog;
+ cl_device_id dev;
+ cl_kernel kern;
+ double ax, ay, az;
+ double bx, by, bz;
+ double cx, cy, cz;
+ double a, b, c, d;
+ float kc;
+ const size_t dims[2] = {1024, 1024};
+
+ cl_mem sfacs;
+ size_t sfac_size;
+ float *sfac_ptr;
+ cl_mem tt;
+ size_t tt_size;
+ float *tt_ptr;
+ int x, y;
+ cl_float16 cell;
+ cl_mem diff;
+ size_t diff_size;
+ float *diff_ptr;
+ int i;
+
+ if ( image->molecule == NULL ) return;
+
+ /* Generate structure factors if required */
+ if ( !no_sfac ) {
+ if ( image->molecule->reflections == NULL ) {
+ get_reflections_cached(image->molecule,
+ ph_lambda_to_en(image->lambda));
+ }
+ }
+
+ cell_get_cartesian(image->molecule->cell, &ax, &ay, &az,
+ &bx, &by, &bz,
+ &cx, &cy, &cz);
+ cell[0] = ax; cell[1] = ay; cell[2] = az;
+ cell[3] = bx; cell[4] = by; cell[5] = bz;
+ cell[6] = cx; cell[7] = cy; cell[8] = cz;
+
+ cell_get_parameters(image->molecule->cell,
+ &a, &b, &c, &d, &d, &d);
+ STATUS("Particle size = %i x %i x %i (=%5.2f x %5.2f x %5.2f nm)\n",
+ na, nb, nc, na*a/1.0e-9, nb*b/1.0e-9, nc*c/1.0e-9);
+
+ err = clGetPlatformIDs(8, platforms, &nplat);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't get platform IDs: %i\n", err);
+ return;
+ }
+ STATUS("%i platforms\n", nplat);
+ prop[0] = CL_CONTEXT_PLATFORM;
+ prop[1] = (cl_context_properties)platforms[0];
+ prop[2] = 0;
+
+ ctx = clCreateContextFromType(prop, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't create OpenCL context: %i\n", err);
+ switch ( err ) {
+ case CL_INVALID_PLATFORM :
+ ERROR("Invalid platform\n");
+ break;
+ case CL_OUT_OF_HOST_MEMORY :
+ ERROR("Out of memory\n");
+ break;
+ }
+ return;
+ }
+
+ dev = get_first_dev(ctx);
+
+ cq = clCreateCommandQueue(ctx, dev, 0, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't create OpenCL command queue\n");
+ return;
+ }
+
+ /* Create buffer for the picture */
+ diff_size = image->width*image->height*sizeof(cl_float)*2; /* complex */
+ diff = clCreateBuffer(ctx, CL_MEM_READ_WRITE, diff_size, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't allocate diffraction memory\n");
+ return;
+ }
+
+ /* Create a single-precision version of the scattering factors */
+ sfac_size = IDIM*IDIM*sizeof(cl_float)*2; /* complex */
+ sfac_ptr = malloc(IDIM*IDIM*sizeof(cl_float)*2);
+ for ( i=0; i<IDIM*IDIM; i++ ) {
+ sfac_ptr[2*i+0] = creal(image->molecule->reflections[i]);
+ sfac_ptr[2*i+1] = cimag(image->molecule->reflections[i]);
+ }
+ sfacs = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+ sfac_size, sfac_ptr, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't allocate sfac memory\n");
+ return;
+ }
+
+ tt_size = image->width*image->height*sizeof(cl_float);
+ tt = clCreateBuffer(ctx, CL_MEM_READ_WRITE, tt_size, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't allocate twotheta memory\n");
+ return;
+ }
+
+ prog = load_program(DATADIR"/crystfel/diffraction.cl", ctx, dev, &err);
+ if ( err != CL_SUCCESS ) {
+ return;
+ }
+
+ kern = clCreateKernel(prog, "diffraction", &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't create kernel\n");
+ return;
+ }
+
+ /* Calculate wavelength */
+ kc = 1.0/image->lambda; /* Centre value */
+
+ clSetKernelArg(kern, 0, sizeof(cl_mem), &diff);
+ clSetKernelArg(kern, 1, sizeof(cl_mem), &tt);
+ clSetKernelArg(kern, 2, sizeof(cl_float), &kc);
+ clSetKernelArg(kern, 3, sizeof(cl_int), &image->width);
+ clSetKernelArg(kern, 4, sizeof(cl_float), &image->det.panels[0].cx);
+ clSetKernelArg(kern, 5, sizeof(cl_float), &image->det.panels[0].cy);
+ clSetKernelArg(kern, 6, sizeof(cl_float), &image->resolution);
+ clSetKernelArg(kern, 7, sizeof(cl_float), &image->camera_len);
+ clSetKernelArg(kern, 8, sizeof(cl_float16), &cell);
+ clSetKernelArg(kern, 9, sizeof(cl_mem), &sfacs);
+
+ STATUS("Running...\n");
+ err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL,
+ 0, NULL, NULL);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't enqueue kernel\n");
+ return;
+ }
+
+ diff_ptr = clEnqueueMapBuffer(cq, diff, CL_TRUE, CL_MAP_READ, 0,
+ diff_size, 0, NULL, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't map sfac buffer\n");
+ return;
+ }
+ tt_ptr = clEnqueueMapBuffer(cq, tt, CL_TRUE, CL_MAP_READ, 0,
+ tt_size, 0, NULL, NULL, &err);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't map tt buffer\n");
+ return;
+ }
+ STATUS("Done!\n");
+
+ image->sfacs = calloc(image->width * image->height,
+ sizeof(double complex));
+ image->twotheta = calloc(image->width * image->height, sizeof(double));
+
+ for ( x=0; x<image->width; x++ ) {
+ for ( y=0; y<image->height; y++ ) {
+
+ float re, im, tt;
+
+ re = diff_ptr[2*(x + image->width*y)+0];
+ im = diff_ptr[2*(x + image->width*y)+1];
+ tt = tt_ptr[x + image->width*y];
+
+ image->sfacs[x + image->width*y] = re + I*im;
+ image->twotheta[x + image->width*y] = tt;
+
+ }
+ }
+
+ clReleaseProgram(prog);
+ clReleaseMemObject(sfacs);
+ clReleaseCommandQueue(cq);
+ clReleaseContext(ctx);
+
+}
diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h
new file mode 100644
index 00000000..687fecf3
--- /dev/null
+++ b/src/diffraction-gpu.h
@@ -0,0 +1,34 @@
+/*
+ * diffraction-gpu.h
+ *
+ * Calculate diffraction patterns by Fourier methods (GPU version)
+ *
+ * (c) 2006-2010 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifndef DIFFRACTION_GPU_H
+#define DIFFRACTION_GPU_H
+
+#include "image.h"
+#include "cell.h"
+
+#if HAVE_OPENCL
+extern void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
+ int nosfac);
+#else
+static void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
+ int nosfac)
+{
+ /* Do nothing */
+ ERROR("This copy of CrystFEL was not compiled with OpenCL support.\n");
+}
+#endif
+
+#endif /* DIFFRACTION_GPU_H */
diff --git a/src/diffraction.c b/src/diffraction.c
index 88b50d1f..fb993bf6 100644
--- a/src/diffraction.c
+++ b/src/diffraction.c
@@ -23,9 +23,9 @@
#include "sfac.h"
-#define SAMPLING (5)
-#define BWSAMPLING (10)
-#define BANDWIDTH (1.0 / 100.0)
+#define SAMPLING (1)
+#define BWSAMPLING (1)
+#define BANDWIDTH (0.0 / 100.0)
static double lattice_factor(struct rvec q, double ax, double ay, double az,
diff --git a/src/image.h b/src/image.h
index 144ebe42..78194433 100644
--- a/src/image.h
+++ b/src/image.h
@@ -88,8 +88,8 @@ struct image {
* If FORMULATION_PIXELSIZE, then pixel_size only is needed.*/
FormulationMode fmode;
double pixel_size;
- double camera_len;
- double resolution; /* pixels per metre */
+ float camera_len;
+ float resolution; /* pixels per metre */
/* Wavelength must always be given */
double lambda; /* Wavelength in m */
diff --git a/src/pattern_sim.c b/src/pattern_sim.c
index d0aad29b..e3d675d2 100644
--- a/src/pattern_sim.c
+++ b/src/pattern_sim.c
@@ -21,6 +21,7 @@
#include "image.h"
#include "diffraction.h"
+#include "diffraction-gpu.h"
#include "cell.h"
#include "utils.h"
#include "hdf5-file.h"
@@ -38,6 +39,7 @@ static void show_help(const char *s)
"\n"
" -h, --help Display this help message.\n"
" --simulation-details Show technical details of the simulation.\n"
+" --gpu Use the GPU to speed up the calculation.\n"
"\n"
" --near-bragg Output h,k,l,I near Bragg conditions.\n"
" -n, --number=<N> Generate N images. Default 1.\n"
@@ -157,6 +159,7 @@ int main(int argc, char *argv[])
int config_nonoise = 0;
int config_nobloom = 0;
int config_nosfac = 0;
+ int config_gpu = 0;
int ndone = 0; /* Number of simulations done (images or not) */
int number = 1; /* Number used for filename of image */
int n_images = 1; /* Generate one image by default */
@@ -166,6 +169,7 @@ int main(int argc, char *argv[])
const struct option longopts[] = {
{"help", 0, NULL, 'h'},
{"simulation-details", 0, &config_simdetails, 1},
+ {"gpu", 0, &config_gpu, 1},
{"near-bragg", 0, &config_nearbragg, 1},
{"random-orientation", 0, NULL, 'r'},
{"number", 1, NULL, 'n'},
@@ -274,11 +278,20 @@ int main(int argc, char *argv[])
image.twotheta = NULL;
image.hdr = NULL;
- get_diffraction(&image, na, nb, nc, config_nosfac);
+ if ( config_gpu ) {
+ get_diffraction_gpu(&image, na, nb, nc, config_nosfac);
+ } else {
+ get_diffraction(&image, na, nb, nc, config_nosfac);
+ }
if ( image.molecule == NULL ) {
ERROR("Couldn't open molecule.pdb\n");
return 1;
}
+ if ( image.sfacs == NULL ) {
+ ERROR("Diffraction calculation failed.\n");
+ goto skip;
+ }
+
record_image(&image, !config_nowater, !config_nonoise,
!config_nobloom);
@@ -305,6 +318,7 @@ int main(int argc, char *argv[])
free(image.sfacs);
free(image.twotheta);
+skip:
ndone++;
if ( n_images && (ndone >= n_images) ) done = 1;