aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/diffraction-gpu.c175
-rw-r--r--src/diffraction-gpu.h12
-rw-r--r--src/indexamajig.c5
-rw-r--r--src/pattern_sim.c4
4 files changed, 109 insertions, 87 deletions
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index 29bfe3e8..f71df74e 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -46,15 +46,65 @@ struct gpu_context
cl_mem diff;
size_t diff_size;
- cl_mem func_a;
- cl_float *func_a_ptr;
- cl_mem func_b;
- cl_float *func_b_ptr;
- cl_mem func_c;
- cl_float *func_c_ptr;
+ /* Array of sinc LUTs */
+ cl_mem *sinc_luts;
+ cl_float **sinc_lut_ptrs;
+ int max_sinc_lut; /* Number of LUTs, i.e. one greater than the maximum
+ * index. This equals the highest allowable "n". */
};
+/* Ensure that the sinc LUT for "n" unit cells exists, creating it if needed.
+ *
+ * The LUT for a given n lives at index n-1 of gctx->sinc_luts (the OpenCL
+ * image) and gctx->sinc_lut_ptrs (the host copy).  Both arrays are grown on
+ * demand.  An entry whose host pointer is already non-NULL is left untouched,
+ * so repeated calls with the same n do not leak a buffer and an image.
+ *
+ * On failure an error is reported and the entry is left empty (both pointers
+ * NULL), so a later call will try again. */
+static void check_sinc_lut(struct gpu_context *gctx, int n)
+{
+	cl_int err;
+	size_t sinc_lut_size;
+	cl_image_format fmt;
+	cl_float *lut;
+	int i;
+
+	/* Entries are stored at index n-1, so n must be at least 1 */
+	if ( n < 1 ) {
+		ERROR("Invalid sinc LUT size: %i\n", n);
+		return;
+	}
+
+	if ( n > gctx->max_sinc_lut ) {
+
+		cl_mem *new_luts;
+		cl_float **new_ptrs;
+
+		STATUS("Allocating %i -> %i\n", gctx->max_sinc_lut, n);
+
+		/* Go via temporaries so the old arrays survive a failed realloc */
+		new_luts = realloc(gctx->sinc_luts,
+		                   n*sizeof(*gctx->sinc_luts));
+		if ( new_luts == NULL ) {
+			ERROR("Couldn't allocate memory for sinc LUTs\n");
+			return;
+		}
+		gctx->sinc_luts = new_luts;
+
+		new_ptrs = realloc(gctx->sinc_lut_ptrs,
+		                   n*sizeof(*gctx->sinc_lut_ptrs));
+		if ( new_ptrs == NULL ) {
+			ERROR("Couldn't allocate memory for sinc LUTs\n");
+			return;
+		}
+		gctx->sinc_lut_ptrs = new_ptrs;
+
+		for ( i=gctx->max_sinc_lut; i<n; i++ ) {
+			STATUS("zeroing %i\n", i);
+			gctx->sinc_luts[i] = NULL;
+			gctx->sinc_lut_ptrs[i] = NULL;
+		}
+
+		gctx->max_sinc_lut = n;
+	}
+
+	/* This LUT was already created by an earlier call */
+	if ( gctx->sinc_lut_ptrs[n-1] != NULL ) return;
+
+	fmt.image_channel_order = CL_INTENSITY;
+	fmt.image_channel_data_type = CL_FLOAT;
+	sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float);
+
+	/* Create a new sinc LUT */
+	lut = malloc(sinc_lut_size);
+	if ( lut == NULL ) {
+		ERROR("Couldn't allocate memory for sinc LUT %i\n", n);
+		return;
+	}
+
+	/* x=0 would be 0/0 in the formula below; the limiting value is n */
+	lut[0] = n;
+	if ( n == 1 ) {
+		for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+			lut[i] = 1.0;
+		}
+	} else {
+		for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+			double x, val;
+			x = (double)i/SINC_LUT_ELEMENTS;
+			val = fabs(sin(M_PI*n*x)/sin(M_PI*x));
+			lut[i] = val;
+		}
+	}
+
+	gctx->sinc_luts[n-1] = clCreateImage2D(gctx->ctx,
+	                                CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+	                                &fmt, SINC_LUT_ELEMENTS, 1, 0,
+	                                lut, &err);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't create sinc LUT %i: %s\n", n, clError(err));
+		gctx->sinc_luts[n-1] = NULL;
+		free(lut);
+		return;
+	}
+
+	/* Only publish the host pointer once the image exists: a non-NULL
+	 * pointer is what marks this entry as needing release later. */
+	gctx->sinc_lut_ptrs[n-1] = lut;
+}
+
+
void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
int na, int nb, int nc, int no_sfac)
{
@@ -102,6 +152,11 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
ncells[2] = nc;
ncells[3] = 0; /* unused */
+ /* Ensure all required LUTs are available */
+ check_sinc_lut(gctx, na);
+ check_sinc_lut(gctx, nb);
+ check_sinc_lut(gctx, nc);
+
err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 0: %s\n", clError(err));
@@ -156,6 +211,27 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
return;
}
+	/* LUT in 'a' direction.  The result must be stored in err, otherwise
+	 * the check below tests the outcome of an earlier call. */
+	err = clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[na-1]);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set arg 16: %s\n", clError(err));
+		return;
+	}
+
+	/* LUT in 'b' direction */
+	err = clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nb-1]);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set arg 17: %s\n", clError(err));
+		return;
+	}
+
+	/* LUT in 'c' direction */
+	err = clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->sinc_luts[nc-1]);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set arg 18: %s\n", clError(err));
+		return;
+	}
+
/* Iterate over panels */
event = malloc(image->det.n_panels * sizeof(cl_event));
for ( p=0; p<image->det.n_panels; p++ ) {
@@ -267,7 +343,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
/* Setup the OpenCL stuff, create buffers, load the structure factor table */
struct gpu_context *setup_gpu(int no_sfac, struct image *image,
- struct molecule *molecule, int na, int nb, int nc)
+ struct molecule *molecule)
{
struct gpu_context *gctx;
cl_uint nplat;
@@ -278,8 +354,6 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
size_t sfac_size;
float *sfac_ptr;
size_t maxwgsize;
- size_t sinc_lut_size;
- cl_image_format fmt;
int i;
if ( molecule == NULL ) return NULL;
@@ -382,69 +456,12 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
return NULL;
}
- fmt.image_channel_order = CL_INTENSITY;
- fmt.image_channel_data_type = CL_FLOAT;
- sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float);
-
- /* Set up sinc LUT for a* direction */
- gctx->func_a_ptr = malloc(sinc_lut_size);
- gctx->func_a_ptr[0] = na;
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- double x, val;
- x = (double)i/SINC_LUT_ELEMENTS;
- val = fabs(sin(M_PI*na*x)/sin(M_PI*x));
- gctx->func_a_ptr[i] = val;
- }
- gctx->func_a = clCreateImage2D(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- &fmt, SINC_LUT_ELEMENTS, 1, 0,
- gctx->func_a_ptr, &err);
- clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->func_a);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 16: %s\n", clError(err));
- return NULL;
- }
-
- /* Set up sinc LUT for b* direction */
- gctx->func_b_ptr = malloc(sinc_lut_size);
- gctx->func_b_ptr[0] = nb;
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- double x, val;
- x = (double)i/SINC_LUT_ELEMENTS;
- val = fabs(sin(M_PI*nb*x)/sin(M_PI*x));
- gctx->func_b_ptr[i] = val;
- }
- gctx->func_b = clCreateImage2D(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- &fmt, SINC_LUT_ELEMENTS, 1, 0,
- gctx->func_b_ptr, &err);
- clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->func_b);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 17: %s\n", clError(err));
- return NULL;
- }
-
- /* Set up sinc LUT for c* direction */
- gctx->func_c_ptr = malloc(sinc_lut_size);
- gctx->func_c_ptr[0] = nc;
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- double x, val;
- x = (double)i/SINC_LUT_ELEMENTS;
- val = fabs(sin(M_PI*nc*x)/sin(M_PI*x));
- gctx->func_c_ptr[i] = val;
- }
- gctx->func_c = clCreateImage2D(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- &fmt, SINC_LUT_ELEMENTS, 1, 0,
- gctx->func_c_ptr, &err);
- clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->func_c);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 18: %s\n", clError(err));
- return NULL;
- }
-
STATUS("done\n");
+ gctx->max_sinc_lut = 0;
+ gctx->sinc_lut_ptrs = NULL;
+ gctx->sinc_luts = NULL;
+
clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(size_t), &maxwgsize, NULL);
STATUS("Maximum work group size = %lli\n", (long long int)maxwgsize);
@@ -455,16 +472,22 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
void cleanup_gpu(struct gpu_context *gctx)
{
+ int i;
+
clReleaseProgram(gctx->prog);
clReleaseMemObject(gctx->diff);
clReleaseMemObject(gctx->tt);
clReleaseMemObject(gctx->sfacs);
- clReleaseMemObject(gctx->func_a);
- clReleaseMemObject(gctx->func_b);
- clReleaseMemObject(gctx->func_c);
- free(gctx->func_a_ptr);
- free(gctx->func_b_ptr);
- free(gctx->func_c_ptr);
+
+	/* Release LUTs.  A slot holds a LUT only if its host pointer is
+	 * non-NULL; slots that were never filled are skipped. */
+	for ( i=1; i<=gctx->max_sinc_lut; i++ ) {
+		if ( gctx->sinc_lut_ptrs[i-1] != NULL ) {
+			STATUS("freeing %i\n", i-1);
+			clReleaseMemObject(gctx->sinc_luts[i-1]);
+			free(gctx->sinc_lut_ptrs[i-1]);
+		}
+	}
+
+	/* The two arrays themselves come from realloc() and must be freed as
+	 * well.  They are still NULL if no LUT was ever requested, which
+	 * free() accepts. */
+	free(gctx->sinc_lut_ptrs);
+	free(gctx->sinc_luts);
+
clReleaseCommandQueue(gctx->cq);
clReleaseContext(gctx->ctx);
free(gctx);
diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h
index c0ab15e3..a8bd5cfb 100644
--- a/src/diffraction-gpu.h
+++ b/src/diffraction-gpu.h
@@ -23,23 +23,23 @@ struct gpu_context;
#if HAVE_OPENCL
-extern void get_diffraction_gpu(struct gpu_context *gctx, struct image *image);
+extern void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
+ int na, int nb, int nc);
extern struct gpu_context *setup_gpu(int no_sfac, struct image *image,
- struct molecule *molecule,
- int na, int nb, int nc);
+ struct molecule *molecule);
extern void cleanup_gpu(struct gpu_context *gctx);
#else
+/* Stub used when HAVE_OPENCL is not set: reports that OpenCL support is
+ * missing and otherwise does nothing. */
-static void get_diffraction_gpu(struct gpu_context *gctx, struct image *image)
+static void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
+ int na, int nb, int nc)
{
/* Do nothing */
ERROR("This copy of CrystFEL was not compiled with OpenCL support.\n");
}
+/* Stub used when HAVE_OPENCL is not set: no GPU context can be created, so
+ * this always returns NULL. */
static struct gpu_context *setup_gpu(int no_sfac, struct image *image,
- struct molecule *molecule,
- int na, int nb, int nc)
+ struct molecule *molecule)
{
return NULL;
}
diff --git a/src/indexamajig.c b/src/indexamajig.c
index f1c5f743..93a95922 100644
--- a/src/indexamajig.c
+++ b/src/indexamajig.c
@@ -119,12 +119,11 @@ static void simulate_and_write(struct image *template,
/* Set up GPU if necessary */
if ( *gctx == NULL ) {
- *gctx = setup_gpu(0, &image, image.molecule,
- 24, 24, 40);
+ *gctx = setup_gpu(0, &image, image.molecule);
}
if ( *gctx != NULL ) {
- get_diffraction_gpu(*gctx, &image);
+ get_diffraction_gpu(*gctx, &image, 8, 8, 8);
} else {
get_diffraction(&image, 8, 8, 8, 0, 0);
}
diff --git a/src/pattern_sim.c b/src/pattern_sim.c
index 4eee7c57..65e020ac 100644
--- a/src/pattern_sim.c
+++ b/src/pattern_sim.c
@@ -276,9 +276,9 @@ int main(int argc, char *argv[])
if ( config_gpu ) {
if ( gctx == NULL ) {
gctx = setup_gpu(config_nosfac, &image,
- image.molecule, na, nb, nc);
+ image.molecule);
}
- get_diffraction_gpu(gctx, &image);
+ get_diffraction_gpu(gctx, &image, na, nb, nc);
} else {
get_diffraction(&image, na, nb, nc, config_nosfac,
!config_nowater);