aboutsummaryrefslogtreecommitdiff
path: root/src/diffraction-gpu.c
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2010-03-10 12:14:25 +0100
committerThomas White <taw@physics.org>2010-03-10 12:14:25 +0100
commitbda63e5f6eeed249f0effb4f990e887b3912dad6 (patch)
tree42070462d4c14d9e3b2cdc657a12c1f374d8d9c3 /src/diffraction-gpu.c
parentfd8f9222599951749edf3c6ba27289da04d84b82 (diff)
Allow multiple sinc LUTs
Diffstat (limited to 'src/diffraction-gpu.c')
-rw-r--r--src/diffraction-gpu.c175
1 files changed, 99 insertions, 76 deletions
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index 29bfe3e8..f71df74e 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -46,15 +46,65 @@ struct gpu_context
cl_mem diff;
size_t diff_size;
- cl_mem func_a;
- cl_float *func_a_ptr;
- cl_mem func_b;
- cl_float *func_b_ptr;
- cl_mem func_c;
- cl_float *func_c_ptr;
+ /* Array of sinc LUTs */
+ cl_mem *sinc_luts;
+ cl_float **sinc_lut_ptrs;
+ int max_sinc_lut; /* Number of LUTs, i.e. one greater than the maximum
+ * index. This equals the highest allowable "n". */
};
+static void check_sinc_lut(struct gpu_context *gctx, int n)
+{
+ cl_int err;
+ size_t sinc_lut_size;
+ cl_image_format fmt;
+ int i;
+
+ if ( n > gctx->max_sinc_lut ) {
+
+ STATUS("Allocating %i -> %i\n", gctx->max_sinc_lut, n);
+
+ gctx->sinc_luts = realloc(gctx->sinc_luts,
+ n*sizeof(*gctx->sinc_luts));
+ gctx->sinc_lut_ptrs = realloc(gctx->sinc_lut_ptrs,
+ n*sizeof(*gctx->sinc_lut_ptrs));
+
+ for ( i=gctx->max_sinc_lut; i<n; i++ ) {
+ STATUS("zeroing %i\n", i);
+ gctx->sinc_lut_ptrs[i] = NULL;
+ }
+
+ gctx->max_sinc_lut = n;
+ }
+
+ fmt.image_channel_order = CL_INTENSITY;
+ fmt.image_channel_data_type = CL_FLOAT;
+ sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float);
+
+ /* Create a new sinc LUT */
+ gctx->sinc_lut_ptrs[n-1] = malloc(sinc_lut_size);
+ gctx->sinc_lut_ptrs[n-1][0] = n;
+ if ( n == 1 ) {
+ for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+ gctx->sinc_lut_ptrs[n-1][i] = 1.0;
+ }
+ } else {
+ for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+ double x, val;
+ x = (double)i/SINC_LUT_ELEMENTS;
+ val = fabs(sin(M_PI*n*x)/sin(M_PI*x));
+ gctx->sinc_lut_ptrs[n-1][i] = val;
+ }
+ }
+
+ gctx->sinc_luts[n-1] = clCreateImage2D(gctx->ctx,
+ CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ &fmt, SINC_LUT_ELEMENTS, 1, 0,
+ gctx->sinc_lut_ptrs[n-1], &err);
+}
+
+
void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
int na, int nb, int nc, int no_sfac)
{
@@ -102,6 +152,11 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
ncells[2] = nc;
ncells[3] = 0; /* unused */
+ /* Ensure all required LUTs are available */
+ check_sinc_lut(gctx, na);
+ check_sinc_lut(gctx, nb);
+ check_sinc_lut(gctx, nc);
+
err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 0: %s\n", clError(err));
@@ -156,6 +211,27 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
return;
}
+ /* LUT in 'a' direction */
+ clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[na-1]);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set arg 16: %s\n", clError(err));
+ return;
+ }
+
+ /* LUT in 'b' direction */
+ clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nb-1]);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set arg 17: %s\n", clError(err));
+ return;
+ }
+
+ /* LUT in 'c' direction */
+ clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->sinc_luts[nc-1]);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set arg 18: %s\n", clError(err));
+ return;
+ }
+
/* Iterate over panels */
event = malloc(image->det.n_panels * sizeof(cl_event));
for ( p=0; p<image->det.n_panels; p++ ) {
@@ -267,7 +343,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
/* Setup the OpenCL stuff, create buffers, load the structure factor table */
struct gpu_context *setup_gpu(int no_sfac, struct image *image,
- struct molecule *molecule, int na, int nb, int nc)
+ struct molecule *molecule)
{
struct gpu_context *gctx;
cl_uint nplat;
@@ -278,8 +354,6 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
size_t sfac_size;
float *sfac_ptr;
size_t maxwgsize;
- size_t sinc_lut_size;
- cl_image_format fmt;
int i;
if ( molecule == NULL ) return NULL;
@@ -382,69 +456,12 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
return NULL;
}
- fmt.image_channel_order = CL_INTENSITY;
- fmt.image_channel_data_type = CL_FLOAT;
- sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float);
-
- /* Set up sinc LUT for a* direction */
- gctx->func_a_ptr = malloc(sinc_lut_size);
- gctx->func_a_ptr[0] = na;
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- double x, val;
- x = (double)i/SINC_LUT_ELEMENTS;
- val = fabs(sin(M_PI*na*x)/sin(M_PI*x));
- gctx->func_a_ptr[i] = val;
- }
- gctx->func_a = clCreateImage2D(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- &fmt, SINC_LUT_ELEMENTS, 1, 0,
- gctx->func_a_ptr, &err);
- clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->func_a);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 16: %s\n", clError(err));
- return NULL;
- }
-
- /* Set up sinc LUT for b* direction */
- gctx->func_b_ptr = malloc(sinc_lut_size);
- gctx->func_b_ptr[0] = nb;
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- double x, val;
- x = (double)i/SINC_LUT_ELEMENTS;
- val = fabs(sin(M_PI*nb*x)/sin(M_PI*x));
- gctx->func_b_ptr[i] = val;
- }
- gctx->func_b = clCreateImage2D(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- &fmt, SINC_LUT_ELEMENTS, 1, 0,
- gctx->func_b_ptr, &err);
- clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->func_b);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 17: %s\n", clError(err));
- return NULL;
- }
-
- /* Set up sinc LUT for c* direction */
- gctx->func_c_ptr = malloc(sinc_lut_size);
- gctx->func_c_ptr[0] = nc;
- for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
- double x, val;
- x = (double)i/SINC_LUT_ELEMENTS;
- val = fabs(sin(M_PI*nc*x)/sin(M_PI*x));
- gctx->func_c_ptr[i] = val;
- }
- gctx->func_c = clCreateImage2D(gctx->ctx,
- CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- &fmt, SINC_LUT_ELEMENTS, 1, 0,
- gctx->func_c_ptr, &err);
- clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->func_c);
- if ( err != CL_SUCCESS ) {
- ERROR("Couldn't set arg 18: %s\n", clError(err));
- return NULL;
- }
-
STATUS("done\n");
+ gctx->max_sinc_lut = 0;
+ gctx->sinc_lut_ptrs = NULL;
+ gctx->sinc_luts = NULL;
+
clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(size_t), &maxwgsize, NULL);
STATUS("Maximum work group size = %lli\n", (long long int)maxwgsize);
@@ -455,16 +472,22 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
void cleanup_gpu(struct gpu_context *gctx)
{
+ int i;
+
clReleaseProgram(gctx->prog);
clReleaseMemObject(gctx->diff);
clReleaseMemObject(gctx->tt);
clReleaseMemObject(gctx->sfacs);
- clReleaseMemObject(gctx->func_a);
- clReleaseMemObject(gctx->func_b);
- clReleaseMemObject(gctx->func_c);
- free(gctx->func_a_ptr);
- free(gctx->func_b_ptr);
- free(gctx->func_c_ptr);
+
+ /* Release LUTs */
+ for ( i=1; i<=gctx->max_sinc_lut; i++ ) {
+ if ( gctx->sinc_lut_ptrs[i-1] != NULL ) {
+ STATUS("freeing %i\n", i-1);
+ clReleaseMemObject(gctx->sinc_luts[i-1]);
+ free(gctx->sinc_lut_ptrs[i-1]);
+ }
+ }
+
clReleaseCommandQueue(gctx->cq);
clReleaseContext(gctx->ctx);
free(gctx);