From bda63e5f6eeed249f0effb4f990e887b3912dad6 Mon Sep 17 00:00:00 2001 From: Thomas White Date: Wed, 10 Mar 2010 12:14:25 +0100 Subject: Allow multiple sinc LUTs --- src/diffraction-gpu.c | 175 ++++++++++++++++++++++++++++---------------------- 1 file changed, 99 insertions(+), 76 deletions(-) (limited to 'src/diffraction-gpu.c') diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index 29bfe3e8..f71df74e 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -46,15 +46,65 @@ struct gpu_context cl_mem diff; size_t diff_size; - cl_mem func_a; - cl_float *func_a_ptr; - cl_mem func_b; - cl_float *func_b_ptr; - cl_mem func_c; - cl_float *func_c_ptr; + /* Array of sinc LUTs */ + cl_mem *sinc_luts; + cl_float **sinc_lut_ptrs; + int max_sinc_lut; /* Number of LUTs, i.e. one greater than the maximum + * index. This equals the highest allowable "n". */ }; +static void check_sinc_lut(struct gpu_context *gctx, int n) +{ + cl_int err; + size_t sinc_lut_size; + cl_image_format fmt; + int i; + + if ( n > gctx->max_sinc_lut ) { + + STATUS("Allocating %i -> %i\n", gctx->max_sinc_lut, n); + + gctx->sinc_luts = realloc(gctx->sinc_luts, + n*sizeof(*gctx->sinc_luts)); + gctx->sinc_lut_ptrs = realloc(gctx->sinc_lut_ptrs, + n*sizeof(*gctx->sinc_lut_ptrs)); + + for ( i=gctx->max_sinc_lut; i<n; i++ ) { + gctx->sinc_lut_ptrs[i] = NULL; + } + + gctx->max_sinc_lut = n; + } + + fmt.image_channel_order = CL_INTENSITY; + fmt.image_channel_data_type = CL_FLOAT; + sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float); + + /* Create a new sinc LUT */ + gctx->sinc_lut_ptrs[n-1] = malloc(sinc_lut_size); + gctx->sinc_lut_ptrs[n-1][0] = n; + if ( n == 1 ) { + for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { + gctx->sinc_lut_ptrs[n-1][i] = 1.0; + } + } else { + for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { + [lines declaring and computing val were lost in extraction] + gctx->sinc_lut_ptrs[n-1][i] = val; + } + } + + gctx->sinc_luts[n-1] = clCreateImage2D(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &fmt, SINC_LUT_ELEMENTS, 1, 0, + gctx->sinc_lut_ptrs[n-1], &err); +} + + void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, 
int na, int nb, int nc, int no_sfac) { @@ -102,6 +152,11 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, ncells[2] = nc; ncells[3] = 0; /* unused */ + /* Ensure all required LUTs are available */ + check_sinc_lut(gctx, na); + check_sinc_lut(gctx, nb); + check_sinc_lut(gctx, nc); + err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff); if ( err != CL_SUCCESS ) { ERROR("Couldn't set arg 0: %s\n", clError(err)); @@ -156,6 +211,27 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, return; } + /* LUT in 'a' direction */ + clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[na-1]); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 16: %s\n", clError(err)); + return; + } + + /* LUT in 'b' direction */ + clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nb-1]); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 17: %s\n", clError(err)); + return; + } + + /* LUT in 'c' direction */ + clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->sinc_luts[nc-1]); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 18: %s\n", clError(err)); + return; + } + /* Iterate over panels */ event = malloc(image->det.n_panels * sizeof(cl_event)); for ( p=0; p<image->det.n_panels; p++ ) { @@ -267,7 +343,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, /* Setup the OpenCL stuff, create buffers, load the structure factor table */ struct gpu_context *setup_gpu(int no_sfac, struct image *image, - struct molecule *molecule, int na, int nb, int nc) + struct molecule *molecule) { struct gpu_context *gctx; cl_uint nplat; @@ -278,8 +354,6 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, size_t sfac_size; float *sfac_ptr; size_t maxwgsize; - size_t sinc_lut_size; - cl_image_format fmt; int i; if ( molecule == NULL ) return NULL; @@ -382,69 +456,12 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, return NULL; } - fmt.image_channel_order = CL_INTENSITY; 
- fmt.image_channel_data_type = CL_FLOAT; - sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float); - - /* Set up sinc LUT for a* direction */ - gctx->func_a_ptr = malloc(sinc_lut_size); - gctx->func_a_ptr[0] = na; - for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { - [lines declaring and computing val were lost in extraction] - gctx->func_a_ptr[i] = val; - } - gctx->func_a = clCreateImage2D(gctx->ctx, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &fmt, SINC_LUT_ELEMENTS, 1, 0, - gctx->func_a_ptr, &err); - clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->func_a); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 16: %s\n", clError(err)); - return NULL; - } - - /* Set up sinc LUT for b* direction */ - gctx->func_b_ptr = malloc(sinc_lut_size); - gctx->func_b_ptr[0] = nb; - for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { - [lines declaring and computing val were lost in extraction] - gctx->func_b_ptr[i] = val; - } - gctx->func_b = clCreateImage2D(gctx->ctx, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &fmt, SINC_LUT_ELEMENTS, 1, 0, - gctx->func_b_ptr, &err); - clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->func_b); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 17: %s\n", clError(err)); - return NULL; - } - - /* Set up sinc LUT for c* direction */ - gctx->func_c_ptr = malloc(sinc_lut_size); - gctx->func_c_ptr[0] = nc; - for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { - [lines declaring and computing val were lost in extraction] - gctx->func_c_ptr[i] = val; - } - gctx->func_c = clCreateImage2D(gctx->ctx, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &fmt, SINC_LUT_ELEMENTS, 1, 0, - gctx->func_c_ptr, &err); - clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->func_c); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 18: %s\n", clError(err)); - return NULL; - } - STATUS("done\n"); + gctx->max_sinc_lut = 0; + gctx->sinc_lut_ptrs = NULL; + gctx->sinc_luts = NULL; + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &maxwgsize, NULL); STATUS("Maximum work group size = %lli\n", (long long int)maxwgsize); @@ -455,16 +472,22 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, void cleanup_gpu(struct gpu_context *gctx) { + int i; + clReleaseProgram(gctx->prog); clReleaseMemObject(gctx->diff); clReleaseMemObject(gctx->tt); 
clReleaseMemObject(gctx->sfacs); - clReleaseMemObject(gctx->func_a); - clReleaseMemObject(gctx->func_b); - clReleaseMemObject(gctx->func_c); - free(gctx->func_a_ptr); - free(gctx->func_b_ptr); - free(gctx->func_c_ptr); + + /* Release LUTs */ + for ( i=1; i<=gctx->max_sinc_lut; i++ ) { + if ( gctx->sinc_lut_ptrs[i-1] != NULL ) { + STATUS("freeing %i\n", i-1); + clReleaseMemObject(gctx->sinc_luts[i-1]); + free(gctx->sinc_lut_ptrs[i-1]); + } + } + clReleaseCommandQueue(gctx->cq); clReleaseContext(gctx->ctx); free(gctx); -- cgit v1.2.3