aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Thomas White <taw@physics.org> 2010-02-19 19:12:55 +0100
committer Thomas White <taw@physics.org> 2010-02-19 19:12:55 +0100
commit c7e0450702ab668cdcda34541e6bf815d50be8a8 (patch)
tree 085d35a5b5b846572b9c043bbe7b15f01c0e3325 /src
parent d8115c2fc3bc1c69b751f907e323acc45f6a758a (diff)
Add bandwidth to GPU calculation
Also: alter CPU version to be cleaner and give exactly the same results as the GPU, and fix an indexing bug
Diffstat (limited to 'src')
-rw-r--r-- src/diffraction-gpu.c 34
-rw-r--r-- src/diffraction.c 13
2 files changed, 29 insertions, 18 deletions
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index b9485241..133c0bc1 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -25,8 +25,8 @@
#define SAMPLING (4)
-#define BWSAMPLING (1)
-#define BANDWIDTH (0.0 / 100.0)
+#define BWSAMPLING (10)
+#define BANDWIDTH (1.0 / 100.0)
struct gpu_context
@@ -150,7 +150,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
double ax, ay, az;
double bx, by, bz;
double cx, cy, cz;
- float kc;
+ float k, klow;
cl_event *event;
int p;
float *tt_ptr;
@@ -160,6 +160,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
cl_float4 orientation;
cl_int4 ncells;
const int sampling = SAMPLING;
+ cl_float bwstep;
cell_get_cartesian(image->molecule->cell, &ax, &ay, &az,
&bx, &by, &bz,
@@ -169,7 +170,9 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
cell[6] = cx; cell[7] = cy; cell[8] = cz;
/* Calculate wavelength */
- kc = 1.0/image->lambda; /* Centre value */
+ k = 1.0/image->lambda; /* Centre value */
+ klow = k - k*(BANDWIDTH/2.0); /* Lower value */
+ bwstep = k * BANDWIDTH / BWSAMPLING;
/* Orientation */
orientation[0] = image->orientation.w;
@@ -192,7 +195,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
ERROR("Couldn't set arg 1: %s\n", clError(err));
return;
}
- clSetKernelArg(gctx->kern, 2, sizeof(cl_float), &kc);
+ clSetKernelArg(gctx->kern, 2, sizeof(cl_float), &klow);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 2: %s\n", clError(err));
return;
@@ -228,26 +231,33 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
return;
}
/* Local memory for reduction */
- clSetKernelArg(gctx->kern, 15, SAMPLING*SAMPLING*2*sizeof(cl_float),
- NULL);
+ clSetKernelArg(gctx->kern, 15,
+ BWSAMPLING*SAMPLING*SAMPLING*2*sizeof(cl_float), NULL);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 15: %s\n", clError(err));
return;
}
+ /* Bandwidth sampling step */
+ clSetKernelArg(gctx->kern, 16, sizeof(cl_float), &bwstep);
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set arg 16: %s\n", clError(err));
+ return;
+ }
/* Iterate over panels */
event = malloc(image->det.n_panels * sizeof(cl_event));
for ( p=0; p<image->det.n_panels; p++ ) {
- size_t dims[2];
- size_t ldims[2] = {SAMPLING, SAMPLING};
+ size_t dims[3];
+ size_t ldims[3] = {SAMPLING, SAMPLING, BWSAMPLING};
/* In a future version of OpenCL, this could be done
* with a global work offset. But not yet... */
- dims[0] = image->det.panels[0].max_x-image->det.panels[0].min_x;
- dims[1] = image->det.panels[0].max_y-image->det.panels[0].min_y;
+ dims[0] = 1+image->det.panels[0].max_x-image->det.panels[0].min_x;
+ dims[1] = 1+image->det.panels[0].max_y-image->det.panels[0].min_y;
dims[0] *= SAMPLING;
dims[1] *= SAMPLING;
+ dims[2] = BWSAMPLING;
clSetKernelArg(gctx->kern, 4, sizeof(cl_float),
&image->det.panels[p].cx);
@@ -286,7 +296,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
return;
}
- err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 2, NULL,
+ err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL,
dims, ldims, 0, NULL, &event[p]);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't enqueue diffraction kernel: %s\n",
diff --git a/src/diffraction.c b/src/diffraction.c
index 876fac5d..6df00164 100644
--- a/src/diffraction.c
+++ b/src/diffraction.c
@@ -24,8 +24,8 @@
#define SAMPLING (4)
-#define BWSAMPLING (1)
-#define BANDWIDTH (0.0 / 100.0)
+#define BWSAMPLING (10)
+#define BANDWIDTH (1.0 / 100.0)
static double lattice_factor(struct rvec q, double ax, double ay, double az,
@@ -184,7 +184,7 @@ void get_diffraction(struct image *image, int na, int nb, int nc, int no_sfac)
double ax, ay, az;
double bx, by, bz;
double cx, cy, cz;
- float kc;
+ float k, klow, bwstep;
if ( image->molecule == NULL ) return;
@@ -206,7 +206,9 @@ void get_diffraction(struct image *image, int na, int nb, int nc, int no_sfac)
/* Needed later for Lorentz calculation */
image->twotheta = malloc(image->width * image->height * sizeof(double));
- kc = 1.0/image->lambda; /* Centre value */
+ k = 1.0/image->lambda; /* Centre value */
+ klow = k - k*(BANDWIDTH/2.0); /* Lower value */
+ bwstep = k * BANDWIDTH / BWSAMPLING;
for ( xs=0; xs<image->width*SAMPLING; xs++ ) {
for ( ys=0; ys<image->height*SAMPLING; ys++ ) {
@@ -229,8 +231,7 @@ void get_diffraction(struct image *image, int na, int nb, int nc, int no_sfac)
double complex val;
/* Calculate k this time round */
- k = kc + (kstep-(BWSAMPLING/2)) *
- kc*(BANDWIDTH/BWSAMPLING);
+ k = klow + kstep * bwstep;
q = get_q(image, xs, ys, SAMPLING, &twotheta, k);
image->twotheta[x + image->width*y] = twotheta;