aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas White <taw@physics.org> 2010-02-19 14:49:49 +0100
committer Thomas White <taw@physics.org> 2010-02-19 14:49:49 +0100
commit 803fbcf98c0c68cc5000331d6e00f9297a1cf5f0 (patch)
tree e8c3d49acc9542932af90bdf87c72490d38dd21a
parent e3d41026e188d6225c16a86accab05a6897977f6 (diff)
Honour particle size in GPU calculation
-rw-r--r-- data/diffraction.cl 12
-rw-r--r-- src/diffraction-gpu.c 11
2 files changed, 17 insertions, 6 deletions
diff --git a/data/diffraction.cl b/data/diffraction.cl
index a861b159..ade56484 100644
--- a/data/diffraction.cl
+++ b/data/diffraction.cl
@@ -69,13 +69,13 @@ float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k,
}
-float lattice_factor(float16 cell, float4 q)
+float lattice_factor(float16 cell, float4 q, int4 ncells)
{
float f1, f2, f3;
float4 Udotq;
- const int na = 8;
- const int nb = 8;
- const int nc = 8;
+ const int na = ncells.s0;
+ const int nb = ncells.s1;
+ const int nc = ncells.s2;
Udotq.x = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z;
Udotq.y = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z;
@@ -130,7 +130,7 @@ float2 get_sfac(global float2 *sfacs, float16 cell, float4 q)
kernel void diffraction(global float2 *diff, global float *tt, float k,
int w, float cx, float cy,
float res, float clen, float16 cell,
- global float2 *sfacs, float4 z)
+ global float2 *sfacs, float4 z, int4 ncells)
{
float ttv;
const int x = get_global_id(0);
@@ -140,7 +140,7 @@ kernel void diffraction(global float2 *diff, global float *tt, float k,
float4 q = get_q(x, y, cx, cy, res, clen, k, &ttv, z);
- f_lattice = lattice_factor(cell, q);
+ f_lattice = lattice_factor(cell, q, ncells);
f_molecule = get_sfac(sfacs, cell, q);
diff[x+w*y] = f_molecule * f_lattice;
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index 26aa7e34..5ffb576c 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -157,6 +157,7 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
float *diff_ptr;
int i;
cl_float4 orientation;
+ cl_int4 ncells;
if ( image->molecule == NULL ) return;
@@ -251,6 +252,11 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
orientation[2] = image->orientation.y;
orientation[3] = image->orientation.z;
+ ncells[0] = na;
+ ncells[1] = nb;
+ ncells[2] = nc;
+ ncells[3] = 0; /* unused */
+
err = clSetKernelArg(kern, 0, sizeof(cl_mem), &diff);
if ( err != CL_SUCCESS ) {
ERROR("Couldn't set arg 0: %s\n", clError(err));
@@ -306,6 +312,11 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
ERROR("Couldn't set arg 10: %s\n", clError(err));
return;
}
+ err = clSetKernelArg(kern, 11, sizeof(cl_int4), &ncells); /* keep status: the check below must test this call, not arg 10 */
+ if ( err != CL_SUCCESS ) {
+ ERROR("Couldn't set arg 11: %s\n", clError(err));
+ return;
+ }
err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL,
0, NULL, &event_d);