[cig-commits] r15485 - mc/3D/CitcomS/trunk/lib
leif at geodynamics.org
leif at geodynamics.org
Wed Jul 29 13:16:08 PDT 2009
Author: leif
Date: 2009-07-29 13:16:08 -0700 (Wed, 29 Jul 2009)
New Revision: 15485
Modified:
mc/3D/CitcomS/trunk/lib/multigrid_kernel.cu
Log:
Fixed the remaining bug in n_assemble_del2_u(). Glancing at
"global_defs.h", I had assumed that VBX, VBY, and VBZ were defined in
the natural way:
#define VBX 0x2
#define VBY 0x4
#define VBZ 0x8
I.e., "0x1 << doff"... but today I looked at it more closely; Y & Z
are reversed!!!
#define VBX 0x2
#define VBZ 0x4
#define VBY 0x8
With this fix, the CUDA multigrid solver seems to work correctly.
Modified: mc/3D/CitcomS/trunk/lib/multigrid_kernel.cu
===================================================================
--- mc/3D/CitcomS/trunk/lib/multigrid_kernel.cu 2009-07-29 04:05:42 UTC (rev 15484)
+++ mc/3D/CitcomS/trunk/lib/multigrid_kernel.cu 2009-07-29 20:16:08 UTC (rev 15485)
@@ -125,16 +125,25 @@
)
{
int n = blockIdx.x + 1; /* 1 <= n <= E->lmesh.NNO */
- int doff = blockIdx.y + 1; /* 1 <= doff < NSD */
+ int doff = blockIdx.y + 1; /* 1 <= doff <= NSD */
unsigned int tid = threadIdx.x; /* 0 <= tid < MAX_EQN */
/* Each block writes one element of Au in global memory: Au[eqn]. */
int eqn = E->ID[n].doff[doff]; /* XXX: Compute this value? */
+ if (n == 1 && doff == 1 && tid == 0) {
+ Au[E->lmesh.NEQ] = 0.0;
+ }
+
if (strip_bcs) {
/* See get_bcs_id_for_residual(). */
unsigned int flags = E->NODE[n];
- unsigned int vb = 0x1 << doff; /* VBX, VBY, or VBZ */
+ unsigned int vb;
+ switch (doff) {
+ case 1: vb = VBX; break; /* 0x2 */
+ case 2: vb = VBY; break; /* 0x8 */
+ case 3: vb = VBZ; break; /* 0x4 */
+ }
if (flags & vb) {
/* no-op: Au[eqn] is zero */
if (tid == 0) {
@@ -215,11 +224,6 @@
/* Each block writes one element of Au in global memory. */
if (tid == 0) {
Au[eqn] = sum[0];
-
- if (n == 1 && doff == 1) {
- /* Well, actually, the first block writes one more. */
- Au[E->lmesh.NEQ] = 0.0;
- }
}
return;
@@ -252,7 +256,7 @@
int i = blockIdx.x * blockDim.x + threadIdx.x + 1;
if (i <= nno) {
/* 1 <= i <= E->lmesh.NNO */
- int doff = blockIdx.y + 1; /* 1 <= doff < NSD */
+ int doff = blockIdx.y + 1; /* 1 <= doff <= NSD */
int eqn = E->ID[i].doff[doff];
if (E->NODE[i] & OFFSIDE) {
@@ -276,7 +280,7 @@
const int nno = E->lmesh.NNO;
int j = threadIdx.x + 3; /* 3 <= j < MAX_EQN */
- int doff = threadIdx.y + 1; /* 1 <= doff < NSD */
+ int doff = threadIdx.y + 1; /* 1 <= doff <= NSD */
for (int i = 1; i <= nno; i++) {
int eqn = E->ID[i].doff[doff];
@@ -415,7 +419,7 @@
)
{
int n = blockIdx.x + 1; /* 1 <= n <= E->lmesh.NNO */
- int doff = blockIdx.y + 1; /* 1 <= doff < NSD */
+ int doff = blockIdx.y + 1; /* 1 <= doff <= NSD */
unsigned int tid = threadIdx.x; /* 0 <= tid < MAX_EQN */
/* Each block writes one element of Ad and d0 in global memory:
@@ -548,7 +552,7 @@
dim3 block(MAX_EQN, 1, 1);
dim3 grid(E->lmesh.NNO, NSD, 1);
- if (0) n_assemble_del2_u<<< grid, block >>>(d_E, d_d0, d_Ad, 1);
+ if (1) n_assemble_del2_u<<< grid, block >>>(d_E, d_d0, d_Ad, 1);
else host_n_assemble_del2_u(E, d_d0, d_Ad, 1);
} else {
More information about the CIG-COMMITS
mailing list