[cig-commits] r15068 - mc/3D/CitcomS/trunk/lib
leif at geodynamics.org
leif at geodynamics.org
Tue May 26 19:32:03 PDT 2009
Author: leif
Date: 2009-05-26 19:32:03 -0700 (Tue, 26 May 2009)
New Revision: 15068
Modified:
mc/3D/CitcomS/trunk/lib/cgrad_kernel.cu
Log:
Parallelized strip_bcs_from_residual().
Modified: mc/3D/CitcomS/trunk/lib/cgrad_kernel.cu
===================================================================
--- mc/3D/CitcomS/trunk/lib/cgrad_kernel.cu 2009-05-27 01:28:55 UTC (rev 15067)
+++ mc/3D/CitcomS/trunk/lib/cgrad_kernel.cu 2009-05-27 02:32:03 UTC (rev 15068)
@@ -37,6 +37,7 @@
struct matrix_mult {
int n; /* number of octoterms: 1, 2, 4, or 8 */
struct octoterm *ot;
+ int zero_res; /* boolean */
};
@@ -115,6 +116,7 @@
nTotal = 0;
for(i=0;i<neq;i++) {
E->mm[i].n = 0;
+ E->mm[i].zero_res = 0;
}
for(e=1;e<=nel;e++) {
@@ -135,6 +137,10 @@
} /* end for e */
+ /* strip_bcs_from_residual */
+ for(i=1;i<=E->num_zero_resid;i++)
+ E->mm[E->zero_resid[i]].zero_res = 1;
+
/* return the total number of octoterms */
return nTotal;
}
@@ -200,43 +206,50 @@
for (i = 0; i < neq; i++) {
- sum = 0.0;
-
/* ENODES*ENODES = 8*8 = 64 threads per block */
/* XXX: 8*(8-n) wasted threads */
- for (o = 0; o < E->mm[i].n; o++) {
+ sum = 0.0;
+
+ if (strip_bcs && E->mm[i].zero_res) {
- e = E->mm[i].ot[o].e;
- a = E->mm[i].ot[o].a;
- offset = E->mm[i].ot[o].offset;
+ /* no-op: Au[i] is zero */
+
+ } else {
- for (b = 1; b <= ENODES; b++) {
+ for (o = 0; o < E->mm[i].n; o++) {
- /* each thread computes three terms */
-
- nodeb = E->IEN[e].node[b];
- ii = (a*LOC_MAT_SIZE+b)*NSD-(NSD*LOC_MAT_SIZE+NSD);
+ e = E->mm[i].ot[o].e;
+ a = E->mm[i].ot[o].a;
+ offset = E->mm[i].ot[o].offset;
- /* XXX: must reduce here */
- sum +=
- E->elt_k[e].k[ii+offset] *
- u[E->ID[nodeb].doff[1]]
- + E->elt_k[e].k[ii+offset+1] *
- u[E->ID[nodeb].doff[2]]
- + E->elt_k[e].k[ii+offset+2] *
- u[E->ID[nodeb].doff[3]];
+ for (b = 1; b <= ENODES; b++) {
+
+ /* each thread computes three terms */
+
+ nodeb = E->IEN[e].node[b];
+ ii = (a*LOC_MAT_SIZE+b)*NSD-(NSD*LOC_MAT_SIZE+NSD);
+
+ /* XXX: must reduce here */
+ sum +=
+ E->elt_k[e].k[ii+offset] *
+ u[E->ID[nodeb].doff[1]]
+ + E->elt_k[e].k[ii+offset+1] *
+ u[E->ID[nodeb].doff[2]]
+ + E->elt_k[e].k[ii+offset+2] *
+ u[E->ID[nodeb].doff[3]];
+
+ }
}
+
}
/* each block writes one element, Au[i] */
Au[i] = sum;
+
}
- if (strip_bcs)
- strip_bcs_from_residual(E,Au);
-
return;
}
More information about the CIG-COMMITS mailing list