[cig-commits] r15482 - mc/3D/CitcomS/trunk/lib
leif at geodynamics.org
leif at geodynamics.org
Mon Jul 27 16:44:52 PDT 2009
Author: leif
Date: 2009-07-27 16:44:52 -0700 (Mon, 27 Jul 2009)
New Revision: 15482
Modified:
mc/3D/CitcomS/trunk/lib/multigrid_kernel.cu
Log:
Fixed stupid reduce bug that affected several routines. (This is what
I get for coping & pasting example code without thinking.) Now the
only remaining broken routine is n_assemble_del2_u().
Modified: mc/3D/CitcomS/trunk/lib/multigrid_kernel.cu
===================================================================
--- mc/3D/CitcomS/trunk/lib/multigrid_kernel.cu 2009-07-27 19:41:34 UTC (rev 15481)
+++ mc/3D/CitcomS/trunk/lib/multigrid_kernel.cu 2009-07-27 23:44:52 UTC (rev 15482)
@@ -200,7 +200,11 @@
__shared__ double sum[MAX_EQN];
sum[tid] = acc;
__syncthreads();
- for (unsigned int s = MAX_EQN/2; s > 0; s >>= 1) {
+ if (tid >= 32) {
+ sum[tid-32] += sum[tid];
+ }
+ __syncthreads();
+ for (unsigned int s = 16; s > 0; s >>= 1) {
if (tid < s) {
sum[tid] += sum[tid + s];
}
@@ -264,46 +268,6 @@
}
}
-__global__ void gauss_seidel_2(
- struct Some_variables *E,
- double *F, double *Ad
- )
-{
- int i, doff, eqn;
-
- i = blockIdx.x + 1; /* 1 <= i <= E->lmesh.NNO */
- doff = blockIdx.y + 1; /* 1 <= doff < NSD */
- eqn = E->ID[i].doff[doff];
-
- int *C;
- higher_precision *B;
- double UU, Ad_eqn;
- int j;
-
- C = E->Node_map+(i-1)*MAX_EQN;
- B = E->Eqn_k[doff]+(i-1)*MAX_EQN;
-
- /* load from global memory */
- Ad_eqn = Ad[eqn];
-
- /* Ad on boundaries differs after the following operation, but
- no communications are needed yet, because boundary Ad will
- not be used for the G-S iterations for interior nodes */
-
- for (j=3;j<MAX_EQN;j++) {
- UU = E->temp[C[j]];
- Ad_eqn += B[j]*UU;
- }
-
- /* store to global memory */
- Ad[eqn] = Ad_eqn;
-
- if (!(E->NODE[i] & OFFSIDE)) {
- E->temp[eqn] = (F[eqn] - Ad_eqn)*E->BI[eqn];
- }
-
-}
-
__global__ void gauss_seidel_2a(
struct Some_variables *E,
double *F, double *Ad
@@ -338,7 +302,11 @@
}
__syncthreads();
/* Reduce. */
- for (unsigned int s = (MAX_EQN-3)/2; s > 0; s >>= 1) {
+ if (threadIdx.x >= 32) {
+ sum[threadIdx.x-32][threadIdx.y] += sum[threadIdx.x][threadIdx.y];
+ }
+ __syncthreads();
+ for (unsigned int s = 16; s > 0; s >>= 1) {
if (threadIdx.x < s) {
sum[threadIdx.x][threadIdx.y] += sum[threadIdx.x + s][threadIdx.y];
}
@@ -404,7 +372,13 @@
__syncthreads();
/* Reduce. */
- for (unsigned int s = (MAX_EQN-3)/2; s > 0; s >>= 1) {
+ if (tid >= 32) {
+ sum1[tid-32] += sum1[tid];
+ sum2[tid-32] += sum2[tid];
+ sum3[tid-32] += sum3[tid];
+ }
+ __syncthreads();
+ for (unsigned int s = 16; s > 0; s >>= 1) {
if (tid < s) {
sum1[tid] += sum1[tid + s];
sum2[tid] += sum2[tid + s];
@@ -477,7 +451,11 @@
/* Reduce the partial sums for this block.
Based on reduce2() in the CUDA SDK. */
- for (unsigned int s = MAX_EQN/2; s > 0; s >>= 1) {
+ if (tid >= 32) {
+ sum[tid-32] += sum[tid];
+ }
+ __syncthreads();
+ for (unsigned int s = 16; s > 0; s >>= 1) {
if (tid < s) {
sum[tid] += sum[tid + s];
}
@@ -583,28 +561,23 @@
else host_gauss_seidel_1(E, &s_E, d_F, d_Ad);
}
- if (0) {
+ if (1) {
dim3 block(MAX_EQN - 3, NSD, 1);
dim3 grid(1, 1, 1);
gauss_seidel_2a<<< grid, block >>>(d_E, d_F, d_Ad);
}
- if (1) {
- //dim3 block(MAX_EQN - 3, NSD, 1);
+ if (0) {
dim3 block(MAX_EQN - 3, 1, 1);
dim3 grid(1, 1, 1);
gauss_seidel_2b<<< grid, block >>>(d_E, d_F, d_Ad);
}
if (0) host_gauss_seidel_2(E, &s_E, d_F, d_Ad);
- if (cudaThreadSynchronize() != cudaSuccess) {
- assert(0 && "something went wrong");
- }
-
/* Ad on boundaries differs after the following operation */
{
dim3 block(MAX_EQN, 1, 1);
dim3 grid(E->lmesh.NNO, NSD, 1);
- if (0) gauss_seidel_3<<< grid, block >>>(d_E, d_d0, d_Ad);
+ if (1) gauss_seidel_3<<< grid, block >>>(d_E, d_d0, d_Ad);
else host_gauss_seidel_3(E, &s_E, d_d0, d_Ad);
}
}
@@ -683,559 +656,6 @@
}
-void oy_gauss_seidel_2(
- struct Some_variables *E
- )
-{
- const int neq = E->lmesh.NEQ;
- const int nno = E->lmesh.NNO;
-
- int *ti = (int *)malloc(3* nno * sizeof(int));
- int *ti_tally = (int *)malloc(3 * nno * sizeof(int));
- for (int i = 1; i <= nno; i++) {
- ti[i - 1] = 0;
- ti_tally[i - 1] = 0;
- }
-
- int *back = ti;
-
- int *temp = (int *)malloc((neq+1)*sizeof(int));
- int *temp_pre = (int *)malloc((neq+1)*sizeof(int));
- int *temp_post = (int *)malloc((neq+1)*sizeof(int));
- for (int i = 0; i <= neq; ++i) {
- temp[i] = 0; /* uninit */
- temp_pre[i] = 0;
- temp_post[i] = 0;
- }
-
- FILE *dot = fopen("gs2.dot", "w");
-
- fprintf(dot,
- "digraph G {\n"
- " node [shape=box];\n\n");
-
- // job1[label="Job #1"];
- const int DOT_MIN = 270;
- const int DOT_MAX = 300;
-
- for (int i = 0; i <= neq && DOT_MIN <= i && i < DOT_MAX; ++i) {
- fprintf(dot, " Ad_%d_0[label=\"Ad[%d]\"];\n", i, i);
- }
- int *Ad = (int *)malloc((neq+1)*sizeof(int));
- for (int i = 0; i <= neq; ++i) {
- Ad[i] = 0;
- }
-
- for (int i = 0; i <= neq && DOT_MIN <= i && i < DOT_MAX; ++i) {
- fprintf(dot, " temp_%d_0[label=\"temp[%d]\"];\n", i, i);
- }
- for (int i = 1; i <= nno; i++) {
- int eqn1 = E->ID[i].doff[1];
- int eqn2 = E->ID[i].doff[2];
- int eqn3 = E->ID[i].doff[3];
- if (eqn3 < DOT_MIN || DOT_MAX <= eqn3) {
- continue;
- }
- if (!(E->NODE[i] & OFFSIDE)) {
- fprintf(dot, " temp_%d_offside[label=\"temp[%d] OFFSIDE\"];\n", eqn1, eqn1);
- fprintf(dot, " temp_%d_offside[label=\"temp[%d] OFFSIDE\"];\n", eqn2, eqn2);
- fprintf(dot, " temp_%d_offside[label=\"temp[%d] OFFSIDE\"];\n", eqn3, eqn3);
- }
- }
- for (int j = 3; j < MAX_EQN; j++) {
- fprintf(dot, " B1_%d_UU[label=\"B1[%d]*UU\"];\n", j, j);
- fprintf(dot, " B2_%d_UU[label=\"B2[%d]*UU\"];\n", j, j);
- fprintf(dot, " B3_%d_UU[label=\"B3[%d]*UU\"];\n", j, j);
- }
- fprintf(dot, "\n");
-
- for (int i = 1; i <= nno; i++) {
-
- int eqn1 = E->ID[i].doff[1];
- int eqn2 = E->ID[i].doff[2];
- int eqn3 = E->ID[i].doff[3];
-
- int *C = E->Node_map + (i-1)*MAX_EQN;
-
- //fprintf(stderr, "@@@ read: ");
- for (int j = 3; j < MAX_EQN; j++) {
- //double UU = E->temp[C[j]];
- //fprintf(stderr, "%d ", C[j]);
-
- /***
- * This may read 'temp' elements which are not written by
- * the OFFSIDE code below. BUT, if a temp element IS
- * written by the OFFSIDE code, the read here follows the
- * write.
- */
-
- if (C[j] < DOT_MIN || DOT_MAX <= C[j]) {
- } else if (temp_pre[C[j]] || temp_post[C[j]]) {
- /* edge already written */
- } else if (temp[C[j]]) {
- /* offside value */
- fprintf(dot, " temp_%d_offside->B1_%d_UU;\n", C[j], j);
- } else {
- /* original value */
- fprintf(dot, " temp_%d_0->B1_%d_UU;\n", C[j], j);
- }
- if (temp[C[j]]) {
- /* already written */
- ++temp_post[C[j]];
- } else {
- ++temp_pre[C[j]];
- }
- for (int *x = ti; x < back; ++x) {
- if (C[j] == *x) {
- int tallyho = x - ti;
- ++ti_tally[tallyho];
- }
- }
- /*
- double UU = E->temp[C[j]];
- Ad[eqn1] += B1[j]*UU;
- Ad[eqn2] += B2[j]*UU;
- Ad[eqn3] += B3[j]*UU;
- */
- if (DOT_MIN <= eqn3 && eqn3 < DOT_MAX) {
- fprintf(dot, " Ad_%d_%d[label=\"Ad[%d]\"];\n", eqn1, Ad[eqn1] + 1, eqn1);
- fprintf(dot, " Ad_%d_%d[label=\"Ad[%d]\"];\n", eqn2, Ad[eqn2] + 1, eqn2);
- fprintf(dot, " Ad_%d_%d[label=\"Ad[%d]\"];\n", eqn3, Ad[eqn3] + 1, eqn3);
- fprintf(dot, " Ad_%d_%d->Ad_%d_%d;\n", eqn1, Ad[eqn1], eqn1, Ad[eqn1] + 1);
- fprintf(dot, " Ad_%d_%d->Ad_%d_%d;\n", eqn2, Ad[eqn2], eqn2, Ad[eqn2] + 1);
- fprintf(dot, " Ad_%d_%d->Ad_%d_%d;\n", eqn3, Ad[eqn3], eqn3, Ad[eqn3] + 1);
- fprintf(dot, " B1_%d_UU->Ad_%d_%d;\n", j, eqn1, Ad[eqn1] + 1);
- fprintf(dot, " B2_%d_UU->Ad_%d_%d;\n", j, eqn2, Ad[eqn2] + 1);
- fprintf(dot, " B3_%d_UU->Ad_%d_%d;\n", j, eqn3, Ad[eqn3] + 1);
- }
- ++Ad[eqn1];
- ++Ad[eqn2];
- ++Ad[eqn3];
- }
- //fprintf(stderr, "\n");
-
- if (!(E->NODE[i] & OFFSIDE)) {
- fprintf(stderr, "@@@@@@ %d write: %d %d %d\n", i, eqn1, eqn2, eqn3);
- //E->temp[eqn1] = (F[eqn1] - Ad[eqn1])*E->BI[eqn1];
- //E->temp[eqn2] = (F[eqn2] - Ad[eqn2])*E->BI[eqn2];
- //E->temp[eqn3] = (F[eqn3] - Ad[eqn3])*E->BI[eqn3];
- ++temp[eqn1];
- ++temp[eqn2];
- ++temp[eqn3];
- if (i < nno) {
- int next_i = i + 1;
- int *next_C = E->Node_map + (next_i-1)*MAX_EQN;
- int flag1, flag2, flag3;
- flag1 = flag2 = flag3 = 0;
- for (int j = 3; j < MAX_EQN; j++) {
- if (next_C[j] == eqn1) {
- flag1 = 1;
- } else if (next_C[j] == eqn2) {
- flag2 = 1;
- } else if (next_C[j] == eqn3) {
- flag3 = 1;
- }
- }
- assert(flag1 && flag2 && flag3);
- } else {
- assert(0);
- }
- *back++ = eqn1;
- *back++ = eqn2;
- *back++ = eqn3;
- if (DOT_MIN <= eqn3 && eqn3 < DOT_MAX) {
- fprintf(dot, " Ad_%d_%d->temp_%d_offside;\n", eqn1, Ad[eqn1], eqn1);
- fprintf(dot, " Ad_%d_%d->temp_%d_offside;\n", eqn2, Ad[eqn2], eqn2);
- fprintf(dot, " Ad_%d_%d->temp_%d_offside;\n", eqn3, Ad[eqn3], eqn3);
- }
- }
-
- }
-
- for (int i = 0; i <= neq; ++i) {
- assert(temp[i] <= 1);
- if (temp[i]) {
- fprintf(stderr, "@@@ temp[%d] was written %d times\n", i, temp[i]);
- }
- }
- for (int *x = ti; x < back; ++x) {
- int tallyho = x - ti;
- int eqn = *x;
- assert(temp_pre[eqn] == 0);
- assert(temp_pre[eqn] + temp_post[eqn] == ti_tally[tallyho]);
- fprintf(stderr, "@@@ read tally %d for %d: %d + %d = %d\n", tallyho, eqn,
- temp_pre[eqn], temp_post[eqn], ti_tally[tallyho]);
- }
-
- fprintf(dot, "}\n");
- fclose(dot);
-
-}
-
-
-void piano_roll_gauss_seidel_2(
- struct Some_variables *E
- )
-{
- const int neq = E->lmesh.NEQ;
- const int nno = E->lmesh.NNO;
-
- int *roll = (int *)malloc(nno*MAX_EQN*sizeof(int));
- for (int i = 0; i < nno*MAX_EQN; ++i) {
- roll[i] = 0;
- }
-
- int offsideTally = 0;
-
- int *Ad = (int *)malloc((neq+1)*sizeof(int));
- for (int i = 0; i <= neq; ++i) {
- Ad[i] = 0;
- }
- int *temp = (int *)malloc((neq+1)*sizeof(int));
- for (int i = 0; i <= neq; ++i) {
- temp[i] = 0;
- }
-
- for (int i = 1; i <= nno; i++) {
-
- int eqn1 = E->ID[i].doff[1];
- int eqn2 = E->ID[i].doff[2];
- int eqn3 = E->ID[i].doff[3];
-
- int *C = E->Node_map + (i-1)*MAX_EQN;
-
- higher_precision *B1,*B2,*B3;
- B1 = E->Eqn_k[1] + (i-1)*MAX_EQN;
- B2 = E->Eqn_k[2] + (i-1)*MAX_EQN;
- B3 = E->Eqn_k[3] + (i-1)*MAX_EQN;
-
- for (int j = 3; j < MAX_EQN; j++) {
- /*
- double UU = E->temp[C[j]];
- Ad[eqn1] += B1[j]*UU;
- Ad[eqn2] += B2[j]*UU;
- Ad[eqn3] += B3[j]*UU;
- */
-
- int *row = roll + j*nno;
- row[i-1] = eqn1;
-
- Ad[eqn1] = max(Ad[eqn1], temp[C[j]]) + 1;
- Ad[eqn2] = max(Ad[eqn2], temp[C[j]]) + 1;
- Ad[eqn3] = max(Ad[eqn3], temp[C[j]]) + 1;
- }
-
- if (!(E->NODE[i] & OFFSIDE)) {
- ++offsideTally;
- /*
- E->temp[eqn1] = (F[eqn1] - Ad[eqn1])*E->BI[eqn1];
- E->temp[eqn2] = (F[eqn2] - Ad[eqn2])*E->BI[eqn2];
- E->temp[eqn3] = (F[eqn3] - Ad[eqn3])*E->BI[eqn3];
- */
- temp[eqn1] = Ad[eqn1] + 1;
- temp[eqn2] = Ad[eqn2] + 1;
- temp[eqn3] = Ad[eqn3] + 1;
- }
-
- }
-
- fprintf(stderr, "!offside: %d of %d\n", offsideTally, nno);
- int maxAd = 0, maxTemp = 0;
- for (int i = 0; i <= neq; ++i) {
- maxAd = max(maxAd, Ad[i]);
- maxTemp = max(maxTemp, temp[i]);
- fprintf(stderr, "depth[%d]: Ad %d temp %d\n", i, Ad[i], temp[i]);
- }
- fprintf(stderr, "max Ad depth %d\n", maxAd);
- fprintf(stderr, "max temp depth %d\n", maxTemp);
-
- //------------------------------------------------------------------------
- //------------------------------------------------------------------------
- //------------------------------------------------------------------------
-
-
- int *nodes = (int *)malloc(maxAd*sizeof(int));
- for (int i = 0; i < maxAd; ++i) {
- nodes[i] = 0;
- }
- for (int i = 0; i <= neq; ++i) {
- Ad[i] = 0;
- temp[i] = 0;
- }
-
- for (int i = 1; i <= nno; i++) {
-
- int eqn1 = E->ID[i].doff[1];
- int eqn2 = E->ID[i].doff[2];
- int eqn3 = E->ID[i].doff[3];
-
- int *C = E->Node_map + (i-1)*MAX_EQN;
-
- higher_precision *B1,*B2,*B3;
- B1 = E->Eqn_k[1] + (i-1)*MAX_EQN;
- B2 = E->Eqn_k[2] + (i-1)*MAX_EQN;
- B3 = E->Eqn_k[3] + (i-1)*MAX_EQN;
-
- for (int j = 3; j < MAX_EQN; j++) {
- /*
- double UU = E->temp[C[j]];
- Ad[eqn1] += B1[j]*UU;
- Ad[eqn2] += B2[j]*UU;
- Ad[eqn3] += B3[j]*UU;
- */
-
- int *row = roll + j*nno;
- row[i-1] = eqn1;
-
- Ad[eqn1] = max(Ad[eqn1], temp[C[j]]) + 1;
- Ad[eqn2] = max(Ad[eqn2], temp[C[j]]) + 1;
- Ad[eqn3] = max(Ad[eqn3], temp[C[j]]) + 1;
- ++nodes[Ad[eqn1]];
- ++nodes[Ad[eqn2]];
- ++nodes[Ad[eqn3]];
- }
-
- if (!(E->NODE[i] & OFFSIDE)) {
- ++offsideTally;
- /*
- E->temp[eqn1] = (F[eqn1] - Ad[eqn1])*E->BI[eqn1];
- E->temp[eqn2] = (F[eqn2] - Ad[eqn2])*E->BI[eqn2];
- E->temp[eqn3] = (F[eqn3] - Ad[eqn3])*E->BI[eqn3];
- */
- temp[eqn1] = Ad[eqn1] + 1;
- temp[eqn2] = Ad[eqn2] + 1;
- temp[eqn3] = Ad[eqn3] + 1;
- ++nodes[temp[eqn1]];
- ++nodes[temp[eqn2]];
- ++nodes[temp[eqn3]];
- }
-
- }
- for (int i = 0; i <= neq; ++i) {
- if (!Ad[i]) ++nodes[0];
- if (!temp[i]) ++nodes[0];
- }
-
- int nodeTally = 0;
- for (int i = 0; i < maxAd; ++i) {
- fprintf(stderr, "nodes at depth %03d %d\n", i, nodes[i]);
- nodeTally += nodes[i];
- }
- fprintf(stderr, "total nodes: %d\n", nodeTally);
-
- if (0) for (int j = 3; j < MAX_EQN; j++) {
- int *row = roll + j*nno;
- fprintf(stderr, "row %d: ", j);
- for (int i = 1; i <= nno; i++) {
- fprintf(stderr, "%d ", row[i-1]);
- }
- fprintf(stderr, "\n");
- }
-}
-
-
-typedef struct OpcodeStream {
- int alloc;
- int count;
- int *opcodes;
- int *operands;
-} OS;
-
-
-static void pushBackOpcode(OS *os, int opcode, int operand, OS *sub) {
- int subCount = sub->count ? 2 + sub->count : 1;
- if (os->count + 1 + subCount > os->alloc) {
- do {
- os->alloc *= 2;
- } while (os->count + 1 + subCount > os->alloc);
- os->opcodes = (int *)realloc(os->opcodes, os->alloc*sizeof(int));
- os->operands = (int *)realloc(os->operands, os->alloc*sizeof(int));
- }
- if (sub->count) {
- os->opcodes[os->count] = 10;
- os->operands[os->count] = 0;
- ++os->count;
- for (int op = 0; op < sub->count; ++op) {
- os->opcodes[os->count] = sub->opcodes[op];
- os->operands[os->count] = sub->operands[op];
- ++os->count;
- }
- os->opcodes[os->count] = 11;
- os->operands[os->count] = 0;
- ++os->count;
- } else {
- os->opcodes[os->count] = 0;
- os->operands[os->count] = 0;
- ++os->count;
- }
- os->opcodes[os->count] = opcode;
- os->operands[os->count] = operand;
- ++os->count;
-}
-
-void opcode_gauss_seidel_2(
- struct Some_variables *E
- )
-{
- const int neq = E->lmesh.NEQ;
- const int nno = E->lmesh.NNO;
-
- OS *Ad = (OS *)malloc((neq+1)*sizeof(OS));
- for (int i = 0; i <= neq; ++i) {
- Ad[i].alloc = 2;
- Ad[i].count = 0;
- Ad[i].opcodes = (int *)malloc(2*sizeof(int));
- Ad[i].operands = (int *)malloc(2*sizeof(int));
- }
- OS *temp = (OS *)malloc((neq+1)*sizeof(OS));
- for (int i = 0; i <= neq; ++i) {
- temp[i].alloc = 2;
- temp[i].count = 0;
- temp[i].opcodes = (int *)malloc(2*sizeof(int));
- temp[i].operands = (int *)malloc(2*sizeof(int));
- }
-
- for (int i = 1; i <= nno; i++) {
-
- int eqn1 = E->ID[i].doff[1];
- int eqn2 = E->ID[i].doff[2];
- int eqn3 = E->ID[i].doff[3];
-
- int *C = E->Node_map + (i-1)*MAX_EQN;
-
- higher_precision *B1,*B2,*B3;
- B1 = E->Eqn_k[1] + (i-1)*MAX_EQN;
- B2 = E->Eqn_k[2] + (i-1)*MAX_EQN;
- B3 = E->Eqn_k[3] + (i-1)*MAX_EQN;
-
- /* Ad on boundaries differs after the following operation, but
- no communications are needed yet, because boundary Ad will
- not be used for the G-S iterations for interior nodes */
-
- for (int j = 3; j < MAX_EQN; j++) {
- /*
- double UU = E->temp[C[j]];
- Ad[eqn1] += B1[j]*UU;
- Ad[eqn2] += B2[j]*UU;
- Ad[eqn3] += B3[j]*UU;
- */
- pushBackOpcode(Ad + eqn1, 1, j, temp + C[j]);
- pushBackOpcode(Ad + eqn2, 1, j, temp + C[j]);
- pushBackOpcode(Ad + eqn3, 1, j, temp + C[j]);
- }
-
- if (!(E->NODE[i] & OFFSIDE)) {
- /*
- E->temp[eqn1] = (F[eqn1] - Ad[eqn1])*E->BI[eqn1];
- E->temp[eqn2] = (F[eqn2] - Ad[eqn2])*E->BI[eqn2];
- E->temp[eqn3] = (F[eqn3] - Ad[eqn3])*E->BI[eqn3];
- */
- pushBackOpcode(temp + eqn1, 2, eqn1, Ad + eqn1);
- pushBackOpcode(temp + eqn2, 2, eqn2, Ad + eqn2);
- pushBackOpcode(temp + eqn3, 2, eqn3, Ad + eqn3);
- }
-
- }
-
- for (int i = 0; i <= neq; ++i) {
- fprintf(stderr, "Ad[%d]: ", i);
- OS *os = Ad + i;
- for (int op = 0; op < os->count; ++op) {
- if (os->opcodes[op] == 10) {
- fprintf(stderr, "(");
- } else if (os->opcodes[op] == 11) {
- fprintf(stderr, ")");
- } else if (os->opcodes[op] == 0) {
- fprintf(stderr, "@ ");
- } else {
- fprintf(stderr, "%d,%d ", os->opcodes[op], os->operands[op]);
- }
- }
- fprintf(stderr, "\n");
- }
-
-}
-
-
-void launch_tally_gauss_seidel_2(
- struct Some_variables *E
- )
-{
- const int neq = E->lmesh.NEQ;
- const int nno = E->lmesh.NNO;
-
- int *Ad = (int *)malloc((neq+1)*sizeof(int));
- int *temp = (int *)malloc((neq+1)*sizeof(int));
- for (int i = 0; i <= neq; ++i) {
- Ad[i] = 0;
- temp[i] = 0;
- }
- int launchTally = 0;
- int maxSpread = 0, minSpread = 0;
-
- for (int i = 1; i <= nno; i++) {
-
- int eqn1 = E->ID[i].doff[1];
- int eqn2 = E->ID[i].doff[2];
- int eqn3 = E->ID[i].doff[3];
-
- int *C = E->Node_map + (i-1)*MAX_EQN;
-
- higher_precision *B1,*B2,*B3;
- B1 = E->Eqn_k[1] + (i-1)*MAX_EQN;
- B2 = E->Eqn_k[2] + (i-1)*MAX_EQN;
- B3 = E->Eqn_k[3] + (i-1)*MAX_EQN;
-
- for (int j = 3; j < MAX_EQN; j++) {
- /*
- double UU = E->temp[C[j]];
- Ad[eqn1] += B1[j]*UU;
- Ad[eqn2] += B2[j]*UU;
- Ad[eqn3] += B3[j]*UU;
- */
-
- maxSpread = max(maxSpread, eqn1 - C[j]);
- minSpread = min(minSpread, eqn1 - C[j]);
- if (temp[C[j]]) {
- ++launchTally;
- for (int i = 0; i <= neq; ++i) {
- Ad[i] = 0;
- temp[i] = 0;
- }
- }
- Ad[eqn1] = 1;
- Ad[eqn2] = 1;
- Ad[eqn3] = 1;
- }
-
- if (!(E->NODE[i] & OFFSIDE)) {
- /*
- E->temp[eqn1] = (F[eqn1] - Ad[eqn1])*E->BI[eqn1];
- E->temp[eqn2] = (F[eqn2] - Ad[eqn2])*E->BI[eqn2];
- E->temp[eqn3] = (F[eqn3] - Ad[eqn3])*E->BI[eqn3];
- */
- if (Ad[eqn1] || Ad[eqn2] || Ad[eqn3]) {
- ++launchTally;
- for (int i = 0; i <= neq; ++i) {
- Ad[i] = 0;
- temp[i] = 0;
- }
- }
- temp[eqn1] = 1;
- temp[eqn2] = 1;
- temp[eqn3] = 1;
- }
-
- }
-
- fprintf(stderr, "launch tally: %d\n", launchTally);
- fprintf(stderr, "maxSpread: %d\n", maxSpread);
- fprintf(stderr, "minSpread: %d\n", minSpread);
-
-
-}
-
-
extern "C" void gauss_seidel(
struct All_variables *E,
double **d0,
@@ -1273,11 +693,6 @@
kE.NODE = E->NODE[level][M];
collect_terms(&kE);
- if (0) oy_gauss_seidel_2(&kE);
- if (0) piano_roll_gauss_seidel_2(&kE);
- if (0) opcode_gauss_seidel_2(&kE);
- if (0) launch_tally_gauss_seidel_2(&kE);
- if (0) assert(0);
do_gauss_seidel(
&kE,
More information about the CIG-COMMITS
mailing list