[cig-commits] [commit] rajesh-petsc-schur: Removed caps_per_proc for loops from General_matrix_functions.c (1bfd847)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Wed Nov 5 19:07:05 PST 2014
Repository : https://github.com/geodynamics/citcoms
On branch : rajesh-petsc-schur
Link : https://github.com/geodynamics/citcoms/compare/464e1b32299b15819f93efd98d969cddb84dfe51...f97ae655a50bdbd6dac1923a3471ee4dae178fbd
>---------------------------------------------------------------
commit 1bfd8478d42e61b89bc8cfc1679ae9dcc94936f5
Author: Rajesh Kommu <rajesh.kommu at gmail.com>
Date: Tue Sep 16 16:13:45 2014 -0700
Removed caps_per_proc for loops from General_matrix_functions.c
>---------------------------------------------------------------
1bfd8478d42e61b89bc8cfc1679ae9dcc94936f5
lib/General_matrix_functions.c | 121 ++++++++++++++---------------------------
1 file changed, 41 insertions(+), 80 deletions(-)
diff --git a/lib/General_matrix_functions.c b/lib/General_matrix_functions.c
index 983c70a..ddf261c 100644
--- a/lib/General_matrix_functions.c
+++ b/lib/General_matrix_functions.c
@@ -75,10 +75,9 @@ int solve_del2_u(E,d0,F,acc,high_lev)
neq = E->lmesh.NEQ[high_lev];
- for (m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=0;i<neq;i++) {
- d0[CPPR][i] = 0.0;
- }
+ for(i=0;i<neq;i++) {
+ d0[CPPR][i] = 0.0;
+ }
r0=residual=sqrt(global_vdot(E,F,F,high_lev));
@@ -182,8 +181,7 @@ double multi_grid(E,d1,F,acc,hl)
/* because it's recursive, need a copy at
each level */
- for(i=E->mesh.levmin;i<=E->mesh.levmax;i++)
- for(m=1;m<=E->sphere.caps_per_proc;m++) {
+ for(i=E->mesh.levmin;i<=E->mesh.levmax;i++) {
del_vel[i][CPPR]=(double *)malloc((E->lmesh.NEQ[i]+1)*sizeof(double));
AU[i][CPPR] = (double *)malloc((E->lmesh.NEQ[i]+1)*sizeof(double));
vel[i][CPPR]=(double *)malloc((E->lmesh.NEQ[i]+1)*sizeof(double));
@@ -218,11 +216,9 @@ double multi_grid(E,d1,F,acc,hl)
strip_bcs_from_residual(E,vel[lev],lev);
if (lev==levmax)
- for(m=1;m<=E->sphere.caps_per_proc;m++)
for(j=0;j<E->lmesh.NEQ[lev];j++)
res[lev][CPPR][j]=F[CPPR][j];
else
- for(m=1;m<=E->sphere.caps_per_proc;m++)
for(j=0;j<E->lmesh.NEQ[lev];j++)
res[lev][CPPR][j]=fl[lev][CPPR][j];
@@ -231,18 +227,17 @@ double multi_grid(E,d1,F,acc,hl)
for (dlev=lev;dlev>=levmin+1;dlev--) {
/* Pre-smoothing */
- cycles=((dlev==levmax)?E->control.v_steps_high:E->control.down_heavy);
- ic = ((dlev==lev)?1:0);
- gauss_seidel(E,vel[dlev],res[dlev],AU[dlev],0.01,&cycles,dlev,ic);
+ cycles=((dlev==levmax)?E->control.v_steps_high:E->control.down_heavy);
+ ic = ((dlev==lev)?1:0);
+ gauss_seidel(E,vel[dlev],res[dlev],AU[dlev],0.01,&cycles,dlev,ic);
- for(m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=0;i<E->lmesh.NEQ[dlev];i++) {
- res[dlev][CPPR][i] = res[dlev][CPPR][i] - AU[dlev][CPPR][i];
+ for(i=0;i<E->lmesh.NEQ[dlev];i++) {
+ res[dlev][CPPR][i] = res[dlev][CPPR][i] - AU[dlev][CPPR][i];
}
project_vector(E,dlev,res[dlev],res[dlev-1],1);
strip_bcs_from_residual(E,res[dlev-1],dlev-1);
- }
+ }
/* Bottom of the V */
cycles = E->control.v_steps_low;
@@ -258,13 +253,11 @@ double multi_grid(E,d1,F,acc,hl)
AudotAu = global_vdot(E,AU[ulev],AU[ulev],ulev);
alpha = global_vdot(E,AU[ulev],res[ulev],ulev)/AudotAu;
- for(m=1;m<=E->sphere.caps_per_proc;m++)
for(i=0;i<E->lmesh.NEQ[ulev];i++) {
vel[ulev][CPPR][i] += alpha*del_vel[ulev][CPPR][i];
}
if (ulev ==levmax)
- for(m=1;m<=E->sphere.caps_per_proc;m++)
for(i=0;i<E->lmesh.NEQ[ulev];i++) {
res[ulev][CPPR][i] -= alpha*AU[ulev][CPPR][i];
}
@@ -273,7 +266,6 @@ double multi_grid(E,d1,F,acc,hl)
}
}
- for(m=1;m<=E->sphere.caps_per_proc;m++)
for(j=0;j<E->lmesh.NEQ[levmax];j++) {
F[CPPR][j]=res[levmax][CPPR][j];
d1[CPPR][j]+=vel[levmax][CPPR][j];
@@ -281,8 +273,7 @@ double multi_grid(E,d1,F,acc,hl)
residual = sqrt(global_vdot(E,F,F,hl));
- for(i=E->mesh.levmin;i<=E->mesh.levmax;i++)
- for(m=1;m<=E->sphere.caps_per_proc;m++) {
+ for(i=E->mesh.levmin;i<=E->mesh.levmax;i++){
free((double*) del_vel[i][CPPR]);
free((double*) AU[i][CPPR]);
free((double*) vel[i][CPPR]);
@@ -291,7 +282,6 @@ double multi_grid(E,d1,F,acc,hl)
free((double*) fl[i][CPPR]);
}
-
return(residual);
}
@@ -333,7 +323,6 @@ double conj_grad(E,d0,F,acc,cycles,level)
steps = *cycles;
- for(m=1;m<=E->sphere.caps_per_proc;m++) {
r0[CPPR] = (double *)malloc(E->lmesh.NEQ[mem_lev]*sizeof(double));
r1[CPPR] = (double *)malloc(E->lmesh.NEQ[mem_lev]*sizeof(double));
r2[CPPR] = (double *)malloc(E->lmesh.NEQ[mem_lev]*sizeof(double));
@@ -342,13 +331,11 @@ double conj_grad(E,d0,F,acc,cycles,level)
p1[CPPR] = (double *)malloc((1+E->lmesh.NEQ[mem_lev])*sizeof(double));
p2[CPPR] = (double *)malloc((1+E->lmesh.NEQ[mem_lev])*sizeof(double));
Ap[CPPR] = (double *)malloc((1+E->lmesh.NEQ[mem_lev])*sizeof(double));
- }
- for(m=1;m<=E->sphere.caps_per_proc;m++)
for(i=0;i<high_neq;i++) {
- r1[CPPR][i] = F[CPPR][i];
+ r1[CPPR][i] = F[CPPR][i];
d0[CPPR][i] = 0.0;
- }
+ }
residual = sqrt(global_vdot(E,r1,r1,level));
@@ -357,20 +344,17 @@ double conj_grad(E,d0,F,acc,cycles,level)
while (((residual > acc) && (count < steps)) || count == 0) {
- for(m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=0;i<high_neq;i++)
- z1[CPPR][i] = E->BI[level][CPPR][i] * r1[CPPR][i];
+ for(i=0;i<high_neq;i++)
+ z1[CPPR][i] = E->BI[level][CPPR][i] * r1[CPPR][i];
- dotr1z1 = global_vdot(E,r1,z1,level);
+ dotr1z1 = global_vdot(E,r1,z1,level);
if (count == 0 )
- for(m=1;m<=E->sphere.caps_per_proc;m++)
for(i=0;i<high_neq;i++)
p2[CPPR][i] = z1[CPPR][i];
else {
assert(dotr0z0 != 0.0 /* in head of conj_grad */);
beta = dotr1z1/dotr0z0;
- for(m=1;m<=E->sphere.caps_per_proc;m++)
for(i=0;i<high_neq;i++)
p2[CPPR][i] = z1[CPPR][i] + beta * p1[CPPR][i];
}
@@ -386,41 +370,35 @@ double conj_grad(E,d0,F,acc,cycles,level)
else
alpha = dotr1z1/dotprod;
- for(m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=0;i<high_neq;i++) {
- d0[CPPR][i] += alpha * p2[CPPR][i];
- r2[CPPR][i] = r1[CPPR][i] - alpha * Ap[CPPR][i];
+ for(i=0;i<high_neq;i++) {
+ d0[CPPR][i] += alpha * p2[CPPR][i];
+ r2[CPPR][i] = r1[CPPR][i] - alpha * Ap[CPPR][i];
}
residual = sqrt(global_vdot(E,r2,r2,level));
- for(m=1;m<=E->sphere.caps_per_proc;m++) {
shuffle[CPPR] = r0[CPPR]; r0[CPPR] = r1[CPPR]; r1[CPPR] = r2[CPPR]; r2[CPPR] = shuffle[CPPR];
shuffle[CPPR] = z0[CPPR]; z0[CPPR] = z1[CPPR]; z1[CPPR] = shuffle[CPPR];
shuffle[CPPR] = p1[CPPR]; p1[CPPR] = p2[CPPR]; p2[CPPR] = shuffle[CPPR];
- }
count++;
- /* end of while-loop */
-
- }
+ } /* end of while-loop */
*cycles=count;
strip_bcs_from_residual(E,d0,level);
- for(m=1;m<=E->sphere.caps_per_proc;m++) {
- free((double*) r0[CPPR]);
- free((double*) r1[CPPR]);
- free((double*) r2[CPPR]);
- free((double*) z0[CPPR]);
- free((double*) z1[CPPR]);
- free((double*) p1[CPPR]);
- free((double*) p2[CPPR]);
- free((double*) Ap[CPPR]);
- }
+ free((double*) r0[CPPR]);
+ free((double*) r1[CPPR]);
+ free((double*) r2[CPPR]);
+ free((double*) z0[CPPR]);
+ free((double*) z1[CPPR]);
+ free((double*) p1[CPPR]);
+ free((double*) p2[CPPR]);
+ free((double*) Ap[CPPR]);
- return(residual); }
+ return(residual);
+}
#endif /* !USE_CUDA */
#ifndef USE_CUDA
@@ -472,24 +450,20 @@ void gauss_seidel(E,d0,F,Ad,acc,cycles,level,guess)
n_assemble_del2_u(E,d0,Ad,level,1);
}
else
- for (m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=0;i<neq;i++) {
- d0[CPPR][i]=Ad[CPPR][i]=0.0;
- }
+ for(i=0;i<neq;i++) {
+ d0[CPPR][i]=Ad[CPPR][i]=0.0;
+ }
count = 0;
while (count < steps) {
- for (m=1;m<=E->sphere.caps_per_proc;m++)
- for(j=0;j<=E->lmesh.NEQ[level];j++)
+ for(j=0;j<=E->lmesh.NEQ[level];j++)
E->temp[CPPR][j] = 0.0;
- for (m=1;m<=E->sphere.caps_per_proc;m++)
- Ad[CPPR][neq] = 0.0;
+ Ad[CPPR][neq] = 0.0;
- for (m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=1;i<=E->lmesh.NNO[level];i++)
+ for(i=1;i<=E->lmesh.NNO[level];i++)
if(E->NODE[level][CPPR][i] & OFFSIDE) {
eqn1=E->ID[level][CPPR][i].doff[1];
@@ -504,8 +478,7 @@ void gauss_seidel(E,d0,F,Ad,acc,cycles,level,guess)
E->temp1[CPPR][eqn3] = Ad[CPPR][eqn3];
}
- for (m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=1;i<=E->lmesh.NNO[level];i++) {
+ for(i=1;i<=E->lmesh.NNO[level];i++) {
eqn1=E->ID[level][CPPR][i].doff[1];
eqn2=E->ID[level][CPPR][i].doff[2];
@@ -543,8 +516,7 @@ void gauss_seidel(E,d0,F,Ad,acc,cycles,level,guess)
d0[CPPR][eqn3] += E->temp[CPPR][eqn3];
}
- for (m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=1;i<=E->lmesh.NNO[level];i++)
+ for(i=1;i<=E->lmesh.NNO[level];i++)
if(E->NODE[level][CPPR][i] & OFFSIDE) {
eqn1=E->ID[level][CPPR][i].doff[1];
eqn2=E->ID[level][CPPR][i].doff[2];
@@ -556,8 +528,7 @@ void gauss_seidel(E,d0,F,Ad,acc,cycles,level,guess)
(E->solver.exchange_id_d)(E, Ad, level);
- for (m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=1;i<=E->lmesh.NNO[level];i++)
+ for(i=1;i<=E->lmesh.NNO[level];i++)
if(E->NODE[level][CPPR][i] & OFFSIDE) {
eqn1=E->ID[level][CPPR][i].doff[1];
eqn2=E->ID[level][CPPR][i].doff[2];
@@ -568,19 +539,11 @@ void gauss_seidel(E,d0,F,Ad,acc,cycles,level,guess)
}
- count++;
+ count++;
-/* for (m=1;m<=E->sphere.caps_per_proc;m++)
- for(i=0;i<neq;i++) {
- F[m][i] -= Ad[m][i];
- Ad[m][i] = 0.0;
- }
-*/
- }
+ }
*cycles=count;
- return;
-
}
#endif /* !USE_CUDA */
@@ -651,5 +614,3 @@ long double lg_pow(long double a, int n)
return(b);
}
-
-
More information about the CIG-COMMITS
mailing list