[cig-commits] r5832 - mc/3D/CitcomS/branches/compressible/lib

Thu Jan 18 16:10:53 PST 2007

Author: tan2
Date: 2007-01-18 16:10:53 -0800 (Thu, 18 Jan 2007)
New Revision: 5832

Modified:
   mc/3D/CitcomS/branches/compressible/lib/Nodal_mesh.c
   mc/3D/CitcomS/branches/compressible/lib/Obsolete.c
   mc/3D/CitcomS/branches/compressible/lib/Stokes_flow_Incomp.c
Log:
Refactoring Stokes solver

Modified: mc/3D/CitcomS/branches/compressible/lib/Nodal_mesh.c
===================================================================

--- mc/3D/CitcomS/branches/compressible/lib/Nodal_mesh.c	2007-01-18 22:39:21 UTC (rev 5831)
+++ mc/3D/CitcomS/branches/compressible/lib/Nodal_mesh.c	2007-01-19 00:10:53 UTC (rev 5832)
@@ -1,6 +1,6 @@
 /*
- *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * 
+ *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
  *<LicenseText>
  *
  * CitcomS by Louis Moresi, Shijie Zhong, Lijie Han, Eh Tan,
@@ -22,8 +22,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
  *</LicenseText>
- * 
- *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 /* Functions relating to the building and use of mesh locations ... */
 
@@ -33,20 +33,143 @@
 #include "element_definitions.h"
 #include "global_defs.h"
 
-extern int Emergency_stop;
 
-/*
-void flogical_mesh_to_real(E,data,level)
+void v_from_vector(E)
      struct All_variables *E;
-     float *data;
-     int level;
+{
+    int i,eqn1,eqn2,eqn3,m,node;
+    float sint,cost,sinf,cosf;
 
-{ int i,j,n1,n2;
+    const int nno = E->lmesh.nno;
+    const int level=E->mesh.levmax;
 
-  return;
+    for (m=1;m<=E->sphere.caps_per_proc;m++)   {
+        for(node=1;node<=nno;node++)     {
+            E->sphere.cap[m].V[1][node] = E->U[m][E->id[m][node].doff[1]];
+            E->sphere.cap[m].V[2][node] = E->U[m][E->id[m][node].doff[2]];
+            E->sphere.cap[m].V[3][node] = E->U[m][E->id[m][node].doff[3]];
+            if (E->node[m][node] & VBX)
+                E->sphere.cap[m].V[1][node] = E->sphere.cap[m].VB[1][node];
+            if (E->node[m][node] & VBY)
+                E->sphere.cap[m].V[2][node] = E->sphere.cap[m].VB[2][node];
+            if (E->node[m][node] & VBZ)
+                E->sphere.cap[m].V[3][node] = E->sphere.cap[m].VB[3][node];
+        }
+        for (i=1;i<=E->lmesh.nno;i++)  {
+            eqn1 = E->id[m][i].doff[1];
+            eqn2 = E->id[m][i].doff[2];
+            eqn3 = E->id[m][i].doff[3];
+            sint = E->SinCos[level][m][0][i];
+            sinf = E->SinCos[level][m][1][i];
+            cost = E->SinCos[level][m][2][i];
+            cosf = E->SinCos[level][m][3][i];
+            E->temp[m][eqn1] = E->sphere.cap[m].V[1][i]*cost*cosf
+                - E->sphere.cap[m].V[2][i]*sinf
+                + E->sphere.cap[m].V[3][i]*sint*cosf;
+            E->temp[m][eqn2] = E->sphere.cap[m].V[1][i]*cost*sinf
+                + E->sphere.cap[m].V[2][i]*cosf
+                + E->sphere.cap[m].V[3][i]*sint*sinf;
+            E->temp[m][eqn3] = -E->sphere.cap[m].V[1][i]*sint
+                + E->sphere.cap[m].V[3][i]*cost;
+
+        }
+    }
+
+    return;
 }
-*/
 
+void v_from_vector_pseudo_surf(E)
+     struct All_variables *E;
+{
+    int i,eqn1,eqn2,eqn3,m,node;
+    float sint,cost,sinf,cosf;
+
+    const int nno = E->lmesh.nno;
+    const int level=E->mesh.levmax;
+    double sum_V = 0.0, sum_dV = 0.0, rel_error = 0.0, global_max_error = 0.0;
+    double tol_error = 1.0e-03;
+
+    for (m=1;m<=E->sphere.caps_per_proc;m++)   {
+        for(node=1;node<=nno;node++)     {
+            E->sphere.cap[m].Vprev[1][node] = E->sphere.cap[m].V[1][node];
+            E->sphere.cap[m].Vprev[2][node] = E->sphere.cap[m].V[2][node];
+            E->sphere.cap[m].Vprev[3][node] = E->sphere.cap[m].V[3][node];
+
+            E->sphere.cap[m].V[1][node] = E->U[m][E->id[m][node].doff[1]];
+            E->sphere.cap[m].V[2][node] = E->U[m][E->id[m][node].doff[2]];
+            E->sphere.cap[m].V[3][node] = E->U[m][E->id[m][node].doff[3]];
+            if (E->node[m][node] & VBX)
+                E->sphere.cap[m].V[1][node] = E->sphere.cap[m].VB[1][node];
+            if (E->node[m][node] & VBY)
+                E->sphere.cap[m].V[2][node] = E->sphere.cap[m].VB[2][node];
+            if (E->node[m][node] & VBZ)
+                E->sphere.cap[m].V[3][node] = E->sphere.cap[m].VB[3][node];
+
+            sum_dV += (E->sphere.cap[m].V[1][node] - E->sphere.cap[m].Vprev[1][node])*(E->sphere.cap[m].V[1][node] - E->sphere.cap[m].Vprev[1][node])
+                + (E->sphere.cap[m].V[2][node] - E->sphere.cap[m].Vprev[2][node])*(E->sphere.cap[m].V[2][node] - E->sphere.cap[m].Vprev[2][node])
+                + (E->sphere.cap[m].V[3][node] - E->sphere.cap[m].Vprev[3][node])*(E->sphere.cap[m].V[3][node] - E->sphere.cap[m].Vprev[3][node]);
+            sum_V += E->sphere.cap[m].V[1][node]*E->sphere.cap[m].V[1][node]
+                + E->sphere.cap[m].V[2][node]*E->sphere.cap[m].V[2][node]
+                + E->sphere.cap[m].V[3][node]*E->sphere.cap[m].V[3][node];
+        }
+        rel_error = sqrt(sum_dV)/sqrt(sum_V);
+        MPI_Allreduce(&rel_error,&global_max_error,1,MPI_DOUBLE,MPI_MAX,E->parallel.world);
+        if(global_max_error <= tol_error) E->monitor.stop_topo_loop = 1;
+        if(E->parallel.me==0)
+            fprintf(stderr,"global_max_error=%e stop_topo_loop=%d\n",global_max_error,E->monitor.stop_topo_loop);
+
+        for (i=1;i<=E->lmesh.nno;i++)  {
+            eqn1 = E->id[m][i].doff[1];
+            eqn2 = E->id[m][i].doff[2];
+            eqn3 = E->id[m][i].doff[3];
+            sint = E->SinCos[level][m][0][i];
+            sinf = E->SinCos[level][m][1][i];
+            cost = E->SinCos[level][m][2][i];
+            cosf = E->SinCos[level][m][3][i];
+            E->temp[m][eqn1] = E->sphere.cap[m].V[1][i]*cost*cosf
+                - E->sphere.cap[m].V[2][i]*sinf
+                + E->sphere.cap[m].V[3][i]*sint*cosf;
+            E->temp[m][eqn2] = E->sphere.cap[m].V[1][i]*cost*sinf
+                + E->sphere.cap[m].V[2][i]*cosf
+                + E->sphere.cap[m].V[3][i]*sint*sinf;
+            E->temp[m][eqn3] = -E->sphere.cap[m].V[1][i]*sint
+                + E->sphere.cap[m].V[3][i]*cost;
+
+        }
+    }
+
+    return;
+}
+
+
+void velo_from_element(E,VV,m,el,sphere_key)
+     struct All_variables *E;
+     float VV[4][9];
+     int m,el,sphere_key;
+{
+
+    int a, node;
+    const int ends=enodes[E->mesh.nsd];
+
+    if (sphere_key)
+        for(a=1;a<=ends;a++)   {
+            node = E->ien[m][el].node[a];
+            VV[1][a] = E->sphere.cap[m].V[1][node];
+            VV[2][a] = E->sphere.cap[m].V[2][node];
+            VV[3][a] = E->sphere.cap[m].V[3][node];
+        }
+    else
+        for(a=1;a<=ends;a++)   {
+            node = E->ien[m][el].node[a];
+            VV[1][a] = E->temp[m][E->id[m][node].doff[1]];
+            VV[2][a] = E->temp[m][E->id[m][node].doff[2]];
+            VV[3][a] = E->temp[m][E->id[m][node].doff[3]];
+        }
+
+    return;
+}
+
+
 void p_to_nodes(E,P,PN,lev)
      struct All_variables *E;
      double **P;
@@ -58,97 +181,25 @@
   for (m=1;m<=E->sphere.caps_per_proc;m++)
     for(node=1;node<=E->lmesh.NNO[lev];node++)
       PN[m][node] =  0.0;
-	  
+
   for (m=1;m<=E->sphere.caps_per_proc;m++)
     for(element=1;element<=E->lmesh.NEL[lev];element++)
        for(j=1;j<=enodes[E->mesh.nsd];j++)  {
      	  node = E->IEN[lev][m][element].node[j];
-    	  PN[m][node] += P[m][element] * E->TWW[lev][m][element].node[j] ; 
+    	  PN[m][node] += P[m][element] * E->TWW[lev][m][element].node[j] ;
     	  }
- 
+
    (E->exchange_node_f)(E,PN,lev);
 
    for(m=1;m<=E->sphere.caps_per_proc;m++)
      for(node=1;node<=E->lmesh.NNO[lev];node++)
         PN[m][node] *= E->MASS[lev][m][node];
 
-     return; 
+     return;
 }
 
 
-/*
-void p_to_centres(E,PN,P,lev)
-     struct All_variables *E;
-     float **PN;
-     double **P;
-     int lev;
 
-{  int p,element,node,j,m;
-   double weight;
-
-  for (m=1;m<=E->sphere.caps_per_proc;m++)
-    for(p=1;p<=E->lmesh.NEL[lev];p++)
-      P[m][p] = 0.0;
-
-   weight=1.0/((double)enodes[E->mesh.nsd]) ;
-   
-  for (m=1;m<=E->sphere.caps_per_proc;m++)
-    for(p=1;p<=E->lmesh.NEL[lev];p++)
-      for(j=1;j<=enodes[E->mesh.nsd];j++)
-        P[m][p] += PN[m][E->IEN[lev][m][p].node[j]] * weight;
-
-   return;  
-   }
-*/
-
-/*
-void v_to_intpts(E,VN,VE,lev)
-  struct All_variables *E;
-  float **VN,**VE;
-  int lev;
-  {
-
-   int m,e,i,j,k;
-   const int nsd=E->mesh.nsd;
-   const int vpts=vpoints[nsd];
-   const int ends=enodes[nsd];
-
- for (m=1;m<=E->sphere.caps_per_proc;m++)
-   for(e=1;e<=E->lmesh.NEL[lev];e++)
-     for(i=1;i<=vpts;i++)                 {
-        VE[m][(e-1)*vpts + i] = 0.0;
-        for(j=1;j<=ends;j++)
-          VE[m][(e-1)*vpts + i] += VN[m][E->IEN[lev][m][e].node[j]]*E->N.vpt[GNVINDEX(j,i)];
-        }
-
-   return;
-  }
-*/
-
-/*
-void visc_to_intpts(E,VN,VE,lev)
-   struct All_variables *E;
-   float **VN,**VE;
-   int lev;
-   {
-
-   int m,e,i,j,k;
-   const int nsd=E->mesh.nsd;
-   const int vpts=vpoints[nsd];
-   const int ends=enodes[nsd];
-
- for (m=1;m<=E->sphere.caps_per_proc;m++)
-   for(e=1;e<=E->lmesh.NEL[lev];e++)
-     for(i=1;i<=vpts;i++) {
-        VE[m][(e-1)*vpts + i] = 0.0;
-	for(j=1;j<=ends;j++)
-          VE[m][(e-1)*vpts + i] += log(VN[m][E->IEN[lev][m][e].node[j]]) *  E->N.vpt[GNVINDEX(j,i)];
-        VE[m][(e-1)*vpts + i] = exp(VE[m][(e-1)*vpts + i]);
-        }
-
-  }
-*/
-
 void visc_from_gint_to_nodes(E,VE,VN,lev)
   struct All_variables *E;
   float **VE,**VN;
@@ -176,11 +227,11 @@
        VN[m][n] += E->TWW[lev][m][e].node[j] * temp_visc;
        }
     }
- 
+
    (E->exchange_node_f)(E,VN,lev);
 
    for(m=1;m<=E->sphere.caps_per_proc;m++)
-     for(n=1;n<=E->lmesh.NNO[lev];n++) 
+     for(n=1;n<=E->lmesh.NNO[lev];n++)
         VN[m][n] *= E->MASS[lev][m][n];
 
    return;
@@ -208,7 +259,7 @@
    for(e=1;e<=E->lmesh.NEL[lev];e++)
      for(i=1;i<=vpts;i++)      {
        temp_visc=0.0;
-       for(j=1;j<=ends;j++)             
+       for(j=1;j<=ends;j++)
 	 temp_visc += E->N.vpt[GNVINDEX(j,i)]*VN[m][E->IEN[lev][m][e].node[j]];
 
        VE[m][(e-1)*vpts+i] = temp_visc;
@@ -241,7 +292,7 @@
 
      VN[m][e] = temp_visc;
     }
- 
+
    return;
 }
 

Modified: mc/3D/CitcomS/branches/compressible/lib/Obsolete.c
===================================================================
--- mc/3D/CitcomS/branches/compressible/lib/Obsolete.c	2007-01-18 22:39:21 UTC (rev 5831)
+++ mc/3D/CitcomS/branches/compressible/lib/Obsolete.c	2007-01-19 00:10:53 UTC (rev 5832)
@@ -811,6 +811,91 @@
   return;
 }
 
+
+/* ==========================================================  */
+/*  From Nodal_mesh.c                                          */
+/* =========================================================== */
+
+void flogical_mesh_to_real(E,data,level)
+     struct All_variables *E;
+     float *data;
+     int level;
+
+{ int i,j,n1,n2;
+
+  return;
+}
+
+
+void p_to_centres(E,PN,P,lev)
+     struct All_variables *E;
+     float **PN;
+     double **P;
+     int lev;
+
+{  int p,element,node,j,m;
+   double weight;
+
+  for (m=1;m<=E->sphere.caps_per_proc;m++)
+    for(p=1;p<=E->lmesh.NEL[lev];p++)
+      P[m][p] = 0.0;
+
+   weight=1.0/((double)enodes[E->mesh.nsd]) ;
+
+  for (m=1;m<=E->sphere.caps_per_proc;m++)
+    for(p=1;p<=E->lmesh.NEL[lev];p++)
+      for(j=1;j<=enodes[E->mesh.nsd];j++)
+        P[m][p] += PN[m][E->IEN[lev][m][p].node[j]] * weight;
+
+   return;
+   }
+
+
+void v_to_intpts(E,VN,VE,lev)
+  struct All_variables *E;
+  float **VN,**VE;
+  int lev;
+  {
+
+   int m,e,i,j,k;
+   const int nsd=E->mesh.nsd;
+   const int vpts=vpoints[nsd];
+   const int ends=enodes[nsd];
+
+ for (m=1;m<=E->sphere.caps_per_proc;m++)
+   for(e=1;e<=E->lmesh.NEL[lev];e++)
+     for(i=1;i<=vpts;i++)                 {
+        VE[m][(e-1)*vpts + i] = 0.0;
+        for(j=1;j<=ends;j++)
+          VE[m][(e-1)*vpts + i] += VN[m][E->IEN[lev][m][e].node[j]]*E->N.vpt[GNVINDEX(j,i)];
+        }
+
+   return;
+  }
+
+
+void visc_to_intpts(E,VN,VE,lev)
+   struct All_variables *E;
+   float **VN,**VE;
+   int lev;
+   {
+
+   int m,e,i,j,k;
+   const int nsd=E->mesh.nsd;
+   const int vpts=vpoints[nsd];
+   const int ends=enodes[nsd];
+
+ for (m=1;m<=E->sphere.caps_per_proc;m++)
+   for(e=1;e<=E->lmesh.NEL[lev];e++)
+     for(i=1;i<=vpts;i++) {
+        VE[m][(e-1)*vpts + i] = 0.0;
+	for(j=1;j<=ends;j++)
+          VE[m][(e-1)*vpts + i] += log(VN[m][E->IEN[lev][m][e].node[j]]) *  E->N.vpt[GNVINDEX(j,i)];
+        VE[m][(e-1)*vpts + i] = exp(VE[m][(e-1)*vpts + i]);
+        }
+
+  }
+
 /* version */
 /* $Id$ */
 

Modified: mc/3D/CitcomS/branches/compressible/lib/Stokes_flow_Incomp.c
===================================================================
--- mc/3D/CitcomS/branches/compressible/lib/Stokes_flow_Incomp.c	2007-01-18 22:39:21 UTC (rev 5831)
+++ mc/3D/CitcomS/branches/compressible/lib/Stokes_flow_Incomp.c	2007-01-19 00:10:53 UTC (rev 5832)
@@ -35,13 +35,28 @@
 #include "global_defs.h"
 #include <stdlib.h>
 
+static float solve_Ahat_p_fhat(struct All_variables *E,
+                               double **V, double **P, double **F,
+                               double imp, int *steps_max);
+static float solve_Ahat_p_fhat_BA(struct All_variables *E,
+                                  double **V, double **P, double **F,
+                                  double imp, int *steps_max);
+static float solve_Ahat_p_fhat_TALA(struct All_variables *E,
+                                    double **V, double **P, double **F,
+                                    double imp, int *steps_max);
+static double initial_vel_residual(struct All_variables *E,
+                                   double **V, double **P, double **F,
+                                   double imp);
+static double incompressibility_residual(struct All_variables *E,
+                                         double **V, double **r1);
+
+
 /* Master loop for pressure and (hence) velocity field */
 
 void solve_constrained_flow_iterative(E)
      struct All_variables *E;
 
 {
-    float solve_Ahat_p_fhat();
     void v_from_vector();
     void p_to_nodes();
 
@@ -63,7 +78,6 @@
      struct All_variables *E;
 
 {
-    float solve_Ahat_p_fhat();
     void v_from_vector_pseudo_surf();
     void p_to_nodes();
 
@@ -84,17 +98,35 @@
 
 /* ========================================================================= */
 
-float solve_Ahat_p_fhat(struct All_variables *E,
-			double **V, double **P, double **F,
-			double imp, int *steps_max)
+static float solve_Ahat_p_fhat(struct All_variables *E,
+                               double **V, double **P, double **F,
+                               double imp, int *steps_max)
 {
+    float residual;
+
+    if(E->control.inv_gruneisen < 1e-6)
+        residual = solve_Ahat_p_fhat_BA(E, V, P, F, imp, steps_max);
+    else
+        residual = solve_Ahat_p_fhat_TALA(E, V, P, F, imp, steps_max);
+
+    return(residual);
+}
+
+
+/* Solve incompressible Stokes flow (Boussinesq Approximation) using
+ * conjugate gradient (CG) iterations
+ */
+
+static float solve_Ahat_p_fhat_BA(struct All_variables *E,
+                                  double **V, double **P, double **F,
+                                  double imp, int *steps_max)
+{
     int m, i, j, count, valid, lev, npno, neq;
     int gnpno, gneq;
 
-    double *r1[NCS];
-    double *r0[NCS], *r2[NCS], *z0[NCS], *z1[NCS], *s1[NCS], *s2[NCS];
+    double *r1[NCS], *r2[NCS], *z1[NCS], *s1[NCS], *s2[NCS];
     double *shuffle[NCS];
-    double alpha, delta, s2dotAhat, r0dotr0, r1dotz1;
+    double alpha, delta, r0dotz0, r1dotz1;
     double residual, v_res;
 
     double global_vdot(), global_pdot();
@@ -114,74 +146,37 @@
     gneq = E->mesh.neq;
     npno = E->lmesh.npno;
     neq = E->lmesh.neq;
+    lev = E->mesh.levmax;
 
     for (m=1; m<=E->sphere.caps_per_proc; m++)   {
-        r0[m] = (double *)malloc((npno+1)*sizeof(double));
         r1[m] = (double *)malloc((npno+1)*sizeof(double));
         r2[m] = (double *)malloc((npno+1)*sizeof(double));
-        z0[m] = (double *)malloc((npno+1)*sizeof(double));
         z1[m] = (double *)malloc((npno+1)*sizeof(double));
         s1[m] = (double *)malloc((npno+1)*sizeof(double));
         s2[m] = (double *)malloc((npno+1)*sizeof(double));
     }
 
     time0 = CPU_time0();
+    valid = 1;
+    r0dotz0 = 0;
 
-
     /* calculate the initial velocity residual */
-    lev = E->mesh.levmax;
-    v_res = sqrt(global_vdot(E, F, F, lev)/gneq);
+    v_res = initial_vel_residual(E, V, P, F, imp);
 
-    if (E->parallel.me==0) {
-        fprintf(E->fp, "initial residue of momentum equation F %.9e %d\n",
-		v_res, gneq);
-        fprintf(stderr, "initial residue of momentum equation F %.9e %d\n",
-		v_res, gneq);
-    }
 
-
-    /* F = F - grad(P) - K*V */
-    assemble_grad_p(E, P, E->u1, lev);
-    for(m=1; m<=E->sphere.caps_per_proc; m++)
-        for(i=0; i<neq; i++)
-            F[m][i] = F[m][i] - E->u1[m][i];
-
-    assemble_del2_u(E, V, E->u1, lev, 1);
-    for(m=1; m<=E->sphere.caps_per_proc; m++)
-        for(i=0; i<neq; i++)
-            F[m][i] = F[m][i] - E->u1[m][i];
-
-    strip_bcs_from_residual(E, F, lev);
-
-
-    /* solve K*u1 = F for u1 */
-    valid=solve_del2_u(E, E->u1, F, imp*v_res, E->mesh.levmax);
-    strip_bcs_from_residual(E, E->u1, lev);
-
-
-    /* V = V + u1 */
-    for(m=1; m<=E->sphere.caps_per_proc; m++)
-        for(i=0; i<neq; i++)
-            V[m][i] += E->u1[m][i];
-
-
-    /* r1 = div(V) */
+    /* initial residual r1 = div(V) */
     assemble_div_u(E, V, r1, lev);
+    residual = incompressibility_residual(E, V, r1);
 
-    /* incompressiblity residual = norm(r1) / norm(V) */
-    residual = sqrt(global_pdot(E, r1, r1, lev)/gnpno);
-    E->monitor.vdotv = sqrt(global_vdot(E, V, V, lev)/gneq);
-    E->monitor.incompressibility = residual / E->monitor.vdotv;
-
     count = 0;
 
     if (E->control.print_convergence && E->parallel.me==0) {
         fprintf(E->fp, "AhatP (%03d) after %g seconds with div/v=%.3e "
-		"for step %d\n", count, CPU_time0()-time0,
-		E->monitor.incompressibility, E->monitor.solution_cycles);
+                "for step %d\n", count, CPU_time0()-time0,
+                E->monitor.incompressibility, E->monitor.solution_cycles);
         fprintf(stderr, "AhatP (%03d) after %g seconds with div/v=%.3e "
-		"for step %d\n", count, CPU_time0()-time0,
-		E->monitor.incompressibility, E->monitor.solution_cycles);
+                "for step %d\n", count, CPU_time0()-time0,
+                E->monitor.incompressibility, E->monitor.solution_cycles);
     }
 
 
@@ -194,52 +189,54 @@
            (dpressure >= imp) && (dvelocity >= imp) )  {
 
 
-	/* preconditioner B, z1 = B*r1 */
+        /* preconditioner B, BPI = inv(B), solve B*z1 = r1 for z1 */
         for(m=1; m<=E->sphere.caps_per_proc; m++)
             for(j=1; j<=npno; j++)
                 z1[m][j] = E->BPI[lev][m][j] * r1[m][j];
 
 
-	/* r1dotz1 = <r1, z1> */
+        /* r1dotz1 = <r1, z1> */
         r1dotz1 = global_pdot(E, r1, z1, lev);
+        assert(r1dotz1 != 0.0  /* Division by zero in head of incompressibility iteration */);
 
 
-        if ((count == 0))
+        /* update search direction */
+        if(count == 0)
             for (m=1; m<=E->sphere.caps_per_proc; m++)
                 for(j=1; j<=npno; j++)
                     s2[m][j] = z1[m][j];
         else {
-	    /* s2 = z1 + s1 * <r1,z1>/<r0,z0> */
-            r0dotr0 = global_pdot(E, r0, z0, lev);
-            assert(r0dotr0 != 0.0  /* Division by zero in head of incompressibility iteration */);
-            delta = r1dotz1 / r0dotr0;
+            /* s2 = z1 + s1 * <r1,z1>/<r0,z0> */
+            delta = r1dotz1 / r0dotz0;
             for(m=1; m<=E->sphere.caps_per_proc; m++)
                 for(j=1; j<=npno; j++)
                     s2[m][j] = z1[m][j] + delta * s1[m][j];
         }
 
-	/* solve K*u1 = grad(s2) for u1 */
+
+        /* solve K*u1 = grad(s2) for u1 */
         assemble_grad_p(E, s2, F, lev);
         valid = solve_del2_u(E, E->u1, F, imp*v_res, lev);
         strip_bcs_from_residual(E, E->u1, lev);
 
 
-	/* alpha = <r1, z1> / <s2, div(u1)> */
+        /* F = div(u1) */
         assemble_div_u(E, E->u1, F, lev);
-        s2dotAhat = global_pdot(E, s2, F, lev);
 
+
+        /* alpha = <r1, z1> / <s2, F> */
         if(valid)
             /* alpha defined this way is the same as R&W */
-            alpha = r1dotz1 / s2dotAhat;
+            alpha = r1dotz1 / global_pdot(E, s2, F, lev);
         else
             alpha = 0.0;
 
 
-	/* r2 = r1 - alpha * div(u1) */
-	/* P = P + alpha * s2 */
-	/* V = V - alpha * u1 */
+        /* r2 = r1 - alpha * div(u1) */
+        /* P = P + alpha * s2 */
+        /* V = V - alpha * u1 */
         for(m=1; m<=E->sphere.caps_per_proc; m++)
-            for(j=1; j<=npno; j++)   {
+            for(j=1; j<=npno; j++) {
                 r2[m][j] = r1[m][j] - alpha * F[m][j];
                 P[m][j] += alpha * s2[m][j];
             }
@@ -249,16 +246,11 @@
                 V[m][j] -= alpha * E->u1[m][j];
 
 
-	/* compute velocity and incompressibility residual */
+        /* compute velocity and incompressibility residual */
         assemble_div_u(E, V, F, lev);
-        E->monitor.vdotv = global_vdot(E, V, V, E->mesh.levmax);
-        E->monitor.incompressibility = sqrt((gneq/gnpno)
-					    *(1.0e-32
-					      + global_pdot(E, F, F, lev)
-					      / (1.0e-32+E->monitor.vdotv)));
+        incompressibility_residual(E, V, F);
 
-
-	/* compute velocity and pressure corrections */
+        /* compute velocity and pressure corrections */
         dpressure = alpha * sqrt(global_pdot(E, s2, s2, lev)
                                  / (1.0e-32 + global_pdot(E, P, P, lev)));
         dvelocity = alpha * sqrt(global_vdot(E, E->u1, E->u1, lev)
@@ -268,39 +260,35 @@
 
         if(E->control.print_convergence && E->parallel.me==0) {
             fprintf(E->fp, "AhatP (%03d) after %g seconds with div/v=%.3e "
-		    "dv/v=%.3e and dp/p=%.3e for step %d\n",
+                    "dv/v=%.3e and dp/p=%.3e for step %d\n",
                     count, CPU_time0()-time0, E->monitor.incompressibility,
                     dvelocity, dpressure, E->monitor.solution_cycles);
             fprintf(stderr, "AhatP (%03d) after %g seconds with div/v=%.3e "
-		    "dv/v=%.3e and dp/p=%.3e for step %d\n",
+                    "dv/v=%.3e and dp/p=%.3e for step %d\n",
                     count, CPU_time0()-time0, E->monitor.incompressibility,
                     dvelocity, dpressure, E->monitor.solution_cycles);
         }
 
 
-	/* swap array pointers */
+        /* shift array pointers */
         for(m=1; m<=E->sphere.caps_per_proc; m++) {
             shuffle[m] = s1[m];
-	    s1[m] = s2[m];
-	    s2[m] = shuffle[m];
+            s1[m] = s2[m];
+            s2[m] = shuffle[m];
 
-            shuffle[m] = r0[m];
-	    r0[m] = r1[m];
-	    r1[m] = r2[m];
-	    r2[m] = shuffle[m];
-
-            shuffle[m] = z0[m];
-	    z0[m] = z1[m];
-	    z1[m] = shuffle[m];
+            shuffle[m] = r1[m];
+            r1[m] = r2[m];
+            r2[m] = shuffle[m];
         }
 
-    }       /* end loop for conjugate gradient   */
+        /* shift <r0, z0> = <r1, z1> */
+        r0dotz0 = r1dotz1;
 
+    } /* end loop for conjugate gradient */
+
     for(m=1; m<=E->sphere.caps_per_proc; m++) {
-        free((void *) r0[m]);
         free((void *) r1[m]);
         free((void *) r2[m]);
-        free((void *) z0[m]);
         free((void *) z1[m]);
         free((void *) s1[m]);
         free((void *) s2[m]);
@@ -311,142 +299,94 @@
     return(residual);
 }
 
-/* ======================================================================== */
+/* Solve incompressible Stokes flow (Boussinesq Approximation) using
+ * bi-conjugate gradient stablized (BiCG-stab)iterations
+ */
 
+static float solve_Ahat_p_fhat_TALA(struct All_variables *E,
+                                    double **V, double **P, double **F,
+                                    double imp, int *steps_max)
+{
 
+}
 
 
-void v_from_vector(E)
-     struct All_variables *E;
+
+static double initial_vel_residual(struct All_variables *E,
+                                   double **V, double **P, double **F,
+                                   double imp)
 {
-    int i,eqn1,eqn2,eqn3,m,node;
-    float sint,cost,sinf,cosf;
+    void assemble_del2_u();
+    void assemble_grad_p();
+    void strip_bcs_from_residual();
+    int  solve_del2_u();
+    double global_vdot();
 
-    const int nno = E->lmesh.nno;
-    const int level=E->mesh.levmax;
+    int neq = E->lmesh.neq;
+    int gneq = E->mesh.neq;
+    int lev = E->mesh.levmax;
+    int i, m;
+    double v_res;
 
-    for (m=1;m<=E->sphere.caps_per_proc;m++)   {
-        for(node=1;node<=nno;node++)     {
-            E->sphere.cap[m].V[1][node] = E->U[m][E->id[m][node].doff[1]];
-            E->sphere.cap[m].V[2][node] = E->U[m][E->id[m][node].doff[2]];
-            E->sphere.cap[m].V[3][node] = E->U[m][E->id[m][node].doff[3]];
-            if (E->node[m][node] & VBX)
-                E->sphere.cap[m].V[1][node] = E->sphere.cap[m].VB[1][node];
-            if (E->node[m][node] & VBY)
-                E->sphere.cap[m].V[2][node] = E->sphere.cap[m].VB[2][node];
-            if (E->node[m][node] & VBZ)
-                E->sphere.cap[m].V[3][node] = E->sphere.cap[m].VB[3][node];
-        }
-        for (i=1;i<=E->lmesh.nno;i++)  {
-            eqn1 = E->id[m][i].doff[1];
-            eqn2 = E->id[m][i].doff[2];
-            eqn3 = E->id[m][i].doff[3];
-            sint = E->SinCos[level][m][0][i];
-            sinf = E->SinCos[level][m][1][i];
-            cost = E->SinCos[level][m][2][i];
-            cosf = E->SinCos[level][m][3][i];
-            E->temp[m][eqn1] = E->sphere.cap[m].V[1][i]*cost*cosf
-                - E->sphere.cap[m].V[2][i]*sinf
-                + E->sphere.cap[m].V[3][i]*sint*cosf;
-            E->temp[m][eqn2] = E->sphere.cap[m].V[1][i]*cost*sinf
-                + E->sphere.cap[m].V[2][i]*cosf
-                + E->sphere.cap[m].V[3][i]*sint*sinf;
-            E->temp[m][eqn3] = -E->sphere.cap[m].V[1][i]*sint
-                + E->sphere.cap[m].V[3][i]*cost;
+    v_res = sqrt(global_vdot(E, F, F, lev) / gneq);
 
-        }
+    if (E->parallel.me==0) {
+        fprintf(E->fp, "initial residue of momentum equation F %.9e %d\n",
+                v_res, gneq);
+        fprintf(stderr, "initial residue of momentum equation F %.9e %d\n",
+                v_res, gneq);
     }
 
-    return;
-}
 
-void v_from_vector_pseudo_surf(E)
-     struct All_variables *E;
-{
-    int i,eqn1,eqn2,eqn3,m,node;
-    float sint,cost,sinf,cosf;
+    /* F = F - grad(P) - K*V */
+    assemble_grad_p(E, P, E->u1, lev);
+    for(m=1; m<=E->sphere.caps_per_proc; m++)
+        for(i=0; i<neq; i++)
+            F[m][i] = F[m][i] - E->u1[m][i];
 
-    const int nno = E->lmesh.nno;
-    const int level=E->mesh.levmax;
-    double sum_V = 0.0, sum_dV = 0.0, rel_error = 0.0, global_max_error = 0.0;
-    double tol_error = 1.0e-03;
+    assemble_del2_u(E, V, E->u1, lev, 1);
+    for(m=1; m<=E->sphere.caps_per_proc; m++)
+        for(i=0; i<neq; i++)
+            F[m][i] = F[m][i] - E->u1[m][i];
 
-    for (m=1;m<=E->sphere.caps_per_proc;m++)   {
-        for(node=1;node<=nno;node++)     {
-            E->sphere.cap[m].Vprev[1][node] = E->sphere.cap[m].V[1][node];
-            E->sphere.cap[m].Vprev[2][node] = E->sphere.cap[m].V[2][node];
-            E->sphere.cap[m].Vprev[3][node] = E->sphere.cap[m].V[3][node];
+    strip_bcs_from_residual(E, F, lev);
 
-            E->sphere.cap[m].V[1][node] = E->U[m][E->id[m][node].doff[1]];
-            E->sphere.cap[m].V[2][node] = E->U[m][E->id[m][node].doff[2]];
-            E->sphere.cap[m].V[3][node] = E->U[m][E->id[m][node].doff[3]];
-            if (E->node[m][node] & VBX)
-                E->sphere.cap[m].V[1][node] = E->sphere.cap[m].VB[1][node];
-            if (E->node[m][node] & VBY)
-                E->sphere.cap[m].V[2][node] = E->sphere.cap[m].VB[2][node];
-            if (E->node[m][node] & VBZ)
-                E->sphere.cap[m].V[3][node] = E->sphere.cap[m].VB[3][node];
 
-            sum_dV += (E->sphere.cap[m].V[1][node] - E->sphere.cap[m].Vprev[1][node])*(E->sphere.cap[m].V[1][node] - E->sphere.cap[m].Vprev[1][node])
-                + (E->sphere.cap[m].V[2][node] - E->sphere.cap[m].Vprev[2][node])*(E->sphere.cap[m].V[2][node] - E->sphere.cap[m].Vprev[2][node])
-                + (E->sphere.cap[m].V[3][node] - E->sphere.cap[m].Vprev[3][node])*(E->sphere.cap[m].V[3][node] - E->sphere.cap[m].Vprev[3][node]);
-            sum_V += E->sphere.cap[m].V[1][node]*E->sphere.cap[m].V[1][node]
-                + E->sphere.cap[m].V[2][node]*E->sphere.cap[m].V[2][node]
-                + E->sphere.cap[m].V[3][node]*E->sphere.cap[m].V[3][node];
-        }
-        rel_error = sqrt(sum_dV)/sqrt(sum_V);
-        MPI_Allreduce(&rel_error,&global_max_error,1,MPI_DOUBLE,MPI_MAX,E->parallel.world);
-        if(global_max_error <= tol_error) E->monitor.stop_topo_loop = 1;
-        if(E->parallel.me==0)
-            fprintf(stderr,"global_max_error=%e stop_topo_loop=%d\n",global_max_error,E->monitor.stop_topo_loop);
+    /* solve K*u1 = F for u1 */
+    solve_del2_u(E, E->u1, F, imp*v_res, lev);
+    strip_bcs_from_residual(E, E->u1, lev);
 
-        for (i=1;i<=E->lmesh.nno;i++)  {
-            eqn1 = E->id[m][i].doff[1];
-            eqn2 = E->id[m][i].doff[2];
-            eqn3 = E->id[m][i].doff[3];
-            sint = E->SinCos[level][m][0][i];
-            sinf = E->SinCos[level][m][1][i];
-            cost = E->SinCos[level][m][2][i];
-            cosf = E->SinCos[level][m][3][i];
-            E->temp[m][eqn1] = E->sphere.cap[m].V[1][i]*cost*cosf
-                - E->sphere.cap[m].V[2][i]*sinf
-                + E->sphere.cap[m].V[3][i]*sint*cosf;
-            E->temp[m][eqn2] = E->sphere.cap[m].V[1][i]*cost*sinf
-                + E->sphere.cap[m].V[2][i]*cosf
-                + E->sphere.cap[m].V[3][i]*sint*sinf;
-            E->temp[m][eqn3] = -E->sphere.cap[m].V[1][i]*sint
-                + E->sphere.cap[m].V[3][i]*cost;
 
-        }
-    }
+    /* V = V + u1 */
+    for(m=1; m<=E->sphere.caps_per_proc; m++)
+        for(i=0; i<neq; i++)
+            V[m][i] += E->u1[m][i];
 
-    return;
+    return(v_res);
 }
 
 
-void velo_from_element(E,VV,m,el,sphere_key)
-     struct All_variables *E;
-     float VV[4][9];
-     int el,m,sphere_key;
+
+static double incompressibility_residual(struct All_variables *E,
+                                         double **V, double **F)
 {
+    double global_pdot();
+    double global_vdot();
 
-    int a, node;
-    const int ends=enodes[E->mesh.nsd];
+    int gnpno = E->mesh.npno;
+    int gneq = E->mesh.neq;
+    int lev = E->mesh.levmax;
+    double tmp1, tmp2;
 
-    if (sphere_key)
-        for(a=1;a<=ends;a++)   {
-            node = E->ien[m][el].node[a];
-            VV[1][a] = E->sphere.cap[m].V[1][node];
-            VV[2][a] = E->sphere.cap[m].V[2][node];
-            VV[3][a] = E->sphere.cap[m].V[3][node];
-        }
-    else
-        for(a=1;a<=ends;a++)   {
-            node = E->ien[m][el].node[a];
-            VV[1][a] = E->temp[m][E->id[m][node].doff[1]];
-            VV[2][a] = E->temp[m][E->id[m][node].doff[2]];
-            VV[3][a] = E->temp[m][E->id[m][node].doff[3]];
-        }
+    /* incompressiblity residual = norm(F) / norm(V) */
 
-    return;
+    tmp1 = global_vdot(E, V, V, lev);
+    tmp2 = global_pdot(E, F, F, lev);
+    E->monitor.incompressibility = sqrt((gneq / gnpno)
+                                        *( (1.0e-32 + tmp2)
+                                              / (1.0e-32 + tmp1) ));
+
+    E->monitor.vdotv = tmp1;
+
+    return(sqrt(tmp2/gnpno));;
 }