[cig-commits] r5833 - mc/3D/CitcomS/branches/compressible/lib

tan2 at geodynamics.org tan2 at geodynamics.org
Thu Jan 18 16:14:37 PST 2007


Author: tan2
Date: 2007-01-18 16:14:37 -0800 (Thu, 18 Jan 2007)
New Revision: 5833

Modified:
   mc/3D/CitcomS/branches/compressible/lib/Element_calculations.c
   mc/3D/CitcomS/branches/compressible/lib/Material_properties.c
   mc/3D/CitcomS/branches/compressible/lib/Regional_read_input_from_files.c
   mc/3D/CitcomS/branches/compressible/lib/Stokes_flow_Incomp.c
   mc/3D/CitcomS/branches/compressible/lib/global_defs.h
Log:
BiCGStab algorithm for TALA Stokes solver

Compared with the CG algorithm solver, the velocity differs by ~0.1% but the
stress is way wrong. 


Modified: mc/3D/CitcomS/branches/compressible/lib/Element_calculations.c
===================================================================
--- mc/3D/CitcomS/branches/compressible/lib/Element_calculations.c	2007-01-19 00:10:53 UTC (rev 5832)
+++ mc/3D/CitcomS/branches/compressible/lib/Element_calculations.c	2007-01-19 00:14:37 UTC (rev 5833)
@@ -577,14 +577,55 @@
     return;
 }
 
+
+
+
+/* compute div(rho_ref*V) = div(V) + Vz*d(ln(rho_ref))/dz */
+
+static void assemble_dlnrho(struct All_variables *E, double *dlnrhodr,
+			    double **U, double **result, int level)
+{
+    int e, nz, j3, a, b, m;
+
+    const int nel = E->lmesh.NEL[level];
+    const int ends = enodes[E->mesh.nsd];
+    const int dims = E->mesh.nsd;
+    double tmp;
+
+    for(m=1; m<=E->sphere.caps_per_proc; m++)
+        for(e=1; e<=nel; e++) {
+            nz = ((e-1) % E->lmesh.elz) + 1;
+            tmp = dlnrhodr[nz] / ends;
+            for(a=1; a<=ends; a++) {
+                b = E->IEN[level][m][e].node[a];
+                j3 = E->ID[level][m][b].doff[3];
+                result[m][e] +=  tmp * U[m][j3];
+	    }
+        }
+
+    return;
+}
+
+
+
+
+void assemble_div_rho_u(struct All_variables *E,
+                        double **U, double **result, int level)
+{
+    void assemble_div_u();
+    assemble_div_u(E, U, result, level);
+    assemble_dlnrho(E, E->dlnrhodr, U, result, level);
+
+    return;
+}
+
+
 /* ==========================================
    Assemble a div_u vector element by element
    ==========================================  */
 
-void assemble_div_u(E,U,divU,level)
-     struct All_variables *E;
-     double **U,**divU;
-     int level;
+void assemble_div_u(struct All_variables *E,
+                    double **U, double **divU, int level)
 {
     int e,j1,j2,j3,p,a,b,m;
 
@@ -611,11 +652,11 @@
 	    }
 	 }
 
-
     return;
 }
 
 
+
 /* ==========================================
    Assemble a grad_P vector element by element
    ==========================================  */

Modified: mc/3D/CitcomS/branches/compressible/lib/Material_properties.c
===================================================================
--- mc/3D/CitcomS/branches/compressible/lib/Material_properties.c	2007-01-19 00:10:53 UTC (rev 5832)
+++ mc/3D/CitcomS/branches/compressible/lib/Material_properties.c	2007-01-19 00:14:37 UTC (rev 5833)
@@ -40,6 +40,7 @@
 {
     int noz = E->lmesh.noz;
     int nno = E->lmesh.nno;
+    int nel = E->lmesh.nel;
 
     /* reference profile of density */
     E->rho_ref = (double *) malloc((noz+1)*sizeof(double));
@@ -49,12 +50,16 @@
 
     /* reference profile of temperature */
     E->T_ref = (double *) malloc((noz+1)*sizeof(double));
+
+    /* reference profile of d(ln(rho_ref))/dr */
+    E->dlnrhodr = (double *) malloc((nel+1)*sizeof(double));
 }
 
 
 void reference_state(struct All_variables *E)
 {
     int noz = E->lmesh.noz;
+    int nel = E->lmesh.nel;
     int i;
     double r, z, tmp, T0;
 
@@ -69,11 +74,17 @@
 	E->T_ref[i] = T0 * (exp(E->control.Di * z) - 1);
     }
 
-    for(i=1; i<=noz; i++) {
-	fprintf(stderr, "%d %f %f %f %f\n",
-		i, E->sx[1][3][i], 1-E->sx[1][3][i],
-		E->rho_ref[i], E->thermexp_ref[i]);
+    for(i=1; i<=nel; i++) {
+        // TODO: dln(rho)/dr
+        E->dlnrhodr[i] = - tmp;
     }
+
+    if(E->parallel.me < E->parallel.nprocz)
+        for(i=1; i<=noz; i++) {
+            fprintf(stderr, "%d %f %f %f %f\n",
+                    i+E->lmesh.nzs-1, E->sx[1][3][i], 1-E->sx[1][3][i],
+                    E->rho_ref[i], E->thermexp_ref[i]);
+        }
 }
 
 

Modified: mc/3D/CitcomS/branches/compressible/lib/Regional_read_input_from_files.c
===================================================================
--- mc/3D/CitcomS/branches/compressible/lib/Regional_read_input_from_files.c	2007-01-19 00:10:53 UTC (rev 5832)
+++ mc/3D/CitcomS/branches/compressible/lib/Regional_read_input_from_files.c	2007-01-19 00:14:37 UTC (rev 5833)
@@ -1,6 +1,6 @@
 /*
  *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * 
+ *
  *<LicenseText>
  *
  * CitcomS by Louis Moresi, Shijie Zhong, Lijie Han, Eh Tan,
@@ -22,7 +22,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
  *</LicenseText>
- * 
+ *
  *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <math.h>

Modified: mc/3D/CitcomS/branches/compressible/lib/Stokes_flow_Incomp.c
===================================================================
--- mc/3D/CitcomS/branches/compressible/lib/Stokes_flow_Incomp.c	2007-01-19 00:10:53 UTC (rev 5832)
+++ mc/3D/CitcomS/branches/compressible/lib/Stokes_flow_Incomp.c	2007-01-19 00:14:37 UTC (rev 5833)
@@ -48,7 +48,7 @@
                                    double **V, double **P, double **F,
                                    double imp);
 static double incompressibility_residual(struct All_variables *E,
-                                         double **V, double **r1);
+                                         double **V, double **r);
 
 
 /* Master loop for pressure and (hence) velocity field */
@@ -130,7 +130,6 @@
     double residual, v_res;
 
     double global_vdot(), global_pdot();
-    double *dvector();
 
     double time0, CPU_time0();
     float dpressure, dvelocity;
@@ -189,7 +188,7 @@
            (dpressure >= imp) && (dvelocity >= imp) )  {
 
 
-        /* preconditioner B, BPI = inv(B), solve B*z1 = r1 for z1 */
+        /* preconditioner BPI ~= inv(K), z1 = BPI*r1 */
         for(m=1; m<=E->sphere.caps_per_proc; m++)
             for(j=1; j<=npno; j++)
                 z1[m][j] = E->BPI[lev][m][j] * r1[m][j];
@@ -233,14 +232,18 @@
 
 
         /* r2 = r1 - alpha * div(u1) */
+        for(m=1; m<=E->sphere.caps_per_proc; m++)
+            for(j=1; j<=npno; j++)
+                r2[m][j] = r1[m][j] - alpha * F[m][j];
+
+
         /* P = P + alpha * s2 */
-        /* V = V - alpha * u1 */
         for(m=1; m<=E->sphere.caps_per_proc; m++)
-            for(j=1; j<=npno; j++) {
-                r2[m][j] = r1[m][j] - alpha * F[m][j];
+            for(j=1; j<=npno; j++)
                 P[m][j] += alpha * s2[m][j];
-            }
 
+
+        /* V = V - alpha * u1 */
         for(m=1; m<=E->sphere.caps_per_proc; m++)
             for(j=0; j<neq; j++)
                 V[m][j] -= alpha * E->u1[m][j];
@@ -307,7 +310,254 @@
                                     double **V, double **P, double **F,
                                     double imp, int *steps_max)
 {
+    void assemble_div_u();
+    void assemble_del2_u();
+    void assemble_grad_p();
+    void strip_bcs_from_residual();
+    int  solve_del2_u();
+    void parallel_process_termination();
 
+    double global_vdot(), global_pdot();
+    double CPU_time0();
+
+    int gnpno, gneq;
+    int npno, neq;
+    int m, i, j, count, lev;
+    int valid;
+
+    double alpha, delta, omega;
+    double r0dotrt, r1dotrt;
+    double residual, dpressure, dvelocity;
+
+    double *r1[NCS], *r2[NCS], *z0[NCS], *p1[NCS], *p2[NCS];
+    double *rt[NCS], *v0[NCS], *s0[NCS], *st[NCS], *t0[NCS];
+    double *u0[NCS];
+    double *shuffle[NCS];
+
+    double time0, v_res;
+
+    gnpno = E->mesh.npno;
+    gneq = E->mesh.neq;
+    npno = E->lmesh.npno;
+    neq = E->lmesh.neq;
+    lev = E->mesh.levmax;
+
+    for (m=1; m<=E->sphere.caps_per_proc; m++)   {
+        r1[m] = (double *)malloc((npno+1)*sizeof(double));
+        r2[m] = (double *)malloc((npno+1)*sizeof(double));
+        p1[m] = (double *)malloc((npno+1)*sizeof(double));
+        p2[m] = (double *)malloc((npno+1)*sizeof(double));
+        rt[m] = (double *)malloc((npno+1)*sizeof(double));
+        z0[m] = (double *)malloc((npno+1)*sizeof(double));
+        v0[m] = (double *)malloc((npno+1)*sizeof(double));
+        s0[m] = (double *)malloc((npno+1)*sizeof(double));
+        st[m] = (double *)malloc((npno+1)*sizeof(double));
+        t0[m] = (double *)malloc((npno+1)*sizeof(double));
+
+        u0[m] = (double *)malloc((neq+1)*sizeof(double));
+    }
+
+    time0 = CPU_time0();
+    valid = 1;
+
+    /* calculate the initial velocity residual */
+    v_res = initial_vel_residual(E, V, P, F, imp);
+
+
+    /* initial residual r1 = div(rho_ref*V) */
+    assemble_div_rho_u(E, V, r1, lev);
+    residual = incompressibility_residual(E, V, r1);
+
+
+    /* initial conjugate residual rt = r1 */
+    for(m=1; m<=E->sphere.caps_per_proc; m++)
+        for(j=1; j<=npno; j++)
+            rt[m][j] = r1[m][j];
+
+
+    count = 0;
+    r0dotrt = alpha = omega = 0;
+
+    if (E->control.print_convergence && E->parallel.me==0) {
+        fprintf(E->fp, "AhatP (%03d) after %g seconds with div/v=%.3e "
+                "for step %d\n", count, CPU_time0()-time0,
+                E->monitor.incompressibility, E->monitor.solution_cycles);
+        fprintf(stderr, "AhatP (%03d) after %g seconds with div/v=%.3e "
+                "for step %d\n", count, CPU_time0()-time0,
+                E->monitor.incompressibility, E->monitor.solution_cycles);
+    }
+
+
+    /* pressure and velocity corrections */
+    dpressure = 1.0;
+    dvelocity = 1.0;
+
+    while( (valid) && (count < *steps_max) &&
+           (E->monitor.incompressibility >= E->control.tole_comp) &&
+           (dpressure >= imp) && (dvelocity >= imp) )  {
+
+
+
+        /* r1dotrt = <r1, rt> */
+        r1dotrt = global_pdot(E, r1, rt, lev);
+        if(r1dotrt == 0.0) {
+            fprintf(E->fp, "BiCGstab method failed!!\n");
+            fprintf(stderr, "BiCGstab method failed!!\n");
+            parallel_process_termination();
+        }
+
+
+        /* update search direction */
+        if(count == 0)
+            for (m=1; m<=E->sphere.caps_per_proc; m++)
+                for(j=1; j<=npno; j++)
+                    p2[m][j] = r1[m][j];
+        else {
+            /* p2 = r1 + <r1,rt>/<r0,rt> * alpha/omega * (p1 - omega*v0) */
+            delta = (r1dotrt / r0dotrt) * (alpha / omega);
+            for(m=1; m<=E->sphere.caps_per_proc; m++)
+                for(j=1; j<=npno; j++)
+                    p2[m][j] = r1[m][j] + delta
+                        * (p1[m][j] - omega * v0[m][j]);
+        }
+
+
+        /* preconditioner BPI ~= inv(K), z0 = BPI*p2 */
+        for(m=1; m<=E->sphere.caps_per_proc; m++)
+            for(j=1; j<=npno; j++)
+                z0[m][j] = E->BPI[lev][m][j] * p2[m][j];
+
+
+        /* solve K*u0 = grad(p2) for u1 */
+        assemble_grad_p(E, p2, F, lev);
+        valid = solve_del2_u(E, u0, F, imp*v_res, lev);
+        if(!valid) fprintf(stderr, "not valid 1\n");
+        strip_bcs_from_residual(E, u0, lev);
+
+
+        /* v0 = div(rho_ref*u0) */
+        assemble_div_rho_u(E, u0, v0, lev);
+
+
+        /* alpha = r1dotrt / <rt, v0> */
+        alpha = r1dotrt / global_pdot(E, rt, v0, lev);
+
+
+        /* s0 = r1 - alpha * v0 */
+        for(m=1; m<=E->sphere.caps_per_proc; m++)
+            for(j=1; j<=npno; j++)
+                s0[m][j] = r1[m][j] - alpha * v0[m][j];
+
+
+        /* stop iteration if norm(s) is small enough */
+        if(global_pdot(E, s0, s0, lev) < imp*gnpno) {
+            // is the check correct?
+            // update solution, TODO
+            //break;
+        }
+
+
+        /* preconditioner BPI ~= inv(K), st = BPI*s0 */
+        for(m=1; m<=E->sphere.caps_per_proc; m++)
+            for(j=1; j<=npno; j++)
+                st[m][j] = E->BPI[lev][m][j] * s0[m][j];
+
+
+        /* solve K*u1 = grad(st) for u1 */
+        assemble_grad_p(E, st, F, lev);
+        valid = solve_del2_u(E, E->u1, F, imp*v_res, lev);
+        if(!valid) fprintf(stderr, "not valid 2\n");
+        strip_bcs_from_residual(E, E->u1, lev);
+
+
+        /* t0 = div(rho_ref * u1) */
+        assemble_div_rho_u(E, E->u1, t0, lev);
+
+
+        /* omega = <t0, s0> / <t0, t0> */
+        omega = global_pdot(E, t0, s0, lev) / global_pdot(E, t0, t0, lev);
+
+
+        /* r2 = s0 - omega * t0 */
+        for(m=1; m<=E->sphere.caps_per_proc; m++)
+            for(j=1; j<=npno; j++)
+                r2[m][j] = s0[m][j] - omega * t0[m][j];
+
+
+        /* P = P + alpha * z0 + omega * st */
+        for(m=1; m<=E->sphere.caps_per_proc; m++)
+            for(j=1; j<=npno; j++)
+                P[m][j] += alpha * z0[m][j] + omega * st[m][j];
+
+
+        /* V = V - alpha * u0 - omega * u1 */
+        for(m=1; m<=E->sphere.caps_per_proc; m++)
+            for(j=0; j<neq; j++)
+                V[m][j] -= alpha * u0[m][j] + omega * E->u1[m][j];
+
+
+        /* compute velocity and incompressibility residual */
+        assemble_div_rho_u(E, V, t0, lev);
+        incompressibility_residual(E, V, t0);
+
+        /* compute velocity and pressure corrections */
+        dpressure = alpha * sqrt( (global_pdot(E, z0, z0, lev) +
+                                   global_pdot(E, st, st, lev))
+                                 / (1.0e-32 + global_pdot(E, P, P, lev)));
+        dvelocity = alpha * sqrt( (global_vdot(E, u0, u0, lev) +
+                                   global_vdot(E, E->u1, E->u1, lev))
+                                 / (1.0e-32 + E->monitor.vdotv));
+
+        count++;
+
+        if(E->control.print_convergence && E->parallel.me==0) {
+            fprintf(E->fp, "AhatP (%03d) after %g seconds with div/v=%.3e "
+                    "dv/v=%.3e and dp/p=%.3e for step %d\n",
+                    count, CPU_time0()-time0, E->monitor.incompressibility,
+                    dvelocity, dpressure, E->monitor.solution_cycles);
+            fprintf(stderr, "AhatP (%03d) after %g seconds with div/v=%.3e "
+                    "dv/v=%.3e and dp/p=%.3e for step %d\n",
+                    count, CPU_time0()-time0, E->monitor.incompressibility,
+                    dvelocity, dpressure, E->monitor.solution_cycles);
+        }
+
+
+        /* shift array pointers */
+        for(m=1; m<=E->sphere.caps_per_proc; m++) {
+            shuffle[m] = p1[m];
+            p1[m] = p2[m];
+            p2[m] = shuffle[m];
+
+            shuffle[m] = r1[m];
+            r1[m] = r2[m];
+            r2[m] = shuffle[m];
+        }
+
+        /* shift <r0, rt> = <r1, rt> */
+        r0dotrt = r1dotrt;
+
+    } /* end loop for conjugate gradient */
+
+
+    for(m=1; m<=E->sphere.caps_per_proc; m++) {
+        free((void *) r1[m]);
+        free((void *) r2[m]);
+        free((void *) z0[m]);
+        free((void *) p1[m]);
+        free((void *) p2[m]);
+        free((void *) rt[m]);
+        free((void *) v0[m]);
+        free((void *) s0[m]);
+        free((void *) st[m]);
+        free((void *) t0[m]);
+
+        free((void *) u0[m]);
+    }
+
+    *steps_max=count;
+
+    return(residual);
+
 }
 
 
@@ -368,7 +618,7 @@
 
 
 static double incompressibility_residual(struct All_variables *E,
-                                         double **V, double **F)
+                                         double **V, double **r)
 {
     double global_pdot();
     double global_vdot();
@@ -381,7 +631,7 @@
     /* incompressiblity residual = norm(F) / norm(V) */
 
     tmp1 = global_vdot(E, V, V, lev);
-    tmp2 = global_pdot(E, F, F, lev);
+    tmp2 = global_pdot(E, r, r, lev);
     E->monitor.incompressibility = sqrt((gneq / gnpno)
                                         *( (1.0e-32 + tmp2)
                                               / (1.0e-32 + tmp1) ));

Modified: mc/3D/CitcomS/branches/compressible/lib/global_defs.h
===================================================================
--- mc/3D/CitcomS/branches/compressible/lib/global_defs.h	2007-01-19 00:10:53 UTC (rev 5832)
+++ mc/3D/CitcomS/branches/compressible/lib/global_defs.h	2007-01-19 00:14:37 UTC (rev 5833)
@@ -729,6 +729,7 @@
 
     double *rho;
     double *rho_ref, *thermexp_ref, *T_ref;
+    double *dlnrhodr;
 
     double *P[NCS],*F[NCS],*H[NCS],*S[NCS],*U[NCS];
     double *T[NCS],*Tdot[NCS],*buoyancy[NCS];



More information about the cig-commits mailing list