[cig-commits] r14242 - seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust

tan2 at geodynamics.org tan2 at geodynamics.org
Fri Mar 6 12:41:36 PST 2009


Author: tan2
Date: 2009-03-06 12:41:35 -0800 (Fri, 06 Mar 2009)
New Revision: 14242

Added:
   seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/prem
Removed:
   seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/1D
Modified:
   seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/citcoms_isotropic_no_crust.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/citcoms_parm.h
   seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/read_citcoms_data.c
Log:
Finished citcoms earth model with good scaling.

* Use PREM as 1D reference, as it defines some essential constants, e.g. raidus of CMB and MOHO.
* Save the result of previous domain and element search as the hints of next search.
* Read citcoms domain bounds in binary format to avoid losing precision.
* Allowing some round-off error when locating elements (and caps), and no needs of nodging points.
* Optimized the flow path so that no goto in the fast path.
* The interpolation scheme is adapted from CitcomS Exchanger, but is done in Cartesian space, instead of spherical space.



Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/1D
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/1D	2009-03-06 17:13:09 UTC (rev 14241)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/1D	2009-03-06 20:41:35 UTC (rev 14242)
@@ -1 +0,0 @@
-link ../../1D_ref/blank/
\ No newline at end of file

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/citcoms_isotropic_no_crust.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/citcoms_isotropic_no_crust.f90	2009-03-06 17:13:09 UTC (rev 14241)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/citcoms_isotropic_no_crust.f90	2009-03-06 20:41:35 UTC (rev 14242)
@@ -55,11 +55,11 @@
   double precision inv_time
 
   ! a static variable holding the result of previous search
-  integer, save :: elem
-  data elem /-1/
+  integer, save :: domain, elem
+  data domain, elem /-1, -1/
 
   ! get (Vp, Vs, rho) in km/s and g/cm^3 from citcoms data
-  call get_velo_from_citcoms(radius, theta, phi, vpv, vsv, rho, elem)
+  call get_velo_from_citcoms(radius, theta, phi, vpv, vsv, rho, domain, elem)
 
 
   ! non-dimensionalize
@@ -263,8 +263,3 @@
 
 !----------------------------------
 
-subroutine finalize_mantle_model()
-
-  implicit none
-
-end subroutine finalize_mantle_model

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/citcoms_parm.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/citcoms_parm.h	2009-03-06 17:13:09 UTC (rev 14241)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/citcoms_parm.h	2009-03-06 20:41:35 UTC (rev 14242)
@@ -26,11 +26,8 @@
 =====================================================================*/
 
 
-/* filename (including full path) of citcoms domain output */
-const char citcoms_bound_filename[] = "/home/tan2/dv/trunk/fulltest.domain";
+/* filename prefix (including full path) of citcoms output */
+const char citcoms_model_filename_base[] = "/home/tan2/dv/trunk/33";
 
-/* filename base of citcoms output */
-const char citcoms_model_filename_base[] = "/home/tan2/dv/trunk/fulltest";
-
 /* time step of citcoms output */
 const int citcoms_step = 0;

Added: seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/prem
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/prem	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/prem	2009-03-06 20:41:35 UTC (rev 14242)
@@ -0,0 +1 @@
+link ../../1D_ref/prem/
\ No newline at end of file


Property changes on: seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/prem
___________________________________________________________________
Name: svn:special
   + *

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/read_citcoms_data.c
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/read_citcoms_data.c	2009-03-06 17:13:09 UTC (rev 14241)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/pluggable/MODELS/3D/citcoms_isotropic_no_crust/read_citcoms_data.c	2009-03-06 20:41:35 UTC (rev 14242)
@@ -3,8 +3,8 @@
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 #include <math.h>
-#include <limits.h>
 #include <float.h>
 #include "mpi.h"
 #include "citcoms_parm.h"
@@ -14,13 +14,9 @@
     double x[4];
     double y[4];
     double z[4];
-    double cost[4]; /* cos(theta) of this node */
-    double sint[4]; /* sin(theta) of this node  */
-    double cosf[4]; /* cos(phi) of this node  */
-    double sinf[4]; /* sin(phi) of this node  */
 
-    double midx, midy, midz;
-    double disk_radius;
+    double midx, midy, midz; /* center of the cap, on a unit sphere */
+    double disk_radius2;     /* radius^2 of covering disk */
 };
 typedef struct citcoms_cap cap_t;
 
@@ -29,23 +25,32 @@
     double x;
     double y;
     double z;
+};
+typedef struct citcoms_node node_t;
 
+
+struct citcoms_mat {
     double rho;
     double vp;
     double vs;
 };
-typedef struct citcoms_node node_t;
+typedef struct citcoms_mat mat_t;
 
 
-struct citcoms_model {
-    double *rsqr; /* 1d radius^2 profile */
+struct citcoms_element {
+    double mid[3];          /* the projected mid point */
+    double eta_axes[2][3];  /* axes of eta coord. */
+    double inv_length2[2];  /* length^2 of eta axes */
+    double size2;           /* length^2 of diagnal of the element */
+};
+typedef struct citcoms_element element_t;
 
-    double *cost; /* cos(theta) of surface nodes */
-    double *sint; /* sin(theta) of surface nodes  */
-    double *cosf; /* cos(phi) of surface nodes  */
-    double *sinf; /* sin(phi) of surface nodes  */
 
-    node_t *nodes;
+struct citcoms_model {
+    double *rsqr;           /* 1d radius^2 profile */
+    node_t *nodes;          /* coordinates of surface nodes */
+    mat_t *mats;            /* material prop. of all nodes */
+    element_t *elements;
 };
 typedef struct citcoms_model model_t;
 
@@ -56,149 +61,346 @@
     double rmin;
     double rmax;
 
-    cap_t cap;
+    cap_t *cap;
 
-    model_t model;
-
-    struct citcoms_domain *next;
+    model_t *model;
 };
 typedef struct citcoms_domain domain_t;
 
 
 /* model static variable declaration */
-domain_t *first;
+domain_t *domains;
+int ndomains;
 
+int myrank;
+cap_t my_cap;
+
+
 /* citcoms local mesh geometry */
-int nox = 0;
-int noy = 0;
-int noz = 0;
+int nox;
+int noy;
+int noz;
 
 
+/* some statistic counters */
+long npoints = 0;
+long elem_searched = 0;
+long dist_searched = 0;
+long hint_works = 0;
+long inv_mapping_iter = 0;
 
 /************************************************************************
  * Private functions first. Public functions at the end of the file.
  ************************************************************************/
 
 
-/*
- * Scan input str to get a long int vector *values. The vector length is from
- * input len. The input str contains white-space seperated numbers.
- */
-static int scan_long_vector(const char *str, int len, long *values)
+static int ijk2elem(int nx, int ny, int nz)
 {
-    char *nptr, *endptr;
+    return nz + nx * (noz - 1) + ny * (nox - 1) * (noz - 1);
+}
+
+
+
+/* given an element number, return the surface node numbers of the 4 nodes */
+static void element2surfnodes(int elem, int corners[4])
+{
+    int nx, ny;
+    int lower_left_corner;
     int i;
 
-    /* cast to avoid compiler warning */
-    nptr = endptr = (char *) str;
+    const int elxz = (nox - 1) * (noz - 1);
 
-    for (i = 0; i < len; ++i) {
-        values[i] = strtol(nptr, &endptr, 10);
-        if (nptr == endptr) {
-            /* error: no conversion is performed */
-            return -1;
-        }
-        nptr = endptr;
-    }
+    /* connectivity for surface nodes, ordered counterclockwisely */
+    const int conn[4] = {0, 1, nox+1, nox};
 
-    /** debug **
-    for (i = 0; i < len; ++i) fprintf(stderr, "%ld, ", values[i]);
-    fprintf(stderr, "\n");
-    /**/
-    return len;
+    /* The ordering scheme in citcoms is:
+     * elem = nz + nx*(noz-1) + ny*(nox-1)*(noz-1)
+     * snode = nx + ny*nox
+     */
+
+    ny = elem / elxz;
+    nx = (elem - ny * elxz) / (noz - 1);
+
+    /* the first surface node of this element */
+    lower_left_corner = nx + ny * nox;
+
+    for(i=0; i<4; i++)
+        corners[i] = lower_left_corner + conn[i];
+
+    return;
 }
 
 
-/*
- * From input file, read a line, which contains white-space seperated numbers
- * of length num_columns, store the numbers in a long int array.
- * Return num_columns on success, 0 on file reading error, and -1 on conversion error.
- */
-static int read_long_vector(FILE *in, int num_columns, long *fields)
+
+static void radial_project_2_plane(const double normal[3], double point[3])
 {
-    char buffer[256], *p;
+    /* Plane S defined as (M, X) = 1, where M is the normal vector to S
+     *
+     * Radially project P to P' onto S
+     *   P' = a*P; (M, P') = 1; ==> a = 1 / (M, P)
+     */
 
-    p = fgets(buffer, 255, in);
-    if (!p) {
-        return 0;
-    }
+    int dim;
+    double mdot = 0;
 
-    return scan_long_vector(buffer, num_columns, fields);
+    for(dim=0; dim<3; dim++)
+        mdot += point[dim] * normal[dim];
+
+    for(dim=0; dim<3; dim++)
+        point[dim] *= 1 / mdot;
+
+    return;
 }
 
 
-/*
- * Scan input str to get a double vector *values. The vector length is from
- * input len. The input str contains white-space seperated numbers.
- */
-static int scan_double_vector(const char *str, int len, double *values)
+
+static void compute_elem_geom(element_t *elements, const node_t *nodes)
 {
-    char *nptr, *endptr;
-    int i;
+    /* Given 4 points A, B, C, D, on a unit sphere,
+     * and their mid point M. Radially project M to M' onto the unit sphere:
+     *   M' = m*M; where m = 1 / ||M||
+     *
+     * The tangent plane S of the sphere at M' is:
+     *   (M', X) = 1, where (,) is the inner product.
+     *
+     * Radially project A onto S, this point is called A':
+     *   A' = a*A; (M', A') = 1; ==> a = 1 / m / (M, A)
+     *
+     * Then, A', B', C', D' and M' are all on plane S.
+     */
 
-    /* cast to avoid compiler warning */
-    nptr = endptr = (char *) str;
 
-    for (i = 0; i < len; ++i) {
-        values[i] = strtod(nptr, &endptr);
-        if (nptr == endptr) {
-            /* error: no conversion is performed */
-            return -1;
+    const int facenodes = 4;  /* # of nodes on element's face */
+    const int edgenodes = 2;  /* # of nodes on element's edge */
+    const int naxes = 2;      /* # of eta axes */
+
+    /* connectivity for surface nodes, ordered counterclockwisely
+     *   3--2    diagram of node ordering
+     *   |  |
+     *   0--1
+     *
+     * node 1, 2 are in the edge that is normal to positive eta0 axis
+     * node 2, 3 are in the edge that is normal to positive eta1 axis
+     * node 0, 3 are in the edge that is normal to negative eta0 axis
+     * node 0, 1 are in the edge that is normal to negative eta1 axis
+     */
+    const int high[2][2] = {{1, 2},
+                            {2, 3}};
+    const int low[2][2] = {{0, 3},
+                           {0, 1}};
+
+    const int nel = (nox - 1) * (noy - 1) * (noz - 1);
+
+    int elem;
+
+    for(elem=0; elem<nel; elem++) {
+        int i, j;
+        int dim;
+        int corners[4];
+        double coord[4][3];
+        double mid[3] = {0, 0, 0};
+        double r;
+        element_t *e = &elements[elem];
+
+        element2surfnodes(elem, corners);
+
+        /* coordinates of the nodes on the unit sphere, ABCD */
+        for(i=0; i<facenodes; i++) {
+            int n = corners[i];
+            coord[i][0] = nodes[n].x;
+            coord[i][1] = nodes[n].y;
+            coord[i][2] = nodes[n].z;
         }
-        nptr = endptr;
+
+        /* coordinates of M */
+        for(dim=0; dim<3; dim++) {
+            for(i=0; i<facenodes; i++)
+                mid[dim] += coord[i][dim];
+
+            mid[dim] /= facenodes;
+        }
+        r = sqrt( mid[0] * mid[0] +
+                  mid[1] * mid[1] +
+                  mid[2] * mid[2] );
+
+        /* coordinates of M' */
+        for(dim=0; dim<3; dim++)
+            e->mid[dim] = mid[dim] / r;
+
+        /* project ABCD to surface S */
+        for(i=0; i<facenodes; i++)
+            radial_project_2_plane(e->mid, coord[i]);
+
+        /** debug **
+        if(myrank==0) {
+            fprintf(stderr, "elem=%d M'=(%.15e, %.15e, %.15e) %e\n",
+                    elem, e->mid[0], e->mid[1], e->mid[2],
+                    e->mid[0]*e->mid[0] + e->mid[1]*e->mid[1] + e->mid[2]*e->mid[2]);
+            for(i=0; i<facenodes; i++) {
+                fprintf(stderr, "        node=(%.15e, %.15e, %.15e)  %e\n",
+                        coord[i][0], coord[i][1], coord[i][2],
+                        coord[i][0]*e->mid[0] + coord[i][1]*e->mid[1] + coord[i][2]*e->mid[2]);
+            }
+        }
+        /**/
+
+        e->size2 = 0;
+        for(j=0; j<naxes; j++) {
+            double eta[3];
+            double length2;
+
+            length2 = 0;
+            for(dim=0; dim<3; dim++) {
+                double diff = 0;
+                for(i=0; i<edgenodes; i++) {
+                    diff += coord[high[j][i]][dim] - coord[low[j][i]][dim];
+                }
+                eta[dim] = 0.5 * diff / edgenodes;
+                e->eta_axes[j][dim] = eta[dim];
+                length2 += eta[dim] * eta[dim];
+            }
+
+            e->inv_length2[j] = 1 / length2;
+            e->size2 += length2;
+
+        }
+
+        /** debug **
+        if(myrank==0) {
+            fprintf(stderr, "elem=%d, eta0=(%.15e, %.15e, %.15e) eta1=(%.15e, %.15e, %.15e) %e %e\n",
+                    elem,
+                    e->eta_axes[0][0], e->eta_axes[0][1], e->eta_axes[0][2],
+                    e->eta_axes[1][0], e->eta_axes[1][1], e->eta_axes[1][2],
+                    e->size2,
+                    e->eta_axes[0][0]*e->eta_axes[1][0]+e->eta_axes[0][1]*e->eta_axes[1][1]+e->eta_axes[0][2]*e->eta_axes[1][2]);
+        }
+        /**/
     }
 
-    /** debug **
-    for (i = 0; i < len; ++i) fprintf(stderr, "%e, ", values[i]);
-    fprintf(stderr, "\n");
-    /**/
-    return len;
+    return;
 }
 
 
-/*
- * From input file, read a line, which contains white-space seperated numbers
- * of length num_columns, store the numbers in a double array.
- * Return num_columns on success, 0 on file reading error, and -1 on conversion error.
- */
-static int read_double_vector(FILE *in, int num_columns, double *fields)
+
+static void get_2d_shape_functions(double shp[4], const double eta[2])
 {
-    char buffer[256], *p;
+    /* see the node ordering above */
 
-    p = fgets(buffer, 255, in);
-    if (!p) {
-        return 0;
+    shp[0] = 0.25 * (1.0 - eta[0]) * (1.0 - eta[1]);
+    shp[1] = 0.25 * (1.0 + eta[0]) * (1.0 - eta[1]);
+    shp[2] = 0.25 * (1.0 + eta[0]) * (1.0 + eta[1]);
+    shp[3] = 0.25 * (1.0 - eta[0]) * (1.0 + eta[1]);
+}
+
+
+
+static void inverse_mapping(const model_t *model, int elem,
+                            double x, double y, double z,
+                            double shp12[4])
+{
+    const int facenodes = 4;
+    const int naxes = 2;
+
+    int count = 0;
+    double x0[3] = {x, y, z};
+
+    /* coordinate of test point at eta coordinates */
+    double eta[2] = {0, 0};
+
+    int i, j, dim;
+    int corners[4];
+    double dist2;
+    double coord[4][3];
+    element_t *e = &model->elements[elem];
+    double accuracy = e->size2 * 1e-10;
+    double xx[3];
+
+    element2surfnodes(elem, corners);
+
+    /* coordinates of the nodes on the unit sphere, ABCD */
+    for(i=0; i<facenodes; i++) {
+        int n = corners[i];
+        coord[i][0] = model->nodes[n].x;
+        coord[i][1] = model->nodes[n].y;
+        coord[i][2] = model->nodes[n].z;
     }
 
-    return scan_double_vector(buffer, num_columns, fields);
+    /* project ABCD to plane S */
+    for(i=0; i<facenodes; i++)
+        radial_project_2_plane(e->mid, coord[i]);
+
+    /* project x0 to plane S */
+    radial_project_2_plane(e->mid, x0);
+
+
+    do {
+        double deta[2] = {0, 0};
+        double dx;
+
+        inv_mapping_iter ++;
+
+        get_2d_shape_functions(shp12, eta);
+
+        /* interpolate to xx */
+        xx[0] = xx[1] = xx[2] = 0;
+        for(dim=0; dim<3; dim++) {
+            for(i=0; i<facenodes; i++)
+                xx[dim] += coord[i][dim] * shp12[i];
+        }
+
+        dist2 = 0;
+        for(dim=0; dim<3; dim++) {
+            dx = x0[dim] - xx[dim];
+            dist2 += dx * dx;
+
+            /* correction of eta */
+            for(j=0; j<naxes; j++)
+                deta[j] += dx * e->eta_axes[j][dim];
+        }
+
+        for(j=0; j<naxes; j++)
+            eta[j] += deta[j] * e->inv_length2[j];
+
+        /* converged */
+	if(dist2 < accuracy) {
+            get_2d_shape_functions(shp12, eta);
+            return;
+        }
+
+        count ++;
+
+    } while(count < 1000);
+
+    /* cannot converge */
+    fprintf(stderr, "error: rank=%d %e eta=(%f %f) x0=(%.15e %.15e %.15e) xx=(%.15e %.15e %.15e)\n",
+            myrank, dist2, eta[0], eta[1], x, y, z, xx[0], xx[1], xx[2]);
+    abort();
+
+    return;
 }
 
 
+
 static void create_model(model_t *model, const double *coord, const double *data)
 {
     const double *x, *y, *z;
     const double *rho, *vp, *vs;
-    int i, j, n;
+    int i, j, k, n;
     const int nsf = nox * noy;
     const int nno = nox * noy * noz;
+    const int nel = (nox - 1) * (noy - 1) * (noz - 1);
 
     /*** allocate space ****/
-    model->rsqr = malloc(noz *sizeof(double));
+    model->rsqr = malloc(noz*sizeof(double));
+    model->nodes = malloc(nsf*sizeof(node_t));
+    model->mats = malloc(nno*sizeof(mat_t));
+    model->elements = malloc(nel*sizeof(element_t));
 
-    model->cost = malloc(nsf*sizeof(double));
-    model->sint = malloc(nsf*sizeof(double));
-    model->cosf = malloc(nsf*sizeof(double));
-    model->sinf = malloc(nsf*sizeof(double));
-
-    model->nodes = malloc(nno*sizeof(node_t));
-
     if(model->rsqr == NULL ||
-       model->cost == NULL ||
-       model->sint == NULL ||
-       model->cosf == NULL ||
-       model->sinf == NULL ||
-       model->nodes == NULL) {
-        fprintf(stderr, "error while allocating memory '%s' 111\n",
+       model->nodes == NULL ||
+       model->mats == NULL ||
+       model->elements == NULL) {
+        fprintf(stderr, "error while allocating memory '%s' 11\n",
                 __FILE__);
         abort();
     }
@@ -218,37 +420,38 @@
         model->rsqr[j] = x[j]*x[j] + y[j]*y[j] + z[j]*z[j];
     }
 
+
     /* put node-related data in a struct to gain cache performance */
-    for(i=0, n=0; i<nsf; i++, n+=noz) {
-        double a, b;
+    for(i=0, j=0, n=noz-1; i<nsf; i++, n+=noz) {
+        double r;
 
-        a = sqrt(x[n]*x[n] + y[n]*y[n]);
-        b = sqrt(x[n]*x[n] + y[n]*y[n] + z[n]*z[n]);
+        r = sqrt(x[n]*x[n] + y[n]*y[n] + z[n]*z[n]);
 
-        model->cost[i] = z[n] / b;
-        model->sint[i] = a / b;
-        model->cosf[i] = x[n] / a;
-        model->sinf[i] = y[n] / a;
+        /* coordiante on a unit sphere */
+        model->nodes[i].x = x[n] / r;
+        model->nodes[i].y = y[n] / r;
+        model->nodes[i].z = z[n] / r;
 
-        for(j=n; j<n+noz; j++) {
-            model->nodes[j].x = x[j];
-            model->nodes[j].y = y[j];
-            model->nodes[j].z = z[j];
-
-            model->nodes[j].rho = rho[j];
-            model->nodes[j].vp = vp[j];
-            model->nodes[j].vs = vs[j];
+        for(k=0; k<noz; k++) {
+            model->mats[j].rho = rho[j];
+            model->mats[j].vp = vp[j];
+            model->mats[j].vs = vs[j];
+            j++;
         }
     }
 
+    /* setup for interpolation */
+    compute_elem_geom(model->elements, model->nodes);
+
     return;
 }
 
 
+
 static void new_cap_corners(cap_t *cap, const double x[4], const double y[4],
                             const double z[4])
 {
-    double r;
+    double r_inv;
     int i;
 
     /* coordinates of corners */
@@ -256,16 +459,6 @@
     memcpy(cap->y, y, 4*sizeof(double));
     memcpy(cap->z, z, 4*sizeof(double));
 
-    for(i=0; i<4; i++) {
-        double rad, rho;
-        rho = sqrt(x[i] * x[i] + y[i] * y[i]);
-        rad = sqrt(x[i] * x[i] + y[i] * y[i] * z[i] * z[i]);
-        cap->cost[i] = z[i] / rad;
-        cap->sint[i] = rho / rad;
-        cap->cosf[i] = x[i] / rho;
-        cap->sinf[i] = y[i] / rho;
-    }
-
     /* coordinates of mid points, project to the unit sphere */
     cap->midx = cap->midy = cap->midz = 0;
     for(i=0; i<4; i++) {
@@ -273,30 +466,34 @@
         cap->midy += y[i] * 0.25;
         cap->midz += z[i] * 0.25;
     }
-    r = cap->midx * cap->midx + cap->midy * cap->midy + cap->midz * cap->midz;
-    cap->midx /= r;
-    cap->midy /= r;
-    cap->midz /= r;
+    r_inv = 1.0 / sqrt(cap->midx * cap->midx + cap->midy * cap->midy + cap->midz * cap->midz);
+    cap->midx *= r_inv;
+    cap->midy *= r_inv;
+    cap->midz *= r_inv;
 
-    /* radius of covering disk */
-    cap->disk_radius = 0;
+    /* radius^2 of covering disk */
+    cap->disk_radius2 = 0;
     for(i=0; i<4; i++) {
         double tmp;
 
-        tmp = sqrt( (cap->midx - x[i]) * (cap->midx - x[i]) +
-                    (cap->midy - y[i]) * (cap->midy - y[i]) +
-                    (cap->midz - z[i]) * (cap->midz - z[i]) );
-        cap->disk_radius = (cap->disk_radius > tmp) ? cap->disk_radius : tmp;
+        tmp = ( (cap->midx - x[i]) * (cap->midx - x[i]) +
+                (cap->midy - y[i]) * (cap->midy - y[i]) +
+                (cap->midz - z[i]) * (cap->midz - z[i]) );
+        cap->disk_radius2 = (cap->disk_radius2 > tmp) ? cap->disk_radius2 : tmp;
     }
+
+    /* broaden the disk radius slightly to avoid round-off error */
+    cap->disk_radius2 *= 1 + 256 * DBL_EPSILON;
     return;
 }
 
 
-/* make a vector pointing from (x0,y0,z0) to (xf,yf,zf) */
 
 static void makevector(double vec[3], double xf, double yf, double zf,
                        double x0, double y0, double z0)
 {
+    /* make a vector pointing from (x0,y0,z0) to (xf,yf,zf) */
+
     vec[0] = xf - x0;
     vec[1] = yf - y0;
     vec[2] = zf - z0;
@@ -304,10 +501,11 @@
 }
 
 
-/* cross product of vector A and B */
 
 static void crossit(double cross[3], const double A[3], const double B[3])
 {
+    /* cross product of vector A and B */
+
     cross[0] = A[1] * B[2] - A[2] * B[1];
     cross[1] = A[2] * B[0] - A[0] * B[2];
     cross[2] = A[0] * B[1] - A[1] * B[0];
@@ -315,92 +513,77 @@
 }
 
 
-/* finds the radial component of a Cartesian vector */
 
-static double findradial(const double vec[3],
-                         double cost, double sint,
-                         double cosf, double sinf)
-{
-    double radialparti, radialpartj, radialpartk;
-    double radial;
-
-    radialparti = vec[0] * sint * cosf;
-    radialpartj = vec[1] * sint * sinf;
-    radialpartk = vec[2] * cost;
-
-    radial = radialparti + radialpartj + radialpartk;
-
-    return radial;
-}
-
-
-/***** ICHECK BOUNDS ******************************/
-/*                                                */
-/* This function check if a test_point is bounded */
-/* by 4 corner nodes of a cap                     */
-/* This is done by:                               */
-/* 1) generate vectors from node to node          */
-/* 2) generate vectors from each node to point    */
-/*    in question                                 */
-/* 3) for each node, take cross product of vector */
-/*    pointing to it from previous node and       */
-/*    vector from node to point in question       */
-/* 4) Find radial components of all the cross     */
-/*    products.                                   */
-/* 5) If all radial components are positive,      */
-/*    point is bounded by the 4 nodes             */
-/* 6) If some radial components are close to 0    */
-/*    point is on a boundary - adjust it an       */
-/*    epsilon amount for this analysis only       */
-/*    which will force it to lie in one element   */
-/*    or cap                                      */
-/*                                                */
-/* The 4 corner nodes must be ordered             */
-/* counterclockwisely.                            */
-
 static int icheck_bounds(const double corner_x[4], const double corner_y[4],
                          const double corner_z[4],
-                         const double corner_cost[4], const double corner_sint[4],
-                         const double corner_cosf[4], const double corner_sinf[4],
                          double x, double y, double z)
 {
+    /* This function check if a test_point is bounded */
+    /* by 4 corner nodes ABCD of a cap                */
+    /* The 4 corner nodes must be ordered             */
+    /* counterclockwisely.                            */
+    /*                                                */
+    /* This is done by:                               */
+    /* 1) generate vector AB from node A to node B    */
+    /* 2) generate vector OA from origin to node A    */
+    /* 3) generate vector OP from origin to the point */
+    /* 4) take cross product of OA and AB             */
+    /*    this defines the normal vector of plane OAB */
+    /* 5) take inner product of the normal vector     */
+    /*    with OP, if the inner product is positive,  */
+    /*    P is on the positive side of OAB            */
+    /* 6) if all inner produce are positive           */
+    /*    (within the round-off error),               */
+    /*    point is bounded by the 4 nodes             */
+    /* 7) If some inner products are tiny             */
+    /*    point is on a boundary - adjust it an       */
+    /*    epsilon amount for this analysis only       */
+    /*    which will force it to lie in one element   */
+    /*    or cap. (This is disabled.)                 */
+    /*                                                */
+
     int ntries;
-    int i, icheck;
+    int i;
 
     double v[4][3];
-    double rad[4];
+    double op[3] = {x, y, z};
+    double inner[4];
 
     double theta, phi;
 
-    const double eps = 1e-6;
-    const double tiny = 1e-15;
+    /* allows 4-bits of round-off error accumulated */
+    const double tiny = 4 * DBL_EPSILON;
 
-    /* make vectors from corner node to corner node */
+    /* make vectors AB from corner node to corner node */
     makevector(v[0], corner_x[0], corner_y[0], corner_z[0],
                corner_x[3], corner_y[3], corner_z[3]);
+
     for(i=0; i<3; i++)
         makevector(v[i+1], corner_x[i+1], corner_y[i+1], corner_z[i+1],
                    corner_x[i], corner_y[i], corner_z[i]);
 
+#define NUDGE_POINT 0
+#if NUDGE_POINT
     for(ntries=0; ntries<3; ntries++) {
+        const double eps = 1e-6;
+#endif
+        elem_searched ++;
 
         for(i=0; i<4; i++) {
-            double p[3];
+            double oa[3] = {corner_x[i], corner_y[i], corner_z[i]};
             double cross[3];
 
-            /* make vectors from test point to node */
-            makevector(p, x, y, z, corner_x[i], corner_y[i], corner_z[i]);
+            /* Calculate cross products of OA and AB */
+            crossit(cross, oa, v[i]);
 
-            /* Calculate cross products */
-            crossit(cross, v[i], p);
-
-            /* Calculate radial component of cross products */
-            rad[i] = findradial(cross, corner_cost[i], corner_sint[i],
-                                corner_cosf[i], corner_sinf[i]);
+            inner[i] = op[0] * cross[0] +
+                op[1] * cross[1] +
+                op[2] * cross[2];
         }
 
-        /*
-        fprintf(stderr,"Rads: %f %f %f %f\n", rad[0], rad[1], rad[2], rad[3]);
+        /** debug **
+        if(myrank==0) {
+        fprintf(stderr,"Inner prods: %e %e %e %e\n", inner[0], inner[1], inner[2], inner[3]);
         fprintf(stderr,"Test Point: %f %f %f     %f %f\n", x, y, z,
                 atan2(sqrt(x*x+y*y),z)*180/M_PI, atan2(y,x)*180/M_PI);
         fprintf(stderr,"Corner points: 0: %f %f %f\n",
@@ -411,25 +594,22 @@
                 corner_x[2], corner_y[2], corner_z[2]);
         fprintf(stderr,"Corner points: 3: %f %f %f\n",
                 corner_x[3], corner_y[3], corner_z[3]);
-        */
+        }
+        /**/
 
-        /* If all radial components are positive, point is bounded by the 4 nodes */
-        if (rad[0] >= 0 &&
-            rad[1] >= 0 &&
-            rad[2] >= 0 &&
-            rad[3] >= 0)
+        /* If one inner product is significantly negative, point is not bounded */
+        if (inner[0] < -tiny ||
+            inner[1] < -tiny ||
+            inner[2] < -tiny ||
+            inner[3] < -tiny)
+            return 0;
+        else
             return 1;
 
-        /* If one radial component is significantly negative, point is not bounded */
-        if (rad[0] < -tiny ||
-            rad[1] < -tiny ||
-            rad[2] < -tiny ||
-            rad[3] < -tiny)
-            return 0;
+#if NUDGE_POINT
+        /* The point is very close to the boundary, nudge it slightly */
+        /* Hopefully, this doesn't happen often, may be expensive */
 
-        /*  Check if any radial components is zero (along a boundary), adjust if so */
-        /*  Hopefully, this doesn't happen often, may be expensive                  */
-
         theta = atan2(sqrt(x*x + y*y), z);
         phi = atan2(y, x);
 
@@ -450,7 +630,7 @@
 
     /* even after nudging the point, it is still on the boundary, fail loudly */
     fprintf(stderr,"Error(icheck_bounds) - too many tries\n");
-    fprintf(stderr,"Rads: %f %f %f %f\n", rad[0], rad[1], rad[2], rad[3]);
+    fprintf(stderr,"Inner prods: %e %e %e %e\n", inner[0], inner[1], inner[2], inner[3]);
     fprintf(stderr,"Test Point: %f %f %f     %f %f\n", x, y, z,
             theta*180/M_PI, phi*180/M_PI);
     fprintf(stderr,"Corner points: 0: %f %f %f\n",
@@ -464,140 +644,99 @@
     exit(10);
 
     return 0;
+#endif
 }
 
 
-/*
- * In: one cap, consists of four points on a unit sphere and a convering disk; and
- *     one point on a unit sphere.
- * Out: whether the point is inside the cap or not
- */
-static int is_inside(const cap_t *cap, const double x, const double y, const double z)
+
+static int is_inside_cap(const cap_t *cap, double x, double y, double z)
 {
-    double dist;
+    /* In: one cap, consists of four points on a unit sphere and a convering disk;
+     *     one point (x, y, z) on a unit sphere.
+     * Out: whether the point is inside the cap or not
+     */
 
-    dist = sqrt( (cap->midx - x) * (cap->midx - x) +
-                 (cap->midy - y) * (cap->midy - y) +
-                 (cap->midz - z) * (cap->midz - z) );
+    double dist2;
 
-    if(dist > cap->disk_radius)
+    dist2 = ( (cap->midx - x) * (cap->midx - x) +
+              (cap->midy - y) * (cap->midy - y) +
+              (cap->midz - z) * (cap->midz - z) );
+
+    if(dist2 > cap->disk_radius2)
         return 0;
 
-    if(icheck_bounds(cap->x, cap->y, cap->z,
-                     cap->cost, cap->sint, cap->cosf, cap->sinf,
-                     x, y, z))
-        return 1;
-
-    return 0;
+    return icheck_bounds(cap->x, cap->y, cap->z, x, y, z);
 }
 
 
-/*
- * In: two caps, each consists of four points on a unit sphere and a convering disk
- * Out: whether these caps overlap or not
- */
+
 static int is_overlapping(const cap_t *cap0, const cap_t *cap1)
 {
-    double dist;
+    /* In: two caps, each consists of four points on a unit sphere and a convering disk
+     * Out: whether these caps overlap or not
+     */
+
+    double dist2;
     int i;
 
     /* distance between cap centers */
-    dist = sqrt( (cap0->midx - cap1->midx) * (cap0->midx - cap1->midx) +
-                 (cap0->midy - cap1->midy) * (cap0->midy - cap1->midy) +
-                 (cap0->midz - cap1->midz) * (cap0->midz - cap1->midz) );
+    dist2 = ( (cap0->midx - cap1->midx) * (cap0->midx - cap1->midx) +
+              (cap0->midy - cap1->midy) * (cap0->midy - cap1->midy) +
+              (cap0->midz - cap1->midz) * (cap0->midz - cap1->midz) );
 
-    /* the centers are too far away then overlapping is impossible */
-    if(dist > (cap0->disk_radius + cap1->disk_radius))
+    /* the centers are too far away, overlapping is impossible */
+    if(sqrt(dist2) > sqrt(cap0->disk_radius2) + sqrt(cap1->disk_radius2))
         return 0;
 
     /* the center of one disk falls on another disk */
-    if(dist < cap0->disk_radius && dist < cap1->disk_radius)
+    if(dist2 < cap0->disk_radius2 &&
+       dist2 < cap1->disk_radius2)
         return 1;
 
-    /* check each corner, is it inside the other cap? */
-    for(i=0; i<4; i++)
-        if(is_inside(cap0, cap1->x[i], cap1->y[i], cap1->z[i]))
-            return 1;
-
-    for(i=0; i<4; i++)
-        if(is_inside(cap1, cap0->x[i], cap0->y[i], cap0->z[i]))
-            return 1;
-
     /* check each center, is it inside the other cap? */
     if(icheck_bounds(cap0->x, cap0->y, cap0->z,
-                     cap0->cost, cap0->sint, cap0->cosf, cap0->sinf,
                      cap1->midx, cap1->midy, cap1->midz))
         return 1;
 
     if(icheck_bounds(cap1->x, cap1->y, cap1->z,
-                     cap1->cost, cap1->sint, cap1->cosf, cap1->sinf,
                      cap0->midx, cap0->midy, cap0->midz))
         return 1;
 
-    return 0;
-}
+    /* check each corner, is it inside the other cap? */
+    for(i=0; i<4; i++)
+        if(is_inside_cap(cap0, cap1->x[i], cap1->y[i], cap1->z[i]))
+            return 1;
 
-/* given an element number, return the node numbers of the bottom 4 nodes */
-static void bottom_nodes_of_element(int elem, int corners[4])
-{
-    int nx, ny, nz;
-    int lower_left_corner;
-    int i;
-
-    const int elxz = (nox - 1) * (noz - 1);
-
-    /* node connectivity ordered counterclockwisely */
-    const int conn[4] = {0, noz, noz*(nox+1), noz*nox};
-
-    /* The ordering scheme in citcoms is:
-     * elem = nz + nx*(noz-1) + ny*(nox-1)*(noz-1)
-     * node = nz + nx*noz + ny*nox*noz
-     */
-
-    nz = elem % (noz - 1);
-    ny = elem / elxz;
-    nx = (elem - ny * elxz) / (noz - 1);
-
-    /* the first node of this element */
-    lower_left_corner = elem + nx + ny * (nox + noz - 1);
-
     for(i=0; i<4; i++)
-        corners[i] = lower_left_corner + conn[i];
+        if(is_inside_cap(cap1, cap0->x[i], cap0->y[i], cap0->z[i]))
+            return 1;
 
-    return;
+    return 0;
 }
 
 
-/* whether point (x, y, z) is inside element elem */
+
 static int is_inside_element(const model_t *model,
                              double x, double y, double z, int elem)
 {
+    /* whether point (x, y, z) is inside element elem */
+
     double corner_x[4], corner_y[4], corner_z[4];
-    double corner_cost[4], corner_sint[4];
-    double corner_cosf[4], corner_sinf[4];
-    int i, j, corners[4];
+    int i, corners[4];
 
-    bottom_nodes_of_element(elem, corners);
+    element2surfnodes(elem, corners);
 
     for(i=0; i<4; i++) {
         corner_x[i] = model->nodes[corners[i]].x;
         corner_y[i] = model->nodes[corners[i]].y;
         corner_z[i] = model->nodes[corners[i]].z;
-
-        j = corners[i] / noz;
-        corner_cost[i] = model->cost[j];
-        corner_sint[i] = model->sint[j];
-        corner_cosf[i] = model->cosf[j];
-        corner_sinf[i] = model->sinf[j];
     }
 
-    return icheck_bounds(corner_x, corner_y, corner_z,
-                         corner_cost, corner_sint,
-                         corner_cosf, corner_sinf,
-                         x, y, z);
+    return icheck_bounds(corner_x, corner_y, corner_z, x, y, z);
 }
 
 
+
 static int find_layer(const model_t *model,
                       double x, double y, double z)
 {
@@ -619,19 +758,21 @@
 }
 
 
+
 static int find_nearest_node(const model_t *model,
-                             double x, double y, double z,
-                             int nz)
+                             double x, double y, double z)
 {
+    /* which surface node is nearest to (x, y, z) */
+
     double dist2, min_dist2;
     int i, n;
-    const int nno = nox * noy * noz;
+    const int nsf = nox * noy;
 
+    dist_searched ++;
     min_dist2 = DBL_MAX;
 
-    /* loop over all nodes at this layer*/
     n = -1;
-    for(i=nz; i<nno; i+=noz) {
+    for(i=0; i<nsf; i++) {
         node_t *node = &model->nodes[i];
         dist2 = ( (node->x - x) * (node->x - x) +
                   (node->y - y) * (node->y - y) +
@@ -646,12 +787,7 @@
 }
 
 
-static int ijk2elem(int nx, int ny, int nz)
-{
-    return nz + nx * (noz - 1) + ny * (nox - 1) * (noz - 1);
-}
 
-
 static int search_neighboring_elements(const model_t *model,
                                        double x, double y, double z,
                                        int nz, int nearest_node)
@@ -662,8 +798,8 @@
 
     const int nel = (nox-1) * (noy-1) * (noz-1);
 
-    ny = nearest_node / (nox * noz);
-    nx = (nearest_node - ny * nox * noz) / noz;
+    ny = nearest_node / nox;
+    nx = nearest_node % nox;
 
     neighbor_elements[0] = ijk2elem(nx, ny, nz);
     neighbor_elements[1] = ijk2elem(nx, ny-1, nz);
@@ -671,11 +807,9 @@
     neighbor_elements[3] = ijk2elem(nx-1, ny, nz);
 
     for(i=0; i<4; i++) {
-        if(neighbor_elements[i] < 0 || neighbor_elements[i] >= nel)
-            /* out of range */
-            continue;
-
-        if(is_inside_element(model, x, y, z, neighbor_elements[i]))
+        if(neighbor_elements[i] >= 0 &&
+           neighbor_elements[i] < nel &&
+           is_inside_element(model, x, y, z, neighbor_elements[i]))
             /* found */
             return neighbor_elements[i];
     }
@@ -685,6 +819,7 @@
 }
 
 
+
 static int search_all_elements_in_layer(const model_t *model,
                                         double x, double y, double z,
                                         int nz)
@@ -705,26 +840,20 @@
 }
 
 
+
 static int find_element(const model_t *model,
-                        double x, double y, double z, int hint_elem)
+                        double x, double y, double z)
 {
     int nz, nearest_node, elem;
 
-    if(hint_elem > 0) {
-        /* check the hint_elem first */
-        if(is_inside_element(model, x, y, z, hint_elem))
-            /* found */
-            return hint_elem;
-    }
-
     /* find corresponding radial layer */
     nz = find_layer(model, x, y, z);
     if(nz < 0)
         /* not found */
         return -1;
 
-    /* in this layer, find the nearest node to (x,y,z) */
-    nearest_node = find_nearest_node(model, x, y, z, nz);
+    /* find the nearest node to (x,y,z) */
+    nearest_node = find_nearest_node(model, x, y, z);
 
     /* search neighboring elements of the nearest node */
     elem = search_neighboring_elements(model, x, y, z, nz, nearest_node);
@@ -733,6 +862,10 @@
         /* found */
         return elem;
 
+    /* If we are here, we are already in trouble... */
+    fprintf(stderr, "warning: point (%.15e %.15e %.15e), nz=%d, nearest_node=%d\n",
+            x, y, z, nz, nearest_node);
+
     /* a more expensive search */
     elem = search_all_elements_in_layer(model, x, y, z, nz);
 
@@ -746,10 +879,12 @@
 }
 
 
-/* convert (radius, theta, phi) on a unit sphere to (x, y, z) */
+
 static void rtp2xyz(double radius, double theta, double phi,
                     double *x, double *y, double *z)
 {
+    /* convert (radius, theta, phi) on a unit sphere to (x, y, z) */
+
     *x = radius * sin(theta) * cos(phi);
     *y = radius * sin(theta) * sin(phi);
     *z = radius * cos(theta);
@@ -757,34 +892,87 @@
 }
 
 
+
 static void get_shape_functions(const model_t *model, int elem,
-                                double x, double y, double z,
+                                double x, double y, double z, double r,
                                 double shp[8])
 {
-    //XXX
+    /* The shape functions are composed of two components. One defined in
+     * the radial direction, the other in the azimuthal direction.
+     */
+
+    int nz;
+    int i, j, n;
+    double shp3[2], shp12[4];
+    double r0, r1, inv_dr;
+    const int facenodes = 4;
+
+    /* eta2 axis is the radial direction, easy to define shape functions */
+    /* diagram: r1----r---r0  */
+
+    nz = elem % (nox - 1);
+
+    r1 = sqrt(model->rsqr[nz+1]);
+    r0 = sqrt(model->rsqr[nz]);
+    inv_dr = 1 / (r1 - r0);
+
+    shp3[0] = (r1 - r) * inv_dr;
+    shp3[1] = (r - r0) * inv_dr;
+
+    /* Do a inverse mapping in eta0 and eta1 axes to define the shape functions
+     * on these axes. */
+
+    inverse_mapping(model, elem, x, y, z, shp12);
+
+    for(i=0; i<2; i++)
+        for(j=0, n=i*facenodes; j<facenodes; j++, n++)
+            shp[n] = shp12[j] * shp3[i];
+
     return;
 }
 
 
-static void interpolate_velocities(const model_t *model, const double shp[8],
+
+static void interpolate_velocities(const model_t *model, int elem,
+                                   const double shp[8],
                                    double *vp, double *vs, double *rho)
 {
     int i;
-    for(i=0; i<8; i++) {
-        //XXX: do something
+    int nx, ny, nz;
+    int lower_left_corner;
+
+    const int conn[8] = {0, noz, noz*(nox+1), noz*nox,
+                         1, noz+1, noz*(nox+1)+1, noz*nox+1};
+    const int elxz = (nox - 1) * (noz - 1);
+    const int elemnodes = 8;  /* # of nodes per element */
+
+    nz = elem % (noz - 1);
+    ny = elem / elxz;
+    nx = (elem - ny * elxz) / (noz - 1);
+
+    /* the first node of this element */
+    lower_left_corner = elem + nx + ny * (nox + noz - 1);
+
+    *vp = *vs = *rho = 0;
+    for(i=0; i<elemnodes; i++) {
+        int n = lower_left_corner + conn[i];
+        *vp += model->mats[n].vp * shp[i];
+        *vs += model->mats[n].vs * shp[i];
+        *rho += model->mats[n].rho * shp[i];
     }
     return;
 }
 
 
+
 static void get_velocities(const model_t *model, int elem,
-                           double x, double y, double z,
+                           double x, double y, double z, double r,
                            double *vp, double *vs, double *rho)
 {
     double shp[8];
 
-    get_shape_functions(model, elem, x, y, z, shp);
-    interpolate_velocities(model, shp, vp, vs, rho);
+    get_shape_functions(model, elem, x, y, z, r, shp);
+    interpolate_velocities(model, elem, shp, vp, vs, rho);
 
     return;
 }
@@ -799,14 +987,12 @@
 void FC_FUNC_(read_citcoms_mantle_model, READ_CITCOMS_MANTLE_MODEL)
      (const double x[4], const double y[4], const double z[4])
 {
-    const int ncolumns = 11;
-    int myrank;
+    const int ncolumns = 10;
     int i, buffer_size;
     int total_citcoms_proc;
     double *buffer;
+    char citcoms_bound_filename[256];
     FILE *fp;
-    cap_t my_cap;
-    domain_t *curr, *prev;
 
     MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
 
@@ -820,9 +1006,11 @@
      ***********************************************************************/
 
     if(myrank == 0) {
-        long ltmp[2];
+        const char fmt[] = "%s.domain";
+        int32_t header[4];
 
-        fp = fopen(citcoms_bound_filename, "r");
+        snprintf(citcoms_bound_filename, 255, fmt, citcoms_model_filename_base);
+        fp = fopen(citcoms_bound_filename, "rb");
         if(fp == NULL) {
             fprintf(stderr, "error while opening file '%s'\n",
                     citcoms_bound_filename);
@@ -830,19 +1018,28 @@
         }
 
         /* read header line */
-        if(read_long_vector(fp, 2, ltmp) != 2) {
-            fprintf(stderr, "error while reading first line of '%s'\n",
+        if(fread(header, sizeof(int32_t), 4, fp) != 4) {
+            fprintf(stderr, "error while reading '%s'\n",
                     citcoms_bound_filename);
             abort();
         }
 
-        if(ltmp[1] != ncolumns) {
-            fprintf(stderr, "'%s' contains wrong number of columns\n",
+        /* check guard */
+        if(header[2] != 0x12345678 ||
+           header[3] != sizeof(int32_t)) {
+            fprintf(stderr, "error: '%s' is not a citcoms domain file, or the binary format is not compatible.\n",
                     citcoms_bound_filename);
             abort();
         }
 
-        total_citcoms_proc = (int) ltmp[0];
+
+        if(header[1] != ncolumns) {
+            fprintf(stderr, "error: '%s' contains wrong number of columns\n",
+                    citcoms_bound_filename);
+            abort();
+        }
+
+        total_citcoms_proc = header[0];
     }
 
     MPI_Bcast(&total_citcoms_proc, 1, MPI_INT, 0, MPI_COMM_WORLD);
@@ -856,12 +1053,10 @@
 
     if(myrank == 0) {
         /* read content of domain bound */
-        for(i=0; i<total_citcoms_proc; i++) {
-            if(read_double_vector(fp, ncolumns, &(buffer[i*ncolumns])) != ncolumns) {
-                fprintf(stderr, "error while reading %d-th line of '%s'\n",
-                        i, citcoms_bound_filename);
-                abort();
-            }
+        if(fread(buffer, sizeof(double), buffer_size, fp) != buffer_size) {
+            fprintf(stderr, "error while reading '%s'\n",
+                    citcoms_bound_filename);
+            abort();
         }
         fclose(fp);
     }
@@ -874,55 +1069,65 @@
      * Find out which citcoms domain overlaps with my domain.
      ***********************************************************************/
 
-    first = malloc(sizeof(domain_t));
-    curr = first;
-    prev = NULL;
+    domains = malloc(total_citcoms_proc * sizeof(domain_t));
+    if(domains == NULL) {
+        fprintf(stderr, "error while allocating memory '%s' 2\n",
+                __FILE__);
+        abort();
+    }
 
+    ndomains = 0;
+
     for(i=0; i<total_citcoms_proc; i++) {
         const double *tmp = &(buffer[i*ncolumns]);
         double ctheta, cphi;
+        double rmin, rmax;
         double cx[4], cy[4], cz[4];
         cap_t citcoms_cap;
+        domain_t *dom;
         int j;
 
+        rmin = tmp[0];
+        rmax = tmp[1];
         for(j=0; j<4; j++) {
-            ctheta = tmp[1 + 2*j];
-            cphi = tmp[2 + 2*j];
+            ctheta = tmp[2*j + 2];
+            cphi = tmp[2*j + 3];
             rtp2xyz(1.0, ctheta, cphi, &(cx[j]), &(cy[j]), &(cz[j]));
         }
 
         new_cap_corners(&citcoms_cap, cx, cy, cz);
 
         if(is_overlapping(&my_cap, &citcoms_cap)) {
-            curr->rank = (int) tmp[0];
-            curr->rmin = tmp[9];
-            curr->rmax = tmp[10];
-            memcpy(&(curr->cap), &citcoms_cap, sizeof(cap_t));
-            curr->next = malloc(sizeof(domain_t));
-            prev = curr;
-            curr = curr->next;
+            dom = &domains[ndomains];
+
+            dom->rank = i;
+            dom->rmin = rmin;
+            dom->rmax = rmax;
+
+            dom->cap = malloc(sizeof(cap_t));
+            dom->model = malloc(sizeof(model_t));
+            if(dom->cap == NULL ||
+               dom->model == NULL) {
+                fprintf(stderr, "error while allocating memory '%s' 3\n",
+                        __FILE__);
+                abort();
+            }
+            memcpy(dom->cap, &citcoms_cap, sizeof(cap_t));
+
+            ndomains ++;
         }
     }
 
-    if(prev == NULL) {
-        fprintf(stderr, "rank=%d: cannot find overllaping domain '%s' 2\n",
+    if(ndomains == 0) {
+        fprintf(stderr, "rank=%d: cannot find overllaping domain '%s'\n",
                 myrank, __FILE__);
         abort();
     }
 
-    /* this pointer is over-allocated */
-    free(curr);
-    prev->next = NULL;
-
     free(buffer);
 
     /* count how many overlapping domains */
-    curr = first;
-    i = 0;
-    do {
-        i++;
-    } while (curr = curr->next);
-    fprintf(stderr, "rank=%d, overlaps with %d citcoms domains\n", myrank, i);
+    fprintf(stderr, "rank=%d, overlaps with %d citcoms domains\n", myrank, ndomains);
     MPI_Barrier(MPI_COMM_WORLD);
 
 
@@ -930,18 +1135,18 @@
      * Read the mantle model from those citcoms domains
      ***********************************************************************/
 
-    curr = first;
-    do {
+    for(i=0; i<ndomains; i++) {
         const char fmt1[] = "%s.coord_bin.%d";
         const char fmt2[] = "%s.seismic.%d.%d";
         char filename1[256];
         char filename2[256];
         FILE *f1, *f2;
-        int header[4];
+        int32_t header[4];
         int nno, size;
         double *coord, *seismic;
+        domain_t *dom = &domains[i];
 
-        snprintf(filename1, 255, fmt1, citcoms_model_filename_base, curr->rank);
+        snprintf(filename1, 255, fmt1, citcoms_model_filename_base, dom->rank);
         f1 = fopen(filename1, "rb");
         if(f1 == NULL) {
             fprintf(stderr, "error while opening file '%s'\n",
@@ -949,7 +1154,7 @@
             abort();
         }
 
-        snprintf(filename2, 255, fmt2, citcoms_model_filename_base, curr->rank, citcoms_step);
+        snprintf(filename2, 255, fmt2, citcoms_model_filename_base, dom->rank, citcoms_step);
         f2 = fopen(filename2, "rb");
         if(f2 == NULL) {
             fprintf(stderr, "error while opening file '%s'\n",
@@ -958,12 +1163,13 @@
         }
 
         /* read header */
-        fread(header, sizeof(int), 4, f1);
-        if(header[3] != INT_MAX) {
-            fprintf(stderr, "error while reading file header '%s'\n",
+        fread(header, sizeof(int32_t), 4, f1);
+        if(header[3] != 0x12345678) {
+            fprintf(stderr, "error: '%s' is not a citcoms coord_bin file, or the binary format is not compatible\n",
                     filename1);
             abort();
         }
+
         nox = header[0];
         noy = header[1];
         noz = header[2];
@@ -972,7 +1178,8 @@
         coord = malloc(nno*3*sizeof(double));
         seismic = malloc(nno*3*sizeof(double));
         if(coord == NULL || seismic == NULL) {
-            fprintf(stderr, "error while allocating memory rank=%d\n", myrank);
+            fprintf(stderr, "error while allocating memory '%s' 4\n",
+                    __FILE__);
             abort();
         }
 
@@ -993,68 +1200,146 @@
         }
 
         /* store data */
-        create_model(&(curr->model), coord, seismic);
+        create_model(dom->model, coord, seismic);
 
         free(seismic);
         free(coord);
 
         fclose(f2);
         fclose(f1);
-    } while (curr = curr->next);
+    }
 
-    fprintf(stderr, "rank=%d, 10\n", myrank);
-    MPI_Barrier(MPI_COMM_WORLD);
+    fprintf(stderr, "finished reading mantle model\n");
 
     return;
 }
 
 
 void FC_FUNC_(get_velo_from_citcoms, GET_VELO_FROM_CITCOMS)
-    (const double *radius, const double *theta, const double *phi,
+    ( double *radius,  double *theta,  double *phi,
      double *vpv, double *vsv, double *rho,
-     int *hint_elem)
+     int *hint_domain, int *hint_elem)
 {
     int elem;
     double x, y, z;
-    domain_t *curr;
+    double xx, yy, zz;
+    domain_t *dom;
+    int i;
 
-    curr = first;
-    do {
+    npoints ++;
 
-        if(*radius < curr->rmin || *radius > curr->rmax)
-            /* not in this citcoms domain */
-            continue;
+    rtp2xyz(*radius, *theta, *phi, &x, &y, &z);
 
-        rtp2xyz(*radius, *theta, *phi, &x, &y, &z);
+    /* project to unit sphere */
+    xx = x / *radius;
+    yy = y / *radius;
+    zz = z / *radius;
 
-        if(! is_inside(&(curr->cap), x, y, z))
-            /* not in this citcoms domain */
-            continue;
+    if(*hint_domain >= 0) {
+        dom = &domains[*hint_domain];
 
-        elem = find_element(&(curr->model), x, y, z, *hint_elem);
+        if(is_inside_element(dom->model, x, y, z, *hint_elem)) {
+            /* search is successful */
+            hint_works ++;
+            get_velocities(dom->model, *hint_elem, x, y, z, *radius,
+                           vpv, vsv, rho);
+            return;
+        }
+    }
 
-        if(elem < 0)
-            /* search failed!!! */
-            break;
+    for(i=0; i<ndomains; i++) {
+        dom = &domains[i];
 
-        /* search is successful */
-        get_velocities(&(curr->model), elem, x, y, z,
-                       vpv, vsv, rho);
+        if(*radius >= dom->rmin &&
+           *radius <= dom->rmax &&
+           is_inside_cap(dom->cap, xx, yy, zz)) {
+            /* in this citcoms domain */
+            *hint_domain = i;
+            goto searching;
+        }
+    }
 
-        /* the next search is very likely to be in the same element,
-         * save the current result as the hint of next search */
-        *hint_elem = elem;
+    /* The point is not inside any cap! */
+    goto fail;
 
-        return;
+ searching:
 
-    } while (curr = curr->next);
+    elem = find_element(dom->model, x, y, z);
 
-    /* failed search */
-    fprintf(stderr, "Cannot find corresponding citcoms element! (%.15e, %.15e, %.15e)\n",
-            *radius, *theta, *phi);
+    if(elem < 0)
+        goto fail;
+
+    /* the next search is very likely to be in the same element,
+     * save the current result as the hint of next search */
+    *hint_elem = elem;
+
+    /* search is successful */
+    get_velocities(dom->model, *hint_elem, x, y, z, *radius,
+                   vpv, vsv, rho);
+
+    return;
+
+ fail:
+
+    /* search has failed, it means there is a bug in the code above...  */
+    fprintf(stderr, "Cannot find corresponding citcoms element! rank=%d point=(%.15e, %.15e, %.15e)\n",
+            myrank, *radius, *theta, *phi);
+
+
+    /* some postmortem information to aid debugging */
+    fprintf(stderr, "inside my_cap? %d\n", is_inside_cap(&my_cap, xx, yy, zz));
+
+    for(i=0; i<ndomains; i++) {
+        int nz, nearest_node, elem;
+        int nx, ny;
+
+        dom = &domains[i];
+
+        if(*radius >= dom->rmin &&
+           *radius <= dom->rmax &&
+           is_inside_cap(dom->cap, xx, yy, zz)) {
+
+            fprintf(stderr, "inside domain %d, citcom_rank=%d\n",
+                    i, dom->rank);
+
+            nz = find_layer(dom->model, x, y, z);
+            nearest_node = find_nearest_node(dom->model, x, y, z);
+            elem = search_neighboring_elements(dom->model, x, y, z, nz, nearest_node);
+
+            ny = nearest_node / nox;
+            nx = nearest_node % nox;
+
+            fprintf(stderr, "%d %d %d %d %d, [%d %d %d %d]\n", nx, ny, nz, nearest_node, elem,
+                    ijk2elem(nx, ny, nz), ijk2elem(nx-1, ny, nz),
+                    ijk2elem(nx-1, ny-1, nz), ijk2elem(nx, ny-1, nz));
+           }
+    }
+
     abort();
 
     return;
 }
 
 
+void FC_FUNC_(finalize_mantle_model, FINALIZE_MANTLE_MODEL)()
+{
+    float tmp[5], recv[5];
+
+    /* some statistics */
+    fprintf(stderr, "rank=%d, points=%ld, distance searched=%ld, elements searched=%ld, hint=%ld, inverse mapping=%ld\n",
+            myrank, npoints, dist_searched, elem_searched, hint_works, inv_mapping_iter);
+
+    tmp[0] = npoints;
+    tmp[1] = dist_searched;
+    tmp[2] = elem_searched;
+    tmp[3] = hint_works;
+    tmp[4] = inv_mapping_iter;
+
+    MPI_Allreduce(tmp, recv, 5, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
+
+    if(myrank==0)
+        fprintf(stderr, "total points=%e, distance searched=%e, elements searched=%e, hint=%e, inverse mapping=%e\n",
+                recv[0], recv[1], recv[2], recv[3], recv[4]);
+
+}
+



More information about the CIG-COMMITS mailing list