[cig-commits] r14232 - long/3D/SNAC/trunk/Snac/plugins/hillSlope

cstark at geodynamics.org cstark at geodynamics.org
Wed Mar 4 15:34:36 PST 2009


Author: cstark
Date: 2009-03-04 15:34:36 -0800 (Wed, 04 Mar 2009)
New Revision: 14232

Modified:
   long/3D/SNAC/trunk/Snac/plugins/hillSlope/Track.c
Log:
Follow-up to changes to tracking code to make it work in parallel.

Bug fix: the code was calling MPI_Allreduce inside a conditional block that is not reached in all threads, which of course is a big no-no - the run eventually hangs and then bails (on Ranger) as the threads become terminally unsyncable.

Corrected by placing the MPI_Allreduce call - which checks whether all threads are either elastically stabilized (equilibrated) or unable to comment (a thread that does not reach the top simply reports itself as stabilized) - in a block that will be reached in all threads.




Modified: long/3D/SNAC/trunk/Snac/plugins/hillSlope/Track.c
===================================================================
--- long/3D/SNAC/trunk/Snac/plugins/hillSlope/Track.c	2009-03-04 23:15:53 UTC (rev 14231)
+++ long/3D/SNAC/trunk/Snac/plugins/hillSlope/Track.c	2009-03-04 23:34:36 UTC (rev 14232)
@@ -44,7 +44,7 @@
 #define FALSE 0
 #endif
 
-#define DEBUG
+#define DEBUG2
 
 void SnacHillSlope_Track( void* _context ) {
 	Snac_Context			*context = (Snac_Context*)_context;
@@ -72,6 +72,7 @@
 	const double			trackLevel=(double)contextExt->trackLevel;
 	const double			startThreshold=(contextExt->startThreshold>=0.0 ? contextExt->startThreshold : 1e-2);
 	const double			stopThreshold=(contextExt->stopThreshold>=0.0 ? contextExt->stopThreshold : 1e-3);
+	char eflag, ceflag;
 	
 
 /* 	if (context->timeStep % context->dumpEvery == 0) { */
@@ -82,6 +83,10 @@
 	 *  Bail now if all threads have reached elastic equilibrium
 	 */
 	if(contextExt->consensusElasticStabilizedFlag){
+#ifdef DEBUG
+	    fprintf(stderr,"c=%d,t=%d/%d: Consensus eqm... bailing at top of Track.c\n",context->rank, 
+		    context->timeStep,context->maxTimeSteps);
+#endif
 	    return;
 	}
 
@@ -154,10 +159,10 @@
 	    }
 	}
 
-#ifdef DEBUG
+#ifdef DEBUG2
 	    fprintf(stderr,
-		    "t=%d:  reachesTop=%d consensusElasticStabilized=%d  elasticStabilized=%d   startedTracking=%d:  max_vel=%g  unit_vel=%g\n",
-		    context->timeStep, reachesTopFlag, 
+		    "c=%d,t=%d/%d:  reachesTop=%d consensusElasticStabilized=%d  elasticStabilized=%d   startedTracking=%d:  max_vel=%g  unit_vel=%g\n",
+		    context->rank, context->timeStep, context->maxTimeSteps, reachesTopFlag, 
 		    contextExt->consensusElasticStabilizedFlag, contextExt->elasticStabilizedFlag, contextExt->startedTrackingFlag, 
 		    max_yVelocity, unit_yVelocity ); 
 #endif
@@ -168,6 +173,9 @@
 	if(!reachesTopFlag) {
 	    contextExt->startedTrackingFlag = TRUE;
 	    contextExt->elasticStabilizedFlag = TRUE;
+#ifdef DEBUG
+	    fprintf(stderr,"c=%d,t=%d/%d:  Doesn't reach top\n",context->rank, context->timeStep, context->maxTimeSteps);
+#endif
 	} else {
 	    /*
 	     * Now deprecated: estimate unit rates of motion for later comparison with falling rates
@@ -179,6 +187,9 @@
 	    if(!contextExt->startedTrackingFlag && max_yVelocity>=unit_yVelocity && context->timeStep>=4) 
 		contextExt->startedTrackingFlag=TRUE;
 
+#ifdef DEBUG
+	    fprintf(stderr,"c=%d,t=%d/%d: Does reach top - check if equilibrating\n",context->rank, context->timeStep, context->maxTimeSteps);
+#endif
 	    /*
 	     *  If surface change is slowing and slowly enough, flag that elastic eqm has been reached on this thread
 	     */
@@ -191,6 +202,9 @@
 		    /*
 		     *  Stabilizing on this thread
 		     */
+#ifdef DEBUG
+		  fprintf(stderr,"c=%d,t=%d/%d: Locally report we're equilibrating\n",context->rank, context->timeStep, context->maxTimeSteps);
+#endif
 		    contextExt->elasticStabilizedFlag = TRUE;
 		}
 	    }
@@ -198,21 +212,44 @@
 	/*
 	 *  Decide whether to stop or to continue simulation
 	 */
+#ifdef DEBUG
+	fprintf(stderr,"c=%d,t=%d/%d: Checking consensus... %d\n",context->rank, context->timeStep, context->maxTimeSteps,
+		contextExt->consensusElasticStabilizedFlag);
+#endif
+	/*
+	 *  Check all threads to see if global equilibration has been reached
+	 */
+	//	    MPI_Allreduce( &contextExt->elasticStabilizedFlag, &contextExt->consensusElasticStabilizedFlag, 
+	//	   1, MPI_INT, MPI_LAND, context->communicator );
+	eflag=contextExt->elasticStabilizedFlag;
+	MPI_Allreduce( &eflag, &ceflag, 
+		       1, MPI_CHAR, MPI_MIN, context->communicator );
+#ifdef DEBUG
+	fprintf(stderr,"c=%d,t=%d/%d: ... revised consensus= %d->%d\n",context->rank, context->timeStep, context->maxTimeSteps,
+		ceflag,
+		contextExt->consensusElasticStabilizedFlag);
+#endif
 	if(contextExt->startedTrackingFlag){
 	    /*
-	     *  Check all threads to see if global equilibration has been reached
-	     */
-	    MPI_Allreduce( &(contextExt->elasticStabilizedFlag), &(contextExt->consensusElasticStabilizedFlag), 
-			   1, MPI_INT, MPI_LAND, context->communicator );
-	    /*
 	     *  If all threads agree to elastic eqm, and we only want to run to this point, 
 	     *    tell the simulation to stop at this time step
 	     */
-	    if(contextExt->consensusElasticStabilizedFlag) {
-		if(contextExt->solveElasticEqmOnlyFlag) {
-		    context->maxTimeSteps=context->timeStep/*+context->dumpEvery*/;
-		}
+	  contextExt->consensusElasticStabilizedFlag=ceflag;
+#ifdef DEBUG
+	fprintf(stderr,"c=%d,t=%d/%d: ... revised consensus= %d->%d\n",context->rank, context->timeStep, context->maxTimeSteps,
+		ceflag,
+		contextExt->consensusElasticStabilizedFlag);
+#endif
+
+	  if(contextExt->consensusElasticStabilizedFlag) {
+	    if(contextExt->solveElasticEqmOnlyFlag) {
+#ifdef DEBUG
+	      fprintf(stderr,"c=%d, t=%d/%d: Stopping run at t= %d\n",context->rank, context->timeStep, 
+		      context->maxTimeSteps,context->maxTimeSteps);
+#endif
+	      context->maxTimeSteps=context->timeStep/*+context->dumpEvery*/;
 	    }
+	  }
 	}
 
 	/*



More information about the CIG-COMMITS mailing list