[cig-commits] r1346 - in branches/s-wang: . for_deal.II for_deal.II/include for_deal.II/include/deal.II for_deal.II/include/deal.II/lac for_deal.II/source for_deal.II/source/lac for_deal.II/source/numerics include/aspect include/aspect/particle source source/postprocess source/simulator

s-wang at dealii.org
Tue Nov 6 12:23:43 PST 2012


Author: s-wang
Date: 2012-11-06 13:23:43 -0700 (Tue, 06 Nov 2012)
New Revision: 1346

Added:
   branches/s-wang/for_deal.II/
   branches/s-wang/for_deal.II/include/
   branches/s-wang/for_deal.II/include/deal.II/
   branches/s-wang/for_deal.II/include/deal.II/lac/
   branches/s-wang/for_deal.II/include/deal.II/lac/petsc_matrix_base.h
   branches/s-wang/for_deal.II/include/deal.II/lac/petsc_parallel_block_vector.h
   branches/s-wang/for_deal.II/include/deal.II/lac/trilinos_vector_base.h
   branches/s-wang/for_deal.II/source/
   branches/s-wang/for_deal.II/source/lac/
   branches/s-wang/for_deal.II/source/lac/constraint_matrix.cc
   branches/s-wang/for_deal.II/source/lac/petsc_matrix_base.cc
   branches/s-wang/for_deal.II/source/lac/trilinos_sparse_matrix.cc
   branches/s-wang/for_deal.II/source/lac/trilinos_vector_base.cc
   branches/s-wang/for_deal.II/source/numerics/
   branches/s-wang/for_deal.II/source/numerics/derivative_approximation.inst.in
   branches/s-wang/include/aspect/global_trilinos.h
Modified:
   branches/s-wang/include/aspect/global.h
   branches/s-wang/include/aspect/particle/world.h
   branches/s-wang/include/aspect/simulator.h
   branches/s-wang/source/main.cc
   branches/s-wang/source/postprocess/composition_statistics.cc
   branches/s-wang/source/postprocess/temperature_statistics.cc
   branches/s-wang/source/simulator/assembly.cc
   branches/s-wang/source/simulator/core.cc
   branches/s-wang/source/simulator/helper_functions.cc
   branches/s-wang/source/simulator/initial_conditions.cc
   branches/s-wang/source/simulator/solver.cc
Log:
Still in the testing phase of replacing Trilinos with PETSc.


Added: branches/s-wang/for_deal.II/include/deal.II/lac/petsc_matrix_base.h
===================================================================
--- branches/s-wang/for_deal.II/include/deal.II/lac/petsc_matrix_base.h	                        (rev 0)
+++ branches/s-wang/for_deal.II/include/deal.II/lac/petsc_matrix_base.h	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,1870 @@
+//---------------------------------------------------------------------------
+//    $Id: petsc_matrix_base.h 26045 2012-08-21 09:38:15Z young $
+//
+//    Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+//    This file is subject to QPL and may not be  distributed
+//    without copyright and license information. Please refer
+//    to the file deal.II/doc/license.html for the  text  and
+//    further information on this license.
+//
+//---------------------------------------------------------------------------
+#ifndef __deal2__petsc_matrix_base_h
+#define __deal2__petsc_matrix_base_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_USE_PETSC
+
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/lac/full_matrix.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/vector.h>
+
+#  include <petscmat.h>
+#  include <deal.II/base/std_cxx1x/shared_ptr.h>
+
+#  include <vector>
+#  include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename Matrix> class BlockMatrixBase;
+
+
+namespace PETScWrappers
+{
+                                   // forward declarations
+  class VectorBase;
+  class MatrixBase;
+
+  namespace MatrixIterators
+  {
+/**
+ * STL-conforming iterator. This class acts as an iterator walking over the
+ * elements of PETSc matrices. Since PETSc offers a uniform interface for all
+ * types of matrices, this iterator can be used to access both sparse and full
+ * matrices.
+ *
+ * Note that PETSc does not give any guarantees as to the order of elements
+ * within each row. Note also that, perhaps surprisingly, accessing the
+ * elements of a full matrix only shows the nonzero elements of the matrix,
+ * not all elements.
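+ *
+ * A minimal usage sketch (hedged: <tt>matrix</tt> is an illustrative name,
+ * and the begin()/end() iterator pair is assumed to be provided by
+ * MatrixBase, as usual for STL-conforming iterators):
+ * @code
+ * for (PETScWrappers::MatrixBase::const_iterator p = matrix.begin();
+ *      p != matrix.end(); ++p)
+ *   std::cout << p->row() << ' ' << p->column() << ' '
+ *             << p->value() << std::endl;
+ * @endcode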
+ *
+ * @ingroup PETScWrappers
+ * @author Guido Kanschat, Roy Stogner, Wolfgang Bangerth, 2004
+ */
+    class const_iterator
+    {
+      private:
+                                         /**
+                                          * Accessor class for iterators
+                                          */
+        class Accessor
+        {
+          public:
+                                             /**
+                                              * Constructor. Since we use
+                                              * accessors only for read
+                                              * access, a const matrix
+                                              * pointer is sufficient.
+                                              */
+            Accessor (const MatrixBase    *matrix,
+                      const unsigned int   row,
+                      const unsigned int   index);
+
+                                             /**
+                                              * Row number of the element
+                                              * represented by this
+                                              * object.
+                                              */
+            unsigned int row() const;
+
+                                             /**
+                                              * Index in row of the element
+                                              * represented by this
+                                              * object.
+                                              */
+            unsigned int index() const;
+
+                                             /**
+                                              * Column number of the
+                                              * element represented by
+                                              * this object.
+                                              */
+            unsigned int column() const;
+
+                                             /**
+                                              * Value of this matrix entry.
+                                              */
+            PetscScalar value() const;
+
+                                             /**
+                                              * Exception
+                                              */
+            DeclException0 (ExcBeyondEndOfMatrix);
+                                             /**
+                                              * Exception
+                                              */
+            DeclException3 (ExcAccessToNonlocalRow,
+                            int, int, int,
+                            << "You tried to access row " << arg1
+                            << " of a distributed matrix, but only rows "
+                            << arg2 << " through " << arg3
+                            << " are stored locally and can be accessed.");
+
+          private:
+                                             /**
+                                              * The matrix accessed.
+                                              */
+            mutable MatrixBase *matrix;
+
+                                             /**
+                                              * Current row number.
+                                              */
+            unsigned int a_row;
+
+                                             /**
+                                              * Current index in row.
+                                              */
+            unsigned int a_index;
+
+                                             /**
+                                              * Cache where we store the
+                                              * column indices of the present
+                                              * row. This is necessary, since
+                                              * PETSc makes access to the
+                                              * elements of its matrices
+                                              * rather hard, and it is much
+                                              * more efficient to copy all
+                                              * column entries of a row once
+                                              * when we enter it than
+                                              * repeatedly asking PETSc for
+                                              * individual ones. This also
+                                              * makes some sense since it is
+                                              * likely that we will access
+                                              * them sequentially anyway.
+                                              *
+                                              * In order to make copying of
+                                              * iterators/accessor of
+                                              * acceptable performance, we
+                                              * keep a shared pointer to these
+                                              * entries so that more than one
+                                              * accessor can access this data
+                                              * if necessary.
+                                              */
+            std_cxx1x::shared_ptr<const std::vector<unsigned int> > colnum_cache;
+
+                                             /**
+                                              * Similar cache for the values
+                                              * of this row.
+                                              */
+            std_cxx1x::shared_ptr<const std::vector<PetscScalar> > value_cache;
+
+                                             /**
+                                              * Discard the old row caches
+                                              * (they may still be used by
+                                              * other accessors) and generate
+                                              * new ones for the row pointed
+                                              * to presently by this accessor.
+                                              */
+            void visit_present_row ();
+
+                                             /**
+                                              * Make enclosing class a
+                                              * friend.
+                                              */
+            friend class const_iterator;
+        };
+
+      public:
+
+                                         /**
+                                          * Constructor. Create an iterator
+                                          * into the matrix @p matrix for the
+                                          * given row and the index within it.
+                                          */
+        const_iterator (const MatrixBase   *matrix,
+                        const unsigned int  row,
+                        const unsigned int  index);
+
+                                         /**
+                                          * Prefix increment.
+                                          */
+        const_iterator& operator++ ();
+
+                                         /**
+                                          * Postfix increment.
+                                          */
+        const_iterator operator++ (int);
+
+                                         /**
+                                          * Dereferencing operator.
+                                          */
+        const Accessor& operator* () const;
+
+                                         /**
+                                          * Dereferencing operator.
+                                          */
+        const Accessor* operator-> () const;
+
+                                         /**
+                                          * Comparison. True, if
+                                          * both iterators point to
+                                          * the same matrix
+                                          * position.
+                                          */
+        bool operator == (const const_iterator&) const;
+                                         /**
+                                          * Inverse of <tt>==</tt>.
+                                          */
+        bool operator != (const const_iterator&) const;
+
+                                         /**
+                                          * Comparison
+                                          * operator. Result is true
+                                          * if either the first row
+                                          * number is smaller or if
+                                          * the row numbers are
+                                          * equal and the first
+                                          * index is smaller.
+                                          */
+        bool operator < (const const_iterator&) const;
+
+                                         /**
+                                          * Exception
+                                          */
+        DeclException2 (ExcInvalidIndexWithinRow,
+                        int, int,
+                        << "Attempt to access element " << arg2
+                        << " of row " << arg1
+                        << " which doesn't have that many elements.");
+
+      private:
+                                         /**
+                                          * Store an object of the
+                                          * accessor class.
+                                          */
+        Accessor accessor;
+    };
+
+  }
+
+
+/**
+ * Base class for all matrix classes that are implemented on top of the PETSc
+ * matrix types. Since in PETSc all matrix types (i.e. sequential and
+ * parallel, sparse, blocked, etc.)  are built by filling the contents of an
+ * abstract object that is only referenced through a pointer of a type that is
+ * independent of the actual matrix type, we can implement almost all
+ * functionality of matrices in this base class. Derived classes will then only
+ * have to provide the functionality to create one or the other kind of
+ * matrix.
+ *
+ * The interface of this class is modeled after the existing
+ * SparseMatrix class in deal.II. It has almost the same member
+ * functions and is often exchangeable with it. However, since PETSc only
+ * supports a single scalar type (either double, float, or a complex data
+ * type), it is not templated and only works with whatever data type your
+ * PETSc installation has defined PetscScalar to be.
+ *
+ * Note that PETSc only guarantees that operations do what you expect if the
+ * functions @p MatAssemblyBegin and @p MatAssemblyEnd have been called
+ * after matrix assembly. Therefore, you need to call
+ * SparseMatrix::compress() before you actually use the matrix. This also
+ * calls @p MatCompress, which compresses the storage format for sparse
+ * matrices by discarding unused elements. PETSc allows you to continue
+ * assembling the matrix after calls to these functions, but since no free
+ * entries are available after that, it is better to call
+ * SparseMatrix::compress() only once at the end of the assembly stage and
+ * before the matrix is actively used.
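+ *
+ * A short sketch of the intended workflow (hedged: <tt>system_matrix</tt>
+ * is an illustrative name for an object of a class derived from this one):
+ * @code
+ * system_matrix.add (i, j, value);                 // assembly, possibly many times
+ * system_matrix.compress (VectorOperation::add);   // once, after assembly
+ * // only now use the matrix, e.g. in matrix-vector products
+ * @endcode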
+ *
+ * @ingroup PETScWrappers
+ * @ingroup Matrix1
+ * @author Wolfgang Bangerth, 2004
+ */
+  class MatrixBase : public Subscriptor
+  {
+    public:
+                                       /**
+                                        * Declare a typedef for the iterator
+                                        * class.
+                                        */
+      typedef MatrixIterators::const_iterator const_iterator;
+
+                                       /**
+                                        * Declare a typedef in analogy to all
+                                        * the other container classes.
+                                        */
+      typedef PetscScalar value_type;
+
+                                       /**
+                                        * Default constructor.
+                                        */
+      MatrixBase ();
+
+                                       /**
+                                        * Destructor. Made virtual so that one
+                                        * can use pointers to this class.
+                                        */
+      virtual ~MatrixBase ();
+
+                                       /**
+                                        * This operator assigns a scalar to a
+                                        * matrix. Since this usually does not
+                                        * make much sense (should we set all
+                                        * matrix entries to this value? Only
+                                        * the nonzero entries of the sparsity
+                                        * pattern?), this operation is only
+                                        * allowed if the actual value to be
+                                        * assigned is zero. This operator only
+                                        * exists to allow for the obvious
+                                        * notation <tt>matrix=0</tt>, which
+                                        * sets all elements of the matrix to
+                                        * zero, but keeps the sparsity pattern
+                                        * previously used.
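+                                        *
+                                        * For example
+                                        * (<tt>system_matrix</tt> is an
+                                        * illustrative name):
+                                        * @code
+                                        * // zero all stored entries but
+                                        * // keep the sparsity pattern:
+                                        * system_matrix = 0;
+                                        * @endcode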
+                                        */
+      MatrixBase &
+      operator = (const value_type d);
+                                       /**
+                                        * Release all memory and return
+                                        * to a state just like after
+                                        * having called the default
+                                        * constructor.
+                                        */
+      void clear ();
+
+                                       /**
+                                        * Set the element (<i>i,j</i>) to @p
+                                        * value.
+                                        *
+                                        * If the present object (from a
+                                        * derived class of this one) happens
+                                        * to be a sparse matrix, then this
+                                        * function adds a new entry to the
+                                        * matrix if it didn't exist before,
+                                        * very much in contrast to the
+                                        * SparseMatrix class which throws an
+                                        * error if the entry does not exist.
+                                        * If <tt>value</tt> is not a finite
+                                        * number an exception is thrown.
+                                        */
+      void set (const unsigned int i,
+                const unsigned int j,
+                const PetscScalar value);
+
+                                       /**
+                                        * Set all elements given in a
+                                        * FullMatrix<double> into the sparse
+                                        * matrix locations given by
+                                        * <tt>indices</tt>. In other words,
+                                        * this function writes the elements
+                                        * in <tt>full_matrix</tt> into the
+                                        * calling matrix, using the
+                                        * local-to-global indexing specified
+                                        * by <tt>indices</tt> for both the
+                                        * rows and the columns of the
+                                        * matrix. This function assumes a
+                                        * quadratic sparse matrix and a
+                                        * quadratic full_matrix, the usual
+                                        * situation in FE calculations.
+                                        *
+                                        * If the present object (from a
+                                        * derived class of this one) happens
+                                        * to be a sparse matrix, then this
+                                        * function adds some new entries to
+                                        * the matrix if they didn't exist
+                                        * before, very much in contrast to
+                                        * the SparseMatrix class which
+                                        * throws an error if the entry does
+                                        * not exist.
+                                        *
+                                        * The optional parameter
+                                        * <tt>elide_zero_values</tt> can be
+                                        * used to specify whether zero
+                                        * values should be inserted anyway
+                                        * or they should be filtered
+                                        * away. The default value is
+                                        * <tt>false</tt>, i.e., even zero
+                                        * values are inserted/replaced.
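+                                        *
+                                        * A sketch of the typical finite
+                                        * element use (hedged:
+                                        * <tt>cell_matrix</tt>,
+                                        * <tt>local_dof_indices</tt> and
+                                        * <tt>dofs_per_cell</tt> are
+                                        * illustrative names):
+                                        * @code
+                                        * FullMatrix<PetscScalar> cell_matrix (dofs_per_cell, dofs_per_cell);
+                                        * std::vector<unsigned int> local_dof_indices (dofs_per_cell);
+                                        * // ... fill both on the current cell ...
+                                        * matrix.set (local_dof_indices, cell_matrix);
+                                        * @endcode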
+                                        */
+      void set (const std::vector<unsigned int> &indices,
+                const FullMatrix<PetscScalar>   &full_matrix,
+                const bool                       elide_zero_values = false);
+
+                                       /**
+                                        * Same function as before, but now
+                                        * including the possibility to use
+                                        * rectangular full_matrices and
+                                        * different local-to-global indexing
+                                        * on rows and columns, respectively.
+                                        */
+      void set (const std::vector<unsigned int> &row_indices,
+                const std::vector<unsigned int> &col_indices,
+                const FullMatrix<PetscScalar>   &full_matrix,
+                const bool                       elide_zero_values = false);
+
+                                       /**
+                                        * Set several elements in the
+                                        * specified row of the matrix with
+                                        * column indices as given by
+                                        * <tt>col_indices</tt> to the
+                                        * respective value.
+                                        *
+                                        * If the present object (from a
+                                        * derived class of this one) happens
+                                        * to be a sparse matrix, then this
+                                        * function adds some new entries to
+                                        * the matrix if they didn't exist
+                                        * before, very much in contrast to
+                                        * the SparseMatrix class which
+                                        * throws an error if the entry does
+                                        * not exist.
+                                        *
+                                        * The optional parameter
+                                        * <tt>elide_zero_values</tt> can be
+                                        * used to specify whether zero
+                                        * values should be inserted anyway
+                                        * or they should be filtered
+                                        * away. The default value is
+                                        * <tt>false</tt>, i.e., even zero
+                                        * values are inserted/replaced.
+                                        */
+      void set (const unsigned int               row,
+                const std::vector<unsigned int> &col_indices,
+                const std::vector<PetscScalar>  &values,
+                const bool                       elide_zero_values = false);
+
+                                       /**
+                                        * Set several elements in the given
+                                        * row of the sparse matrix: the
+                                        * columns are given by
+                                        * <tt>col_indices</tt> and the new
+                                        * values by <tt>values</tt>.
+                                        *
+                                        * If the present object (from a
+                                        * derived class of this one) happens
+                                        * to be a sparse matrix, then this
+                                        * function adds some new entries to
+                                        * the matrix if they didn't exist
+                                        * before, very much in contrast to
+                                        * the SparseMatrix class which
+                                        * throws an error if the entry does
+                                        * not exist.
+                                        *
+                                        * The optional parameter
+                                        * <tt>elide_zero_values</tt> can be
+                                        * used to specify whether zero
+                                        * values should be inserted anyway
+                                        * or they should be filtered
+                                        * away. The default value is
+                                        * <tt>false</tt>, i.e., even zero
+                                        * values are inserted/replaced.
+                                        */
+      void set (const unsigned int  row,
+                const unsigned int  n_cols,
+                const unsigned int *col_indices,
+                const PetscScalar  *values,
+                const bool          elide_zero_values = false);
+
+                                       /**
+                                        * Add @p value to the element
+                                        * (<i>i,j</i>).
+                                        *
+                                        * If the present object (from a
+                                        * derived class of this one) happens
+                                        * to be a sparse matrix, then this
+                                        * function adds a new entry to the
+                                        * matrix if it didn't exist before,
+                                        * very much in contrast to the
+                                        * SparseMatrix class which throws an
+                                        * error if the entry does not exist.
+                                        * If <tt>value</tt> is not a finite
+                                        * number an exception is thrown.
+                                        */
+      void add (const unsigned int i,
+                const unsigned int j,
+                const PetscScalar value);
+
+                                       /**
+                                        * Add all elements given in a
+                                        * FullMatrix<double> into sparse
+                                        * matrix locations given by
+                                        * <tt>indices</tt>. In other words,
+                                        * this function adds the elements in
+                                        * <tt>full_matrix</tt> to the
+                                        * respective entries in calling
+                                        * matrix, using the local-to-global
+                                        * indexing specified by
+                                        * <tt>indices</tt> for both the rows
+                                        * and the columns of the
+                                        * matrix. This function assumes a
+                                        * quadratic sparse matrix and a
+                                        * quadratic full_matrix, the usual
+                                        * situation in FE calculations.
+                                        *
+                                        * If the present object (from a
+                                        * derived class of this one) happens
+                                        * to be a sparse matrix, then this
+                                        * function adds some new entries to
+                                        * the matrix if they didn't exist
+                                        * before, very much in contrast to
+                                        * the SparseMatrix class which
+                                        * throws an error if the entry does
+                                        * not exist.
+                                        *
+                                        * The optional parameter
+                                        * <tt>elide_zero_values</tt> can be
+                                        * used to specify whether zero
+                                        * values should be added anyway or
+                                        * these should be filtered away and
+                                        * only non-zero data is added. The
+                                        * default value is <tt>true</tt>,
+                                        * i.e., zero values won't be added
+                                        * into the matrix.
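+                                        *
+                                        * A sketch mirroring the set()
+                                        * example above, but accumulating
+                                        * contributions (names are
+                                        * illustrative):
+                                        * @code
+                                        * matrix.add (local_dof_indices, cell_matrix);
+                                        * // zeros in cell_matrix are filtered away by default
+                                        * @endcode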
+                                        */
+      void add (const std::vector<unsigned int> &indices,
+                const FullMatrix<PetscScalar>   &full_matrix,
+                const bool                       elide_zero_values = true);
+
+                                       /**
+                                        * Same function as before, but now
+                                        * including the possibility to use
+                                        * rectangular full_matrices and
+                                        * different local-to-global indexing
+                                        * on rows and columns, respectively.
+                                        */
+      void add (const std::vector<unsigned int> &row_indices,
+                const std::vector<unsigned int> &col_indices,
+                const FullMatrix<PetscScalar>   &full_matrix,
+                const bool                       elide_zero_values = true);
+
+                                       /**
+                                        * Add the values given by
+                                        * <tt>values</tt> to the elements in
+                                        * the specified row of the matrix, at
+                                        * the column positions given by
+                                        * <tt>col_indices</tt>.
+                                        *
+                                        * If the present object (from a
+                                        * derived class of this one) happens
+                                        * to be a sparse matrix, then this
+                                        * function adds some new entries to
+                                        * the matrix if they didn't exist
+                                        * before, very much in contrast to
+                                        * the SparseMatrix class which
+                                        * throws an error if the entry does
+                                        * not exist.
+                                        *
+                                        * The optional parameter
+                                        * <tt>elide_zero_values</tt> can be
+                                        * used to specify whether zero
+                                        * values should be added anyway or
+                                        * these should be filtered away and
+                                        * only non-zero data is added. The
+                                        * default value is <tt>true</tt>,
+                                        * i.e., zero values won't be added
+                                        * into the matrix.
+                                        */
+      void add (const unsigned int               row,
+                const std::vector<unsigned int> &col_indices,
+                const std::vector<PetscScalar>  &values,
+                const bool                       elide_zero_values = true);
+
+                                       /**
+                                        * Add an array of values, given by
+                                        * <tt>values</tt>, to the given
+                                        * global matrix row, at the columns
+                                        * specified by <tt>col_indices</tt>.
+                                        *
+                                        * If the present object (from a
+                                        * derived class of this one) happens
+                                        * to be a sparse matrix, then this
+                                        * function adds some new entries to
+                                        * the matrix if they didn't exist
+                                        * before, very much in contrast to
+                                        * the SparseMatrix class which
+                                        * throws an error if the entry does
+                                        * not exist.
+                                        *
+                                        * The optional parameter
+                                        * <tt>elide_zero_values</tt> can be
+                                        * used to specify whether zero
+                                        * values should be added anyway or
+                                        * these should be filtered away and
+                                        * only non-zero data is added. The
+                                        * default value is <tt>true</tt>,
+                                        * i.e., zero values won't be added
+                                        * into the matrix.
+                                        */
+      void add (const unsigned int  row,
+                const unsigned int  n_cols,
+                const unsigned int *col_indices,
+                const PetscScalar  *values,
+                const bool          elide_zero_values = true,
+                const bool          col_indices_are_sorted = false);
+
+                                       /**
+                                        * Remove all elements from
+                                        * this <tt>row</tt> by setting
+                                        * them to zero. The function
+                                        * does not modify the number
+                                        * of allocated nonzero
+                                        * entries, it only sets some
+                                        * entries to zero. It may drop
+                                        * them from the sparsity
+                                        * pattern, though (but retains
+                                        * the allocated memory in case
+                                        * new entries are again added
+                                        * later).
+                                        *
+                                        * This operation is used in
+                                        * eliminating constraints (e.g. due to
+                                        * hanging nodes) and makes sure that
+                                        * we can write this modification to
+                                        * the matrix without having to read
+                                        * entries (such as the locations of
+                                        * non-zero elements) from it --
+                                        * without this operation, removing
+                                        * constraints on parallel matrices is
+                                        * a rather complicated procedure.
+                                        *
+                                        * The second parameter can be used to
+                                        * set the diagonal entry of this row
+                                        * to a value different from zero. The
+                                        * default is to set it to zero.
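+                                        *
+                                        * A sketch for a typical use when
+                                        * eliminating a constrained degree
+                                        * of freedom (the row index is
+                                        * illustrative):
+                                        * @code
+                                        * // zero out the row, put 1 on the diagonal
+                                        * matrix.clear_row (constrained_row, 1.0);
+                                        * @endcode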
+                                        */
+      void clear_row (const unsigned int row,
+                      const PetscScalar  new_diag_value = 0);
+
+                                       /**
+                                        * Same as clear_row(), except that it
+                                        * works on a number of rows at once.
+                                        *
+                                        * The second parameter can be used to
+                                        * set the diagonal entries of all
+                                        * cleared rows to something different
+                                        * from zero. Note that all of these
+                                        * diagonal entries get the same value
+                                        * -- if you want different values for
+                                        * the diagonal entries, you have to
+                                        * set them by hand.
+                                        */
+      void clear_rows (const std::vector<unsigned int> &rows,
+                       const PetscScalar                new_diag_value = 0);
+
+                                       /**
+                                        * PETSc matrices store their own
+                                        * sparsity patterns. So, in analogy to
+                                        * our own SparsityPattern class,
+                                        * this function compresses the
+                                        * sparsity pattern and allows the
+                                        * resulting matrix to be used in all
+                                        * other operations where before only
+                                        * assembly functions were
+                                        * allowed. This function must
+                                        * therefore be called once you have
+                                        * assembled the matrix.
+                                        *
+                                        * See @ref GlossCompress "Compressing distributed objects"
+                                        * for more information.
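+                                        *
+                                        * A short sketch: after writing
+                                        * entries with set(), pass
+                                        * VectorOperation::insert; after
+                                        * accumulating with add(), pass
+                                        * VectorOperation::add:
+                                        * @code
+                                        * matrix.set (i, j, value);
+                                        * matrix.compress (VectorOperation::insert);
+                                        * @endcode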
+                                        */
+      void compress (::dealii::VectorOperation::values operation
+                     = ::dealii::VectorOperation::unknown);
+
+                                       /**
+                                        * Return the value of the entry
+                                        * (<i>i,j</i>).  This may be an
+                                        * expensive operation and you should
+                                        * always take care where to call this
+                                        * function. In contrast to the
+                                        * respective function in the deal.II
+                                        * @p SparseMatrix class, we don't
+                                        * throw an exception if the
+                                        * entry doesn't exist in the sparsity
+                                        * pattern of this class, since PETSc
+                                        * does not transmit this information.
+                                        *
+                                        * This function is therefore exactly
+                                        * equivalent to the <tt>el()</tt> function.
+                                        */
+      PetscScalar operator () (const unsigned int i,
+                               const unsigned int j) const;
+
+                                       /**
+                                        * Return the value of the matrix entry
+                                        * (<i>i,j</i>). If this entry does not
+                                        * exist in the sparsity pattern, then
+                                        * zero is returned. While this may be
+                                        * convenient in some cases, note that
+                                        * it is simple to write algorithms
+                                        * that are slow compared to an optimal
+                                        * solution, since the sparsity of the
+                                        * matrix is not used.
+                                        */
+      PetscScalar el (const unsigned int i,
+                      const unsigned int j) const;
+
+                                       /**
+                                        * Return the main diagonal
+                                        * element in the <i>i</i>th
+                                        * row. This function throws an
+                                        * error if the matrix is not
+                                        * quadratic.
+                                        *
+                                        * Since we do not have direct access
+                                        * to the underlying data structure,
+                                        * this function is no faster than the
+                                        * elementwise access using the el()
+                                        * function. However, we provide this
+                                        * function for compatibility with the
+                                        * SparseMatrix class.
+                                        */
+      PetscScalar diag_element (const unsigned int i) const;
+
+                                       /**
+                                        * Return the number of rows in this
+                                        * matrix.
+                                        */
+      unsigned int m () const;
+
+                                       /**
+                                        * Return the number of columns in this
+                                        * matrix.
+                                        */
+      unsigned int n () const;
+
+                                       /**
+                                        * Return the local dimension of the
+                                        * matrix, i.e. the number of rows
+                                        * stored on the present MPI
+                                        * process. For sequential matrices,
+                                        * this number is the same as m(),
+                                        * but for parallel matrices it may be
+                                        * smaller.
+                                        *
+                                        * To figure out which elements
+                                        * exactly are stored locally,
+                                        * use local_range().
+                                        */
+      unsigned int local_size () const;
+
+                                       /**
+                                        * Return a pair of indices
+                                        * indicating which rows of
+                                        * this matrix are stored
+                                        * locally. The first number is
+                                        * the index of the first
+                                        * row stored, the second
+                                        * the index of the one past
+                                        * the last one that is stored
+                                        * locally. If this is a
+                                        * sequential matrix, then the
+                                        * result will be the pair
+                                        * (0,m()), otherwise it will be
+                                        * a pair (i,i+n), where
+                                        * <tt>n=local_size()</tt>.
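+                                        *
+                                        * A sketch of looping over the
+                                        * locally stored rows (hedged:
+                                        * <tt>n_local_entries</tt> is an
+                                        * illustrative variable):
+                                        * @code
+                                        * const std::pair<unsigned int,unsigned int>
+                                        *   range = matrix.local_range ();
+                                        * unsigned int n_local_entries = 0;
+                                        * for (unsigned int row=range.first; row<range.second; ++row)
+                                        *   n_local_entries += matrix.row_length (row);
+                                        * @endcode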
+                                        */
+      std::pair<unsigned int, unsigned int>
+      local_range () const;
+
+                                       /**
+                                        * Return whether @p index is
+                                        * in the local range or not,
+                                        * see also local_range().
+                                        */
+      bool in_local_range (const unsigned int index) const;
+
+                                       /**
+                                        * Return a reference to the MPI
+                                        * communicator object in use with this
+                                        * matrix. This function has to be
+                                        * implemented in derived classes.
+                                        */
+      virtual const MPI_Comm & get_mpi_communicator () const = 0;
+
+                                       /**
+                                        * Return the number of nonzero
+                                        * elements of this
+                                        * matrix. Actually, it returns
+                                        * the number of entries in the
+                                        * sparsity pattern; if any of
+                                        * the entries should happen to
+                                        * be zero, it is counted anyway.
+                                        */
+      unsigned int n_nonzero_elements () const;
+
+                                       /**
+                                        * Number of entries in a specific row.
+                                        */
+      unsigned int row_length (const unsigned int row) const;
+
+                                       /**
+                                        * Return the l1-norm of the matrix,
+                                        * that is
+                                        * $|M|_1 = \max_j \sum_i |M_{ij}|$
+                                        * (maximum over all column sums).
+                                        * This is the natural matrix norm
+                                        * that is compatible with the l1-norm
+                                        * for vectors, i.e.
+                                        * $|Mv|_1 \leq |M|_1 |v|_1$
+                                        * (cf. Haemmerlin-Hoffmann:
+                                        * Numerische Mathematik).
+                                        */
+      PetscReal l1_norm () const;
+
+                                       /**
+                                        * Return the linfty-norm of the
+                                        * matrix, that is
+                                        * $|M|_\infty = \max_i \sum_j |M_{ij}|$
+                                        * (maximum over all row sums).
+                                        * This is the natural matrix norm
+                                        * that is compatible with the
+                                        * linfty-norm of vectors, i.e.
+                                        * $|Mv|_\infty \leq |M|_\infty |v|_\infty$
+                                        * (cf. Haemmerlin-Hoffmann:
+                                        * Numerische Mathematik).
+                                        */
+      PetscReal linfty_norm () const;
+
+                                       /**
+                                        * Return the Frobenius norm of the
+                                        * matrix, i.e. the square root of the
+                                        * sum of squares of all entries in the
+                                        * matrix.
+                                        */
+      PetscReal frobenius_norm () const;
+
+
+                                       /**
+                                        * Return the square of the norm
+                                        * of the vector $v$ with respect
+                                        * to the norm induced by this
+                                        * matrix,
+                                        * i.e. $\left(v,Mv\right)$. This
+                                        * is useful, e.g. in the finite
+                                        * element context, where the
+                                        * $L_2$ norm of a function
+                                        * equals the matrix norm with
+                                        * respect to the mass matrix of
+                                        * the vector representing the
+                                        * nodal values of the finite
+                                        * element function.
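+                                        *
+                                        * A sketch (hedged:
+                                        * <tt>mass_matrix</tt> and
+                                        * <tt>solution</tt> are illustrative
+                                        * names):
+                                        * @code
+                                        * // squared L2 norm of the FE function
+                                        * // whose nodal values are 'solution'
+                                        * const PetscScalar norm_square
+                                        *   = mass_matrix.matrix_norm_square (solution);
+                                        * @endcode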
+                                        *
+                                        * Obviously, the matrix needs to
+                                        * be quadratic for this operation.
+                                        *
+                                        * The implementation of this function
+                                        * is not as efficient as the one in
+                                        * the @p MatrixBase class used in
+                                        * deal.II (i.e. the original one, not
+                                        * the PETSc wrapper class) since PETSc
+                                        * doesn't support this operation and
+                                        * needs a temporary vector.
+                                        *
+                                        * Note that if the current object
+                                        * represents a parallel distributed
+                                        * matrix (of type
+                                        * PETScWrappers::MPI::SparseMatrix),
+                                        * then the given vector has to be
+                                        * a distributed vector as
+                                        * well. Conversely, if the matrix is
+                                        * not distributed, then neither
+                                        * may the vector be.
+                                        */
+      PetscScalar matrix_norm_square (const VectorBase &v) const;
+
+
+                                       /**
+                                        * Compute the matrix scalar
+                                        * product $\left(u,Mv\right)$.
+                                        *
+                                        * The implementation of this function
+                                        * is not as efficient as the one in
+                                        * the @p MatrixBase class used in
+                                        * deal.II (i.e. the original one, not
+                                        * the PETSc wrapper class) since PETSc
+                                        * doesn't support this operation and
+                                        * needs a temporary vector.
+                                        *
+                                        * Note that if the current object
+                                        * represents a parallel distributed
+                                        * matrix (of type
+                                        * PETScWrappers::MPI::SparseMatrix),
+                                        * then both vectors have to be
+                                        * distributed vectors as
+                                        * well. Conversely, if the matrix is
+                                        * not distributed, then neither of the
+                                        * vectors may be.
+                                        */
+      PetscScalar matrix_scalar_product (const VectorBase &u,
+					 const VectorBase &v) const;
+
+
+#if DEAL_II_PETSC_VERSION_GTE(3,1,0)
+                                       /**
+                                        * Return the trace of the
+                                        * matrix, i.e. the sum of all
+                                        * diagonal entries in the
+                                        * matrix.
+                                        */
+      PetscReal trace () const;
+#endif
+
+                                       /**
+                                        * Multiply the entire matrix by a
+                                        * fixed factor.
+                                        */
+      MatrixBase & operator *= (const PetscScalar factor);
+
+                                       /**
+                                        * Divide the entire matrix by a
+                                        * fixed factor.
+                                        */
+      MatrixBase & operator /= (const PetscScalar factor);
+
+                                       /**
+                                        * Matrix-vector multiplication:
+                                        * let <i>dst = M*src</i> with
+                                        * <i>M</i> being this matrix.
+                                        *
+                                        * Source and destination must
+                                        * not be the same vector.
+                                        *
+                                        * Note that if the current object
+                                        * represents a parallel distributed
+                                        * matrix (of type
+                                        * PETScWrappers::MPI::SparseMatrix),
+                                        * then both vectors have to be
+                                        * distributed vectors as
+                                        * well. Conversely, if the matrix is
+                                        * not distributed, then neither of the
+                                        * vectors may be.
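+                                        *
+                                        * For example (illustrative names):
+                                        * @code
+                                        * A.vmult (y, x);    // y = A*x
+                                        * // A.vmult (x, x); // forbidden: src and dst must differ
+                                        * @endcode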
+                                        */
+      void vmult (VectorBase       &dst,
+                  const VectorBase &src) const;
+
+                                       /**
+                                        * Matrix-vector multiplication: let
+                                        * <i>dst = M<sup>T</sup>*src</i> with
+                                        * <i>M</i> being this matrix. This
+                                        * function does the same as vmult()
+                                        * but takes the transposed matrix.
+                                        *
+                                        * Source and destination must
+                                        * not be the same vector.
+                                        *
+                                        * Note that if the current object
+                                        * represents a parallel distributed
+                                        * matrix (of type
+                                        * PETScWrappers::MPI::SparseMatrix),
+                                        * then both vectors have to be
+                                        * distributed vectors as
+                                        * well. Conversely, if the matrix is
+                                        * not distributed, then neither of the
+                                        * vectors may be.
+                                        */
+      void Tvmult (VectorBase       &dst,
+                   const VectorBase &src) const;
+
+                                       /**
+                                        * Adding Matrix-vector
+                                        * <i>M*src</i> to <i>dst</i>
+                                        * <i>M*src</i> on <i>dst</i>
+                                        * with <i>M</i> being this
+                                        * matrix.
+                                        *
+                                        * Source and destination must
+                                        * not be the same vector.
+                                        *
+                                        * Note that if the current object
+                                        * represents a parallel distributed
+                                        * matrix (of type
+                                        * PETScWrappers::MPI::SparseMatrix),
+                                        * then both vectors have to be
+                                        * distributed vectors as
+                                        * well. Conversely, if the matrix is
+                                        * not distributed, then neither of the
+                                        * vectors may be.
+                                        */
+      void vmult_add (VectorBase       &dst,
+                      const VectorBase &src) const;
+
+                                       /**
+                                        * Adding Matrix-vector
+                                        * multiplication. Add
+                                        * <i>M<sup>T</sup>*src</i> to
+                                        * <i>dst</i> with <i>M</i> being
+                                        * this matrix. This function
+                                        * does the same as vmult_add()
+                                        * but takes the transposed
+                                        * matrix.
+                                        *
+                                        * Source and destination must
+                                        * not be the same vector.
+                                        *
+                                        * Note that if the current object
+                                        * represents a parallel distributed
+                                        * matrix (of type
+                                        * PETScWrappers::MPI::SparseMatrix),
+                                        * then both vectors have to be
+                                        * distributed vectors as
+                                        * well. Conversely, if the matrix is
+                                        * not distributed, then neither of the
+                                        * vectors may be.
+                                        */
+      void Tvmult_add (VectorBase       &dst,
+                       const VectorBase &src) const;
+
+
+                                       /**
+                                        * Compute the residual of an
+                                        * equation <i>Mx=b</i>, where
+                                        * the residual is defined to be
+                                        * <i>r=b-Mx</i>. Write the
+                                        * residual into
+                                        * @p dst. The
+                                        * <i>l<sub>2</sub></i> norm of
+                                        * the residual vector is
+                                        * returned.
+                                        *
+                                        * Source <i>x</i> and destination
+                                        * <i>dst</i> must not be the same
+                                        * vector.
+                                        *
+                                        * Note that if the current object
+                                        * represents a parallel distributed
+                                        * matrix (of type
+                                        * PETScWrappers::MPI::SparseMatrix),
+                                        * then all vectors have to be
+                                        * distributed vectors as
+                                        * well. Conversely, if the matrix is
+                                        * not distributed, then neither of the
+                                        * vectors may be.
+                                        */
+      PetscScalar residual (VectorBase       &dst,
+                            const VectorBase &x,
+                            const VectorBase &b) const;
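+                                       // For example (a sketch; `M', `dst',
+                                       // `x' and `b' are assumed to be
+                                       // compatibly partitioned):
+                                       //
+                                       //   const PetscScalar norm
+                                       //     = M.residual (dst, x, b);  // dst = b - M*x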
+
+                                       /**
+                                        * STL-like iterator pointing to the
+                                        * first entry.
+                                        */
+      const_iterator begin () const;
+
+                                       /**
+                                        * Final iterator.
+                                        */
+      const_iterator end () const;
+
+                                       /**
+                                        * STL-like iterator pointing to the
+                                        * first entry of row @p r.
+                                        *
+                                        * Note that if the given row is empty,
+                                        * i.e. does not contain any nonzero
+                                        * entries, then the iterator returned by
+                                        * this function equals
+                                        * <tt>end(r)</tt>. Note also that the
+                                        * iterator may not be dereferenceable in
+                                        * that case.
+                                        */
+      const_iterator begin (const unsigned int r) const;
+
+                                       /**
+                                        * Final iterator of row <tt>r</tt>. It
+                                        * points to the first element past the
+                                        * end of row @p r, or past the end of
+                                        * the entire sparsity pattern.
+                                        *
+                                        * Note that the end iterator is not
+                                        * necessarily dereferenceable. This is in
+                                        * particular the case if it is the end
+                                        * iterator for the last row of a matrix.
+                                        */
+      const_iterator end (const unsigned int r) const;
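+                                       // A sketch of iterating over all
+                                       // entries of row `r' of an assumed
+                                       // matrix `M':
+                                       //
+                                       //   for (MatrixBase::const_iterator p = M.begin(r);
+                                       //        p != M.end(r); ++p)
+                                       //     std::cout << p->column() << ' '
+                                       //               << p->value() << std::endl;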
+
+                                       /**
+                                        * Conversion operator to gain access
+                                        * to the underlying PETSc type. If you
+                                        * do this, you deprive this class of
+                                        * information it may need, so this
+                                        * conversion operator should only be
+                                        * used if you know what you are doing. In
+                                        * particular, it should only be used
+                                        * for read-only operations on the
+                                        * matrix.
+                                        */
+      operator Mat () const;
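+                                       // Read-only use through the raw
+                                       // PETSc handle, e.g. querying the
+                                       // global size (a sketch; `M' is an
+                                       // assumed matrix):
+                                       //
+                                       //   PetscInt n_rows, n_cols;
+                                       //   MatGetSize (static_cast<Mat>(M), &n_rows, &n_cols);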
+
+                                        /**
+                                         * Make an in-place transpose of a
+                                         * matrix.
+                                         */
+      void transpose ();
+
+                                        /**
+                                         * Test whether a matrix is
+                                         * symmetric. The default
+                                         * tolerance is 1000 times
+                                         * the 32-bit machine
+                                         * precision.
+                                         */
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      PetscTruth
+#else
+      PetscBool
+#endif
+      is_symmetric (const double tolerance = 1.e-12);
+
+                                        /**
+                                         * Test whether a matrix is
+                                         * Hermitian, i.e. it is the
+                                         * complex conjugate of its
+                                         * transpose. The default
+                                         * tolerance is 1000 times
+                                         * the 32-bit machine
+                                         * precision.
+                                         */
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+      PetscTruth
+#else
+      PetscBool
+#endif
+      is_hermitian (const double tolerance = 1.e-12);
+
+                                        /**
+                                         * Print the matrix to the terminal
+                                         * using PETSc's ASCII viewer
+                                         * facilities. Currently, this function
+                                         * simply writes the non-zero elements
+                                         * of the matrix to standard output.
+                                         */
+      void write_ascii ();
+
+                                       /**
+                                        *  Returns the number of bytes consumed
+                                        *  by this matrix on this CPU.
+                                        */
+      std::size_t memory_consumption() const;
+
+                                       /**
+                                        * Exception
+                                        */
+      DeclException1 (ExcPETScError,
+                      int,
+                      << "An error with error number " << arg1
+                      << " occurred while calling a PETSc function");
+                                       /**
+                                        * Exception
+                                        */
+      DeclException0 (ExcSourceEqualsDestination);
+
+      /**
+        * Exception.
+        */
+      DeclException2 (ExcWrongMode,
+                      int, int,
+                      << "You tried to do a "
+                      << (arg1 == 1 ?
+                          "'set'" :
+                          (arg1 == 2 ?
+                           "'add'" : "???"))
+                      << " operation but the matrix is currently in "
+                      << (arg2 == 1 ?
+                          "'set'" :
+                          (arg2 == 2 ?
+                           "'add'" : "???"))
+                      << " mode. You first have to call 'compress()'.");
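+                                       // A sketch of the call pattern this
+                                       // exception enforces (`M' is an
+                                       // assumed matrix):
+                                       //
+                                       //   M.set (0, 0, 1.0);
+                                       //   M.compress ();       // leave 'set' mode
+                                       //   M.add (0, 0, 2.0);   // now in 'add' mode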
+
+    protected:
+                                       /**
+                                        * A generic matrix object in
+                                        * PETSc. The actual type, a sparse
+                                        * matrix, is set in the constructor.
+                                        */
+      Mat matrix;
+
+                                       /**
+                                        * PETSc does not allow mixing additions
+                                        * to matrix entries with overwriting
+                                        * them (this makes synchronisation of
+                                        * parallel computations
+                                        * simpler). Since the interfaces of the
+                                        * existing classes do not support the
+                                        * notion of not interleaving things,
+                                        * we have to emulate this
+                                        * ourselves. The way we do it is to
+                                        * store, for each access operation,
+                                        * whether it is an insertion or an
+                                        * addition. If the previous one was of
+                                        * a different type, then we first have
+                                        * to flush the PETSc buffers;
+                                        * otherwise, we can simply go on.
+                                        *
+                                        * The following structure and variable
+                                        * declare and store the previous
+                                        * state.
+                                        */
+      struct LastAction
+      {
+          enum Values { none, insert, add };
+      };
+
+                                       /**
+                                        * Store whether the last action was a
+                                        * write or add operation.
+                                        */
+      LastAction::Values last_action;
+
+                                       /**
+                                        * Ensure that the add/set mode that
+                                        * is required for actions following
+                                        * this call is compatible with the
+                                        * current mode.
+                                        * Should be called from all internal
+                                        * functions accessing matrix elements.
+                                        */
+      void prepare_action(const LastAction::Values new_action);
+
+                                       /**
+                                        * For some matrix storage
+                                        * formats, in particular for the
+                                        * PETSc distributed blockmatrices,
+                                        * set and add operations on
+                                        * individual elements can not be
+                                        * freely mixed. Rather, one has
+                                        * to synchronize operations when
+                                        * one wants to switch from
+                                        * setting elements to adding to
+                                        * elements.
+                                        * BlockMatrixBase automatically
+                                        * synchronizes the access by
+                                        * calling this helper function
+                                        * for each block.
+                                        * This function ensures that the
+                                        * matrix is in a state that
+                                        * allows adding elements; if it
+                                        * previously already was in this
+                                        * state, the function does
+                                        * nothing.
+                                        */
+      void prepare_add();
+                                       /**
+                                        * Same as prepare_add() but
+                                        * prepare the matrix for setting
+                                        * elements if the representation
+                                        * of elements in this class
+                                        * requires such an operation.
+                                        */
+      void prepare_set();
+
+
+
+    private:
+                                       /**
+                                        * An internal array of integer
+                                        * values that is used to store the
+                                        * column indices when
+                                        * adding/inserting local data into
+                                        * the (large) sparse matrix.
+                                        */
+#ifdef PETSC_USE_64BIT_INDICES
+      std::vector<PetscInt> column_indices;
+#else
+      std::vector<int> column_indices;
+#endif
+
+                                       /**
+                                        * An internal array of scalar values
+                                        * that is used to store the matrix
+                                        * entries when adding/inserting
+                                        * local data into the (large) sparse
+                                        * matrix.
+                                        */
+      std::vector<PetscScalar> column_values;
+
+
+                                       /**
+                                        *  To allow calling protected
+                                        *  prepare_add() and
+                                        *  prepare_set().
+                                        */
+      template <class> friend class dealii::BlockMatrixBase;
+
+
+    public:                            // added by shuqiangwang
+      void copy_from(const MatrixBase &source);
+      void add(double factor, const MatrixBase &source);
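+      // A sketch of the intended use of the two helpers above (`A' and
+      // `B' are assumed matrices with identical sparsity patterns; the
+      // semantics of add() are presumed to be MatAXPY-like):
+      //
+      //   A.copy_from (B);    // A = B
+      //   A.add (0.5, B);     // A += 0.5*B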
+  };
+
+
+
+#ifndef DOXYGEN
+// -------------------------- inline and template functions ----------------------
+
+
+  namespace MatrixIterators
+  {
+
+    inline
+    const_iterator::Accessor::
+    Accessor (const MatrixBase   *matrix,
+              const unsigned int  row,
+              const unsigned int  index)
+                    :
+                    matrix(const_cast<MatrixBase*>(matrix)),
+                    a_row(row),
+                    a_index(index)
+    {
+      visit_present_row ();
+    }
+
+
+    inline
+    unsigned int
+    const_iterator::Accessor::row() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+      return a_row;
+    }
+
+
+    inline
+    unsigned int
+    const_iterator::Accessor::column() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+      return (*colnum_cache)[a_index];
+    }
+
+
+    inline
+    unsigned int
+    const_iterator::Accessor::index() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+      return a_index;
+    }
+
+
+    inline
+    PetscScalar
+    const_iterator::Accessor::value() const
+    {
+      Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+      return (*value_cache)[a_index];
+    }
+
+
+    inline
+    const_iterator::
+    const_iterator(const MatrixBase   *matrix,
+                   const unsigned int  row,
+                   const unsigned int  index)
+                    :
+                    accessor(matrix, row, index)
+    {}
+
+
+
+    inline
+    const_iterator &
+    const_iterator::operator++ ()
+    {
+      Assert (accessor.a_row < accessor.matrix->m(), ExcIteratorPastEnd());
+
+      ++accessor.a_index;
+
+                                       // if at end of line: do one step, then
+                                       // cycle until we find a row with a
+                                       // nonzero number of entries
+      if (accessor.a_index >= accessor.colnum_cache->size())
+        {
+          accessor.a_index = 0;
+          ++accessor.a_row;
+
+          while ((accessor.a_row < accessor.matrix->m())
+                 &&
+                 (accessor.matrix->row_length(accessor.a_row) == 0))
+            ++accessor.a_row;
+
+          accessor.visit_present_row();
+        }
+      return *this;
+    }
+
+
+    inline
+    const_iterator
+    const_iterator::operator++ (int)
+    {
+      const const_iterator old_state = *this;
+      ++(*this);
+      return old_state;
+    }
+
+
+    inline
+    const const_iterator::Accessor &
+    const_iterator::operator* () const
+    {
+      return accessor;
+    }
+
+
+    inline
+    const const_iterator::Accessor *
+    const_iterator::operator-> () const
+    {
+      return &accessor;
+    }
+
+
+    inline
+    bool
+    const_iterator::
+    operator == (const const_iterator& other) const
+    {
+      return (accessor.a_row == other.accessor.a_row &&
+              accessor.a_index == other.accessor.a_index);
+    }
+
+
+    inline
+    bool
+    const_iterator::
+    operator != (const const_iterator& other) const
+    {
+      return ! (*this == other);
+    }
+
+
+    inline
+    bool
+    const_iterator::
+    operator < (const const_iterator& other) const
+    {
+      return (accessor.row() < other.accessor.row() ||
+              (accessor.row() == other.accessor.row() &&
+               accessor.index() < other.accessor.index()));
+    }
+
+  }
+
+
+
+                                        // Inline the set() and add()
+                                        // functions, since they will be
+                                        // called frequently, and the
+                                        // compiler can optimize away
+                                        // some unnecessary loops when
+                                        // the sizes are given at
+                                        // compile time.
+  inline
+  void
+  MatrixBase::set (const unsigned int i,
+                   const unsigned int j,
+                   const PetscScalar  value)
+  {
+    Assert (numbers::is_finite(value), ExcNumberNotFinite());
+
+    set (i, 1, &j, &value, false);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::set (const std::vector<unsigned int> &indices,
+                   const FullMatrix<PetscScalar>   &values,
+                   const bool                       elide_zero_values)
+  {
+    Assert (indices.size() == values.m(),
+            ExcDimensionMismatch(indices.size(), values.m()));
+    Assert (values.m() == values.n(), ExcNotQuadratic());
+
+    for (unsigned int i=0; i<indices.size(); ++i)
+      set (indices[i], indices.size(), &indices[0], &values(i,0),
+           elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::set (const std::vector<unsigned int> &row_indices,
+                   const std::vector<unsigned int> &col_indices,
+                   const FullMatrix<PetscScalar>   &values,
+                   const bool                       elide_zero_values)
+  {
+    Assert (row_indices.size() == values.m(),
+            ExcDimensionMismatch(row_indices.size(), values.m()));
+    Assert (col_indices.size() == values.n(),
+            ExcDimensionMismatch(col_indices.size(), values.n()));
+
+    for (unsigned int i=0; i<row_indices.size(); ++i)
+      set (row_indices[i], col_indices.size(), &col_indices[0], &values(i,0),
+           elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::set (const unsigned int               row,
+                   const std::vector<unsigned int> &col_indices,
+                   const std::vector<PetscScalar>  &values,
+                   const bool                       elide_zero_values)
+  {
+    Assert (col_indices.size() == values.size(),
+            ExcDimensionMismatch(col_indices.size(), values.size()));
+
+    set (row, col_indices.size(), &col_indices[0], &values[0],
+         elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::set (const unsigned int  row,
+                   const unsigned int  n_cols,
+                   const unsigned int *col_indices,
+                   const PetscScalar  *values,
+                   const bool          elide_zero_values)
+  {
+    prepare_action(LastAction::insert);
+
+#ifdef PETSC_USE_64BIT_INDICES
+    const PetscInt petsc_i = row;
+    PetscInt * col_index_ptr;
+#else
+    const int petsc_i = row;
+    int * col_index_ptr;
+#endif
+    PetscScalar const* col_value_ptr;
+    int n_columns;
+
+                                   // If we don't elide zeros, the pointers
+                                   // are already available...
+#ifndef PETSC_USE_64BIT_INDICES
+    if (elide_zero_values == false)
+      {
+        col_index_ptr = (int*)col_indices;
+        col_value_ptr = values;
+        n_columns = n_cols;
+      }
+    else
+#endif
+      {
+                                   // Otherwise, extract nonzero values in
+                                   // each row and get the respective index.
+        if (column_indices.size() < n_cols)
+          {
+            column_indices.resize(n_cols);
+            column_values.resize(n_cols);
+          }
+
+        n_columns = 0;
+        for (unsigned int j=0; j<n_cols; ++j)
+          {
+            const PetscScalar value = values[j];
+            Assert (numbers::is_finite(value), ExcNumberNotFinite());
+            if (value != PetscScalar())
+              {
+                column_indices[n_columns] = col_indices[j];
+                column_values[n_columns] = value;
+                n_columns++;
+              }
+          }
+        Assert(n_columns <= (int)n_cols, ExcInternalError());
+
+        col_index_ptr = &column_indices[0];
+        col_value_ptr = &column_values[0];
+      }
+
+    const int ierr
+      = MatSetValues (matrix, 1, &petsc_i, n_columns, col_index_ptr,
+                      col_value_ptr, INSERT_VALUES);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  inline
+  void
+  MatrixBase::add (const unsigned int i,
+                   const unsigned int j,
+                   const PetscScalar  value)
+  {
+    Assert (numbers::is_finite(value), ExcNumberNotFinite());
+
+    if (value == PetscScalar())
+      {
+                                  // we have to do the Insert/Add
+                                  // bookkeeping in any case
+                                  // to be consistent with the MPI
+                                  // communication model (see the comments
+                                  // in the documentation of
+                                  // TrilinosWrappers::Vector), but we can
+                                  // save some work if the addend is
+                                  // zero. These actions are performed
+                                  // anyway when we pass on to the other
+                                  // function.
+        prepare_action(LastAction::add);
+
+        return;
+      }
+    else
+      add (i, 1, &j, &value, false);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::add (const std::vector<unsigned int> &indices,
+                   const FullMatrix<PetscScalar>   &values,
+                   const bool                       elide_zero_values)
+  {
+    Assert (indices.size() == values.m(),
+            ExcDimensionMismatch(indices.size(), values.m()));
+    Assert (values.m() == values.n(), ExcNotQuadratic());
+
+    for (unsigned int i=0; i<indices.size(); ++i)
+      add (indices[i], indices.size(), &indices[0], &values(i,0),
+           elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::add (const std::vector<unsigned int> &row_indices,
+                   const std::vector<unsigned int> &col_indices,
+                   const FullMatrix<PetscScalar>   &values,
+                   const bool                       elide_zero_values)
+  {
+    Assert (row_indices.size() == values.m(),
+            ExcDimensionMismatch(row_indices.size(), values.m()));
+    Assert (col_indices.size() == values.n(),
+            ExcDimensionMismatch(col_indices.size(), values.n()));
+
+    for (unsigned int i=0; i<row_indices.size(); ++i)
+      add (row_indices[i], col_indices.size(), &col_indices[0], &values(i,0),
+           elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::add (const unsigned int               row,
+                   const std::vector<unsigned int> &col_indices,
+                   const std::vector<PetscScalar>  &values,
+                   const bool                       elide_zero_values)
+  {
+    Assert (col_indices.size() == values.size(),
+            ExcDimensionMismatch(col_indices.size(), values.size()));
+
+    add (row, col_indices.size(), &col_indices[0], &values[0],
+         elide_zero_values);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::add (const unsigned int  row,
+                   const unsigned int  n_cols,
+                   const unsigned int *col_indices,
+                   const PetscScalar  *values,
+                   const bool          elide_zero_values,
+                   const bool          /*col_indices_are_sorted*/)
+  {
+    prepare_action(LastAction::add);
+
+#ifdef PETSC_USE_64BIT_INDICES
+    const PetscInt petsc_i = row;
+    PetscInt * col_index_ptr;
+#else
+    const int petsc_i = row;
+    int * col_index_ptr;
+#endif
+    PetscScalar const* col_value_ptr;
+    int n_columns;
+
+                                   // If we don't elide zeros, the pointers
+                                   // are already available...
+#ifndef PETSC_USE_64BIT_INDICES
+    if (elide_zero_values == false)
+      {
+        col_index_ptr = (int*)col_indices;
+        col_value_ptr = values;
+        n_columns = n_cols;
+      }
+    else
+#endif
+      {
+                                   // Otherwise, extract nonzero values in
+                                   // each row and get the respective index.
+        if (column_indices.size() < n_cols)
+          {
+            column_indices.resize(n_cols);
+            column_values.resize(n_cols);
+          }
+
+        n_columns = 0;
+        for (unsigned int j=0; j<n_cols; ++j)
+          {
+            const PetscScalar value = values[j];
+            Assert (numbers::is_finite(value), ExcNumberNotFinite());
+            if (value != PetscScalar())
+              {
+                column_indices[n_columns] = col_indices[j];
+                column_values[n_columns] = value;
+                n_columns++;
+              }
+          }
+        Assert(n_columns <= (int)n_cols, ExcInternalError());
+
+        col_index_ptr = &column_indices[0];
+        col_value_ptr = &column_values[0];
+      }
+
+    const int ierr
+      = MatSetValues (matrix, 1, &petsc_i, n_columns, col_index_ptr,
+                      col_value_ptr, ADD_VALUES);
+    Assert (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+
+
+
+  inline
+  PetscScalar
+  MatrixBase::operator() (const unsigned int i,
+                          const unsigned int j) const
+  {
+    return el(i,j);
+  }
+
+
+
+  inline
+  MatrixBase::const_iterator
+  MatrixBase::begin() const
+  {
+    return const_iterator(this, 0, 0);
+  }
+
+
+  inline
+  MatrixBase::const_iterator
+  MatrixBase::end() const
+  {
+    return const_iterator(this, m(), 0);
+  }
+
+
+  inline
+  MatrixBase::const_iterator
+  MatrixBase::begin(const unsigned int r) const
+  {
+    Assert (r < m(), ExcIndexRange(r, 0, m()));
+    if (row_length(r) > 0)
+      return const_iterator(this, r, 0);
+    else
+      return end (r);
+  }
+
+
+  inline
+  MatrixBase::const_iterator
+  MatrixBase::end(const unsigned int r) const
+  {
+    Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+                                     // place the iterator on the first entry
+                                     // past this line, or at the end of the
+                                     // matrix
+    for (unsigned int i=r+1; i<m(); ++i)
+      if (row_length(i) > 0)
+        return const_iterator(this, i, 0);
+
+                                     // if there is no such line, then take the
+                                     // end iterator of the matrix
+    return end();
+  }
+
+
+
+  inline
+  bool
+  MatrixBase::in_local_range (const unsigned int index) const
+  {
+#ifdef PETSC_USE_64BIT_INDICES
+    PetscInt begin, end;
+#else
+    int begin, end;
+#endif
+    const int ierr = MatGetOwnershipRange (static_cast<const Mat &>(matrix),
+                                           &begin, &end);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return ((index >= static_cast<unsigned int>(begin)) &&
+            (index < static_cast<unsigned int>(end)));
+  }
+
+
+
+  inline
+  void
+  MatrixBase::prepare_action(const LastAction::Values new_action)
+  {
+    if (last_action == LastAction::none)
+      last_action = new_action;
+    else
+      Assert (last_action == new_action,
+              ExcWrongMode (last_action, new_action));
+  }
+
+
+
+  inline
+  void
+  MatrixBase::prepare_add()
+  {
+    prepare_action(LastAction::add);
+  }
+
+
+
+  inline
+  void
+  MatrixBase::prepare_set()
+  {
+    prepare_action(LastAction::insert);
+  }
+
+#endif // DOXYGEN
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_USE_PETSC
+
+
+/*----------------------------   petsc_matrix_base.h     ---------------------------*/
+
+#endif
+/*----------------------------   petsc_matrix_base.h     ---------------------------*/

Added: branches/s-wang/for_deal.II/include/deal.II/lac/petsc_parallel_block_vector.h
===================================================================
--- branches/s-wang/for_deal.II/include/deal.II/lac/petsc_parallel_block_vector.h	                        (rev 0)
+++ branches/s-wang/for_deal.II/include/deal.II/lac/petsc_parallel_block_vector.h	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,528 @@
+//---------------------------------------------------------------------------
+//    $Id: petsc_parallel_block_vector.h 25345 2012-03-31 08:37:04Z bangerth $
+//
+//    Copyright (C) 2004, 2005, 2006, 2007, 2009, 2010, 2012 by the deal.II authors
+//
+//    This file is subject to QPL and may not be  distributed
+//    without copyright and license information. Please refer
+//    to the file deal.II/doc/license.html for the  text  and
+//    further information on this license.
+//
+//---------------------------------------------------------------------------
+#ifndef __deal2__petsc_parallel_block_vector_h
+#define __deal2__petsc_parallel_block_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_USE_PETSC
+
+#  include <deal.II/lac/petsc_parallel_vector.h>
+#  include <deal.II/lac/block_indices.h>
+#  include <deal.II/lac/block_vector_base.h>
+#  include <deal.II/lac/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace PETScWrappers
+{
+                                   // forward declaration
+  class BlockVector;
+
+  namespace MPI
+  {
+
+/*! @addtogroup PETScWrappers
+ *@{
+ */
+
+/**
+ * An implementation of block vectors based on the parallel vector class
+ * implemented in PETScWrappers. While the base class provides for most of the
+ * interface, this class handles the actual allocation of vectors and provides
+ * functions that are specific to the underlying vector type.
+ *
+ * The model of distribution of data is such that each of the blocks is
+ * distributed across all MPI processes named in the MPI communicator. That is,
+ * we do not just distribute the whole vector, but each component. In the
+ * constructors and reinit() functions, one therefore not only has to specify
+ * the sizes of the individual blocks, but also the number of elements of each
+ * of these blocks to be stored on the local process.
+ *
+ * @ingroup Vectors
+ * @see @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, 2004
+ */
+    class BlockVector : public BlockVectorBase<Vector>
+    {
+      public:
+                                         /**
+                                          * Typedef the base class for simpler
+                                          * access to its own typedefs.
+                                          */
+        typedef BlockVectorBase<Vector> BaseClass;
+
+                                         /**
+                                          * Typedef the type of the underlying
+                                          * vector.
+                                          */
+        typedef BaseClass::BlockType  BlockType;
+
+                                         /**
+                                          * Import the typedefs from the base
+                                          * class.
+                                          */
+        typedef BaseClass::value_type      value_type;
+        typedef BaseClass::pointer         pointer;
+        typedef BaseClass::const_pointer   const_pointer;
+        typedef BaseClass::reference       reference;
+        typedef BaseClass::const_reference const_reference;
+        typedef BaseClass::size_type       size_type;
+        typedef BaseClass::iterator        iterator;
+        typedef BaseClass::const_iterator  const_iterator;
+
+                                         /**
+                                          * Default constructor. Generate an
+                                          * empty vector without any blocks.
+                                          */
+        BlockVector ();
+
+                                         /**
+                                          *  Constructor. Generate a block
+                                          *  vector with @p n_blocks blocks,
+                                          *  each of which is a parallel
+                                          *  vector across @p communicator
+                                          *  with @p block_size elements of
+                                          *  which @p local_size elements are
+                                          *  stored on the present process.
+                                          */
+        explicit BlockVector (const unsigned int  n_blocks,
+                              const MPI_Comm     &communicator,
+                              const unsigned int  block_size,
+                              const unsigned int  local_size);
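+                                         // For example (a sketch; the
+                                         // sizes are made-up values):
+                                         //
+                                         //   // 2 blocks, each of global
+                                         //   // size 100, with 50 elements
+                                         //   // stored on this process:
+                                         //   PETScWrappers::MPI::BlockVector v (2, MPI_COMM_WORLD, 100, 50);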
+
+                                         /**
+                                          * Copy-Constructor. Set all the
+                                          * properties of the parallel vector
+                                          * to those of the given argument and
+                                          * copy the elements.
+                                          */
+        BlockVector (const BlockVector  &V);
+
+                                         /**
+                                          * Constructor. Set the number of
+                                          * blocks to
+                                          * <tt>block_sizes.size()</tt> and
+                                          * initialize each block with
+                                          * <tt>block_sizes[i]</tt> zero
+                                          * elements. The individual blocks
+                                          * are distributed across the given
+                                          * communicator, and each store
+                                          * communicator, and each stores
+                                          * elements on the present process.
+                                          */
+        BlockVector (const std::vector<unsigned int> &block_sizes,
+                     const MPI_Comm                  &communicator,
+                     const std::vector<unsigned int> &local_elements);
+
+                                         /**
+                                          * Destructor. Clears memory.
+                                          */
+        ~BlockVector ();
+
+                                         /**
+                                          * Copy operator: fill all components
+                                          * of the vector that are locally
+                                          * stored with the given scalar value.
+                                          */
+        BlockVector & operator = (const value_type s);
+
+                                         /**
+                                          * Copy operator for arguments of the
+                                          * same type.
+                                          */
+        BlockVector &
+        operator= (const BlockVector &V);
+
+                                         /**
+                                          * Copy the given sequential
+                                          * (non-distributed) block vector
+                                          * into the present parallel block
+                                          * vector. It is assumed that they
+                                          * have the same size, and this
+                                          * operation does not change the
+                                          * partitioning of the parallel
+                                          * vectors by which its elements are
+                                          * distributed across several MPI
+                                          * processes. What this operation
+                                          * therefore does is to copy, for each
+                                          * of the individual blocks of this
+                                          * object, that chunk of the given
+                                          * vector @p v that corresponds to
+                                          * elements of the target vector that
+                                          * are stored locally. Elements that
+                                          * are not stored locally are not
+                                          * touched.
+                                          *
+                                          * This being a parallel vector, you
+                                          * must make sure that @em all
+                                          * processes call this function at
+                                          * the same time. It is not possible
+                                          * to change the local part of a
+                                          * parallel vector on only one
+                                          * process, independent of what other
+                                          * processes do, with this function.
+                                          */
+        BlockVector &
+        operator = (const PETScWrappers::BlockVector &v);
+
+                                         /**
+                                          * Reinitialize the BlockVector to
+                                          * contain @p n_blocks of size @p
+                                          * block_size, each of which stores
+                                          * @p local_size elements
+                                          * locally. The @p communicator
+                                          * argument denotes the MPI
+                                          * communicator over which each of
+                                          * these blocks shall communicate.
+                                          *
+                                          * If <tt>fast==false</tt>, the vector
+                                          * is filled with zeros.
+                                          */
+        void reinit (const unsigned int  n_blocks,
+                     const MPI_Comm     &communicator,
+                     const unsigned int  block_size,
+                     const unsigned int  local_size,
+                     const bool fast = false);
+
+                                         /**
+                                          * Reinitialize the BlockVector such
+                                          * that it contains
+                                          * <tt>block_sizes.size()</tt>
+                                          * blocks. Each block is
+                                          * reinitialized to dimension
+                                          * <tt>block_sizes[i]</tt>. Each of
+                                          * them stores
+                                          * <tt>local_sizes[i]</tt> elements
+                                          * on the present process.
+                                          *
+                                          * If the number of blocks is the
+                                          * same as before this function
+                                          * was called, all vectors remain
+                                          * the same and reinit() is
+                                          * called for each vector.
+                                          *
+                                          * If <tt>fast==false</tt>, the vector
+                                          * is filled with zeros.
+                                          *
+                                          * Note that you must call this
+                                          * function (or one of the other
+                                          * reinit() functions), rather
+                                          * than calling the reinit()
+                                          * functions of an individual
+                                          * block, to allow the block
+                                          * vector to update its caches of
+                                          * vector sizes. If you call
+                                          * reinit() of one of the
+                                          * blocks, then subsequent
+                                          * actions on this object may
+                                          * yield unpredictable results
+                                          * since they may be routed to
+                                          * the wrong block.
+                                          */
+        void reinit (const std::vector<unsigned int> &block_sizes,
+                     const MPI_Comm                  &communicator,
+                     const std::vector<unsigned int> &local_sizes,
+                     const bool                       fast=false);
+
+        void reinit (const std::vector<unsigned int> &block_sizes,
+                     const MPI_Comm                  &communicator);       // added by shuqiangwang
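+        // A sketch of using the reinit() variants above (`v' is an
+        // assumed block vector and the sizes are made-up values; the
+        // two-argument variant only sets up the block structure):
+        //
+        //   std::vector<unsigned int> sizes (2, 100), local (2, 50);
+        //   v.reinit (sizes, MPI_COMM_WORLD, local, /*fast=*/ false);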
+
+                                         /**
+                                          * Change the dimension to that
+                                          * of the vector <tt>V</tt>. The same
+                                          * applies as for the other
+                                          * reinit() function.
+                                          *
+                                          * The elements of <tt>V</tt> are not
+                                          * copied, i.e.  this function is
+                                          * the same as calling <tt>reinit
+                                          * (V.size(), fast)</tt>.
+                                          *
+                                          * Note that you must call this
+                                          * function (or one of the other
+                                          * reinit() functions), rather
+                                          * than calling the reinit()
+                                          * functions of an individual
+                                          * block, to allow the block
+                                          * vector to update its caches of
+                                          * vector sizes. If you call
+                                          * reinit() on one of the
+                                          * blocks, then subsequent
+                                          * actions on this object may
+                                          * yield unpredictable results
+                                          * since they may be routed to
+                                          * the wrong block.
+                                          */
+        void reinit (const BlockVector &V,
+                     const bool         fast=false);
+
+                                         /**
+                                          * Return a reference to the MPI
+                                          * communicator object in use with
+                                          * this vector.
+                                          */
+        const MPI_Comm & get_mpi_communicator () const;
+
+                                         /**
+                                          * Swap the contents of this
+                                          * vector and the other vector
+                                          * <tt>v</tt>. One could do this
+                                          * operation with a temporary
+                                          * variable and copying over the
+                                          * data elements, but this
+                                          * function is significantly more
+                                          * efficient since it only swaps
+                                          * the pointers to the data of
+                                          * the two vectors and therefore
+                                          * does not need to allocate
+                                          * temporary storage and move
+                                          * data around.
+                                          *
+                                          * Limitation: right now this
+                                          * function only works if both
+                                          * vectors have the same number
+                                          * of blocks. If needed, the
+                                          * numbers of blocks should be
+                                          * exchanged, too.
+                                          *
+                                          * This function is analogous to
+                                          * the swap() function of all C++
+                                          * standard containers. Also,
+                                          * there is a global function
+                                          * swap(u,v) that simply calls
+                                          * <tt>u.swap(v)</tt>, again in analogy
+                                          * to standard functions.
+                                          */
+        void swap (BlockVector &v);
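+                                         // For example (a sketch; `u' and
+                                         // `v' are assumed block vectors
+                                         // with the same number of blocks):
+                                         //
+                                         //   u.swap (v);   // or: swap (u, v);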
+
+                                         /**
+                                          * Print to a stream.
+                                          */
+        void print (std::ostream       &out,
+                    const unsigned int  precision = 3,
+                    const bool          scientific = true,
+                    const bool          across = true) const;
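+                                         // For example (a sketch):
+                                         //
+                                         //   // 6 digits, scientific
+                                         //   // notation, blocks printed
+                                         //   // across the line:
+                                         //   v.print (std::cout, 6, true, true);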
+
+                                         /**
+                                          * Exception
+                                          */
+        DeclException0 (ExcIteratorRangeDoesNotMatchVectorSize);
+                                         /**
+                                          * Exception
+                                          */
+        DeclException0 (ExcNonMatchingBlockVectors);
+    };
+
+/*@}*/
+
+/*----------------------- Inline functions ----------------------------------*/
+
+
+    inline
+    BlockVector::BlockVector ()
+    {}
+
+
+
+    inline
+    BlockVector::BlockVector (const unsigned int  n_blocks,
+                              const MPI_Comm     &communicator,
+                              const unsigned int  block_size,
+                              const unsigned int  local_size)
+    {
+      reinit (n_blocks, communicator, block_size, local_size);
+    }
+
+
+
+    inline
+    BlockVector::BlockVector (const std::vector<unsigned int> &block_sizes,
+                              const MPI_Comm     &communicator,
+                              const std::vector<unsigned int> &local_elements)
+    {
+      reinit (block_sizes, communicator, local_elements, false);
+    }
+
+
+    inline
+    BlockVector::BlockVector (const BlockVector& v)
+                    :
+                    BlockVectorBase<Vector > ()
+    {
+      this->components.resize (v.n_blocks());
+      this->block_indices = v.block_indices;
+
+      for (unsigned int i=0; i<this->n_blocks(); ++i)
+        this->components[i] = v.components[i];
+
+      collect_sizes();     // shuqiangwang
+    }
+
+
+
+    inline
+    BlockVector &
+    BlockVector::operator = (const value_type s)
+    {
+      BaseClass::operator = (s);
+      return *this;
+    }
+
+
+
+    inline
+    BlockVector &
+    BlockVector::operator = (const BlockVector &v)
+    {
+      BaseClass::operator = (v);
+      return *this;
+    }
+
+
+
+    inline
+    BlockVector::~BlockVector ()
+    {}
+
+
+    inline
+    void
+    BlockVector::reinit (const unsigned int  n_blocks,
+                         const MPI_Comm     &communicator,
+                         const unsigned int  block_size,
+                         const unsigned int  local_size,
+                         const bool fast)
+    {
+      reinit(std::vector<unsigned int>(n_blocks, block_size),
+             communicator,
+             std::vector<unsigned int>(n_blocks, local_size),
+             fast);
+    }
+
+
+
+    inline
+    void
+    BlockVector::reinit (const std::vector<unsigned int> &block_sizes,
+                         const MPI_Comm                  &communicator,
+                         const std::vector<unsigned int> &local_sizes,
+                         const bool                       fast)
+    {
+      this->block_indices.reinit (block_sizes);
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+
+      for (unsigned int i=0; i<this->n_blocks(); ++i)
+        this->components[i].reinit(communicator, block_sizes[i],
+                             local_sizes[i], fast);
+
+      collect_sizes();     // shuqiangwang
+    }
+
+    inline
+    void
+    BlockVector::reinit (const std::vector<unsigned int> &block_sizes,
+                         const MPI_Comm                  &communicator)    // added by shuqiangwang
+    {
+      this->block_indices.reinit (block_sizes);
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+
+      collect_sizes();     // shuqiangwang
+    }
+
+    inline
+    void
+    BlockVector::reinit (const BlockVector& v,
+                         const bool fast)
+    {
+      this->block_indices = v.get_block_indices();
+      if (this->components.size() != this->n_blocks())
+        this->components.resize(this->n_blocks());
+
+      for (unsigned int i=0;i<this->n_blocks();++i)
+        block(i).reinit(v.block(i), fast);
+
+      collect_sizes();     // shuqiangwang
+    }
+
+
+
+    inline
+    const MPI_Comm &
+    BlockVector::get_mpi_communicator () const
+    {
+      return block(0).get_mpi_communicator();
+    }
+
+
+
+    inline
+    void
+    BlockVector::swap (BlockVector &v)
+    {
+      Assert (this->n_blocks() == v.n_blocks(),
+              ExcDimensionMismatch(this->n_blocks(), v.n_blocks()));
+
+      for (unsigned int i=0; i<this->n_blocks(); ++i)
+        this->components[i].swap (v.components[i]);
+      ::dealii::swap (this->block_indices, v.block_indices);
+    }
+
+
+
+    inline
+    void
+    BlockVector::print (std::ostream       &out,
+                        const unsigned int  precision,
+                        const bool          scientific,
+                        const bool          across) const
+    {
+      for (unsigned int i=0;i<this->n_blocks();++i)
+        {
+          if (across)
+            out << 'C' << i << ':';
+          else
+            out << "Component " << i << std::endl;
+          this->components[i].print(out, precision, scientific, across);
+        }
+    }
+
+
+
+/**
+ * Global function which overloads the default implementation
+ * of the C++ standard library, which uses a temporary object. The
+ * function simply exchanges the data of the two vectors.
+ *
+ * @relates PETScWrappers::MPI::BlockVector
+ * @author Wolfgang Bangerth, 2000
+ */
+    inline
+    void swap (BlockVector &u,
+               BlockVector &v)
+    {
+      u.swap (v);
+    }
+
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif  // DEAL_II_USE_PETSC
+
+#endif

Added: branches/s-wang/for_deal.II/include/deal.II/lac/trilinos_vector_base.h
===================================================================
--- branches/s-wang/for_deal.II/include/deal.II/lac/trilinos_vector_base.h	                        (rev 0)
+++ branches/s-wang/for_deal.II/include/deal.II/lac/trilinos_vector_base.h	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,1996 @@
+//---------------------------------------------------------------------------
+//    $Id: trilinos_vector_base.h 26093 2012-08-22 21:37:41Z heister $
+//
+//    Copyright (C) 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+//    This file is subject to QPL and may not be  distributed
+//    without copyright and license information. Please refer
+//    to the file deal.II/doc/license.html for the  text  and
+//    further information on this license.
+//
+//---------------------------------------------------------------------------
+#ifndef __deal2__trilinos_vector_base_h
+#define __deal2__trilinos_vector_base_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_USE_TRILINOS
+
+#  include <deal.II/base/utilities.h>
+#  include <deal.II/base/std_cxx1x/shared_ptr.h>
+#  include <deal.II/base/subscriptor.h>
+#  include <deal.II/lac/exceptions.h>
+#  include <deal.II/lac/vector.h>
+
+#  include <vector>
+#  include <utility>
+#  include <memory>
+
+#  define TrilinosScalar double
+#  include "Epetra_ConfigDefs.h"
+#  ifdef DEAL_II_COMPILER_SUPPORTS_MPI // only if MPI is installed
+#    include "mpi.h"
+#    include "Epetra_MpiComm.h"
+#  else
+#    include "Epetra_SerialComm.h"
+#  endif
+#  include "Epetra_FEVector.h"
+
+DEAL_II_NAMESPACE_OPEN
+
+                                // forward declaration
+template <typename number> class Vector;
+
+
+/**
+ * @addtogroup TrilinosWrappers
+ *@{
+ */
+namespace TrilinosWrappers
+{
+                                // forward declaration
+  class VectorBase;
+
+
+                                       /**
+                                        * @cond internal
+                                        */
+
+/**
+ * A namespace for internal implementation details of the
+ * TrilinosWrapper members.
+ *
+ * @ingroup TrilinosWrappers
+ */
+  namespace internal
+  {
+                                       /**
+                                        * This class implements a
+                                        * wrapper for accessing the
+                                        * Trilinos vector in the same
+                                        * way as we access deal.II
+                                        * objects: it is initialized
+                                        * with a vector and an element
+                                        * within it, and has a
+                                        * conversion operator to
+                                        * extract the scalar value of
+                                        * this element. It also has a
+                                        * variety of assignment
+                                        * operators for writing to this
+                                        * one element.
+                                        *
+                                        * @ingroup TrilinosWrappers
+                                        */
+    class VectorReference
+    {
+      private:
+                                       /**
+                                        * Constructor. It is made
+                                        * private so as to only allow
+                                        * the actual vector class to
+                                        * create it.
+                                        */
+        VectorReference (VectorBase        &vector,
+                         const unsigned int index);
+
+      public:
+                                       /**
+                                        * This looks like a copy
+                                        * operator, but does something
+                                        * different than usual. In
+                                        * particular, it does not copy
+                                        * the member variables of this
+                                        * reference. Rather, it
+                                        * handles the situation where
+                                        * we have two vectors @p v and
+                                        * @p w, and assign elements
+                                        * like in
+                                        * <tt>v(i)=w(i)</tt>. Here,
+                                        * both left and right hand
+                                        * side of the assignment have
+                                        * data type VectorReference,
+                                        * but what we really mean is
+                                        * to assign the vector
+                                        * elements represented by the
+                                        * two references. This
+                                        * operator implements this
+                                        * operation. Note also that
+                                        * this allows us to make the
+                                        * assignment operator const.
+                                        */
+        const VectorReference &
+          operator = (const VectorReference &r) const;
+
+                                         /**
+                                          * Same as above but for non-const
+                                          * reference objects.
+                                          */
+        const VectorReference &
+          operator = (const VectorReference &r);
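+
+                                         // e.g., v(i) = w(i) copies the
+                                         // element value, not the
+                                         // reference (v, w and i are
+                                         // illustrative).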
+
+                                       /**
+                                        * Set the referenced element of the
+                                        * vector to <tt>s</tt>.
+                                        */
+        const VectorReference &
+          operator = (const TrilinosScalar &s) const;
+
+                                       /**
+                                        * Add <tt>s</tt> to the
+                                        * referenced element of the
+                                        * vector.
+                                        */
+        const VectorReference &
+          operator += (const TrilinosScalar &s) const;
+
+                                       /**
+                                        * Subtract <tt>s</tt> from the
+                                        * referenced element of the
+                                        * vector.
+                                        */
+        const VectorReference &
+          operator -= (const TrilinosScalar &s) const;
+
+                                       /**
+                                        * Multiply the referenced
+                                        * element of the vector by
+                                        * <tt>s</tt>.
+                                        */
+        const VectorReference &
+          operator *= (const TrilinosScalar &s) const;
+
+                                       /**
+                                        * Divide the referenced
+                                        * element of the vector by
+                                        * <tt>s</tt>.
+                                        */
+        const VectorReference &
+          operator /= (const TrilinosScalar &s) const;
+
+                                       /**
+                                        * Convert the reference to an
+                                        * actual value, i.e. return
+                                        * the value of the referenced
+                                        * element of the vector.
+                                        */
+        operator TrilinosScalar () const;
+
+                                       /**
+                                        * Exception
+                                        */
+        DeclException1 (ExcTrilinosError,
+                        int,
+                        << "An error with error number " << arg1
+                        << " occurred while calling a Trilinos function");
+
+                                       /**
+                                        * Exception
+                                        */
+        DeclException3 (ExcAccessToNonLocalElement,
+                        int, int, int,
+                        << "You tried to access element " << arg1
+                        << " of a distributed vector, but only elements "
+                        << arg2 << " through " << arg3
+                        << " are stored locally and can be accessed.");
+
+      private:
+                                       /**
+                                        * Point to the vector we are
+                                        * referencing.
+                                        */
+        VectorBase   &vector;
+
+                                       /**
+                                        * Index of the referenced element
+                                        * of the vector.
+                                        */
+        const unsigned int  index;
+
+                                       /**
+                                        * Make the vector class a
+                                        * friend, so that it can
+                                        * create objects of the
+                                        * present type.
+                                        */
+        friend class ::dealii::TrilinosWrappers::VectorBase;
+    };
+  }
+                                       /**
+                                        * @endcond
+                                        */
+
+
+/**
+ * Base class for the two types of Trilinos vectors, the distributed
+ * memory vector MPI::Vector and a localized vector Vector. The latter
+ * is designed for use in either serial implementations or as a
+ * localized copy on each processor.  The implementation of this class
+ * is based on the Trilinos vector class Epetra_FEVector, the (parallel)
+ * partitioning of which is governed by an Epetra_Map. This means that
+ * the generic vector functionality can be implemented in this base
+ * class, while the definition of the partition map (and hence, the
+ * constructor and reinit function) is left to the derived classes. The
+ * Epetra_FEVector is precisely the kind of vector we deal with all the
+ * time - we typically get it from some assembly process, where entries
+ * not locally owned might need to be written and hence need to be
+ * forwarded to the owner. The only requirement for this class to work
+ * is that Trilinos is installed with the same compiler as is used for
+ * compilation of deal.II.
+ *
+ * The interface of this class is modeled after the existing Vector
+ * class in deal.II. It has almost the same member functions, and is
+ * often exchangeable. However, since Trilinos only supports a single
+ * scalar type (double), it is not templated, and only works with that
+ * type.
+ *
+ * Note that Trilinos only guarantees that operations do what you expect
+ * if the function @p GlobalAssemble has been called after vector
+ * assembly in order to distribute the data. Therefore, you need to call
+ * Vector::compress() before you actually use the vectors.
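+ *
+ * A minimal sketch of this pattern (the vector v and index i are
+ * illustrative):
+ * @code
+ * v(i) = 1.0;                                       // element-wise write
+ * v.compress (::dealii::VectorOperation::insert);   // flush the buffers
+ * @endcode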
+ *
+ * @ingroup TrilinosWrappers
+ * @ingroup Vectors
+ * @author Martin Kronbichler, 2008
+ */
+  class VectorBase : public Subscriptor
+  {
+    public:
+                                       /**
+                                        * Declare some of the standard
+                                        * types used in all
+                                        * containers. These types
+                                        * parallel those in the
+                                        * <tt>C</tt> standard libraries
+                                        * <tt>vector<...></tt> class.
+                                        */
+      typedef TrilinosScalar            value_type;
+      typedef TrilinosScalar            real_type;
+      typedef std::size_t               size_type;
+      typedef internal::VectorReference reference;
+      typedef const internal::VectorReference const_reference;
+
+                                       /**
+                                        * @name 1: Basic Object-handling
+                                        */
+                                       //@{
+
+                                       /**
+                                        * Default constructor that
+                                        * generates an empty (zero size)
+                                        * vector. The function
+                                        * <tt>reinit()</tt> will have to
+                                        * give the vector the correct
+                                        * size and distribution among
+                                        * processes in case of an MPI
+                                        * run.
+                                        */
+      VectorBase ();
+
+                                       /**
+                                        * Copy constructor. Sets the
+                                        * dimension to that of the given
+                                        * vector, and copies all the
+                                        * elements.
+                                        */
+      VectorBase (const VectorBase &v);
+
+                                       /**
+                                        * Destructor
+                                        */
+      virtual ~VectorBase ();
+
+                                       /**
+                                        * Release all memory and return
+                                        * to a state just like after
+                                        * having called the default
+                                        * constructor.
+                                        */
+      void clear ();
+
+                                       /**
+                                        * Reinit functionality, sets the
+                                        * dimension and possibly the
+                                        * parallel partitioning (Epetra_Map)
+                                        * of the calling vector to the
+                                        * settings of the input vector.
+                                        */
+      void reinit (const VectorBase &v,
+                   const bool        fast = false);
+
+                                       /**
+                                        * Compress the underlying
+                                        * representation of the Trilinos
+                                        * object, i.e. flush the buffers
+                                        * of the vector object if it has
+                                        * any. This function is
+                                        * necessary after writing into a
+                                        * vector element-by-element and
+                                        * before anything else can be
+                                        * done on it.
+                                        *
+                                        * The (defaulted) argument can
+                                        * be used to specify the
+                                        * compress mode
+                                        * (<code>Add</code> or
+                                        * <code>Insert</code>) in case
+                                        * the vector has not been
+                                        * written to since the last
+                                        * time this function was
+                                        * called. The argument is
+                                        * ignored if the vector has
+                                        * been added or written to
+                                        * since the last time
+                                        * compress() was called.
+                                        *
+                                        * See @ref GlossCompress "Compressing distributed objects"
+                                        * for more information.
+                                        */
+      void compress (::dealii::VectorOperation::values operation
+                       = ::dealii::VectorOperation::unknown);
+
+                                       /**
+                                        * @deprecated
+                                        */
+      void compress (const Epetra_CombineMode last_action);
+
+                                       /**
+                                        * Returns the state of the
+                                        * vector, i.e., whether
+                                        * compress() has already been
+                                        * called after an operation
+                                        * requiring data exchange.
+                                        */
+      bool is_compressed () const;
+
+                                       /**
+                                        * Set all components of the
+                                        * vector to the given number @p
+                                        * s. Simply pass this down to
+                                        * the Trilinos Epetra object,
+                                        * but we still need to declare
+                                        * this function to make the
+                                        * example given in the
+                                        * discussion about making the
+                                        * constructor explicit work.
+                                        *
+                                        * Since the semantics of
+                                        * assigning a scalar to a vector
+                                        * are not immediately clear,
+                                        * this operator should really
+                                        * only be used if you want to
+                                        * set the entire vector to
+                                        * zero. This allows the
+                                        * intuitive notation
+                                        * <tt>v=0</tt>. Assigning other
+                                        * values is deprecated and may
+                                        * be disallowed in the future.
+                                        */
+      VectorBase &
+        operator = (const TrilinosScalar s);
+
+                                       /**
+                                        * Copy function. This function takes
+                                        * a VectorBase vector and copies all
+                                        * the elements. The target vector
+                                        * will have the same parallel
+                                        * distribution as the calling
+                                        * vector.
+                                        */
+      VectorBase &
+        operator = (const VectorBase &v);
+
+                                       /**
+                                        * Another copy function. This
+                                        * one takes a deal.II vector and
+                                        * copies it into a
+                                        * TrilinosWrapper vector. Note
+                                        * that since we do not provide
+                                        * any Epetra_map that tells
+                                        * about the partitioning of the
+                                        * vector among the MPI
+                                        * processes, the size of the
+                                        * TrilinosWrapper vector has to
+                                        * be the same as the size of the
+                                        * input vector. In order to
+                                        * change the map, use the
+                                        * reinit(const Epetra_Map
+                                        * &input_map) function.
+                                        */
+      template <typename Number>
+      VectorBase &
+        operator = (const ::dealii::Vector<Number> &v);
+
+                                       /**
+                                        * Test for equality. This
+                                        * function assumes that the
+                                        * present vector and the one to
+                                        * compare with have the same
+                                        * size already, since comparing
+                                        * vectors of different sizes
+                                        * does not make much sense anyway.
+                                        */
+      bool operator == (const VectorBase &v) const;
+
+                                       /**
+                                        * Test for inequality. This
+                                        * function assumes that the
+                                        * present vector and the one to
+                                        * compare with have the same
+                                        * size already, since comparing
+                                        * vectors of different sizes
+                                        * does not make much sense anyway.
+                                        */
+      bool operator != (const VectorBase &v) const;
+
+                                       /**
+                                        * Return the global dimension of
+                                        * the vector.
+                                        */
+      unsigned int size () const;
+
+                                       /**
+                                        * Return the local dimension of
+                                        * the vector, i.e. the number of
+                                        * elements stored on the present
+                                        * MPI process. For sequential
+                                        * vectors, this number is the
+                                        * same as size(), but for
+                                        * parallel vectors it may be
+                                        * smaller.
+                                        *
+                                        * To figure out which elements
+                                        * exactly are stored locally,
+                                        * use local_range().
+                                        *
+                                        * If the vector contains ghost
+                                        * elements, they are included in
+                                        * this number.
+                                        */
+      unsigned int local_size () const;
+
+                                       /**
+                                        * Return a pair of indices
+                                        * indicating which elements of
+                                        * this vector are stored
+                                        * locally. The first number is
+                                        * the index of the first element
+                                        * stored, the second the index
+                                        * of the one past the last one
+                                        * that is stored locally. If
+                                        * this is a sequential vector,
+                                        * then the result will be the
+                                        * pair (0,N), otherwise it will
+                                        * be a pair (i,i+n), where
+                                        * <tt>n=local_size()</tt>.
+                                        */
+      std::pair<unsigned int, unsigned int> local_range () const;
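+
+                                       // A minimal sketch of looping over
+                                       // the locally stored range (v and
+                                       // do_something_with are
+                                       // illustrative):
+                                       //
+                                       //   const std::pair<unsigned int,unsigned int>
+                                       //     range = v.local_range();
+                                       //   for (unsigned int i=range.first; i<range.second; ++i)
+                                       //     do_something_with (v(i));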
+
+                                       /**
+                                        * Return whether @p index is in
+                                        * the local range or not, see
+                                        * also local_range().
+                                        */
+      bool in_local_range (const unsigned int index) const;
+
+                                       /**
+                                        * Return if the vector contains ghost
+                                        * elements. This answer is true if there
+                                        * are ghost elements on at least one
+                                        * process.
+                                        */
+      bool has_ghost_elements() const;
+
+                                       /**
+                                        * Return the scalar (inner)
+                                        * product of two vectors. The
+                                        * vectors must have the same
+                                        * size.
+                                        */
+      TrilinosScalar operator * (const VectorBase &vec) const;
+
+                                       /**
+                                        * Return square of the
+                                        * $l_2$-norm.
+                                        */
+      real_type norm_sqr () const;
+
+                                       /**
+                                        * Mean value of the elements of
+                                        * this vector.
+                                        */
+      TrilinosScalar mean_value () const;
+
+                                       /**
+                                        * Compute the minimal value of
+                                        * the elements of this vector.
+                                        */
+      TrilinosScalar minimal_value () const;
+
+                                       /**
+                                        * $l_1$-norm of the vector.  The
+                                        * sum of the absolute values.
+                                        */
+      real_type l1_norm () const;
+
+                                       /**
+                                        * $l_2$-norm of the vector.  The
+                                        * square root of the sum of the
+                                        * squares of the elements.
+                                        */
+      real_type l2_norm () const;
+
+                                       /**
+                                        * $l_p$-norm of the vector. The
+                                        * <i>p</i>th root of the sum of
+                                        * the <i>p</i>th powers of the
+                                        * absolute values of the
+                                        * elements.
+                                        */
+      real_type lp_norm (const TrilinosScalar p) const;
+
+                                       /**
+                                        * Maximum absolute value of the
+                                        * elements.
+                                        */
+      real_type linfty_norm () const;
+
+                                       /**
+                                        * Return the minimal element of
+                                        * the vector.
+                                        */
+      real_type min () const;          // shuqiangwang
+
+                                       /**
+                                        * Return the maximal element of
+                                        * the vector.
+                                        */
+      real_type max () const;
+
+                                       /**
+                                        * Return whether the vector
+                                        * contains only elements with
+                                        * value zero. This function is
+                                        * mainly for internal
+                                        * consistency checks and should
+                                        * seldom be used when not in
+                                        * debug mode since it takes quite
+                                        * some time.
+                                        */
+      bool all_zero () const;
+
+                                       /**
+                                        * Return @p true if the vector
+                                        * has no negative entries,
+                                        * i.e. all entries are zero or
+                                        * positive. This function is
+                                        * used, for example, to check
+                                        * whether refinement indicators
+                                        * are really all positive (or
+                                        * zero).
+                                        */
+      bool is_non_negative () const;
+                                       //@}
+
+
+                                       /**
+                                        * @name 2: Data-Access
+                                        */
+                                       //@{
+
+                                       /**
+                                        * Provide access to a given
+                                        * element, both read and write.
+                                        */
+      reference
+        operator () (const unsigned int index);
+
+                                       /**
+                                        * Provide read-only access to an
+                                        * element. This is equivalent to
+                                        * the <code>el()</code> command.
+                                        */
+      TrilinosScalar
+        operator () (const unsigned int index) const;
+
+                                       /**
+                                        * Provide access to a given
+                                        * element, both read and write.
+                                        *
+                                        * Exactly the same as operator().
+                                        */
+      reference
+        operator [] (const unsigned int index);
+
+                                       /**
+                                        * Provide read-only access to an
+                                        * element. This is equivalent to
+                                        * the <code>el()</code> command.
+                                        *
+                                        * Exactly the same as operator().
+                                        */
+      TrilinosScalar
+        operator [] (const unsigned int index) const;
+
+                                       /**
+                                        * Return the value of the vector
+                                        * entry <i>i</i>. Note that this
+                                        * function only works properly
+                                        * when we request data stored on
+                                        * the local processor. The
+                                        * function will throw an
+                                        * exception in case the element
+                                        * sits on another process.
+                                        */
+      TrilinosScalar el (const unsigned int index) const;
+
+                                       /**
+                                        * A collective set operation:
+                                        * instead of setting individual
+                                        * elements of a vector, this
+                                        * function allows one to set a whole
+                                        * set of elements at once. The
+                                        * indices of the elements to be
+                                        * set are stated in the first
+                                        * argument, the corresponding
+                                        * values in the second.
+                                        */
+      void set (const std::vector<unsigned int>    &indices,
+                const std::vector<TrilinosScalar>  &values);
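+
+                                       // For example (vector v, indices
+                                       // and values are illustrative):
+                                       //
+                                       //   std::vector<unsigned int>   idx (2);
+                                       //   std::vector<TrilinosScalar> val (2);
+                                       //   idx[0] = 3;  val[0] = 1.0;
+                                       //   idx[1] = 7;  val[1] = 2.0;
+                                       //   v.set (idx, val);
+                                       //   v.compress (::dealii::VectorOperation::insert);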
+
+                                       /**
+                                        * This is a second collective
+                                        * set operation. As a
+                                        * difference, this function
+                                        * takes a deal.II vector of
+                                        * values.
+                                        */
+      void set (const std::vector<unsigned int>        &indices,
+                const ::dealii::Vector<TrilinosScalar> &values);
+                                       //@}
+
+
+                                       /**
+                                        * @name 3: Modification of vectors
+                                        */
+                                       //@{
+
+                                       /**
+                                        * This collective set operation
+                                        * is of lower level and can
+                                        * handle anything else &mdash;
+                                        * the only thing you have to
+                                        * provide is an address where
+                                        * all the indices are stored and
+                                        * the number of elements to be
+                                        * set.
+                                        */
+      void set (const unsigned int    n_elements,
+                const unsigned int   *indices,
+                const TrilinosScalar *values);
+
+                                       /**
+                                        * A collective add operation:
+                                        * This function adds a whole
+                                        * set of values stored in @p
+                                        * values to the vector
+                                        * components specified by @p
+                                        * indices.
+                                        */
+      void add (const std::vector<unsigned int>   &indices,
+                const std::vector<TrilinosScalar> &values);
+
+                                       /**
+                                        * This is a second collective
+                                        * add operation. As a
+                                        * difference, this function
+                                        * takes a deal.II vector of
+                                        * values.
+                                        */
+      void add (const std::vector<unsigned int>        &indices,
+                const ::dealii::Vector<TrilinosScalar> &values);
+
+                                      /**
+                                       * Take an address where
+                                       * <tt>n_elements</tt> are stored
+                                       * contiguously and add them into
+                                       * the vector. Handles all cases
+                                       * which are not covered by the
+                                       * other two <tt>add()</tt>
+                                       * functions above.
+                                       */
+      void add (const unsigned int    n_elements,
+                const unsigned int   *indices,
+                const TrilinosScalar *values);
+
+                                       /**
+                                        * Multiply the entire vector by
+                                        * a fixed factor.
+                                        */
+      VectorBase & operator *= (const TrilinosScalar factor);
+
+                                       /**
+                                        * Divide the entire vector by a
+                                        * fixed factor.
+                                        */
+      VectorBase & operator /= (const TrilinosScalar factor);
+
+                                       /**
+                                        * Add the given vector to the
+                                        * present one.
+                                        */
+      VectorBase & operator += (const VectorBase &V);
+
+                                       /**
+                                        * Subtract the given vector from
+                                        * the present one.
+                                        */
+      VectorBase & operator -= (const VectorBase &V);
+
+                                       /**
+                                        * Addition of @p s to all
+                                        * components. Note that @p s is
+                                        * a scalar and not a vector.
+                                        */
+      void add (const TrilinosScalar s);
+
+                                       /**
+                                        * Simple vector addition, equal
+                                        * to the <tt>operator
+                                        * +=</tt>.
+                                        *
+                                        * However, if the second argument
+                                        * <tt>allow_different_maps</tt>
+                                        * is set, then it is possible to
+                                        * add data from a different map.
+                                        */
+      void add (const VectorBase &V,
+                const bool        allow_different_maps = false);
+
+                                       /**
+                                        * Simple addition of a multiple
+                                        * of a vector, i.e. <tt>*this +=
+                                        * a*V</tt>.
+                                        */
+      void add (const TrilinosScalar  a,
+                const VectorBase     &V);
+
+                                       /**
+                                        * Multiple addition of scaled
+                                        * vectors, i.e. <tt>*this += a*V +
+                                        * b*W</tt>.
+                                        */
+      void add (const TrilinosScalar  a,
+                const VectorBase     &V,
+                const TrilinosScalar  b,
+                const VectorBase     &W);
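+
+                                       // E.g., to form x += 2*V - W
+                                       // (x, V and W are illustrative
+                                       // vectors of matching parallel
+                                       // layout):
+                                       //
+                                       //   x.add (2.0, V, -1.0, W);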
+
+                                       /**
+                                        * Scaling and simple vector
+                                        * addition, i.e.  <tt>*this =
+                                        * s*(*this) + V</tt>.
+                                        */
+      void sadd (const TrilinosScalar  s,
+                 const VectorBase     &V);
+
+                                       /**
+                                        * Scaling and simple addition,
+                                        * i.e.  <tt>*this = s*(*this) +
+                                        * a*V</tt>.
+                                        */
+      void sadd (const TrilinosScalar  s,
+                 const TrilinosScalar  a,
+                 const VectorBase     &V);
+
+                                       /**
+                                        * Scaling and multiple addition.
+                                        */
+      void sadd (const TrilinosScalar  s,
+                 const TrilinosScalar  a,
+                 const VectorBase     &V,
+                 const TrilinosScalar  b,
+                 const VectorBase     &W);
+
+                                       /**
+                                        * Scaling and multiple addition.
+                                        * <tt>*this = s*(*this) + a*V +
+                                        * b*W + c*X</tt>.
+                                        */
+      void sadd (const TrilinosScalar  s,
+                 const TrilinosScalar  a,
+                 const VectorBase     &V,
+                 const TrilinosScalar  b,
+                 const VectorBase     &W,
+                 const TrilinosScalar  c,
+                 const VectorBase     &X);
+
+                                       /**
+                                        * Scale each element of this
+                                        * vector by the corresponding
+                                        * element in the argument. This
+                                        * function is mostly meant to
+                                        * simulate multiplication (and
+                                        * immediate re-assignment) by a
+                                        * diagonal scaling matrix.
+                                        */
+      void scale (const VectorBase &scaling_factors);
+
+                                       /**
+                                        * Assignment <tt>*this =
+                                        * a*V</tt>.
+                                        */
+      void equ (const TrilinosScalar  a,
+                const VectorBase     &V);
+
+                                       /**
+                                        * Assignment <tt>*this = a*V +
+                                        * b*W</tt>.
+                                        */
+      void equ (const TrilinosScalar  a,
+                const VectorBase     &V,
+                const TrilinosScalar  b,
+                const VectorBase     &W);
+
+                                       /**
+                                        * Compute the elementwise ratio
+                                        * of the two given vectors, that
+                                        * is let <tt>this[i] =
+                                        * a[i]/b[i]</tt>. This is useful
+                                        * for example if you want to
+                                        * compute the cellwise ratio of
+                                        * true to estimated error.
+                                        *
+                                        * This vector is appropriately
+                                        * scaled to hold the result.
+                                        *
+                                        * If any of the <tt>b[i]</tt> is
+                                        * zero, the result is
+                                        * undefined. No attempt is made
+                                        * to catch such situations.
+                                        */
+      void ratio (const VectorBase &a,
+                  const VectorBase &b);
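+
+                                       // A short sketch (a, b and r are
+                                       // illustrative vectors of the same
+                                       // layout):
+                                       //
+                                       //   r.ratio (a, b);  // r[i] = a[i]/b[i]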
+                                       //@}
+
+
+                                       /**
+                                        * @name 4: Mixed stuff
+                                        */
+                                       //@{
+
+                                       /**
+                                        * Return a const reference to the
+                                        * underlying Trilinos
+                                        * Epetra_MultiVector class.
+                                        */
+      const Epetra_MultiVector & trilinos_vector () const;
+
+                                       /**
+                                        * Return a (modifiable) reference to
+                                        * the underlying Trilinos
+                                        * Epetra_FEVector class.
+                                        */
+      Epetra_FEVector & trilinos_vector ();
+
+                                       /**
+                                        * Return a const reference to the
+                                        * underlying Trilinos Epetra_Map
+                                        * that sets the parallel
+                                        * partitioning of the vector.
+                                        */
+      const Epetra_Map & vector_partitioner () const;
+
+                                       /**
+                                        *  Output of vector in
+                                        *  user-defined format in analogy
+                                        *  to the dealii::Vector<number>
+                                        *  class.
+                                        */
+      void print (const char* format = 0) const;
+
+                                       /**
+                                        * Print to a stream. @p
+                                        * precision denotes the desired
+                                        * precision with which values
+                                        * shall be printed, @p
+                                        * scientific whether scientific
+                                        * notation shall be used. If @p
+                                        * across is @p true then the
+                                        * vector is printed in a line,
+                                        * while if @p false then the
+                                        * elements are printed on a
+                                        * separate line each.
+                                        */
+      void print (std::ostream       &out,
+                  const unsigned int  precision  = 3,
+                  const bool          scientific = true,
+                  const bool          across     = true) const;
+
+                                       /**
+                                        * Swap the contents of this
+                                        * vector and the other vector @p
+                                        * v. One could do this operation
+                                        * with a temporary variable and
+                                        * copying over the data
+                                        * elements, but this function is
+                                        * significantly more efficient
+                                        * since it only swaps the
+                                        * pointers to the data of the
+                                        * two vectors and therefore does
+                                        * not need to allocate temporary
+                                        * storage and move data
+                                        * around. Note that the vectors
+                                        * need to be of the same size
+                                        * and based on the same map.
+                                        *
+                                        * This function is analogous to
+                                        * the @p swap function of all
+                                        * C++ standard containers. Also,
+                                        * there is a global function
+                                        * <tt>swap(u,v)</tt> that simply
+                                        * calls <tt>u.swap(v)</tt>,
+                                        * again in analogy to standard
+                                        * functions.
+                                        */
+      void swap (VectorBase &v);
+
+                                       /**
+                                        * Estimate for the memory
+                                        * consumption in bytes.
+                                        */
+      std::size_t memory_consumption () const;
+                                       //@}
+
+                                       /**
+                                        * Exception
+                                        */
+      DeclException0 (ExcGhostsPresent);
+
+                                       /**
+                                        * Exception
+                                        */
+      DeclException0 (ExcDifferentParallelPartitioning);
+
+                                       /**
+                                        * Exception
+                                        */
+      DeclException1 (ExcTrilinosError,
+                      int,
+                      << "An error with error number " << arg1
+                      << " occurred while calling a Trilinos function");
+
+                                       /**
+                                        * Exception
+                                        */
+      DeclException3 (ExcAccessToNonlocalElement,
+                      int, int, int,
+                      << "You tried to access element " << arg1
+                      << " of a distributed vector, but only entries "
+                      << arg2 << " through " << arg3
+                      << " are stored locally and can be accessed.");
+
+
+    private:
+                                       /**
+                                        * Trilinos doesn't allow mixing
+                                        * additions to vector
+                                        * entries and overwriting them
+                                        * (to make synchronisation of
+                                        * parallel computations
+                                        * simpler). The way we do it
+                                        * is to, for each access
+                                        * operation, store whether it
+                                        * is an insertion or an
+                                        * addition. If the previous
+                                        * one was of different type,
+                                        * then we first have to flush
+                                        * the Trilinos buffers;
+                                        * otherwise, we can simply go
+                                        * on.  Luckily, Trilinos has
+                                        * an object for this which
+                                        * does already all the
+                                        * parallel communications in
+                                        * such a case, so we simply
+                                        * use their model, which
+                                        * stores whether the last
+                                        * operation was an addition or
+                                        * an insertion.
+                                        */
+      Epetra_CombineMode last_action;
+
+                                       /**
+                                        * A boolean variable to hold
+                                        * information on whether the
+                                        * vector is compressed or not.
+                                        */
+      bool compressed;
+
+      /**
+       * Whether this vector has ghost elements. This is true
+       * on all processors even if only one of them has any
+       * ghost elements.
+       */
+      bool has_ghosts;
+
+                                       /**
+                                        * An Epetra distributed vector
+                                        * type. Requires an existing
+                                        * Epetra_Map for storing data.
+                                        */
+      std_cxx1x::shared_ptr<Epetra_FEVector> vector;
+
+
+                                       /**
+                                        * Make the reference class a
+                                        * friend.
+                                        */
+      friend class internal::VectorReference;
+      friend class Vector;
+      friend class MPI::Vector;
+  };
+
+
+
+
+// ------------------- inline and template functions --------------
+
+/**
+ * Global function swap that overloads the default implementation of
+ * the C++ standard library, which uses a temporary object. The function
+ * simply exchanges the data of the two vectors.
+ *
+ * @relates TrilinosWrappers::VectorBase
+ * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+ */
+  inline
+  void swap (VectorBase &u, VectorBase &v)
+  {
+    u.swap (v);
+  }
+
+
+#ifndef DOXYGEN
+
+  namespace internal
+  {
+    inline
+    VectorReference::VectorReference (VectorBase        &vector,
+                                      const unsigned int index)
+                    :
+                    vector (vector),
+                    index (index)
+    {}
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator = (const VectorReference &r) const
+    {
+                                        // as explained in the class
+                                        // documentation, this is not the copy
+                                        // operator. so simply pass on to the
+                                        // "correct" assignment operator
+      *this = static_cast<TrilinosScalar> (r);
+
+      return *this;
+    }
+
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator = (const VectorReference &r)
+    {
+                                        // as above
+      *this = static_cast<TrilinosScalar> (r);
+
+      return *this;
+    }
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator = (const TrilinosScalar &value) const
+    {
+      vector.set (1, &index, &value);
+      return *this;
+    }
+
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator += (const TrilinosScalar &value) const
+    {
+      vector.add (1, &index, &value);
+      return *this;
+    }
+
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator -= (const TrilinosScalar &value) const
+    {
+      TrilinosScalar new_value = -value;
+      vector.add (1, &index, &new_value);
+      return *this;
+    }
+
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator *= (const TrilinosScalar &value) const
+    {
+      TrilinosScalar new_value = static_cast<TrilinosScalar>(*this) * value;
+      vector.set (1, &index, &new_value);
+      return *this;
+    }
+
+
+
+    inline
+    const VectorReference &
+    VectorReference::operator /= (const TrilinosScalar &value) const
+    {
+      TrilinosScalar new_value = static_cast<TrilinosScalar>(*this) / value;
+      vector.set (1, &index, &new_value);
+      return *this;
+    }
+  }
+
+
+
+  inline
+  bool
+  VectorBase::is_compressed () const
+  {
+    return compressed;
+  }
+
+
+
+  inline
+  bool
+  VectorBase::in_local_range (const unsigned int index) const
+  {
+    std::pair<unsigned int, unsigned int> range = local_range();
+
+    return ((index >= range.first) && (index <  range.second));
+  }
+
+
+
+  inline
+  bool
+  VectorBase::has_ghost_elements() const
+  {
+    return has_ghosts;
+  }
+
+
+
+  inline
+  internal::VectorReference
+  VectorBase::operator () (const unsigned int index)
+  {
+    return internal::VectorReference (*this, index);
+  }
+
+
+
+  inline
+  internal::VectorReference
+  VectorBase::operator [] (const unsigned int index)
+  {
+    return operator() (index);
+  }
+
+
+  inline
+  TrilinosScalar
+  VectorBase::operator [] (const unsigned int index) const
+  {
+    return operator() (index);
+  }
+
+
+
+  inline
+  void
+  VectorBase::reinit (const VectorBase &v,
+                      const bool        fast)
+  {
+    Assert (vector.get() != 0,
+            ExcMessage("Vector has not been constructed properly."));
+
+    if (fast == false ||
+        vector_partitioner().SameAs(v.vector_partitioner())==false)
+      vector.reset (new Epetra_FEVector(*v.vector));
+  }
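+
+  // Note (added remark, not part of the original sources): the
+  // Epetra_FEVector copy constructor used above copies v's values as
+  // well, so reinit() with fast==true only avoids the reallocation and
+  // copy when the two partitioners already match.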
+
+
+
+  inline
+  void
+  VectorBase::compress (const Epetra_CombineMode last_action)
+  {
+    ::dealii::VectorOperation::values last_action_;
+    if (last_action == Add)
+      last_action_ = ::dealii::VectorOperation::add;
+    else if (last_action == Insert)
+      last_action_ = ::dealii::VectorOperation::insert;
+    else
+      AssertThrow(false, ExcNotImplemented());
+
+    compress(last_action_);
+  }
+  
+
+
+  inline
+  void
+  VectorBase::compress (::dealii::VectorOperation::values given_last_action)
+  {
+                                     // Select which mode to send to
+                                     // Trilinos. Note that we use last_action
+                                     // if available and ignore what the user
+                                     // tells us, in order to detect wrongly
+                                     // mixed operations. Typically,
+                                     // given_last_action is only used on
+                                     // machines that do not execute an
+                                     // operation (because they have no own
+                                     // cells, for example).
+    Epetra_CombineMode mode = last_action;
+    if (last_action == Zero)
+      {
+        if (given_last_action==::dealii::VectorOperation::add)
+          mode = Add;
+        else if (given_last_action==::dealii::VectorOperation::insert)
+          mode = Insert;
+      }
+
+#ifdef DEBUG
+#  ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+                                     // check that every process has decided
+                                     // to use the same mode. This will
+                                     // otherwise result in undefined
+                                     // behaviour in the call to
+                                     // GlobalAssemble().
+    double double_mode = mode;
+    Utilities::MPI::MinMaxAvg result
+      = Utilities::MPI::min_max_avg (double_mode,
+                                     dynamic_cast<const Epetra_MpiComm*>
+                                     (&vector_partitioner().Comm())->GetMpiComm());
+    Assert(result.max-result.min<1e-5,
+           ExcMessage ("Not all processors agree whether the last operation on "
+                       "this vector was an addition or a set operation. This will "
+                       "prevent the compress() operation from succeeding."));
+
+#  endif
+#endif
+
+                                 // Now pass over the information about
+                                 // what we did last to the vector.
+    const int ierr = vector->GlobalAssemble(mode);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    last_action = Zero;
+
+    compressed = true;
+  }
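+
+  // Usage sketch (illustrative only, not part of this patch): after
+  // each rank has added its contributions, every rank must call
+  // compress() with the same operation, e.g.
+  //
+  //   v.add (indices, values);                      // on every rank
+  //   v.compress (::dealii::VectorOperation::add);  // same mode everywhere
+  //
+  // mixing 'add' on one rank with 'insert' on another trips the
+  // min/max consistency check in the debug section above.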
+
+
+
+  inline
+  VectorBase &
+  VectorBase::operator = (const TrilinosScalar s)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    Assert (numbers::is_finite(s), ExcNumberNotFinite());
+
+    const int ierr = vector->PutScalar(s);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  inline
+  void
+  VectorBase::set (const std::vector<unsigned int>    &indices,
+                   const std::vector<TrilinosScalar>  &values)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    Assert (indices.size() == values.size(),
+            ExcDimensionMismatch(indices.size(),values.size()));
+
+    set (indices.size(), &indices[0], &values[0]);
+  }
+
+
+
+  inline
+  void
+  VectorBase::set (const std::vector<unsigned int>        &indices,
+                   const ::dealii::Vector<TrilinosScalar> &values)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    Assert (indices.size() == values.size(),
+            ExcDimensionMismatch(indices.size(),values.size()));
+
+    set (indices.size(), &indices[0], values.begin());
+  }
+
+
+
+  inline
+  void
+  VectorBase::set (const unsigned int    n_elements,
+                   const unsigned int   *indices,
+                   const TrilinosScalar *values)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    if (last_action == Add)
+      vector->GlobalAssemble(Add);
+
+    if (last_action != Insert)
+      last_action = Insert;
+
+    for (unsigned int i=0; i<n_elements; ++i)
+      {
+        const unsigned int row = indices[i];
+        const int local_row = vector->Map().LID(indices[i]);
+        if (local_row == -1)
+          {
+            const int ierr = vector->ReplaceGlobalValues (1,
+                                                          (const int*)(&row),
+                                                          &values[i]);
+            AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+            compressed = false;
+          }
+        else
+          (*vector)[0][local_row] = values[i];
+      }
+  }
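+
+  // Usage sketch (illustrative only): set() and add() phases should be
+  // separated by a compress() call, since switching between inserting
+  // and adding forces a GlobalAssemble() as implemented above:
+  //
+  //   v.set (n, idx, insert_vals);
+  //   v.compress (::dealii::VectorOperation::insert);
+  //   v.add (n, idx, add_vals);
+  //   v.compress (::dealii::VectorOperation::add);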
+
+
+
+  inline
+  void
+  VectorBase::add (const std::vector<unsigned int>    &indices,
+                   const std::vector<TrilinosScalar>  &values)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (indices.size() == values.size(),
+            ExcDimensionMismatch(indices.size(),values.size()));
+
+    add (indices.size(), &indices[0], &values[0]);
+  }
+
+
+
+  inline
+  void
+  VectorBase::add (const std::vector<unsigned int>        &indices,
+                   const ::dealii::Vector<TrilinosScalar> &values)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (indices.size() == values.size(),
+            ExcDimensionMismatch(indices.size(),values.size()));
+
+    add (indices.size(), &indices[0], values.begin());
+  }
+
+
+
+  inline
+  void
+  VectorBase::add (const unsigned int    n_elements,
+                   const unsigned int   *indices,
+                   const TrilinosScalar *values)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    if (last_action != Add)
+      {
+        if (last_action == Insert)
+          vector->GlobalAssemble(Insert);
+        last_action = Add;
+      }
+
+    for (unsigned int i=0; i<n_elements; ++i)
+      {
+        const unsigned int row = indices[i];
+        const int local_row = vector->Map().LID(row);
+        if (local_row == -1)
+          {
+            const int ierr = vector->SumIntoGlobalValues (1,
+                                                          (const int*)(&row),
+                                                          &values[i]);
+            AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+            compressed = false;
+          }
+        else
+          (*vector)[0][local_row] += values[i];
+      }
+  }
+
+
+
+  inline
+  unsigned int
+  VectorBase::size () const
+  {
+    return (unsigned int) (vector->Map().MaxAllGID() + 1 -
+                           vector->Map().MinAllGID());
+  }
+
+
+
+  inline
+  unsigned int
+  VectorBase::local_size () const
+  {
+    return (unsigned int) vector->Map().NumMyElements();
+  }
+
+
+
+  inline
+  std::pair<unsigned int, unsigned int>
+  VectorBase::local_range () const
+  {
+    int begin, end;
+    begin = vector->Map().MinMyGID();
+    end = vector->Map().MaxMyGID()+1;
+    return std::make_pair (begin, end);
+  }
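+
+  // Worked example (illustrative): on two ranks owning global indices
+  // {0..4} and {5..9}, local_range() returns the half-open interval
+  // [0,5) on rank 0 and [5,10) on rank 1.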
+
+
+
+  inline
+  TrilinosScalar
+  VectorBase::operator * (const VectorBase &vec) const
+  {
+    Assert (vector->Map().SameAs(vec.vector->Map()),
+            ExcDifferentParallelPartitioning());
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar result;
+
+    const int ierr = vector->Dot(*(vec.vector), &result);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return result;
+  }
+
+
+
+  inline
+  VectorBase::real_type
+  VectorBase::norm_sqr () const
+  {
+    const TrilinosScalar d = l2_norm();
+    return d*d;
+  }
+
+
+
+  inline
+  TrilinosScalar
+  VectorBase::mean_value () const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar mean;
+    const int ierr = vector->MeanValue (&mean);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return mean;
+  }
+
+
+
+  inline
+  TrilinosScalar
+  VectorBase::minimal_value () const
+  {
+    TrilinosScalar min_value;
+    const int ierr = vector->MinValue (&min_value);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return min_value;
+  }
+
+
+
+  inline
+  VectorBase::real_type
+  VectorBase::l1_norm () const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar d;
+    const int ierr = vector->Norm1 (&d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return d;
+  }
+
+
+
+  inline
+  VectorBase::real_type
+  VectorBase::l2_norm () const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar d;
+    const int ierr = vector->Norm2 (&d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return d;
+  }
+
+
+
+  inline
+  VectorBase::real_type
+  VectorBase::lp_norm (const TrilinosScalar p) const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar norm = 0;
+    TrilinosScalar sum=0;
+    const unsigned int n_local = local_size();
+
+                                        // loop over all the elements because
+                                        // Trilinos does not support lp norms
+    for (unsigned int i=0; i<n_local; i++)
+      sum += std::pow(std::fabs((*vector)[0][i]), p);
+
+    norm = std::pow(sum, static_cast<TrilinosScalar>(1./p));
+
+    return norm;
+  }
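+
+  // Worked example (illustrative): for p=3 and local entries {1, -2},
+  // the loop accumulates |1|^3 + |-2|^3 = 9 and returns 9^(1/3) ~ 2.08.
+  // Note that the sum runs over the locally owned elements only.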
+
+
+
+  inline
+  VectorBase::real_type
+  VectorBase::linfty_norm () const
+  {
+                                     // while we disallow the other
+                                     // norm operations on ghosted
+                                     // vectors, this particular norm
+                                     // is safe to run even in the
+                                     // presence of ghost elements
+    TrilinosScalar d;
+    const int ierr = vector->NormInf (&d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return d;
+  }
+
+
+
+                                   // inline also scalar products, vector
+                                   // additions etc. since they are all
+                                   // representable by a single Trilinos
+                                   // call. This reduces the overhead of the
+                                   // wrapper class.
+  inline
+  VectorBase &
+  VectorBase::operator *= (const TrilinosScalar a)
+  {
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+    const int ierr = vector->Scale(a);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  inline
+  VectorBase &
+  VectorBase::operator /= (const TrilinosScalar a)
+  {
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+    const TrilinosScalar factor = 1./a;
+
+    Assert (numbers::is_finite(factor), ExcNumberNotFinite());
+
+    const int ierr = vector->Scale(factor);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  inline
+  VectorBase &
+  VectorBase::operator += (const VectorBase &v)
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+    Assert (vector->Map().SameAs(v.vector->Map()),
+            ExcDifferentParallelPartitioning());
+
+    const int ierr = vector->Update (1.0, *(v.vector), 1.0);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  inline
+  VectorBase &
+  VectorBase::operator -= (const VectorBase &v)
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+    Assert (vector->Map().SameAs(v.vector->Map()),
+            ExcDifferentParallelPartitioning());
+
+    const int ierr = vector->Update (-1.0, *(v.vector), 1.0);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return *this;
+  }
+
+
+
+  inline
+  void
+  VectorBase::add (const TrilinosScalar s)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (numbers::is_finite(s), ExcNumberNotFinite());
+
+    unsigned int n_local = local_size();
+    for (unsigned int i=0; i<n_local; i++)
+      (*vector)[0][i] += s;
+  }
+
+
+
+  inline
+  void
+  VectorBase::add (const TrilinosScalar  a,
+                   const VectorBase     &v)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == v.local_size(),
+            ExcDimensionMismatch(local_size(), v.local_size()));
+
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+    const int ierr = vector->Update(a, *(v.vector), 1.);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  inline
+  void
+  VectorBase::add (const TrilinosScalar  a,
+                   const VectorBase     &v,
+                   const TrilinosScalar  b,
+                   const VectorBase     &w)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == v.local_size(),
+            ExcDimensionMismatch(local_size(), v.local_size()));
+    Assert (local_size() == w.local_size(),
+            ExcDimensionMismatch(local_size(), w.local_size()));
+
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+    Assert (numbers::is_finite(b), ExcNumberNotFinite());
+
+    const int ierr = vector->Update(a, *(v.vector), b, *(w.vector), 1.);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  inline
+  void
+  VectorBase::sadd (const TrilinosScalar  s,
+                    const VectorBase     &v)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == v.local_size(),
+            ExcDimensionMismatch(local_size(), v.local_size()));
+
+    Assert (numbers::is_finite(s), ExcNumberNotFinite());
+
+    const int ierr = vector->Update(1., *(v.vector), s);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  inline
+  void
+  VectorBase::sadd (const TrilinosScalar  s,
+                    const TrilinosScalar  a,
+                    const VectorBase     &v)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == v.local_size(),
+            ExcDimensionMismatch(local_size(), v.local_size()));
+
+    Assert (numbers::is_finite(s), ExcNumberNotFinite());
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+    const int ierr = vector->Update(a, *(v.vector), s);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  inline
+  void
+  VectorBase::sadd (const TrilinosScalar  s,
+                    const TrilinosScalar  a,
+                    const VectorBase     &v,
+                    const TrilinosScalar  b,
+                    const VectorBase     &w)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == v.local_size(),
+            ExcDimensionMismatch(local_size(), v.local_size()));
+    Assert (local_size() == w.local_size(),
+            ExcDimensionMismatch(local_size(), w.local_size()));
+
+    Assert (numbers::is_finite(s), ExcNumberNotFinite());
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+    Assert (numbers::is_finite(b), ExcNumberNotFinite());
+
+    const int ierr = vector->Update(a, *(v.vector), b, *(w.vector), s);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  inline
+  void
+  VectorBase::sadd (const TrilinosScalar  s,
+                    const TrilinosScalar  a,
+                    const VectorBase     &v,
+                    const TrilinosScalar  b,
+                    const VectorBase     &w,
+                    const TrilinosScalar  c,
+                    const VectorBase     &x)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == v.local_size(),
+            ExcDimensionMismatch(local_size(), v.local_size()));
+    Assert (local_size() == w.local_size(),
+            ExcDimensionMismatch(local_size(), w.local_size()));
+    Assert (local_size() == x.local_size(),
+            ExcDimensionMismatch(local_size(), x.local_size()));
+
+    Assert (numbers::is_finite(s), ExcNumberNotFinite());
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+    Assert (numbers::is_finite(b), ExcNumberNotFinite());
+    Assert (numbers::is_finite(c), ExcNumberNotFinite());
+
+                                        // the Update member function can
+                                        // take at most two other vectors
+                                        // at once, so do it in two steps
+    const int ierr = vector->Update(a, *(v.vector), b, *(w.vector), s);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    const int jerr = vector->Update(c, *(x.vector), 1.);
+    Assert (jerr == 0, ExcTrilinosError(jerr));
+    (void)jerr; // silences the unused-variable warning in optimized mode
+  }
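+
+  // Note (illustrative): the two Update() calls above compose as
+  //   this = s*this + a*v + b*w   (first call)
+  //   this = 1*this + c*x         (second call)
+  // which together yield this = s*this_old + a*v + b*w + c*x.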
+
+
+
+  inline
+  void
+  VectorBase::scale (const VectorBase &factors)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (local_size() == factors.local_size(),
+            ExcDimensionMismatch(local_size(), factors.local_size()));
+
+    const int ierr = vector->Multiply (1.0, *(factors.vector), *vector, 0.0);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  inline
+  void
+  VectorBase::equ (const TrilinosScalar  a,
+                   const VectorBase     &v)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+                                   // If we don't have the same map, copy.
+    if (vector->Map().SameAs(v.vector->Map())==false)
+      {
+        *vector = *v.vector;
+        *this *= a;
+      }
+    else
+      {
+                                   // Otherwise, just update
+        int ierr = vector->Update(a, *v.vector, 0.0);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        last_action = Zero;
+      }
+
+  }
+
+
+
+  inline
+  void
+  VectorBase::equ (const TrilinosScalar  a,
+                   const VectorBase     &v,
+                   const TrilinosScalar  b,
+                   const VectorBase     &w)
+  {
+                                     // if we have ghost values, do not allow
+                                     // writing to this vector at all.
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+    Assert (v.local_size() == w.local_size(),
+            ExcDimensionMismatch (v.local_size(), w.local_size()));
+
+    Assert (numbers::is_finite(a), ExcNumberNotFinite());
+    Assert (numbers::is_finite(b), ExcNumberNotFinite());
+
+                                   // If we don't have the same map, copy.
+    if (vector->Map().SameAs(v.vector->Map())==false)
+      {
+        *vector = *v.vector;
+        sadd(a, b, w);
+      }
+    else
+      {
+                                   // Otherwise, just update. verify
+                                   // that *this does not only have
+                                   // the same map as v (the
+                                   // if-condition above) but also as
+                                   // w
+        Assert (vector->Map().SameAs(w.vector->Map()),
+                ExcDifferentParallelPartitioning());
+        int ierr = vector->Update(a, *v.vector, b, *w.vector, 0.0);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        last_action = Zero;
+      }
+  }
+
+
+
+  inline
+  void
+  VectorBase::ratio (const VectorBase &v,
+                     const VectorBase &w)
+  {
+    Assert (v.local_size() == w.local_size(),
+            ExcDimensionMismatch (v.local_size(), w.local_size()));
+    Assert (local_size() == w.local_size(),
+            ExcDimensionMismatch (local_size(), w.local_size()));
+
+    const int ierr = vector->ReciprocalMultiply(1.0, *(w.vector),
+                                                *(v.vector), 0.0);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+  }
+
+
+
+  inline
+  const Epetra_MultiVector &
+  VectorBase::trilinos_vector () const
+  {
+    return static_cast<const Epetra_MultiVector&>(*vector);
+  }
+
+
+
+  inline
+  Epetra_FEVector &
+  VectorBase::trilinos_vector ()
+  {
+    return *vector;
+  }
+
+
+
+  inline
+  const Epetra_Map &
+  VectorBase::vector_partitioner () const
+  {
+    return static_cast<const Epetra_Map&>(vector->Map());
+  }
+
+
+#endif // DOXYGEN
+
+}
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_TRILINOS
+
+/*----------------------------   trilinos_vector_base.h     ---------------------------*/
+
+#endif
+/*----------------------------   trilinos_vector_base.h     ---------------------------*/

Added: branches/s-wang/for_deal.II/source/lac/constraint_matrix.cc
===================================================================
--- branches/s-wang/for_deal.II/source/lac/constraint_matrix.cc	                        (rev 0)
+++ branches/s-wang/for_deal.II/source/lac/constraint_matrix.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,2481 @@
+//---------------------------------------------------------------------------
+//    $Id: constraint_matrix.cc 26168 2012-08-29 21:27:03Z heister $
+//
+//    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+//    This file is subject to QPL and may not be  distributed
+//    without copyright and license information. Please refer
+//    to the file deal.II/doc/license.html for the  text  and
+//    further information on this license.
+//
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/constraint_matrix.templates.h>
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/compressed_sparsity_pattern.h>
+#include <deal.II/lac/compressed_set_sparsity_pattern.h>
+#include <deal.II/lac/compressed_simple_sparsity_pattern.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/sparse_matrix_ez.h>
+#include <deal.II/lac/block_sparse_matrix_ez.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/petsc_sparse_matrix.h>
+#include <deal.II/lac/petsc_block_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/matrix_block.h>
+
+#include <algorithm>
+#include <numeric>
+#include <set>
+
+// we only need output streams, but older compilers did not provide
+// them in a separate include file
+#ifdef HAVE_STD_OSTREAM_HEADER
+#  include <ostream>
+#else
+#  include <iostream>
+#endif
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+                                        // Static member variable
+const Table<2,bool> ConstraintMatrix::default_empty_table = Table<2,bool>();
+
+
+
+bool
+ConstraintMatrix::check_zero_weight (const std::pair<unsigned int, double> &p)
+{
+  return (p.second == 0);
+}
+
+
+
+bool
+ConstraintMatrix::ConstraintLine::operator < (const ConstraintLine &a) const
+{
+  return line < a.line;
+}
+
+
+
+bool
+ConstraintMatrix::ConstraintLine::operator == (const ConstraintLine &a) const
+{
+  return line == a.line;
+}
+
+
+
+std::size_t
+ConstraintMatrix::ConstraintLine::memory_consumption () const
+{
+  return (MemoryConsumption::memory_consumption (line) +
+          MemoryConsumption::memory_consumption (entries) +
+          MemoryConsumption::memory_consumption (inhomogeneity));
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const std::set<unsigned int> &lines)
+{
+  for (std::set<unsigned int>::const_iterator
+         i = lines.begin(); i != lines.end(); ++i)
+    add_line (*i);
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const std::vector<bool> &lines)
+{
+  for (unsigned int i=0; i<lines.size(); ++i)
+    if (lines[i] == true)
+      add_line (i);
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const IndexSet &lines)
+{
+  for (unsigned int i=0; i<lines.n_elements(); ++i)
+    add_line (lines.nth_index_in_set(i));
+}
+
+
+
+void
+ConstraintMatrix::add_entries
+  (const unsigned int                                  line,
+   const std::vector<std::pair<unsigned int,double> > &col_val_pairs)
+{
+  Assert (sorted==false, ExcMatrixIsClosed());
+  Assert (is_constrained(line), ExcLineInexistant(line));
+
+  ConstraintLine * line_ptr = &lines[lines_cache[calculate_line_index(line)]];
+  Assert (line_ptr->line == line, ExcInternalError());
+
+                                   // if in debug mode, check whether an
+                                   // entry for this column already
+                                   // exists and if it is the same as
+                                   // the one entered at present
+                                   //
+                                   // in any case: skip this entry if
+                                   // an entry for this column already
+                                   // exists, since we don't want to
+                                   // enter it twice
+  for (std::vector<std::pair<unsigned int,double> >::const_iterator
+         col_val_pair = col_val_pairs.begin();
+       col_val_pair!=col_val_pairs.end(); ++col_val_pair)
+    {
+      Assert (line != col_val_pair->first,
+              ExcMessage ("Can't constrain a degree of freedom to itself"));
+
+      for (ConstraintLine::Entries::const_iterator
+             p=line_ptr->entries.begin();
+           p != line_ptr->entries.end(); ++p)
+        if (p->first == col_val_pair->first)
+          {
+                                             // entry exists, break
+                                             // innermost loop
+            Assert (p->second == col_val_pair->second,
+                    ExcEntryAlreadyExists(line, col_val_pair->first,
+                                          p->second, col_val_pair->second));
+            break;
+          }
+
+      line_ptr->entries.push_back (*col_val_pair);
+    }
+}
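+
+// Usage sketch (illustrative only, not part of this patch): constraining
+// x_3 = 0.5*x_1 + 0.5*x_2 through this interface:
+//
+//   ConstraintMatrix constraints;
+//   constraints.add_line (3);
+//   std::vector<std::pair<unsigned int,double> > cols;
+//   cols.push_back (std::make_pair (1u, 0.5));
+//   cols.push_back (std::make_pair (2u, 0.5));
+//   constraints.add_entries (3, cols);
+//   constraints.close ();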
+
+
+
+void ConstraintMatrix::add_selected_constraints
+  (const ConstraintMatrix &constraints,
+   const IndexSet         &filter)
+{
+  if (constraints.n_constraints() == 0)
+    return;
+
+  Assert (filter.size() > constraints.lines.back().line,
+          ExcMessage ("Filter needs to be larger than constraint matrix size."));
+  for (std::vector<ConstraintLine>::const_iterator line=constraints.lines.begin();
+       line!=constraints.lines.end(); ++line)
+    if (filter.is_element(line->line))
+      {
+        const unsigned int row = filter.index_within_set (line->line);
+        add_line (row);
+        set_inhomogeneity (row, line->inhomogeneity);
+        for (unsigned int i=0; i<line->entries.size(); ++i)
+          if (filter.is_element(line->entries[i].first))
+            add_entry (row, filter.index_within_set (line->entries[i].first),
+                       line->entries[i].second);
+      }
+}
+
+
+
+void ConstraintMatrix::close ()
+{
+  if (sorted == true)
+    return;
+
+                                   // sort the lines
+  std::sort (lines.begin(), lines.end());
+
+                                   // update list of pointers and give the
+                                   // vector a sharp size since we won't
+                                   // modify the size any more after this
+                                   // point.
+  {
+    std::vector<unsigned int> new_lines (lines_cache.size(),
+                                         numbers::invalid_unsigned_int);
+    unsigned int counter = 0;
+    for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+         line!=lines.end(); ++line, ++counter)
+      new_lines[calculate_line_index(line->line)] = counter;
+    std::swap (lines_cache, new_lines);
+  }
+
+                                   // in debug mode: check whether we really
+                                   // set the pointers correctly.
+  for (unsigned int i=0; i<lines_cache.size(); ++i)
+    if (lines_cache[i] != numbers::invalid_unsigned_int)
+      Assert (i == calculate_line_index(lines[lines_cache[i]].line),
+              ExcInternalError());
+
+                                   // first, strip zero entries, as we
+                                   // have to do that only once
+  for (std::vector<ConstraintLine>::iterator line = lines.begin();
+       line!=lines.end(); ++line)
+                                     // first remove zero
+                                     // entries. that would mean that
+                                     // in the linear constraint for a
+                                     // node, x_i = ax_1 + bx_2 + ...,
+                                     // another node times 0
+                                     // appears. obviously,
+                                     // 0*something can be omitted
+    line->entries.erase (std::remove_if (line->entries.begin(),
+                                         line->entries.end(),
+                                         &check_zero_weight),
+                         line->entries.end());
+
+
+
+#ifdef DEBUG
+  // In debug mode we are computing an estimate for the maximum number
+  // of constraints so that we can bail out if there is a cycle in the
+  // constraints (which is easier than searching for cycles in the graph).
+  //
+  // Let us figure out the largest dof index. This is an upper bound for the
+  // number of constraints because it is an approximation for the number of dofs
+  // in our system.
+  unsigned int largest_idx = 0;
+  for (std::vector<ConstraintLine>::iterator line = lines.begin();
+       line!=lines.end(); ++line)
+    {
+      for (ConstraintLine::Entries::iterator it = line->entries.begin();
+           it != line->entries.end(); ++it)
+        {
+          largest_idx = std::max(largest_idx, it->first);
+        }
+    }
+#endif
+
+                                   // replace references to dofs that
+                                   // are themselves constrained. note
+                                   // that because we may replace
+                                   // references to other dofs that
+                                   // may themselves be constrained to
+                                   // third ones, we have to iterate
+                                   // over all this until we replace
+                                   // no chains of constraints any
+                                   // more
+                                   //
+                                   // the iteration replaces
+                                   // references to constrained
+                                   // degrees of freedom by
+                                   // second-order references. for
+                                   // example if x3=x0/2+x2/2 and
+                                   // x2=x0/2+x1/2, then the new list
+                                   // will be x3=x0/2+x0/4+x1/4. note
+                                   // that x0 appears twice. we will
+                                   // throw this duplicate out in the
+                                   // following step, where we sort
+                                   // the list so that throwing out
+                                   // duplicates becomes much more
+                                   // efficient. also, we have to do
+                                   // it only once, rather than in
+                                   // each iteration
+  unsigned int iteration = 0;
+  while (true)
+    {
+      bool chained_constraint_replaced = false;
+
+      for (std::vector<ConstraintLine>::iterator line = lines.begin();
+           line!=lines.end(); ++line)
+        {
+#ifdef DEBUG
+          // we need to keep track of how many replacements we do in this line, because we can
+          // end up in a cycle A->B->C->A without the number of entries growing.
+          unsigned int n_replacements = 0;
+#endif
+
+
+
+                                           // loop over all entries of
+                                           // this line (including
+                                           // ones that we have
+                                           // appended in this go
+                                           // around) and see whether
+                                           // they are further
+                                           // constrained. ignore
+                                           // elements that we don't
+                                           // store on the current
+                                           // processor
+          unsigned int entry = 0;
+          while (entry < line->entries.size())
+            if (((local_lines.size() == 0)
+                 ||
+                 (local_lines.is_element(line->entries[entry].first)))
+                &&
+                is_constrained (line->entries[entry].first))
+              {
+                                                 // ok, this entry is
+                                                 // further
+                                                 // constrained:
+                chained_constraint_replaced = true;
+
+                                                 // look up the chain
+                                                 // of constraints for
+                                                 // this entry
+                const unsigned int dof_index = line->entries[entry].first;
+                const double       weight = line->entries[entry].second;
+
+                Assert (dof_index != line->line,
+                        ExcMessage ("Cycle in constraints detected!"));
+
+                const ConstraintLine * constrained_line =
+                  &lines[lines_cache[calculate_line_index(dof_index)]];
+                Assert (constrained_line->line == dof_index,
+                        ExcInternalError());
+
+                                                 // now we have to
+                                                 // replace an entry
+                                                 // by its
+                                                 // expansion. we do
+                                                 // that by
+                                                 // overwriting the
+                                                 // entry by the first
+                                                 // entry of the
+                                                 // expansion and
+                                                 // adding the
+                                                 // remaining ones to
+                                                 // the end, where we
+                                                 // will later process
+                                                 // them once more
+                                                 //
+                                                 // we can of course
+                                                 // only do that if
+                                                 // the DoF that we
+                                                 // are currently
+                                                 // handling is
+                                                 // constrained by a
+                                                 // linear combination
+                                                 // of other dofs:
+                if (constrained_line->entries.size() > 0)
+                  {
+                    for (unsigned int i=0; i<constrained_line->entries.size(); ++i)
+                      Assert (dof_index != constrained_line->entries[i].first,
+                              ExcMessage ("Cycle in constraints detected!"));
+
+                                                     // replace first
+                                                     // entry, then tack
+                                                     // the rest to the
+                                                     // end of the list
+                    line->entries[entry] =
+                      std::make_pair (constrained_line->entries[0].first,
+                                      constrained_line->entries[0].second *
+                                      weight);
+
+                    for (unsigned int i=1; i<constrained_line->entries.size(); ++i)
+                      line->entries
+                        .push_back (std::make_pair (constrained_line->entries[i].first,
+                                                    constrained_line->entries[i].second *
+                                                    weight));
+
+#ifdef DEBUG
+                    // keep track of how many entries we replace in this line. If we do more than
+                    // there are constraints or dofs in our system, we must have a cycle.
+                    ++n_replacements;
+                    Assert(n_replacements/2<largest_idx, ExcMessage("Cycle in constraints detected!"));
+                    if (n_replacements/2>=largest_idx)
+                      return; // this enables us to test for this Exception.
+#endif
+                  }
+                else
+                                                   // the DoF that we
+                                                   // encountered is not
+                                                   // constrained by a linear
+                                                   // combination of other
+                                                   // dofs but is equal to
+                                                   // just the inhomogeneity
+                                                   // (i.e. its chain of
+                                                   // entries is empty). in
+                                                   // that case, we can't just
+                                                   // overwrite the current
+                                                   // entry, but we have to
+                                                   // actually eliminate it
+                  {
+                    line->entries.erase (line->entries.begin()+entry);
+                  }
+
+                line->inhomogeneity += constrained_line->inhomogeneity *
+                                       weight;
+
+                                                 // now that we're here, do
+                                                 // not increase index by
+                                                 // one but rather make
+                                                 // another pass for the
+                                                 // present entry because we
+                                                 // have replaced the
+                                                 // present entry by another
+                                                 // one, or because we have
+                                                 // deleted it and shifted
+                                                 // all following ones one
+                                                 // forward
+              }
+            else
+                                               // entry not further
+                                               // constrained. just move
+                                               // ahead by one
+              ++entry;
+        }
+
+                                       // if we didn't do anything in
+                                       // this round, then quit the
+                                       // loop
+      if (chained_constraint_replaced == false)
+        break;
+
+                                       // increase iteration count. note
+                                       // that we should not iterate more
+                                       // times than there are constraints,
+                                       // since this puts a natural upper
+                                       // bound on the length of constraint
+                                       // chains
+      ++iteration;
+      Assert (iteration <= lines.size(), ExcInternalError());
+    }
+
+                                   // finally sort the entries and re-scale
+                                   // them if necessary. in this step, we also
+                                   // throw out duplicates as mentioned
+                                   // above. moreover, as some entries might
+                                   // have had zero weights, we replace them
+                                   // by a vector with sharp sizes.
+  for (std::vector<ConstraintLine>::iterator line = lines.begin();
+       line!=lines.end(); ++line)
+    {
+      std::sort (line->entries.begin(), line->entries.end());
+
+                                       // loop over the now sorted list and
+                                       // see whether any of the entries
+                                       // references the same dofs more than
+                                       // once in order to find how many
+                                       // non-duplicate entries we have. This
+                                       // lets us allocate the correct amount
+                                       // of memory for the constraint
+                                       // entries.
+      unsigned int duplicates = 0;
+      for (unsigned int i=1; i<line->entries.size(); ++i)
+        if (line->entries[i].first == line->entries[i-1].first)
+          duplicates++;
+
+      if (duplicates > 0 || line->entries.size() < line->entries.capacity())
+        {
+          ConstraintLine::Entries new_entries;
+
+                                             // if we have no duplicates, copy
+                                             // the entries verbatim. this
+                                             // way, the final size of the
+                                             // vector is correct.
+          if (duplicates == 0)
+            new_entries = line->entries;
+          else
+            {
+                                             // otherwise, we need to go
+                                             // through the list one by one
+                                             // and resolve the duplicates
+              new_entries.reserve (line->entries.size() - duplicates);
+              new_entries.push_back(line->entries[0]);
+              for (unsigned int j=1; j<line->entries.size(); ++j)
+                if (line->entries[j].first == line->entries[j-1].first)
+                  {
+                    Assert (new_entries.back().first == line->entries[j].first,
+                            ExcInternalError());
+                    new_entries.back().second += line->entries[j].second;
+                  }
+                else
+                  new_entries.push_back (line->entries[j]);
+
+              Assert (new_entries.size() == line->entries.size() - duplicates,
+                      ExcInternalError());
+
+                                             // make sure there are
+                                             // really no duplicates
+                                             // left and that the list
+                                             // is still sorted
+              for (unsigned int j=1; j<new_entries.size(); ++j)
+                {
+                  Assert (new_entries[j].first != new_entries[j-1].first,
+                          ExcInternalError());
+                  Assert (new_entries[j].first > new_entries[j-1].first,
+                          ExcInternalError());
+                }
+            }
+
+                                             // replace old list of
+                                             // constraints for this dof by
+                                             // the new one
+          line->entries.swap (new_entries);
+        }
+
+                                       // finally do the following
+                                       // check: if the sum of
+                                       // weights for the
+                                       // constraints is close to
+                                       // one, but not exactly
+                                       // one, then rescale all
+                                       // the weights so that they
+                                       // sum up to 1. this adds a
+                                       // little numerical
+                                       // stability and avoids all
+                                       // sorts of problems where
+                                       // the actual value is
+                                       // close to, but not quite
+                                       // what we expected
+                                       //
+                                       // the case where the
+                                       // weights don't quite sum
+                                       // up happens when we
+                                       // compute the
+                                       // interpolation weights
+                                       // "on the fly", i.e. not
+                                       // from precomputed
+                                       // tables. in this case,
+                                       // the interpolation
+                                       // weights are also subject
+                                       // to round-off
+      double sum = 0;
+      for (unsigned int i=0; i<line->entries.size(); ++i)
+        sum += line->entries[i].second;
+      if ((sum != 1.0) && (std::fabs (sum-1.) < 1.e-13))
+        {
+          for (unsigned int i=0; i<line->entries.size(); ++i)
+            line->entries[i].second /= sum;
+          line->inhomogeneity /= sum;
+        }
+    } // end of loop over all constraint lines
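+
+  // Worked example (illustrative): if round-off leaves the weights of a
+  // line summing to 1-5e-14 instead of exactly 1, the branch above
+  // divides every weight and the inhomogeneity by that sum, restoring
+  // an exact sum of 1.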
+
+#ifdef DEBUG
+                                   // if in debug mode: check that no dof is
+                                   // constrained to another dof that is also
+                                   // constrained. exclude dofs from this
+                                   // check whose constraint lines are not
+                                   // stored on the local processor
+  for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+       line!=lines.end(); ++line)
+    for (ConstraintLine::Entries::const_iterator
+           entry=line->entries.begin();
+         entry!=line->entries.end(); ++entry)
+      if ((local_lines.size() == 0)
+          ||
+          (local_lines.is_element(entry->first)))
+        {
+                                           // make sure that entry->first is
+                                           // not the index of a line itself
+          const bool is_circle = is_constrained(entry->first);
+          Assert (is_circle == false,
+                  ExcDoFConstrainedToConstrainedDoF(line->line, entry->first));
+        }
+#endif
+
+  sorted = true;
+}
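+
+// Worked example (illustrative): given x3 = 0.5*x2 and x2 = 0.5*x0 + 0.5*x1,
+// close() resolves the chain so that afterwards x3 = 0.25*x0 + 0.25*x1,
+// which is the second-order substitution described in the comments above.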
+
+
+
+void
+ConstraintMatrix::merge (const ConstraintMatrix &other_constraints,
+                         const MergeConflictBehavior merge_conflict_behavior)
+{
+  AssertThrow(local_lines == other_constraints.local_lines,
+              ExcNotImplemented());
+
+                                   // store the previous state with
+                                   // respect to sorting
+  const bool object_was_sorted = sorted;
+  sorted = false;
+
+  if (other_constraints.lines_cache.size() > lines_cache.size())
+    lines_cache.resize(other_constraints.lines_cache.size(),
+                       numbers::invalid_unsigned_int);
+
+                                   // first action is to fold into the present
+                                   // object possible constraints in the
+                                   // second object. we don't strictly need to
+                                   // do this any more since the
+                                   // ConstraintMatrix has learned to deal
+                                   // with chains of constraints in the
+                                   // close() function, but we have
+                                   // traditionally done this and it's not
+                                   // overly hard to do.
+                                   //
+                                   // for this, loop over all
+                                   // constraints and replace the
+                                   // constraint lines with a new one
+                                   // where constraints are replaced
+                                   // if necessary.
+  ConstraintLine::Entries tmp;
+  for (std::vector<ConstraintLine>::iterator line=lines.begin();
+       line!=lines.end(); ++line)
+    {
+      tmp.clear ();
+      for (unsigned int i=0; i<line->entries.size(); ++i)
+        {
+                                           // if the present dof is not
+                                           // constrained, or if we won't take
+                                           // the constraint from the other
+                                           // object, then simply copy it over
+          if (other_constraints.is_constrained(line->entries[i].first) == false
+              ||
+              ((merge_conflict_behavior != right_object_wins)
+               &&
+               other_constraints.is_constrained(line->entries[i].first)
+               &&
+               this->is_constrained(line->entries[i].first)))
+            tmp.push_back(line->entries[i]);
+          else
+                                             // otherwise resolve
+                                             // further constraints by
+                                             // replacing the old
+                                             // entry by a sequence of
+                                             // new entries taken from
+                                             // the other object, but
+                                             // with multiplied
+                                             // weights
+            {
+              const ConstraintLine::Entries* other_line
+                = other_constraints.get_constraint_entries (line->entries[i].first);
+              Assert (other_line != 0,
+                      ExcInternalError());
+
+              const double weight = line->entries[i].second;
+
+              for (ConstraintLine::Entries::const_iterator j=other_line->begin();
+                   j!=other_line->end(); ++j)
+                tmp.push_back (std::pair<unsigned int,double>(j->first,
+                                                              j->second*weight));
+
+              line->inhomogeneity += other_constraints.get_inhomogeneity(line->entries[i].first) *
+                                     weight;
+            }
+        }
+                                       // finally exchange old and
+                                       // newly resolved line
+      line->entries.swap (tmp);
+    }
+
+
+
+                                   // next action: append those lines at the
+                                   // end that we want to add
+  for (std::vector<ConstraintLine>::const_iterator
+         line=other_constraints.lines.begin();
+       line!=other_constraints.lines.end(); ++line)
+    if (is_constrained(line->line) == false)
+      lines.push_back (*line);
+    else
+      {
+                                         // the constrained dof we want to
+                                         // copy from the other object is also
+                                         // constrained here. let's see what
+                                         // we should do with that
+        switch (merge_conflict_behavior)
+          {
+            case no_conflicts_allowed:
+                  AssertThrow (false,
+                               ExcDoFIsConstrainedFromBothObjects (line->line));
+                  break;
+
+            case left_object_wins:
+                                                   // ignore this constraint
+                  break;
+
+            case right_object_wins:
+                                                   // we need to replace the
+                                                   // existing constraint by
+                                                   // the one from the other
+                                                   // object
+                  lines[lines_cache[calculate_line_index(line->line)]].entries
+                    = line->entries;
+                  lines[lines_cache[calculate_line_index(line->line)]].inhomogeneity
+                    = line->inhomogeneity;
+                  break;
+
+            default:
+                  Assert (false, ExcNotImplemented());
+          }
+      }
+
+                                // update the lines cache
+  unsigned int counter = 0;
+  for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+       line!=lines.end(); ++line, ++counter)
+    lines_cache[calculate_line_index(line->line)] = counter;
+
+                                   // if the object was sorted before,
+                                   // then make sure it is so
+                                   // afterward as well. otherwise
+                                   // leave everything in the unsorted
+                                   // state
+  if (object_was_sorted == true)
+    close ();
+}
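+
+// Usage sketch (illustrative only, not part of this patch): merging two
+// objects that both constrain the same dof:
+//
+//   ConstraintMatrix a, b;
+//   a.add_line (0);
+//   b.add_line (0);
+//   b.set_inhomogeneity (0, 1.);
+//   a.merge (b, ConstraintMatrix::right_object_wins);
+//   // a now carries b's constraint (inhomogeneity 1.) for dof 0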
+
+
+
+void ConstraintMatrix::shift (const unsigned int offset)
+{
+                                   //TODO: this doesn't work with IndexSets yet. [TH]
+  AssertThrow(local_lines.size()==0, ExcNotImplemented());
+
+  lines_cache.insert (lines_cache.begin(), offset,
+                      numbers::invalid_unsigned_int);
+
+  for (std::vector<ConstraintLine>::iterator i = lines.begin();
+       i != lines.end(); ++i)
+    {
+      i->line += offset;
+      for (ConstraintLine::Entries::iterator
+             j = i->entries.begin();
+           j != i->entries.end(); ++j)
+        j->first += offset;
+    }
+}
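+
+// Usage sketch (illustrative): shifting all constrained and constraining
+// indices, e.g. when embedding these constraints into a larger index
+// space:
+//
+//   ConstraintMatrix constraints;
+//   constraints.add_line (0);   // constrains dof 0
+//   constraints.shift (10);     // the same constraint now refers to dof 10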
+
+
+
+void ConstraintMatrix::clear ()
+{
+  {
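+                                     // swap with empty local temporaries
+                                     // instead of calling clear(): the
+                                     // swap idiom actually releases the
+                                     // allocated memory, whereas clear()
+                                     // would keep the capacity around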
+    std::vector<ConstraintLine> tmp;
+    lines.swap (tmp);
+  }
+
+  {
+    std::vector<unsigned int> tmp;
+    lines_cache.swap (tmp);
+  }
+
+  sorted = false;
+}
+
+
+
+void ConstraintMatrix::reinit (const IndexSet & local_constraints)
+{
+  local_lines = local_constraints;
+  clear();
+}
+
+
+
+void ConstraintMatrix::condense (const SparsityPattern &uncondensed,
+                                 SparsityPattern       &condensed) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (uncondensed.is_compressed() == true, ExcMatrixNotClosed());
+  Assert (uncondensed.n_rows() == uncondensed.n_cols(),
+          ExcNotQuadratic());
+
+
+                                   // store for each line of the matrix
+                                   // its new line number
+                                   // after compression. If the shift is
+                                   // -1, this line will be condensed away
+  std::vector<int> new_line;
+
+  new_line.reserve (uncondensed.n_rows());
+
+  std::vector<ConstraintLine>::const_iterator next_constraint = lines.begin();
+  unsigned int                                shift           = 0;
+  unsigned int n_rows = uncondensed.n_rows();
+
+  if (next_constraint == lines.end())
+                                     // if there are no constraints to handle
+    for (unsigned int row=0; row!=n_rows; ++row)
+      new_line.push_back (row);
+  else
+    for (unsigned int row=0; row!=n_rows; ++row)
+      if (row == next_constraint->line)
+        {
+                                           // this line is constrained
+          new_line.push_back (-1);
+                                           // note that @p{lines} is ordered
+          ++shift;
+          ++next_constraint;
+          if (next_constraint == lines.end())
+                                             // nothing more to do; finish rest
+                                             // of loop
+            {
+              for (unsigned int i=row+1; i<n_rows; ++i)
+                new_line.push_back (i-shift);
+              break;
+            };
+        }
+      else
+        new_line.push_back (row-shift);
+
+
+  next_constraint = lines.begin();
+                                   // note: in this loop we need not check
+                                   // whether @p{next_constraint} is a valid
+                                   // iterator, since it is dereferenced
+                                   // only as many times as there are
+                                   // constrained rows, i.e. entries in
+                                   // new_line[*] that are -1
+  for (unsigned int row=0; row<uncondensed.n_rows(); ++row)
+    if (new_line[row] != -1)
+                                       // line not constrained
+                                       // copy entries if column will not
+                                       // be condensed away, distribute
+                                       // otherwise
+      for (unsigned int j=uncondensed.get_rowstart_indices()[row];
+           j<uncondensed.get_rowstart_indices()[row+1]; ++j)
+        if (new_line[uncondensed.get_column_numbers()[j]] != -1)
+          condensed.add (new_line[row], new_line[uncondensed.get_column_numbers()[j]]);
+        else
+          {
+                                             // let c point to the constraint
+                                             // of this column
+            std::vector<ConstraintLine>::const_iterator c = lines.begin();
+            while (c->line != uncondensed.get_column_numbers()[j])
+              ++c;
+
+            for (unsigned int q=0; q!=c->entries.size(); ++q)
+              condensed.add (new_line[row], new_line[c->entries[q].first]);
+          }
+    else
+                                       // line must be distributed
+      {
+        for (unsigned int j=uncondensed.get_rowstart_indices()[row];
+             j<uncondensed.get_rowstart_indices()[row+1]; ++j)
+                                           // for each entry: distribute
+          if (new_line[uncondensed.get_column_numbers()[j]] != -1)
+                                             // column is not constrained
+            for (unsigned int q=0; q!=next_constraint->entries.size(); ++q)
+              condensed.add (new_line[next_constraint->entries[q].first],
+                             new_line[uncondensed.get_column_numbers()[j]]);
+
+          else
+                                             // not only this line but
+                                             // also this col is constrained
+            {
+                                               // let c point to the constraint
+                                               // of this column
+              std::vector<ConstraintLine>::const_iterator c = lines.begin();
+              while (c->line != uncondensed.get_column_numbers()[j]) ++c;
+
+              for (unsigned int p=0; p!=c->entries.size(); ++p)
+                for (unsigned int q=0; q!=next_constraint->entries.size(); ++q)
+                  condensed.add (new_line[next_constraint->entries[q].first],
+                                 new_line[c->entries[p].first]);
+            };
+
+        ++next_constraint;
+      };
+
+  condensed.compress();
+}
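+
+// Worked example (editorial): with four DoFs and the single constraint
+// x_2 = 0.5 x_0 + 0.5 x_1, row and column 2 are condensed away
+// (new_line = {0, 1, -1, 2}) and every pattern entry in row or column 2
+// is redistributed onto the rows/columns of x_0 and x_1. A minimal call
+// sequence, assuming the patterns are set up elsewhere:
+//
+//   ConstraintMatrix cm;
+//   cm.add_line (2);
+//   cm.add_entry (2, 0, 0.5);
+//   cm.add_entry (2, 1, 0.5);
+//   cm.close ();                  // condense() asserts sorted == true
+//
+//   // uncondensed: a compressed 4x4 SparsityPattern built beforehand;
+//   // condensed: sized 3x3 to receive the result
+//   cm.condense (uncondensed, condensed);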
+
+
+
+void ConstraintMatrix::condense (SparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.is_compressed() == false, ExcMatrixIsClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+
+                                   // store for each index whether it must be
+                                   // distributed or not. If entry is
+                                   // numbers::invalid_unsigned_int,
+                                   // no distribution is necessary.
+                                   // otherwise, the number states which line
+                                   // in the constraint matrix handles this
+                                   // index
+  std::vector<unsigned int> distribute(sparsity.n_rows(),
+                                       numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+      if (distribute[row] == numbers::invalid_unsigned_int)
+        {
+                                           // regular line. loop over all
+                                           // valid cols. note that this
+                                           // changes the line we are
+                                           // presently working on: we add
+                                           // additional entries. these are
+                                           // put to the end of the
+                                           // row. however, as constrained
+                                           // nodes cannot be constrained to
+                                           // other constrained nodes, nothing
+                                           // will happen if we run into these
+                                           // added nodes, as they can't be
+                                           // distributed further. we might
+                                           // store the position of the last
+                                           // old entry and stop work there,
+                                           // but since operating on the newly
+                                           // added ones only takes two
+                                           // comparisons (column index valid,
+                                           // distribute[column] necessarily
+                                           // ==numbers::invalid_unsigned_int),
+                                           // it is cheaper to not do so and
+                                           // run right until the end of the
+                                           // line
+          for (SparsityPattern::iterator entry = sparsity.begin(row);
+               ((entry != sparsity.end(row)) &&
+                entry->is_valid_entry());
+               ++entry)
+            {
+              const unsigned int column = entry->column();
+
+              if (distribute[column] != numbers::invalid_unsigned_int)
+                {
+                                                   // distribute entry
+                                                   // at regular row
+                                                   // @p{row} and
+                                                   // irregular column
+                                                   // sparsity.colnums[j]
+                  for (unsigned int q=0;
+                       q!=lines[distribute[column]].entries.size();
+                       ++q)
+                    sparsity.add (row,
+                                  lines[distribute[column]].entries[q].first);
+                }
+            }
+        }
+      else
+                                         // row must be
+                                         // distributed. note that
+                                         // here the present row is
+                                         // not touched (unlike above)
+        {
+          for (SparsityPattern::iterator entry = sparsity.begin(row);
+               (entry != sparsity.end(row)) && entry->is_valid_entry(); ++entry)
+            {
+              const unsigned int column = entry->column();
+              if (distribute[column] == numbers::invalid_unsigned_int)
+                                                 // distribute entry at irregular
+                                                 // row @p{row} and regular column
+                                                 // sparsity.colnums[j]
+                for (unsigned int q=0;
+                     q!=lines[distribute[row]].entries.size(); ++q)
+                  sparsity.add (lines[distribute[row]].entries[q].first,
+                                column);
+              else
+                                                 // distribute entry at irregular
+                                                 // row @p{row} and irregular column
+                                                 // sparsity.get_column_numbers()[j]
+                for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                  for (unsigned int q=0;
+                       q!=lines[distribute[column]].entries.size(); ++q)
+                    sparsity.add (lines[distribute[row]].entries[p].first,
+                                  lines[distribute[column]].entries[q].first);
+            }
+        }
+    }
+
+  sparsity.compress();
+}
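+
+// Editorial usage sketch: the in-place variant keeps the pattern's size and
+// only adds the couplings that eliminating constrained entries induces; it
+// must be handed a pattern that is not yet compressed, and it compresses
+// the pattern itself at the end. Names below are placeholders:
+//
+//   SparsityPattern sp (n_dofs, n_dofs, max_couplings);
+//   // ... fill sp, e.g. via DoFTools::make_sparsity_pattern ...
+//   cm.condense (sp);   // adds fill-in, then calls sp.compress()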
+
+
+
+void ConstraintMatrix::condense (CompressedSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+
+                                   // store for each index whether it must be
+                                   // distributed or not. If entry is
+                                   // numbers::invalid_unsigned_int,
+                                   // no distribution is necessary.
+                                   // otherwise, the number states which line
+                                   // in the constraint matrix handles this
+                                   // index
+  std::vector<unsigned int> distribute(sparsity.n_rows(),
+                                       numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+      if (distribute[row] == numbers::invalid_unsigned_int)
+                                         // regular line. loop over
+                                         // cols. note that as we
+                                         // proceed to distribute
+                                         // cols, the loop may get
+                                         // longer
+        for (unsigned int j=0; j<sparsity.row_length(row); ++j)
+          {
+            const unsigned int column = sparsity.column_number(row,j);
+
+            if (distribute[column] != numbers::invalid_unsigned_int)
+              {
+                                                 // distribute entry
+                                                 // at regular row
+                                                 // @p{row} and
+                                                 // irregular column
+                                                 // column. note that
+                                                 // this changes the
+                                                 // line we are
+                                                 // presently working
+                                                 // on: we add
+                                                 // additional
+                                                 // entries. if we add
+                                                 // another entry at a
+                                                 // column behind the
+                                                 // present one, we
+                                                 // will encounter it
+                                                 // later on (but
+                                                 // since it can't be
+                                                 // further
+                                                 // constrained, won't
+                                                 // have to do
+                                                 // anything about
+                                                 // it). if we add it
+                                                 // up front of the
+                                                 // present column, we
+                                                 // will find the
+                                                 // present column
+                                                 // later on again as
+                                                 // it was shifted
+                                                 // back (again
+                                                 // nothing happens,
+                                                 // in particular no
+                                                 // endless loop, as
+                                                 // when we encounter
+                                                 // it the second time
+                                                 // we won't be able
+                                                 // to add more
+                                                 // entries as they
+                                                 // all already exist,
+                                                 // but we do the same
+                                                 // work more often
+                                                 // than necessary,
+                                                 // and the loop gets
+                                                 // longer), so move
+                                                 // the cursor one to
+                                                 // the right in the
+                                                 // case that we add
+                                                 // an entry up front
+                                                 // that did not exist
+                                                 // before. check
+                                                 // whether it existed
+                                                 // before by tracking
+                                                 // the length of this
+                                                 // row
+                unsigned int old_rowlength = sparsity.row_length(row);
+                for (unsigned int q=0;
+                     q!=lines[distribute[column]].entries.size();
+                     ++q)
+                  {
+                    const unsigned int
+                      new_col = lines[distribute[column]].entries[q].first;
+
+                    sparsity.add (row, new_col);
+
+                    const unsigned int new_rowlength = sparsity.row_length(row);
+                    if ((new_col < column) && (old_rowlength != new_rowlength))
+                      ++j;
+                    old_rowlength = new_rowlength;
+                  };
+              };
+          }
+      else
+                                         // row must be distributed
+        for (unsigned int j=0; j<sparsity.row_length(row); ++j)
+          {
+            const unsigned int column = sparsity.column_number(row,j);
+
+            if (distribute[column] == numbers::invalid_unsigned_int)
+                                               // distribute entry at irregular
+                                               // row @p{row} and regular
+                                               // column @p{column}
+              for (unsigned int q=0;
+                   q!=lines[distribute[row]].entries.size(); ++q)
+                sparsity.add (lines[distribute[row]].entries[q].first,
+                              column);
+            else
+                                               // distribute entry at irregular
+                                               // row @p{row} and irregular
+                                               // column @p{column}
+              for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                for (unsigned int q=0;
+                     q!=lines[distribute[column]].entries.size(); ++q)
+                  sparsity.add (lines[distribute[row]].entries[p].first,
+                                lines[distribute[column]].entries[q].first);
+          };
+    };
+}
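+
+// Editorial note on the cursor adjustment above: if row 5 holds the sorted
+// columns {2, 7}, j points at column 7, and x_7 is constrained to x_1 and
+// x_9, then adding column 1 inserts an entry in front of the cursor and
+// shifts column 7 one slot to the right; incrementing j compensates.
+// Adding column 9 lands behind the cursor and needs no adjustment. The
+// row-length comparison distinguishes a real insertion from an entry that
+// already existed.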
+
+
+
+void ConstraintMatrix::condense (CompressedSetSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+
+                                   // store for each index whether it must be
+                                   // distributed or not. If entry is
+                                   // numbers::invalid_unsigned_int,
+                                   // no distribution is necessary.
+                                   // otherwise, the number states which line
+                                   // in the constraint matrix handles this
+                                   // index
+  std::vector<unsigned int> distribute(sparsity.n_rows(),
+                                       numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+      if (distribute[row] == numbers::invalid_unsigned_int)
+        {
+                                           // regular line. loop over
+                                           // cols. note that as we proceed to
+                                           // distribute cols, the loop may
+                                           // get longer
+          CompressedSetSparsityPattern::row_iterator col_num = sparsity.row_begin (row);
+
+          for (; col_num != sparsity.row_end (row); ++col_num)
+            {
+              const unsigned int column = *col_num;
+
+              if (distribute[column] != numbers::invalid_unsigned_int)
+                {
+                  // distribute entry at regular row
+                  // @p{row} and irregular column *col_num
+                  for (unsigned int q=0;
+                       q!=lines[distribute[column]].entries.size();
+                       ++q)
+                    {
+                      const unsigned int
+                        new_col = lines[distribute[column]].entries[q].first;
+
+                      sparsity.add (row, new_col);
+                    }
+                }
+            }
+        }
+      else
+        // row must be distributed
+        {
+          CompressedSetSparsityPattern::row_iterator col_num = sparsity.row_begin (row);
+
+          for (; col_num != sparsity.row_end (row); ++col_num)
+            {
+              const unsigned int column = *col_num;
+
+              if (distribute[column] == numbers::invalid_unsigned_int)
+                // distribute entry at irregular
+                // row @p{row} and regular
+                // column @p{column}
+                for (unsigned int q=0;
+                     q!=lines[distribute[row]].entries.size(); ++q)
+                  sparsity.add (lines[distribute[row]].entries[q].first,
+                                column);
+              else
+                // distribute entry at irregular
+                // row @p{row} and irregular
+                // column @p{column}
+                for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                  for (unsigned int q=0;
+                       q!=lines[distribute[column]]
+                         .entries.size(); ++q)
+                    sparsity.add (lines[distribute[row]].entries[p].first,
+                                  lines[distribute[column]]
+                                  .entries[q].first);
+            };
+        }
+    };
+}
+
+
+
+void ConstraintMatrix::condense (CompressedSimpleSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+
+                                   // store for each index whether it must be
+                                   // distributed or not. If entry is
+                                   // numbers::invalid_unsigned_int,
+                                   // no distribution is necessary.
+                                   // otherwise, the number states which line
+                                   // in the constraint matrix handles this
+                                   // index
+  std::vector<unsigned int> distribute(sparsity.n_rows(),
+                                       numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+      if (distribute[row] == numbers::invalid_unsigned_int)
+                                         // regular line. loop over
+                                         // cols. note that as we
+                                         // proceed to distribute
+                                         // cols, the loop may get
+                                         // longer
+        for (unsigned int j=0; j<sparsity.row_length(row); ++j)
+          {
+            const unsigned int column = sparsity.column_number(row,j);
+
+            if (distribute[column] != numbers::invalid_unsigned_int)
+              {
+                                                 // distribute entry
+                                                 // at regular row
+                                                 // @p{row} and
+                                                 // irregular column
+                                                 // column. note that
+                                                 // this changes the
+                                                 // line we are
+                                                 // presently working
+                                                 // on: we add
+                                                 // additional
+                                                 // entries. if we add
+                                                 // another entry at a
+                                                 // column behind the
+                                                 // present one, we
+                                                 // will encounter it
+                                                 // later on (but
+                                                 // since it can't be
+                                                 // further
+                                                 // constrained, won't
+                                                 // have to do
+                                                 // anything about
+                                                 // it). if we add it
+                                                 // up front of the
+                                                 // present column, we
+                                                 // will find the
+                                                 // present column
+                                                 // later on again as
+                                                 // it was shifted
+                                                 // back (again
+                                                 // nothing happens,
+                                                 // in particular no
+                                                 // endless loop, as
+                                                 // when we encounter
+                                                 // it the second time
+                                                 // we won't be able
+                                                 // to add more
+                                                 // entries as they
+                                                 // all already exist,
+                                                 // but we do the same
+                                                 // work more often
+                                                 // than necessary,
+                                                 // and the loop gets
+                                                 // longer), so move
+                                                 // the cursor one to
+                                                 // the right in the
+                                                 // case that we add
+                                                 // an entry up front
+                                                 // that did not exist
+                                                 // before. check
+                                                 // whether it existed
+                                                 // before by tracking
+                                                 // the length of this
+                                                 // row
+                unsigned int old_rowlength = sparsity.row_length(row);
+                for (unsigned int q=0;
+                     q!=lines[distribute[column]].entries.size();
+                     ++q)
+                  {
+                    const unsigned int
+                      new_col = lines[distribute[column]].entries[q].first;
+
+                    sparsity.add (row, new_col);
+
+                    const unsigned int new_rowlength = sparsity.row_length(row);
+                    if ((new_col < column) && (old_rowlength != new_rowlength))
+                      ++j;
+                    old_rowlength = new_rowlength;
+                  };
+              };
+          }
+      else
+                                         // row must be distributed
+        for (unsigned int j=0; j<sparsity.row_length(row); ++j)
+          {
+            const unsigned int column = sparsity.column_number(row,j);
+
+            if (distribute[column] == numbers::invalid_unsigned_int)
+                                               // distribute entry at irregular
+                                               // row @p{row} and regular
+                                               // column @p{column}
+              for (unsigned int q=0;
+                   q!=lines[distribute[row]].entries.size(); ++q)
+                sparsity.add (lines[distribute[row]].entries[q].first,
+                              column);
+            else
+                                               // distribute entry at irregular
+                                               // row @p{row} and irregular
+                                               // column @p{column}
+              for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                for (unsigned int q=0;
+                     q!=lines[distribute[column]].entries.size(); ++q)
+                  sparsity.add (lines[distribute[row]].entries[p].first,
+                                lines[distribute[column]].entries[q].first);
+          };
+    };
+}
+
+
+
+void ConstraintMatrix::condense (BlockSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.is_compressed() == false, ExcMatrixIsClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+    index_mapping = sparsity.get_column_indices();
+
+  const unsigned int n_blocks = sparsity.n_block_rows();
+
+                                   // store for each index whether it must be
+                                   // distributed or not. If entry is
+                                   // numbers::invalid_unsigned_int,
+                                   // no distribution is necessary.
+                                   // otherwise, the number states which line
+                                   // in the constraint matrix handles this
+                                   // index
+  std::vector<unsigned int> distribute (sparsity.n_rows(),
+                                        numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+                                       // get index of this row
+                                       // within the blocks
+      const std::pair<unsigned int,unsigned int>
+        block_index = index_mapping.global_to_local(row);
+      const unsigned int block_row = block_index.first;
+
+      if (distribute[row] == numbers::invalid_unsigned_int)
+                                         // regular line. loop over
+                                         // all columns and see
+                                         // whether this column must
+                                         // be distributed
+        {
+
+                                           // to loop over all entries
+                                           // in this row, we have to
+                                           // loop over all blocks in
+                                           // this blockrow and the
+                                           // corresponding row
+                                           // therein
+          for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const SparsityPattern &
+                block_sparsity = sparsity.block(block_row, block_col);
+
+              for (SparsityPattern::const_iterator
+                     entry = block_sparsity.begin(block_index.second);
+                   (entry != block_sparsity.end(block_index.second)) &&
+                     entry->is_valid_entry();
+                   ++entry)
+                {
+                  const unsigned int global_col
+                    = index_mapping.local_to_global(block_col, entry->column());
+
+                  if (distribute[global_col] != numbers::invalid_unsigned_int)
+                                                     // distribute entry at regular
+                                                     // row @p{row} and irregular column
+                                                     // global_col
+                    {
+                      for (unsigned int q=0;
+                           q!=lines[distribute[global_col]].entries.size(); ++q)
+                        sparsity.add (row,
+                                      lines[distribute[global_col]].entries[q].first);
+                    }
+                }
+            }
+        }
+      else
+        {
+                                           // row must be
+                                           // distributed. split the
+                                           // whole row into the
+                                           // chunks defined by the
+                                           // blocks
+          for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const SparsityPattern &
+                block_sparsity = sparsity.block(block_row,block_col);
+
+              for (SparsityPattern::const_iterator
+                     entry = block_sparsity.begin(block_index.second);
+                   (entry != block_sparsity.end(block_index.second)) &&
+                     entry->is_valid_entry();
+                   ++entry)
+                {
+                  const unsigned int global_col
+                    = index_mapping.local_to_global (block_col, entry->column());
+
+                  if (distribute[global_col] == numbers::invalid_unsigned_int)
+                                                     // distribute entry at irregular
+                                                     // row @p{row} and regular column
+                                                     // global_col.
+                    {
+                      for (unsigned int q=0; q!=lines[distribute[row]].entries.size(); ++q)
+                        sparsity.add (lines[distribute[row]].entries[q].first, global_col);
+                    }
+                  else
+                                                     // distribute entry at irregular
+                                                     // row @p{row} and irregular column
+                                                     // @p{global_col}
+                    {
+                      for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                        for (unsigned int q=0; q!=lines[distribute[global_col]].entries.size(); ++q)
+                          sparsity.add (lines[distribute[row]].entries[p].first,
+                                        lines[distribute[global_col]].entries[q].first);
+                    }
+                }
+            }
+        }
+    }
+
+  sparsity.compress();
+}
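+
+// Editorial aside: for a 2x2 block pattern with block sizes {3, 4},
+// index_mapping.global_to_local(5) yields the pair (block 1, local index 2)
+// and local_to_global(1, 2) maps back to global index 5; apart from this
+// translation, the condensation logic is the same as in the non-block case.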
+
+
+
+void ConstraintMatrix::condense (BlockCompressedSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+    index_mapping = sparsity.get_column_indices();
+
+  const unsigned int n_blocks = sparsity.n_block_rows();
+
+                                   // store for each index whether it must be
+                                   // distributed or not. If entry is
+                                   // numbers::invalid_unsigned_int,
+                                   // no distribution is necessary.
+                                   // otherwise, the number states which line
+                                   // in the constraint matrix handles this
+                                   // index
+  std::vector<unsigned int> distribute (sparsity.n_rows(),
+                                        numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+                                       // get index of this row
+                                       // within the blocks
+      const std::pair<unsigned int,unsigned int>
+        block_index = index_mapping.global_to_local(row);
+      const unsigned int block_row = block_index.first;
+      const unsigned int local_row = block_index.second;
+
+      if (distribute[row] == numbers::invalid_unsigned_int)
+                                         // regular line. loop over
+                                         // all columns and see
+                                         // whether this column must
+                                         // be distributed. note that
+                                         // as we proceed to
+                                         // distribute cols, the loop
+                                         // over cols may get longer.
+                                         //
+                                         // don't try to be clever
+                                         // here as in the algorithm
+                                         // for the
+                                         // CompressedSparsityPattern,
+                                         // as that would be much more
+                                         // complicated here. after
+                                         // all, we know that
+                                         // compressed patterns are
+                                         // inefficient...
+        {
+
+                                           // to loop over all entries
+                                           // in this row, we have to
+                                           // loop over all blocks in
+                                           // this blockrow and the
+                                           // corresponding row
+                                           // therein
+          for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const CompressedSparsityPattern &
+                block_sparsity = sparsity.block(block_row, block_col);
+
+              for (unsigned int j=0; j<block_sparsity.row_length(local_row); ++j)
+                {
+                  const unsigned int global_col
+                    = index_mapping.local_to_global(block_col,
+                                                    block_sparsity.column_number(local_row,j));
+
+                  if (distribute[global_col] != numbers::invalid_unsigned_int)
+                                                     // distribute entry at regular
+                                                     // row @p{row} and irregular column
+                                                     // global_col
+                    {
+                      for (unsigned int q=0;
+                           q!=lines[distribute[global_col]]
+                                          .entries.size(); ++q)
+                        sparsity.add (row,
+                                      lines[distribute[global_col]].entries[q].first);
+                    };
+                };
+            };
+        }
+      else
+        {
+                                           // row must be
+                                           // distributed. split the
+                                           // whole row into the
+                                           // chunks defined by the
+                                           // blocks
+          for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const CompressedSparsityPattern &
+                block_sparsity = sparsity.block(block_row,block_col);
+
+              for (unsigned int j=0; j<block_sparsity.row_length(local_row); ++j)
+                {
+                  const unsigned int global_col
+                    = index_mapping.local_to_global (block_col,
+                                                     block_sparsity.column_number(local_row,j));
+
+                  if (distribute[global_col] == numbers::invalid_unsigned_int)
+                                                     // distribute entry at irregular
+                                                     // row @p{row} and regular column
+                                                     // global_col.
+                    {
+                      for (unsigned int q=0; q!=lines[distribute[row]].entries.size(); ++q)
+                        sparsity.add (lines[distribute[row]].entries[q].first,
+                                      global_col);
+                    }
+                  else
+                                                     // distribute entry at irregular
+                                                     // row @p{row} and irregular column
+                                                     // @p{global_col}
+                    {
+                      for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                        for (unsigned int q=0; q!=lines[distribute[global_col]].entries.size(); ++q)
+                          sparsity.add (lines[distribute[row]].entries[p].first,
+                                        lines[distribute[global_col]].entries[q].first);
+                    };
+                };
+            };
+        };
+    };
+}
+
+
+
+void ConstraintMatrix::condense (BlockCompressedSetSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+    index_mapping = sparsity.get_column_indices();
+
+  const unsigned int n_blocks = sparsity.n_block_rows();
+
+                                   // store for each index whether it must be
+                                   // distributed or not. If entry is
+                                   // numbers::invalid_unsigned_int,
+                                   // no distribution is necessary.
+                                   // otherwise, the number states which line
+                                   // in the constraint matrix handles this
+                                   // index
+  std::vector<unsigned int> distribute (sparsity.n_rows(),
+                                        numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+                                       // get index of this row
+                                       // within the blocks
+      const std::pair<unsigned int,unsigned int>
+        block_index = index_mapping.global_to_local(row);
+      const unsigned int block_row = block_index.first;
+      const unsigned int local_row = block_index.second;
+
+      if (distribute[row] == numbers::invalid_unsigned_int)
+                                         // regular line. loop over
+                                         // all columns and see
+                                         // whether this column must
+                                         // be distributed. note that
+                                         // as we proceed to
+                                         // distribute cols, the loop
+                                         // over cols may get longer.
+                                         //
+                                         // don't try to be clever
+                                         // here as in the algorithm
+                                         // for the
+                                         // CompressedSparsityPattern,
+                                         // as that would be much more
+                                         // complicated here. after
+                                         // all, we know that
+                                         // compressed patterns are
+                                         // inefficient...
+        {
+
+                                           // to loop over all entries
+                                           // in this row, we have to
+                                           // loop over all blocks in
+                                           // this blockrow and the
+                                           // corresponding row
+                                           // therein
+          for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const CompressedSetSparsityPattern &
+                block_sparsity = sparsity.block(block_row, block_col);
+
+              for (CompressedSetSparsityPattern::row_iterator
+                     j = block_sparsity.row_begin(local_row);
+                   j != block_sparsity.row_end(local_row); ++j)
+                {
+                  const unsigned int global_col
+                    = index_mapping.local_to_global(block_col, *j);
+
+                  if (distribute[global_col] != numbers::invalid_unsigned_int)
+                                                     // distribute entry at regular
+                                                     // row @p{row} and irregular column
+                                                     // global_col
+                    {
+                      for (unsigned int q=0;
+                           q!=lines[distribute[global_col]]
+                                          .entries.size(); ++q)
+                        sparsity.add (row,
+                                      lines[distribute[global_col]].entries[q].first);
+                    };
+                };
+            };
+        }
+      else
+        {
+                                           // row must be
+                                           // distributed. split the
+                                           // whole row into the
+                                           // chunks defined by the
+                                           // blocks
+          for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const CompressedSetSparsityPattern &
+                block_sparsity = sparsity.block(block_row,block_col);
+
+              for (CompressedSetSparsityPattern::row_iterator
+                     j = block_sparsity.row_begin(local_row);
+                   j != block_sparsity.row_end(local_row); ++j)
+                {
+                  const unsigned int global_col
+                    = index_mapping.local_to_global (block_col, *j);
+
+                  if (distribute[global_col] == numbers::invalid_unsigned_int)
+                                                     // distribute entry at irregular
+                                                     // row @p{row} and regular column
+                                                     // global_col.
+                    {
+                      for (unsigned int q=0; q!=lines[distribute[row]].entries.size(); ++q)
+                        sparsity.add (lines[distribute[row]].entries[q].first,
+                                      global_col);
+                    }
+                  else
+                                                     // distribute entry at irregular
+                                                     // row @p{row} and irregular column
+                                                     // @p{global_col}
+                    {
+                      for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                        for (unsigned int q=0; q!=lines[distribute[global_col]].entries.size(); ++q)
+                          sparsity.add (lines[distribute[row]].entries[p].first,
+                                        lines[distribute[global_col]].entries[q].first);
+                    };
+                };
+            };
+        };
+    };
+}
+
+
+
+void ConstraintMatrix::condense (BlockCompressedSimpleSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+    index_mapping = sparsity.get_column_indices();
+
+  const unsigned int n_blocks = sparsity.n_block_rows();
+
+                                   // store for each index whether it must be
+                                   // distributed or not. If entry is
+                                   // numbers::invalid_unsigned_int,
+                                   // no distribution is necessary.
+                                   // otherwise, the number states which line
+                                   // in the constraint matrix handles this
+                                   // index
+  std::vector<unsigned int> distribute (sparsity.n_rows(),
+                                        numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+                                       // get index of this row
+                                       // within the blocks
+      const std::pair<unsigned int,unsigned int>
+        block_index = index_mapping.global_to_local(row);
+      const unsigned int block_row = block_index.first;
+      const unsigned int local_row = block_index.second;
+
+      if (distribute[row] == numbers::invalid_unsigned_int)
+                                         // regular line. loop over
+                                         // all columns and see
+                                         // whether this column must
+                                         // be distributed. note that
+                                         // as we proceed to
+                                         // distribute cols, the loop
+                                         // over cols may get longer.
+                                         //
+                                         // don't try to be clever
+                                         // here as in the algorithm
+                                         // for the
+                                         // CompressedSparsityPattern,
+                                         // as that would be much more
+                                         // complicated here. after
+                                         // all, we know that
+                                         // compressed patterns are
+                                         // inefficient...
+        {
+
+                                           // to loop over all entries
+                                           // in this row, we have to
+                                           // loop over all blocks in
+                                           // this blockrow and the
+                                           // corresponding row
+                                           // therein
+          for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const CompressedSimpleSparsityPattern &
+                block_sparsity = sparsity.block(block_row, block_col);
+
+              for (unsigned int j=0; j<block_sparsity.row_length(local_row); ++j)
+                {
+                  const unsigned int global_col
+                    = index_mapping.local_to_global(block_col,
+                                                    block_sparsity.column_number(local_row,j));
+
+                  if (distribute[global_col] != numbers::invalid_unsigned_int)
+                                                     // distribute entry at regular
+                                                     // row @p{row} and irregular column
+                                                     // global_col
+                    {
+                      for (unsigned int q=0;
+                           q!=lines[distribute[global_col]]
+                                          .entries.size(); ++q)
+                        sparsity.add (row,
+                                      lines[distribute[global_col]].entries[q].first);
+                    };
+                };
+            };
+        }
+      else
+        {
+                                           // row must be
+                                           // distributed. split the
+                                           // whole row into the
+                                           // chunks defined by the
+                                           // blocks
+          for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+            {
+              const CompressedSimpleSparsityPattern &
+                block_sparsity = sparsity.block(block_row,block_col);
+
+              for (unsigned int j=0; j<block_sparsity.row_length(local_row); ++j)
+                {
+                  const unsigned int global_col
+                    = index_mapping.local_to_global (block_col,
+                                                     block_sparsity.column_number(local_row,j));
+
+                  if (distribute[global_col] == numbers::invalid_unsigned_int)
+                                                     // distribute entry at irregular
+                                                     // row @p{row} and regular column
+                                                     // global_col.
+                    {
+                      for (unsigned int q=0; q!=lines[distribute[row]].entries.size(); ++q)
+                        sparsity.add (lines[distribute[row]].entries[q].first,
+                                      global_col);
+                    }
+                  else
+                                                     // distribute entry at irregular
+                                                     // row @p{row} and irregular column
+                                                     // @p{global_col}
+                    {
+                      for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+                        for (unsigned int q=0; q!=lines[distribute[global_col]].entries.size(); ++q)
+                          sparsity.add (lines[distribute[row]].entries[p].first,
+                                        lines[distribute[global_col]].entries[q].first);
+                    };
+                };
+            };
+        };
+    };
+}
+
+
+
+#ifdef DEAL_II_USE_TRILINOS
+
+                                   // this is a specialization for a
+                                   // parallel (non-block) Trilinos
+                                   // vector. The basic idea is to just work
+                                   // on the local range of the vector. But
+                                   // we need access to values that the
+                                   // local nodes are constrained to.
+
+template<>
+void
+ConstraintMatrix::distribute (TrilinosWrappers::MPI::Vector &vec) const
+{
+  Assert (sorted==true, ExcMatrixNotClosed());
+
+                                   //TODO: not implemented yet, we need to fix
+                                   //LocalRange() first to only include
+                                   //"owned" indices. For this we need to keep
+                                   //track of the owned indices, because
+                                   //Trilinos doesn't. Use same constructor
+                                   //interface as in PETSc with two IndexSets!
+  AssertThrow (vec.vector_partitioner().IsOneToOne(),
+               ExcMessage ("Distribute does not work on vectors with overlapping parallel partitioning."));
+
+  typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+  ConstraintLine index_comparison;
+  index_comparison.line = vec.local_range().first;
+  const constraint_iterator begin_my_constraints =
+    Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+  index_comparison.line = vec.local_range().second;
+  const constraint_iterator end_my_constraints
+    = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+                                   // Here we search all the indices that we
+                                   // need to have read-access to - the
+                                   // local nodes and all the nodes that the
+                                   // constraints indicate.
+  IndexSet my_indices (vec.size());
+  {
+    const std::pair<unsigned int, unsigned int>
+      local_range = vec.local_range();
+
+    my_indices.add_range (local_range.first, local_range.second);
+
+    std::set<unsigned int> individual_indices;
+    for (constraint_iterator it = begin_my_constraints;
+         it != end_my_constraints; ++it)
+      for (unsigned int i=0; i<it->entries.size(); ++i)
+        if ((it->entries[i].first < local_range.first)
+            ||
+            (it->entries[i].first >= local_range.second))
+          individual_indices.insert (it->entries[i].first);
+
+    my_indices.add_indices (individual_indices.begin(),
+                            individual_indices.end());
+  }
+
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+  const Epetra_MpiComm *mpi_comm
+    = dynamic_cast<const Epetra_MpiComm*>(&vec.trilinos_vector().Comm());
+
+  Assert (mpi_comm != 0, ExcInternalError());
+
+  TrilinosWrappers::MPI::Vector vec_distribute
+    (my_indices.make_trilinos_map (mpi_comm->Comm(), true));
+#else
+  TrilinosWrappers::MPI::Vector vec_distribute
+    (my_indices.make_trilinos_map (MPI_COMM_WORLD, true));
+#endif
+
+                                   // here we import the data
+  vec_distribute.reinit(vec,false,true);
+
+  for (constraint_iterator it = begin_my_constraints;
+       it != end_my_constraints; ++it)
+    {
+                                       // fill the entry in line it->line
+                                       // by adding up the different
+                                       // contributions
+      double new_value = it->inhomogeneity;
+      for (unsigned int i=0; i<it->entries.size(); ++i)
+        new_value += (vec_distribute(it->entries[i].first) *
+                      it->entries[i].second);
+      vec(it->line) = new_value;
+    }
+
+                                   // some processes might not apply
+                                   // constraints, so we need to state
+                                   // explicitly that the others are
+                                   // doing an insert here:
+  vec.compress (::dealii::VectorOperation::insert);
+}
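+
+                                   // (a minimal usage sketch; the names
+                                   // `constraints', `solver', etc. are
+                                   // hypothetical. after solving into a
+                                   // non-ghosted vector, one would call
+                                   //   solver.solve (system_matrix, solution,
+                                   //                 system_rhs, preconditioner);
+                                   //   constraints.distribute (solution);
+                                   // so that constrained entries receive
+                                   // their correct values)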
+
+
+
+template<>
+void
+ConstraintMatrix::distribute (TrilinosWrappers::MPI::BlockVector &vec) const
+{
+  Assert (sorted==true, ExcMatrixIsClosed());
+
+  IndexSet my_indices (vec.size());
+  for (unsigned int block=0; block<vec.n_blocks(); ++block)
+    {
+      typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+      ConstraintLine index_comparison;
+      index_comparison.line = vec.block(block).local_range().first
+        +vec.get_block_indices().block_start(block);
+      const constraint_iterator begin_my_constraints =
+        Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+      index_comparison.line = vec.block(block).local_range().second
+        +vec.get_block_indices().block_start(block);
+
+      const constraint_iterator end_my_constraints
+        = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+                                   // Here we search all the indices that we
+                                   // need to have read-access to - the local
+                                   // nodes and all the nodes that the
+                                   // constraints indicate. No caching is
+                                   // done yet; that would need some more
+                                   // clever data structures.
+      const std::pair<unsigned int, unsigned int>
+        local_range = vec.block(block).local_range();
+
+      my_indices.add_range (local_range.first, local_range.second);
+
+      std::set<unsigned int> individual_indices;
+      for (constraint_iterator it = begin_my_constraints;
+           it != end_my_constraints; ++it)
+        for (unsigned int i=0; i<it->entries.size(); ++i)
+          if ((it->entries[i].first < local_range.first)
+              ||
+              (it->entries[i].first >= local_range.second))
+            individual_indices.insert (it->entries[i].first);
+
+      my_indices.add_indices (individual_indices.begin(),
+                              individual_indices.end());
+    }
+
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+  const Epetra_MpiComm *mpi_comm
+    = dynamic_cast<const Epetra_MpiComm*>(&vec.block(0).trilinos_vector().Comm());
+
+  Assert (mpi_comm != 0, ExcInternalError());
+
+  TrilinosWrappers::MPI::Vector vec_distribute
+    (my_indices.make_trilinos_map (mpi_comm->Comm(), true));
+#else
+  TrilinosWrappers::MPI::Vector vec_distribute
+    (my_indices.make_trilinos_map (MPI_COMM_WORLD, true));
+#endif
+
+                                   // here we import the data
+  vec_distribute.reinit(vec,true);
+
+  for (unsigned int block=0; block<vec.n_blocks(); ++block)
+    {
+      typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+      ConstraintLine index_comparison;
+      index_comparison.line = vec.block(block).local_range().first
+        +vec.get_block_indices().block_start(block);
+      const constraint_iterator begin_my_constraints =
+        Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+      index_comparison.line = vec.block(block).local_range().second
+        +vec.get_block_indices().block_start(block);
+
+      const constraint_iterator end_my_constraints
+        = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+      for (constraint_iterator it = begin_my_constraints;
+           it != end_my_constraints; ++it)
+        {
+                                       // fill the entry in line it->line
+                                       // by adding up the different
+                                       // contributions
+          double new_value = it->inhomogeneity;
+          for (unsigned int i=0; i<it->entries.size(); ++i)
+            new_value += (vec_distribute(it->entries[i].first) *
+                          it->entries[i].second);
+          vec(it->line) = new_value;
+        }
+      vec.block(block).compress(::dealii::VectorOperation::insert);
+    }
+}
+
+#endif
+
+#ifdef DEAL_II_USE_PETSC
+
+                                   // this is a specialization for a
+                                   // parallel (non-block) PETSc
+                                   // vector. The basic idea is to just work
+                                   // on the local range of the vector. But
+                                   // we need access to values that the
+                                   // local nodes are constrained to.
+
+template<>
+void
+ConstraintMatrix::distribute (PETScWrappers::MPI::Vector &vec) const
+{
+  Assert (sorted==true, ExcMatrixIsClosed());
+
+  typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+  ConstraintLine index_comparison;
+  index_comparison.line = vec.local_range().first;
+  const constraint_iterator begin_my_constraints =
+    Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+  index_comparison.line = vec.local_range().second;
+  const constraint_iterator end_my_constraints
+    = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+                                   // all indices we need to read from
+  IndexSet my_indices (vec.size());
+
+  const std::pair<unsigned int, unsigned int>
+    local_range = vec.local_range();
+
+  my_indices.add_range (local_range.first, local_range.second);
+
+  std::set<unsigned int> individual_indices;
+  for (constraint_iterator it = begin_my_constraints;
+       it != end_my_constraints; ++it)
+    for (unsigned int i=0; i<it->entries.size(); ++i)
+      if ((it->entries[i].first < local_range.first)
+          ||
+          (it->entries[i].first >= local_range.second))
+        individual_indices.insert (it->entries[i].first);
+
+  my_indices.add_indices (individual_indices.begin(),
+                          individual_indices.end());
+
+  IndexSet local_range_is (vec.size());
+  local_range_is.add_range(local_range.first, local_range.second);
+
+
+                                   // create a vector and import those indices
+  PETScWrappers::MPI::Vector ghost_vec (vec.get_mpi_communicator(),
+                                        local_range_is,
+                                        my_indices);
+  ghost_vec = vec;
+  ghost_vec.update_ghost_values();
+
+                                   // finally apply the constraints that
+                                   // this process owns
+  for (constraint_iterator it = begin_my_constraints;
+       it != end_my_constraints; ++it)
+    {
+                                       // fill the entry in line it->line
+                                       // by adding up the different
+                                       // contributions
+      PetscScalar new_value = it->inhomogeneity;
+      for (unsigned int i=0; i<it->entries.size(); ++i)
+        new_value += (PetscScalar(ghost_vec(it->entries[i].first)) *
+                      it->entries[i].second);
+      vec(it->line) = new_value;
+    }
+
+  vec.compress ();
+}
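+
+                                   // (the ghosted-vector pattern used above,
+                                   // as a stand-alone sketch; the index sets
+                                   // and names are hypothetical:
+                                   //   IndexSet locally_owned (size);
+                                   //   locally_owned.add_range (first, last);
+                                   //   IndexSet needed (size);
+                                   //   needed.add_range (first, last);
+                                   //   needed.add_index (foreign_dof);
+                                   //   PETScWrappers::MPI::Vector ghosted
+                                   //     (communicator, locally_owned, needed);
+                                   //   ghosted = fully_distributed_vector;
+                                   //   ghosted.update_ghost_values();)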
+
+
+template<>
+void
+ConstraintMatrix::distribute (PETScWrappers::MPI::BlockVector &vec) const   // modified by shuqiangwang
+{
+  Assert (sorted==true, ExcMatrixIsClosed());
+
+                                   // distribute each block separately,
+                                   // reusing the non-block specialization
+                                   // above
+  for (unsigned int i=0; i<vec.n_blocks(); ++i)
+    distribute (vec.block(i));
+}
+
+#endif
+
+
+
+unsigned int ConstraintMatrix::n_constraints () const
+{
+  return lines.size();
+}
+
+
+
+bool ConstraintMatrix::is_identity_constrained (const unsigned int index) const
+{
+  if (is_constrained(index) == false)
+    return false;
+
+  const ConstraintLine & p = lines[lines_cache[calculate_line_index(index)]];
+  Assert (p.line == index, ExcInternalError());
+
+                                       // return if an entry for this
+                                       // line was found and if it has
+                                       // only one entry equal to 1.0
+  return ((p.entries.size() == 1) &&
+          (p.entries[0].second == 1.0));
+}
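+
+                                       // (for illustration: with a
+                                       // hypothetical object and indices,
+                                       //   constraints.add_line (12);
+                                       //   constraints.add_entry (12, 4, 1.0);
+                                       // sets up exactly the kind of
+                                       // constraint this function reports)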
+
+
+
+unsigned int ConstraintMatrix::max_constraint_indirections () const
+{
+  unsigned int return_value = 0;
+  for (std::vector<ConstraintLine>::const_iterator i=lines.begin();
+       i!=lines.end(); ++i)
+                                     // use static cast, since
+                                     // typeof(size)==std::size_t, which is !=
+                                     // unsigned int on AIX
+    return_value = std::max(return_value,
+                            static_cast<unsigned int>(i->entries.size()));
+
+  return return_value;
+}
+
+
+
+bool ConstraintMatrix::has_inhomogeneities () const
+{
+  for (std::vector<ConstraintLine>::const_iterator i=lines.begin();
+       i!=lines.end(); ++i)
+    if (i->inhomogeneity != 0.)
+      return true;
+
+  return false;
+}
+
+
+void ConstraintMatrix::print (std::ostream &out) const
+{
+  for (unsigned int i=0; i!=lines.size(); ++i)
+    {
+                                       // output the list of
+                                       // constraints as pairs of dofs
+                                       // and their weights
+      if (lines[i].entries.size() > 0)
+        {
+          for (unsigned int j=0; j<lines[i].entries.size(); ++j)
+            out << "    " << lines[i].line
+                << " " << lines[i].entries[j].first
+                << ":  " << lines[i].entries[j].second << "\n";
+
+                                       // print out inhomogeneity.
+          if (lines[i].inhomogeneity != 0)
+            out << "    " << lines[i].line
+                << ": " << lines[i].inhomogeneity << "\n";
+        }
+      else
+                                         // but also output something
+                                         // if the constraint simply
+                                         // reads x[13]=0, i.e. where
+                                         // the right hand side is not
+                                         // a linear combination of
+                                         // other dofs
+        {
+          if (lines[i].inhomogeneity != 0)
+            out << "    " << lines[i].line
+                << " = " << lines[i].inhomogeneity
+                << "\n";
+          else
+            out << "    " << lines[i].line << " = 0\n";
+        }
+    }
+
+  AssertThrow (out, ExcIO());
+}
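+
+                                       // (for example, a constraint
+                                       // x_13 = 0.5*x_5 + 0.5*x_7 + 1
+                                       // would be printed, following the
+                                       // format above, as
+                                       //     13 5:  0.5
+                                       //     13 7:  0.5
+                                       //     13: 1
+                                       // )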
+
+
+
+void
+ConstraintMatrix::write_dot (std::ostream &out) const
+{
+  out << "digraph constraints {"
+      << std::endl;
+  for (unsigned int i=0; i!=lines.size(); ++i)
+    {
+                                       // same concept as in the
+                                       // previous function
+      if (lines[i].entries.size() > 0)
+        for (unsigned int j=0; j<lines[i].entries.size(); ++j)
+          out << "  " << lines[i].line << "->" << lines[i].entries[j].first
+              << "; // weight: "
+              << lines[i].entries[j].second
+              << "\n";
+      else
+        out << "  " << lines[i].line << "\n";
+    }
+  out << "}" << std::endl;
+}
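+
+                                       // (the same example constraint
+                                       // x_13 = 0.5*x_5 + 0.5*x_7 appears
+                                       // in the output as two weighted
+                                       // edges,
+                                       //   13->5; // weight: 0.5
+                                       //   13->7; // weight: 0.5
+                                       // and the file can be rendered with
+                                       // graphviz, e.g.
+                                       //   dot -Tps constraints.dot -o constraints.ps)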
+
+
+
+std::size_t
+ConstraintMatrix::memory_consumption () const
+{
+  return (MemoryConsumption::memory_consumption (lines) +
+          MemoryConsumption::memory_consumption (lines_cache) +
+          MemoryConsumption::memory_consumption (sorted) +
+          MemoryConsumption::memory_consumption (local_lines));
+}
+
+
+
+
+
+// explicit instantiations
+//
+// define a list of functions for vectors and matrices, respectively, where
+// the vector/matrix can be replaced using a preprocessor variable
+// VectorType/MatrixType. note that we need a space between "VectorType" and
+// ">" to disambiguate ">>" when VectorType trails in an angle bracket
+
+// TODO: The way we define all the instantiations is probably not the very
+// best one. Try to find a better description.
+
+#define VECTOR_FUNCTIONS(VectorType) \
+  template void ConstraintMatrix::condense<VectorType >(const VectorType &uncondensed,\
+                                                       VectorType       &condensed) const;\
+  template void ConstraintMatrix::condense<VectorType >(VectorType &vec) const;\
+  template void ConstraintMatrix::condense<float,VectorType >(const SparseMatrix<float> &uncondensed, \
+                                                              const VectorType &uncondensed_vector, \
+                                                              SparseMatrix<float> &condensed, \
+                                                              VectorType       &condensed_vector) const; \
+  template void ConstraintMatrix::condense<double,VectorType >(const SparseMatrix<double> &uncondensed, \
+                                                               const VectorType &uncondensed_vector, \
+                                                               SparseMatrix<double> &condensed, \
+                                                               VectorType       &condensed_vector) const; \
+  template void ConstraintMatrix:: \
+    distribute_local_to_global<VectorType > (const Vector<double>            &, \
+                                             const std::vector<unsigned int> &, \
+                                             VectorType                      &, \
+                                             const FullMatrix<double>        &) const; \
+  template void ConstraintMatrix::distribute<VectorType >(const VectorType &condensed,\
+                                                         VectorType       &uncondensed) const;\
+  template void ConstraintMatrix::distribute<VectorType >(VectorType &vec) const
+
+#define PARALLEL_VECTOR_FUNCTIONS(VectorType) \
+  template void ConstraintMatrix:: \
+    distribute_local_to_global<VectorType > (const Vector<double>            &, \
+                                             const std::vector<unsigned int> &, \
+                                             VectorType                      &, \
+                                             const FullMatrix<double>        &) const
+
+
+// TODO: Can PETSc really do all the operations required by the above
+// condense/distribute function etc also on distributed vectors? Trilinos
+// can't do that - we have to rewrite those functions by hand if we want to
+// use them. The key is to use local ranges etc., which still needs to be
+// implemented.
+#ifdef DEAL_II_USE_PETSC
+VECTOR_FUNCTIONS(PETScWrappers::MPI::Vector);
+VECTOR_FUNCTIONS(PETScWrappers::MPI::BlockVector);
+#endif
+
+#ifdef DEAL_II_USE_TRILINOS
+PARALLEL_VECTOR_FUNCTIONS(TrilinosWrappers::MPI::Vector);
+PARALLEL_VECTOR_FUNCTIONS(TrilinosWrappers::MPI::BlockVector);
+#endif
+
+#define MATRIX_VECTOR_FUNCTIONS(MatrixType, VectorType) \
+template void ConstraintMatrix:: \
+distribute_local_to_global<MatrixType,VectorType > (const FullMatrix<double>        &, \
+                                                    const Vector<double>            &, \
+                                                    const std::vector<unsigned int> &, \
+                                                    MatrixType                      &, \
+                                                    VectorType                      &, \
+                                                    bool                             , \
+                                                    internal::bool2type<false>) const
+#define MATRIX_FUNCTIONS(MatrixType) \
+template void ConstraintMatrix:: \
+distribute_local_to_global<MatrixType,Vector<double> > (const FullMatrix<double>        &, \
+                                                        const Vector<double>            &, \
+                                                        const std::vector<unsigned int> &, \
+                                                        MatrixType                      &, \
+                                                        Vector<double>                  &, \
+                                                        bool                             , \
+                                                        internal::bool2type<false>) const
+#define BLOCK_MATRIX_VECTOR_FUNCTIONS(MatrixType, VectorType)   \
+template void ConstraintMatrix:: \
+distribute_local_to_global<MatrixType,VectorType > (const FullMatrix<double>        &, \
+                                                    const Vector<double>            &, \
+                                                    const std::vector<unsigned int> &, \
+                                                    MatrixType                      &, \
+                                                    VectorType                      &, \
+                                                    bool                             , \
+                                                    internal::bool2type<true>) const
+#define BLOCK_MATRIX_FUNCTIONS(MatrixType)      \
+template void ConstraintMatrix:: \
+distribute_local_to_global<MatrixType,Vector<double> > (const FullMatrix<double>        &, \
+                                                        const Vector<double>            &, \
+                                                        const std::vector<unsigned int> &, \
+                                                        MatrixType                      &, \
+                                                        Vector<double>                  &, \
+                                                        bool                             , \
+                                                        internal::bool2type<true>) const
+
+MATRIX_FUNCTIONS(SparseMatrix<double>);
+MATRIX_FUNCTIONS(SparseMatrix<float>);
+MATRIX_FUNCTIONS(FullMatrix<double>);
+MATRIX_FUNCTIONS(FullMatrix<float>);
+MATRIX_VECTOR_FUNCTIONS(SparseMatrix<float>, Vector<float>);
+
+BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrix<double>);
+BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrix<float>);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrix<double>, BlockVector<double>);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrix<float>,  BlockVector<float>);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrix<float>,  BlockVector<double>);
+
+MATRIX_FUNCTIONS(SparseMatrixEZ<double>);
+MATRIX_FUNCTIONS(SparseMatrixEZ<float>);
+MATRIX_VECTOR_FUNCTIONS(SparseMatrixEZ<float>,  Vector<float>);
+
+// BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrixEZ<double>);
+// BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrixEZ<float>,  Vector<float>);
+
+#ifdef DEAL_II_USE_PETSC
+MATRIX_FUNCTIONS(PETScWrappers::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(PETScWrappers::BlockSparseMatrix);
+MATRIX_FUNCTIONS(PETScWrappers::MPI::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix);
+MATRIX_VECTOR_FUNCTIONS(PETScWrappers::SparseMatrix, PETScWrappers::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(PETScWrappers::BlockSparseMatrix, PETScWrappers::BlockVector);
+MATRIX_VECTOR_FUNCTIONS(PETScWrappers::MPI::SparseMatrix, PETScWrappers::MPI::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix, PETScWrappers::MPI::BlockVector);
+#endif
+
+#ifdef DEAL_II_USE_TRILINOS
+MATRIX_FUNCTIONS(TrilinosWrappers::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix);
+MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::SparseMatrix, TrilinosWrappers::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix, TrilinosWrappers::BlockVector);
+MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::SparseMatrix, TrilinosWrappers::MPI::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix, TrilinosWrappers::MPI::BlockVector);
+#endif
+
+
+#define SPARSITY_FUNCTIONS(SparsityType) \
+  template void ConstraintMatrix::add_entries_local_to_global<SparsityType> (\
+    const std::vector<unsigned int> &, \
+    SparsityType &,                    \
+    const bool,                        \
+    const Table<2,bool> &, \
+    internal::bool2type<false>) const; \
+  template void ConstraintMatrix::add_entries_local_to_global<SparsityType> (\
+    const std::vector<unsigned int> &, \
+    const std::vector<unsigned int> &, \
+    SparsityType &,                    \
+    const bool,                        \
+    const Table<2,bool> &) const
+#define BLOCK_SPARSITY_FUNCTIONS(SparsityType) \
+  template void ConstraintMatrix::add_entries_local_to_global<SparsityType> (\
+    const std::vector<unsigned int> &, \
+    SparsityType &,                    \
+    const bool,                        \
+    const Table<2,bool> &, \
+    internal::bool2type<true>) const; \
+  template void ConstraintMatrix::add_entries_local_to_global<SparsityType> (\
+    const std::vector<unsigned int> &, \
+    const std::vector<unsigned int> &, \
+    SparsityType &,                    \
+    const bool,                        \
+    const Table<2,bool> &) const
+
+SPARSITY_FUNCTIONS(SparsityPattern);
+SPARSITY_FUNCTIONS(CompressedSparsityPattern);
+SPARSITY_FUNCTIONS(CompressedSetSparsityPattern);
+SPARSITY_FUNCTIONS(CompressedSimpleSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockCompressedSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockCompressedSetSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockCompressedSimpleSparsityPattern);
+
+#ifdef DEAL_II_USE_TRILINOS
+SPARSITY_FUNCTIONS(TrilinosWrappers::SparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(TrilinosWrappers::BlockSparsityPattern);
+#endif
+
+
+#define ONLY_MATRIX_FUNCTIONS(MatrixType) \
+  template void ConstraintMatrix::distribute_local_to_global<MatrixType > (\
+  const FullMatrix<double>        &, \
+  const std::vector<unsigned int> &, \
+  const std::vector<unsigned int> &, \
+  MatrixType                      &) const
+
+ONLY_MATRIX_FUNCTIONS(SparseMatrix<float>);
+ONLY_MATRIX_FUNCTIONS(SparseMatrix<double>);
+ONLY_MATRIX_FUNCTIONS(MatrixBlock<SparseMatrix<float> >);
+ONLY_MATRIX_FUNCTIONS(MatrixBlock<SparseMatrix<double> >);
+ONLY_MATRIX_FUNCTIONS(BlockSparseMatrix<float>);
+ONLY_MATRIX_FUNCTIONS(BlockSparseMatrix<double>);
+
+#ifdef DEAL_II_USE_TRILINOS
+ONLY_MATRIX_FUNCTIONS(TrilinosWrappers::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix);
+#endif
+
+#ifdef DEAL_II_USE_PETSC
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::BlockSparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::MPI::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix);
+#endif
+
+#include "constraint_matrix.inst"
+
+DEAL_II_NAMESPACE_CLOSE

Added: branches/s-wang/for_deal.II/source/lac/petsc_matrix_base.cc
===================================================================
--- branches/s-wang/for_deal.II/source/lac/petsc_matrix_base.cc	                        (rev 0)
+++ branches/s-wang/for_deal.II/source/lac/petsc_matrix_base.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,649 @@
+//---------------------------------------------------------------------------
+//    $Id: petsc_matrix_base.cc 26045 2012-08-21 09:38:15Z young $
+//    Version: $Name$
+//
+//    Copyright (C) 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+//    This file is subject to QPL and may not be  distributed
+//    without copyright and license information. Please refer
+//    to the file deal.II/doc/license.html for the  text  and
+//    further information on this license.
+//
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/lac/petsc_matrix_base.h>
+
+#ifdef DEAL_II_USE_PETSC
+
+#  include <deal.II/lac/petsc_full_matrix.h>
+#  include <deal.II/lac/petsc_sparse_matrix.h>
+#  include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#  include <deal.II/lac/petsc_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+  namespace MatrixIterators
+  {
+    void
+    MatrixBase::const_iterator::Accessor::
+    visit_present_row ()
+    {
+                                       // if we are asked to visit the
+                                       // past-the-end line, then simply
+                                       // release all our caches and go on
+                                       // with life
+      if (this->a_row == matrix->m())
+        {
+          colnum_cache.reset ();
+          value_cache.reset ();
+
+          return;
+        }
+
+                                       // otherwise first flush PETSc caches
+      matrix->compress ();
+
+                                       // get a representation of the present
+                                       // row
+      PetscInt           ncols;
+      const PetscInt    *colnums;
+      const PetscScalar *values;
+
+      int ierr;
+      ierr = MatGetRow(*matrix, this->a_row, &ncols, &colnums, &values);
+      AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+                                       // copy it into our caches if the line
+                                       // isn't empty. if it is, then we've
+                                       // done something wrong, since we
+                                       // shouldn't have initialized an
+                                       // iterator for an empty line (what
+                                       // would it point to?)
+      Assert (ncols != 0, ExcInternalError());
+      colnum_cache.reset (new std::vector<unsigned int> (colnums,
+                                                         colnums+ncols));
+      value_cache.reset (new std::vector<PetscScalar> (values, values+ncols));
+
+                                       // and finally restore the matrix
+      ierr = MatRestoreRow(*matrix, this->a_row, &ncols, &colnums, &values);
+      AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+    }
+  }
+
+
+
+  MatrixBase::MatrixBase ()
+                  :
+                  last_action (LastAction::none)
+  {}
+
+
+
+  MatrixBase::~MatrixBase ()
+  {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int ierr = MatDestroy (matrix);
+#else
+    const int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  void
+  MatrixBase::clear ()
+  {
+                                     // destroy the matrix...
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    int ierr = MatDestroy (matrix);
+#else
+    int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+                                     // ...and replace it by an empty
+                                     // sequential matrix
+    const int m=0, n=0, n_nonzero_per_row=0;
+    ierr = MatCreateSeqAIJ(PETSC_COMM_SELF, m, n, n_nonzero_per_row,
+                           0, &matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  MatrixBase &
+  MatrixBase::operator = (const value_type d)
+  {
+    Assert (d==value_type(), ExcScalarAssignmentOnlyForZeroValue());
+
+                                     // flush previously cached elements. this
+                                     // seems to be necessary since petsc
+                                     // 2.2.1, at least for parallel vectors
+                                     // (see test bits/petsc_64)
+    compress ();
+
+    const int ierr = MatZeroEntries (matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  void
+  MatrixBase::clear_row (const unsigned int row,
+                         const PetscScalar  new_diag_value)
+  {
+    compress ();
+
+                                     // now set all the entries of this row to
+                                     // zero
+    const PetscInt petsc_row = row;
+
+    IS index_set;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ISCreateGeneral (get_mpi_communicator(), 1, &petsc_row, &index_set);
+#else
+    ISCreateGeneral (get_mpi_communicator(), 1, &petsc_row, PETSC_COPY_VALUES, &index_set);
+#endif
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value);
+#else
+    const int ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value, PETSC_NULL, PETSC_NULL);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ISDestroy (index_set);
+#else
+    ISDestroy (&index_set);
+#endif
+
+    compress ();
+  }
+
+
+
+  void
+  MatrixBase::clear_rows (const std::vector<unsigned int> &rows,
+                          const PetscScalar                new_diag_value)
+  {
+    compress ();
+
+                                     // now set all the entries of these rows
+                                     // to zero
+    const std::vector<PetscInt> petsc_rows (rows.begin(), rows.end());
+
+                                     // call the functions. note that we have
+                                     // to call them even if #rows is empty,
+                                     // since this is a collective operation
+    IS index_set;
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ISCreateGeneral (get_mpi_communicator(), rows.size(),
+                     &petsc_rows[0], &index_set);
+#else
+    ISCreateGeneral (get_mpi_communicator(), rows.size(),
+                     &petsc_rows[0], PETSC_COPY_VALUES, &index_set);
+#endif
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value);
+#else
+    const int ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value, PETSC_NULL, PETSC_NULL);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    ISDestroy (index_set);
+#else
+    ISDestroy (&index_set);
+#endif
+
+    compress ();
+  }
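+
+                                     // (a typical use is eliminating rows
+                                     // of Dirichlet-constrained dofs while
+                                     // keeping a one on the diagonal; `A'
+                                     // and `boundary_rows' are hypothetical:
+                                     //   std::vector<unsigned int> boundary_rows;
+                                     //   ... // fill with constrained row indices
+                                     //   A.clear_rows (boundary_rows, 1.0);)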
+
+
+
+  PetscScalar
+  MatrixBase::el (const unsigned int i,
+                  const unsigned int j) const
+  {
+#ifdef PETSC_USE_64BIT_INDICES
+    PetscInt
+#else
+    int
+#endif
+      petsc_i = i, petsc_j = j;
+    PetscScalar value;
+
+    const int ierr
+      = MatGetValues (matrix, 1, &petsc_i, 1, &petsc_j,
+                      &value);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return value;
+  }
+
+
+
+  PetscScalar
+  MatrixBase::diag_element (const unsigned int i) const
+  {
+    Assert (m() == n(), ExcNotQuadratic());
+
+                                     // this doesn't seem to work any
+                                     // differently than for any other
+                                     // element
+    return el(i,i);
+  }
+
+
+
+  void
+  MatrixBase::compress (::dealii::VectorOperation::values operation)
+  {
+                                     // flush buffers
+    int ierr;
+    ierr = MatAssemblyBegin (matrix,MAT_FINAL_ASSEMBLY);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = MatAssemblyEnd (matrix,MAT_FINAL_ASSEMBLY);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    last_action = LastAction::none;
+  }
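+
+                                     // (a sketch of the intended call
+                                     // pattern, with hypothetical matrix
+                                     // `A' and indices `i', `j':
+                                     //   A.add (i, j, 1.0);
+                                     //   A.compress (VectorOperation::add);
+                                     //   const PetscScalar a_ij = A.el (i, j);
+                                     // PETSc wants its buffers flushed
+                                     // before any element is read)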
+
+
+
+  unsigned int
+  MatrixBase::m () const
+  {
+#ifdef PETSC_USE_64BIT_INDICES
+    PetscInt
+#else
+    int
+#endif
+      n_rows, n_cols;
+    int ierr = MatGetSize (matrix, &n_rows, &n_cols);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return n_rows;
+  }
+
+
+
+  unsigned int
+  MatrixBase::n () const
+  {
+#ifdef PETSC_USE_64BIT_INDICES
+    PetscInt
+#else
+    int
+#endif
+      n_rows, n_cols;
+    int ierr = MatGetSize (matrix, &n_rows, &n_cols);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return n_cols;
+  }
+
+
+
+  unsigned int
+  MatrixBase::local_size () const
+  {
+#ifdef PETSC_USE_64BIT_INDICES
+    PetscInt
+#else
+    int
+#endif
+      n_rows, n_cols;
+    int ierr = MatGetLocalSize (matrix, &n_rows, &n_cols);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return n_rows;
+  }
+
+
+
+  std::pair<unsigned int, unsigned int>
+  MatrixBase::local_range () const
+  {
+#ifdef PETSC_USE_64BIT_INDICES
+    PetscInt
+#else
+    int
+#endif
+      begin, end;
+    const int ierr = MatGetOwnershipRange (static_cast<const Mat &>(matrix),
+                                           &begin, &end);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return std::make_pair (begin, end);
+  }
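+
+                                     // (e.g., to visit only the locally
+                                     // stored rows of a hypothetical
+                                     // parallel matrix `A':
+                                     //   const std::pair<unsigned int,unsigned int>
+                                     //     range = A.local_range();
+                                     //   for (unsigned int row=range.first;
+                                     //        row<range.second; ++row)
+                                     //     ...;)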
+
+
+
+  unsigned int
+  MatrixBase::n_nonzero_elements () const
+  {
+    MatInfo mat_info;
+    const int ierr
+      = MatGetInfo (matrix, MAT_GLOBAL_SUM, &mat_info);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return static_cast<unsigned int>(mat_info.nz_used);
+  }
+
+
+
+  unsigned int
+  MatrixBase::
+  row_length (const unsigned int row) const
+  {
+//TODO: this function will probably only work if compress() was called on the
+//matrix previously. however, we can't do this here, since it would impose
+//global communication and one would have to make sure that this function is
+//called the same number of times from all processors, which is unreasonable
+//to require. there should simply be a way in PETSc to query the number of
+//entries in a row without going through compress(), but I can't find one
+    Assert (row < m(), ExcInternalError());
+
+                                     // get a representation of the present
+                                     // row
+    PetscInt ncols;
+    const PetscInt    *colnums;
+    const PetscScalar *values;
+
+//TODO: this is probably horribly inefficient; we should lobby for a way to
+//query this information from PETSc
+    int ierr;
+    ierr = MatGetRow(*this, row, &ncols, &colnums, &values);
+    AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+                                     // then restore the matrix and return the
+                                     // number of columns in this row as
+                                     // queried previously
+    ierr = MatRestoreRow(*this, row, &ncols, &colnums, &values);
+    AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+    return ncols;
+  }
+
+
+  PetscReal
+  MatrixBase::l1_norm () const
+  {
+    PetscReal result;
+
+    const int ierr
+      = MatNorm (matrix, NORM_1, &result);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return result;
+  }
+
+
+
+  PetscReal
+  MatrixBase::linfty_norm () const
+  {
+    PetscReal result;
+
+    const int ierr
+      = MatNorm (matrix, NORM_INFINITY, &result);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return result;
+  }
+
+
+
+  PetscReal
+  MatrixBase::frobenius_norm () const
+  {
+    PetscReal result;
+
+    const int ierr
+      = MatNorm (matrix, NORM_FROBENIUS, &result);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return result;
+  }
+
+
+                                   // compute the scalar product of v
+                                   // with A*v
+  PetscScalar
+  MatrixBase::matrix_norm_square (const VectorBase &v) const
+  {
+    Vector tmp(v.size());
+    vmult (tmp, v);
+    return tmp*v;
+  }
+
+
+                                   // compute the scalar product of u
+                                   // with A*v
+  PetscScalar
+  MatrixBase::matrix_scalar_product (const VectorBase &u,
+                                     const VectorBase &v) const
+  {
+    Vector tmp(v.size());
+    vmult (tmp, v);
+    return u*tmp;
+  }
+
+
+#if DEAL_II_PETSC_VERSION_GTE(3,1,0)
+  PetscReal
+  MatrixBase::trace () const
+  {
+    PetscReal result;
+
+    const int ierr
+      = MatGetTrace (matrix, &result);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return result;
+  }
+#endif
+
+
+
+  MatrixBase &
+  MatrixBase::operator *= (const PetscScalar a)
+  {
+    const int ierr = MatScale (matrix, a);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+
+  MatrixBase &
+  MatrixBase::operator /= (const PetscScalar a)
+  {
+    const PetscScalar factor = 1./a;
+    const int ierr = MatScale (matrix, factor);
+
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return *this;
+  }
+
+
+  void
+  MatrixBase::vmult (VectorBase       &dst,
+                     const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    const int ierr = MatMult (matrix, src, dst);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  void
+  MatrixBase::Tvmult (VectorBase       &dst,
+                      const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    const int ierr = MatMultTranspose (matrix, src, dst);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  void
+  MatrixBase::vmult_add (VectorBase       &dst,
+                         const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    const int ierr = MatMultAdd (matrix, src, dst, dst);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  void
+  MatrixBase::Tvmult_add (VectorBase       &dst,
+                          const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    const int ierr = MatMultTransposeAdd (matrix, src, dst, dst);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+  PetscScalar
+  MatrixBase::residual (VectorBase       &dst,
+                        const VectorBase &x,
+                        const VectorBase &b) const
+  {
+                                     // avoid the use of a temporary, and
+                                     // rather do one negation pass more than
+                                     // necessary
+    vmult (dst, x);
+    dst -= b;
+    dst *= -1;
+
+    return dst.l2_norm();
+  }
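+
+                                     // (e.g., a convergence check might
+                                     // read, with hypothetical vectors
+                                     // `r', `x', `b':
+                                     //   const PetscScalar res_norm = A.residual (r, x, b);
+                                     // after which `r' holds b - A x)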
+
+
+
+  MatrixBase::operator Mat () const
+  {
+    return matrix;
+  }
+
+  void
+  MatrixBase::transpose ()
+  {
+    int ierr = MatTranspose(matrix, MAT_REUSE_MATRIX, &matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+  PetscTruth
+#else
+  PetscBool
+#endif
+  MatrixBase::is_symmetric (const double tolerance)
+  {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth
+#else
+    PetscBool
+#endif
+      truth;
+                                       // First flush PETSc caches
+    compress ();
+    MatIsSymmetric (matrix, tolerance, &truth);
+    return truth;
+  }
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+  PetscTruth
+#else
+  PetscBool
+#endif
+  MatrixBase::is_hermitian (const double tolerance)
+  {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth
+#else
+    PetscBool
+#endif
+      truth;
+
+                                     // First flush PETSc caches
+    compress ();
+    MatIsHermitian (matrix, tolerance, &truth);
+
+    return truth;
+  }
+
+  void
+  MatrixBase::write_ascii ()
+  {
+                                       // First flush PETSc caches
+    compress ();
+
+                                       // Set options
+    PetscViewerSetFormat (PETSC_VIEWER_STDOUT_WORLD,
+                          PETSC_VIEWER_DEFAULT);
+
+                                       // Write to screen
+    MatView (matrix,PETSC_VIEWER_STDOUT_WORLD);
+  }
+
+
+
+  std::size_t
+  MatrixBase::memory_consumption() const
+  {
+    MatInfo info;
+    MatGetInfo(matrix, MAT_LOCAL, &info);
+
+    return sizeof(*this) + static_cast<unsigned int>(info.memory);
+  }
+
+
+  void MatrixBase::copy_from (const MatrixBase &source)    // added by shuqiangwang
+  {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    int ierr = MatDestroy (matrix);
+#else
+    int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = MatDuplicate (source.matrix, MAT_COPY_VALUES, &(this->matrix));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    this->last_action    = source.last_action;
+    this->column_indices = source.column_indices;
+    this->column_values  = source.column_values;
+  }
+
+  void MatrixBase::add (double factor, const MatrixBase &source)
+  {
+    const int ierr = MatAXPY (this->matrix, factor, source.matrix,
+                              DIFFERENT_NONZERO_PATTERN);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_PETSC

Added: branches/s-wang/for_deal.II/source/lac/trilinos_sparse_matrix.cc
===================================================================
--- branches/s-wang/for_deal.II/source/lac/trilinos_sparse_matrix.cc	                        (rev 0)
+++ branches/s-wang/for_deal.II/source/lac/trilinos_sparse_matrix.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,1518 @@
+//---------------------------------------------------------------------------
+//    $Id: trilinos_sparse_matrix.cc 25809 2012-08-09 13:41:07Z heister $
+//    Version: $Name$
+//
+//    Copyright (C) 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+//    This file is subject to QPL and may not be  distributed
+//    without copyright and license information. Please refer
+//    to the file deal.II/doc/license.html for the  text  and
+//    further information on this license.
+//
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+
+#ifdef DEAL_II_USE_TRILINOS
+
+#  include <deal.II/base/utilities.h>
+#  include <deal.II/lac/sparse_matrix.h>
+#  include <deal.II/lac/trilinos_sparsity_pattern.h>
+#  include <deal.II/lac/sparsity_pattern.h>
+#  include <deal.II/lac/compressed_sparsity_pattern.h>
+#  include <deal.II/lac/compressed_set_sparsity_pattern.h>
+#  include <deal.II/lac/compressed_simple_sparsity_pattern.h>
+
+#  include <ml_epetra_utils.h>
+#  include <ml_struct.h>
+#  include <Teuchos_RCP.hpp>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace MatrixIterators
+  {
+    void
+    SparseMatrix::const_iterator::Accessor::
+    visit_present_row ()
+    {
+                                  // if we are asked to visit the
+                                  // past-the-end line, then simply
+                                  // release all our caches and go on
+                                  // with life
+      if (this->a_row == matrix->m())
+        {
+          colnum_cache.reset ();
+          value_cache.reset ();
+
+          return;
+        }
+
+                                  // otherwise first flush Trilinos caches
+      matrix->compress ();
+
+                                  // get a representation of the present
+                                  // row
+      int ncols;
+      int colnums = matrix->n();   // length of the scratch arrays below
+      if (value_cache.get() == 0)
+        {
+          value_cache.reset (new std::vector<TrilinosScalar> (matrix->n()));
+          colnum_cache.reset (new std::vector<unsigned int> (matrix->n()));
+        }
+      else
+        {
+          value_cache->resize (matrix->n());
+          colnum_cache->resize (matrix->n());
+        }
+
+      int ierr = matrix->trilinos_matrix().
+        ExtractGlobalRowCopy((int)this->a_row,
+                             colnums,
+                             ncols, &((*value_cache)[0]),
+                             reinterpret_cast<int*>(&((*colnum_cache)[0])));
+      value_cache->resize (ncols);
+      colnum_cache->resize (ncols);
+      AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+                                  // the caches now hold the row's
+                                  // contents. the line shouldn't be
+                                  // empty: if it is, then we've done
+                                  // something wrong, since we shouldn't
+                                  // have initialized an iterator for an
+                                  // empty line (what would it point to?)
+    }
+  }
+
+
+                                  // The constructor is actually the
+                                  // only point where we have to check
+                                  // whether we build a serial or a
+                                  // parallel Trilinos matrix.
+                                  // Actually, it does not even matter
+                                  // how many threads there are, but
+                                  // only if we use an MPI compiler or
+                                  // a standard compiler. So, even one
+                                  // thread on a configuration with
+                                  // MPI will still get a parallel
+                                  // interface.
+  SparseMatrix::SparseMatrix ()
+                  :
+                  column_space_map (new Epetra_Map (0, 0,
+                                                     Utilities::Trilinos::comm_self())),
+                  matrix (new Epetra_FECrsMatrix(View, *column_space_map,
+                                                  *column_space_map, 0)),
+                  last_action (Zero),
+                  compressed (true)
+  {
+    matrix->FillComplete();
+  }
+
+
+
+  SparseMatrix::SparseMatrix (const Epetra_Map  &input_map,
+                              const unsigned int n_max_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map (input_map)),
+                  matrix (new Epetra_FECrsMatrix(Copy, *column_space_map,
+                                                  int(n_max_entries_per_row), false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const Epetra_Map                &input_map,
+                              const std::vector<unsigned int> &n_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map (input_map)),
+                  matrix (new Epetra_FECrsMatrix
+                          (Copy, *column_space_map,
+                           (int*)const_cast<unsigned int*>(&(n_entries_per_row[0])),
+                           false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const Epetra_Map  &input_row_map,
+                              const Epetra_Map  &input_col_map,
+                              const unsigned int n_max_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map (input_col_map)),
+                  matrix (new Epetra_FECrsMatrix(Copy, input_row_map,
+                                                 int(n_max_entries_per_row), false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const Epetra_Map                &input_row_map,
+                              const Epetra_Map                &input_col_map,
+                              const std::vector<unsigned int> &n_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map (input_col_map)),
+                  matrix (new Epetra_FECrsMatrix(Copy, input_row_map,
+                      (int*)const_cast<unsigned int*>(&(n_entries_per_row[0])),
+                                                 false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const unsigned int m,
+                              const unsigned int n,
+                              const unsigned int n_max_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map (n, 0,
+                                                    Utilities::Trilinos::comm_self())),
+
+                                   // on one processor only, we know how the
+                                   // columns of the matrix will be
+                                   // distributed (everything on one
+                                   // processor), so we can hand in this
+                                   // information to the constructor. we
+                                   // can't do so in parallel, where the
+                                   // information from columns is only
+                                   // available when entries have been added
+                  matrix (new Epetra_FECrsMatrix(Copy,
+                                                 Epetra_Map (m, 0,
+                                                             Utilities::Trilinos::comm_self()),
+                                                 *column_space_map,
+                                                 n_max_entries_per_row,
+                                                 false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const unsigned int               m,
+                              const unsigned int               n,
+                              const std::vector<unsigned int> &n_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map (n, 0,
+                                                    Utilities::Trilinos::comm_self())),
+                  matrix (new Epetra_FECrsMatrix(Copy,
+                                                 Epetra_Map (m, 0,
+                                                             Utilities::Trilinos::comm_self()),
+                                                 *column_space_map,
+                           (int*)const_cast<unsigned int*>(&(n_entries_per_row[0])),
+                                                 false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const IndexSet     &parallel_partitioning,
+                              const MPI_Comm     &communicator,
+                              const unsigned int n_max_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map(parallel_partitioning.
+                                                   make_trilinos_map(communicator, false))),
+                  matrix (new Epetra_FECrsMatrix(Copy,
+                                                 *column_space_map,
+                                                 n_max_entries_per_row,
+                                                 false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const IndexSet     &parallel_partitioning,
+                              const MPI_Comm     &communicator,
+                              const std::vector<unsigned int> &n_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map(parallel_partitioning.
+                                                   make_trilinos_map(communicator, false))),
+                  matrix (new Epetra_FECrsMatrix(Copy,
+                                                 *column_space_map,
+                           (int*)const_cast<unsigned int*>(&(n_entries_per_row[0])),
+                                                 false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const IndexSet     &row_parallel_partitioning,
+                              const IndexSet     &col_parallel_partitioning,
+                              const MPI_Comm     &communicator,
+                              const unsigned int n_max_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map(col_parallel_partitioning.
+                                                   make_trilinos_map(communicator, false))),
+                  matrix (new Epetra_FECrsMatrix(Copy,
+                                                 row_parallel_partitioning.
+                                                 make_trilinos_map(communicator, false),
+                                                 n_max_entries_per_row,
+                                                 false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const IndexSet     &row_parallel_partitioning,
+                              const IndexSet     &col_parallel_partitioning,
+                              const MPI_Comm     &communicator,
+                              const std::vector<unsigned int> &n_entries_per_row)
+                  :
+                  column_space_map (new Epetra_Map(col_parallel_partitioning.
+                                                   make_trilinos_map(communicator, false))),
+                  matrix (new Epetra_FECrsMatrix(Copy,
+                                                 row_parallel_partitioning.
+                                                 make_trilinos_map(communicator, false),
+                           (int*)const_cast<unsigned int*>(&(n_entries_per_row[0])),
+                                                 false)),
+                  last_action (Zero),
+                  compressed (false)
+  {}
+
+
+
+  SparseMatrix::SparseMatrix (const SparsityPattern &sparsity_pattern)
+                  :
+                  column_space_map (new Epetra_Map (sparsity_pattern.domain_partitioner())),
+                  matrix (new Epetra_FECrsMatrix(Copy,
+                                                 sparsity_pattern.trilinos_sparsity_pattern(),
+                                                 false)),
+                  last_action (Zero),
+                  compressed (true)
+  {
+    Assert(sparsity_pattern.trilinos_sparsity_pattern().Filled() == true,
+           ExcMessage("The Trilinos sparsity pattern has not been compressed."));
+    compress();
+  }
+
+
+
+  SparseMatrix::SparseMatrix (const SparseMatrix &input_matrix)
+                  :
+                  Subscriptor(),
+                  column_space_map (new Epetra_Map (input_matrix.domain_partitioner())),
+                  matrix (new Epetra_FECrsMatrix(*input_matrix.matrix)),
+                  last_action (Zero),
+                  compressed (true)
+  {}
+
+
+
+  SparseMatrix::~SparseMatrix ()
+  {}
+
+
+
+  void
+  SparseMatrix::copy_from (const SparseMatrix &m)
+  {
+
+                                   // check whether we need to update the
+                                   // partitioner or can just copy the data:
+                                   // in case we have the same distribution,
+                                   // we can just copy the data.
+    if (local_range() == m.local_range())
+      *matrix = *m.matrix;
+    else
+      {
+        column_space_map.reset (new Epetra_Map (m.domain_partitioner()));
+
+                                // release memory before reallocation
+        matrix.reset ();
+        temp_vector.clear ();
+        matrix.reset (new Epetra_FECrsMatrix(*m.matrix));
+      }
+
+    compress();
+  }
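+
+                                  // usage sketch (hypothetical names):
+                                  // copy_from() makes a deep copy and, if
+                                  // the parallel layouts differ, adopts
+                                  // the layout of its argument, e.g.
+                                  //
+                                  //   SparseMatrix B;
+                                  //   B.copy_from (A);    // B now equals A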
+
+
+
+  template <typename SparsityType>
+  void
+  SparseMatrix::reinit (const SparsityType &sparsity_pattern)
+  {
+    const Epetra_Map rows (sparsity_pattern.n_rows(),
+                           0,
+                           Utilities::Trilinos::comm_self());
+    const Epetra_Map columns (sparsity_pattern.n_cols(),
+                              0,
+                              Utilities::Trilinos::comm_self());
+
+    reinit (rows, columns, sparsity_pattern);
+  }
+
+
+
+  template <typename SparsityType>
+  void
+  SparseMatrix::reinit (const Epetra_Map    &input_map,
+                        const SparsityType  &sparsity_pattern,
+                        const bool           exchange_data)
+  {
+    reinit (input_map, input_map, sparsity_pattern, exchange_data);
+  }
+
+
+
+  template <typename SparsityType>
+  void
+  SparseMatrix::reinit (const Epetra_Map    &input_row_map,
+                        const Epetra_Map    &input_col_map,
+                        const SparsityType  &sparsity_pattern,
+                        const bool           exchange_data)
+  {
+                                // release memory before reallocation
+    temp_vector.clear();
+    matrix.reset();
+
+                                // if we want to exchange data, build
+                                // a usual Trilinos sparsity pattern
+                                // and let that handle the
+                                // exchange. otherwise, manually
+                                // create a CrsGraph, which consumes
+                                // considerably less memory because it
+                                // can set correct number of indices
+                                // right from the start
+    if (exchange_data)
+      {
+        SparsityPattern trilinos_sparsity;
+        trilinos_sparsity.reinit (input_row_map, input_col_map,
+                                  sparsity_pattern, exchange_data);
+        reinit (trilinos_sparsity);
+
+        return;
+      }
+
+    Assert (exchange_data == false, ExcNotImplemented());
+    if (input_row_map.Comm().MyPID() == 0)
+      {
+        AssertDimension (sparsity_pattern.n_rows(),
+                         static_cast<unsigned int>(input_row_map.NumGlobalElements()));
+        AssertDimension (sparsity_pattern.n_cols(),
+                         static_cast<unsigned int>(input_col_map.NumGlobalElements()));
+      }
+
+    column_space_map.reset (new Epetra_Map (input_col_map));
+
+    const unsigned int first_row = input_row_map.MinMyGID(),
+      last_row = input_row_map.MaxMyGID()+1;
+    std::vector<int> n_entries_per_row(last_row-first_row);
+
+    for (unsigned int row=first_row; row<last_row; ++row)
+      n_entries_per_row[row-first_row] = sparsity_pattern.row_length(row);
+
+                                  // The deal.II notation of a Sparsity
+                                  // pattern corresponds to the Epetra
+                                  // concept of a Graph. Hence, we generate
+                                  // a graph by copying the sparsity pattern
+                                  // into it, and then build up the matrix
+                                  // from the graph. This is considerably
+                                  // faster than directly filling elements
+                                  // into the matrix. Moreover, it consumes
+                                  // less memory, since the internal
+                                  // reordering is done on ints only, and we
+                                  // can leave the doubles aside.
+
+                                   // for more than one processor, need to
+                                   // specify only row map first and let the
+                                   // matrix entries decide about the column
+                                   // map (which says which columns are
+                                   // present in the matrix, not to be
+                                   // confused with the col_map that tells
+                                   // how the domain dofs of the matrix will
+                                   // be distributed). for only one
+                                   // processor, we can directly assign the
+                                   // columns as well. Compare this with bug
+                                   // # 4123 in the Sandia Bugzilla.
+    std_cxx1x::shared_ptr<Epetra_CrsGraph> graph;
+    if (input_row_map.Comm().NumProc() > 1)
+      graph.reset (new Epetra_CrsGraph (Copy, input_row_map,
+                                        &n_entries_per_row[0], true));
+    else
+      graph.reset (new Epetra_CrsGraph (Copy, input_row_map, input_col_map,
+                                        &n_entries_per_row[0], true));
+
+                                  // This function assumes that the
+                                  // sparsity pattern sits on all processors
+                                  // (completely). The parallel version uses
+                                  // an Epetra graph that is already
+                                  // distributed.
+
+                                  // now insert the indices
+    std::vector<int>   row_indices;
+
+    for (unsigned int row=first_row; row<last_row; ++row)
+      {
+        const int row_length = sparsity_pattern.row_length(row);
+        if (row_length == 0)
+          continue;
+
+        row_indices.resize (row_length, -1);
+
+        typename SparsityType::row_iterator col_num = sparsity_pattern.row_begin (row),
+          row_end = sparsity_pattern.row_end(row);
+        for (unsigned int col = 0; col_num != row_end; ++col_num, ++col)
+          row_indices[col] = *col_num;
+
+        graph->Epetra_CrsGraph::InsertGlobalIndices (row, row_length,
+                                                     &row_indices[0]);
+      }
+
+                                  // Eventually, optimize the graph
+                                  // structure (sort indices, make memory
+                                  // contiguous, etc).
+    graph->FillComplete(input_col_map, input_row_map);
+    graph->OptimizeStorage();
+
+                                   // check whether we got the number of
+                                   // columns right.
+    AssertDimension (sparsity_pattern.n_cols(),
+                     static_cast<unsigned int>(graph->NumGlobalCols()));
+
+                                  // And now finally generate the matrix.
+    matrix.reset (new Epetra_FECrsMatrix(Copy, *graph, false));
+    last_action = Zero;
+
+                                  // In the end, the matrix needs to
+                                  // be compressed in order to be
+                                  // really ready.
+    compress();
+  }
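+
+                                  // usage sketch (hypothetical names): the
+                                  // typical way to call this reinit()
+                                  // variant is with a compressed sparsity
+                                  // pattern that was filled beforehand:
+                                  //
+                                  //   CompressedSimpleSparsityPattern csp (n_rows, n_cols);
+                                  //   csp.add (i, j);     // repeat for all entries
+                                  //   A.reinit (row_map, col_map, csp, false);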
+
+
+
+  void
+  SparseMatrix::reinit (const SparsityPattern &sparsity_pattern)
+  {
+    temp_vector.clear ();
+    matrix.reset ();
+
+                                   // reinit with a (parallel) Trilinos
+                                   // sparsity pattern.
+    column_space_map.reset (new Epetra_Map
+                            (sparsity_pattern.domain_partitioner()));
+    matrix.reset (new Epetra_FECrsMatrix
+                  (Copy, sparsity_pattern.trilinos_sparsity_pattern(), false));
+    compress();
+  }
+
+
+
+  void
+  SparseMatrix::reinit (const SparseMatrix &sparse_matrix)
+  {
+    column_space_map.reset (new Epetra_Map (sparse_matrix.domain_partitioner()));
+    temp_vector.clear ();
+    matrix.reset ();
+    matrix.reset (new Epetra_FECrsMatrix
+                  (Copy, sparse_matrix.trilinos_sparsity_pattern(), false));
+
+    compress();
+  }
+
+
+
+  template <typename number>
+  void
+  SparseMatrix::reinit (const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                        const double                          drop_tolerance,
+                        const bool                            copy_values,
+                        const ::dealii::SparsityPattern      *use_this_sparsity)
+  {
+    const Epetra_Map rows (dealii_sparse_matrix.m(),
+                           0,
+                           Utilities::Trilinos::comm_self());
+    const Epetra_Map columns (dealii_sparse_matrix.n(),
+                              0,
+                              Utilities::Trilinos::comm_self());
+    reinit (rows, columns, dealii_sparse_matrix, drop_tolerance,
+            copy_values, use_this_sparsity);
+  }
+
+
+
+  template <typename number>
+  void
+  SparseMatrix::reinit (const Epetra_Map                     &input_map,
+                        const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                        const double                          drop_tolerance,
+                        const bool                            copy_values,
+                        const ::dealii::SparsityPattern      *use_this_sparsity)
+  {
+    reinit (input_map, input_map, dealii_sparse_matrix, drop_tolerance,
+            copy_values, use_this_sparsity);
+  }
+
+
+
+  template <typename number>
+  void
+  SparseMatrix::reinit (const Epetra_Map                     &input_row_map,
+                        const Epetra_Map                     &input_col_map,
+                        const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                        const double                          drop_tolerance,
+                        const bool                            copy_values,
+                        const ::dealii::SparsityPattern      *use_this_sparsity)
+  {
+    if (copy_values == false)
+      {
+                                   // in case we do not copy values, just
+                                   // call the other function.
+        if (use_this_sparsity == 0)
+          reinit (input_row_map, input_col_map,
+                  dealii_sparse_matrix.get_sparsity_pattern());
+        else
+          reinit (input_row_map, input_col_map,
+                  *use_this_sparsity);
+        return;
+      }
+
+    unsigned int n_rows = dealii_sparse_matrix.m();
+
+    Assert (input_row_map.NumGlobalElements() == (int)n_rows,
+            ExcDimensionMismatch (input_row_map.NumGlobalElements(),
+                                  n_rows));
+    Assert (input_col_map.NumGlobalElements() == (int)dealii_sparse_matrix.n(),
+            ExcDimensionMismatch (input_col_map.NumGlobalElements(),
+                                  dealii_sparse_matrix.n()));
+
+    const ::dealii::SparsityPattern & sparsity_pattern =
+      (use_this_sparsity!=0)? *use_this_sparsity :
+      dealii_sparse_matrix.get_sparsity_pattern();
+
+    if (matrix.get() != 0 && m() == n_rows &&
+        n_nonzero_elements() == sparsity_pattern.n_nonzero_elements())
+      goto set_matrix_values;
+
+    {
+      SparsityPattern trilinos_sparsity;
+      trilinos_sparsity.reinit (input_row_map, input_col_map, sparsity_pattern);
+      reinit (trilinos_sparsity);
+    }
+
+  set_matrix_values:
+                                // fill the values. the same as above: go
+                                // through all rows of the matrix, and then
+                                // all columns. since the sparsity patterns of
+                                // the input matrix and the specified sparsity
+                                // pattern might be different, need to go
+                                // through the row for both these sparsity
+                                // structures simultaneously in order to
+                                // really set the correct values.
+    const std::size_t * const in_rowstart_indices
+      = dealii_sparse_matrix.get_sparsity_pattern().get_rowstart_indices();
+    const unsigned int * const in_cols
+      = dealii_sparse_matrix.get_sparsity_pattern().get_column_numbers();
+    const unsigned int * cols = sparsity_pattern.get_column_numbers();
+    const std::size_t * rowstart_indices =
+      sparsity_pattern.get_rowstart_indices();
+
+    unsigned int maximum_row_length = matrix->MaxNumEntries();
+    std::vector<unsigned int> row_indices (maximum_row_length);
+    std::vector<TrilinosScalar> values (maximum_row_length);
+    std::size_t in_index, index;
+
+    for (unsigned int row=0; row<n_rows; ++row)
+      if (input_row_map.MyGID(row))
+        {
+          index = rowstart_indices[row];
+          in_index = in_rowstart_indices[row];
+          unsigned int col = 0;
+          if (sparsity_pattern.optimize_diagonal())
+            {
+              values[col] = dealii_sparse_matrix.global_entry(in_index);
+              row_indices[col++] = row;
+              ++index;
+              ++in_index;
+            }
+
+          while (in_index < in_rowstart_indices[row+1] &&
+                 index < rowstart_indices[row+1])
+            {
+                                // check the bound before dereferencing so
+                                // that we never read past the end of a row
+              while (index < rowstart_indices[row+1] &&
+                     cols[index] < in_cols[in_index])
+                ++index;
+              while (in_index < in_rowstart_indices[row+1] &&
+                     in_cols[in_index] < cols[index])
+                ++in_index;
+
+              if (std::fabs(dealii_sparse_matrix.global_entry(in_index)) > drop_tolerance)
+                {
+                  values[col] = dealii_sparse_matrix.global_entry(in_index);
+                  row_indices[col++] = in_cols[in_index];
+                }
+              ++index;
+              ++in_index;
+            }
+          set (row, col, reinterpret_cast<unsigned int*>(&row_indices[0]),
+               &values[0], false);
+        }
+
+    compress();
+  }
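+
+                                  // usage sketch (hypothetical names):
+                                  // convert a serial deal.II matrix,
+                                  // dropping entries of magnitude below
+                                  // 1e-13:
+                                  //
+                                  //   dealii::SparseMatrix<double> M (sp);
+                                  //   SparseMatrix A;
+                                  //   A.reinit (M, /*drop_tolerance=*/1e-13,
+                                  //             /*copy_values=*/true, 0);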
+
+
+
+  void
+  SparseMatrix::reinit (const Epetra_CrsMatrix &input_matrix,
+                        const bool              copy_values)
+  {
+    Assert (input_matrix.Filled()==true,
+            ExcMessage("Input CrsMatrix has not called FillComplete()!"));
+
+    column_space_map.reset (new Epetra_Map (input_matrix.DomainMap()));
+
+    const Epetra_CrsGraph *graph = &input_matrix.Graph();
+
+    temp_vector.clear ();
+    matrix.reset ();
+    matrix.reset (new Epetra_FECrsMatrix(Copy, *graph, false));
+
+    matrix->FillComplete (*column_space_map, input_matrix.RangeMap(), true);
+
+    if (copy_values == true)
+      {
+                                // point to the first data entry in the two
+                                // matrices and copy the content
+        const TrilinosScalar * in_values = input_matrix[0];
+        TrilinosScalar * values = (*matrix)[0];
+        const unsigned int my_nonzeros = input_matrix.NumMyNonzeros();
+        std::memcpy (&values[0], &in_values[0],
+                     my_nonzeros*sizeof (TrilinosScalar));
+      }
+
+    compress();
+  }
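+
+                                  // usage sketch (hypothetical names): wrap
+                                  // an existing Epetra matrix on which
+                                  // FillComplete() has already been called:
+                                  //
+                                  //   A.reinit (crs_matrix, /*copy_values=*/true);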
+
+
+
+  void
+  SparseMatrix::clear ()
+  {
+                                  // When we clear the matrix, reset
+                                  // the pointer and generate an
+                                  // empty matrix.
+    column_space_map.reset (new Epetra_Map (0, 0,
+                                            Utilities::Trilinos::comm_self()));
+    temp_vector.clear();
+    matrix.reset (new Epetra_FECrsMatrix(View, *column_space_map, 0));
+
+    matrix->FillComplete();
+
+    compressed = true;
+  }
+
+
+
+  void
+  SparseMatrix::clear_row (const unsigned int   row,
+                           const TrilinosScalar new_diag_value)
+  {
+    Assert (matrix->Filled()==true, ExcMatrixNotCompressed());
+
+                                  // Only do this on the rows owned
+                                  // locally on this processor.
+    int local_row = matrix->LRID(row);
+    if (local_row >= 0)
+      {
+        TrilinosScalar *values;
+        int *col_indices;
+        int num_entries;
+        const int ierr = matrix->ExtractMyRowView(local_row, num_entries,
+                                                  values, col_indices);
+
+        Assert (ierr == 0,
+                ExcTrilinosError(ierr));
+
+        int* diag_find = std::find(col_indices,col_indices+num_entries,
+                                   local_row);
+        int diag_index = (int)(diag_find - col_indices);
+
+        for (int j=0; j<num_entries; ++j)
+          if (diag_index != j || new_diag_value == 0)
+            values[j] = 0.;
+
+                                  // std::find returns the past-the-end
+                                  // pointer, not a null one, if the
+                                  // diagonal entry is absent, so compare
+                                  // against num_entries before
+                                  // dereferencing values[diag_index]
+        if (diag_index != num_entries &&
+            std::fabs(values[diag_index]) == 0.0 &&
+            new_diag_value != 0.0)
+          values[diag_index] = new_diag_value;
+      }
+  }
+
+
+
+  void
+  SparseMatrix::clear_rows (const std::vector<unsigned int> &rows,
+                            const TrilinosScalar             new_diag_value)
+  {
+    compress();
+    for (unsigned int row=0; row<rows.size(); ++row)
+      clear_row(rows[row], new_diag_value);
+
+                                        // This function needs to be called
+                                        // on all processors. We change some
+                                        // data, so we need to flush the
+                                        // buffers to make sure that the
+                                        // right data is used.
+    compress();
+  }
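+
+                                  // usage sketch (hypothetical names): a
+                                  // common use is to zero out rows subject
+                                  // to Dirichlet constraints while keeping
+                                  // the matrix invertible:
+                                  //
+                                  //   std::vector<unsigned int> bc_rows = ...;
+                                  //   A.clear_rows (bc_rows, /*new_diag_value=*/1.);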
+
+
+
+  TrilinosScalar
+  SparseMatrix::operator() (const unsigned int i,
+                            const unsigned int j) const
+  {
+                                      // Extract local indices in
+                                      // the matrix.
+    int trilinos_i = matrix->LRID(i), trilinos_j = matrix->LCID(j);
+    TrilinosScalar value = 0.;
+
+                                      // If the data is not on the
+                                      // present processor, we throw
+                                      // an exception. This is one of
+                                      // the two tiny differences to
+                                      // the el(i,j) call, which does
+                                      // not throw any assertions.
+    if (trilinos_i == -1)
+      {
+        Assert (false, ExcAccessToNonLocalElement(i, j, local_range().first,
+                                                  local_range().second));
+      }
+    else
+      {
+                                      // Check whether the matrix has
+                                      // already been transformed to local
+                                      // indices.
+        Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+                                      // Prepare pointers for extraction
+                                      // of a view of the row.
+        int nnz_present = matrix->NumMyEntries(trilinos_i);
+        int nnz_extracted;
+        int *col_indices;
+        TrilinosScalar *values;
+
+                                      // Generate the view and make
+                                      // sure that we have not generated
+                                      // an error.
+        int ierr = matrix->ExtractMyRowView(trilinos_i, nnz_extracted,
+                                            values, col_indices);
+        Assert (ierr==0, ExcTrilinosError(ierr));
+
+        Assert (nnz_present == nnz_extracted,
+                ExcDimensionMismatch(nnz_present, nnz_extracted));
+
+                                      // Search the index where we
+                                      // look for the value, and then
+                                      // finally get it.
+
+        int* el_find = std::find(col_indices, col_indices + nnz_present,
+                                 trilinos_j);
+
+        int local_col_index = (int)(el_find - col_indices);
+
+                                        // This is actually the only
+                                        // difference to the el(i,j)
+                                        // function, which means that
+                                        // we throw an exception in
+                                        // this case instead of just
+                                        // returning zero for an
+                                        // element that is not present
+                                        // in the sparsity pattern.
+        if (local_col_index == nnz_present)
+          {
+            Assert (false, ExcInvalidIndex (i,j));
+          }
+        else
+          value = values[local_col_index];
+      }
+
+    return value;
+  }
+
+
+
+  TrilinosScalar
+  SparseMatrix::el (const unsigned int i,
+                    const unsigned int j) const
+  {
+                                      // Extract local indices in
+                                      // the matrix.
+    int trilinos_i = matrix->LRID(i), trilinos_j = matrix->LCID(j);
+    TrilinosScalar value = 0.;
+
+                                      // If the data is not on the
+                                      // present processor, we can't
+                                      // continue. Just print out zero
+                                      // as discussed in the
+                                      // documentation of this
+                                      // function. if you want error
+                                      // checking, use operator().
+    if ((trilinos_i == -1 ) || (trilinos_j == -1))
+      return 0.;
+    else
+    {
+                                      // Check whether the matrix
+                                      // already is transformed to
+                                      // local indices.
+      Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+                                      // Prepare pointers for extraction
+                                      // of a view of the row.
+      int nnz_present = matrix->NumMyEntries(trilinos_i);
+      int nnz_extracted;
+      int *col_indices;
+      TrilinosScalar *values;
+
+                                      // Generate the view and make
+                                      // sure that we have not generated
+                                      // an error.
+      int ierr = matrix->ExtractMyRowView(trilinos_i, nnz_extracted,
+                                          values, col_indices);
+      Assert (ierr==0, ExcTrilinosError(ierr));
+
+      Assert (nnz_present == nnz_extracted,
+              ExcDimensionMismatch(nnz_present, nnz_extracted));
+
+                                      // Search the index where we
+                                      // look for the value, and then
+                                      // finally get it.
+      int* el_find = std::find(col_indices, col_indices + nnz_present,
+                               trilinos_j);
+
+      int local_col_index = (int)(el_find - col_indices);
+
+
+                                        // This is actually the only
+                                        // difference to the () function
+                                        // querying (i,j), where we throw an
+                                        // exception instead of just
+                                        // returning zero for an element
+                                        // that is not present in the
+                                        // sparsity pattern.
+      if (local_col_index == nnz_present)
+        value = 0;
+      else
+        value = values[local_col_index];
+    }
+
+    return value;
+  }
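+
+                                  // to summarize the two access functions
+                                  // above (sketch, hypothetical names):
+                                  //
+                                  //   TrilinosScalar a = A (i, j);    // asserts if (i,j) is not
+                                  //                                   // stored on this processor
+                                  //   TrilinosScalar b = A.el (i, j); // returns 0. in that case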
+
+
+
+  TrilinosScalar
+  SparseMatrix::diag_element (const unsigned int i) const
+  {
+    Assert (m() == n(), ExcNotQuadratic());
+
+#ifdef DEBUG
+                                  // use operator() in debug mode because
+                                  // it checks if this is a valid element
+                                  // (in parallel)
+    return operator()(i,i);
+#else
+                                  // Trilinos doesn't seem to have a
+                                  // more efficient way to access the
+                                  // diagonal than by just using the
+                                  // standard el(i,j) function.
+    return el(i,i);
+#endif
+  }
+
+
+
+  unsigned int
+  SparseMatrix::row_length (const unsigned int row) const
+  {
+    Assert (row < m(), ExcInternalError());
+
+                                  // get a representation of the
+                                  // present row
+    int ncols = -1;
+    int local_row = matrix->LRID(row);
+
+                                  // on the processor who owns this
+                                  // row, we'll have a non-negative
+                                  // value.
+    if (local_row >= 0)
+      {
+        int ierr = matrix->NumMyRowEntries (local_row, ncols);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+
+    return ncols;
+  }
+
+
+
+  namespace internals
+  {
+    void perform_mmult (const SparseMatrix &inputleft,
+                        const SparseMatrix &inputright,
+                        SparseMatrix       &result,
+                        const VectorBase   &V,
+                        const bool          transpose_left)
+    {
+      const bool use_vector = (V.size() == inputright.m());
+      if (transpose_left == false)
+        {
+          Assert (inputleft.n() == inputright.m(),
+                  ExcDimensionMismatch(inputleft.n(), inputright.m()));
+          Assert (inputleft.domain_partitioner().SameAs(inputright.range_partitioner()),
+                  ExcMessage ("Parallel partitioning of A and B does not fit."));
+        }
+      else
+        {
+          Assert (inputleft.m() == inputright.m(),
+                  ExcDimensionMismatch(inputleft.m(), inputright.m()));
+          Assert (inputleft.range_partitioner().SameAs(inputright.range_partitioner()),
+                  ExcMessage ("Parallel partitioning of A and B does not fit."));
+        }
+
+      result.clear();
+
+                                   // create a suitable operator B: in case
+                                   // we do not use a vector, all we need to
+                                   // do is to set the pointer. Otherwise,
+                                   // we insert the data from B, but
+                                   // multiply each row with the respective
+                                   // vector element.
+      Teuchos::RCP<Epetra_CrsMatrix> mod_B;
+      if (use_vector == false)
+        {
+          mod_B = Teuchos::rcp(const_cast<Epetra_CrsMatrix*>
+                               (&inputright.trilinos_matrix()),
+                               false);
+        }
+      else
+        {
+          mod_B = Teuchos::rcp(new Epetra_CrsMatrix
+                               (Copy, inputright.trilinos_sparsity_pattern()),
+                               true);
+          mod_B->FillComplete(inputright.domain_partitioner(),
+                              inputright.range_partitioner());
+          Assert (inputright.local_range() == V.local_range(),
+                  ExcMessage ("Parallel distribution of matrix B and vector V "
+                              "does not match."));
+
+          const int local_N = inputright.local_size();
+          for (int i=0; i<local_N; ++i)
+            {
+              int N_entries = -1;
+              double *new_data, *B_data;
+              mod_B->ExtractMyRowView (i, N_entries, new_data);
+              inputright.trilinos_matrix().ExtractMyRowView (i, N_entries, B_data);
+              double value = V.trilinos_vector()[0][i];
+              for (int j=0; j<N_entries; ++j)
+                new_data[j] = value * B_data[j];
+            }
+        }
+
+                                   // use ML built-in method for performing
+                                   // the matrix-matrix product.
+                                   // create ML operators on top of the
+                                   // Epetra matrices. if we use a
+                                   // transposed matrix, let ML know it
+      ML_Comm* comm;
+      ML_Comm_Create(&comm);
+#ifdef ML_MPI
+                                   // get the MPI communicator, as it may
+                                   // not be MPI_COMM_WORLD, and update the
+                                   // ML comm object accordingly
+      const Epetra_MpiComm *epcomm = dynamic_cast<const Epetra_MpiComm*>
+        (&(inputleft.trilinos_matrix().Comm()));
+      if (epcomm)
+        ML_Comm_Set_UsrComm(comm,epcomm->Comm());
+#endif
+      ML_Operator *A_ = ML_Operator_Create(comm);
+      ML_Operator *B_ = ML_Operator_Create(comm);
+      ML_Operator *C_ = ML_Operator_Create(comm);
+      SparseMatrix transposed_mat;
+
+      if (transpose_left == false)
+        ML_Operator_WrapEpetraCrsMatrix
+          (const_cast<Epetra_CrsMatrix*>(&inputleft.trilinos_matrix()),A_,
+           false);
+      else
+        {
+                                // create transposed matrix
+          SparsityPattern sparsity_transposed (inputleft.domain_partitioner(),
+                                               inputleft.range_partitioner());
+          Assert (inputleft.domain_partitioner().LinearMap() == true,
+                  ExcMessage("Matrix must be partitioned contiguously between procs."));
+          for (unsigned int i=0; i<inputleft.local_size(); ++i)
+            {
+              int num_entries, * indices;
+              inputleft.trilinos_sparsity_pattern().ExtractMyRowView(i, num_entries,
+                                                                     indices);
+              Assert (num_entries >= 0, ExcInternalError());
+              const unsigned int GID = inputleft.row_partitioner().GID(i);
+              for (int j=0; j<num_entries; ++j)
+                sparsity_transposed.add (inputleft.col_partitioner().GID(indices[j]),
+                                         GID);
+            }
+
+          sparsity_transposed.compress();
+          transposed_mat.reinit (sparsity_transposed);
+          for (unsigned int i=0; i<inputleft.local_size(); ++i)
+            {
+              int num_entries, * indices;
+              double * values;
+              inputleft.trilinos_matrix().ExtractMyRowView(i, num_entries,
+                                                           values, indices);
+              Assert (num_entries >= 0, ExcInternalError());
+              const unsigned int GID = inputleft.row_partitioner().GID(i);
+              for (int j=0; j<num_entries; ++j)
+                transposed_mat.set (inputleft.col_partitioner().GID(indices[j]),
+                                    GID, values[j]);
+            }
+          transposed_mat.compress();
+          ML_Operator_WrapEpetraCrsMatrix
+            (const_cast<Epetra_CrsMatrix*>(&transposed_mat.trilinos_matrix()),
+             A_,false);
+        }
+      ML_Operator_WrapEpetraCrsMatrix(mod_B.get(),B_,false);
+
+                                   // We implement the multiplication by
+                                   // hand in a similar way as is done in
+                                   // ml/src/Operator/ml_rap.c for a triple
+                                   // matrix product. This means that the
+                                   // code is very similar to the one found
+                                   // in ml/src/Operator/ml_rap.c
+
+                                   // import data if necessary
+      ML_Operator *Btmp, *Ctmp, *Ctmp2, *tptr;
+      ML_CommInfoOP *getrow_comm;
+      int max_per_proc;
+      int N_input_vector = B_->invec_leng;
+      getrow_comm = B_->getrow->pre_comm;
+      if ( getrow_comm != NULL)
+        for (int i = 0; i < getrow_comm->N_neighbors; i++)
+          for (int j = 0; j < getrow_comm->neighbors[i].N_send; j++)
+            AssertThrow (getrow_comm->neighbors[i].send_list[j] < N_input_vector,
+                         ExcInternalError());
+
+      ML_create_unique_col_id(N_input_vector, &(B_->getrow->loc_glob_map),
+                              getrow_comm, &max_per_proc, B_->comm);
+      B_->getrow->use_loc_glob_map = ML_YES;
+      if (A_->getrow->pre_comm != NULL)
+        ML_exchange_rows( B_, &Btmp, A_->getrow->pre_comm);
+      else Btmp = B_;
+
+                                   // perform matrix-matrix product
+      ML_matmat_mult(A_, Btmp , &Ctmp);
+
+                                   // release temporary structures we needed
+                                   // for multiplication
+      ML_free(B_->getrow->loc_glob_map);
+      B_->getrow->loc_glob_map = NULL;
+      B_->getrow->use_loc_glob_map = ML_NO;
+      if (A_->getrow->pre_comm != NULL)
+        {
+          tptr = Btmp;
+          while ( (tptr!= NULL) && (tptr->sub_matrix != B_))
+            tptr = tptr->sub_matrix;
+          if (tptr != NULL) tptr->sub_matrix = NULL;
+          ML_RECUR_CSR_MSRdata_Destroy(Btmp);
+          ML_Operator_Destroy(&Btmp);
+        }
+
+                                   // make correct data structures
+      if (A_->getrow->post_comm != NULL)
+        ML_exchange_rows(Ctmp, &Ctmp2, A_->getrow->post_comm);
+      else
+        Ctmp2 = Ctmp;
+
+      ML_back_to_csrlocal(Ctmp2, C_, max_per_proc);
+
+      ML_RECUR_CSR_MSRdata_Destroy (Ctmp);
+      ML_Operator_Destroy (&Ctmp);
+
+      if (A_->getrow->post_comm != NULL)
+        {
+          ML_RECUR_CSR_MSRdata_Destroy(Ctmp2);
+          ML_Operator_Destroy (&Ctmp2);
+        }
+
+                                   // create an Epetra matrix from the ML
+                                   // matrix that we got as a result.
+      Epetra_CrsMatrix * C_mat;
+      ML_Operator2EpetraCrsMatrix(C_, C_mat);
+      C_mat->FillComplete();
+      C_mat->OptimizeStorage();
+      result.reinit (*C_mat);
+
+                                   // destroy allocated memory
+      delete C_mat;
+      ML_Operator_Destroy (&A_);
+      ML_Operator_Destroy (&B_);
+      ML_Operator_Destroy (&C_);
+      ML_Comm_Destroy (&comm);
+    }
+  }
+
+
+  void
+  SparseMatrix::mmult (SparseMatrix       &C,
+                       const SparseMatrix &B,
+                       const VectorBase   &V) const
+  {
+    internals::perform_mmult (*this, B, C, V, false);
+  }
+
+
+
+  void
+  SparseMatrix::Tmmult (SparseMatrix       &C,
+                        const SparseMatrix &B,
+                        const VectorBase   &V) const
+  {
+    internals::perform_mmult (*this, B, C, V, true);
+  }
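+
+                                  // usage sketch (hypothetical names): with
+                                  // the two wrappers above one can form the
+                                  // (optionally diagonally scaled) products
+                                  //
+                                  //   A.mmult  (C, B, V);    // C = A   * diag(V) * B
+                                  //   A.Tmmult (C, B, V);    // C = A^T * diag(V) * B
+                                  //
+                                  // where the scaling by V is skipped if
+                                  // V.size() != B.m().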
+
+
+
+  void
+  SparseMatrix::add (const TrilinosScalar  factor,
+                     const SparseMatrix   &rhs)
+  {
+    Assert (rhs.m() == m(), ExcDimensionMismatch (rhs.m(), m()));
+    Assert (rhs.n() == n(), ExcDimensionMismatch (rhs.n(), n()));
+
+    const std::pair<unsigned int, unsigned int>
+      local_range = rhs.local_range();
+
+    int ierr;
+
+                                   // If both matrices have been transformed
+                                   // to local index space (in Trilinos
+                                   // speak: they are filled), we're having
+                                   // matrices based on the same indices
+                                   // with the same number of nonzeros
+                                   // (actually, we'd need sparsity pattern,
+                                   // but that is too expensive to check),
+                                   // we can extract views of the column
+                                   // data on both matrices and simply
+                                   // manipulate the values that are
+                                   // addressed by the pointers.
+    if (matrix->Filled() == true &&
+        rhs.matrix->Filled() == true &&
+        this->local_range() == local_range &&
+        matrix->NumMyNonzeros() == rhs.matrix->NumMyNonzeros())
+      for (unsigned int row=local_range.first;
+           row < local_range.second; ++row)
+        {
+          Assert (matrix->NumGlobalEntries(row) ==
+                  rhs.matrix->NumGlobalEntries(row),
+                  ExcDimensionMismatch(matrix->NumGlobalEntries(row),
+                                       rhs.matrix->NumGlobalEntries(row)));
+
+          const int row_local = matrix->RowMap().LID(row);
+          int n_entries, rhs_n_entries;
+          TrilinosScalar *value_ptr, *rhs_value_ptr;
+
+                                   // In debug mode, we want to check
+                                   // whether the indices really are the
+                                   // same in the calling matrix and the
+                                   // input matrix. The reason for doing
+                                   // this only in debug mode is that both
+                                   // extracting indices and comparing
+                                   // indices is relatively slow compared to
+                                   // just working with the values.
+#ifdef DEBUG
+          int *index_ptr, *rhs_index_ptr;
+          ierr = rhs.matrix->ExtractMyRowView (row_local, rhs_n_entries,
+                                               rhs_value_ptr, rhs_index_ptr);
+          Assert (ierr == 0, ExcTrilinosError(ierr));
+
+          ierr = matrix->ExtractMyRowView (row_local, n_entries, value_ptr,
+                                           index_ptr);
+          Assert (ierr == 0, ExcTrilinosError(ierr));
+#else
+          rhs.matrix->ExtractMyRowView (row_local, rhs_n_entries,rhs_value_ptr);
+          matrix->ExtractMyRowView (row_local, n_entries, value_ptr);
+#endif
+
+          AssertThrow (n_entries == rhs_n_entries,
+                       ExcDimensionMismatch (n_entries, rhs_n_entries));
+
+          for (int i=0; i<n_entries; ++i)
+            {
+              *value_ptr++ += *rhs_value_ptr++ * factor;
+#ifdef DEBUG
+              Assert (*index_ptr++ == *rhs_index_ptr++,
+                      ExcInternalError());
+#endif
+            }
+        }
+                                   // If we have different sparsity patterns
+                                   // (expressed by a different number of
+                                   // nonzero elements), we have to be more
+                                   // careful and extract a copy of the row
+                                   // data, multiply it by the factor and
+                                   // then add it to the matrix using the
+                                   // respective add() function.
+    else
+      {
+        unsigned int max_row_length = 0;
+        for (unsigned int row=local_range.first;
+           row < local_range.second; ++row)
+            max_row_length
+              = std::max (max_row_length,
+                          static_cast<unsigned int>(rhs.matrix->NumGlobalEntries(row)));
+
+        std::vector<int>            column_indices (max_row_length);
+        std::vector<TrilinosScalar> values (max_row_length);
+
+        if (matrix->Filled() == true && rhs.matrix->Filled() == true &&
+            this->local_range() == local_range)
+          for (unsigned int row=local_range.first;
+               row < local_range.second; ++row)
+            {
+              const int row_local = matrix->RowMap().LID(row);
+              int n_entries;
+
+              ierr = rhs.matrix->ExtractMyRowCopy (row_local, max_row_length,
+                                                   n_entries,
+                                                   &values[0],
+                                                   &column_indices[0]);
+              Assert (ierr == 0, ExcTrilinosError(ierr));
+
+              for (int i=0; i<n_entries; ++i)
+                values[i] *= factor;
+
+              TrilinosScalar *value_ptr = &values[0];
+
+              ierr = matrix->SumIntoMyValues (row_local, n_entries, value_ptr,
+                                              &column_indices[0]);
+              Assert (ierr == 0, ExcTrilinosError(ierr));
+            }
+        else
+          {
+            for (unsigned int row=local_range.first;
+                 row < local_range.second; ++row)
+              {
+                int n_entries;
+                ierr = rhs.matrix->Epetra_CrsMatrix::ExtractGlobalRowCopy
+                    ((int)row, max_row_length, n_entries, &values[0], &column_indices[0]);
+                Assert (ierr == 0, ExcTrilinosError(ierr));
+
+                for (int i=0; i<n_entries; ++i)
+                  values[i] *= factor;
+
+                ierr = matrix->Epetra_CrsMatrix::SumIntoGlobalValues
+                    ((int)row, n_entries, &values[0], &column_indices[0]);
+                Assert (ierr == 0, ExcTrilinosError(ierr));
+              }
+            compress ();
+
+          }
+      }
+  }
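+
+                                  // usage sketch (hypothetical names):
+                                  //
+                                  //   A.add (2.0, B);    // A += 2*B; fast path if both
+                                  //                      // sparsity patterns coincide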
+
+
+
+  void
+  SparseMatrix::transpose ()
+  {
+                                  // This only flips a flag that tells
+                                  // Trilinos that any vmult operation
+                                  // should be done with the
+                                  // transpose. However, the matrix
+                                  // structure is not reset.
+    int ierr;
+
+    if (!matrix->UseTranspose())
+      {
+        ierr = matrix->SetUseTranspose (true);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+    else
+      {
+        ierr = matrix->SetUseTranspose (false);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+  }
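+
+                                  // usage sketch (hypothetical names;
+                                  // assumes the usual vmult() interface of
+                                  // this wrapper class):
+                                  //
+                                  //   A.transpose ();
+                                  //   A.vmult (y, x);    // y = A^T x
+                                  //   A.transpose ();    // back to normal mode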
+
+
+  /**
+   * shuqiangwang: print the matrix in the same format PETSc uses, so that
+   * the output of the two back-ends can be compared directly.
+   */
+  void
+  SparseMatrix::write_ascii ()
+  {
+    double *values;
+    int    *indices;
+    int     num_entries;
+
+    for (int i=0; i<matrix->NumMyRows(); ++i)
+      {
+        std::cout << "row " << matrix->GRID(i) << ":";
+        matrix->ExtractMyRowView (i, num_entries, values, indices);
+        for (int j=0; j<num_entries; ++j)
+          std::cout << " (" << matrix->GCID(indices[j])
+                    << ", " << values[j] << ") ";
+        std::cout << std::endl;
+      }
+  }
+
+
+
+                                  // As of now, no particularly neat
+                                  // output is generated in case of
+                                  // multiple processors.
+  void
+  SparseMatrix::print (std::ostream &out,
+                       const bool    print_detailed_trilinos_information) const
+  {
+    if (print_detailed_trilinos_information == true)
+      out << *matrix;
+    else
+      {
+        double * values;
+        int * indices;
+        int num_entries;
+
+        for (int i=0; i<matrix->NumMyRows(); ++i)
+          {
+            matrix->ExtractMyRowView (i, num_entries, values, indices);
+            for (int j=0; j<num_entries; ++j)
+              out << "(" << matrix->GRID(i) << "," << matrix->GCID(indices[j]) << ") "
+                  << values[j] << std::endl;
+          }
+      }
+
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  std::size_t
+  SparseMatrix::memory_consumption () const
+  {
+    unsigned int static_memory = sizeof(*this) + sizeof (*matrix)
+      + sizeof(*matrix->Graph().DataPtr());
+    return ((sizeof(TrilinosScalar)+sizeof(int))*matrix->NumMyNonzeros() +
+            sizeof(int)*local_size() +
+            static_memory);
+  }
+
+
+
+
+  // explicit instantiations
+  //
+  template void
+  SparseMatrix::reinit (const dealii::SparsityPattern &);
+  template void
+  SparseMatrix::reinit (const CompressedSparsityPattern &);
+  template void
+  SparseMatrix::reinit (const CompressedSetSparsityPattern &);
+  template void
+  SparseMatrix::reinit (const CompressedSimpleSparsityPattern &);
+
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const dealii::SparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const CompressedSparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const CompressedSetSparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const CompressedSimpleSparsityPattern &,
+                        const bool);
+
+
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const dealii::SparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const CompressedSparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const CompressedSimpleSparsityPattern &,
+                        const bool);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const CompressedSetSparsityPattern &,
+                        const bool);
+
+  template void
+  SparseMatrix::reinit (const dealii::SparseMatrix<float> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+  template void
+  SparseMatrix::reinit (const dealii::SparseMatrix<double> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+  template void
+  SparseMatrix::reinit (const dealii::SparseMatrix<long double> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const dealii::SparseMatrix<float> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const dealii::SparseMatrix<double> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const dealii::SparseMatrix<long double> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const dealii::SparseMatrix<float> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const dealii::SparseMatrix<double> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+  template void
+  SparseMatrix::reinit (const Epetra_Map &,
+                        const Epetra_Map &,
+                        const dealii::SparseMatrix<long double> &,
+                        const double,
+                        const bool,
+                        const dealii::SparsityPattern *);
+
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_TRILINOS

Added: branches/s-wang/for_deal.II/source/lac/trilinos_vector_base.cc
===================================================================
--- branches/s-wang/for_deal.II/source/lac/trilinos_vector_base.cc	                        (rev 0)
+++ branches/s-wang/for_deal.II/source/lac/trilinos_vector_base.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,459 @@
+//---------------------------------------------------------------------------
+//    $Id: trilinos_vector_base.cc 26093 2012-08-22 21:37:41Z heister $
+//    Version: $Name$
+//
+//    Copyright (C) 2008, 2010, 2011, 2012 by the deal.II authors
+//
+//    This file is subject to QPL and may not be  distributed
+//    without copyright and license information. Please refer
+//    to the file deal.II/doc/license.html for the  text  and
+//    further information on this license.
+//
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/trilinos_vector_base.h>
+
+#ifdef DEAL_II_USE_TRILINOS
+
+#  include <cmath>
+#  include <Epetra_Import.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+  namespace internal
+  {
+    VectorReference::operator TrilinosScalar () const
+    {
+      Assert (index < vector.size(),
+              ExcIndexRange (index, 0, vector.size()));
+
+                                        // Trilinos allows for vectors
+                                        // to be referenced by the [] or
+                                        // () operators but only ()
+                                        // checks index bounds. We check
+                                        // these bounds by ourselves, so
+                                        // we can use []. Note that we
+                                        // can only get local values.
+
+      const int local_index = vector.vector->Map().LID(index);
+      Assert (local_index >= 0,
+              ExcAccessToNonLocalElement (index,
+                                          vector.vector->Map().MinMyGID(),
+                                          vector.vector->Map().MaxMyGID()));
+
+
+      return (*(vector.vector))[0][local_index];
+    }
+  }
+
+
+
+  VectorBase::VectorBase ()
+                        :
+                        last_action (Zero),
+                        compressed  (true),
+                        has_ghosts  (false),
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+                        vector(new Epetra_FEVector(
+                                 Epetra_Map(0,0,Epetra_MpiComm(MPI_COMM_SELF))))
+#else
+                        vector(new Epetra_FEVector(
+                                 Epetra_Map(0,0,Epetra_SerialComm())))
+#endif
+  {}
+
+
+
+  VectorBase::VectorBase (const VectorBase &v)
+                        :
+                        Subscriptor(),
+                        last_action (Zero),
+                        compressed (true),
+                        has_ghosts  (v.has_ghosts),
+                        vector(new Epetra_FEVector(*v.vector))
+  {}
+
+
+
+  VectorBase::~VectorBase ()
+  {}
+
+
+
+  void
+  VectorBase::clear ()
+  {
+                                     // When we clear the vector,
+                                     // reset the pointer and generate
+                                     // an empty vector.
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+    Epetra_Map map (0, 0, Epetra_MpiComm(MPI_COMM_SELF));
+#else
+    Epetra_Map map (0, 0, Epetra_SerialComm());
+#endif
+
+    has_ghosts = false;
+    vector.reset (new Epetra_FEVector(map));
+    last_action = Zero;
+  }
+
+
+
+  VectorBase &
+  VectorBase::operator = (const VectorBase &v)
+  {
+    Assert (vector.get() != 0,
+            ExcMessage("Vector is not constructed properly."));
+
+    if (local_range() != v.local_range())
+      {
+        last_action = Zero;
+        vector.reset (new Epetra_FEVector(*v.vector));
+        has_ghosts = v.has_ghosts;
+      }
+    else
+      {
+        Assert (vector->Map().SameAs(v.vector->Map()) == true,
+                ExcMessage ("The Epetra maps in the assignment operator ="
+                            " do not match, even though the local_range "
+                            " seems to be the same. Check vector setup!"));
+        int ierr;
+        ierr = vector->GlobalAssemble(last_action);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        ierr = vector->Update(1.0, *v.vector, 0.0);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        last_action = Zero;
+      }
+
+    return *this;
+  }
+
+
+
+  template <typename number>
+  VectorBase &
+  VectorBase::operator = (const ::dealii::Vector<number> &v)
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+
+                                     // this is probably not very efficient
+                                     // but works. in particular, we could do
+                                     // better if we know that
+                                     // number==TrilinosScalar because then we
+                                     // could elide the copying of elements
+                                     //
+                                     // let's hope this isn't a
+                                     // particularly frequent operation
+    std::pair<unsigned int, unsigned int>
+      local_range = this->local_range ();
+    for (unsigned int i=local_range.first; i<local_range.second; ++i)
+      (*vector)[0][i-local_range.first] = v(i);
+
+    return *this;
+  }
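+
+                                     // usage sketch (hypothetical names):
+                                     // copying a serial deal.II vector of
+                                     // the same global size:
+                                     //
+                                     //   dealii::Vector<double> serial (w.size());
+                                     //   w = serial;  // copies the locally owned range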
+
+
+
+  TrilinosScalar
+  VectorBase::el (const unsigned int index) const
+  {
+                                        // Extract local indices in
+                                        // the vector.
+    int trilinos_i = vector->Map().LID(index);
+    TrilinosScalar value = 0.;
+
+                                        // If the element is not
+                                        // present on the current
+                                        // processor, we can't
+                                        // continue. Just print out 0.
+
+                                        // TODO: Is this reasonable?
+    if (trilinos_i == -1 )
+      {
+        return 0.;
+        //Assert (false, ExcAccessToNonlocalElement(index, local_range().first,
+        //                                local_range().second-1));
+      }
+    else
+      value = (*vector)[0][trilinos_i];
+
+    return value;
+  }
+
+
+
+  TrilinosScalar
+  VectorBase::operator () (const unsigned int index) const
+  {
+                                        // Extract local indices in
+                                        // the vector.
+    int trilinos_i = vector->Map().LID(index);
+    TrilinosScalar value = 0.;
+
+                                        // If the element is not present
+                                        // on the current processor, we
+                                        // can't continue. This is the
+                                        // main difference to the el()
+                                        // function.
+    if (trilinos_i == -1 )
+      {
+        Assert (false, ExcAccessToNonlocalElement(index, local_range().first,
+                                                  local_range().second-1));
+      }
+    else
+      value = (*vector)[0][trilinos_i];
+
+    return value;
+  }
+
+
+
+  void
+  VectorBase::add (const VectorBase &v,
+                   const bool        allow_different_maps)
+  {
+    if (allow_different_maps == false)
+      *this += v;
+    else
+      {
+        AssertThrow (size() == v.size(),
+                     ExcDimensionMismatch (size(), v.size()));
+
+        Epetra_Import data_exchange (vector->Map(), v.vector->Map());
+
+        int ierr = vector->Import(*v.vector, data_exchange, Add);
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+        last_action = Insert;
+      }
+  }
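+
+                                     // usage sketch (hypothetical names):
+                                     // accumulate contributions from a
+                                     // vector with a different parallel
+                                     // layout:
+                                     //
+                                     //   w.add (v, /*allow_different_maps=*/true);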
+
+
+
+  bool
+  VectorBase::operator == (const VectorBase &v) const
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+    if (local_size() != v.local_size())
+      return false;
+
+    for (unsigned int i=0; i<local_size(); ++i)
+      if ((*(v.vector))[0][i] != (*vector)[0][i])
+        return false;
+
+    return true;
+  }
+
+
+
+  bool
+  VectorBase::operator != (const VectorBase &v) const
+  {
+    Assert (size() == v.size(),
+            ExcDimensionMismatch(size(), v.size()));
+
+    return (!(*this==v));
+  }
+
+                                     // added by shuqiangwang
+  VectorBase::real_type VectorBase::min () const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar d;
+    const int ierr = vector->MinValue (&d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return d;
+  }
+
+
+
+  VectorBase::real_type VectorBase::max () const
+  {
+    Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+    TrilinosScalar d;
+    const int ierr = vector->MaxValue (&d);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+    return d;
+  }
+
+  bool
+  VectorBase::all_zero () const
+  {
+                                     // get a representation of the vector and
+                                     // loop over all the elements
+    TrilinosScalar *start_ptr = (*vector)[0];
+    const TrilinosScalar *ptr  = start_ptr,
+                         *eptr = start_ptr + local_size();
+    unsigned int flag = 0;
+    while (ptr != eptr)
+      {
+        if (*ptr != 0)
+          {
+            flag = 1;
+            break;
+          }
+        ++ptr;
+      }
+
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+                                     // in parallel, check that the vector
+                                     // is zero on _all_ processors.
+    const Epetra_MpiComm *mpi_comm
+      = dynamic_cast<const Epetra_MpiComm*>(&vector->Map().Comm());
+    unsigned int num_nonzero = Utilities::MPI::sum(flag, mpi_comm->Comm());
+    return num_nonzero == 0;
+#else
+    return flag == 0;
+#endif
+
+  }
+
+
+
+  bool
+  VectorBase::is_non_negative () const
+  {
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+                                     // if this vector is a parallel one, then
+                                     // we need to communicate to determine
+                                     // the answer to the current
+                                     // function. this still has to be
+                                     // implemented
+    AssertThrow(local_size() == size(), ExcNotImplemented());
+#endif
+                                     // get a representation of the vector and
+                                     // loop over all the elements
+    TrilinosScalar *start_ptr;
+    int leading_dimension;
+    int ierr = vector->ExtractView (&start_ptr, &leading_dimension);
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+                                       // TODO: This
+                                       // won't work in parallel like
+                                       // this. Find a better way to
+                                       // do this in that case.
+    const TrilinosScalar *ptr  = start_ptr,
+                         *eptr = start_ptr + size();
+    bool flag = true;
+    while (ptr != eptr)
+      {
+        if (*ptr < 0.0)
+          {
+            flag = false;
+            break;
+          }
+        ++ptr;
+      }
+
+    return flag;
+  }
+
+
+
+                                        // TODO: up to now only local
+                                        // data printed out! Find a
+                                        // way to neatly output
+                                        // distributed data...
+  void
+  VectorBase::print (const char *format) const
+  {
+    Assert (vector->GlobalLength()!=0, ExcEmptyObject());
+
+    for (unsigned int j=0; j<size(); ++j)
+      {
+        double t = (*vector)[0][j];
+
+        if (format != 0)
+          std::printf (format, t);
+        else
+          std::printf (" %5.2f", double(t));
+      }
+    std::printf ("\n");
+  }
+
+
+
+  void
+  VectorBase::print (std::ostream      &out,
+                     const unsigned int precision,
+                     const bool         scientific,
+                     const bool         across) const
+  {
+    AssertThrow (out, ExcIO());
+
+                                        // get a representation of the
+                                        // vector and loop over all
+                                        // the elements TODO: up to
+                                        // now only local data printed
+                                        // out! Find a way to neatly
+                                        // output distributed data...
+    TrilinosScalar *val;
+    int leading_dimension;
+    int ierr = vector->ExtractView (&val, &leading_dimension);
+
+    AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+    out.precision (precision);
+    if (scientific)
+      out.setf (std::ios::scientific, std::ios::floatfield);
+    else
+      out.setf (std::ios::fixed, std::ios::floatfield);
+
+    if (across)
+      for (unsigned int i=0; i<size(); ++i)
+        out << static_cast<double>(val[i]) << ' ';
+    else
+      for (unsigned int i=0; i<size(); ++i)
+        out << static_cast<double>(val[i]) << std::endl;
+    out << std::endl;
+
+                                        // finally check that the
+                                        // stream is still in a good state
+    AssertThrow (out, ExcIO());
+  }
+
+
+
+  void
+  VectorBase::swap (VectorBase &v)
+  {
+    std::swap(last_action, v.last_action);
+    std::swap(compressed, v.compressed);
+    std::swap(vector, v.vector);
+  }
+
+
+
+  std::size_t
+  VectorBase::memory_consumption () const
+  {
+                                     //TODO[TH]: No accurate memory
+                                     //consumption for Trilinos vectors
+                                     //yet. This is a rough approximation with
+                                     //one index and the value per local
+                                     //entry.
+    return sizeof(*this)
+      + this->local_size()*( sizeof(double)+sizeof(int) );
+  }
+
+} /* end of namespace TrilinosWrappers */
+
+
+namespace TrilinosWrappers
+{
+#include "trilinos_vector_base.inst"
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_TRILINOS

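Note on the two element-access functions above: el() silently returns 0 for an index that is not stored on the calling processor, while operator() fails an assertion. A minimal sketch of the difference, with illustrative names (a parallel vector v, a global index i owned by another rank):

    TrilinosScalar a = v.el(i);  // off-processor index: quietly yields 0.
    TrilinosScalar b = v(i);     // off-processor index: aborts with
                                 // ExcAccessToNonlocalElement
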
Added: branches/s-wang/for_deal.II/source/numerics/derivative_approximation.inst.in
===================================================================
--- branches/s-wang/for_deal.II/source/numerics/derivative_approximation.inst.in	                        (rev 0)
+++ branches/s-wang/for_deal.II/source/numerics/derivative_approximation.inst.in	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,181 @@
+//---------------------------------------------------------------------------
+//    $Id: derivative_approximation.inst.in 25612 2012-06-07 16:46:33Z heister $
+//    Version: $Name$
+//
+//    Copyright (C) 2010, 2012 by the deal.II authors
+//
+//    This file is subject to QPL and may not be  distributed
+//    without copyright and license information. Please refer
+//    to the file deal.II/doc/license.html for the  text  and
+//    further information on this license.
+//
+//---------------------------------------------------------------------------
+
+for (deal_II_dimension : DIMENSIONS)
+{
+#define INSTANTIATE(InputVector,DH)                      \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_gradient<deal_II_dimension>                  \
+(const Mapping<deal_II_dimension> &mapping,              \
+ const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector  &solution,                           \
+ Vector<float>         &derivative_norm,                 \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_gradient<deal_II_dimension>                  \
+(const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector     &solution,                        \
+ Vector<float>         &derivative_norm,                 \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_second_derivative<deal_II_dimension>         \
+(const Mapping<deal_II_dimension> &mapping,              \
+ const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector  &solution,                           \
+ Vector<float>         &derivative_norm,                 \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_second_derivative<deal_II_dimension>         \
+(const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector     &solution,                        \
+ Vector<float>         &derivative_norm,                 \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_derivative_tensor<deal_II_dimension>         \
+(const Mapping<deal_II_dimension> &mapping,              \
+ const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector     &solution,                        \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<1,deal_II_dimension> &derivative,                \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_derivative_tensor<deal_II_dimension>         \
+(const Mapping<deal_II_dimension> &mapping,              \
+ const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector     &solution,                        \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<2,deal_II_dimension> &derivative,                \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_derivative_tensor<deal_II_dimension>         \
+(const Mapping<deal_II_dimension> &mapping,              \
+ const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector     &solution,                        \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<3,deal_II_dimension> &derivative,                \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_derivative_tensor<deal_II_dimension>         \
+(const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector     &solution,                        \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<1,deal_II_dimension> &derivative,                \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_derivative_tensor<deal_II_dimension>         \
+(const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector     &solution,                        \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<2,deal_II_dimension> &derivative,                \
+ const unsigned int     component);                      \
+                                                         \
+template                                                 \
+void                                                     \
+DerivativeApproximation::                                \
+approximate_derivative_tensor<deal_II_dimension>         \
+(const DH<deal_II_dimension> &dof_handler,               \
+ const InputVector     &solution,                        \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<3,deal_II_dimension> &derivative,                \
+ const unsigned int     component)
+
+
+INSTANTIATE(Vector<double>, DoFHandler);
+INSTANTIATE(Vector<float>, DoFHandler);
+INSTANTIATE(BlockVector<double>, DoFHandler);
+INSTANTIATE(BlockVector<float>, DoFHandler);
+
+INSTANTIATE(Vector<double>, hp::DoFHandler);
+INSTANTIATE(Vector<float>, hp::DoFHandler);
+INSTANTIATE(BlockVector<double>, hp::DoFHandler);
+INSTANTIATE(BlockVector<float>, hp::DoFHandler);
+
+#ifdef DEAL_II_USE_PETSC
+INSTANTIATE(PETScWrappers::Vector, DoFHandler);
+INSTANTIATE(PETScWrappers::BlockVector, DoFHandler);
+INSTANTIATE(PETScWrappers::MPI::Vector, DoFHandler);			// shuqiangwang
+INSTANTIATE(PETScWrappers::MPI::BlockVector, DoFHandler);
+
+INSTANTIATE(PETScWrappers::Vector, hp::DoFHandler);
+INSTANTIATE(PETScWrappers::BlockVector, hp::DoFHandler);
+#endif
+
+#ifdef DEAL_II_USE_TRILINOS
+INSTANTIATE(TrilinosWrappers::Vector, DoFHandler);
+INSTANTIATE(TrilinosWrappers::BlockVector, DoFHandler);
+INSTANTIATE(TrilinosWrappers::MPI::Vector, DoFHandler);
+INSTANTIATE(TrilinosWrappers::MPI::BlockVector, DoFHandler);
+
+//TODO: test hp before instantiating
+#endif
+
+#undef INSTANTIATE
+
+template
+double
+DerivativeApproximation::
+derivative_norm(const Tensor<1,deal_II_dimension> &derivative);
+
+template
+double
+DerivativeApproximation::
+derivative_norm(const Tensor<2,deal_II_dimension> &derivative);
+
+template
+double
+DerivativeApproximation::
+derivative_norm(const Tensor<3,deal_II_dimension> &derivative);
+
+
+// static variables
+//
+// on AIX, the linker is unhappy about some missing symbols. they
+// should really be there, but explicitly instantiating them will also
+// not hurt
+template
+const UpdateFlags
+DerivativeApproximation::Gradient<deal_II_dimension>::update_flags;
+
+template
+const UpdateFlags
+DerivativeApproximation::SecondDerivative<deal_II_dimension>::update_flags;
+template
+const UpdateFlags
+DerivativeApproximation::ThirdDerivative<deal_II_dimension>::update_flags;
+}

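Note on the .inst.in file above: it is not compiled directly but fed through deal.II's expand_instantiations tool, which replays the body of the for (deal_II_dimension : DIMENSIONS) loop once per configured space dimension. As a rough illustration (not part of the patch itself), the first template inside INSTANTIATE(PETScWrappers::MPI::Vector, DoFHandler) expands in 2d to:

    template
    void
    DerivativeApproximation::
    approximate_gradient<2>
    (const Mapping<2>                 &mapping,
     const DoFHandler<2>              &dof_handler,
     const PETScWrappers::MPI::Vector &solution,
     Vector<float>                    &derivative_norm,
     const unsigned int                component);
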
Modified: branches/s-wang/include/aspect/global.h
===================================================================
--- branches/s-wang/include/aspect/global.h	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/include/aspect/global.h	2012-11-06 20:23:43 UTC (rev 1346)
@@ -24,9 +24,10 @@
 #define __aspect__global_h
 
 
-#include <deal.II/lac/trilinos_block_vector.h>
-#include <deal.II/lac/trilinos_block_sparse_matrix.h>
-#include <deal.II/lac/trilinos_precondition.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#include <deal.II/lac/petsc_solver.h>
+#include <deal.II/lac/petsc_precondition.h>
 
 #include <boost/archive/binary_oarchive.hpp>
 #include <boost/archive/binary_iarchive.hpp>
@@ -74,46 +75,47 @@
     /**
      * Typedef for the vector type used.
      */
-    typedef TrilinosWrappers::MPI::Vector Vector;
+    typedef PETScWrappers::MPI::Vector Vector;
 
     /**
      * Typedef for the type used to describe vectors that
      * consist of multiple blocks.
      */
-    typedef TrilinosWrappers::MPI::BlockVector BlockVector;
+    typedef PETScWrappers::MPI::BlockVector BlockVector;
 
     /**
      * Typedef for the sparse matrix type used.
      */
-    typedef TrilinosWrappers::SparseMatrix SparseMatrix;
+    typedef PETScWrappers::MPI::SparseMatrix SparseMatrix;
 
     /**
      * Typedef for the type used to describe sparse matrices that
      * consist of multiple blocks.
      */
-    typedef TrilinosWrappers::BlockSparseMatrix BlockSparseMatrix;
+    typedef PETScWrappers::MPI::BlockSparseMatrix BlockSparseMatrix;
 
+    typedef PETScWrappers::SolverCG SolverCG;
+
     /**
      * Typedef for the AMG preconditioner type used for the
      * top left block of the Stokes matrix.
      */
-    typedef TrilinosWrappers::PreconditionAMG PreconditionAMG;
+    typedef PETScWrappers::PreconditionBoomerAMG PreconditionAMG;
 
     /**
      * Typedef for the Incomplete Cholesky preconditioner used
      * for other blocks of the system matrix.
      */
-    typedef TrilinosWrappers::PreconditionIC PreconditionIC;
+    typedef PETScWrappers::PreconditionICC PreconditionIC;
 
     /**
      * Typedef for the Incomplete LU decomposition preconditioner used
      * for other blocks of the system matrix.
      */
-    typedef TrilinosWrappers::PreconditionILU PreconditionILU;
+    typedef PETScWrappers::PreconditionJacobi PreconditionILU;
   }
 }
 
-
 /**
  * A macro that is used in instantiating the ASPECT classes and functions
  * for both 2d and 3d. Call this macro with the name of another macro that

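Note on the preconditioner typedefs in this hunk: PreconditionIC now maps to PETSc's incomplete Cholesky (ICC), but PreconditionILU maps to PETScWrappers::PreconditionJacobi, so a plain Jacobi preconditioner stands in for ILU for now (presumably because PETSc's native ILU is not available for parallel MPI matrices). Call sites are insulated from the swap because they only ever name the typedef; a minimal sketch, assuming an already assembled LinearAlgebra::SparseMatrix m:

    aspect::LinearAlgebra::PreconditionILU preconditioner;  // Jacobi under the hood
    preconditioner.initialize (m);                          // unchanged call site
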
Added: branches/s-wang/include/aspect/global_trilinos.h
===================================================================
--- branches/s-wang/include/aspect/global_trilinos.h	                        (rev 0)
+++ branches/s-wang/include/aspect/global_trilinos.h	2012-11-06 20:23:43 UTC (rev 1346)
@@ -0,0 +1,127 @@
+/*
+  Copyright (C) 2011, 2012 by the authors of the ASPECT code.
+
+  This file is part of ASPECT.
+
+  ASPECT is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2, or (at your option)
+  any later version.
+
+  ASPECT is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with ASPECT; see the file doc/COPYING.  If not see
+  <http://www.gnu.org/licenses/>.
+*/
+/*  $Id: global.h 895 2012-04-10 12:53:27Z bangerth $  */
+
+
+#ifndef __aspect__global_h
+#define __aspect__global_h
+
+
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_precondition.h>
+
+#include <boost/archive/binary_oarchive.hpp>
+#include <boost/archive/binary_iarchive.hpp>
+#include <boost/archive/text_oarchive.hpp>
+#include <boost/archive/text_iarchive.hpp>
+namespace aspect
+{
+  /**
+   * A variable whose value denotes the number of seconds in one year.
+   */
+  extern const double year_in_seconds;
+
+  /**
+   * A variable that denotes whether we should periodically
+   * output statistics about memory consumption, run times, etc
+   * via the Simulator::output_statistics() function or other
+   * means.
+   */
+  extern const bool output_parallel_statistics;
+
+
+  /**
+   * A typedef that denotes the BOOST stream type for reading data
+   * during serialization. The type chosen here is a binary archive
+   * which we subsequently will have to un-compress.
+   */
+  typedef boost::archive::binary_iarchive iarchive;
+
+  /**
+   * A typedef that denotes the BOOST stream type for writing data
+   * during serialization. The type chosen here is a binary archive
+   * which we compress before writing it into a file.
+   */
+  typedef boost::archive::binary_oarchive oarchive;
+
+  /**
+   * A namespace that contains typedefs for classes used in
+   * the linear algebra description.
+   */
+  namespace LinearAlgebra
+  {
+    using namespace dealii;
+
+
+    /**
+     * Typedef for the vector type used.
+     */
+    typedef TrilinosWrappers::MPI::Vector Vector;
+
+    /**
+     * Typedef for the type used to describe vectors that
+     * consist of multiple blocks.
+     */
+    typedef TrilinosWrappers::MPI::BlockVector BlockVector;
+
+    /**
+     * Typedef for the sparse matrix type used.
+     */
+    typedef TrilinosWrappers::SparseMatrix SparseMatrix;
+
+    /**
+     * Typedef for the type used to describe sparse matrices that
+     * consist of multiple blocks.
+     */
+    typedef TrilinosWrappers::BlockSparseMatrix BlockSparseMatrix;
+
+    /**
+     * Typedef for the AMG preconditioner type used for the
+     * top left block of the Stokes matrix.
+     */
+    typedef TrilinosWrappers::PreconditionAMG PreconditionAMG;
+
+    /**
+     * Typedef for the Incomplete Cholesky preconditioner used
+     * for other blocks of the system matrix.
+     */
+    typedef TrilinosWrappers::PreconditionIC PreconditionIC;
+
+    /**
+     * Typedef for the Incomplete LU decomposition preconditioner used
+     * for other blocks of the system matrix.
+     */
+    typedef TrilinosWrappers::PreconditionILU PreconditionILU;
+  }
+}
+
+
+/**
+ * A macro that is used in instantiating the ASPECT classes and functions
+ * for both 2d and 3d. Call this macro with the name of another macro that
+ * when called with a single integer argument instantiates the respective
+ * classes in the given space dimension.
+ */
+#define ASPECT_INSTANTIATE(INSTANTIATIONS) \
+  INSTANTIATIONS(2) \
+  INSTANTIATIONS(3)
+
+#endif

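One caveat with this backup copy of the Trilinos typedefs: it keeps the original __aspect__global_h include guard, so any translation unit that includes both global.h and global_trilinos.h silently sees only whichever header comes first. If the file is meant to be usable on its own, a distinct guard would be safer, e.g. (hypothetical name):

    #ifndef __aspect__global_trilinos_h
    #define __aspect__global_trilinos_h
    /* ... header body ... */
    #endif
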
Modified: branches/s-wang/include/aspect/particle/world.h
===================================================================
--- branches/s-wang/include/aspect/particle/world.h	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/include/aspect/particle/world.h	2012-11-06 20:23:43 UTC (rev 1346)
@@ -375,7 +375,7 @@
         };
 
         // Advance particles by the specified timestep using the current integration scheme.
-        void advance_timestep(double timestep, const TrilinosWrappers::MPI::BlockVector &solution)
+        void advance_timestep(double timestep, const LinearAlgebra::BlockVector &solution)
         {
           bool        continue_integrator = true;
 
@@ -565,7 +565,7 @@
           free(recv_data);
         };
 
-        void get_particle_velocities(const TrilinosWrappers::MPI::BlockVector &solution)
+        void get_particle_velocities(const LinearAlgebra::BlockVector &solution)
         {
           Vector<double>                single_res(dim+2);
           std::vector<Vector<double> >  result;
@@ -577,7 +577,7 @@
           std::vector<Point<dim> >      particle_points;
 
           // Prepare the field function
-          Functions::FEFieldFunction<dim, DoFHandler<dim>, TrilinosWrappers::MPI::BlockVector> fe_value(*_dh, solution, *_mapping);
+          Functions::FEFieldFunction<dim, DoFHandler<dim>, LinearAlgebra::BlockVector> fe_value(*_dh, solution, *_mapping);
 
           // Get the velocity for each cell at a time so we can take advantage of knowing the active cell
           for (it=_particles.begin(); it!=_particles.end();)

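Note on the particle changes above: Functions::FEFieldFunction evaluates the solution at arbitrary points, so the vector handed to it must carry the ghost (locally relevant) entries; the ghosted blocks set up by CIG::setup_petsc_vector in core.cc below provide exactly that. The evaluation pattern in get_particle_velocities, sketched with the surrounding names:

    Functions::FEFieldFunction<dim, DoFHandler<dim>, LinearAlgebra::BlockVector>
      fe_value (*_dh, solution, *_mapping);
    fe_value.set_active_cell (cell);                       // reuse the known cell
    fe_value.vector_value_list (particle_points, result);  // dim+2 components per point
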
Modified: branches/s-wang/include/aspect/simulator.h
===================================================================
--- branches/s-wang/include/aspect/simulator.h	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/include/aspect/simulator.h	2012-11-06 20:23:43 UTC (rev 1346)
@@ -1285,7 +1285,7 @@
        * <code>source/simulator/assembly.cc</code>.
        */
       void
-      compute_material_model_input_values (const TrilinosWrappers::MPI::BlockVector                    &current_linearization_point,
+      compute_material_model_input_values (const LinearAlgebra::BlockVector                    &current_linearization_point,
                                            const FEValues<dim,dim>                                     &input_finite_element_values,
                                            const bool                                                   compute_strainrate,
                                            typename MaterialModel::Interface<dim>::MaterialModelInputs &material_model_inputs) const;
@@ -1436,7 +1436,7 @@
       LinearAlgebra::BlockVector                                old_old_solution;
       LinearAlgebra::BlockVector                                system_rhs;
 
-      TrilinosWrappers::MPI::BlockVector                        current_linearization_point;
+      LinearAlgebra::BlockVector                        current_linearization_point;
 
       // only used if is_compressible()
       LinearAlgebra::BlockVector                                pressure_shape_function_integrals;

Modified: branches/s-wang/source/main.cc
===================================================================
--- branches/s-wang/source/main.cc	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/source/main.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -24,12 +24,14 @@
 
 #include <deal.II/base/utilities.h>
 #include <deal.II/base/mpi.h>
+#include <deal.II/lac/vector_memory.h>
 
 
 int main (int argc, char *argv[])
 {
   using namespace dealii;
   Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv);
+  PetscInitialize(&argc,&argv,0,0);
 
   try
     {
@@ -169,5 +171,9 @@
       return 1;
     }
 
+  dealii::GrowingVectorMemory<dealii::PETScWrappers::MPI::Vector>::release_unused_memory ();
+  dealii::GrowingVectorMemory<dealii::PETScWrappers::Vector>::release_unused_memory ();
+  PetscFinalize();
+
   return 0;
 }

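Note on the initialization order in main(): Utilities::MPI::MPI_InitFinalize calls MPI_Init, and PetscInitialize then attaches to the already running MPI instead of initializing it again. The release_unused_memory() calls drain GrowingVectorMemory's pool of cached PETSc vectors before PetscFinalize; without them those vectors would be destroyed only after PETSc has shut down. The required bracketing, as a sketch:

    Utilities::MPI::MPI_InitFinalize mpi_initialization (argc, argv);  // MPI_Init
    PetscInitialize (&argc, &argv, 0, 0);   // attaches to the running MPI
    /* ... run the simulation ... */
    GrowingVectorMemory<PETScWrappers::MPI::Vector>::release_unused_memory ();
    PetscFinalize ();  // must precede MPI_Finalize, which runs when
                       // mpi_initialization leaves scope
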
Modified: branches/s-wang/source/postprocess/composition_statistics.cc
===================================================================
--- branches/s-wang/source/postprocess/composition_statistics.cc	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/source/postprocess/composition_statistics.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -94,16 +94,17 @@
       std::vector<double> local_max_compositions (this->n_compositional_fields(),
                                                   std::numeric_limits<double>::min());
 
-      for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
-        for (unsigned int i=0; i<this->get_solution().block(3+c).local_size(); ++i)
-          {
-            local_min_compositions[c]
-              = std::min<double> (local_min_compositions[c],
-                                  this->get_solution().block(3+c).trilinos_vector()[0][i]);
-            local_max_compositions[c]
-              = std::max<double> (local_max_compositions[c],
-                                  this->get_solution().block(3+c).trilinos_vector()[0][i]);
-          }
+//      for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
+//        for (unsigned int i=0; i<this->get_solution().block(3+c).local_size(); ++i)
+//          {
+// shuqiangwang
+//            local_min_compositions[c]
+//              = std::min<double> (local_min_compositions[c],
+//                                  this->get_solution().block(3+c).trilinos_vector()[0][i]);
+//            local_max_compositions[c]
+//              = std::max<double> (local_max_compositions[c],
+//                                  this->get_solution().block(3+c).trilinos_vector()[0][i]);
+//          }
 
       // now do the reductions over all processors. we can use Utilities::MPI::max
       // for the maximal values. unfortunately, there is currently no matching
@@ -115,21 +116,26 @@
                                                    std::numeric_limits<double>::min());
 
       {
-        for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
-          local_min_compositions[c] = -local_min_compositions[c];
-        Utilities::MPI::max (local_min_compositions,
-                             this->get_mpi_communicator(),
-                             global_min_compositions);
-        for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
-          {
-            local_min_compositions[c] = -local_min_compositions[c];
-            global_min_compositions[c] = -global_min_compositions[c];
-          }
-
-        // it's simpler for the maximal values
-        Utilities::MPI::max (local_max_compositions,
-                             this->get_mpi_communicator(),
-                             global_max_compositions);
+//        for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
+//          local_min_compositions[c] = -local_min_compositions[c];
+//        Utilities::MPI::max (local_min_compositions,
+//                             this->get_mpi_communicator(),
+//                             global_min_compositions);
+//        for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
+//          {
+//            local_min_compositions[c] = -local_min_compositions[c];
+//            global_min_compositions[c] = -global_min_compositions[c];
+//          }
+//
+//        // it's simpler for the maximal values
+//        Utilities::MPI::max (local_max_compositions,
+//                             this->get_mpi_communicator(),
+//                             global_max_compositions);
+        for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
+          {
+            global_min_compositions[c] = this->get_solution().block(3+c).min();
+            global_max_compositions[c] = this->get_solution().block(3+c).max();
+          }
       }
 
       // finally produce something for the statistics file

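Note on the simplification above: VectorBase::min()/max() (present in the PETSc wrappers and added to the patched Trilinos wrappers earlier in this revision) already reduce over the vector's communicator, so both the local loop and the negate-then-Utilities::MPI::max workaround for the missing min-reduction become unnecessary. Per compositional field the whole computation collapses to:

    global_min_compositions[c] = this->get_solution().block(3+c).min();
    global_max_compositions[c] = this->get_solution().block(3+c).max();
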
Modified: branches/s-wang/source/postprocess/temperature_statistics.cc
===================================================================
--- branches/s-wang/source/postprocess/temperature_statistics.cc	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/source/postprocess/temperature_statistics.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -84,15 +84,16 @@
       // picture of their true values
       double local_min_temperature = std::numeric_limits<double>::max();
       double local_max_temperature = std::numeric_limits<double>::min();
-      for (unsigned int i=0; i<this->get_solution().block(2).local_size(); ++i)
-        {
-          local_min_temperature
-            = std::min<double> (local_min_temperature,
-                                this->get_solution().block(2).trilinos_vector()[0][i]);
-          local_max_temperature
-            = std::max<double> (local_max_temperature,
-                                this->get_solution().block(2).trilinos_vector()[0][i]);
-        }
+//      for (unsigned int i=0; i<this->get_solution().block(2).local_size(); ++i)
+//        {
+//// shuqiangwang
+//          local_min_temperature
+//            = std::min<double> (local_min_temperature,
+//                                this->get_solution().block(2).trilinos_vector()[0][i]);
+//          local_max_temperature
+//            = std::max<double> (local_max_temperature,
+//                                this->get_solution().block(2).trilinos_vector()[0][i]);
+//        }
 
       const double global_temperature_integral
         = Utilities::MPI::sum (local_temperature_integral, this->get_mpi_communicator());
@@ -104,13 +105,15 @@
       // one communication by multiplying
       // one value by -1
       {
-        double local_values[2] = { -local_min_temperature, local_max_temperature };
-        double global_values[2];
-
-        Utilities::MPI::max (local_values, this->get_mpi_communicator(), global_values);
-
-        global_min_temperature = -global_values[0];
-        global_max_temperature = global_values[1];
+//        double local_values[2] = { -local_min_temperature, local_max_temperature };
+//        double global_values[2];
+//
+//        Utilities::MPI::max (local_values, this->get_mpi_communicator(), global_values);
+//
+//        global_min_temperature = -global_values[0];
+//        global_max_temperature = global_values[1];
+        global_min_temperature = this->get_solution().block(2).min();
+        global_max_temperature = this->get_solution().block(2).max();
       }
 
       statistics.add_value ("Minimal temperature (K)",

Modified: branches/s-wang/source/simulator/assembly.cc
===================================================================
--- branches/s-wang/source/simulator/assembly.cc	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/source/simulator/assembly.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -926,7 +926,7 @@
   template <int dim>
   void
   Simulator<dim>::
-  compute_material_model_input_values (const TrilinosWrappers::MPI::BlockVector                    &current_linearization_point,
+  compute_material_model_input_values (const LinearAlgebra::BlockVector                    &current_linearization_point,
                                        const FEValues<dim>                                         &input_finite_element_values,
                                        const bool                                                   compute_strainrate,
                                        typename MaterialModel::Interface<dim>::MaterialModelInputs &material_model_inputs) const
@@ -1112,11 +1112,11 @@
     Amg_preconditioner.reset (new LinearAlgebra::PreconditionAMG());
 
     LinearAlgebra::PreconditionAMG::AdditionalData Amg_data;
-    Amg_data.constant_modes = constant_modes;
-    Amg_data.elliptic = true;
-    Amg_data.higher_order_elements = true;
-    Amg_data.smoother_sweeps = 2;
-    Amg_data.aggregation_threshold = 0.02;
+    //Amg_data.constant_modes = constant_modes;
+    //Amg_data.elliptic = true;
+    //Amg_data.higher_order_elements = true;
+    //Amg_data.smoother_sweeps = 2;
+    //Amg_data.aggregation_threshold = 0.02;
 
     Mp_preconditioner->initialize (system_preconditioner_matrix.block(1,1));
     Amg_preconditioner->initialize (system_preconditioner_matrix.block(0,0),
@@ -1307,11 +1307,17 @@
          StokesSystem<dim> (finite_element));
 
     system_matrix.compress();
-    system_rhs.compress(Add);
+    system_rhs.compress(dealii::VectorOperation::add);
 
     if (material_model->is_compressible())
-      pressure_shape_function_integrals.compress(Add);
+      pressure_shape_function_integrals.compress(dealii::VectorOperation::add);
 
+//    system_matrix.block(0,1).write_ascii();
+//    system_matrix.block(1,0).write_ascii();
+//    system_matrix.block(0,0).write_ascii();
+    system_rhs.print(std::cout,7,false,false);
+    exit(0);
+
     rebuild_stokes_matrix = false;
 
     computing_timer.exit_section();
@@ -1325,7 +1331,7 @@
   {
     computing_timer.enter_section ("   Build temperature preconditioner");
     {
-      T_preconditioner.reset (new TrilinosWrappers::PreconditionILU());
+      T_preconditioner.reset (new LinearAlgebra::PreconditionILU());
       T_preconditioner->initialize (system_matrix.block(2,2));
     }
     computing_timer.exit_section();
@@ -1543,6 +1549,7 @@
               }
           }
       }
+    data.local_matrix.print(std::cout);
   }
 
   template <int dim>
@@ -1605,8 +1612,11 @@
          TemperatureSystem<dim> (finite_element));
 
     system_matrix.compress();
-    system_rhs.compress(Add);
+    system_rhs.compress(dealii::VectorOperation::add);
 
+    system_matrix.block(2,2).write_ascii();
+    exit(0);
+
     computing_timer.exit_section();
   }
 
@@ -1619,7 +1629,7 @@
     AssertIndexRange(composition_index,parameters.n_compositional_fields);
 
     computing_timer.enter_section ("   Build composition preconditioner");
-    C_preconditioner.reset (new TrilinosWrappers::PreconditionILU());
+    C_preconditioner.reset (new LinearAlgebra::PreconditionILU());
     C_preconditioner->initialize (system_matrix.block(3+composition_index,3+composition_index));
 
     computing_timer.exit_section();
@@ -1817,7 +1827,7 @@
          CompositionSystem<dim> (finite_element));
 
     system_matrix.compress();
-    system_rhs.compress(Add);
+    system_rhs.compress(dealii::VectorOperation::add);
 
     computing_timer.exit_section();
   }

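Note on the compress() calls in this file: the argument changes from Trilinos' insert/add flag to deal.II's dealii::VectorOperation, and compress(VectorOperation::add) is the step that ships off-processor contributions accumulated during assembly to their owners. The generic pattern, as a sketch:

    system_rhs = 0;
    /* ...assemble local contributions on locally owned cells... */
    system_matrix.compress ();                           // finalize matrix entries
    system_rhs.compress (dealii::VectorOperation::add);  // exchange remote additions

The print()/write_ascii() calls followed by exit(0) are temporary probes for comparing the Trilinos and PETSc runs and will need to be removed before this branch can do real time steps.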
Modified: branches/s-wang/source/simulator/core.cc
===================================================================
--- branches/s-wang/source/simulator/core.cc	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/source/simulator/core.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -55,7 +55,72 @@
 
 using namespace dealii;
 
+/**
+ * Temporary utility for replacing TrilinosWrappers with PETScWrappers.
+ */
+namespace CIG
+{
+/**
+ * Convert the block partitioning used for Trilinos into the data
+ * needed for PETSc. It is assumed that the partitioning has two blocks.
+ */
+void convert_block_partitioning(
+		const std::vector<unsigned int> 	&system_dofs_per_block,
+		const std::vector<dealii::IndexSet> &system_partitioning,
+		std::vector<unsigned int> 			&block_sizes,
+		std::vector<unsigned int> 			&local_sizes)
+{
+	// initialize the output arguments
+	block_sizes.clear();
+	local_sizes.clear();
 
+	// block_sizes
+	block_sizes = system_dofs_per_block;
+
+	// local_sizes
+	for(unsigned int i=0; i<system_partitioning.size(); i++)
+		local_sizes.push_back(system_partitioning[i].n_elements());
+}
+
+
+void setup_petsc_matrix(
+		MPI_Comm mpi_communicator,
+		const std::vector<unsigned int> &block_sizes,
+		const std::vector<unsigned int> &local_sizes,
+		int max_coupling_between_dofs,
+		dealii::PETScWrappers::MPI::BlockSparseMatrix  &matrix)
+{
+	int size = block_sizes.size();
+
+	matrix.reinit(size,size);
+
+	for(int i=0; i<size; i++)
+		for(int j=0; j<size; j++)
+		{
+			matrix.block(i,j).reinit(
+					mpi_communicator,
+					block_sizes[i], block_sizes[j],
+					local_sizes[i], local_sizes[j],
+					max_coupling_between_dofs);
+		}
+
+	matrix.collect_sizes();
+}
+
+void setup_petsc_vector(
+		MPI_Comm mpi_communicator,
+		std::vector<unsigned int> &block_sizes,
+		std::vector<dealii::IndexSet> &partitioning,
+		std::vector<dealii::IndexSet> &relevant_partitioning,
+		dealii::PETScWrappers::MPI::BlockVector &vector)
+{
+	vector.reinit(block_sizes,mpi_communicator);
+	for(unsigned int i=0; i<block_sizes.size(); i++)
+		vector.block(i).reinit(mpi_communicator,partitioning[i],relevant_partitioning[i]);
+}
+}
+
+
 namespace aspect
 {
   namespace
@@ -504,7 +569,7 @@
                                      this_mpi_process(mpi_communicator));
     sp.compress();
 
-    system_matrix.reinit (sp);
+    //shuqiangwang: this function is no longer used; the matrix is now set up via CIG::setup_petsc_matrix(). system_matrix.reinit (sp);
   }
 
 
@@ -538,7 +603,7 @@
                                      this_mpi_process(mpi_communicator));
     sp.compress();
 
-    system_preconditioner_matrix.reinit (sp);
+    //shuqiangwang: this function is no longer used; the matrix is now set up via CIG::setup_petsc_matrix(). system_preconditioner_matrix.reinit (sp);
   }
 
 
@@ -714,19 +779,21 @@
     }
 
     // finally initialize vectors, matrices, etc.
+    std::vector<unsigned int> block_sizes, local_sizes;
+    CIG::convert_block_partitioning(system_dofs_per_block,system_partitioning,block_sizes,local_sizes);
 
-    setup_system_matrix (system_partitioning);
-    setup_system_preconditioner (system_partitioning);
+    CIG::setup_petsc_matrix(mpi_communicator,block_sizes,local_sizes,dof_handler.max_couplings_between_dofs(),system_matrix);                  // replaces setup_system_matrix (system_partitioning);
+    CIG::setup_petsc_matrix(mpi_communicator,block_sizes,local_sizes,dof_handler.max_couplings_between_dofs(),system_preconditioner_matrix);   // replaces setup_system_preconditioner (system_partitioning);
 
-    system_rhs.reinit(system_partitioning, mpi_communicator);
-    solution.reinit(system_relevant_partitioning, mpi_communicator);
-    old_solution.reinit(system_relevant_partitioning, mpi_communicator);
-    old_old_solution.reinit(system_relevant_partitioning, mpi_communicator);
+    system_rhs.reinit(block_sizes,mpi_communicator,local_sizes);                                                             // replaces system_rhs.reinit(system_partitioning, mpi_communicator);
+    CIG::setup_petsc_vector(mpi_communicator,block_sizes,system_partitioning,system_relevant_partitioning,solution);         // replaces solution.reinit(system_relevant_partitioning, mpi_communicator);
+    CIG::setup_petsc_vector(mpi_communicator,block_sizes,system_partitioning,system_relevant_partitioning,old_solution);     // replaces old_solution.reinit(system_relevant_partitioning, mpi_communicator);
+    CIG::setup_petsc_vector(mpi_communicator,block_sizes,system_partitioning,system_relevant_partitioning,old_old_solution); // replaces old_old_solution.reinit(system_relevant_partitioning, mpi_communicator);
 
-    current_linearization_point.reinit (system_relevant_partitioning, MPI_COMM_WORLD);
+    CIG::setup_petsc_vector(mpi_communicator,block_sizes,system_partitioning,system_relevant_partitioning,current_linearization_point);   // replaces current_linearization_point.reinit (system_relevant_partitioning, MPI_COMM_WORLD);
 
     if (material_model->is_compressible())
-      pressure_shape_function_integrals.reinit (system_partitioning, mpi_communicator);
+      pressure_shape_function_integrals.reinit(block_sizes,mpi_communicator,local_sizes);   // replaces pressure_shape_function_integrals.reinit (system_partitioning, mpi_communicator);
 
     rebuild_stokes_matrix         = true;
     rebuild_stokes_preconditioner = true;
@@ -739,6 +806,8 @@
   template <int dim>
   void Simulator<dim>::postprocess ()
   {
+    return;   // shuqiangwang: postprocessing temporarily disabled while testing the PETSc port
+
     computing_timer.enter_section ("Postprocessing");
     pcout << "   Postprocessing:" << std::endl;
 
@@ -1407,6 +1476,8 @@
         {
           old_old_solution      = old_solution;
           old_solution          = solution;
+          old_old_solution.update_ghost_values();   //shuqiangwang: need to check when this is needed.
+          old_solution.update_ghost_values();
         }
 
         // periodically generate snapshots so that we can resume here

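Note on CIG::setup_petsc_matrix above: unlike the (now bypassed) Trilinos path, which reinit()s each block from a fully built compressed sparsity pattern, every PETSc block is preallocated with a single per-row estimate, dof_handler.max_couplings_between_dofs(). That keeps the port simple, but the estimate can sit far above the true row lengths, so a sparsity-pattern-driven reinit would eventually be preferable. The per-block call the helper issues has the shape

    matrix.block(i,j).reinit (mpi_communicator,
                              block_sizes[i], block_sizes[j],  // global rows/columns
                              local_sizes[i], local_sizes[j],  // locally owned rows/columns
                              max_coupling_between_dofs);      // nonzeros-per-row estimate
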
Modified: branches/s-wang/source/simulator/helper_functions.cc
===================================================================
--- branches/s-wang/source/simulator/helper_functions.cc	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/source/simulator/helper_functions.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -78,12 +78,12 @@
           << "* Matrix " << system_matrix.memory_consumption()/mb << std::endl
           << "* 5 Vectors " << 5*solution.memory_consumption()/mb << std::endl
           << "* preconditioner " << (system_preconditioner_matrix.memory_consumption()
-                                     + Amg_preconditioner->memory_consumption()
+                                     //+ Amg_preconditioner->memory_consumption()
                                      /*+Mp_preconditioner->memory_consumption()
                                                                       +T_preconditioner->memory_consumption()*/)/mb
           << std::endl
           << "  - matrix " << system_preconditioner_matrix.memory_consumption()/mb << std::endl
-          << "  - prec vel " << Amg_preconditioner->memory_consumption()/mb << std::endl
+      //          << "  - prec vel " << Amg_preconditioner->memory_consumption()/mb << std::endl
           << "  - prec mass " << 0/*Mp_preconditioner->memory_consumption()/mb*/ << std::endl
           << "  - prec T " << 0/*T_preconditioner->memory_consumption()/mb*/ << std::endl
           << std::endl;

Modified: branches/s-wang/source/simulator/initial_conditions.cc
===================================================================
--- branches/s-wang/source/simulator/initial_conditions.cc	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/source/simulator/initial_conditions.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -196,8 +196,15 @@
                                                                                dim+2+parameters.n_compositional_fields),
                                   system_tmp);
 
+        system_tmp.compress();
+        system_tmp.print(std::cout,7,false,false);
         // we may have hanging nodes, so apply constraints
         constraints.distribute (system_tmp);
+        constraints.print(std::cout);
+//        exit(0);
+        system_tmp.compress();
+        system_tmp.print(std::cout,7,false,false);
+        exit(0);
 
         old_solution.block(1) = system_tmp.block(1);
       }
@@ -289,13 +296,20 @@
         old_solution.block(1) = system_tmp.block(1);
       }
 
+    old_solution.compress();
+    old_solution.print(std::cout,7,false,false);
+    exit(0);
     // normalize the pressure in such a way that the surface pressure
     // equals a known and desired value
     normalize_pressure(old_solution);
 
     // set the current solution to the same value as the previous solution
     solution = old_solution;
+
+    solution.print(std::cout,7,false,false);
+    exit(0);
   }
+
 }
 
 

Modified: branches/s-wang/source/simulator/solver.cc
===================================================================
--- branches/s-wang/source/simulator/solver.cc	2012-11-06 18:55:36 UTC (rev 1345)
+++ branches/s-wang/source/simulator/solver.cc	2012-11-06 20:23:43 UTC (rev 1346)
@@ -28,6 +28,17 @@
 #include <deal.II/lac/trilinos_solver.h>
 #include <deal.II/lac/pointer_matrix.h>
 
+namespace CIG
+{
+template <class VectorType>
+void reduce_accuracy(VectorType &vector)  // debugging aid: truncate local entries
+{
+  std::pair<unsigned int,unsigned int> range = vector.local_range();
+  for (unsigned int i=range.first; i<range.second; i++)
+    vector[i] = std::floor(vector[i]);
+  vector.compress();
+}
+}
 
 namespace aspect
 {
@@ -67,9 +78,9 @@
         /**
          * Compute the residual with the Stokes block.
          */
-        double residual (TrilinosWrappers::MPI::BlockVector       &dst,
-                         const TrilinosWrappers::MPI::BlockVector &x,
-                         const TrilinosWrappers::MPI::BlockVector &b) const;
+        double residual (LinearAlgebra::BlockVector       &dst,
+                         const LinearAlgebra::BlockVector &x,
+                         const LinearAlgebra::BlockVector &b) const;
 
         void clear() {};
 
@@ -130,9 +141,9 @@
 
 
 
-    double StokesBlock::residual (TrilinosWrappers::MPI::BlockVector       &dst,
-                                  const TrilinosWrappers::MPI::BlockVector &x,
-                                  const TrilinosWrappers::MPI::BlockVector &b) const
+    double StokesBlock::residual (LinearAlgebra::BlockVector       &dst,
+                                  const LinearAlgebra::BlockVector &x,
+                                  const LinearAlgebra::BlockVector &b) const
     {
       // compute b-Ax where A is only the top left 2x2 block
       this->vmult (dst, x);
@@ -222,7 +233,7 @@
       {
         SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
 
-        TrilinosWrappers::SolverCG solver(solver_control);
+        LinearAlgebra::SolverCG solver(solver_control);
 
         // Trilinos reports a breakdown
         // in case src=dst=0, even
@@ -247,7 +258,7 @@
       if (do_solve_A == true)
         {
           SolverControl solver_control(5000, utmp.l2_norm()*1e-2);
-          TrilinosWrappers::SolverCG solver(solver_control);
+          LinearAlgebra::SolverCG solver(solver_control);
           solver.solve(stokes_matrix.block(0,0), dst.block(0), utmp,
                        a_preconditioner);
         }
@@ -279,19 +290,19 @@
                                     parameters.composition_solver_tolerance*system_rhs.block(index+2).l2_norm());
 
       SolverGMRES<LinearAlgebra::Vector>   solver (solver_control,
-                                                   SolverGMRES<LinearAlgebra::Vector>::AdditionalData(30,true));
+                                                   SolverGMRES<LinearAlgebra::Vector>::AdditionalData(300,true));
 
 //TODO: clean up: why do we copy system_rhs here, then call set_zero when we later
 // overwrite the vector in residual(), then call set_zero again, and then throw away
 // the result
       LinearAlgebra::BlockVector
-      distributed_solution (system_rhs);
-      current_constraints.set_zero(distributed_solution);
+      distributed_solution (system_rhs);  distributed_solution.compress();
+      current_constraints.set_zero(distributed_solution);  distributed_solution.compress();
       // create vector with distribution of system_rhs.
       LinearAlgebra::Vector block_remap (system_rhs.block (index+2));
       // copy block of current_linearization_point into it, because
       // current_linearization is distributed differently.
-      block_remap = current_linearization_point.block (index+2);
+      block_remap = current_linearization_point.block (index+2);  block_remap.compress();
       // (ab)use the distributed solution vector to temporarily put a residual in
       initial_residual = system_matrix.block(index+2,index+2).residual (distributed_solution.block(index+2),
                                                                         block_remap,
@@ -299,13 +310,15 @@
       current_constraints.set_zero(distributed_solution);
 
       // then overwrite it again with the current best guess and solve the linear system
-      distributed_solution.block(index+2) = block_remap;
+      distributed_solution.block(index+2) = block_remap;  distributed_solution.compress();
       solver.solve (system_matrix.block(index+2,index+2), distributed_solution.block(index+2),
                     system_rhs.block(index+2), index==0?*T_preconditioner:*C_preconditioner);
 
       current_constraints.distribute (distributed_solution);
-      solution.block(index+2) = distributed_solution.block(index+2);
+      solution.block(index+2) = distributed_solution.block(index+2);  solution.compress();
 
+//      CIG::reduce_accuracy(solution.block(index+2));
+
       // print number of iterations and also record it in the
       // statistics file
       pcout << solver_control.last_step()
@@ -320,6 +333,9 @@
     }
     computing_timer.exit_section();
 
+    solution.block(index+2).print(std::cout,7,false,false);
+    exit(0);
+
     return initial_residual;
   }
 
@@ -353,7 +369,7 @@
     remap.block (1) = current_linearization_point.block (1);
     // before solving we scale the initial solution to the right dimensions
     remap.block (1) /= pressure_scaling;
-    current_constraints.set_zero (remap);
+    current_constraints.set_zero (remap);  remap.compress();
     // if the model is compressible then we need to adjust the right hand
     // side of the equation to make it compatible with the matrix on the
     // left
@@ -373,7 +389,7 @@
     LinearAlgebra::BlockVector distributed_stokes_rhs;
     distributed_stokes_rhs.reinit(system_rhs);
     distributed_stokes_rhs.block(0) = system_rhs.block(0);
-    distributed_stokes_rhs.block(1) = system_rhs.block(1);
+    distributed_stokes_rhs.block(1) = system_rhs.block(1);  distributed_stokes_rhs.compress();
 
     PrimitiveVectorMemory< LinearAlgebra::BlockVector > mem;
 
@@ -382,7 +398,7 @@
     const double solver_tolerance = std::max (parameters.linear_solver_tolerance *
                                               distributed_stokes_rhs.l2_norm(),
                                               1e-12 * initial_residual);
-    SolverControl solver_control_cheap (30, solver_tolerance);
+    SolverControl solver_control_cheap (300, solver_tolerance);
     SolverControl solver_control_expensive (system_matrix.block(0,1).m() +
                                             system_matrix.block(1,0).m(), solver_tolerance);
 
@@ -394,10 +410,10 @@
                               *Mp_preconditioner, *Amg_preconditioner,
                               false);
 
-        SolverFGMRES<LinearAlgebra::BlockVector>
+        SolverGMRES<LinearAlgebra::BlockVector>
         solver(solver_control_cheap, mem,
-               SolverFGMRES<LinearAlgebra::BlockVector>::
-               AdditionalData(30, true));
+               SolverGMRES<LinearAlgebra::BlockVector>::
+               AdditionalData(300, true));
         solver.solve(stokes_block, distributed_stokes_solution,
                      distributed_stokes_rhs, preconditioner);
       }
@@ -412,10 +428,10 @@
                               *Mp_preconditioner, *Amg_preconditioner,
                               true);
 
-        SolverFGMRES<LinearAlgebra::BlockVector>
+        SolverGMRES<LinearAlgebra::BlockVector>
         solver(solver_control_expensive, mem,
-               SolverFGMRES<LinearAlgebra::BlockVector>::
-               AdditionalData(50, true));
+               SolverGMRES<LinearAlgebra::BlockVector>::
+               AdditionalData(500, true));
         solver.solve(stokes_block, distributed_stokes_solution,
                      distributed_stokes_rhs, preconditioner);
       }

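Two solver changes in this last file deserve a flag. First, SolverFGMRES becomes SolverGMRES: the block preconditioner runs inner CG solves and is therefore not a fixed linear operator, which is exactly the case flexible GMRES exists for, so plain GMRES is presumably a stop-gap for the PETSc port rather than a final choice. Second, the restart lengths and the cheap solver's iteration limit grow by a factor of ten (30 -> 300, 50 -> 500), which compensates for the weaker stand-in preconditioners at a real memory cost per restart cycle. For reference, the flexible variant this replaces:

    SolverFGMRES<LinearAlgebra::BlockVector>
      solver (solver_control_cheap, mem,
              SolverFGMRES<LinearAlgebra::BlockVector>::AdditionalData(30, true));
    solver.solve (stokes_block, distributed_stokes_solution,
                  distributed_stokes_rhs, preconditioner);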

