[cig-commits] r1394 - in branches/s-wang2: . for_deal.II for_deal.II/examples for_deal.II/examples/step-32 for_deal.II/include for_deal.II/include/deal.II for_deal.II/include/deal.II/lac for_deal.II/source for_deal.II/source/lac for_deal.II/source/numerics include/aspect include/aspect/particle include/aspect/postprocess source source/postprocess source/simulator
s-wang at dealii.org
s-wang at dealii.org
Wed Nov 28 21:55:57 PST 2012
Author: s-wang
Date: 2012-11-28 22:55:56 -0700 (Wed, 28 Nov 2012)
New Revision: 1394
Added:
branches/s-wang2/for_deal.II/
branches/s-wang2/for_deal.II/examples/
branches/s-wang2/for_deal.II/examples/step-32/
branches/s-wang2/for_deal.II/examples/step-32/test-step-32.cc
branches/s-wang2/for_deal.II/include/
branches/s-wang2/for_deal.II/include/deal.II/
branches/s-wang2/for_deal.II/include/deal.II/lac/
branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_matrix_base.h
branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_parallel_block_vector.h
branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_solver.h
branches/s-wang2/for_deal.II/include/deal.II/lac/trilinos_sparse_matrix.h
branches/s-wang2/for_deal.II/include/deal.II/lac/trilinos_vector_base.h
branches/s-wang2/for_deal.II/source/
branches/s-wang2/for_deal.II/source/lac/
branches/s-wang2/for_deal.II/source/lac/constraint_matrix.cc
branches/s-wang2/for_deal.II/source/lac/petsc_matrix_base.cc
branches/s-wang2/for_deal.II/source/lac/petsc_solver.cc
branches/s-wang2/for_deal.II/source/lac/trilinos_sparse_matrix.cc
branches/s-wang2/for_deal.II/source/lac/trilinos_vector_base.cc
branches/s-wang2/for_deal.II/source/numerics/
branches/s-wang2/for_deal.II/source/numerics/derivative_approximation.inst.in
branches/s-wang2/include/aspect/global_trilinos.h
Modified:
branches/s-wang2/include/aspect/global.h
branches/s-wang2/include/aspect/particle/integrator.h
branches/s-wang2/include/aspect/particle/world.h
branches/s-wang2/include/aspect/postprocess/interface.h
branches/s-wang2/include/aspect/simulator.h
branches/s-wang2/source/main.cc
branches/s-wang2/source/postprocess/composition_statistics.cc
branches/s-wang2/source/postprocess/temperature_statistics.cc
branches/s-wang2/source/simulator/assembly.cc
branches/s-wang2/source/simulator/core.cc
branches/s-wang2/source/simulator/helper_functions.cc
branches/s-wang2/source/simulator/initial_conditions.cc
branches/s-wang2/source/simulator/solver.cc
Log:
merged with new aspect.
Added: branches/s-wang2/for_deal.II/examples/step-32/test-step-32.cc
===================================================================
--- branches/s-wang2/for_deal.II/examples/step-32/test-step-32.cc (rev 0)
+++ branches/s-wang2/for_deal.II/examples/step-32/test-step-32.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,4649 @@
+/* Author: Martin Kronbichler, Uppsala University,
+ Wolfgang Bangerth, Texas A&M University,
+ Timo Heister, University of Goettingen, 2008-2011 */
+/* */
+/* Copyright (C) 2008, 2009, 2010, 2011, 2012 by the deal.II authors */
+/* */
+/* This file is subject to QPL and may not be distributed */
+/* without copyright and license information. Please refer */
+/* to the file deal.II/doc/license.html for the text and */
+/* further information on this license. */
+
+ // @sect3{Include files}
+
+ //The first task as usual is to
+ // include the functionality of these
+ // well-known deal.II library files
+ // and some C++ header files.
+#include <deal.II/base/quadrature_lib.h>
+#include <deal.II/base/logstream.h>
+#include <deal.II/base/function.h>
+#include <deal.II/base/utilities.h>
+#include <deal.II/base/conditional_ostream.h>
+#include <deal.II/base/work_stream.h>
+#include <deal.II/base/timer.h>
+#include <deal.II/base/parameter_handler.h>
+
+#include <deal.II/lac/full_matrix.h>
+#include <deal.II/lac/solver_bicgstab.h>
+#include <deal.II/lac/solver_cg.h>
+#include <deal.II/lac/solver_gmres.h>
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/block_sparsity_pattern.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/petsc_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#include <deal.II/lac/petsc_precondition.h>
+#include <deal.II/lac/petsc_solver.h>
+
+#include <deal.II/grid/tria.h>
+#include <deal.II/grid/grid_generator.h>
+#include <deal.II/grid/tria_accessor.h>
+#include <deal.II/grid/tria_iterator.h>
+#include <deal.II/grid/filtered_iterator.h>
+#include <deal.II/grid/tria_boundary_lib.h>
+#include <deal.II/grid/grid_tools.h>
+#include <deal.II/grid/grid_refinement.h>
+
+#include <deal.II/dofs/dof_handler.h>
+#include <deal.II/dofs/dof_renumbering.h>
+#include <deal.II/dofs/dof_accessor.h>
+#include <deal.II/dofs/dof_tools.h>
+
+#include <deal.II/fe/fe_q.h>
+#include <deal.II/fe/fe_dgq.h>
+#include <deal.II/fe/fe_dgp.h>
+#include <deal.II/fe/fe_system.h>
+#include <deal.II/fe/fe_values.h>
+#include <deal.II/fe/mapping_q.h>
+
+#include <deal.II/numerics/vector_tools.h>
+#include <deal.II/numerics/matrix_tools.h>
+#include <deal.II/numerics/data_out.h>
+#include <deal.II/numerics/error_estimator.h>
+#include <deal.II/numerics/solution_transfer.h>
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <limits>
+#include <locale>
+#include <string>
+
+ // This is the only include file that
+ // is new: It introduces the
+ // parallel::distributed::SolutionTransfer
+ // equivalent of the
+ // dealii::SolutionTransfer class to
+ // take a solution from one mesh to
+ // the next one upon mesh refinement,
+ // but in the case of parallel
+ // distributed triangulations:
+#include <deal.II/distributed/solution_transfer.h>
+
+ // The following classes are used in
+ // parallel distributed computations
+ // and have all already been
+ // introduced in step-40:
+#include <deal.II/base/index_set.h>
+#include <deal.II/distributed/tria.h>
+#include <deal.II/distributed/grid_refinement.h>
+
+
+/**
+ * utilities to replace Trilinos with PETSc.
+ */
+namespace CIG
+{
+/**
+ * Convert a block partitioning used for Trilinos into the size arrays
+ * used for PETSc. Exactly two blocks are expected.
+ *
+ * @param block_partition Index sets, one per block; its size must be 2.
+ * @param n_u Value stored as the first entry of @p block_sizes.
+ * @param n_p Value stored as the second entry of @p block_sizes.
+ * @param block_sizes Output: cleared, then filled with {n_u, n_p}.
+ * @param local_sizes Output: cleared, then filled with the number of
+ *        elements of block_partition[0] and block_partition[1].
+ */
+void convert_block_partitioning(
+    const std::vector<dealii::IndexSet> &block_partition,
+    int n_u, int n_p,
+    std::vector<unsigned int> &block_sizes,
+    std::vector<unsigned int> &local_sizes)
+{
+  Assert(block_partition.size()==2, dealii::ExcMessage("logic error"));
+  // The sizes are stored as unsigned int; a negative input would
+  // silently wrap around to a huge value, so reject it up front.
+  Assert(n_u>=0 && n_p>=0,
+         dealii::ExcMessage("block sizes must not be negative"));
+
+  // Start from empty output arrays.
+  block_sizes.clear();
+  local_sizes.clear();
+
+  // Global size of each block.
+  block_sizes.push_back(static_cast<unsigned int>(n_u));
+  block_sizes.push_back(static_cast<unsigned int>(n_p));
+
+  // Number of elements of each block's index set.
+  local_sizes.push_back(block_partition[0].n_elements());
+  local_sizes.push_back(block_partition[1].n_elements());
+}
+
+
+/**
+ * Initialize a 2x2 PETSc block sparse matrix on MPI_COMM_WORLD.
+ *
+ * Block (i,j) is reinitialized with block_sizes[i] and block_sizes[j]
+ * followed by local_sizes[i] and local_sizes[j]; afterwards
+ * collect_sizes() is called on the block matrix.
+ *
+ * @param block_sizes Two entries, one per block.
+ * @param local_sizes Two entries, one per block.
+ * @param max_coupling_between_dofs Passed unchanged as the last
+ *        argument of every block's reinit().
+ * @param matrix The block matrix to set up.
+ */
+void setup_petsc_matrix(
+    std::vector<unsigned int> &block_sizes,
+    std::vector<unsigned int> &local_sizes,
+    int max_coupling_between_dofs,
+    dealii::PETScWrappers::MPI::BlockSparseMatrix &matrix)
+{
+  Assert(block_sizes.size()==2, dealii::ExcMessage("logic error"));
+  // local_sizes is indexed at [0] and [1] below, so check it as well.
+  Assert(local_sizes.size()==2,
+         dealii::ExcMessage("local_sizes must have exactly two entries"));
+
+  matrix.reinit(2,2);
+  // Blocks are visited in the order (0,0), (0,1), (1,0), (1,1). The
+  // sizes are passed as unsigned values, without a detour through int.
+  for (unsigned int row=0; row<2; ++row)
+    for (unsigned int col=0; col<2; ++col)
+      matrix.block(row,col).reinit(
+          MPI_COMM_WORLD,
+          block_sizes[row],block_sizes[col],
+          local_sizes[row],local_sizes[col],
+          max_coupling_between_dofs);
+  matrix.collect_sizes();
+}
+
+/**
+ * Initialize a two-block PETSc block vector on MPI_COMM_WORLD.
+ *
+ * The block vector is first reinitialized from @p block_sizes; then
+ * block b is reinitialized from partitioning[b] and
+ * relevant_partitioning[b], and collect_sizes() is called.
+ *
+ * @param block_sizes Two entries, one per block.
+ * @param partitioning Two index sets, one per block.
+ * @param relevant_partitioning Two index sets, one per block.
+ * @param vector The block vector to set up.
+ */
+void setup_petsc_vector(
+    std::vector<unsigned int> &block_sizes,
+    std::vector<dealii::IndexSet> &partitioning,
+    std::vector<dealii::IndexSet> &relevant_partitioning,
+    dealii::PETScWrappers::MPI::BlockVector &vector)
+{
+  Assert(block_sizes.size()==2, dealii::ExcMessage("logic error"));
+  // Both partitionings are indexed at [0] and [1] below.
+  Assert(partitioning.size()==2,
+         dealii::ExcMessage("partitioning must have exactly two entries"));
+  Assert(relevant_partitioning.size()==2,
+         dealii::ExcMessage("relevant_partitioning must have exactly two entries"));
+
+  vector.reinit(block_sizes,MPI_COMM_WORLD);
+  for (unsigned int b=0; b<2; ++b)
+    vector.block(b).reinit(MPI_COMM_WORLD,partitioning[b],relevant_partitioning[b]);
+  vector.collect_sizes();
+}
+
+// Replace every entry in the index range reported by local_range()
+// with its std::floor, then call compress() on the vector.
+template <class VectorType>
+void reduce_accuracy(VectorType &vector)
+{
+  const std::pair<unsigned int,unsigned int> owned = vector.local_range();
+  unsigned int idx = owned.first;
+  while (idx < owned.second)
+    {
+      vector[idx] = std::floor(vector[idx]);
+      ++idx;
+    }
+  vector.compress();
+}
+
+}
+
+ // The next step is like in all
+ // previous tutorial programs: We put
+ // everything into a namespace of its
+ // own and then import the deal.II
+ // classes and functions into it:
+namespace Step32
+{
+ using namespace dealii;
+
+ // @sect3{Equation data}
+
+ // In the following namespace, we
+ // define the various pieces of
+ // equation data that describe the
+ // problem. This corresponds to the
+ // various aspects of making the
+ // problem at least slightly
+ // realistic and that were
+ // exhaustively discussed in the
+ // description of the testcase in
+ // the introduction.
+ //
+ // We start with a few coefficients
+ // that have constant values (the
+ // comment after the value
+ // indicates its physical units):
+ namespace EquationData
+ {
+ const double eta = 1e21; /* Pa s */
+ const double kappa = 1e-6; /* m^2 / s */
+ const double reference_density = 3300; /* kg / m^3 */
+ const double reference_temperature = 293; /* K */
+ const double expansion_coefficient = 2e-5; /* 1/K */
+ const double specific_heat = 1250; /* J / K / kg */
+ const double radiogenic_heating = 7.4e-12; /* W / kg */
+
+
+ const double R0 = 6371000.-2890000.; /* m */
+ const double R1 = 6371000.- 35000.; /* m */
+
+ const double T0 = 4000+273; /* K */
+ const double T1 = 700+273; /* K */
+
+
+ // The next set of definitions
+ // are for functions that encode
+ // the density as a function of
+ // temperature, the gravity
+ // vector, and the initial values
+ // for the temperature. Again,
+ // all of these (along with the
+ // values they compute) are
+ // discussed in the introduction:
+ // Density as a linear function of temperature around the
+ // reference state (reference_density at reference_temperature).
+ double density (const double temperature)
+ {
+   const double temperature_offset = temperature - reference_temperature;
+   const double relative_change = expansion_coefficient * temperature_offset;
+   return reference_density * (1 - relative_change);
+ }
+
+
+ // Gravity at point p: directed along -p/|p|, with a magnitude that
+ // depends only on the distance r = |p| from the origin.
+ template <int dim>
+ Tensor<1,dim> gravity_vector (const Point<dim> &p)
+ {
+   const double r = p.norm();
+   const double magnitude = 1.245e-6 * r + 7.714e13/r/r;
+   return -magnitude * p / r;
+ }
+
+
+
+ // Function object providing the initial temperature field. It is
+ // constructed as a Function<dim> with one component.
+ template <int dim>
+ class TemperatureInitialValues : public Function<dim>
+ {
+ public:
+ TemperatureInitialValues () : Function<dim>(1) {}
+
+ // Initial temperature at point p.
+ virtual double value (const Point<dim> &p,
+ const unsigned int component = 0) const;
+
+ // Fills the output vector with value(p, c) for every component c.
+ virtual void vector_value (const Point<dim> &p,
+ Vector<double> &value) const;
+ };
+
+
+
+ // Initial temperature at point p.
+ //
+ // With s = (|p|-R0)/(R1-R0), a perturbed coordinate
+ //   tau = s + 0.2*s*(1-s)*sin(6*phi)*q
+ // is formed and the result is T0*(1-tau) + T1*tau. The component
+ // argument is ignored. In 3d the perturbation is damped by the
+ // factor q, which depends on p(2); otherwise q is 1.
+ template <int dim>
+ double
+ TemperatureInitialValues<dim>::value (const Point<dim> &p,
+                                       const unsigned int) const
+ {
+   const double r = p.norm();
+   const double h = R1-R0;
+
+   const double s = (r-R0)/h;
+   // Use std::abs/std::cos explicitly: an unqualified abs() may
+   // resolve to the C function int abs(int), which would truncate
+   // the argument (|p(2)/R1| <= 1) to zero and force q to 1.
+   const double q = (dim==3)?std::max(0.0,std::cos(numbers::PI*std::abs(p(2)/R1))):1.0;
+   const double phi = std::atan2(p(0),p(1));
+   const double tau = s
+                      +
+                      0.2 * s * (1-s) * std::sin(6*phi) * q;
+
+   return T0*(1.0-tau) + T1*tau;
+ }
+
+
+ // Evaluate every component at p and store the results in values.
+ template <int dim>
+ void
+ TemperatureInitialValues<dim>::vector_value (const Point<dim> &p,
+                                              Vector<double> &values) const
+ {
+   const unsigned int n_comp = this->n_components;
+   unsigned int component = 0;
+   while (component < n_comp)
+     {
+       // Qualified call on purpose: same non-virtual dispatch as before.
+       values(component) = TemperatureInitialValues<dim>::value (p, component);
+       ++component;
+     }
+ }
+
+
+ // As mentioned in the
+ // introduction we need to
+ // rescale the pressure to avoid
+ // the relative ill-conditioning
+ // of the momentum and mass
+ // conservation equations. The
+ // scaling factor is
+ // $\frac{\eta}{L}$ where $L$ was
+ // a typical length scale. By
+ // experimenting it turns out
+ // that a good length scale is
+ // the diameter of plumes, which
+ // is around 10 km:
+ const double pressure_scaling = eta / 10000;
+
+ // The final number in this
+ // namespace is a constant that
+ // denotes the number of seconds
+ // per (average, tropical)
+ // year. We use this only when
+ // generating screen output:
+ // internally, all computations
+ // of this program happen in SI
+ // units (kilogram, meter,
+ // seconds) but writing
+ // geological times in seconds
+ // yields numbers that one can't
+ // relate to reality, and so we
+ // convert to years using the
+ // factor defined here:
+ const double year_in_seconds = 60*60*24*365.2425;
+
+ }
+
+
+
+ // @sect3{Preconditioning the Stokes system}
+
+ // This namespace implements the
+ // preconditioner. As discussed in the
+ // introduction, this preconditioner
+ // differs in a number of key portions from
+ // the one used in step-31. Specifically,
+ // it is a right preconditioner,
+ // implementing the matrix
+ // @f{align*}\left(\begin{array}{cc}A^{-1}
+ // & B^T \\ 0 & S^{-1}\end{array}\right)@f}
+ // where the two inverse matrix operations
+ // are approximated by linear solvers or,
+ // if the right flag is given to the
+ // constructor of this class, by a single
+ // AMG V-cycle for the velocity block. The
+ // three code blocks of the
+ // <code>vmult</code> function implement
+ // the multiplications with the three
+ // blocks of this preconditioner matrix and
+ // should be self explanatory if you have
+ // read through step-31 or the discussion
+ // of compositing solvers in step-20.
+ namespace LinearSolvers
+ {
+ // Block preconditioner applied through vmult(). It keeps pointers to
+ // the two block matrices and references to the two preconditioner
+ // objects passed to the constructor; those objects are not copied.
+ template <class PreconditionerA, class PreconditionerMp>
+ class BlockSchurPreconditioner : public Subscriptor
+ {
+ public:
+ // S: matrix whose (0,1) and (0,0) blocks are used in vmult().
+ // Spre: matrix whose (1,1) block is solved with in vmult().
+ // Mppreconditioner: preconditioner for the (1,1) solve.
+ // Apreconditioner: preconditioner for (or replacement of) the
+ // (0,0) solve.
+ // do_solve_A: if true, run a CG solve with the (0,0) block;
+ // otherwise apply Apreconditioner once.
+ BlockSchurPreconditioner (const PETScWrappers::MPI::BlockSparseMatrix &S,
+ const PETScWrappers::MPI::BlockSparseMatrix &Spre,
+ const PreconditionerMp &Mppreconditioner,
+ const PreconditionerA &Apreconditioner,
+ const bool do_solve_A)
+ :
+ stokes_matrix (&S),
+ stokes_preconditioner_matrix (&Spre),
+ mp_preconditioner (Mppreconditioner),
+ a_preconditioner (Apreconditioner),
+ do_solve_A (do_solve_A)
+ {}
+
+ // Apply the preconditioner to src and write the result into dst.
+ void vmult (PETScWrappers::MPI::BlockVector &dst,
+ const PETScWrappers::MPI::BlockVector &src) const
+ {
+ // Temporary for the first block, constructed from src.block(0).
+ PETScWrappers::MPI::Vector utmp(src.block(0));
+
+ // Step 1: CG solve with block (1,1) of the preconditioner matrix,
+ // tolerance 1e-6 times the norm of src.block(1); the result in
+ // dst.block(1) is then negated.
+ {
+ SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
+
+ SolverCG<PETScWrappers::MPI::Vector> solver(solver_control);
+
+ solver.solve(stokes_preconditioner_matrix->block(1,1),
+ dst.block(1), src.block(1),
+ mp_preconditioner);
+
+ dst.block(1) *= -1.0;
+ }
+
+ // Step 2: utmp = src.block(0) - S(0,1) * dst.block(1).
+ {
+ stokes_matrix->block(0,1).vmult(utmp, dst.block(1));
+ utmp*=-1.0;
+ utmp.add(src.block(0));
+ }
+
+ // Step 3: obtain dst.block(0) from utmp, either by a CG solve with
+ // block (0,0) (tolerance 1e-2 times the norm of utmp) or by a
+ // single application of a_preconditioner.
+ if (do_solve_A == true)
+ {
+ SolverControl solver_control(5000, utmp.l2_norm()*1e-2);
+ PETScWrappers::SolverCG solver(solver_control);
+ solver.solve(stokes_matrix->block(0,0), dst.block(0), utmp,
+ a_preconditioner);
+ }
+ else
+ a_preconditioner.vmult (dst.block(0), utmp);
+ }
+
+ private:
+ const SmartPointer<const PETScWrappers::MPI::BlockSparseMatrix> stokes_matrix;
+ const SmartPointer<const PETScWrappers::MPI::BlockSparseMatrix> stokes_preconditioner_matrix;
+ // Held by reference: the referenced objects must outlive this one.
+ const PreconditionerMp &mp_preconditioner;
+ const PreconditionerA &a_preconditioner;
+ const bool do_solve_A;
+ };
+ }
+
+
+
+ // @sect3{Definition of assembly data structures}
+ //
+ // As described in the
+ // introduction, we will use the
+ // WorkStream mechanism discussed
+ // in the @ref threads module to
+ // parallelize operations among the
+ // processors of a single
+ // machine. The WorkStream class
+ // requires that data is passed
+ // around in two kinds of data
+ // structures, one for scratch data
+ // and one to pass data from the
+ // assembly function to the
+ // function that copies local
+ // contributions into global
+ // objects.
+ //
+ // The following namespace (and the
+ // two sub-namespaces) contains a
+ // collection of data structures
+ // that serve this purpose, one
+ // pair for each of the four
+ // operations discussed in the
+ // introduction that we will want
+ // to parallelize. Each assembly
+ // routine gets two sets of data: a
+ // Scratch array that collects all
+ // the classes and arrays that are
+ // used for the calculation of the
+ // cell contribution, and a
+ // CopyData array that keeps local
+ // matrices and vectors which will
+ // be written into the global
+ // matrix. Whereas CopyData is a
+ // container for the final data
+ // that is written into the global
+ // matrices and vector (and, thus,
+ // absolutely necessary), the
+ // Scratch arrays are merely there
+ // for performance reasons —
+ // it would be much more expensive
+ // to set up a FEValues object on
+ // each cell, than creating it only
+ // once and updating some
+ // derivative data.
+ //
+ // Step-31 had four assembly
+ // routines: One for the
+ // preconditioner matrix of the
+ // Stokes system, one for the
+ // Stokes matrix and right hand
+ // side, one for the temperature
+ // matrices and one for the right
+ // hand side of the temperature
+ // equation. We here organize the
+ // scratch arrays and CopyData
+ // objects for each of those four
+ // assembly components using a
+ // <code>struct</code> environment
+ // (since we consider these as
+ // temporary objects we pass
+ // around, rather than classes that
+ // implement functionality of their
+ // own, though this is a more
+ // subjective point of view to
+ // distinguish between
+ // <code>struct</code>s and
+ // <code>class</code>es).
+ //
+ // Regarding the Scratch objects,
+ // each struct is equipped with a
+ // constructor that creates an
+ // FEValues object for a @ref
+ // FiniteElement "finite element",
+ // a @ref Quadrature "quadrature formula",
+ // the @ref Mapping "mapping" that
+ // describes the
+ // interpolation of curved
+ // boundaries, and some @ref
+ // UpdateFlags "update flags".
+ // Moreover, we manually implement
+ // a copy constructor (since the
+ // FEValues class is not copyable
+ // by itself), and provide some
+ // additional vector fields that
+ // are used to hold intermediate
+ // data during the computation of
+ // local contributions.
+ //
+ // Let us start with the scratch
+ // arrays and, specifically, the
+ // one used for assembly of the
+ // Stokes preconditioner:
+ namespace Assembly
+ {
+ namespace Scratch
+ {
+ // Scratch data for assembling the Stokes preconditioner: an
+ // FEValues object plus two per-shape-function buffers.
+ template <int dim>
+ struct StokesPreconditioner
+ {
+   // Build the FEValues object and size both buffers to the number
+   // of degrees of freedom per cell.
+   StokesPreconditioner (const FiniteElement<dim> &stokes_fe,
+                         const Quadrature<dim> &stokes_quadrature,
+                         const Mapping<dim> &mapping,
+                         const UpdateFlags update_flags)
+     :
+     stokes_fe_values (mapping, stokes_fe, stokes_quadrature,
+                       update_flags),
+     grad_phi_u (stokes_fe.dofs_per_cell),
+     phi_p (stokes_fe.dofs_per_cell)
+   {}
+
+   // Copy constructor: a new FEValues object is created from the
+   // mapping, element, quadrature and flags of the source object;
+   // the buffers are copied.
+   StokesPreconditioner (const StokesPreconditioner &other)
+     :
+     stokes_fe_values (other.stokes_fe_values.get_mapping(),
+                       other.stokes_fe_values.get_fe(),
+                       other.stokes_fe_values.get_quadrature(),
+                       other.stokes_fe_values.get_update_flags()),
+     grad_phi_u (other.grad_phi_u),
+     phi_p (other.phi_p)
+   {}
+
+   FEValues<dim> stokes_fe_values;
+
+   std::vector<Tensor<2,dim> > grad_phi_u;
+   std::vector<double> phi_p;
+ };
+
+
+
+ // The next one is the scratch object
+ // used for the assembly of the full
+ // Stokes system. Observe that we
+ // derive the StokesSystem scratch
+ // class from the StokesPreconditioner
+ // class above. We do this because all the
+ // objects that are necessary for the
+ // assembly of the preconditioner are
+ // also needed for the actual matrix
+ // system and right hand side, plus
+ // some extra data. This makes the
+ // program more compact. Note also that
+ // the assembly of the Stokes system
+ // and the temperature right hand side
+ // further down requires data from
+ // temperature and velocity,
+ // respectively, so we actually need
+ // two FEValues objects for those two
+ // cases.
+ // Scratch data for assembling the Stokes system. Extends the
+ // preconditioner scratch data by a second FEValues object for the
+ // temperature element and some further buffers.
+ template <int dim>
+ struct StokesSystem : public StokesPreconditioner<dim>
+ {
+   // Per-shape-function buffers are sized by stokes_fe.dofs_per_cell,
+   // the temperature buffer by the number of quadrature points.
+   StokesSystem (const FiniteElement<dim> &stokes_fe,
+                 const Mapping<dim> &mapping,
+                 const Quadrature<dim> &stokes_quadrature,
+                 const UpdateFlags stokes_update_flags,
+                 const FiniteElement<dim> &temperature_fe,
+                 const UpdateFlags temperature_update_flags)
+     :
+     StokesPreconditioner<dim> (stokes_fe, stokes_quadrature,
+                                mapping,
+                                stokes_update_flags),
+     temperature_fe_values (mapping, temperature_fe, stokes_quadrature,
+                            temperature_update_flags),
+     phi_u (stokes_fe.dofs_per_cell),
+     grads_phi_u (stokes_fe.dofs_per_cell),
+     div_phi_u (stokes_fe.dofs_per_cell),
+     old_temperature_values (stokes_quadrature.size())
+   {}
+
+   // Copy constructor: the base part is copied through the base
+   // class, the FEValues object is rebuilt, the buffers are copied.
+   StokesSystem (const StokesSystem<dim> &other)
+     :
+     StokesPreconditioner<dim> (other),
+     temperature_fe_values (other.temperature_fe_values.get_mapping(),
+                            other.temperature_fe_values.get_fe(),
+                            other.temperature_fe_values.get_quadrature(),
+                            other.temperature_fe_values.get_update_flags()),
+     phi_u (other.phi_u),
+     grads_phi_u (other.grads_phi_u),
+     div_phi_u (other.div_phi_u),
+     old_temperature_values (other.old_temperature_values)
+   {}
+
+   FEValues<dim> temperature_fe_values;
+
+   std::vector<Tensor<1,dim> > phi_u;
+   std::vector<SymmetricTensor<2,dim> > grads_phi_u;
+   std::vector<double> div_phi_u;
+
+   std::vector<double> old_temperature_values;
+ };
+
+
+ // After defining the objects used in
+ // the assembly of the Stokes system,
+ // we do the same for the assembly of
+ // the matrices necessary for the
+ // temperature system. The general
+ // structure is very similar:
+ // Scratch data for assembling the temperature matrices.
+ template <int dim>
+ struct TemperatureMatrix
+ {
+   // The FEValues object is set up for values, gradients and JxW
+   // values; both buffers get one entry per degree of freedom.
+   TemperatureMatrix (const FiniteElement<dim> &temperature_fe,
+                      const Mapping<dim> &mapping,
+                      const Quadrature<dim> &temperature_quadrature)
+     :
+     temperature_fe_values (mapping,
+                            temperature_fe, temperature_quadrature,
+                            update_values | update_gradients |
+                            update_JxW_values),
+     phi_T (temperature_fe.dofs_per_cell),
+     grad_phi_T (temperature_fe.dofs_per_cell)
+   {}
+
+   // Copy constructor: rebuilds the FEValues object, copies buffers.
+   TemperatureMatrix (const TemperatureMatrix &other)
+     :
+     temperature_fe_values (other.temperature_fe_values.get_mapping(),
+                            other.temperature_fe_values.get_fe(),
+                            other.temperature_fe_values.get_quadrature(),
+                            other.temperature_fe_values.get_update_flags()),
+     phi_T (other.phi_T),
+     grad_phi_T (other.grad_phi_T)
+   {}
+
+   FEValues<dim> temperature_fe_values;
+
+   std::vector<double> phi_T;
+   std::vector<Tensor<1,dim> > grad_phi_T;
+ };
+
+
+ // The final scratch object is used in
+ // the assembly of the right hand side
+ // of the temperature system. This
+ // object is significantly larger than
+ // the ones above because a lot more
+ // quantities enter the computation of
+ // the right hand side of the
+ // temperature equation. In particular,
+ // the temperature values and gradients
+ // of the previous two time steps need
+ // to be evaluated at the quadrature
+ // points, as well as the velocities
+ // and the strain rates (i.e. the
+ // symmetric gradients of the velocity)
+ // that enter the right hand side as
+ // friction heating terms. Despite the
+ // number of terms, the following
+ // should be rather self explanatory:
+ // Scratch data for assembling the temperature right hand side: two
+ // FEValues objects (temperature and Stokes element), two buffers
+ // with one entry per temperature degree of freedom, and buffers
+ // with one entry per quadrature point.
+ template <int dim>
+ struct TemperatureRHS
+ {
+   TemperatureRHS (const FiniteElement<dim> &temperature_fe,
+                   const FiniteElement<dim> &stokes_fe,
+                   const Mapping<dim> &mapping,
+                   const Quadrature<dim> &quadrature)
+     :
+     temperature_fe_values (mapping,
+                            temperature_fe, quadrature,
+                            update_values |
+                            update_gradients |
+                            update_hessians |
+                            update_quadrature_points |
+                            update_JxW_values),
+     stokes_fe_values (mapping,
+                       stokes_fe, quadrature,
+                       update_values | update_gradients),
+     phi_T (temperature_fe.dofs_per_cell),
+     grad_phi_T (temperature_fe.dofs_per_cell),
+     old_velocity_values (quadrature.size()),
+     old_old_velocity_values (quadrature.size()),
+     old_strain_rates (quadrature.size()),
+     old_old_strain_rates (quadrature.size()),
+     old_temperature_values (quadrature.size()),
+     old_old_temperature_values (quadrature.size()),
+     old_temperature_grads (quadrature.size()),
+     old_old_temperature_grads (quadrature.size()),
+     old_temperature_laplacians (quadrature.size()),
+     old_old_temperature_laplacians (quadrature.size())
+   {}
+
+   // Copy constructor: both FEValues objects are rebuilt from the
+   // source object's settings; all buffers are copied.
+   TemperatureRHS (const TemperatureRHS &other)
+     :
+     temperature_fe_values (other.temperature_fe_values.get_mapping(),
+                            other.temperature_fe_values.get_fe(),
+                            other.temperature_fe_values.get_quadrature(),
+                            other.temperature_fe_values.get_update_flags()),
+     stokes_fe_values (other.stokes_fe_values.get_mapping(),
+                       other.stokes_fe_values.get_fe(),
+                       other.stokes_fe_values.get_quadrature(),
+                       other.stokes_fe_values.get_update_flags()),
+     phi_T (other.phi_T),
+     grad_phi_T (other.grad_phi_T),
+     old_velocity_values (other.old_velocity_values),
+     old_old_velocity_values (other.old_old_velocity_values),
+     old_strain_rates (other.old_strain_rates),
+     old_old_strain_rates (other.old_old_strain_rates),
+     old_temperature_values (other.old_temperature_values),
+     old_old_temperature_values (other.old_old_temperature_values),
+     old_temperature_grads (other.old_temperature_grads),
+     old_old_temperature_grads (other.old_old_temperature_grads),
+     old_temperature_laplacians (other.old_temperature_laplacians),
+     old_old_temperature_laplacians (other.old_old_temperature_laplacians)
+   {}
+
+   FEValues<dim> temperature_fe_values;
+   FEValues<dim> stokes_fe_values;
+
+   std::vector<double> phi_T;
+   std::vector<Tensor<1,dim> > grad_phi_T;
+
+   std::vector<Tensor<1,dim> > old_velocity_values;
+   std::vector<Tensor<1,dim> > old_old_velocity_values;
+
+   std::vector<SymmetricTensor<2,dim> > old_strain_rates;
+   std::vector<SymmetricTensor<2,dim> > old_old_strain_rates;
+
+   std::vector<double> old_temperature_values;
+   std::vector<double> old_old_temperature_values;
+   std::vector<Tensor<1,dim> > old_temperature_grads;
+   std::vector<Tensor<1,dim> > old_old_temperature_grads;
+   std::vector<double> old_temperature_laplacians;
+   std::vector<double> old_old_temperature_laplacians;
+ };
+ }
+
+
+ // The CopyData objects are even
+ // simpler than the Scratch
+ // objects as all they have to do
+ // is to store the results of
+ // local computations until they
+ // can be copied into the global
+ // matrix or vector
+ // objects. These structures
+ // therefore only need to provide
+ // a constructor, a copy
+ // operation, and some arrays for
+ // local matrix, local vectors
+ // and the relation between local
+ // and global degrees of freedom
+ // (a.k.a.
+ // <code>local_dof_indices</code>). Again,
+ // we have one such structure for
+ // each of the four operations we
+ // will parallelize using the
+ // WorkStream class:
+ namespace CopyData
+ {
+ // Local results of the Stokes preconditioner assembly: a square
+ // local matrix and the index array, both sized by dofs_per_cell.
+ template <int dim>
+ struct StokesPreconditioner
+ {
+   StokesPreconditioner (const FiniteElement<dim> &stokes_fe)
+     :
+     local_matrix (stokes_fe.dofs_per_cell,
+                   stokes_fe.dofs_per_cell),
+     local_dof_indices (stokes_fe.dofs_per_cell)
+   {}
+
+   // Member-wise copy.
+   StokesPreconditioner (const StokesPreconditioner &other)
+     :
+     local_matrix (other.local_matrix),
+     local_dof_indices (other.local_dof_indices)
+   {}
+
+   FullMatrix<double> local_matrix;
+   std::vector<unsigned int> local_dof_indices;
+ };
+
+
+
+ // Local results of the Stokes system assembly: the preconditioner
+ // copy data plus a local right hand side vector.
+ template <int dim>
+ struct StokesSystem : public StokesPreconditioner<dim>
+ {
+   StokesSystem (const FiniteElement<dim> &stokes_fe)
+     :
+     StokesPreconditioner<dim> (stokes_fe),
+     local_rhs (stokes_fe.dofs_per_cell)
+   {}
+
+   // Copies the base part and the right hand side.
+   StokesSystem (const StokesSystem<dim> &other)
+     :
+     StokesPreconditioner<dim> (other),
+     local_rhs (other.local_rhs)
+   {}
+
+   Vector<double> local_rhs;
+ };
+
+
+
+ // Local results of the temperature matrix assembly: local mass and
+ // stiffness matrices and the index array, sized by dofs_per_cell.
+ template <int dim>
+ struct TemperatureMatrix
+ {
+   TemperatureMatrix (const FiniteElement<dim> &temperature_fe)
+     :
+     local_mass_matrix (temperature_fe.dofs_per_cell,
+                        temperature_fe.dofs_per_cell),
+     local_stiffness_matrix (temperature_fe.dofs_per_cell,
+                             temperature_fe.dofs_per_cell),
+     local_dof_indices (temperature_fe.dofs_per_cell)
+   {}
+
+   // Member-wise copy.
+   TemperatureMatrix (const TemperatureMatrix &other)
+     :
+     local_mass_matrix (other.local_mass_matrix),
+     local_stiffness_matrix (other.local_stiffness_matrix),
+     local_dof_indices (other.local_dof_indices)
+   {}
+
+   FullMatrix<double> local_mass_matrix;
+   FullMatrix<double> local_stiffness_matrix;
+   std::vector<unsigned int> local_dof_indices;
+ };
+
+
+
+ // Local results of the temperature right hand side assembly: the
+ // local vector, the index array and a square matrix_for_bc, all
+ // sized by dofs_per_cell.
+ template <int dim>
+ struct TemperatureRHS
+ {
+   TemperatureRHS (const FiniteElement<dim> &temperature_fe)
+     :
+     local_rhs (temperature_fe.dofs_per_cell),
+     local_dof_indices (temperature_fe.dofs_per_cell),
+     matrix_for_bc (temperature_fe.dofs_per_cell,
+                    temperature_fe.dofs_per_cell)
+   {}
+
+   // Member-wise copy.
+   TemperatureRHS (const TemperatureRHS &other)
+     :
+     local_rhs (other.local_rhs),
+     local_dof_indices (other.local_dof_indices),
+     matrix_for_bc (other.matrix_for_bc)
+   {}
+
+   Vector<double> local_rhs;
+   std::vector<unsigned int> local_dof_indices;
+   FullMatrix<double> matrix_for_bc;
+ };
+ }
+ }
+
+
+
+ // @sect3{The <code>BoussinesqFlowProblem</code> class template}
+ //
+ // This is the declaration of the
+ // main class. It is very similar
+ // to step-31 but there are a
+ // number of differences we will
+ // comment on below.
+ //
+ // The top of the class is
+ // essentially the same as in
+ // step-31, listing the public
+ // methods and a set of private
+ // functions that do the heavy
+ // lifting. Compared to step-31
+ // there are only two additions to
+ // this section: the function
+ // <code>get_cfl_number()</code>
+ // that computes the maximum CFL
+ // number over all cells which
+ // we then compute the global time
+ // step from, and the function
+ // <code>get_entropy_variation()</code>
+ // that is used in the computation
+ // of the entropy stabilization. It
+ // is akin to the
+ // <code>get_extrapolated_temperature_range()</code>
+ // we have used in step-31 for this
+ // purpose, but works on the
+ // entropy instead of the
+ // temperature.
+ template <int dim>
+ class BoussinesqFlowProblem
+ {
+ public:
+ struct Parameters;
+ BoussinesqFlowProblem (Parameters &parameters);
+ void run ();
+
+ int m_myrank; // for debugging
+
+ private:
+ void setup_dofs ();
+ void assemble_stokes_preconditioner ();
+ void build_stokes_preconditioner ();
+ void assemble_stokes_system ();
+ void assemble_temperature_matrix ();
+ void assemble_temperature_system (const double maximal_velocity);
+ void project_temperature_field ();
+ double get_maximal_velocity () const;
+ double get_cfl_number () const;
+ double get_entropy_variation (const double average_temperature) const;
+ std::pair<double,double> get_extrapolated_temperature_range () const;
+ void solve ();
+ void output_results ();
+ void refine_mesh (const unsigned int max_grid_level);
+
+ double
+ compute_viscosity(const std::vector<double> &old_temperature,
+ const std::vector<double> &old_old_temperature,
+ const std::vector<Tensor<1,dim> > &old_temperature_grads,
+ const std::vector<Tensor<1,dim> > &old_old_temperature_grads,
+ const std::vector<double> &old_temperature_laplacians,
+ const std::vector<double> &old_old_temperature_laplacians,
+ const std::vector<Tensor<1,dim> > &old_velocity_values,
+ const std::vector<Tensor<1,dim> > &old_old_velocity_values,
+ const std::vector<SymmetricTensor<2,dim> > &old_strain_rates,
+ const std::vector<SymmetricTensor<2,dim> > &old_old_strain_rates,
+ const double global_u_infty,
+ const double global_T_variation,
+ const double average_temperature,
+ const double global_entropy_variation,
+ const double cell_diameter) const;
+
+ public:
+
+ // The first significant new
+ // component is the definition
+ // of a struct for the
+ // parameters according to the
+ // discussion in the
+ // introduction. This structure
+ // is initialized by reading
+ // from a parameter file during
+ // construction of this object.
+ struct Parameters
+ {
+ Parameters (const std::string ¶meter_filename);
+
+ static void declare_parameters (ParameterHandler &prm);
+ void parse_parameters (ParameterHandler &prm);
+
+ double end_time;
+
+ unsigned int initial_global_refinement;
+ unsigned int initial_adaptive_refinement;
+
+ bool generate_graphical_output;
+ unsigned int graphical_output_interval;
+
+ unsigned int adaptive_refinement_interval;
+
+ double stabilization_alpha;
+ double stabilization_c_R;
+ double stabilization_beta;
+
+ unsigned int stokes_velocity_degree;
+ bool use_locally_conservative_discretization;
+
+ unsigned int temperature_degree;
+ };
+
+ private:
+ Parameters ¶meters;
+
+ // The <code>pcout</code> (for
+ // <i>%parallel
+ // <code>std::cout</code></i>)
+ // object is used to simplify
+ // writing output: each MPI
+ // process can use this to
+ // generate output as usual,
+ // but since each of these
+ // processes will (hopefully)
+ // produce the same output it
+ // will just be replicated many
+ // times over; with the
+ // ConditionalOStream class,
+ // only the output generated by
+ // one MPI process will
+ // actually be printed to
+ // screen, whereas the output
+ // by all the other threads
+ // will simply be forgotten.
+ ConditionalOStream pcout;
+
+ // The following member
+ // variables will then again be
+ // similar to those in step-31
+ // (and to other tutorial
+ // programs). As mentioned in
+ // the introduction, we fully
+ // distribute computations, so
+ // we will have to use the
+ // parallel::distributed::Triangulation
+ // class (see step-40) but the
+ // remainder of these variables
+ // is rather standard with two
+ // exceptions:
+ //
+ // - The <code>mapping</code>
+ // variable is used to denote a
+ // higher-order polynomial
+ // mapping. As mentioned in the
+ // introduction, we use this
+ // mapping when forming
+ // integrals through quadrature
+ // for all cells that are
+ // adjacent to either the inner
+ // or outer boundaries of our
+ // domain where the boundary is
+ // curved.
+ //
+ // - In a bit of naming
+ // confusion, you will notice
+ // below that some of the
+ // variables from namespace
+ // PETScWrappers are taken
+ // from namespace
+ // PETScWrappers::MPI (such
+ // as the right hand side
+ // vectors) whereas others are
+ // not (such as the various
+ // matrices). For the matrices,
+ // we happen to use the same
+ // class names for %parallel
+ // and sequential data
+ // structures, i.e., all
+ // matrices will actually be
+ // considered %parallel
+ // below. On the other hand,
+ // for vectors, only those from
+ // namespace
+ // PETScWrappers::MPI are
+ // actually distributed. In
+ // particular, we will
+ // frequently have to query
+ // velocities and temperatures
+ // at arbitrary quadrature
+ // points; consequently, rather
+ // than importing ghost
+ // information of a vector
+ // whenever we need access to
+ // degrees of freedom that are
+ // relevant locally but owned
+ // by another processor, we
+ // solve linear systems in
+ // %parallel but then
+ // immediately initialize a
+ // vector including ghost
+ // entries of the solution for
+ // further processing. The
+ // various
+ // <code>*_solution</code>
+ // vectors are therefore filled
+ // immediately after solving
+ // their respective linear
+ // system in %parallel and will
+ // always contain values for
+ // all @ref
+ // GlossLocallyRelevantDof
+ // "locally relevant degrees of freedom";
+ // the fully
+ // distributed vectors that we
+ // obtain from the solution
+ // process and that only ever
+ // contain the @ref
+ // GlossLocallyOwnedDof
+ // "locally owned degrees of freedom"
+ // are destroyed
+ // immediately after the
+ // solution process and after
+ // we have copied the relevant
+ // values into the member
+ // variable vectors.
+ parallel::distributed::Triangulation<dim> triangulation;
+ double global_Omega_diameter;
+
+ const MappingQ<dim> mapping;
+
+ const FESystem<dim> stokes_fe;
+ DoFHandler<dim> stokes_dof_handler;
+ ConstraintMatrix stokes_constraints;
+
+ PETScWrappers::MPI::BlockSparseMatrix stokes_matrix;
+ PETScWrappers::MPI::BlockSparseMatrix stokes_preconditioner_matrix;
+
+ PETScWrappers::MPI::BlockVector stokes_solution;
+ PETScWrappers::MPI::BlockVector old_stokes_solution;
+ PETScWrappers::MPI::BlockVector stokes_rhs;
+
+
+ FE_Q<dim> temperature_fe;
+ DoFHandler<dim> temperature_dof_handler;
+ ConstraintMatrix temperature_constraints;
+
+ PETScWrappers::MPI::SparseMatrix temperature_mass_matrix;
+ PETScWrappers::MPI::SparseMatrix temperature_stiffness_matrix;
+ PETScWrappers::MPI::SparseMatrix temperature_matrix;
+
+ PETScWrappers::MPI::Vector temperature_solution;
+ PETScWrappers::MPI::Vector old_temperature_solution;
+ PETScWrappers::MPI::Vector old_old_temperature_solution;
+ PETScWrappers::MPI::Vector temperature_rhs;
+
+
+ double time_step;
+ double old_time_step;
+ unsigned int timestep_number;
+
+ std_cxx1x::shared_ptr<PETScWrappers::PreconditionBoomerAMG> Amg_preconditioner;
+ std_cxx1x::shared_ptr<PETScWrappers::PreconditionJacobi> Mp_preconditioner;
+ std_cxx1x::shared_ptr<PETScWrappers::PreconditionJacobi> T_preconditioner;
+
+ bool rebuild_stokes_matrix;
+ bool rebuild_stokes_preconditioner;
+ bool rebuild_temperature_matrices;
+ bool rebuild_temperature_preconditioner;
+
+ // The next member variable,
+ // <code>computing_timer</code>
+ // is used to conveniently
+ // account for compute time
+ // spent in certain "sections"
+ // of the code that are
+ // repeatedly entered. For
+ // example, we will enter (and
+ // leave) sections for Stokes
+ // matrix assembly and would
+ // like to accumulate the run
+ // time spent in this section
+ // over all time steps. Every
+ // so many time steps as well
+ // as at the end of the program
+ // (through the destructor of
+ // the TimerOutput class) we
+ // will then produce a nice
+ // summary of the times spent
+ // in the different sections
+ // into which we categorize the
+ // run-time of this program.
+ TimerOutput computing_timer;
+
+ // After these member variables
+ // we have a number of
+ // auxiliary functions that
+ // have been broken out of the
+ // ones listed
+ // above. Specifically, there
+ // are first three functions
+ // that we call from
+ // <code>setup_dofs</code> and
+ // then the ones that do the
+ // assembling of linear
+ // systems:
+ void setup_stokes_matrix (const std::vector<IndexSet> &stokes_partitioning);
+ void setup_stokes_preconditioner (const std::vector<IndexSet> &stokes_partitioning);
+ void setup_temperature_matrices (const IndexSet &temperature_partitioning);
+
+
+ // Following the @ref
+ // MTWorkStream
+ // "task-based parallelization"
+ // paradigm,
+ // we split all the assembly
+ // routines into two parts: a
+ // first part that can do all
+ // the calculations on a
+ // certain cell without taking
+ // care of other threads, and a
+ // second part (which is
+ // writing the local data into
+ // the global matrices and
+ // vectors) which can be
+ // entered by only one thread
+ // at a time. In order to
+ // implement that, we provide
+ // functions for each of those
+ // two steps for all the four
+ // assembly routines that we
+ // use in this program. The
+ // following eight functions do
+ // exactly this:
+ void
+ local_assemble_stokes_preconditioner (const typename DoFHandler<dim>::active_cell_iterator &cell,
+ Assembly::Scratch::StokesPreconditioner<dim> &scratch,
+ Assembly::CopyData::StokesPreconditioner<dim> &data);
+
+ void
+ copy_local_to_global_stokes_preconditioner (const Assembly::CopyData::StokesPreconditioner<dim> &data);
+
+
+ void
+ local_assemble_stokes_system (const typename DoFHandler<dim>::active_cell_iterator &cell,
+ Assembly::Scratch::StokesSystem<dim> &scratch,
+ Assembly::CopyData::StokesSystem<dim> &data);
+
+ void
+ copy_local_to_global_stokes_system (const Assembly::CopyData::StokesSystem<dim> &data);
+
+
+ void
+ local_assemble_temperature_matrix (const typename DoFHandler<dim>::active_cell_iterator &cell,
+ Assembly::Scratch::TemperatureMatrix<dim> &scratch,
+ Assembly::CopyData::TemperatureMatrix<dim> &data);
+
+ void
+ copy_local_to_global_temperature_matrix (const Assembly::CopyData::TemperatureMatrix<dim> &data);
+
+
+
+ void
+ local_assemble_temperature_rhs (const std::pair<double,double> global_T_range,
+ const double global_max_velocity,
+ const double global_entropy_variation,
+ const typename DoFHandler<dim>::active_cell_iterator &cell,
+ Assembly::Scratch::TemperatureRHS<dim> &scratch,
+ Assembly::CopyData::TemperatureRHS<dim> &data);
+
+ void
+ copy_local_to_global_temperature_rhs (const Assembly::CopyData::TemperatureRHS<dim> &data);
+
+ // Finally, we forward declare
+ // a member class that we will
+ // define later on and that
+ // will be used to compute a
+ // number of quantities from
+ // our solution vectors that
+ // we'd like to put into the
+ // output files for
+ // visualization.
+ class Postprocessor;
+ };
+
+
+ // @sect3{BoussinesqFlowProblem class implementation}
+
+ // @sect4{BoussinesqFlowProblem::Parameters}
+ //
+ // Here comes the definition of the
+ // parameters for the Stokes
+ // problem. We allow to set the end
+ // time for the simulation, the
+ // level of refinements (both
+ // global and adaptive, which in
+ // the sum specify what maximum
+ // level the cells are allowed to
+ // have), and the interval between
+ // refinements in the time
+ // stepping.
+ //
+ // Then, we let the user specify
+ // constants for the stabilization
+ // parameters (as discussed in the
+ // introduction), the polynomial
+ // degree for the Stokes velocity
+ // space, whether to use the
+ // locally conservative
+ // discretization based on FE_DGP
+ // elements for the pressure or not
+ // (FE_Q elements for pressure),
+ // and the polynomial degree for
+ // the temperature interpolation.
+ //
+ // The constructor checks for a
+ // valid input file (if not, a file
+ // with default parameters for the
+ // quantities is written), and
+ // eventually parses the
+ // parameters.
+ // Construct the parameter set from the file named
+ // <code>parameter_filename</code>.
+ //
+ // Every member first receives the same default that
+ // declare_parameters() registers, so that no member is left
+ // indeterminate even if reading the file fails part-way. If the file
+ // cannot be opened, a template file of that name containing the default
+ // values is written and an exception is thrown via AssertThrow. An
+ // exception is also thrown if ParameterHandler::read_input() reports
+ // failure. On success the values read replace the defaults.
+ template <int dim>
+ BoussinesqFlowProblem<dim>::Parameters::Parameters (const std::string &parameter_filename)
+   :
+   end_time (1e8),
+   initial_global_refinement (2),
+   initial_adaptive_refinement (2),
+   generate_graphical_output (false),
+   graphical_output_interval (50),
+   adaptive_refinement_interval (10),
+   stabilization_alpha (2),
+   stabilization_c_R (0.11),
+   stabilization_beta (0.078),
+   stokes_velocity_degree (2),
+   use_locally_conservative_discretization (true),
+   temperature_degree (2)
+ {
+   ParameterHandler prm;
+   BoussinesqFlowProblem<dim>::Parameters::declare_parameters (prm);
+
+   std::ifstream parameter_file (parameter_filename.c_str());
+
+   if (!parameter_file)
+     {
+       parameter_file.close ();
+
+       std::ostringstream message;
+       message << "Input parameter file <"
+               << parameter_filename << "> not found. Creating a"
+               << std::endl
+               << "template file of the same name."
+               << std::endl;
+
+       // Leave a file with all defaults behind so the user has
+       // something to edit, then abort construction.
+       std::ofstream parameter_out (parameter_filename.c_str());
+       prm.print_parameters (parameter_out,
+                             ParameterHandler::Text);
+
+       AssertThrow (false, ExcMessage (message.str().c_str()));
+     }
+
+   const bool success = prm.read_input (parameter_file);
+   AssertThrow (success, ExcMessage ("Invalid input parameter file."));
+
+   parse_parameters (prm);
+ }
+
+
+
+ // Next we have a function that
+ // declares the parameters that we
+ // expect in the input file,
+ // together with their data types,
+ // default values and a
+ // description:
+ // Register every entry this program understands with <code>prm</code>:
+ // its name, default value, admissible pattern and help text. Six
+ // entries live at the top level; the remaining ones are grouped in the
+ // subsections "Stabilization parameters" and "Discretization".
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::Parameters::
+ declare_parameters (ParameterHandler &prm)
+ {
+   prm.declare_entry ("End time", "1e8",
+                      Patterns::Double (0),
+                      "The end time of the simulation in years.");
+   prm.declare_entry ("Initial global refinement", "2",
+                      Patterns::Integer (0),
+                      "The number of global refinement steps performed on "
+                      "the initial coarse mesh, before the problem is first "
+                      "solved there.");
+   prm.declare_entry ("Initial adaptive refinement", "2",
+                      Patterns::Integer (0),
+                      "The number of adaptive refinement steps performed after "
+                      "initial global refinement.");
+   prm.declare_entry ("Time steps between mesh refinement", "10",
+                      Patterns::Integer (1),
+                      "The number of time steps after which the mesh is to be "
+                      "adapted based on computed error indicators.");
+   prm.declare_entry ("Generate graphical output", "false",
+                      Patterns::Bool (),
+                      "Whether graphical output is to be generated or not. "
+                      "You may not want to get graphical output if the number "
+                      "of processors is large.");
+   prm.declare_entry ("Time steps between graphical output", "50",
+                      Patterns::Integer (1),
+                      "The number of time steps between each generation of "
+                      "graphical output files.");
+
+   prm.enter_subsection ("Stabilization parameters");
+   {
+     prm.declare_entry ("alpha", "2",
+                        Patterns::Double (1, 2),
+                        "The exponent in the entropy viscosity stabilization.");
+     prm.declare_entry ("c_R", "0.11",
+                        Patterns::Double (0),
+                        "The c_R factor in the entropy viscosity "
+                        "stabilization.");
+     prm.declare_entry ("beta", "0.078",
+                        Patterns::Double (0),
+                        "The beta factor in the artificial viscosity "
+                        "stabilization. An appropriate value for 2d is 0.052 "
+                        "and 0.078 for 3d.");
+   }
+   prm.leave_subsection ();
+
+   prm.enter_subsection ("Discretization");
+   {
+     prm.declare_entry ("Stokes velocity polynomial degree", "2",
+                        Patterns::Integer (1),
+                        "The polynomial degree to use for the velocity variables "
+                        "in the Stokes system.");
+     prm.declare_entry ("Temperature polynomial degree", "2",
+                        Patterns::Integer (1),
+                        "The polynomial degree to use for the temperature variable.");
+     // The help text below closes its parenthesis; the original string
+     // ended in "globally conservative." with the bracket left open.
+     prm.declare_entry ("Use locally conservative discretization", "true",
+                        Patterns::Bool (),
+                        "Whether to use a Stokes discretization that is locally "
+                        "conservative at the expense of a larger number of degrees "
+                        "of freedom, or to go with a cheaper discretization "
+                        "that does not locally conserve mass (although it is "
+                        "globally conservative).");
+   }
+   prm.leave_subsection ();
+ }
+
+
+
+ // And then we need a function that
+ // reads the contents of the
+ // ParameterHandler object we get
+ // by reading the input file and
+ // puts the results into variables
+ // that store the values of the
+ // parameters we have previously
+ // declared:
+ // Copy the values held by <code>prm</code> into the members of this
+ // struct. The entry and subsection names used here are the ones
+ // registered by declare_parameters().
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::Parameters::
+ parse_parameters (ParameterHandler &prm)
+ {
+   // Entries found at the top level of the parameter file.
+   end_time
+     = prm.get_double ("End time");
+   initial_global_refinement
+     = prm.get_integer ("Initial global refinement");
+   initial_adaptive_refinement
+     = prm.get_integer ("Initial adaptive refinement");
+   generate_graphical_output
+     = prm.get_bool ("Generate graphical output");
+   graphical_output_interval
+     = prm.get_integer ("Time steps between graphical output");
+   adaptive_refinement_interval
+     = prm.get_integer ("Time steps between mesh refinement");
+
+   // Constants of the entropy/artificial viscosity stabilization.
+   prm.enter_subsection ("Stabilization parameters");
+   stabilization_alpha = prm.get_double ("alpha");
+   stabilization_c_R = prm.get_double ("c_R");
+   stabilization_beta = prm.get_double ("beta");
+   prm.leave_subsection ();
+
+   // Polynomial degrees and the choice of pressure element.
+   prm.enter_subsection ("Discretization");
+   stokes_velocity_degree
+     = prm.get_integer ("Stokes velocity polynomial degree");
+   temperature_degree
+     = prm.get_integer ("Temperature polynomial degree");
+   use_locally_conservative_discretization
+     = prm.get_bool ("Use locally conservative discretization");
+   prm.leave_subsection ();
+ }
+
+
+
+
+ // @sect4{BoussinesqFlowProblem::BoussinesqFlowProblem}
+ //
+ // The constructor of the problem
+ // is very similar to the
+ // constructor in step-31. What is
+ // different is the %parallel
+ // communication: PETSc uses the
+ // message passing interface (MPI)
+ // for data distribution. When
+ // entering the
+ // BoussinesqFlowProblem class, we
+ // have to decide how the
+ // parallelization is to be done. We
+ // choose a rather simple strategy
+ // and let all processors that are
+ // running the program work
+ // together, specified by the
+ // communicator
+ // <code>MPI_COMM_WORLD</code>. Next,
+ // we create the output stream (as
+ // we already did in step-18) that
+ // only generates output on the
+ // first MPI process and is
+ // completely forgetful on all
+ // others. The implementation of
+ // this idea is to check the
+ // process number when constructing
+ // <code>pcout</code>: on process
+ // zero it gets a <code>true</code>
+ // argument and uses the
+ // <code>std::cout</code> stream
+ // for output. If we are on
+ // processor five, for instance,
+ // then we will give a
+ // <code>false</code> argument to
+ // <code>pcout</code>, which means
+ // that the output of that
+ // processor will not be
+ // printed. With the exception of
+ // the mapping object (for which we
+ // use polynomials of degree 4) all
+ // but the final member variable
+ // are exactly the same as in
+ // step-31.
+ //
+ // This final object, the
+ // TimerOutput object, is then told
+ // to restrict output to the
+ // <code>pcout</code> stream
+ // (processor 0), and then we
+ // specify that we want to get a
+ // summary table at the end of the
+ // program which shows us wallclock
+ // times (as opposed to CPU
+ // times). We will manually also
+ // request intermediate summaries
+ // every so many time steps in the
+ // <code>run()</code> function
+ // below.
+ // Set up all members that do not depend on the mesh. Only a reference
+ // to <code>parameters_</code> is stored, so the caller has to keep that
+ // object alive for as long as this object exists.
+ template <int dim>
+ BoussinesqFlowProblem<dim>::BoussinesqFlowProblem (Parameters &parameters_)
+   :
+   // Debugging aid; give it the rank of this process within
+   // MPI_COMM_WORLD so that it never holds an indeterminate value.
+   m_myrank (static_cast<int>(Utilities::MPI::this_mpi_process(MPI_COMM_WORLD))),
+   parameters (parameters_),
+   // Only the process with rank zero actually prints.
+   pcout (std::cout,
+          (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD)
+           == 0)),
+
+   triangulation (MPI_COMM_WORLD,
+                  typename Triangulation<dim>::MeshSmoothing
+                  (Triangulation<dim>::smoothing_on_refinement |
+                   Triangulation<dim>::smoothing_on_coarsening)),
+
+   mapping (4),
+
+   // dim velocity components of the requested degree plus one pressure
+   // component of one degree less; the pressure element is FE_DGP for
+   // the locally conservative variant and FE_Q otherwise.
+   stokes_fe (FE_Q<dim>(parameters.stokes_velocity_degree),
+              dim,
+              (parameters.use_locally_conservative_discretization
+               ?
+               static_cast<const FiniteElement<dim> &>
+               (FE_DGP<dim>(parameters.stokes_velocity_degree-1))
+               :
+               static_cast<const FiniteElement<dim> &>
+               (FE_Q<dim>(parameters.stokes_velocity_degree-1))),
+              1),
+
+   stokes_dof_handler (triangulation),
+
+   temperature_fe (parameters.temperature_degree),
+   temperature_dof_handler (triangulation),
+
+   time_step (0),
+   old_time_step (0),
+   timestep_number (0),
+   // Nothing has been assembled yet, so every matrix and preconditioner
+   // is flagged for rebuilding.
+   rebuild_stokes_matrix (true),
+   rebuild_stokes_preconditioner (true),
+   rebuild_temperature_matrices (true),
+   rebuild_temperature_preconditioner (true),
+
+   // Timing summaries go through pcout and report wall clock times.
+   computing_timer (pcout,
+                    TimerOutput::summary,
+                    TimerOutput::wall_times)
+ {}
+
+
+
+ // @sect4{The BoussinesqFlowProblem helper functions}
+ // @sect5{BoussinesqFlowProblem::get_maximal_velocity}
+ // Except for two small details,
+ // the function to compute the
+ // global maximum of the velocity
+ // is the same as in step-31. The
+ // first detail is actually common
+ // to all functions that implement
+ // loops over all cells in the
+ // triangulation: When operating in
+ // %parallel, each processor can
+ // only work on a chunk of cells
+ // since each processor only has a
+ // certain part of the entire
+ // triangulation. This chunk of
+ // cells that we want to work on is
+ // identified via a so-called
+ // <code>subdomain_id</code>, as we
+ // also did in step-18. All we need
+ // to change is hence to perform
+ // the cell-related operations only
+ // on cells that are owned by the
+ // current process (as opposed to
+ // ghost or artificial cells),
+ // i.e. for which the subdomain id
+ // equals the number of the process
+ // ID. Since this is a commonly
+ // used operation, there is a
+ // shortcut for this operation: we
+ // can ask whether the cell is
+ // owned by the current processor
+ // using
+ // <code>cell-@>is_locally_owned()</code>.
+ //
+ // The second difference is the way
+ // we calculate the maximum
+ // value. Before, we could simply
+ // have a <code>double</code>
+ // variable that we checked against
+ // on each quadrature point for
+ // each cell. Now, we have to be a
+ // bit more careful since each
+ // processor only operates on a
+ // subset of cells. What we do is
+ // to first let each processor
+ // calculate the maximum among its
+ // cells, and then do a global
+ // communication operation
+ // <code>Utilities::MPI::max</code>
+ // that computes the maximum value
+ // among all the maximum values of
+ // the individual processors. MPI
+ // provides such a call, but it's
+ // even simpler to use the
+ // respective function in namespace
+ // Utilities::MPI using the MPI
+ // communicator object since that
+ // will do the right thing even if
+ // we work without MPI and on a
+ // single machine only. The call to
+ // <code>Utilities::MPI::max</code>
+ // needs two arguments, namely the
+ // local maximum (input) and the
+ // MPI communicator, which is
+ // MPI_COMM_WORLD in this example.
+ // Return the largest velocity magnitude found at the sampling points
+ // of any locally owned cell, maximized over all processes of
+ // MPI_COMM_WORLD. The velocity is read from <code>stokes_solution</code>.
+ template <int dim>
+ double BoussinesqFlowProblem<dim>::get_maximal_velocity () const
+ {
+   // Sample on an iterated trapezoidal rule with as many subdivisions
+   // as the velocity polynomial degree.
+   const QIterated<dim> sampling_rule (QTrapez<1>(),
+                                       parameters.stokes_velocity_degree);
+   const unsigned int n_samples = sampling_rule.size();
+
+   const FEValuesExtractors::Vector velocity_part (0);
+   FEValues<dim> fe_values (mapping, stokes_fe, sampling_rule, update_values);
+   std::vector<Tensor<1,dim> > sampled_velocities (n_samples);
+
+   // Magnitudes are non-negative, so zero is a valid starting value.
+   double local_maximum = 0;
+
+   const typename DoFHandler<dim>::active_cell_iterator
+   end_cell = stokes_dof_handler.end();
+   for (typename DoFHandler<dim>::active_cell_iterator
+        cell = stokes_dof_handler.begin_active();
+        cell != end_cell; ++cell)
+     {
+       // Cells owned by other processes are handled there.
+       if (!cell->is_locally_owned())
+         continue;
+
+       fe_values.reinit (cell);
+       fe_values[velocity_part].get_function_values (stokes_solution,
+                                                     sampled_velocities);
+
+       for (unsigned int i=0; i<n_samples; ++i)
+         {
+           const double speed = sampled_velocities[i].norm();
+           if (local_maximum < speed)
+             local_maximum = speed;
+         }
+     }
+
+   return Utilities::MPI::max (local_maximum, MPI_COMM_WORLD);
+ }
+
+
+ // @sect5{BoussinesqFlowProblem::get_cfl_number}
+ // The next function does something
+ // similar, but we now compute the
+ // CFL number, i.e., maximal
+ // velocity on a cell divided by
+ // the cell diameter. This number
+ // is necessary to determine the
+ // time step size, as we use a
+ // semi-explicit time stepping
+ // scheme for the temperature
+ // equation (see step-31 for a
+ // discussion). We compute it in
+ // the same way as above: Compute
+ // the local maximum over all
+ // locally owned cells, then
+ // exchange it via MPI to find the
+ // global maximum.
+ // Return the largest ratio of (maximal velocity magnitude on a cell)
+ // to (cell diameter) over all locally owned cells, maximized over all
+ // processes of MPI_COMM_WORLD. The velocity is read from
+ // <code>stokes_solution</code>.
+ template <int dim>
+ double BoussinesqFlowProblem<dim>::get_cfl_number () const
+ {
+   const QIterated<dim> sampling_rule (QTrapez<1>(),
+                                       parameters.stokes_velocity_degree);
+   const unsigned int n_samples = sampling_rule.size();
+
+   const FEValuesExtractors::Vector velocity_part (0);
+   FEValues<dim> fe_values (mapping, stokes_fe, sampling_rule, update_values);
+   std::vector<Tensor<1,dim> > sampled_velocities (n_samples);
+
+   double local_cfl = 0;
+
+   const typename DoFHandler<dim>::active_cell_iterator
+   end_cell = stokes_dof_handler.end();
+   for (typename DoFHandler<dim>::active_cell_iterator
+        cell = stokes_dof_handler.begin_active();
+        cell != end_cell; ++cell)
+     {
+       if (!cell->is_locally_owned())
+         continue;
+
+       fe_values.reinit (cell);
+       fe_values[velocity_part].get_function_values (stokes_solution,
+                                                     sampled_velocities);
+
+       // The per-cell maximum starts from a tiny positive floor rather
+       // than from zero.
+       double cell_speed = 1e-10;
+       for (unsigned int i=0; i<n_samples; ++i)
+         {
+           const double speed = sampled_velocities[i].norm();
+           if (cell_speed < speed)
+             cell_speed = speed;
+         }
+
+       const double cell_cfl = cell_speed / cell->diameter();
+       if (local_cfl < cell_cfl)
+         local_cfl = cell_cfl;
+     }
+
+   return Utilities::MPI::max (local_cfl, MPI_COMM_WORLD);
+ }
+
+
+ // @sect5{BoussinesqFlowProblem::get_entropy_variation}
+ // Next comes the computation of
+ // the global entropy variation
+ // $\|E(T)-\bar{E}(T)\|_\infty$
+ // where the entropy $E$ is defined
+ // as discussed in the
+ // introduction. This is needed for
+ // the evaluation of the
+ // stabilization in the temperature
+ // equation as explained in the
+ // introduction. The entropy
+ // variation is actually only
+ // needed if we use $\alpha=2$ as a
+ // power in the residual
+ // computation. The infinity norm
+ // is computed by the maxima over
+ // quadrature points, as usual in
+ // discrete computations.
+ //
+ // In order to compute this quantity, we
+ // first have to find the space-average
+ // $\bar{E}(T)$ and then evaluate the
+ // maximum. However, that means that we
+ // would need to perform two loops. We can
+ // avoid the overhead by noting that
+ // $\|E(T)-\bar{E}(T)\|_\infty =
+ // \max\big(E_{\textrm{max}}(T)-\bar{E}(T),
+ // \bar{E}(T)-E_{\textrm{min}}(T)\big)$, i.e., the
+ // maximum out of the deviation from the
+ // average entropy in positive and negative
+ // directions. The four quantities we need
+ // for the latter formula (maximum entropy,
+ // minimum entropy, average entropy, area)
+ // can all be evaluated in the same loop
+ // over all cells, so we choose this
+ // simpler variant.
+ // Return the maximal deviation of the entropy from its space average,
+ // taken over all processes of MPI_COMM_WORLD. The entropy at a
+ // quadrature point is (T - average_temperature)^2 with T the mean of
+ // the two previous temperature solutions. If the stabilization
+ // exponent alpha differs from 2 the function returns 1 without looking
+ // at any cell.
+ template <int dim>
+ double
+ BoussinesqFlowProblem<dim>::get_entropy_variation (const double average_temperature) const
+ {
+   if (parameters.stabilization_alpha != 2)
+     return 1.;
+
+   const QGauss<dim> gauss_rule (parameters.temperature_degree+1);
+   const unsigned int n_points = gauss_rule.size();
+
+   // NOTE(review): unlike the sibling helper functions, this FEValues
+   // object is built without the <code>mapping</code> member -- confirm
+   // that this is intended.
+   FEValues<dim> fe_values (temperature_fe, gauss_rule,
+                            update_values | update_JxW_values);
+   std::vector<double> T_old (n_points);
+   std::vector<double> T_old_old (n_points);
+
+   // The sign of the extrema is not known a priori, so the running
+   // minimum starts at the largest and the running maximum at the most
+   // negative finite double. The first locally owned cell overwrites
+   // both; on a process that owns no cells the global reduction below
+   // takes care of it. Alongside the extrema we accumulate the measure
+   // of the locally owned part of the domain and the integral of the
+   // entropy over it.
+   double smallest_entropy = std::numeric_limits<double>::max();
+   double largest_entropy = -std::numeric_limits<double>::max();
+   double volume = 0;
+   double entropy_integral = 0;
+
+   const typename DoFHandler<dim>::active_cell_iterator
+   end_cell = temperature_dof_handler.end();
+   for (typename DoFHandler<dim>::active_cell_iterator
+        cell = temperature_dof_handler.begin_active();
+        cell != end_cell; ++cell)
+     {
+       if (!cell->is_locally_owned())
+         continue;
+
+       fe_values.reinit (cell);
+       fe_values.get_function_values (old_temperature_solution, T_old);
+       fe_values.get_function_values (old_old_temperature_solution, T_old_old);
+
+       for (unsigned int q=0; q<n_points; ++q)
+         {
+           const double T = (T_old[q] + T_old_old[q]) / 2;
+           const double entropy = ((T-average_temperature) *
+                                   (T-average_temperature));
+
+           if (entropy < smallest_entropy)
+             smallest_entropy = entropy;
+           if (largest_entropy < entropy)
+             largest_entropy = entropy;
+
+           volume += fe_values.JxW(q);
+           entropy_integral += fe_values.JxW(q) * entropy;
+         }
+     }
+
+   // Two reductions are enough for the four quantities: one array sum
+   // for the two integrals, and one array maximum for the extrema, using
+   // min(x) = -max(-x) so that the minimum can ride along with the
+   // maximum.
+   const double sums_here[2] = { entropy_integral, volume };
+   const double maxima_here[2] = { -smallest_entropy, largest_entropy };
+   double sums_everywhere[2];
+   double maxima_everywhere[2];
+
+   Utilities::MPI::sum (sums_here, MPI_COMM_WORLD, sums_everywhere);
+   Utilities::MPI::max (maxima_here, MPI_COMM_WORLD, maxima_everywhere);
+
+   // The result is the larger of the deviations of the global maximum
+   // and the global minimum from the average entropy.
+   const double mean_entropy = sums_everywhere[0] / sums_everywhere[1];
+   const double deviation_above = maxima_everywhere[1] - mean_entropy;
+   const double deviation_below = mean_entropy - (-maxima_everywhere[0]);
+   return std::max (deviation_above, deviation_below);
+ }
+
+
+
+ // @sect5{BoussinesqFlowProblem::get_extrapolated_temperature_range}
+ // The next function computes the
+ // minimal and maximal value of the
+ // extrapolated temperature over
+ // the entire domain. Again, this
+ // is only a slightly modified
+ // version of the respective
+ // function in step-31. As in the
+ // function above, we collect local
+ // minima and maxima and then
+ // compute the global extrema using
+ // the same trick as above.
+ //
+ // As already discussed in step-31, the
+ // function needs to distinguish between
+ // the first and all following time steps
+ // because it uses a higher order
+ // temperature extrapolation scheme when at
+ // least two previous time steps are
+ // available.
+ // Return the pair (minimum, maximum) of the temperature over all
+ // sampling points of all cells, across all processes of
+ // MPI_COMM_WORLD. In time step zero the temperature is taken directly
+ // from <code>old_temperature_solution</code>; afterwards it is
+ // extrapolated from the two previous solutions using the ratio of the
+ // current and the previous time step size.
+ template <int dim>
+ std::pair<double,double>
+ BoussinesqFlowProblem<dim>::get_extrapolated_temperature_range () const
+ {
+   const QIterated<dim> sampling_rule (QTrapez<1>(),
+                                       parameters.temperature_degree);
+   const unsigned int n_samples = sampling_rule.size();
+
+   FEValues<dim> fe_values (mapping, temperature_fe, sampling_rule,
+                            update_values);
+   std::vector<double> T_old (n_samples);
+   std::vector<double> T_old_old (n_samples);
+
+   double lowest = std::numeric_limits<double>::max();
+   double highest = -std::numeric_limits<double>::max();
+
+   // Only from the second time step on is there a second previous
+   // solution (and a previous time step size) to extrapolate with.
+   const bool extrapolate = (timestep_number != 0);
+
+   const typename DoFHandler<dim>::active_cell_iterator
+   end_cell = temperature_dof_handler.end();
+   for (typename DoFHandler<dim>::active_cell_iterator
+        cell = temperature_dof_handler.begin_active();
+        cell != end_cell; ++cell)
+     {
+       if (!cell->is_locally_owned())
+         continue;
+
+       fe_values.reinit (cell);
+       fe_values.get_function_values (old_temperature_solution, T_old);
+       if (extrapolate)
+         fe_values.get_function_values (old_old_temperature_solution,
+                                        T_old_old);
+
+       for (unsigned int q=0; q<n_samples; ++q)
+         {
+           double temperature = T_old[q];
+           if (extrapolate)
+             temperature =
+               (1. + time_step/old_time_step) * T_old[q]-
+               time_step/old_time_step * T_old_old[q];
+
+           if (temperature < lowest)
+             lowest = temperature;
+           if (highest < temperature)
+             highest = temperature;
+         }
+     }
+
+   // One maximum reduction delivers both extrema: the minimum travels
+   // as the maximum of its negative.
+   double extrema_here[2] = { -lowest, highest };
+   double extrema_everywhere[2];
+   Utilities::MPI::max (extrema_here, MPI_COMM_WORLD, extrema_everywhere);
+
+   return std::make_pair(-extrema_everywhere[0], extrema_everywhere[1]);
+ }
+
+
+ // @sect5{BoussinesqFlowProblem::compute_viscosity}
+ // The function that calculates the
+ // viscosity is purely local and so needs
+ // no communication at all. It is mostly
+ // the same as in step-31 but with an
+ // updated formulation of the viscosity if
+ // $\alpha=2$ is chosen:
+ template <int dim>
+ double
+ BoussinesqFlowProblem<dim>::
+ compute_viscosity (const std::vector<double> &old_temperature,
+                    const std::vector<double> &old_old_temperature,
+                    const std::vector<Tensor<1,dim> > &old_temperature_grads,
+                    const std::vector<Tensor<1,dim> > &old_old_temperature_grads,
+                    const std::vector<double> &old_temperature_laplacians,
+                    const std::vector<double> &old_old_temperature_laplacians,
+                    const std::vector<Tensor<1,dim> > &old_velocity_values,
+                    const std::vector<Tensor<1,dim> > &old_old_velocity_values,
+                    const std::vector<SymmetricTensor<2,dim> > &old_strain_rates,
+                    const std::vector<SymmetricTensor<2,dim> > &old_old_strain_rates,
+                    const double global_u_infty,
+                    const double global_T_variation,
+                    const double average_temperature,
+                    const double global_entropy_variation,
+                    const double cell_diameter) const
+ {
+   // Without any flow there is nothing to scale the viscosity
+   // with; fall back to a value proportional to the cell size.
+   if (global_u_infty == 0)
+     return 5e-3 * cell_diameter;
+
+   const bool scale_by_temperature_deviation
+     = (parameters.stabilization_alpha == 2);
+
+   double max_residual = 0;
+   double max_velocity = 0;
+
+   // Find the largest residual of the temperature equation and
+   // the largest velocity magnitude over the quadrature points
+   // of this cell, using averages of the two previous time steps.
+   const unsigned int n_points = old_temperature.size();
+   for (unsigned int q=0; q<n_points; ++q)
+     {
+       const Tensor<1,dim> mean_velocity = (old_velocity_values[q] +
+                                            old_old_velocity_values[q]) / 2;
+       const SymmetricTensor<2,dim> mean_strain_rate = (old_strain_rates[q] +
+                                                        old_old_strain_rates[q]) / 2;
+       const double T = (old_temperature[q] + old_old_temperature[q]) / 2;
+
+       const double time_derivative = (old_temperature[q] - old_old_temperature[q])
+                                      / old_time_step;
+       const double advection = mean_velocity * (old_temperature_grads[q] +
+                                                 old_old_temperature_grads[q]) / 2;
+       const double diffusion = EquationData::kappa
+                                * (old_temperature_laplacians[q] +
+                                   old_old_temperature_laplacians[q]) / 2;
+
+       // heating term: radiogenic plus shear heating, divided by
+       // density times specific heat
+       const double heating_numerator
+         = (EquationData::radiogenic_heating * EquationData::density(T)
+            +
+            2 * EquationData::eta * mean_strain_rate * mean_strain_rate);
+       const double heating_denominator
+         = (EquationData::density(T) * EquationData::specific_heat);
+       const double heating = (heating_numerator / heating_denominator);
+
+       double residual
+         = std::abs(time_derivative + advection - diffusion - heating);
+       if (scale_by_temperature_deviation)
+         residual *= std::abs(T - average_temperature);
+
+       max_residual = std::max (residual, max_residual);
+       max_velocity = std::max (std::sqrt (mean_velocity*mean_velocity),
+                                max_velocity);
+     }
+
+   // First-order viscosity; this is all we can use in the first
+   // time step and it caps the entropy viscosity afterwards.
+   const double max_viscosity = (parameters.stabilization_beta *
+                                 max_velocity * cell_diameter);
+   if (timestep_number == 0)
+     return max_viscosity;
+
+   Assert (old_time_step > 0, ExcInternalError());
+
+   const double entropy_viscosity
+     = (scale_by_temperature_deviation
+        ?
+        (parameters.stabilization_c_R *
+         cell_diameter * cell_diameter *
+         max_residual /
+         global_entropy_variation)
+        :
+        (parameters.stabilization_c_R *
+         cell_diameter * global_Omega_diameter *
+         max_velocity * max_residual /
+         (global_u_infty * global_T_variation)));
+
+   return std::min (max_viscosity, entropy_viscosity);
+ }
+
+
+
+ // @sect5{BoussinesqFlowProblem::project_temperature_field}
+
+ // This function is new compared to
+ // step-31. What it does is to re-implement
+ // the library function
+ // <code>VectorTools::project()</code> for
+ // an MPI-based parallelization, a function
+ // we used for generating an initial vector
+ // for temperature based on some initial
+ // function. The library function only
+ // works with shared memory but doesn't
+ // know how to utilize multiple machines
+ // coupled through MPI to compute the
+ // projected field. The details of a
+ // <code>project()</code> function are not
+ // very difficult. All we do is to use a
+ // mass matrix and put the evaluation of
+ // the initial value function on the right
+ // hand side. The mass matrix for
+ // temperature we can simply generate using
+ // the respective assembly function, so all
+ // we need to do here is to create the
+ // right hand side and do a CG solve. The
+ // assembly function does a loop over all
+ // cells and evaluates the function in the
+ // <code>EquationData</code> namespace, and
+ // does this only on cells owned by the
+ // respective processor. The implementation
+ // of this assembly differs from the
+ // assembly we do for the principal
+ // assembly functions further down (which
+ // include thread-based parallelization
+ // with the WorkStream concept). Here we
+ // chose to keep things simple (keeping in
+ // mind that this function is also only
+ // called once at the beginning of the
+ // program, not in every time step), and
+ // generating the right hand side is cheap
+ // anyway so we won't even notice that this
+ // part is not parallelized by threads.
+ //
+ // Regarding the implementation of
+ // inhomogeneous Dirichlet boundary
+ // conditions: Since we use the temperature
+ // ConstraintMatrix, we could apply the
+ // boundary conditions directly when
+ // building the respective matrix and right
+ // hand side. In this case, the boundary
+ // conditions are inhomogeneous, which
+ // makes this procedure somewhat tricky
+ // since we get the matrix from some other
+ // function that uses its own integration
+ // and assembly loop. However, the correct
+ // imposition of boundary conditions needs
+ // the matrix data we work on plus the
+ // right hand side simultaneously, since
+ // the right hand side is created by
+ // Gaussian elimination on the matrix
+ // rows. In order to not introduce the
+ // matrix assembly at this place, but still
+ // having the matrix data available, we
+ // choose to create a dummy matrix
+ // <code>matrix_for_bc</code> that we only
+ // fill with data when we need it for
+ // imposing boundary conditions. These
+ // positions are exactly those where we
+ // have an inhomogeneous entry in the
+ // ConstraintMatrix. There are only a few
+ // such positions (on the boundary DoFs),
+ // so it is still much cheaper to use this
+ // function than to create the full matrix
+ // here. To implement this, we ask the
+ // constraint matrix whether the DoF under
+ // consideration is inhomogeneously
+ // constrained. In that case, we generate
+ // the respective matrix column that we
+ // need for creating the correct right hand
+ // side. Note that this (manually
+ // generated) matrix entry needs to be
+ // exactly the entry that we would fill the
+ // matrix with — otherwise, this will
+ // not work.
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::project_temperature_field ()
+ {
+ assemble_temperature_matrix (); // presumably fills temperature_mass_matrix, the system matrix of the solve below
+ // Quadrature and FEValues used to integrate the initial temperature against the shape functions:
+ QGauss<dim> quadrature(parameters.temperature_degree+2);
+ UpdateFlags update_flags = UpdateFlags(update_values |
+ update_quadrature_points |
+ update_JxW_values);
+ FEValues<dim> fe_values (mapping, temperature_fe, quadrature, update_flags);
+
+ const unsigned int dofs_per_cell = fe_values.dofs_per_cell,
+ n_q_points = fe_values.n_quadrature_points;
+
+ std::vector<unsigned int> local_dof_indices (dofs_per_cell);
+ Vector<double> cell_vector (dofs_per_cell); // cell contribution to the right hand side
+ FullMatrix<double> matrix_for_bc (dofs_per_cell, dofs_per_cell); // only filled in columns of inhomogeneously constrained DoFs
+
+ std::vector<double> rhs_values(n_q_points); // initial temperature at the quadrature points
+ // Both vectors get the global size and the local size of the mass matrix:
+ PETScWrappers::MPI::Vector
+ rhs (MPI_COMM_WORLD,temperature_mass_matrix.m(),temperature_mass_matrix.local_size()), //rhs (temperature_mass_matrix.row_partitioner()),
+ solution (MPI_COMM_WORLD,temperature_mass_matrix.m(),temperature_mass_matrix.local_size()); //solution (temperature_mass_matrix.row_partitioner());
+
+ const EquationData::TemperatureInitialValues<dim> initial_temperature;
+
+ typename DoFHandler<dim>::active_cell_iterator
+ cell = temperature_dof_handler.begin_active(),
+ endc = temperature_dof_handler.end();
+ // Assemble the right hand side, visiting only the cells this process owns:
+ for (; cell!=endc; ++cell)
+ if (cell->is_locally_owned())
+ {
+ cell->get_dof_indices (local_dof_indices);
+ fe_values.reinit (cell);
+
+ initial_temperature.value_list (fe_values.get_quadrature_points(),
+ rhs_values);
+
+ cell_vector = 0;
+ matrix_for_bc = 0;
+ for (unsigned int point=0; point<n_q_points; ++point)
+ for (unsigned int i=0; i<dofs_per_cell; ++i)
+ {
+ cell_vector(i) += rhs_values[point] *
+ fe_values.shape_value(i,point) *
+ fe_values.JxW(point);
+ if (temperature_constraints.is_inhomogeneously_constrained(local_dof_indices[i])) // only these columns are needed to correct the right hand side
+ {
+ for (unsigned int j=0; j<dofs_per_cell; ++j)
+ matrix_for_bc(j,i) += fe_values.shape_value(i,point) * // mass matrix entry (j,i)
+ fe_values.shape_value(j,point) *
+ fe_values.JxW(point);
+ }
+ }
+ // Add the cell contribution to rhs; matrix_for_bc supplies the matrix entries needed for inhomogeneous constraints:
+ temperature_constraints.distribute_local_to_global (cell_vector,
+ local_dof_indices,
+ rhs,
+ matrix_for_bc);
+ }
+
+ rhs.compress (); //rhs.compress (Add);
+ solution.compress(); // NOTE(review): solution has not been written to at this point
+// return;
+
+ // Now that we have the right linear
+ // system, we solve it using the CG
+ // method with a simple Jacobi
+ // preconditioner:
+ SolverControl solver_control(5*rhs.size(), 1e-12*rhs.l2_norm()); // at most 5*size iterations, tolerance relative to the norm of rhs
+ SolverCG<PETScWrappers::MPI::Vector> cg(solver_control);
+
+ PETScWrappers::PreconditionJacobi preconditioner_mass;
+ preconditioner_mass.initialize(temperature_mass_matrix);
+
+ cg.solve (temperature_mass_matrix, solution, rhs, preconditioner_mass);
+
+ temperature_constraints.distribute (solution); // set the constrained entries of the solution
+
+ // Having so computed the current
+ // temperature field, let us set the
+ // member variables that hold the
+ // temperature nodes. Strictly speaking,
+ // we really only need to set
+ // <code>old_temperature_solution</code>
+ // since the first thing we will do is to
+ // compute the Stokes solution that only
+ // requires the previous time step's
+ // temperature field. That said, nothing
+ // good can come from not initializing
+ // the other vectors as well (especially
+ // since it's a relatively cheap
+ // operation and we only have to do it
+ // once at the beginning of the program)
+ // if we ever want to extend our
+ // numerical method or physical model,
+ // and so we initialize
+ // <code>temperature_solution</code> and
+ // <code>old_old_temperature_solution</code>
+ // as well. As a sidenote, while the
+ // <code>solution</code> vector is
+ // strictly distributed (i.e. each
+ // processor only stores a mutually
+ // exclusive subset of elements), the
+ // vectors on the left hand side (which
+ // were initialized to contain ghost
+ // elements as well) also need correct
+ // ghost values; after the assignments
+ // we therefore explicitly call
+ // <code>update_ghost_values()</code> on
+ // each of the three vectors:
+// CIG::reduce_accuracy(solution);
+
+ temperature_solution = solution;
+ old_temperature_solution = solution;
+ old_old_temperature_solution = solution;
+ temperature_solution.update_ghost_values();
+ old_temperature_solution.update_ghost_values();
+ old_old_temperature_solution.update_ghost_values();
+ }
+
+
+
+
+ // @sect4{The BoussinesqFlowProblem setup functions}
+
+ // The following three functions set up the
+ // Stokes matrix, the matrix used for the
+ // Stokes preconditioner, and the
+ // temperature matrix. The code is mostly
+ // the same as in step-31, but it has been
+ // broken out into three functions of their
+ // own for simplicity.
+ //
+ // The main functional difference between
+ // the code here and that in step-31 is
+ // that the matrices we want to set up are
+ // distributed across multiple
+ // processors. Since we still want to build
+ // up the sparsity pattern first for
+ // efficiency reasons, we could continue to
+ // build the <i>entire</i> sparsity pattern
+ // as a
+ // BlockCompressedSimpleSparsityPattern, as
+ // we did in step-31. However, that would
+ // be inefficient: every processor would
+ // build the same sparsity pattern, but
+ // only initialize a small part of the
+ // matrix using it. It also violates the
+ // principle that every processor should
+ // only work on those cells it owns (and,
+ // if necessary the layer of ghost cells
+ // around it).
+ //
+ // Rather, we use an object of type
+ // TrilinosWrappers::BlockSparsityPattern,
+ // which is (obviously) a wrapper around a
+ // sparsity pattern object provided by
+ // Trilinos. The advantage is that the
+ // Trilinos sparsity pattern class can
+ // communicate across multiple processors:
+ // if this processor fills in all the
+ // nonzero entries that result from the
+ // cells it owns, and every other processor
+ // does so as well, then at the end after
+ // some MPI communication initiated by the
+ // <code>compress()</code> call, we will
+ // have the globally assembled sparsity
+ // pattern available with which the global
+ // matrix can be initialized.
+ //
+ // The only other change we need to make is
+ // to tell the
+ // DoFTools::make_sparsity_pattern() function
+ // that it is only supposed to work on a
+ // subset of cells, namely the ones whose
+ // <code>subdomain_id</code> equals the
+ // number of the current processor, and to
+ // ignore all other cells.
+ //
+ // This strategy is replicated across all
+ // three of the following functions.
+ //
+ // Note that Trilinos matrices store the
+ // information contained in the sparsity
+ // patterns, so we can safely release the
+ // <code>sp</code> variable once the matrix
+ // has been given the sparsity structure.
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::
+ setup_stokes_matrix (const std::vector<IndexSet> &stokes_partitioning)
+ {
+   // This routine still builds a Trilinos sparsity pattern and is
+   // guarded by a failing assertion; the code that would hand the
+   // pattern to the matrix is commented out at the bottom.
+   assert(false);
+
+   stokes_matrix.clear ();
+
+   TrilinosWrappers::BlockSparsityPattern sparsity (stokes_partitioning,
+                                                    MPI_COMM_WORLD);
+
+   // All components couple with each other, except that there is
+   // no pressure-pressure block in the Stokes matrix.
+   const unsigned int n_components = dim+1;
+   Table<2,DoFTools::Coupling> coupling (n_components, n_components);
+   for (unsigned int row=0; row<n_components; ++row)
+     for (unsigned int col=0; col<n_components; ++col)
+       coupling[row][col] = (((row==dim) && (col==dim))
+                             ?
+                             DoFTools::none
+                             :
+                             DoFTools::always);
+
+   // Only cells whose subdomain id equals the rank of this
+   // process contribute entries to the pattern.
+   DoFTools::make_sparsity_pattern (stokes_dof_handler,
+                                    coupling, sparsity,
+                                    stokes_constraints, false,
+                                    Utilities::MPI::
+                                    this_mpi_process(MPI_COMM_WORLD));
+   sparsity.compress();
+
+// stokes_matrix.reinit (sp);
+// stokes_matrix.reinit(sp.n_block_rows(), sp.n_block_cols());
+// for (unsigned int r=0; r<sp.n_block_rows(); ++r)
+// for (unsigned int c=0; c<sp.n_block_cols(); ++c)
+// {
+// stokes_matrix.block(r,s).reinit((block_sparsity_pattern.block(r,c));
+// }
+ }
+
+
+
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::
+ setup_stokes_preconditioner (const std::vector<IndexSet> &stokes_partitioning)
+ {
+   // Like setup_stokes_matrix(), this routine still builds a
+   // Trilinos sparsity pattern and is guarded by a failing
+   // assertion; the final reinit() call is commented out.
+   assert(false);
+
+   // Drop the preconditioners before the matrix they were built
+   // from is cleared.
+   Amg_preconditioner.reset ();
+   Mp_preconditioner.reset ();
+
+   stokes_preconditioner_matrix.clear ();
+
+   TrilinosWrappers::BlockSparsityPattern sparsity (stokes_partitioning,
+                                                    MPI_COMM_WORLD);
+
+   // In the preconditioner matrix each vector component couples
+   // only with itself.
+   const unsigned int n_components = dim+1;
+   Table<2,DoFTools::Coupling> coupling (n_components, n_components);
+   for (unsigned int row=0; row<n_components; ++row)
+     for (unsigned int col=0; col<n_components; ++col)
+       coupling[row][col] = ((row == col)
+                             ?
+                             DoFTools::always
+                             :
+                             DoFTools::none);
+
+   DoFTools::make_sparsity_pattern (stokes_dof_handler,
+                                    coupling, sparsity,
+                                    stokes_constraints, false,
+                                    Utilities::MPI::
+                                    this_mpi_process(MPI_COMM_WORLD));
+   sparsity.compress();
+
+// stokes_preconditioner_matrix.reinit (sp);
+ }
+
+
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::
+ setup_temperature_matrices (const IndexSet &temperature_partitioner)
+ {
+   // The matrices are sized directly from the DoFHandler below, so
+   // the partitioner argument is currently not needed. It is kept
+   // so that callers do not have to change.
+   static_cast<void> (temperature_partitioner);
+
+   // Drop the preconditioner and release the old matrices before
+   // re-creating them.
+   T_preconditioner.reset ();
+   temperature_mass_matrix.clear ();
+   temperature_stiffness_matrix.clear ();
+   temperature_matrix.clear ();
+
+   // The sparsity-pattern based setup is disabled:
+// PETScWrappers::SparsityPattern sp (temperature_partitioner,
+// MPI_COMM_WORLD);
+// DoFTools::make_sparsity_pattern (temperature_dof_handler, sp,
+// temperature_constraints, false,
+// Utilities::MPI::
+// this_mpi_process(MPI_COMM_WORLD));
+// sp.compress();
+// temperature_matrix.reinit (sp);
+// temperature_mass_matrix.reinit (sp);
+// temperature_stiffness_matrix.reinit (sp);
+
+   // Instead, all three matrices are square with n_dofs global
+   // rows/columns, n_local_dofs of which are stored on this
+   // process, and use the DoFHandler's upper bound on the number
+   // of couplings per degree of freedom as the per-row size hint.
+   const unsigned int n_dofs          = temperature_dof_handler.n_dofs();
+   const unsigned int n_local_dofs    = temperature_dof_handler.n_locally_owned_dofs();
+   const unsigned int max_row_entries = temperature_dof_handler.max_couplings_between_dofs();
+
+   temperature_matrix.reinit (MPI_COMM_WORLD,
+                              n_dofs, n_dofs,
+                              n_local_dofs, n_local_dofs,
+                              max_row_entries);
+   temperature_mass_matrix.reinit (MPI_COMM_WORLD,
+                                   n_dofs, n_dofs,
+                                   n_local_dofs, n_local_dofs,
+                                   max_row_entries);
+   temperature_stiffness_matrix.reinit (MPI_COMM_WORLD,
+                                        n_dofs, n_dofs,
+                                        n_local_dofs, n_local_dofs,
+                                        max_row_entries);
+ }
+
+
+
+ // The remainder of the setup function
+ // (after splitting out the three functions
+ // above) mostly has to deal with the
+ // things we need to do for parallelization
+ // across processors. Because setting all
+ // of this up is a significant compute time
+ // expense of the program, we put
+ // everything we do here into a timer group
+ // so that we can get summary information
+ // about the fraction of time spent in this
+ // part of the program at its end.
+ //
+ // At the top as usual we enumerate degrees
+ // of freedom and sort them by
+ // component/block, followed by writing
+ // their numbers to the screen from
+ // processor zero. The
+ // DoFHandler::distribute_dofs() function,
+ // when applied to a
+ // parallel::distributed::Triangulation
+ // object, sorts degrees of freedom in such a
+ // way that all degrees of freedom
+ // associated with subdomain zero come
+ // before all those associated with
+ // subdomain one, etc. For the Stokes
+ // part, this entails, however, that
+ // velocities and pressures become
+ // intermixed, but this is trivially
+ // solved by sorting again by blocks; it
+ // is worth noting that this latter
+ // operation leaves the relative ordering
+ // of all velocities and pressures alone,
+ // i.e. within the velocity block we will
+ // still have all those associated with
+ // subdomain zero before all velocities
+ // associated with subdomain one,
+ // etc. This is important since we store
+ // each of the blocks of this matrix
+ // distributed across all processors and
+ // want this to be done in such a way
+ // that each processor stores that part
+ // of the matrix that is roughly equal to
+ // the degrees of freedom located on
+ // those cells that it will actually work
+ // on.
+ //
+ // When printing the numbers of degrees of
+ // freedom, note that these numbers are
+ // going to be large if we use many
+ // processors. Consequently, we let the
+ // stream put a comma separator in between
+ // every three digits. The state of the
+ // stream, using the locale, is saved from
+ // before to after this operation. While
+ // slightly opaque, the code works because
+ // the default locale (which we get using
+ // the constructor call
+ // <code>std::locale("")</code>) implies
+ // printing numbers with a comma separator
+ // for every third digit (i.e., thousands,
+ // millions, billions).
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::setup_dofs ()
+ {
+ computing_timer.enter_section("Setup dof systems"); // closed by exit_section() at the end of this function
+
+ std::vector<unsigned int> stokes_sub_blocks (dim+1,0); // the first dim (velocity) components go to block 0 ...
+ stokes_sub_blocks[dim] = 1; // ... and the last (pressure) component to block 1
+ stokes_dof_handler.distribute_dofs (stokes_fe);
+ DoFRenumbering::component_wise (stokes_dof_handler, stokes_sub_blocks); // renumber the Stokes DoFs block by block
+
+ temperature_dof_handler.distribute_dofs (temperature_fe);
+
+ std::vector<unsigned int> stokes_dofs_per_block (2);
+ DoFTools::count_dofs_per_block (stokes_dof_handler, stokes_dofs_per_block,
+ stokes_sub_blocks);
+
+ const unsigned int n_u = stokes_dofs_per_block[0], // number of velocity DoFs
+ n_p = stokes_dofs_per_block[1], // number of pressure DoFs
+ n_T = temperature_dof_handler.n_dofs(); // number of temperature DoFs
+
+ std::locale s = pcout.get_stream().getloc(); // remember the current locale of the stream
+ pcout.get_stream().imbue(std::locale("")); // the user's default locale, for digit grouping
+ pcout << "Number of active cells: "
+ << triangulation.n_global_active_cells()
+ << " (on "
+ << triangulation.n_levels()
+ << " levels)"
+ << std::endl
+ << "Number of degrees of freedom: "
+ << n_u + n_p + n_T
+ << " (" << n_u << '+' << n_p << '+'<< n_T <<')'
+ << std::endl
+ << std::endl;
+ pcout.get_stream().imbue(s); // restore the saved locale
+
+
+ // After this, we have to set up the
+ // various partitioners (of type
+ // <code>IndexSet</code>, see the
+ // introduction) that describe which
+ // parts of each matrix or vector will be
+ // stored where, then call the functions
+ // that actually set up the matrices, and
+ // at the end also resize the various
+ // vectors we keep around in this
+ // program.
+ std::vector<IndexSet> stokes_partitioning, stokes_relevant_partitioning;
+ IndexSet temperature_partitioning (n_T), temperature_relevant_partitioning (n_T);
+ IndexSet stokes_relevant_set;
+ {
+ IndexSet stokes_index_set = stokes_dof_handler.locally_owned_dofs();
+ stokes_partitioning.push_back(stokes_index_set.get_view(0,n_u)); // locally owned velocity DoFs
+ stokes_partitioning.push_back(stokes_index_set.get_view(n_u,n_u+n_p)); // locally owned pressure DoFs
+
+ DoFTools::extract_locally_relevant_dofs (stokes_dof_handler,
+ stokes_relevant_set);
+ stokes_relevant_partitioning.push_back(stokes_relevant_set.get_view(0,n_u)); // locally relevant velocity DoFs
+ stokes_relevant_partitioning.push_back(stokes_relevant_set.get_view(n_u,n_u+n_p)); // locally relevant pressure DoFs
+
+ temperature_partitioning = temperature_dof_handler.locally_owned_dofs();
+ DoFTools::extract_locally_relevant_dofs (temperature_dof_handler,
+ temperature_relevant_partitioning);
+ }
+
+ // Following this, we can compute
+ // constraints for the solution vectors,
+ // including hanging node constraints and
+ // homogeneous and inhomogeneous boundary
+ // values for the Stokes and temperature
+ // fields. Note that as for everything
+ // else, the constraint objects can not
+ // hold <i>all</i> constraints on every
+ // processor. Rather, each processor
+ // needs to store only those that are
+ // actually necessary for correctness
+ // given that it only assembles linear
+ // systems on cells it owns. As discussed
+ // in
+ // @ref distributed_paper "this paper",
+ // the set of constraints we need to know
+ // about is exactly the set of
+ // constraints on all locally relevant
+ // degrees of freedom, so this is what we
+ // use to initialize the constraint
+ // objects.
+ {
+ stokes_constraints.clear ();
+ stokes_constraints.reinit (stokes_relevant_set);
+
+ DoFTools::make_hanging_node_constraints (stokes_dof_handler,
+ stokes_constraints);
+
+ std::vector<bool> velocity_mask (dim+1, true); // select the velocity components ...
+ velocity_mask[dim] = false; // ... but not the pressure
+ VectorTools::interpolate_boundary_values (stokes_dof_handler, // zero velocity on boundary 0
+ 0,
+ ZeroFunction<dim>(dim+1),
+ stokes_constraints,
+ velocity_mask);
+
+ std::set<types::boundary_id> no_normal_flux_boundaries;
+ no_normal_flux_boundaries.insert (1); // no normal flux through boundary 1
+ VectorTools::compute_no_normal_flux_constraints (stokes_dof_handler, 0,
+ no_normal_flux_boundaries,
+ stokes_constraints,
+ mapping);
+ stokes_constraints.close ();
+ }
+ {
+ temperature_constraints.clear ();
+ temperature_constraints.reinit (temperature_relevant_partitioning);
+
+ DoFTools::make_hanging_node_constraints (temperature_dof_handler,
+ temperature_constraints);
+ VectorTools::interpolate_boundary_values (temperature_dof_handler, // boundary 0: values of the initial temperature function
+ 0,
+ EquationData::TemperatureInitialValues<dim>(),
+ temperature_constraints);
+ VectorTools::interpolate_boundary_values (temperature_dof_handler, // boundary 1: likewise
+ 1,
+ EquationData::TemperatureInitialValues<dim>(),
+ temperature_constraints);
+ temperature_constraints.close ();
+ }
+
+ // All this done, we can then initialize
+ // the various matrix and vector objects
+ // to their proper sizes. At the end, we
+ // also record that all matrices and
+ // preconditioners have to be re-computed
+ // at the beginning of the next time
+ // step.
+ std::vector<unsigned int> block_sizes, local_sizes;
+ CIG::convert_block_partitioning(stokes_partitioning,n_u,n_p,block_sizes,local_sizes); // presumably fills the two output vectors with global and local block sizes -- TODO confirm
+
+// setup_stokes_matrix (stokes_partitioning);
+ CIG::setup_petsc_matrix(block_sizes,local_sizes,stokes_dof_handler.max_couplings_between_dofs(),stokes_matrix); // used in place of the call commented out above
+// setup_stokes_preconditioner (stokes_partitioning);
+ CIG::setup_petsc_matrix(block_sizes,local_sizes,stokes_dof_handler.max_couplings_between_dofs(),stokes_preconditioner_matrix); // used in place of the call commented out above
+ setup_temperature_matrices (temperature_partitioning);
+
+ stokes_rhs.reinit(block_sizes,MPI_COMM_WORLD,local_sizes); // stokes_rhs.reinit (stokes_partitioning, MPI_COMM_WORLD);
+ CIG::setup_petsc_vector(block_sizes, stokes_partitioning,stokes_relevant_partitioning,stokes_solution);
+// old_stokes_solution.reinit (stokes_solution);
+ CIG::setup_petsc_vector(block_sizes, stokes_partitioning,stokes_relevant_partitioning,old_stokes_solution);
+
+ temperature_rhs.reinit (MPI_COMM_WORLD, temperature_partitioning); // the right hand side gets the owned range only
+ temperature_solution.reinit (MPI_COMM_WORLD, temperature_partitioning, temperature_relevant_partitioning); // solution vectors also get the locally relevant set
+ old_temperature_solution.reinit (MPI_COMM_WORLD, temperature_partitioning, temperature_relevant_partitioning);
+ old_old_temperature_solution.reinit(MPI_COMM_WORLD, temperature_partitioning, temperature_relevant_partitioning);
+
+ rebuild_stokes_matrix = true;
+ rebuild_stokes_preconditioner = true;
+ rebuild_temperature_matrices = true;
+ rebuild_temperature_preconditioner = true;
+
+ computing_timer.exit_section();
+ }
+
+
+
+ // @sect4{The BoussinesqFlowProblem assembly functions}
+ //
+ // Following the discussion in the
+ // introduction and in the @ref threads
+ // module, we split the assembly functions
+ // into different parts:
+ //
+ // <ul> <li> The local calculations of
+ // matrices and right hand sides, given a
+ // certain cell as input (these functions
+ // are named <code>local_assemble_*</code>
+ // below). The resulting function is, in
+ // other words, essentially the body of the
+ // loop over all cells in step-31. Note,
+ // however, that these functions store the
+ // result from the local calculations in
+ // variables of classes from the CopyData
+ // namespace.
+ //
+ // <li>These objects are then given to the
+ // second step which writes the local data
+ // into the global data structures (these
+ // functions are named
+ // <code>copy_local_to_global_*</code>
+ // below). These functions are pretty
+ // trivial.
+ //
+ // <li>These two subfunctions are then used
+ // in the respective assembly routine
+ // (called <code>assemble_*</code> below),
+ // where a WorkStream object is set up and
+ // runs over all the cells that belong to
+ // the processor's subdomain. </ul>
+
+ // @sect5{Stokes preconditioner assembly}
+ //
+ // Let us start with the functions that
+ // build the Stokes preconditioner. The
+ // first two of these are pretty trivial,
+ // given the discussion above. Note in
+ // particular that the main point in using
+ // the scratch data object is that we want
+ // to avoid allocating any objects on the
+ // free space each time we visit a new
+ // cell. As a consequence, the assembly
+ // function below only has automatic local
+ // variables, and everything else is
+ // accessed through the scratch data
+ // object, which is allocated only once
+ // before we start the loop over all cells:
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::
+ local_assemble_stokes_preconditioner (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                       Assembly::Scratch::StokesPreconditioner<dim> &scratch,
+                                       Assembly::CopyData::StokesPreconditioner<dim> &data)
+ {
+   const FEValuesExtractors::Vector velocity_part (0);
+   const FEValuesExtractors::Scalar pressure_part (dim);
+
+   const unsigned int n_dofs   = stokes_fe.dofs_per_cell;
+   const unsigned int n_points = scratch.stokes_fe_values.n_quadrature_points;
+
+   // Move the scratch FEValues object to this cell and start from
+   // an empty local matrix.
+   scratch.stokes_fe_values.reinit (cell);
+   cell->get_dof_indices (data.local_dof_indices);
+   data.local_matrix = 0;
+
+   // Constant factor in front of the pressure mass matrix term.
+   const double pressure_factor = (1./EquationData::eta) *
+                                  EquationData::pressure_scaling *
+                                  EquationData::pressure_scaling;
+
+   for (unsigned int q=0; q<n_points; ++q)
+     {
+       // Cache the shape function data at this quadrature point
+       // so the double loop below does not re-evaluate it.
+       for (unsigned int k=0; k<n_dofs; ++k)
+         {
+           scratch.grad_phi_u[k] = scratch.stokes_fe_values[velocity_part].gradient(k,q);
+           scratch.phi_p[k] = scratch.stokes_fe_values[pressure_part].value (k, q);
+         }
+
+       const double weight = scratch.stokes_fe_values.JxW(q);
+
+       for (unsigned int i=0; i<n_dofs; ++i)
+         for (unsigned int j=0; j<n_dofs; ++j)
+           {
+             // velocity block: eta * (grad phi_i : grad phi_j)
+             const double velocity_term
+               = EquationData::eta *
+                 scalar_product (scratch.grad_phi_u[i],
+                                 scratch.grad_phi_u[j]);
+             // pressure block: scaled pressure mass matrix
+             const double pressure_term
+               = pressure_factor *
+                 (scratch.phi_p[i] * scratch.phi_p[j]);
+
+             data.local_matrix(i,j) += (velocity_term + pressure_term)
+                                       * weight;
+           }
+     }
+ }
+
+
+
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::
+ copy_local_to_global_stokes_preconditioner (const Assembly::CopyData::StokesPreconditioner<dim> &data)
+ {
+   // Add one cell's local matrix to the global preconditioner
+   // matrix, letting the Stokes constraints object do the
+   // transfer.
+   stokes_constraints.distribute_local_to_global
+   (data.local_matrix,
+    data.local_dof_indices,
+    stokes_preconditioner_matrix);
+ }
+
+
+ // Now for the function that actually puts
+ // things together, using the WorkStream
+ // functions. WorkStream::run needs a
+ // start and end iterator to enumerate the
+ // cells it is supposed to work
+ // on. Typically, one would use
+ // DoFHandler::begin_active() and
+ // DoFHandler::end() for that but here we
+ // actually only want the subset of cells
+ // that in fact are owned by the current
+ // processor. This is where the
+ // FilteredIterator class comes into play:
+ // you give it a range of cells and it
+ // provides an iterator that only iterates
+ // over that subset of cells that satisfy a
+ // certain predicate (a predicate is a
+ // function of one argument that either
+ // returns true or false). The predicate we
+ // use here is
+ // IteratorFilters::LocallyOwnedCell, i.e.,
+ // it returns true exactly if the cell is
+ // owned by the current processor. The
+ // resulting iterator range is then exactly
+ // what we need.
+ //
+ // With this obstacle out of the way, we
+ // call the WorkStream::run function with
+ // this set of cells, scratch and copy
+ // objects, and with pointers to two
+ // functions: the local assembly and
+ // copy-local-to-global function. These
+ // functions need to have very specific
+ // signatures: three arguments in the first
+ // and one argument in the latter case (see
+ // the documentation of the WorkStream::run
+ // function for the meaning of these
+ // arguments). Note how we use the
+ // construct <code>std_cxx1x::bind</code>
+ // to create a function object that
+ // satisfies this requirement. It uses
+ // placeholders <code>_1, std_cxx1x::_2,
+ // _3</code> for the local assembly
+ // function that specify cell, scratch
+ // data, and copy data, as well as the
+ // placeholder <code>_1</code> for the copy
+ // function that expects the data to be
+ // written into the global matrix. On the
+ // other hand, the implicit zeroth argument
+ // of member functions (namely the
+ // <code>this</code> pointer of the object
+ // on which that member function is to
+ // operate on) is <i>bound</i> to the
+ // <code>this</code> pointer of the current
+ // function. The WorkStream::run function,
+ // as a consequence, does not need to know
+ // anything about the object these
+ // functions work on.
+ //
+ // When the WorkStream is executed, it will
+ // create several local assembly routines
+ // of the first kind for several cells and
+ // let some available processors work on
+ // them. The function that needs to be
+ // synchronized, i.e., the write operation
+ // into the global matrix, however, is
+ // executed by only one thread at a time in
+ // the prescribed order. Of course, this
+ // only holds for the parallelization on a
+ // single MPI process. Different MPI
+ // processes will have their own WorkStream
+ // objects and do that work completely
+ // independently (and in different memory
+ // spaces). In a distributed calculation,
+ // some data will accumulate at degrees of
+ // freedom that are not owned by the
+ // respective processor. It would be
+ // inefficient to send data around every
+ // time we encounter such a dof. What
+ // happens instead is that the distributed
+ // sparse matrix will keep that data and
+ // send it to the owner at the end of
+ // assembly, when the
+ // <code>compress()</code> function is called.
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::assemble_stokes_preconditioner ()
+ {
+   // Start from an empty matrix; the cell contributions are summed into it.
+   stokes_preconditioner_matrix = 0;
+
+   const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree+1);
+
+   // Iterator range restricted to the cells owned by this process.
+   typedef
+   FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>
+   CellFilter;
+
+   const CellFilter owned_cells_begin (IteratorFilters::LocallyOwnedCell(),
+                                       stokes_dof_handler.begin_active());
+   const CellFilter owned_cells_end   (IteratorFilters::LocallyOwnedCell(),
+                                       stokes_dof_handler.end());
+
+   // Prototype scratch and copy objects handed to WorkStream::run.
+   const Assembly::Scratch::StokesPreconditioner<dim>
+   sample_scratch (stokes_fe, quadrature_formula,
+                   mapping,
+                   update_JxW_values |
+                   update_values |
+                   update_gradients);
+   const Assembly::CopyData::StokesPreconditioner<dim>
+   sample_copy (stokes_fe);
+
+   // Worker: per-cell assembly (cell, scratch, copy data).
+   // Copier: transfer of one cell's result into the global matrix.
+   WorkStream::
+   run (owned_cells_begin,
+        owned_cells_end,
+        std_cxx1x::bind (&BoussinesqFlowProblem<dim>::
+                         local_assemble_stokes_preconditioner,
+                         this,
+                         std_cxx1x::_1,
+                         std_cxx1x::_2,
+                         std_cxx1x::_3),
+        std_cxx1x::bind (&BoussinesqFlowProblem<dim>::
+                         copy_local_to_global_stokes_preconditioner,
+                         this,
+                         std_cxx1x::_1),
+        sample_scratch,
+        sample_copy);
+
+   // Finish assembly of the distributed matrix.
+   stokes_preconditioner_matrix.compress();
+ }
+
+
+
+ // The final function in this block
+ // initiates assembly of the Stokes
+ // preconditioner matrix and then in fact
+ // builds the Stokes preconditioner. It is
+ // mostly the same as in the serial
+ // case. The only difference to step-31 is
+ // that we use a Jacobi preconditioner for
+ // the pressure mass matrix instead of IC,
+ // as discussed in the introduction.
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::build_stokes_preconditioner ()
+ {
+   // Nothing to do unless a rebuild has been requested through the flag.
+   if (rebuild_stokes_preconditioner == false)
+     return;
+
+   computing_timer.enter_section (" Build Stokes preconditioner");
+   pcout << " Rebuilding Stokes preconditioner..." << std::flush;
+
+   // Fill stokes_preconditioner_matrix; its diagonal blocks are used below.
+   assemble_stokes_preconditioner ();
+
+   Mp_preconditioner.reset (new PETScWrappers::PreconditionJacobi());
+   Amg_preconditioner.reset (new PETScWrappers::PreconditionBoomerAMG());
+
+   // The AMG preconditioner is initialized with default-constructed
+   // settings. The options that used to be set here (constant_modes,
+   // elliptic, higher_order_elements, smoother_sweeps,
+   // aggregation_threshold) were commented out, so the constant-mode
+   // extraction that only fed constant_modes was dead work and has
+   // been removed.
+   PETScWrappers::PreconditionBoomerAMG::AdditionalData Amg_data;
+
+   // Block (1,1) goes to the Jacobi preconditioner,
+   // block (0,0) to the BoomerAMG preconditioner.
+   Mp_preconditioner->initialize (stokes_preconditioner_matrix.block(1,1));
+   Amg_preconditioner->initialize (stokes_preconditioner_matrix.block(0,0),
+                                   Amg_data);
+
+   rebuild_stokes_preconditioner = false;
+
+   pcout << std::endl;
+   computing_timer.exit_section();
+ }
+
+
+ // @sect5{Stokes system assembly}
+
+ // The next three functions implement the
+ // assembly of the Stokes system, again
+ // split up into a part performing local
+ // calculations, one for writing the local
+ // data into the global matrix and vector,
+ // and one for actually running the loop
+ // over all cells with the help of the
+ // WorkStream class. Note that the assembly
+ // of the Stokes matrix needs only to be
+ // done in case we have changed the
+ // mesh. Otherwise, just the
+ // (temperature-dependent) right hand side
+ // needs to be calculated here. Since we
+ // are working with distributed matrices
+ // and vectors, we have to call the
+ // respective <code>compress()</code>
+ // functions in the end of the assembly in
+ // order to send non-local data to the
+ // owner process.
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::
+ local_assemble_stokes_system (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                               Assembly::Scratch::StokesSystem<dim> &scratch,
+                               Assembly::CopyData::StokesSystem<dim> &data)
+ {
+   // Per-cell worker for the Stokes system: always computes the local
+   // right hand side, and the local matrix only while
+   // rebuild_stokes_matrix is set.
+   const unsigned int n_local_dofs = scratch.stokes_fe_values.get_fe().dofs_per_cell;
+   const unsigned int n_quad_points = scratch.stokes_fe_values.n_quadrature_points;
+
+   const FEValuesExtractors::Vector velocities (0);
+   const FEValuesExtractors::Scalar pressure (dim);
+
+   scratch.stokes_fe_values.reinit (cell);
+
+   // Same cell (level/index) seen through the temperature DoFHandler.
+   typename DoFHandler<dim>::active_cell_iterator
+   temperature_cell (&triangulation,
+                     cell->level(),
+                     cell->index(),
+                     &temperature_dof_handler);
+   scratch.temperature_fe_values.reinit (temperature_cell);
+
+   // Only the matrix reset depends on the rebuild flag;
+   // the right hand side is cleared on every call.
+   if (rebuild_stokes_matrix)
+     {
+       data.local_matrix = 0;
+     }
+   data.local_rhs = 0;
+
+   scratch.temperature_fe_values.get_function_values (old_temperature_solution,
+                                                      scratch.old_temperature_values);
+
+   for (unsigned int q=0; q<n_quad_points; ++q)
+     {
+       const double old_temperature = scratch.old_temperature_values[q];
+       const double JxW = scratch.stokes_fe_values.JxW(q);
+
+       // Cache shape function data at this quadrature point.
+       for (unsigned int k=0; k<n_local_dofs; ++k)
+         {
+           scratch.phi_u[k] = scratch.stokes_fe_values[velocities].value (k,q);
+           if (rebuild_stokes_matrix)
+             {
+               scratch.grads_phi_u[k] = scratch.stokes_fe_values[velocities].symmetric_gradient(k,q);
+               scratch.div_phi_u[k] = scratch.stokes_fe_values[velocities].divergence (k, q);
+               scratch.phi_p[k] = scratch.stokes_fe_values[pressure].value (k, q);
+             }
+         }
+
+       // Viscous term plus the two scaled pressure/divergence couplings.
+       if (rebuild_stokes_matrix)
+         {
+           for (unsigned int i=0; i<n_local_dofs; ++i)
+             for (unsigned int j=0; j<n_local_dofs; ++j)
+               data.local_matrix(i,j) += (EquationData::eta * 2 *
+                                          (scratch.grads_phi_u[i] * scratch.grads_phi_u[j])
+                                          - (EquationData::pressure_scaling *
+                                             scratch.div_phi_u[i] * scratch.phi_p[j])
+                                          - (EquationData::pressure_scaling *
+                                             scratch.phi_p[i] * scratch.div_phi_u[j]))
+                                         * JxW;
+         }
+
+       // Right hand side: density(old temperature) * gravity, tested with
+       // the velocity shape functions.
+       const Tensor<1,dim>
+       gravity = EquationData::gravity_vector (scratch.stokes_fe_values
+                                               .quadrature_point(q));
+
+       for (unsigned int i=0; i<n_local_dofs; ++i)
+         data.local_rhs(i) += (EquationData::density(old_temperature) *
+                               gravity *
+                               scratch.phi_u[i]) *
+                              JxW;
+     }
+
+   cell->get_dof_indices (data.local_dof_indices);
+ }
+
+
+
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::
+ copy_local_to_global_stokes_system (const Assembly::CopyData::StokesSystem<dim> &data)
+ {
+   // Copier for the Stokes system: adds one cell's contribution to the
+   // global objects through stokes_constraints.
+
+   // Matrix kept from an earlier assembly: only the right hand side
+   // is transferred.
+   if (!rebuild_stokes_matrix)
+     {
+       stokes_constraints.distribute_local_to_global (data.local_rhs,
+                                                      data.local_dof_indices,
+                                                      stokes_rhs);
+       return;
+     }
+
+   // Matrix is being rebuilt: transfer matrix and right hand side together.
+   stokes_constraints.distribute_local_to_global (data.local_matrix,
+                                                  data.local_rhs,
+                                                  data.local_dof_indices,
+                                                  stokes_matrix,
+                                                  stokes_rhs);
+ }
+
+
+
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::assemble_stokes_system ()
+ {
+   computing_timer.enter_section (" Assemble Stokes system");
+
+   // The matrix is cleared only when it is about to be rebuilt;
+   // the right hand side is reassembled on every call.
+   if (rebuild_stokes_matrix)
+     stokes_matrix=0;
+
+   stokes_rhs=0;
+
+   const QGauss<dim> quadrature_formula(parameters.stokes_velocity_degree+1);
+
+   // Gradients are requested only when the matrix is assembled.
+   const UpdateFlags stokes_update_flags
+     = (update_values |
+        update_quadrature_points |
+        update_JxW_values |
+        (rebuild_stokes_matrix
+         ?
+         update_gradients
+         :
+         UpdateFlags(0)));
+
+   // Iterator range restricted to the cells owned by this process.
+   typedef
+   FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>
+   CellFilter;
+
+   const CellFilter owned_cells_begin (IteratorFilters::LocallyOwnedCell(),
+                                       stokes_dof_handler.begin_active());
+   const CellFilter owned_cells_end   (IteratorFilters::LocallyOwnedCell(),
+                                       stokes_dof_handler.end());
+
+   WorkStream::
+   run (owned_cells_begin,
+        owned_cells_end,
+        std_cxx1x::bind (&BoussinesqFlowProblem<dim>::
+                         local_assemble_stokes_system,
+                         this,
+                         std_cxx1x::_1,
+                         std_cxx1x::_2,
+                         std_cxx1x::_3),
+        std_cxx1x::bind (&BoussinesqFlowProblem<dim>::
+                         copy_local_to_global_stokes_system,
+                         this,
+                         std_cxx1x::_1),
+        Assembly::Scratch::
+        StokesSystem<dim> (stokes_fe, mapping, quadrature_formula,
+                           stokes_update_flags,
+                           temperature_fe,
+                           update_values),
+        Assembly::CopyData::
+        StokesSystem<dim> (stokes_fe));
+
+   // Finish assembly of the distributed objects; contributions are additive.
+   stokes_matrix.compress(dealii::VectorOperation::add);
+   stokes_rhs.compress(dealii::VectorOperation::add);
+
+   rebuild_stokes_matrix = false;
+
+   pcout << std::endl;
+   computing_timer.exit_section();
+ }
+
+
+ // @sect5{Temperature matrix assembly}
+
+ // The task to be performed by the next
+ // three functions is to calculate a mass
+ // matrix and a Laplace matrix on the
+ // temperature system. These will be
+ // combined in order to yield the
+ // semi-implicit time stepping matrix that
+ // consists of the mass matrix plus a time
+ // step-dependent weight factor times the
+ // Laplace matrix. This function is again
+ // essentially the body of the loop over
+ // all cells from step-31.
+ //
+ // The two following functions perform
+ // similar services as the ones above.
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::
+ local_assemble_temperature_matrix (const typename DoFHandler<dim>::active_cell_iterator &cell,
+                                    Assembly::Scratch::TemperatureMatrix<dim> &scratch,
+                                    Assembly::CopyData::TemperatureMatrix<dim> &data)
+ {
+   // Per-cell worker: computes the local mass matrix and the local
+   // stiffness matrix (scaled by EquationData::kappa) of the
+   // temperature element.
+   const unsigned int n_local_dofs = scratch.temperature_fe_values.get_fe().dofs_per_cell;
+   const unsigned int n_quad_points = scratch.temperature_fe_values.n_quadrature_points;
+
+   scratch.temperature_fe_values.reinit (cell);
+   cell->get_dof_indices (data.local_dof_indices);
+
+   data.local_mass_matrix = 0;
+   data.local_stiffness_matrix = 0;
+
+   for (unsigned int q=0; q<n_quad_points; ++q)
+     {
+       const double JxW = scratch.temperature_fe_values.JxW(q);
+
+       // Cache shape values and gradients at this quadrature point.
+       for (unsigned int k=0; k<n_local_dofs; ++k)
+         {
+           scratch.grad_phi_T[k] = scratch.temperature_fe_values.shape_grad (k,q);
+           scratch.phi_T[k] = scratch.temperature_fe_values.shape_value (k, q);
+         }
+
+       for (unsigned int i=0; i<n_local_dofs; ++i)
+         for (unsigned int j=0; j<n_local_dofs; ++j)
+           {
+             data.local_mass_matrix(i,j)
+             += (scratch.phi_T[i] * scratch.phi_T[j]
+                 *
+                 JxW);
+             data.local_stiffness_matrix(i,j)
+             += (EquationData::kappa * scratch.grad_phi_T[i] * scratch.grad_phi_T[j]
+                 *
+                 JxW);
+           }
+     }
+ }
+
+
+
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::
+ copy_local_to_global_temperature_matrix (const Assembly::CopyData::TemperatureMatrix<dim> &data)
+ {
+   // Copier: both local matrices of one cell use the same dof indices and
+   // go through temperature_constraints into their global counterparts.
+
+   // Mass matrix.
+   temperature_constraints.distribute_local_to_global (data.local_mass_matrix,
+                                                       data.local_dof_indices,
+                                                       temperature_mass_matrix);
+
+   // Stiffness matrix.
+   temperature_constraints.distribute_local_to_global (data.local_stiffness_matrix,
+                                                       data.local_dof_indices,
+                                                       temperature_stiffness_matrix);
+ }
+
+
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::assemble_temperature_matrix ()
+ {
+   // The matrices are only reassembled on request.
+   if (!rebuild_temperature_matrices)
+     return;
+
+   computing_timer.enter_section (" Assemble temperature matrices");
+   temperature_mass_matrix = 0;
+   temperature_stiffness_matrix = 0;
+
+   const QGauss<dim> quadrature_formula(parameters.temperature_degree+2);
+
+   // Iterator range restricted to the cells owned by this process.
+   typedef
+   FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>
+   CellFilter;
+
+   const CellFilter owned_cells_begin (IteratorFilters::LocallyOwnedCell(),
+                                       temperature_dof_handler.begin_active());
+   const CellFilter owned_cells_end   (IteratorFilters::LocallyOwnedCell(),
+                                       temperature_dof_handler.end());
+
+   WorkStream::
+   run (owned_cells_begin,
+        owned_cells_end,
+        std_cxx1x::bind (&BoussinesqFlowProblem<dim>::
+                         local_assemble_temperature_matrix,
+                         this,
+                         std_cxx1x::_1,
+                         std_cxx1x::_2,
+                         std_cxx1x::_3),
+        std_cxx1x::bind (&BoussinesqFlowProblem<dim>::
+                         copy_local_to_global_temperature_matrix,
+                         this,
+                         std_cxx1x::_1),
+        Assembly::Scratch::
+        TemperatureMatrix<dim> (temperature_fe, mapping, quadrature_formula),
+        Assembly::CopyData::
+        TemperatureMatrix<dim> (temperature_fe));
+
+   temperature_mass_matrix.compress();
+   temperature_stiffness_matrix.compress();
+
+   // New matrices are in place; the preconditioner built from them
+   // has to be regenerated as well.
+   rebuild_temperature_matrices = false;
+   rebuild_temperature_preconditioner = true;
+
+   computing_timer.exit_section();
+ }
+
+
+ // @sect5{Temperature right hand side assembly}
+
+ // This is the last assembly function. It
+ // calculates the right hand side of the
+ // temperature system, which includes the
+ // convection and the stabilization
+ // terms. It includes a lot of evaluations
+ // of old solutions at the quadrature
+ // points (which are necessary for
+ // calculating the artificial viscosity of
+ // stabilization), but is otherwise similar
+ // to the other assembly functions. Notice,
+ // once again, how we resolve the dilemma
+ // of having inhomogeneous boundary
+ // conditions, by just making a right hand
+ // side at this point (compare the comments
+ // for the <code>project()</code> function
+ // above): We create some matrix columns
+ // with exactly the values that would be
+ // entered for the temperature stiffness
+ // matrix, in case we have inhomogeneously
+ // constrained dofs. That will account for
+ // the correct balance of the right hand
+ // side vector with the matrix system of
+ // temperature.
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::
+ local_assemble_temperature_rhs (const std::pair<double,double> global_T_range, // used below as (first, second) to form range width and midpoint
+ const double global_max_velocity, // forwarded unchanged to compute_viscosity
+ const double global_entropy_variation, // forwarded unchanged to compute_viscosity
+ const typename DoFHandler<dim>::active_cell_iterator &cell, // temperature cell to assemble on
+ Assembly::Scratch::TemperatureRHS<dim> &scratch, // FEValues objects and per-quadrature-point work arrays
+ Assembly::CopyData::TemperatureRHS<dim> &data) // output: local_rhs, matrix_for_bc, local_dof_indices
+ { // Per-cell worker: fills data.local_rhs for the temperature equation and, for inhomogeneously constrained dofs, the matching columns of data.matrix_for_bc.
+ const bool use_bdf2_scheme = (timestep_number != 0); // step 0 takes the second (non-extrapolating) branch of every conditional below
+
+ const unsigned int dofs_per_cell = scratch.temperature_fe_values.get_fe().dofs_per_cell;
+ const unsigned int n_q_points = scratch.temperature_fe_values.n_quadrature_points;
+
+ const FEValuesExtractors::Vector velocities (0);
+
+ data.local_rhs = 0;
+ data.matrix_for_bc = 0;
+ cell->get_dof_indices (data.local_dof_indices);
+
+ scratch.temperature_fe_values.reinit (cell);
+
+ typename DoFHandler<dim>::active_cell_iterator
+ stokes_cell (&triangulation, // same (level, index) as 'cell', but attached to stokes_dof_handler
+ cell->level(),
+ cell->index(),
+ &stokes_dof_handler);
+ scratch.stokes_fe_values.reinit (stokes_cell);
+
+ scratch.temperature_fe_values.get_function_values (old_temperature_solution, // values, gradients and Laplacians of the two previous temperature fields
+ scratch.old_temperature_values);
+ scratch.temperature_fe_values.get_function_values (old_old_temperature_solution,
+ scratch.old_old_temperature_values);
+
+ scratch.temperature_fe_values.get_function_gradients (old_temperature_solution,
+ scratch.old_temperature_grads);
+ scratch.temperature_fe_values.get_function_gradients (old_old_temperature_solution,
+ scratch.old_old_temperature_grads);
+
+ scratch.temperature_fe_values.get_function_laplacians (old_temperature_solution,
+ scratch.old_temperature_laplacians);
+ scratch.temperature_fe_values.get_function_laplacians (old_old_temperature_solution,
+ scratch.old_old_temperature_laplacians);
+
+ scratch.stokes_fe_values[velocities].get_function_values (stokes_solution, // note: stokes_solution fills the "old" arrays, old_stokes_solution the "old_old" ones
+ scratch.old_velocity_values);
+ scratch.stokes_fe_values[velocities].get_function_values (old_stokes_solution,
+ scratch.old_old_velocity_values);
+ scratch.stokes_fe_values[velocities].get_function_symmetric_gradients (stokes_solution,
+ scratch.old_strain_rates);
+ scratch.stokes_fe_values[velocities].get_function_symmetric_gradients (old_stokes_solution,
+ scratch.old_old_strain_rates);
+
+ const double nu // one scalar per cell, computed before the quadrature loop; multiplies the grad-grad term of the rhs below
+ = compute_viscosity (scratch.old_temperature_values,
+ scratch.old_old_temperature_values,
+ scratch.old_temperature_grads,
+ scratch.old_old_temperature_grads,
+ scratch.old_temperature_laplacians,
+ scratch.old_old_temperature_laplacians,
+ scratch.old_velocity_values,
+ scratch.old_old_velocity_values,
+ scratch.old_strain_rates,
+ scratch.old_old_strain_rates,
+ global_max_velocity,
+ global_T_range.second - global_T_range.first, // width of the temperature range
+ 0.5 * (global_T_range.second + global_T_range.first), // midpoint of the temperature range
+ global_entropy_variation,
+ cell->diameter());
+
+ for (unsigned int q=0; q<n_q_points; ++q)
+ {
+ for (unsigned int k=0; k<dofs_per_cell; ++k) // cache shape values and gradients at this quadrature point
+ {
+ scratch.phi_T[k] = scratch.temperature_fe_values.shape_value (k, q);
+ scratch.grad_phi_T[k] = scratch.temperature_fe_values.shape_grad (k,q);
+ }
+
+
+ const double T_term_for_rhs // weighted combination of the two old temperatures that multiplies phi_T in the rhs
+ = (use_bdf2_scheme ?
+ (scratch.old_temperature_values[q] *
+ (1 + time_step/old_time_step)
+ -
+ scratch.old_old_temperature_values[q] *
+ (time_step * time_step) /
+ (old_time_step * (time_step + old_time_step)))
+ :
+ scratch.old_temperature_values[q]);
+
+ const double ext_T // the following four quantities share one pattern: old*(1+dt/dt_old) - old_old*dt/dt_old, or just 'old' in step 0
+ = (use_bdf2_scheme ?
+ (scratch.old_temperature_values[q] *
+ (1 + time_step/old_time_step)
+ -
+ scratch.old_old_temperature_values[q] *
+ time_step/old_time_step)
+ :
+ scratch.old_temperature_values[q]);
+
+ const Tensor<1,dim> ext_grad_T
+ = (use_bdf2_scheme ?
+ (scratch.old_temperature_grads[q] *
+ (1 + time_step/old_time_step)
+ -
+ scratch.old_old_temperature_grads[q] *
+ time_step/old_time_step)
+ :
+ scratch.old_temperature_grads[q]);
+
+ const Tensor<1,dim> extrapolated_u
+ = (use_bdf2_scheme ?
+ (scratch.old_velocity_values[q] *
+ (1 + time_step/old_time_step)
+ -
+ scratch.old_old_velocity_values[q] *
+ time_step/old_time_step)
+ :
+ scratch.old_velocity_values[q]);
+
+ const SymmetricTensor<2,dim> extrapolated_strain_rate
+ = (use_bdf2_scheme ?
+ (scratch.old_strain_rates[q] *
+ (1 + time_step/old_time_step)
+ -
+ scratch.old_old_strain_rates[q] *
+ time_step/old_time_step)
+ :
+ scratch.old_strain_rates[q]);
+
+ const double gamma // source term: (radiogenic_heating*density + 2*eta*strain_rate:strain_rate) / (density*specific_heat)
+ = ((EquationData::radiogenic_heating * EquationData::density(ext_T)
+ +
+ 2 * EquationData::eta * extrapolated_strain_rate * extrapolated_strain_rate) /
+ (EquationData::density(ext_T) * EquationData::specific_heat));
+
+ for (unsigned int i=0; i<dofs_per_cell; ++i)
+ {
+ data.local_rhs(i) += (T_term_for_rhs * scratch.phi_T[i] // old-temperature term
+ -
+ time_step *
+ extrapolated_u * ext_grad_T * scratch.phi_T[i] // term with extrapolated velocity and temperature gradient
+ -
+ time_step *
+ nu * ext_grad_T * scratch.grad_phi_T[i] // term scaled by the per-cell viscosity nu
+ +
+ time_step *
+ gamma * scratch.phi_T[i]) // source term
+ *
+ scratch.temperature_fe_values.JxW(q);
+
+ if (temperature_constraints.is_inhomogeneously_constrained(data.local_dof_indices[i])) // only columns of inhomogeneously constrained dofs are filled
+ {
+ for (unsigned int j=0; j<dofs_per_cell; ++j)
+ data.matrix_for_bc(j,i) += (scratch.phi_T[i] * scratch.phi_T[j] * // mass part; same time-step weight that assemble_temperature_system applies to the mass matrix
+ (use_bdf2_scheme ?
+ ((2*time_step + old_time_step) /
+ (time_step + old_time_step)) : 1.)
+ +
+ scratch.grad_phi_T[i] * // stiffness part: kappa * time_step * grad-grad
+ scratch.grad_phi_T[j] *
+ EquationData::kappa *
+ time_step)
+ *
+ scratch.temperature_fe_values.JxW(q);
+ }
+ }
+ }
+ }
+
+
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::
+ copy_local_to_global_temperature_rhs (const Assembly::CopyData::TemperatureRHS<dim> &data)
+ {
+   // Copier: adds one cell's right hand side to temperature_rhs through
+   // temperature_constraints. The local matrix_for_bc is passed along so
+   // that inhomogeneous constraints can be accounted for in the vector.
+   temperature_constraints.distribute_local_to_global
+   (data.local_rhs,
+    data.local_dof_indices,
+    temperature_rhs,
+    data.matrix_for_bc);
+ }
+
+
+
+ // In the function that runs the WorkStream
+ // for actually calculating the right hand
+ // side, we also generate the final
+ // matrix. As mentioned above, it is a sum
+ // of the mass matrix and the Laplace
+ // matrix, times some time step-dependent
+ // weight. This weight is specified by the
+ // BDF-2 time integration scheme, see the
+ // introduction in step-31. What is new in
+ // this tutorial program (in addition to
+ // the use of MPI parallelization and the
+ // WorkStream class), is that we now
+ // precompute the temperature
+ // preconditioner as well. The reason is
+ // that the setup of the Jacobi
+ // preconditioner takes a noticeable time
+ // compared to the solver because we
+ // usually only need between 10 and 20
+ // iterations for solving the temperature
+ // system (this might sound strange, as
+ // Jacobi really only consists of a
+ // diagonal, but in Trilinos it is derived
+ // from a more general framework for point
+ // relaxation preconditioners, which is a
+ // bit inefficient). Hence, it is more
+ // efficient to precompute the
+ // preconditioner, even though the matrix
+ // entries may slightly change because the
+ // time step might change. This is not too
+ // big a problem because we remesh every
+ // few time steps (and regenerate the
+ // preconditioner then).
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::assemble_temperature_system (const double maximal_velocity)
+ {
+   const bool use_bdf2_scheme = (timestep_number != 0);
+
+   // System matrix: (weight * mass matrix) + time_step * stiffness matrix.
+   // The weight on the mass matrix is applied only from the second time
+   // step on; in the very first step the mass matrix enters unscaled.
+   temperature_matrix.copy_from (temperature_mass_matrix);
+   if (use_bdf2_scheme)
+     temperature_matrix *= (2*time_step + old_time_step) /
+                           (time_step + old_time_step);
+   temperature_matrix.add (time_step, temperature_stiffness_matrix);
+   temperature_matrix.compress();
+
+   // The Jacobi preconditioner is rebuilt only when flagged, not on
+   // every call.
+   if (rebuild_temperature_preconditioner)
+     {
+       T_preconditioner.reset (new PETScWrappers::PreconditionJacobi());
+       T_preconditioner->initialize (temperature_matrix);
+       rebuild_temperature_preconditioner = false;
+     }
+
+   // Right hand side. The reference temperature $T_m$ used in the
+   // entropy residual $E(T) = (T-T_m)^2$ is taken as the midpoint
+   // between the minimum and maximum of the extrapolated temperature.
+   // The quantities that are identical for all cells (temperature
+   // range, maximal velocity, entropy variation) are bound as leading
+   // arguments of <code>local_assemble_temperature_rhs</code>.
+   temperature_rhs = 0;
+
+   const QGauss<dim> quadrature_formula(parameters.temperature_degree+2);
+   const std::pair<double,double>
+   global_T_range = get_extrapolated_temperature_range();
+
+   const double average_temperature = 0.5 * (global_T_range.first +
+                                             global_T_range.second);
+   const double global_entropy_variation =
+     get_entropy_variation (average_temperature);
+
+   // Iterator range restricted to the cells owned by this process.
+   typedef
+   FilteredIterator<typename DoFHandler<dim>::active_cell_iterator>
+   CellFilter;
+
+   const CellFilter owned_cells_begin (IteratorFilters::LocallyOwnedCell(),
+                                       temperature_dof_handler.begin_active());
+   const CellFilter owned_cells_end   (IteratorFilters::LocallyOwnedCell(),
+                                       temperature_dof_handler.end());
+
+   WorkStream::
+   run (owned_cells_begin,
+        owned_cells_end,
+        std_cxx1x::bind (&BoussinesqFlowProblem<dim>::
+                         local_assemble_temperature_rhs,
+                         this,
+                         global_T_range,
+                         maximal_velocity,
+                         global_entropy_variation,
+                         std_cxx1x::_1,
+                         std_cxx1x::_2,
+                         std_cxx1x::_3),
+        std_cxx1x::bind (&BoussinesqFlowProblem<dim>::
+                         copy_local_to_global_temperature_rhs,
+                         this,
+                         std_cxx1x::_1),
+        Assembly::Scratch::
+        TemperatureRHS<dim> (temperature_fe, stokes_fe, mapping,
+                             quadrature_formula),
+        Assembly::CopyData::
+        TemperatureRHS<dim> (temperature_fe));
+
+   temperature_rhs.compress();
+ }
+
+
+
+
+ // @sect4{BoussinesqFlowProblem::solve}
+
+ // This function solves the linear systems
+ // in each time step of the Boussinesq
+ // problem. First, we
+ // work on the Stokes system and then on
+ // the temperature system. In essence, it
+ // does the same things as the respective
+ // function in step-31. However, there are a few
+ // changes here.
+ //
+ // The first change is related to the way
+ // we store our solution: we keep the
+ // vectors with locally owned degrees of
+ // freedom plus ghost nodes on each MPI
+ // node. When we enter a solver which is
+ // supposed to perform matrix-vector
+ // products with a distributed matrix, this
+ // is not the appropriate form,
+ // though. There, we will want to have the
+ // solution vector to be distributed in the
+ // same way as the matrix, i.e. without any
+ // ghosts. So what we do first is to
+ // generate a distributed vector called
+ // <code>distributed_stokes_solution</code>
+ // and put only the locally owned dofs into
+ // that, which is neatly done by the
+ // <code>operator=</code> of the distributed
+ // vector class.
+ //
+ // Next, we scale the pressure solution (or
+ // rather, the initial guess) for the
+ // solver so that it matches with the
+ // length scales in the matrices, as
+ // discussed in the introduction. We also
+ // immediately scale the pressure solution
+ // back to the correct units after the
+ // solution is completed. We also need to
+ // set the pressure values at hanging nodes
+ // to zero. This we also did in step-31 in
+ // order not to disturb the Schur
+ // complement by some vector entries that
+ // actually are irrelevant during the solve
+ // stage. As a difference to step-31, here
+ // we do it only for the locally owned
+ // pressure dofs. After solving for the
+ // Stokes solution, each processor copies
+ // the distributed solution back into the
+ // solution vector that also includes ghost
+ // elements.
+ //
+ // The third and most obvious change is
+ // that we have two variants for the Stokes
+ // solver: A fast solver that sometimes
+ // breaks down, and a robust solver that is
+ // slower. This is what we already
+ // discussed in the introduction. Here is
+ // how we realize it: First, we perform up to 300
+ // iterations with the fast solver based on
+ // the simple preconditioner based on the
+ // AMG V-cycle instead of an approximate
+ // solve (this is indicated by the
+ // <code>false</code> argument to the
+ // <code>LinearSolvers::BlockSchurPreconditioner</code>
+ // object). If we converge, everything is
+ // fine. If we do not converge, the solver
+ // control object will throw an exception
+ // SolverControl::NoConvergence. Usually,
+ // this would abort the program because we
+ // don't catch them in our usual
+ // <code>solve()</code> functions. This is
+ // certainly not what we want to happen
+ // here. Rather, we want to switch to the
+ // strong solver and continue the solution
+ // process with whatever vector we got so
+ // far. Hence, we catch the exception with
+ // the C++ try/catch mechanism. We then
+ // simply go through the same solver
+ // sequence again in the <code>catch</code>
+ // clause, this time passing the @p true
+ // flag to the preconditioner for the
+ // strong solver, signaling an approximate
+ // CG solve.
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::solve ()
+ {
+ computing_timer.enter_section (" Solve Stokes system");
+
+ {
+ pcout << " Solving Stokes system... " << std::flush;
+
+ PETScWrappers::MPI::BlockVector
+ distributed_stokes_solution (stokes_rhs);
+ distributed_stokes_solution = stokes_solution;
+
+ distributed_stokes_solution.block(1) /= EquationData::pressure_scaling;
+
+ const unsigned int
+ start = (distributed_stokes_solution.block(0).size() +
+ distributed_stokes_solution.block(1).local_range().first),
+ end = (distributed_stokes_solution.block(0).size() +
+ distributed_stokes_solution.block(1).local_range().second);
+ for (unsigned int i=start; i<end; ++i)
+ if (stokes_constraints.is_constrained (i))
+ distributed_stokes_solution(i) = 0;
+
+
+ PrimitiveVectorMemory<PETScWrappers::MPI::BlockVector> mem;
+
+ unsigned int n_iterations = 0;
+ const double solver_tolerance = 1e-8 * stokes_rhs.l2_norm();
+// const double solver_tolerance = 1;
+ std::cout << "stokes_rhs.l2_norm() = " << stokes_rhs.l2_norm() << std::endl;
+ SolverControl solver_control (300, solver_tolerance);
+
+ try
+ {
+ const LinearSolvers::BlockSchurPreconditioner<PETScWrappers::PreconditionBoomerAMG,
+ PETScWrappers::PreconditionJacobi>
+ preconditioner (stokes_matrix, stokes_preconditioner_matrix,
+ *Mp_preconditioner, *Amg_preconditioner,
+ false);
+
+ SolverGMRES<PETScWrappers::MPI::BlockVector>
+ solver(solver_control, mem,
+ SolverGMRES<PETScWrappers::MPI::BlockVector>::
+ AdditionalData(300, true));
+ solver.solve(stokes_matrix, distributed_stokes_solution, stokes_rhs,
+ preconditioner);
+
+ n_iterations = solver_control.last_step();
+ }
+
+ catch (SolverControl::NoConvergence)
+ {
+ const LinearSolvers::BlockSchurPreconditioner<PETScWrappers::PreconditionBoomerAMG,
+ PETScWrappers::PreconditionJacobi>
+ preconditioner (stokes_matrix, stokes_preconditioner_matrix,
+ *Mp_preconditioner, *Amg_preconditioner,
+ true);
+
+ SolverControl solver_control_refined (stokes_matrix.m(), solver_tolerance);
+ SolverGMRES<PETScWrappers::MPI::BlockVector>
+ solver(solver_control_refined, mem,
+ SolverGMRES<PETScWrappers::MPI::BlockVector>::
+ AdditionalData(500, true));
+ solver.solve(stokes_matrix, distributed_stokes_solution, stokes_rhs,
+ preconditioner);
+
+ n_iterations = (solver_control.last_step() +
+ solver_control_refined.last_step());
+ }
+
+
+ stokes_constraints.distribute (distributed_stokes_solution);
+
+ distributed_stokes_solution.block(1) *= EquationData::pressure_scaling;
+
+ stokes_solution = distributed_stokes_solution;
+ pcout << n_iterations << " iterations."
+ << std::endl;
+
+ pcout << "distributed_stokes_solution linfty_norm: " <<
+ distributed_stokes_solution.block(0).linfty_norm() << ", " <<
+ distributed_stokes_solution.block(1).linfty_norm() << std::endl;
+
+ }
+ computing_timer.exit_section();
+
+
+ // Now let's turn to the temperature
+ // part: First, we compute the time step
+ // size. We found that we need smaller
+ // time steps for 3D than for 2D for the
+ // shell geometry. This is because the
+ // cells are more distorted in that case
+ // (it is the smallest edge length that
+ // determines the CFL number). Instead of
+ // computing the time step from maximum
+ // velocity and minimal mesh size as in
+ // step-31, we compute local CFL numbers,
+ // i.e., on each cell we compute the
+ // maximum velocity times the mesh size,
+ // and compute the maximum of
+ // them. Hence, we need to choose the
+ // factor in front of the time step
+ // slightly smaller.
+ //
+ // After temperature right hand side
+ // assembly, we solve the linear system
+ // for temperature (with fully
+ // distributed vectors without any
+ // ghosts), apply constraints and copy
+ // the vector back to one with ghosts.
+ //
+ // In the end, we extract the temperature
+ // range similarly to step-31 to produce
+ // some output (for example in order to
+ // help us choose the stabilization
+ // constants, as discussed in the
+ // introduction). The only difference is
+ // that we need to exchange maxima over
+ // all processors.
+ computing_timer.enter_section (" Assemble temperature rhs");
+ {
+ old_time_step = time_step;
+
+ const double scaling = (dim==3 ? 0.25 : 1.0);
+ time_step = (scaling/(2.1*dim*std::sqrt(1.*dim)) /
+ (parameters.temperature_degree *
+ get_cfl_number()));
+
+ const double maximal_velocity = get_maximal_velocity();
+ pcout << " Maximal velocity: "
+ << maximal_velocity *EquationData::year_in_seconds * 100
+ << " cm/year"
+ << std::endl;
+ pcout << " " << "Time step: "
+ << time_step/EquationData::year_in_seconds
+ << " years"
+ << std::endl;
+
+ temperature_solution = old_temperature_solution;
+ assemble_temperature_system (maximal_velocity);
+ }
+ computing_timer.exit_section ();
+
+ computing_timer.enter_section (" Solve temperature system");
+ {
+ SolverControl solver_control (temperature_matrix.m(),
+ 1e-12*temperature_rhs.l2_norm());
+// SolverCG<PETScWrappers::MPI::Vector> cg (solver_control);
+ PETScWrappers::SolverGMRES cg(solver_control,MPI_COMM_WORLD);
+
+ PETScWrappers::MPI::Vector
+ distributed_temperature_solution (temperature_rhs);
+ distributed_temperature_solution = temperature_solution;
+
+ cg.solve (temperature_matrix, distributed_temperature_solution,
+ temperature_rhs, *T_preconditioner);
+// cg.solve (temperature_matrix, distributed_temperature_solution,
+// temperature_rhs, PETScWrappers::PreconditionILU(temperature_matrix));
+
+ temperature_constraints.distribute (distributed_temperature_solution);
+ temperature_solution = distributed_temperature_solution;
+
+ pcout << " "
+ << solver_control.last_step()
+ << " CG iterations for temperature" << std::endl;
+ computing_timer.exit_section();
+
+ double temperature[2] = { std::numeric_limits<double>::max(),
+ -std::numeric_limits<double>::max() };
+ double global_temperature[2];
+
+// for (unsigned int i=0; i<distributed_temperature_solution.local_size(); ++i)
+// {
+// temperature[0] = std::min<double> (temperature[0],
+// distributed_temperature_solution.trilinos_vector()[0][i]);
+// temperature[1] = std::max<double> (temperature[1],
+// distributed_temperature_solution.trilinos_vector()[0][i]);
+// }
+ temperature[0] = distributed_temperature_solution.min();
+ temperature[1] = distributed_temperature_solution.max();
+
+ temperature[0] *= -1.0;
+ Utilities::MPI::max (temperature, MPI_COMM_WORLD, global_temperature);
+ global_temperature[0] *= -1.0;
+
+ pcout << " Temperature range: "
+ << global_temperature[0] << ' ' << global_temperature[1]
+ << std::endl;
+ }
+ }
+
+
+ // @sect4{BoussinesqFlowProblem::output_results}
+
+ // Next comes the function that generates
+ // the output. The quantities to output
+ // could be introduced manually like we did
+ // in step-31. An alternative is to hand
+ // this task over to a class PostProcessor
+ // that inherits from the class
+ // DataPostprocessor, which can be attached
+ // to DataOut. This allows us to output
+ // derived quantities from the solution,
+ // like the friction heating included in
+ // this example. It overloads the virtual
+ // function
+ // DataPostprocessor::compute_derived_quantities_vector,
+ // which is then internally called from
+ // DataOut::build_patches. We have to give
+ // it values of the numerical solution, its
+ // derivatives, normals to the cell, the
+ // actual evaluation points and any
+ // additional quantities. This follows the
+ // same procedure as discussed in step-29
+ // and other programs.
+ template <int dim>
+ class BoussinesqFlowProblem<dim>::Postprocessor : public DataPostprocessor<dim> // turns solution values into the output fields listed by get_names()
+ {
+ public:
+ Postprocessor (const unsigned int partition, // value written to the "partition" output field
+ const double minimal_pressure); // value subtracted from the pressure before it is written
+ // Fill computed_quantities, one vector per evaluation point, from the values uh and gradients duh.
+ virtual
+ void
+ compute_derived_quantities_vector (const std::vector<Vector<double> > &uh,
+ const std::vector<std::vector<Tensor<1,dim> > > &duh,
+ const std::vector<std::vector<Tensor<2,dim> > > &dduh, // not used by the implementation
+ const std::vector<Point<dim> > &normals, // not used by the implementation
+ const std::vector<Point<dim> > &evaluation_points, // not used by the implementation
+ std::vector<Vector<double> > &computed_quantities) const;
+ // Names of the output fields: dim times "velocity", then "p", "T", "friction_heating", "partition".
+ virtual std::vector<std::string> get_names () const;
+ // Marks the first dim fields as parts of one vector and the remaining four as scalars.
+ virtual
+ std::vector<DataComponentInterpretation::DataComponentInterpretation>
+ get_data_component_interpretation () const;
+ // Update flags requested for the evaluation: values, gradients and quadrature points.
+ virtual UpdateFlags get_needed_update_flags () const;
+
+ private:
+ const unsigned int partition;
+ const double minimal_pressure;
+ };
+
+
+ template <int dim>
+ BoussinesqFlowProblem<dim>::Postprocessor::
+ Postprocessor (const unsigned int partition_in,
+                const double minimal_pressure_in)
+   :
+   // distinct parameter names avoid shadowing the members being initialized
+   partition (partition_in), minimal_pressure (minimal_pressure_in)
+ {}
+
+
+ // Here we define the names for the
+ // variables we want to output. These are
+ // the actual solution values for velocity,
+ // pressure, and temperature, as well as
+ // the friction heating and to each cell
+ // the number of the processor that owns
+ // it. This allows us to visualize the
+ // partitioning of the domain among the
+ // processors. Except for the velocity,
+ // which is vector-valued, all other
+ // quantities are scalar.
+ template <int dim>
+ std::vector<std::string>
+ BoussinesqFlowProblem<dim>::Postprocessor::get_names() const
+ {
+   // dim velocity components come first, then the four scalar fields in output order
+   static const char *const scalar_names[] = { "p", "T", "friction_heating", "partition" };
+   const unsigned int n_scalars = sizeof(scalar_names) / sizeof(scalar_names[0]);
+
+   std::vector<std::string> names (dim, "velocity");
+   names.insert (names.end(), scalar_names, scalar_names + n_scalars);
+   return names;
+ }
+
+
+ template <int dim>
+ std::vector<DataComponentInterpretation::DataComponentInterpretation>
+ BoussinesqFlowProblem<dim>::Postprocessor::
+ get_data_component_interpretation () const
+ {
+   typedef DataComponentInterpretation::DataComponentInterpretation Kind;
+
+   // The first dim outputs jointly form one vector ...
+   std::vector<Kind> kinds (dim, DataComponentInterpretation::component_is_part_of_vector);
+
+   // ... and are followed by four stand-alone scalar outputs.
+   const unsigned int n_scalar_outputs = 4;
+   kinds.insert (kinds.end(), n_scalar_outputs,
+                 DataComponentInterpretation::component_is_scalar);
+   return kinds;
+ }
+
+
+ template <int dim>
+ UpdateFlags
+ BoussinesqFlowProblem<dim>::Postprocessor::get_needed_update_flags() const
+ {
+   return update_q_points | update_gradients | update_values; // union of the three requested flags
+ }
+
+
+ // Now we implement the function that
+ // computes the derived quantities. As we
+ // also did for the output, we rescale the
+ // velocity from its SI units to something
+ // more readable, namely cm/year. Next, the
+ // pressure is scaled to be between 0 and
+ // the maximum pressure. This makes it more
+ // easily comparable -- in essence making
+ // all pressure variables positive or
+ // zero. Temperature is taken as is, and
+ // the friction heating is computed as $2
+ // \eta \varepsilon(\mathbf{u}) \cdot
+ // \varepsilon(\mathbf{u})$.
+ //
+ // The quantities we output here are more
+ // for illustration, rather than for actual
+ // scientific value. We come back to this
+ // briefly in the results section of this
+ // program and explain what one may in fact
+ // be interested in.
+ template <int dim>
+ void
+ BoussinesqFlowProblem<dim>::Postprocessor::
+ compute_derived_quantities_vector (const std::vector<Vector<double> > &uh,
+                                    const std::vector<std::vector<Tensor<1,dim> > > &duh,
+                                    const std::vector<std::vector<Tensor<2,dim> > > &/*dduh*/,
+                                    const std::vector<Point<dim> > &/*normals*/,
+                                    const std::vector<Point<dim> > &/*evaluation_points*/,
+                                    std::vector<Vector<double> > &computed_quantities) const
+ {
+   // One entry per evaluation point; each input vector is checked to hold dim+2 components.
+   const unsigned int n_points = uh.size();
+   Assert (duh.size() == n_points, ExcInternalError());
+   Assert (computed_quantities.size() == n_points, ExcInternalError());
+   Assert (uh[0].size() == dim+2, ExcInternalError());
+
+   for (unsigned int point=0; point<n_points; ++point)
+     {
+       const Vector<double> &in = uh[point];
+       Vector<double> &out = computed_quantities[point];
+       // Rescaled velocity components, and the velocity gradient row by row.
+       Tensor<2,dim> velocity_gradient;
+       for (unsigned int c=0; c<dim; ++c)
+         {
+           out(c) = (in(c) * EquationData::year_in_seconds * 100);
+           velocity_gradient[c] = duh[point][c];
+         }
+
+       // Pressure shifted by the stored minimum; temperature is copied unchanged.
+       out(dim) = (in(dim)-minimal_pressure);
+       out(dim+1) = in(dim+1);
+       // Friction heating from the symmetrized velocity gradient, then the partition number.
+       const SymmetricTensor<2,dim> eps = symmetrize (velocity_gradient);
+       out(dim+2) = 2 * EquationData::eta *
+                    eps * eps;
+       out(dim+3) = partition;
+     }
+ }
+
+
+ // The <code>output_results()</code>
+ // function does mostly what the
+ // corresponding one did in step-31, in
+ // particular merging the data from the two
+ // DoFHandler objects (for the Stokes and
+ // the temperature parts of the problem)
+ // into one. There is one minor change: we
+ // make sure that each processor only works
+ // on the subdomain it owns locally (and
+ // not on ghost or artificial cells) when
+ // building the joint solution vector. The
+ // same will then have to be done in
+ // DataOut::build_patches(), but that
+ // function does so automatically.
+ //
+ // What we end up with is a set of patches
+ // that we can write using the functions in
+ // DataOutBase in a variety of output
+ // formats. Here, we then have to pay
+ // attention that what each processor
+ // writes is really only its own part of
+ // the domain, i.e. we will want to write
+ // each processor's contribution into a
+ // separate file. This we do by adding an
+ // additional number to the filename when
+ // we write the solution. This is not
+ // really new, we did it similarly in
+ // step-40. Note that we write in the
+ // compressed format @p .vtu instead of
+ // plain vtk files, which saves quite some
+ // storage.
+ //
+ // All the rest of the work is done in the
+ // Postprocessor class.
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::output_results () // writes one .vtu file per process, plus .pvtu/.visit records on process 0
+ {
+ computing_timer.enter_section ("Postprocessing");
+
+ const FESystem<dim> joint_fe (stokes_fe, 1, // joint element: Stokes part is base element 0 ...
+ temperature_fe, 1); // ... and the temperature part is base element 1
+
+ DoFHandler<dim> joint_dof_handler (triangulation);
+ joint_dof_handler.distribute_dofs (joint_fe);
+ Assert (joint_dof_handler.n_dofs() ==
+ stokes_dof_handler.n_dofs() + temperature_dof_handler.n_dofs(),
+ ExcInternalError());
+
+ PETScWrappers::MPI::Vector joint_solution;
+ joint_solution.reinit(MPI_COMM_WORLD, joint_dof_handler.locally_owned_dofs()); // sized by the locally owned joint DoFs
+
+ {
+ std::vector<unsigned int> local_joint_dof_indices (joint_fe.dofs_per_cell);
+ std::vector<unsigned int> local_stokes_dof_indices (stokes_fe.dofs_per_cell);
+ std::vector<unsigned int> local_temperature_dof_indices (temperature_fe.dofs_per_cell);
+
+ typename DoFHandler<dim>::active_cell_iterator
+ joint_cell = joint_dof_handler.begin_active(),
+ joint_endc = joint_dof_handler.end(),
+ stokes_cell = stokes_dof_handler.begin_active(),
+ temperature_cell = temperature_dof_handler.begin_active();
+ for (; joint_cell!=joint_endc; // the three handlers are walked in lockstep over the same triangulation
+ ++joint_cell, ++stokes_cell, ++temperature_cell)
+ if (joint_cell->is_locally_owned()) // copy values only on cells this process owns
+ {
+ joint_cell->get_dof_indices (local_joint_dof_indices);
+ stokes_cell->get_dof_indices (local_stokes_dof_indices);
+ temperature_cell->get_dof_indices (local_temperature_dof_indices);
+
+ for (unsigned int i=0; i<joint_fe.dofs_per_cell; ++i)
+ if (joint_fe.system_to_base_index(i).first.first == 0) // base element 0: value comes from the Stokes solution
+ {
+ Assert (joint_fe.system_to_base_index(i).second
+ <
+ local_stokes_dof_indices.size(),
+ ExcInternalError());
+
+ joint_solution(local_joint_dof_indices[i])
+ = stokes_solution(local_stokes_dof_indices
+ [joint_fe.system_to_base_index(i).second]);
+ }
+ else // base element 1: value comes from the temperature solution
+ {
+ Assert (joint_fe.system_to_base_index(i).first.first == 1,
+ ExcInternalError());
+ Assert (joint_fe.system_to_base_index(i).second
+ <
+ local_temperature_dof_indices.size(),
+ ExcInternalError());
+ joint_solution(local_joint_dof_indices[i])
+ = temperature_solution(local_temperature_dof_indices
+ [joint_fe.system_to_base_index(i).second]);
+ }
+ }
+ }
+
+// joint_solution.print(std::cout);
+ joint_solution.compress(); // NOTE(review): presumably finalizes the element-wise writes above -- confirm
+
+ IndexSet locally_relevant_joint_dofs(joint_dof_handler.n_dofs());
+ DoFTools::extract_locally_relevant_dofs (joint_dof_handler, locally_relevant_joint_dofs);
+ PETScWrappers::MPI::Vector locally_relevant_joint_solution;
+ locally_relevant_joint_solution.reinit (MPI_COMM_WORLD,joint_dof_handler.locally_owned_dofs(),locally_relevant_joint_dofs); // owned plus locally relevant joint DoFs
+ locally_relevant_joint_solution = joint_solution;
+
+ Postprocessor postprocessor (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD), // this process' rank becomes the partition number
+ stokes_solution.block(1).min()); // minimum of block 1 of the Stokes solution is passed as minimal_pressure
+
+ DataOut<dim> data_out;
+ data_out.attach_dof_handler (joint_dof_handler);
+ data_out.add_data_vector (locally_relevant_joint_solution, postprocessor);
+ data_out.build_patches ();
+
+ static int out_index=0; // counts calls to this function; used to number the output files
+ const std::string filename = ("solution-" +
+ Utilities::int_to_string (out_index, 5) +
+ "." +
+ Utilities::int_to_string
+ (triangulation.locally_owned_subdomain(), 4) +
+ ".vtu");
+ std::ofstream output (filename.c_str());
+ data_out.write_vtu (output);
+
+
+ // At this point, all processors have
+ // written their own files to disk. We
+ // could visualize them individually in
+ // Visit or Paraview, but in reality we
+ // of course want to visualize the whole
+ // set of files at once. To this end, we
+ // create a master file in each of the
+ // formats understood by Visit
+ // (<code>.visit</code>) and Paraview
+ // (<code>.pvtu</code>) on the zeroth
+ // processor that describes how the
+ // individual files are defining the
+ // global data set.
+ if (Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0)
+ {
+ std::vector<std::string> filenames; // per-process file names, built with the same pattern as 'filename' above
+ for (unsigned int i=0; i<Utilities::MPI::n_mpi_processes(MPI_COMM_WORLD); ++i)
+ filenames.push_back (std::string("solution-") +
+ Utilities::int_to_string (out_index, 5) +
+ "." +
+ Utilities::int_to_string(i, 4) +
+ ".vtu");
+ const std::string
+ pvtu_master_filename = ("solution-" +
+ Utilities::int_to_string (out_index, 5) +
+ ".pvtu");
+ std::ofstream pvtu_master (pvtu_master_filename.c_str());
+ data_out.write_pvtu_record (pvtu_master, filenames);
+
+ const std::string
+ visit_master_filename = ("solution-" +
+ Utilities::int_to_string (out_index, 5) +
+ ".visit");
+ std::ofstream visit_master (visit_master_filename.c_str());
+ data_out.write_visit_record (visit_master, filenames);
+ }
+
+ computing_timer.exit_section ();
+ out_index++; // the next call writes a new set of files
+ }
+
+
+
+ // @sect4{BoussinesqFlowProblem::refine_mesh}
+
+ // This function isn't really new
+ // either. Since the
+ // <code>setup_dofs</code> function that we
+ // call in the middle has its own timer
+ // section, we split timing this function
+ // into two sections. It will also allow us
+ // to easily identify which of the two is
+ // more expensive.
+ //
+ // One thing of note, however, is that we
+ // only want to compute error indicators on
+ // the locally owned subdomain. In order to
+ // achieve this, we pass one additional
+ // argument to the
+ // KellyErrorEstimator::estimate
+ // function. Note that the vector for error
+ // estimates is resized to the number of
+ // active cells present on the current
+ // process, which is less than the total
+ // number of active cells on all processors
+ // (but more than the number of locally
+ // owned active cells); each processor only
+ // has a few coarse cells around the
+ // locally owned ones, as also explained in
+ // step-40.
+ //
+ // The local error estimates are then
+ // handed to a %parallel version of
+ // GridRefinement (in namespace
+ // parallel::distributed::GridRefinement,
+ // see also step-40) which looks at the
+ // errors and finds the cells that need
+ // refinement by comparing the error values
+ // across processors. As in step-31, we
+ // want to limit the maximum grid level. So
+ // in case some cells have been marked that
+ // are already at the finest level, we
+ // simply clear the refine flags.
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::refine_mesh (const unsigned int max_grid_level) // estimate temperature error, refine/coarsen, carry both solutions over
+ {
+ computing_timer.enter_section ("Refine mesh structure, part 1");
+ Vector<float> estimated_error_per_cell (triangulation.n_active_cells()); // one entry per active cell present on this process
+
+ KellyErrorEstimator<dim>::estimate (temperature_dof_handler,
+ QGauss<dim-1>(parameters.temperature_degree+1),
+ typename FunctionMap<dim>::type(),
+ temperature_solution,
+ estimated_error_per_cell,
+ std::vector<bool>(),
+ 0,
+ 0,
+ triangulation.locally_owned_subdomain()); // restricts the estimate to the locally owned subdomain
+
+ parallel::distributed::GridRefinement::
+ refine_and_coarsen_fixed_fraction (triangulation,
+ estimated_error_per_cell,
+ 0.3, 0.1); // NOTE(review): presumably the refinement and coarsening fractions -- confirm against the library signature
+
+ if (triangulation.n_levels() > max_grid_level) // only needed if cells on level max_grid_level or above exist
+ for (typename Triangulation<dim>::active_cell_iterator
+ cell = triangulation.begin_active(max_grid_level);
+ cell != triangulation.end(); ++cell)
+ cell->clear_refine_flag (); // cells at or above the maximum level are not refined further
+
+ // With all flags marked as necessary, we
+ // set up the
+ // parallel::distributed::SolutionTransfer
+ // object to transfer the solutions for
+ // the current time level and the next
+ // older one. The syntax is similar to
+ // the non-%parallel solution transfer
+ // (with the exception that here a
+ // pointer to the vector entries is
+ // enough). The remainder of the function
+ // is concerned with setting up the data
+ // structures again after mesh refinement
+ // and restoring the solution vectors on
+ // the new mesh.
+ std::vector<const PETScWrappers::MPI::Vector *> x_temperature (2);
+ x_temperature[0] = &temperature_solution;
+ x_temperature[1] = &old_temperature_solution;
+ std::vector<const PETScWrappers::MPI::BlockVector *> x_stokes (2);
+ x_stokes[0] = &stokes_solution;
+ x_stokes[1] = &old_stokes_solution;
+
+ parallel::distributed::SolutionTransfer<dim,PETScWrappers::MPI::Vector>
+ temperature_trans(temperature_dof_handler);
+ parallel::distributed::SolutionTransfer<dim,PETScWrappers::MPI::BlockVector>
+ stokes_trans(stokes_dof_handler);
+
+ triangulation.prepare_coarsening_and_refinement();
+ temperature_trans.prepare_for_coarsening_and_refinement(x_temperature);
+ stokes_trans.prepare_for_coarsening_and_refinement(x_stokes);
+
+ triangulation.execute_coarsening_and_refinement ();
+ computing_timer.exit_section();
+ // setup_dofs() runs between the two timed parts of this function.
+ setup_dofs ();
+
+ computing_timer.enter_section ("Refine mesh structure, part 2");
+
+ { // temperature: interpolate into work vectors, then assign to the solution vectors
+ PETScWrappers::MPI::Vector distributed_temp1 (temperature_rhs); // work vectors copy-constructed from temperature_rhs
+ PETScWrappers::MPI::Vector distributed_temp2 (temperature_rhs);
+
+ std::vector<PETScWrappers::MPI::Vector *> tmp (2); // same order as x_temperature: current first, then old
+ tmp[0] = &(distributed_temp1);
+ tmp[1] = &(distributed_temp2);
+ temperature_trans.interpolate(tmp);
+
+ temperature_solution = distributed_temp1;
+ old_temperature_solution = distributed_temp2;
+ }
+
+ { // Stokes: same procedure with block vectors copy-constructed from stokes_rhs
+ PETScWrappers::MPI::BlockVector distributed_stokes (stokes_rhs);
+ PETScWrappers::MPI::BlockVector old_distributed_stokes (stokes_rhs);
+
+ std::vector<PETScWrappers::MPI::BlockVector *> stokes_tmp (2); // same order as x_stokes: current first, then old
+ stokes_tmp[0] = &(distributed_stokes);
+ stokes_tmp[1] = &(old_distributed_stokes);
+
+ stokes_trans.interpolate (stokes_tmp);
+ stokes_solution = distributed_stokes;
+ old_stokes_solution = old_distributed_stokes;
+ }
+
+ computing_timer.exit_section();
+ }
+
+
+
+ // @sect4{BoussinesqFlowProblem::run}
+
+ // This is the final and controlling
+ // function in this class. It, in fact,
+ // runs the entire rest of the program and
+ // is, once more, very similar to
+ // step-31. We use a different mesh now (a
+ // GridGenerator::hyper_shell instead of a
+ // simple cube geometry), and use the
+ // <code>project_temperature_field()</code>
+ // function instead of the library function
+ // <code>VectorTools::project</code>, the
+ // rest is as before.
+ template <int dim>
+ void BoussinesqFlowProblem<dim>::run ()
+ {
+   // Build the coarse shell mesh, attach the curved boundary description to
+   // boundary indicators 0 and 1, and refine globally.
+   GridGenerator::hyper_shell (triangulation,
+                               Point<dim>(),
+                               EquationData::R0,
+                               EquationData::R1,
+                               (dim==3) ? 96 : 12,
+                               true);
+   static HyperShellBoundary<dim> boundary;
+   triangulation.set_boundary (0, boundary);
+   triangulation.set_boundary (1, boundary);
+
+   global_Omega_diameter = GridTools::diameter (triangulation);
+
+   triangulation.refine_global (parameters.initial_global_refinement);
+
+   setup_dofs();
+
+   unsigned int pre_refinement_step = 0;
+
+ start_time_iteration:
+
+   // This label is re-entered via goto after each initial adaptive
+   // refinement: the initial temperature is projected again and the
+   // time loop restarts from step 0.
+   project_temperature_field ();
+
+   timestep_number = 0;
+   time_step = old_time_step = 0;
+
+   double time = 0;
+
+   do
+     {
+       pcout << "Timestep " << timestep_number
+             << ": t=" << time/EquationData::year_in_seconds
+             << " years"
+             << std::endl;
+
+       assemble_stokes_system ();
+       build_stokes_preconditioner ();
+       assemble_temperature_matrix ();
+
+       solve ();
+
+       pcout << std::endl;
+
+       // During step 0, refine adaptively up to initial_adaptive_refinement
+       // times, restarting the time loop each time; afterwards refine every
+       // adaptive_refinement_interval steps.
+       if ((timestep_number == 0) &&
+           (pre_refinement_step < parameters.initial_adaptive_refinement))
+         {
+           refine_mesh (parameters.initial_global_refinement +
+                        parameters.initial_adaptive_refinement);
+           ++pre_refinement_step;
+           goto start_time_iteration;
+         }
+       else if ((timestep_number > 0)
+                &&
+                (timestep_number % parameters.adaptive_refinement_interval == 0))
+         refine_mesh (parameters.initial_global_refinement +
+                      parameters.initial_adaptive_refinement);
+
+       if ((parameters.generate_graphical_output == true)
+           &&
+           (timestep_number % parameters.graphical_output_interval == 0))
+         output_results ();
+
+       // We do not need to extrapolate in the
+       // last iteration, so if we reached
+       // the final time, we stop here. Note that
+       // timestep_number is not incremented on this
+       // path, so after the loop it still names the
+       // step whose solution was just computed.
+       if (time > parameters.end_time * EquationData::year_in_seconds)
+         break;
+
+       // In order to speed up linear
+       // solvers, we extrapolate the
+       // solutions from the old time levels
+       // to the new one. This gives a very
+       // good initial guess, cutting the
+       // number of iterations needed in
+       // solvers by more than one half.
+       PETScWrappers::MPI::BlockVector old_old_stokes_solution(old_stokes_solution);
+       old_old_stokes_solution = old_stokes_solution;
+       old_stokes_solution = stokes_solution;
+       old_old_temperature_solution = old_temperature_solution;
+       old_temperature_solution = temperature_solution;
+       // Linear extrapolation: (1+dt/dt_old) * current - (dt/dt_old) * previous.
+       // Skipped as long as old_time_step is still zero.
+       if (old_time_step > 0)
+         {
+           // The extrapolation is done on copies laid out like the right hand
+           // sides and then assigned back. This workaround was written because
+           // Trilinos' sadd() rejects ghosted vectors; NOTE(review): confirm
+           // whether the PETSc vectors used here still need it.
+           {
+             PETScWrappers::MPI::BlockVector distr_solution (stokes_rhs);
+             distr_solution = stokes_solution;
+             PETScWrappers::MPI::BlockVector distr_old_solution (stokes_rhs);
+             distr_old_solution = old_old_stokes_solution;
+             distr_solution .sadd (1.+time_step/old_time_step, -time_step/old_time_step,
+                                   distr_old_solution);
+             stokes_solution = distr_solution;
+           }
+           {
+             PETScWrappers::MPI::Vector distr_solution (temperature_rhs);
+             distr_solution = temperature_solution;
+             PETScWrappers::MPI::Vector distr_old_solution (temperature_rhs);
+             distr_old_solution = old_old_temperature_solution;
+             distr_solution .sadd (1.+time_step/old_time_step, -time_step/old_time_step,
+                                   distr_old_solution);
+             temperature_solution = distr_solution;
+           }
+         }
+
+       // As the last thing during a time step (before actually
+       // bumping up the number of the time step), we check
+       // whether the current time step number is divisible
+       // by 100, and if so we let the computing timer print
+       // a summary of CPU times spent so far.
+       if ((timestep_number > 0) && (timestep_number % 100 == 0))
+         computing_timer.print_summary ();
+
+       time += time_step;
+       ++timestep_number;
+     }
+   while (true);
+
+   // If we are generating graphical output, do so also for the last
+   // time step unless we had just done so before we left the
+   // do-while loop. The loop is left by the break above, i.e. without
+   // incrementing timestep_number, so the step to test is
+   // timestep_number itself.
+   if ((parameters.generate_graphical_output == true)
+       &&
+       !(timestep_number % parameters.graphical_output_interval == 0))
+     output_results ();
+ }
+}
+
+
+
+ // @sect3{The <code>main</code> function}
+
+ // The main function is short as usual and
+ // very similar to the one in step-31. Since
+ // we use a parameter file which is specified
+ // as an argument in the command line, we
+ // have to read it in here and pass it on to
+ // the Parameters class for parsing. If no
+ // filename is given in the command line, we
+ // simply use the <code>\step-32.prm</code>
+ // file which is distributed together with
+ // the program.
+ //
+ // Because 3d computations are simply
+ // very slow unless you throw a lot
+ // of processors at them, the program
+ // defaults to 2d. You can get the 3d
+ // version by changing the constant
+ // dimension below to 3.
+int main (int argc, char *argv[])
+{
+  using namespace Step32;
+  using namespace dealii;
+
+  // MPI is shut down by mpi_initialization's destructor; PETSc is set up and torn down by hand.
+  Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv);
+  PetscInitialize(&argc,&argv,0,0); // NOTE(review): confirm MPI_InitFinalize does not already initialize PETSc
+
+  std::cout << "dealii::Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) = " <<
+            dealii::Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) << std::endl;
+  // Failures are recorded here rather than returned at once, so the PETSc cleanup below always runs.
+  int exit_code = 0;
+  try
+    {
+      deallog.depth_console (0);
+
+      // The first command line argument, if any, names the parameter file.
+      std::string parameter_filename;
+      if (argc>=2)
+        parameter_filename = argv[1];
+      else
+        parameter_filename = "step-32.prm";
+
+      const int dim = 2;
+      BoussinesqFlowProblem<dim>::Parameters parameters(parameter_filename);
+      BoussinesqFlowProblem<dim> flow_problem (parameters);
+      flow_problem.m_myrank = dealii::Utilities::MPI::this_mpi_process(MPI_COMM_WORLD);
+      flow_problem.run ();
+    }
+  catch (std::exception &exc)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Exception on processing: " << std::endl
+                << exc.what() << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      exit_code = 1;
+    }
+  catch (...)
+    {
+      std::cerr << std::endl << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      std::cerr << "Unknown exception!" << std::endl
+                << "Aborting!" << std::endl
+                << "----------------------------------------------------"
+                << std::endl;
+      exit_code = 1;
+    }
+
+  dealii::GrowingVectorMemory<dealii::PETScWrappers::MPI::Vector>::release_unused_memory ();
+  dealii::GrowingVectorMemory<dealii::PETScWrappers::Vector>::release_unused_memory ();
+  PetscFinalize(); // reached on the error paths as well, which previously returned without finalizing PETSc
+  return exit_code;
+}
Added: branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_matrix_base.h
===================================================================
--- branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_matrix_base.h (rev 0)
+++ branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_matrix_base.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,1870 @@
+//---------------------------------------------------------------------------
+// $Id: petsc_matrix_base.h 27628 2012-11-20 22:49:26Z heister $
+//
+// Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+#ifndef __deal2__petsc_matrix_base_h
+#define __deal2__petsc_matrix_base_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_USE_PETSC
+
+# include <deal.II/base/subscriptor.h>
+# include <deal.II/lac/full_matrix.h>
+# include <deal.II/lac/exceptions.h>
+# include <deal.II/lac/vector.h>
+
+# include <petscmat.h>
+# include <deal.II/base/std_cxx1x/shared_ptr.h>
+
+# include <vector>
+# include <cmath>
+
+DEAL_II_NAMESPACE_OPEN
+
+template <typename Matrix> class BlockMatrixBase;
+
+
+namespace PETScWrappers
+{
+ // forward declarations
+ class VectorBase;
+ class MatrixBase;
+
+ namespace MatrixIterators
+ {
+ /**
+ * STL conforming iterator. This class acts as an iterator walking over the
+ * elements of PETSc matrices. Since PETSc offers a uniform interface for all
+ * types of matrices, this iterator can be used to access both sparse and full
+ * matrices.
+ *
+ * Note that PETSc does not give any guarantees as to the order of elements
+ * within each row. Note also that accessing the elements of a full matrix
+ * surprisingly only shows the nonzero elements of the matrix, not all
+ * elements.
+ *
+ * @ingroup PETScWrappers
+ * @author Guido Kanschat, Roy Stogner, Wolfgang Bangerth, 2004
+ */
+ class const_iterator
+ {
+ private:
+ /**
+ * Accessor class for iterators
+ */
+ class Accessor
+ {
+ public:
+ /**
+ * Constructor. Since we use
+ * accessors only for read
+ * access, a const matrix
+ * pointer is sufficient.
+ */
+ Accessor (const MatrixBase *matrix,
+ const unsigned int row,
+ const unsigned int index);
+
+ /**
+ * Row number of the element
+ * represented by this
+ * object.
+ */
+ unsigned int row() const;
+
+ /**
+ * Index in row of the element
+ * represented by this
+ * object.
+ */
+ unsigned int index() const;
+
+ /**
+ * Column number of the
+ * element represented by
+ * this object.
+ */
+ unsigned int column() const;
+
+ /**
+ * Value of this matrix entry.
+ */
+ PetscScalar value() const;
+
+ /**
+ * Exception without a message. NOTE(review): presumably signals an accessor positioned past the last matrix entry -- confirm in the implementation.
+ */
+ DeclException0 (ExcBeyondEndOfMatrix);
+ /**
+ * Exception reporting access to a row outside the locally stored row range of a distributed matrix.
+ */
+ DeclException3 (ExcAccessToNonlocalRow,
+ int, int, int,
+ << "You tried to access row " << arg1
+ << " of a distributed matrix, but only rows "
+ << arg2 << " through " << arg3
+ << " are stored locally and can be accessed.");
+
+ private:
+ /**
+ * The matrix accessed.
+ */
+ mutable MatrixBase *matrix; // NOTE(review): non-const although the constructor takes a const pointer -- the conversion must happen in the implementation; confirm
+
+ /**
+ * Current row number.
+ */
+ unsigned int a_row;
+
+ /**
+ * Current index in row.
+ */
+ unsigned int a_index;
+
+ /**
+ * Cache where we store the
+ * column indices of the present
+ * row. This is necessary, since
+ * PETSc makes access to the
+ * elements of its matrices
+ * rather hard, and it is much
+ * more efficient to copy all
+ * column entries of a row once
+ * when we enter it than
+ * repeatedly asking PETSc for
+ * individual ones. This also
+ * makes some sense since it is
+ * likely that we will access
+ * them sequentially anyway.
+ *
+ * In order to make copying of
+ * iterators/accessor of
+ * acceptable performance, we
+ * keep a shared pointer to these
+ * entries so that more than one
+ * accessor can access this data
+ * if necessary.
+ */
+ std_cxx1x::shared_ptr<const std::vector<unsigned int> > colnum_cache;
+
+ /**
+ * Similar cache for the values
+ * of this row.
+ */
+ std_cxx1x::shared_ptr<const std::vector<PetscScalar> > value_cache;
+
+ /**
+ * Discard the old row caches
+ * (they may still be used by
+ * other accessors) and generate
+ * new ones for the row pointed
+ * to presently by this accessor.
+ */
+ void visit_present_row ();
+
+ /**
+ * Make enclosing class a
+ * friend.
+ */
+ friend class const_iterator;
+ };
+
+ public:
+
+ /**
+ * Constructor. Create an iterator
+ * into the matrix @p matrix for the
+ * given row and the index within it.
+ */
+ const_iterator (const MatrixBase *matrix,
+ const unsigned int row,
+ const unsigned int index);
+
+ /**
+ * Prefix increment.
+ */
+ const_iterator &operator++ ();
+
+ /**
+ * Postfix increment.
+ */
+ const_iterator operator++ (int);
+
+ /**
+ * Dereferencing operator.
+ */
+ const Accessor &operator* () const;
+
+ /**
+ * Dereferencing operator.
+ */
+ const Accessor *operator-> () const;
+
+ /**
+ * Comparison. True, if
+ * both iterators point to
+ * the same matrix
+ * position.
+ */
+ bool operator == (const const_iterator &) const;
+ /**
+ * Inverse of <tt>==</tt>.
+ */
+ bool operator != (const const_iterator &) const;
+
+ /**
+ * Comparison
+ * operator. Result is true
+ * if either the first row
+ * number is smaller or if
+ * the row numbers are
+ * equal and the first
+ * index is smaller.
+ */
+ bool operator < (const const_iterator &) const;
+
+ /**
+ * Exception reporting an index within a row that the row does not have (first argument: row, second: index).
+ */
+ DeclException2 (ExcInvalidIndexWithinRow,
+ int, int,
+ << "Attempt to access element " << arg2
+ << " of row " << arg1
+ << " which doesn't have that many elements.");
+
+ private:
+ /**
+ * Store an object of the
+ * accessor class.
+ */
+ Accessor accessor;
+ };
+
+ }
+
+
+ /**
+ * Base class for all matrix classes that are implemented on top of the PETSc
+ * matrix types. Since in PETSc all matrix types (i.e. sequential and
+ * parallel, sparse, blocked, etc.) are built by filling the contents of an
+ * abstract object that is only referenced through a pointer of a type that is
+ * independent of the actual matrix type, we can implement almost all
+ * functionality of matrices in this base class. Derived classes will then only
+ * have to provide the functionality to create one or the other kind of
+ * matrix.
+ *
+ * The interface of this class is modeled after the existing
+ * SparseMatrix class in deal.II. It has almost the same member
+ * functions, and is often exchangeable. However, since PETSc only supports a
+ * single scalar type (either double, float, or a complex data type), it is
+ * not templated, and only works with whatever your PETSc installation has
+ * defined the data type PetscScalar to.
+ *
+ * Note that PETSc only guarantees that operations do what you expect if the
+ * functions @p MatAssemblyBegin and @p MatAssemblyEnd have been called
+ * after matrix assembly. Therefore, you need to call
+ * SparseMatrix::compress() before you actually use the matrix. This also
+ * calls @p MatCompress that compresses the storage format for sparse
+ * matrices by discarding unused elements. PETSc allows to continue with
+ * assembling the matrix after calls to these functions, but since there are
+ * no more free entries available after that any more, it is better to only
+ * call SparseMatrix::compress() once at the end of the assembly stage and
+ * before the matrix is actively used.
+ *
+ * @ingroup PETScWrappers
+ * @ingroup Matrix1
+ * @author Wolfgang Bangerth, 2004
+ */
+ class MatrixBase : public Subscriptor
+ {
+ public:
+ /**
+ * Declare a typedef for the iterator
+ * class.
+ */
+ typedef MatrixIterators::const_iterator const_iterator;
+
+ /**
+ * Declare a typedef in analogy to all
+ * the other container classes.
+ */
+ typedef PetscScalar value_type;
+
+ /**
+ * Default constructor.
+ */
+ MatrixBase ();
+
+ /**
+ * Destructor. Made virtual so that one
+ * can use pointers to this class.
+ */
+ virtual ~MatrixBase ();
+
+ /**
+ * This operator assigns a scalar to a
+ * matrix. Since this does usually not
+ * make much sense (should we set all
+ * matrix entries to this value? Only
+ * the nonzero entries of the sparsity
+ * pattern?), this operation is only
+ * allowed if the actual value to be
+ * assigned is zero. This operator only
+ * exists to allow for the obvious
+ * notation <tt>matrix=0</tt>, which
+ * sets all elements of the matrix to
+ * zero, but keeps the sparsity pattern
+ * previously used.
+ */
+ MatrixBase &
+ operator = (const value_type d);
+ /**
+ * Release all memory and return
+ * to a state just like after
+ * having called the default
+ * constructor.
+ */
+ void clear ();
+
+ /**
+ * Set the element (<i>i,j</i>) to @p
+ * value.
+ *
+ * If the present object (from a
+ * derived class of this one) happens
+ * to be a sparse matrix, then this
+ * function adds a new entry to the
+ * matrix if it didn't exist before,
+ * very much in contrast to the
+ * SparseMatrix class which throws an
+ * error if the entry does not exist.
+ * If <tt>value</tt> is not a finite
+ * number an exception is thrown.
+ */
+ void set (const unsigned int i,
+ const unsigned int j,
+ const PetscScalar value);
+
+ /**
+ * Set all elements given in a
+ * FullMatrix<PetscScalar> into the sparse
+ * matrix locations given by
+ * <tt>indices</tt>. In other words,
+ * this function writes the elements
+ * in <tt>full_matrix</tt> into the
+ * calling matrix, using the
+ * local-to-global indexing specified
+ * by <tt>indices</tt> for both the
+ * rows and the columns of the
+ * matrix. This function assumes a
+ * quadratic sparse matrix and a
+ * quadratic full_matrix, the usual
+ * situation in FE calculations.
+ *
+ * If the present object (from a
+ * derived class of this one) happens
+ * to be a sparse matrix, then this
+ * function adds some new entries to
+ * the matrix if they didn't exist
+ * before, very much in contrast to
+ * the SparseMatrix class which
+ * throws an error if the entry does
+ * not exist.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be inserted anyway
+ * or they should be filtered
+ * away. The default value is
+ * <tt>false</tt>, i.e., even zero
+ * values are inserted/replaced.
+ */
+ void set (const std::vector<unsigned int> &indices,
+ const FullMatrix<PetscScalar> &full_matrix,
+ const bool elide_zero_values = false);
+
+ /**
+ * Same function as before, but now
+ * including the possibility to use
+ * rectangular full_matrices and
+ * different local-to-global indexing
+ * on rows and columns, respectively.
+ */
+ void set (const std::vector<unsigned int> &row_indices,
+ const std::vector<unsigned int> &col_indices,
+ const FullMatrix<PetscScalar> &full_matrix,
+ const bool elide_zero_values = false);
+
+ /**
+ * Set several elements in the
+ * specified row of the matrix with
+ * column indices as given by
+ * <tt>col_indices</tt> to the
+ * respective value.
+ *
+ * If the present object (from a
+ * derived class of this one) happens
+ * to be a sparse matrix, then this
+ * function adds some new entries to
+ * the matrix if they didn't exist
+ * before, very much in contrast to
+ * the SparseMatrix class which
+ * throws an error if the entry does
+ * not exist.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be inserted anyway
+ * or they should be filtered
+ * away. The default value is
+ * <tt>false</tt>, i.e., even zero
+ * values are inserted/replaced.
+ */
+ void set (const unsigned int row,
+ const std::vector<unsigned int> &col_indices,
+ const std::vector<PetscScalar> &values,
+ const bool elide_zero_values = false);
+
+ /**
+ * Set several elements to values
+ * given by <tt>values</tt> in a
+ * given row in columns given by
+ * col_indices into the sparse
+ * matrix.
+ *
+ * If the present object (from a
+ * derived class of this one) happens
+ * to be a sparse matrix, then this
+ * function adds some new entries to
+ * the matrix if they didn't exist
+ * before, very much in contrast to
+ * the SparseMatrix class which
+ * throws an error if the entry does
+ * not exist.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be inserted anyway
+ * or they should be filtered
+ * away. The default value is
+ * <tt>false</tt>, i.e., even zero
+ * values are inserted/replaced.
+ */
+ void set (const unsigned int row,
+ const unsigned int n_cols,
+ const unsigned int *col_indices,
+ const PetscScalar *values,
+ const bool elide_zero_values = false);
+
+ /**
+ * Add @p value to the element
+ * (<i>i,j</i>).
+ *
+ * If the present object (from a
+ * derived class of this one) happens
+ * to be a sparse matrix, then this
+ * function adds a new entry to the
+ * matrix if it didn't exist before,
+ * very much in contrast to the
+ * SparseMatrix class which throws an
+ * error if the entry does not exist.
+ * If <tt>value</tt> is not a finite
+ * number an exception is thrown.
+ */
+ void add (const unsigned int i,
+ const unsigned int j,
+ const PetscScalar value);
+
+ /**
+ * Add all elements given in a
+ * FullMatrix<PetscScalar> into sparse
+ * matrix locations given by
+ * <tt>indices</tt>. In other words,
+ * this function adds the elements in
+ * <tt>full_matrix</tt> to the
+ * respective entries in calling
+ * matrix, using the local-to-global
+ * indexing specified by
+ * <tt>indices</tt> for both the rows
+ * and the columns of the
+ * matrix. This function assumes a
+ * quadratic sparse matrix and a
+ * quadratic full_matrix, the usual
+ * situation in FE calculations.
+ *
+ * If the present object (from a
+ * derived class of this one) happens
+ * to be a sparse matrix, then this
+ * function adds some new entries to
+ * the matrix if they didn't exist
+ * before, very much in contrast to
+ * the SparseMatrix class which
+ * throws an error if the entry does
+ * not exist.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be added anyway or
+ * these should be filtered away and
+ * only non-zero data is added. The
+ * default value is <tt>true</tt>,
+ * i.e., zero values won't be added
+ * into the matrix.
+ */
+ void add (const std::vector<unsigned int> &indices,
+ const FullMatrix<PetscScalar> &full_matrix,
+ const bool elide_zero_values = true);
+
+ /**
+ * Same function as before, but now
+ * including the possibility to use
+ * rectangular full_matrices and
+ * different local-to-global indexing
+ * on rows and columns, respectively.
+ */
+ void add (const std::vector<unsigned int> &row_indices,
+ const std::vector<unsigned int> &col_indices,
+ const FullMatrix<PetscScalar> &full_matrix,
+ const bool elide_zero_values = true);
+
+ /**
+ * Add the entries of
+ * <tt>values</tt> to the elements
+ * in the specified row of the matrix
+ * whose column indices are given by
+ * <tt>col_indices</tt>.
+ *
+ * If the present object (from a
+ * derived class of this one) happens
+ * to be a sparse matrix, then this
+ * function adds some new entries to
+ * the matrix if they didn't exist
+ * before, very much in contrast to
+ * the SparseMatrix class which
+ * throws an error if the entry does
+ * not exist.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be added anyway or
+ * these should be filtered away and
+ * only non-zero data is added. The
+ * default value is <tt>true</tt>,
+ * i.e., zero values won't be added
+ * into the matrix.
+ */
+ void add (const unsigned int row,
+ const std::vector<unsigned int> &col_indices,
+ const std::vector<PetscScalar> &values,
+ const bool elide_zero_values = true);
+
+ /**
+ * Add an array of values given by
+ * <tt>values</tt> in the given
+ * global matrix row at columns
+ * specified by col_indices in the
+ * sparse matrix.
+ *
+ * If the present object (from a
+ * derived class of this one) happens
+ * to be a sparse matrix, then this
+ * function adds some new entries to
+ * the matrix if they didn't exist
+ * before, very much in contrast to
+ * the SparseMatrix class which
+ * throws an error if the entry does
+ * not exist.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be added anyway or
+ * these should be filtered away and
+ * only non-zero data is added. The
+ * default value is <tt>true</tt>,
+ * i.e., zero values won't be added
+ * into the matrix.
+ */
+ void add (const unsigned int row,
+ const unsigned int n_cols,
+ const unsigned int *col_indices,
+ const PetscScalar *values,
+ const bool elide_zero_values = true,
+ const bool col_indices_are_sorted = false);
+
+ /**
+ * Remove all elements from
+ * this <tt>row</tt> by setting
+ * them to zero. The function
+ * does not modify the number
+ * of allocated nonzero
+ * entries, it only sets some
+ * entries to zero. It may drop
+ * them from the sparsity
+ * pattern, though (but retains
+ * the allocated memory in case
+ * new entries are again added
+ * later).
+ *
+ * This operation is used in
+ * eliminating constraints (e.g. due to
+ * hanging nodes) and makes sure that
+ * we can write this modification to
+ * the matrix without having to read
+ * entries (such as the locations of
+ * non-zero elements) from it --
+ * without this operation, removing
+ * constraints on parallel matrices is
+ * a rather complicated procedure.
+ *
+ * The second parameter can be used to
+ * set the diagonal entry of this row
+ * to a value different from zero. The
+ * default is to set it to zero.
+ */
+ void clear_row (const unsigned int row,
+ const PetscScalar new_diag_value = 0);
+
+ /**
+ * Same as clear_row(), except that it
+ * works on a number of rows at once.
+ *
+ * The second parameter can be used to
+ * set the diagonal entries of all
+ * cleared rows to something different
+ * from zero. Note that all of these
+ * diagonal entries get the same value
+ * -- if you want different values for
+ * the diagonal entries, you have to
+ * set them by hand.
+ */
+ void clear_rows (const std::vector<unsigned int> &rows,
+ const PetscScalar new_diag_value = 0);
+
+ /**
+ * PETSc matrices store their own
+ * sparsity patterns. So, in analogy to
+ * our own SparsityPattern class,
+ * this function compresses the
+ * sparsity pattern and allows the
+ * resulting matrix to be used in all
+ * other operations where before only
+ * assembly functions were
+ * allowed. This function must
+ * therefore be called once you have
+ * assembled the matrix.
+ *
+ * See @ref GlossCompress "Compressing distributed objects"
+ * for more information.
+ */
+ void compress (::dealii::VectorOperation::values operation
+ =::dealii::VectorOperation::unknown);
+ /**
+ * Return the value of the entry
+ * (<i>i,j</i>). This may be an
+ * expensive operation and you should
+ * always take care where to call this
+ * function. In contrast to the
+ * respective function in the
+ * @p MatrixBase class, we don't
+ * throw an exception if the respective
+ * entry doesn't exist in the sparsity
+ * pattern of this class, since PETSc
+ * does not transmit this information.
+ *
+ * This function is therefore exactly
+ * equivalent to the <tt>el()</tt> function.
+ */
+ PetscScalar operator () (const unsigned int i,
+ const unsigned int j) const;
+
+ /**
+ * Return the value of the matrix entry
+ * (<i>i,j</i>). If this entry does not
+ * exist in the sparsity pattern, then
+ * zero is returned. While this may be
+ * convenient in some cases, note that
+ * it is simple to write algorithms
+ * that are slow compared to an optimal
+ * solution, since the sparsity of the
+ * matrix is not used.
+ */
+ PetscScalar el (const unsigned int i,
+ const unsigned int j) const;
+
+ /**
+ * Return the main diagonal
+ * element in the <i>i</i>th
+ * row. This function throws an
+ * error if the matrix is not
+ * quadratic.
+ *
+ * Since we do not have direct access
+ * to the underlying data structure,
+ * this function is no faster than the
+ * elementwise access using the el()
+ * function. However, we provide this
+ * function for compatibility with the
+ * SparseMatrix class.
+ */
+ PetscScalar diag_element (const unsigned int i) const;
+
+ /**
+ * Return the number of rows in this
+ * matrix.
+ */
+ unsigned int m () const;
+
+ /**
+ * Return the number of columns in this
+ * matrix.
+ */
+ unsigned int n () const;
+
+ /**
+ * Return the local dimension of the
+ * matrix, i.e. the number of rows
+ * stored on the present MPI
+ * process. For sequential matrices,
+ * this number is the same as m(),
+ * but for parallel matrices it may be
+ * smaller.
+ *
+ * To figure out which elements
+ * exactly are stored locally,
+ * use local_range().
+ */
+ unsigned int local_size () const;
+
+ /**
+ * Return a pair of indices
+ * indicating which rows of
+ * this matrix are stored
+ * locally. The first number is
+ * the index of the first
+ * row stored, the second
+ * the index of the one past
+ * the last one that is stored
+ * locally. If this is a
+ * sequential matrix, then the
+ * result will be the pair
+ * (0,m()), otherwise it will be
+ * a pair (i,i+n), where
+ * <tt>n=local_size()</tt>.
+ */
+ std::pair<unsigned int, unsigned int>
+ local_range () const;
+
+ /**
+ * Return whether @p index is
+ * in the local range or not,
+ * see also local_range().
+ */
+ bool in_local_range (const unsigned int index) const;
+
+ /**
+ * Return a reference to the MPI
+ * communicator object in use with this
+ * matrix. This function has to be
+ * implemented in derived classes.
+ */
+ virtual const MPI_Comm &get_mpi_communicator () const = 0;
+
+ /**
+ * Return the number of nonzero
+ * elements of this
+ * matrix. Actually, it returns
+ * the number of entries in the
+ * sparsity pattern; if any of
+ * the entries should happen to
+ * be zero, it is counted anyway.
+ */
+ unsigned int n_nonzero_elements () const;
+
+ /**
+ * Number of entries in a specific row.
+ */
+ unsigned int row_length (const unsigned int row) const;
+
+ /**
+ * Return the l1-norm of the matrix, that is
+ * $|M|_1=max_{all columns j}\sum_{all
+ * rows i} |M_ij|$,
+ * (max. sum of columns).
+ * This is the
+ * natural matrix norm that is compatible
+ * to the l1-norm for vectors, i.e.
+ * $|Mv|_1\leq |M|_1 |v|_1$.
+ * (cf. Haemmerlin-Hoffmann:
+ * Numerische Mathematik)
+ */
+ PetscReal l1_norm () const;
+
+ /**
+ * Return the linfty-norm of the
+ * matrix, that is
+ * $|M|_infty=max_{all rows i}\sum_{all
+ * columns j} |M_ij|$,
+ * (max. sum of rows).
+ * This is the
+ * natural matrix norm that is compatible
+ * to the linfty-norm of vectors, i.e.
+ * $|Mv|_infty \leq |M|_infty |v|_infty$.
+ * (cf. Haemmerlin-Hoffmann:
+ * Numerische Mathematik)
+ */
+ PetscReal linfty_norm () const;
+
+ /**
+ * Return the frobenius norm of the
+ * matrix, i.e. the square root of the
+ * sum of squares of all entries in the
+ * matrix.
+ */
+ PetscReal frobenius_norm () const;
+
+
+ /**
+ * Return the square of the norm
+ * of the vector $v$ with respect
+ * to the norm induced by this
+ * matrix,
+ * i.e. $\left(v,Mv\right)$. This
+ * is useful, e.g. in the finite
+ * element context, where the
+ * $L_2$ norm of a function
+ * equals the matrix norm with
+ * respect to the mass matrix of
+ * the vector representing the
+ * nodal values of the finite
+ * element function.
+ *
+ * Obviously, the matrix needs to
+ * be quadratic for this operation.
+ *
+ * The implementation of this function
+ * is not as efficient as the one in
+ * the @p MatrixBase class used in
+ * deal.II (i.e. the original one, not
+ * the PETSc wrapper class) since PETSc
+ * doesn't support this operation and
+ * needs a temporary vector.
+ *
+ * Note that if the current object
+ * represents a parallel distributed
+ * matrix (of type
+ * PETScWrappers::MPI::SparseMatrix),
+ * then the given vector has to be
+ * a distributed vector as
+ * well. Conversely, if the matrix is
+ * not distributed, then neither
+ * may the vector be.
+ */
+ PetscScalar matrix_norm_square (const VectorBase &v) const;
+
+
+ /**
+ * Compute the matrix scalar
+ * product $\left(u,Mv\right)$.
+ *
+ * The implementation of this function
+ * is not as efficient as the one in
+ * the @p MatrixBase class used in
+ * deal.II (i.e. the original one, not
+ * the PETSc wrapper class) since PETSc
+ * doesn't support this operation and
+ * needs a temporary vector.
+ *
+ * Note that if the current object
+ * represents a parallel distributed
+ * matrix (of type
+ * PETScWrappers::MPI::SparseMatrix),
+ * then both vectors have to be
+ * distributed vectors as
+ * well. Conversely, if the matrix is
+ * not distributed, then neither of the
+ * vectors may be.
+ */
+ PetscScalar matrix_scalar_product (const VectorBase &u,
+ const VectorBase &v) const;
+
+
+#if DEAL_II_PETSC_VERSION_GTE(3,1,0)
+ /**
+ * Return the trace of the
+ * matrix, i.e. the sum of all
+ * diagonal entries in the
+ * matrix.
+ */
+ PetscReal trace () const;
+#endif
+
+ /**
+ * Multiply the entire matrix by a
+ * fixed factor.
+ */
+ MatrixBase &operator *= (const PetscScalar factor);
+
+ /**
+ * Divide the entire matrix by a
+ * fixed factor.
+ */
+ MatrixBase &operator /= (const PetscScalar factor);
+
+ /**
+ * Matrix-vector multiplication:
+ * let <i>dst = M*src</i> with
+ * <i>M</i> being this matrix.
+ *
+ * Source and destination must
+ * not be the same vector.
+ *
+ * Note that if the current object
+ * represents a parallel distributed
+ * matrix (of type
+ * PETScWrappers::MPI::SparseMatrix),
+ * then both vectors have to be
+ * distributed vectors as
+ * well. Conversely, if the matrix is
+ * not distributed, then neither of the
+ * vectors may be.
+ */
+ void vmult (VectorBase &dst,
+ const VectorBase &src) const;
+
+ /**
+ * Matrix-vector multiplication: let
+ * <i>dst = M<sup>T</sup>*src</i> with
+ * <i>M</i> being this matrix. This
+ * function does the same as vmult()
+ * but takes the transposed matrix.
+ *
+ * Source and destination must
+ * not be the same vector.
+ *
+ * Note that if the current object
+ * represents a parallel distributed
+ * matrix (of type
+ * PETScWrappers::MPI::SparseMatrix),
+ * then both vectors have to be
+ * distributed vectors as
+ * well. Conversely, if the matrix is
+ * not distributed, then neither of the
+ * vectors may be.
+ */
+ void Tvmult (VectorBase &dst,
+ const VectorBase &src) const;
+
+ /**
+ * Adding Matrix-vector
+ * multiplication. Add
+ * <i>M*src</i> on <i>dst</i>
+ * with <i>M</i> being this
+ * matrix.
+ *
+ * Source and destination must
+ * not be the same vector.
+ *
+ * Note that if the current object
+ * represents a parallel distributed
+ * matrix (of type
+ * PETScWrappers::MPI::SparseMatrix),
+ * then both vectors have to be
+ * distributed vectors as
+ * well. Conversely, if the matrix is
+ * not distributed, then neither of the
+ * vectors may be.
+ */
+ void vmult_add (VectorBase &dst,
+ const VectorBase &src) const;
+
+ /**
+ * Adding Matrix-vector
+ * multiplication. Add
+ * <i>M<sup>T</sup>*src</i> to
+ * <i>dst</i> with <i>M</i> being
+ * this matrix. This function
+ * does the same as vmult_add()
+ * but takes the transposed
+ * matrix.
+ *
+ * Source and destination must
+ * not be the same vector.
+ *
+ * Note that if the current object
+ * represents a parallel distributed
+ * matrix (of type
+ * PETScWrappers::MPI::SparseMatrix),
+ * then both vectors have to be
+ * distributed vectors as
+ * well. Conversely, if the matrix is
+ * not distributed, then neither of the
+ * vectors may be.
+ */
+ void Tvmult_add (VectorBase &dst,
+ const VectorBase &src) const;
+
+
+ /**
+ * Compute the residual of an
+ * equation <i>Mx=b</i>, where
+ * the residual is defined to be
+ * <i>r=b-Mx</i>. Write the
+ * residual into
+ * @p dst. The
+ * <i>l<sub>2</sub></i> norm of
+ * the residual vector is
+ * returned.
+ *
+ * Source <i>x</i> and destination
+ * <i>dst</i> must not be the same
+ * vector.
+ *
+ * Note that if the current object
+ * represents a parallel distributed
+ * matrix (of type
+ * PETScWrappers::MPI::SparseMatrix),
+ * then all vectors have to be
+ * distributed vectors as
+ * well. Conversely, if the matrix is
+ * not distributed, then neither of the
+ * vectors may be.
+ */
+ PetscScalar residual (VectorBase &dst,
+ const VectorBase &x,
+ const VectorBase &b) const;
+
+ /**
+ * STL-like iterator with the
+ * first entry.
+ */
+ const_iterator begin () const;
+
+ /**
+ * Final iterator.
+ */
+ const_iterator end () const;
+
+ /**
+ * STL-like iterator with the
+ * first entry of row @p r.
+ *
+ * Note that if the given row is empty,
+ * i.e. does not contain any nonzero
+ * entries, then the iterator returned by
+ * this function equals
+ * <tt>end(r)</tt>. Note also that the
+ * iterator may not be dereferencable in
+ * that case.
+ */
+ const_iterator begin (const unsigned int r) const;
+
+ /**
+ * Final iterator of row <tt>r</tt>. It
+ * points to the first element past the
+ * end of line @p r, or past the end of
+ * the entire sparsity pattern.
+ *
+ * Note that the end iterator is not
+ * necessarily dereferencable. This is in
+ * particular the case if it is the end
+ * iterator for the last row of a matrix.
+ */
+ const_iterator end (const unsigned int r) const;
+
+ /**
+ * Conversion operator to gain access
+ * to the underlying PETSc type. If you
+ * do this, you cut this class off some
+ * information it may need, so this
+ * conversion operator should only be
+ * used if you know what you do. In
+ * particular, it should only be used
+ * for read-only operations into the
+ * matrix.
+ */
+ operator Mat () const;
+
+ /**
+ * Make an in-place transpose of a
+ * matrix.
+ */
+ void transpose ();
+
+ /**
+ * Test whether a matrix is
+ * symmetric. The default
+ * tolerance is 1.e-12.
+ */
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+ PetscTruth
+#else
+ PetscBool
+#endif
+ is_symmetric (const double tolerance = 1.e-12);
+
+ /**
+ * Test whether a matrix is
+ * Hermitian, i.e. it is the
+ * complex conjugate of its
+ * transpose. The default
+ * tolerance is 1.e-12.
+ */
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+ PetscTruth
+#else
+ PetscBool
+#endif
+ is_hermitian (const double tolerance = 1.e-12);
+
+ /**
+ * Abstract PETSc object that helps view
+ * in ASCII other PETSc objects. Currently
+ * this function simply writes non-zero
+ * elements of a matrix to the terminal.
+ */
+ void write_ascii () const; // shuqiangwang
+
+ /**
+ * Return the number of bytes consumed
+ * by this matrix on this CPU.
+ */
+ std::size_t memory_consumption() const;
+
+ /**
+ * Exception
+ */
+ DeclException1 (ExcPETScError,
+ int,
+ << "An error with error number " << arg1
+ << " occurred while calling a PETSc function");
+ /**
+ * Exception
+ */
+ DeclException0 (ExcSourceEqualsDestination);
+
+ /**
+ * Exception.
+ */
+ DeclException2 (ExcWrongMode,
+ int, int,
+ << "You tried to do a "
+ << (arg1 == 1 ?
+ "'set'" :
+ (arg1 == 2 ?
+ "'add'" : "???"))
+ << " operation but the matrix is currently in "
+ << (arg2 == 1 ?
+ "'set'" :
+ (arg2 == 2 ?
+ "'add'" : "???"))
+ << " mode. You first have to call 'compress()'.");
+
+ protected:
+ /**
+ * A generic matrix object in
+ * PETSc. The actual type, a sparse
+ * matrix, is set in the constructor.
+ */
+ Mat matrix;
+
+ /**
+ * PETSc doesn't allow to mix additions
+ * to matrix entries and overwriting
+ * them (to make synchronisation of
+ * parallel computations
+ * simpler). Since the interface of the
+ * existing classes don't support the
+ * notion of not interleaving things,
+ * we have to emulate this
+ * ourselves. The way we do it is to,
+ * for each access operation, store
+ * whether it is an insertion or an
+ * addition. If the previous one was of
+ * different type, then we first have
+ * to flush the PETSc buffers;
+ * otherwise, we can simply go on.
+ *
+ * The following structure and variable
+ * declare and store the previous
+ * state.
+ */
+ struct LastAction
+ {
+ enum Values { none, insert, add };
+ };
+
+ /**
+ * Store whether the last action was a
+ * write or add operation.
+ */
+ LastAction::Values last_action;
+
+ /**
+ * Ensure that the add/set mode that
+ * is required for actions following
+ * this call is compatible with the
+ * current mode.
+ * Should be called from all internal
+ * functions accessing matrix elements.
+ */
+ void prepare_action(const LastAction::Values new_action);
+
+ /**
+ * For some matrix storage
+ * formats, in particular for the
+ * PETSc distributed blockmatrices,
+ * set and add operations on
+ * individual elements can not be
+ * freely mixed. Rather, one has
+ * to synchronize operations when
+ * one wants to switch from
+ * setting elements to adding to
+ * elements.
+ * BlockMatrixBase automatically
+ * synchronizes the access by
+ * calling this helper function
+ * for each block.
+ * This function ensures that the
+ * matrix is in a state that
+ * allows adding elements; if it
+ * previously already was in this
+ * state, the function does
+ * nothing.
+ */
+ void prepare_add();
+ /**
+ * Same as prepare_add() but
+ * prepare the matrix for setting
+ * elements if the representation
+ * of elements in this class
+ * requires such an operation.
+ */
+ void prepare_set();
+
+
+
+ private:
+ /**
+ * An internal array of integer
+ * values that is used to store the
+ * column indices when
+ * adding/inserting local data into
+ * the (large) sparse matrix.
+ */
+#ifdef PETSC_USE_64BIT_INDICES
+ std::vector<PetscInt> column_indices;
+#else
+ std::vector<int> column_indices;
+#endif
+
+ /**
+ * An internal array of PetscScalar
+ * values that is used to store the
+ * column values when adding/inserting
+ * local data into the (large) sparse
+ * matrix.
+ */
+ std::vector<PetscScalar> column_values;
+
+
+ /**
+ * To allow calling protected
+ * prepare_add() and
+ * prepare_set().
+ */
+ template <class> friend class dealii::BlockMatrixBase;
+
+
+public: // added by shuqiangwang
+ void copy_from(const MatrixBase &source);
+ void add(double factor, const MatrixBase &source);
+ };
+
+
+
+#ifndef DOXYGEN
+// -------------------------- inline and template functions ----------------------
+
+
+ namespace MatrixIterators
+ {
+
+ // Build an accessor that refers to entry number `index` within row `row`
+ // of `matrix`. The matrix pointer is stored without its const qualifier.
+ inline
+ const_iterator::Accessor::
+ Accessor (const MatrixBase *matrix,
+           const unsigned int row,
+           const unsigned int index)
+   :
+   matrix (const_cast<MatrixBase *> (matrix)),
+   a_row (row),
+   a_index (index)
+ {
+   // Generate the column/value caches for the row we start out on.
+   this->visit_present_row ();
+ }
+
+
+ // Row number of the matrix entry this accessor currently refers to.
+ inline
+ unsigned int
+ const_iterator::Accessor::row() const
+ {
+   // An accessor positioned beyond the last row has no row to report.
+   Assert (this->a_row < this->matrix->m(), ExcBeyondEndOfMatrix());
+
+   return this->a_row;
+ }
+
+
+ // Column number of the matrix entry this accessor currently refers to,
+ // looked up in the cached column indices of the present row.
+ inline
+ unsigned int
+ const_iterator::Accessor::column() const
+ {
+   Assert (this->a_row < this->matrix->m(), ExcBeyondEndOfMatrix());
+
+   const std::vector<unsigned int> &cached_columns = *colnum_cache;
+   return cached_columns[a_index];
+ }
+
+
+ // Position of the current entry within its row.
+ inline
+ unsigned int
+ const_iterator::Accessor::index() const
+ {
+   // Only meaningful while the accessor still points into the matrix.
+   Assert (this->a_row < this->matrix->m(), ExcBeyondEndOfMatrix());
+
+   return this->a_index;
+ }
+
+
+ // Value of the matrix entry this accessor currently refers to, read from
+ // the cached values of the present row.
+ inline
+ PetscScalar
+ const_iterator::Accessor::value() const
+ {
+   Assert (this->a_row < this->matrix->m(), ExcBeyondEndOfMatrix());
+
+   const std::vector<PetscScalar> &cached_values = *value_cache;
+   return cached_values[a_index];
+ }
+
+
+ // Create an iterator into `matrix` for the given row and the index within
+ // it; all state lives in the embedded accessor object.
+ inline
+ const_iterator::
+ const_iterator (const MatrixBase *matrix,
+                 const unsigned int row,
+                 const unsigned int index)
+   :
+   accessor (matrix, row, index)
+ {
+ }
+
+
+
+ // Prefix increment: advance to the next entry of the current row or, when
+ // the row is exhausted, to the first entry of the next non-empty row.
+ inline
+ const_iterator &
+ const_iterator::operator++ ()
+ {
+   Assert (accessor.a_row < accessor.matrix->m(), ExcIteratorPastEnd());
+
+   ++accessor.a_index;
+
+   // Still inside the cached row: nothing else to update.
+   if (accessor.a_index < accessor.colnum_cache->size())
+     return *this;
+
+   // We ran off the end of the row: restart at index zero, step to the
+   // following row and keep going while rows have no entries at all.
+   accessor.a_index = 0;
+   for (++accessor.a_row;
+        accessor.a_row < accessor.matrix->m();
+        ++accessor.a_row)
+     if (accessor.matrix->row_length(accessor.a_row) != 0)
+       break;
+
+   // Regenerate the row caches for the row we ended up on.
+   accessor.visit_present_row();
+
+   return *this;
+ }
+
+
+ // Postfix increment: advance this iterator and hand back a copy of the
+ // state it had before the step.
+ inline
+ const_iterator
+ const_iterator::operator++ (int)
+ {
+   const const_iterator previous (*this);
+   this->operator++ ();
+   return previous;
+ }
+
+
+ // Dereference: give read access to the accessor stored in this iterator.
+ inline
+ const const_iterator::Accessor &
+ const_iterator::operator* () const
+ {
+   return this->accessor;
+ }
+
+
+ // Member access: return the address of the accessor stored in this
+ // iterator.
+ inline
+ const const_iterator::Accessor *
+ const_iterator::operator-> () const
+ {
+   return &(this->accessor);
+ }
+
+
+ // Two iterators compare equal when both their row and their index within
+ // the row agree. The matrix pointers are not compared.
+ inline
+ bool
+ const_iterator::
+ operator == (const const_iterator &other) const
+ {
+   if (accessor.a_row != other.accessor.a_row)
+     return false;
+
+   return accessor.a_index == other.accessor.a_index;
+ }
+
+
+ // Negation of operator==.
+ inline
+ bool
+ const_iterator::
+ operator != (const const_iterator &other) const
+ {
+   const bool same_position = (*this == other);
+   return !same_position;
+ }
+
+
+ // Ordering of iterators: the result is true if this iterator's row number
+ // is smaller than that of `other`, or if the row numbers are equal and
+ // this iterator's index within the row is smaller.
+ //
+ // The stored position is read directly, as operator== does, instead of
+ // through Accessor::row()/index(). Those getters assert that the accessor
+ // is not positioned beyond the last row, so going through them would trip
+ // that assertion whenever one of the two operands is a past-the-end
+ // iterator, although the ordering is well defined in that case.
+ inline
+ bool
+ const_iterator::
+ operator < (const const_iterator &other) const
+ {
+   if (accessor.a_row != other.accessor.a_row)
+     return accessor.a_row < other.accessor.a_row;
+
+   return accessor.a_index < other.accessor.a_index;
+ }
+
+ }
+
+
+
+ // Inline the set() and add()
+ // functions, since they will be
+ // called frequently, and the
+ // compiler can optimize away
+ // some unnecessary loops when
+ // the sizes are given at
+ // compile time.
+ // Set the single element (i,j) to `value` by forwarding to the
+ // pointer-based set() with a row of length one.
+ inline
+ void
+ MatrixBase::set (const unsigned int i,
+                  const unsigned int j,
+                  const PetscScalar value)
+ {
+   Assert (numbers::is_finite(value), ExcNumberNotFinite());
+
+   // Exactly one column/value pair; zero values are not filtered out.
+   const unsigned int n_entries = 1;
+   const bool elide_zeros = false;
+   set (i, n_entries, &j, &value, elide_zeros);
+ }
+
+
+
+ // Write a square local matrix into this matrix, using `indices` as the
+ // local-to-global map for both rows and columns. Each local row is handed
+ // to the pointer-based set() in one call.
+ inline
+ void
+ MatrixBase::set (const std::vector<unsigned int> &indices,
+                  const FullMatrix<PetscScalar> &values,
+                  const bool elide_zero_values)
+ {
+   Assert (indices.size() == values.m(),
+           ExcDimensionMismatch(indices.size(), values.m()));
+   Assert (values.m() == values.n(), ExcNotQuadratic());
+
+   const std::size_t n_local = indices.size();
+   for (unsigned int local_row=0; local_row<n_local; ++local_row)
+     {
+       // Global row number, then the column map and the start of the
+       // corresponding row of the local matrix.
+       const unsigned int global_row = indices[local_row];
+       set (global_row, n_local, &indices[0], &values(local_row,0),
+            elide_zero_values);
+     }
+ }
+
+
+
+ // Write a (possibly rectangular) local matrix into this matrix, using
+ // separate local-to-global maps for rows and columns. Each local row is
+ // handed to the pointer-based set() in one call.
+ inline
+ void
+ MatrixBase::set (const std::vector<unsigned int> &row_indices,
+                  const std::vector<unsigned int> &col_indices,
+                  const FullMatrix<PetscScalar> &values,
+                  const bool elide_zero_values)
+ {
+   Assert (row_indices.size() == values.m(),
+           ExcDimensionMismatch(row_indices.size(), values.m()));
+   Assert (col_indices.size() == values.n(),
+           ExcDimensionMismatch(col_indices.size(), values.n()));
+
+   const std::size_t n_local_rows = row_indices.size();
+   for (unsigned int local_row=0; local_row<n_local_rows; ++local_row)
+     {
+       const unsigned int global_row = row_indices[local_row];
+       set (global_row, col_indices.size(), &col_indices[0],
+            &values(local_row,0), elide_zero_values);
+     }
+ }
+
+
+
+ inline
+ void
+ MatrixBase::set (const unsigned int row,
+ const std::vector<unsigned int> &col_indices,
+ const std::vector<PetscScalar> &values,
+ const bool elide_zero_values)
+ {
+ Assert (col_indices.size() == values.size(),
+ ExcDimensionMismatch(col_indices.size(), values.size()));
+
+ // hand the raw arrays behind the two vectors to the pointer-based overload
+ const unsigned int *const column_ptr = &col_indices[0];
+ const PetscScalar *const value_ptr = &values[0];
+ set (row, col_indices.size(), column_ptr, value_ptr,
+ elide_zero_values);
+ }
+
+
+
+ inline
+ void
+ MatrixBase::set (const unsigned int row,
+ const unsigned int n_cols,
+ const unsigned int *col_indices,
+ const PetscScalar *values,
+ const bool elide_zero_values)
+ {
+ // register that this object is now being written to in "insert" mode
+ prepare_action(LastAction::insert);
+
+ // row number and column pointer in the index type PETSc is built with
+#ifdef PETSC_USE_64BIT_INDICES
+ const PetscInt petsc_row = row;
+ PetscInt *index_ptr;
+#else
+ const int petsc_row = row;
+ int *index_ptr;
+#endif
+ PetscScalar const *value_ptr;
+ int n_entries;
+
+ // When zeros are kept (and PETSc indices are plain ints), the
+ // caller's arrays are handed to PETSc unchanged.
+#ifndef PETSC_USE_64BIT_INDICES
+ if (!elide_zero_values)
+ {
+ index_ptr = (int *)col_indices;
+ value_ptr = values;
+ n_entries = n_cols;
+ }
+ else
+#endif
+ {
+ // Otherwise copy the nonzero entries, together with their
+ // column numbers, into the scratch arrays column_indices and
+ // column_values, growing them first if they are too short.
+ if (column_indices.size() < n_cols)
+ {
+ column_indices.resize(n_cols);
+ column_values.resize(n_cols);
+ }
+
+ n_entries = 0;
+ for (unsigned int col=0; col<n_cols; ++col)
+ {
+ const PetscScalar entry = values[col];
+ Assert (numbers::is_finite(entry), ExcNumberNotFinite());
+ if (entry == PetscScalar())
+ continue;
+
+ column_indices[n_entries] = col_indices[col];
+ column_values[n_entries] = entry;
+ ++n_entries;
+ }
+ Assert(n_entries <= static_cast<int>(n_cols), ExcInternalError());
+
+ index_ptr = &column_indices[0];
+ value_ptr = &column_values[0];
+ }
+
+ const int ierr
+ = MatSetValues (matrix, 1, &petsc_row, n_entries, index_ptr,
+ value_ptr, INSERT_VALUES);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+
+ inline
+ void
+ MatrixBase::add (const unsigned int i,
+ const unsigned int j,
+ const PetscScalar value)
+ {
+ Assert (numbers::is_finite(value), ExcNumberNotFinite());
+
+ // a nonzero addend is passed on to the row-wise overload, which
+ // also takes care of registering the "add" mode
+ if (value != PetscScalar())
+ {
+ add (i, 1, &j, &value, false);
+ return;
+ }
+
+ // Adding zero changes nothing in the matrix, so the actual write
+ // is skipped. The insert/add bookkeeping must still be done to
+ // stay consistent with the MPI communication model (see the
+ // comments in the documentation of TrilinosWrappers::Vector).
+ prepare_action(LastAction::add);
+ }
+
+
+
+ inline
+ void
+ MatrixBase::add (const std::vector<unsigned int> &indices,
+ const FullMatrix<PetscScalar> &values,
+ const bool elide_zero_values)
+ {
+ const std::vector<unsigned int>::size_type n_indices = indices.size();
+
+ Assert (n_indices == values.m(),
+ ExcDimensionMismatch(n_indices, values.m()));
+ Assert (values.m() == values.n(), ExcNotQuadratic());
+
+ // the same index list addresses rows and columns: accumulate the
+ // square block one matrix row at a time
+ for (unsigned int row=0; row<n_indices; ++row)
+ add (indices[row], n_indices, &indices[0], &values(row,0),
+ elide_zero_values);
+ }
+
+
+
+ inline
+ void
+ MatrixBase::add (const std::vector<unsigned int> &row_indices,
+ const std::vector<unsigned int> &col_indices,
+ const FullMatrix<PetscScalar> &values,
+ const bool elide_zero_values)
+ {
+ const std::vector<unsigned int>::size_type n_rows = row_indices.size();
+ const std::vector<unsigned int>::size_type n_columns = col_indices.size();
+
+ Assert (n_rows == values.m(),
+ ExcDimensionMismatch(n_rows, values.m()));
+ Assert (n_columns == values.n(),
+ ExcDimensionMismatch(n_columns, values.n()));
+
+ // accumulate the rectangular block one matrix row at a time
+ for (unsigned int r=0; r<n_rows; ++r)
+ add (row_indices[r], n_columns, &col_indices[0], &values(r,0),
+ elide_zero_values);
+ }
+
+
+
+ inline
+ void
+ MatrixBase::add (const unsigned int row,
+ const std::vector<unsigned int> &col_indices,
+ const std::vector<PetscScalar> &values,
+ const bool elide_zero_values)
+ {
+ Assert (col_indices.size() == values.size(),
+ ExcDimensionMismatch(col_indices.size(), values.size()));
+
+ // hand the raw arrays behind the two vectors to the pointer-based overload
+ const unsigned int *const column_ptr = &col_indices[0];
+ const PetscScalar *const value_ptr = &values[0];
+ add (row, col_indices.size(), column_ptr, value_ptr,
+ elide_zero_values);
+ }
+
+
+
+ // Add the @p n_cols entries of @p values to row @p row, at the columns
+ // listed in @p col_indices. If @p elide_zero_values is set, entries equal
+ // to zero are not passed on to PETSc. The last (unnamed) flag about sorted
+ // column indices is ignored by this implementation.
+ //
+ // A failure reported by MatSetValues() raises ExcPETScError via
+ // AssertThrow, i.e. it is not restricted to debug builds. This matches
+ // the error handling of the corresponding set() function.
+ inline
+ void
+ MatrixBase::add (const unsigned int row,
+ const unsigned int n_cols,
+ const unsigned int *col_indices,
+ const PetscScalar *values,
+ const bool elide_zero_values,
+ const bool /*col_indices_are_sorted*/)
+ {
+ // register that this object is now being written to in "add" mode
+ prepare_action(LastAction::add);
+
+ // row number and column pointer in the index type PETSc is built with
+#ifdef PETSC_USE_64BIT_INDICES
+ const PetscInt petsc_i = row;
+ PetscInt *col_index_ptr;
+#else
+ const int petsc_i = row;
+ int *col_index_ptr;
+#endif
+ PetscScalar const *col_value_ptr;
+ int n_columns;
+
+ // If we don't elide zeros (and PETSc indices are plain ints), the
+ // caller's arrays can be handed to PETSc unchanged...
+#ifndef PETSC_USE_64BIT_INDICES
+ if (elide_zero_values == false)
+ {
+ col_index_ptr = (int *)col_indices;
+ col_value_ptr = values;
+ n_columns = n_cols;
+ }
+ else
+#endif
+ {
+ // Otherwise, copy the nonzero values of this row and their
+ // column numbers into the scratch arrays column_indices and
+ // column_values, growing them first if they are too short.
+ if (column_indices.size() < n_cols)
+ {
+ column_indices.resize(n_cols);
+ column_values.resize(n_cols);
+ }
+
+ n_columns = 0;
+ for (unsigned int j=0; j<n_cols; ++j)
+ {
+ const PetscScalar value = values[j];
+ Assert (numbers::is_finite(value), ExcNumberNotFinite());
+ if (value != PetscScalar())
+ {
+ column_indices[n_columns] = col_indices[j];
+ column_values[n_columns] = value;
+ n_columns++;
+ }
+ }
+ Assert(n_columns <= (int)n_cols, ExcInternalError());
+
+ col_index_ptr = &column_indices[0];
+ col_value_ptr = &column_values[0];
+ }
+
+ const int ierr
+ = MatSetValues (matrix, 1, &petsc_i, n_columns, col_index_ptr,
+ col_value_ptr, ADD_VALUES);
+ // AssertThrow rather than Assert: a PETSc failure must not go
+ // unnoticed when debug assertions are compiled out
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+
+
+
+
+ inline
+ PetscScalar
+ MatrixBase::operator() (const unsigned int i,
+ const unsigned int j) const
+ {
+ // element access simply delegates to el()
+ const PetscScalar entry = el (i, j);
+ return entry;
+ }
+
+
+
+ inline
+ MatrixBase::const_iterator
+ MatrixBase::begin() const
+ {
+ // start at row 0, index 0
+ const_iterator first (this, 0, 0);
+ return first;
+ }
+
+
+ inline
+ MatrixBase::const_iterator
+ MatrixBase::end() const
+ {
+ // the past-the-end position is (row m(), index 0)
+ const_iterator past_the_end (this, m(), 0);
+ return past_the_end;
+ }
+
+
+ inline
+ MatrixBase::const_iterator
+ MatrixBase::begin(const unsigned int r) const
+ {
+ Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+ // an empty row has no first entry; its begin coincides with its end
+ return (row_length(r) > 0
+ ?
+ const_iterator(this, r, 0)
+ :
+ end (r));
+ }
+
+
+ inline
+ MatrixBase::const_iterator
+ MatrixBase::end(const unsigned int r) const
+ {
+ Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+ // the end of row r is the first entry of the next non-empty
+ // row, so walk forward until such a row is found
+ unsigned int next_row = r+1;
+ while (next_row < m())
+ {
+ if (row_length(next_row) > 0)
+ return const_iterator(this, next_row, 0);
+ ++next_row;
+ }
+
+ // no non-empty row follows: use the end iterator of the
+ // whole matrix
+ return end();
+ }
+
+
+
+ inline
+ bool
+ MatrixBase::in_local_range (const unsigned int index) const
+ {
+ // bounds of the ownership range, filled in by PETSc below
+#ifdef PETSC_USE_64BIT_INDICES
+ PetscInt first_row, past_last_row;
+#else
+ int first_row, past_last_row;
+#endif
+ const int ierr = MatGetOwnershipRange (static_cast<const Mat &>(matrix),
+ &first_row, &past_last_row);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // the index is local if it lies in [first_row, past_last_row)
+ const unsigned int lower = static_cast<unsigned int>(first_row);
+ const unsigned int upper = static_cast<unsigned int>(past_last_row);
+ return (lower <= index) && (index < upper);
+ }
+
+
+
+ inline
+ void
+ MatrixBase::prepare_action(const LastAction::Values new_action)
+ {
+ // repeating the action already in progress needs no bookkeeping
+ if (last_action == new_action)
+ return;
+
+ // no action recorded so far: remember the requested one
+ if (last_action == LastAction::none)
+ {
+ last_action = new_action;
+ return;
+ }
+
+ // a different action is already recorded: flag the mix-up
+ Assert (false, ExcWrongMode (last_action, new_action));
+ }
+
+
+
+ inline
+ void
+ MatrixBase::prepare_add()
+ {
+ // shorthand for announcing an upcoming "add" operation
+ const LastAction::Values requested = LastAction::add;
+ prepare_action(requested);
+ }
+
+
+
+ inline
+ void
+ MatrixBase::prepare_set()
+ {
+ // shorthand for announcing an upcoming "insert" operation
+ const LastAction::Values requested = LastAction::insert;
+ prepare_action(requested);
+ }
+
+#endif // DOXYGEN
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_USE_PETSC
+
+
+/*---------------------------- petsc_matrix_base.h ---------------------------*/
+
+#endif
+/*---------------------------- petsc_matrix_base.h ---------------------------*/
Added: branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_parallel_block_vector.h
===================================================================
--- branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_parallel_block_vector.h (rev 0)
+++ branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_parallel_block_vector.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,528 @@
+//---------------------------------------------------------------------------
+// $Id: petsc_parallel_block_vector.h 27628 2012-11-20 22:49:26Z heister $
+//
+// Copyright (C) 2004, 2005, 2006, 2007, 2009, 2010, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+#ifndef __deal2__petsc_parallel_block_vector_h
+#define __deal2__petsc_parallel_block_vector_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_USE_PETSC
+
+# include <deal.II/lac/petsc_parallel_vector.h>
+# include <deal.II/lac/block_indices.h>
+# include <deal.II/lac/block_vector_base.h>
+# include <deal.II/lac/exceptions.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace PETScWrappers
+{
+ // forward declaration
+ class BlockVector;
+
+ namespace MPI
+ {
+
+ /*! @addtogroup PETScWrappers
+ *@{
+ */
+
+ /**
+ * An implementation of block vectors based on the parallel vector class
+ * implemented in PETScWrappers. While the base class provides for most of the
+ * interface, this class handles the actual allocation of vectors and provides
+ * functions that are specific to the underlying vector type.
+ *
+ * The model of distribution of data is such that each of the blocks is
+ * distributed across all MPI processes named in the MPI communicator. I.e. we
+ * don't just distribute the whole vector, but each component. In the
+ * constructors and reinit() functions, one therefore not only has to specify
+ * the sizes of the individual blocks, but also the number of elements of each
+ * of these blocks to be stored on the local process.
+ *
+ * @ingroup Vectors
+ * @see @ref GlossBlockLA "Block (linear algebra)"
+ * @author Wolfgang Bangerth, 2004
+ */
+ class BlockVector : public BlockVectorBase<Vector>
+ {
+ public:
+ /**
+ * Typedef the base class for simpler
+ * access to its own typedefs.
+ */
+ typedef BlockVectorBase<Vector> BaseClass;
+
+ /**
+ * Typedef the type of the underlying
+ * vector.
+ */
+ typedef BaseClass::BlockType BlockType;
+
+ /**
+ * Import the typedefs from the base
+ * class.
+ */
+ typedef BaseClass::value_type value_type;
+ typedef BaseClass::pointer pointer;
+ typedef BaseClass::const_pointer const_pointer;
+ typedef BaseClass::reference reference;
+ typedef BaseClass::const_reference const_reference;
+ typedef BaseClass::size_type size_type;
+ typedef BaseClass::iterator iterator;
+ typedef BaseClass::const_iterator const_iterator;
+
+ /**
+ * Default constructor. Generate an
+ * empty vector without any blocks.
+ */
+ BlockVector ();
+
+ /**
+ * Constructor. Generate a block
+ * vector with @p n_blocks blocks,
+ * each of which is a parallel
+ * vector across @p communicator
+ * with @p block_size elements of
+ * which @p local_size elements are
+ * stored on the present process.
+ */
+ explicit BlockVector (const unsigned int n_blocks,
+ const MPI_Comm &communicator,
+ const unsigned int block_size,
+ const unsigned int local_size);
+
+ /**
+ * Copy-Constructor. Set all the
+ * properties of the parallel vector
+ * to those of the given argument and
+ * copy the elements.
+ */
+ BlockVector (const BlockVector &V);
+
+ /**
+ * Constructor. Set the number of
+ * blocks to
+ * <tt>block_sizes.size()</tt> and
+ * initialize each block with
+ * <tt>block_sizes[i]</tt> zero
+ * elements. The individual blocks
+ * are distributed across the given
+ * communicator, and each store
+ * <tt>local_elements[i]</tt>
+ * elements on the present process.
+ */
+ BlockVector (const std::vector<unsigned int> &block_sizes,
+ const MPI_Comm &communicator,
+ const std::vector<unsigned int> &local_elements);
+
+ /**
+ * Destructor. Clears memory
+ */
+ ~BlockVector ();
+
+ /**
+ * Copy operator: fill all components
+ * of the vector that are locally
+ * stored with the given scalar value.
+ */
+ BlockVector &operator = (const value_type s);
+
+ /**
+ * Copy operator for arguments of the
+ * same type.
+ */
+ BlockVector &
+ operator= (const BlockVector &V);
+
+ /**
+ * Copy the given sequential
+ * (non-distributed) block vector
+ * into the present parallel block
+ * vector. It is assumed that they
+ * have the same size, and this
+ * operation does not change the
+ * partitioning of the parallel
+ * vectors by which its elements are
+ * distributed across several MPI
+ * processes. What this operation
+ * therefore does is to copy that
+ * chunk of the given vector @p v
+ * that corresponds to elements of
+ * the target vector that are stored
+ * locally, and copies them, for each
+ * of the individual blocks of this
+ * object. Elements that are not
+ * stored locally are not touched.
+ *
+ * This being a parallel vector, you
+ * must make sure that @em all
+ * processes call this function at
+ * the same time. It is not possible
+ * to change the local part of a
+ * parallel vector on only one
+ * process, independent of what other
+ * processes do, with this function.
+ */
+ BlockVector &
+ operator = (const PETScWrappers::BlockVector &v);
+
+ /**
+ * Reinitialize the BlockVector to
+ * contain @p n_blocks of size @p
+ * block_size, each of which stores
+ * @p local_size elements
+ * locally. The @p communicator
+ * argument denotes which MPI channel
+ * each of these blocks shall
+ * communicate.
+ *
+ * If <tt>fast==false</tt>, the vector
+ * is filled with zeros.
+ */
+ void reinit (const unsigned int n_blocks,
+ const MPI_Comm &communicator,
+ const unsigned int block_size,
+ const unsigned int local_size,
+ const bool fast = false);
+
+ /**
+ * Reinitialize the BlockVector such
+ * that it contains
+ * <tt>block_sizes.size()</tt>
+ * blocks. Each block is
+ * reinitialized to dimension
+ * <tt>block_sizes[i]</tt>. Each of
+ * them stores
+ * <tt>local_sizes[i]</tt> elements
+ * on the present process.
+ *
+ * If the number of blocks is the
+ * same as before this function
+ * was called, all vectors remain
+ * the same and reinit() is
+ * called for each vector.
+ *
+ * If <tt>fast==false</tt>, the vector
+ * is filled with zeros.
+ *
+ * Note that you must call this
+ * (or the other reinit()
+ * functions) function, rather
+ * than calling the reinit()
+ * functions of an individual
+ * block, to allow the block
+ * vector to update its caches of
+ * vector sizes. If you call
+ * reinit() of one of the
+ * blocks, then subsequent
+ * actions on this object may
+ * yield unpredictable results
+ * since they may be routed to
+ * the wrong block.
+ */
+ void reinit (const std::vector<unsigned int> &block_sizes,
+ const MPI_Comm &communicator,
+ const std::vector<unsigned int> &local_sizes,
+ const bool fast=false);
+ /** Set the block structure to <tt>block_sizes.size()</tt> blocks. NOTE(review): the inline definition does not use @p communicator and does not reinit() the individual blocks -- confirm this is intended. */
+ void reinit (const std::vector<unsigned int> &block_sizes,
+ const MPI_Comm &communicator); // added by shuqiangwang
+
+ /**
+ * Change the dimension to that
+ * of the vector <tt>V</tt>. The same
+ * applies as for the other
+ * reinit() function.
+ *
+ * The elements of <tt>V</tt> are not
+ * copied, i.e. this function is
+ * the same as calling <tt>reinit
+ * (V.size(), fast)</tt>.
+ *
+ * Note that you must call this
+ * (or the other reinit()
+ * functions) function, rather
+ * than calling the reinit()
+ * functions of an individual
+ * block, to allow the block
+ * vector to update its caches of
+ * vector sizes. If you call
+ * reinit() on one of the
+ * blocks, then subsequent
+ * actions on this object may
+ * yield unpredictable results
+ * since they may be routed to
+ * the wrong block.
+ */
+ void reinit (const BlockVector &V,
+ const bool fast=false);
+
+ /**
+ * Return a reference to the MPI
+ * communicator object in use with
+ * this vector.
+ */
+ const MPI_Comm &get_mpi_communicator () const;
+
+ /**
+ * Swap the contents of this
+ * vector and the other vector
+ * <tt>v</tt>. One could do this
+ * operation with a temporary
+ * variable and copying over the
+ * data elements, but this
+ * function is significantly more
+ * efficient since it only swaps
+ * the pointers to the data of
+ * the two vectors and therefore
+ * does not need to allocate
+ * temporary storage and move
+ * data around.
+ *
+ * Limitation: right now this
+ * function only works if both
+ * vectors have the same number
+ * of blocks. If needed, the
+ * numbers of blocks should be
+ * exchanged, too.
+ *
+ * This function is analogous to the
+ * swap() function of all C++
+ * standard containers. Also,
+ * there is a global function
+ * swap(u,v) that simply calls
+ * <tt>u.swap(v)</tt>, again in analogy
+ * to standard functions.
+ */
+ void swap (BlockVector &v);
+
+ /**
+ * Print to a stream.
+ */
+ void print (std::ostream &out,
+ const unsigned int precision = 3,
+ const bool scientific = true,
+ const bool across = true) const;
+
+ /**
+ * Exception
+ */
+ DeclException0 (ExcIteratorRangeDoesNotMatchVectorSize);
+ /**
+ * Exception
+ */
+ DeclException0 (ExcNonMatchingBlockVectors);
+ };
+
+ /*@}*/
+
+ /*----------------------- Inline functions ----------------------------------*/
+
+
+ inline
+ BlockVector::BlockVector ()
+ {} // intentionally empty: nothing is set up explicitly in this constructor
+
+
+
+ inline
+ BlockVector::BlockVector (const unsigned int n_blocks,
+ const MPI_Comm &communicator,
+ const unsigned int block_size,
+ const unsigned int local_size)
+ {
+ // all the work is done by reinit(); the last argument spells out
+ // that function's default of not using the "fast" mode
+ const bool fast = false;
+ this->reinit (n_blocks, communicator, block_size, local_size, fast);
+ }
+
+
+
+ inline
+ BlockVector::BlockVector (const std::vector<unsigned int> &block_sizes,
+ const MPI_Comm &communicator,
+ const std::vector<unsigned int> &local_elements)
+ {
+ // all the work is done by reinit(), without the "fast" mode
+ const bool fast = false;
+ this->reinit (block_sizes, communicator, local_elements, fast);
+ }
+
+
+ inline
+ BlockVector::BlockVector (const BlockVector &v)
+ :
+ BaseClass ()
+ {
+ // take over the block layout of v and make room for its blocks ...
+ this->block_indices = v.block_indices;
+ this->components.resize (v.n_blocks());
+
+ // ... then copy the blocks one by one
+ for (unsigned int b=0; b<this->n_blocks(); ++b)
+ this->components[b] = v.components[b];
+
+ collect_sizes();
+ }
+
+
+
+ inline
+ BlockVector &
+ BlockVector::operator = (const value_type s)
+ {
+ // the base class implements assignment from a scalar
+ this->BaseClass::operator= (s);
+ return *this;
+ }
+
+
+
+ inline
+ BlockVector &
+ BlockVector::operator = (const BlockVector &v)
+ {
+ // the base class implements assignment from another block vector
+ this->BaseClass::operator= (v);
+ return *this;
+ }
+
+
+
+ inline
+ BlockVector::~BlockVector ()
+ {} // intentionally empty: no explicit cleanup is done in this destructor
+
+
+ inline
+ void
+ BlockVector::reinit (const unsigned int n_blocks,
+ const MPI_Comm &communicator,
+ const unsigned int block_size,
+ const unsigned int local_size,
+ const bool fast)
+ {
+ // every block gets the same global and local size: expand the
+ // scalars into per-block lists and use the general reinit()
+ const std::vector<unsigned int> block_sizes (n_blocks, block_size);
+ const std::vector<unsigned int> local_sizes (n_blocks, local_size);
+
+ reinit (block_sizes, communicator, local_sizes, fast);
+ }
+
+
+
+ // Reinitialize this object to block_sizes.size() blocks. Block i is
+ // reinitialized on @p communicator with block_sizes[i] elements in total,
+ // local_sizes[i] of them on this process; @p fast is passed through to
+ // the reinit() of each block. Both lists must have the same length.
+ inline
+ void
+ BlockVector::reinit (const std::vector<unsigned int> &block_sizes,
+ const MPI_Comm &communicator,
+ const std::vector<unsigned int> &local_sizes,
+ const bool fast)
+ {
+ // local_sizes[i] is read for every block below, so a shorter list
+ // would be indexed out of bounds
+ Assert (block_sizes.size() == local_sizes.size(),
+ ExcDimensionMismatch(block_sizes.size(), local_sizes.size()));
+
+ this->block_indices.reinit (block_sizes);
+ if (this->components.size() != this->n_blocks())
+ this->components.resize(this->n_blocks());
+
+ for (unsigned int i=0; i<this->n_blocks(); ++i)
+ this->components[i].reinit(communicator, block_sizes[i],
+ local_sizes[i], fast);
+
+ collect_sizes(); // shuqiangwang
+ }
+
+ inline
+ void
+ BlockVector::reinit (const std::vector<unsigned int> &block_sizes,
+ const MPI_Comm &communicator) // added by shuqiangwang
+ {
+ // NOTE(review): communicator is not used in this function, and the
+ // individual blocks are only created/dropped by the resize below,
+ // never reinit()ed -- confirm that this is the intended behavior.
+ this->block_indices.reinit (block_sizes);
+
+ const unsigned int n_target_blocks = this->n_blocks();
+ if (this->components.size() != n_target_blocks)
+ this->components.resize(n_target_blocks);
+
+ collect_sizes();
+ }
+
+ inline
+ void
+ BlockVector::reinit (const BlockVector &v,
+ const bool fast)
+ {
+ // adopt the block layout of v ...
+ this->block_indices = v.get_block_indices();
+
+ const unsigned int n_target_blocks = this->n_blocks();
+ if (this->components.size() != n_target_blocks)
+ this->components.resize(n_target_blocks);
+
+ // ... and shape every block like its counterpart in v
+ for (unsigned int b=0; b<n_target_blocks; ++b)
+ block(b).reinit(v.block(b), fast);
+
+ collect_sizes();
+ }
+
+
+
+ inline
+ const MPI_Comm &
+ BlockVector::get_mpi_communicator () const
+ {
+ // the communicator is the one of the first block
+ // NOTE(review): this presumably requires at least one block -- confirm
+ const BlockType &first_block = block(0);
+ return first_block.get_mpi_communicator();
+ }
+
+
+
+ inline
+ void
+ BlockVector::swap (BlockVector &v)
+ {
+ Assert (this->n_blocks() == v.n_blocks(),
+ ExcDimensionMismatch(this->n_blocks(), v.n_blocks()));
+
+ // exchange the blocks pairwise, then the block index bookkeeping
+ unsigned int b = 0;
+ while (b < this->n_blocks())
+ {
+ this->components[b].swap (v.components[b]);
+ ++b;
+ }
+ ::dealii::swap (this->block_indices, v.block_indices);
+ }
+
+
+
+ inline
+ void
+ BlockVector::print (std::ostream &out,
+ const unsigned int precision,
+ const bool scientific,
+ const bool across) const
+ {
+ // each block is preceded by a label and then printed by itself
+ for (unsigned int b=0; b<this->n_blocks(); ++b)
+ {
+ if (!across)
+ out << "Component " << b << std::endl;
+ else
+ out << 'C' << b << ':';
+
+ this->components[b].print(out, precision, scientific, across);
+ }
+ }
+
+
+
+ /**
+ * Global swap() overload for parallel PETSc block vectors. Unlike the
+ * generic C++ standard library version, which goes through a temporary
+ * object, this one simply forwards to <tt>u.swap(v)</tt>.
+ *
+ * @relates PETScWrappers::MPI::BlockVector
+ * @author Wolfgang Bangerth, 2000
+ */
+ inline
+ void swap (BlockVector &u,
+ BlockVector &v)
+ {
+ u.swap (v);
+ }
+
+ }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_PETSC
+
+#endif
Added: branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_solver.h
===================================================================
--- branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_solver.h (rev 0)
+++ branches/s-wang2/for_deal.II/include/deal.II/lac/petsc_solver.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,1272 @@
+//---------------------------------------------------------------------------
+// $Id: petsc_solver.h 27666 2012-11-21 22:05:49Z bangerth $
+//
+// Copyright (C) 2004, 2005, 2006, 2007, 2009, 2010, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+#ifndef __deal2__petsc_solver_h
+#define __deal2__petsc_solver_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_USE_PETSC
+
+# include <deal.II/lac/exceptions.h>
+# include <deal.II/lac/solver_control.h>
+# include <deal.II/base/std_cxx1x/shared_ptr.h>
+
+# include <petscksp.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+namespace PETScWrappers
+{
+ // forward declarations
+ class MatrixBase;
+ class VectorBase;
+ class PreconditionerBase;
+
+
+ /**
+ * Base class for solver classes using the PETSc solvers. Since solvers in
+ * PETSc are selected based on flags passed to a generic solver object,
+ * basically all the actual solver calls happen in this class, and derived
+ * classes simply set the right flags to select one solver or another, or to
+ * set certain parameters for individual solvers.
+ *
+ * Optionally, the user can create a solver derived from the
+ * SolverBase class and can set the default arguments necessary to
+ * solve the linear system of equations with SolverControl. These
+ * default options can be overridden by specifying command line
+ * arguments of the form @p -ksp_*. For example,
+ * @p -ksp_monitor_true_residual prints out true residual norm
+ * (unpreconditioned) at each iteration and @p -ksp_view provides
+ * information about the linear solver and the preconditioner used in
+ * the current context. The type of the solver can also be changed
+ * during runtime by specifying @p -ksp_type {richardson, cg, gmres,
+ * fgmres, ..} to dynamically test the optimal solver along with a
+ * suitable preconditioner set using @p -pc_type {jacobi, bjacobi,
+ * ilu, lu, ..}. There are several other command line options
+ * available to modify the behavior of the PETSc linear solver and can
+ * be obtained from the <a
+ * href="http://www.mcs.anl.gov/petsc">documentation and manual
+ * pages</a>.
+ *
+ * @note Repeated calls to solve() on a solver object with a Preconditioner
+ * must be used with care. The preconditioner is initialized in the first call
+ * to solve() and subsequent calls reuse the solver and preconditioner
+ * object. This is done for performance reasons. The solver and preconditioner
+ * can be reset by calling reset().
+ *
+ * One of the gotchas of PETSc is that -- in particular in MPI mode -- it
+ * often does not produce very helpful error messages. In order to save
+ * other users some time in searching a hard to track down error, here is
+ * one situation and the error message one gets there:
+ * when you don't specify an MPI communicator to your solver's constructor. In
+ * this case, you will get an error of the following form from each of your
+ * parallel processes:
+ * @verbatim
+ * [1]PETSC ERROR: PCSetVector() line 1173 in src/ksp/pc/interface/precon.c
+ * [1]PETSC ERROR: Arguments must have same communicators!
+ * [1]PETSC ERROR: Different communicators in the two objects: Argument # 1 and 2!
+ * [1]PETSC ERROR: KSPSetUp() line 195 in src/ksp/ksp/interface/itfunc.c
+ * @endverbatim
+ *
+ * This error, on which one can spend a very long time figuring out
+ * what exactly goes wrong, results from not specifying an MPI
+ * communicator. Note that the communicator @em must match that of the
+ * matrix and all vectors in the linear system which we want to
+ * solve. Aggravating the situation is the fact that the default
+ * argument to the solver classes, @p PETSC_COMM_SELF, is the
+ * appropriate argument for the sequential case (which is why it is
+ * the default argument), so this error only shows up in parallel
+ * mode.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverBase
+ {
+ public:
+ /**
+ * Constructor. Takes the solver
+ * control object and the MPI
+ * communicator over which parallel
+ * computations are to happen.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard to track down errors,
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverBase (SolverControl &cn,
+ const MPI_Comm &mpi_communicator);
+
+ /**
+ * Destructor.
+ */
+ virtual ~SolverBase ();
+
+ /**
+ * Solve the linear system
+ * <tt>Ax=b</tt>. Depending on the
+ * information provided by derived
+ * classes and the object passed as a
+ * preconditioner, one of the linear
+ * solvers and preconditioners of PETSc
+ * is chosen. Repeated calls to
+ * solve() do not reconstruct the
+ * preconditioner for performance
+ * reasons. See the class documentation.
+ */
+ void
+ solve (const MatrixBase &A,
+ VectorBase &x,
+ const VectorBase &b,
+ const PreconditionerBase &preconditioner);
+
+
+ /**
+ * Resets the contained preconditioner
+ * and solver object. See class
+ * description for more details.
+ */
+ virtual void reset();
+
+
+ /**
+ * Sets a prefix name for the solver
+ * object. Useful when customizing the
+ * PETSc KSP object with command-line
+ * options.
+ */
+ void set_prefix(const std::string &prefix);
+
+
+ /**
+ * Access to object that controls
+ * convergence.
+ */
+ SolverControl &control() const;
+
+ /**
+ * Exception
+ */
+ DeclException1 (ExcPETScError,
+ int,
+ << "An error with error number " << arg1
+ << " occurred while calling a PETSc function");
+
+ protected:
+
+ /**
+ * Reference to the object that
+ * controls convergence of the
+ * iterative solver. In fact, for these
+ * PETSc wrappers, PETSc does so
+ * itself, but we copy the data from
+ * this object before starting the
+ * solution process, and copy the data
+ * back into it afterwards.
+ */
+ SolverControl &solver_control;
+
+ /**
+ * Copy of the MPI communicator object
+ * to be used for the solver.
+ */
+ const MPI_Comm mpi_communicator;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * requested by the derived class.
+ */
+ virtual void set_solver_type (KSP &ksp) const = 0;
+
+ /**
+ * Solver prefix name to qualify options
+ * specific to the PETSc KSP object in the
+ * current context.
+ * Note: A hyphen (-) must NOT be given
+ * at the beginning of the prefix name.
+ * The first character of all runtime
+ * options is AUTOMATICALLY the hyphen.
+ */
+ std::string prefix_name;
+
+ private:
+ /**
+ * A function that is used in PETSc as
+ * a callback to check on
+ * convergence. It takes the
+ * information provided from PETSc and
+ * checks it against deal.II's own
+ * SolverControl objects to see if
+ * convergence has been reached.
+ */
+ static
+#ifdef PETSC_USE_64BIT_INDICES
+ PetscErrorCode
+#else
+ int
+#endif
+ convergence_test (KSP ksp,
+#ifdef PETSC_USE_64BIT_INDICES
+ const PetscInt iteration,
+#else
+ const int iteration,
+#endif
+ const PetscReal residual_norm,
+ KSPConvergedReason *reason,
+ void *solver_control);
+
+ /**
+ * A structure that contains the PETSc
+ * solver and preconditioner
+ * objects. This object is preserved
+ * between subsequent calls to the
+ * solver if the same preconditioner is
+ * used as in the previous solver
+ * step. This may save some computation
+ * time, if setting up a preconditioner
+ * is expensive, such as in the case of
+ * an ILU for example.
+ *
+ * The actual declaration of this class
+ * is complicated by the fact that
+ * PETSc changed its solver interface
+ * completely and incompatibly between
+ * versions 2.1.6 and 2.2.0 :-(
+ *
+ * Objects of this type are explicitly
+ * created, but are destroyed when the
+ * surrounding solver object goes out
+ * of scope, or when we assign a new
+ * value to the pointer to this
+ * object. The respective *Destroy
+ * functions are therefore written into
+ * the destructor of this object, even
+ * though the object does not have a
+ * constructor.
+ */
+ struct SolverData
+ {
+ /**
+ * Destructor
+ */
+ ~SolverData ();
+
+ /**
+ * Objects for Krylov subspace
+ * solvers and preconditioners.
+ */
+ KSP ksp;
+ PC pc;
+ };
+
+ /**
+ * Pointer to an object that stores the
+ * solver context. This is recreated in
+ * the main solver routine if
+ * necessary.
+ */
+ std_cxx1x::shared_ptr<SolverData> solver_data;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc Richardson
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverRichardson : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to
+ * pipe additional data to the
+ * solver.
+ */
+ struct AdditionalData
+ {
+ /**
+ * Constructor. By default,
+ * set the damping parameter
+ * @p omega to one.
+ */
+ AdditionalData (const double omega = 1);
+
+ /**
+ * Relaxation (damping) parameter.
+ */
+ double omega;
+ };
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverRichardson (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc Chebychev
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverChebychev : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverChebychev (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc CG
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverCG : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverCG (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc BiCG
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverBiCG : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverBiCG (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc GMRES solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverGMRES : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to
+ * pipe additional data to the
+ * solver.
+ */
+ struct AdditionalData
+ {
+ /**
+ * Constructor. By default, use 30
+ * temporary vectors, i.e. restart
+ * every 30 iterations, and disable
+ * right preconditioning.
+ */
+ AdditionalData (const unsigned int restart_parameter = 30,
+ const bool right_preconditioning = false);
+
+ /**
+ * Maximum number of
+ * temporary vectors.
+ */
+ unsigned int restart_parameter;
+
+ /**
+ * Flag for right
+ * preconditioning.
+ */
+ bool right_preconditioning;
+ };
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverGMRES (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc BiCGStab
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverBicgstab : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverBicgstab (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+ /**
+ * An implementation of the solver interface using the PETSc CG Squared
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverCGS : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverCGS (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc TFQMR
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverTFQMR : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverTFQMR (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc TFQMR-2 solver
+ * (called TCQMR in PETSc). Note that this solver had a serious bug in
+ * versions up to and including PETSc 2.1.6, in that it did not check
+ * convergence and always returned an error code. Thus, this class will abort
+ * with an error indicating failure to converge with PETSc 2.1.6 and
+ * prior. This should be fixed in later versions of PETSc, though.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverTCQMR : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverTCQMR (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc CR
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverCR : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverCR (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+
+ /**
+ * An implementation of the solver interface using the PETSc Least Squares
+ * solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004
+ */
+ class SolverLSQR : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverLSQR (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+
+ /**
+ * An implementation of the solver interface using the PETSc PREONLY
+ * solver. Actually this is NOT a real solution algorithm. solve() only
+ * applies the preconditioner once and returns immediately. Its only purpose
+ * is to provide a solver object, when the preconditioner should be used as a
+ * real solver. It is very useful in conjunction with the complete LU
+ * decomposition preconditioner <tt> PreconditionLU </tt>, which in
+ * conjunction with this solver class becomes a direct solver.
+ *
+ * @ingroup PETScWrappers
+ * @author Wolfgang Bangerth, 2004, Oliver Kayser-Herold, 2004
+ */
+ class SolverPreOnly : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data struct to pipe
+ * additional data to the solver.
+ * It is empty for this solver.
+ */
+ struct AdditionalData
+ {};
+
+ /**
+ * Constructor. In contrast to
+ * deal.II's own solvers, there is no
+ * need to give a vector memory
+ * object. However, PETSc solvers want
+ * to have an MPI communicator context
+ * over which computations are
+ * parallelized. By default,
+ * @p PETSC_COMM_SELF is used here,
+ * but you can change this. Note that
+ * for single processor (non-MPI)
+ * versions, this parameter does not
+ * have any effect.
+ *
+ * The last argument, @p data, takes a
+ * structure with additional, solver
+ * dependent flags for tuning.
+ *
+ * Note that the communicator used here
+ * must match the communicator used in
+ * the system matrix, solution, and
+ * right hand side object of the solve
+ * to be done with this
+ * solver. Otherwise, PETSc will
+ * generate hard-to-track-down errors;
+ * see the documentation of the
+ * SolverBase class.
+ */
+ SolverPreOnly (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ /**
+ * Function that takes a Krylov
+ * Subspace Solver context object, and
+ * sets the type of solver that is
+ * appropriate for this class.
+ */
+ virtual void set_solver_type (KSP &ksp) const;
+ };
+
+ /**
+ * An implementation of the solver interface using the sparse direct MUMPS
+ * solver through PETSc. This class has the usual interface of all other
+ * solver classes but it is of course different in that it doesn't implement
+ * an iterative solver. As a consequence, things like the SolverControl object
+ * have no particular meaning here.
+ *
+ * MUMPS allows to make use of symmetry in this matrix. In this class this is
+ * made possible by the set_symmetric_mode() function. If your matrix is
+ * symmetric, you can use this class as follows:
+ * @code
+ * SolverControl cn;
+ * PETScWrappers::SparseDirectMUMPS solver(cn, mpi_communicator);
+ * solver.set_symmetric_mode(true);
+ * solver.solve(system_matrix, solution, system_rhs);
+ * @endcode
+ *
+ * @note The class internally calls KSPSetFromOptions thus you are
+ * able to use all the PETSc parameters for MATSOLVERMUMPS package.
+ * See http://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/Mat/MATSOLVERMUMPS.html
+ *
+ * @ingroup PETScWrappers
+ * @author Daniel Brauss, Alexander Grayver, 2012
+ */
+ class SparseDirectMUMPS : public SolverBase
+ {
+ public:
+ /**
+ * Standardized data structure to
+ * pipe additional data to the
+ * solver. It is empty here.
+ */
+ struct AdditionalData
+ {};
+ /**
+ * Constructor. Takes the same arguments as the other PETSc solver classes.
+ */
+ SparseDirectMUMPS (SolverControl &cn,
+ const MPI_Comm &mpi_communicator = PETSC_COMM_SELF,
+ const AdditionalData &data = AdditionalData());
+
+ /**
+ * Solve the linear system with matrix @p A,
+ * solution @p x and right hand side @p b.
+ */
+ void solve (const MatrixBase &A,
+ VectorBase &x,
+ const VectorBase &b);
+
+ /**
+ * Take advantage of a symmetric
+ * system matrix by using an LDL^T
+ * decomposition instead of the
+ * more expensive LU. The argument
+ * indicates whether the matrix is
+ * symmetric or not.
+ */
+ void set_symmetric_mode (const bool flag);
+
+ protected:
+ /**
+ * Store a copy of the flags for this
+ * particular solver.
+ */
+ const AdditionalData additional_data;
+
+ virtual void set_solver_type (KSP &ksp) const;
+
+ private:
+ /**
+ * A function that is used in PETSc
+ * as a callback to check convergence.
+ * It takes the information provided
+ * by PETSc and checks it against
+ * deal.II's own SolverControl objects
+ * to see if convergence has been reached.
+ */
+ static
+#ifdef PETSC_USE_64BIT_INDICES
+ PetscErrorCode
+#else
+ int
+#endif
+ convergence_test (KSP ksp,
+#ifdef PETSC_USE_64BIT_INDICES
+ const PetscInt iteration,
+#else
+ const int iteration,
+#endif
+ const PetscReal residual_norm,
+ KSPConvergedReason *reason,
+ void *solver_control);
+ /**
+ * A structure that contains the PETSc
+ * solver and preconditioner objects.
+ * The solve member function of the base
+ * class is not used here, so its private
+ * SolverData struct cannot be used either.
+ * NOTE(review): no destructor is declared
+ * here -- confirm ksp/pc get destroyed.
+ */
+ struct SolverDataMUMPS
+ {
+ KSP ksp;
+ PC pc;
+ };
+
+ std_cxx1x::shared_ptr<SolverDataMUMPS> solver_data;
+
+ /**
+ * Flag that specifies whether the
+ * matrix being factorized is symmetric
+ * or not. It influences the type
+ * of the preconditioner used
+ * (PCLU or PCCHOLESKY).
+ */
+ bool symmetric_mode;
+ };
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_PETSC
+
+/*---------------------------- petsc_solver.h ---------------------------*/
+
+#endif
+/*---------------------------- petsc_solver.h ---------------------------*/
Added: branches/s-wang2/for_deal.II/include/deal.II/lac/trilinos_sparse_matrix.h
===================================================================
--- branches/s-wang2/for_deal.II/include/deal.II/lac/trilinos_sparse_matrix.h (rev 0)
+++ branches/s-wang2/for_deal.II/include/deal.II/lac/trilinos_sparse_matrix.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,3483 @@
+//---------------------------------------------------------------------------
+// $Id: trilinos_sparse_matrix.h 27628 2012-11-20 22:49:26Z heister $
+//
+// Copyright (C) 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+#ifndef __deal2__trilinos_sparse_matrix_h
+#define __deal2__trilinos_sparse_matrix_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_USE_TRILINOS
+
+# include <deal.II/base/std_cxx1x/shared_ptr.h>
+# include <deal.II/base/subscriptor.h>
+# include <deal.II/base/index_set.h>
+# include <deal.II/lac/full_matrix.h>
+# include <deal.II/lac/exceptions.h>
+# include <deal.II/lac/trilinos_vector_base.h>
+# include <deal.II/lac/parallel_vector.h>
+
+# include <vector>
+# include <cmath>
+# include <memory>
+
+# define TrilinosScalar double
+# include <Epetra_FECrsMatrix.h>
+# include <Epetra_Map.h>
+# include <Epetra_CrsGraph.h>
+# include <Epetra_Vector.h>
+# ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+# include <Epetra_MpiComm.h>
+# include "mpi.h"
+# else
+# include "Epetra_SerialComm.h"
+# endif
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declarations
+template <typename MatrixType> class BlockMatrixBase;
+
+template <typename number> class SparseMatrix;
+class SparsityPattern;
+
+namespace TrilinosWrappers
+{
+ // forward declarations
+ class VectorBase;
+ class SparseMatrix;
+ class SparsityPattern;
+
+ /**
+ * Iterators for Trilinos matrices
+ */
+ namespace MatrixIterators
+ {
+ /**
+ * STL conforming iterator. This class acts as an iterator walking
+ * over the elements of Trilinos matrices. The implementation of this
+ * class is similar to the one for PETSc matrices.
+ *
+ * Note that Trilinos stores the elements within each row in ascending
+ * order. This is opposed to the deal.II sparse matrix style where the
+ * diagonal element (if it exists) is stored before all other values, and
+ * the PETSc sparse matrices, where one can't guarantee a certain order of
+ * the elements.
+ *
+ * @ingroup TrilinosWrappers
+ * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+ */
+ class const_iterator
+ {
+ private:
+ /**
+ * Accessor class for iterators
+ */
+ class Accessor
+ {
+ public:
+ /**
+ * Constructor. Since we use
+ * accessors only for read
+ * access, a const matrix
+ * pointer is sufficient.
+ */
+ Accessor (const SparseMatrix *matrix,
+ const unsigned int row,
+ const unsigned int index);
+
+ /**
+ * Row number of the element
+ * represented by this object.
+ */
+ unsigned int row() const;
+
+ /**
+ * Index in row of the element
+ * represented by this object.
+ */
+ unsigned int index() const;
+
+ /**
+ * Column number of the element
+ * represented by this object.
+ */
+ unsigned int column() const;
+
+ /**
+ * Value of this matrix entry.
+ */
+ TrilinosScalar value() const;
+
+ /**
+ * Exception
+ */
+ DeclException0 (ExcBeyondEndOfMatrix);
+
+ /**
+ * Exception for access to a row that is not stored locally.
+ */
+ DeclException3 (ExcAccessToNonlocalRow,
+ int, int, int,
+ << "You tried to access row " << arg1
+ << " of a distributed matrix, but only rows "
+ << arg2 << " through " << arg3
+ << " are stored locally and can be accessed.");
+
+ private:
+ /**
+ * The matrix accessed. Held as a mutable, non-const pointer.
+ */
+ mutable SparseMatrix *matrix;
+
+ /**
+ * Current row number.
+ */
+ unsigned int a_row;
+
+ /**
+ * Current index in row.
+ */
+ unsigned int a_index;
+
+ /**
+ * Cache where we store the
+ * column indices of the
+ * present row. This is
+ * necessary, since Trilinos
+ * makes access to the elements
+ * of its matrices rather hard,
+ * and it is much more
+ * efficient to copy all column
+ * entries of a row once when
+ * we enter it than repeatedly
+ * asking Trilinos for
+ * individual ones. This also
+ * makes some sense since it is
+ * likely that we will access
+ * them sequentially anyway.
+ *
+ * In order to make copying of
+ * iterators/accessors of
+ * acceptable performance, we
+ * keep a shared pointer to
+ * these entries so that more
+ * than one accessor can access
+ * this data if necessary.
+ */
+ std_cxx1x::shared_ptr<std::vector<unsigned int> > colnum_cache;
+
+ /**
+ * Similar cache for the values
+ * of this row.
+ */
+ std_cxx1x::shared_ptr<std::vector<TrilinosScalar> > value_cache;
+
+ /**
+ * Discard the old row caches
+ * (they may still be used by
+ * other accessors) and
+ * generate new ones for the
+ * row pointed to presently by
+ * this accessor.
+ */
+ void visit_present_row ();
+
+ /**
+ * Make enclosing class a
+ * friend.
+ */
+ friend class const_iterator;
+ };
+
+ public:
+
+ /**
+ * Constructor. Create an
+ * iterator into the matrix @p
+ * matrix for the given row and
+ * the index within it.
+ */
+ const_iterator (const SparseMatrix *matrix,
+ const unsigned int row,
+ const unsigned int index);
+
+ /**
+ * Prefix increment.
+ */
+ const_iterator &operator++ ();
+
+ /**
+ * Postfix increment.
+ */
+ const_iterator operator++ (int);
+
+ /**
+ * Dereferencing operator. Returns a reference to an Accessor.
+ */
+ const Accessor &operator* () const;
+
+ /**
+ * Dereferencing operator. Returns a pointer to an Accessor.
+ */
+ const Accessor *operator-> () const;
+
+ /**
+ * Comparison. True, if both
+ * iterators point to the same
+ * matrix position.
+ */
+ bool operator == (const const_iterator &) const;
+
+ /**
+ * Inverse of <tt>==</tt>.
+ */
+ bool operator != (const const_iterator &) const;
+
+ /**
+ * Comparison operator. Result
+ * is true if either the first
+ * row number is smaller or if
+ * the row numbers are equal
+ * and the first index is
+ * smaller.
+ */
+ bool operator < (const const_iterator &) const;
+
+ /**
+ * Exception for an index that exceeds the number of elements in a row.
+ */
+ DeclException2 (ExcInvalidIndexWithinRow,
+ int, int,
+ << "Attempt to access element " << arg2
+ << " of row " << arg1
+ << " which doesn't have that many elements.");
+
+ private:
+ /**
+ * Store an object of the
+ * accessor class.
+ */
+ Accessor accessor;
+ };
+
+ }
+
+
+ /**
+ * This class implements a wrapper to use the Trilinos distributed
+ * sparse matrix class Epetra_FECrsMatrix. This is precisely the kind of
+ * matrix we deal with all the time - we most likely get it from some
+ * assembly process, where also entries not locally owned might need to
+ * be written and hence need to be forwarded to the owner process. This
+ * class is designed to be used in a distributed memory architecture
+ * with an MPI compiler on the bottom, but works equally well also for
+ * serial processes. The only requirement for this class to work is that
+ * Trilinos has been installed with the same compiler as is used for
+ * generating deal.II.
+ *
+ * The interface of this class is modeled after the existing
+ * SparseMatrix class in deal.II. It has almost the same member
+ * functions, and is often exchangeable. However, since Trilinos only
+ * supports a single scalar type (double), it is not templated, and only
+ * works with doubles.
+ *
+ * Note that Trilinos only guarantees that operations do what you expect
+ * if the function @p GlobalAssemble has been called after matrix
+ * assembly. Therefore, you need to call SparseMatrix::compress()
+ * before you actually use the matrix. This also calls @p FillComplete
+ * that compresses the storage format for sparse matrices by discarding
+ * unused elements. Trilinos allows to continue with assembling the
+ * matrix after calls to these functions, though.
+ *
+ * @ingroup TrilinosWrappers
+ * @ingroup Matrix1
+ * @author Martin Kronbichler, Wolfgang Bangerth, 2008, 2009
+ */
+ class SparseMatrix : public Subscriptor
+ {
+ public:
+ /**
+ * A structure that describes
+ * some of the traits of this
+ * class in terms of its run-time
+ * behavior. Some other classes
+ * (such as the block matrix
+ * classes) that take one or
+ * other of the matrix classes as
+ * its template parameters can
+ * tune their behavior based on
+ * the variables in this class.
+ */
+ struct Traits
+ {
+ /**
+ * It is safe to elide additions
+ * of zeros to individual
+ * elements of this matrix.
+ */
+ static const bool zero_addition_can_be_elided = true;
+ };
+
+ /**
+ * Declare a typedef for the
+ * iterator class.
+ */
+ typedef MatrixIterators::const_iterator const_iterator;
+
+ /**
+ * Declare a typedef in analogy
+ * to all the other container
+ * classes.
+ */
+ typedef TrilinosScalar value_type;
+
+ /**
+ * @name Constructors and initialization.
+ */
+//@{
+ /**
+ * Default constructor. Generates
+ * an empty (zero-size) matrix.
+ */
+ SparseMatrix ();
+
+ /**
+ * Generate a matrix that is completely
+ * stored locally, having #m rows and
+ * #n columns.
+ *
+ * The number of column entries per
+ * row is given by the argument
+ * <tt>n_max_entries_per_row</tt>.
+ */
+ SparseMatrix (const unsigned int m,
+ const unsigned int n,
+ const unsigned int n_max_entries_per_row);
+
+ /**
+ * Generate a matrix that is completely
+ * stored locally, having #m rows and
+ * #n columns.
+ *
+ * The vector
+ * <tt>n_entries_per_row</tt>
+ * specifies the number of entries in
+ * each row.
+ */
+ SparseMatrix (const unsigned int m,
+ const unsigned int n,
+ const std::vector<unsigned int> &n_entries_per_row);
+
+ /**
+ * Generate a matrix from a Trilinos
+ * sparsity pattern object.
+ */
+ SparseMatrix (const SparsityPattern &InputSparsityPattern);
+
+ /**
+ * Copy constructor. Sets the
+ * calling matrix to be the same
+ * as the input matrix, i.e.,
+ * using the same sparsity
+ * pattern and entries.
+ */
+ SparseMatrix (const SparseMatrix &InputMatrix);
+
+ /**
+ * Destructor. Made virtual so
+ * that one can use pointers to
+ * this class.
+ */
+ virtual ~SparseMatrix ();
+
+ /**
+ * This function initializes the
+ * Trilinos matrix with a deal.II
+ * sparsity pattern, i.e. it makes
+ * the Trilinos Epetra matrix know
+ * the position of nonzero entries
+ * according to the sparsity
+ * pattern. This function is meant
+ * for use in serial programs, where
+ * there is no need to specify how
+ * the matrix is going to be
+ * distributed among different
+ * processors. This function works in
+ * %parallel, too, but it is
+ * recommended to manually specify
+ * the %parallel partitioning of the
+ * matrix using an Epetra_Map. When
+ * run in %parallel, it is currently
+ * necessary that each processor
+ * holds the sparsity_pattern
+ * structure because each processor
+ * sets its rows.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template<typename SparsityType>
+ void reinit (const SparsityType &sparsity_pattern);
+
+ /**
+ * This function reinitializes the
+ * Trilinos sparse matrix from a
+ * (possibly distributed) Trilinos
+ * sparsity pattern.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ void reinit (const SparsityPattern &sparsity_pattern);
+
+ /**
+ * This function copies the content
+ * in <tt>sparse_matrix</tt> to the
+ * calling matrix.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ void reinit (const SparseMatrix &sparse_matrix);
+
+ /**
+ * This function initializes the
+ * Trilinos matrix using the deal.II
+ * sparse matrix and the entries
+ * stored therein. It uses a
+ * threshold to copy only elements
+ * with modulus larger than the
+ * threshold (so zeros in the deal.II
+ * matrix can be filtered away).
+ *
+ * The optional parameter
+ * <tt>copy_values</tt> decides
+ * whether only the sparsity
+ * structure of the input matrix
+ * should be used or the matrix
+ * entries should be copied, too.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template <typename number>
+ void reinit (const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+ const double drop_tolerance=1e-13,
+ const bool copy_values=true,
+ const ::dealii::SparsityPattern *use_this_sparsity=0);
+
+ /**
+ * This reinit function takes as
+ * input a Trilinos Epetra_CrsMatrix
+ * and copies its sparsity
+ * pattern. If so requested, even the
+ * content (values) will be copied.
+ */
+ void reinit (const Epetra_CrsMatrix &input_matrix,
+ const bool copy_values = true);
+//@}
+ /**
+ * @name Constructors and initialization using an Epetra_Map description
+ */
+//@{
+ /**
+ * Constructor using an Epetra_Map to
+ * describe the %parallel
+ * partitioning. The parameter @p
+ * n_max_entries_per_row sets the
+ * number of nonzero entries in each
+ * row that will be allocated. Note
+ * that this number does not need to
+ * be exact, and it is even allowed
+ * that the actual matrix structure
+ * has more nonzero entries than
+ * specified in the
+ * constructor. However it is still
+ * advantageous to provide good
+ * estimates here since this will
+ * considerably increase the
+ * performance of the matrix
+ * setup. However, there is no effect
+ * in the performance of
+ * matrix-vector products, since
+ * Trilinos reorganizes the matrix
+ * memory prior to use (in the
+ * compress() step).
+ */
+ SparseMatrix (const Epetra_Map &parallel_partitioning,
+ const unsigned int n_max_entries_per_row = 0);
+
+ /**
+ * Same as before, but now set a
+ * value of nonzeros for each matrix
+ * row. Since we know the number of
+ * elements in the matrix exactly in
+ * this case, we can already allocate
+ * the right amount of memory, which
+ * makes the creation process
+ * including the insertion of nonzero
+ * elements by the respective
+ * SparseMatrix::reinit call
+ * considerably faster.
+ */
+ SparseMatrix (const Epetra_Map &parallel_partitioning,
+ const std::vector<unsigned int> &n_entries_per_row);
+
+ /**
+ * This constructor is similar to the
+ * one above, but it now takes two
+ * different Epetra maps for rows and
+ * columns. This interface is meant
+ * to be used for generating
+ * rectangular matrices, where one
+ * map describes the %parallel
+ * partitioning of the dofs
+ * associated with the matrix rows
+ * and the other one the partitioning
+ * of dofs in the matrix
+ * columns. Note that there is no
+ * real parallelism along the columns
+ * – the processor that owns a
+ * certain row always owns all the
+ * column elements, no matter how far
+ * they might be spread out. The
+ * second Epetra_Map is only used to
+ * specify the number of columns and
+ * for internal arrangements when
+ * doing matrix-vector products with
+ * vectors based on that column map.
+ *
+ * The integer input @p
+ * n_max_entries_per_row defines the
+ * number of columns entries per row
+ * that will be allocated.
+ */
+ SparseMatrix (const Epetra_Map &row_parallel_partitioning,
+ const Epetra_Map &col_parallel_partitioning,
+ const unsigned int n_max_entries_per_row = 0);
+
+ /**
+ * This constructor is similar to the
+ * one above, but it now takes two
+ * different Epetra maps for rows and
+ * columns. This interface is meant
+ * to be used for generating
+ * rectangular matrices, where one
+ * map specifies the %parallel
+ * distribution of degrees of freedom
+ * associated with matrix rows and
+ * the second one specifies the
+ * %parallel distribution of the dofs
+ * associated with columns in the
+ * matrix. The second map also
+ * provides information for the
+ * internal arrangement in matrix
+ * vector products (i.e., the
+ * distribution of the vector this matrix
+ * is to be multiplied with), but is
+ * not used for the distribution of
+ * the columns – rather, all
+ * column elements of a row are
+ * stored on the same processor in
+ * any case. The vector
+ * <tt>n_entries_per_row</tt>
+ * specifies the number of entries in
+ * each row of the newly generated
+ * matrix.
+ */
+ SparseMatrix (const Epetra_Map &row_parallel_partitioning,
+ const Epetra_Map &col_parallel_partitioning,
+ const std::vector<unsigned int> &n_entries_per_row);
+
+ /**
+ * This function initializes the
+ * Trilinos Epetra matrix according to
+ * the specified sparsity_pattern, and
+ * also reassigns the matrix rows to
+ * different processes according to a
+ * user-supplied Epetra map. In
+ * programs following the style of the
+ * tutorial programs, this function
+ * (and the respective call for a
+ * rectangular matrix) are the natural
+ * way to initialize the matrix size,
+ * its distribution among the MPI
+ * processes (if run in %parallel) as
+ * well as the location of non-zero
+ * elements. Trilinos stores the
+ * sparsity pattern internally, so it
+ * won't be needed any more after this
+ * call, in contrast to the deal.II own
+ * object. The optional argument @p
+ * exchange_data can be used for
+ * reinitialization with a sparsity
+ * pattern that is not fully
+ * constructed. This feature is only
+ * implemented for input sparsity
+ * patterns of type
+ * CompressedSimpleSparsityPattern. If
+ * the flag is not set, each processor
+ * just sets the elements in the
+ * sparsity pattern that belong to its
+ * rows.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template<typename SparsityType>
+ void reinit (const Epetra_Map &parallel_partitioning,
+ const SparsityType &sparsity_pattern,
+ const bool exchange_data = false);
+
+ /**
+ * This function is similar to the
+ * other initialization function
+ * above, but now also reassigns the
+ * matrix rows and columns according
+ * to two user-supplied Epetra maps.
+ * To be used for rectangular
+ * matrices. The optional argument @p
+ * exchange_data can be used for
+ * reinitialization with a sparsity
+ * pattern that is not fully
+ * constructed. This feature is only
+ * implemented for input sparsity
+ * patterns of type
+ * CompressedSimpleSparsityPattern.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template<typename SparsityType>
+ void reinit (const Epetra_Map &row_parallel_partitioning,
+ const Epetra_Map &col_parallel_partitioning,
+ const SparsityType &sparsity_pattern,
+ const bool exchange_data = false);
+
+ /**
+ * This function initializes the
+ * Trilinos matrix using the deal.II
+ * sparse matrix and the entries
+ * stored therein. It uses a
+ * threshold to copy only elements
+ * with modulus larger than the
+ * threshold (so zeros in the deal.II
+ * matrix can be filtered away). In
+ * contrast to the other reinit
+ * function with deal.II sparse
+ * matrix argument, this function
+ * takes a %parallel partitioning
+ * specified by the user instead of
+ * internally generating it.
+ *
+ * The optional parameter
+ * <tt>copy_values</tt> decides
+ * whether only the sparsity
+ * structure of the input matrix
+ * should be used or the matrix
+ * entries should be copied, too.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template <typename number>
+ void reinit (const Epetra_Map &parallel_partitioning,
+ const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+ const double drop_tolerance=1e-13,
+ const bool copy_values=true,
+ const ::dealii::SparsityPattern *use_this_sparsity=0);
+
+ /**
+ * This function is similar to the
+ * other initialization function with
+ * deal.II sparse matrix input above,
+ * but now takes Epetra maps for both
+ * the rows and the columns of the
+ * matrix. Chosen for rectangular
+ * matrices.
+ *
+ * The optional parameter
+ * <tt>copy_values</tt> decides
+ * whether only the sparsity
+ * structure of the input matrix
+ * should be used or the matrix
+ * entries should be copied, too.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template <typename number>
+ void reinit (const Epetra_Map &row_parallel_partitioning,
+ const Epetra_Map &col_parallel_partitioning,
+ const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+ const double drop_tolerance=1e-13,
+ const bool copy_values=true,
+ const ::dealii::SparsityPattern *use_this_sparsity=0);
+//@}
+ /**
+ * @name Constructors and initialization using an IndexSet description
+ */
+//@{
+ /**
+ * Constructor using an IndexSet and
+ * an MPI communicator to describe
+ * the %parallel partitioning. The
+ * parameter @p n_max_entries_per_row
+ * sets the number of nonzero entries
+ * in each row that will be
+ * allocated. Note that this number
+ * does not need to be exact, and it
+ * is even allowed that the actual
+ * matrix structure has more nonzero
+ * entries than specified in the
+ * constructor. However it is still
+ * advantageous to provide good
+ * estimates here since this will
+ * considerably increase the
+ * performance of the matrix
+ * setup. However, there is no effect
+ * in the performance of
+ * matrix-vector products, since
+ * Trilinos reorganizes the matrix
+ * memory prior to use (in the
+ * compress() step).
+ */
+ SparseMatrix (const IndexSet &parallel_partitioning,
+ const MPI_Comm &communicator = MPI_COMM_WORLD,
+ const unsigned int n_max_entries_per_row = 0);
+
+ /**
+ * Same as before, but now set the
+ * number of nonzeros in each matrix
+ * row separately. Since we know the
+ * number of elements in the matrix
+ * exactly in this case, we can
+ * already allocate the right amount
+ * of memory, which makes the
+ * creation process including the
+ * insertion of nonzero elements by
+ * the respective
+ * SparseMatrix::reinit call
+ * considerably faster.
+ */
+ SparseMatrix (const IndexSet &parallel_partitioning,
+ const MPI_Comm &communicator,
+ const std::vector<unsigned int> &n_entries_per_row);
+
+ /**
+ * This constructor is similar to the
+ * one above, but it now takes two
+ * different IndexSet partitions for
+ * row and columns. This interface is
+ * meant to be used for generating
+ * rectangular matrices, where the
+ * first index set describes the
+ * %parallel partitioning of the
+ * degrees of freedom associated with
+ * the matrix rows and the second one
+ * the partitioning of the matrix
+ * columns. The second index set
+ * specifies the partitioning of the
+ * vectors this matrix is to be
+ * multiplied with, not the
+ * distribution of the elements that
+ * actually appear in the matrix.
+ *
+ * The parameter @p
+ * n_max_entries_per_row defines how
+ * much memory will be allocated for
+ * each row. This number does not
+ * need to be accurate, as the
+ * structure is reorganized in the
+ * compress() call.
+ */
+ SparseMatrix (const IndexSet &row_parallel_partitioning,
+ const IndexSet &col_parallel_partitioning,
+ const MPI_Comm &communicator = MPI_COMM_WORLD,
+ const unsigned int n_max_entries_per_row = 0);
+
+ /**
+ * This constructor is similar to the
+ * one above, but it now takes two
+ * different index sets for rows and
+ * columns. This interface is meant
+ * to be used for generating
+ * rectangular matrices, where one
+ * index set specifies the %parallel
+ * distribution of degrees of freedom
+ * associated with matrix rows and
+ * the second one specifies the
+ * %parallel distribution of the dofs
+ * associated with columns in the
+ * matrix. The second index set also
+ * provides information for the
+ * internal arrangement in matrix
+ * vector products (i.e., the
+ * distribution of the vector this matrix
+ * is to be multiplied with), but is
+ * not used for the distribution of
+ * the columns – rather, all
+ * column elements of a row are
+ * stored on the same processor in
+ * any case. The vector
+ * <tt>n_entries_per_row</tt>
+ * specifies the number of entries in
+ * each row of the newly generated
+ * matrix.
+ */
+ SparseMatrix (const IndexSet &row_parallel_partitioning,
+ const IndexSet &col_parallel_partitioning,
+ const MPI_Comm &communicator,
+ const std::vector<unsigned int> &n_entries_per_row);
+
+ /**
+ * This function initializes the
+ * Trilinos Epetra matrix according
+ * to the specified sparsity_pattern,
+ * and also reassigns the matrix rows
+ * to different processes according
+ * to a user-supplied index set and
+ * %parallel communicator. In
+ * programs following the style of
+ * the tutorial programs, this
+ * function (and the respective call
+ * for a rectangular matrix) are the
+ * natural way to initialize the
+ * matrix size, its distribution
+ * among the MPI processes (if run in
+ * %parallel) as well as the location
+ * of non-zero elements. Trilinos
+ * stores the sparsity pattern
+ * internally, so it won't be needed
+ * any more after this call, in
+ * contrast to the deal.II own
+ * object. The optional argument @p
+ * exchange_data can be used for
+ * reinitialization with a sparsity
+ * pattern that is not fully
+ * constructed. This feature is only
+ * implemented for input sparsity
+ * patterns of type
+ * CompressedSimpleSparsityPattern. If
+ * the flag is not set, each
+ * processor just sets the elements
+ * in the sparsity pattern that
+ * belong to its rows.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template<typename SparsityType>
+ void reinit (const IndexSet &parallel_partitioning,
+ const SparsityType &sparsity_pattern,
+ const MPI_Comm &communicator = MPI_COMM_WORLD,
+ const bool exchange_data = false);
+
+ /**
+ * This function is similar to the
+ * other initialization function
+ * above, but now also reassigns the
+ * matrix rows and columns according
+ * to two user-supplied index sets.
+ * To be used for rectangular
+ * matrices. The optional argument @p
+ * exchange_data can be used for
+ * reinitialization with a sparsity
+ * pattern that is not fully
+ * constructed. This feature is only
+ * implemented for input sparsity
+ * patterns of type
+ * CompressedSimpleSparsityPattern.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template<typename SparsityType>
+ void reinit (const IndexSet &row_parallel_partitioning,
+ const IndexSet &col_parallel_partitioning,
+ const SparsityType &sparsity_pattern,
+ const MPI_Comm &communicator = MPI_COMM_WORLD,
+ const bool exchange_data = false);
+
+ /**
+ * This function initializes the
+ * Trilinos matrix using the deal.II
+ * sparse matrix and the entries
+ * stored therein. It uses a
+ * threshold to copy only elements
+ * with modulus larger than the
+ * threshold (so zeros in the deal.II
+ * matrix can be filtered away). In
+ * contrast to the other reinit
+ * function with deal.II sparse
+ * matrix argument, this function
+ * takes a %parallel partitioning
+ * specified by the user instead of
+ * internally generating it.
+ *
+ * The optional parameter
+ * <tt>copy_values</tt> decides
+ * whether only the sparsity
+ * structure of the input matrix
+ * should be used or the matrix
+ * entries should be copied, too.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template <typename number>
+ void reinit (const IndexSet &parallel_partitioning,
+ const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+ const MPI_Comm &communicator = MPI_COMM_WORLD,
+ const double drop_tolerance=1e-13,
+ const bool copy_values=true,
+ const ::dealii::SparsityPattern *use_this_sparsity=0);
+
+ /**
+ * This function is similar to the
+ * other initialization function with
+ * deal.II sparse matrix input above,
+ * but now takes index sets for both
+ * the rows and the columns of the
+ * matrix. Chosen for rectangular
+ * matrices.
+ *
+ * The optional parameter
+ * <tt>copy_values</tt> decides
+ * whether only the sparsity
+ * structure of the input matrix
+ * should be used or the matrix
+ * entries should be copied, too.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ template <typename number>
+ void reinit (const IndexSet &row_parallel_partitioning,
+ const IndexSet &col_parallel_partitioning,
+ const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+ const MPI_Comm &communicator = MPI_COMM_WORLD,
+ const double drop_tolerance=1e-13,
+ const bool copy_values=true,
+ const ::dealii::SparsityPattern *use_this_sparsity=0);
+//@}
+ /**
+ * @name Information on the matrix
+ */
+//@{
+
+ /**
+ * Return the number of rows in
+ * this matrix.
+ */
+ unsigned int m () const;
+
+ /**
+ * Return the number of columns
+ * in this matrix.
+ */
+ unsigned int n () const;
+
+ /**
+ * Return the local dimension
+ * of the matrix, i.e. the
+ * number of rows stored on the
+ * present MPI process. For
+ * sequential matrices, this
+ * number is the same as m(),
+ * but for %parallel matrices it
+ * may be smaller.
+ *
+ * To figure out which elements
+ * exactly are stored locally,
+ * use local_range().
+ */
+ unsigned int local_size () const;
+
+ /**
+ * Return a pair of indices
+ * indicating which rows of
+ * this matrix are stored
+ * locally. The first number is
+ * the index of the first row
+ * stored, the second the index
+ * of the one past the last one
+ * that is stored locally. If
+ * this is a sequential matrix,
+ * then the result will be the
+ * pair (0,m()), otherwise it
+ * will be a pair (i,i+n),
+ * where
+ * <tt>n=local_size()</tt>.
+ */
+ std::pair<unsigned int, unsigned int>
+ local_range () const;
+
+ /**
+ * Return whether @p index is
+ * in the local range or not,
+ * see also local_range().
+ */
+ bool in_local_range (const unsigned int index) const;
+
+ /**
+ * Return the number of nonzero
+ * elements of this matrix.
+ */
+ unsigned int n_nonzero_elements () const;
+
+ /**
+ * Number of entries in a
+ * specific row.
+ */
+ unsigned int row_length (const unsigned int row) const;
+
+ /**
+ * Returns the state of the matrix,
+ * i.e., whether compress() needs to
+ * be called after an operation
+ * requiring data exchange. A call to
+ * compress() is also needed when the
+ * method set() has been called (even
+ * when working in serial).
+ */
+ bool is_compressed () const;
+
+ /**
+ * Determine an estimate for the memory
+ * consumption (in bytes) of this
+ * object. Note that only the memory
+ * reserved on the current processor is
+ * returned in case this is called in
+ * an MPI-based program.
+ */
+ std::size_t memory_consumption () const;
+
+//@}
+ /**
+ * @name Modifying entries
+ */
+//@{
+
+ /**
+ * This operator assigns a scalar to
+ * a matrix. Since this does usually
+ * not make much sense (should we set
+ * all matrix entries to this value?
+ * Only the nonzero entries of the
+ * sparsity pattern?), this operation
+ * is only allowed if the actual
+ * value to be assigned is zero. This
+ * operator only exists to allow for
+ * the obvious notation
+ * <tt>matrix=0</tt>, which sets all
+ * elements of the matrix to zero,
+ * but keeps the sparsity pattern
+ * previously used.
+ */
+ SparseMatrix &
+ operator = (const double d);
+
+ /**
+ * Release all memory and return to a
+ * state just like after having
+ * called the default constructor.
+ *
+ * This is a collective operation
+ * that needs to be called on all
+ * processors in order to avoid a
+ * dead lock.
+ */
+ void clear ();
+
+ /**
+ * This command does two things:
+ * <ul>
+ * <li> If the matrix was initialized
+ * without a sparsity pattern,
+ * elements have been added manually
+ * using the set() command. When this
+ * process is completed, a call to
+ * compress() reorganizes the
+ * internal data structures (sparsity
+ * pattern) so that a fast access to
+ * data is possible in matrix-vector
+ * products.
+ * <li> If the matrix structure has
+ * already been fixed (either by
+ * initialization with a sparsity
+ * pattern or by calling compress()
+ * during the setup phase), this
+ * command does the %parallel
+ * exchange of data. This is
+ * necessary when we perform assembly
+ * on more than one (MPI) process,
+ * because then some non-local row
+ * data will accumulate on nodes that
+ * belong to an element of the
+ * current processor, but are
+ * actually held by another. This
+ * command is usually called after
+ * all elements have been traversed.
+ * </ul>
+ *
+ * In both cases, this function
+ * compresses the data structures and
+ * allows the resulting matrix to be
+ * used in all other operations like
+ * matrix-vector products. This is a
+ * collective operation, i.e., it
+ * needs to be run on all processors
+ * when used in %parallel.
+ *
+ * NOTE(review): the argument
+ * <tt>operation</tt> defaults to
+ * <tt>VectorOperation::unknown</tt>
+ * and is not described here;
+ * presumably it tells the function
+ * which kind of write operation
+ * preceded the call. Confirm
+ * against the implementation.
+ *
+ * See @ref GlossCompress "Compressing distributed objects"
+ * for more information.
+ */
+ void compress (::dealii::VectorOperation::values operation
+ =::dealii::VectorOperation::unknown);
+
+ /**
+ * Set the element (<i>i,j</i>)
+ * to @p value.
+ *
+ * This function is able to insert new
+ * elements into the matrix as long as
+ * compress() has not been called, so
+ * the sparsity pattern will be
+ * extended. When compress() is called
+ * for the first time, then this is no
+ * longer possible and an insertion of
+ * elements at positions which have not
+ * been initialized will throw an
+ * exception. Note that in case
+ * elements need to be inserted, it is
+ * mandatory that elements are inserted
+ * only once. Otherwise, the elements
+ * will actually be added in the end
+ * (since it is not possible to
+ * efficiently find values to the same
+ * entry before compress() has been
+ * called). In the case that an element
+ * is set more than once, initialize
+ * the matrix with a sparsity pattern
+ * first.
+ */
+ void set (const unsigned int i,
+ const unsigned int j,
+ const TrilinosScalar value);
+
+ /**
+ * Set all elements given in a
+ * FullMatrix<double> into the sparse
+ * matrix locations given by
+ * <tt>indices</tt>. In other words,
+ * this function writes the elements
+ * in <tt>full_matrix</tt> into the
+ * calling matrix, using the
+ * local-to-global indexing specified
+ * by <tt>indices</tt> for both the
+ * rows and the columns of the
+ * matrix. This function assumes a
+ * quadratic sparse matrix and a
+ * quadratic full_matrix, the usual
+ * situation in FE calculations.
+ *
+ * This function is able to insert
+ * new elements into the matrix as
+ * long as compress() has not been
+ * called, so the sparsity pattern
+ * will be extended. When compress()
+ * is called for the first time, then
+ * this is no longer possible and an
+ * insertion of elements at positions
+ * which have not been initialized
+ * will throw an exception.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be inserted anyway
+ * or they should be filtered
+ * away. The default value is
+ * <tt>false</tt>, i.e., even zero
+ * values are inserted/replaced.
+ */
+ void set (const std::vector<unsigned int> &indices,
+ const FullMatrix<TrilinosScalar> &full_matrix,
+ const bool elide_zero_values = false);
+
+ /**
+ * Same function as before, but now
+ * including the possibility to use
+ * rectangular full_matrices and
+ * different local-to-global indexing
+ * on rows and columns, respectively.
+ */
+ void set (const std::vector<unsigned int> &row_indices,
+ const std::vector<unsigned int> &col_indices,
+ const FullMatrix<TrilinosScalar> &full_matrix,
+ const bool elide_zero_values = false);
+
+ /**
+ * Set several elements in the
+ * specified row of the matrix with
+ * column indices as given by
+ * <tt>col_indices</tt> to the
+ * respective value.
+ *
+ * This function is able to insert
+ * new elements into the matrix as
+ * long as compress() has not been
+ * called, so the sparsity pattern
+ * will be extended. When compress()
+ * is called for the first time, then
+ * this is no longer possible and an
+ * insertion of elements at positions
+ * which have not been initialized
+ * will throw an exception.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be inserted anyway
+ * or they should be filtered
+ * away. The default value is
+ * <tt>false</tt>, i.e., even zero
+ * values are inserted/replaced.
+ */
+ void set (const unsigned int row,
+ const std::vector<unsigned int> &col_indices,
+ const std::vector<TrilinosScalar> &values,
+ const bool elide_zero_values = false);
+
+ /**
+ * Set several elements to values
+ * given by <tt>values</tt> in a
+ * given row in columns given by
+ * col_indices into the sparse
+ * matrix.
+ *
+ * This function is able to insert
+ * new elements into the matrix as
+ * long as compress() has not been
+ * called, so the sparsity pattern
+ * will be extended. When compress()
+ * is called for the first time, then
+ * this is no longer possible and an
+ * insertion of elements at positions
+ * which have not been initialized
+ * will throw an exception.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be inserted anyway
+ * or they should be filtered
+ * away. The default value is
+ * <tt>false</tt>, i.e., even zero
+ * values are inserted/replaced.
+ */
+ void set (const unsigned int row,
+ const unsigned int n_cols,
+ const unsigned int *col_indices,
+ const TrilinosScalar *values,
+ const bool elide_zero_values = false);
+
+ /**
+ * Add @p value to the element
+ * (<i>i,j</i>).
+ *
+ * Just as the respective call in
+ * deal.II SparseMatrix<Number>
+ * class (but in contrast to the
+ * situation for PETSc based
+ * matrices), this function
+ * throws an exception if an
+ * entry does not exist in the
+ * sparsity pattern. Moreover, if
+ * <tt>value</tt> is not a finite
+ * number an exception is thrown.
+ */
+ void add (const unsigned int i,
+ const unsigned int j,
+ const TrilinosScalar value);
+
+ /**
+ * Add all elements given in a
+ * FullMatrix<double> into sparse
+ * matrix locations given by
+ * <tt>indices</tt>. In other words,
+ * this function adds the elements in
+ * <tt>full_matrix</tt> to the
+ * respective entries in calling
+ * matrix, using the local-to-global
+ * indexing specified by
+ * <tt>indices</tt> for both the rows
+ * and the columns of the
+ * matrix. This function assumes a
+ * quadratic sparse matrix and a
+ * quadratic full_matrix, the usual
+ * situation in FE calculations.
+ *
+ * Just as the respective call in
+ * deal.II SparseMatrix<Number>
+ * class (but in contrast to the
+ * situation for PETSc based
+ * matrices), this function
+ * throws an exception if an
+ * entry does not exist in the
+ * sparsity pattern.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be added anyway or
+ * these should be filtered away and
+ * only non-zero data is added. The
+ * default value is <tt>true</tt>,
+ * i.e., zero values won't be added
+ * into the matrix.
+ */
+ void add (const std::vector<unsigned int> &indices,
+ const FullMatrix<TrilinosScalar> &full_matrix,
+ const bool elide_zero_values = true);
+
+ /**
+ * Same function as before, but now
+ * including the possibility to use
+ * rectangular full_matrices and
+ * different local-to-global indexing
+ * on rows and columns, respectively.
+ */
+ void add (const std::vector<unsigned int> &row_indices,
+ const std::vector<unsigned int> &col_indices,
+ const FullMatrix<TrilinosScalar> &full_matrix,
+ const bool elide_zero_values = true);
+
+ /**
+ * Add the entries of <tt>values</tt>
+ * to the elements in the specified
+ * row of the matrix whose column
+ * indices are given by
+ * <tt>col_indices</tt>.
+ *
+ * Just as the respective call in
+ * deal.II SparseMatrix<Number>
+ * class (but in contrast to the
+ * situation for PETSc based
+ * matrices), this function
+ * throws an exception if an
+ * entry does not exist in the
+ * sparsity pattern.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be added anyway or
+ * these should be filtered away and
+ * only non-zero data is added. The
+ * default value is <tt>true</tt>,
+ * i.e., zero values won't be added
+ * into the matrix.
+ */
+ void add (const unsigned int row,
+ const std::vector<unsigned int> &col_indices,
+ const std::vector<TrilinosScalar> &values,
+ const bool elide_zero_values = true);
+
+ /**
+ * Add an array of values given by
+ * <tt>values</tt> in the given
+ * global matrix row at columns
+ * specified by col_indices in the
+ * sparse matrix.
+ *
+ * NOTE(review): <tt>n_cols</tt> is
+ * presumably the number of entries
+ * in both the <tt>col_indices</tt>
+ * and the <tt>values</tt> array;
+ * confirm against the
+ * implementation.
+ *
+ * Just as the respective call in
+ * deal.II SparseMatrix<Number> class
+ * (but in contrast to the situation
+ * for PETSc based matrices), this
+ * function throws an exception if an
+ * entry does not exist in the
+ * sparsity pattern.
+ *
+ * The optional parameter
+ * <tt>elide_zero_values</tt> can be
+ * used to specify whether zero
+ * values should be added anyway or
+ * these should be filtered away and
+ * only non-zero data is added. The
+ * default value is <tt>true</tt>,
+ * i.e., zero values won't be added
+ * into the matrix.
+ *
+ * NOTE(review): the optional flag
+ * <tt>col_indices_are_sorted</tt>
+ * (default <tt>false</tt>) is not
+ * described here. Presumably it
+ * lets the caller state that
+ * <tt>col_indices</tt> is already
+ * in ascending order; confirm
+ * against the implementation.
+ */
+ void add (const unsigned int row,
+ const unsigned int n_cols,
+ const unsigned int *col_indices,
+ const TrilinosScalar *values,
+ const bool elide_zero_values = true,
+ const bool col_indices_are_sorted = false);
+
+ /**
+ * Multiply the entire matrix
+ * by a fixed factor.
+ */
+ SparseMatrix &operator *= (const TrilinosScalar factor);
+
+ /**
+ * Divide the entire matrix by
+ * a fixed factor.
+ */
+ SparseMatrix &operator /= (const TrilinosScalar factor);
+
+ /**
+ * Copy the given (Trilinos) matrix
+ * (sparsity pattern and entries).
+ */
+ void copy_from (const SparseMatrix &source);
+
+ /**
+ * Add <tt>matrix</tt> scaled by
+ * <tt>factor</tt> to this matrix,
+ * i.e. the matrix
+ * <tt>factor*matrix</tt> is added to
+ * <tt>this</tt>. If the sparsity
+ * pattern of the calling matrix does
+ * not contain all the elements in
+ * the sparsity pattern of the input
+ * matrix, this function will throw
+ * an exception.
+ */
+ void add (const TrilinosScalar factor,
+ const SparseMatrix &matrix);
+
+ /**
+ * Remove all elements from
+ * this <tt>row</tt> by setting
+ * them to zero. The function
+ * does not modify the number
+ * of allocated nonzero
+ * entries, it only sets some
+ * entries to zero. It may drop
+ * them from the sparsity
+ * pattern, though (but retains
+ * the allocated memory in case
+ * new entries are again added
+ * later). Note that this is a
+ * global operation, so this
+ * needs to be done on all MPI
+ * processes.
+ *
+ * This operation is used in
+ * eliminating constraints
+ * (e.g. due to hanging nodes)
+ * and makes sure that we can
+ * write this modification to
+ * the matrix without having to
+ * read entries (such as the
+ * locations of non-zero
+ * elements) from it —
+ * without this operation,
+ * removing constraints on
+ * %parallel matrices is a
+ * rather complicated
+ * procedure.
+ *
+ * The second parameter can be
+ * used to set the diagonal
+ * entry of this row to a value
+ * different from zero. The
+ * default is to set it to
+ * zero.
+ */
+ void clear_row (const unsigned int row,
+ const TrilinosScalar new_diag_value = 0);
+
+ /**
+ * Same as clear_row(), except
+ * that it works on a number of
+ * rows at once.
+ *
+ * The second parameter can be
+ * used to set the diagonal
+ * entries of all cleared rows
+ * to something different from
+ * zero. Note that all of these
+ * diagonal entries get the
+ * same value -- if you want
+ * different values for the
+ * diagonal entries, you have
+ * to set them by hand.
+ */
+ void clear_rows (const std::vector<unsigned int> &rows,
+ const TrilinosScalar new_diag_value = 0);
+
+ /**
+ * Make an in-place transpose
+ * of a matrix.
+ */
+ void transpose ();
+
+//@}
+ /**
+ * @name Entry Access
+ */
+//@{
+
+ /**
+ * Return the value of the
+ * entry (<i>i,j</i>). This
+ * may be an expensive
+ * operation and you should
+ * always take care where to
+ * call this function. As in
+ * the deal.II sparse matrix
+ * class, we throw an exception
+ * if the respective entry
+ * doesn't exist in the
+ * sparsity pattern of this
+ * class, which is requested
+ * from Trilinos. Moreover, an
+ * exception will be thrown
+ * when the requested element
+ * is not saved on the calling
+ * process.
+ */
+ TrilinosScalar operator () (const unsigned int i,
+ const unsigned int j) const;
+
+ /**
+ * Return the value of the
+ * matrix entry
+ * (<i>i,j</i>). If this entry
+ * does not exist in the
+ * sparsity pattern, then zero
+ * is returned. While this may
+ * be convenient in some cases,
+ * note that it is simple to
+ * write algorithms that are
+ * slow compared to an optimal
+ * solution, since the sparsity
+ * of the matrix is not used.
+ * On the other hand, if you
+ * want to be sure the entry
+ * exists, you should use
+ * operator() instead.
+ *
+ * The lack of error checking
+ * in this function can also
+ * yield surprising results if
+ * you have a parallel
+ * matrix. In that case, just
+ * because you get a zero
+ * result from this function
+ * does not mean that either
+ * the entry does not exist in
+ * the sparsity pattern or that
+ * it does but has a value of
+ * zero. Rather, it could also
+ * be that it simply isn't
+ * stored on the current
+ * processor; in that case, it
+ * may be stored on a different
+ * processor, and possibly so
+ * with a nonzero value.
+ */
+ TrilinosScalar el (const unsigned int i,
+ const unsigned int j) const;
+
+ /**
+ * Return the main diagonal
+ * element in the <i>i</i>th
+ * row. This function throws an
+ * error if the matrix is not
+ * quadratic and it also throws
+ * an error if <i>(i,i)</i> is not
+ * an element of the local matrix.
+ * See also the comment in
+ * trilinos_sparse_matrix.cc.
+ */
+ TrilinosScalar diag_element (const unsigned int i) const;
+
+//@}
+ /**
+ * @name Multiplications
+ */
+//@{
+
+ /**
+ * Matrix-vector multiplication:
+ * let <i>dst = M*src</i> with
+ * <i>M</i> being this matrix.
+ *
+ * Source and destination must
+ * not be the same vector.
+ *
+ * Note that both vectors have to
+ * be distributed vectors
+ * generated using the same Map
+ * as was used for the matrix in
+ * case you work on a distributed
+ * memory architecture, using the
+ * interface in the
+ * TrilinosWrappers::VectorBase
+ * class (or one of the two
+ * derived classes Vector and
+ * MPI::Vector).
+ *
+ * In case of a localized Vector,
+ * this function will only work
+ * when running on one processor,
+ * since the matrix object is
+ * inherently
+ * distributed. Otherwise, an
+ * exception will be thrown.
+ */
+ void vmult (VectorBase &dst,
+ const VectorBase &src) const;
+
+ /**
+ * Same as before, but working with
+ * deal.II's own distributed vector
+ * class.
+ */
+ void vmult (parallel::distributed::Vector<TrilinosScalar> &dst,
+ const parallel::distributed::Vector<TrilinosScalar> &src) const;
+
+ /**
+ * Matrix-vector multiplication:
+ * let <i>dst =
+ * M<sup>T</sup>*src</i> with
+ * <i>M</i> being this
+ * matrix. This function does the
+ * same as vmult() but takes the
+ * transposed matrix.
+ *
+ * Source and destination must
+ * not be the same vector.
+ *
+ * Note that both vectors have to
+ * be distributed vectors
+ * generated using the same Map
+ * as was used for the matrix in
+ * case you work on a distributed
+ * memory architecture, using the
+ * interface in the
+ * TrilinosWrappers::VectorBase
+ * class (or one of the two
+ * derived classes Vector and
+ * MPI::Vector).
+ *
+ * In case of a localized Vector,
+ * this function will only work
+ * when running on one processor,
+ * since the matrix object is
+ * inherently
+ * distributed. Otherwise, an
+ * exception will be thrown.
+ */
+ void Tvmult (VectorBase &dst,
+ const VectorBase &src) const;
+
+ /**
+ * Same as before, but working with
+ * deal.II's own distributed vector
+ * class.
+ */
+ void Tvmult (parallel::distributed::Vector<TrilinosScalar> &dst,
+ const parallel::distributed::Vector<TrilinosScalar> &src) const;
+
+ /**
+ * Adding Matrix-vector
+ * multiplication. Add
+ * <i>M*src</i> on <i>dst</i>
+ * with <i>M</i> being this
+ * matrix.
+ *
+ * Source and destination must
+ * not be the same vector.
+ *
+ * Note that both vectors have to
+ * be distributed vectors
+ * generated using the same Map
+ * as was used for the matrix in
+ * case you work on a distributed
+ * memory architecture, using the
+ * interface in the
+ * TrilinosWrappers::VectorBase
+ * class (or one of the two
+ * derived classes Vector and
+ * MPI::Vector).
+ *
+ * In case of a localized Vector,
+ * this function will only work
+ * when running on one processor,
+ * since the matrix object is
+ * inherently
+ * distributed. Otherwise, an
+ * exception will be thrown.
+ */
+ void vmult_add (VectorBase &dst,
+ const VectorBase &src) const;
+
+ /**
+ * Adding Matrix-vector
+ * multiplication. Add
+ * <i>M<sup>T</sup>*src</i> to
+ * <i>dst</i> with <i>M</i> being
+ * this matrix. This function
+ * does the same as vmult_add()
+ * but takes the transposed
+ * matrix.
+ *
+ * Source and destination must
+ * not be the same vector.
+ *
+ * Note that both vectors have to
+ * be distributed vectors
+ * generated using the same Map
+ * as was used for the matrix in
+ * case you work on a distributed
+ * memory architecture, using the
+ * interface in the
+ * TrilinosWrappers::VectorBase
+ * class (or one of the two
+ * derived classes Vector and
+ * MPI::Vector).
+ *
+ * In case of a localized Vector,
+ * this function will only work
+ * when running on one processor,
+ * since the matrix object is
+ * inherently
+ * distributed. Otherwise, an
+ * exception will be thrown.
+ */
+ void Tvmult_add (VectorBase &dst,
+ const VectorBase &src) const;
+
+ /**
+ * Return the square of the norm
+ * of the vector $v$ with respect
+ * to the norm induced by this
+ * matrix, i.e.,
+ * $\left(v,Mv\right)$. This is
+ * useful, e.g. in the finite
+ * element context, where the
+ * $L_2$ norm of a function
+ * equals the matrix norm with
+ * respect to the mass matrix of
+ * the vector representing the
+ * nodal values of the finite
+ * element function.
+ *
+ * Obviously, the matrix needs to
+ * be quadratic for this
+ * operation.
+ *
+ * The implementation of this
+ * function is not as efficient
+ * as the one in the @p
+ * SparseMatrix class used in
+ * deal.II (i.e. the original
+ * one, not the Trilinos wrapper
+ * class) since Trilinos doesn't
+ * support this operation and
+ * needs a temporary vector.
+ *
+ * Note that both vectors have to
+ * be distributed vectors
+ * generated using the same Map
+ * as was used for the matrix in
+ * case you work on a distributed
+ * memory architecture, using the
+ * interface in the
+ * TrilinosWrappers::VectorBase
+ * class (or one of the two
+ * derived classes Vector and
+ * MPI::Vector).
+ *
+ * In case of a localized Vector,
+ * this function will only work
+ * when running on one processor,
+ * since the matrix object is
+ * inherently
+ * distributed. Otherwise, an
+ * exception will be thrown.
+ */
+ TrilinosScalar matrix_norm_square (const VectorBase &v) const;
+
+ /**
+ * Compute the matrix scalar
+ * product $\left(u,Mv\right)$.
+ *
+ * The implementation of this
+ * function is not as efficient
+ * as the one in the @p
+ * SparseMatrix class used in
+ * deal.II (i.e. the original
+ * one, not the Trilinos
+ * wrapper class) since
+ * Trilinos doesn't support
+ * this operation and needs a
+ * temporary vector.
+ *
+ * Note that both vectors have to
+ * be distributed vectors
+ * generated using the same Map
+ * as was used for the matrix in
+ * case you work on a distributed
+ * memory architecture, using the
+ * interface in the
+ * TrilinosWrappers::VectorBase
+ * class (or one of the two
+ * derived classes Vector and
+ * MPI::Vector).
+ *
+ * In case of a localized Vector,
+ * this function will only work
+ * when running on one processor,
+ * since the matrix object is
+ * inherently
+ * distributed. Otherwise, an
+ * exception will be thrown.
+ */
+ TrilinosScalar matrix_scalar_product (const VectorBase &u,
+ const VectorBase &v) const;
+
+ /**
+ * Compute the residual of an
+ * equation <i>Mx=b</i>, where
+ * the residual is defined to
+ * be <i>r=b-Mx</i>. Write the
+ * residual into @p dst. The
+ * <i>l<sub>2</sub></i> norm of
+ * the residual vector is
+ * returned.
+ *
+ * Source <i>x</i> and
+ * destination <i>dst</i> must
+ * not be the same vector.
+ *
+ * Note that both vectors have to
+ * be distributed vectors
+ * generated using the same Map
+ * as was used for the matrix in
+ * case you work on a distributed
+ * memory architecture, using the
+ * interface in the
+ * TrilinosWrappers::VectorBase
+ * class (or one of the two
+ * derived classes Vector and
+ * MPI::Vector).
+ *
+ * In case of a localized Vector,
+ * this function will only work
+ * when running on one processor,
+ * since the matrix object is
+ * inherently
+ * distributed. Otherwise, an
+ * exception will be thrown.
+ */
+ TrilinosScalar residual (VectorBase &dst,
+ const VectorBase &x,
+ const VectorBase &b) const;
+
+ /**
+ * Perform the matrix-matrix
+ * multiplication <tt>C = A * B</tt>,
+ * or, if an optional vector argument
+ * is given, <tt>C = A * diag(V) *
+ * B</tt>, where <tt>diag(V)</tt>
+ * defines a diagonal matrix with the
+ * vector entries.
+ *
+ * This function assumes that the
+ * calling matrix <tt>A</tt> and
+ * <tt>B</tt> have compatible
+ * sizes. The size of <tt>C</tt> will
+ * be set within this function.
+ *
+ * The content as well as the sparsity
+ * pattern of the matrix C will be
+ * changed by this function, so make
+ * sure that the sparsity pattern is
+ * not used somewhere else in your
+ * program. This is an expensive
+ * operation, so think twice before you
+ * use this function.
+ */
+ void mmult (SparseMatrix &C,
+ const SparseMatrix &B,
+ const VectorBase &V = VectorBase()) const;
+
+
+ /**
+ * Perform the matrix-matrix
+ * multiplication with the transpose of
+ * <tt>this</tt>, i.e., <tt>C =
+ * A<sup>T</sup> * B</tt>, or, if an
+ * optional vector argument is given,
+ * <tt>C = A<sup>T</sup> * diag(V) *
+ * B</tt>, where <tt>diag(V)</tt>
+ * defines a diagonal matrix with the
+ * vector entries.
+ *
+ * This function assumes that the
+ * calling matrix <tt>A</tt> and
+ * <tt>B</tt> have compatible
+ * sizes. The size of <tt>C</tt> will
+ * be set within this function.
+ *
+ * The content as well as the sparsity
+ * pattern of the matrix C will be
+ * changed by this function, so make
+ * sure that the sparsity pattern is
+ * not used somewhere else in your
+ * program. This is an expensive
+ * operation, so think twice before you
+ * use this function.
+ */
+ void Tmmult (SparseMatrix &C,
+ const SparseMatrix &B,
+ const VectorBase &V = VectorBase()) const;
+
+//@}
+ /**
+ * @name Matrix norms
+ */
+//@{
+
+ /**
+ * Return the
+ * <i>l</i><sub>1</sub>-norm of
+ * the matrix, that is
+ * $|M|_1=
+ * \max_{\mathrm{all\ columns\ } j}
+ * \sum_{\mathrm{all\ rows\ } i}
+ * |M_{ij}|$, (max. sum
+ * of columns). This is the
+ * natural matrix norm that is
+ * compatible to the l1-norm for
+ * vectors, i.e. $|Mv|_1 \leq
+ * |M|_1 |v|_1$.
+ * (cf. Haemmerlin-Hoffmann:
+ * Numerische Mathematik)
+ */
+ TrilinosScalar l1_norm () const;
+
+ /**
+ * Return the linfty-norm of the
+ * matrix, that is
+ * $|M|_\infty=\max_{\mathrm{all\
+ * rows\ } i}\sum_{\mathrm{all\
+ * columns\ } j} |M_{ij}|$,
+ * (max. sum of rows). This is
+ * the natural matrix norm that
+ * is compatible to the
+ * linfty-norm of vectors, i.e.
+ * $|Mv|_\infty \leq |M|_\infty
+ * |v|_\infty$.
+ * (cf. Haemmerlin-Hoffmann:
+ * Numerische Mathematik)
+ */
+ TrilinosScalar linfty_norm () const;
+
+ /**
+ * Return the Frobenius norm of
+ * the matrix, i.e. the square
+ * root of the sum of squares
+ * of all entries in the
+ * matrix.
+ */
+ TrilinosScalar frobenius_norm () const;
+
+//@}
+ /**
+ * @name Access to underlying Trilinos data
+ */
+//@{
+
+ /**
+ * Return a const reference to the
+ * underlying Trilinos
+ * Epetra_CrsMatrix data.
+ */
+ const Epetra_CrsMatrix &trilinos_matrix () const;
+
+ /**
+ * Return a const reference to the
+ * underlying Trilinos
+ * Epetra_CrsGraph data that stores
+ * the sparsity pattern of the
+ * matrix.
+ */
+ const Epetra_CrsGraph &trilinos_sparsity_pattern () const;
+
+ /**
+ * Return a const reference to the
+ * underlying Trilinos Epetra_Map
+ * that sets the partitioning of the
+ * domain space of this matrix, i.e.,
+ * the partitioning of the vectors
+ * this matrix has to be multiplied
+ * with.
+ */
+ const Epetra_Map &domain_partitioner () const;
+
+ /**
+ * Return a const reference to the
+ * underlying Trilinos Epetra_Map
+ * that sets the partitioning of the
+ * range space of this matrix, i.e.,
+ * the partitioning of the vectors
+ * that result from matrix-vector
+ * products.
+ */
+ const Epetra_Map &range_partitioner () const;
+
+ /**
+ * Return a const reference to the
+ * underlying Trilinos Epetra_Map
+ * that sets the partitioning of the
+ * matrix rows. Equal to the
+ * partitioning of the range.
+ */
+ const Epetra_Map &row_partitioner () const;
+
+ /**
+ * Return a const reference to the
+ * underlying Trilinos Epetra_Map
+ * that sets the partitioning of the
+ * matrix columns. This is in general
+ * not equal to the partitioner
+ * Epetra_Map for the domain because
+ * of overlap in the matrix.
+ */
+ const Epetra_Map &col_partitioner () const;
+//@}
+ /**
+ * @name Iterators
+ */
+//@{
+
+ /**
+ * STL-like iterator with the
+ * first entry.
+ */
+ const_iterator begin () const;
+
+ /**
+ * Final iterator.
+ */
+ const_iterator end () const;
+
+ /**
+ * STL-like iterator with the
+ * first entry of row @p r.
+ *
+ * Note that if the given row
+ * is empty, i.e. does not
+ * contain any nonzero entries,
+ * then the iterator returned
+ * by this function equals
+ * <tt>end(r)</tt>. Note also
+ * that the iterator may not be
+ * dereferenceable in that case.
+ */
+ const_iterator begin (const unsigned int r) const;
+
+ /**
+ * Final iterator of row
+ * <tt>r</tt>. It points to the
+ * first element past the end
+ * of line @p r, or past the
+ * end of the entire sparsity
+ * pattern.
+ *
+ * Note that the end iterator
+ * is not necessarily
+ * dereferenceable. This is in
+ * particular the case if it is
+ * the end iterator for the
+ * last row of a matrix.
+ */
+ const_iterator end (const unsigned int r) const;
+
+//@}
+ /**
+ * @name Input/Output
+ */
+//@{
+
+ /**
+ * Meant to use the abstract
+ * Trilinos object that helps
+ * view other Trilinos objects
+ * in ASCII.
+ *
+ * TODO: This function is
+ * currently not implemented.
+ */
+ void write_ascii () const; // shuqiangwang
+
+ /**
+ * Print the matrix to the given
+ * stream, using the format
+ * <tt>(line,col) value</tt>, i.e. one
+ * nonzero entry of the matrix per
+ * line. The optional flag outputs the
+ * sparsity pattern in Trilinos style,
+ * where the data is sorted according
+ * to the processor number when printed
+ * to the stream, as well as a summary
+ * of the matrix like the global size.
+ */
+ void print (std::ostream &out,
+ const bool write_extended_trilinos_info = false) const;
+
+//@}
+ /** @addtogroup Exceptions
+ *
+ */
+//@{
+ /**
+ * Exception
+ */
+ DeclException1 (ExcTrilinosError,
+ int,
+ << "An error with error number " << arg1
+ << " occurred while calling a Trilinos function");
+
+ /**
+ * Exception
+ */
+ DeclException2 (ExcInvalidIndex,
+ int, int,
+ << "The entry with index <" << arg1 << ',' << arg2
+ << "> does not exist.");
+
+ /**
+ * Exception
+ */
+ DeclException0 (ExcSourceEqualsDestination);
+
+ /**
+ * Exception
+ */
+ DeclException0 (ExcMatrixNotCompressed);
+
+ /**
+ * Exception
+ */
+ DeclException4 (ExcAccessToNonLocalElement,
+ int, int, int, int,
+ << "You tried to access element (" << arg1
+ << "/" << arg2 << ")"
+ << " of a distributed matrix, but only rows "
+ << arg3 << " through " << arg4
+ << " are stored locally and can be accessed.");
+
+ /**
+ * Exception
+ */
+ DeclException2 (ExcAccessToNonPresentElement,
+ int, int,
+ << "You tried to access element (" << arg1
+ << "/" << arg2 << ")"
+ << " of a sparse matrix, but it appears to not"
+ << " exist in the Trilinos sparsity pattern.");
+//@}
+
+
+
+ protected:
+
+ /**
+ * For some matrix storage
+ * formats, in particular for the
+ * PETSc distributed blockmatrices,
+ * set and add operations on
+ * individual elements can not be
+ * freely mixed. Rather, one has
+ * to synchronize operations when
+ * one wants to switch from
+ * setting elements to adding to
+ * elements.
+ * BlockMatrixBase automatically
+ * synchronizes the access by
+ * calling this helper function
+ * for each block.
+ * This function ensures that the
+ * matrix is in a state that
+ * allows adding elements; if it
+ * previously already was in this
+ * state, the function does
+ * nothing.
+ */
+ void prepare_add();
+
+ /**
+ * Same as prepare_add() but
+ * prepare the matrix for setting
+ * elements if the representation
+ * of elements in this class
+ * requires such an operation.
+ */
+ void prepare_set();
+
+
+
+ private:
+
+ /**
+ * Pointer to the user-supplied
+ * Epetra Trilinos mapping of
+ * the matrix columns that
+ * assigns parts of the matrix
+ * to the individual processes.
+ */
+ std_cxx1x::shared_ptr<Epetra_Map> column_space_map;
+
+ /**
+ * A sparse matrix object in
+ * Trilinos to be used for
+ * finite element based
+ * problems which allows for
+ * assembling into non-local
+ * elements. The actual type,
+ * a sparse matrix, is set in
+ * the constructor.
+ */
+ std_cxx1x::shared_ptr<Epetra_FECrsMatrix> matrix;
+
+ /**
+ * Trilinos doesn't allow to mix
+ * additions to matrix entries and
+ * overwriting them (to make
+ * synchronisation of %parallel
+ * computations simpler). The way we
+ * do it is to, for each access
+ * operation, store whether it is an
+ * insertion or an addition. If the
+ * previous one was of different
+ * type, then we first have to flush
+ * the Trilinos buffers; otherwise,
+ * we can simply go on. Luckily,
+ * Trilinos has an object for this
+ * which does already all the
+ * %parallel communications in such a
+ * case, so we simply use their
+ * model, which stores whether the
+ * last operation was an addition or
+ * an insertion.
+ */
+ Epetra_CombineMode last_action;
+
+ /**
+ * A boolean variable to hold
+ * information on whether the
+ * matrix is compressed or not.
+ */
+ bool compressed;
+
+ /**
+ * An internal Trilinos vector that
+ * is used for accelerating vmult_add
+ * functions (in order not to need to
+ * recreate temporary vectors every
+ * time that function is called).
+ */
+ mutable VectorBase temp_vector;
+
+ /**
+ * An internal array of integer
+ * values that is used to store the
+ * column indices when
+ * adding/inserting local data into
+ * the (large) sparse matrix.
+ */
+ std::vector<unsigned int> column_indices;
+
+ /**
+ * An internal array of double values
+ * that is used to store the column
+ * values when adding/inserting
+ * local data into the (large) sparse
+ * matrix.
+ */
+ std::vector<TrilinosScalar> column_values;
+
+ /**
+ * To allow calling protected
+ * prepare_add() and
+ * prepare_set().
+ */
+ friend class BlockMatrixBase<SparseMatrix>;
+ };
+
+
+
+// -------------------------- inline and template functions ----------------------
+
+
+#ifndef DOXYGEN
+
+ namespace MatrixIterators
+ {
+
+   // Construct an accessor for entry number 'index' within row 'row'
+   // of 'matrix'. The const qualifier of the matrix pointer is cast
+   // away for storage, and visit_present_row() is called right away
+   // (defined elsewhere; presumably it fills the column/value caches
+   // read by column() and value() -- confirm there).
+   inline
+   const_iterator::Accessor::
+   Accessor (const SparseMatrix *matrix,
+             const unsigned int  row,
+             const unsigned int  index)
+     :
+     matrix(const_cast<SparseMatrix *>(matrix)),
+     a_row(row),
+     a_index(index)
+   {
+     visit_present_row ();
+   }
+
+
+   // Row of the entry this accessor points to. In debug mode this
+   // asserts that the accessor is not past the last row.
+   inline
+   unsigned int
+   const_iterator::Accessor::row() const
+   {
+     Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+     return a_row;
+   }
+
+
+
+   // Column of the entry this accessor points to, read from the
+   // cached column numbers of the present row.
+   inline
+   unsigned int
+   const_iterator::Accessor::column() const
+   {
+     Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+     return (*colnum_cache)[a_index];
+   }
+
+
+
+   // Position of the entry within its row.
+   inline
+   unsigned int
+   const_iterator::Accessor::index() const
+   {
+     Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+     return a_index;
+   }
+
+
+
+   // Value of the entry this accessor points to, read from the
+   // cached values of the present row.
+   inline
+   TrilinosScalar
+   const_iterator::Accessor::value() const
+   {
+     Assert (a_row < matrix->m(), ExcBeyondEndOfMatrix());
+     return (*value_cache)[a_index];
+   }
+
+
+
+   // Construct an iterator pointing to entry 'index' of row 'row'
+   // of 'matrix'; all work is delegated to the accessor.
+   inline
+   const_iterator::
+   const_iterator(const SparseMatrix *matrix,
+                  const unsigned int  row,
+                  const unsigned int  index)
+     :
+     accessor(matrix, row, index)
+   {}
+
+
+
+   // Prefix increment: advance to the next entry of the matrix,
+   // skipping over rows without entries.
+   inline
+   const_iterator &
+   const_iterator::operator++ ()
+   {
+     Assert (accessor.a_row < accessor.matrix->m(), ExcIteratorPastEnd());
+
+     ++accessor.a_index;
+
+     // If at end of line: do one step, then cycle until we find a
+     // row with a nonzero number of entries.
+     if (accessor.a_index >= accessor.colnum_cache->size())
+       {
+         accessor.a_index = 0;
+         ++accessor.a_row;
+
+         while ((accessor.a_row < accessor.matrix->m())
+                &&
+                (accessor.matrix->row_length(accessor.a_row) == 0))
+           ++accessor.a_row;
+
+         accessor.visit_present_row();
+       }
+     return *this;
+   }
+
+
+
+   // Postfix increment: advance, but return the iterator state
+   // from before the increment.
+   inline
+   const_iterator
+   const_iterator::operator++ (int)
+   {
+     const const_iterator old_state = *this;
+     ++(*this);
+     return old_state;
+   }
+
+
+
+   // Dereferencing yields the accessor.
+   inline
+   const const_iterator::Accessor &
+   const_iterator::operator* () const
+   {
+     return accessor;
+   }
+
+
+
+   // Member access goes through the accessor.
+   inline
+   const const_iterator::Accessor *
+   const_iterator::operator-> () const
+   {
+     return &accessor;
+   }
+
+
+
+   // Two iterators are equal if they point to the same row and the
+   // same position within that row. The matrix pointers are not
+   // compared.
+   inline
+   bool
+   const_iterator::
+   operator == (const const_iterator &other) const
+   {
+     return (accessor.a_row == other.accessor.a_row &&
+             accessor.a_index == other.accessor.a_index);
+   }
+
+
+
+   // Negation of operator==.
+   inline
+   bool
+   const_iterator::
+   operator != (const const_iterator &other) const
+   {
+     return ! (*this == other);
+   }
+
+
+
+   // Lexicographic ordering by (row, position within row).
+   //
+   // The raw a_row/a_index members are compared, exactly as in
+   // operator==, rather than going through Accessor::row() and
+   // Accessor::index(): those functions assert that the accessor is
+   // not past the last row, which would make every comparison
+   // against an end iterator (row == m()) fail in debug mode.
+   inline
+   bool
+   const_iterator::
+   operator < (const const_iterator &other) const
+   {
+     return (accessor.a_row < other.accessor.a_row ||
+             (accessor.a_row == other.accessor.a_row &&
+              accessor.a_index < other.accessor.a_index));
+   }
+
+ }
+
+
+
+ // Iterator pointing to position 0 of row 0.
+ inline
+ SparseMatrix::const_iterator
+ SparseMatrix::begin() const
+ {
+   const unsigned int first_row   = 0;
+   const unsigned int first_entry = 0;
+   return const_iterator(this, first_row, first_entry);
+ }
+
+
+
+ // Past-the-end iterator: position 0 of the (non-existent) row m().
+ inline
+ SparseMatrix::const_iterator
+ SparseMatrix::end() const
+ {
+   const unsigned int one_past_last_row = m();
+   return const_iterator(this, one_past_last_row, 0);
+ }
+
+
+
+ // Iterator to the first entry of row r. If that row holds no
+ // entries, the result is the same as end(r).
+ inline
+ SparseMatrix::const_iterator
+ SparseMatrix::begin(const unsigned int r) const
+ {
+   Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+   // empty row: hand back the end iterator of this row
+   if (!(row_length(r) > 0))
+     return end (r);
+
+   return const_iterator(this, r, 0);
+ }
+
+
+
+ // End iterator of row r: the first entry of the next row that
+ // has any entries, or end() if no such row follows.
+ inline
+ SparseMatrix::const_iterator
+ SparseMatrix::end(const unsigned int r) const
+ {
+   Assert (r < m(), ExcIndexRange(r, 0, m()));
+
+   // walk over the rows following r until one with at least one
+   // entry is found
+   unsigned int next_row = r+1;
+   while (next_row < m())
+     {
+       if (row_length(next_row) > 0)
+         return const_iterator(this, next_row, 0);
+       ++next_row;
+     }
+
+   // all remaining rows are empty (or r was the last row), so the
+   // end of the row coincides with the end of the matrix
+   return end();
+ }
+
+
+
+ // Return whether 'index' lies in the half-open interval
+ // [MinMyGID, MaxMyGID+1) of the row map of the Trilinos matrix.
+ inline
+ bool
+ SparseMatrix::in_local_range (const unsigned int index) const
+ {
+   const int first_local_row     = matrix->RowMap().MinMyGID();
+   const int past_last_local_row = matrix->RowMap().MaxMyGID()+1;
+
+   if (index < static_cast<unsigned int>(first_local_row))
+     return false;
+
+   return index < static_cast<unsigned int>(past_last_local_row);
+ }
+
+
+
+ // Report the state of the 'compressed' flag, which compress()
+ // sets to true and set() clears when writing to a row that is
+ // not owned by the calling process.
+ inline
+ bool
+ SparseMatrix::is_compressed () const
+ {
+   const bool currently_compressed = compressed;
+   return currently_compressed;
+ }
+
+
+
+ // Flush the Trilinos buffers and optimize the matrix storage.
+ // The operation argument is ignored. Throws ExcTrilinosError if
+ // either Trilinos call reports a nonzero error code.
+ inline
+ void
+ SparseMatrix::compress (::dealii::VectorOperation::values /*operation*/)
+ {
+   // flush buffers
+   const int assemble_status
+     = matrix->GlobalAssemble (*column_space_map, matrix->RowMap(), true);
+   AssertThrow (assemble_status == 0, ExcTrilinosError(assemble_status));
+
+   const int optimize_status = matrix->OptimizeStorage ();
+   AssertThrow (optimize_status == 0, ExcTrilinosError(optimize_status));
+
+   // neither an insertion nor an addition is pending any more
+   last_action = Zero;
+   compressed  = true;
+ }
+
+
+
+ // Assign the scalar d to all stored entries. Only d == 0 is
+ // allowed (checked in debug mode). The matrix is compressed
+ // before the entries are overwritten.
+ inline
+ SparseMatrix &
+ SparseMatrix::operator = (const double d)
+ {
+   Assert (d==0, ExcScalarAssignmentOnlyForZeroValue());
+
+   compress ();
+
+   const int put_status = matrix->PutScalar(d);
+   AssertThrow (put_status == 0, ExcTrilinosError(put_status));
+
+   return *this;
+ }
+
+
+
+ // Inline the set() and add()
+ // functions, since they will be
+ // called frequently, and the
+ // compiler can optimize away
+ // some unnecessary loops when
+ // the sizes are given at
+ // compile time.
+ // Set the single entry (i,j) to 'value'. The value must be
+ // finite (checked in debug mode). The work is forwarded to the
+ // pointer-based set() as a row of length one; zeros are not
+ // elided, so an explicit zero is written as well.
+ inline
+ void
+ SparseMatrix::set (const unsigned int i,
+                    const unsigned int j,
+                    const TrilinosScalar value)
+ {
+   Assert (numbers::is_finite(value), ExcNumberNotFinite());
+
+   const unsigned int   n_entries = 1;
+   const bool           elide_zero_values = false;
+   set (i, n_entries, &j, &value, elide_zero_values);
+ }
+
+
+
+ // Set a square block of entries: row i of 'values' is written
+ // to matrix row indices[i], at the columns given by 'indices'.
+ // 'values' must be square with as many rows as there are
+ // indices (checked in debug mode).
+ inline
+ void
+ SparseMatrix::set (const std::vector<unsigned int> &indices,
+                    const FullMatrix<TrilinosScalar> &values,
+                    const bool elide_zero_values)
+ {
+   Assert (indices.size() == values.m(),
+           ExcDimensionMismatch(indices.size(), values.m()));
+   Assert (values.m() == values.n(), ExcNotQuadratic());
+
+   // hand the block over one row at a time
+   unsigned int local_row = 0;
+   while (local_row < indices.size())
+     {
+       set (indices[local_row],
+            indices.size(),
+            &indices[0],
+            &values(local_row,0),
+            elide_zero_values);
+       ++local_row;
+     }
+ }
+
+
+
+ // Set a rectangular block of entries: row i of 'values' is
+ // written to matrix row row_indices[i], at the columns given by
+ // 'col_indices'. The dimensions of 'values' must match the two
+ // index vectors (checked in debug mode).
+ inline
+ void
+ SparseMatrix::set (const std::vector<unsigned int> &row_indices,
+                    const std::vector<unsigned int> &col_indices,
+                    const FullMatrix<TrilinosScalar> &values,
+                    const bool elide_zero_values)
+ {
+   Assert (row_indices.size() == values.m(),
+           ExcDimensionMismatch(row_indices.size(), values.m()));
+   Assert (col_indices.size() == values.n(),
+           ExcDimensionMismatch(col_indices.size(), values.n()));
+
+   // hand the block over one row at a time
+   unsigned int local_row = 0;
+   while (local_row < row_indices.size())
+     {
+       set (row_indices[local_row],
+            col_indices.size(),
+            &col_indices[0],
+            &values(local_row,0),
+            elide_zero_values);
+       ++local_row;
+     }
+ }
+
+
+
+ // Set several entries of one row: values[k] is written to
+ // column col_indices[k] of 'row'. Both vectors must have the
+ // same length (checked in debug mode). The work is forwarded to
+ // the pointer-based set().
+ inline
+ void
+ SparseMatrix::set (const unsigned int row,
+                    const std::vector<unsigned int> &col_indices,
+                    const std::vector<TrilinosScalar> &values,
+                    const bool elide_zero_values)
+ {
+   Assert (col_indices.size() == values.size(),
+           ExcDimensionMismatch(col_indices.size(), values.size()));
+
+   set (row,
+        col_indices.size(),
+        &col_indices[0],
+        &values[0],
+        elide_zero_values);
+ }
+
+
+
+ inline
+ void
+ SparseMatrix::set (const unsigned int row,
+ const unsigned int n_cols,
+ const unsigned int *col_indices,
+ const TrilinosScalar *values,
+ const bool elide_zero_values)
+ {
+ int ierr;
+ if (last_action == Add)
+ {
+ ierr = matrix->GlobalAssemble (*column_space_map, matrix->RowMap(),
+ true);
+
+ Assert (ierr == 0, ExcTrilinosError(ierr));
+ }
+
+ last_action = Insert;
+
+ int *col_index_ptr;
+ TrilinosScalar const *col_value_ptr;
+ int n_columns;
+
+ // If we don't elide zeros, the pointers
+ // are already available...
+ if (elide_zero_values == false)
+ {
+ col_index_ptr = (int *)col_indices;
+ col_value_ptr = values;
+ n_columns = n_cols;
+ }
+ else
+ {
+ // Otherwise, extract nonzero values in
+ // each row and get the respective
+ // indices.
+ if (column_indices.size() < n_cols)
+ {
+ column_indices.resize(n_cols);
+ column_values.resize(n_cols);
+ }
+
+ n_columns = 0;
+ for (unsigned int j=0; j<n_cols; ++j)
+ {
+ const double value = values[j];
+ Assert (numbers::is_finite(value), ExcNumberNotFinite());
+ if (value != 0)
+ {
+ column_indices[n_columns] = col_indices[j];
+ column_values[n_columns] = value;
+ n_columns++;
+ }
+ }
+
+ Assert(n_columns <= (int)n_cols, ExcInternalError());
+
+ col_index_ptr = (int *)&column_indices[0];
+ col_value_ptr = &column_values[0];
+ }
+
+
+ // If the calling matrix owns the row to
+ // which we want to insert values, we
+ // can directly call the Epetra_CrsMatrix
+ // input function, which is much faster
+ // than the Epetra_FECrsMatrix
+ // function. We distinguish between two
+ // cases: the first one is when the matrix
+ // is not filled (i.e., it is possible to
+ // add new elements to the sparsity pattern),
+ // and the second one is when the pattern is
+ // already fixed. In the former case, we
+ // add the possibility to insert new values,
+ // and in the second we just replace
+ // data.
+ if (row_partitioner().MyGID(static_cast<int>(row)) == true)
+ {
+ if (matrix->Filled() == false)
+ {
+ ierr = matrix->Epetra_CrsMatrix::InsertGlobalValues(row, n_columns,
+ const_cast<double *>(col_value_ptr),
+ col_index_ptr);
+
+ // When inserting elements, we do
+ // not want to create exceptions in
+ // the case when inserting non-local
+ // data (since that's what we want
+ // to do right now).
+ if (ierr > 0)
+ ierr = 0;
+ }
+ else
+ ierr = matrix->Epetra_CrsMatrix::ReplaceGlobalValues(row, n_columns,
+ const_cast<double *>(col_value_ptr),
+ col_index_ptr);
+ }
+ else
+ {
+ // When we're at off-processor data, we
+ // have to stick with the standard
+ // Insert/ReplaceGlobalValues
+ // function. Nevertheless, the way we
+ // call it is the fastest one (any other
+ // will lead to repeated allocation and
+ // deallocation of memory in order to
+ // call the function we already use,
+ // which is very inefficient if writing
+ // one element at a time).
+ compressed = false;
+
+ if (matrix->Filled() == false)
+ {
+ ierr = matrix->InsertGlobalValues (1, (int *)&row,
+ n_columns, col_index_ptr,
+ &col_value_ptr,
+ Epetra_FECrsMatrix::ROW_MAJOR);
+ if (ierr > 0)
+ ierr = 0;
+ }
+ else
+ ierr = matrix->ReplaceGlobalValues (1, (int *)&row,
+ n_columns, col_index_ptr,
+ &col_value_ptr,
+ Epetra_FECrsMatrix::ROW_MAJOR);
+ }
+
+ Assert (ierr <= 0, ExcAccessToNonPresentElement(row, col_index_ptr[0]));
+ AssertThrow (ierr >= 0, ExcTrilinosError(ierr));
+ }
+
+
+
+  // Add a single value to the entry (i,j). A zero addend writes nothing
+  // into the matrix, but the transition from Insert to Add mode
+  // (including the GlobalAssemble call) is still carried out so that the
+  // bookkeeping matches what the pointer-based add() overload does.
+  inline
+  void
+  SparseMatrix::add (const unsigned int i,
+                     const unsigned int j,
+                     const TrilinosScalar value)
+  {
+    Assert (numbers::is_finite(value), ExcNumberNotFinite());
+
+    // Anything that is not exactly zero is handed to the general
+    // row-wise overload, which does its own mode handling.
+    if (value != 0)
+      {
+        add (i, 1, &j, &value, false);
+        return;
+      }
+
+    // Zero addend: only perform the mode switch.
+    if (last_action == Insert)
+      {
+        const int ierr = matrix->GlobalAssemble(*column_space_map,
+                                                row_partitioner(), false);
+
+        Assert (ierr == 0, ExcTrilinosError(ierr));
+        (void)ierr; // removes -Wunused-but-set-variable in optimized mode
+      }
+
+    last_action = Add;
+  }
+
+
+
+  // Add a square block of values: entry (r,c) of @p values is added to
+  // the matrix entry (indices[r], indices[c]). Each row of the block is
+  // forwarded to the pointer-based add() overload.
+  inline
+  void
+  SparseMatrix::add (const std::vector<unsigned int> &indices,
+                     const FullMatrix<TrilinosScalar> &values,
+                     const bool elide_zero_values)
+  {
+    Assert (indices.size() == values.m(),
+            ExcDimensionMismatch(indices.size(), values.m()));
+    Assert (values.m() == values.n(), ExcNotQuadratic());
+
+    const unsigned int n_entries = indices.size();
+
+    for (unsigned int r=0; r<n_entries; ++r)
+      {
+        // &values(r,0) is passed as the start of n_entries consecutive
+        // values belonging to row r of the block.
+        add (indices[r], n_entries, &indices[0], &values(r,0),
+             elide_zero_values);
+      }
+  }
+
+
+
+  // Add a rectangular block of values: entry (r,c) of @p values is added
+  // to the matrix entry (row_indices[r], col_indices[c]). Each row of the
+  // block is forwarded to the pointer-based add() overload.
+  inline
+  void
+  SparseMatrix::add (const std::vector<unsigned int> &row_indices,
+                     const std::vector<unsigned int> &col_indices,
+                     const FullMatrix<TrilinosScalar> &values,
+                     const bool elide_zero_values)
+  {
+    Assert (row_indices.size() == values.m(),
+            ExcDimensionMismatch(row_indices.size(), values.m()));
+    Assert (col_indices.size() == values.n(),
+            ExcDimensionMismatch(col_indices.size(), values.n()));
+
+    const unsigned int n_block_rows = row_indices.size();
+
+    for (unsigned int r=0; r<n_block_rows; ++r)
+      {
+        add (row_indices[r], col_indices.size(), &col_indices[0],
+             &values(r,0), elide_zero_values);
+      }
+  }
+
+
+
+  // Add several values to one row: values[k] is added to the entry
+  // (row, col_indices[k]). Thin wrapper that unpacks the two vectors and
+  // calls the pointer-based add() overload.
+  inline
+  void
+  SparseMatrix::add (const unsigned int row,
+                     const std::vector<unsigned int> &col_indices,
+                     const std::vector<TrilinosScalar> &values,
+                     const bool elide_zero_values)
+  {
+    Assert (col_indices.size() == values.size(),
+            ExcDimensionMismatch(col_indices.size(), values.size()));
+
+    const unsigned int *const first_index = &col_indices[0];
+    const TrilinosScalar *const first_value = &values[0];
+
+    add (row, col_indices.size(), first_index, first_value,
+         elide_zero_values);
+  }
+
+
+
+  // Add n_cols values to one matrix row: values[k] is added to the entry
+  // (row, col_indices[k]).
+  //
+  // row                - global index of the row to add into
+  // n_cols             - number of entries in col_indices and values
+  // col_indices        - global column indices
+  // values             - addends, one per column index
+  // elide_zero_values  - if true, entries that are exactly zero are
+  //                      filtered out before calling Trilinos
+  // (last argument)    - unused by this implementation
+  //
+  // Throws ExcTrilinosError (via AssertThrow) if the mode switch from
+  // Insert to Add fails; all other checks are debug-mode Asserts.
+  inline
+  void
+  SparseMatrix::add (const unsigned int row,
+                     const unsigned int n_cols,
+                     const unsigned int *col_indices,
+                     const TrilinosScalar *values,
+                     const bool elide_zero_values,
+                     const bool /*col_indices_are_sorted*/)
+  {
+    int ierr;
+    if (last_action == Insert)
+      {
+        // TODO: this could lead to a dead lock when only one processor
+        // calls GlobalAssemble.
+        ierr = matrix->GlobalAssemble(*column_space_map,
+                                      row_partitioner(), false);
+
+        AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+      }
+
+    last_action = Add;
+
+    int *col_index_ptr;
+    TrilinosScalar const *col_value_ptr;
+    int n_columns;
+
+    // If we don't elide zeros, the pointers
+    // are already available...
+    if (elide_zero_values == false)
+      {
+        // Trilinos takes int indices; the unsigned indices are
+        // reinterpreted in place rather than copied.
+        col_index_ptr = (int *)col_indices;
+        col_value_ptr = values;
+        n_columns = n_cols;
+#ifdef DEBUG
+        for (unsigned int j=0; j<n_cols; ++j)
+          Assert (numbers::is_finite(values[j]), ExcNumberNotFinite());
+#endif
+      }
+    else
+      {
+        // Otherwise, extract nonzero values in
+        // each row and the corresponding index.
+        // column_indices and column_values are scratch buffers
+        // declared outside this function; they only ever grow.
+        if (column_indices.size() < n_cols)
+          {
+            column_indices.resize(n_cols);
+            column_values.resize(n_cols);
+          }
+
+        n_columns = 0;
+        for (unsigned int j=0; j<n_cols; ++j)
+          {
+            const double value = values[j];
+            Assert (numbers::is_finite(value), ExcNumberNotFinite());
+            if (value != 0)
+              {
+                column_indices[n_columns] = col_indices[j];
+                column_values[n_columns] = value;
+                n_columns++;
+              }
+          }
+
+        Assert(n_columns <= (int)n_cols, ExcInternalError());
+
+        col_index_ptr = (int *)&column_indices[0];
+        col_value_ptr = &column_values[0];
+      }
+
+    // If the calling matrix owns the row to
+    // which we want to add values, we
+    // can directly call the Epetra_CrsMatrix
+    // input function, which is much faster
+    // than the Epetra_FECrsMatrix function.
+    if (row_partitioner().MyGID(static_cast<int>(row)) == true)
+      {
+        ierr = matrix->Epetra_CrsMatrix::SumIntoGlobalValues(row, n_columns,
+                                                             const_cast<double *>(col_value_ptr),
+                                                             col_index_ptr);
+      }
+    else
+      {
+        // When we're at off-processor data, we
+        // have to stick with the standard
+        // SumIntoGlobalValues
+        // function. Nevertheless, the way we
+        // call it is the fastest one (any other
+        // will lead to repeated allocation and
+        // deallocation of memory in order to
+        // call the function we already use,
+        // which is very inefficient if writing
+        // one element at a time).
+        compressed = false;
+
+        ierr = matrix->SumIntoGlobalValues (1, (int *)&row, n_columns,
+                                            col_index_ptr,
+                                            &col_value_ptr,
+                                            Epetra_FECrsMatrix::ROW_MAJOR);
+      }
+
+#ifdef DEBUG
+    // A positive return code leads to the ExcAccessToNonPresentElement
+    // assertion below; print the requested columns and the columns
+    // stored for this row first to make that failure easier to debug.
+    if (ierr > 0)
+      {
+        std::cout << "------------------------------------------"
+                  << std::endl;
+        std::cout << "Got error " << ierr << " in row " << row
+                  << " of proc " << row_partitioner().Comm().MyPID()
+                  << " when trying to add the columns:" << std::endl;
+        for (int i=0; i<n_columns; ++i)
+          std::cout << col_index_ptr[i] << " ";
+        std::cout << std::endl << std::endl;
+        std::cout << "Matrix row has the following indices:" << std::endl;
+        // Initialize the outputs: if the row view cannot be extracted
+        // (for instance because the row is not stored on this
+        // processor), the loop below must not read indeterminate
+        // values.
+        int n_indices = 0, *indices = 0;
+        trilinos_sparsity_pattern().ExtractMyRowView(row_partitioner().LID(static_cast<int>(row)),
+                                                     n_indices,
+                                                     indices);
+        for (int i=0; i<n_indices; ++i)
+          std::cout << indices[i] << " ";
+        std::cout << std::endl << std::endl;
+        Assert (ierr <= 0,
+                ExcAccessToNonPresentElement(row, col_index_ptr[0]));
+      }
+#endif
+    Assert (ierr >= 0, ExcTrilinosError(ierr));
+  }
+
+
+
+ // inline "simple" functions that are
+ // called frequently and do only involve
+ // a call to some Trilinos function.
+  // Number of rows of the matrix, as reported by Trilinos'
+  // NumGlobalRows().
+  inline
+  unsigned int
+  SparseMatrix::m () const
+  {
+    const unsigned int n_global_rows = matrix->NumGlobalRows();
+    return n_global_rows;
+  }
+
+
+
+  // Number of columns of the matrix, as reported by Trilinos'
+  // NumGlobalCols().
+  inline
+  unsigned int
+  SparseMatrix::n () const
+  {
+    const unsigned int n_global_cols = matrix->NumGlobalCols();
+    return n_global_cols;
+  }
+
+
+
+  // Number of matrix rows held by the calling process, as reported by
+  // Trilinos' NumMyRows().
+  inline
+  unsigned int
+  SparseMatrix::local_size () const
+  {
+    const unsigned int n_my_rows = matrix->NumMyRows();
+    return n_my_rows;
+  }
+
+
+
+  // Half-open range [first, past_last) of global row indices on this
+  // process, built from the smallest and the largest global index in
+  // the row map.
+  inline
+  std::pair<unsigned int, unsigned int>
+  SparseMatrix::local_range () const
+  {
+    const unsigned int first     = matrix->RowMap().MinMyGID();
+    const unsigned int past_last = matrix->RowMap().MaxMyGID()+1;
+
+    return std::pair<unsigned int, unsigned int> (first, past_last);
+  }
+
+
+
+  // Number of stored entries of the matrix, as reported by Trilinos'
+  // NumGlobalNonzeros().
+  inline
+  unsigned int
+  SparseMatrix::n_nonzero_elements () const
+  {
+    const unsigned int n_global_nonzeros = matrix->NumGlobalNonzeros();
+    return n_global_nonzeros;
+  }
+
+
+
+  // Reinitialize the matrix from a sparsity pattern, using the same
+  // IndexSet-based partitioning for rows and columns. The IndexSet is
+  // converted into an Epetra_Map and the call is forwarded to the
+  // Epetra_Map-based reinit().
+  template <typename SparsityType>
+  inline
+  void SparseMatrix::reinit (const IndexSet &parallel_partitioning,
+                             const SparsityType &sparsity_pattern,
+                             const MPI_Comm &communicator,
+                             const bool exchange_data)
+  {
+    Epetra_Map map = parallel_partitioning.make_trilinos_map (communicator, false);
+    reinit (map, map, sparsity_pattern, exchange_data);
+  }
+
+
+
+  // Reinitialize the matrix from a sparsity pattern with separate
+  // IndexSet-based partitionings for rows and columns. Both IndexSets
+  // are converted into Epetra_Maps (rows first) and the call is
+  // forwarded to the Epetra_Map-based reinit().
+  template <typename SparsityType>
+  inline
+  void SparseMatrix::reinit (const IndexSet &row_parallel_partitioning,
+                             const IndexSet &col_parallel_partitioning,
+                             const SparsityType &sparsity_pattern,
+                             const MPI_Comm &communicator,
+                             const bool exchange_data)
+  {
+    Epetra_Map rows = row_parallel_partitioning.make_trilinos_map (communicator,
+                                                                   false);
+    Epetra_Map columns = col_parallel_partitioning.make_trilinos_map (communicator,
+                                                                      false);
+
+    reinit (rows, columns, sparsity_pattern, exchange_data);
+  }
+
+
+
+  // Reinitialize this matrix from a deal.II SparseMatrix, using the same
+  // IndexSet-based partitioning for rows and columns. The IndexSet is
+  // converted into an Epetra_Map and all remaining arguments are passed
+  // through unchanged to the Epetra_Map-based reinit().
+  template <typename number>
+  inline
+  void SparseMatrix::reinit (const IndexSet &parallel_partitioning,
+                             const ::dealii::SparseMatrix<number> &sparse_matrix,
+                             const MPI_Comm &communicator,
+                             const double drop_tolerance,
+                             const bool copy_values,
+                             const ::dealii::SparsityPattern *use_this_sparsity)
+  {
+    Epetra_Map map = parallel_partitioning.make_trilinos_map (communicator, false);
+    reinit (map, map, sparse_matrix, drop_tolerance, copy_values,
+            use_this_sparsity);
+  }
+
+
+
+  // Reinitialize this matrix from a deal.II SparseMatrix with separate
+  // IndexSet-based partitionings for rows and columns. Both IndexSets
+  // are converted into Epetra_Maps (rows first) and all remaining
+  // arguments are passed through unchanged to the Epetra_Map-based
+  // reinit().
+  template <typename number>
+  inline
+  void SparseMatrix::reinit (const IndexSet &row_parallel_partitioning,
+                             const IndexSet &col_parallel_partitioning,
+                             const ::dealii::SparseMatrix<number> &sparse_matrix,
+                             const MPI_Comm &communicator,
+                             const double drop_tolerance,
+                             const bool copy_values,
+                             const ::dealii::SparsityPattern *use_this_sparsity)
+  {
+    Epetra_Map rows = row_parallel_partitioning.make_trilinos_map (communicator,
+                                                                   false);
+    Epetra_Map columns = col_parallel_partitioning.make_trilinos_map (communicator,
+                                                                      false);
+
+    reinit (rows, columns, sparse_matrix, drop_tolerance, copy_values,
+            use_this_sparsity);
+  }
+
+
+
+  // Return the value of Trilinos' NormOne() for this matrix. The matrix
+  // has to be in the filled state (checked in debug mode).
+  inline
+  TrilinosScalar
+  SparseMatrix::l1_norm () const
+  {
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    const TrilinosScalar norm = matrix->NormOne();
+    return norm;
+  }
+
+
+
+  // Return the value of Trilinos' NormInf() for this matrix. The matrix
+  // has to be in the filled state (checked in debug mode).
+  inline
+  TrilinosScalar
+  SparseMatrix::linfty_norm () const
+  {
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    const TrilinosScalar norm = matrix->NormInf();
+    return norm;
+  }
+
+
+
+  // Return the value of Trilinos' NormFrobenius() for this matrix. The
+  // matrix has to be in the filled state (checked in debug mode).
+  inline
+  TrilinosScalar
+  SparseMatrix::frobenius_norm () const
+  {
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    const TrilinosScalar norm = matrix->NormFrobenius();
+    return norm;
+  }
+
+
+
+  // Multiply the matrix by the scalar @p a by delegating to the Scale()
+  // function of the wrapped Trilinos matrix. A nonzero return code
+  // triggers an assertion in debug mode. Returns a reference to this
+  // object.
+  inline
+  SparseMatrix &
+  SparseMatrix::operator *= (const TrilinosScalar a)
+  {
+    const TrilinosScalar scaling_factor = a;
+
+    const int ierr = matrix->Scale (scaling_factor);
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+    (void)ierr; // removes -Wunused-variable in optimized mode
+
+    return *this;
+  }
+
+
+
+  // Divide the matrix by the scalar @p a, implemented as a scaling by
+  // the reciprocal 1/a. Division by zero is rejected by an assertion in
+  // debug mode. Returns a reference to this object.
+  inline
+  SparseMatrix &
+  SparseMatrix::operator /= (const TrilinosScalar a)
+  {
+    Assert (a !=0, ExcDivideByZero());
+
+    // Scale by the reciprocal in a single call.
+    const int ierr = matrix->Scale (1./a);
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+    (void)ierr; // removes -Wunused-variable in optimized mode
+
+    return *this;
+  }
+
+
+
+  // Matrix-vector product dst = M * src, computed by Trilinos'
+  // Multiply() with the transpose flag switched off. In debug mode it is
+  // checked that src and dst are distinct objects, that the matrix is
+  // filled, and that the vector maps match the domain map (src) and the
+  // range map (dst) of the matrix.
+  inline
+  void
+  SparseMatrix::vmult (VectorBase &dst,
+                       const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    Assert (src.vector_partitioner().SameAs(matrix->DomainMap()) == true,
+            ExcMessage ("Column map of matrix does not fit with vector map!"));
+    Assert (dst.vector_partitioner().SameAs(matrix->RangeMap()) == true,
+            ExcMessage ("Row map of matrix does not fit with vector map!"));
+
+    const bool use_transpose = false;
+
+    const int ierr = matrix->Multiply (use_transpose,
+                                       src.trilinos_vector(),
+                                       dst.trilinos_vector());
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+    (void)ierr; // removes -Wunused-variable in optimized mode
+  }
+
+
+
+  // Matrix-vector product dst = M * src for deal.II's
+  // parallel::distributed::Vector. The data of both vectors is wrapped
+  // into Epetra_Vector objects created in View mode on the range map
+  // (dst) and the domain map (src) of the matrix, and Trilinos'
+  // Multiply() is applied to these wrappers.
+  inline
+  void
+  SparseMatrix::vmult (parallel::distributed::Vector<TrilinosScalar> &dst,
+                       const parallel::distributed::Vector<TrilinosScalar> &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    AssertDimension (dst.local_size(), static_cast<unsigned int>(matrix->RangeMap().NumMyElements()));
+    AssertDimension (src.local_size(), static_cast<unsigned int>(matrix->DomainMap().NumMyElements()));
+
+    // The Epetra_Vector constructor takes a non-const pointer, hence
+    // the const_cast on the source data.
+    double *const src_data = const_cast<double *>(src.begin());
+
+    Epetra_Vector dst_view (View, matrix->RangeMap(), dst.begin());
+    Epetra_Vector src_view (View, matrix->DomainMap(), src_data);
+
+    const bool use_transpose = false;
+
+    const int ierr = matrix->Multiply (use_transpose, src_view, dst_view);
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+    (void)ierr; // removes -Wunused-variable in optimized mode
+  }
+
+
+
+  // Transposed matrix-vector product dst = M^T * src, computed by
+  // Trilinos' Multiply() with the transpose flag switched on. In debug
+  // mode it is checked that src and dst are distinct objects, that the
+  // matrix is filled, and that the vector maps match the range map
+  // (src) and the domain map (dst) of the matrix.
+  inline
+  void
+  SparseMatrix::Tvmult (VectorBase &dst,
+                        const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    // The roles of the maps are swapped compared to vmult(): src is
+    // compared against the range (row) map and dst against the domain
+    // (column) map, and the messages name the map that was checked.
+    Assert (src.vector_partitioner().SameAs(matrix->RangeMap()) == true,
+            ExcMessage ("Row map of matrix does not fit with vector map!"));
+    Assert (dst.vector_partitioner().SameAs(matrix->DomainMap()) == true,
+            ExcMessage ("Column map of matrix does not fit with vector map!"));
+
+    const int ierr = matrix->Multiply (true, src.trilinos_vector(),
+                                       dst.trilinos_vector());
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+    (void)ierr; // removes -Wunused-variable in optimized mode
+  }
+
+
+
+  // Transposed matrix-vector product dst = M^T * src for deal.II's
+  // parallel::distributed::Vector. The data of both vectors is wrapped
+  // into Epetra_Vector objects created in View mode on the domain map
+  // (dst) and the range map (src) of the matrix, and Trilinos'
+  // Multiply() is applied with the transpose flag switched on.
+  inline
+  void
+  SparseMatrix::Tvmult (parallel::distributed::Vector<TrilinosScalar> &dst,
+                        const parallel::distributed::Vector<TrilinosScalar> &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+    Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+    AssertDimension (dst.local_size(), static_cast<unsigned int>(matrix->DomainMap().NumMyElements()));
+    AssertDimension (src.local_size(), static_cast<unsigned int>(matrix->RangeMap().NumMyElements()));
+
+    // The Epetra_Vector constructor takes a non-const pointer, hence
+    // the const_cast on the source data.
+    double *const src_data = const_cast<double *>(src.begin());
+
+    Epetra_Vector dst_view (View, matrix->DomainMap(), dst.begin());
+    Epetra_Vector src_view (View, matrix->RangeMap(), src_data);
+
+    const bool use_transpose = true;
+
+    const int ierr = matrix->Multiply (use_transpose, src_view, dst_view);
+    Assert (ierr == 0, ExcTrilinosError(ierr));
+    (void)ierr; // removes -Wunused-variable in optimized mode
+  }
+
+
+
+  // Compute dst += M * src. The product is first evaluated into the
+  // scratch vector temp_vector and then added to dst.
+  inline
+  void
+  SparseMatrix::vmult_add (VectorBase &dst,
+                           const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    // Give the scratch vector the layout of dst. The 'fast' variant of
+    // reinit is requested because the content gets overwritten by the
+    // vmult() call right below anyway.
+    const bool fast = true;
+    temp_vector.reinit(dst, fast);
+
+    vmult (temp_vector, src);
+    dst += temp_vector;
+  }
+
+
+
+  // Compute dst += M^T * src. The transposed product is first evaluated
+  // into the scratch vector temp_vector and then added to dst.
+  inline
+  void
+  SparseMatrix::Tvmult_add (VectorBase &dst,
+                            const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    // Give the scratch vector the layout of dst using the 'fast'
+    // variant of reinit; Tvmult() fills it right afterwards.
+    const bool fast = true;
+    temp_vector.reinit(dst, fast);
+
+    Tvmult (temp_vector, src);
+    dst += temp_vector;
+  }
+
+
+
+  // Return the product (M*v) * v. Row and domain partitioning of the
+  // matrix have to coincide (checked in debug mode). The scratch vector
+  // temp_vector holds the intermediate product M*v.
+  inline
+  TrilinosScalar
+  SparseMatrix::matrix_norm_square (const VectorBase &v) const
+  {
+    Assert (row_partitioner().SameAs(domain_partitioner()),
+            ExcNotQuadratic());
+
+    temp_vector.reinit(v);
+    vmult (temp_vector, v);
+
+    const TrilinosScalar result = temp_vector*v;
+    return result;
+  }
+
+
+
+  // Return the product u * (M*v). Row and domain partitioning of the
+  // matrix have to coincide (checked in debug mode). The scratch vector
+  // temp_vector holds the intermediate product M*v.
+  inline
+  TrilinosScalar
+  SparseMatrix::matrix_scalar_product (const VectorBase &u,
+                                       const VectorBase &v) const
+  {
+    Assert (row_partitioner().SameAs(domain_partitioner()),
+            ExcNotQuadratic());
+
+    temp_vector.reinit(v);
+    vmult (temp_vector, v);
+
+    const TrilinosScalar result = u*temp_vector;
+    return result;
+  }
+
+
+
+  // Compute the residual dst = b - M*x and return its l2 norm. The
+  // residual is assembled in three steps: dst = M*x, dst -= b, and a
+  // final sign flip.
+  inline
+  TrilinosScalar
+  SparseMatrix::residual (VectorBase &dst,
+                          const VectorBase &x,
+                          const VectorBase &b) const
+  {
+    vmult (dst, x);   // dst = M*x
+    dst -= b;         // dst = M*x - b
+    dst *= -1.;       // dst = b - M*x
+
+    const TrilinosScalar residual_norm = dst.l2_norm();
+    return residual_norm;
+  }
+
+
+  // Give read-only access to the wrapped Trilinos matrix, seen through
+  // its Epetra_CrsMatrix interface.
+  inline
+  const Epetra_CrsMatrix &
+  SparseMatrix::trilinos_matrix () const
+  {
+    const Epetra_CrsMatrix &crs_matrix
+      = static_cast<const Epetra_CrsMatrix &>(*matrix);
+    return crs_matrix;
+  }
+
+
+
+  // Give read-only access to the Epetra_CrsGraph that the wrapped
+  // Trilinos matrix returns from Graph().
+  inline
+  const Epetra_CrsGraph &
+  SparseMatrix::trilinos_sparsity_pattern () const
+  {
+    const Epetra_CrsGraph &graph = matrix->Graph();
+    return graph;
+  }
+
+
+
+  // Return the domain map of the wrapped Trilinos matrix.
+  inline
+  const Epetra_Map &
+  SparseMatrix::domain_partitioner () const
+  {
+    const Epetra_Map &domain_map = matrix->DomainMap();
+    return domain_map;
+  }
+
+
+
+  // Return the range map of the wrapped Trilinos matrix.
+  inline
+  const Epetra_Map &
+  SparseMatrix::range_partitioner () const
+  {
+    const Epetra_Map &range_map = matrix->RangeMap();
+    return range_map;
+  }
+
+
+
+  // Return the row map of the wrapped Trilinos matrix.
+  inline
+  const Epetra_Map &
+  SparseMatrix::row_partitioner () const
+  {
+    const Epetra_Map &row_map = matrix->RowMap();
+    return row_map;
+  }
+
+
+
+  // Return the column map of the wrapped Trilinos matrix.
+  inline
+  const Epetra_Map &
+  SparseMatrix::col_partitioner () const
+  {
+    const Epetra_Map &col_map = matrix->ColMap();
+    return col_map;
+  }
+
+
+
+ // Intentionally empty: this implementation performs no work.
+ // NOTE(review): presumably meant to be called before a series of add
+ // operations -- confirm against the callers.
+ inline
+ void
+ SparseMatrix::prepare_add()
+ {
+ //nothing to do here
+ }
+
+
+
+ // Intentionally empty: this implementation performs no work.
+ // NOTE(review): presumably meant to be called before a series of set
+ // operations -- confirm against the callers.
+ inline
+ void
+ SparseMatrix::prepare_set()
+ {
+ //nothing to do here
+ }
+
+
+
+#endif // DOXYGEN
+
+}
+
+
+DEAL_II_NAMESPACE_CLOSE
+
+
+#endif // DEAL_II_USE_TRILINOS
+
+
+/*----------------------- trilinos_sparse_matrix.h --------------------*/
+
+#endif
+/*----------------------- trilinos_sparse_matrix.h --------------------*/
Added: branches/s-wang2/for_deal.II/include/deal.II/lac/trilinos_vector_base.h
===================================================================
--- branches/s-wang2/for_deal.II/include/deal.II/lac/trilinos_vector_base.h (rev 0)
+++ branches/s-wang2/for_deal.II/include/deal.II/lac/trilinos_vector_base.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,1997 @@
+//---------------------------------------------------------------------------
+// $Id: trilinos_vector_base.h 27628 2012-11-20 22:49:26Z heister $
+//
+// Copyright (C) 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+#ifndef __deal2__trilinos_vector_base_h
+#define __deal2__trilinos_vector_base_h
+
+
+#include <deal.II/base/config.h>
+
+#ifdef DEAL_II_USE_TRILINOS
+
+#include <deal.II/base/utilities.h>
+# include <deal.II/base/std_cxx1x/shared_ptr.h>
+# include <deal.II/base/subscriptor.h>
+# include <deal.II/lac/exceptions.h>
+# include <deal.II/lac/vector.h>
+
+# include <vector>
+# include <utility>
+# include <memory>
+
+# define TrilinosScalar double
+# include "Epetra_ConfigDefs.h"
+# ifdef DEAL_II_COMPILER_SUPPORTS_MPI // only if MPI is installed
+# include "mpi.h"
+# include "Epetra_MpiComm.h"
+# else
+# include "Epetra_SerialComm.h"
+# endif
+# include "Epetra_FEVector.h"
+
+DEAL_II_NAMESPACE_OPEN
+
+// forward declaration
+template <typename number> class Vector;
+
+
+/**
+ * @addtogroup TrilinosWrappers
+ *@{
+ */
+namespace TrilinosWrappers
+{
+ // forward declaration
+ class VectorBase;
+
+
+ /**
+ * @cond internal
+ */
+
+ /**
+ * A namespace for internal implementation details of the
+ * TrilinosWrapper members.
+ *
+ * @ingroup TrilinosWrappers
+ */
+ namespace internal
+ {
+ /**
+ * This class implements a
+ * wrapper for accessing the
+ * Trilinos vector in the same
+ * way as we access deal.II
+ * objects: it is initialized
+ * with a vector and an element
+ * within it, and has a
+ * conversion operator to
+ * extract the scalar value of
+ * this element. It also has a
+ * variety of assignment
+ * operator for writing to this
+ * one element. @ingroup
+ * TrilinosWrappers
+ */
+ class VectorReference
+ {
+ private:
+ /**
+ * Constructor. It is made
+ * private so as to only allow
+ * the actual vector class to
+ * create it.
+ */
+ VectorReference (VectorBase &vector,
+ const unsigned int index);
+
+ public:
+ /**
+ * This looks like a copy
+ * operator, but does something
+ * different than usual. In
+ * particular, it does not copy
+ * the member variables of this
+ * reference. Rather, it
+ * handles the situation where
+ * we have two vectors @p v and
+ * @p w, and assign elements
+ * like in
+ * <tt>v(i)=w(i)</tt>. Here,
+ * both left and right hand
+ * side of the assignment have
+ * data type VectorReference,
+ * but what we really mean is
+ * to assign the vector
+ * elements represented by the
+ * two references. This
+ * operator implements this
+ * operation. Note also that
+ * this allows us to make the
+ * assignment operator const.
+ */
+ const VectorReference &
+ operator = (const VectorReference &r) const;
+
+ /**
+ * Same as above but for non-const
+ * reference objects.
+ */
+ const VectorReference &
+ operator = (const VectorReference &r);
+
+ /**
+ * Set the referenced element of the
+ * vector to <tt>s</tt>.
+ */
+ const VectorReference &
+ operator = (const TrilinosScalar &s) const;
+
+ /**
+ * Add <tt>s</tt> to the
+ * referenced element of the
+ * vector.
+ */
+ const VectorReference &
+ operator += (const TrilinosScalar &s) const;
+
+ /**
+ * Subtract <tt>s</tt> from the
+ * referenced element of the
+ * vector.
+ */
+ const VectorReference &
+ operator -= (const TrilinosScalar &s) const;
+
+ /**
+ * Multiply the referenced
+ * element of the vector by
+ * <tt>s</tt>.
+ */
+ const VectorReference &
+ operator *= (const TrilinosScalar &s) const;
+
+ /**
+ * Divide the referenced
+ * element of the vector by
+ * <tt>s</tt>.
+ */
+ const VectorReference &
+ operator /= (const TrilinosScalar &s) const;
+
+ /**
+ * Convert the reference to an
+ * actual value, i.e. return
+ * the value of the referenced
+ * element of the vector.
+ */
+ operator TrilinosScalar () const;
+
+ /**
+ * Exception reporting the
+ * error number of a failed
+ * call to a Trilinos function.
+ */
+ DeclException1 (ExcTrilinosError,
+ int,
+ << "An error with error number " << arg1
+ << " occurred while calling a Trilinos function");
+
+ /**
+ * Exception reporting an
+ * access to an element of a
+ * distributed vector that is
+ * not stored locally. The
+ * arguments are the requested
+ * index and the first and last
+ * locally stored index.
+ */
+ DeclException3 (ExcAccessToNonLocalElement,
+ int, int, int,
+ << "You tried to access element " << arg1
+ << " of a distributed vector, but only elements "
+ << arg2 << " through " << arg3
+ << " are stored locally and can be accessed.");
+
+ private:
+ /**
+ * Reference to the vector we
+ * are referencing.
+ */
+ VectorBase &vector;
+
+ /**
+ * Index of the referenced element
+ * of the vector.
+ */
+ const unsigned int index;
+
+ /**
+ * Make the vector class a
+ * friend, so that it can
+ * create objects of the
+ * present type.
+ */
+ friend class ::dealii::TrilinosWrappers::VectorBase;
+ };
+ }
+ /**
+ * @endcond
+ */
+
+
+ /**
+ * Base class for the two types of Trilinos vectors, the distributed
+ * memory vector MPI::Vector and a localized vector Vector. The latter
+ * is designed for use in either serial implementations or as a
+ * localized copy on each processor. The implementation of this class
+ * is based on the Trilinos vector class Epetra_FEVector, the (parallel)
+ * partitioning of which is governed by an Epetra_Map. This means that
+ * the vector type is generic and can be done in this base class, while
+ * the definition of the partition map (and hence, the constructor and
+ * reinit function) will have to be done in the derived classes. The
+ * Epetra_FEVector is precisely the kind of vector we deal with all the
+ * time - we probably get it from some assembly process, where also
+ * entries not locally owned might need to be written and hence need to be
+ * forwarded to the owner. The only requirement for this class to work
+ * is that Trilinos is installed with the same compiler as is used for
+ * compilation of deal.II.
+ *
+ * The interface of this class is modeled after the existing Vector
+ * class in deal.II. It has almost the same member functions, and is
+ * often exchangeable. However, since Trilinos only supports a single
+ * scalar type (double), it is not templated, and only works with that
+ * type.
+ *
+ * Note that Trilinos only guarantees that operations do what you expect
+ * if the function @p GlobalAssemble has been called after vector
+ * assembly in order to distribute the data. Therefore, you need to call
+ * Vector::compress() before you actually use the vectors.
+ *
+ * @ingroup TrilinosWrappers
+ * @ingroup Vectors
+ * @author Martin Kronbichler, 2008
+ */
+ class VectorBase : public Subscriptor
+ {
+ public:
+ /**
+ * Declare some of the standard
+ * types used in all
+ * containers. These types
+ * parallel those in the
+ * <tt>C++</tt> standard libraries
+ * <tt>vector<...></tt> class.
+ */
+ typedef TrilinosScalar value_type;
+ typedef TrilinosScalar real_type;
+ typedef std::size_t size_type;
+ typedef internal::VectorReference reference;
+ typedef const internal::VectorReference const_reference;
+
+ /**
+ * @name 1: Basic Object-handling
+ */
+ //@{
+
+ /**
+ * Default constructor that
+ * generates an empty (zero size)
+ * vector. The function
+ * <tt>reinit()</tt> will have to
+ * give the vector the correct
+ * size and distribution among
+ * processes in case of an MPI
+ * run.
+ */
+ VectorBase ();
+
+ /**
+ * Copy constructor. Sets the
+ * dimension to that of the given
+ * vector, and copies all the
+ * elements.
+ */
+ VectorBase (const VectorBase &v);
+
+ /**
+ * Destructor
+ */
+ virtual ~VectorBase ();
+
+ /**
+ * Release all memory and return
+ * to a state just like after
+ * having called the default
+ * constructor.
+ */
+ void clear ();
+
+ /**
+ * Reinit functionality, sets the
+ * dimension and possibly the
+ * parallel partitioning (Epetra_Map)
+ * of the calling vector to the
+ * settings of the input vector.
+ */
+ void reinit (const VectorBase &v,
+ const bool fast = false);
+
+ /**
+ * Compress the underlying
+ * representation of the Trilinos
+ * object, i.e. flush the buffers
+ * of the vector object if it has
+ * any. This function is
+ * necessary after writing into a
+ * vector element-by-element and
+ * before anything else can be
+ * done on it.
+ *
+ * The (defaulted) argument can
+ * be used to specify the
+ * compress mode
+ * (<code>Add</code> or
+ * <code>Insert</code>) in case
+ * the vector has not been
+ * written to since the last
+ * time this function was
+ * called. The argument is
+ * ignored if the vector has
+ * been added or written to
+ * since the last time
+ * compress() was called.
+ *
+ * See @ref GlossCompress "Compressing distributed objects"
+ * for more information.
+ */
+ void compress (::dealii::VectorOperation::values operation
+ =::dealii::VectorOperation::unknown);
+
+ /**
+ * @deprecated
+ */
+ void compress (const Epetra_CombineMode last_action);
+
+ /**
+ * Returns the state of the
+ * vector, i.e., whether
+ * compress() has already been
+ * called after an operation
+ * requiring data exchange.
+ */
+ bool is_compressed () const;
+
+ /**
+ * Set all components of the
+ * vector to the given number @p
+ * s. Simply pass this down to
+ * the Trilinos Epetra object,
+ * but we still need to declare
+ * this function to make the
+ * example given in the
+ * discussion about making the
+ * constructor explicit work.
+ *
+ * Since the semantics of
+ * assigning a scalar to a vector
+ * are not immediately clear,
+ * this operator should really
+ * only be used if you want to
+ * set the entire vector to
+ * zero. This allows the
+ * intuitive notation
+ * <tt>v=0</tt>. Assigning other
+ * values is deprecated and may
+ * be disallowed in the future.
+ */
+ VectorBase &
+ operator = (const TrilinosScalar s);
+
+ /**
+ * Copy function. This function takes
+ * a VectorBase vector and copies all
+ * the elements. The target vector
+ * will have the same parallel
+ * distribution as the calling
+ * vector.
+ */
+ VectorBase &
+ operator = (const VectorBase &v);
+
+ /**
+ * Another copy function. This
+ * one takes a deal.II vector and
+ * copies it into a
+ * TrilinosWrapper vector. Note
+ * that since we do not provide
+ * any Epetra_map that tells
+ * about the partitioning of the
+ * vector among the MPI
+ * processes, the size of the
+ * TrilinosWrapper vector has to
+ * be the same as the size of the
+ * input vector. In order to
+ * change the map, use the
+ * reinit(const Epetra_Map
+ * &input_map) function.
+ */
+ template <typename Number>
+ VectorBase &
+ operator = (const ::dealii::Vector<Number> &v);
+
+ /**
+ * Test for equality. This
+ * function assumes that the
+ * present vector and the one to
+ * compare with have the same
+ * size already, since comparing
+ * vectors of different sizes
+ * makes not much sense anyway.
+ */
+ bool operator == (const VectorBase &v) const;
+
+ /**
+ * Test for inequality. This
+ * function assumes that the
+ * present vector and the one to
+ * compare with have the same
+ * size already, since comparing
+ * vectors of different sizes
+ * makes not much sense anyway.
+ */
+ bool operator != (const VectorBase &v) const;
+
+ /**
+ * Return the global dimension of
+ * the vector.
+ */
+ unsigned int size () const;
+
+ /**
+ * Return the local dimension of
+ * the vector, i.e. the number of
+ * elements stored on the present
+ * MPI process. For sequential
+ * vectors, this number is the
+ * same as size(), but for
+ * parallel vectors it may be
+ * smaller.
+ *
+ * To figure out which elements
+ * exactly are stored locally,
+ * use local_range().
+ *
+ * If the vector contains ghost
+ * elements, they are included in
+ * this number.
+ */
+ unsigned int local_size () const;
+
+ /**
+ * Return a pair of indices
+ * indicating which elements of
+ * this vector are stored
+ * locally. The first number is
+ * the index of the first element
+ * stored, the second the index
+ * of the one past the last one
+ * that is stored locally. If
+ * this is a sequential vector,
+ * then the result will be the
+ * pair (0,N), otherwise it will
+ * be a pair (i,i+n), where
+ * <tt>n=local_size()</tt>.
+ */
+ std::pair<unsigned int, unsigned int> local_range () const;
+
+ /**
+ * Return whether @p index is in
+ * the local range or not, see
+ * also local_range().
+ */
+ bool in_local_range (const unsigned int index) const;
+
+ /**
+ * Return if the vector contains ghost
+ * elements. This answer is true if there
+ * are ghost elements on at least one
+ * process.
+ */
+ bool has_ghost_elements() const;
+
+ /**
+ * Return the scalar (inner)
+ * product of two vectors. The
+ * vectors must have the same
+ * size.
+ */
+ TrilinosScalar operator * (const VectorBase &vec) const;
+
+ /**
+ * Return square of the
+ * $l_2$-norm.
+ */
+ real_type norm_sqr () const;
+
+ /**
+ * Mean value of the elements of
+ * this vector.
+ */
+ TrilinosScalar mean_value () const;
+
+ /**
+ * Compute the minimal value of
+ * the elements of this vector.
+ */
+ TrilinosScalar minimal_value () const;
+
+ /**
+ * $l_1$-norm of the vector. The
+ * sum of the absolute values.
+ */
+ real_type l1_norm () const;
+
+ /**
+ * $l_2$-norm of the vector. The
+ * square root of the sum of the
+ * squares of the elements.
+ */
+ real_type l2_norm () const;
+
+ /**
+ * $l_p$-norm of the vector. The
+ * <i>p</i>th root of the sum of
+ * the <i>p</i>th powers of the
+ * absolute values of the
+ * elements.
+ */
+ real_type lp_norm (const TrilinosScalar p) const;
+
+ /**
+ * Maximum absolute value of the
+ * elements.
+ */
+ real_type linfty_norm () const;
+
+ /**
+ * Return vector component with
+ * the minimal magnitude.
+ */
+ real_type min () const; // shuqiangwang
+
+ /**
+ * Return vector component with
+ * the maximal magnitude.
+ */
+ real_type max () const;
+ /**
+ * Return whether the vector
+ * contains only elements with
+ * value zero. This function is
+ * mainly for internal
+ * consistency checks and should
+ * seldom be used when not in
+ * debug mode since it uses quite
+ * some time.
+ */
+ bool all_zero () const;
+
+ /**
+ * Return @p true if the vector
+ * has no negative entries,
+ * i.e. all entries are zero or
+ * positive. This function is
+ * used, for example, to check
+ * whether refinement indicators
+ * are really all positive (or
+ * zero).
+ */
+ bool is_non_negative () const;
+ //@}
+
+
+ /**
+ * @name 2: Data-Access
+ */
+ //@{
+
+ /**
+ * Provide access to a given
+ * element, both read and write.
+ */
+ reference
+ operator () (const unsigned int index);
+
+ /**
+ * Provide read-only access to an
+ * element. This is equivalent to
+ * the <code>el()</code> command.
+ */
+ TrilinosScalar
+ operator () (const unsigned int index) const;
+
+ /**
+ * Provide access to a given
+ * element, both read and write.
+ *
+ * Exactly the same as operator().
+ */
+ reference
+ operator [] (const unsigned int index);
+
+ /**
+ * Provide read-only access to an
+ * element. This is equivalent to
+ * the <code>el()</code> command.
+ *
+ * Exactly the same as operator().
+ */
+ TrilinosScalar
+ operator [] (const unsigned int index) const;
+
+ /**
+ * Return the value of the vector
+ * entry <i>i</i>. Note that this
+ * function does only work
+ * properly when we request a
+ * data stored on the local
+ * processor. The function will
+ * throw an exception in case the
+ * elements sits on another
+ * process.
+ */
+ TrilinosScalar el (const unsigned int index) const;
+
+ /**
+ * A collective set operation:
+ * instead of setting individual
+ * elements of a vector, this
+ * function allows to set a whole
+ * set of elements at once. The
+ * indices of the elements to be
+ * set are stated in the first
+ * argument, the corresponding
+ * values in the second.
+ */
+ void set (const std::vector<unsigned int> &indices,
+ const std::vector<TrilinosScalar> &values);
+
+ /**
+ * This is a second collective
+ * set operation. As a
+ * difference, this function
+ * takes a deal.II vector of
+ * values.
+ */
+ void set (const std::vector<unsigned int> &indices,
+ const ::dealii::Vector<TrilinosScalar> &values);
+ //@}
+
+
+ /**
+ * @name 3: Modification of vectors
+ */
+ //@{
+
+ /**
+ * This collective set operation
+ * is of lower level and can
+ * handle anything else —
+ * the only thing you have to
+ * provide is an address where
+ * all the indices are stored and
+ * the number of elements to be
+ * set.
+ */
+ void set (const unsigned int n_elements,
+ const unsigned int *indices,
+ const TrilinosScalar *values);
+
+ /**
+ * A collective add operation:
+ * This function adds a whole
+ * set of values stored in @p
+ * values to the vector
+ * components specified by @p
+ * indices.
+ */
+ void add (const std::vector<unsigned int> &indices,
+ const std::vector<TrilinosScalar> &values);
+
+ /**
+ * This is a second collective
+ * add operation. As a
+ * difference, this function
+ * takes a deal.II vector of
+ * values.
+ */
+ void add (const std::vector<unsigned int> &indices,
+ const ::dealii::Vector<TrilinosScalar> &values);
+
+ /**
+ * Take an address where
+ * <tt>n_elements</tt> are stored
+ * contiguously and add them into
+ * the vector. Handles all cases
+ * which are not covered by the
+ * other two <tt>add()</tt>
+ * functions above.
+ */
+ void add (const unsigned int n_elements,
+ const unsigned int *indices,
+ const TrilinosScalar *values);
+
+ /**
+ * Multiply the entire vector by
+ * a fixed factor.
+ */
+ VectorBase &operator *= (const TrilinosScalar factor);
+
+ /**
+ * Divide the entire vector by a
+ * fixed factor.
+ */
+ VectorBase &operator /= (const TrilinosScalar factor);
+
+ /**
+ * Add the given vector to the
+ * present one.
+ */
+ VectorBase &operator += (const VectorBase &V);
+
+ /**
+ * Subtract the given vector from
+ * the present one.
+ */
+ VectorBase &operator -= (const VectorBase &V);
+
+ /**
+ * Addition of @p s to all
+ * components. Note that @p s is
+ * a scalar and not a vector.
+ */
+ void add (const TrilinosScalar s);
+
+ /**
+ * Simple vector addition, equal
+ * to the <tt>operator
+ * +=</tt>.
+ *
+ * Though, if the second argument
+ * <tt>allow_different_maps</tt>
+ * is set, then it is possible to
+ * add data from a different map.
+ */
+ void add (const VectorBase &V,
+ const bool allow_different_maps = false);
+
+ /**
+ * Simple addition of a multiple
+ * of a vector, i.e. <tt>*this +=
+ * a*V</tt>.
+ */
+ void add (const TrilinosScalar a,
+ const VectorBase &V);
+
+ /**
+ * Multiple addition of scaled
+ * vectors, i.e. <tt>*this += a*V +
+ * b*W</tt>.
+ */
+ void add (const TrilinosScalar a,
+ const VectorBase &V,
+ const TrilinosScalar b,
+ const VectorBase &W);
+
+ /**
+ * Scaling and simple vector
+ * addition, i.e. <tt>*this =
+ * s*(*this) + V</tt>.
+ */
+ void sadd (const TrilinosScalar s,
+ const VectorBase &V);
+
+ /**
+ * Scaling and simple addition,
+ * i.e. <tt>*this = s*(*this) +
+ * a*V</tt>.
+ */
+ void sadd (const TrilinosScalar s,
+ const TrilinosScalar a,
+ const VectorBase &V);
+
+ /**
+ * Scaling and multiple addition,
+ * i.e. <tt>*this = s*(*this) + a*V +
+ * b*W</tt>.
+ */
+ void sadd (const TrilinosScalar s,
+ const TrilinosScalar a,
+ const VectorBase &V,
+ const TrilinosScalar b,
+ const VectorBase &W);
+
+ /**
+ * Scaling and multiple addition.
+ * <tt>*this = s*(*this) + a*V +
+ * b*W + c*X</tt>.
+ */
+ void sadd (const TrilinosScalar s,
+ const TrilinosScalar a,
+ const VectorBase &V,
+ const TrilinosScalar b,
+ const VectorBase &W,
+ const TrilinosScalar c,
+ const VectorBase &X);
+
+ /**
+ * Scale each element of this
+ * vector by the corresponding
+ * element in the argument. This
+ * function is mostly meant to
+ * simulate multiplication (and
+ * immediate re-assignment) by a
+ * diagonal scaling matrix.
+ */
+ void scale (const VectorBase &scaling_factors);
+
+ /**
+ * Assignment <tt>*this =
+ * a*V</tt>.
+ */
+ void equ (const TrilinosScalar a,
+ const VectorBase &V);
+
+ /**
+ * Assignment <tt>*this = a*V +
+ * b*W</tt>.
+ */
+ void equ (const TrilinosScalar a,
+ const VectorBase &V,
+ const TrilinosScalar b,
+ const VectorBase &W);
+
+ /**
+ * Compute the elementwise ratio
+ * of the two given vectors, that
+ * is let <tt>this[i] =
+ * a[i]/b[i]</tt>. This is useful
+ * for example if you want to
+ * compute the cellwise ratio of
+ * true to estimated error.
+ *
+ * This vector is appropriately
+ * scaled to hold the result.
+ *
+ * If any of the <tt>b[i]</tt> is
+ * zero, the result is
+ * undefined. No attempt is made
+ * to catch such situations.
+ */
+ void ratio (const VectorBase &a,
+ const VectorBase &b);
+ //@}
+
+
+ /**
+ * @name 4: Mixed stuff
+ */
+ //@{
+
+ /**
+ * Return a const reference to the
+ * underlying Trilinos
+ * Epetra_MultiVector class.
+ */
+ const Epetra_MultiVector &trilinos_vector () const;
+
+ /**
+ * Return a (modifiable) reference to
+ * the underlying Trilinos
+ * Epetra_FEVector class.
+ */
+ Epetra_FEVector &trilinos_vector ();
+
+ /**
+ * Return a const reference to the
+ * underlying Trilinos Epetra_Map
+ * that sets the parallel
+ * partitioning of the vector.
+ */
+ const Epetra_Map &vector_partitioner () const;
+
+ /**
+ * Output of vector in
+ * user-defined format in analogy
+ * to the dealii::Vector<number>
+ * class.
+ */
+ void print (const char *format = 0) const;
+
+ /**
+ * Print to a stream. @p
+ * precision denotes the desired
+ * precision with which values
+ * shall be printed, @p
+ * scientific whether scientific
+ * notation shall be used. If @p
+ * across is @p true then the
+ * vector is printed in a line,
+ * while if @p false then the
+ * elements are printed on a
+ * separate line each.
+ */
+ void print (std::ostream &out,
+ const unsigned int precision = 3,
+ const bool scientific = true,
+ const bool across = true) const;
+
+ /**
+ * Swap the contents of this
+ * vector and the other vector @p
+ * v. One could do this operation
+ * with a temporary variable and
+ * copying over the data
+ * elements, but this function is
+ * significantly more efficient
+ * since it only swaps the
+ * pointers to the data of the
+ * two vectors and therefore does
+ * not need to allocate temporary
+ * storage and move data
+ * around. Note that the vectors
+ * need to be of the same size
+ * and base on the same map.
+ *
+ * This function is analogous to
+ * the @p swap function of all C++
+ * standard containers. Also,
+ * there is a global function
+ * <tt>swap(u,v)</tt> that simply
+ * calls <tt>u.swap(v)</tt>,
+ * again in analogy to standard
+ * functions.
+ */
+ void swap (VectorBase &v);
+
+ /**
+ * Estimate for the memory
+ * consumption in bytes.
+ */
+ std::size_t memory_consumption () const;
+ //@}
+
+ /**
+ * Exception
+ */
+ DeclException0 (ExcGhostsPresent);
+
+ /**
+ * Exception
+ */
+ DeclException0 (ExcDifferentParallelPartitioning);
+
+ /**
+ * Exception
+ */
+ DeclException1 (ExcTrilinosError,
+ int,
+ << "An error with error number " << arg1
+ << " occurred while calling a Trilinos function");
+
+ /**
+ * Exception
+ */
+ DeclException3 (ExcAccessToNonlocalElement,
+ int, int, int,
+ << "You tried to access element " << arg1
+ << " of a distributed vector, but only entries "
+ << arg2 << " through " << arg3
+ << " are stored locally and can be accessed.");
+
+
+ private:
+ /**
+ * Trilinos doesn't allow to
+ * mix additions to vector
+ * entries and overwriting them
+ * (to make synchronisation of
+ * parallel computations
+ * simpler). The way we do it
+ * is to, for each access
+ * operation, store whether it
+ * is an insertion or an
+ * addition. If the previous
+ * one was of different type,
+ * then we first have to flush
+ * the Trilinos buffers;
+ * otherwise, we can simply go
+ * on. Luckily, Trilinos has
+ * an object for this which
+ * does already all the
+ * parallel communications in
+ * such a case, so we simply
+ * use their model, which
+ * stores whether the last
+ * operation was an addition or
+ * an insertion.
+ */
+ Epetra_CombineMode last_action;
+
+ /**
+ * A boolean variable to hold
+ * information on whether the
+ * vector is compressed or not.
+ */
+ bool compressed;
+
+ /**
+ * Whether this vector has ghost elements. This is true
+ * on all processors even if only one of them has any
+ * ghost elements.
+ */
+ bool has_ghosts;
+
+ /**
+ * An Epetra distributed vector
+ * type. Requires an existing
+ * Epetra_Map for storing data.
+ */
+ std_cxx1x::shared_ptr<Epetra_FEVector> vector;
+
+
+ /**
+ * Make the reference class a
+ * friend.
+ */
+ friend class internal::VectorReference;
+ friend class Vector;
+ friend class MPI::Vector;
+ };
+
+
+
+
+// ------------------- inline and template functions --------------
+
+ /**
+ * Global swap function that overloads the generic implementation of
+ * the C++ standard library, which would go through a temporary
+ * object. It simply forwards to the member function
+ * <tt>u.swap(v)</tt>.
+ *
+ * @relates TrilinosWrappers::VectorBase
+ * @author Martin Kronbichler, Wolfgang Bangerth, 2008
+ */
+ inline
+ void swap (VectorBase &u, VectorBase &v)
+ {
+   u.swap (v);
+ }
+
+
+#ifndef DOXYGEN
+
+ namespace internal
+ {
+   // Bind the proxy object to entry @p index of @p vector.
+   inline
+   VectorReference::VectorReference (VectorBase &vector,
+                                     const unsigned int index)
+     :
+     vector (vector),
+     index (index)
+   {}
+
+
+   // Not a copy operator: assigning one proxy to another transfers the
+   // value that @p r refers to. Read that value and pass it on to the
+   // scalar assignment operator.
+   inline
+   const VectorReference &
+   VectorReference::operator = (const VectorReference &r) const
+   {
+     const TrilinosScalar source_value = static_cast<TrilinosScalar> (r);
+     *this = source_value;
+
+     return *this;
+   }
+
+
+
+   // Same as the const version, for non-const proxy objects.
+   inline
+   const VectorReference &
+   VectorReference::operator = (const VectorReference &r)
+   {
+     const TrilinosScalar source_value = static_cast<TrilinosScalar> (r);
+     *this = source_value;
+
+     return *this;
+   }
+
+
+   // Overwrite the referenced entry with @p value through
+   // VectorBase::set().
+   inline
+   const VectorReference &
+   VectorReference::operator = (const TrilinosScalar &value) const
+   {
+     vector.set (1, &index, &value);
+     return *this;
+   }
+
+
+
+   // Add @p value to the referenced entry through VectorBase::add().
+   inline
+   const VectorReference &
+   VectorReference::operator += (const TrilinosScalar &value) const
+   {
+     vector.add (1, &index, &value);
+     return *this;
+   }
+
+
+
+   // Subtract @p value from the referenced entry by adding its negative.
+   inline
+   const VectorReference &
+   VectorReference::operator -= (const TrilinosScalar &value) const
+   {
+     const TrilinosScalar negated = -value;
+     vector.add (1, &index, &negated);
+     return *this;
+   }
+
+
+
+   // Multiply the referenced entry by @p value: read the current value,
+   // form the product, and write it back with VectorBase::set().
+   inline
+   const VectorReference &
+   VectorReference::operator *= (const TrilinosScalar &value) const
+   {
+     const TrilinosScalar current = static_cast<TrilinosScalar>(*this);
+     const TrilinosScalar product = current * value;
+     vector.set (1, &index, &product);
+     return *this;
+   }
+
+
+
+   // Divide the referenced entry by @p value: read the current value,
+   // form the quotient, and write it back with VectorBase::set().
+   inline
+   const VectorReference &
+   VectorReference::operator /= (const TrilinosScalar &value) const
+   {
+     const TrilinosScalar current = static_cast<TrilinosScalar>(*this);
+     const TrilinosScalar quotient = current / value;
+     vector.set (1, &index, &quotient);
+     return *this;
+   }
+ }
+
+
+
+ // Report the current value of the member flag 'compressed'.
+ inline
+ bool
+ VectorBase::is_compressed () const
+ {
+   return compressed;
+ }
+
+
+
+ // Return whether @p index lies in the half-open interval
+ // [first, second) reported by local_range().
+ inline
+ bool
+ VectorBase::in_local_range (const unsigned int index) const
+ {
+   const std::pair<unsigned int, unsigned int> owned = local_range();
+
+   if (index < owned.first)
+     return false;
+
+   return index < owned.second;
+ }
+
+
+
+ // Report the current value of the member flag 'has_ghosts'.
+ inline
+ bool
+ VectorBase::has_ghost_elements() const
+ {
+   return has_ghosts;
+ }
+
+
+
+ // Read/write access: hand out a proxy object that refers to entry
+ // @p index of this vector.
+ inline
+ internal::VectorReference
+ VectorBase::operator () (const unsigned int index)
+ {
+   internal::VectorReference proxy (*this, index);
+   return proxy;
+ }
+
+
+
+ // Read/write access; forwards to the non-const operator().
+ inline
+ internal::VectorReference
+ VectorBase::operator [] (const unsigned int index)
+ {
+   return (*this)(index);
+ }
+
+
+ // Read-only access; forwards to the const operator().
+ inline
+ TrilinosScalar
+ VectorBase::operator [] (const unsigned int index) const
+ {
+   return (*this)(index);
+ }
+
+
+
+ // Re-initialize this vector from @p v. The stored Epetra_FEVector is
+ // replaced by a copy of the one held by @p v unless @p fast is set and
+ // both vectors already share the same partitioner, in which case the
+ // current storage is left untouched.
+ inline
+ void
+ VectorBase::reinit (const VectorBase &v,
+                     const bool fast)
+ {
+   Assert (vector.get() != 0,
+           ExcMessage("Vector has not been constructed properly."));
+
+   // the map comparison is only evaluated when 'fast' was requested
+   const bool keep_storage =
+     fast && vector_partitioner().SameAs(v.vector_partitioner());
+
+   if (!keep_storage)
+     vector.reset (new Epetra_FEVector(*v.vector));
+ }
+
+
+
+ // Translate the Epetra combine mode into the corresponding
+ // ::dealii::VectorOperation value and forward to the other compress()
+ // overload. Only Add and Insert are supported; any other mode throws
+ // ExcNotImplemented.
+ inline
+ void
+ VectorBase::compress (const Epetra_CombineMode last_action)
+ {
+   ::dealii::VectorOperation::values operation =
+     ::dealii::VectorOperation::unknown;
+
+   switch (last_action)
+     {
+     case Add:
+       operation = ::dealii::VectorOperation::add;
+       break;
+
+     case Insert:
+       operation = ::dealii::VectorOperation::insert;
+       break;
+
+     default:
+       AssertThrow(false, ExcNotImplemented());
+     }
+
+   compress(operation);
+ }
+
+
+
+ // Flush pending contributions by calling GlobalAssemble() on the
+ // underlying Epetra_FEVector, then reset 'last_action' to Zero and mark
+ // the vector as compressed.
+ inline
+ void
+ VectorBase::compress (::dealii::VectorOperation::values given_last_action)
+ {
+   // Decide which mode to hand to Trilinos. The locally recorded
+   // last_action takes precedence over the caller's argument so that
+   // wrongly mixed operations are detected; the argument is only
+   // consulted when nothing was recorded (typically on processes that
+   // did not execute an operation, e.g. because they own no cells).
+   Epetra_CombineMode mode = last_action;
+   if (mode == Zero)
+     {
+       if (given_last_action == ::dealii::VectorOperation::add)
+         mode = Add;
+       else if (given_last_action == ::dealii::VectorOperation::insert)
+         mode = Insert;
+     }
+
+#ifdef DEBUG
+# ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+   // Every process has to pass the same mode to GlobalAssemble();
+   // anything else results in undefined behaviour there. Compare the
+   // smallest and largest mode value over all processes.
+   double mode_as_double = mode;
+   Utilities::MPI::MinMaxAvg agreement
+     = Utilities::MPI::min_max_avg (mode_as_double,
+                                    dynamic_cast<const Epetra_MpiComm *>
+                                    (&vector_partitioner().Comm())->GetMpiComm());
+   Assert(agreement.max-agreement.min<1e-5,
+          ExcMessage ("Not all processors agree whether the last operation on "
+                      "this vector was an addition or a set operation. This will "
+                      "prevent the compress() operation from succeeding."));
+
+# endif
+#endif
+
+   // Tell the vector what kind of operation preceded this call.
+   const int status = vector->GlobalAssemble(mode);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   last_action = Zero;
+   compressed = true;
+ }
+
+
+
+ // Set all entries of the vector to the scalar @p s via
+ // Epetra's PutScalar().
+ inline
+ VectorBase &
+ VectorBase::operator = (const TrilinosScalar s)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (numbers::is_finite(s), ExcNumberNotFinite());
+
+   const int status = vector->PutScalar(s);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return *this;
+ }
+
+
+
+ // Collective set operation taking std::vector arguments; after
+ // checking that both arrays have equal length it forwards to the
+ // pointer-based set().
+ inline
+ void
+ VectorBase::set (const std::vector<unsigned int> &indices,
+                  const std::vector<TrilinosScalar> &values)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   Assert (indices.size() == values.size(),
+           ExcDimensionMismatch(indices.size(),values.size()));
+
+   const unsigned int *index_array = &indices[0];
+   const TrilinosScalar *value_array = &values[0];
+   set (indices.size(), index_array, value_array);
+ }
+
+
+
+ // Collective set operation taking the values as a deal.II vector;
+ // after checking that both arrays have equal length it forwards to the
+ // pointer-based set().
+ inline
+ void
+ VectorBase::set (const std::vector<unsigned int> &indices,
+                  const ::dealii::Vector<TrilinosScalar> &values)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   Assert (indices.size() == values.size(),
+           ExcDimensionMismatch(indices.size(),values.size()));
+
+   const unsigned int *index_array = &indices[0];
+   set (indices.size(), index_array, values.begin());
+ }
+
+
+
+ // Low-level collective set operation: overwrite the @p n_elements
+ // entries named in @p indices with the corresponding @p values.
+ // Entries found in the local map are written directly; all others are
+ // passed to ReplaceGlobalValues() and the vector is marked as not
+ // compressed.
+ inline
+ void
+ VectorBase::set (const unsigned int n_elements,
+                  const unsigned int *indices,
+                  const TrilinosScalar *values)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   // pending additions have to be flushed before we start inserting
+   if (last_action == Add)
+     vector->GlobalAssemble(Add);
+
+   last_action = Insert;
+
+   for (unsigned int i=0; i<n_elements; ++i)
+     {
+       const unsigned int row = indices[i];
+       const int local_row = vector->Map().LID(static_cast<int>(row));
+
+       if (local_row != -1)
+         {
+           (*vector)[0][local_row] = values[i];
+           continue;
+         }
+
+       // LID() returned -1, i.e. the row is not in the local map
+       const int status
+         = vector->ReplaceGlobalValues (1,
+                                        reinterpret_cast<const int *>(&row),
+                                        &values[i]);
+       AssertThrow (status == 0, ExcTrilinosError(status));
+       compressed = false;
+     }
+ }
+
+
+
+ // Collective add operation taking std::vector arguments; after
+ // checking that both arrays have equal length it forwards to the
+ // pointer-based add().
+ inline
+ void
+ VectorBase::add (const std::vector<unsigned int> &indices,
+                  const std::vector<TrilinosScalar> &values)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (indices.size() == values.size(),
+           ExcDimensionMismatch(indices.size(),values.size()));
+
+   const unsigned int *index_array = &indices[0];
+   const TrilinosScalar *value_array = &values[0];
+   add (indices.size(), index_array, value_array);
+ }
+
+
+
+ // Collective add operation taking the values as a deal.II vector;
+ // after checking that both arrays have equal length it forwards to the
+ // pointer-based add().
+ inline
+ void
+ VectorBase::add (const std::vector<unsigned int> &indices,
+                  const ::dealii::Vector<TrilinosScalar> &values)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (indices.size() == values.size(),
+           ExcDimensionMismatch(indices.size(),values.size()));
+
+   const unsigned int *index_array = &indices[0];
+   add (indices.size(), index_array, values.begin());
+ }
+
+
+
+ // Low-level collective add operation: add the @p n_elements given
+ // @p values to the entries named in @p indices. Entries found in the
+ // local map are updated directly; all others are passed to
+ // SumIntoGlobalValues() and the vector is marked as not compressed.
+ inline
+ void
+ VectorBase::add (const unsigned int n_elements,
+                  const unsigned int *indices,
+                  const TrilinosScalar *values)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   // pending insertions have to be flushed before we start adding
+   if (last_action == Insert)
+     vector->GlobalAssemble(Insert);
+
+   last_action = Add;
+
+   for (unsigned int i=0; i<n_elements; ++i)
+     {
+       const unsigned int row = indices[i];
+       const int local_row = vector->Map().LID(static_cast<int>(row));
+
+       if (local_row != -1)
+         {
+           (*vector)[0][local_row] += values[i];
+           continue;
+         }
+
+       // LID() returned -1, i.e. the row is not in the local map
+       const int status
+         = vector->SumIntoGlobalValues (1,
+                                        reinterpret_cast<const int *>(&row),
+                                        &values[i]);
+       AssertThrow (status == 0, ExcTrilinosError(status));
+       compressed = false;
+     }
+ }
+
+
+
+ // Global size, computed as the extent of the global index range
+ // [MinAllGID, MaxAllGID] of the underlying map.
+ inline
+ unsigned int
+ VectorBase::size () const
+ {
+   const int n_global = vector->Map().MaxAllGID() + 1 -
+                        vector->Map().MinAllGID();
+   return static_cast<unsigned int>(n_global);
+ }
+
+
+
+ // Number of elements the underlying map reports for this process.
+ inline
+ unsigned int
+ VectorBase::local_size () const
+ {
+   const int n_local = vector->Map().NumMyElements();
+   return static_cast<unsigned int>(n_local);
+ }
+
+
+
+ // Return the half-open global index interval [MinMyGID, MaxMyGID+1)
+ // of the underlying map.
+ inline
+ std::pair<unsigned int, unsigned int>
+ VectorBase::local_range () const
+ {
+   const int first_index = vector->Map().MinMyGID();
+   const int past_last_index = vector->Map().MaxMyGID()+1;
+
+   return std::pair<unsigned int, unsigned int> (first_index,
+                                                 past_last_index);
+ }
+
+
+
+ // Scalar product of this vector with @p vec, computed by Epetra's
+ // Dot(). Both vectors must use the same map, and this vector must not
+ // have ghost elements.
+ inline
+ TrilinosScalar
+ VectorBase::operator * (const VectorBase &vec) const
+ {
+   Assert (vector->Map().SameAs(vec.vector->Map()),
+           ExcDifferentParallelPartitioning());
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   TrilinosScalar dot_product;
+   const int status = vector->Dot(*(vec.vector), &dot_product);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return dot_product;
+ }
+
+
+
+ // Square of the l2-norm, obtained by squaring the result of l2_norm().
+ inline
+ VectorBase::real_type
+ VectorBase::norm_sqr () const
+ {
+   const TrilinosScalar norm = l2_norm();
+   return norm*norm;
+ }
+
+
+
+ // Mean value of the entries as computed by Epetra's MeanValue(). Not
+ // allowed on vectors with ghost elements.
+ inline
+ TrilinosScalar
+ VectorBase::mean_value () const
+ {
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   TrilinosScalar average;
+   const int status = vector->MeanValue (&average);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return average;
+ }
+
+
+
+ // Smallest entry of the vector as computed by Epetra's MinValue().
+ inline
+ TrilinosScalar
+ VectorBase::minimal_value () const
+ {
+   TrilinosScalar smallest;
+   const int status = vector->MinValue (&smallest);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return smallest;
+ }
+
+
+
+ // l1-norm as computed by Epetra's Norm1(). Not allowed on vectors with
+ // ghost elements.
+ inline
+ VectorBase::real_type
+ VectorBase::l1_norm () const
+ {
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   TrilinosScalar norm;
+   const int status = vector->Norm1 (&norm);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return norm;
+ }
+
+
+
+ // l2-norm as computed by Epetra's Norm2(). Not allowed on vectors with
+ // ghost elements.
+ inline
+ VectorBase::real_type
+ VectorBase::l2_norm () const
+ {
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   TrilinosScalar norm;
+   const int status = vector->Norm2 (&norm);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return norm;
+ }
+
+
+
+ // lp-norm of the vector, i.e. the p-th root of the sum of the p-th
+ // powers of the absolute values of all entries. Not allowed on vectors
+ // with ghost elements.
+ //
+ // Trilinos offers no lp-norm, so the sum is accumulated by hand over
+ // the locally stored entries and then added up over all processes of
+ // the map's communicator. Without that reduction every process would
+ // return a norm of only its own part of a distributed vector. As a
+ // consequence this is a collective operation.
+ inline
+ VectorBase::real_type
+ VectorBase::lp_norm (const TrilinosScalar p) const
+ {
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   TrilinosScalar local_sum = 0;
+   const unsigned int n_local = local_size();
+
+   // loop over all the local elements because
+   // Trilinos does not support lp norms
+   for (unsigned int i=0; i<n_local; i++)
+     local_sum += std::pow(std::fabs((*vector)[0][i]), p);
+
+   // combine the contributions of all processes
+   TrilinosScalar global_sum = 0;
+   const int ierr = vector->Map().Comm().SumAll (&local_sum, &global_sum, 1);
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+   return std::pow(global_sum, static_cast<TrilinosScalar>(1./p));
+ }
+
+
+
+ // Maximum norm as computed by Epetra's NormInf().
+ inline
+ VectorBase::real_type
+ VectorBase::linfty_norm () const
+ {
+   // unlike the other norms, this one is safe to evaluate even in the
+   // presence of ghost elements, so there is no ghost check here
+   TrilinosScalar norm;
+   const int status = vector->NormInf (&norm);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return norm;
+ }
+
+
+
+ // inline also scalar products, vector
+ // additions etc. since they are all
+ // representable by a single Trilinos
+ // call. This reduces the overhead of the
+ // wrapper class.
+ // Multiply all entries by the factor @p a using Epetra's Scale().
+ inline
+ VectorBase &
+ VectorBase::operator *= (const TrilinosScalar a)
+ {
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+   const int status = vector->Scale(a);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return *this;
+ }
+
+
+
+ // Divide all entries by @p a, implemented as a scaling with 1/a. Both
+ // @p a and its reciprocal are checked for finiteness in debug mode.
+ inline
+ VectorBase &
+ VectorBase::operator /= (const TrilinosScalar a)
+ {
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+   const TrilinosScalar reciprocal = 1./a;
+   Assert (numbers::is_finite(reciprocal), ExcNumberNotFinite());
+
+   const int status = vector->Scale(reciprocal);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return *this;
+ }
+
+
+
+ // Add the vector @p v to this one. Both vectors must have the same
+ // size and the same parallel partitioning.
+ inline
+ VectorBase &
+ VectorBase::operator += (const VectorBase &v)
+ {
+   Assert (size() == v.size(),
+           ExcDimensionMismatch(size(), v.size()));
+   Assert (vector->Map().SameAs(v.vector->Map()),
+           ExcDifferentParallelPartitioning());
+
+   // *this = 1.0 * v + 1.0 * (*this)
+   const int status = vector->Update (1.0, *(v.vector), 1.0);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return *this;
+ }
+
+
+
+ // Subtract the vector @p v from this one. Both vectors must have the
+ // same size and the same parallel partitioning.
+ inline
+ VectorBase &
+ VectorBase::operator -= (const VectorBase &v)
+ {
+   Assert (size() == v.size(),
+           ExcDimensionMismatch(size(), v.size()));
+   Assert (vector->Map().SameAs(v.vector->Map()),
+           ExcDifferentParallelPartitioning());
+
+   // *this = -1.0 * v + 1.0 * (*this)
+   const int status = vector->Update (-1.0, *(v.vector), 1.0);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+
+   return *this;
+ }
+
+
+
+ // Add the scalar @p s to every locally stored entry.
+ inline
+ void
+ VectorBase::add (const TrilinosScalar s)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (numbers::is_finite(s), ExcNumberNotFinite());
+
+   const unsigned int n_entries = local_size();
+   for (unsigned int entry=0; entry<n_entries; ++entry)
+     (*vector)[0][entry] += s;
+ }
+
+
+
+ // Add a multiple of a vector: <tt>*this += a*v</tt>.
+ inline
+ void
+ VectorBase::add (const TrilinosScalar a,
+                  const VectorBase &v)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (local_size() == v.local_size(),
+           ExcDimensionMismatch(local_size(), v.local_size()));
+
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+   const int status = vector->Update(a, *(v.vector), 1.);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+ }
+
+
+
+ // Add multiples of two vectors: <tt>*this += a*v + b*w</tt>.
+ inline
+ void
+ VectorBase::add (const TrilinosScalar a,
+                  const VectorBase &v,
+                  const TrilinosScalar b,
+                  const VectorBase &w)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (local_size() == v.local_size(),
+           ExcDimensionMismatch(local_size(), v.local_size()));
+   Assert (local_size() == w.local_size(),
+           ExcDimensionMismatch(local_size(), w.local_size()));
+
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+   Assert (numbers::is_finite(b), ExcNumberNotFinite());
+
+   const int status = vector->Update(a, *(v.vector), b, *(w.vector), 1.);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+ }
+
+
+
+ // Scale and add: <tt>*this = s*(*this) + v</tt>.
+ inline
+ void
+ VectorBase::sadd (const TrilinosScalar s,
+                   const VectorBase &v)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (local_size() == v.local_size(),
+           ExcDimensionMismatch(local_size(), v.local_size()));
+
+   Assert (numbers::is_finite(s), ExcNumberNotFinite());
+
+   const int status = vector->Update(1., *(v.vector), s);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+ }
+
+
+
+ // Scale and add a multiple: <tt>*this = s*(*this) + a*v</tt>.
+ inline
+ void
+ VectorBase::sadd (const TrilinosScalar s,
+                   const TrilinosScalar a,
+                   const VectorBase &v)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (local_size() == v.local_size(),
+           ExcDimensionMismatch(local_size(), v.local_size()));
+
+   Assert (numbers::is_finite(s), ExcNumberNotFinite());
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+   const int status = vector->Update(a, *(v.vector), s);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+ }
+
+
+
+ // Scale and add two multiples:
+ // <tt>*this = s*(*this) + a*v + b*w</tt>.
+ inline
+ void
+ VectorBase::sadd (const TrilinosScalar s,
+                   const TrilinosScalar a,
+                   const VectorBase &v,
+                   const TrilinosScalar b,
+                   const VectorBase &w)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (local_size() == v.local_size(),
+           ExcDimensionMismatch(local_size(), v.local_size()));
+   Assert (local_size() == w.local_size(),
+           ExcDimensionMismatch(local_size(), w.local_size()));
+
+   Assert (numbers::is_finite(s), ExcNumberNotFinite());
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+   Assert (numbers::is_finite(b), ExcNumberNotFinite());
+
+   const int status = vector->Update(a, *(v.vector), b, *(w.vector), s);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+ }
+
+
+
+ // Scale and add three multiples:
+ // <tt>*this = s*(*this) + a*v + b*w + c*x</tt>.
+ //
+ // Throws ExcTrilinosError if either of the two underlying Trilinos
+ // calls reports a non-zero error code.
+ inline
+ void
+ VectorBase::sadd (const TrilinosScalar s,
+                   const TrilinosScalar a,
+                   const VectorBase &v,
+                   const TrilinosScalar b,
+                   const VectorBase &w,
+                   const TrilinosScalar c,
+                   const VectorBase &x)
+ {
+   // if we have ghost values, do not allow
+   // writing to this vector at all.
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (local_size() == v.local_size(),
+           ExcDimensionMismatch(local_size(), v.local_size()));
+   Assert (local_size() == w.local_size(),
+           ExcDimensionMismatch(local_size(), w.local_size()));
+   Assert (local_size() == x.local_size(),
+           ExcDimensionMismatch(local_size(), x.local_size()));
+
+   Assert (numbers::is_finite(s), ExcNumberNotFinite());
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+   Assert (numbers::is_finite(b), ExcNumberNotFinite());
+   Assert (numbers::is_finite(c), ExcNumberNotFinite());
+
+   // Update member can only
+   // input two other vectors so
+   // do it in two steps
+   const int ierr = vector->Update(a, *(v.vector), b, *(w.vector), s);
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+   // check the second step in optimized mode as well, like every other
+   // Trilinos call in this file; a failure here must not go unnoticed
+   const int jerr = vector->Update(c, *(x.vector), 1.);
+   AssertThrow (jerr == 0, ExcTrilinosError(jerr));
+ }
+
+
+
+ // Multiply each entry of this vector by the corresponding entry of
+ // @p factors, using Epetra's element-wise Multiply().
+ inline
+ void
+ VectorBase::scale (const VectorBase &factors)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (local_size() == factors.local_size(),
+           ExcDimensionMismatch(local_size(), factors.local_size()));
+
+   const int status = vector->Multiply (1.0, *(factors.vector), *vector, 0.0);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+ }
+
+
+
+ // Assignment <tt>*this = a*v</tt>.
+ inline
+ void
+ VectorBase::equ (const TrilinosScalar a,
+                  const VectorBase &v)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+
+   if (vector->Map().SameAs(v.vector->Map()))
+     {
+       // same map: a single update does the job
+       const int status = vector->Update(a, *v.vector, 0.0);
+       AssertThrow (status == 0, ExcTrilinosError(status));
+
+       last_action = Zero;
+     }
+   else
+     {
+       // different maps: copy first, then scale
+       *vector = *v.vector;
+       *this *= a;
+     }
+ }
+
+
+
+ // Assignment <tt>*this = a*v + b*w</tt>.
+ inline
+ void
+ VectorBase::equ (const TrilinosScalar a,
+                  const VectorBase &v,
+                  const TrilinosScalar b,
+                  const VectorBase &w)
+ {
+   // vectors with ghost elements must not be written to
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+   Assert (v.local_size() == w.local_size(),
+           ExcDimensionMismatch (v.local_size(), w.local_size()));
+
+   Assert (numbers::is_finite(a), ExcNumberNotFinite());
+   Assert (numbers::is_finite(b), ExcNumberNotFinite());
+
+   if (vector->Map().SameAs(v.vector->Map()))
+     {
+       // *this and v share a map; make sure the same holds for w
+       // before doing everything in a single update
+       Assert (vector->Map().SameAs(w.vector->Map()),
+               ExcDifferentParallelPartitioning());
+       const int status = vector->Update(a, *v.vector, b, *w.vector, 0.0);
+       AssertThrow (status == 0, ExcTrilinosError(status));
+
+       last_action = Zero;
+     }
+   else
+     {
+       // different maps: copy v, then form a*(*this) + b*w
+       *vector = *v.vector;
+       sadd(a, b, w);
+     }
+ }
+
+
+
+ // Element-wise ratio <tt>this[i] = v[i]/w[i]</tt>, computed by
+ // Epetra's ReciprocalMultiply().
+ inline
+ void
+ VectorBase::ratio (const VectorBase &v,
+                    const VectorBase &w)
+ {
+   Assert (v.local_size() == w.local_size(),
+           ExcDimensionMismatch (v.local_size(), w.local_size()));
+   Assert (local_size() == w.local_size(),
+           ExcDimensionMismatch (local_size(), w.local_size()));
+
+   const int status = vector->ReciprocalMultiply(1.0, *(w.vector),
+                                                 *(v.vector), 0.0);
+   AssertThrow (status == 0, ExcTrilinosError(status));
+ }
+
+
+
+ // Read-only access to the wrapped Trilinos object, viewed as an
+ // Epetra_MultiVector.
+ inline
+ const Epetra_MultiVector &
+ VectorBase::trilinos_vector () const
+ {
+   const Epetra_MultiVector &as_multivector
+     = static_cast<const Epetra_MultiVector &>(*vector);
+   return as_multivector;
+ }
+
+
+
+ // Writable access to the wrapped Epetra_FEVector.
+ inline
+ Epetra_FEVector &
+ VectorBase::trilinos_vector ()
+ {
+   Epetra_FEVector &wrapped = *vector;
+   return wrapped;
+ }
+
+
+
+ // Return the map of the wrapped vector, cast to Epetra_Map.
+ inline
+ const Epetra_Map &
+ VectorBase::vector_partitioner () const
+ {
+   const Epetra_Map &partitioner
+     = static_cast<const Epetra_Map &>(vector->Map());
+   return partitioner;
+ }
+
+
+#endif // DOXYGEN
+
+}
+
+/*@}*/
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_TRILINOS
+
+/*---------------------------- trilinos_vector_base.h ---------------------------*/
+
+#endif
+/*---------------------------- trilinos_vector_base.h ---------------------------*/
Added: branches/s-wang2/for_deal.II/source/lac/constraint_matrix.cc
===================================================================
--- branches/s-wang2/for_deal.II/source/lac/constraint_matrix.cc (rev 0)
+++ branches/s-wang2/for_deal.II/source/lac/constraint_matrix.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,2578 @@
+//---------------------------------------------------------------------------
+// $Id: constraint_matrix.cc 27628 2012-11-20 22:49:26Z heister $
+//
+// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/lac/constraint_matrix.h>
+#include <deal.II/lac/constraint_matrix.templates.h>
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/compressed_sparsity_pattern.h>
+#include <deal.II/lac/compressed_set_sparsity_pattern.h>
+#include <deal.II/lac/compressed_simple_sparsity_pattern.h>
+#include <deal.II/lac/block_vector.h>
+#include <deal.II/lac/block_sparse_matrix.h>
+#include <deal.II/lac/sparse_matrix_ez.h>
+#include <deal.II/lac/block_sparse_matrix_ez.h>
+#include <deal.II/lac/parallel_vector.h>
+#include <deal.II/lac/parallel_block_vector.h>
+#include <deal.II/lac/petsc_vector.h>
+#include <deal.II/lac/petsc_block_vector.h>
+#include <deal.II/lac/petsc_sparse_matrix.h>
+#include <deal.II/lac/petsc_block_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_vector.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+#include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_vector.h>
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/matrix_block.h>
+
+#include <algorithm>
+#include <numeric>
+#include <set>
+#include <ostream>
+
+DEAL_II_NAMESPACE_OPEN
+
+
+
+// Out-of-class definition of the static member default_empty_table, initialized with a default-constructed Table<2,bool>
+const Table<2,bool> ConstraintMatrix::default_empty_table = Table<2,bool>();
+
+
+
+bool
+ConstraintMatrix::check_zero_weight (const std::pair<unsigned int, double> &p)
+{
+  // Predicate on a (dof index, weight) pair: true if and only if
+  // the weight compares equal to zero. close() passes this
+  // function to std::remove_if to strip such entries.
+  const double weight = p.second;
+  return (weight == 0);
+}
+
+
+
+bool
+ConstraintMatrix::ConstraintLine::operator < (const ConstraintLine &a) const
+{
+  // Constraint lines are ordered by the index of the constrained
+  // dof alone; entries and inhomogeneity play no role.
+  return a.line > this->line;
+}
+
+
+
+bool
+ConstraintMatrix::ConstraintLine::operator == (const ConstraintLine &a) const
+{
+  // Two constraint lines compare equal if they constrain the same
+  // dof; entries and inhomogeneity are not compared.
+  return a.line == this->line;
+}
+
+
+
+std::size_t
+ConstraintMatrix::ConstraintLine::memory_consumption () const
+{
+  // Add up what MemoryConsumption::memory_consumption() reports
+  // for each of the three data members of a constraint line.
+  std::size_t total = MemoryConsumption::memory_consumption (line);
+  total += MemoryConsumption::memory_consumption (entries);
+  total += MemoryConsumption::memory_consumption (inhomogeneity);
+  return total;
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const std::set<unsigned int> &lines)
+{
+  // Call add_line() once for every index stored in the given set,
+  // in the order in which the set iterates over its elements.
+  std::set<unsigned int>::const_iterator index = lines.begin();
+  while (index != lines.end())
+    {
+      add_line (*index);
+      ++index;
+    }
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const std::vector<bool> &lines)
+{
+  // Interpret the argument as a mask: call add_line() for every
+  // position whose flag is set, in ascending order.
+  unsigned int index = 0;
+  while (index < lines.size())
+    {
+      if (lines[index])
+        add_line (index);
+      ++index;
+    }
+}
+
+
+
+void
+ConstraintMatrix::add_lines (const IndexSet &lines)
+{
+  // Call add_line() for every index contained in the index set,
+  // visiting the elements by their position within the set.
+  unsigned int position = 0;
+  while (position < lines.n_elements())
+    {
+      add_line (lines.nth_index_in_set(position));
+      ++position;
+    }
+}
+
+
+
+void
+ConstraintMatrix::add_entries
+(const unsigned int line,
+ const std::vector<std::pair<unsigned int,double> > &col_val_pairs)
+{
+  // Append the given (column, weight) pairs to the constraint line
+  // of dof @p line. The object must not be closed yet, and the
+  // line must already exist.
+  //
+  // A pair whose column is already present in the line is skipped,
+  // so that no column is stored twice. (Storing it twice would
+  // make close() add the two weights up, i.e. double the weight.)
+  // In debug mode it is additionally checked that the weight given
+  // now equals the stored one, and that no dof is constrained to
+  // itself.
+  Assert (sorted==false, ExcMatrixIsClosed());
+  Assert (is_constrained(line), ExcLineInexistant(line));
+
+  ConstraintLine *line_ptr = &lines[lines_cache[calculate_line_index(line)]];
+  Assert (line_ptr->line == line, ExcInternalError());
+
+  for (std::vector<std::pair<unsigned int,double> >::const_iterator
+       col_val_pair = col_val_pairs.begin();
+       col_val_pair!=col_val_pairs.end(); ++col_val_pair)
+    {
+      Assert (line != col_val_pair->first,
+              ExcMessage ("Can't constrain a degree of freedom to itself"));
+
+      // search the entries stored so far (including the ones
+      // appended earlier in this call) for the present column
+      bool entry_exists = false;
+      for (ConstraintLine::Entries::const_iterator
+           p=line_ptr->entries.begin();
+           p != line_ptr->entries.end(); ++p)
+        if (p->first == col_val_pair->first)
+          {
+            // entry exists: remember that and leave the search loop
+            Assert (p->second == col_val_pair->second,
+                    ExcEntryAlreadyExists(line, col_val_pair->first,
+                                          p->second, col_val_pair->second));
+            entry_exists = true;
+            break;
+          }
+
+      // only columns not seen before are stored
+      if (entry_exists == false)
+        line_ptr->entries.push_back (*col_val_pair);
+    }
+}
+
+
+
+void ConstraintMatrix::add_selected_constraints
+(const ConstraintMatrix &constraints,
+ const IndexSet &filter)
+{
+  // Copy into this object those constraints of @p constraints whose
+  // constrained dof is an element of @p filter. Dof indices are
+  // translated to their position within the filter; entries whose
+  // column is not an element of the filter are left out.
+
+  // an object without constraints has nothing to contribute
+  if (constraints.n_constraints() == 0)
+    return;
+
+  Assert (filter.size() > constraints.lines.back().line,
+          ExcMessage ("Filter needs to be larger than constraint matrix size."));
+
+  std::vector<ConstraintLine>::const_iterator line = constraints.lines.begin();
+  for (; line!=constraints.lines.end(); ++line)
+    {
+      // lines of dofs that are not selected are skipped
+      if (filter.is_element(line->line) == false)
+        continue;
+
+      const unsigned int row = filter.index_within_set (line->line);
+      add_line (row);
+      set_inhomogeneity (row, line->inhomogeneity);
+
+      // take over the entries with a selected column
+      for (unsigned int k=0; k<line->entries.size(); ++k)
+        {
+          const unsigned int column = line->entries[k].first;
+          if (filter.is_element(column))
+            add_entry (row,
+                       filter.index_within_set (column),
+                       line->entries[k].second);
+        }
+    }
+}
+
+
+/**
+ * Close the object: bring the stored constraint lines into their
+ * final form. The function returns immediately if the object is
+ * already marked as sorted.
+ *
+ * The steps are, in this order:
+ *  - sort @p lines and rebuild @p lines_cache so that every cache slot
+ *    in use holds the new position of its line;
+ *  - remove all entries whose weight is exactly zero;
+ *  - repeatedly replace entries that refer to a dof which is itself
+ *    constrained by the entries of that other constraint (weights
+ *    multiplied, inhomogeneity accumulated) until a full sweep over
+ *    all lines replaces nothing. If @p local_lines is not empty, only
+ *    entries whose dof is an element of @p local_lines are considered;
+ *  - sort the entries of each line, add up the weights of entries
+ *    that refer to the same dof, and store the result in a newly
+ *    allocated vector;
+ *  - if the weights of a line sum up to a value that differs from one
+ *    by less than 1e-13 without being equal to one, divide all weights
+ *    and the inhomogeneity of that line by this sum;
+ *  - set @p sorted to true.
+ *
+ * If DEBUG is defined, the number of replacements per line is
+ * counted and compared against the largest dof index found among all
+ * entries as a means to detect cycles. When that limit is reached,
+ * an Assert is triggered and the function returns early, in which
+ * case @p sorted is not set. Also under DEBUG, a final check verifies
+ * that no (locally relevant) entry refers to a constrained dof.
+ */
+void ConstraintMatrix::close ()
+{
+ if (sorted == true)
+ return;
+
+ // sort the lines
+ std::sort (lines.begin(), lines.end());
+
+ // update list of pointers and give the
+ // vector a sharp size since we won't
+ // modify the size any more after this
+ // point.
+ {
+ std::vector<unsigned int> new_lines (lines_cache.size(),
+ numbers::invalid_unsigned_int);
+ unsigned int counter = 0;
+ for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+ line!=lines.end(); ++line, ++counter)
+ new_lines[calculate_line_index(line->line)] = counter;
+ std::swap (lines_cache, new_lines);
+ }
+
+ // in debug mode: check whether we really
+ // set the pointers correctly.
+ for (unsigned int i=0; i<lines_cache.size(); ++i)
+ if (lines_cache[i] != numbers::invalid_unsigned_int)
+ Assert (i == calculate_line_index(lines[lines_cache[i]].line),
+ ExcInternalError());
+
+ // first, strip zero entries, as we
+ // have to do that only once
+ for (std::vector<ConstraintLine>::iterator line = lines.begin();
+ line!=lines.end(); ++line)
+ // first remove zero
+ // entries. that would mean that
+ // in the linear constraint for a
+ // node, x_i = ax_1 + bx_2 + ...,
+ // another node times 0
+ // appears. obviously,
+ // 0*something can be omitted
+ line->entries.erase (std::remove_if (line->entries.begin(),
+ line->entries.end(),
+ &check_zero_weight),
+ line->entries.end());
+
+
+
+#ifdef DEBUG
+ // In debug mode we are computing an estimate for the maximum number
+ // of constraints so that we can bail out if there is a cycle in the
+ // constraints (which is easier than searching for cycles in the graph).
+ //
+ // Let us figure out the largest dof index. This is an upper bound for the
+ // number of constraints because it is an approximation for the number of dofs
+ // in our system.
+ unsigned int largest_idx = 0;
+ for (std::vector<ConstraintLine>::iterator line = lines.begin();
+ line!=lines.end(); ++line)
+ {
+ for (ConstraintLine::Entries::iterator it = line->entries.begin(); it!=line->entries.end(); ++it)
+ {
+ largest_idx=std::max(largest_idx, it->first);
+ }
+ }
+#endif
+
+ // replace references to dofs that
+ // are themselves constrained. note
+ // that because we may replace
+ // references to other dofs that
+ // may themselves be constrained to
+ // third ones, we have to iterate
+ // over all this until we replace
+ // no chains of constraints any
+ // more
+ //
+ // the iteration replaces
+ // references to constrained
+ // degrees of freedom by
+ // second-order references. for
+ // example if x3=x0/2+x2/2 and
+ // x2=x0/2+x1/2, then the new list
+ // will be x3=x0/2+x0/4+x1/4. note
+ // that x0 appears twice. we will
+ // throw this duplicate out in the
+ // following step, where we sort
+ // the list so that throwing out
+ // duplicates becomes much more
+ // efficient. also, we have to do
+ // it only once, rather than in
+ // each iteration
+ unsigned int iteration = 0;
+ while (true)
+ {
+ bool chained_constraint_replaced = false;
+
+ for (std::vector<ConstraintLine>::iterator line = lines.begin();
+ line!=lines.end(); ++line)
+ {
+#ifdef DEBUG
+ // we need to keep track of how many replacements we do in this line, because we can
+ // end up in a cycle A->B->C->A without the number of entries growing.
+ unsigned int n_replacements = 0;
+#endif
+
+
+
+ // loop over all entries of
+ // this line (including
+ // ones that we have
+ // appended in this go
+ // around) and see whether
+ // they are further
+ // constrained. ignore
+ // elements that we don't
+ // store on the current
+ // processor
+ unsigned int entry = 0;
+ while (entry < line->entries.size())
+ if (((local_lines.size() == 0)
+ ||
+ (local_lines.is_element(line->entries[entry].first)))
+ &&
+ is_constrained (line->entries[entry].first))
+ {
+ // ok, this entry is
+ // further
+ // constrained:
+ chained_constraint_replaced = true;
+
+ // look up the chain
+ // of constraints for
+ // this entry
+ const unsigned int dof_index = line->entries[entry].first;
+ const double weight = line->entries[entry].second;
+
+ Assert (dof_index != line->line,
+ ExcMessage ("Cycle in constraints detected!"));
+
+ const ConstraintLine *constrained_line =
+ &lines[lines_cache[calculate_line_index(dof_index)]];
+ Assert (constrained_line->line == dof_index,
+ ExcInternalError());
+
+ // now we have to
+ // replace an entry
+ // by its
+ // expansion. we do
+ // that by
+ // overwriting the
+ // entry by the first
+ // entry of the
+ // expansion and
+ // adding the
+ // remaining ones to
+ // the end, where we
+ // will later process
+ // them once more
+ //
+ // we can of course
+ // only do that if
+ // the DoF that we
+ // are currently
+ // handling is
+ // constrained by a
+ // linear combination
+ // of other dofs:
+ if (constrained_line->entries.size() > 0)
+ {
+ for (unsigned int i=0; i<constrained_line->entries.size(); ++i)
+ Assert (dof_index != constrained_line->entries[i].first,
+ ExcMessage ("Cycle in constraints detected!"));
+
+ // replace first
+ // entry, then tack
+ // the rest to the
+ // end of the list
+ line->entries[entry] =
+ std::make_pair (constrained_line->entries[0].first,
+ constrained_line->entries[0].second *
+ weight);
+
+ for (unsigned int i=1; i<constrained_line->entries.size(); ++i)
+ line->entries
+ .push_back (std::make_pair (constrained_line->entries[i].first,
+ constrained_line->entries[i].second *
+ weight));
+
+#ifdef DEBUG
+ // keep track of how many entries we replace in this line. If we do more than
+ // there are constraints or dofs in our system, we must have a cycle.
+ ++n_replacements;
+ Assert(n_replacements/2<largest_idx, ExcMessage("Cycle in constraints detected!"));
+ if (n_replacements/2>=largest_idx)
+ return; // this enables us to test for this Exception.
+#endif
+ }
+ else
+ // the DoF that we
+ // encountered is not
+ // constrained by a linear
+ // combination of other
+ // dofs but is equal to
+ // just the inhomogeneity
+ // (i.e. its chain of
+ // entries is empty). in
+ // that case, we can't just
+ // overwrite the current
+ // entry, but we have to
+ // actually eliminate it
+ {
+ line->entries.erase (line->entries.begin()+entry);
+ }
+
+ line->inhomogeneity += constrained_line->inhomogeneity *
+ weight;
+
+ // now that we're here, do
+ // not increase index by
+ // one but rather make
+ // another pass for the
+ // present entry because we
+ // have replaced the
+ // present entry by another
+ // one, or because we have
+ // deleted it and shifted
+ // all following ones one
+ // forward
+ }
+ else
+ // entry not further
+ // constrained. just move
+ // ahead by one
+ ++entry;
+ }
+
+ // if we didn't do anything in
+ // this round, then quit the
+ // loop
+ if (chained_constraint_replaced == false)
+ break;
+
+ // increase iteration count. note
+ // that we should not iterate more
+ // times than there are constraints,
+ // since this puts a natural upper
+ // bound on the length of constraint
+ // chains
+ ++iteration;
+ Assert (iteration <= lines.size(), ExcInternalError());
+ }
+
+ // finally sort the entries and re-scale
+ // them if necessary. in this step, we also
+ // throw out duplicates as mentioned
+ // above. moreover, as some entries might
+ // have had zero weights, we replace them
+ // by a vector with sharp sizes.
+ for (std::vector<ConstraintLine>::iterator line = lines.begin();
+ line!=lines.end(); ++line)
+ {
+ std::sort (line->entries.begin(), line->entries.end());
+
+ // loop over the now sorted list and
+ // see whether any of the entries
+ // references the same dofs more than
+ // once in order to find how many
+ // non-duplicate entries we have. This
+ // lets us allocate the correct amount
+ // of memory for the constraint
+ // entries.
+ unsigned int duplicates = 0;
+ for (unsigned int i=1; i<line->entries.size(); ++i)
+ if (line->entries[i].first == line->entries[i-1].first)
+ duplicates++;
+
+ if (duplicates > 0 || line->entries.size() < line->entries.capacity())
+ {
+ ConstraintLine::Entries new_entries;
+
+ // if we have no duplicates, copy
+ // verbatim the entries. this
+ // way, the final size of the
+ // vector is correct.
+ if (duplicates == 0)
+ new_entries = line->entries;
+ else
+ {
+ // otherwise, we need to go
+ // through the list by hand and
+ // resolve the duplicates
+ new_entries.reserve (line->entries.size() - duplicates);
+ new_entries.push_back(line->entries[0]);
+ for (unsigned int j=1; j<line->entries.size(); ++j)
+ if (line->entries[j].first == line->entries[j-1].first)
+ {
+ Assert (new_entries.back().first == line->entries[j].first,
+ ExcInternalError());
+ new_entries.back().second += line->entries[j].second;
+ }
+ else
+ new_entries.push_back (line->entries[j]);
+
+ Assert (new_entries.size() == line->entries.size() - duplicates,
+ ExcInternalError());
+
+ // make sure there are
+ // really no duplicates
+ // left and that the list
+ // is still sorted
+ for (unsigned int j=1; j<new_entries.size(); ++j)
+ {
+ Assert (new_entries[j].first != new_entries[j-1].first,
+ ExcInternalError());
+ Assert (new_entries[j].first > new_entries[j-1].first,
+ ExcInternalError());
+ }
+ }
+
+ // replace old list of
+ // constraints for this dof by
+ // the new one
+ line->entries.swap (new_entries);
+ }
+
+ // finally do the following
+ // check: if the sum of
+ // weights for the
+ // constraints is close to
+ // one, but not exactly
+ // one, then rescale all
+ // the weights so that they
+ // sum up to 1. this adds a
+ // little numerical
+ // stability and avoids all
+ // sorts of problems where
+ // the actual value is
+ // close to, but not quite
+ // what we expected
+ //
+ // the case where the
+ // weights don't quite sum
+ // up happens when we
+ // compute the
+ // interpolation weights
+ // "on the fly", i.e. not
+ // from precomputed
+ // tables. in this case,
+ // the interpolation
+ // weights are also subject
+ // to round-off
+ double sum = 0;
+ for (unsigned int i=0; i<line->entries.size(); ++i)
+ sum += line->entries[i].second;
+ if ((sum != 1.0) && (std::fabs (sum-1.) < 1.e-13))
+ {
+ for (unsigned int i=0; i<line->entries.size(); ++i)
+ line->entries[i].second /= sum;
+ line->inhomogeneity /= sum;
+ }
+ } // end of loop over all constraint lines
+
+#ifdef DEBUG
+ // if in debug mode: check that no dof is
+ // constrained to another dof that is also
+ // constrained. exclude dofs from this
+ // check whose constraint lines are not
+ // stored on the local processor
+ for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+ line!=lines.end(); ++line)
+ for (ConstraintLine::Entries::const_iterator
+ entry=line->entries.begin();
+ entry!=line->entries.end(); ++entry)
+ if ((local_lines.size() == 0)
+ ||
+ (local_lines.is_element(entry->first)))
+ {
+ // make sure that entry->first is
+ // not the index of a line itself
+ const bool is_circle = is_constrained(entry->first);
+ Assert (is_circle == false,
+ ExcDoFConstrainedToConstrainedDoF(line->line, entry->first));
+ }
+#endif
+
+ sorted = true;
+}
+
+
+/**
+ * Merge the constraints stored in @p other_constraints into this
+ * object.
+ *
+ * Both objects must have equal @p local_lines; otherwise
+ * ExcNotImplemented is thrown (also in optimized mode).
+ *
+ * The function first rewrites the lines of the present object: an
+ * entry that refers to a dof constrained in @p other_constraints is
+ * replaced by the entries of that other constraint, with weights
+ * multiplied and the inhomogeneity accumulated. An entry is kept
+ * unchanged if its dof is not constrained in the other object, or if
+ * it is constrained in both objects and @p merge_conflict_behavior is
+ * not right_object_wins.
+ *
+ * Then the lines of @p other_constraints are taken over. A line whose
+ * dof is not yet constrained here is appended. For a dof constrained
+ * in both objects, @p merge_conflict_behavior decides:
+ *  - no_conflicts_allowed: ExcDoFIsConstrainedFromBothObjects is thrown;
+ *  - left_object_wins: the line of this object is kept;
+ *  - right_object_wins: entries and inhomogeneity of the line of this
+ *    object are overwritten by those of the other object.
+ *
+ * Finally, @p lines_cache is updated for all lines. If the object was
+ * sorted when the function was entered, close() is called at the end;
+ * otherwise the object is left in the unsorted state.
+ */
+void
+ConstraintMatrix::merge (const ConstraintMatrix &other_constraints,
+ const MergeConflictBehavior merge_conflict_behavior)
+{
+ AssertThrow(local_lines == other_constraints.local_lines,
+ ExcNotImplemented());
+
+ // store the previous state with
+ // respect to sorting
+ const bool object_was_sorted = sorted;
+ sorted = false;
+
+ if (other_constraints.lines_cache.size() > lines_cache.size())
+ lines_cache.resize(other_constraints.lines_cache.size(),
+ numbers::invalid_unsigned_int);
+
+ // first action is to fold into the present
+ // object possible constraints in the
+ // second object. we don't strictly need to
+ // do this any more since the
+ // ConstraintMatrix has learned to deal
+ // with chains of constraints in the
+ // close() function, but we have
+ // traditionally done this and it's not
+ // overly hard to do.
+ //
+ // for this, loop over all
+ // constraints and replace the
+ // constraint lines with a new one
+ // where constraints are replaced
+ // if necessary.
+ ConstraintLine::Entries tmp;
+ for (std::vector<ConstraintLine>::iterator line=lines.begin();
+ line!=lines.end(); ++line)
+ {
+ tmp.clear ();
+ for (unsigned int i=0; i<line->entries.size(); ++i)
+ {
+ // if the present dof is not
+ // constrained, or if we won't take
+ // the constraint from the other
+ // object, then simply copy it over
+ if (other_constraints.is_constrained(line->entries[i].first) == false
+ ||
+ ((merge_conflict_behavior != right_object_wins)
+ &&
+ other_constraints.is_constrained(line->entries[i].first)
+ &&
+ this->is_constrained(line->entries[i].first)))
+ tmp.push_back(line->entries[i]);
+ else
+ // otherwise resolve
+ // further constraints by
+ // replacing the old
+ // entry by a sequence of
+ // new entries taken from
+ // the other object, but
+ // with multiplied
+ // weights
+ {
+ const ConstraintLine::Entries *other_line
+ = other_constraints.get_constraint_entries (line->entries[i].first);
+ Assert (other_line != 0,
+ ExcInternalError());
+
+ const double weight = line->entries[i].second;
+
+ for (ConstraintLine::Entries::const_iterator j=other_line->begin();
+ j!=other_line->end(); ++j)
+ tmp.push_back (std::pair<unsigned int,double>(j->first,
+ j->second*weight));
+
+ line->inhomogeneity += other_constraints.get_inhomogeneity(line->entries[i].first) *
+ weight;
+ }
+ }
+ // finally exchange old and
+ // newly resolved line
+ line->entries.swap (tmp);
+ }
+
+
+
+ // next action: append those lines at the
+ // end that we want to add
+ for (std::vector<ConstraintLine>::const_iterator
+ line=other_constraints.lines.begin();
+ line!=other_constraints.lines.end(); ++line)
+ if (is_constrained(line->line) == false)
+ lines.push_back (*line);
+ else
+ {
+ // the constrained dof we want to
+ // copy from the other object is also
+ // constrained here. let's see what
+ // we should do with that
+ switch (merge_conflict_behavior)
+ {
+ case no_conflicts_allowed:
+ AssertThrow (false,
+ ExcDoFIsConstrainedFromBothObjects (line->line));
+ break;
+
+ case left_object_wins:
+ // ignore this constraint
+ break;
+
+ case right_object_wins:
+ // we need to replace the
+ // existing constraint by
+ // the one from the other
+ // object
+ lines[lines_cache[calculate_line_index(line->line)]].entries
+ = line->entries;
+ lines[lines_cache[calculate_line_index(line->line)]].inhomogeneity
+ = line->inhomogeneity;
+ break;
+
+ default:
+ Assert (false, ExcNotImplemented());
+ }
+ }
+
+ // update the lines cache
+ unsigned int counter = 0;
+ for (std::vector<ConstraintLine>::const_iterator line=lines.begin();
+ line!=lines.end(); ++line, ++counter)
+ lines_cache[calculate_line_index(line->line)] = counter;
+
+ // if the object was sorted before,
+ // then make sure it is so
+ // afterward as well. otherwise
+ // leave everything in the unsorted
+ // state
+ if (object_was_sorted == true)
+ close ();
+}
+
+
+
+void ConstraintMatrix::shift (const unsigned int offset)
+{
+  // Shift all dof indices stored in this object -- those of the
+  // constrained dofs as well as those appearing in the entries --
+  // upward by @p offset.
+
+  //TODO: this doesn't work with IndexSets yet. [TH]
+  AssertThrow(local_lines.size()==0, ExcNotImplemented());
+
+  // make room for @p offset unused slots at the front of the
+  // cache, so that the slots in use move up by the same amount
+  lines_cache.insert (lines_cache.begin(),
+                      offset,
+                      numbers::invalid_unsigned_int);
+
+  for (unsigned int l=0; l<lines.size(); ++l)
+    {
+      ConstraintLine &current = lines[l];
+
+      current.line += offset;
+      for (unsigned int e=0; e<current.entries.size(); ++e)
+        current.entries[e].first += offset;
+    }
+}
+
+
+
+void ConstraintMatrix::clear ()
+{
+  // Remove all constraint lines and the line cache, and mark the
+  // object as not sorted. Each container is swapped with an empty
+  // temporary, so the previous content is owned by the temporary
+  // and goes away together with it.
+  std::vector<ConstraintLine>().swap (lines);
+  std::vector<unsigned int>().swap (lines_cache);
+
+  sorted = false;
+}
+
+
+
+void ConstraintMatrix::reinit (const IndexSet &local_constraints)
+{
+  // Store the given index set as the new set of local lines, then
+  // discard all constraints held so far.
+  this->local_lines = local_constraints;
+  this->clear();
+}
+
+
+/**
+ * Build the condensed sparsity pattern @p condensed from the
+ * pattern @p uncondensed.
+ *
+ * This object must be closed, and @p uncondensed must be compressed
+ * and square. The function first computes, for every row of
+ * @p uncondensed, its index in the condensed pattern, or -1 if the
+ * row belongs to a constrained dof and is therefore dropped. It then
+ * walks over all entries of @p uncondensed: an entry with
+ * unconstrained row and column is copied under the new numbering; a
+ * constrained row and/or column is replaced by the dofs listed in
+ * the entries of the respective constraint line. At the end,
+ * @p condensed is compressed.
+ */
+void ConstraintMatrix::condense (const SparsityPattern &uncondensed,
+ SparsityPattern &condensed) const
+{
+ Assert (sorted == true, ExcMatrixNotClosed());
+ Assert (uncondensed.is_compressed() == true, ExcMatrixNotClosed());
+ Assert (uncondensed.n_rows() == uncondensed.n_cols(),
+ ExcNotQuadratic());
+
+
+ // store for each line of the matrix
+ // its new line number
+ // after compression. If the entry is
+ // -1, this line will be condensed away
+ std::vector<int> new_line;
+
+ new_line.reserve (uncondensed.n_rows());
+
+ std::vector<ConstraintLine>::const_iterator next_constraint = lines.begin();
+ unsigned int shift = 0;
+ unsigned int n_rows = uncondensed.n_rows();
+
+ if (next_constraint == lines.end())
+ // if no constraint is to be handled
+ for (unsigned int row=0; row!=n_rows; ++row)
+ new_line.push_back (row);
+ else
+ for (unsigned int row=0; row!=n_rows; ++row)
+ if (row == next_constraint->line)
+ {
+ // this line is constrained
+ new_line.push_back (-1);
+ // note that @p{lines} is ordered
+ ++shift;
+ ++next_constraint;
+ if (next_constraint == lines.end())
+ // nothing more to do; finish rest
+ // of loop
+ {
+ for (unsigned int i=row+1; i<n_rows; ++i)
+ new_line.push_back (i-shift);
+ break;
+ };
+ }
+ else
+ new_line.push_back (row-shift);
+
+
+ next_constraint = lines.begin();
+ // note: in this loop we need not check
+ // whether @p{next_constraint} is a valid
+ // iterator, since @p{next_constraint} is
+ // only evaluated so often as there are
+ // entries in new_line[*] which tells us
+ // which constraints exist
+ for (unsigned int row=0; row<uncondensed.n_rows(); ++row)
+ if (new_line[row] != -1)
+ // line not constrained
+ // copy entries if column will not
+ // be condensed away, distribute
+ // otherwise
+ for (unsigned int j=uncondensed.get_rowstart_indices()[row];
+ j<uncondensed.get_rowstart_indices()[row+1]; ++j)
+ if (new_line[uncondensed.get_column_numbers()[j]] != -1)
+ condensed.add (new_line[row], new_line[uncondensed.get_column_numbers()[j]]);
+ else
+ {
+ // let c point to the constraint
+ // of this column
+ std::vector<ConstraintLine>::const_iterator c = lines.begin();
+ while (c->line != uncondensed.get_column_numbers()[j])
+ ++c;
+
+ for (unsigned int q=0; q!=c->entries.size(); ++q)
+ condensed.add (new_line[row], new_line[c->entries[q].first]);
+ }
+ else
+ // line must be distributed
+ {
+ for (unsigned int j=uncondensed.get_rowstart_indices()[row];
+ j<uncondensed.get_rowstart_indices()[row+1]; ++j)
+ // for each entry: distribute
+ if (new_line[uncondensed.get_column_numbers()[j]] != -1)
+ // column is not constrained
+ for (unsigned int q=0; q!=next_constraint->entries.size(); ++q)
+ condensed.add (new_line[next_constraint->entries[q].first],
+ new_line[uncondensed.get_column_numbers()[j]]);
+
+ else
+ // not only this line but
+ // also this col is constrained
+ {
+ // let c point to the constraint
+ // of this column
+ std::vector<ConstraintLine>::const_iterator c = lines.begin();
+ while (c->line != uncondensed.get_column_numbers()[j]) ++c;
+
+ for (unsigned int p=0; p!=c->entries.size(); ++p)
+ for (unsigned int q=0; q!=next_constraint->entries.size(); ++q)
+ condensed.add (new_line[next_constraint->entries[q].first],
+ new_line[c->entries[p].first]);
+ };
+
+ ++next_constraint;
+ };
+
+ condensed.compress();
+}
+
+
+/**
+ * Condense the given sparsity pattern in place.
+ *
+ * This object must be closed, and @p sparsity must be square and
+ * not yet compressed. The function builds a table that maps every
+ * dof index to the position of its constraint line (or to
+ * numbers::invalid_unsigned_int if the dof is not constrained) and
+ * then, for every valid entry of the pattern whose row and/or column
+ * is constrained, adds the entries obtained by replacing the
+ * constrained index by the dofs listed in its constraint line. No
+ * entry is removed here. At the end, @p sparsity is compressed.
+ */
+void ConstraintMatrix::condense (SparsityPattern &sparsity) const
+{
+ Assert (sorted == true, ExcMatrixNotClosed());
+ Assert (sparsity.is_compressed() == false, ExcMatrixIsClosed());
+ Assert (sparsity.n_rows() == sparsity.n_cols(),
+ ExcNotQuadratic());
+
+ // store for each index whether it must be
+ // distributed or not. If entry is
+ // numbers::invalid_unsigned_int,
+ // no distribution is necessary.
+ // otherwise, the number states which line
+ // in the constraint matrix handles this
+ // index
+ std::vector<unsigned int> distribute(sparsity.n_rows(),
+ numbers::invalid_unsigned_int);
+
+ for (unsigned int c=0; c<lines.size(); ++c)
+ distribute[lines[c].line] = c;
+
+ const unsigned int n_rows = sparsity.n_rows();
+ for (unsigned int row=0; row<n_rows; ++row)
+ {
+ if (distribute[row] == numbers::invalid_unsigned_int)
+ {
+ // regular line. loop over cols all
+ // valid cols. note that this
+ // changes the line we are
+ // presently working on: we add
+ // additional entries. these are
+ // put to the end of the
+ // row. however, as constrained
+ // nodes cannot be constrained to
+ // other constrained nodes, nothing
+ // will happen if we run into these
+ // added nodes, as they can't be
+ // distributed further. we might
+ // store the position of the last
+ // old entry and stop work there,
+ // but since operating on the newly
+ // added ones only takes two
+ // comparisons (column index valid,
+ // distribute[column] necessarily
+ // ==numbers::invalid_unsigned_int),
+ // it is cheaper to not do so and
+ // run right until the end of the
+ // line
+ for (SparsityPattern::iterator entry = sparsity.begin(row);
+ ((entry != sparsity.end(row)) &&
+ entry->is_valid_entry());
+ ++entry)
+ {
+ const unsigned int column = entry->column();
+
+ if (distribute[column] != numbers::invalid_unsigned_int)
+ {
+ // distribute entry
+ // at regular row
+ // @p{row} and
+ // irregular column
+ // sparsity.colnums[j]
+ for (unsigned int q=0;
+ q!=lines[distribute[column]].entries.size();
+ ++q)
+ sparsity.add (row,
+ lines[distribute[column]].entries[q].first);
+ }
+ }
+ }
+ else
+ // row must be
+ // distributed. note that
+ // here the present row is
+ // not touched (unlike above)
+ {
+ for (SparsityPattern::iterator entry = sparsity.begin(row);
+ (entry != sparsity.end(row)) && entry->is_valid_entry(); ++entry)
+ {
+ const unsigned int column = entry->column();
+ if (distribute[column] == numbers::invalid_unsigned_int)
+ // distribute entry at irregular
+ // row @p{row} and regular column
+ // sparsity.colnums[j]
+ for (unsigned int q=0;
+ q!=lines[distribute[row]].entries.size(); ++q)
+ sparsity.add (lines[distribute[row]].entries[q].first,
+ column);
+ else
+ // distribute entry at irregular
+ // row @p{row} and irregular column
+ // sparsity.get_column_numbers()[j]
+ for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+ for (unsigned int q=0;
+ q!=lines[distribute[column]].entries.size(); ++q)
+ sparsity.add (lines[distribute[row]].entries[p].first,
+ lines[distribute[column]].entries[q].first);
+ }
+ }
+ }
+
+ sparsity.compress();
+}
+
+
+/**
+ * Condense the given compressed sparsity pattern in place.
+ *
+ * This object must be closed and @p sparsity must be square. The
+ * function builds a table that maps every dof index to the position
+ * of its constraint line (or to numbers::invalid_unsigned_int if the
+ * dof is not constrained). For every entry of the pattern whose row
+ * and/or column is constrained, it adds the entries obtained by
+ * replacing the constrained index by the dofs listed in its
+ * constraint line. Since entries are added to a row while that row
+ * is being traversed by position, the loop index is advanced
+ * whenever a new entry was inserted in front of the current column.
+ * No entry is removed here.
+ */
+void ConstraintMatrix::condense (CompressedSparsityPattern &sparsity) const
+{
+ Assert (sorted == true, ExcMatrixNotClosed());
+ Assert (sparsity.n_rows() == sparsity.n_cols(),
+ ExcNotQuadratic());
+
+ // store for each index whether it must be
+ // distributed or not. If entry is
+ // numbers::invalid_unsigned_int,
+ // no distribution is necessary.
+ // otherwise, the number states which line
+ // in the constraint matrix handles this
+ // index
+ std::vector<unsigned int> distribute(sparsity.n_rows(),
+ numbers::invalid_unsigned_int);
+
+ for (unsigned int c=0; c<lines.size(); ++c)
+ distribute[lines[c].line] = c;
+
+ const unsigned int n_rows = sparsity.n_rows();
+ for (unsigned int row=0; row<n_rows; ++row)
+ {
+ if (distribute[row] == numbers::invalid_unsigned_int)
+ // regular line. loop over
+ // cols. note that as we
+ // proceed to distribute
+ // cols, the loop may get
+ // longer
+ for (unsigned int j=0; j<sparsity.row_length(row); ++j)
+ {
+ const unsigned int column = sparsity.column_number(row,j);
+
+ if (distribute[column] != numbers::invalid_unsigned_int)
+ {
+ // distribute entry
+ // at regular row
+ // @p{row} and
+ // irregular column
+ // column. note that
+ // this changes the
+ // line we are
+ // presently working
+ // on: we add
+ // additional
+ // entries. if we add
+ // another entry at a
+ // column behind the
+ // present one, we
+ // will encounter it
+ // later on (but
+ // since it can't be
+ // further
+ // constrained, won't
+ // have to do
+ // anything about
+ // it). if we add it
+ // up front of the
+ // present column, we
+ // will find the
+ // present column
+ // later on again as
+ // it was shifted
+ // back (again
+ // nothing happens,
+ // in particular no
+ // endless loop, as
+ // when we encounter
+ // it the second time
+ // we won't be able
+ // to add more
+ // entries as they
+ // all already exist,
+ // but we do the same
+ // work more often
+ // than necessary,
+ // and the loop gets
+ // longer), so move
+ // the cursor one to
+ // the right in the
+ // case that we add
+ // an entry up front
+ // that did not exist
+ // before. check
+ // whether it existed
+ // before by tracking
+ // the length of this
+ // row
+ unsigned int old_rowlength = sparsity.row_length(row);
+ for (unsigned int q=0;
+ q!=lines[distribute[column]].entries.size();
+ ++q)
+ {
+ const unsigned int
+ new_col = lines[distribute[column]].entries[q].first;
+
+ sparsity.add (row, new_col);
+
+ const unsigned int new_rowlength = sparsity.row_length(row);
+ if ((new_col < column) && (old_rowlength != new_rowlength))
+ ++j;
+ old_rowlength = new_rowlength;
+ };
+ };
+ }
+ else
+ // row must be distributed
+ for (unsigned int j=0; j<sparsity.row_length(row); ++j)
+ {
+ const unsigned int column = sparsity.column_number(row,j);
+
+ if (distribute[column] == numbers::invalid_unsigned_int)
+ // distribute entry at irregular
+ // row @p{row} and regular column
+ // sparsity.colnums[j]
+ for (unsigned int q=0;
+ q!=lines[distribute[row]].entries.size(); ++q)
+ sparsity.add (lines[distribute[row]].entries[q].first,
+ column);
+ else
+ // distribute entry at irregular
+ // row @p{row} and irregular column
+ // sparsity.get_column_numbers()[j]
+ for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+ for (unsigned int q=0;
+ q!=lines[distribute[sparsity.column_number(row,j)]]
+ .entries.size(); ++q)
+ sparsity.add (lines[distribute[row]].entries[p].first,
+ lines[distribute[sparsity.column_number(row,j)]]
+ .entries[q].first);
+ };
+ };
+}
+
+
+/**
+ * Condense the given set-based compressed sparsity pattern in
+ * place.
+ *
+ * This object must be closed and @p sparsity must be square. The
+ * function builds a table that maps every dof index to the position
+ * of its constraint line (or to numbers::invalid_unsigned_int if the
+ * dof is not constrained). It then iterates over the entries of each
+ * row and, for every entry whose row and/or column is constrained,
+ * adds the entries obtained by replacing the constrained index by
+ * the dofs listed in its constraint line. No entry is removed here.
+ */
+void ConstraintMatrix::condense (CompressedSetSparsityPattern &sparsity) const
+{
+ Assert (sorted == true, ExcMatrixNotClosed());
+ Assert (sparsity.n_rows() == sparsity.n_cols(),
+ ExcNotQuadratic());
+
+ // store for each index whether it must be
+ // distributed or not. If entry is
+ // numbers::invalid_unsigned_int,
+ // no distribution is necessary.
+ // otherwise, the number states which line
+ // in the constraint matrix handles this
+ // index
+ std::vector<unsigned int> distribute(sparsity.n_rows(),
+ numbers::invalid_unsigned_int);
+
+ for (unsigned int c=0; c<lines.size(); ++c)
+ distribute[lines[c].line] = c;
+
+ const unsigned int n_rows = sparsity.n_rows();
+ for (unsigned int row=0; row<n_rows; ++row)
+ {
+ if (distribute[row] == numbers::invalid_unsigned_int)
+ {
+ // regular line. loop over
+ // cols. note that as we proceed to
+ // distribute cols, the loop may
+ // get longer
+ CompressedSetSparsityPattern::row_iterator col_num = sparsity.row_begin (row);
+
+ for (; col_num != sparsity.row_end (row); ++col_num)
+ {
+ const unsigned int column = *col_num;
+
+ if (distribute[column] != numbers::invalid_unsigned_int)
+ {
+ // distribute entry at regular row @p{row} and irregular column
+ for (unsigned int q=0;
+ q!=lines[distribute[column]].entries.size();
+ ++q)
+ {
+ const unsigned int
+ new_col = lines[distribute[column]].entries[q].first;
+
+ sparsity.add (row, new_col);
+ }
+ }
+ }
+ }
+ else
+ // row must be distributed
+ {
+ CompressedSetSparsityPattern::row_iterator col_num = sparsity.row_begin (row);
+
+ for (; col_num != sparsity.row_end (row); ++col_num)
+ {
+ const unsigned int column = *col_num;
+
+ if (distribute[column] == numbers::invalid_unsigned_int)
+ // distribute entry at irregular
+ // row @p{row} and regular column
+ // sparsity.colnums[j]
+ for (unsigned int q=0;
+ q!=lines[distribute[row]].entries.size(); ++q)
+ sparsity.add (lines[distribute[row]].entries[q].first,
+ column);
+ else
+ // distribute entry at irregular
+ // row @p{row} and irregular column
+ // sparsity.get_column_numbers()[j]
+ for (unsigned int p=0; p!=lines[distribute[row]].entries.size(); ++p)
+ for (unsigned int q=0;
+ q!=lines[distribute[column]]
+ .entries.size(); ++q)
+ sparsity.add (lines[distribute[row]].entries[p].first,
+ lines[distribute[column]]
+ .entries[q].first);
+ };
+ }
+ };
+}
+
+
+
+// Condense a CompressedSimpleSparsityPattern in place: for every
+// existing entry whose row and/or column index is constrained, add the
+// entries that the corresponding constraint line(s) refer to.
+void ConstraintMatrix::condense (CompressedSimpleSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+
+  const unsigned int n_dofs = sparsity.n_rows();
+
+  // line_of[i] is the position within 'lines' of the constraint on
+  // index i, or numbers::invalid_unsigned_int if i is unconstrained
+  std::vector<unsigned int> line_of (n_dofs,
+                                     numbers::invalid_unsigned_int);
+  for (unsigned int l=0; l<lines.size(); ++l)
+    line_of[lines[l].line] = l;
+
+  for (unsigned int row=0; row<n_dofs; ++row)
+    {
+      if (line_of[row] == numbers::invalid_unsigned_int)
+        {
+          // regular row. the row length is re-read in every
+          // iteration because the row may grow while we work on it
+          for (unsigned int j=0; j<sparsity.row_length(row); ++j)
+            {
+              const unsigned int column = sparsity.column_number(row,j);
+              if (line_of[column] == numbers::invalid_unsigned_int)
+                continue;
+
+              // regular row, constrained column. adding entries
+              // changes the row we are iterating over: an entry
+              // added behind the cursor is simply met later, but
+              // one added in front of it shifts the current column
+              // back by one, so we would visit it a second time
+              // (harmless, but wasted work). whenever the row
+              // actually grew through an entry in front of
+              // 'column', advance the cursor to compensate.
+              const ConstraintLine &col_line = lines[line_of[column]];
+              unsigned int length_before = sparsity.row_length(row);
+              for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                {
+                  const unsigned int new_col = col_line.entries[q].first;
+
+                  sparsity.add (row, new_col);
+
+                  const unsigned int length_after = sparsity.row_length(row);
+                  if ((new_col < column) && (length_before != length_after))
+                    ++j;
+                  length_before = length_after;
+                }
+            }
+        }
+      else
+        {
+          // constrained row: its entries are spread over the rows
+          // named by the row's constraint line
+          const ConstraintLine &row_line = lines[line_of[row]];
+
+          for (unsigned int j=0; j<sparsity.row_length(row); ++j)
+            {
+              const unsigned int column = sparsity.column_number(row,j);
+
+              if (line_of[column] == numbers::invalid_unsigned_int)
+                {
+                  // constrained row, regular column
+                  for (unsigned int q=0; q!=row_line.entries.size(); ++q)
+                    sparsity.add (row_line.entries[q].first, column);
+                }
+              else
+                {
+                  // constrained row, constrained column. the
+                  // column index is looked up afresh through the
+                  // pattern on every access
+                  for (unsigned int p=0; p!=row_line.entries.size(); ++p)
+                    for (unsigned int q=0;
+                         q!=lines[line_of[sparsity.column_number(row,j)]]
+                         .entries.size(); ++q)
+                      sparsity.add (row_line.entries[p].first,
+                                    lines[line_of[sparsity.column_number(row,j)]]
+                                    .entries[q].first);
+                }
+            }
+        }
+    }
+}
+
+
+
+// Condense a (not yet compressed) BlockSparsityPattern in place: for
+// every existing entry whose global row and/or column index is
+// constrained, add the entries that the corresponding constraint
+// line(s) refer to, then compress the pattern.
+void ConstraintMatrix::condense (BlockSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.is_compressed() == false, ExcMatrixIsClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &index_mapping = sparsity.get_column_indices();
+  const unsigned int n_blocks = sparsity.n_block_rows();
+  const unsigned int n_dofs = sparsity.n_rows();
+
+  // line_of[i] is the position within 'lines' of the constraint on
+  // global index i, or numbers::invalid_unsigned_int if unconstrained
+  std::vector<unsigned int> line_of (n_dofs,
+                                     numbers::invalid_unsigned_int);
+  for (unsigned int l=0; l<lines.size(); ++l)
+    line_of[lines[l].line] = l;
+
+  for (unsigned int row=0; row<n_dofs; ++row)
+    {
+      // translate the global row into (block row, row within block)
+      const std::pair<unsigned int,unsigned int>
+        block_position = index_mapping.global_to_local(row);
+      const unsigned int block_row = block_position.first;
+      const unsigned int local_row = block_position.second;
+
+      const bool row_is_constrained
+        = (line_of[row] != numbers::invalid_unsigned_int);
+
+      // a global row consists of the local row in each block of
+      // this block row
+      for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+        {
+          const SparsityPattern &
+            block_sparsity = sparsity.block(block_row, block_col);
+
+          for (SparsityPattern::const_iterator
+               entry = block_sparsity.begin(local_row);
+               (entry != block_sparsity.end(local_row)) &&
+               entry->is_valid_entry();
+               ++entry)
+            {
+              const unsigned int global_col
+                = index_mapping.local_to_global(block_col, entry->column());
+              const bool column_is_constrained
+                = (line_of[global_col] != numbers::invalid_unsigned_int);
+
+              if (!row_is_constrained)
+                {
+                  // regular row: only constrained columns need work
+                  if (column_is_constrained)
+                    {
+                      const ConstraintLine &col_line = lines[line_of[global_col]];
+                      for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                        sparsity.add (row, col_line.entries[q].first);
+                    }
+                }
+              else if (!column_is_constrained)
+                {
+                  // constrained row, regular column
+                  const ConstraintLine &row_line = lines[line_of[row]];
+                  for (unsigned int q=0; q!=row_line.entries.size(); ++q)
+                    sparsity.add (row_line.entries[q].first, global_col);
+                }
+              else
+                {
+                  // constrained row and constrained column
+                  const ConstraintLine &row_line = lines[line_of[row]];
+                  const ConstraintLine &col_line = lines[line_of[global_col]];
+                  for (unsigned int p=0; p!=row_line.entries.size(); ++p)
+                    for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                      sparsity.add (row_line.entries[p].first,
+                                    col_line.entries[q].first);
+                }
+            }
+        }
+    }
+
+  sparsity.compress();
+}
+
+
+
+// Condense a BlockCompressedSparsityPattern in place: for every
+// existing entry whose global row and/or column index is constrained,
+// add the entries that the corresponding constraint line(s) refer to.
+void ConstraintMatrix::condense (BlockCompressedSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+    index_mapping = sparsity.get_column_indices();
+
+  const unsigned int n_blocks = sparsity.n_block_rows();
+
+  // distribute[i] is the position within 'lines' of the constraint
+  // on global index i, or numbers::invalid_unsigned_int if index i
+  // is unconstrained. the position is stored as the unsigned value
+  // it is; no detour through a signed type.
+  std::vector<unsigned int> distribute (sparsity.n_rows(),
+                                        numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+      // translate the global row into (block row, row within block)
+      const std::pair<unsigned int,unsigned int>
+        block_index = index_mapping.global_to_local(row);
+      const unsigned int block_row = block_index.first;
+      const unsigned int local_row = block_index.second;
+
+      const bool row_is_constrained
+        = (distribute[row] != numbers::invalid_unsigned_int);
+
+      // a global row consists of the local row in each block of
+      // this block row. the row length is re-read in every
+      // iteration since the row may grow while we distribute; no
+      // attempt is made to skip entries that get visited twice.
+      for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+        {
+          const CompressedSparsityPattern &
+            block_sparsity = sparsity.block(block_row, block_col);
+
+          for (unsigned int j=0; j<block_sparsity.row_length(local_row); ++j)
+            {
+              const unsigned int global_col
+                = index_mapping.local_to_global(block_col,
+                                                block_sparsity.column_number(local_row,j));
+              const bool column_is_constrained
+                = (distribute[global_col] != numbers::invalid_unsigned_int);
+
+              if (!row_is_constrained)
+                {
+                  // regular row: only constrained columns need work
+                  if (column_is_constrained)
+                    {
+                      const ConstraintLine &col_line = lines[distribute[global_col]];
+                      for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                        sparsity.add (row, col_line.entries[q].first);
+                    }
+                }
+              else if (!column_is_constrained)
+                {
+                  // constrained row, regular column
+                  const ConstraintLine &row_line = lines[distribute[row]];
+                  for (unsigned int q=0; q!=row_line.entries.size(); ++q)
+                    sparsity.add (row_line.entries[q].first,
+                                  global_col);
+                }
+              else
+                {
+                  // constrained row and constrained column
+                  const ConstraintLine &row_line = lines[distribute[row]];
+                  const ConstraintLine &col_line = lines[distribute[global_col]];
+                  for (unsigned int p=0; p!=row_line.entries.size(); ++p)
+                    for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                      sparsity.add (row_line.entries[p].first,
+                                    col_line.entries[q].first);
+                }
+            }
+        }
+    }
+}
+
+
+
+// Condense a BlockCompressedSetSparsityPattern in place: for every
+// existing entry whose global row and/or column index is constrained,
+// add the entries that the corresponding constraint line(s) refer to.
+void ConstraintMatrix::condense (BlockCompressedSetSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+    index_mapping = sparsity.get_column_indices();
+
+  const unsigned int n_blocks = sparsity.n_block_rows();
+
+  // distribute[i] is the position within 'lines' of the constraint
+  // on global index i, or numbers::invalid_unsigned_int if index i
+  // is unconstrained. the position is stored as the unsigned value
+  // it is; no detour through a signed type.
+  std::vector<unsigned int> distribute (sparsity.n_rows(),
+                                        numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+      // translate the global row into (block row, row within block)
+      const std::pair<unsigned int,unsigned int>
+        block_index = index_mapping.global_to_local(row);
+      const unsigned int block_row = block_index.first;
+      const unsigned int local_row = block_index.second;
+
+      const bool row_is_constrained
+        = (distribute[row] != numbers::invalid_unsigned_int);
+
+      // a global row consists of the local row in each block of
+      // this block row. row_end() is asked for again in every
+      // iteration since the row may grow while we distribute.
+      for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+        {
+          const CompressedSetSparsityPattern &
+            block_sparsity = sparsity.block(block_row, block_col);
+
+          for (CompressedSetSparsityPattern::row_iterator
+               j = block_sparsity.row_begin(local_row);
+               j != block_sparsity.row_end(local_row); ++j)
+            {
+              const unsigned int global_col
+                = index_mapping.local_to_global(block_col, *j);
+              const bool column_is_constrained
+                = (distribute[global_col] != numbers::invalid_unsigned_int);
+
+              if (!row_is_constrained)
+                {
+                  // regular row: only constrained columns need work
+                  if (column_is_constrained)
+                    {
+                      const ConstraintLine &col_line = lines[distribute[global_col]];
+                      for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                        sparsity.add (row, col_line.entries[q].first);
+                    }
+                }
+              else if (!column_is_constrained)
+                {
+                  // constrained row, regular column
+                  const ConstraintLine &row_line = lines[distribute[row]];
+                  for (unsigned int q=0; q!=row_line.entries.size(); ++q)
+                    sparsity.add (row_line.entries[q].first,
+                                  global_col);
+                }
+              else
+                {
+                  // constrained row and constrained column
+                  const ConstraintLine &row_line = lines[distribute[row]];
+                  const ConstraintLine &col_line = lines[distribute[global_col]];
+                  for (unsigned int p=0; p!=row_line.entries.size(); ++p)
+                    for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                      sparsity.add (row_line.entries[p].first,
+                                    col_line.entries[q].first);
+                }
+            }
+        }
+    }
+}
+
+
+
+// Condense a BlockCompressedSimpleSparsityPattern in place: for every
+// existing entry whose global row and/or column index is constrained,
+// add the entries that the corresponding constraint line(s) refer to.
+void ConstraintMatrix::condense (BlockCompressedSimpleSparsityPattern &sparsity) const
+{
+  Assert (sorted == true, ExcMatrixNotClosed());
+  Assert (sparsity.n_rows() == sparsity.n_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.n_block_rows() == sparsity.n_block_cols(),
+          ExcNotQuadratic());
+  Assert (sparsity.get_column_indices() == sparsity.get_row_indices(),
+          ExcNotQuadratic());
+
+  const BlockIndices &
+    index_mapping = sparsity.get_column_indices();
+
+  const unsigned int n_blocks = sparsity.n_block_rows();
+
+  // distribute[i] is the position within 'lines' of the constraint
+  // on global index i, or numbers::invalid_unsigned_int if index i
+  // is unconstrained. the position is stored as the unsigned value
+  // it is; no detour through a signed type.
+  std::vector<unsigned int> distribute (sparsity.n_rows(),
+                                        numbers::invalid_unsigned_int);
+
+  for (unsigned int c=0; c<lines.size(); ++c)
+    distribute[lines[c].line] = c;
+
+  const unsigned int n_rows = sparsity.n_rows();
+  for (unsigned int row=0; row<n_rows; ++row)
+    {
+      // translate the global row into (block row, row within block)
+      const std::pair<unsigned int,unsigned int>
+        block_index = index_mapping.global_to_local(row);
+      const unsigned int block_row = block_index.first;
+      const unsigned int local_row = block_index.second;
+
+      const bool row_is_constrained
+        = (distribute[row] != numbers::invalid_unsigned_int);
+
+      // a global row consists of the local row in each block of
+      // this block row. the row length is re-read in every
+      // iteration since the row may grow while we distribute; no
+      // attempt is made to skip entries that get visited twice.
+      for (unsigned int block_col=0; block_col<n_blocks; ++block_col)
+        {
+          const CompressedSimpleSparsityPattern &
+            block_sparsity = sparsity.block(block_row, block_col);
+
+          for (unsigned int j=0; j<block_sparsity.row_length(local_row); ++j)
+            {
+              const unsigned int global_col
+                = index_mapping.local_to_global(block_col,
+                                                block_sparsity.column_number(local_row,j));
+              const bool column_is_constrained
+                = (distribute[global_col] != numbers::invalid_unsigned_int);
+
+              if (!row_is_constrained)
+                {
+                  // regular row: only constrained columns need work
+                  if (column_is_constrained)
+                    {
+                      const ConstraintLine &col_line = lines[distribute[global_col]];
+                      for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                        sparsity.add (row, col_line.entries[q].first);
+                    }
+                }
+              else if (!column_is_constrained)
+                {
+                  // constrained row, regular column
+                  const ConstraintLine &row_line = lines[distribute[row]];
+                  for (unsigned int q=0; q!=row_line.entries.size(); ++q)
+                    sparsity.add (row_line.entries[q].first,
+                                  global_col);
+                }
+              else
+                {
+                  // constrained row and constrained column
+                  const ConstraintLine &row_line = lines[distribute[row]];
+                  const ConstraintLine &col_line = lines[distribute[global_col]];
+                  for (unsigned int p=0; p!=row_line.entries.size(); ++p)
+                    for (unsigned int q=0; q!=col_line.entries.size(); ++q)
+                      sparsity.add (row_line.entries[p].first,
+                                    col_line.entries[q].first);
+                }
+            }
+        }
+    }
+}
+
+
+
+#ifdef DEAL_II_USE_TRILINOS
+
+// this is a specialization for a
+// parallel (non-block) Trilinos
+// vector. The basic idea is to just work
+// on the local range of the vector. But
+// we need access to values that the
+// local nodes are constrained to.
+
+// distribute() for a parallel (non-block) Trilinos vector: overwrite
+// every constrained entry inside vec.local_range() with the
+// inhomogeneity plus the weighted sum of the entries it is
+// constrained to. Entries outside the local range are read from a
+// temporary vector into which the needed values are imported.
+template<>
+void
+ConstraintMatrix::distribute (TrilinosWrappers::MPI::Vector &vec) const
+{
+  // failing this check means the object is *not* closed, so report
+  // the same exception the condense() functions use for it
+  Assert (sorted==true, ExcMatrixNotClosed());
+
+  //TODO: not implemented yet, we need to fix
+  //LocalRange() first to only include
+  //"owned" indices. For this we need to keep
+  //track of the owned indices, because
+  //Trilinos doesn't. Use same constructor
+  //interface as in PETSc with two IndexSets!
+  AssertThrow (vec.vector_partitioner().IsOneToOne(),
+               ExcMessage ("Distribute does not work on vectors with overlapping parallel partitioning."));
+
+  // [begin_my_constraints, end_my_constraints) are the constraint
+  // lines whose index lies within the local range of the vector
+  typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+  ConstraintLine index_comparison;
+  index_comparison.line = vec.local_range().first;
+  const constraint_iterator begin_my_constraints =
+    Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+  index_comparison.line = vec.local_range().second;
+  const constraint_iterator end_my_constraints
+    = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+  // collect all indices we need read access to: the local range
+  // plus every index outside it that one of our constraint lines
+  // refers to
+  IndexSet my_indices (vec.size());
+  {
+    const std::pair<unsigned int, unsigned int>
+      local_range = vec.local_range();
+
+    my_indices.add_range (local_range.first, local_range.second);
+
+    std::set<unsigned int> individual_indices;
+    for (constraint_iterator it = begin_my_constraints;
+         it != end_my_constraints; ++it)
+      for (unsigned int i=0; i<it->entries.size(); ++i)
+        if ((it->entries[i].first < local_range.first)
+            ||
+            (it->entries[i].first >= local_range.second))
+          individual_indices.insert (it->entries[i].first);
+
+    my_indices.add_indices (individual_indices.begin(),
+                            individual_indices.end());
+  }
+
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+  const Epetra_MpiComm *mpi_comm
+    = dynamic_cast<const Epetra_MpiComm *>(&vec.trilinos_vector().Comm());
+
+  Assert (mpi_comm != 0, ExcInternalError());
+
+  TrilinosWrappers::MPI::Vector vec_distribute
+    (my_indices.make_trilinos_map (mpi_comm->Comm(), true));
+#else
+  TrilinosWrappers::MPI::Vector vec_distribute
+    (my_indices.make_trilinos_map (MPI_COMM_WORLD, true));
+#endif
+
+  // here we import the data
+  vec_distribute.reinit(vec,false,true);
+
+  for (constraint_iterator it = begin_my_constraints;
+       it != end_my_constraints; ++it)
+    {
+      // new value of the constrained entry: inhomogeneity plus the
+      // weighted contributions of the entries it depends on
+      double new_value = it->inhomogeneity;
+      for (unsigned int i=0; i<it->entries.size(); ++i)
+        new_value += (vec_distribute(it->entries[i].first) *
+                      it->entries[i].second);
+      vec(it->line) = new_value;
+    }
+
+  // some processes might not apply
+  // constraints, so we need to explicitly
+  // state, that the others are doing an
+  // insert here:
+  vec.compress (::dealii::VectorOperation::insert);
+}
+
+
+
+// distribute() for a parallel Trilinos block vector: block by block,
+// overwrite every constrained entry inside the block's local range
+// with the inhomogeneity plus the weighted sum of the entries it is
+// constrained to. Values are read from a temporary non-block vector
+// into which the needed entries are imported.
+template<>
+void
+ConstraintMatrix::distribute (TrilinosWrappers::MPI::BlockVector &vec) const
+{
+  // failing this check means the object is *not* closed, so report
+  // the same exception the condense() functions use for it
+  Assert (sorted==true, ExcMatrixNotClosed());
+
+  IndexSet my_indices (vec.size());
+  for (unsigned int block=0; block<vec.n_blocks(); ++block)
+    {
+      // constraint lines whose index lies within this block's local
+      // range, shifted by the start index of the block
+      typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+      ConstraintLine index_comparison;
+      index_comparison.line = vec.block(block).local_range().first
+                              +vec.get_block_indices().block_start(block);
+      const constraint_iterator begin_my_constraints =
+        Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+      index_comparison.line = vec.block(block).local_range().second
+                              +vec.get_block_indices().block_start(block);
+
+      const constraint_iterator end_my_constraints
+        = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+      // Here we search all the indices that we
+      // need to have read-access to - the local
+      // nodes and all the nodes that the
+      // constraints indicate. No caching done
+      // yet. would need some more clever data
+      // structures for doing that.
+      //
+      // NOTE(review): local_range is used here without the
+      // block_start() offset that is added for the constraint
+      // search above, while my_indices is sized with vec.size()
+      // and the entries compared against it come from constraint
+      // lines. If local_range() is block-local this looks
+      // inconsistent for block>0 -- confirm.
+      const std::pair<unsigned int, unsigned int>
+        local_range = vec.block(block).local_range();
+
+      my_indices.add_range (local_range.first, local_range.second);
+
+      std::set<unsigned int> individual_indices;
+      for (constraint_iterator it = begin_my_constraints;
+           it != end_my_constraints; ++it)
+        for (unsigned int i=0; i<it->entries.size(); ++i)
+          if ((it->entries[i].first < local_range.first)
+              ||
+              (it->entries[i].first >= local_range.second))
+            individual_indices.insert (it->entries[i].first);
+
+      my_indices.add_indices (individual_indices.begin(),
+                              individual_indices.end());
+    }
+
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+  const Epetra_MpiComm *mpi_comm
+    = dynamic_cast<const Epetra_MpiComm *>(&vec.block(0).trilinos_vector().Comm());
+
+  Assert (mpi_comm != 0, ExcInternalError());
+
+  TrilinosWrappers::MPI::Vector vec_distribute
+    (my_indices.make_trilinos_map (mpi_comm->Comm(), true));
+#else
+  TrilinosWrappers::MPI::Vector vec_distribute
+    (my_indices.make_trilinos_map (MPI_COMM_WORLD, true));
+#endif
+
+  // here we import the data
+  vec_distribute.reinit(vec,true);
+
+  for (unsigned int block=0; block<vec.n_blocks(); ++block)
+    {
+      // same range of constraint lines as in the first loop
+      typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+      ConstraintLine index_comparison;
+      index_comparison.line = vec.block(block).local_range().first
+                              +vec.get_block_indices().block_start(block);
+      const constraint_iterator begin_my_constraints =
+        Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+      index_comparison.line = vec.block(block).local_range().second
+                              +vec.get_block_indices().block_start(block);
+
+      const constraint_iterator end_my_constraints
+        = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+      for (constraint_iterator it = begin_my_constraints;
+           it != end_my_constraints; ++it)
+        {
+          // new value of the constrained entry: inhomogeneity plus
+          // the weighted contributions of the entries it depends on
+          double new_value = it->inhomogeneity;
+          for (unsigned int i=0; i<it->entries.size(); ++i)
+            new_value += (vec_distribute(it->entries[i].first) *
+                          it->entries[i].second);
+          vec(it->line) = new_value;
+        }
+      vec.block(block).compress(::dealii::VectorOperation::insert);
+    }
+}
+
+#endif
+
+#ifdef DEAL_II_USE_PETSC
+
+// this is a specialization for a
+// parallel (non-block) PETSc
+// vector. The basic idea is to just work
+// on the local range of the vector. But
+// we need access to values that the
+// local nodes are constrained to.
+
+// distribute() for a parallel (non-block) PETSc vector: overwrite
+// every constrained entry inside vec.local_range() with the
+// inhomogeneity plus the weighted sum of the entries it is
+// constrained to. Entries outside the local range are read through a
+// temporary vector that carries them as ghost entries.
+template<>
+void
+ConstraintMatrix::distribute (PETScWrappers::MPI::Vector &vec) const
+{
+  // failing this check means the object is *not* closed, so report
+  // the same exception the condense() functions use for it
+  Assert (sorted==true, ExcMatrixNotClosed());
+
+  // [begin_my_constraints, end_my_constraints) are the constraint
+  // lines whose index lies within the local range of the vector
+  typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+  ConstraintLine index_comparison;
+  index_comparison.line = vec.local_range().first;
+  const constraint_iterator begin_my_constraints =
+    Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+  index_comparison.line = vec.local_range().second;
+  const constraint_iterator end_my_constraints
+    = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+  // all indices we need to read from: the local range plus every
+  // index outside it that one of our constraint lines refers to
+  IndexSet my_indices (vec.size());
+
+  const std::pair<unsigned int, unsigned int>
+    local_range = vec.local_range();
+
+  my_indices.add_range (local_range.first, local_range.second);
+
+  std::set<unsigned int> individual_indices;
+  for (constraint_iterator it = begin_my_constraints;
+       it != end_my_constraints; ++it)
+    for (unsigned int i=0; i<it->entries.size(); ++i)
+      if ((it->entries[i].first < local_range.first)
+          ||
+          (it->entries[i].first >= local_range.second))
+        individual_indices.insert (it->entries[i].first);
+
+  my_indices.add_indices (individual_indices.begin(),
+                          individual_indices.end());
+
+  // the local range alone, as an index set
+  IndexSet local_range_is (vec.size());
+  local_range_is.add_range(local_range.first, local_range.second);
+
+
+  // create a vector and import those indices
+  PETScWrappers::MPI::Vector ghost_vec (vec.get_mpi_communicator(),
+                                        local_range_is,
+                                        my_indices);
+  ghost_vec = vec;
+  ghost_vec.update_ghost_values();
+
+  // finally do the distribution on own
+  // constraints
+  for (constraint_iterator it = begin_my_constraints;
+       it != end_my_constraints; ++it)
+    {
+      // new value of the constrained entry: inhomogeneity plus the
+      // weighted contributions of the entries it depends on
+      PetscScalar new_value = it->inhomogeneity;
+      for (unsigned int i=0; i<it->entries.size(); ++i)
+        new_value += (PetscScalar(ghost_vec(it->entries[i].first)) *
+                      it->entries[i].second);
+      vec(it->line) = new_value;
+    }
+
+  vec.compress ();
+}
+
+
+
+// distribute() for a parallel PETSc block vector (modified by
+// shuqiangwang): block by block, overwrite every constrained entry
+// inside the block's local range with the inhomogeneity plus the
+// weighted sum of the entries it is constrained to. Values are read
+// from a temporary block vector whose blocks are set up with one pair
+// of index sets per block.
+template<>
+void
+ConstraintMatrix::distribute (PETScWrappers::MPI::BlockVector &vec) const
+{
+  // failing this check means the object is *not* closed, so report
+  // the same exception the condense() functions use for it
+  Assert (sorted==true, ExcMatrixNotClosed());
+
+  // one set of index data per block
+  std::vector<IndexSet> my_indices(vec.n_blocks());       // relevant
+  std::vector<IndexSet> local_range_is(vec.n_blocks());   // locally owned
+  std::vector<unsigned int> block_sizes(vec.n_blocks());  // size of locally owned
+
+  for (unsigned int block=0; block<vec.n_blocks(); block++)
+    {
+      my_indices[block].set_size(vec.block(block).size());
+      local_range_is[block].set_size(vec.block(block).size());
+    }
+
+  for (unsigned int block=0; block<vec.n_blocks(); ++block)
+    {
+      // constraint lines whose index lies within this block's local
+      // range, shifted by the start index of the block
+      typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+      ConstraintLine index_comparison;
+      index_comparison.line = vec.block(block).local_range().first
+                              +vec.get_block_indices().block_start(block);
+      const constraint_iterator begin_my_constraints =
+        Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+      index_comparison.line = vec.block(block).local_range().second
+                              +vec.get_block_indices().block_start(block);
+
+      const constraint_iterator end_my_constraints
+        = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+      // Here we search all the indices that we
+      // need to have read-access to - the local
+      // nodes and all the nodes that the
+      // constraints indicate. No caching done
+      // yet. would need some more clever data
+      // structures for doing that.
+      const std::pair<unsigned int, unsigned int>
+        local_range = vec.block(block).local_range();
+      local_range_is[block].add_range(local_range.first, local_range.second);
+      block_sizes[block] = local_range.second - local_range.first;
+
+      my_indices[block].add_range (local_range.first, local_range.second);
+
+      // NOTE(review): the entry index is compared against
+      // local_range without the block_start() offset, and the
+      // offset of *this* block is subtracted before insertion. If a
+      // constraint refers to an index in a different block, the
+      // subtraction would not yield an index of this block --
+      // confirm that constraints never cross blocks here.
+      std::set<unsigned int> individual_indices;
+      for (constraint_iterator it = begin_my_constraints; it != end_my_constraints; ++it)
+        for (unsigned int i=0; i<it->entries.size(); ++i)
+          if ((it->entries[i].first < local_range.first) || (it->entries[i].first >= local_range.second))
+            individual_indices.insert (it->entries[i].first - vec.get_block_indices().block_start(block));
+
+      my_indices[block].add_indices (individual_indices.begin(),
+                                     individual_indices.end());
+    }
+
+  // create a block vector, set up each block with its pair of index
+  // sets, and import the values
+  PETScWrappers::MPI::BlockVector ghost_vec;
+  ghost_vec.reinit(block_sizes,vec.get_mpi_communicator());
+  for (unsigned int block=0; block<vec.n_blocks(); ++block)
+    ghost_vec.block(block).reinit(vec.get_mpi_communicator(),local_range_is[block],my_indices[block]);
+  ghost_vec.collect_sizes();
+
+  ghost_vec = vec;
+  ghost_vec.update_ghost_values();
+
+  for (unsigned int block=0; block<vec.n_blocks(); ++block)
+    {
+      // same range of constraint lines as in the loop above
+      typedef std::vector<ConstraintLine>::const_iterator constraint_iterator;
+      ConstraintLine index_comparison;
+      index_comparison.line = vec.block(block).local_range().first
+                              +vec.get_block_indices().block_start(block);
+      const constraint_iterator begin_my_constraints =
+        Utilities::lower_bound (lines.begin(),lines.end(),index_comparison);
+
+      index_comparison.line = vec.block(block).local_range().second
+                              +vec.get_block_indices().block_start(block);
+
+      const constraint_iterator end_my_constraints
+        = Utilities::lower_bound(lines.begin(),lines.end(),index_comparison);
+
+      for (constraint_iterator it = begin_my_constraints;
+           it != end_my_constraints; ++it)
+        {
+          // new value of the constrained entry: inhomogeneity plus
+          // the weighted contributions of the entries it depends on
+          double new_value = it->inhomogeneity;
+          for (unsigned int i=0; i<it->entries.size(); ++i)
+            new_value += (ghost_vec(it->entries[i].first) *
+                          it->entries[i].second);
+          vec(it->line) = new_value;
+        }
+      vec.block(block).compress(::dealii::VectorOperation::insert);
+    }
+
+}
+
+#endif
+
+
+
+// Report the number of constraint lines currently stored in this object.
+unsigned int ConstraintMatrix::n_constraints () const
+{
+  const std::size_t n_stored_lines = lines.size();
+  return n_stored_lines;
+}
+
+
+
+// Return true if the degree of freedom @p index is constrained and its
+// constraint line consists of exactly one entry whose weight equals 1.0.
+bool ConstraintMatrix::is_identity_constrained (const unsigned int index) const
+{
+  // an unconstrained index can never be identity constrained
+  if (!is_constrained(index))
+    return false;
+
+  // locate the constraint line for this index through the line cache
+  const ConstraintLine &constraint = lines[lines_cache[calculate_line_index(index)]];
+  Assert (constraint.line == index, ExcInternalError());
+
+  // anything but a single entry rules out an identity constraint
+  if (constraint.entries.size() != 1)
+    return false;
+
+  return (constraint.entries[0].second == 1.0);
+}
+
+
+
+// Return the largest number of entries found in any single constraint line
+// (zero if there are no constraint lines).
+unsigned int ConstraintMatrix::max_constraint_indirections () const
+{
+  typedef std::vector<ConstraintLine>::const_iterator line_iterator;
+
+  unsigned int longest_line = 0;
+  for (line_iterator line = lines.begin(); line != lines.end(); ++line)
+    {
+      // entries.size() is a std::size_t, which is not the same type as
+      // unsigned int on every platform (e.g. AIX), hence the explicit cast
+      const unsigned int n_entries
+        = static_cast<unsigned int>(line->entries.size());
+      if (longest_line < n_entries)
+        longest_line = n_entries;
+    }
+
+  return longest_line;
+}
+
+
+
+// Return true as soon as one constraint line with a nonzero inhomogeneity
+// is found; false if every line is homogeneous.
+bool ConstraintMatrix::has_inhomogeneities () const
+{
+  std::vector<ConstraintLine>::const_iterator line = lines.begin();
+  while (line != lines.end())
+    {
+      if (line->inhomogeneity != 0.)
+        return true;
+      ++line;
+    }
+
+  return false;
+}
+
+
+// Write all constraint lines to @p out, one "dof entry: weight" row per
+// entry, followed by the inhomogeneity of the line if it is nonzero.
+// Lines without entries are written as "dof = value". Throws ExcIO if the
+// stream is in a failed state afterwards.
+void ConstraintMatrix::print (std::ostream &out) const
+{
+  typedef std::vector<ConstraintLine>::const_iterator line_iterator;
+
+  for (line_iterator line = lines.begin(); line != lines.end(); ++line)
+    {
+      // a constraint without entries simply fixes the value of the dof,
+      // e.g. x[13]=0; still print something for it
+      if (line->entries.empty())
+        {
+          if (line->inhomogeneity != 0)
+            out << " " << line->line
+                << " = " << line->inhomogeneity
+                << "\n";
+          else
+            out << " " << line->line << " = 0\n";
+          continue;
+        }
+
+      // regular case: list the pairs of dofs and their weights
+      for (unsigned int j=0; j<line->entries.size(); ++j)
+        out << " " << line->line
+            << " " << line->entries[j].first
+            << ": " << line->entries[j].second << "\n";
+
+      // then the inhomogeneity, if there is one
+      if (line->inhomogeneity != 0)
+        out << " " << line->line
+            << ": " << line->inhomogeneity << "\n";
+    }
+
+  AssertThrow (out, ExcIO());
+}
+
+
+
+// Write the constraints to @p out as a "digraph constraints" block: one
+// "dof->entry" edge per constraint entry, annotated with the weight in a
+// trailing comment, and a bare node for every line without entries.
+void
+ConstraintMatrix::write_dot (std::ostream &out) const
+{
+  typedef std::vector<ConstraintLine>::const_iterator line_iterator;
+
+  out << "digraph constraints {"
+      << std::endl;
+  for (line_iterator line = lines.begin(); line != lines.end(); ++line)
+    {
+      // a line without entries only contributes its own node
+      if (line->entries.empty())
+        {
+          out << " " << line->line << "\n";
+          continue;
+        }
+
+      for (unsigned int j=0; j<line->entries.size(); ++j)
+        out << " " << line->line << "->" << line->entries[j].first
+            << "; // weight: "
+            << line->entries[j].second
+            << "\n";
+    }
+  out << "}" << std::endl;
+}
+
+
+
+// Sum up the memory reported by MemoryConsumption::memory_consumption for
+// the members lines, lines_cache, sorted and local_lines.
+std::size_t
+ConstraintMatrix::memory_consumption () const
+{
+  std::size_t total = MemoryConsumption::memory_consumption (lines);
+  total += MemoryConsumption::memory_consumption (lines_cache);
+  total += MemoryConsumption::memory_consumption (sorted);
+  total += MemoryConsumption::memory_consumption (local_lines);
+  return total;
+}
+
+
+
+
+
+// explicit instantiations
+//
+// define a list of functions for vectors and matrices, respectively, where
+// the vector/matrix can be replaced using a preprocessor variable
+// VectorType/MatrixType. note that we need a space between "VectorType" and
+// ">" to disambiguate ">>" when VectorType trails in an angle bracket
+
+// TODO: The way we define all the instantiations is probably not the very
+// best one. Try to find a better description.
+
+// VECTOR_FUNCTIONS(VectorType) expands to explicit instantiations of the
+// ConstraintMatrix member templates for the given vector type: both
+// vector-only condense() overloads, the matrix+vector condense() overloads
+// for SparseMatrix<float> and SparseMatrix<double>, the vector-only
+// distribute_local_to_global() overload, and both distribute() overloads.
+// The last declaration carries no trailing semicolon, so every use of the
+// macro has to be followed by one.
+#define VECTOR_FUNCTIONS(VectorType) \
+ template void ConstraintMatrix::condense<VectorType >(const VectorType &uncondensed,\
+ VectorType &condensed) const;\
+ template void ConstraintMatrix::condense<VectorType >(VectorType &vec) const;\
+ template void ConstraintMatrix::condense<float,VectorType >(const SparseMatrix<float> &uncondensed, \
+ const VectorType &uncondensed_vector, \
+ SparseMatrix<float> &condensed, \
+ VectorType &condensed_vector) const; \
+ template void ConstraintMatrix::condense<double,VectorType >(const SparseMatrix<double> &uncondensed, \
+ const VectorType &uncondensed_vector, \
+ SparseMatrix<double> &condensed, \
+ VectorType &condensed_vector) const; \
+ template void ConstraintMatrix:: \
+ distribute_local_to_global<VectorType > (const Vector<double> &, \
+ const std::vector<unsigned int> &, \
+ VectorType &, \
+ const FullMatrix<double> &) const; \
+ template void ConstraintMatrix::distribute<VectorType >(const VectorType &condensed,\
+ VectorType &uncondensed) const;\
+ template void ConstraintMatrix::distribute<VectorType >(VectorType &vec) const
+
+// PARALLEL_VECTOR_FUNCTIONS(VectorType) is the reduced counterpart of
+// VECTOR_FUNCTIONS: it only instantiates the vector-only
+// distribute_local_to_global() overload and none of the condense() or
+// distribute() functions. Below it is used for the Trilinos MPI vectors.
+// The declaration has no trailing semicolon; the macro use supplies it.
+#define PARALLEL_VECTOR_FUNCTIONS(VectorType) \
+ template void ConstraintMatrix:: \
+ distribute_local_to_global<VectorType > (const Vector<double> &, \
+ const std::vector<unsigned int> &, \
+ VectorType &, \
+ const FullMatrix<double> &) const
+
+
+// TODO: Can PETSc really do all the operations required by the above
+// condense/distribute function etc also on distributed vectors? Trilinos
+// can't do that - we have to rewrite those functions by hand if we want to
+// use them. The key is to use local ranges etc., which still needs to be
+// implemented.
+#ifdef DEAL_II_USE_PETSC
+VECTOR_FUNCTIONS(PETScWrappers::MPI::Vector);
+VECTOR_FUNCTIONS(PETScWrappers::MPI::BlockVector);
+#endif
+
+#ifdef DEAL_II_USE_TRILINOS
+PARALLEL_VECTOR_FUNCTIONS(TrilinosWrappers::MPI::Vector);
+PARALLEL_VECTOR_FUNCTIONS(TrilinosWrappers::MPI::BlockVector);
+#endif
+
+// The four macros below instantiate the matrix+vector overload of
+// ConstraintMatrix::distribute_local_to_global(). They differ in the
+// global vector type (a macro argument vs. fixed Vector<double>) and in
+// the trailing internal::bool2type<> tag argument (false vs. true).
+// NOTE(review): the tag presumably selects the non-block vs. block matrix
+// code path, matching the BLOCK_ prefix in the macro names -- confirm
+// against the function definitions.
+// None of the macros ends in a semicolon; each use has to supply one.
+//
+// MATRIX_VECTOR_FUNCTIONS: given matrix and vector type, bool2type<false>.
+#define MATRIX_VECTOR_FUNCTIONS(MatrixType, VectorType) \
+ template void ConstraintMatrix:: \
+ distribute_local_to_global<MatrixType,VectorType > (const FullMatrix<double> &, \
+ const Vector<double> &, \
+ const std::vector<unsigned int> &, \
+ MatrixType &, \
+ VectorType &, \
+ bool , \
+ internal::bool2type<false>) const
+// MATRIX_FUNCTIONS: given matrix type with Vector<double>, bool2type<false>.
+#define MATRIX_FUNCTIONS(MatrixType) \
+ template void ConstraintMatrix:: \
+ distribute_local_to_global<MatrixType,Vector<double> > (const FullMatrix<double> &, \
+ const Vector<double> &, \
+ const std::vector<unsigned int> &, \
+ MatrixType &, \
+ Vector<double> &, \
+ bool , \
+ internal::bool2type<false>) const
+// BLOCK_MATRIX_VECTOR_FUNCTIONS: given matrix and vector type, bool2type<true>.
+#define BLOCK_MATRIX_VECTOR_FUNCTIONS(MatrixType, VectorType) \
+ template void ConstraintMatrix:: \
+ distribute_local_to_global<MatrixType,VectorType > (const FullMatrix<double> &, \
+ const Vector<double> &, \
+ const std::vector<unsigned int> &, \
+ MatrixType &, \
+ VectorType &, \
+ bool , \
+ internal::bool2type<true>) const
+// BLOCK_MATRIX_FUNCTIONS: given matrix type with Vector<double>, bool2type<true>.
+#define BLOCK_MATRIX_FUNCTIONS(MatrixType) \
+ template void ConstraintMatrix:: \
+ distribute_local_to_global<MatrixType,Vector<double> > (const FullMatrix<double> &, \
+ const Vector<double> &, \
+ const std::vector<unsigned int> &, \
+ MatrixType &, \
+ Vector<double> &, \
+ bool , \
+ internal::bool2type<true>) const
+
+MATRIX_FUNCTIONS(SparseMatrix<double>);
+MATRIX_FUNCTIONS(SparseMatrix<float>);
+MATRIX_FUNCTIONS(FullMatrix<double>);
+MATRIX_FUNCTIONS(FullMatrix<float>);
+MATRIX_VECTOR_FUNCTIONS(SparseMatrix<float>, Vector<float>);
+
+BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrix<double>);
+BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrix<float>);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrix<double>, BlockVector<double>);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrix<float>, BlockVector<float>);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrix<float>, BlockVector<double>);
+
+MATRIX_FUNCTIONS(SparseMatrixEZ<double>);
+MATRIX_FUNCTIONS(SparseMatrixEZ<float>);
+MATRIX_VECTOR_FUNCTIONS(SparseMatrixEZ<float>, Vector<float>);
+
+// BLOCK_MATRIX_FUNCTIONS(BlockSparseMatrixEZ<double>);
+// BLOCK_MATRIX_VECTOR_FUNCTIONS(BlockSparseMatrixEZ<float>, Vector<float>);
+
+#ifdef DEAL_II_USE_PETSC
+MATRIX_FUNCTIONS(PETScWrappers::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(PETScWrappers::BlockSparseMatrix);
+MATRIX_FUNCTIONS(PETScWrappers::MPI::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix);
+MATRIX_VECTOR_FUNCTIONS(PETScWrappers::SparseMatrix, PETScWrappers::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(PETScWrappers::BlockSparseMatrix, PETScWrappers::BlockVector);
+MATRIX_VECTOR_FUNCTIONS(PETScWrappers::MPI::SparseMatrix, PETScWrappers::MPI::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix ,PETScWrappers::MPI::BlockVector);
+#endif
+
+#ifdef DEAL_II_USE_TRILINOS
+MATRIX_FUNCTIONS(TrilinosWrappers::SparseMatrix);
+BLOCK_MATRIX_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix);
+MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::SparseMatrix, TrilinosWrappers::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix, TrilinosWrappers::BlockVector);
+MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::SparseMatrix, TrilinosWrappers::MPI::Vector);
+BLOCK_MATRIX_VECTOR_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix, TrilinosWrappers::MPI::BlockVector);
+#endif
+
+
+// SPARSITY_FUNCTIONS(SparsityType) instantiates two overloads of
+// ConstraintMatrix::add_entries_local_to_global() for the given sparsity
+// pattern type: the one taking a single index vector plus an
+// internal::bool2type<false> tag, and the one taking two index vectors.
+// The last declaration has no trailing semicolon; the macro use adds it.
+#define SPARSITY_FUNCTIONS(SparsityType) \
+ template void ConstraintMatrix::add_entries_local_to_global<SparsityType> (\
+ const std::vector<unsigned int> &, \
+ SparsityType &, \
+ const bool, \
+ const Table<2,bool> &, \
+ internal::bool2type<false>) const; \
+ template void ConstraintMatrix::add_entries_local_to_global<SparsityType> (\
+ const std::vector<unsigned int> &, \
+ const std::vector<unsigned int> &, \
+ SparsityType &, \
+ const bool, \
+ const Table<2,bool> &) const
+// BLOCK_SPARSITY_FUNCTIONS(SparsityType) is identical except that the
+// single-index-vector overload is instantiated with the
+// internal::bool2type<true> tag.
+#define BLOCK_SPARSITY_FUNCTIONS(SparsityType) \
+ template void ConstraintMatrix::add_entries_local_to_global<SparsityType> (\
+ const std::vector<unsigned int> &, \
+ SparsityType &, \
+ const bool, \
+ const Table<2,bool> &, \
+ internal::bool2type<true>) const; \
+ template void ConstraintMatrix::add_entries_local_to_global<SparsityType> (\
+ const std::vector<unsigned int> &, \
+ const std::vector<unsigned int> &, \
+ SparsityType &, \
+ const bool, \
+ const Table<2,bool> &) const
+
+SPARSITY_FUNCTIONS(SparsityPattern);
+SPARSITY_FUNCTIONS(CompressedSparsityPattern);
+SPARSITY_FUNCTIONS(CompressedSetSparsityPattern);
+SPARSITY_FUNCTIONS(CompressedSimpleSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockCompressedSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockCompressedSetSparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(BlockCompressedSimpleSparsityPattern);
+
+#ifdef DEAL_II_USE_TRILINOS
+SPARSITY_FUNCTIONS(TrilinosWrappers::SparsityPattern);
+BLOCK_SPARSITY_FUNCTIONS(TrilinosWrappers::BlockSparsityPattern);
+#endif
+
+
+// ONLY_MATRIX_FUNCTIONS(MatrixType) instantiates the matrix-only overload
+// of ConstraintMatrix::distribute_local_to_global(), i.e. the one taking a
+// local FullMatrix<double>, two index vectors and the global matrix, with
+// no right hand side vector. The declaration has no trailing semicolon;
+// the macro use supplies it.
+#define ONLY_MATRIX_FUNCTIONS(MatrixType) \
+ template void ConstraintMatrix::distribute_local_to_global<MatrixType > (\
+ const FullMatrix<double> &, \
+ const std::vector<unsigned int> &, \
+ const std::vector<unsigned int> &, \
+ MatrixType &) const
+
+ONLY_MATRIX_FUNCTIONS(SparseMatrix<float>);
+ONLY_MATRIX_FUNCTIONS(SparseMatrix<double>);
+ONLY_MATRIX_FUNCTIONS(MatrixBlock<SparseMatrix<float> >);
+ONLY_MATRIX_FUNCTIONS(MatrixBlock<SparseMatrix<double> >);
+ONLY_MATRIX_FUNCTIONS(BlockSparseMatrix<float>);
+ONLY_MATRIX_FUNCTIONS(BlockSparseMatrix<double>);
+
+#ifdef DEAL_II_USE_TRILINOS
+ONLY_MATRIX_FUNCTIONS(TrilinosWrappers::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(TrilinosWrappers::BlockSparseMatrix);
+#endif
+
+#ifdef DEAL_II_USE_PETSC
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::BlockSparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::MPI::SparseMatrix);
+ONLY_MATRIX_FUNCTIONS(PETScWrappers::MPI::BlockSparseMatrix);
+#endif
+
+#include "constraint_matrix.inst"
+
+DEAL_II_NAMESPACE_CLOSE
Added: branches/s-wang2/for_deal.II/source/lac/petsc_matrix_base.cc
===================================================================
--- branches/s-wang2/for_deal.II/source/lac/petsc_matrix_base.cc (rev 0)
+++ branches/s-wang2/for_deal.II/source/lac/petsc_matrix_base.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,649 @@
+//---------------------------------------------------------------------------
+// $Id: petsc_matrix_base.cc 27628 2012-11-20 22:49:26Z heister $
+// Version: $Name$
+//
+// Copyright (C) 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/lac/petsc_matrix_base.h>
+
+#ifdef DEAL_II_USE_PETSC
+
+# include <deal.II/lac/petsc_full_matrix.h>
+# include <deal.II/lac/petsc_sparse_matrix.h>
+# include <deal.II/lac/petsc_parallel_sparse_matrix.h>
+# include <deal.II/lac/petsc_vector.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+ namespace MatrixIterators
+ {
+    // Refresh the cached column indices and values so that they describe
+    // the row this accessor currently points to (a_row). For the
+    // past-the-end row both caches are simply released.
+    void
+    MatrixBase::const_iterator::Accessor::
+    visit_present_row ()
+    {
+      // nothing to cache for the past-the-end row: drop whatever we hold
+      if (this->a_row == matrix->m())
+        {
+          colnum_cache.reset ();
+          value_cache.reset ();
+          return;
+        }
+
+      // make sure PETSc has flushed its own caches before we read the row
+      matrix->compress ();
+
+      // ask PETSc for a view of the present row
+      PetscInt           n_row_entries;
+      const PetscInt    *row_columns;
+      const PetscScalar *row_values;
+
+      int ierr = MatGetRow(*matrix, this->a_row,
+                           &n_row_entries, &row_columns, &row_values);
+      AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+      // an iterator should never have been set up for an empty row (what
+      // would it point to?), so an empty row indicates an internal error
+      Assert (n_row_entries != 0, ExcInternalError());
+
+      // copy the row into our own caches while the PETSc view is valid
+      colnum_cache.reset (new std::vector<unsigned int> (row_columns,
+                                                         row_columns+n_row_entries));
+      value_cache.reset (new std::vector<PetscScalar> (row_values,
+                                                       row_values+n_row_entries));
+
+      // finally hand the row back to PETSc
+      ierr = MatRestoreRow(*matrix, this->a_row,
+                           &n_row_entries, &row_columns, &row_values);
+      AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+    }
+ }
+
+
+
+ // Default constructor. Only records that no add/insert action is pending
+ // (last_action = LastAction::none).
+ // NOTE(review): the PETSc handle `matrix` is not set up here; presumably
+ // derived classes create it before the destructor runs -- confirm.
+ MatrixBase::MatrixBase ()
+ :
+ last_action (LastAction::none)
+ {}
+
+
+
+  // Destructor. Hands the PETSc matrix back to PETSc via MatDestroy.
+  //
+  // A destructor must never let an exception escape: if it runs while the
+  // stack is being unwound for another exception, the program terminates.
+  // The error code is therefore only checked with Assert rather than with
+  // the throwing AssertThrow.
+  // NOTE(review): this relies on Assert being a debug-mode check that does
+  // not throw in the default configuration -- confirm.
+  MatrixBase::~MatrixBase ()
+  {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int ierr = MatDestroy (matrix);
+#else
+    const int ierr = MatDestroy (&matrix);
+#endif
+    Assert (ierr == 0, ExcPETScError(ierr));
+    // keep the compiler quiet about an unused variable when Assert
+    // expands to nothing
+    (void) ierr;
+  }
+
+
+
+  // Destroy the current PETSc matrix and replace it with an empty (0 x 0)
+  // sequential AIJ matrix. Throws ExcPETScError if either PETSc call fails.
+  void
+  MatrixBase::clear ()
+  {
+    // get rid of the old matrix first
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    int ierr = MatDestroy (matrix);
+#else
+    int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // then create the empty replacement: zero rows, zero columns, zero
+    // nonzeros per row and no per-row nonzero array
+    ierr = MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 0,
+                           0, &matrix);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+  }
+
+
+
+  // Set all entries of the matrix to zero. Only the value zero may be
+  // assigned (checked by Assert). Returns a reference to this object.
+  MatrixBase &
+  MatrixBase::operator = (const value_type d)
+  {
+    Assert (d==value_type(), ExcScalarAssignmentOnlyForZeroValue());
+
+    // previously cached elements have to be flushed first; this seems to
+    // be necessary since petsc 2.2.1, at least for parallel vectors (see
+    // test bits/petsc_64)
+    compress ();
+
+    const int zero_ierr = MatZeroEntries (matrix);
+    AssertThrow (zero_ierr == 0, ExcPETScError(zero_ierr));
+
+    return *this;
+  }
+
+
+
+  // Zero out all entries of the given row and set its diagonal entry to
+  // @p new_diag_value (via MatZeroRowsIS). The matrix is compressed before
+  // and after the operation.
+  //
+  // Throws ExcPETScError if any of the PETSc calls fails. The temporary
+  // index set is destroyed before any error of MatZeroRowsIS is reported,
+  // so it is not leaked when an exception is thrown.
+  void
+  MatrixBase::clear_row (const unsigned int row,
+                         const PetscScalar new_diag_value)
+  {
+    compress ();
+
+    // wrap the single row index into a PETSc index set
+    const PetscInt petsc_row = row;
+
+    IS index_set;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int create_ierr
+      = ISCreateGeneral (get_mpi_communicator(), 1, &petsc_row, &index_set);
+#else
+    const int create_ierr
+      = ISCreateGeneral (get_mpi_communicator(), 1, &petsc_row, PETSC_COPY_VALUES, &index_set);
+#endif
+    AssertThrow (create_ierr == 0, ExcPETScError(create_ierr));
+
+    // now set all the entries of this row to zero
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int zero_ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value);
+#else
+    const int zero_ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value, PETSC_NULL, PETSC_NULL);
+#endif
+
+    // release the index set before looking at zero_ierr, so that a
+    // failure above cannot leak it
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int destroy_ierr = ISDestroy (index_set);
+#else
+    const int destroy_ierr = ISDestroy (&index_set);
+#endif
+
+    AssertThrow (zero_ierr == 0, ExcPETScError(zero_ierr));
+    AssertThrow (destroy_ierr == 0, ExcPETScError(destroy_ierr));
+
+    compress ();
+  }
+
+
+
+  // Zero out all entries of the given rows and set their diagonal entries
+  // to @p new_diag_value (via MatZeroRowsIS). The matrix is compressed
+  // before and after the operation.
+  //
+  // @p rows may be empty: the PETSc calls are still made because this is a
+  // collective operation. Throws ExcPETScError if any PETSc call fails;
+  // the temporary index set is destroyed before an error of MatZeroRowsIS
+  // is reported, so it is not leaked.
+  void
+  MatrixBase::clear_rows (const std::vector<unsigned int> &rows,
+                          const PetscScalar new_diag_value)
+  {
+    compress ();
+
+    // convert the row indices to PETSc's index type
+    const std::vector<PetscInt> petsc_rows (rows.begin(), rows.end());
+
+    // forming &petsc_rows[0] on an empty vector is undefined behavior, so
+    // pass a null pointer together with a length of zero in that case
+    const PetscInt *const row_data
+      = (petsc_rows.empty()
+         ?
+         static_cast<const PetscInt *>(0)
+         :
+         &petsc_rows[0]);
+
+    // call the functions. note that we have to call them even if #rows is
+    // empty, since this is a collective operation
+    IS index_set;
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int create_ierr
+      = ISCreateGeneral (get_mpi_communicator(), rows.size(),
+                         row_data, &index_set);
+#else
+    const int create_ierr
+      = ISCreateGeneral (get_mpi_communicator(), rows.size(),
+                         row_data, PETSC_COPY_VALUES, &index_set);
+#endif
+    AssertThrow (create_ierr == 0, ExcPETScError(create_ierr));
+
+    // now set all the entries of these rows to zero
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int zero_ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value);
+#else
+    const int zero_ierr
+      = MatZeroRowsIS(matrix, index_set, new_diag_value, PETSC_NULL, PETSC_NULL);
+#endif
+
+    // release the index set before looking at zero_ierr, so that a
+    // failure above cannot leak it
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    const int destroy_ierr = ISDestroy (index_set);
+#else
+    const int destroy_ierr = ISDestroy (&index_set);
+#endif
+
+    AssertThrow (zero_ierr == 0, ExcPETScError(zero_ierr));
+    AssertThrow (destroy_ierr == 0, ExcPETScError(destroy_ierr));
+
+    compress ();
+  }
+
+
+
+  // Return the value of entry (i,j) as obtained from MatGetValues.
+  // Throws ExcPETScError if PETSc reports an error.
+  PetscScalar
+  MatrixBase::el (const unsigned int i,
+                  const unsigned int j) const
+  {
+    // MatGetValues expects PetscInt indices; PetscInt is the type the
+    // original #ifdef on PETSC_USE_64BIT_INDICES selected in either branch
+    const PetscInt row_index = i;
+    const PetscInt col_index = j;
+    PetscScalar entry;
+
+    const int ierr
+      = MatGetValues (matrix, 1, &row_index, 1, &col_index,
+                      &entry);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return entry;
+  }
+
+
+
+  // Return the i-th diagonal entry. The matrix has to be square (checked
+  // by Assert).
+  PetscScalar
+  MatrixBase::diag_element (const unsigned int i) const
+  {
+    Assert (m() == n(), ExcNotQuadratic());
+
+    // diagonal entries are not treated any differently from other
+    // entries, so simply defer to el()
+    const PetscScalar diagonal_entry = el(i,i);
+    return diagonal_entry;
+  }
+
+
+
+  // Flush PETSc's assembly buffers with a final assembly and reset
+  // last_action to LastAction::none.
+  //
+  // Note that @p operation is accepted but not looked at by this
+  // implementation.
+  void
+  MatrixBase::compress (::dealii::VectorOperation::values operation)
+  {
+    int ierr = MatAssemblyBegin (matrix,MAT_FINAL_ASSEMBLY);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = MatAssemblyEnd (matrix,MAT_FINAL_ASSEMBLY);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // nothing is pending any more
+    last_action = LastAction::none;
+  }
+
+
+
+  // Return the global number of rows as reported by MatGetSize.
+  unsigned int
+  MatrixBase::m () const
+  {
+    // MatGetSize writes PetscInt values; PetscInt is the type the original
+    // #ifdef on PETSC_USE_64BIT_INDICES selected in either branch
+    PetscInt global_rows, global_cols;
+    const int ierr = MatGetSize (matrix, &global_rows, &global_cols);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return global_rows;
+  }
+
+
+
+  // Return the global number of columns as reported by MatGetSize.
+  unsigned int
+  MatrixBase::n () const
+  {
+    // MatGetSize writes PetscInt values; PetscInt is the type the original
+    // #ifdef on PETSC_USE_64BIT_INDICES selected in either branch
+    PetscInt global_rows, global_cols;
+    const int ierr = MatGetSize (matrix, &global_rows, &global_cols);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return global_cols;
+  }
+
+
+
+  // Return the local number of rows as reported by MatGetLocalSize.
+  unsigned int
+  MatrixBase::local_size () const
+  {
+    // MatGetLocalSize writes PetscInt values; PetscInt is the type the
+    // original #ifdef on PETSC_USE_64BIT_INDICES selected in either branch
+    PetscInt local_rows, local_cols;
+    const int ierr = MatGetLocalSize (matrix, &local_rows, &local_cols);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return local_rows;
+  }
+
+
+
+  // Return the pair (begin, end) that MatGetOwnershipRange reports for
+  // this matrix on the calling process.
+  std::pair<unsigned int, unsigned int>
+  MatrixBase::local_range () const
+  {
+    // MatGetOwnershipRange writes PetscInt values; PetscInt is the type the
+    // original #ifdef on PETSC_USE_64BIT_INDICES selected in either branch
+    PetscInt first_row, past_last_row;
+    const int ierr = MatGetOwnershipRange (static_cast<const Mat &>(matrix),
+                                           &first_row, &past_last_row);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return std::pair<unsigned int, unsigned int> (first_row, past_last_row);
+  }
+
+
+
+  // Return the nz_used field of the MatInfo structure obtained from
+  // MatGetInfo with MAT_GLOBAL_SUM, converted to unsigned int.
+  unsigned int
+  MatrixBase::n_nonzero_elements () const
+  {
+    MatInfo global_info;
+    const int info_ierr
+      = MatGetInfo (matrix, MAT_GLOBAL_SUM, &global_info);
+    AssertThrow (info_ierr == 0, ExcPETScError(info_ierr));
+
+    const unsigned int n_used = static_cast<unsigned int>(global_info.nz_used);
+    return n_used;
+  }
+
+
+
+  // Return the number of entries PETSc reports for the given row (via
+  // MatGetRow). @p row must be less than m() (checked by Assert). Throws
+  // ExcPETScError if a PETSc call fails.
+  unsigned int
+  MatrixBase::
+  row_length (const unsigned int row) const
+  {
+//TODO: this function will probably only work if compress() was called on the
+//matrix previously. however, we can't do this here, since it would impose
+//global communication and one would have to make sure that this function is
+//called the same number of times from all processors, something that is
+//unreasonable. there should simply be a way in PETSc to query the number of
+//entries in a row bypassing the call to compress(), but I can't find one
+    Assert (row < m(), ExcInternalError());
+
+    // get a representation of the present row
+    PetscInt           ncols;
+    const PetscInt    *colnums;
+    const PetscScalar *values;
+
+//TODO: this is probably horribly inefficient; we should lobby for a way to
+//query this information from PETSc
+    int ierr;
+    ierr = MatGetRow(*this, row, &ncols, &colnums, &values);
+    AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+    // remember the number of entries before giving the row back:
+    // MatRestoreRow is passed the address of ncols and newer PETSc
+    // versions reset the arguments it is handed, so reading ncols after
+    // the call would yield zero there
+    const PetscInt ncols_saved = ncols;
+
+    // then restore the matrix and return the number of columns in this
+    // row as queried previously
+    ierr = MatRestoreRow(*this, row, &ncols, &colnums, &values);
+    AssertThrow (ierr == 0, MatrixBase::ExcPETScError(ierr));
+
+    return ncols_saved;
+  }
+
+
+  // Return the norm PETSc computes for NORM_1.
+  PetscReal
+  MatrixBase::l1_norm () const
+  {
+    PetscReal norm_value;
+
+    const int norm_ierr = MatNorm (matrix, NORM_1, &norm_value);
+    AssertThrow (norm_ierr == 0, ExcPETScError(norm_ierr));
+
+    return norm_value;
+  }
+
+
+
+  // Return the norm PETSc computes for NORM_INFINITY.
+  PetscReal
+  MatrixBase::linfty_norm () const
+  {
+    PetscReal norm_value;
+
+    const int norm_ierr = MatNorm (matrix, NORM_INFINITY, &norm_value);
+    AssertThrow (norm_ierr == 0, ExcPETScError(norm_ierr));
+
+    return norm_value;
+  }
+
+
+
+  // Return the norm PETSc computes for NORM_FROBENIUS.
+  PetscReal
+  MatrixBase::frobenius_norm () const
+  {
+    PetscReal norm_value;
+
+    const int norm_ierr = MatNorm (matrix, NORM_FROBENIUS, &norm_value);
+    AssertThrow (norm_ierr == 0, ExcPETScError(norm_ierr));
+
+    return norm_value;
+  }
+
+
+  // Return (M*v) * v, where M is this matrix: the product M*v is formed
+  // in a temporary Vector of size v.size() and then multiplied with v.
+  PetscScalar
+  MatrixBase::matrix_norm_square (const VectorBase &v) const
+  {
+    Vector matrix_times_v(v.size());
+    vmult (matrix_times_v, v);
+
+    const PetscScalar result = matrix_times_v*v;
+    return result;
+  }
+
+
+  // Return u * (M*v), where M is this matrix: the product M*v is formed
+  // in a temporary Vector of size v.size() and then multiplied with u.
+  PetscScalar
+  MatrixBase::matrix_scalar_product (const VectorBase &u,
+                                     const VectorBase &v) const
+  {
+    Vector matrix_times_v(v.size());
+    vmult (matrix_times_v, v);
+
+    const PetscScalar result = u*matrix_times_v;
+    return result;
+  }
+
+
+#if DEAL_II_PETSC_VERSION_GTE(3,1,0)
+  // Return the trace as computed by MatGetTrace. Only compiled for PETSc
+  // versions 3.1.0 and later.
+  PetscReal
+  MatrixBase::trace () const
+  {
+    PetscReal trace_value;
+
+    const int trace_ierr = MatGetTrace (matrix, &trace_value);
+    AssertThrow (trace_ierr == 0, ExcPETScError(trace_ierr));
+
+    return trace_value;
+  }
+#endif
+
+
+
+  // Scale the matrix by the factor @p a (via MatScale) and return a
+  // reference to this object.
+  MatrixBase &
+  MatrixBase::operator *= (const PetscScalar a)
+  {
+    const int scale_ierr = MatScale (matrix, a);
+    AssertThrow (scale_ierr == 0, ExcPETScError(scale_ierr));
+
+    return *this;
+  }
+
+
+
+  // Scale the matrix by the factor 1/@p a (via MatScale) and return a
+  // reference to this object.
+  MatrixBase &
+  MatrixBase::operator /= (const PetscScalar a)
+  {
+    // MatScale multiplies, so hand it the reciprocal
+    const PetscScalar reciprocal = 1./a;
+
+    const int scale_ierr = MatScale (matrix, reciprocal);
+    AssertThrow (scale_ierr == 0, ExcPETScError(scale_ierr));
+
+    return *this;
+  }
+
+
+  // Compute dst = M*src via MatMult. Source and destination must be
+  // different objects (checked by Assert).
+  void
+  MatrixBase::vmult (VectorBase &dst,
+                     const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    const int mult_ierr = MatMult (matrix, src, dst);
+    AssertThrow (mult_ierr == 0, ExcPETScError(mult_ierr));
+  }
+
+
+
+  // Compute dst = M^T*src via MatMultTranspose. Source and destination
+  // must be different objects (checked by Assert).
+  void
+  MatrixBase::Tvmult (VectorBase &dst,
+                      const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    const int mult_ierr = MatMultTranspose (matrix, src, dst);
+    AssertThrow (mult_ierr == 0, ExcPETScError(mult_ierr));
+  }
+
+
+
+  // Compute dst += M*src via MatMultAdd, with dst passed both as the
+  // vector to add to and as the result. Source and destination must be
+  // different objects (checked by Assert).
+  void
+  MatrixBase::vmult_add (VectorBase &dst,
+                         const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    const int mult_ierr = MatMultAdd (matrix, src, dst, dst);
+    AssertThrow (mult_ierr == 0, ExcPETScError(mult_ierr));
+  }
+
+
+
+  // Compute dst += M^T*src via MatMultTransposeAdd, with dst passed both
+  // as the vector to add to and as the result. Source and destination
+  // must be different objects (checked by Assert).
+  void
+  MatrixBase::Tvmult_add (VectorBase &dst,
+                          const VectorBase &src) const
+  {
+    Assert (&src != &dst, ExcSourceEqualsDestination());
+
+    const int mult_ierr = MatMultTransposeAdd (matrix, src, dst, dst);
+    AssertThrow (mult_ierr == 0, ExcPETScError(mult_ierr));
+  }
+
+
+  // Store the residual b - M*x in @p dst and return its l2 norm.
+  PetscScalar
+  MatrixBase::residual (VectorBase &dst,
+                        const VectorBase &x,
+                        const VectorBase &b) const
+  {
+    // build M*x - b in place and flip the sign afterwards; this costs one
+    // extra pass over dst but avoids a temporary vector
+    vmult (dst, x);
+    dst -= b;
+    dst *= -1;
+
+    const PetscScalar residual_norm = dst.l2_norm();
+    return residual_norm;
+  }
+
+
+
+ // Conversion operator: return the underlying PETSc matrix handle so that
+ // this object can be passed directly to PETSc functions expecting a Mat.
+ MatrixBase::operator Mat () const
+ {
+ return matrix;
+ }
+
+  // Transpose the matrix by calling MatTranspose with MAT_REUSE_MATRIX
+  // and this matrix itself as the output argument. Throws ExcPETScError
+  // on failure.
+  void
+  MatrixBase::transpose ()
+  {
+    const int transpose_ierr = MatTranspose(matrix, MAT_REUSE_MATRIX, &matrix);
+    AssertThrow (transpose_ierr == 0, ExcPETScError(transpose_ierr));
+  }
+
+  // Return the result of MatIsSymmetric for the given @p tolerance. The
+  // matrix is compressed first. Throws ExcPETScError if the PETSc query
+  // fails, instead of returning an indeterminate value.
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+  PetscTruth
+#else
+  PetscBool
+#endif
+  MatrixBase::is_symmetric (const double tolerance)
+  {
+    // start from a defined value so that nothing uninitialized can ever
+    // be returned
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth
+#else
+    PetscBool
+#endif
+    truth = PETSC_FALSE;
+
+    // First flush PETSc caches
+    compress ();
+
+    const int ierr = MatIsSymmetric (matrix, tolerance, &truth);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return truth;
+  }
+
+  // Return the result of MatIsHermitian for the given @p tolerance. The
+  // matrix is compressed first. Throws ExcPETScError if the PETSc query
+  // fails, instead of returning an indeterminate value.
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+  PetscTruth
+#else
+  PetscBool
+#endif
+  MatrixBase::is_hermitian (const double tolerance)
+  {
+    // start from a defined value so that nothing uninitialized can ever
+    // be returned
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    PetscTruth
+#else
+    PetscBool
+#endif
+    truth = PETSC_FALSE;
+
+    // First flush PETSc caches
+    compress ();
+
+    const int ierr = MatIsHermitian (matrix, tolerance, &truth);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    return truth;
+  }
+
+  // Print the matrix to PETSC_VIEWER_STDOUT_WORLD using the default
+  // viewer format.
+  //
+  // This function does not flush PETSc's caches itself (shuqiangwang):
+  // callers have to call compress() before calling write_ascii().
+  void
+  MatrixBase::write_ascii() const
+  {
+    // select the output format...
+    PetscViewerSetFormat (PETSC_VIEWER_STDOUT_WORLD,
+                          PETSC_VIEWER_DEFAULT);
+
+    // ...and write to screen
+    MatView (matrix,PETSC_VIEWER_STDOUT_WORLD);
+  }
+
+
+
+  // Return sizeof(*this) plus the `memory` field that MatGetInfo reports
+  // with MAT_LOCAL. Throws ExcPETScError if the query fails.
+  std::size_t
+  MatrixBase::memory_consumption() const
+  {
+    MatInfo info;
+    const int ierr = MatGetInfo(matrix, MAT_LOCAL, &info);
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // convert to the return type directly: going through unsigned int
+    // would truncate the value wherever std::size_t is wider than
+    // unsigned int
+    return sizeof(*this) + static_cast<std::size_t>(info.memory);
+  }
+
+
+  // Make this object a copy of @p source (added by shuqiangwang): the own
+  // PETSc matrix is destroyed and replaced by a duplicate of the source
+  // matrix including its values (MatDuplicate with MAT_COPY_VALUES); the
+  // members last_action, column_indices and column_values are copied too.
+  // Copying an object onto itself is a no-op. Throws ExcPETScError if a
+  // PETSc call fails.
+  void MatrixBase::copy_from(const MatrixBase &source)
+  {
+    // without this guard a self-copy would destroy the matrix and then
+    // try to duplicate the handle that was just destroyed
+    if (this == &source)
+      return;
+
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+    int ierr = MatDestroy (matrix);
+#else
+    int ierr = MatDestroy (&matrix);
+#endif
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    ierr = MatDuplicate(source.matrix,MAT_COPY_VALUES,&(this->matrix));
+    AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+    // bring the bookkeeping members in line with the source as well
+    this->last_action = source.last_action;
+    this->column_indices = source.column_indices;
+    this->column_values = source.column_values;
+  }
+
+  // Add @p factor times @p source to this matrix via MatAXPY, telling
+  // PETSc that the two nonzero patterns may differ
+  // (DIFFERENT_NONZERO_PATTERN). Throws ExcPETScError on failure.
+  void MatrixBase::add(double factor, const MatrixBase &source)
+  {
+    const int axpy_ierr
+      = MatAXPY(this->matrix,factor,source.matrix,DIFFERENT_NONZERO_PATTERN);
+    AssertThrow (axpy_ierr == 0, ExcPETScError(axpy_ierr));
+  }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_PETSC
Added: branches/s-wang2/for_deal.II/source/lac/petsc_solver.cc
===================================================================
--- branches/s-wang2/for_deal.II/source/lac/petsc_solver.cc (rev 0)
+++ branches/s-wang2/for_deal.II/source/lac/petsc_solver.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,890 @@
+//---------------------------------------------------------------------------
+// $Id: petsc_solver.cc 27668 2012-11-21 23:50:16Z bangerth $
+// Version: $Name$
+//
+// Copyright (C) 2004, 2006, 2008, 2009, 2010, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+
+#include <deal.II/base/logstream.h>
+#include <deal.II/lac/petsc_solver.h>
+
+#ifdef DEAL_II_USE_PETSC
+
+# include <deal.II/lac/petsc_matrix_base.h>
+# include <deal.II/lac/petsc_vector_base.h>
+# include <deal.II/lac/petsc_precondition.h>
+# include <cmath>
+
+#include <petscversion.h>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace PETScWrappers
+{
+
+ // Release the KSP object held in ksp. KSPDestroy takes the object by
+ // value before PETSc 3.2 and by pointer from 3.2 on.
+ // NOTE(review): the error check sits in a destructor - confirm that a
+ // failure reported from here is acceptable.
+ SolverBase::SolverData::~SolverData ()
+ {
+   int ierr;
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+   ierr = KSPDestroy (ksp);
+#else
+   ierr = KSPDestroy (&ksp);
+#endif
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+
+ // Remember the solver control object and the MPI communicator; the
+ // PETSc solver object itself is only created inside solve().
+ SolverBase::SolverBase (SolverControl  &cn,
+                         const MPI_Comm &mpi_communicator)
+   : solver_control (cn)
+   , mpi_communicator (mpi_communicator)
+ {}
+
+
+
+ // Nothing to do explicitly: the members clean up after themselves.
+ SolverBase::~SolverBase ()
+ {
+ }
+
+
+
+ // Solve A x = b with the given preconditioner through a PETSc KSP
+ // object.
+ //
+ // The KSP object is created and configured on the first call only and
+ // is then kept in solver_data; later calls reuse it together with the
+ // operators and preconditioner given on that first call. Call reset()
+ // to discard it.
+ //
+ // Throws ExcPETScError if a PETSc call fails and
+ // SolverControl::NoConvergence if solver_control does not report
+ // success after the solve.
+ void
+ SolverBase::solve (const MatrixBase &A,
+                    VectorBase &x,
+                    const VectorBase &b,
+                    const PreconditionerBase &preconditioner)
+ {
+   int ierr;
+   // first create a solver object if this
+   // is necessary
+   if (solver_data.get() == 0)
+     {
+       solver_data.reset (new SolverData());
+
+       ierr = KSPCreate (mpi_communicator, &solver_data->ksp);
+       AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+       // set the matrices involved. this happens
+       // only here, i.e. when the solver object
+       // is created
+       ierr = KSPSetOperators (solver_data->ksp, A, preconditioner,
+                               SAME_PRECONDITIONER);
+       AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+       // let derived classes set the solver
+       // type, and the preconditioning
+       // object set the type of
+       // preconditioner
+       set_solver_type (solver_data->ksp);
+
+       ierr = KSPSetPC (solver_data->ksp, preconditioner.get_pc());
+       AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+       // then a convergence monitor
+       // function. that function simply
+       // checks with the solver_control
+       // object we have in this object for
+       // convergence
+       ierr = KSPSetConvergenceTest (solver_data->ksp, &convergence_test,
+                                     reinterpret_cast<void *>(&solver_control),
+                                     PETSC_NULL);
+       AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+       ierr = KSPSetNormType(solver_data->ksp, KSP_NORM_UNPRECONDITIONED); // shuqiangwang
+       AssertThrow (ierr == 0, ExcPETScError(ierr));
+// int maxits;
+// double rtol, atol, dtol;
+// KSPGetTolerances(solver_data->ksp, &rtol, &atol, &dtol, &maxits);
+//// KSPSetTolerances(solver_data->ksp, rtol, solver_control.tolerance(), dtol, solver_control.max_steps());
+// KSPSetTolerances(solver_data->ksp, solver_control.tolerance(), atol, dtol, solver_control.max_steps());
+     }
+
+   // set the command line option prefix name
+   ierr = KSPSetOptionsPrefix(solver_data->ksp, prefix_name.c_str());
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // set the command line options provided
+   // by the user to override the defaults
+   ierr = KSPSetFromOptions (solver_data->ksp);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // then do the real work: set up solver
+   // internal data and solve the
+   // system.
+   ierr = KSPSetUp (solver_data->ksp);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   ierr = KSPSolve (solver_data->ksp, b, x);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // do not destroy solver object
+// solver_data.reset ();
+
+   // in case of failure: throw
+   // exception
+   if (solver_control.last_check() != SolverControl::success)
+     throw SolverControl::NoConvergence (solver_control.last_step(),
+                                         solver_control.last_value());
+   // otherwise exit as normal
+ }
+
+
+ // Store the string that solve() passes to KSPSetOptionsPrefix.
+ void
+ SolverBase::set_prefix (const std::string &prefix)
+ {
+   this->prefix_name = prefix;
+ }
+
+
+ // Drop the cached solver data; solve() creates a new solver object
+ // the next time it finds none.
+ void
+ SolverBase::reset ()
+ {
+   this->solver_data.reset ();
+ }
+
+
+ // Give access to the SolverControl object this solver was built with.
+ SolverControl &
+ SolverBase::control () const
+ {
+   return this->solver_control;
+ }
+
+
+ // Convergence callback handed to KSPSetConvergenceTest. The last
+ // argument is the SolverControl object registered as context; its
+ // check() result is translated into a KSPConvergedReason written to
+ // *reason. Always returns 0.
+ // NOTE(review): whether last_step() can actually exceed max_steps()
+ // depends on SolverControl::check - confirm the ITS/DTOL distinction.
+ int
+ SolverBase::convergence_test (KSP /*ksp*/,
+#ifdef PETSC_USE_64BIT_INDICES
+                               const PetscInt iteration,
+#else
+                               const int iteration,
+#endif
+                               const PetscReal residual_norm,
+                               KSPConvergedReason *reason,
+                               void *solver_control_x)
+ {
+   SolverControl *const control
+     = reinterpret_cast<SolverControl *>(solver_control_x);
+
+   switch (control->check (iteration, residual_norm))
+     {
+     case ::dealii::SolverControl::iterate:
+       *reason = KSP_CONVERGED_ITERATING;
+       break;
+
+     case ::dealii::SolverControl::success:
+       *reason = static_cast<KSPConvergedReason>(1);
+       break;
+
+     case ::dealii::SolverControl::failure:
+       *reason = (control->last_step() > control->max_steps())
+                 ? KSP_DIVERGED_ITS
+                 : KSP_DIVERGED_DTOL;
+       break;
+
+     default:
+       Assert (false, ExcNotImplemented());
+     }
+
+   // return without failure
+   return 0;
+ }
+
+
+
+ /* ---------------------- SolverRichardson ------------------------ */
+
+ // Store the value later passed to KSPRichardsonSetScale.
+ SolverRichardson::AdditionalData::AdditionalData (const double omega)
+   : omega (omega)
+ {}
+
+
+
+ // Forward control object and communicator to SolverBase and keep the
+ // Richardson-specific settings.
+ SolverRichardson::SolverRichardson (SolverControl        &cn,
+                                     const MPI_Comm       &mpi_communicator,
+                                     const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a Richardson iteration: set the type, the damping
+ // factor additional_data.omega, a nonzero initial guess, and the
+ // tolerances taken from solver_control.
+ // Throws ExcPETScError if any PETSc call fails.
+ void
+ SolverRichardson::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPRICHARDSON);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // set the damping factor from the data
+   ierr = KSPRichardsonSetScale (ksp, additional_data.omega);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // Hand over the absolute
+   // tolerance and the maximum
+   // iteration number to the PETSc
+   // convergence criterion. The
+   // custom deal.II SolverControl
+   // object is ignored by the PETSc
+   // Richardson method (when no
+   // PETSc monitoring is present),
+   // since in this case PETSc
+   // uses a faster version of
+   // the Richardson iteration,
+   // where no residual is
+   // available.
+   ierr = KSPSetTolerances(ksp, PETSC_DEFAULT, this->solver_control.tolerance(),
+                           PETSC_DEFAULT, this->solver_control.max_steps()+1);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverChebychev ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverChebychev::SolverChebychev (SolverControl        &cn,
+                                   const MPI_Comm       &mpi_communicator,
+                                   const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a Chebyshev iteration with a nonzero initial
+ // guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverChebychev::set_solver_type (KSP &ksp) const
+ {
+   // set the type of solver. note the
+   // completely pointless change in
+   // spelling Chebyshev between PETSc 3.2
+   // and 3.3...
+   int ierr;
+
+#if DEAL_II_PETSC_VERSION_LT(3,3,0)
+   ierr = KSPSetType (ksp, KSPCHEBYCHEV);
+#else
+   ierr = KSPSetType (ksp, KSPCHEBYSHEV);
+#endif
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverCG ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverCG::SolverCG (SolverControl        &cn,
+                     const MPI_Comm       &mpi_communicator,
+                     const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a CG iteration (KSPCG) with a nonzero initial
+ // guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverCG::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPCG);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverBiCG ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverBiCG::SolverBiCG (SolverControl        &cn,
+                         const MPI_Comm       &mpi_communicator,
+                         const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a BiCG iteration (KSPBICG) with a nonzero initial
+ // guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverBiCG::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPBICG);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverGMRES ------------------------ */
+
+ // Store the restart length and the flag selecting right
+ // preconditioning; both are read in SolverGMRES::set_solver_type().
+ SolverGMRES::AdditionalData::AdditionalData (const unsigned int restart_parameter,
+                                              const bool         right_preconditioning)
+   : restart_parameter (restart_parameter)
+   , right_preconditioning (right_preconditioning)
+ {}
+
+
+
+ // Forward control object and communicator to SolverBase and keep the
+ // GMRES-specific settings.
+ SolverGMRES::SolverGMRES (SolverControl        &cn,
+                           const MPI_Comm       &mpi_communicator,
+                           const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a GMRES iteration: set the type, the restart
+ // length from additional_data, optionally right preconditioning, and
+ // a nonzero initial guess.
+ // Throws ExcPETScError if any PETSc call fails.
+ void
+ SolverGMRES::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPGMRES);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // set the restart parameter from the
+   // data. we would like to use the simple
+   // code that is commented out, but this
+   // leads to nasty warning and error
+   // messages due to some stupidity on
+   // PETSc's side: KSPGMRESSetRestart is
+   // implemented as a macro in which return
+   // statements are hidden. This may work
+   // if people strictly follow the PETSc
+   // coding style of always having
+   // functions return an integer error
+   // code, but the present function isn't
+   // like this.
+   /*
+   ierr = KSPGMRESSetRestart (ksp, additional_data.restart_parameter);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+   */
+   // so rather expand their macros by hand,
+   // and do some equally nasty stuff that at
+   // least doesn't yield warnings...
+   int (*fun_ptr)(KSP,int);
+   ierr = PetscObjectQueryFunction((PetscObject)(ksp),
+                                   "KSPGMRESSetRestart_C",
+                                   (void (* *)())&fun_ptr);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   ierr = (*fun_ptr)(ksp,additional_data.restart_parameter);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // Set preconditioning side to
+   // right
+   if (additional_data.right_preconditioning)
+     {
+#if DEAL_II_PETSC_VERSION_LT(3,2,0)
+       ierr = KSPSetPreconditionerSide(ksp, PC_RIGHT);
+#else
+       ierr = KSPSetPCSide(ksp, PC_RIGHT);
+#endif
+
+       AssertThrow (ierr == 0, ExcPETScError(ierr));
+     }
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverBicgstab ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverBicgstab::SolverBicgstab (SolverControl        &cn,
+                                 const MPI_Comm       &mpi_communicator,
+                                 const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a BiCGStab iteration (KSPBCGS) with a nonzero
+ // initial guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverBicgstab::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPBCGS);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverCGS ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverCGS::SolverCGS (SolverControl        &cn,
+                       const MPI_Comm       &mpi_communicator,
+                       const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a CGS iteration (KSPCGS) with a nonzero initial
+ // guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverCGS::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPCGS);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverTFQMR ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverTFQMR::SolverTFQMR (SolverControl        &cn,
+                           const MPI_Comm       &mpi_communicator,
+                           const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a TFQMR iteration (KSPTFQMR) with a nonzero
+ // initial guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverTFQMR::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPTFQMR);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverTCQMR ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverTCQMR::SolverTCQMR (SolverControl        &cn,
+                           const MPI_Comm       &mpi_communicator,
+                           const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a TCQMR iteration (KSPTCQMR) with a nonzero
+ // initial guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverTCQMR::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPTCQMR);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverCR ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverCR::SolverCR (SolverControl        &cn,
+                     const MPI_Comm       &mpi_communicator,
+                     const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as a CR iteration (KSPCR) with a nonzero initial
+ // guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverCR::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPCR);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverLSQR ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverLSQR::SolverLSQR (SolverControl        &cn,
+                         const MPI_Comm       &mpi_communicator,
+                         const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as an LSQR iteration (KSPLSQR) with a nonzero initial
+ // guess. Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverLSQR::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPLSQR);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // in the deal.II solvers, we always
+   // honor the initial guess in the
+   // solution vector. do so here as well:
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_TRUE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SolverPreOnly ------------------------ */
+
+ // Forward control object and communicator to SolverBase and keep the
+ // solver-specific settings.
+ SolverPreOnly::SolverPreOnly (SolverControl        &cn,
+                               const MPI_Comm       &mpi_communicator,
+                               const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+ {}
+
+
+ // Configure ksp as KSPPREONLY, pre-seed solver_control via
+ // check(1, 0.0) and switch the nonzero initial guess off.
+ // Throws ExcPETScError if a PETSc call fails.
+ void
+ SolverPreOnly::set_solver_type (KSP &ksp) const
+ {
+   int ierr;
+   ierr = KSPSetType (ksp, KSPPREONLY);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // The KSPPREONLY solver of
+   // PETSc never calls the convergence
+   // monitor, which leads to failure
+   // even when everything was ok.
+   // Therefore the SolverControl status
+   // is set to some nice values, which
+   // guarantee a nice result at the end
+   // of the solution process.
+   solver_control.check (1, 0.0);
+
+   // Using the PREONLY solver with
+   // a nonzero initial guess leads
+   // PETSc to produce some error messages.
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_FALSE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+
+ /* ---------------------- SparseDirectMUMPS------------------------ */
+
+ // Forward control object and communicator to SolverBase, keep the
+ // solver-specific settings and start with symmetric mode disabled.
+ SparseDirectMUMPS::SparseDirectMUMPS (SolverControl        &cn,
+                                       const MPI_Comm       &mpi_communicator,
+                                       const AdditionalData &data)
+   : SolverBase (cn, mpi_communicator)
+   , additional_data (data)
+   , symmetric_mode (false)
+ {}
+
+
+ // Configure ksp as KSPPREONLY, pre-seed solver_control via
+ // check(1, 0.0) and switch the nonzero initial guess off.
+ // Throws ExcPETScError if a PETSc call fails.
+ void
+ SparseDirectMUMPS::set_solver_type (KSP &ksp) const
+ {
+   // KSPPREONLY implements a stub
+   // method that applies only the
+   // preconditioner. Its use is due
+   // to SparseDirectMUMPS being
+   // a direct (rather than iterative)
+   // solver
+   int ierr;
+   ierr = KSPSetType (ksp, KSPPREONLY);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+   // The KSPPREONLY solver of
+   // PETSc never calls the convergence
+   // monitor, which leads to failure
+   // even when everything was ok.
+   // Therefore, the SolverControl
+   // status is set to some nice
+   // values, which guarantee a nice
+   // result at the end of the solution
+   // process.
+   solver_control.check (1, 0.0);
+
+   // Using a PREONLY solver with a
+   // nonzero initial guess leads PETSc
+   // to produce some error messages.
+   ierr = KSPSetInitialGuessNonzero (ksp, PETSC_FALSE);
+   AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+ // Solve A x = b with the MUMPS direct solver through a PETSc KSP
+ // object of type PREONLY whose preconditioner is an LU (or, in
+ // symmetric mode, Cholesky) factorization.
+ // Throws ExcPETScError if a checked PETSc call fails and
+ // SolverControl::NoConvergence if solver_control does not report
+ // success. If PETSC_HAVE_MUMPS is not defined the function only
+ // raises an assertion.
+ // NOTE(review): solver_data is not reset in this function, so the KSP
+ // object (and the matrix attached to it on the first call) appears to
+ // be reused by later calls - confirm this is intended.
+ void
+ SparseDirectMUMPS::solve (const MatrixBase &A,
+ VectorBase &x,
+ const VectorBase &b)
+ {
+#ifdef PETSC_HAVE_MUMPS
+ // had some trouble with the deallog printing to console the
+ // outcome of the solve function for every process. Brought down
+ // the depth level to zero to alleviate this
+ deallog.depth_console (0);
+ int ierr;
+
+ // factorization matrix to be obtained from MUMPS
+ Mat F;
+
+ // MUMPS integer control parameter to be passed on: entry 7 of the
+ // MUMPS ICNTL array (of size 40) is set to 2, which selects
+ // Approximate Minimum Fill (AMF)
+ PetscInt ival=2, icntl=7;
+ // number of iterations to solution (should be 1 for a direct
+ // solver)
+ PetscInt its;
+ // norm of residual
+ PetscReal rnorm;
+
+ // create a solver object if this is necessary
+ if (solver_data.get() == 0)
+ {
+ solver_data.reset (new SolverDataMUMPS ());
+
+ // create the default KSP context and put it in
+ // solver_data->ksp
+ ierr = KSPCreate (mpi_communicator, &solver_data->ksp);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // set the matrices involved; A serves both as system matrix and
+ // as the matrix the preconditioner is built from
+ ierr = KSPSetOperators (solver_data->ksp, A, A,
+ DIFFERENT_NONZERO_PATTERN);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // set the solver type
+ set_solver_type (solver_data->ksp);
+
+ // get the associated preconditioner context
+ ierr = KSPGetPC (solver_data->ksp, & solver_data->pc);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // build PETSc PC for particular PCLU or PCCHOLESKY
+ // preconditioner depending on whether the symmetric mode has
+ // been set
+ if (symmetric_mode)
+ ierr = PCSetType (solver_data->pc, PCCHOLESKY);
+ else
+ ierr = PCSetType (solver_data->pc, PCLU);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // convergence monitor function that checks with the
+ // solver_control object for convergence (the return code of
+ // this call is not checked)
+ KSPSetConvergenceTest (solver_data->ksp, &convergence_test,
+ reinterpret_cast<void *>(&solver_control),
+ PETSC_NULL);
+
+ // set the software that is to be used to perform the lu
+ // factorization. here we start to see differences with the base
+ // class solve function
+ ierr = PCFactorSetMatSolverPackage (solver_data->pc, MATSOLVERMUMPS);
+ AssertThrow (ierr == 0, ExcPETScError (ierr));
+
+ // set up the package to call for the factorization
+ ierr = PCFactorSetUpMatSolverPackage (solver_data->pc);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // get the factored matrix F from the preconditioner context.
+ // This routine is valid only for LU, ILU, Cholesky, and
+ // incomplete Cholesky
+ ierr = PCFactorGetMatrix(solver_data->pc, &F);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // pass the control parameters to MUMPS
+ ierr = MatMumpsSetIcntl (F, icntl, ival);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // set the command line option prefix name
+ ierr = KSPSetOptionsPrefix(solver_data->ksp, prefix_name.c_str());
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // set the command line options provided by the user to override
+ // the defaults
+ ierr = KSPSetFromOptions (solver_data->ksp);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ }
+
+ // solve the linear system
+ ierr = KSPSolve (solver_data->ksp, b, x);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+
+ // in case of failure throw exception
+ if (solver_control.last_check() != SolverControl::success)
+ throw SolverControl::NoConvergence (solver_control.last_step(),
+ solver_control.last_value());
+ else
+ {
+ // obtain convergence information: the number of iterations and
+ // the residual norm. both values are only stored in local
+ // variables and not used any further in this function
+ ierr = KSPGetIterationNumber (solver_data->ksp, &its);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+ ierr = KSPGetResidualNorm (solver_data->ksp, &rnorm);
+ AssertThrow (ierr == 0, ExcPETScError(ierr));
+ }
+
+#else // PETSC_HAVE_MUMPS
+ Assert (false,
+ ExcMessage ("Your PETSc installation does not include a copy of "
+ "MUMPS package necessary for this solver"));
+
+ // Cast to void to silence compiler
+ // warnings
+ (void) A;
+ (void) x;
+ (void) b;
+#endif
+
+ }
+
+ // Convergence callback handed to KSPSetConvergenceTest. The last
+ // argument is the SolverControl object registered as context; its
+ // check() result is translated into a KSPConvergedReason written to
+ // *reason. Always returns 0.
+ int
+ SparseDirectMUMPS::convergence_test (KSP /*ksp*/,
+#ifdef PETSC_USE_64BIT_INDICES
+                                      const PetscInt iteration,
+#else
+                                      const int iteration,
+#endif
+                                      const PetscReal residual_norm,
+                                      KSPConvergedReason *reason,
+                                      void *solver_control_x)
+ {
+   SolverControl *const control
+     = reinterpret_cast<SolverControl *>(solver_control_x);
+
+   switch (control->check (iteration, residual_norm))
+     {
+     case ::dealii::SolverControl::iterate:
+       *reason = KSP_CONVERGED_ITERATING;
+       break;
+
+     case ::dealii::SolverControl::success:
+       *reason = static_cast<KSPConvergedReason>(1);
+       break;
+
+     case ::dealii::SolverControl::failure:
+       *reason = (control->last_step() > control->max_steps())
+                 ? KSP_DIVERGED_ITS
+                 : KSP_DIVERGED_DTOL;
+       break;
+
+     default:
+       Assert (false, ExcNotImplemented());
+     }
+
+   return 0;
+ }
+
+ // Record whether solve() should select PCCHOLESKY (true) or PCLU
+ // (false) when it sets up the preconditioner.
+ void
+ SparseDirectMUMPS::set_symmetric_mode (const bool flag)
+ {
+   this->symmetric_mode = flag;
+ }
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_PETSC
Added: branches/s-wang2/for_deal.II/source/lac/trilinos_sparse_matrix.cc
===================================================================
--- branches/s-wang2/for_deal.II/source/lac/trilinos_sparse_matrix.cc (rev 0)
+++ branches/s-wang2/for_deal.II/source/lac/trilinos_sparse_matrix.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,1574 @@
+//---------------------------------------------------------------------------
+// $Id: trilinos_sparse_matrix.cc 27628 2012-11-20 22:49:26Z heister $
+// Version: $Name$
+//
+// Copyright (C) 2008, 2009, 2010, 2011, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/lac/trilinos_sparse_matrix.h>
+
+#ifdef DEAL_II_USE_TRILINOS
+
+# include <deal.II/base/utilities.h>
+# include <deal.II/lac/sparse_matrix.h>
+# include <deal.II/lac/trilinos_sparsity_pattern.h>
+# include <deal.II/lac/sparsity_pattern.h>
+# include <deal.II/lac/compressed_sparsity_pattern.h>
+# include <deal.II/lac/compressed_set_sparsity_pattern.h>
+# include <deal.II/lac/compressed_simple_sparsity_pattern.h>
+
+# include <ml_epetra_utils.h>
+# include <ml_struct.h>
+# include <Teuchos_RCP.hpp>
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+ namespace MatrixIterators
+ {
+ // Load the entries of row a_row into value_cache and colnum_cache.
+ // For the past-the-end row (a_row == matrix->m()) both caches are
+ // released instead. Throws ExcTrilinosError if the row cannot be
+ // extracted.
+ void
+ SparseMatrix::const_iterator::Accessor::
+ visit_present_row ()
+ {
+   // if we are asked to visit the
+   // past-the-end line, then simply
+   // release all our caches and go on
+   // with life
+   if (this->a_row == matrix->m())
+     {
+       colnum_cache.reset ();
+       value_cache.reset ();
+
+       return;
+     }
+
+   // otherwise first flush Trilinos caches
+   matrix->compress ();
+
+   // get a representation of the present
+   // row. the caches are first sized for a
+   // full row (matrix->n() entries)
+   int ncols = 0;
+   const int colnums = matrix->n();
+   if (value_cache.get() == 0)
+     {
+       value_cache.reset (new std::vector<TrilinosScalar> (matrix->n()));
+       colnum_cache.reset (new std::vector<unsigned int> (matrix->n()));
+     }
+   else
+     {
+       value_cache->resize (matrix->n());
+       colnum_cache->resize (matrix->n());
+     }
+
+   const int ierr = matrix->trilinos_matrix().
+                    ExtractGlobalRowCopy((int)this->a_row,
+                                         colnums,
+                                         ncols, &((*value_cache)[0]),
+                                         reinterpret_cast<int *>(&((*colnum_cache)[0])));
+
+   // check for errors before trusting ncols:
+   // it need not have been set if the call
+   // failed
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+   // shrink the caches to the number of
+   // entries actually present in this row
+   value_cache->resize (ncols);
+   colnum_cache->resize (ncols);
+ }
+ }
+
+
+ // The constructor is actually the
+ // only point where we have to check
+ // whether we build a serial or a
+ // parallel Trilinos matrix.
+ // Actually, it does not even matter
+ // how many threads there are, but
+ // only if we use an MPI compiler or
+ // a standard compiler. So, even one
+ // thread on a configuration with
+ // MPI will still get a parallel
+ // interface.
+ // Default constructor: an empty matrix on a zero-sized map over
+ // comm_self(), immediately marked as filled.
+ SparseMatrix::SparseMatrix ()
+   : column_space_map (new Epetra_Map (0, 0,
+                                       Utilities::Trilinos::comm_self()))
+   , matrix (new Epetra_FECrsMatrix (View,
+                                     *column_space_map,
+                                     *column_space_map,
+                                     0))
+   , last_action (Zero)
+   , compressed (true)
+ {
+   matrix->FillComplete();
+ }
+
+
+
+ // Build a matrix on a copy of input_map, with the same expected
+ // number of entries for every row.
+ SparseMatrix::SparseMatrix (const Epetra_Map   &input_map,
+                             const unsigned int  n_max_entries_per_row)
+   : column_space_map (new Epetra_Map (input_map))
+   , matrix (new Epetra_FECrsMatrix (Copy,
+                                     *column_space_map,
+                                     static_cast<int>(n_max_entries_per_row),
+                                     false))
+   , last_action (Zero)
+   , compressed (false)
+ {}
+
+
+
+ // Build a matrix on a copy of input_map, with a per-row number of
+ // entries. The unsigned array is handed to Epetra reinterpreted as
+ // an int array.
+ SparseMatrix::SparseMatrix (const Epetra_Map                &input_map,
+                             const std::vector<unsigned int> &n_entries_per_row)
+   : column_space_map (new Epetra_Map (input_map))
+   , matrix (new Epetra_FECrsMatrix
+             (Copy,
+              *column_space_map,
+              reinterpret_cast<int *>(const_cast<unsigned int *>(&(n_entries_per_row[0]))),
+              false))
+   , last_action (Zero)
+   , compressed (false)
+ {}
+
+
+
+ // Build a matrix whose rows follow input_row_map; a copy of
+ // input_col_map is kept as column space map. Every row gets the same
+ // expected number of entries.
+ SparseMatrix::SparseMatrix (const Epetra_Map   &input_row_map,
+                             const Epetra_Map   &input_col_map,
+                             const unsigned int  n_max_entries_per_row)
+   : column_space_map (new Epetra_Map (input_col_map))
+   , matrix (new Epetra_FECrsMatrix (Copy,
+                                     input_row_map,
+                                     static_cast<int>(n_max_entries_per_row),
+                                     false))
+   , last_action (Zero)
+   , compressed (false)
+ {}
+
+
+
+ // Build a matrix whose rows follow input_row_map; a copy of
+ // input_col_map is kept as column space map. The per-row entry
+ // counts are handed to Epetra reinterpreted as an int array.
+ SparseMatrix::SparseMatrix (const Epetra_Map                &input_row_map,
+                             const Epetra_Map                &input_col_map,
+                             const std::vector<unsigned int> &n_entries_per_row)
+   : column_space_map (new Epetra_Map (input_col_map))
+   , matrix (new Epetra_FECrsMatrix
+             (Copy,
+              input_row_map,
+              reinterpret_cast<int *>(const_cast<unsigned int *>(&(n_entries_per_row[0]))),
+              false))
+   , last_action (Zero)
+   , compressed (false)
+ {}
+
+
+
+ // Build an m x n matrix on comm_self() with the same expected number
+ // of entries for every row.
+ SparseMatrix::SparseMatrix (const unsigned int m,
+                             const unsigned int n,
+                             const unsigned int n_max_entries_per_row)
+   : column_space_map (new Epetra_Map (static_cast<int>(n), 0,
+                                       Utilities::Trilinos::comm_self()))
+   // on one processor only, we know how the
+   // columns of the matrix will be
+   // distributed (everything on one
+   // processor), so we can hand in this
+   // information to the constructor. we
+   // can't do so in parallel, where the
+   // information from columns is only
+   // available when entries have been added
+   , matrix (new Epetra_FECrsMatrix
+             (Copy,
+              Epetra_Map (static_cast<int>(m), 0,
+                          Utilities::Trilinos::comm_self()),
+              *column_space_map,
+              static_cast<int>(n_max_entries_per_row),
+              false))
+   , last_action (Zero)
+   , compressed (false)
+ {}
+
+
+
+ // Build an m x n matrix on comm_self() with a per-row number of
+ // entries. The unsigned array is handed to Epetra reinterpreted as
+ // an int array.
+ SparseMatrix::SparseMatrix (const unsigned int               m,
+                             const unsigned int               n,
+                             const std::vector<unsigned int> &n_entries_per_row)
+   : column_space_map (new Epetra_Map (static_cast<int>(n), 0,
+                                       Utilities::Trilinos::comm_self()))
+   , matrix (new Epetra_FECrsMatrix
+             (Copy,
+              Epetra_Map (static_cast<int>(m), 0,
+                          Utilities::Trilinos::comm_self()),
+              *column_space_map,
+              reinterpret_cast<int *>(const_cast<unsigned int *>(&(n_entries_per_row[0]))),
+              false))
+   , last_action (Zero)
+   , compressed (false)
+ {}
+
+
+
+ // Build a matrix whose map is obtained from parallel_partitioning
+ // via make_trilinos_map(communicator, false), with the same expected
+ // number of entries for every row.
+ SparseMatrix::SparseMatrix (const IndexSet &parallel_partitioning,
+                             const MPI_Comm &communicator,
+                             const unsigned int n_max_entries_per_row)
+   :
+   column_space_map (new Epetra_Map(parallel_partitioning.
+                                    make_trilinos_map(communicator, false))),
+   matrix (new Epetra_FECrsMatrix(Copy,
+                                  *column_space_map,
+                                  n_max_entries_per_row,
+                                  false)),
+   last_action (Zero),
+   compressed (false)
+ {}
+
+
+
+ // Build a matrix whose map is obtained from parallel_partitioning
+ // via make_trilinos_map(communicator, false), with a per-row number
+ // of entries. The unsigned array is handed to Epetra cast to int*.
+ SparseMatrix::SparseMatrix (const IndexSet &parallel_partitioning,
+                             const MPI_Comm &communicator,
+                             const std::vector<unsigned int> &n_entries_per_row)
+   :
+   column_space_map (new Epetra_Map(parallel_partitioning.
+                                    make_trilinos_map(communicator, false))),
+   matrix (new Epetra_FECrsMatrix(Copy,
+                                  *column_space_map,
+                                  (int *)const_cast<unsigned int *>(&(n_entries_per_row[0])),
+                                  false)),
+   last_action (Zero),
+   compressed (false)
+ {}
+
+
+
+ // Build a matrix whose row map comes from row_parallel_partitioning
+ // and whose column space map comes from col_parallel_partitioning
+ // (both via make_trilinos_map(communicator, false)), with the same
+ // expected number of entries for every row.
+ SparseMatrix::SparseMatrix (const IndexSet     &row_parallel_partitioning,
+                             const IndexSet     &col_parallel_partitioning,
+                             const MPI_Comm     &communicator,
+                             const unsigned int  n_max_entries_per_row)
+   : column_space_map (new Epetra_Map
+                       (col_parallel_partitioning.make_trilinos_map(communicator, false)))
+   , matrix (new Epetra_FECrsMatrix
+             (Copy,
+              row_parallel_partitioning.make_trilinos_map(communicator, false),
+              static_cast<int>(n_max_entries_per_row),
+              false))
+   , last_action (Zero)
+   , compressed (false)
+ {}
+
+
+
+ // Build a matrix whose row map comes from row_parallel_partitioning
+ // and whose column space map comes from col_parallel_partitioning
+ // (both via make_trilinos_map(communicator, false)), with a per-row
+ // number of entries handed to Epetra reinterpreted as an int array.
+ SparseMatrix::SparseMatrix (const IndexSet                  &row_parallel_partitioning,
+                             const IndexSet                  &col_parallel_partitioning,
+                             const MPI_Comm                  &communicator,
+                             const std::vector<unsigned int> &n_entries_per_row)
+   : column_space_map (new Epetra_Map
+                       (col_parallel_partitioning.make_trilinos_map(communicator, false)))
+   , matrix (new Epetra_FECrsMatrix
+             (Copy,
+              row_parallel_partitioning.make_trilinos_map(communicator, false),
+              reinterpret_cast<int *>(const_cast<unsigned int *>(&(n_entries_per_row[0]))),
+              false))
+   , last_action (Zero)
+   , compressed (false)
+ {}
+
+
+
+ // Build a matrix on the graph of an already compressed Trilinos
+ // sparsity pattern and compress it right away.
+ SparseMatrix::SparseMatrix (const SparsityPattern &sparsity_pattern)
+   : column_space_map (new Epetra_Map (sparsity_pattern.domain_partitioner()))
+   , matrix (new Epetra_FECrsMatrix (Copy,
+                                     sparsity_pattern.trilinos_sparsity_pattern(),
+                                     false))
+   , last_action (Zero)
+   , compressed (true)
+ {
+   // the graph must have been finalized by the caller
+   Assert(sparsity_pattern.trilinos_sparsity_pattern().Filled() == true,
+          ExcMessage("The Trilinos sparsity pattern has not been compressed."));
+   compress();
+ }
+
+
+
+ // Copy constructor: copies the domain partitioner of input_matrix as
+ // column space map and copy-constructs the Epetra matrix.
+ SparseMatrix::SparseMatrix (const SparseMatrix &input_matrix)
+   : Subscriptor()
+   , column_space_map (new Epetra_Map (input_matrix.domain_partitioner()))
+   , matrix (new Epetra_FECrsMatrix (*input_matrix.matrix))
+   , last_action (Zero)
+   , compressed (true)
+ {}
+
+
+
+ // Nothing to do explicitly: the members clean up after themselves.
+ SparseMatrix::~SparseMatrix ()
+ {
+ }
+
+
+
+ // Make this matrix a copy of m and compress it afterwards. If both
+ // matrices report the same local_range() the Epetra matrix is
+ // assigned in place; otherwise column map and matrix are rebuilt.
+ void
+ SparseMatrix::copy_from (const SparseMatrix &m)
+ {
+   // in case we have the same distribution,
+   // we can just copy the data; otherwise the
+   // partitioner needs to be updated as well
+   const bool same_distribution = (local_range() == m.local_range());
+
+   if (same_distribution)
+     {
+       *matrix = *m.matrix;
+     }
+   else
+     {
+       column_space_map.reset (new Epetra_Map (m.domain_partitioner()));
+
+       // release memory before reallocation
+       matrix.reset ();
+       temp_vector.clear ();
+       matrix.reset (new Epetra_FECrsMatrix(*m.matrix));
+     }
+
+   compress();
+ }
+
+
+
+ // Reinitialize from a sparsity pattern using row and column maps on
+ // comm_self() that span all rows resp. columns of the pattern.
+ template <typename SparsityType>
+ void
+ SparseMatrix::reinit (const SparsityType &sparsity_pattern)
+ {
+   const int n_pattern_rows = static_cast<int>(sparsity_pattern.n_rows());
+   const int n_pattern_cols = static_cast<int>(sparsity_pattern.n_cols());
+
+   const Epetra_Map row_map (n_pattern_rows, 0,
+                             Utilities::Trilinos::comm_self());
+   const Epetra_Map col_map (n_pattern_cols, 0,
+                             Utilities::Trilinos::comm_self());
+
+   reinit (row_map, col_map, sparsity_pattern);
+ }
+
+
+
+ // Square-matrix convenience overload: input_map is used both as row
+ // map and as column map of the four-argument reinit().
+ template <typename SparsityType>
+ void
+ SparseMatrix::reinit (const Epetra_Map   &input_map,
+                       const SparsityType &sparsity_pattern,
+                       const bool          exchange_data)
+ {
+   reinit (input_map,
+           input_map,
+           sparsity_pattern,
+           exchange_data);
+ }
+
+
+
+ // Reinitialize the matrix from a deal.II-style sparsity pattern using
+ // the given row and column maps.
+ //
+ // input_row_map    : map whose local range selects the rows set up here
+ // input_col_map    : map stored as the column space map of the matrix
+ // sparsity_pattern : pattern queried through n_rows(), n_cols(),
+ //                    row_length(), row_begin() and row_end()
+ // exchange_data    : if true, a Trilinos SparsityPattern is built and
+ //                    reinit(SparsityPattern) does the rest; if false,
+ //                    an Epetra_CrsGraph is assembled by hand below
+ //
+ // The previous matrix and temp_vector are released first. At the end
+ // last_action is set to Zero and compress() is called.
+ template <typename SparsityType>
+ void
+ SparseMatrix::reinit (const Epetra_Map &input_row_map,
+ const Epetra_Map &input_col_map,
+ const SparsityType &sparsity_pattern,
+ const bool exchange_data)
+ {
+ // release memory before reallocation
+ temp_vector.clear();
+ matrix.reset();
+
+ // if we want to exchange data, build
+ // a usual Trilinos sparsity pattern
+ // and let that handle the
+ // exchange. otherwise, manually
+ // create a CrsGraph, which consumes
+ // considerably less memory because it
+ // can set correct number of indices
+ // right from the start
+ if (exchange_data)
+ {
+ SparsityPattern trilinos_sparsity;
+ trilinos_sparsity.reinit (input_row_map, input_col_map,
+ sparsity_pattern, exchange_data);
+ reinit (trilinos_sparsity);
+
+ return;
+ }
+
+ // the exchange_data == true case has returned above, so this
+ // assertion always holds when it is reached
+ Assert (exchange_data == false, ExcNotImplemented());
+ // the dimension checks are only performed on the process with rank 0
+ if (input_row_map.Comm().MyPID() == 0)
+ {
+ AssertDimension (sparsity_pattern.n_rows(),
+ static_cast<unsigned int>(input_row_map.NumGlobalElements()));
+ AssertDimension (sparsity_pattern.n_cols(),
+ static_cast<unsigned int>(input_col_map.NumGlobalElements()));
+ }
+
+ column_space_map.reset (new Epetra_Map (input_col_map));
+
+ // NOTE(review): all rows from MinMyGID() to MaxMyGID() are visited,
+ // which presumably requires the locally owned rows to form one
+ // contiguous range -- confirm against the callers.
+ const unsigned int first_row = input_row_map.MinMyGID(),
+ last_row = input_row_map.MaxMyGID()+1;
+ std::vector<int> n_entries_per_row(last_row-first_row);
+
+ for (unsigned int row=first_row; row<last_row; ++row)
+ n_entries_per_row[row-first_row] = sparsity_pattern.row_length(row);
+
+ // The deal.II notion of a sparsity
+ // pattern corresponds to the Epetra
+ // concept of a Graph. Hence, we generate
+ // a graph by copying the sparsity pattern
+ // into it, and then build up the matrix
+ // from the graph. This is considerably
+ // faster than directly filling elements
+ // into the matrix. Moreover, it consumes
+ // less memory, since the internal
+ // reordering is done on ints only, and we
+ // can leave the doubles aside.
+
+ // for more than one processor, need to
+ // specify only row map first and let the
+ // matrix entries decide about the column
+ // map (which says which columns are
+ // present in the matrix, not to be
+ // confused with the col_map that tells
+ // how the domain dofs of the matrix will
+ // be distributed). for only one
+ // processor, we can directly assign the
+ // columns as well. Compare this with bug
+ // # 4123 in the Sandia Bugzilla.
+ std_cxx1x::shared_ptr<Epetra_CrsGraph> graph;
+ if (input_row_map.Comm().NumProc() > 1)
+ graph.reset (new Epetra_CrsGraph (Copy, input_row_map,
+ &n_entries_per_row[0], true));
+ else
+ graph.reset (new Epetra_CrsGraph (Copy, input_row_map, input_col_map,
+ &n_entries_per_row[0], true));
+
+ // This function assumes that the
+ // sparsity pattern sits on all processors
+ // (completely). The parallel version uses
+ // an Epetra graph that is already
+ // distributed.
+
+ // now insert the indices
+ std::vector<int> row_indices;
+
+ for (unsigned int row=first_row; row<last_row; ++row)
+ {
+ const int row_length = sparsity_pattern.row_length(row);
+ if (row_length == 0)
+ continue;
+
+ // the buffer is reused across rows; only the first row_length
+ // entries are overwritten and passed on below
+ row_indices.resize (row_length, -1);
+
+ typename SparsityType::row_iterator col_num = sparsity_pattern.row_begin (row),
+ row_end = sparsity_pattern.row_end(row);
+ for (unsigned int col = 0; col_num != row_end; ++col_num, ++col)
+ row_indices[col] = *col_num;
+
+ graph->Epetra_CrsGraph::InsertGlobalIndices (row, row_length,
+ &row_indices[0]);
+ }
+
+ // Eventually, optimize the graph
+ // structure (sort indices, make memory
+ // contiguous, etc).
+ graph->FillComplete(input_col_map, input_row_map);
+ graph->OptimizeStorage();
+
+ // check whether we got the number of
+ // columns right.
+ AssertDimension (sparsity_pattern.n_cols(),
+ static_cast<unsigned int>(graph->NumGlobalCols()));
+
+ // And now finally generate the matrix.
+ matrix.reset (new Epetra_FECrsMatrix(Copy, *graph, false));
+ last_action = Zero;
+
+ // In the end, the matrix needs to
+ // be compressed in order to be
+ // really ready.
+ compress();
+ }
+
+
+
+ // Reinitialize from a (parallel) Trilinos sparsity pattern: the old
+ // data is dropped, the column space map is copied from the pattern's
+ // domain partitioner, a new matrix is created on the pattern's graph,
+ // and compress() is called.
+ void
+ SparseMatrix::reinit (const SparsityPattern &sparsity_pattern)
+ {
+   // get rid of the previous contents before allocating anything new
+   temp_vector.clear ();
+   matrix.reset ();
+
+   column_space_map.reset
+     (new Epetra_Map (sparsity_pattern.domain_partitioner()));
+
+   matrix.reset
+     (new Epetra_FECrsMatrix (Copy,
+                              sparsity_pattern.trilinos_sparsity_pattern(),
+                              false));
+
+   compress();
+ }
+
+
+
+ // Reinitialize with the layout of another matrix: the column space map
+ // is copied from sparse_matrix's domain partitioner and a new matrix
+ // is created on sparse_matrix's sparsity graph. No values are copied
+ // by this function.
+ void
+ SparseMatrix::reinit (const SparseMatrix &sparse_matrix)
+ {
+   column_space_map.reset
+     (new Epetra_Map (sparse_matrix.domain_partitioner()));
+
+   // free the old storage first, then build on the other matrix's graph
+   temp_vector.clear ();
+   matrix.reset ();
+   matrix.reset
+     (new Epetra_FECrsMatrix (Copy,
+                              sparse_matrix.trilinos_sparsity_pattern(),
+                              false));
+
+   compress();
+ }
+
+
+
+ // Reinitialize from a deal.II sparse matrix without user-supplied
+ // maps. Maps with m() rows and n() columns are created on the
+ // communicator returned by comm_self(), and all remaining arguments
+ // are forwarded unchanged to the two-map overload.
+ template <typename number>
+ void
+ SparseMatrix::reinit (const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                       const double drop_tolerance,
+                       const bool copy_values,
+                       const ::dealii::SparsityPattern *use_this_sparsity)
+ {
+   const int n_matrix_rows = static_cast<int>(dealii_sparse_matrix.m());
+   const int n_matrix_cols = static_cast<int>(dealii_sparse_matrix.n());
+
+   const Epetra_Map rows (n_matrix_rows, 0, Utilities::Trilinos::comm_self());
+   const Epetra_Map columns (n_matrix_cols, 0, Utilities::Trilinos::comm_self());
+
+   reinit (rows, columns, dealii_sparse_matrix, drop_tolerance,
+           copy_values, use_this_sparsity);
+ }
+
+
+
+ // Reinitialize from a deal.II sparse matrix with one map that serves
+ // as both row map and column map; forwards to the two-map overload.
+ template <typename number>
+ void
+ SparseMatrix::reinit (const Epetra_Map &input_map,
+                       const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                       const double drop_tolerance,
+                       const bool copy_values,
+                       const ::dealii::SparsityPattern *use_this_sparsity)
+ {
+   const Epetra_Map &row_and_col_map = input_map;
+   reinit (row_and_col_map, row_and_col_map, dealii_sparse_matrix,
+           drop_tolerance, copy_values, use_this_sparsity);
+ }
+
+
+
+ // Reinitialize this matrix from a deal.II sparse matrix.
+ //
+ // input_row_map        : rows for which MyGID() is true are filled here
+ // input_col_map        : column map handed on to the sparsity pattern
+ // dealii_sparse_matrix : source of the values (and, by default, of the
+ //                        sparsity pattern)
+ // drop_tolerance       : off-diagonal entries whose absolute value does
+ //                        not exceed this are not copied
+ // copy_values          : if false, only the layout is set up by the
+ //                        sparsity-pattern overload and nothing is copied
+ // use_this_sparsity    : optional pattern to use instead of the one of
+ //                        dealii_sparse_matrix (null pointer: not given)
+ //
+ // Only entries that are present both in the source matrix and in the
+ // chosen sparsity pattern are copied. compress() is called at the end.
+ template <typename number>
+ void
+ SparseMatrix::reinit (const Epetra_Map &input_row_map,
+                       const Epetra_Map &input_col_map,
+                       const ::dealii::SparseMatrix<number> &dealii_sparse_matrix,
+                       const double drop_tolerance,
+                       const bool copy_values,
+                       const ::dealii::SparsityPattern *use_this_sparsity)
+ {
+   if (copy_values == false)
+     {
+       // in case we do not copy values, just
+       // call the other function.
+       if (use_this_sparsity == 0)
+         reinit (input_row_map, input_col_map,
+                 dealii_sparse_matrix.get_sparsity_pattern());
+       else
+         reinit (input_row_map, input_col_map,
+                 *use_this_sparsity);
+       return;
+     }
+
+   const unsigned int n_rows = dealii_sparse_matrix.m();
+
+   Assert (input_row_map.NumGlobalElements() == (int)n_rows,
+           ExcDimensionMismatch (input_row_map.NumGlobalElements(),
+                                 n_rows));
+   Assert (input_col_map.NumGlobalElements() == (int)dealii_sparse_matrix.n(),
+           ExcDimensionMismatch (input_col_map.NumGlobalElements(),
+                                 dealii_sparse_matrix.n()));
+
+   const ::dealii::SparsityPattern &sparsity_pattern =
+     (use_this_sparsity!=0)? *use_this_sparsity :
+     dealii_sparse_matrix.get_sparsity_pattern();
+
+   // keep the existing layout if a matrix with the same number of rows
+   // and nonzero entries is already there; otherwise rebuild it from
+   // the sparsity pattern
+   const bool layout_can_be_reused
+     = (matrix.get() != 0 && m() == n_rows &&
+        n_nonzero_elements() == sparsity_pattern.n_nonzero_elements());
+   if (layout_can_be_reused == false)
+     {
+       SparsityPattern trilinos_sparsity;
+       trilinos_sparsity.reinit (input_row_map, input_col_map, sparsity_pattern);
+       reinit (trilinos_sparsity);
+     }
+
+   // fill the values: go through all rows of the matrix, and then all
+   // columns. since the sparsity patterns of the input matrix and the
+   // specified sparsity pattern might be different, we walk through
+   // the row of both structures simultaneously in order to set only
+   // the entries that exist in both.
+   const std::size_t *const in_rowstart_indices
+     = dealii_sparse_matrix.get_sparsity_pattern().get_rowstart_indices();
+   const unsigned int *const in_cols
+     = dealii_sparse_matrix.get_sparsity_pattern().get_column_numbers();
+   const unsigned int *cols = sparsity_pattern.get_column_numbers();
+   const std::size_t *rowstart_indices =
+     sparsity_pattern.get_rowstart_indices();
+
+   const unsigned int maximum_row_length = matrix->MaxNumEntries();
+   std::vector<unsigned int> row_indices (maximum_row_length);
+   std::vector<TrilinosScalar> values (maximum_row_length);
+
+   for (unsigned int row=0; row<n_rows; ++row)
+     if (input_row_map.MyGID(static_cast<int>(row)))
+       {
+         std::size_t index = rowstart_indices[row];
+         std::size_t in_index = in_rowstart_indices[row];
+         const std::size_t row_end = rowstart_indices[row+1];
+         const std::size_t in_row_end = in_rowstart_indices[row+1];
+
+         unsigned int col = 0;
+         if (sparsity_pattern.optimize_diagonal())
+           {
+             // the diagonal is stored first in the row and is always
+             // copied, irrespective of the drop tolerance
+             values[col] = dealii_sparse_matrix.global_entry(in_index);
+             row_indices[col++] = row;
+             ++index;
+             ++in_index;
+           }
+
+         // two-pointer merge over the remaining column indices. every
+         // array access is guarded by the loop condition, and an entry
+         // is copied only when both patterns contain the same column.
+         while (in_index < in_row_end && index < row_end)
+           {
+             if (cols[index] < in_cols[in_index])
+               {
+                 ++index;
+                 continue;
+               }
+             if (in_cols[in_index] < cols[index])
+               {
+                 ++in_index;
+                 continue;
+               }
+
+             if (std::fabs(dealii_sparse_matrix.global_entry(in_index)) > drop_tolerance)
+               {
+                 values[col] = dealii_sparse_matrix.global_entry(in_index);
+                 row_indices[col++] = in_cols[in_index];
+               }
+             ++index;
+             ++in_index;
+           }
+
+         set (row, col, &row_indices[0], &values[0], false);
+       }
+
+   compress();
+ }
+
+
+
+ // Reinitialize from an existing, filled Epetra_CrsMatrix.
+ //
+ // The column space map is copied from the domain map of input_matrix
+ // and a new matrix is created on the graph of input_matrix. If
+ // copy_values is true, the locally stored values are copied as one
+ // block with memcpy, starting at the first local row of each matrix.
+ void
+ SparseMatrix::reinit (const Epetra_CrsMatrix &input_matrix,
+                       const bool copy_values)
+ {
+   Assert (input_matrix.Filled()==true,
+           ExcMessage("Input CrsMatrix has not called FillComplete()!"));
+
+   column_space_map.reset (new Epetra_Map (input_matrix.DomainMap()));
+
+   const Epetra_CrsGraph &input_graph = input_matrix.Graph();
+
+   // free old storage, then allocate on the graph of the input matrix
+   temp_vector.clear ();
+   matrix.reset ();
+   matrix.reset (new Epetra_FECrsMatrix(Copy, input_graph, false));
+
+   matrix->FillComplete (*column_space_map, input_matrix.RangeMap(), true);
+
+   if (copy_values)
+     {
+       // both pointers address the first value of the first local row
+       const TrilinosScalar *source = input_matrix[0];
+       TrilinosScalar *destination = (*matrix)[0];
+       const unsigned int n_local_nonzeros = input_matrix.NumMyNonzeros();
+       std::memcpy (destination, source,
+                    n_local_nonzeros*sizeof (TrilinosScalar));
+     }
+
+   compress();
+ }
+
+
+
+ // Reset to an empty matrix: a map with zero elements becomes the new
+ // column space map, temp_vector is cleared, and a matrix built on
+ // that empty map replaces the old one. The new matrix is filled right
+ // away and the compressed flag is set.
+ void
+ SparseMatrix::clear ()
+ {
+   column_space_map.reset
+     (new Epetra_Map (0, 0, Utilities::Trilinos::comm_self()));
+
+   temp_vector.clear();
+
+   matrix.reset (new Epetra_FECrsMatrix(View, *column_space_map, 0));
+   matrix->FillComplete();
+
+   compressed = true;
+ }
+
+
+
+ // Set all entries of the given global row to zero and, if
+ // new_diag_value is nonzero and the row contains a diagonal entry,
+ // store new_diag_value there.
+ //
+ // row            : global row index; rows not stored on this process
+ //                  are silently ignored
+ // new_diag_value : value for the diagonal entry (zero clears it too)
+ //
+ // The matrix must be filled (asserted in debug mode).
+ void
+ SparseMatrix::clear_row (const unsigned int row,
+                          const TrilinosScalar new_diag_value)
+ {
+   Assert (matrix->Filled()==true, ExcMatrixNotCompressed());
+
+   // Only do this on the rows owned
+   // locally on this processor.
+   const int local_row = matrix->LRID(static_cast<int>(row));
+   if (local_row < 0)
+     return;
+
+   TrilinosScalar *values;
+   int *col_indices;
+   int num_entries;
+   const int ierr = matrix->ExtractMyRowView(local_row, num_entries,
+                                             values, col_indices);
+   (void)ierr;   // only inspected by the assertion below
+
+   Assert (ierr == 0,
+           ExcTrilinosError(ierr));
+
+   // NOTE(review): the local row index is looked up among the local
+   // column indices; this presumably relies on local row and column
+   // numbering agreeing for the diagonal -- confirm.
+   int *const diag_find = std::find(col_indices, col_indices+num_entries,
+                                    local_row);
+   const int diag_index = static_cast<int>(diag_find - col_indices);
+
+   // std::find returns the end pointer (never a null pointer) when
+   // nothing is found, so the presence test must compare the index
+   // against the row length.
+   const bool diag_present = (diag_index < num_entries);
+
+   for (int j=0; j<num_entries; ++j)
+     if (diag_index != j || new_diag_value == 0)
+       values[j] = 0.;
+
+   if (diag_present && std::fabs(values[diag_index]) == 0.0 &&
+       new_diag_value != 0.0)
+     values[diag_index] = new_diag_value;
+ }
+
+
+
+ // Apply clear_row() to every entry of rows, with compress() called
+ // once before and once after the loop. Because the data changes, the
+ // trailing compress() flushes the buffers; as the original comment
+ // notes, this function has to be called on all processors.
+ void
+ SparseMatrix::clear_rows (const std::vector<unsigned int> &rows,
+                           const TrilinosScalar new_diag_value)
+ {
+   compress();
+
+   for (std::vector<unsigned int>::const_iterator current = rows.begin();
+        current != rows.end(); ++current)
+     clear_row (*current, new_diag_value);
+
+   compress();
+ }
+
+
+
+ TrilinosScalar
+ SparseMatrix::operator() (const unsigned int i,
+ const unsigned int j) const
+ {
+ // Extract local indices in
+ // the matrix.
+ int trilinos_i = matrix->LRID(static_cast<int>(i)), trilinos_j = matrix->LCID(static_cast<int>(j));
+ TrilinosScalar value = 0.;
+
+ // If the data is not on the
+ // present processor, we throw
+ // an exception. This is one of
+ // the two tiny differences to
+ // the el(i,j) call, which does
+ // not throw any assertions.
+ if (trilinos_i == -1)
+ {
+ Assert (false, ExcAccessToNonLocalElement(i, j, local_range().first,
+ local_range().second));
+ }
+ else
+ {
+ // Check whether the matrix has
+ // already been transformed to local
+ // indices.
+ Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+ // Prepare pointers for extraction
+ // of a view of the row.
+ int nnz_present = matrix->NumMyEntries(trilinos_i);
+ int nnz_extracted;
+ int *col_indices;
+ TrilinosScalar *values;
+
+ // Generate the view and make
+ // sure that we have not generated
+ // an error.
+ int ierr = matrix->ExtractMyRowView(trilinos_i, nnz_extracted,
+ values, col_indices);
+ Assert (ierr==0, ExcTrilinosError(ierr));
+
+ Assert (nnz_present == nnz_extracted,
+ ExcDimensionMismatch(nnz_present, nnz_extracted));
+
+ // Search the index where we
+ // look for the value, and then
+ // finally get it.
+
+ int *el_find = std::find(col_indices, col_indices + nnz_present,
+ trilinos_j);
+
+ int local_col_index = (int)(el_find - col_indices);
+
+ // This is actually the only
+ // difference to the el(i,j)
+ // function, which means that
+ // we throw an exception in
+ // this case instead of just
+ // returning zero for an
+ // element that is not present
+ // in the sparsity pattern.
+ if (local_col_index == nnz_present)
+ {
+ Assert (false, ExcInvalidIndex (i,j));
+ }
+ else
+ value = values[local_col_index];
+ }
+
+ return value;
+ }
+
+
+
+ TrilinosScalar
+ SparseMatrix::el (const unsigned int i,
+ const unsigned int j) const
+ {
+ // Extract local indices in
+ // the matrix.
+ int trilinos_i = matrix->LRID(static_cast<int>(i)), trilinos_j = matrix->LCID(static_cast<int>(j));
+ TrilinosScalar value = 0.;
+
+ // If the data is not on the
+ // present processor, we can't
+ // continue. Just print out zero
+ // as discussed in the
+ // documentation of this
+ // function. if you want error
+ // checking, use operator().
+ if ((trilinos_i == -1 ) || (trilinos_j == -1))
+ return 0.;
+ else
+ {
+ // Check whether the matrix
+ // already is transformed to
+ // local indices.
+ Assert (matrix->Filled(), ExcMatrixNotCompressed());
+
+ // Prepare pointers for extraction
+ // of a view of the row.
+ int nnz_present = matrix->NumMyEntries(trilinos_i);
+ int nnz_extracted;
+ int *col_indices;
+ TrilinosScalar *values;
+
+ // Generate the view and make
+ // sure that we have not generated
+ // an error.
+ int ierr = matrix->ExtractMyRowView(trilinos_i, nnz_extracted,
+ values, col_indices);
+ Assert (ierr==0, ExcTrilinosError(ierr));
+
+ Assert (nnz_present == nnz_extracted,
+ ExcDimensionMismatch(nnz_present, nnz_extracted));
+
+ // Search the index where we
+ // look for the value, and then
+ // finally get it.
+ int *el_find = std::find(col_indices, col_indices + nnz_present,
+ trilinos_j);
+
+ int local_col_index = (int)(el_find - col_indices);
+
+
+ // This is actually the only
+ // difference to the () function
+ // querying (i,j), where we throw an
+ // exception instead of just
+ // returning zero for an element
+ // that is not present in the
+ // sparsity pattern.
+ if (local_col_index == nnz_present)
+ value = 0;
+ else
+ value = values[local_col_index];
+ }
+
+ return value;
+ }
+
+
+
+ // Return the diagonal entry (i,i). The matrix has to be square
+ // (asserted in debug mode).
+ TrilinosScalar
+ SparseMatrix::diag_element (const unsigned int i) const
+ {
+   Assert (m() == n(), ExcNotQuadratic());
+
+#ifndef DEBUG
+   // optimized builds: plain element access through el(i,j), which
+   // performs no validity checks
+   return el(i,i);
+#else
+   // debug builds: go through operator(), which additionally checks
+   // that the element is valid (also in parallel)
+   return operator()(i,i);
+#endif
+ }
+
+
+
+ // Return the number of entries stored in the given global row.
+ //
+ // If the row is not stored on this process, the initial value -1 is
+ // returned after conversion to the unsigned return type. A Trilinos
+ // error while querying the row raises ExcTrilinosError.
+ unsigned int
+ SparseMatrix::row_length (const unsigned int row) const
+ {
+   Assert (row < m(), ExcInternalError());
+
+   // negative on all processes that do not own the row
+   const int local_row = matrix->LRID(static_cast<int>(row));
+
+   int n_columns = -1;
+   if (local_row >= 0)
+     {
+       const int ierr = matrix->NumMyRowEntries (local_row, n_columns);
+       AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+     }
+
+   return n_columns;
+ }
+
+
+
+ namespace internals
+ {
+ // Compute a sparse matrix-matrix product with the ML package and store
+ // it in result (which is cleared first).
+ //
+ // inputleft      : left factor A
+ // inputright     : right factor B
+ // result         : receives the product
+ // V              : if V.size() equals inputright.m(), every row i of B
+ //                  is multiplied by the i-th local entry of V before
+ //                  the product is formed; otherwise V is ignored
+ // transpose_left : if true, an explicit transpose of A is assembled
+ //                  and used as the left factor
+ //
+ // Dimension and partitioning compatibility of A and B is asserted in
+ // debug mode.
+ void perform_mmult (const SparseMatrix &inputleft,
+ const SparseMatrix &inputright,
+ SparseMatrix &result,
+ const VectorBase &V,
+ const bool transpose_left)
+ {
+ const bool use_vector = (V.size() == inputright.m() ? true : false);
+ if (transpose_left == false)
+ {
+ Assert (inputleft.n() == inputright.m(),
+ ExcDimensionMismatch(inputleft.n(), inputright.m()));
+ Assert (inputleft.domain_partitioner().SameAs(inputright.range_partitioner()),
+ ExcMessage ("Parallel partitioning of A and B does not fit."));
+ }
+ else
+ {
+ Assert (inputleft.m() == inputright.m(),
+ ExcDimensionMismatch(inputleft.m(), inputright.m()));
+ Assert (inputleft.range_partitioner().SameAs(inputright.range_partitioner()),
+ ExcMessage ("Parallel partitioning of A and B does not fit."));
+ }
+
+ result.clear();
+
+ // create a suitable operator B: in case
+ // we do not use a vector, all we need to
+ // do is to set the pointer. Otherwise,
+ // we insert the data from B, but
+ // multiply each row with the respective
+ // vector element.
+ Teuchos::RCP<Epetra_CrsMatrix> mod_B;
+ if (use_vector == false)
+ {
+ // non-owning handle (second argument false) to the matrix of B
+ mod_B = Teuchos::rcp(const_cast<Epetra_CrsMatrix *>
+ (&inputright.trilinos_matrix()),
+ false);
+ }
+ else
+ {
+ // owning handle to a fresh matrix built on the sparsity graph of B
+ mod_B = Teuchos::rcp(new Epetra_CrsMatrix
+ (Copy, inputright.trilinos_sparsity_pattern()),
+ true);
+ mod_B->FillComplete(inputright.domain_partitioner(),
+ inputright.range_partitioner());
+ Assert (inputright.local_range() == V.local_range(),
+ ExcMessage ("Parallel distribution of matrix B and vector V "
+ "does not match."));
+
+ const int local_N = inputright.local_size();
+ for (int i=0; i<local_N; ++i)
+ {
+ int N_entries = -1;
+ double *new_data, *B_data;
+ mod_B->ExtractMyRowView (i, N_entries, new_data);
+ inputright.trilinos_matrix().ExtractMyRowView (i, N_entries, B_data);
+ double value = V.trilinos_vector()[0][i];
+ for (int j=0; j<N_entries; ++j)
+ new_data[j] = value * B_data[j];
+ }
+ }
+
+ // use ML built-in method for performing
+ // the matrix-matrix product.
+ // create ML operators on top of the
+ // Epetra matrices. if we use a
+ // transposed matrix, let ML know it
+ ML_Comm *comm;
+ ML_Comm_Create(&comm);
+#ifdef ML_MPI
+ const Epetra_MpiComm *epcomm = dynamic_cast<const Epetra_MpiComm *>(&(inputleft.trilinos_matrix().Comm()));
+ // Get the MPI communicator, as it may not be MPI_COMM_WORLD, and update the ML comm object
+ if (epcomm) ML_Comm_Set_UsrComm(comm,epcomm->Comm());
+#endif
+ ML_Operator *A_ = ML_Operator_Create(comm);
+ ML_Operator *B_ = ML_Operator_Create(comm);
+ ML_Operator *C_ = ML_Operator_Create(comm);
+ // declared at this scope so that it is still alive while A_ wraps
+ // its Epetra matrix in the transposed case below
+ SparseMatrix transposed_mat;
+
+ if (transpose_left == false)
+ ML_Operator_WrapEpetraCrsMatrix
+ (const_cast<Epetra_CrsMatrix *>(&inputleft.trilinos_matrix()),A_,
+ false);
+ else
+ {
+ // create transposed matrix
+ SparsityPattern sparsity_transposed (inputleft.domain_partitioner(),
+ inputleft.range_partitioner());
+ Assert (inputleft.domain_partitioner().LinearMap() == true,
+ ExcMessage("Matrix must be partitioned contiguously between procs."));
+ // first pass: record every entry (row, col) of A as (col, row)
+ // in the transposed sparsity pattern
+ for (unsigned int i=0; i<inputleft.local_size(); ++i)
+ {
+ int num_entries, * indices;
+ inputleft.trilinos_sparsity_pattern().ExtractMyRowView(i, num_entries,
+ indices);
+ Assert (num_entries >= 0, ExcInternalError());
+ const unsigned int GID = inputleft.row_partitioner().GID(i);
+ for (int j=0; j<num_entries; ++j)
+ sparsity_transposed.add (inputleft.col_partitioner().GID(indices[j]),
+ GID);
+ }
+
+ sparsity_transposed.compress();
+ transposed_mat.reinit (sparsity_transposed);
+ // second pass: write the values of A into the transposed positions
+ for (unsigned int i=0; i<inputleft.local_size(); ++i)
+ {
+ int num_entries, * indices;
+ double *values;
+ inputleft.trilinos_matrix().ExtractMyRowView(i, num_entries,
+ values, indices);
+ Assert (num_entries >= 0, ExcInternalError());
+ const unsigned int GID = inputleft.row_partitioner().GID(i);
+ for (int j=0; j<num_entries; ++j)
+ transposed_mat.set (inputleft.col_partitioner().GID(indices[j]),
+ GID, values[j]);
+ }
+ transposed_mat.compress();
+ ML_Operator_WrapEpetraCrsMatrix
+ (const_cast<Epetra_CrsMatrix *>(&transposed_mat.trilinos_matrix()),
+ A_,false);
+ }
+ ML_Operator_WrapEpetraCrsMatrix(mod_B.get(),B_,false);
+
+ // We implement the multiplication by
+ // hand in a similar way as is done in
+ // ml/src/Operator/ml_rap.c for a triple
+ // matrix product. This means that the
+ // code is very similar to the one found
+ // in ml/src/Operator/ml_rap.c
+
+ // import data if necessary
+ ML_Operator *Btmp, *Ctmp, *Ctmp2, *tptr;
+ ML_CommInfoOP *getrow_comm;
+ int max_per_proc;
+ int N_input_vector = B_->invec_leng;
+ getrow_comm = B_->getrow->pre_comm;
+ if ( getrow_comm != NULL)
+ for (int i = 0; i < getrow_comm->N_neighbors; i++)
+ for (int j = 0; j < getrow_comm->neighbors[i].N_send; j++)
+ AssertThrow (getrow_comm->neighbors[i].send_list[j] < N_input_vector,
+ ExcInternalError());
+
+ ML_create_unique_col_id(N_input_vector, &(B_->getrow->loc_glob_map),
+ getrow_comm, &max_per_proc, B_->comm);
+ B_->getrow->use_loc_glob_map = ML_YES;
+ if (A_->getrow->pre_comm != NULL)
+ ML_exchange_rows( B_, &Btmp, A_->getrow->pre_comm);
+ else Btmp = B_;
+
+ // perform matrix-matrix product
+ ML_matmat_mult(A_, Btmp , &Ctmp);
+
+ // release temporary structures we needed
+ // for multiplication
+ ML_free(B_->getrow->loc_glob_map);
+ B_->getrow->loc_glob_map = NULL;
+ B_->getrow->use_loc_glob_map = ML_NO;
+ if (A_->getrow->pre_comm != NULL)
+ {
+ // detach B_ from the sub_matrix chain of Btmp before Btmp is
+ // destroyed; B_ itself is destroyed separately at the end
+ tptr = Btmp;
+ while ( (tptr!= NULL) && (tptr->sub_matrix != B_))
+ tptr = tptr->sub_matrix;
+ if (tptr != NULL) tptr->sub_matrix = NULL;
+ ML_RECUR_CSR_MSRdata_Destroy(Btmp);
+ ML_Operator_Destroy(&Btmp);
+ }
+
+ // make correct data structures
+ if (A_->getrow->post_comm != NULL)
+ ML_exchange_rows(Ctmp, &Ctmp2, A_->getrow->post_comm);
+ else
+ Ctmp2 = Ctmp;
+
+ ML_back_to_csrlocal(Ctmp2, C_, max_per_proc);
+
+ ML_RECUR_CSR_MSRdata_Destroy (Ctmp);
+ ML_Operator_Destroy (&Ctmp);
+
+ // Ctmp2 is a separate object only if rows were exchanged above
+ if (A_->getrow->post_comm != NULL)
+ {
+ ML_RECUR_CSR_MSRdata_Destroy(Ctmp2);
+ ML_Operator_Destroy (&Ctmp2);
+ }
+
+ // create an Epetra matrix from the ML
+ // matrix that we got as a result.
+ Epetra_CrsMatrix *C_mat;
+ ML_Operator2EpetraCrsMatrix(C_, C_mat);
+ C_mat->FillComplete();
+ C_mat->OptimizeStorage();
+ result.reinit (*C_mat);
+
+ // destroy allocated memory
+ delete C_mat;
+ ML_Operator_Destroy (&A_);
+ ML_Operator_Destroy (&B_);
+ ML_Operator_Destroy (&C_);
+ ML_Comm_Destroy (&comm);
+ }
+ }
+
+
+ // Matrix-matrix product with this matrix as the (untransposed) left
+ // factor; the work is done by internals::perform_mmult, which stores
+ // the result in C.
+ void
+ SparseMatrix::mmult (SparseMatrix &C,
+                      const SparseMatrix &B,
+                      const VectorBase &V) const
+ {
+   const bool transpose_left = false;
+   internals::perform_mmult (*this, B, C, V, transpose_left);
+ }
+
+
+
+ // Matrix-matrix product with the transpose of this matrix as the left
+ // factor; the work is done by internals::perform_mmult, which stores
+ // the result in C.
+ void
+ SparseMatrix::Tmmult (SparseMatrix &C,
+                       const SparseMatrix &B,
+                       const VectorBase &V) const
+ {
+   const bool transpose_left = true;
+   internals::perform_mmult (*this, B, C, V, transpose_left);
+ }
+
+
+
+ // Add factor*rhs to this matrix.
+ //
+ // factor : scaling applied to every entry of rhs
+ // rhs    : matrix with the same global dimensions (asserted)
+ //
+ // Three code paths are used, depending on the state of both matrices:
+ //  1. both filled, same local row range, same local number of
+ //     nonzeros: the values are updated in place through row views;
+ //  2. both filled and same local row range, but a different number of
+ //     nonzeros: each row of rhs is copied, scaled and summed in via
+ //     local indices;
+ //  3. otherwise: each row of rhs is copied, scaled and summed in via
+ //     global indices, followed by compress().
+ void
+ SparseMatrix::add (const TrilinosScalar factor,
+ const SparseMatrix &rhs)
+ {
+ Assert (rhs.m() == m(), ExcDimensionMismatch (rhs.m(), m()));
+ Assert (rhs.n() == n(), ExcDimensionMismatch (rhs.n(), n()));
+
+ // local row range of rhs; this local variable hides the member
+ // function of the same name, hence this->local_range() below
+ const std::pair<unsigned int, unsigned int>
+ local_range = rhs.local_range();
+
+ int ierr;
+
+ // If both matrices have been transformed
+ // to local index space (in Trilinos
+ // speak: they are filled), we're having
+ // matrices based on the same indices
+ // with the same number of nonzeros
+ // (actually, we'd need sparsity pattern,
+ // but that is too expensive to check),
+ // we can extract views of the column
+ // data on both matrices and simply
+ // manipulate the values that are
+ // addressed by the pointers.
+ if (matrix->Filled() == true &&
+ rhs.matrix->Filled() == true &&
+ this->local_range() == local_range &&
+ matrix->NumMyNonzeros() == rhs.matrix->NumMyNonzeros())
+ for (unsigned int row=local_range.first;
+ row < local_range.second; ++row)
+ {
+ Assert (matrix->NumGlobalEntries(row) ==
+ rhs.matrix->NumGlobalEntries(row),
+ ExcDimensionMismatch(matrix->NumGlobalEntries(row),
+ rhs.matrix->NumGlobalEntries(row)));
+
+ const int row_local = matrix->RowMap().LID(static_cast<int>(row));
+ int n_entries, rhs_n_entries;
+ TrilinosScalar *value_ptr, *rhs_value_ptr;
+
+ // In debug mode, we want to check
+ // whether the indices really are the
+ // same in the calling matrix and the
+ // input matrix. The reason for doing
+ // this only in debug mode is that both
+ // extracting indices and comparing
+ // indices is relatively slow compared to
+ // just working with the values.
+#ifdef DEBUG
+ int *index_ptr, *rhs_index_ptr;
+ ierr = rhs.matrix->ExtractMyRowView (row_local, rhs_n_entries,
+ rhs_value_ptr, rhs_index_ptr);
+ Assert (ierr == 0, ExcTrilinosError(ierr));
+
+ ierr = matrix->ExtractMyRowView (row_local, n_entries, value_ptr,
+ index_ptr);
+ Assert (ierr == 0, ExcTrilinosError(ierr));
+#else
+ // the return codes of these two calls are not checked
+ rhs.matrix->ExtractMyRowView (row_local, rhs_n_entries,rhs_value_ptr);
+ matrix->ExtractMyRowView (row_local, n_entries, value_ptr);
+#endif
+
+ AssertThrow (n_entries == rhs_n_entries,
+ ExcDimensionMismatch (n_entries, rhs_n_entries));
+
+ for (int i=0; i<n_entries; ++i)
+ {
+ *value_ptr++ += *rhs_value_ptr++ * factor;
+#ifdef DEBUG
+ Assert (*index_ptr++ == *rhs_index_ptr++,
+ ExcInternalError());
+#endif
+ }
+ }
+ // If we have different sparsity patterns
+ // (expressed by a different number of
+ // nonzero elements), we have to be more
+ // careful and extract a copy of the row
+ // data, multiply it by the factor and
+ // then add it to the matrix using the
+ // respective add() function.
+ else
+ {
+ // size the scratch buffers for the longest local row of rhs
+ unsigned int max_row_length = 0;
+ for (unsigned int row=local_range.first;
+ row < local_range.second; ++row)
+ max_row_length
+ = std::max (max_row_length,
+ static_cast<unsigned int>(rhs.matrix->NumGlobalEntries(row)));
+
+ std::vector<int> column_indices (max_row_length);
+ std::vector<TrilinosScalar> values (max_row_length);
+
+ // both matrices filled and same local rows: work with local indices
+ if (matrix->Filled() == true && rhs.matrix->Filled() == true &&
+ this->local_range() == local_range)
+ for (unsigned int row=local_range.first;
+ row < local_range.second; ++row)
+ {
+ const int row_local = matrix->RowMap().LID(static_cast<int>(row));
+ int n_entries;
+
+ ierr = rhs.matrix->ExtractMyRowCopy (row_local, max_row_length,
+ n_entries,
+ &values[0],
+ &column_indices[0]);
+ Assert (ierr == 0, ExcTrilinosError(ierr));
+
+ for (int i=0; i<n_entries; ++i)
+ values[i] *= factor;
+
+ TrilinosScalar *value_ptr = &values[0];
+
+ ierr = matrix->SumIntoMyValues (row_local, n_entries, value_ptr,
+ &column_indices[0]);
+ Assert (ierr == 0, ExcTrilinosError(ierr));
+ }
+ // fallback: work with global indices, then compress
+ else
+ {
+ for (unsigned int row=local_range.first;
+ row < local_range.second; ++row)
+ {
+ int n_entries;
+ ierr = rhs.matrix->Epetra_CrsMatrix::ExtractGlobalRowCopy
+ ((int)row, max_row_length, n_entries, &values[0], &column_indices[0]);
+ Assert (ierr == 0, ExcTrilinosError(ierr));
+
+ for (int i=0; i<n_entries; ++i)
+ values[i] *= factor;
+
+ ierr = matrix->Epetra_CrsMatrix::SumIntoGlobalValues
+ ((int)row, n_entries, &values[0], &column_indices[0]);
+ Assert (ierr == 0, ExcTrilinosError(ierr));
+ }
+ compress ();
+
+ }
+ }
+ }
+
+
+
+ // Toggle the "use transpose" flag of the underlying Trilinos matrix.
+ // Only the flag is flipped, so that subsequent vmult operations work
+ // with the transpose; the stored matrix structure is left as it is.
+ // A nonzero Trilinos return code raises ExcTrilinosError.
+ void
+ SparseMatrix::transpose ()
+ {
+   const bool currently_transposed = matrix->UseTranspose();
+
+   const int ierr = matrix->SetUseTranspose (!currently_transposed);
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+ }
+
+/**
+ * shuqiangwang: remove the trailing zeros of the mantissa (the zeros
+ * directly in front of 'e' or 'E') from the scientific notation of a
+ * floating point value, e.g. "1.50000e+00" becomes "1.5e+00".
+ *
+ * @param len    capacity of both buffers in characters, including the
+ *               terminating NUL; nothing is done if len <= 0
+ * @param inStr  NUL-terminated input text; only the characters in front
+ *               of the terminator (at most len-1) are looked at
+ * @param outStr receives the NUL-terminated result; never more than len
+ *               characters are written
+ *
+ * Text without an exponent marker (e.g. "nan" or "inf") is copied
+ * unchanged.
+ */
+ static void simplify_scientific_string(int len, const char inStr[20], char outStr[20])
+ {
+   if (len <= 0)
+     return;
+
+   // length of the actual text: stop at the terminator so that bytes
+   // behind it are never interpreted, and leave room for a NUL
+   int text_len = 0;
+   while (text_len < len-1 && inStr[text_len] != '\0')
+     ++text_len;
+
+   // look for the exponent marker, searching from the end
+   int exp_pos = -1;
+   for (int i=text_len-1; i>=0; --i)
+     if (inStr[i]=='e' || inStr[i]=='E')
+       {
+         exp_pos = i;
+         break;
+       }
+
+   // the mantissa ends where its trailing zeros begin; without an
+   // exponent the whole text is kept
+   int mantissa_end = (exp_pos >= 0 ? exp_pos : text_len);
+   if (exp_pos >= 0)
+     while (mantissa_end > 0 && inStr[mantissa_end-1]=='0')
+       --mantissa_end;
+
+   // assemble the result: shortened mantissa followed by the exponent
+   int j = 0;
+   for (int i=0; i<mantissa_end; ++i)
+     outStr[j++] = inStr[i];
+   if (exp_pos >= 0)
+     for (int i=exp_pos; i<text_len; ++i)
+       outStr[j++] = inStr[i];
+
+   // j <= text_len <= len-1, so this stays inside the buffer
+   outStr[j] = '\0';
+ }
+
+ /**
+  * shuqiangwang: print the locally stored rows to std::cout in a
+  * PETSc-like layout: one line per row of the form
+  * "row <global row>: (<global column>, <value>)  (...) ", where each
+  * value is formatted with "%.5e" and then shortened by
+  * simplify_scientific_string().
+  */
+ void
+ SparseMatrix::write_ascii () const
+ {
+   const int buffer_size = 20;
+   char formatted[buffer_size], shortened[buffer_size];
+
+   const int n_local_rows = matrix->NumMyRows();
+   for (int local_row=0; local_row<n_local_rows; ++local_row)
+     {
+       double *values;
+       int *indices;
+       int num_entries;
+
+       std::cout << "row " << matrix->GRID(local_row) << ":";
+       matrix->ExtractMyRowView (local_row, num_entries, values, indices);
+
+       for (int k=0; k<num_entries; ++k)
+         {
+           snprintf(formatted, buffer_size, "%.5e", values[k]);
+           simplify_scientific_string(buffer_size, formatted, shortened);
+
+           std::cout << " (" << matrix->GCID(indices[k])
+                     << ", " << shortened
+                     << ") ";
+         }
+       std::cout << std::endl;
+     }
+ }
+
+
+ // Write the matrix to the given stream. With
+ // print_detailed_trilinos_information set, the Trilinos matrix object
+ // is streamed directly; otherwise every locally stored entry is
+ // written on its own line as "(row,column) value" using global
+ // indices. As of now, no particularly neat output is generated in
+ // case of multiple processors. Raises ExcIO if the stream is in a
+ // failed state afterwards.
+ void
+ SparseMatrix::print (std::ostream &out,
+                      const bool print_detailed_trilinos_information) const
+ {
+   if (print_detailed_trilinos_information == false)
+     {
+       double *values;
+       int *indices;
+       int num_entries;
+
+       for (int local_row=0; local_row<matrix->NumMyRows(); ++local_row)
+         {
+           matrix->ExtractMyRowView (local_row, num_entries, values, indices);
+           for (int k=0; k<num_entries; ++k)
+             {
+               out << "(" << matrix->GRID(local_row)
+                   << "," << matrix->GCID(indices[k]) << ") "
+                   << values[k] << std::endl;
+             }
+         }
+     }
+   else
+     {
+       out << *matrix;
+     }
+
+   AssertThrow (out, ExcIO());
+ }
+
+
+
+ // Return an estimate of the memory used by this object in bytes: the
+ // sizes of this object, of the Epetra matrix object and of its graph
+ // data object, plus one scalar and one int per locally stored nonzero
+ // and one int per local row.
+ std::size_t
+ SparseMatrix::memory_consumption () const
+ {
+   // sizeof(*this) measures the object itself (sizeof(this) would only
+   // be the size of a pointer); std::size_t avoids narrowing the sum
+   const std::size_t static_memory = sizeof(*this) + sizeof (*matrix)
+                                     + sizeof(*matrix->Graph().DataPtr());
+   return ((sizeof(TrilinosScalar)+sizeof(int))*matrix->NumMyNonzeros() +
+           sizeof(int)*local_size() +
+           static_memory);
+ }
+
+
+
+
+ // explicit instantiations
+ //
+ // reinit() from a sparsity pattern only
+ template void SparseMatrix::reinit (const dealii::SparsityPattern &);
+ template void SparseMatrix::reinit (const CompressedSparsityPattern &);
+ template void SparseMatrix::reinit (const CompressedSetSparsityPattern &);
+ template void SparseMatrix::reinit (const CompressedSimpleSparsityPattern &);
+
+ // reinit() from one map and a sparsity pattern
+ template void SparseMatrix::reinit (const Epetra_Map &, const dealii::SparsityPattern &, const bool);
+ template void SparseMatrix::reinit (const Epetra_Map &, const CompressedSparsityPattern &, const bool);
+ template void SparseMatrix::reinit (const Epetra_Map &, const CompressedSetSparsityPattern &, const bool);
+ template void SparseMatrix::reinit (const Epetra_Map &, const CompressedSimpleSparsityPattern &, const bool);
+
+ // reinit() from a row map, a column map and a sparsity pattern
+ template void SparseMatrix::reinit (const Epetra_Map &, const Epetra_Map &,
+                                     const dealii::SparsityPattern &, const bool);
+ template void SparseMatrix::reinit (const Epetra_Map &, const Epetra_Map &,
+                                     const CompressedSparsityPattern &, const bool);
+ template void SparseMatrix::reinit (const Epetra_Map &, const Epetra_Map &,
+                                     const CompressedSimpleSparsityPattern &, const bool);
+ template void SparseMatrix::reinit (const Epetra_Map &, const Epetra_Map &,
+                                     const CompressedSetSparsityPattern &, const bool);
+
+ // reinit() from a deal.II sparse matrix
+ template void SparseMatrix::reinit (const dealii::SparseMatrix<float> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+ template void SparseMatrix::reinit (const dealii::SparseMatrix<double> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+ template void SparseMatrix::reinit (const dealii::SparseMatrix<long double> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+
+ // reinit() from one map and a deal.II sparse matrix
+ template void SparseMatrix::reinit (const Epetra_Map &,
+                                     const dealii::SparseMatrix<float> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+ template void SparseMatrix::reinit (const Epetra_Map &,
+                                     const dealii::SparseMatrix<double> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+ template void SparseMatrix::reinit (const Epetra_Map &,
+                                     const dealii::SparseMatrix<long double> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+
+ // reinit() from a row map, a column map and a deal.II sparse matrix
+ template void SparseMatrix::reinit (const Epetra_Map &, const Epetra_Map &,
+                                     const dealii::SparseMatrix<float> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+ template void SparseMatrix::reinit (const Epetra_Map &, const Epetra_Map &,
+                                     const dealii::SparseMatrix<double> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+ template void SparseMatrix::reinit (const Epetra_Map &, const Epetra_Map &,
+                                     const dealii::SparseMatrix<long double> &,
+                                     const double, const bool, const dealii::SparsityPattern *);
+
+
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_TRILINOS
Added: branches/s-wang2/for_deal.II/source/lac/trilinos_vector_base.cc
===================================================================
--- branches/s-wang2/for_deal.II/source/lac/trilinos_vector_base.cc (rev 0)
+++ branches/s-wang2/for_deal.II/source/lac/trilinos_vector_base.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,460 @@
+//---------------------------------------------------------------------------
+// $Id: trilinos_vector_base.cc 27628 2012-11-20 22:49:26Z heister $
+// Version: $Name$
+//
+// Copyright (C) 2008, 2010, 2011, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+
+
+#include <deal.II/base/memory_consumption.h>
+#include <deal.II/lac/trilinos_vector_base.h>
+
+#ifdef DEAL_II_USE_TRILINOS
+
+# include <cmath>
+# include <Epetra_Import.h>
+
+
+DEAL_II_NAMESPACE_OPEN
+
+namespace TrilinosWrappers
+{
+ namespace internal
+ {
+ VectorReference::operator TrilinosScalar () const
+ {
+   // The requested global index has to lie inside the vector.
+   Assert (index < vector.size(),
+           ExcIndexRange (index, 0, vector.size()));
+
+   // Epetra vectors can be indexed with [] as well as with (), but
+   // only () does bounds checking. Since the bounds are verified
+   // here, the cheaper [] is used below. Only values stored on this
+   // processor can be read, so the global index is first translated
+   // into a local one.
+   const int global_index = static_cast<int>(index);
+   const int local_index  = vector.vector->Map().LID(global_index);
+   Assert (local_index >= 0,
+           ExcAccessToNonLocalElement (index,
+                                       vector.vector->Map().MinMyGID(),
+                                       vector.vector->Map().MaxMyGID()));
+
+   return (*(vector.vector))[0][local_index];
+ }
+ }
+
+
+
+ // Default constructor. Creates an empty vector: the Epetra_FEVector
+ // is built on an Epetra_Map constructed with arguments (0,0,comm),
+ // where the communicator is MPI_COMM_SELF if MPI support is compiled
+ // in and a serial communicator otherwise. The object starts without
+ // ghost elements, flagged as compressed, and with last_action set to
+ // Zero.
+ VectorBase::VectorBase ()
+ :
+ last_action (Zero),
+ compressed (true),
+ has_ghosts (false),
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+ vector(new Epetra_FEVector(
+ Epetra_Map(0,0,Epetra_MpiComm(MPI_COMM_SELF))))
+#else
+ vector(new Epetra_FEVector(
+ Epetra_Map(0,0,Epetra_SerialComm())))
+#endif
+ {}
+
+
+
+ // Copy constructor. Copy-constructs a new Epetra_FEVector from the
+ // one held by v and takes over v's has_ghosts flag. Note that
+ // last_action and compressed are not copied from v; they are reset
+ // to Zero and true, respectively.
+ VectorBase::VectorBase (const VectorBase &v)
+ :
+ Subscriptor(),
+ last_action (Zero),
+ compressed (true),
+ has_ghosts (v.has_ghosts),
+ vector(new Epetra_FEVector(*v.vector))
+ {}
+
+
+
+ // Destructor. Has no explicit work to do; the members release their
+ // resources themselves.
+ VectorBase::~VectorBase ()
+ {}
+
+
+
+ void
+ VectorBase::clear ()
+ {
+   // Clearing the vector means throwing away the stored Epetra
+   // vector and replacing it by a newly created, empty one whose map
+   // lives on this process only.
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+   Epetra_MpiComm self_communicator (MPI_COMM_SELF);
+#else
+   Epetra_SerialComm self_communicator;
+#endif
+   Epetra_Map empty_map (0, 0, self_communicator);
+
+   has_ghosts = false;
+   vector.reset (new Epetra_FEVector(empty_map));
+   last_action = Zero;
+ }
+
+
+
+ VectorBase &
+ VectorBase::operator = (const VectorBase &v)
+ {
+   Assert (vector.get() != 0,
+           ExcMessage("Vector is not constructed properly."));
+
+   if (local_range() == v.local_range())
+     {
+       // Both vectors own the same index range, so the values can be
+       // copied into the existing storage. In debug mode, make sure
+       // the maps really agree.
+       Assert (vector->Map().SameAs(v.vector->Map()) == true,
+               ExcMessage ("The Epetra maps in the assignment operator ="
+                           " do not match, even though the local_range "
+                           " seems to be the same. Check vector setup!"));
+
+       // Finish whatever operation was pending on this vector ...
+       const int assemble_error = vector->GlobalAssemble(last_action);
+       AssertThrow (assemble_error == 0, ExcTrilinosError(assemble_error));
+
+       // ... and then overwrite it: this = 1.0*v + 0.0*this.
+       const int update_error = vector->Update(1.0, *v.vector, 0.0);
+       AssertThrow (update_error == 0, ExcTrilinosError(update_error));
+
+       last_action = Zero;
+     }
+   else
+     {
+       // The locally owned ranges differ: replace the stored vector by
+       // a copy of the one held by v and adopt its ghost flag.
+       last_action = Zero;
+       vector.reset (new Epetra_FEVector(*v.vector));
+       has_ghosts = v.has_ghosts;
+     }
+
+   return *this;
+ }
+
+
+
+ template <typename number>
+ VectorBase &
+ VectorBase::operator = (const ::dealii::Vector<number> &v)
+ {
+   Assert (size() == v.size(),
+           ExcDimensionMismatch(size(), v.size()));
+
+   // Element-by-element copy of the locally owned range. This is
+   // probably not very efficient (if number==TrilinosScalar the
+   // per-element conversion could be avoided), but it works, and
+   // hopefully this is not a particularly frequent operation.
+   const std::pair<unsigned int, unsigned int>
+   owned_range = this->local_range ();
+   TrilinosScalar *const local_values = (*vector)[0];
+
+   for (unsigned int global_i=owned_range.first;
+        global_i<owned_range.second; ++global_i)
+     local_values[global_i-owned_range.first] = v(global_i);
+
+   return *this;
+ }
+
+
+
+ TrilinosScalar
+ VectorBase::el (const unsigned int index) const
+ {
+   // Translate the global index into a local one.
+   const int local_index = vector->Map().LID(static_cast<int>(index));
+
+   // A result of -1 means the element is not present on the current
+   // processor. In contrast to operator(), this is not treated as an
+   // error here; zero is returned instead.
+   //
+   // TODO: Is this reasonable?
+   if (local_index == -1)
+     return 0.;
+
+   return (*vector)[0][local_index];
+ }
+
+
+
+ TrilinosScalar
+ VectorBase::operator () (const unsigned int index) const
+ {
+   // Translate the global index into a local one.
+   const int local_index = vector->Map().LID(static_cast<int>(index));
+
+   if (local_index != -1)
+     return (*vector)[0][local_index];
+
+   // The element is not present on the current processor, so we
+   // cannot continue. This is the main difference to the el()
+   // function. If the assertion is compiled out, zero is returned.
+   Assert (false, ExcAccessToNonlocalElement(index, local_range().first,
+                                             local_range().second-1));
+   return 0.;
+ }
+
+
+
+ void
+ VectorBase::add (const VectorBase &v,
+                  const bool allow_different_maps)
+ {
+   // Without permission to handle different maps this is nothing but
+   // the ordinary operator +=.
+   if (!allow_different_maps)
+     {
+       *this += v;
+       return;
+     }
+
+   AssertThrow (size() == v.size(),
+                ExcDimensionMismatch (size(), v.size()));
+
+   // Build an importer from the map of v to the map of this vector
+   // and add the imported values onto the values stored here.
+   Epetra_Import data_exchange (vector->Map(), v.vector->Map());
+
+   const int ierr = vector->Import(*v.vector, data_exchange, Add);
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+   last_action = Insert;
+ }
+
+
+
+ bool
+ VectorBase::operator == (const VectorBase &v) const
+ {
+   Assert (size() == v.size(),
+           ExcDimensionMismatch(size(), v.size()));
+
+   // Vectors storing a different number of local elements cannot be
+   // equal.
+   const unsigned int n_local = local_size();
+   if (n_local != v.local_size())
+     return false;
+
+   // Otherwise compare the locally stored entries one by one. Note
+   // that this only looks at the data of the current processor.
+   for (unsigned int i=0; i<n_local; ++i)
+     {
+       const bool entries_differ = ((*(v.vector))[0][i] != (*vector)[0][i]);
+       if (entries_differ)
+         return false;
+     }
+
+   return true;
+ }
+
+
+
+ bool
+ VectorBase::operator != (const VectorBase &v) const
+ {
+   Assert (size() == v.size(),
+           ExcDimensionMismatch(size(), v.size()));
+
+   // Defined as the negation of operator ==.
+   const bool vectors_equal = (*this == v);
+   return !vectors_equal;
+ }
+
+// shuqiangwang
+ VectorBase::real_type
+ VectorBase::min () const
+ {
+   // Not available for vectors with ghost elements.
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   // Let Epetra compute the minimal entry; a non-zero return code is
+   // reported as an exception.
+   TrilinosScalar minimum;
+   const int ierr = vector->MinValue (&minimum);
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+   return minimum;
+ }
+
+ VectorBase::real_type
+ VectorBase::max () const
+ {
+   // Not available for vectors with ghost elements.
+   Assert (!has_ghost_elements(), ExcGhostsPresent());
+
+   // Let Epetra compute the maximal entry; a non-zero return code is
+   // reported as an exception.
+   TrilinosScalar maximum;
+   const int ierr = vector->MaxValue (&maximum);
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+   return maximum;
+ }
+
+
+ bool
+ VectorBase::all_zero () const
+ {
+   // Scan the locally stored elements and remember whether any of
+   // them is non-zero. The flag is an integer rather than a bool so
+   // that it can be summed over all processors below.
+   const TrilinosScalar *ptr = (*vector)[0];
+   const TrilinosScalar *const eptr = ptr + local_size();
+   unsigned int flag = 0;
+   for (; ptr != eptr; ++ptr)
+     if (*ptr != 0)
+       {
+         flag = 1;
+         break;
+       }
+
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+   // in parallel, check that the vector
+   // is zero on _all_ processors.
+   const Epetra_MpiComm *mpi_comm
+     = dynamic_cast<const Epetra_MpiComm *>(&vector->Map().Comm());
+
+   // The cast yields a null pointer if the map was not built on an
+   // MPI communicator; fail with an exception instead of
+   // dereferencing it.
+   AssertThrow (mpi_comm != 0, ExcInternalError());
+
+   const unsigned int num_nonzero
+     = Utilities::MPI::sum(flag, mpi_comm->Comm());
+   return num_nonzero == 0;
+#else
+   return flag == 0;
+#endif
+ }
+
+
+
+ bool
+ VectorBase::is_non_negative () const
+ {
+#ifdef DEAL_II_COMPILER_SUPPORTS_MPI
+   // For a truly parallel vector the answer would require
+   // communication between processors. This still has to be
+   // implemented, so only vectors stored completely on this processor
+   // are accepted.
+   AssertThrow(local_size() == size(), ExcNotImplemented());
+#endif
+
+   // Obtain a view of the stored values ...
+   TrilinosScalar *values;
+   int leading_dimension;
+   const int ierr = vector->ExtractView (&values, &leading_dimension);
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+   // ... and look for the first negative entry.
+   //
+   // TODO: This won't work in parallel like this. Find out a better
+   // way to this in that case.
+   const unsigned int n_entries = size();
+   for (unsigned int i=0; i<n_entries; ++i)
+     if (values[i] < 0.0)
+       return false;
+
+   return true;
+ }
+
+
+
+ // TODO: up to now only local
+ // data printed out! Find a
+ // way to neatly output
+ // distributed data...
+ void
+ VectorBase::print (const char *format) const
+ {
+   Assert (vector->GlobalLength()!=0, ExcEmptyObject());
+
+   // Only the elements stored on this processor can be accessed
+   // through (*vector)[0][j], so the loop must run over local_size()
+   // and not over the global size(). With size(), a distributed
+   // vector would be read beyond the end of its local storage. If the
+   // whole vector is stored locally the two sizes agree.
+   const unsigned int n_local = local_size();
+   for (unsigned int j=0; j<n_local; ++j)
+     {
+       const double t = (*vector)[0][j];
+
+       // Use the caller's printf format if one is given, and a
+       // default format otherwise.
+       if (format != 0)
+         std::printf (format, t);
+       else
+         std::printf (" %5.2f", t);
+     }
+   std::printf ("\n");
+ }
+
+
+
+ void
+ VectorBase::print (std::ostream &out,
+                    const unsigned int precision,
+                    const bool scientific,
+                    const bool across) const
+ {
+   AssertThrow (out, ExcIO());
+
+   // Get a view of the values stored on this processor.
+   // TODO: up to now only local data printed out! Find a way to
+   // neatly output distributed data...
+   TrilinosScalar *val;
+   int leading_dimension;
+   const int ierr = vector->ExtractView (&val, &leading_dimension);
+   AssertThrow (ierr == 0, ExcTrilinosError(ierr));
+
+   // Remember the formatting state of the stream so that it can be
+   // handed back to the caller unchanged.
+   const std::ios::fmtflags old_flags = out.flags();
+   const std::streamsize old_precision = out.precision (precision);
+
+   if (scientific)
+     out.setf (std::ios::scientific, std::ios::floatfield);
+   else
+     out.setf (std::ios::fixed, std::ios::floatfield);
+
+   // The view only covers the locally stored elements, so loop over
+   // local_size(). Looping over the global size() would read past the
+   // end of the view for a distributed vector. If the whole vector is
+   // stored locally the two sizes agree.
+   const unsigned int n_local = local_size();
+   if (across)
+     for (unsigned int i=0; i<n_local; ++i)
+       out << static_cast<double>(val[i]) << ' ';
+   else
+     for (unsigned int i=0; i<n_local; ++i)
+       out << static_cast<double>(val[i]) << std::endl;
+   out << std::endl;
+
+   // restore the previous formatting of the stream
+   out.flags (old_flags);
+   out.precision (old_precision);
+
+   AssertThrow (out, ExcIO());
+ }
+
+
+
+ void
+ VectorBase::swap (VectorBase &v)
+ {
+   // Exchange the complete state of the two objects. The ghost flag
+   // describes the stored Epetra vector and therefore has to travel
+   // with it; otherwise each object would afterwards report the ghost
+   // status of the other one's data.
+   std::swap(last_action, v.last_action);
+   std::swap(compressed, v.compressed);
+   std::swap(has_ghosts, v.has_ghosts);
+   std::swap(vector, v.vector);
+ }
+
+
+
+ std::size_t
+ VectorBase::memory_consumption () const
+ {
+   //TODO[TH]: No accurate memory consumption for Trilinos vectors
+   //yet. This is a rough approximation that charges one index and
+   //one value per locally stored entry, on top of the size of this
+   //object itself.
+   const std::size_t bytes_per_entry = sizeof(double) + sizeof(int);
+
+   return sizeof(*this) + this->local_size() * bytes_per_entry;
+ }
+
+} /* end of namespace TrilinosWrappers */
+
+
+namespace TrilinosWrappers
+{
+#include "trilinos_vector_base.inst"
+}
+
+DEAL_II_NAMESPACE_CLOSE
+
+#endif // DEAL_II_USE_TRILINOS
Added: branches/s-wang2/for_deal.II/source/numerics/derivative_approximation.inst.in
===================================================================
--- branches/s-wang2/for_deal.II/source/numerics/derivative_approximation.inst.in (rev 0)
+++ branches/s-wang2/for_deal.II/source/numerics/derivative_approximation.inst.in 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,181 @@
+//---------------------------------------------------------------------------
+// $Id: derivative_approximation.inst.in 25612 2012-06-07 16:46:33Z heister $
+// Version: $Name$
+//
+// Copyright (C) 2010, 2012 by the deal.II authors
+//
+// This file is subject to QPL and may not be distributed
+// without copyright and license information. Please refer
+// to the file deal.II/doc/license.html for the text and
+// further information on this license.
+//
+//---------------------------------------------------------------------------
+
+for (deal_II_dimension : DIMENSIONS)
+{
+#define INSTANTIATE(InputVector,DH) \
+template \
+void \
+DerivativeApproximation:: \
+approximate_gradient<deal_II_dimension> \
+(const Mapping<deal_II_dimension> &mapping, \
+ const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ Vector<float> &derivative_norm, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_gradient<deal_II_dimension> \
+(const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ Vector<float> &derivative_norm, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_second_derivative<deal_II_dimension> \
+(const Mapping<deal_II_dimension> &mapping, \
+ const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ Vector<float> &derivative_norm, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_second_derivative<deal_II_dimension> \
+(const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ Vector<float> &derivative_norm, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_derivative_tensor<deal_II_dimension> \
+(const Mapping<deal_II_dimension> & mapping, \
+ const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<1,deal_II_dimension> &derivative, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_derivative_tensor<deal_II_dimension> \
+(const Mapping<deal_II_dimension> & mapping, \
+ const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<2,deal_II_dimension> &derivative, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_derivative_tensor<deal_II_dimension> \
+(const Mapping<deal_II_dimension> & mapping, \
+ const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<3,deal_II_dimension> &derivative, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_derivative_tensor<deal_II_dimension> \
+(const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<1,deal_II_dimension> &derivative, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_derivative_tensor<deal_II_dimension> \
+(const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<2,deal_II_dimension> &derivative, \
+ const unsigned int component); \
+ \
+template \
+void \
+DerivativeApproximation:: \
+approximate_derivative_tensor<deal_II_dimension> \
+(const DH<deal_II_dimension> &dof_handler, \
+ const InputVector &solution, \
+ const DH<deal_II_dimension>::active_cell_iterator &cell,\
+ Tensor<3,deal_II_dimension> &derivative, \
+ const unsigned int component)
+
+
+INSTANTIATE(Vector<double>, DoFHandler);
+INSTANTIATE(Vector<float>, DoFHandler);
+INSTANTIATE(BlockVector<double>, DoFHandler);
+INSTANTIATE(BlockVector<float>, DoFHandler);
+
+INSTANTIATE(Vector<double>, hp::DoFHandler);
+INSTANTIATE(Vector<float>, hp::DoFHandler);
+INSTANTIATE(BlockVector<double>, hp::DoFHandler);
+INSTANTIATE(BlockVector<float>, hp::DoFHandler);
+
+#ifdef DEAL_II_USE_PETSC
+INSTANTIATE(PETScWrappers::Vector, DoFHandler);
+INSTANTIATE(PETScWrappers::BlockVector, DoFHandler);
+INSTANTIATE(PETScWrappers::MPI::Vector, DoFHandler); // shuqiangwang
+INSTANTIATE(PETScWrappers::MPI::BlockVector, DoFHandler);
+
+INSTANTIATE(PETScWrappers::Vector, hp::DoFHandler);
+INSTANTIATE(PETScWrappers::BlockVector, hp::DoFHandler);
+#endif
+
+#ifdef DEAL_II_USE_TRILINOS
+INSTANTIATE(TrilinosWrappers::Vector, DoFHandler);
+INSTANTIATE(TrilinosWrappers::BlockVector, DoFHandler);
+INSTANTIATE(TrilinosWrappers::MPI::Vector, DoFHandler);
+INSTANTIATE(TrilinosWrappers::MPI::BlockVector, DoFHandler);
+
+//TODO: test hp before instantiating
+#endif
+
+#undef INSTANTIATE
+
+template
+double
+DerivativeApproximation::
+derivative_norm(const Tensor<1,deal_II_dimension> &derivative);
+
+template
+double
+DerivativeApproximation::
+derivative_norm(const Tensor<2,deal_II_dimension> &derivative);
+
+template
+double
+DerivativeApproximation::
+derivative_norm(const Tensor<3,deal_II_dimension> &derivative);
+
+
+// static variables
+//
+// on AIX, the linker is unhappy about some missing symbols. they
+// should really be there, but explicitly instantiating them will also
+// not hurt
+template
+const UpdateFlags
+DerivativeApproximation::Gradient<deal_II_dimension>::update_flags;
+
+template
+const UpdateFlags
+DerivativeApproximation::SecondDerivative<deal_II_dimension>::update_flags;
+template
+const UpdateFlags
+DerivativeApproximation::ThirdDerivative<deal_II_dimension>::update_flags;
+}
Modified: branches/s-wang2/include/aspect/global.h
===================================================================
--- branches/s-wang2/include/aspect/global.h 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/include/aspect/global.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -24,9 +24,10 @@
#define __aspect__global_h
-#include <deal.II/lac/trilinos_block_vector.h>
-#include <deal.II/lac/trilinos_block_sparse_matrix.h>
-#include <deal.II/lac/trilinos_precondition.h>
+#include <deal.II/lac/petsc_parallel_block_vector.h>
+#include <deal.II/lac/petsc_parallel_block_sparse_matrix.h>
+#include <deal.II/lac/petsc_solver.h>
+#include <deal.II/lac/petsc_precondition.h>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
@@ -74,46 +75,47 @@
/**
* Typedef for the vector type used.
*/
- typedef TrilinosWrappers::MPI::Vector Vector;
+ typedef PETScWrappers::MPI::Vector Vector;
/**
* Typedef for the type used to describe vectors that
* consist of multiple blocks.
*/
- typedef TrilinosWrappers::MPI::BlockVector BlockVector;
+ typedef PETScWrappers::MPI::BlockVector BlockVector;
/**
* Typedef for the sparse matrix type used.
*/
- typedef TrilinosWrappers::SparseMatrix SparseMatrix;
+ typedef PETScWrappers::MPI::SparseMatrix SparseMatrix;
/**
* Typedef for the type used to describe sparse matrices that
* consist of multiple blocks.
*/
- typedef TrilinosWrappers::BlockSparseMatrix BlockSparseMatrix;
+ typedef PETScWrappers::MPI::BlockSparseMatrix BlockSparseMatrix;
+// typedef PETScWrappers::SolverCG SolverCG;
+
/**
* Typedef for the AMG preconditioner type used for the
* top left block of the Stokes matrix.
*/
- typedef TrilinosWrappers::PreconditionAMG PreconditionAMG;
+ typedef PETScWrappers::PreconditionBoomerAMG PreconditionAMG;
/**
* Typedef for the Incomplete Cholesky preconditioner used
* for other blocks of the system matrix.
*/
- typedef TrilinosWrappers::PreconditionIC PreconditionIC;
+ typedef PETScWrappers::PreconditionICC PreconditionIC;
/**
* Typedef for the Incomplete LU decomposition preconditioner used
* for other blocks of the system matrix.
*/
- typedef TrilinosWrappers::PreconditionILU PreconditionILU;
+ typedef PETScWrappers::PreconditionILU PreconditionILU;
}
}
-
/**
* A macro that is used in instantiating the ASPECT classes and functions
* for both 2d and 3d. Call this macro with the name of another macro that
Added: branches/s-wang2/include/aspect/global_trilinos.h
===================================================================
--- branches/s-wang2/include/aspect/global_trilinos.h (rev 0)
+++ branches/s-wang2/include/aspect/global_trilinos.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -0,0 +1,127 @@
+/*
+ Copyright (C) 2011, 2012 by the authors of the ASPECT code.
+
+ This file is part of ASPECT.
+
+ ASPECT is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ ASPECT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with ASPECT; see the file doc/COPYING. If not see
+ <http://www.gnu.org/licenses/>.
+*/
+/* $Id: global.h 895 2012-04-10 12:53:27Z bangerth $ */
+
+
+#ifndef __aspect__global_h
+#define __aspect__global_h
+
+
+#include <deal.II/lac/trilinos_block_vector.h>
+#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+#include <deal.II/lac/trilinos_precondition.h>
+
+#include <boost/archive/binary_oarchive.hpp>
+#include <boost/archive/binary_iarchive.hpp>
+#include <boost/archive/text_oarchive.hpp>
+#include <boost/archive/text_iarchive.hpp>
+namespace aspect
+{
+ /**
+ * A variable whose value denotes the number of seconds in one year.
+ */
+ extern const double year_in_seconds;
+
+ /**
+ * A variable that denotes whether we should periodically
+ * output statistics about memory consumption, run times, etc
+ * via the Simulator::output_statistics() function or other
+ * means.
+ */
+ extern const bool output_parallel_statistics;
+
+
+ /**
+ * A typedef that denotes the BOOST stream type for reading data
+ * during serialization. The type chosen here is a binary archive
+ * which we subsequently will have to un-compress.
+ */
+ typedef boost::archive::binary_iarchive iarchive;
+
+ /**
+ * A typedef that denotes the BOOST stream type for writing data
+ * during serialization. The type chosen here is a binary archive
+ * which we compress before writing it into a file.
+ */
+ typedef boost::archive::binary_oarchive oarchive;
+
+ /**
+ * A namespace that contains typedefs for classes used in
+ * the linear algebra description.
+ */
+ namespace LinearAlgebra
+ {
+ using namespace dealii;
+
+
+ /**
+ * Typedef for the vector type used.
+ */
+ typedef TrilinosWrappers::MPI::Vector Vector;
+
+ /**
+ * Typedef for the type used to describe vectors that
+ * consist of multiple blocks.
+ */
+ typedef TrilinosWrappers::MPI::BlockVector BlockVector;
+
+ /**
+ * Typedef for the sparse matrix type used.
+ */
+ typedef TrilinosWrappers::SparseMatrix SparseMatrix;
+
+ /**
+ * Typedef for the type used to describe sparse matrices that
+ * consist of multiple blocks.
+ */
+ typedef TrilinosWrappers::BlockSparseMatrix BlockSparseMatrix;
+
+ /**
+ * Typedef for the AMG preconditioner type used for the
+ * top left block of the Stokes matrix.
+ */
+ typedef TrilinosWrappers::PreconditionAMG PreconditionAMG;
+
+ /**
+ * Typedef for the Incomplete Cholesky preconditioner used
+ * for other blocks of the system matrix.
+ */
+ typedef TrilinosWrappers::PreconditionIC PreconditionIC;
+
+ /**
+ * Typedef for the Incomplete LU decomposition preconditioner used
+ * for other blocks of the system matrix.
+ */
+ typedef TrilinosWrappers::PreconditionILU PreconditionILU;
+ }
+}
+
+
+/**
+ * A macro that is used in instantiating the ASPECT classes and functions
+ * for both 2d and 3d. Call this macro with the name of another macro that
+ * when called with a single integer argument instantiates the respective
+ * classes in the given space dimension.
+ */
+#define ASPECT_INSTANTIATE(INSTANTIATIONS) \
+ INSTANTIATIONS(2) \
+ INSTANTIATIONS(3)
+
+#endif
Modified: branches/s-wang2/include/aspect/particle/integrator.h
===================================================================
--- branches/s-wang2/include/aspect/particle/integrator.h 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/include/aspect/particle/integrator.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -22,6 +22,7 @@
#ifndef __aspect__particle_integrator_h
#define __aspect__particle_integrator_h
+#include <aspect/global.h>
#include <aspect/particle/particle.h>
#include <aspect/simulator.h>
@@ -393,7 +394,7 @@
const parallel::distributed::Triangulation<dim> *_tria;
const DoFHandler<dim> *_dh;
const Mapping<dim> *_mapping;
- const TrilinosWrappers::MPI::BlockVector *_solution;
+ const LinearAlgebra::BlockVector *_solution;
virtual IntegrationScheme select_scheme(const std::vector<Point<dim> > &cell_vertices, const std::vector<Point<dim> > &cell_velocities, const double timestep)
{
@@ -401,7 +402,7 @@
};
public:
- HybridIntegrator(const parallel::distributed::Triangulation<dim> *new_tria, const DoFHandler<dim> *new_dh, const Mapping<dim> *new_mapping, const TrilinosWrappers::MPI::BlockVector *new_solution)
+ HybridIntegrator(const parallel::distributed::Triangulation<dim> *new_tria, const DoFHandler<dim> *new_dh, const Mapping<dim> *new_mapping, const LinearAlgebra::BlockVector *new_solution)
{
_step = 0;
_loc0.clear();
@@ -424,7 +425,7 @@
IntegrationScheme cur_scheme;
//typename parallel::distributed::Triangulation<dim>::cell_iterator found_cell;
typename DoFHandler<dim>::active_cell_iterator found_cell;
- Functions::FEFieldFunction<dim, DoFHandler<dim>, TrilinosWrappers::MPI::BlockVector> fe_value(*_dh, *_solution, *_mapping);
+ Functions::FEFieldFunction<dim, DoFHandler<dim>, LinearAlgebra::BlockVector> fe_value(*_dh, *_solution, *_mapping);
// If this is the first step, go through all the cells and determine
// which integration scheme the particles in each cell should use
Modified: branches/s-wang2/include/aspect/particle/world.h
===================================================================
--- branches/s-wang2/include/aspect/particle/world.h 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/include/aspect/particle/world.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -375,7 +375,7 @@
};
// Advance particles by the specified timestep using the current integration scheme.
- void advance_timestep(double timestep, const TrilinosWrappers::MPI::BlockVector &solution)
+ void advance_timestep(double timestep, const LinearAlgebra::BlockVector &solution)
{
bool continue_integrator = true;
@@ -565,7 +565,7 @@
free(recv_data);
};
- void get_particle_velocities(const TrilinosWrappers::MPI::BlockVector &solution)
+ void get_particle_velocities(const LinearAlgebra::BlockVector &solution)
{
Vector<double> single_res(dim+2);
std::vector<Vector<double> > result;
@@ -577,7 +577,7 @@
std::vector<Point<dim> > particle_points;
// Prepare the field function
- Functions::FEFieldFunction<dim, DoFHandler<dim>, TrilinosWrappers::MPI::BlockVector> fe_value(*_dh, solution, *_mapping);
+ Functions::FEFieldFunction<dim, DoFHandler<dim>, LinearAlgebra::BlockVector> fe_value(*_dh, solution, *_mapping);
// Get the velocity for each cell at a time so we can take advantage of knowing the active cell
for (it=_particles.begin(); it!=_particles.end();)
Modified: branches/s-wang2/include/aspect/postprocess/interface.h
===================================================================
--- branches/s-wang2/include/aspect/postprocess/interface.h 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/include/aspect/postprocess/interface.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -33,8 +33,8 @@
#include <deal.II/base/std_cxx1x/shared_ptr.h>
#include <deal.II/base/table_handler.h>
#include <deal.II/base/parameter_handler.h>
-#include <deal.II/lac/trilinos_vector.h>
-#include <deal.II/lac/trilinos_block_vector.h>
+//#include <deal.II/lac/trilinos_vector.h>
+//#include <deal.II/lac/trilinos_block_vector.h>
#include <deal.II/dofs/dof_handler.h>
#include <deal.II/distributed/tria.h>
#include <deal.II/fe/mapping.h>
Modified: branches/s-wang2/include/aspect/simulator.h
===================================================================
--- branches/s-wang2/include/aspect/simulator.h 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/include/aspect/simulator.h 2012-11-29 05:55:56 UTC (rev 1394)
@@ -28,9 +28,9 @@
#include <deal.II/base/conditional_ostream.h>
#include <deal.II/base/symmetric_tensor.h>
-#include <deal.II/lac/trilinos_block_vector.h>
-#include <deal.II/lac/trilinos_block_sparse_matrix.h>
-#include <deal.II/lac/trilinos_precondition.h>
+//#include <deal.II/lac/trilinos_block_vector.h>
+//#include <deal.II/lac/trilinos_block_sparse_matrix.h>
+//#include <deal.II/lac/trilinos_precondition.h>
#include <deal.II/distributed/tria.h>
@@ -1217,7 +1217,7 @@
* <code>source/simulator/assembly.cc</code>.
*/
void
- compute_material_model_input_values (const TrilinosWrappers::MPI::BlockVector &input_solution,
+ compute_material_model_input_values (const LinearAlgebra::BlockVector &input_solution,
const FEValues<dim,dim> &input_finite_element_values,
const bool compute_strainrate,
typename MaterialModel::Interface<dim>::MaterialModelInputs &material_model_inputs) const;
@@ -1368,7 +1368,7 @@
LinearAlgebra::BlockVector old_old_solution;
LinearAlgebra::BlockVector system_rhs;
- TrilinosWrappers::MPI::BlockVector current_linearization_point;
+ LinearAlgebra::BlockVector current_linearization_point;
// only used if is_compressible()
LinearAlgebra::BlockVector pressure_shape_function_integrals;
Modified: branches/s-wang2/source/main.cc
===================================================================
--- branches/s-wang2/source/main.cc 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/source/main.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -24,12 +24,14 @@
#include <deal.II/base/utilities.h>
#include <deal.II/base/mpi.h>
+#include <deal.II/lac/vector_memory.h>
int main (int argc, char *argv[])
{
using namespace dealii;
Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv);
+ PetscInitialize(&argc,&argv,0,0);
try
{
@@ -169,5 +171,9 @@
return 1;
}
+ dealii::GrowingVectorMemory<dealii::PETScWrappers::MPI::Vector>::release_unused_memory ();
+ dealii::GrowingVectorMemory<dealii::PETScWrappers::Vector>::release_unused_memory ();
+ PetscFinalize();
+
return 0;
}
Modified: branches/s-wang2/source/postprocess/composition_statistics.cc
===================================================================
--- branches/s-wang2/source/postprocess/composition_statistics.cc 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/source/postprocess/composition_statistics.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -94,16 +94,17 @@
std::vector<double> local_max_compositions (this->n_compositional_fields(),
std::numeric_limits<double>::min());
- for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
- for (unsigned int i=0; i<this->get_solution().block(3+c).local_size(); ++i)
- {
- local_min_compositions[c]
- = std::min<double> (local_min_compositions[c],
- this->get_solution().block(3+c).trilinos_vector()[0][i]);
- local_max_compositions[c]
- = std::max<double> (local_max_compositions[c],
- this->get_solution().block(3+c).trilinos_vector()[0][i]);
- }
+// for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
+// for (unsigned int i=0; i<this->get_solution().block(3+c).local_size(); ++i)
+// {
+// shuqiangwang
+// local_min_compositions[c]
+// = std::min<double> (local_min_compositions[c],
+// this->get_solution().block(3+c).trilinos_vector()[0][i]);
+// local_max_compositions[c]
+// = std::max<double> (local_max_compositions[c],
+// this->get_solution().block(3+c).trilinos_vector()[0][i]);
+// }
// now do the reductions over all processors. we can use Utilities::MPI::max
// for the maximal values. unfortunately, there is currently no matching
@@ -115,21 +116,26 @@
std::numeric_limits<double>::min());
{
- for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
- local_min_compositions[c] = -local_min_compositions[c];
- Utilities::MPI::max (local_min_compositions,
- this->get_mpi_communicator(),
- global_min_compositions);
- for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
- {
- local_min_compositions[c] = -local_min_compositions[c];
- global_min_compositions[c] = -global_min_compositions[c];
- }
-
- // it's simpler for the maximal values
- Utilities::MPI::max (local_max_compositions,
- this->get_mpi_communicator(),
- global_max_compositions);
+// for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
+// local_min_compositions[c] = -local_min_compositions[c];
+// Utilities::MPI::max (local_min_compositions,
+// this->get_mpi_communicator(),
+// global_min_compositions);
+// for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
+// {
+// local_min_compositions[c] = -local_min_compositions[c];
+// global_min_compositions[c] = -global_min_compositions[c];
+// }
+//
+// // it's simpler for the maximal values
+// Utilities::MPI::max (local_max_compositions,
+// this->get_mpi_communicator(),
+// global_max_compositions);
+ for (unsigned int c=0; c<this->n_compositional_fields(); ++c)
+ {
+ global_min_compositions[c] = this->get_solution().block(3+c).min();
+ global_max_compositions[c] = this->get_solution().block(3+c).max();
+ }
}
// finally produce something for the statistics file
Modified: branches/s-wang2/source/postprocess/temperature_statistics.cc
===================================================================
--- branches/s-wang2/source/postprocess/temperature_statistics.cc 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/source/postprocess/temperature_statistics.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -84,15 +84,16 @@
// picture of their true values
double local_min_temperature = std::numeric_limits<double>::max();
double local_max_temperature = std::numeric_limits<double>::min();
- for (unsigned int i=0; i<this->get_solution().block(2).local_size(); ++i)
- {
- local_min_temperature
- = std::min<double> (local_min_temperature,
- this->get_solution().block(2).trilinos_vector()[0][i]);
- local_max_temperature
- = std::max<double> (local_max_temperature,
- this->get_solution().block(2).trilinos_vector()[0][i]);
- }
+// for (unsigned int i=0; i<this->get_solution().block(2).local_size(); ++i)
+// {
+//// shuqiangwang
+// local_min_temperature
+// = std::min<double> (local_min_temperature,
+// this->get_solution().block(2).trilinos_vector()[0][i]);
+// local_max_temperature
+// = std::max<double> (local_max_temperature,
+// this->get_solution().block(2).trilinos_vector()[0][i]);
+// }
const double global_temperature_integral
= Utilities::MPI::sum (local_temperature_integral, this->get_mpi_communicator());
@@ -104,13 +105,15 @@
// one communication by multiplying
// one value by -1
{
- double local_values[2] = { -local_min_temperature, local_max_temperature };
- double global_values[2];
-
- Utilities::MPI::max (local_values, this->get_mpi_communicator(), global_values);
-
- global_min_temperature = -global_values[0];
- global_max_temperature = global_values[1];
+// double local_values[2] = { -local_min_temperature, local_max_temperature };
+// double global_values[2];
+//
+// Utilities::MPI::max (local_values, this->get_mpi_communicator(), global_values);
+//
+// global_min_temperature = -global_values[0];
+// global_max_temperature = global_values[1];
+ global_min_temperature = this->get_solution().block(2).min();
+ global_max_temperature = this->get_solution().block(2).max();
}
statistics.add_value ("Minimal temperature (K)",
Modified: branches/s-wang2/source/simulator/assembly.cc
===================================================================
--- branches/s-wang2/source/simulator/assembly.cc 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/source/simulator/assembly.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -643,7 +643,7 @@
template <int dim>
void
Simulator<dim>::
- compute_material_model_input_values (const TrilinosWrappers::MPI::BlockVector &input_solution,
+ compute_material_model_input_values (const LinearAlgebra::BlockVector &input_solution,
const FEValues<dim> &input_finite_element_values,
const bool compute_strainrate,
typename MaterialModel::Interface<dim>::MaterialModelInputs &material_model_inputs) const
@@ -828,12 +828,12 @@
Mp_preconditioner.reset (new LinearAlgebra::PreconditionILU());
Amg_preconditioner.reset (new LinearAlgebra::PreconditionAMG());
- LinearAlgebra::PreconditionAMG::AdditionalData Amg_data;
- Amg_data.constant_modes = constant_modes;
- Amg_data.elliptic = true;
- Amg_data.higher_order_elements = true;
- Amg_data.smoother_sweeps = 2;
- Amg_data.aggregation_threshold = 0.02;
+ LinearAlgebra::PreconditionAMG::AdditionalData Amg_data(true);
+ //Amg_data.constant_modes = constant_modes;
+ //Amg_data.elliptic = true;
+ //Amg_data.higher_order_elements = true;
+ //Amg_data.smoother_sweeps = 2;
+ //Amg_data.aggregation_threshold = 0.02;
Mp_preconditioner->initialize (system_preconditioner_matrix.block(1,1));
Amg_preconditioner->initialize (system_preconditioner_matrix.block(0,0),
@@ -1023,11 +1023,11 @@
internal::Assembly::CopyData::
StokesSystem<dim> (finite_element));
- system_matrix.compress();
- system_rhs.compress(Add);
+ system_matrix.compress(dealii::VectorOperation::add); //shuqiangwang
+ system_rhs.compress(dealii::VectorOperation::add);
if (material_model->is_compressible())
- pressure_shape_function_integrals.compress(Add);
+ pressure_shape_function_integrals.compress(dealii::VectorOperation::add);
rebuild_stokes_matrix = false;
@@ -1044,7 +1044,7 @@
else
computing_timer.enter_section (" Build composition preconditioner");
{
- preconditioner.reset (new TrilinosWrappers::PreconditionILU());
+ preconditioner.reset (new LinearAlgebra::PreconditionILU());
preconditioner->initialize (system_matrix.block(2+index,2+index));
}
computing_timer.exit_section();
@@ -1393,7 +1393,7 @@
AdvectionSystem<dim> (finite_element));
system_matrix.compress();
- system_rhs.compress(Add);
+ system_rhs.compress(dealii::VectorOperation::add);
computing_timer.exit_section();
}
Modified: branches/s-wang2/source/simulator/core.cc
===================================================================
--- branches/s-wang2/source/simulator/core.cc 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/source/simulator/core.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -55,7 +55,73 @@
using namespace dealii;
+/**
+ * Temporary utility for replacing TrilinosWrappers with PETScWrappers.
+ */
+namespace CIG
+{
+/**
+ * Convert the block partitioning used for Trilinos into the two size vectors used for the PETSc setup helpers: @p block_sizes is overwritten with a copy of @p system_dofs_per_block, and @p local_sizes receives system_partitioning[i].n_elements() for every block i, in order.
+ * The original note states that block_partition.size()==2 is assumed; nothing below checks this, and the loop simply visits every entry of @p system_partitioning.
+ */
+void convert_block_partitioning(
+ const std::vector<unsigned int> &system_dofs_per_block, // input: copied verbatim into block_sizes
+ const std::vector<dealii::IndexSet> &system_partitioning, // input: one IndexSet per block; only n_elements() is read
+ std::vector<unsigned int> &block_sizes, // output: previous contents are discarded
+ std::vector<unsigned int> &local_sizes) // output: previous contents are discarded
+{
+ // init: empty both outputs so that repeated calls do not accumulate entries
+ block_sizes.clear(); // redundant with the assignment below, kept as in the original
+ local_sizes.clear();
+ // block_sizes: plain copy of the per-block dof counts
+ block_sizes = system_dofs_per_block;
+ // NOTE(review): presumably n_elements() is the locally owned count of each block -- confirm against how system_partitioning is built
+ // local_sizes: one entry per IndexSet, appended in block order
+ for(unsigned int i=0; i<system_partitioning.size(); i++)
+ local_sizes.push_back(system_partitioning[i].n_elements());
+}
+
+
+void setup_petsc_matrix( // Reinitialize @p matrix as an n x n block matrix, n = block_sizes.size(), and reinit every block (i,j).
+ MPI_Comm mpi_communicator, // passed to the reinit() of every block
+ const std::vector<unsigned int> &block_sizes, // one entry per block; entries i and j are passed to block (i,j)
+ const std::vector<unsigned int> &local_sizes, // indexed like block_sizes; must have at least block_sizes.size() entries (not checked here)
+ int max_coupling_between_dofs, // forwarded unchanged as the last reinit() argument of every block
+ dealii::PETScWrappers::MPI::BlockSparseMatrix &matrix) // output: reinitialized in place
+{
+ int size = block_sizes.size(); // number of blocks per direction; note the size_t -> int narrowing
+ // first set the number of block rows and block columns, then fill in the individual blocks
+ matrix.reinit(size,size);
+ // every block, diagonal or off-diagonal, is reinitialized with the same max_coupling_between_dofs
+ for(int i=0; i<size; i++)
+ for(int j=0; j<size; j++)
+ {
+ matrix.block(i,j).reinit(
+ mpi_communicator,
+ block_sizes[i], block_sizes[j], // first pair: taken from block_sizes for row block i and column block j
+ local_sizes[i], local_sizes[j], // second pair: taken from local_sizes for the same two blocks
+ max_coupling_between_dofs);
+ }
+ // called once after all blocks have been reinitialized; presumably refreshes the block matrix's cached sizes -- confirm in deal.II docs
+ matrix.collect_sizes();
+}
+
+void setup_petsc_vector( // Reinitialize @p vector with block_sizes.size() blocks, then reinit each block from its pair of IndexSets.
+ MPI_Comm mpi_communicator, // passed to the block-vector reinit() and to every per-block reinit()
+ std::vector<unsigned int> &block_sizes, // non-const reference, but only read in this body; its size() is the number of blocks
+ std::vector<dealii::IndexSet> &partitioning, // entry i is the second argument of block i's reinit(); needs >= block_sizes.size() entries (not checked)
+ std::vector<dealii::IndexSet> &relevant_partitioning, // entry i is the third argument of block i's reinit(); presumably the ghost/relevant set -- confirm
+ dealii::PETScWrappers::MPI::BlockVector &vector) // output: reinitialized in place
+{
+ vector.reinit(block_sizes,mpi_communicator); // set up the block structure first; each block is reinitialized again just below
+ for(unsigned int i=0; i<block_sizes.size(); i++)
+ vector.block(i).reinit(mpi_communicator,partitioning[i],relevant_partitioning[i]); // per-block reinit from the two IndexSets of block i
+ vector.collect_sizes(); // called once after all blocks have been reinitialized
+}
+}
+
+
namespace aspect
{
namespace
@@ -507,7 +573,7 @@
this_mpi_process(mpi_communicator));
sp.compress();
- system_matrix.reinit (sp);
+ //shuqiangwang: this function is not used. system_matrix.reinit (sp);
}
@@ -541,7 +607,7 @@
this_mpi_process(mpi_communicator));
sp.compress();
- system_preconditioner_matrix.reinit (sp);
+ //shuqiangwang; this function is not used. system_preconditioner_matrix.reinit (sp);
}
@@ -717,19 +783,21 @@
}
// finally initialize vectors, matrices, etc.
+ std::vector<unsigned int> block_sizes, local_sizes;
+ CIG::convert_block_partitioning(system_dofs_per_block,system_partitioning,block_sizes,local_sizes);
- setup_system_matrix (system_partitioning);
- setup_system_preconditioner (system_partitioning);
+ CIG::setup_petsc_matrix(mpi_communicator,block_sizes,local_sizes,dof_handler.max_couplings_between_dofs(),system_matrix); //setup_system_matrix (system_partitioning);
+ CIG::setup_petsc_matrix(mpi_communicator,block_sizes,local_sizes,dof_handler.max_couplings_between_dofs(),system_preconditioner_matrix); //setup_system_preconditioner (system_partitioning);
- system_rhs.reinit(system_partitioning, mpi_communicator);
- solution.reinit(system_relevant_partitioning, mpi_communicator);
- old_solution.reinit(system_relevant_partitioning, mpi_communicator);
- old_old_solution.reinit(system_relevant_partitioning, mpi_communicator);
+ system_rhs.reinit(block_sizes,mpi_communicator,local_sizes); //system_rhs.reinit(system_partitioning, mpi_communicator);
+ CIG::setup_petsc_vector(mpi_communicator,block_sizes,system_partitioning,system_relevant_partitioning,solution); //solution.reinit(system_relevant_partitioning, mpi_communicator);
+ CIG::setup_petsc_vector(mpi_communicator,block_sizes,system_partitioning,system_relevant_partitioning,old_solution); //old_solution.reinit(system_relevant_partitioning, mpi_communicator);
+ CIG::setup_petsc_vector(mpi_communicator,block_sizes,system_partitioning,system_relevant_partitioning,old_old_solution); //old_old_solution.reinit(system_relevant_partitioning, mpi_communicator);
- current_linearization_point.reinit (system_relevant_partitioning, MPI_COMM_WORLD);
+ CIG::setup_petsc_vector(mpi_communicator,block_sizes,system_partitioning,system_relevant_partitioning,current_linearization_point); //current_linearization_point.reinit (system_relevant_partitioning, MPI_COMM_WORLD);
if (material_model->is_compressible())
- pressure_shape_function_integrals.reinit (system_partitioning, mpi_communicator);
+ pressure_shape_function_integrals.reinit(block_sizes,mpi_communicator,local_sizes); //pressure_shape_function_integrals.reinit (system_partitioning, mpi_communicator);
rebuild_stokes_matrix = true;
rebuild_stokes_preconditioner = true;
@@ -1414,6 +1482,8 @@
{
old_old_solution = old_solution;
old_solution = solution;
+ old_old_solution.update_ghost_values(); //shuqiangwang: need to check when this is needed.
+ old_solution.update_ghost_values();
}
// periodically generate snapshots so that we can resume here
Modified: branches/s-wang2/source/simulator/helper_functions.cc
===================================================================
--- branches/s-wang2/source/simulator/helper_functions.cc 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/source/simulator/helper_functions.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -78,12 +78,12 @@
<< "* Matrix " << system_matrix.memory_consumption()/mb << std::endl
<< "* 5 Vectors " << 5*solution.memory_consumption()/mb << std::endl
<< "* preconditioner " << (system_preconditioner_matrix.memory_consumption()
- + Amg_preconditioner->memory_consumption()
+ //+ Amg_preconditioner->memory_consumption()
/*+Mp_preconditioner->memory_consumption()
+T_preconditioner->memory_consumption()*/)/mb
<< std::endl
<< " - matrix " << system_preconditioner_matrix.memory_consumption()/mb << std::endl
- << " - prec vel " << Amg_preconditioner->memory_consumption()/mb << std::endl
+ // << " - prec vel " << Amg_preconditioner->memory_consumption()/mb << std::endl
<< " - prec mass " << 0/*Mp_preconditioner->memory_consumption()/mb*/ << std::endl
<< " - prec T " << 0/*T_preconditioner->memory_consumption()/mb*/ << std::endl
<< std::endl;
Modified: branches/s-wang2/source/simulator/initial_conditions.cc
===================================================================
--- branches/s-wang2/source/simulator/initial_conditions.cc 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/source/simulator/initial_conditions.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -153,8 +153,14 @@
// then apply constraints and copy the
// result into vectors with ghost elements
+// constraints.print(std::cout);
constraints.distribute(initial_solution);
+// static int debug_index = 0;
+// debug_index++;
+// if(debug_index==2)
+// exit(0);
+
// copy temperature/composition block only
solution.block(2+n) = initial_solution.block(2+n);
old_solution.block(2+n) = initial_solution.block(2+n);
@@ -196,8 +202,12 @@
dim+2+parameters.n_compositional_fields),
system_tmp);
+ system_tmp.compress(); // shuqiangwang: do I need this?
+// system_tmp.print(std::cout,7,false,false);
// we may have hanging nodes, so apply constraints
constraints.distribute (system_tmp);
+
+ system_tmp.compress();
old_solution.block(1) = system_tmp.block(1);
}
@@ -289,6 +299,8 @@
old_solution.block(1) = system_tmp.block(1);
}
+ old_solution.compress();
+
// normalize the pressure in such a way that the surface pressure
// equals a known and desired value
normalize_pressure(old_solution);
Modified: branches/s-wang2/source/simulator/solver.cc
===================================================================
--- branches/s-wang2/source/simulator/solver.cc 2012-11-29 05:40:39 UTC (rev 1393)
+++ branches/s-wang2/source/simulator/solver.cc 2012-11-29 05:55:56 UTC (rev 1394)
@@ -25,7 +25,7 @@
#include <deal.II/lac/solver_gmres.h>
#include <deal.II/lac/constraint_matrix.h>
-#include <deal.II/lac/trilinos_solver.h>
+//#include <deal.II/lac/trilinos_solver.h>
#include <deal.II/lac/pointer_matrix.h>
@@ -67,9 +67,9 @@
/**
* Compute the residual with the Stokes block.
*/
- double residual (TrilinosWrappers::MPI::BlockVector &dst,
- const TrilinosWrappers::MPI::BlockVector &x,
- const TrilinosWrappers::MPI::BlockVector &b) const;
+ double residual (LinearAlgebra::BlockVector &dst,
+ const LinearAlgebra::BlockVector &x,
+ const LinearAlgebra::BlockVector &b) const;
void clear() {};
@@ -130,9 +130,9 @@
- double StokesBlock::residual (TrilinosWrappers::MPI::BlockVector &dst,
- const TrilinosWrappers::MPI::BlockVector &x,
- const TrilinosWrappers::MPI::BlockVector &b) const
+ double StokesBlock::residual (LinearAlgebra::BlockVector &dst,
+ const LinearAlgebra::BlockVector &x,
+ const LinearAlgebra::BlockVector &b) const
{
// compute b-Ax where A is only the top left 2x2 block
this->vmult (dst, x);
@@ -222,7 +222,7 @@
{
SolverControl solver_control(5000, 1e-6 * src.block(1).l2_norm());
- TrilinosWrappers::SolverCG solver(solver_control);
+ PETScWrappers::SolverCG solver(solver_control);
// Trilinos reports a breakdown
// in case src=dst=0, even
@@ -247,7 +247,7 @@
if (do_solve_A == true)
{
SolverControl solver_control(5000, utmp.l2_norm()*1e-2);
- TrilinosWrappers::SolverCG solver(solver_control);
+ PETScWrappers::SolverCG solver(solver_control);
solver.solve(stokes_matrix.block(0,0), dst.block(0), utmp,
a_preconditioner);
}
@@ -286,13 +286,13 @@
// overwrite the vector in residual(), then call set_zero again, and then throw away
// the result
LinearAlgebra::BlockVector
- distributed_solution (system_rhs);
- current_constraints.set_zero(distributed_solution);
+ distributed_solution (system_rhs); distributed_solution.compress();
+ current_constraints.set_zero(distributed_solution); distributed_solution.compress();
// create vector with distribution of system_rhs.
LinearAlgebra::Vector block_remap (system_rhs.block (index+2));
// copy block of current_linearization_point into it, because
// current_linearization is distributed differently.
- block_remap = current_linearization_point.block (index+2);
+ block_remap = current_linearization_point.block (index+2); block_remap.compress();
// (ab)use the distributed solution vector to temporarily put a residual in
initial_residual = system_matrix.block(index+2,index+2).residual (distributed_solution.block(index+2),
block_remap,
@@ -300,12 +300,12 @@
current_constraints.set_zero(distributed_solution);
// then overwrite it again with the current best guess and solve the linear system
- distributed_solution.block(index+2) = block_remap;
+ distributed_solution.block(index+2) = block_remap; distributed_solution.compress();
solver.solve (system_matrix.block(index+2,index+2), distributed_solution.block(index+2),
system_rhs.block(index+2), index==0?*T_preconditioner:*C_preconditioner);
current_constraints.distribute (distributed_solution);
- solution.block(index+2) = distributed_solution.block(index+2);
+ solution.block(index+2) = distributed_solution.block(index+2); solution.compress();
// print number of iterations and also record it in the
// statistics file
@@ -354,7 +354,7 @@
remap.block (1) = current_linearization_point.block (1);
// before solving we scale the initial solution to the right dimensions
remap.block (1) /= pressure_scaling;
- current_constraints.set_zero (remap);
+ current_constraints.set_zero (remap); remap.compress();
// if the model is compressible then we need to adjust the right hand
// side of the equation to make it compatible with the matrix on the
// left
@@ -368,13 +368,13 @@
// then overwrite it again with the current best guess and solve the linear system
distributed_stokes_solution.block(0) = remap.block(0);
- distributed_stokes_solution.block(1) = remap.block(1);
+ distributed_stokes_solution.block(1) = remap.block(1); distributed_stokes_solution.compress();
// extract Stokes parts of rhs vector
LinearAlgebra::BlockVector distributed_stokes_rhs;
distributed_stokes_rhs.reinit(system_rhs);
distributed_stokes_rhs.block(0) = system_rhs.block(0);
- distributed_stokes_rhs.block(1) = system_rhs.block(1);
+ distributed_stokes_rhs.block(1) = system_rhs.block(1); distributed_stokes_rhs.compress();
PrimitiveVectorMemory< LinearAlgebra::BlockVector > mem;
More information about the CIG-COMMITS
mailing list