Chaste Release::3.1
PCLDUFactorisation.cpp
00001 /*
00002 
00003 Copyright (c) 2005-2012, University of Oxford.
00004 All rights reserved.
00005 
00006 University of Oxford means the Chancellor, Masters and Scholars of the
00007 University of Oxford, having an administrative office at Wellington
00008 Square, Oxford OX1 2JD, UK.
00009 
00010 This file is part of Chaste.
00011 
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014  * Redistributions of source code must retain the above copyright notice,
00015    this list of conditions and the following disclaimer.
00016  * Redistributions in binary form must reproduce the above copyright notice,
00017    this list of conditions and the following disclaimer in the documentation
00018    and/or other materials provided with the distribution.
00019  * Neither the name of the University of Oxford nor the names of its
00020    contributors may be used to endorse or promote products derived from this
00021    software without specific prior written permission.
00022 
00023 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00024 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00025 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00026 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
00027 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00028 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
00029 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00030 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00031 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00032 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 
00034 */
00035 
00036 #include <iostream>
00037 
00038 #include "PetscVecTools.hpp" // Includes Ublas so must come first
00039 #include "PCLDUFactorisation.hpp"
00040 #include "Exception.hpp"
00041 
00042 PCLDUFactorisation::PCLDUFactorisation(KSP& rKspObject)
00043 {
00044 #ifdef TRACE_KSP
00045     mPCContext.mScatterTime = 0.0;
00046     mPCContext.mA1PreconditionerTime = 0.0;;
00047     mPCContext.mA2PreconditionerTime = 0.0;;
00048     mPCContext.mGatherTime = 0.0;;
00049 #endif
00050 
00051     PCLDUFactorisationCreate(rKspObject);
00052     PCLDUFactorisationSetUp();
00053 }
00054 
00055 PCLDUFactorisation::~PCLDUFactorisation()
00056 {
00057 #ifdef TRACE_KSP
00058     if (PetscTools::AmMaster())
00059     {
00060         std::cout << " -- LDU factorisation preconditioner profile information: " << std::endl;
00061         std::cout << "\t mScatterTime: " << mPCContext.mScatterTime << std::endl;
00062         std::cout << "\t mA1PreconditionerTime: " << mPCContext.mA1PreconditionerTime << std::endl;
00063         std::cout << "\t mA2PreconditionerTime: " << mPCContext.mA2PreconditionerTime << std::endl;
00064         std::cout << "\t mExtraLAOperations: " << mPCContext.mExtraLAOperations << std::endl;
00065         std::cout << "\t mGatherTime: " << mPCContext.mGatherTime << std::endl;
00066     }
00067 #endif
00068 
00069     PetscTools::Destroy(mPCContext.A11_matrix_subblock);
00070     PetscTools::Destroy(mPCContext.A22_matrix_subblock);
00071     PetscTools::Destroy(mPCContext.B_matrix_subblock);
00072 
00073     PCDestroy(PETSC_DESTROY_PARAM(mPCContext.PC_amg_A11));
00074     PCDestroy(PETSC_DESTROY_PARAM(mPCContext.PC_amg_A22));
00075 
00076     PetscTools::Destroy(mPCContext.x1_subvector);
00077     PetscTools::Destroy(mPCContext.y1_subvector);
00078     PetscTools::Destroy(mPCContext.x2_subvector);
00079     PetscTools::Destroy(mPCContext.y2_subvector);
00080     PetscTools::Destroy(mPCContext.z);
00081     PetscTools::Destroy(mPCContext.temp);
00082 
00083     VecScatterDestroy(PETSC_DESTROY_PARAM(mPCContext.A11_scatter_ctx));
00084     VecScatterDestroy(PETSC_DESTROY_PARAM(mPCContext.A22_scatter_ctx));
00085 }
00086 
00087 void PCLDUFactorisation::PCLDUFactorisationCreate(KSP& rKspObject)
00088 {
00089     KSPGetPC(rKspObject, &mPetscPCObject);
00090 
00091     Mat system_matrix, dummy;
00092     MatStructure flag;
00093     KSPGetOperators(rKspObject, &system_matrix, &dummy, &flag);
00094 
00095     PetscInt num_rows, num_columns;
00096     MatGetSize(system_matrix, &num_rows, &num_columns);
00097 
00098     PetscInt num_local_rows, num_local_columns;
00099     MatGetLocalSize(system_matrix, &num_local_rows, &num_local_columns);
00100 
00101     // Odd number of rows: impossible in Bidomain.
00102     // Odd number of local rows: impossible if V_m and phi_e for each node are stored in the same processor.
00103     if ((num_rows%2 != 0) || (num_local_rows%2 != 0))
00104     {
00105         TERMINATE("Wrong matrix parallel layout detected in PCLDUFactorisation.");
00106     }
00107 
00108     // Allocate memory
00109     unsigned subvector_num_rows = num_rows/2;
00110     unsigned subvector_local_rows = num_local_rows/2;
00111     mPCContext.x1_subvector = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows);
00112     mPCContext.x2_subvector = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows);
00113     mPCContext.y1_subvector = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows);
00114     mPCContext.y2_subvector = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows);
00115     mPCContext.z = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows);
00116     mPCContext.temp = PetscTools::CreateVec(subvector_num_rows, subvector_local_rows);
00117 
00118     // Create scatter contexts
00119     {
00120         // Needed by VecScatterCreate in order to find out parallel layout.
00121         Vec dummy_vec = PetscTools::CreateVec(num_rows, num_local_rows);
00122 
00123         PetscVecTools::SetupInterleavedVectorScatterGather(dummy_vec, mPCContext.A11_scatter_ctx, mPCContext.A22_scatter_ctx);
00124 
00125         PetscTools::Destroy(dummy_vec);
00126     }
00127 
00128     // Get matrix sublock A11
00129     {
00130         // Work out local row range for subblock A11 (same as x1 or y1)
00131         PetscInt low, high, global_size;
00132         VecGetOwnershipRange(mPCContext.x1_subvector, &low, &high);
00133         VecGetSize(mPCContext.x1_subvector, &global_size);
00134         assert(global_size == num_rows/2);
00135 
00136         IS A11_local_rows;
00137         IS A11_columns;
00138         ISCreateStride(PETSC_COMM_WORLD, high-low, 2*low, 2, &A11_local_rows);
00139         ISCreateStride(PETSC_COMM_WORLD, global_size, 0, 2, &A11_columns);
00140 
00141 #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 1) //PETSc 3.1 or later
00142         MatGetSubMatrix(system_matrix, A11_local_rows, A11_local_rows,
00143             MAT_INITIAL_MATRIX, &mPCContext.A11_matrix_subblock);
00144 #else
00145         MatGetSubMatrix(system_matrix, A11_local_rows, A11_columns, PETSC_DECIDE,
00146             MAT_INITIAL_MATRIX, &mPCContext.A11_matrix_subblock);
00147 #endif
00148         ISDestroy(PETSC_DESTROY_PARAM(A11_local_rows));
00149         ISDestroy(PETSC_DESTROY_PARAM(A11_columns));
00150     }
00151 
00152     // Get matrix sublock A22
00153     {
00154         // Work out local row range for subblock A22 (same as x2 or y2)
00155         PetscInt low, high, global_size;
00156         VecGetOwnershipRange(mPCContext.x2_subvector, &low, &high);
00157         VecGetSize(mPCContext.x2_subvector, &global_size);
00158         assert(global_size == num_rows/2);
00159 
00160         IS A22_local_rows;
00161         IS A22_columns;
00162         ISCreateStride(PETSC_COMM_WORLD, high-low, 2*low+1, 2, &A22_local_rows);
00163         ISCreateStride(PETSC_COMM_WORLD, global_size, 1, 2, &A22_columns);
00164 
00165 #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 1) //PETSc 3.1 or later
00166         MatGetSubMatrix(system_matrix, A22_local_rows, A22_local_rows,
00167             MAT_INITIAL_MATRIX, &mPCContext.A22_matrix_subblock);
00168 #else
00169         MatGetSubMatrix(system_matrix, A22_local_rows, A22_columns, PETSC_DECIDE,
00170             MAT_INITIAL_MATRIX, &mPCContext.A22_matrix_subblock);
00171 #endif
00172 
00173         ISDestroy(PETSC_DESTROY_PARAM(A22_local_rows));
00174         ISDestroy(PETSC_DESTROY_PARAM(A22_columns));
00175     }
00176 
00177     // Get matrix sublock B (the upper triangular one)
00178     {
00179         // Work out local row range for subblock B (same as A11, x1 or y1)
00180         PetscInt low, high, global_size;
00181         VecGetOwnershipRange(mPCContext.x1_subvector, &low, &high);
00182         VecGetSize(mPCContext.x1_subvector, &global_size);
00183         assert(global_size == num_rows/2);
00184 
00185         IS B_local_rows;
00186         IS B_columns;
00187         ISCreateStride(PETSC_COMM_WORLD, high-low, 2*low, 2, &B_local_rows);
00188 
00189 #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 1) //PETSc 3.1 or later
00190         ISCreateStride(PETSC_COMM_WORLD, high-low, 2*low+1, 2, &B_columns);
00191         MatGetSubMatrix(system_matrix, B_local_rows, B_columns,
00192             MAT_INITIAL_MATRIX, &mPCContext.B_matrix_subblock);
00193 #else
00194     ISCreateStride(PETSC_COMM_WORLD, global_size, 1, 2, &B_columns);
00195         MatGetSubMatrix(system_matrix, B_local_rows, B_columns, PETSC_DECIDE,
00196             MAT_INITIAL_MATRIX, &mPCContext.B_matrix_subblock);
00197 #endif
00198 
00199         ISDestroy(PETSC_DESTROY_PARAM(B_local_rows));
00200         ISDestroy(PETSC_DESTROY_PARAM(B_columns));
00201     }
00202 
00203     /*
00204      * Experimental (#1082): in PP removing the mass matrix from the A22 block seems to work better.
00205      *                       This is equivalent to do A22 = A22 + B in this implementation.
00206      */
00207 //#if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2
00208 //     PetscScalar petsc_one = 1.0;
00209 //     MatAXPY(&petsc_one, mPCContext.B_matrix_subblock, mPCContext.A22_matrix_subblock, DIFFERENT_NONZERO_PATTERN);
00210 // #else
00211 //     MatAXPY(mPCContext.A22_matrix_subblock, 1.0, mPCContext.B_matrix_subblock, DIFFERENT_NONZERO_PATTERN);
00212 //#endif
00213 
00214 //     // Shift the block
00215 //#if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2
00216 //     PetscScalar shift = -1e-8;
00217 //     MatShift(&shift, mPCContext.A22_matrix_subblock);
00218 // #else
00219 //     PetscScalar shift = -1e-8;
00220 //     MatShift(mPCContext.A22_matrix_subblock, shift);
00221 //#endif
00222 
00223     PCSetType(mPetscPCObject, PCSHELL);
00224 #if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2
00225     // Register PC context and call-back function
00226     PCShellSetApply(mPetscPCObject, PCLDUFactorisationApply, (void*) &mPCContext);
00227 #else
00228     // Register PC context so it gets passed to PCBlockDiagonalApply
00229     PCShellSetContext(mPetscPCObject, &mPCContext);
00230     // Register call-back function
00231     PCShellSetApply(mPetscPCObject, PCLDUFactorisationApply);
00232 #endif
00233 }
00234 
00235 void PCLDUFactorisation::PCLDUFactorisationSetUp()
00236 {
00237     // These options will get read by PCSetFromOptions
00238 //     PetscOptionsSetValue("-pc_hypre_boomeramg_max_iter", "1");
00239 //     PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0");
00240 //     PetscOptionsSetValue("-pc_hypre_type", "boomeramg");
00241 
00242     /*
00243      * Set up preconditioner for block A11
00244      */
00245     PCCreate(PETSC_COMM_WORLD, &(mPCContext.PC_amg_A11));
00246     PCSetOperators(mPCContext.PC_amg_A11, mPCContext.A11_matrix_subblock, mPCContext.A11_matrix_subblock, SAME_PRECONDITIONER);
00247 
00248     // Choose between the two following blocks in order to approximate inv(A11) with one AMG cycle
00249     // or with an CG solve with high tolerance
00251 //    PCSetType(mPCContext.PC_amg_A11, PCBJACOBI);
00252 
00253     PCSetType(mPCContext.PC_amg_A11, PCHYPRE);
00254     //PCHYPRESetType(mPCContext.PC_amg_A11, "euclid");
00255     PetscOptionsSetValue("-pc_hypre_type", "euclid");
00256     PetscOptionsSetValue("-pc_hypre_euclid_levels", "0");
00257 
00258 //     PCSetType(mPCContext.PC_amg_A11, PCHYPRE);
00259 //     PetscOptionsSetValue("-pc_hypre_type", "boomeramg");
00260 //     PetscOptionsSetValue("-pc_hypre_boomeramg_max_iter", "1");
00261 //     PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0");
00262 //     PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type", "HMIS");
00263 
00265 //    PCSetType(mPCContext.PC_amg_A11, PCKSP);
00266 //    KSP ksp1;
00267 //    PCKSPGetKSP(mPCContext.PC_amg_A11,&ksp1);
00268 //    KSPSetType(ksp1, KSPCG);
00269 //    KSPSetTolerances(ksp1, 0.1, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
00270 //
00271 //    PC prec1;
00272 //    KSPGetPC(ksp1, &prec1);
00273 //    PCSetType(prec1, PCBJACOBI);
00274 //    PCSetFromOptions(prec1);
00275 //    PCSetOperators(prec1, mPCContext.A11_matrix_subblock, mPCContext.A11_matrix_subblock, DIFFERENT_NONZERO_PATTERN);//   SAME_PRECONDITIONER);
00276 //    PCSetUp(prec1);
00277 //
00278 //    KSPSetFromOptions(ksp1);
00279 //    KSPSetUp(ksp1);
00281 
00282     PCSetFromOptions(mPCContext.PC_amg_A11);
00283     PCSetUp(mPCContext.PC_amg_A11);
00284 
00285     /*
00286      * Set up amg preconditioner for block A22
00287      */
00288     PCCreate(PETSC_COMM_WORLD, &(mPCContext.PC_amg_A22));
00289     PCSetOperators(mPCContext.PC_amg_A22, mPCContext.A22_matrix_subblock, mPCContext.A22_matrix_subblock, SAME_PRECONDITIONER);
00290 
00291     // Choose between the two following blocks in order to approximate inv(A11) with one AMG cycle
00292     // or with an CG solve with high tolerance
00294     PCSetType(mPCContext.PC_amg_A22, PCHYPRE);
00295     PetscOptionsSetValue("-pc_hypre_type", "boomeramg");
00296     PetscOptionsSetValue("-pc_hypre_boomeramg_max_iter", "1");
00297     PetscOptionsSetValue("-pc_hypre_boomeramg_strong_threshold", "0.0");
00298     PetscOptionsSetValue("-pc_hypre_boomeramg_coarsen_type", "HMIS");
00299     //    PetscOptionsSetValue("-pc_hypre_boomeramg_interp_type","ext+i");
00300 
00302 //    PCSetType(mPCContext.PC_amg_A22, PCKSP);
00303 //    KSP ksp2;
00304 //    PCKSPGetKSP(mPCContext.PC_amg_A22,&ksp2);
00305 //    KSPSetType(ksp2, KSPCG);
00306 //    KSPSetTolerances(ksp2, 0.1, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
00307 //
00308 //    PC prec2;
00309 //    KSPGetPC(ksp2, &prec2);
00310 //    PCSetType(prec2, PCBJACOBI);
00311 //    PCSetFromOptions(prec2);
00312 //    PCSetOperators(prec2, mPCContext.A22_matrix_subblock, mPCContext.A22_matrix_subblock, DIFFERENT_NONZERO_PATTERN);//   SAME_PRECONDITIONER);
00313 //    PCSetUp(prec2);
00314 //
00315 //    KSPSetFromOptions(ksp2);
00316 //    KSPSetUp(ksp2);
00318 
00319     PCSetFromOptions(mPCContext.PC_amg_A22);
00320     PCSetUp(mPCContext.PC_amg_A22);
00321 }
00322 #if (PETSC_VERSION_MAJOR == 3 && PETSC_VERSION_MINOR >= 1) //PETSc 3.1 or later
00323 PetscErrorCode PCLDUFactorisationApply(PC pc_object, Vec x, Vec y)
00324 {
00325   void* pc_context;
00326 
00327   PCShellGetContext(pc_object, &pc_context);
00328 #else
00329 PetscErrorCode PCLDUFactorisationApply(void* pc_context, Vec x, Vec y)
00330 {
00331 #endif
00332 
00333 
00334     // Cast the pointer to a PC context to our defined type
00335     PCLDUFactorisation::PCLDUFactorisationContext* block_diag_context = (PCLDUFactorisation::PCLDUFactorisationContext*) pc_context;
00336     assert(block_diag_context!=NULL);
00337 
00338     /*
00339      * Split vector x into two. x = [x1 x2]'
00340      */
00341 #ifdef TRACE_KSP
00342     double init_time = MPI_Wtime();
00343 #endif
00344 
00345     PetscVecTools::DoInterleavedVecScatter(x, block_diag_context->A11_scatter_ctx, block_diag_context->x1_subvector, block_diag_context->A22_scatter_ctx, block_diag_context->x2_subvector);
00346 
00347 #ifdef TRACE_KSP
00348     block_diag_context->mScatterTime += MPI_Wtime() - init_time;
00349 #endif
00350 
00351     /*
00352      * Apply preconditioner: [y1 y2]' = inv(P)[x1 x2]'
00353      *
00354      *    z  = inv(A11)*x1
00355      *    y2 = inv(A22)*(x2 - B*z)
00356      *    y1 = z - inv(A11)(B*y2)
00357      */
00358 #ifdef TRACE_KSP
00359     init_time = MPI_Wtime();
00360 #endif
00361     //z  = inv(A11)*x1
00362     PCApply(block_diag_context->PC_amg_A11, block_diag_context->x1_subvector, block_diag_context->z);
00363 #ifdef TRACE_KSP
00364     block_diag_context->mA1PreconditionerTime += MPI_Wtime() - init_time;
00365 #endif
00366 
00367     //y2 = inv(A22)*(x2 - B*z)
00368 #ifdef TRACE_KSP
00369     init_time = MPI_Wtime();
00370 #endif
00371     MatMult(block_diag_context->B_matrix_subblock,block_diag_context->z,block_diag_context->temp); //temp = B*z
00372     double minus_one = -1.0;
00373 #if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2
00374     VecAYPX(&minus_one, block_diag_context->x2_subvector, block_diag_context->temp); // temp <-- x2 - temp
00375 #else
00376     VecAYPX(block_diag_context->temp, minus_one, block_diag_context->x2_subvector); // temp <-- x2 - temp
00377 #endif
00378 #ifdef TRACE_KSP
00379     block_diag_context->mExtraLAOperations += MPI_Wtime() - init_time;
00380 #endif
00381 
00382 
00383 #ifdef TRACE_KSP
00384     init_time = MPI_Wtime();
00385 #endif
00386     PCApply(block_diag_context->PC_amg_A22, block_diag_context->temp, block_diag_context->y2_subvector); // y2 = inv(A22)*temp
00387 #ifdef TRACE_KSP
00388     block_diag_context->mA2PreconditionerTime += MPI_Wtime() - init_time;
00389 #endif
00390 
00391     // y1 = z - inv(A11)(B*y2)
00392 #ifdef TRACE_KSP
00393     init_time = MPI_Wtime();
00394 #endif
00395     MatMult(block_diag_context->B_matrix_subblock,block_diag_context->y2_subvector,block_diag_context->temp); //temp = B*y2
00396 #ifdef TRACE_KSP
00397     block_diag_context->mExtraLAOperations += MPI_Wtime() - init_time;
00398 #endif
00399 #ifdef TRACE_KSP
00400     init_time = MPI_Wtime();
00401 #endif
00402     PCApply(block_diag_context->PC_amg_A11, block_diag_context->temp, block_diag_context->y1_subvector); // y1 = inv(A11)*temp
00403 #ifdef TRACE_KSP
00404     block_diag_context->mA1PreconditionerTime += MPI_Wtime() - init_time;
00405 #endif
00406 
00407 #ifdef TRACE_KSP
00408     init_time = MPI_Wtime();
00409 #endif
00410 #if (PETSC_VERSION_MAJOR == 2 && PETSC_VERSION_MINOR == 2) //PETSc 2.2
00411     VecAYPX(&minus_one, block_diag_context->z, block_diag_context->y1_subvector); // y1 <-- z - y1
00412 #else
00413     VecAYPX(block_diag_context->y1_subvector, minus_one, block_diag_context->z); // y1 <-- z - y1
00414 #endif
00415 #ifdef TRACE_KSP
00416     block_diag_context->mExtraLAOperations += MPI_Wtime() - init_time;
00417 #endif
00418 
00419     /*
00420      * Gather vectors y1 and y2. y = [y1 y2]'
00421      */
00422 #ifdef TRACE_KSP
00423     init_time = MPI_Wtime();
00424 #endif
00425 
00426     PetscVecTools::DoInterleavedVecGather(y, block_diag_context->A11_scatter_ctx, block_diag_context->y1_subvector, block_diag_context->A22_scatter_ctx, block_diag_context->y2_subvector);
00427 
00428 #ifdef TRACE_KSP
00429     block_diag_context->mGatherTime += MPI_Wtime() - init_time;
00430 #endif
00431 
00432     return 0;
00433 }