NodePartitioner.cpp (Chaste Release 3.1)
/*

Copyright (c) 2005-2012, University of Oxford.
All rights reserved.

University of Oxford means the Chancellor, Masters and Scholars of the
University of Oxford, having an administrative office at Wellington
Square, Oxford OX1 2JD, UK.

This file is part of Chaste.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
 * Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of the University of Oxford nor the names of its
   contributors may be used to endorse or promote products derived from this
   software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/
#include <cassert>
#include <algorithm>

#include "Exception.hpp"
#include "NodePartitioner.hpp"
#include "PetscMatTools.hpp"
#include "PetscTools.hpp"
#include "Timer.hpp"
#include "TrianglesMeshReader.hpp"
#include "Warnings.hpp"
#include "petscao.h"
#include "petscmat.h"

/*
 * The following declaration fixes an odd incompatibility between METIS 4.0 and Chaste. Since
 * the library is compiled with a plain C compiler, it fails to link when using a C++ compiler.
 * Note that METIS 4.0 fails to compile with g++ or icpc, so a C compiler should be used.
 *
 * Somebody had this problem before: http://www-users.cs.umn.edu/~karypis/.discus/messages/15/113.html?1119486445
 *
 * Note that it is necessary to declare the function prototype before the #include statement.
 */
extern "C" {
extern void METIS_PartMeshNodal(int*, int*, int*, int*, int*, int*, int*, int*, int*);
}
#include <parmetis.h>


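/*
 * DumbPartitioning: assign each process the contiguous block of nodes given by the mesh's
 * DistributedVectorFactory, i.e. the same layout as the distributed vectors built on this mesh.
 */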
template <unsigned ELEMENT_DIM, unsigned SPACE_DIM>
void NodePartitioner<ELEMENT_DIM, SPACE_DIM>::DumbPartitioning(AbstractMesh<ELEMENT_DIM, SPACE_DIM>& rMesh,
                                                               std::set<unsigned>& rNodesOwned)
{
    // Note that if there is no DistributedVectorFactory in the mesh, then a dumb partition based
    // on rMesh.GetNumNodes() is applied automatically.
    // If there is a DistributedVectorFactory then that one will be used.
    if (rMesh.GetDistributedVectorFactory()->GetProblemSize() != rMesh.GetNumNodes())
    {
        EXCEPTION("The distributed vector factory size in the mesh doesn't match the total number of nodes.");
    }

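    // Own the factory's contiguous local range of node indices, [GetLow(), GetHigh()).
    // For example (assuming the factory uses PETSc's default layout), 10 nodes on 3 processes
    // gives [0,4) to process 0, [4,7) to process 1 and [7,10) to process 2.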
    for (unsigned node_index = rMesh.GetDistributedVectorFactory()->GetLow();
         node_index < rMesh.GetDistributedVectorFactory()->GetHigh();
         node_index++)
    {
        rNodesOwned.insert(node_index);
    }
}


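/*
 * MetisLibraryPartitioning: the master process reads all the element data, calls METIS_PartMeshNodal
 * to assign each node to a process, and broadcasts the result. The node permutation and per-process
 * offsets are then computed identically on every process.
 */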
template <unsigned ELEMENT_DIM, unsigned SPACE_DIM>
void NodePartitioner<ELEMENT_DIM, SPACE_DIM>::MetisLibraryPartitioning(AbstractMeshReader<ELEMENT_DIM, SPACE_DIM>& rMeshReader,
                                                                        std::vector<unsigned>& rNodesPermutation,
                                                                        std::set<unsigned>& rNodesOwned,
                                                                        std::vector<unsigned>& rProcessorsOffset)
{
    assert(PetscTools::IsParallel());

    assert(ELEMENT_DIM==2 || ELEMENT_DIM==3); // Metis works with triangles and tetrahedra

    // Metis 4.0 cannot partition second-order meshes, see #1930
    TrianglesMeshReader<ELEMENT_DIM, SPACE_DIM>* p_mesh_reader = dynamic_cast<TrianglesMeshReader<ELEMENT_DIM, SPACE_DIM>*>(&rMeshReader);

    unsigned order_of_elements = 1;
    if (p_mesh_reader)
    {
        // A TrianglesMeshReader will let you read non-linear elements
        order_of_elements = p_mesh_reader->GetOrderOfElements();
    }

    // If it is a quadratic TrianglesMeshReader
    if (order_of_elements == 2)
    {
        EXCEPTION("Metis cannot partition a quadratic mesh.");
    }

    int nn = rMeshReader.GetNumNodes();
    idxtype* npart = new idxtype[nn];
    assert(npart != NULL);
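    // After the METIS call, npart[i] will hold the rank of the process that owns node i;
    // it is filled on the master process and broadcast to everyone below.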

    // Only the master process will access the element data and perform the partitioning
    if (PetscTools::AmMaster())
    {
        int ne = rMeshReader.GetNumElements();
        idxtype* elmnts = new idxtype[ne * (ELEMENT_DIM+1)];
        assert(elmnts != NULL);

        unsigned counter=0;
        for (unsigned element_number = 0; element_number < rMeshReader.GetNumElements(); element_number++)
        {
            ElementData element_data = rMeshReader.GetNextElementData();

            for (unsigned i=0; i<ELEMENT_DIM+1; i++)
            {
                elmnts[counter++] = element_data.NodeIndices[i];
            }
        }
        rMeshReader.Reset();

        int etype;

        switch (ELEMENT_DIM)
        {
            case 2:
                etype = 1; // 1 is Metis speak for triangles
                break;
            case 3:
                etype = 2; // 2 is Metis speak for tetrahedra
                break;
            default:
                NEVER_REACHED;
        }

        int numflag = 0; // 0 means C-style numbering is assumed
        int nparts = PetscTools::GetNumProcs();
        int edgecut;
        idxtype* epart = new idxtype[ne];
        assert(epart != NULL);

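        // METIS_PartMeshNodal fills epart (element -> process) and npart (node -> process);
        // only the nodal partition npart is used here, so epart is deleted straight after the call.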
        Timer::Reset();
        METIS_PartMeshNodal(&ne, &nn, elmnts, &etype, &numflag, &nparts, &edgecut, epart, npart);//, wgetflag, vwgt);
        //Timer::Print("METIS call");

        delete[] elmnts;
        delete[] epart;
    }

    // Here's the new bottleneck: share all the node ownership data
    // idxtype is normally int (see metis-4.0/Lib/struct.h 17-22)
    assert(sizeof(idxtype) == sizeof(int));
    MPI_Bcast(npart /*data*/, nn /*size*/, MPI_INT, 0 /*From Master*/, PETSC_COMM_WORLD);

    assert(rProcessorsOffset.size() == 0); // Making sure the vector is empty. After calling resize() only newly created memory will be initialised to 0.
    rProcessorsOffset.resize(PetscTools::GetNumProcs(), 0);

    for (unsigned node_index=0; node_index<rMeshReader.GetNumNodes(); node_index++)
    {
        unsigned part_read = npart[node_index];

        // METIS output says I own this node
        if (part_read == PetscTools::GetMyRank())
        {
            rNodesOwned.insert(node_index);
        }

        // A process's offset is defined as the index of the first node it owns. We compute it
        // incrementally: if node_index belongs to process 3 (of 6, numbered 0 to 5), then the
        // offsets of processes 4 and 5 are each shifted up by one.
        for (unsigned proc=part_read+1; proc<PetscTools::GetNumProcs(); proc++)
        {
            rProcessorsOffset[proc]++;
        }
    }
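
    // Example (hypothetical values): with 4 processes and npart = {0, 1, 1, 0, 2, 3}, the loop
    // above produces rProcessorsOffset = {0, 2, 4, 5}: process p's first node comes after all
    // the nodes owned by processes 0 to p-1.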

    /*
     * Once we know the offsets we can compute the permutation vector
     */
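    // The permutation maps original (file) node index to new global index: the nodes owned by
    // process p are renumbered contiguously from rProcessorsOffset[p], in file order.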
    std::vector<unsigned> local_index(PetscTools::GetNumProcs(), 0);

    rNodesPermutation.resize(rMeshReader.GetNumNodes());

    for (unsigned node_index=0; node_index<rMeshReader.GetNumNodes(); node_index++)
    {
        unsigned part_read = npart[node_index];

        rNodesPermutation[node_index] = rProcessorsOffset[part_read] + local_index[part_read];

        local_index[part_read]++;
    }

    delete[] npart;
}


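/*
 * PetscMatrixPartitioning: build the node connectivity as a parallel PETSc matrix, convert it to
 * a MATMPIADJ adjacency matrix and let PETSc's MatPartitioning interface (backed by ParMETIS)
 * partition it. The outputs are the set of locally owned nodes, the per-process offsets and the
 * old-to-new node permutation.
 */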
template <unsigned ELEMENT_DIM, unsigned SPACE_DIM>
void NodePartitioner<ELEMENT_DIM, SPACE_DIM>::PetscMatrixPartitioning(AbstractMeshReader<ELEMENT_DIM, SPACE_DIM>& rMeshReader,
                                                                       std::vector<unsigned>& rNodesPermutation,
                                                                       std::set<unsigned>& rNodesOwned,
                                                                       std::vector<unsigned>& rProcessorsOffset)
{
    assert(PetscTools::IsParallel());
    assert(ELEMENT_DIM==2 || ELEMENT_DIM==3); // Metis works with triangles and tetrahedra

    if (!PetscTools::HasParMetis()) // We must have ParMetis support compiled into PETSc
    {
        #define COVERAGE_IGNORE
        WARNING("PETSc has not been installed with ParMetis support.");
        #undef COVERAGE_IGNORE
    }

    unsigned num_nodes = rMeshReader.GetNumNodes();
    PetscInt num_procs = PetscTools::GetNumProcs();
    unsigned local_proc_index = PetscTools::GetMyRank();

    unsigned num_elements = rMeshReader.GetNumElements();
    unsigned num_local_elements = num_elements / num_procs;
    unsigned first_local_element = num_local_elements * local_proc_index;
    if (PetscTools::AmTopMost())
    {
        // The top-most process takes the excess elements
        num_local_elements += num_elements - (num_local_elements * num_procs);
    }
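
    // For example, with 10 elements on 4 processes: processes 0-2 read 2 elements each and
    // process 3 (the top-most) reads the remaining 4.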

    PetscTools::Barrier();
    Timer::Reset();

    /*
     * Create a PETSc matrix which will have a 1 for each pair of adjacent nodes.
     */
    Mat connectivity_matrix;
    PetscTools::SetupMat(connectivity_matrix, num_nodes, num_nodes, 54, PETSC_DECIDE, PETSC_DECIDE, false);
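    // (Assumption: the 54 passed to SetupMat is the per-row non-zero preallocation, i.e. an
    //  estimated upper bound on the number of nodes adjacent to any given node.)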

    if ( ! rMeshReader.IsFileFormatBinary() )
    {
        // Advance the file pointer to the first element I own
        for (unsigned element_index = 0; element_index < first_local_element; element_index++)
        {
            ElementData element_data = rMeshReader.GetNextElementData();
        }
    }

    // In the loop below we assume that there exist edges between any pair of nodes in an element. This is
    // a reasonable assumption for triangles and tetrahedra. This won't be the case for quadratic simplices,
    // squares or hexahedra (or higher order elements), leading to slightly suboptimal partitions in these
    // cases.
    // We allow ELEMENT_DIM smaller than SPACE_DIM in case this is a 2D mesh in a 3D space.
    assert(SPACE_DIM >= ELEMENT_DIM);

    for (unsigned element_index = 0; element_index < num_local_elements; element_index++)
    {
        ElementData element_data;

        if ( rMeshReader.IsFileFormatBinary() )
        {
            element_data = rMeshReader.GetElementData(first_local_element + element_index);
        }
        else
        {
            element_data = rMeshReader.GetNextElementData();
        }

        for (unsigned i=0; i<element_data.NodeIndices.size(); i++)
        {
            unsigned row = element_data.NodeIndices[i];
            for (unsigned j=0; j<i; j++)
            {
                unsigned col = element_data.NodeIndices[j];
                MatSetValue(connectivity_matrix, row, col, 1.0, INSERT_VALUES);
                MatSetValue(connectivity_matrix, col, row, 1.0, INSERT_VALUES);
            }
        }
    }

    PetscMatTools::Finalise(connectivity_matrix);

    PetscTools::Barrier();
    if (PetscTools::AmMaster())
    {
        Timer::PrintAndReset("Connectivity matrix assembly");
    }

    rMeshReader.Reset();

    PetscInt connectivity_matrix_lo;
    PetscInt connectivity_matrix_hi;
    MatGetOwnershipRange(connectivity_matrix, &connectivity_matrix_lo, &connectivity_matrix_hi);

    unsigned num_local_nodes = connectivity_matrix_hi - connectivity_matrix_lo;

    /* PETSc MatCreateMPIAdj and ParMETIS rely on compressed adjacency arrays
     * Named here:      xadj            adjncy
     * ParMETIS name:   xadj            adjncy    (see S4.2 in the ParMETIS manual)
     * PETSc name:      i               j         (see MatCreateMPIAdj in the PETSc manual)
     *
     * The adjacency information of all nodes is listed in the main array, adjncy.  Since each node
     * has a variable number of adjacent nodes, the array xadj is used to store the index (in adjncy) where
     * this information starts.  Since xadj[i] is the start of node i's information, xadj[i+1] marks the end.
     */
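    // Hypothetical example: if local node 0 is adjacent to global nodes {1, 4} and local node 1
    // to {0, 2, 3}, then xadj begins {0, 2, 5, ...} and adjncy begins {1, 4, 0, 2, 3, ...}.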
    MatInfo matrix_info;
    MatGetInfo(connectivity_matrix, MAT_LOCAL, &matrix_info);
    unsigned local_num_nz = (unsigned) matrix_info.nz_used;

    size_t size = (num_local_nodes+1)*sizeof(PetscInt);
    void* ptr;
    PetscMalloc(size, &ptr);
    PetscInt* xadj = (PetscInt*) ptr;
    size = local_num_nz*sizeof(PetscInt);
    PetscMalloc(size, &ptr);
    PetscInt* adjncy = (PetscInt*) ptr;

    PetscInt row_num_nz;
    const PetscInt* column_indices;

    xadj[0] = 0;
    for (PetscInt row_global_index=connectivity_matrix_lo; row_global_index<connectivity_matrix_hi; row_global_index++)
    {
        MatGetRow(connectivity_matrix, row_global_index, &row_num_nz, &column_indices, PETSC_NULL);

        unsigned row_local_index = row_global_index - connectivity_matrix_lo;
        xadj[row_local_index+1] = xadj[row_local_index] + row_num_nz;
        for (PetscInt col_index=0; col_index<row_num_nz; col_index++)
        {
            adjncy[xadj[row_local_index] + col_index] = column_indices[col_index];
        }

        MatRestoreRow(connectivity_matrix, row_global_index, &row_num_nz, &column_indices, PETSC_NULL);
    }

    PetscTools::Destroy(connectivity_matrix);

    // Convert to an adjacency matrix
    Mat adj_matrix;
    MatCreateMPIAdj(PETSC_COMM_WORLD, num_local_nodes, num_nodes, xadj, adjncy, PETSC_NULL, &adj_matrix);
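    // Note: MatCreateMPIAdj does not copy xadj and adjncy; PETSc takes ownership of them (hence
    // the PetscMalloc above) and frees them when adj_matrix is destroyed, so they are not freed here.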

    PetscTools::Barrier();
    if (PetscTools::AmMaster())
    {
        Timer::PrintAndReset("Adjacency matrix creation");
    }

    // Get PETSc to call ParMETIS
    MatPartitioning part;
    MatPartitioningCreate(PETSC_COMM_WORLD, &part);
    MatPartitioningSetAdjacency(part, adj_matrix);
    MatPartitioningSetFromOptions(part);
    IS new_process_numbers;

    MatPartitioningApply(part, &new_process_numbers);
    MatPartitioningDestroy(PETSC_DESTROY_PARAM(part));

    PetscTools::Destroy(adj_matrix);

    PetscTools::Barrier();
    if (PetscTools::AmMaster())
    {
        Timer::PrintAndReset("PETSc-ParMETIS call");
    }

    // Figure out who owns what - processor offsets
    PetscInt* num_nodes_per_process = new PetscInt[num_procs];
#if (PETSC_VERSION_MAJOR == 3) // PETSc 3.x.x
    ISPartitioningCount(new_process_numbers, num_procs, num_nodes_per_process);
#else
    ISPartitioningCount(new_process_numbers, num_nodes_per_process);
#endif

    rProcessorsOffset.resize(num_procs);
    rProcessorsOffset[0] = 0;
    for (PetscInt i=1; i<num_procs; i++)
    {
        rProcessorsOffset[i] = rProcessorsOffset[i-1] + num_nodes_per_process[i-1];
    }
    unsigned my_num_nodes = num_nodes_per_process[local_proc_index];
    delete[] num_nodes_per_process;

    // Figure out who owns what - new node numbering
    IS new_global_node_indices;
    ISPartitioningToNumbering(new_process_numbers, &new_global_node_indices);

    // Index sets only give local information, we want global
    AO ordering;
    AOCreateBasicIS(new_global_node_indices, PETSC_NULL /* natural ordering */, &ordering);
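    // In this AO the 'application' ordering is the new (partition-contiguous) numbering and the
    // 'PETSc' ordering is the original node numbering, so AOApplicationToPetsc (used just below)
    // maps new global indices back to original node indices, and AOPetscToApplication maps
    // original indices to new ones.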

    // The locally owned range under the new numbering
    PetscInt* local_range = new PetscInt[my_num_nodes];
    for (unsigned i=0; i<my_num_nodes; i++)
    {
        local_range[i] = rProcessorsOffset[local_proc_index] + i;
    }
    AOApplicationToPetsc(ordering, my_num_nodes, local_range);
    //AOView(ordering, PETSC_VIEWER_STDOUT_WORLD);

    // Fill in rNodesOwned
    rNodesOwned.insert(local_range, local_range + my_num_nodes);
    delete[] local_range;

    // Once we know the offsets we can compute the permutation vector
    PetscInt* global_range = new PetscInt[num_nodes];
    for (unsigned i=0; i<num_nodes; i++)
    {
        global_range[i] = i;
    }
    AOPetscToApplication(ordering, num_nodes, global_range);

    rNodesPermutation.resize(num_nodes);
    std::copy(global_range, global_range+num_nodes, rNodesPermutation.begin());
    delete[] global_range;

    AODestroy(PETSC_DESTROY_PARAM(ordering));
    ISDestroy(PETSC_DESTROY_PARAM(new_process_numbers));
    ISDestroy(PETSC_DESTROY_PARAM(new_global_node_indices));

    PetscTools::Barrier();
    if (PetscTools::AmMaster())
    {
        Timer::PrintAndReset("PETSc-ParMETIS output manipulation");
    }
}

// Explicit instantiation
template class NodePartitioner<1,1>;
template class NodePartitioner<1,2>;
template class NodePartitioner<1,3>;
template class NodePartitioner<2,2>;
template class NodePartitioner<2,3>;
template class NodePartitioner<3,3>;
