Chaste  Release::2018.1
Hdf5DataWriter.hpp
1 /*
2 
3 Copyright (c) 2005-2018, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 #ifndef HDF5DATAWRITER_HPP_
37 #define HDF5DATAWRITER_HPP_
38 
39 #include <vector>
40 
41 #include "AbstractHdf5Access.hpp"
42 #include "DataWriterVariable.hpp"
43 #include "DistributedVectorFactory.hpp"
44 
/**
 * Writer for HDF5 data, derived from AbstractHdf5Access.
 *
 * The public interface consists of Define* methods plus EndDefineMode(),
 * Put* methods for writing values, and Close().
 * NOTE(review): presumably these are meant to be called in that order
 * (define, end define mode, put, close) - confirm against the implementation.
 *
 * NOTE(review): this listing looks incomplete. The member index that follows
 * the class names members that are not declared here (for example
 * mrVectorFactory, mChunkTargetSize and AdvanceAlongUnlimitedDimension()),
 * and the first line of the constructor declaration is absent. Some of those
 * names may instead belong to the base class - confirm against the real header.
 */
48 class Hdf5DataWriter : public AbstractHdf5Access //: public AbstractDataWriter
49 {
    /** The test class is granted access to the private members below. */
50  friend class TestHdf5DataWriter;
51 private:
52 
55 
    /**
     * Immutable flag.
     * NOTE(review): presumably initialised from the constructor's
     * cleanDirectory argument - confirm.
     */
56  const bool mCleanDirectory;
    /**
     * Immutable flag.
     * NOTE(review): presumably initialised from the constructor's
     * extendData argument - confirm.
     */
57  const bool mUseExistingFile;
    /*
     * NOTE(review): mLo, mHi, mNumberOwned and mOffset presumably describe
     * the part of the distributed data handled by this process (compare
     * DistributedVectorFactory) - confirm exact meaning and whether mHi is
     * inclusive or exclusive.
     */
64  unsigned mLo;
65  unsigned mHi;
66  unsigned mNumberOwned;
67  unsigned mOffset;
    /** NOTE(review): presumably consulted by PossiblyExtend() - confirm. */
69  bool mNeedExtend;
    /**
     * The variables known to this writer.
     * NOTE(review): presumably filled by DefineVariable() - confirm.
     */
72  std::vector<DataWriterVariable> mVariables;
    /** NOTE(review): presumably the index along the unlimited dimension - confirm. */
74  long unsigned mCurrentTimeStep;
    /**
     * Cache-related state; see the useCache constructor argument,
     * GetUsingCache() and WriteCache().
     * NOTE(review): the layout of mDataCache and the role of
     * mCacheFirstTimeStep cannot be determined from this header - confirm.
     */
90  bool mUseCache;
91  long unsigned mCacheFirstTimeStep;
92  std::vector<double> mDataCache;
    /**
     * Check a variable name.
     * NOTE(review): the validity rules and the failure behaviour are not
     * visible here - confirm.
     *
     * @param rName  the name to check
     */
99  void CheckVariableName(const std::string& rName);
100 
    /**
     * Check a units name.
     * NOTE(review): the validity rules and the failure behaviour are not
     * visible here - confirm.
     *
     * @param rName  the name to check
     */
106  void CheckUnitsName(const std::string& rName);
107 
112 
    /** Open the file. NOTE(review): mode and path handling are not visible here. */
118  void OpenFile();
119 
125 
    /**
     * Work out chunk dimensions for a given target size.
     * NOTE(review): the two pointer arguments are presumably outputs - confirm.
     *
     * @param targetSize  the chunk size to aim for
     * @param pChunkSizeInBytes  pointer to a chunk size in bytes
     * @param pAllOneChunk  pointer to a flag, presumably "the whole dataset fits in one chunk"
     */
134  void CalculateChunkDims( unsigned targetSize, unsigned* pChunkSizeInBytes, bool* pAllOneChunk );
135 
    /** Set the chunk size. NOTE(review): presumably uses CalculateChunkDims() - confirm. */
144  void SetChunkSize();
145 
146 public:
147 
    /**
     * Constructor (the lines below are the tail of its parameter list).
     * NOTE(review): the opening line of this declaration is absent from this
     * listing; the member index after the class shows the full signature,
     * which begins with a DistributedVectorFactory& rVectorFactory parameter.
     *
     * @param rDirectory  NOTE(review): presumably the output directory - confirm
     * @param rBaseName  NOTE(review): presumably the file name without extension - confirm
     * @param cleanDirectory  defaults to true
     * @param extendData  defaults to false
     * @param datasetName  defaults to "Data"
     * @param useCache  defaults to false
     */
164  const std::string& rDirectory,
165  const std::string& rBaseName,
166  bool cleanDirectory=true,
167  bool extendData=false,
168  std::string datasetName="Data",
169  bool useCache=false);
170 
    /** Virtual destructor. */
174  virtual ~Hdf5DataWriter();
175 
    /**
     * Define the fixed dimension from a size alone.
     *
     * @param dimensionSize  the size of the fixed dimension
     */
181  void DefineFixedDimension(long dimensionSize);
182 
    /**
     * Define the fixed dimension for a subset of nodes.
     *
     * @param rNodesToOuput  the nodes to output (the name is spelt this way in the declaration)
     * @param vecSize  NOTE(review): presumably the size of the full vector - confirm
     */
189  void DefineFixedDimension(const std::vector<unsigned>& rNodesToOuput, long vecSize);
190 
    /**
     * Define the unlimited dimension.
     *
     * @param rVariableName  name of the unlimited variable
     * @param rVariableUnits  units of the unlimited variable
     * @param estimatedLength  defaults to 1
     */
199  void DefineUnlimitedDimension(const std::string& rVariableName, const std::string& rVariableUnits, unsigned estimatedLength = 1);
200 
206 
    /**
     * Define a variable.
     *
     * @param rVariableName  name of the variable
     * @param rVariableUnits  units of the variable
     * @return an int; NOTE(review): presumably the variable ID accepted by
     *         PutVector() and PutStripedVector() - confirm
     */
215  int DefineVariable(const std::string& rVariableName, const std::string& rVariableUnits);
216 
    /** @return whether the writer is in define mode */
226  bool IsInDefineMode();
227 
    /** End define mode. Virtual, so subclasses may override it. */
231  virtual void EndDefineMode();
232 
    /** NOTE(review): presumably extends the dataset when mNeedExtend is set - confirm. */
236  void PossiblyExtend();
237 
    /** NOTE(review): exact effect on the dataset is not visible here - confirm. */
244  void EmptyDataset();
245 
    /**
     * Write a PETSc vector for one variable.
     *
     * @param variableID  the variable to write to
     * @param petscVector  the data
     */
252  void PutVector(int variableID, Vec petscVector);
253 
    /**
     * Write a PETSc vector holding several variables.
     * NOTE(review): the interleaving expected in petscVector is not visible
     * here - confirm. Note that variableIDs is taken by value.
     *
     * @param variableIDs  the variables to write to
     * @param petscVector  the data
     */
260  void PutStripedVector(std::vector<int> variableIDs, Vec petscVector);
261 
    /** @return a bool; NOTE(review): presumably mUseCache - confirm */
266  bool GetUsingCache();
267 
    /** NOTE(review): presumably flushes mDataCache to the file - confirm. */
271  void WriteCache();
272 
    /**
     * Write a value of the unlimited variable.
     *
     * @param value  the value to write
     */
278  void PutUnlimitedVariable(double value);
279 
    /** Close the writer. NOTE(review): whether this also flushes the cache is not visible here. */
283  void Close();
284 
    /**
     * Look up a variable by name.
     * NOTE(review): behaviour for an unknown name is not visible here - confirm.
     *
     * @param rVariableName  the name to look up
     * @return an int, presumably the variable ID
     */
292  int GetVariableByName(const std::string& rVariableName);
293 
294 
    /**
     * Apply a permutation.
     * NOTE(review): the meaning of the return value and of
     * unsafeExtendingMode is not visible here - confirm.
     *
     * @param rPermutation  the permutation
     * @param unsafeExtendingMode  defaults to false
     * @return a bool
     */
303  bool ApplyPermutation(const std::vector<unsigned>& rPermutation, bool unsafeExtendingMode=false);
304 
    /**
     * Takes the same arguments as the two-argument DefineFixedDimension().
     * NOTE(review): presumably differs by using a matrix for incomplete
     * output - confirm.
     *
     * @param rNodesToOuput  the nodes to output (the name is spelt this way in the declaration)
     * @param vecSize  NOTE(review): presumably the size of the full vector - confirm
     */
312  void DefineFixedDimensionUsingMatrix(const std::vector<unsigned>& rNodesToOuput, long vecSize);
313 
    /**
     * Set a fixed chunk size, given per dimension.
     * NOTE(review): presumably bypasses the target-size calculation - confirm.
     *
     * @param rTimestepsPerChunk  timesteps per chunk
     * @param rNodesPerChunk  nodes per chunk
     * @param rVariablesPerChunk  variables per chunk
     */
328  void SetFixedChunkSize(const unsigned& rTimestepsPerChunk,
329  const unsigned& rNodesPerChunk,
330  const unsigned& rVariablesPerChunk);
331 
    /*
     * NOTE(review): "above" and "below" in the notes that follow refer to
     * code that is not part of this header, presumably in the implementation
     * file - confirm, and consider naming the relevant functions instead.
     */
332  /*
333  * * NOTES ON CHUNK SIZE AND ALIGNMENT *
334  *
335  * The default target chunk size is 128 K, which seems to be a good compromise
336  * for small problems (e.g. on a desktop PC). For larger problems, I/O
337  * performance often improves with increased chunk size. A sweet spot seems to
338  * be 1 M chunks.
339  *
340  * On a striped filesystem, for best performance set the chunk size and
341  * alignment (using `H5Pset_alignment` above) to the file stripe size. With
342  * `H5Pset_alignment`, every chunk starts at a multiple of the alignment value.
343  *
344  * To avoid wasting space, the chunk size should be an integer multiple of the
345  * alignment value. Note that the algorithm below automatically goes back one
346  * step after exceeding the chunk size, which minimises wasted space. To see
347  * why, consider the examples below.
348  *
349  * (Example 1) Say our file system uses 1 M stripes. If we set
350  * target_size_in_bytes = 1024*1024;
351  * below and uncomment
352  * H5Pset_alignment(fapl, 0, 1024*1024);
353  * above, i.e. aim for (slightly under) 1 M chunks and align them to 1 M
354  * boundaries, then the algorithm below will get as close as possible to 1 M
355  * chunks but not exceed it, so each chunk will be padded slightly to sit on
356  * the 1 M boundaries. Each chunk will therefore have its own stripe on the
357  * file system, which should give us the best bandwidth and least contention.
358  * Conclusion: this is optimal!
359  *
360  * Note: In general the algorithm can get very close to the target so the
361  * waste isn't bad. Typical utilization is 99.99% (check with "h5ls -v ...").
362  *
363  * (Example 2) We set
364  * target_size_in_bytes = 128*1024;
365  * and uncomment
366  * H5Pset_alignment(fapl, 0, 1024*1024);
367  * i.e. 128 K chunks aligned to 1 M boundaries. This would pad every chunk to
368  * 1 M boundaries, wasting 7/8 of the space in the file! A file which might be
369  * 5 G with an efficient layout would be more like 40 G! Conclusion: setting
370  * the chunk size to less than the alignment value is very bad!
371  *
372  * (Example 3) Say our file system uses 1 M stripes. We set
373  * target_size_in_bytes = 2*1024*1024;
374  * and uncomment
375  * H5Pset_alignment(fapl, 0, 1024*1024);
376  * i.e. 2 M chunks aligned to 1 M boundaries. This might not be optimal, but
377  * it's OK, since the chunk size is (slightly under) twice the alignment, as in
378  * Example 1 the amount of padding would be very small. Each read/write would
379  * require accessing 2 stripes on the file system. Conclusion: a chunk size of
380  * an integer multiple of the alignment value is fine (but not optimal).
381  */
382 
    /**
     * Set the target chunk size. The notes above give 128 K as the default
     * target and suggest larger values for larger problems.
     * NOTE(review): the unit is presumably bytes - confirm.
     *
     * @param targetSize  the chunk size to aim for
     */
396  void SetTargetChunkSize(hsize_t targetSize);
397 
    /**
     * Set the alignment. See the notes above for how the alignment and the
     * chunk size interact.
     * NOTE(review): presumably the value is passed to H5Pset_alignment - confirm.
     *
     * @param alignment  the alignment value
     */
412  void SetAlignment(hsize_t alignment);
413 };
414 
415 #endif /*HDF5DATAWRITER_HPP_*/
void ComputeIncompleteOffset()
hsize_t CalculateNumberOfChunks()
bool ApplyPermutation(const std::vector< unsigned > &rPermutation, bool unsafeExtendingMode=false)
bool mUseOptimalChunkSizeAlgorithm
void CheckVariableName(const std::string &rName)
const bool mUseExistingFile
int GetVariableByName(const std::string &rVariableName)
hsize_t mChunkTargetSize
void CalculateChunkDims(unsigned targetSize, unsigned *pChunkSizeInBytes, bool *pAllOneChunk)
void DefineFixedDimensionUsingMatrix(const std::vector< unsigned > &rNodesToOuput, long vecSize)
void SetTargetChunkSize(hsize_t targetSize)
bool mUseMatrixForIncompleteData
const bool mCleanDirectory
void SetFixedChunkSize(const unsigned &rTimestepsPerChunk, const unsigned &rNodesPerChunk, const unsigned &rVariablesPerChunk)
unsigned mEstimatedUnlimitedLength
long unsigned mCacheFirstTimeStep
void AdvanceAlongUnlimitedDimension()
DistributedVectorFactory & mrVectorFactory
hsize_t mFixedChunkSize[DATASET_DIMS]
int DefineVariable(const std::string &rVariableName, const std::string &rVariableUnits)
Mat mDoubleIncompleteOutputMatrix
void PutVector(int variableID, Vec petscVector)
unsigned mNumberOwned
std::vector< double > mDataCache
void DefineFixedDimension(long dimensionSize)
Hdf5DataWriter(DistributedVectorFactory &rVectorFactory, const std::string &rDirectory, const std::string &rBaseName, bool cleanDirectory=true, bool extendData=false, std::string datasetName="Data", bool useCache=false)
void SetAlignment(hsize_t alignment)
virtual ~Hdf5DataWriter()
Mat mSingleIncompleteOutputMatrix
void CheckUnitsName(const std::string &rName)
void PutStripedVector(std::vector< int > variableIDs, Vec petscVector)
std::vector< DataWriterVariable > mVariables
void PutUnlimitedVariable(double value)
static const unsigned DATASET_DIMS
hsize_t mNumberOfChunks
unsigned mDataFixedDimensionSize
long unsigned mCurrentTimeStep
hsize_t mChunkSize[DATASET_DIMS]
unsigned mFileFixedDimensionSize
virtual void EndDefineMode()
void DefineUnlimitedDimension(const std::string &rVariableName, const std::string &rVariableUnits, unsigned estimatedLength=1)