Chaste  Release::2017.1
Hdf5DataReader.cpp
1 /*
2 
3 Copyright (c) 2005-2017, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
36 #include "Hdf5DataReader.hpp"
37 #include "Exception.hpp"
38 #include "OutputFileHandler.hpp"
39 #include "PetscTools.hpp"
40 
41 #include <cassert>
42 #include <algorithm>
43 
44 Hdf5DataReader::Hdf5DataReader(const std::string& rDirectory,
45  const std::string& rBaseName,
46  bool makeAbsolute,
47  std::string datasetName)
48  : AbstractHdf5Access(rDirectory, rBaseName, datasetName, makeAbsolute),
49  mNumberTimesteps(1),
50  mClosed(false)
51 {
53 }
54 
56  const std::string& rBaseName,
57  std::string datasetName)
58  : AbstractHdf5Access(rDirectory, rBaseName, datasetName),
60  mClosed(false)
61 {
63 }
64 
66 {
67  if (!mDirectory.IsDir() || !mDirectory.Exists())
68  {
69  EXCEPTION("Directory does not exist: " + mDirectory.GetAbsolutePath());
70  }
71 
72  std::string file_name = mDirectory.GetAbsolutePath() + mBaseName + ".h5";
73  FileFinder h5_file(file_name, RelativeTo::Absolute);
74 
75  if (!h5_file.Exists())
76  {
77  EXCEPTION("Hdf5DataReader could not open " + file_name + " , as it does not exist.");
78  }
79 
80  // Open the file and the main dataset
81  mFileId = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
82 
83  if (mFileId <= 0)
84  {
85  EXCEPTION("Hdf5DataReader could not open " << file_name <<
86  " , H5Fopen error code = " << mFileId);
87  }
88 
89  mVariablesDatasetId = H5Dopen(mFileId, mDatasetName.c_str(), H5P_DEFAULT);
91 
92  if (mVariablesDatasetId <= 0)
93  {
94  H5Fclose(mFileId);
95  EXCEPTION("Hdf5DataReader opened " << file_name << " but could not find the dataset '" <<
96  mDatasetName.c_str() << "', H5Dopen error code = " << mVariablesDatasetId);
97  }
98 
99  hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
100  mVariablesDatasetRank = H5Sget_simple_extent_ndims(variables_dataspace);
101 
102  // Get the dataset/dataspace dimensions
103  hsize_t dataset_max_sizes[AbstractHdf5Access::DATASET_DIMS];
104  H5Sget_simple_extent_dims(variables_dataspace, mDatasetDims, dataset_max_sizes);
105 
106  for (unsigned i=1; i<AbstractHdf5Access::DATASET_DIMS; i++) // Zero is excluded since it may be unlimited
107  {
108  assert(mDatasetDims[i] == dataset_max_sizes[i]);
109  }
110 
111  // Check if an unlimited dimension has been defined
112  if (dataset_max_sizes[0] == H5S_UNLIMITED)
113  {
114  // In terms of an Unlimited dimension dataset:
115  // * Files pre - r16738 (inc. Release 3.1 and earlier) use simply "Time" for "Data"'s unlimited variable.
116  // * Files generated by r16738 - r18257 used "<DatasetName>_Time" for "<DatasetName>"'s unlimited variable,
117  // - These are not to be used and there is no backwards compatibility for them, since they weren't in a release.
118  // * Files post r18257 (inc. Release 3.2 onwards) use "<DatasetName>_Unlimited" for "<DatasetName>"'s
119  // unlimited variable,
120  // - a new attribute "Name" has been added to the Unlimited Dataset to allow it to assign
121  // any name to the unlimited variable. Which can then be easily read by this class.
122  // - if this dataset is missing we look for simply "Time" to remain backwards compatible with Releases <= 3.1
124 
125  hid_t timestep_dataspace = H5Dget_space(mUnlimitedDatasetId);
126 
127  // Get the dataset/dataspace dimensions
128  H5Sget_simple_extent_dims(timestep_dataspace, &mNumberTimesteps, nullptr);
129  }
130 
131  // Get the attribute where the name of the variables are stored
132  hid_t attribute_id = H5Aopen_name(mVariablesDatasetId, "Variable Details");
133 
134  // Get attribute datatype, dataspace, rank, and dimensions
135  hid_t attribute_type = H5Aget_type(attribute_id);
136  hid_t attribute_space = H5Aget_space(attribute_id);
137 
138  hsize_t attr_dataspace_dim;
139  H5Sget_simple_extent_dims(attribute_space, &attr_dataspace_dim, nullptr);
140 
141  unsigned num_columns = H5Sget_simple_extent_npoints(attribute_space);
142  char* string_array = (char *)malloc(sizeof(char)*MAX_STRING_SIZE*(int)num_columns);
143  H5Aread(attribute_id, attribute_type, string_array);
144 
145  // Loop over column names and store them.
146  for (unsigned index=0; index < num_columns; index++)
147  {
148  // Read the string from the raw vector
149  std::string column_name_unit(&string_array[MAX_STRING_SIZE*index]);
150 
151  // Find beginning of unit definition.
152  size_t name_length = column_name_unit.find('(');
153  size_t unit_length = column_name_unit.find(')') - name_length - 1;
154 
155  std::string column_name = column_name_unit.substr(0, name_length);
156  std::string column_unit = column_name_unit.substr(name_length+1, unit_length);
157 
158  mVariableNames.push_back(column_name);
159  mVariableToColumnIndex[column_name] = index;
160  mVariableToUnit[column_name] = column_unit;
161  }
162 
163  // Release all the identifiers
164  H5Tclose(attribute_type);
165  H5Sclose(attribute_space);
166  H5Aclose(attribute_id);
167 
168  // Free allocated memory
169  free(string_array);
170 
171  // Find out if it's incomplete data
172  H5E_BEGIN_TRY //Supress HDF5 error if the IsDataComplete name isn't there
173  {
174  attribute_id = H5Aopen_name(mVariablesDatasetId, "IsDataComplete");
175  }
176  H5E_END_TRY;
177  if (attribute_id < 0)
178  {
179  // This is in the old format (before we added the IsDataComplete attribute).
180  // Just quit (leaving a nasty hdf5 error).
181  return;
182  }
183 
184  attribute_type = H5Aget_type(attribute_id);
185  attribute_space = H5Aget_space(attribute_id);
186  unsigned is_data_complete;
187  H5Aread(attribute_id, H5T_NATIVE_UINT, &is_data_complete);
188 
189  // Release all the identifiers
190  H5Tclose(attribute_type);
191  H5Sclose(attribute_space);
192  H5Aclose(attribute_id);
193  mIsDataComplete = (is_data_complete == 1) ? true : false;
194 
195  if (is_data_complete)
196  {
197  return;
198  }
199 
200  // Incomplete data
201  // Read the vector thing
202  attribute_id = H5Aopen_name(mVariablesDatasetId, "NodeMap");
203  attribute_type = H5Aget_type(attribute_id);
204  attribute_space = H5Aget_space(attribute_id);
205 
206  // Get the dataset/dataspace dimensions
207  unsigned num_node_indices = H5Sget_simple_extent_npoints(attribute_space);
208 
209  // Read data from hyperslab in the file into the hyperslab in memory
210  mIncompleteNodeIndices.clear();
211  mIncompleteNodeIndices.resize(num_node_indices);
212  H5Aread(attribute_id, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
213 
214  H5Tclose(attribute_type);
215  H5Sclose(attribute_space);
216  H5Aclose(attribute_id);
217 }
218 
219 std::vector<double> Hdf5DataReader::GetVariableOverTime(const std::string& rVariableName,
220  unsigned nodeIndex)
221 {
223  {
224  EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
225  }
226 
227  unsigned actual_node_index = nodeIndex;
228  if (!mIsDataComplete)
229  {
230  unsigned node_index = 0;
231  for (node_index=0; node_index<mIncompleteNodeIndices.size(); node_index++)
232  {
233  if (mIncompleteNodeIndices[node_index]==nodeIndex)
234  {
235  actual_node_index = node_index;
236  break;
237  }
238  }
239  if (node_index == mIncompleteNodeIndices.size())
240  {
241  EXCEPTION("The incomplete dataset '" << mDatasetName << "' does not contain info of node " << nodeIndex);
242  }
243  }
244  if (actual_node_index >= mDatasetDims[1])
245  {
246  EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain info of node " << actual_node_index);
247  }
248 
249  std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
250  if (col_iter == mVariableToColumnIndex.end())
251  {
252  EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain data for variable " << rVariableName);
253  }
254  unsigned column_index = (*col_iter).second;
255 
256  // Define hyperslab in the dataset.
257  hsize_t offset[3] = {0, actual_node_index, column_index};
258  hsize_t count[3] = {mDatasetDims[0], 1, 1};
259  hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
260  H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, nullptr, count, nullptr);
261 
262  // Define a simple memory dataspace
263  hid_t memspace = H5Screate_simple(1, &mDatasetDims[0] ,nullptr);
264 
265  // Data buffer to return
266  std::vector<double> ret(mDatasetDims[0]);
267 
268  // Read data from hyperslab in the file into the hyperslab in memory
269  H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, &ret[0]);
270 
271  H5Sclose(variables_dataspace);
272  H5Sclose(memspace);
273 
274  return ret;
275 }
276 
277 std::vector<std::vector<double> > Hdf5DataReader::GetVariableOverTimeOverMultipleNodes(const std::string& rVariableName,
278  unsigned lowerIndex,
279  unsigned upperIndex)
280 {
282  {
283  EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
284  }
285 
286  if (!mIsDataComplete)
287  {
288  EXCEPTION("GetVariableOverTimeOverMultipleNodes() cannot be called using incomplete data sets (those for which data was only written for certain nodes)");
289  }
290 
291  if (upperIndex > mDatasetDims[1])
292  {
293  EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain info for node " << upperIndex-1);
294  }
295 
296  std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
297  if (col_iter == mVariableToColumnIndex.end())
298  {
299  EXCEPTION("The dataset '" << mDatasetName << "' doesn't contain data for variable " << rVariableName);
300  }
301  unsigned column_index = (*col_iter).second;
302 
303  // Define hyperslab in the dataset.
304  hsize_t offset[3] = {0, lowerIndex, column_index};
305  hsize_t count[3] = {mDatasetDims[0], upperIndex-lowerIndex, 1};
306  hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
307  H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, nullptr, count, nullptr);
308 
309  // Define a simple memory dataspace
310  hsize_t data_dimensions[2];
311  data_dimensions[0] = mDatasetDims[0];
312  data_dimensions[1] = upperIndex-lowerIndex;
313  hid_t memspace = H5Screate_simple(2, data_dimensions, nullptr);
314 
315  double* data_read = new double[mDatasetDims[0]*(upperIndex-lowerIndex)];
316 
317  // Read data from hyperslab in the file into the hyperslab in memory
318  H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, data_read);
319 
320  H5Sclose(variables_dataspace);
321  H5Sclose(memspace);
322 
323  // Data buffer to return
324  unsigned num_nodes_read = upperIndex-lowerIndex;
325  unsigned num_timesteps = mDatasetDims[0];
326 
327  std::vector<std::vector<double> > ret(num_nodes_read);
328 
329  for (unsigned node_num=0; node_num<num_nodes_read; node_num++)
330  {
331  ret[node_num].resize(num_timesteps);
332  for (unsigned time_num=0; time_num<num_timesteps; time_num++)
333  {
334  ret[node_num][time_num] = data_read[num_nodes_read*time_num + node_num];
335  }
336  }
337 
338  delete[] data_read;
339 
340  return ret;
341 }
342 
344  const std::string& rVariableName,
345  unsigned timestep)
346 {
347  if (!mIsDataComplete)
348  {
349  EXCEPTION("You can only get a vector for complete data");
350  }
351  if (!mIsUnlimitedDimensionSet && timestep!=0)
352  {
353  EXCEPTION("The dataset '" << mDatasetName << "' does not contain time dependent data");
354  }
355 
356  std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
357  if (col_iter == mVariableToColumnIndex.end())
358  {
359  EXCEPTION("The dataset '" << mDatasetName << "' does not contain data for variable " << rVariableName);
360  }
361  unsigned column_index = (*col_iter).second;
362 
363  // Check for valid timestep
364  if (timestep >= mNumberTimesteps)
365  {
366  EXCEPTION("The dataset '" << mDatasetName << "' does not contain data for timestep number " << timestep);
367  }
368 
369  int lo, hi, size;
370  VecGetSize(data, &size);
371  if ((unsigned)size != mDatasetDims[1])
372  {
373  EXCEPTION("Could not read data because Vec is the wrong size");
374  }
375  // Get range owned by each processor
376  VecGetOwnershipRange(data, &lo, &hi);
377 
378  if (hi > lo) // i.e. we own some...
379  {
380  // Define a dataset in memory for this process
381  hsize_t v_size[1] = {(unsigned)(hi-lo)};
382  hid_t memspace = H5Screate_simple(1, v_size, nullptr);
383 
384  // Select hyperslab in the file.
385  hsize_t offset[3] = {timestep, (unsigned)(lo), column_index};
386  hsize_t count[3] = {1, (unsigned)(hi-lo), 1};
387  hid_t hyperslab_space = H5Dget_space(mVariablesDatasetId);
388  H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset, nullptr, count, nullptr);
389 
390  double* p_petsc_vector;
391  VecGetArray(data, &p_petsc_vector);
392 
393  herr_t err = H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, H5P_DEFAULT, p_petsc_vector);
394  UNUSED_OPT(err);
395  assert(err==0);
396 
397  VecRestoreArray(data, &p_petsc_vector);
398 
399  H5Sclose(hyperslab_space);
400  H5Sclose(memspace);
401  }
402 }
403 
405 {
406  // Data buffer to return
407  std::vector<double> ret(mNumberTimesteps);
408 
410  {
411  // Fake it
412  assert(mNumberTimesteps==1);
413  ret[0] = 0.0;
414  return ret;
415  }
416 
417  // Read data from hyperslab in the file into memory
418  H5Dread(mUnlimitedDatasetId, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &ret[0]);
419 
420  return ret;
421 }
422 
424 {
425  if (!mClosed)
426  {
427  H5Dclose(mVariablesDatasetId);
429  {
430  H5Dclose(mUnlimitedDatasetId);
431  }
432  H5Fclose(mFileId);
433  mClosed = true;
434  }
435 }
436 
438 {
439  Close();
440 }
441 
443 {
444  return mDatasetDims[1];
445 }
446 
447 std::vector<std::string> Hdf5DataReader::GetVariableNames()
448 {
449  return mVariableNames;
450 }
451 
452 std::string Hdf5DataReader::GetUnit(const std::string& rVariableName)
453 {
454  return mVariableToUnit[rVariableName];
455 }
456 
457 
hsize_t mNumberTimesteps
unsigned GetNumberOfRows()
std::string GetAbsolutePath() const
Definition: FileFinder.cpp:221
#define EXCEPTION(message)
Definition: Exception.hpp:143
std::string GetUnit(const std::string &rVariableName)
void CommonConstructor()
void GetVariableOverNodes(Vec data, const std::string &rVariableName, unsigned timestep=0)
hsize_t mDatasetDims[DATASET_DIMS]
std::map< std::string, unsigned > mVariableToColumnIndex
std::vector< std::vector< double > > GetVariableOverTimeOverMultipleNodes(const std::string &rVariableName, unsigned lowerIndex, unsigned upperIndex)
bool IsDir() const
Definition: FileFinder.cpp:190
std::vector< std::string > mVariableNames
std::vector< unsigned > mIncompleteNodeIndices
std::vector< double > GetUnlimitedDimensionValues()
Hdf5DataReader(const std::string &rDirectory, const std::string &rBaseName, bool makeAbsolute=true, std::string datasetName="Data")
std::vector< double > GetVariableOverTime(const std::string &rVariableName, unsigned nodeIndex)
bool Exists() const
Definition: FileFinder.cpp:180
#define UNUSED_OPT(var)
Definition: Exception.hpp:216
std::map< std::string, std::string > mVariableToUnit
unsigned mVariablesDatasetRank
std::vector< std::string > GetVariableNames()
static const unsigned DATASET_DIMS