Hdf5DataReader.cpp

00001 /*
00002 
00003 Copyright (C) University of Oxford, 2005-2011
00004 
00005 University of Oxford means the Chancellor, Masters and Scholars of the
00006 University of Oxford, having an administrative office at Wellington
00007 Square, Oxford OX1 2JD, UK.
00008 
00009 This file is part of Chaste.
00010 
00011 Chaste is free software: you can redistribute it and/or modify it
00012 under the terms of the GNU Lesser General Public License as published
00013 by the Free Software Foundation, either version 2.1 of the License, or
00014 (at your option) any later version.
00015 
00016 Chaste is distributed in the hope that it will be useful, but WITHOUT
00017 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00018 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
00019 License for more details. The offer of Chaste under the terms of the
00020 License is subject to the License being interpreted in accordance with
00021 English Law and subject to any action against the University of Oxford
00022 being under the jurisdiction of the English Courts.
00023 
00024 You should have received a copy of the GNU Lesser General Public License
00025 along with Chaste. If not, see <http://www.gnu.org/licenses/>.
00026 
00027 */
00028 
00029 #include "Hdf5DataReader.hpp"
00030 #include "Exception.hpp"
00031 #include "OutputFileHandler.hpp"
00032 #include "PetscTools.hpp"
00033 
00034 #include <cassert>
00035 #include <algorithm>
00036 
00037 Hdf5DataReader::Hdf5DataReader(const std::string& rDirectory,
00038                                const std::string& rBaseName,
00039                                bool makeAbsolute)
00040     : mBaseName(rBaseName),
00041       mIsUnlimitedDimensionSet(false),
00042       mNumberTimesteps(1),
00043       mIsDataComplete(true),
00044       mClosed(false)
00045 {
00046     RelativeTo::Value relative_to;
00047     if (makeAbsolute)
00048     {
00049         relative_to = RelativeTo::ChasteTestOutput;
00050     }
00051     else
00052     {
00053         relative_to = RelativeTo::Absolute;
00054     }
00055     FileFinder directory(rDirectory, relative_to);
00056     CommonConstructor(directory, rBaseName);
00057 }
00058 
00059 Hdf5DataReader::Hdf5DataReader(const FileFinder& rDirectory,
00060                                const std::string& rBaseName)
00061     : mBaseName(rBaseName),
00062       mIsUnlimitedDimensionSet(false),
00063       mNumberTimesteps(1),
00064       mIsDataComplete(true),
00065       mClosed(false)
00066 {
00067     CommonConstructor(rDirectory, rBaseName);
00068 }
00069 
00070 void Hdf5DataReader::CommonConstructor(const FileFinder& rDirectory, const std::string& rBaseName)
00071 {
00072     std::string results_dir = rDirectory.GetAbsolutePath();
00073     if (!rDirectory.IsDir() || !rDirectory.Exists())
00074     {
00075         EXCEPTION("Directory does not exist: " + results_dir);
00076     }
00077     mDirectory = results_dir;
00078     assert(*(mDirectory.end()-1) == '/'); // paranoia
00079 
00080     std::string file_name = results_dir + mBaseName + ".h5";
00081 
00082     // Open the file and the main dataset
00083     mFileId = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
00084 
00085     if (mFileId <= 0)
00086     {
00087         EXCEPTION("Hdf5DataReader could not open " + file_name);
00088     }
00089     mVariablesDatasetId = H5Dopen(mFileId, "Data");
00090 
00091     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00092     mVariablesDatasetRank = H5Sget_simple_extent_ndims(variables_dataspace);
00093 
00094     // Get the dataset/dataspace dimensions
00095     hsize_t dataset_max_sizes[MAX_DATASET_RANK];
00096     H5Sget_simple_extent_dims(variables_dataspace, mVariablesDatasetSizes, dataset_max_sizes);
00097 
00098     for (unsigned i=1; i<MAX_DATASET_RANK; i++)  // Zero is excluded since it may be unlimited
00099     {
00100         assert(mVariablesDatasetSizes[i] == dataset_max_sizes[i]);
00101     }
00102 
00103     // Check if an unlimited dimension has been defined
00104     if (dataset_max_sizes[0] == H5S_UNLIMITED)
00105     {
00106         mIsUnlimitedDimensionSet = true;
00107         mTimeDatasetId = H5Dopen(mFileId, "Time");
00108 
00109         hid_t timestep_dataspace = H5Dget_space(mTimeDatasetId);
00110 
00111         // Get the dataset/dataspace dimensions
00112         H5Sget_simple_extent_dims(timestep_dataspace, &mNumberTimesteps, NULL);
00113 
00114     }
00115 
00116     // Get the attribute where the name of the variables are stored
00117     hid_t attribute_id = H5Aopen_name(mVariablesDatasetId, "Variable Details");
00118 
00119     // Get attribute datatype, dataspace, rank, and dimensions
00120     hid_t attribute_type  = H5Aget_type(attribute_id);
00121     hid_t attribute_space = H5Aget_space(attribute_id);
00122 
00123     hsize_t attr_dataspace_dim;
00124     H5Sget_simple_extent_dims(attribute_space, &attr_dataspace_dim, NULL);
00125 
00126     unsigned num_columns = H5Sget_simple_extent_npoints(attribute_space);
00127     char* string_array = (char *)malloc(sizeof(char)*MAX_STRING_SIZE*(int)num_columns);
00128     H5Aread(attribute_id, attribute_type, string_array);
00129 
00130     // Loop over column names and store them.
00131     for (unsigned index=0; index < num_columns; index++)
00132     {
00133         // Read the string from the raw vector
00134         std::string column_name_unit(&string_array[MAX_STRING_SIZE*index]);
00135 
00136         // Find beginning of unit definition.
00137         size_t name_length = column_name_unit.find('(');
00138         size_t unit_length = column_name_unit.find(')') - name_length - 1;
00139 
00140         std::string column_name = column_name_unit.substr(0, name_length);
00141         std::string column_unit = column_name_unit.substr(name_length+1, unit_length);
00142 
00143         mVariableNames.push_back(column_name);
00144         mVariableToColumnIndex[column_name] = index;
00145         mVariableToUnit[column_name] = column_unit;
00146     }
00147 
00148     // Release all the identifiers
00149     H5Tclose(attribute_type);
00150     H5Sclose(attribute_space);
00151     H5Aclose(attribute_id);
00152 
00153     // Free allocated memory
00154     free(string_array);
00155 
00156     // Find out if it's incomplete data
00157     attribute_id = H5Aopen_name(mVariablesDatasetId, "IsDataComplete");
00158     if (attribute_id < 0)
00159     {
00160         // This is in the old format (before we added the IsDataComplete attribute).
00161         // Just quit (leaving a nasty hdf5 error).
00162         return;
00163     }
00164 
00165     attribute_type  = H5Aget_type(attribute_id);
00166     attribute_space = H5Aget_space(attribute_id);
00167     unsigned is_data_complete;
00168     H5Aread(attribute_id, H5T_NATIVE_UINT, &is_data_complete);
00169 
00170     // Release all the identifiers
00171     H5Tclose(attribute_type);
00172     H5Sclose(attribute_space);
00173     H5Aclose(attribute_id);
00174     mIsDataComplete = (is_data_complete == 1) ? true : false;
00175 
00176     if (is_data_complete)
00177     {
00178         return;
00179     }
00180 
00181     // Incomplete data
00182     // Read the vector thing
00183     attribute_id = H5Aopen_name(mVariablesDatasetId, "NodeMap");
00184     attribute_type  = H5Aget_type(attribute_id);
00185     attribute_space = H5Aget_space(attribute_id);
00186 
00187     // Get the dataset/dataspace dimensions
00188     unsigned num_node_indices = H5Sget_simple_extent_npoints(attribute_space);
00189 
00190     // Read data from hyperslab in the file into the hyperslab in memory
00191     mIncompleteNodeIndices.clear();
00192     mIncompleteNodeIndices.resize(num_node_indices);
00193     H5Aread(attribute_id, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
00194 
00195     H5Tclose(attribute_type);
00196     H5Sclose(attribute_space);
00197     H5Aclose(attribute_id);
00198 }
00199 
00200 std::vector<double> Hdf5DataReader::GetVariableOverTime(const std::string& rVariableName,
00201                                                         unsigned nodeIndex)
00202 {
00203     if (!mIsUnlimitedDimensionSet)
00204     {
00205         EXCEPTION("The file does not contain time dependent data");
00206     }
00207 
00208     unsigned actual_node_index = nodeIndex;
00209     if (!mIsDataComplete)
00210     {
00211         unsigned node_index = 0;
00212         for (node_index=0; node_index<mIncompleteNodeIndices.size(); node_index++)
00213         {
00214             if (mIncompleteNodeIndices[node_index]==nodeIndex)
00215             {
00216                 actual_node_index = node_index;
00217                 break;
00218             }
00219         }
00220         if ( node_index == mIncompleteNodeIndices.size())
00221         {
00222             EXCEPTION("The incomplete file does not contain info of node " << nodeIndex);
00223         }
00224     }
00225     if (actual_node_index >= mVariablesDatasetSizes[1])
00226     {
00227         EXCEPTION("The file doesn't contain info of node " << actual_node_index);
00228     }
00229 
00230     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00231     if (col_iter == mVariableToColumnIndex.end())
00232     {
00233         EXCEPTION("The file doesn't contain data for variable " + rVariableName);
00234     }
00235     int column_index = (*col_iter).second;
00236 
00237     // Define hyperslab in the dataset.
00238     hsize_t offset[3] = {0, actual_node_index, column_index};
00239     hsize_t count[3]  = {mVariablesDatasetSizes[0], 1, 1};
00240     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00241     H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
00242 
00243     // Define a simple memory dataspace
00244     hid_t memspace = H5Screate_simple(1, &mVariablesDatasetSizes[0] ,NULL);
00245 
00246     // Data buffer to return
00247     std::vector<double> ret(mVariablesDatasetSizes[0]);
00248 
00249     // Read data from hyperslab in the file into the hyperslab in memory
00250     H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, &ret[0]);
00251 
00252     H5Sclose(variables_dataspace);
00253     H5Sclose(memspace);
00254 
00255     return ret;
00256 }
00257 
00258 std::vector<std::vector<double> > Hdf5DataReader::GetVariableOverTimeOverMultipleNodes(const std::string& rVariableName,
00259                                                                                        unsigned lowerIndex,
00260                                                                                        unsigned upperIndex)
00261 {
00262     if (!mIsUnlimitedDimensionSet)
00263     {
00264         EXCEPTION("The file does not contain time dependent data");
00265     }
00266 
00267     if (!mIsDataComplete)
00268     {
00269         EXCEPTION("GetVariableOverTimeOverMultipleNodes() cannot be called using incomplete data sets (those for which data was only written for certain nodes)");
00270     }
00271 
00272     if (upperIndex > mVariablesDatasetSizes[1])
00273     {
00274        EXCEPTION("The file doesn't contain info for node " << upperIndex-1);
00275     }
00276 
00277     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00278     if (col_iter == mVariableToColumnIndex.end())
00279     {
00280         EXCEPTION("The file doesn't contain data for variable " + rVariableName);
00281     }
00282     int column_index = (*col_iter).second;
00283 
00284     // Define hyperslab in the dataset.
00285     hsize_t offset[3] = {0, lowerIndex, column_index};
00286     hsize_t count[3]  = {mVariablesDatasetSizes[0], upperIndex-lowerIndex, 1};
00287     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00288     H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
00289 
00290     // Define a simple memory dataspace
00291     hsize_t data_dimensions[2];
00292     data_dimensions[0] = mVariablesDatasetSizes[0];
00293     data_dimensions[1] = upperIndex-lowerIndex;
00294     hid_t memspace = H5Screate_simple(2, data_dimensions, NULL);
00295 
00296     double* data_read = new double[mVariablesDatasetSizes[0]*(upperIndex-lowerIndex)];
00297 
00298     // Read data from hyperslab in the file into the hyperslab in memory
00299     H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, data_read);
00300 
00301     H5Sclose(variables_dataspace);
00302     H5Sclose(memspace);
00303 
00304     // Data buffer to return
00305     unsigned num_nodes_read = upperIndex-lowerIndex;
00306     unsigned num_timesteps = mVariablesDatasetSizes[0];
00307 
00308     std::vector<std::vector<double> > ret(num_nodes_read);
00309 
00310     for (unsigned node_num=0; node_num<num_nodes_read; node_num++)
00311     {
00312         ret[node_num].resize(num_timesteps);
00313         for (unsigned time_num=0; time_num<num_timesteps; time_num++)
00314         {
00315             ret[node_num][time_num] = data_read[num_nodes_read*time_num + node_num];
00316         }
00317     }
00318 
00319     delete[] data_read;
00320 
00321     return ret;
00322 }
00323 
00324 void Hdf5DataReader::GetVariableOverNodes(Vec data,
00325                                           const std::string& rVariableName,
00326                                           unsigned timestep)
00327 {
00328     if (!mIsDataComplete)
00329     {
00330         EXCEPTION("You can only get a vector for complete data");
00331     }
00332     if (!mIsUnlimitedDimensionSet && timestep!=0)
00333     {
00334         EXCEPTION("The file does not contain time dependent data");
00335     }
00336 
00337     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00338     if (col_iter == mVariableToColumnIndex.end())
00339     {
00340         EXCEPTION("The file does not contain data for variable " + rVariableName);
00341     }
00342     int column_index = (*col_iter).second;
00343 
00344     // Check for valid timestep
00345     if (timestep >= mNumberTimesteps)
00346     {
00347         EXCEPTION("The file does not contain data for timestep number " << timestep);
00348     }
00349 
00350     int lo, hi, size;
00351     VecGetSize(data, &size);
00352     if ((unsigned)size != mVariablesDatasetSizes[1])
00353     {
00354         EXCEPTION("Could not read data because Vec is the wrong size");
00355     }
00356     // Get range owned by each processor
00357     VecGetOwnershipRange(data, &lo, &hi);
00358 
00359     if (hi > lo) // i.e. we own some...
00360     {
00361         // Define a dataset in memory for this process
00362         hsize_t v_size[1] = {hi-lo};
00363         hid_t memspace = H5Screate_simple(1, v_size, NULL);
00364 
00365         // Select hyperslab in the file.
00366         hsize_t offset[3] = {timestep, lo, column_index};
00367         hsize_t count[3]  = {1, hi-lo, 1};
00368         hid_t hyperslab_space = H5Dget_space(mVariablesDatasetId);
00369         H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset, NULL, count, NULL);
00370 
00371         double* p_petsc_vector;
00372         VecGetArray(data, &p_petsc_vector);
00373         herr_t err;
00374         err=H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, H5P_DEFAULT, p_petsc_vector);
00375         assert(err==0);
00376         VecRestoreArray(data, &p_petsc_vector);
00377 
00378         H5Sclose(hyperslab_space);
00379         H5Sclose(memspace);
00380     }
00381 }
00382 
00383 std::vector<double> Hdf5DataReader::GetUnlimitedDimensionValues()
00384 {
00385     // Data buffer to return
00386     std::vector<double> ret(mNumberTimesteps);
00387 
00388     if (!mIsUnlimitedDimensionSet)
00389     {
00390         // Fake it
00391         assert(mNumberTimesteps==1);
00392         ret[0] = 0.0;
00393         return ret;
00394     }
00395     // Define hyperslab in the dataset
00396     hid_t time_dataspace = H5Dget_space(mTimeDatasetId);
00397 
00398     // Define a simple memory dataspace
00399     hid_t memspace = H5Screate_simple(1, &mNumberTimesteps ,NULL);
00400 
00401     // Read data from hyperslab in the file into the hyperslab in memory
00402     H5Dread(mTimeDatasetId, H5T_NATIVE_DOUBLE, memspace, time_dataspace, H5P_DEFAULT, &ret[0]);
00403 
00404     H5Sclose(time_dataspace);
00405     H5Sclose(memspace);
00406 
00407     return ret;
00408 }
00409 
00410 void Hdf5DataReader::Close()
00411 {
00412     if (!mClosed)
00413     {
00414         H5Dclose(mVariablesDatasetId);
00415         if (mIsUnlimitedDimensionSet)
00416         {
00417             H5Dclose(mTimeDatasetId);
00418         }
00419         H5Fclose(mFileId);
00420         mClosed = true;
00421     }
00422 }
00423 
00424 Hdf5DataReader::~Hdf5DataReader()
00425 {
00426     Close();
00427 }
00428 
00429 unsigned Hdf5DataReader::GetNumberOfRows()
00430 {
00431     return mVariablesDatasetSizes[1];
00432 }
00433 
00434 std::vector<std::string> Hdf5DataReader::GetVariableNames()
00435 {
00436     return mVariableNames;
00437 }
00438 
00439 std::string Hdf5DataReader::GetUnit(const std::string& rVariableName)
00440 {
00441     return mVariableToUnit[rVariableName];
00442 }
00443 
00444 bool Hdf5DataReader::IsDataComplete()
00445 {
00446     return mIsDataComplete;
00447 }
00448 
00449 std::vector<unsigned> Hdf5DataReader::GetIncompleteNodeMap()
00450 {
00451     return mIncompleteNodeIndices;
00452 }
Generated on Thu Dec 22 13:00:07 2011 for Chaste by  doxygen 1.6.3