Chaste Release::3.1
Hdf5DataReader.cpp
00001 /*
00002 
00003 Copyright (c) 2005-2012, University of Oxford.
00004 All rights reserved.
00005 
00006 University of Oxford means the Chancellor, Masters and Scholars of the
00007 University of Oxford, having an administrative office at Wellington
00008 Square, Oxford OX1 2JD, UK.
00009 
00010 This file is part of Chaste.
00011 
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014  * Redistributions of source code must retain the above copyright notice,
00015    this list of conditions and the following disclaimer.
00016  * Redistributions in binary form must reproduce the above copyright notice,
00017    this list of conditions and the following disclaimer in the documentation
00018    and/or other materials provided with the distribution.
00019  * Neither the name of the University of Oxford nor the names of its
00020    contributors may be used to endorse or promote products derived from this
00021    software without specific prior written permission.
00022 
00023 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00024 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00025 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00026 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
00027 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00028 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
00029 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00030 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00031 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00032 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 
00034 */
00035 
00036 #include "Hdf5DataReader.hpp"
00037 #include "Exception.hpp"
00038 #include "OutputFileHandler.hpp"
00039 #include "PetscTools.hpp"
00040 
00041 #include <cassert>
00042 #include <algorithm>
00043 
00044 Hdf5DataReader::Hdf5DataReader(const std::string& rDirectory,
00045                                const std::string& rBaseName,
00046                                bool makeAbsolute)
00047     : mBaseName(rBaseName),
00048       mIsUnlimitedDimensionSet(false),
00049       mNumberTimesteps(1),
00050       mIsDataComplete(true),
00051       mClosed(false)
00052 {
00053     RelativeTo::Value relative_to;
00054     if (makeAbsolute)
00055     {
00056         relative_to = RelativeTo::ChasteTestOutput;
00057     }
00058     else
00059     {
00060         relative_to = RelativeTo::Absolute;
00061     }
00062     FileFinder directory(rDirectory, relative_to);
00063     CommonConstructor(directory, rBaseName);
00064 }
00065 
00066 Hdf5DataReader::Hdf5DataReader(const FileFinder& rDirectory,
00067                                const std::string& rBaseName)
00068     : mBaseName(rBaseName),
00069       mIsUnlimitedDimensionSet(false),
00070       mNumberTimesteps(1),
00071       mIsDataComplete(true),
00072       mClosed(false)
00073 {
00074     CommonConstructor(rDirectory, rBaseName);
00075 }
00076 
00077 void Hdf5DataReader::CommonConstructor(const FileFinder& rDirectory, const std::string& rBaseName)
00078 {
00079     std::string results_dir = rDirectory.GetAbsolutePath();
00080     if (!rDirectory.IsDir() || !rDirectory.Exists())
00081     {
00082         EXCEPTION("Directory does not exist: " + results_dir);
00083     }
00084     mDirectory = results_dir;
00085     assert(*(mDirectory.end()-1) == '/'); // paranoia
00086 
00087     std::string file_name = results_dir + mBaseName + ".h5";
00088     FileFinder h5_file(file_name,RelativeTo::Absolute);
00089 
00090     if (!h5_file.Exists())
00091     {
00092         EXCEPTION("Hdf5DataReader could not open " + file_name + " , as it does not exist.");
00093     }
00094 
00095     // Open the file and the main dataset
00096     mFileId = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
00097 
00098     if (mFileId <= 0)
00099     {
00100         EXCEPTION("Hdf5DataReader could not open " << file_name <<
00101                   " , H5Fopen error code = " << mFileId);
00102     }
00103 
00104     mVariablesDatasetId = H5Dopen(mFileId, "Data");
00105 
00106     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00107     mVariablesDatasetRank = H5Sget_simple_extent_ndims(variables_dataspace);
00108 
00109     // Get the dataset/dataspace dimensions
00110     hsize_t dataset_max_sizes[MAX_DATASET_RANK];
00111     H5Sget_simple_extent_dims(variables_dataspace, mVariablesDatasetSizes, dataset_max_sizes);
00112 
00113     for (unsigned i=1; i<MAX_DATASET_RANK; i++)  // Zero is excluded since it may be unlimited
00114     {
00115         assert(mVariablesDatasetSizes[i] == dataset_max_sizes[i]);
00116     }
00117 
00118     // Check if an unlimited dimension has been defined
00119     if (dataset_max_sizes[0] == H5S_UNLIMITED)
00120     {
00121         mIsUnlimitedDimensionSet = true;
00122         mTimeDatasetId = H5Dopen(mFileId, "Time");
00123 
00124         hid_t timestep_dataspace = H5Dget_space(mTimeDatasetId);
00125 
00126         // Get the dataset/dataspace dimensions
00127         H5Sget_simple_extent_dims(timestep_dataspace, &mNumberTimesteps, NULL);
00128 
00129     }
00130 
00131     // Get the attribute where the name of the variables are stored
00132     hid_t attribute_id = H5Aopen_name(mVariablesDatasetId, "Variable Details");
00133 
00134     // Get attribute datatype, dataspace, rank, and dimensions
00135     hid_t attribute_type  = H5Aget_type(attribute_id);
00136     hid_t attribute_space = H5Aget_space(attribute_id);
00137 
00138     hsize_t attr_dataspace_dim;
00139     H5Sget_simple_extent_dims(attribute_space, &attr_dataspace_dim, NULL);
00140 
00141     unsigned num_columns = H5Sget_simple_extent_npoints(attribute_space);
00142     char* string_array = (char *)malloc(sizeof(char)*MAX_STRING_SIZE*(int)num_columns);
00143     H5Aread(attribute_id, attribute_type, string_array);
00144 
00145     // Loop over column names and store them.
00146     for (unsigned index=0; index < num_columns; index++)
00147     {
00148         // Read the string from the raw vector
00149         std::string column_name_unit(&string_array[MAX_STRING_SIZE*index]);
00150 
00151         // Find beginning of unit definition.
00152         size_t name_length = column_name_unit.find('(');
00153         size_t unit_length = column_name_unit.find(')') - name_length - 1;
00154 
00155         std::string column_name = column_name_unit.substr(0, name_length);
00156         std::string column_unit = column_name_unit.substr(name_length+1, unit_length);
00157 
00158         mVariableNames.push_back(column_name);
00159         mVariableToColumnIndex[column_name] = index;
00160         mVariableToUnit[column_name] = column_unit;
00161     }
00162 
00163     // Release all the identifiers
00164     H5Tclose(attribute_type);
00165     H5Sclose(attribute_space);
00166     H5Aclose(attribute_id);
00167 
00168     // Free allocated memory
00169     free(string_array);
00170 
00171     // Find out if it's incomplete data
00172     attribute_id = H5Aopen_name(mVariablesDatasetId, "IsDataComplete");
00173     if (attribute_id < 0)
00174     {
00175         // This is in the old format (before we added the IsDataComplete attribute).
00176         // Just quit (leaving a nasty hdf5 error).
00177         return;
00178     }
00179 
00180     attribute_type  = H5Aget_type(attribute_id);
00181     attribute_space = H5Aget_space(attribute_id);
00182     unsigned is_data_complete;
00183     H5Aread(attribute_id, H5T_NATIVE_UINT, &is_data_complete);
00184 
00185     // Release all the identifiers
00186     H5Tclose(attribute_type);
00187     H5Sclose(attribute_space);
00188     H5Aclose(attribute_id);
00189     mIsDataComplete = (is_data_complete == 1) ? true : false;
00190 
00191     if (is_data_complete)
00192     {
00193         return;
00194     }
00195 
00196     // Incomplete data
00197     // Read the vector thing
00198     attribute_id = H5Aopen_name(mVariablesDatasetId, "NodeMap");
00199     attribute_type  = H5Aget_type(attribute_id);
00200     attribute_space = H5Aget_space(attribute_id);
00201 
00202     // Get the dataset/dataspace dimensions
00203     unsigned num_node_indices = H5Sget_simple_extent_npoints(attribute_space);
00204 
00205     // Read data from hyperslab in the file into the hyperslab in memory
00206     mIncompleteNodeIndices.clear();
00207     mIncompleteNodeIndices.resize(num_node_indices);
00208     H5Aread(attribute_id, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
00209 
00210     H5Tclose(attribute_type);
00211     H5Sclose(attribute_space);
00212     H5Aclose(attribute_id);
00213 }
00214 
00215 std::vector<double> Hdf5DataReader::GetVariableOverTime(const std::string& rVariableName,
00216                                                         unsigned nodeIndex)
00217 {
00218     if (!mIsUnlimitedDimensionSet)
00219     {
00220         EXCEPTION("The file does not contain time dependent data");
00221     }
00222 
00223     unsigned actual_node_index = nodeIndex;
00224     if (!mIsDataComplete)
00225     {
00226         unsigned node_index = 0;
00227         for (node_index=0; node_index<mIncompleteNodeIndices.size(); node_index++)
00228         {
00229             if (mIncompleteNodeIndices[node_index]==nodeIndex)
00230             {
00231                 actual_node_index = node_index;
00232                 break;
00233             }
00234         }
00235         if ( node_index == mIncompleteNodeIndices.size())
00236         {
00237             EXCEPTION("The incomplete file does not contain info of node " << nodeIndex);
00238         }
00239     }
00240     if (actual_node_index >= mVariablesDatasetSizes[1])
00241     {
00242         EXCEPTION("The file doesn't contain info of node " << actual_node_index);
00243     }
00244 
00245     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00246     if (col_iter == mVariableToColumnIndex.end())
00247     {
00248         EXCEPTION("The file doesn't contain data for variable " + rVariableName);
00249     }
00250     int column_index = (*col_iter).second;
00251 
00252     // Define hyperslab in the dataset.
00253     hsize_t offset[3] = {0, actual_node_index, column_index};
00254     hsize_t count[3]  = {mVariablesDatasetSizes[0], 1, 1};
00255     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00256     H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
00257 
00258     // Define a simple memory dataspace
00259     hid_t memspace = H5Screate_simple(1, &mVariablesDatasetSizes[0] ,NULL);
00260 
00261     // Data buffer to return
00262     std::vector<double> ret(mVariablesDatasetSizes[0]);
00263 
00264     // Read data from hyperslab in the file into the hyperslab in memory
00265     H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, &ret[0]);
00266 
00267     H5Sclose(variables_dataspace);
00268     H5Sclose(memspace);
00269 
00270     return ret;
00271 }
00272 
00273 std::vector<std::vector<double> > Hdf5DataReader::GetVariableOverTimeOverMultipleNodes(const std::string& rVariableName,
00274                                                                                        unsigned lowerIndex,
00275                                                                                        unsigned upperIndex)
00276 {
00277     if (!mIsUnlimitedDimensionSet)
00278     {
00279         EXCEPTION("The file does not contain time dependent data");
00280     }
00281 
00282     if (!mIsDataComplete)
00283     {
00284         EXCEPTION("GetVariableOverTimeOverMultipleNodes() cannot be called using incomplete data sets (those for which data was only written for certain nodes)");
00285     }
00286 
00287     if (upperIndex > mVariablesDatasetSizes[1])
00288     {
00289        EXCEPTION("The file doesn't contain info for node " << upperIndex-1);
00290     }
00291 
00292     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00293     if (col_iter == mVariableToColumnIndex.end())
00294     {
00295         EXCEPTION("The file doesn't contain data for variable " + rVariableName);
00296     }
00297     int column_index = (*col_iter).second;
00298 
00299     // Define hyperslab in the dataset.
00300     hsize_t offset[3] = {0, lowerIndex, column_index};
00301     hsize_t count[3]  = {mVariablesDatasetSizes[0], upperIndex-lowerIndex, 1};
00302     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00303     H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
00304 
00305     // Define a simple memory dataspace
00306     hsize_t data_dimensions[2];
00307     data_dimensions[0] = mVariablesDatasetSizes[0];
00308     data_dimensions[1] = upperIndex-lowerIndex;
00309     hid_t memspace = H5Screate_simple(2, data_dimensions, NULL);
00310 
00311     double* data_read = new double[mVariablesDatasetSizes[0]*(upperIndex-lowerIndex)];
00312 
00313     // Read data from hyperslab in the file into the hyperslab in memory
00314     H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, data_read);
00315 
00316     H5Sclose(variables_dataspace);
00317     H5Sclose(memspace);
00318 
00319     // Data buffer to return
00320     unsigned num_nodes_read = upperIndex-lowerIndex;
00321     unsigned num_timesteps = mVariablesDatasetSizes[0];
00322 
00323     std::vector<std::vector<double> > ret(num_nodes_read);
00324 
00325     for (unsigned node_num=0; node_num<num_nodes_read; node_num++)
00326     {
00327         ret[node_num].resize(num_timesteps);
00328         for (unsigned time_num=0; time_num<num_timesteps; time_num++)
00329         {
00330             ret[node_num][time_num] = data_read[num_nodes_read*time_num + node_num];
00331         }
00332     }
00333 
00334     delete[] data_read;
00335 
00336     return ret;
00337 }
00338 
00339 void Hdf5DataReader::GetVariableOverNodes(Vec data,
00340                                           const std::string& rVariableName,
00341                                           unsigned timestep)
00342 {
00343     if (!mIsDataComplete)
00344     {
00345         EXCEPTION("You can only get a vector for complete data");
00346     }
00347     if (!mIsUnlimitedDimensionSet && timestep!=0)
00348     {
00349         EXCEPTION("The file does not contain time dependent data");
00350     }
00351 
00352     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00353     if (col_iter == mVariableToColumnIndex.end())
00354     {
00355         EXCEPTION("The file does not contain data for variable " + rVariableName);
00356     }
00357     int column_index = (*col_iter).second;
00358 
00359     // Check for valid timestep
00360     if (timestep >= mNumberTimesteps)
00361     {
00362         EXCEPTION("The file does not contain data for timestep number " << timestep);
00363     }
00364 
00365     int lo, hi, size;
00366     VecGetSize(data, &size);
00367     if ((unsigned)size != mVariablesDatasetSizes[1])
00368     {
00369         EXCEPTION("Could not read data because Vec is the wrong size");
00370     }
00371     // Get range owned by each processor
00372     VecGetOwnershipRange(data, &lo, &hi);
00373 
00374     if (hi > lo) // i.e. we own some...
00375     {
00376         // Define a dataset in memory for this process
00377         hsize_t v_size[1] = {hi-lo};
00378         hid_t memspace = H5Screate_simple(1, v_size, NULL);
00379 
00380         // Select hyperslab in the file.
00381         hsize_t offset[3] = {timestep, lo, column_index};
00382         hsize_t count[3]  = {1, hi-lo, 1};
00383         hid_t hyperslab_space = H5Dget_space(mVariablesDatasetId);
00384         H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset, NULL, count, NULL);
00385 
00386         double* p_petsc_vector;
00387         VecGetArray(data, &p_petsc_vector);
00388         herr_t err;
00389         err=H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, H5P_DEFAULT, p_petsc_vector);
00390         assert(err==0);
00391         VecRestoreArray(data, &p_petsc_vector);
00392 
00393         H5Sclose(hyperslab_space);
00394         H5Sclose(memspace);
00395     }
00396 }
00397 
00398 std::vector<double> Hdf5DataReader::GetUnlimitedDimensionValues()
00399 {
00400     // Data buffer to return
00401     std::vector<double> ret(mNumberTimesteps);
00402 
00403     if (!mIsUnlimitedDimensionSet)
00404     {
00405         // Fake it
00406         assert(mNumberTimesteps==1);
00407         ret[0] = 0.0;
00408         return ret;
00409     }
00410     // Define hyperslab in the dataset
00411     hid_t time_dataspace = H5Dget_space(mTimeDatasetId);
00412 
00413     // Define a simple memory dataspace
00414     hid_t memspace = H5Screate_simple(1, &mNumberTimesteps ,NULL);
00415 
00416     // Read data from hyperslab in the file into the hyperslab in memory
00417     H5Dread(mTimeDatasetId, H5T_NATIVE_DOUBLE, memspace, time_dataspace, H5P_DEFAULT, &ret[0]);
00418 
00419     H5Sclose(time_dataspace);
00420     H5Sclose(memspace);
00421 
00422     return ret;
00423 }
00424 
00425 void Hdf5DataReader::Close()
00426 {
00427     if (!mClosed)
00428     {
00429         H5Dclose(mVariablesDatasetId);
00430         if (mIsUnlimitedDimensionSet)
00431         {
00432             H5Dclose(mTimeDatasetId);
00433         }
00434         H5Fclose(mFileId);
00435         mClosed = true;
00436     }
00437 }
00438 
00439 Hdf5DataReader::~Hdf5DataReader()
00440 {
00441     Close();
00442 }
00443 
00444 unsigned Hdf5DataReader::GetNumberOfRows()
00445 {
00446     return mVariablesDatasetSizes[1];
00447 }
00448 
00449 std::vector<std::string> Hdf5DataReader::GetVariableNames()
00450 {
00451     return mVariableNames;
00452 }
00453 
00454 std::string Hdf5DataReader::GetUnit(const std::string& rVariableName)
00455 {
00456     return mVariableToUnit[rVariableName];
00457 }
00458 
00459 bool Hdf5DataReader::IsDataComplete()
00460 {
00461     return mIsDataComplete;
00462 }
00463 
00464 std::vector<unsigned> Hdf5DataReader::GetIncompleteNodeMap()
00465 {
00466     return mIncompleteNodeIndices;
00467 }