Hdf5DataReader.cpp

00001 /*
00002 
00003 Copyright (C) University of Oxford, 2005-2010
00004 
00005 University of Oxford means the Chancellor, Masters and Scholars of the
00006 University of Oxford, having an administrative office at Wellington
00007 Square, Oxford OX1 2JD, UK.
00008 
00009 This file is part of Chaste.
00010 
00011 Chaste is free software: you can redistribute it and/or modify it
00012 under the terms of the GNU Lesser General Public License as published
00013 by the Free Software Foundation, either version 2.1 of the License, or
00014 (at your option) any later version.
00015 
00016 Chaste is distributed in the hope that it will be useful, but WITHOUT
00017 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00018 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
00019 License for more details. The offer of Chaste under the terms of the
00020 License is subject to the License being interpreted in accordance with
00021 English Law and subject to any action against the University of Oxford
00022 being under the jurisdiction of the English Courts.
00023 
00024 You should have received a copy of the GNU Lesser General Public License
00025 along with Chaste. If not, see <http://www.gnu.org/licenses/>.
00026 
00027 */
00028 #include "Hdf5DataReader.hpp"
00029 #include "Exception.hpp"
00030 #include "OutputFileHandler.hpp"
00031 
00032 #include <cassert>
00033 
00034 
00035 Hdf5DataReader::Hdf5DataReader(const std::string& rDirectory,
00036                                const std::string& rBaseName,
00037                                bool makeAbsolute)
00038     : mDirectory(rDirectory),
00039       mBaseName(rBaseName),
00040       mIsUnlimitedDimensionSet(false),
00041       mNumberTimesteps(1),
00042       mIsDataComplete(true)
00043 {
00044     std::string results_dir;
00045 
00046     // Find out where files are really stored
00047     if (makeAbsolute)
00048     {
00049         OutputFileHandler output_file_handler(rDirectory, false);
00050         results_dir = output_file_handler.GetOutputDirectoryFullPath();
00051     }
00052     else
00053     {
00054         results_dir = rDirectory;
00055 
00056         // Add a trailing slash if needed
00057         if ( !(*(rDirectory.end()-1) == '/'))
00058         {
00059             results_dir = results_dir + "/";
00060         }
00061     }
00062 
00063     std::string file_name = results_dir + mBaseName + ".h5";
00064 
00065     // Open the file and the main dataset
00066     mFileId = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
00067 
00068     if (mFileId <= 0)
00069     {
00070         EXCEPTION("Hdf5DataReader could not open " + file_name);
00071     }
00072     mVariablesDatasetId = H5Dopen(mFileId, "Data");
00073 
00074     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00075     mVariablesDatasetRank = H5Sget_simple_extent_ndims(variables_dataspace);
00076 
00077     // Get the dataset/dataspace dimensions
00078     hsize_t dataset_max_sizes[MAX_DATASET_RANK];
00079     H5Sget_simple_extent_dims(variables_dataspace, mVariablesDatasetSizes, dataset_max_sizes);
00080 
00081     for (unsigned i=1; i<MAX_DATASET_RANK; i++)  // Zero is excluded since it may be unlimited
00082     {
00083         assert(mVariablesDatasetSizes[i] == dataset_max_sizes[i]);
00084     }
00085 
00086     // Check if an unlimited dimension has been defined
00087     if (dataset_max_sizes[0] == H5S_UNLIMITED)
00088     {
00089         mIsUnlimitedDimensionSet = true;
00090         mTimeDatasetId = H5Dopen(mFileId, "Time");
00091 
00092         hid_t timestep_dataspace = H5Dget_space(mTimeDatasetId);
00093 
00094         // Get the dataset/dataspace dimensions
00095         H5Sget_simple_extent_dims(timestep_dataspace, &mNumberTimesteps, NULL);
00096 
00097     }
00098 
00099     // Get the attribute where the name of the variables are stored
00100     hid_t attribute_id = H5Aopen_name(mVariablesDatasetId, "Variable Details");
00101 
00102     // Get attribute datatype, dataspace, rank, and dimensions
00103     hid_t attribute_type  = H5Aget_type(attribute_id);
00104     hid_t attribute_space = H5Aget_space(attribute_id);
00105 
00106     hsize_t attr_dataspace_dim;
00107     H5Sget_simple_extent_dims(attribute_space, &attr_dataspace_dim, NULL);
00108 
00109     unsigned num_columns = H5Sget_simple_extent_npoints(attribute_space);
00110     char* string_array = (char *)malloc(sizeof(char)*MAX_STRING_SIZE*(int)num_columns);
00111     H5Aread(attribute_id, attribute_type, string_array);
00112 
00113     // Loop over column names and store them.
00114     for (unsigned index=0; index < num_columns; index++)
00115     {
00116         // Read the string from the raw vector
00117         std::string column_name_unit(&string_array[MAX_STRING_SIZE*index]);
00118 
00119         // Find beginning of unit definition.
00120         size_t name_length = column_name_unit.find('(');
00121         size_t unit_length = column_name_unit.find(')') - name_length - 1;
00122 
00123         std::string column_name = column_name_unit.substr(0, name_length);
00124         std::string column_unit = column_name_unit.substr(name_length+1, unit_length);
00125 
00126         mVariableNames.push_back(column_name);
00127         mVariableToColumnIndex[column_name] = index;
00128         mVariableToUnit[column_name] = column_unit;
00129     }
00130 
00131     // Release all the identifiers
00132     H5Tclose(attribute_type);
00133     H5Sclose(attribute_space);
00134     H5Aclose(attribute_id);
00135 
00136     // Free allocated memory
00137     free(string_array);
00138 
00139     // Find out if it's incomplete data
00140     attribute_id = H5Aopen_name(mVariablesDatasetId, "IsDataComplete");
00141     if (attribute_id < 0)
00142     {
00143         // This is in the old format (before we added the IsDataComplete attribute).
00144         // Just quit (leaving a nasty hdf5 error).
00145         return;
00146     }
00147 
00148     attribute_type  = H5Aget_type(attribute_id);
00149     attribute_space = H5Aget_space(attribute_id);
00150     unsigned is_data_complete;
00151     H5Aread(attribute_id, H5T_NATIVE_UINT, &is_data_complete);
00152 
00153     // Release all the identifiers
00154     H5Tclose(attribute_type);
00155     H5Sclose(attribute_space);
00156     H5Aclose(attribute_id);
00157     mIsDataComplete = (is_data_complete == 1) ? true : false;
00158 
00159     if (is_data_complete)
00160     {
00161         return;
00162     }
00163 
00164     // Incomplete data
00165     // Read the vector thing
00166     attribute_id = H5Aopen_name(mVariablesDatasetId, "NodeMap");
00167     attribute_type  = H5Aget_type(attribute_id);
00168     attribute_space = H5Aget_space(attribute_id);
00169 
00170     // Get the dataset/dataspace dimensions
00171     unsigned num_node_indices = H5Sget_simple_extent_npoints(attribute_space);
00172 
00173     // Read data from hyperslab in the file into the hyperslab in memory
00174     mIncompleteNodeIndices.clear();
00175     mIncompleteNodeIndices.resize(num_node_indices);
00176     H5Aread(attribute_id, H5T_NATIVE_UINT, &mIncompleteNodeIndices[0]);
00177 
00178     H5Tclose(attribute_type);
00179     H5Sclose(attribute_space);
00180     H5Aclose(attribute_id);
00181 }
00182 
00183 std::vector<double> Hdf5DataReader::GetVariableOverTime(const std::string& rVariableName,
00184                                                         unsigned nodeIndex)
00185 {
00186     if (!mIsUnlimitedDimensionSet)
00187     {
00188         EXCEPTION("The file does not contain time dependent data");
00189     }
00190 
00191     unsigned actual_node_index = nodeIndex;
00192     if (!mIsDataComplete)
00193     {
00194         unsigned node_index = 0;
00195         for (node_index=0; node_index<mIncompleteNodeIndices.size(); node_index++)
00196         {
00197             if (mIncompleteNodeIndices[node_index]==nodeIndex)
00198             {
00199                 actual_node_index = node_index;
00200                 break;
00201             }
00202         }
00203         if ( node_index == mIncompleteNodeIndices.size())
00204         {
00205             std::stringstream ss;
00206             ss << "The incomplete file does not contain info of node " << nodeIndex ;
00207             EXCEPTION(ss.str());
00208         }
00209     }
00210     if (actual_node_index >= mVariablesDatasetSizes[1])
00211     {
00212         std::stringstream ss;
00213         ss << "The file doesn't contain info of node " << actual_node_index ;
00214         EXCEPTION(ss.str());
00215     }
00216 
00217     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00218     if (col_iter == mVariableToColumnIndex.end())
00219     {
00220         EXCEPTION("The file doesn't contain data for variable " + rVariableName);
00221     }
00222     int column_index = (*col_iter).second;
00223 
00224     // Define hyperslab in the dataset.
00225     hsize_t offset[3] = {0, actual_node_index, column_index};
00226     hsize_t count[3]  = {mVariablesDatasetSizes[0], 1, 1};
00227     hid_t variables_dataspace = H5Dget_space(mVariablesDatasetId);
00228     H5Sselect_hyperslab(variables_dataspace, H5S_SELECT_SET, offset, NULL, count, NULL);
00229 
00230     // Define a simple memory dataspace
00231     hid_t memspace = H5Screate_simple(1, &mVariablesDatasetSizes[0] ,NULL);
00232 
00233     // Data buffer to return
00234     std::vector<double> ret(mVariablesDatasetSizes[0]);
00235 
00236     // Read data from hyperslab in the file into the hyperslab in memory
00237     H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, variables_dataspace, H5P_DEFAULT, &ret[0]);
00238 
00239     H5Sclose(variables_dataspace);
00240     H5Sclose(memspace);
00241 
00242     return ret;
00243 }
00244 
00245 void Hdf5DataReader::GetVariableOverNodes(Vec data,
00246                                           const std::string& rVariableName,
00247                                           unsigned timestep)
00248 {
00249     if (!mIsDataComplete)
00250     {
00251         EXCEPTION("You can only get a vector for complete data");
00252     }
00253     if (!mIsUnlimitedDimensionSet && timestep!=0)
00254     {
00255         EXCEPTION("The file does not contain time dependent data");
00256     }
00257 
00258     std::map<std::string, unsigned>::iterator col_iter = mVariableToColumnIndex.find(rVariableName);
00259     if (col_iter == mVariableToColumnIndex.end())
00260     {
00261         EXCEPTION("The file does not contain data for variable " + rVariableName);
00262     }
00263     int column_index = (*col_iter).second;
00264 
00265     // Check for valid timestep
00266     if (timestep >= mNumberTimesteps)
00267     {
00268         std::stringstream ss;
00269         ss << "The file does not contain data for timestep number " << timestep;
00270         EXCEPTION(ss.str());
00271     }
00272 
00274     int lo, hi, size;
00275     VecGetSize(data, &size);
00276     if ((unsigned)size != mVariablesDatasetSizes[1])
00277     {
00278         EXCEPTION("Could not read data because Vec is the wrong size");
00279     }
00280     // Get range owned by each processor
00281     VecGetOwnershipRange(data, &lo, &hi);
00282 
00283     if (hi > lo) // i.e. we own some...
00284     {
00285         // Define a dataset in memory for this process
00286         hsize_t v_size[1] = {hi-lo};
00287         hid_t memspace = H5Screate_simple(1, v_size, NULL);
00288 
00289         // Select hyperslab in the file.
00290         hsize_t offset[3] = {timestep, lo, column_index};
00291         hsize_t count[3]  = {1, hi-lo, 1};
00292         hid_t hyperslab_space = H5Dget_space(mVariablesDatasetId);
00293         H5Sselect_hyperslab(hyperslab_space, H5S_SELECT_SET, offset, NULL, count, NULL);
00294 
00295         double* p_petsc_vector;
00296         VecGetArray(data, &p_petsc_vector);
00297         herr_t err;
00298         err=H5Dread(mVariablesDatasetId, H5T_NATIVE_DOUBLE, memspace, hyperslab_space, H5P_DEFAULT, p_petsc_vector);
00299         assert(err==0);
00300         VecRestoreArray(data, &p_petsc_vector);
00301 
00302         H5Sclose(hyperslab_space);
00303         H5Sclose(memspace);
00304     }
00305 }
00306 
00307 std::vector<double> Hdf5DataReader::GetUnlimitedDimensionValues()
00308 {
00309     // Data buffer to return
00310     std::vector<double> ret(mNumberTimesteps);
00311 
00312     if (!mIsUnlimitedDimensionSet)
00313     {
00314         // Fake it
00315         assert(mNumberTimesteps==1);
00316         ret[0] = 0.0;
00317         return ret;
00318     }
00319     // Define hyperslab in the dataset
00320     hid_t time_dataspace = H5Dget_space(mTimeDatasetId);
00321 
00322     // Define a simple memory dataspace
00323     hid_t memspace = H5Screate_simple(1, &mNumberTimesteps ,NULL);
00324 
00325     // Read data from hyperslab in the file into the hyperslab in memory
00326     H5Dread(mTimeDatasetId, H5T_NATIVE_DOUBLE, memspace, time_dataspace, H5P_DEFAULT, &ret[0]);
00327 
00328     H5Sclose(time_dataspace);
00329     H5Sclose(memspace);
00330 
00331     return ret;
00332 }
00333 
00334 void Hdf5DataReader::Close()
00335 {
00337     H5Dclose(mVariablesDatasetId);
00338 
00339     if (mIsUnlimitedDimensionSet)
00340     {
00341         H5Dclose(mTimeDatasetId);
00342     }
00343 
00344     H5Fclose(mFileId);
00345 }
00346 
00347 Hdf5DataReader::~Hdf5DataReader()
00348 {
00349     Close();
00350 }
00351 
00352 unsigned Hdf5DataReader::GetNumberOfRows()
00353 {
00354     return mVariablesDatasetSizes[1];
00355 }
00356 
00357 std::vector<std::string> Hdf5DataReader::GetVariableNames()
00358 {
00359     return mVariableNames;
00360 }
00361 
00362 std::string Hdf5DataReader::GetUnit(const std::string& rVariableName)
00363 {
00364     return mVariableToUnit[rVariableName];
00365 }
00366 
00367 bool Hdf5DataReader::IsDataComplete()
00368 {
00369     return mIsDataComplete;
00370 }
00371 
00372 std::vector<unsigned> Hdf5DataReader::GetIncompleteNodeMap()
00373 {
00374     return mIncompleteNodeIndices;
00375 }

Generated by  doxygen 1.6.2