Chaste  Release::2017.1
ColumnDataReader.cpp
Go to the documentation of this file.
1 /*
2 
3 Copyright (c) 2005-2017, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 */
35 
42 #include "ColumnDataReader.hpp"
43 #include "ColumnDataConstants.hpp"
44 
45 #include <fstream>
46 #include <sstream>
47 #include <iomanip>
48 #include <cassert>
49 #include <climits>
50 #include <cctype> //for isdigit
51 #include "OutputFileHandler.hpp"
52 #include "Exception.hpp"
53 
// Sentinel value: CheckFiles() compares the dimension and variable counts against this
// after parsing the .info file and raises an exception if either still equals it.
58 const int NOT_READ = INT_UNSET;
59 
60 ColumnDataReader::ColumnDataReader(const std::string& rDirectory,
61  const std::string& rBaseName,
62  bool makeAbsolute)
63 {
64  // Find out where files are really stored
65  std::string directory;
66  if (makeAbsolute)
67  {
68  OutputFileHandler output_file_handler(rDirectory, false);
69  directory = output_file_handler.GetOutputDirectoryFullPath();
70  }
71  else
72  {
73  // Add a trailing slash if needed
74  if (!(*(rDirectory.end()-1) == '/'))
75  {
76  directory = rDirectory + "/";
77  }
78  else
79  {
80  directory = rDirectory;
81  }
82  }
83  CheckFiles(directory, rBaseName);
84 }
85 
87  const std::string& rBaseName)
88 {
  // NOTE(review): the line that opens this definition (embedded listing line 86) is absent
  // from this listing; judging by the calls below, rDirectory is presumably a FileFinder - confirm.
  // Refuse anything that is not an existing directory before looking for the data files.
89  if (!rDirectory.IsDir() || !rDirectory.Exists())
90  {
91  EXCEPTION("Directory does not exist: " + rDirectory.GetAbsolutePath());
92  }
  // Hand the absolute path to the shared file-checking routine.
93  CheckFiles(rDirectory.GetAbsolutePath(), rBaseName);
94 }
95 
// Open and parse the files that describe a data set: read the .info file, work out the
// name of the first data file (and of the ancillary file where applicable), fill
// mVariablesToColumns / mVariablesToUnits from the header line of the data file, and
// deduce mFieldWidth from the first row of data.
// rDirectory is concatenated directly with rBaseName, so it must already end with a path separator.
// Raises an exception (via EXCEPTION) if a file cannot be opened, the info file cannot be
// read, no data entries are found, or the first entry is not in scientific notation.
96 void ColumnDataReader::CheckFiles(const std::string& rDirectory, const std::string& rBaseName)
97 {
98  // Read in info file
99  mInfoFilename = rDirectory + rBaseName + ".info";
100  std::ifstream infofile(mInfoFilename.c_str(), std::ios::in);
101 
102  // If it doesn't exist - throw exception
103  if (!infofile.is_open())
104  {
105  EXCEPTION("Couldn't open info file: " + mInfoFilename);
106  }
107  std::string junk;
  // NOTE(review): embedded listing lines 108 and 110 are absent here; presumably they set
  // mNumFixedDimensions and mNumVariables to NOT_READ so the check below can detect a failed read - confirm.
109  mHasUnlimitedDimension = false;
111 
  // The info file is read as whitespace-separated tokens; tokens stored in `junk` are discarded.
112  infofile >> junk;
113  infofile >> mNumFixedDimensions >> junk;
114  infofile >> mHasUnlimitedDimension >> junk;
115  infofile >> mNumVariables;
116 
117  if (mNumFixedDimensions == NOT_READ || mNumVariables == NOT_READ)
118  {
119  infofile.close();
120  EXCEPTION("Couldn't read info file correctly");
121  }
122 
123  // Read in variables and associate them with a column number
  // NOTE(review): the block below is followed by an `else`, so the `if` that controls it
  // (embedded listing line 124) is absent from this listing; presumably it tests
  // mHasUnlimitedDimension - confirm against the original file.
125  {
126  if (mNumFixedDimensions < 1)
127  {
128  mDataFilename = rDirectory + rBaseName + ".dat";
129  }
130  else
131  {
  // The first data file carries a zero-padded suffix of width FILE_SUFFIX_WIDTH.
132  std::stringstream suffix;
133  suffix << std::setfill('0') << std::setw(FILE_SUFFIX_WIDTH) << 0;
134 
135  mDataFilename = rDirectory + rBaseName + "_" + suffix.str() + ".dat";
136 
137  /*
138  * The ancillary path needs to come from a single place that is
139  * used by both the reader & writer, otherwise all will be bad.
140  */
141  mAncillaryFilename = rDirectory + rBaseName + "_unlimited.dat";
142 
143  // Extract the units and place into map
144  std::ifstream ancillaryfile(mAncillaryFilename.c_str(), std::ios::in);
145 
146  // If it doesn't exist - throw exception
147  if (!ancillaryfile.is_open())
148  {
149  EXCEPTION("Couldn't open ancillary data file");
150  }
  // Only the first whitespace-delimited token of the first line is used as the header.
151  std::string dimension;
152  std::getline(ancillaryfile, dimension);
153  std::stringstream dimension_stream(dimension);
154  std::string dimension_unit, dimension_name, header;
155  dimension_stream >> header;
156 
157  // Separate into variable name and units
  // The header is split at the first "(": the text before it is the name, and the text
  // after it, minus the final character, is the unit.
158  int unitpos = header.find("(") + 1;
159 
160  dimension_name = header.substr(0, unitpos - 1);
161  dimension_unit = header.substr(unitpos, header.length() - unitpos - 1);
162 
163  mVariablesToUnits[dimension_name] = dimension_unit;
164  ancillaryfile.close();
165  }
166  }
167  else
168  {
169  mDataFilename = rDirectory + rBaseName + ".dat";
170  }
171 
172  std::ifstream datafile(mDataFilename.c_str(), std::ios::in);
173  // If it doesn't exist - throw exception
174  if (!datafile.is_open())
175  {
176  EXCEPTION("Couldn't open data file");
177  }
178 
  // The first line of the data file is the header row: one token per column.
179  std::string variables;
180  std::getline(datafile, variables);
181  std::stringstream variable_stream(variables);
182  std::string header, variable, unit;
183  int column = 0;
184 
185  // Insert variables into map
186  while (variable_stream >> header)
187  {
188  // Separate into variable name and units
189  int unitpos = header.find("(") + 1;
190 
191  variable = header.substr(0, unitpos - 1);
192  unit = header.substr(unitpos, header.length() - unitpos - 1);
193 
  // Columns are numbered from zero in the order the headers appear.
194  mVariablesToColumns[variable] = column;
195  mVariablesToUnits[variable] = unit;
196 
197  column++;
198  }
199 
200  /*
201  * Now read the first line of proper data to determine the field width used when this
202  * file was created. Do this by
203  * 1. reading the first entry and measuring the distance from
204  * the decimal point to the 'e'. This gives the precision; the field width is then
205  * precision + 7 (With MSVC on Windows, it's precision + 8).
206  * e.g. if the first entry is
207  * 6.3124e+01 => field width = 11 // chaste release 1 and 1.1
208  * -3.5124e+01 => field width = 11 // chaste release 1 and 1.1
209  * +1.00000000e+00 => field width = 15
210  * -1.20000000e+01 => field width = 15
211  * -1.12345678e-321 => field width = 15
212  * 2. Because the first column has a varying number of spaces read a few columns and
213  * do some modular arithmetic to work out the correct width
214  */
215  std::string first_line;
216  std::string first_entry;
217  unsigned last_pos=0u;
218  // Read the first entry of the line. If there is no first entry, move to the next line..
219  while (first_entry.length()==0 && !datafile.eof())
220  {
221  std::getline(datafile, first_line);
222  std::stringstream stream(first_line);
223  stream >> first_entry;
224  last_pos = stream.tellg(); // Where the first number ends (but it might be in the column 2 or 3)
225  while (stream.good() && last_pos <170) //Avoid reading more than about 10 columns, because we want to avoid last_pos being divisible by too many factors
226  {
227  std::string last_entry;
228  stream >> last_entry;
  // Only accept a positive position, so a failed tellg() does not overwrite the last good offset.
229  if (stream.tellg() > 0)
230  {
231  last_pos = stream.tellg();
232  }
233  }
234  }
235 
236  if (datafile.eof() && first_entry.length()==0)
237  {
238  EXCEPTION("Unable to determine field width from file as cannot find any data entries");
239  }
240  assert (last_pos > 0u);
241 
242  size_t dot_pos = first_entry.find(".");
243  size_t e_pos = first_entry.find("e");
244  if (dot_pos == std::string::npos || e_pos == std::string::npos)
245  {
246  EXCEPTION("Badly formatted scientific data field");
247  }
248 
249  unsigned est_field_width = e_pos - dot_pos - 1 + 8; // = Precision + 8
250 
  // Keep the estimate if the end offset of the last entry read is a whole number of such
  // fields; otherwise the fields are taken to be one character wider.
251  if (last_pos % est_field_width == 0)
252  {
253  mFieldWidth = est_field_width;
254  }
255  else
256  {
257  assert ( last_pos % (est_field_width+1) == 0 || (last_pos+1) % (est_field_width+1) == 0 );
258  mFieldWidth = est_field_width+1;
259  }
260  infofile.close();
261  datafile.close();
262 }
263 
// Return the stored values for the named variable, for data sets without a fixed dimension.
// Raises an exception if the data has a fixed dimension or if rVariableName is not a known column.
264 std::vector<double> ColumnDataReader::GetValues(const std::string& rVariableName)
265 {
266  if (mNumFixedDimensions > 0)
267  {
268  EXCEPTION("Data file has fixed dimension which must be specified");
269  }
270 
  // Look the name up without inserting, so an unknown name leaves the map untouched.
271  std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
272  if (col == mVariablesToColumns.end())
273  {
274  std::stringstream variable_name;
275  variable_name << rVariableName;
276  EXCEPTION("'" + variable_name.str() + "' is an unknown variable.");
277  }
278 
279  int column = (*col).second;
  // NOTE(review): embedded listing line 280 is absent here. As shown, `column` is never used
  // and mValues is returned as-is; presumably the missing line fills mValues from that
  // column of mDataFilename - confirm against the original file.
281 
282  return mValues;
283 }
284 
// Return the values of the named variable at row index fixedDimension, for data sets that
// have a fixed dimension. mValues is cleared first and then returned by value.
// Raises an exception if the data has no fixed dimension or the variable is unknown.
285 std::vector<double> ColumnDataReader::GetValues(const std::string& rVariableName,
286  int fixedDimension)
287 {
288  if (mNumFixedDimensions < 1)
289  {
290  EXCEPTION("Data file has no fixed dimension");
291  }
292 
293  mValues.clear();
  // NOTE(review): the block below is followed by an `else`, so the `if` that controls it
  // (embedded listing line 294) is absent from this listing; presumably it tests
  // mHasUnlimitedDimension - confirm against the original file.
295  {
296  std::string datafile = mDataFilename;
297  std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
298  if (col == mVariablesToColumns.end())
299  {
300  EXCEPTION("Unknown variable");
301  }
302  int column = (*col).second;
303 
  // Read one value from each numbered data file in turn, starting with mDataFilename and
  // then the files with suffix 1, 2, ... The loop ends at the first file for which
  // ReadValueFromFile raises an exception (it does so when the file cannot be opened).
304  int counter = 1;
305  while (true)
306  {
307  try
308  {
309  ReadValueFromFile(datafile, column, fixedDimension);
310  }
311  catch (Exception)
312  {
313  break;
314  }
315 
316  // Advance counter
  // Build the next file name by replacing everything after the last '_' with the new suffix.
317  std::string::size_type underscore_pos = datafile.rfind("_", datafile.length());
318  std::stringstream suffix;
319 
320  suffix << std::setfill('0') << std::setw(FILE_SUFFIX_WIDTH) << counter;
321 
322  if (underscore_pos != std::string::npos)
323  {
324  datafile = datafile.substr(0, underscore_pos+1) + suffix.str() + ".dat";
325  }
326  counter++;
327  }
328  }
329  else
330  {
  // NOTE(review): operator[] inserts a zero entry for an unknown name, and a genuine
  // column index of 0 is also reported as "Unknown variable" here - confirm this is intended.
331  int column = mVariablesToColumns[rVariableName];
332  if (0 == column)
333  {
334  EXCEPTION("Unknown variable");
335  }
336  ReadValueFromFile(mDataFilename, column, fixedDimension);
337  }
338 
339  return mValues;
340 }
341 
343 {
  // NOTE(review): the signature (embedded listing line 342) and the statements at embedded
  // lines 345, 352 and 357 are absent from this listing. Presumably line 345 is the check
  // guarding the exception below, and 352/357 are the reads that fill mValues from the
  // ancillary file or from the first column of the data file - confirm against the original file.
344  mValues.clear();
346  {
347  EXCEPTION("Data file has no unlimited dimension");
348  }
349  if (mNumFixedDimensions > 0)
350  {
351  // Read in from the ancillary file
353  }
354  else
355  {
356  // Read the first column
358  }
  // Returned by value: the caller gets a copy of the member vector.
359  return mValues;
360 }
361 
362 void ColumnDataReader::ReadValueFromFile(const std::string& rFilename, int col, int row)
363 {
364  std::ifstream datafile(rFilename.c_str(), std::ios::in);
365  // If it doesn't exist - throw exception
366  if (!datafile.is_open())
367  {
368  EXCEPTION("Couldn't open data file");
369  }
370  std::string variable_values;
371  for (int i=0; i<row+1; i++)
372  {
373  std::getline(datafile, variable_values);
374  }
375 
376  std::getline(datafile, variable_values);
377  this->PushColumnEntryFromLine(variable_values, col);
378 
379  datafile.close();
380 }
381 
382 void ColumnDataReader::ReadColumnFromFile(const std::string& rFilename, int col)
383 {
384  // Empty the values vector
385  mValues.clear();
386 
387  // Read in from the ancillary file
388  std::ifstream datafile(rFilename.c_str(), std::ios::in);
389  std::string value;
390 
391  // We should have already checked that this file can be opened.
392  assert(datafile.is_open());
393 
394  // The current variable becomes true just after reading the last line
395  bool end_of_file_reached = false;
396 
397  // Skip header line
398  end_of_file_reached = std::getline(datafile, value).eof();
399 
400  while (!end_of_file_reached)
401  {
402  end_of_file_reached = std::getline(datafile, value).eof();
403  this->PushColumnEntryFromLine(value, col);
404  }
405  datafile.close();
406 }
407 
408 void ColumnDataReader::PushColumnEntryFromLine(const std::string& rLine, int col)
409 {
410  std::string value;
411  unsigned startpos = col * mFieldWidth;
412  value = rLine.substr(startpos, mFieldWidth);
413 
414  std::stringstream variable_stream(value);
415  double d_value;
416  variable_stream >> d_value;
417  if (variable_stream.fail())
418  {
419  if (variable_stream.eof()) //Missing data from column
420  {
421  d_value = DBL_MAX;
422  }
423  else
424  {
425 // LCOV_EXCL_START
426  // Clang Objective C++ (on Mac OSX) treats reading very small numbers (<2e-308) as an error but other compilers just round to zero
427  d_value = 0.0;
428 // LCOV_EXCL_STOP
429  }
430  }
431  mValues.push_back(d_value);
432 }
433 
434 bool ColumnDataReader::HasValues(const std::string& rVariableName)
435 {
436  std::map<std::string, int>::iterator col = mVariablesToColumns.find(rVariableName);
437  return !(col == mVariablesToColumns.end());
438 }
439 
441 {
  // NOTE(review): the signature line (embedded listing line 440) is absent from this listing.
  // Returns the per-column character width that CheckFiles() deduced from the data file.
442  return mFieldWidth;
443 }
std::string mAncillaryFilename
std::map< std::string, int > mVariablesToColumns
std::string mInfoFilename
bool HasValues(const std::string &rVariableName)
std::string GetAbsolutePath() const
Definition: FileFinder.cpp:221
#define EXCEPTION(message)
Definition: Exception.hpp:143
std::vector< double > GetValues(const std::string &rVariableName)
std::string GetOutputDirectoryFullPath() const
void ReadColumnFromFile(const std::string &rFilename, int col)
bool IsDir() const
Definition: FileFinder.cpp:190
std::string mDataFilename
std::map< std::string, std::string > mVariablesToUnits
std::vector< double > GetUnlimitedDimensionValues()
void CheckFiles(const std::string &rDirectory, const std::string &rBaseName)
bool Exists() const
Definition: FileFinder.cpp:180
ColumnDataReader(const std::string &rDirectory, const std::string &rBaseName, bool makeAbsolute=true)
const int NOT_READ
const int FILE_SUFFIX_WIDTH
void ReadValueFromFile(const std::string &rFilename, int col, int row)
std::vector< double > mValues
const int INT_UNSET
Definition: Exception.hpp:54
void PushColumnEntryFromLine(const std::string &rLine, int col)