Chaste Release::3.1
XmlTools.cpp
00001 /*
00002 
00003 Copyright (c) 2005-2012, University of Oxford.
00004 All rights reserved.
00005 
00006 University of Oxford means the Chancellor, Masters and Scholars of the
00007 University of Oxford, having an administrative office at Wellington
00008 Square, Oxford OX1 2JD, UK.
00009 
00010 This file is part of Chaste.
00011 
00012 Redistribution and use in source and binary forms, with or without
00013 modification, are permitted provided that the following conditions are met:
00014  * Redistributions of source code must retain the above copyright notice,
00015    this list of conditions and the following disclaimer.
00016  * Redistributions in binary form must reproduce the above copyright notice,
00017    this list of conditions and the following disclaimer in the documentation
00018    and/or other materials provided with the distribution.
00019  * Neither the name of the University of Oxford nor the names of its
00020    contributors may be used to endorse or promote products derived from this
00021    software without specific prior written permission.
00022 
00023 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00024 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00025 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00026 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
00027 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00028 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
00029 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00030 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00031 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00032 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 
00034 */
00035 
00036 #include "XmlTools.hpp"
00037 
00038 #include <iostream>
00039 
00040 #include <xercesc/dom/DOM.hpp>
00041 #include <xercesc/util/PlatformUtils.hpp>
00042 #include <xercesc/util/QName.hpp>
00043 #include <xercesc/util/XMLUniDefs.hpp> // chLatin_*
00044 #include <xercesc/framework/Wrapper4InputSource.hpp>
00045 #include <xercesc/validators/common/Grammar.hpp>
00046 
00047 #include <xsd/cxx/xml/sax/std-input-source.hxx>
00048 #include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
00049 #include <xsd/cxx/tree/exceptions.hxx>
00050 
00051 #include "Exception.hpp"
00052 
00053 
00054 
00055 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadXmlFile(
00056     const std::string& rFileName,
00057     const ::xsd::cxx::tree::properties<char>& rProps,
00058     bool validate)
00059 {
00060     xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> p_doc;
00061     try
00062     {
00063         // Initialise Xerces
00064         xercesc::XMLPlatformUtils::Initialize();
00065         // Set up an error handler
00066         ::xsd::cxx::tree::error_handler<char> error_handler;
00067         // Parse XML to DOM
00068         p_doc = XmlTools::ReadFileToDomDocument(rFileName, error_handler, rProps, validate);
00069         // Any errors?
00070         error_handler.throw_if_failed< ::xsd::cxx::tree::parsing<char> >();
00071     }
00072     catch (const ::xsd::cxx::tree::parsing<char>& e)
00073     {
00074         Finalize();
00075         // Test for missing schema/xml file
00076 #if (XSD_INT_VERSION >= 3000000L)
00077         const ::xsd::cxx::tree::diagnostics<char>& diags = e.diagnostics();
00078         const ::xsd::cxx::tree::error<char>& first_error = diags[0];
00079 #else
00080         const ::xsd::cxx::tree::errors<char>& errors = e.errors();
00081         const ::xsd::cxx::tree::error<char>& first_error = errors[0];
00082 #endif
00083         if (first_error.line() == 0u)
00084         {
00085             std::cerr << first_error << std::endl;
00086             EXCEPTION("Missing file parsing configuration file: " + rFileName);
00087         }
00088         else
00089         {
00090             std::cerr << e << std::endl;
00091             EXCEPTION("XML parsing error in configuration file: " + rFileName);
00092         }
00093     }
00094 #define COVERAGE_IGNORE
00095     catch (...)
00096     { // This shouldn't happen, but just in case...
00097         Finalize();
00098         throw;
00099     }
00100 #undef COVERAGE_IGNORE
00101     return p_doc;
00102 }
00103 
00104 
00105 void XmlTools::Finalize()
00106 {
00107     xercesc::XMLPlatformUtils::Terminate();
00108 }
00109 
00110 XmlTools::Finalizer::Finalizer(bool init)
00111 {
00112     // The init=true case will very rarely be used, but a parameter to the constructor is needed
00113     // to stop some compilers complaining about an unused variable!
00114     if (init)
00115     {
00116 #define COVERAGE_IGNORE
00117         xercesc::XMLPlatformUtils::Initialize();
00118 #undef COVERAGE_IGNORE
00119     }
00120 }
00121 
00122 XmlTools::Finalizer::~Finalizer()
00123 {
00124     XmlTools::Finalize();
00125 }
00126 
00127 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadFileToDomDocument(
00128         const std::string& rFileName,
00129         ::xsd::cxx::xml::error_handler<char>& rErrorHandler,
00130         const ::xsd::cxx::tree::properties<char>& rProps,
00131         bool validate)
00132 {
00133     using namespace xercesc;
00134     namespace xml = xsd::cxx::xml;
00135 
00136     // Get an implementation of the Load-Store (LS) interface.
00137     const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull};
00138     DOMImplementation* p_impl(DOMImplementationRegistry::getDOMImplementation(ls_id));
00139 
00140 #if _XERCES_VERSION >= 30000
00141     // Xerces-C++ 3.0.0 and later.
00142     xml::dom::auto_ptr<DOMLSParser> p_parser(p_impl->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
00143     DOMConfiguration* p_conf(p_parser->getDomConfig());
00144 
00145     // Discard comment nodes in the document.
00146     p_conf->setParameter(XMLUni::fgDOMComments, false);
00147 
00148     // Enable datatype normalization.
00149     p_conf->setParameter(XMLUni::fgDOMDatatypeNormalization, true);
00150 
00151     // Do not create EntityReference nodes in the DOM tree.  No
00152     // EntityReference nodes will be created, only the nodes
00153     // corresponding to their fully expanded substitution text
00154     // will be created.
00155     p_conf->setParameter(XMLUni::fgDOMEntities, false);
00156 
00157     // Perform namespace processing.
00158     p_conf->setParameter(XMLUni::fgDOMNamespaces, true);
00159 
00160     // Do not include ignorable whitespace in the DOM tree.
00161     p_conf->setParameter(XMLUni::fgDOMElementContentWhitespace, false);
00162 
00163     // Enable validation.
00164     if (validate)
00165     {
00166         p_conf->setParameter(XMLUni::fgDOMValidate, true);
00167         p_conf->setParameter(XMLUni::fgXercesSchema, true);
00168         p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
00169         // Code taken from xsd/cxx/xml/dom/parsing-source.txx
00170         if (!rProps.schema_location().empty())
00171         {
00172             xml::string locn(rProps.schema_location());
00173             const void* p_locn(locn.c_str());
00174             p_conf->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation,
00175                                  const_cast<void*>(p_locn));
00176         }
00177         if (!rProps.no_namespace_schema_location().empty())
00178         {
00179             xml::string locn(rProps.no_namespace_schema_location());
00180             const void* p_locn(locn.c_str());
00181 
00182             p_conf->setParameter(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
00183                                  const_cast<void*>(p_locn));
00184         }
00185     }
00186     else
00187     {
00188         // This branch is only used by projects
00189 #define COVERAGE_IGNORE
00190         p_conf->setParameter(XMLUni::fgDOMValidate, false);
00191         p_conf->setParameter(XMLUni::fgXercesSchema, false);
00192         p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
00193 #undef COVERAGE_IGNORE
00194     }
00195 
00196     // We will release the DOM document ourselves.
00197     p_conf->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
00198 
00199     // Set error handler.
00200     xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
00201     p_conf->setParameter(XMLUni::fgDOMErrorHandler, &ehp);
00202 
00203 #else // _XERCES_VERSION >= 30000
00204     // Same as above but for Xerces-C++ 2 series.
00205     xml::dom::auto_ptr<DOMBuilder> p_parser(p_impl->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
00206 
00207     p_parser->setFeature(XMLUni::fgDOMComments, false);
00208     p_parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);
00209     p_parser->setFeature(XMLUni::fgDOMEntities, false);
00210     p_parser->setFeature(XMLUni::fgDOMNamespaces, true);
00211     p_parser->setFeature(XMLUni::fgDOMWhitespaceInElementContent, false);
00212     p_parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
00213 
00214     // Code taken from xsd/cxx/xml/dom/parsing-source.txx
00215     if (validate)
00216     {
00217         p_parser->setFeature(XMLUni::fgDOMValidation, true);
00218         p_parser->setFeature(XMLUni::fgXercesSchema, true);
00219         p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
00220         if (!rProps.schema_location().empty())
00221         {
00222             xml::string locn(rProps.schema_location());
00223             const void* p_locn(locn.c_str());
00224             p_parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,
00225                                   const_cast<void*>(p_locn));
00226         }
00227 
00228         if (!rProps.no_namespace_schema_location().empty())
00229         {
00230             xml::string locn(rProps.no_namespace_schema_location());
00231             const void* p_locn(locn.c_str());
00232 
00233             p_parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
00234                                   const_cast<void*>(p_locn));
00235         }
00236     }
00237     else
00238     {
00239         // This branch is only used by projects
00240 #define COVERAGE_IGNORE
00241         p_parser->setFeature(XMLUni::fgDOMValidation, false);
00242         p_parser->setFeature(XMLUni::fgXercesSchema, false);
00243         p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
00244 #undef COVERAGE_IGNORE
00245     }
00246 
00247     xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
00248     p_parser->setErrorHandler(&ehp);
00249 
00250 #endif // _XERCES_VERSION >= 30000
00251 
00252     // Do the parse
00253     xml::dom::auto_ptr<DOMDocument> p_doc(p_parser->parseURI(rFileName.c_str()));
00254 
00255     if (ehp.failed())
00256     {
00257         p_doc.reset();
00258     }
00259 
00260     return p_doc;
00261 }
00262 
00263 #define COVERAGE_IGNORE
00264 void XmlTools::PrintNode(const std::string& rMsg, xercesc::DOMNode* pNode, bool showChildren)
00265 {
00266     std::string prefix = X2C(pNode->getPrefix());
00267     std::string name = X2C(pNode->getLocalName());
00268     std::string nsuri = X2C(pNode->getNamespaceURI());
00269     std::cout << rMsg << " " << pNode << " " << prefix << ":" << name << " in " << nsuri << std::endl;
00270     if (showChildren)
00271     {
00272         for (xercesc::DOMNode* p_node = pNode->getFirstChild();
00273              p_node != NULL;
00274              p_node = p_node->getNextSibling())
00275         {
00276             std::cout << "     child type " << p_node->getNodeType();
00277             PrintNode("", p_node, false);
00278         }
00279         xercesc::DOMNamedNodeMap* p_attrs = pNode->getAttributes();
00280         if (p_attrs)
00281         {
00282             for (XMLSize_t i=0; i<p_attrs->getLength(); i++)
00283             {
00284                  xercesc::DOMNode* p_attr = p_attrs->item(i);
00285                  std::string value = X2C(p_attr->getNodeValue());
00286                  PrintNode("     attr (" + value + ")", p_attr, false);
00287             }
00288         }
00289     }
00290 }
00291 #undef COVERAGE_IGNORE
00292 
00293 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
00294                                             xercesc::DOMElement* pElement,
00295                                             const XMLCh* pNamespace)
00296 {
00297     using namespace xercesc;
00298 
00299     //PrintNode("Renaming", pElement, true);
00300     DOMNamedNodeMap* p_orig_attrs = pElement->getAttributes();
00301     std::vector<std::string> attr_values;
00302     if (p_orig_attrs)
00303     {
00304         for (XMLSize_t i=0; i<p_orig_attrs->getLength(); i++)
00305         {
00306             DOMNode* p_attr = p_orig_attrs->item(i);
00307             attr_values.push_back(X2C(p_attr->getNodeValue()));
00308         }
00309     }
00310     DOMElement* p_new_elt = static_cast<DOMElement*>(
00311         pDocument->renameNode(pElement, pNamespace, pElement->getLocalName()));
00312     //PrintNode("   to", p_new_elt, true);
00313     // Fix attributes - some get broken by the rename!
00314     if (p_orig_attrs)
00315     {
00316         DOMNamedNodeMap* p_new_attrs = p_new_elt->getAttributes();
00317         assert(p_new_attrs);
00318         assert(p_new_attrs == p_orig_attrs);
00319         assert(p_new_attrs->getLength() == attr_values.size());
00320         for (XMLSize_t i=0; i<p_new_attrs->getLength(); i++)
00321         {
00322             DOMNode* p_attr = p_new_attrs->item(i);
00323             p_attr->setNodeValue(X(attr_values[i]));
00324         }
00325     }
00326     //PrintNode("   after attr fix", p_new_elt, true);
00327 
00328     std::vector<DOMElement*> children = GetChildElements(p_new_elt);
00329     for (std::vector<DOMElement*>::iterator it = children.begin(); it != children.end(); ++it)
00330     {
00331         SetNamespace(pDocument, *it, pNamespace);
00332     }
00333 
00334     return p_new_elt;
00335 }
00336 
00337 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
00338                                             xercesc::DOMElement* pElement,
00339                                             const std::string& rNamespace)
00340 {
00341     return SetNamespace(pDocument, pElement, X(rNamespace));
00342 }
00343 
00344 
00345 std::vector<xercesc::DOMElement*> XmlTools::GetChildElements(const xercesc::DOMElement* pElement)
00346 {
00347     std::vector<xercesc::DOMElement*> children;
00348     for (xercesc::DOMNode* p_node = pElement->getFirstChild();
00349          p_node != NULL;
00350          p_node = p_node->getNextSibling())
00351     {
00352         if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE)
00353         {
00354             children.push_back(static_cast<xercesc::DOMElement*>(p_node));
00355         }
00356     }
00357     return children;
00358 }
00359 
00360 
00361 void XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
00362                             const std::vector<std::string>& rNames,
00363                             std::vector<xercesc::DOMElement*>& rResults,
00364                             unsigned depth)
00365 {
00366     for (xercesc::DOMNode* p_node = pContextElement->getFirstChild();
00367          p_node != NULL;
00368          p_node = p_node->getNextSibling())
00369     {
00370         if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE &&
00371             X2C(p_node->getLocalName()) == rNames[depth])
00372         {
00373             xercesc::DOMElement* p_child_elt = static_cast<xercesc::DOMElement*>(p_node);
00374             if (depth == rNames.size() - 1)
00375             {
00376                 rResults.push_back(p_child_elt);
00377             }
00378             else
00379             {
00380                 FindElements(p_child_elt, rNames, rResults, depth+1);
00381             }
00382         }
00383     }
00384 }
00385 
00386 std::vector<xercesc::DOMElement*> XmlTools::FindElements(const xercesc::DOMElement* pContextElement,
00387                                                          const std::string& rPath)
00388 {
00389     std::vector<xercesc::DOMElement*> results;
00390     std::vector<std::string> path;
00391     size_t start_pos = 0;
00392     size_t slash_pos = 0;
00393     while (slash_pos != std::string::npos)
00394     {
00395         slash_pos = rPath.find('/', start_pos);
00396         if (slash_pos == std::string::npos)
00397         {
00398             path.push_back(rPath.substr(start_pos));
00399         }
00400         else
00401         {
00402             path.push_back(rPath.substr(start_pos, slash_pos-start_pos));
00403         }
00404         start_pos = slash_pos + 1;
00405     }
00406     FindElements(pContextElement, path, results);
00407     return results;
00408 }
00409 
00410 
00411 void XmlTools::WrapContentInElement(xercesc::DOMDocument* pDocument,
00412                                     xercesc::DOMElement* pElement,
00413                                     const XMLCh* pNewElementLocalName)
00414 {
00415     const XMLCh* p_namespace_uri = pElement->getNamespaceURI();
00416     const XMLCh* p_prefix = pElement->getPrefix();
00417     const XMLCh* p_qualified_name;
00418     if (p_prefix)
00419     {
00420 #define COVERAGE_IGNORE
00421         // We can't actually cover this code, since versions of the parameters file which need this
00422         // transform didn't use a namespace, so can't have a namespace prefix!
00423         xercesc::QName qname(p_prefix, pNewElementLocalName, 0);
00424         p_qualified_name = qname.getRawName();
00425 #undef COVERAGE_IGNORE
00426     }
00427     else
00428     {
00429         p_qualified_name = pNewElementLocalName;
00430     }
00431     xercesc::DOMElement* p_wrapper_elt = pDocument->createElementNS(p_namespace_uri, p_qualified_name);
00432     // Move all child nodes of pElement to be children of p_wrapper_elt
00433     xercesc::DOMNodeList* p_children = pElement->getChildNodes();
00434     for (unsigned i=0; i<p_children->getLength(); i++)
00435     {
00436         xercesc::DOMNode* p_child = pElement->removeChild(p_children->item(i));
00437         p_wrapper_elt->appendChild(p_child);
00438     }
00439     // Add the wrapper as the sole child of pElement
00440     pElement->appendChild(p_wrapper_elt);
00441 }
00442 
00443 
00444 std::string XmlTools::EscapeSpaces(const std::string& rPath)
00445 {
00446     std::string escaped_path;
00447     for (std::string::const_iterator it = rPath.begin(); it != rPath.end(); ++it)
00448     {
00449         if (*it == ' ')
00450         {
00451             escaped_path += "%20";
00452         }
00453         else
00454         {
00455             escaped_path += *it;
00456         }
00457     }
00458     return escaped_path;
00459 }