XmlTools.cpp

00001 /*
00002 
00003 Copyright (C) University of Oxford, 2005-2011
00004 
00005 University of Oxford means the Chancellor, Masters and Scholars of the
00006 University of Oxford, having an administrative office at Wellington
00007 Square, Oxford OX1 2JD, UK.
00008 
00009 This file is part of Chaste.
00010 
00011 Chaste is free software: you can redistribute it and/or modify it
00012 under the terms of the GNU Lesser General Public License as published
00013 by the Free Software Foundation, either version 2.1 of the License, or
00014 (at your option) any later version.
00015 
00016 Chaste is distributed in the hope that it will be useful, but WITHOUT
00017 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00018 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
00019 License for more details. The offer of Chaste under the terms of the
00020 License is subject to the License being interpreted in accordance with
00021 English Law and subject to any action against the University of Oxford
00022 being under the jurisdiction of the English Courts.
00023 
00024 You should have received a copy of the GNU Lesser General Public License
00025 along with Chaste. If not, see <http://www.gnu.org/licenses/>.
00026 
00027 */
00028 
00029 #include "XmlTools.hpp"
00030 
00031 #include <iostream>
00032 
00033 #include <xercesc/dom/DOM.hpp>
00034 #include <xercesc/util/PlatformUtils.hpp>
00035 #include <xercesc/util/QName.hpp>
00036 #include <xercesc/util/XMLUniDefs.hpp> // chLatin_*
00037 #include <xercesc/framework/Wrapper4InputSource.hpp>
00038 #include <xercesc/validators/common/Grammar.hpp>
00039 
00040 #include <xsd/cxx/xml/sax/std-input-source.hxx>
00041 #include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
00042 #include <xsd/cxx/tree/exceptions.hxx>
00043 
00044 #include "Exception.hpp"
00045 
00046 
00047 
00048 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadXmlFile(
00049     const std::string& rFileName,
00050     const ::xsd::cxx::tree::properties<char>& rProps)
00051 {
00052     xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> p_doc;
00053     try
00054     {
00055         // Initialise Xerces
00056         xercesc::XMLPlatformUtils::Initialize();
00057         // Set up an error handler
00058         ::xsd::cxx::tree::error_handler<char> error_handler;
00059         // Parse XML to DOM
00060         p_doc = XmlTools::ReadFileToDomDocument(rFileName, error_handler, rProps);
00061         // Any errors?
00062         error_handler.throw_if_failed< ::xsd::cxx::tree::parsing<char> >();
00063     }
00064     catch (const ::xsd::cxx::tree::parsing<char>& e)
00065     {
00066         Finalize();
00067         // Test for missing schema/xml file
00068 #if (XSD_INT_VERSION >= 3000000L)
00069         const ::xsd::cxx::tree::diagnostics<char>& diags = e.diagnostics();
00070         const ::xsd::cxx::tree::error<char>& first_error = diags[0];
00071 #else
00072         const ::xsd::cxx::tree::errors<char>& errors = e.errors();
00073         const ::xsd::cxx::tree::error<char>& first_error = errors[0];
00074 #endif
00075         if (first_error.line() == 0u)
00076         {
00077             std::cerr << first_error << std::endl;
00078             EXCEPTION("Missing file parsing configuration file: " + rFileName);
00079         }
00080         else
00081         {
00082             std::cerr << e << std::endl;
00083             EXCEPTION("XML parsing error in configuration file: " + rFileName);
00084         }
00085     }
00086 #define COVERAGE_IGNORE
00087     catch (...)
00088     { // This shouldn't happen, but just in case...
00089         Finalize();
00090         throw;
00091     }
00092 #undef COVERAGE_IGNORE
00093     return p_doc;
00094 }
00095 
00096 
00097 void XmlTools::Finalize()
00098 {
00099     xercesc::XMLPlatformUtils::Terminate();
00100 }
00101 
00102 XmlTools::Finalizer::Finalizer(bool init)
00103 {
00104     // The init=true case will very rarely be used, but a parameter to the constructor is needed
00105     // to stop some compilers complaining about an unused variable!
00106     if (init)
00107     {
00108 #define COVERAGE_IGNORE
00109         xercesc::XMLPlatformUtils::Initialize();
00110 #undef COVERAGE_IGNORE
00111     }
00112 }
00113 
00114 XmlTools::Finalizer::~Finalizer()
00115 {
00116     XmlTools::Finalize();
00117 }
00118 
00119 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadFileToDomDocument(
00120         const std::string& rFileName,
00121         ::xsd::cxx::xml::error_handler<char>& rErrorHandler,
00122         const ::xsd::cxx::tree::properties<char>& rProps)
00123 {
00124     using namespace xercesc;
00125     namespace xml = xsd::cxx::xml;
00126 
00127     // Get an implementation of the Load-Store (LS) interface.
00128     const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull};
00129     DOMImplementation* p_impl(DOMImplementationRegistry::getDOMImplementation(ls_id));
00130 
00131 #if _XERCES_VERSION >= 30000
00132     // Xerces-C++ 3.0.0 and later.
00133     xml::dom::auto_ptr<DOMLSParser> p_parser(p_impl->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
00134     DOMConfiguration* p_conf(p_parser->getDomConfig());
00135 
00136     // Discard comment nodes in the document.
00137     p_conf->setParameter(XMLUni::fgDOMComments, false);
00138 
00139     // Enable datatype normalization.
00140     p_conf->setParameter(XMLUni::fgDOMDatatypeNormalization, true);
00141 
00142     // Do not create EntityReference nodes in the DOM tree.  No
00143     // EntityReference nodes will be created, only the nodes
00144     // corresponding to their fully expanded substitution text
00145     // will be created.
00146     p_conf->setParameter(XMLUni::fgDOMEntities, false);
00147 
00148     // Perform namespace processing.
00149     p_conf->setParameter(XMLUni::fgDOMNamespaces, true);
00150 
00151     // Do not include ignorable whitespace in the DOM tree.
00152     p_conf->setParameter(XMLUni::fgDOMElementContentWhitespace, false);
00153 
00154     // Enable validation.
00155     p_conf->setParameter(XMLUni::fgDOMValidate, true);
00156     p_conf->setParameter(XMLUni::fgXercesSchema, true);
00157     p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
00158     // Code taken from xsd/cxx/xml/dom/parsing-source.txx
00159     if (!rProps.schema_location().empty())
00160     {
00161         xml::string locn(rProps.schema_location());
00162         const void* p_locn(locn.c_str());
00163         p_conf->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation,
00164                              const_cast<void*>(p_locn));
00165     }
00166     if (!rProps.no_namespace_schema_location().empty())
00167     {
00168         xml::string locn(rProps.no_namespace_schema_location());
00169         const void* p_locn(locn.c_str());
00170 
00171         p_conf->setParameter(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
00172                              const_cast<void*>(p_locn));
00173     }
00174 
00175     // We will release the DOM document ourselves.
00176     p_conf->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
00177 
00178     // Set error handler.
00179     xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
00180     p_conf->setParameter(XMLUni::fgDOMErrorHandler, &ehp);
00181 
00182 #else // _XERCES_VERSION >= 30000
00183     // Same as above but for Xerces-C++ 2 series.
00184     xml::dom::auto_ptr<DOMBuilder> p_parser(p_impl->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
00185 
00186     p_parser->setFeature(XMLUni::fgDOMComments, false);
00187     p_parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);
00188     p_parser->setFeature(XMLUni::fgDOMEntities, false);
00189     p_parser->setFeature(XMLUni::fgDOMNamespaces, true);
00190     p_parser->setFeature(XMLUni::fgDOMWhitespaceInElementContent, false);
00191     p_parser->setFeature(XMLUni::fgDOMValidation, true);
00192     p_parser->setFeature(XMLUni::fgXercesSchema, true);
00193     p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
00194     p_parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
00195 
00196     // Code taken from xsd/cxx/xml/dom/parsing-source.txx
00197     if (!rProps.schema_location().empty())
00198     {
00199         xml::string locn(rProps.schema_location());
00200         const void* p_locn(locn.c_str());
00201         p_parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,
00202                               const_cast<void*>(p_locn));
00203     }
00204 
00205     if (!rProps.no_namespace_schema_location().empty())
00206     {
00207         xml::string locn(rProps.no_namespace_schema_location());
00208         const void* p_locn(locn.c_str());
00209 
00210         p_parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
00211                               const_cast<void*>(p_locn));
00212     }
00213 
00214     xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
00215     p_parser->setErrorHandler(&ehp);
00216 
00217 #endif // _XERCES_VERSION >= 30000
00218 
00219     // Do the parse
00220     xml::dom::auto_ptr<DOMDocument> p_doc(p_parser->parseURI(rFileName.c_str()));
00221 
00222     if (ehp.failed())
00223     {
00224         p_doc.reset();
00225     }
00226 
00227     return p_doc;
00228 }
00229 
00230 #define COVERAGE_IGNORE
00231 void XmlTools::PrintNode(const std::string& rMsg, xercesc::DOMNode* pNode, bool showChildren)
00232 {
00233     std::string prefix = X2C(pNode->getPrefix());
00234     std::string name = X2C(pNode->getLocalName());
00235     std::string nsuri = X2C(pNode->getNamespaceURI());
00236     std::cout << rMsg << " " << pNode << " " << prefix << ":" << name << " in " << nsuri << std::endl;
00237     if (showChildren)
00238     {
00239         for (xercesc::DOMNode* p_node = pNode->getFirstChild();
00240              p_node != NULL;
00241              p_node = p_node->getNextSibling())
00242         {
00243             std::cout << "     child type " << p_node->getNodeType();
00244             PrintNode("", p_node, false);
00245         }
00246         xercesc::DOMNamedNodeMap* p_attrs = pNode->getAttributes();
00247         if (p_attrs)
00248         {
00249             for (XMLSize_t i=0; i<p_attrs->getLength(); i++)
00250             {
00251                  xercesc::DOMNode* p_attr = p_attrs->item(i);
00252                  std::string value = X2C(p_attr->getNodeValue());
00253                  PrintNode("     attr (" + value + ")", p_attr, false);
00254             }
00255         }
00256     }
00257 }
00258 #undef COVERAGE_IGNORE
00259 
00260 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
00261                                             xercesc::DOMElement* pElement,
00262                                             const XMLCh* pNamespace)
00263 {
00264     using namespace xercesc;
00265 
00266     //PrintNode("Renaming", pElement, true);
00267     DOMNamedNodeMap* p_orig_attrs = pElement->getAttributes();
00268     std::vector<std::string> attr_values;
00269     if (p_orig_attrs)
00270     {
00271         for (XMLSize_t i=0; i<p_orig_attrs->getLength(); i++)
00272         {
00273             DOMNode* p_attr = p_orig_attrs->item(i);
00274             attr_values.push_back(X2C(p_attr->getNodeValue()));
00275         }
00276     }
00277     DOMElement* p_new_elt = static_cast<DOMElement*>(
00278         pDocument->renameNode(pElement, pNamespace, pElement->getLocalName()));
00279     //PrintNode("   to", p_new_elt, true);
00280     // Fix attributes - some get broken by the rename!
00281     if (p_orig_attrs)
00282     {
00283         DOMNamedNodeMap* p_new_attrs = p_new_elt->getAttributes();
00284         assert(p_new_attrs);
00285         assert(p_new_attrs == p_orig_attrs);
00286         assert(p_new_attrs->getLength() == attr_values.size());
00287         for (XMLSize_t i=0; i<p_new_attrs->getLength(); i++)
00288         {
00289             DOMNode* p_attr = p_new_attrs->item(i);
00290             p_attr->setNodeValue(X(attr_values[i]));
00291         }
00292     }
00293     //PrintNode("   after attr fix", p_new_elt, true);
00294 
00295     std::vector<DOMElement*> children = GetChildElements(p_new_elt);
00296     for (std::vector<DOMElement*>::iterator it = children.begin(); it != children.end(); ++it)
00297     {
00298         SetNamespace(pDocument, *it, pNamespace);
00299     }
00300 
00301     return p_new_elt;
00302 }
00303 
00304 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
00305                                             xercesc::DOMElement* pElement,
00306                                             const std::string& rNamespace)
00307 {
00308     return SetNamespace(pDocument, pElement, X(rNamespace));
00309 }
00310 
00311 
00312 std::vector<xercesc::DOMElement*> XmlTools::GetChildElements(xercesc::DOMElement* pElement)
00313 {
00314     std::vector<xercesc::DOMElement*> children;
00315     for (xercesc::DOMNode* p_node = pElement->getFirstChild();
00316          p_node != NULL;
00317          p_node = p_node->getNextSibling())
00318     {
00319         if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE)
00320         {
00321             children.push_back(static_cast<xercesc::DOMElement*>(p_node));
00322         }
00323     }
00324     return children;
00325 }
00326 
00327 
00328 void XmlTools::FindElements(xercesc::DOMElement* pContextElement,
00329                             const std::vector<std::string>& rNames,
00330                             std::vector<xercesc::DOMElement*>& rResults,
00331                             unsigned depth)
00332 {
00333     xercesc::DOMNodeList* p_child_elts = pContextElement->getElementsByTagName(X(rNames[depth]));
00334     unsigned num_children = p_child_elts->getLength();
00335     for (unsigned i=0; i<num_children; i++)
00336     {
00337         xercesc::DOMElement* p_child_elt = static_cast<xercesc::DOMElement*>(p_child_elts->item(i));
00338         if (depth == rNames.size() - 1)
00339         {
00340             rResults.push_back(p_child_elt);
00341         }
00342         else
00343         {
00344             FindElements(p_child_elt, rNames, rResults, depth+1);
00345         }
00346     }
00347 }
00348 
00349 std::vector<xercesc::DOMElement*> XmlTools::FindElements(xercesc::DOMElement* pContextElement,
00350                                                          const std::string& rPath)
00351 {
00352     std::vector<xercesc::DOMElement*> results;
00353     std::vector<std::string> path;
00354     size_t start_pos = 0;
00355     size_t slash_pos = 0;
00356     while (slash_pos != std::string::npos)
00357     {
00358         slash_pos = rPath.find('/', start_pos);
00359         if (slash_pos == std::string::npos)
00360         {
00361             path.push_back(rPath.substr(start_pos));
00362         }
00363         else
00364         {
00365             path.push_back(rPath.substr(start_pos, slash_pos-start_pos));
00366         }
00367         start_pos = slash_pos + 1;
00368     }
00369     FindElements(pContextElement, path, results);
00370     return results;
00371 }
00372 
00373 
00374 void XmlTools::WrapContentInElement(xercesc::DOMDocument* pDocument,
00375                                     xercesc::DOMElement* pElement,
00376                                     const XMLCh* pNewElementLocalName)
00377 {
00378     const XMLCh* p_namespace_uri = pElement->getNamespaceURI();
00379     const XMLCh* p_prefix = pElement->getPrefix();
00380     const XMLCh* p_qualified_name;
00381     if (p_prefix)
00382     {
00383 #define COVERAGE_IGNORE
00384         // We can't actually cover this code, since versions of the parameters file which need this
00385         // transform didn't use a namespace, so can't have a namespace prefix!
00386         xercesc::QName qname(p_prefix, pNewElementLocalName, 0);
00387         p_qualified_name = qname.getRawName();
00388 #undef COVERAGE_IGNORE
00389     }
00390     else
00391     {
00392         p_qualified_name = pNewElementLocalName;
00393     }
00394     xercesc::DOMElement* p_wrapper_elt = pDocument->createElementNS(p_namespace_uri, p_qualified_name);
00395     // Move all child nodes of pElement to be children of p_wrapper_elt
00396     xercesc::DOMNodeList* p_children = pElement->getChildNodes();
00397     for (unsigned i=0; i<p_children->getLength(); i++)
00398     {
00399         xercesc::DOMNode* p_child = pElement->removeChild(p_children->item(i));
00400         p_wrapper_elt->appendChild(p_child);
00401     }
00402     // Add the wrapper as the sole child of pElement
00403     pElement->appendChild(p_wrapper_elt);
00404 }
00405 
00406 
00407 std::string XmlTools::EscapeSpaces(const std::string& rPath)
00408 {
00409     std::string escaped_path;
00410     for (std::string::const_iterator it = rPath.begin(); it != rPath.end(); ++it)
00411     {
00412         if (*it == ' ')
00413         {
00414             escaped_path += "%20";
00415         }
00416         else
00417         {
00418             escaped_path += *it;
00419         }
00420     }
00421     return escaped_path;
00422 }

Generated on Tue May 31 14:31:41 2011 for Chaste by  doxygen 1.5.5