XmlTools.cpp

00001 /*
00002 
00003 Copyright (C) University of Oxford, 2005-2011
00004 
00005 University of Oxford means the Chancellor, Masters and Scholars of the
00006 University of Oxford, having an administrative office at Wellington
00007 Square, Oxford OX1 2JD, UK.
00008 
00009 This file is part of Chaste.
00010 
00011 Chaste is free software: you can redistribute it and/or modify it
00012 under the terms of the GNU Lesser General Public License as published
00013 by the Free Software Foundation, either version 2.1 of the License, or
00014 (at your option) any later version.
00015 
00016 Chaste is distributed in the hope that it will be useful, but WITHOUT
00017 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00018 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
00019 License for more details. The offer of Chaste under the terms of the
00020 License is subject to the License being interpreted in accordance with
00021 English Law and subject to any action against the University of Oxford
00022 being under the jurisdiction of the English Courts.
00023 
00024 You should have received a copy of the GNU Lesser General Public License
00025 along with Chaste. If not, see <http://www.gnu.org/licenses/>.
00026 
00027 */
00028 
00029 #include "XmlTools.hpp"
00030 
00031 #include <iostream>
00032 
00033 #include <xercesc/dom/DOM.hpp>
00034 #include <xercesc/util/PlatformUtils.hpp>
00035 #include <xercesc/util/QName.hpp>
00036 #include <xercesc/util/XMLUniDefs.hpp> // chLatin_*
00037 #include <xercesc/framework/Wrapper4InputSource.hpp>
00038 #include <xercesc/validators/common/Grammar.hpp>
00039 
00040 #include <xsd/cxx/xml/sax/std-input-source.hxx>
00041 #include <xsd/cxx/xml/dom/bits/error-handler-proxy.hxx>
00042 #include <xsd/cxx/tree/exceptions.hxx>
00043 
00044 #include "Exception.hpp"
00045 
00046 
00047 
00048 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadXmlFile(
00049     const std::string& rFileName,
00050     const ::xsd::cxx::tree::properties<char>& rProps,
00051     bool validate)
00052 {
00053     xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> p_doc;
00054     try
00055     {
00056         // Initialise Xerces
00057         xercesc::XMLPlatformUtils::Initialize();
00058         // Set up an error handler
00059         ::xsd::cxx::tree::error_handler<char> error_handler;
00060         // Parse XML to DOM
00061         p_doc = XmlTools::ReadFileToDomDocument(rFileName, error_handler, rProps, validate);
00062         // Any errors?
00063         error_handler.throw_if_failed< ::xsd::cxx::tree::parsing<char> >();
00064     }
00065     catch (const ::xsd::cxx::tree::parsing<char>& e)
00066     {
00067         Finalize();
00068         // Test for missing schema/xml file
00069 #if (XSD_INT_VERSION >= 3000000L)
00070         const ::xsd::cxx::tree::diagnostics<char>& diags = e.diagnostics();
00071         const ::xsd::cxx::tree::error<char>& first_error = diags[0];
00072 #else
00073         const ::xsd::cxx::tree::errors<char>& errors = e.errors();
00074         const ::xsd::cxx::tree::error<char>& first_error = errors[0];
00075 #endif
00076         if (first_error.line() == 0u)
00077         {
00078             std::cerr << first_error << std::endl;
00079             EXCEPTION("Missing file parsing configuration file: " + rFileName);
00080         }
00081         else
00082         {
00083             std::cerr << e << std::endl;
00084             EXCEPTION("XML parsing error in configuration file: " + rFileName);
00085         }
00086     }
00087 #define COVERAGE_IGNORE
00088     catch (...)
00089     { // This shouldn't happen, but just in case...
00090         Finalize();
00091         throw;
00092     }
00093 #undef COVERAGE_IGNORE
00094     return p_doc;
00095 }
00096 
00097 
00098 void XmlTools::Finalize()
00099 {
00100     xercesc::XMLPlatformUtils::Terminate();
00101 }
00102 
00103 XmlTools::Finalizer::Finalizer(bool init)
00104 {
00105     // The init=true case will very rarely be used, but a parameter to the constructor is needed
00106     // to stop some compilers complaining about an unused variable!
00107     if (init)
00108     {
00109 #define COVERAGE_IGNORE
00110         xercesc::XMLPlatformUtils::Initialize();
00111 #undef COVERAGE_IGNORE
00112     }
00113 }
00114 
00115 XmlTools::Finalizer::~Finalizer()
00116 {
00117     XmlTools::Finalize();
00118 }
00119 
00120 xsd::cxx::xml::dom::auto_ptr<xercesc::DOMDocument> XmlTools::ReadFileToDomDocument(
00121         const std::string& rFileName,
00122         ::xsd::cxx::xml::error_handler<char>& rErrorHandler,
00123         const ::xsd::cxx::tree::properties<char>& rProps,
00124         bool validate)
00125 {
00126     using namespace xercesc;
00127     namespace xml = xsd::cxx::xml;
00128 
00129     // Get an implementation of the Load-Store (LS) interface.
00130     const XMLCh ls_id [] = {chLatin_L, chLatin_S, chNull};
00131     DOMImplementation* p_impl(DOMImplementationRegistry::getDOMImplementation(ls_id));
00132 
00133 #if _XERCES_VERSION >= 30000
00134     // Xerces-C++ 3.0.0 and later.
00135     xml::dom::auto_ptr<DOMLSParser> p_parser(p_impl->createLSParser(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
00136     DOMConfiguration* p_conf(p_parser->getDomConfig());
00137 
00138     // Discard comment nodes in the document.
00139     p_conf->setParameter(XMLUni::fgDOMComments, false);
00140 
00141     // Enable datatype normalization.
00142     p_conf->setParameter(XMLUni::fgDOMDatatypeNormalization, true);
00143 
00144     // Do not create EntityReference nodes in the DOM tree.  No
00145     // EntityReference nodes will be created, only the nodes
00146     // corresponding to their fully expanded substitution text
00147     // will be created.
00148     p_conf->setParameter(XMLUni::fgDOMEntities, false);
00149 
00150     // Perform namespace processing.
00151     p_conf->setParameter(XMLUni::fgDOMNamespaces, true);
00152 
00153     // Do not include ignorable whitespace in the DOM tree.
00154     p_conf->setParameter(XMLUni::fgDOMElementContentWhitespace, false);
00155 
00156     // Enable validation.
00157     if (validate)
00158     {
00159         p_conf->setParameter(XMLUni::fgDOMValidate, true);
00160         p_conf->setParameter(XMLUni::fgXercesSchema, true);
00161         p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
00162         // Code taken from xsd/cxx/xml/dom/parsing-source.txx
00163         if (!rProps.schema_location().empty())
00164         {
00165             xml::string locn(rProps.schema_location());
00166             const void* p_locn(locn.c_str());
00167             p_conf->setParameter(XMLUni::fgXercesSchemaExternalSchemaLocation,
00168                                  const_cast<void*>(p_locn));
00169         }
00170         if (!rProps.no_namespace_schema_location().empty())
00171         {
00172             xml::string locn(rProps.no_namespace_schema_location());
00173             const void* p_locn(locn.c_str());
00174 
00175             p_conf->setParameter(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
00176                                  const_cast<void*>(p_locn));
00177         }
00178     }
00179     else
00180     {
00181         // This branch is only used by projects
00182 #define COVERAGE_IGNORE
00183         p_conf->setParameter(XMLUni::fgDOMValidate, false);
00184         p_conf->setParameter(XMLUni::fgXercesSchema, false);
00185         p_conf->setParameter(XMLUni::fgXercesSchemaFullChecking, false);
00186 #undef COVERAGE_IGNORE
00187     }
00188 
00189     // We will release the DOM document ourselves.
00190     p_conf->setParameter(XMLUni::fgXercesUserAdoptsDOMDocument, true);
00191 
00192     // Set error handler.
00193     xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
00194     p_conf->setParameter(XMLUni::fgDOMErrorHandler, &ehp);
00195 
00196 #else // _XERCES_VERSION >= 30000
00197     // Same as above but for Xerces-C++ 2 series.
00198     xml::dom::auto_ptr<DOMBuilder> p_parser(p_impl->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, 0));
00199 
00200     p_parser->setFeature(XMLUni::fgDOMComments, false);
00201     p_parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);
00202     p_parser->setFeature(XMLUni::fgDOMEntities, false);
00203     p_parser->setFeature(XMLUni::fgDOMNamespaces, true);
00204     p_parser->setFeature(XMLUni::fgDOMWhitespaceInElementContent, false);
00205     p_parser->setFeature(XMLUni::fgXercesUserAdoptsDOMDocument, true);
00206 
00207     // Code taken from xsd/cxx/xml/dom/parsing-source.txx
00208     if (validate)
00209     {
00210         p_parser->setFeature(XMLUni::fgDOMValidation, true);
00211         p_parser->setFeature(XMLUni::fgXercesSchema, true);
00212         p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
00213         if (!rProps.schema_location().empty())
00214         {
00215             xml::string locn(rProps.schema_location());
00216             const void* p_locn(locn.c_str());
00217             p_parser->setProperty(XMLUni::fgXercesSchemaExternalSchemaLocation,
00218                                   const_cast<void*>(p_locn));
00219         }
00220 
00221         if (!rProps.no_namespace_schema_location().empty())
00222         {
00223             xml::string locn(rProps.no_namespace_schema_location());
00224             const void* p_locn(locn.c_str());
00225 
00226             p_parser->setProperty(XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation,
00227                                   const_cast<void*>(p_locn));
00228         }
00229     }
00230     else
00231     {
00232         // This branch is only used by projects
00233 #define COVERAGE_IGNORE
00234         p_parser->setFeature(XMLUni::fgDOMValidation, false);
00235         p_parser->setFeature(XMLUni::fgXercesSchema, false);
00236         p_parser->setFeature(XMLUni::fgXercesSchemaFullChecking, false);
00237 #undef COVERAGE_IGNORE
00238     }
00239 
00240     xml::dom::bits::error_handler_proxy<char> ehp(rErrorHandler);
00241     p_parser->setErrorHandler(&ehp);
00242 
00243 #endif // _XERCES_VERSION >= 30000
00244 
00245     // Do the parse
00246     xml::dom::auto_ptr<DOMDocument> p_doc(p_parser->parseURI(rFileName.c_str()));
00247 
00248     if (ehp.failed())
00249     {
00250         p_doc.reset();
00251     }
00252 
00253     return p_doc;
00254 }
00255 
00256 #define COVERAGE_IGNORE
00257 void XmlTools::PrintNode(const std::string& rMsg, xercesc::DOMNode* pNode, bool showChildren)
00258 {
00259     std::string prefix = X2C(pNode->getPrefix());
00260     std::string name = X2C(pNode->getLocalName());
00261     std::string nsuri = X2C(pNode->getNamespaceURI());
00262     std::cout << rMsg << " " << pNode << " " << prefix << ":" << name << " in " << nsuri << std::endl;
00263     if (showChildren)
00264     {
00265         for (xercesc::DOMNode* p_node = pNode->getFirstChild();
00266              p_node != NULL;
00267              p_node = p_node->getNextSibling())
00268         {
00269             std::cout << "     child type " << p_node->getNodeType();
00270             PrintNode("", p_node, false);
00271         }
00272         xercesc::DOMNamedNodeMap* p_attrs = pNode->getAttributes();
00273         if (p_attrs)
00274         {
00275             for (XMLSize_t i=0; i<p_attrs->getLength(); i++)
00276             {
00277                  xercesc::DOMNode* p_attr = p_attrs->item(i);
00278                  std::string value = X2C(p_attr->getNodeValue());
00279                  PrintNode("     attr (" + value + ")", p_attr, false);
00280             }
00281         }
00282     }
00283 }
00284 #undef COVERAGE_IGNORE
00285 
00286 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
00287                                             xercesc::DOMElement* pElement,
00288                                             const XMLCh* pNamespace)
00289 {
00290     using namespace xercesc;
00291 
00292     //PrintNode("Renaming", pElement, true);
00293     DOMNamedNodeMap* p_orig_attrs = pElement->getAttributes();
00294     std::vector<std::string> attr_values;
00295     if (p_orig_attrs)
00296     {
00297         for (XMLSize_t i=0; i<p_orig_attrs->getLength(); i++)
00298         {
00299             DOMNode* p_attr = p_orig_attrs->item(i);
00300             attr_values.push_back(X2C(p_attr->getNodeValue()));
00301         }
00302     }
00303     DOMElement* p_new_elt = static_cast<DOMElement*>(
00304         pDocument->renameNode(pElement, pNamespace, pElement->getLocalName()));
00305     //PrintNode("   to", p_new_elt, true);
00306     // Fix attributes - some get broken by the rename!
00307     if (p_orig_attrs)
00308     {
00309         DOMNamedNodeMap* p_new_attrs = p_new_elt->getAttributes();
00310         assert(p_new_attrs);
00311         assert(p_new_attrs == p_orig_attrs);
00312         assert(p_new_attrs->getLength() == attr_values.size());
00313         for (XMLSize_t i=0; i<p_new_attrs->getLength(); i++)
00314         {
00315             DOMNode* p_attr = p_new_attrs->item(i);
00316             p_attr->setNodeValue(X(attr_values[i]));
00317         }
00318     }
00319     //PrintNode("   after attr fix", p_new_elt, true);
00320 
00321     std::vector<DOMElement*> children = GetChildElements(p_new_elt);
00322     for (std::vector<DOMElement*>::iterator it = children.begin(); it != children.end(); ++it)
00323     {
00324         SetNamespace(pDocument, *it, pNamespace);
00325     }
00326 
00327     return p_new_elt;
00328 }
00329 
00330 xercesc::DOMElement* XmlTools::SetNamespace(xercesc::DOMDocument* pDocument,
00331                                             xercesc::DOMElement* pElement,
00332                                             const std::string& rNamespace)
00333 {
00334     return SetNamespace(pDocument, pElement, X(rNamespace));
00335 }
00336 
00337 
00338 std::vector<xercesc::DOMElement*> XmlTools::GetChildElements(xercesc::DOMElement* pElement)
00339 {
00340     std::vector<xercesc::DOMElement*> children;
00341     for (xercesc::DOMNode* p_node = pElement->getFirstChild();
00342          p_node != NULL;
00343          p_node = p_node->getNextSibling())
00344     {
00345         if (p_node->getNodeType() == xercesc::DOMNode::ELEMENT_NODE)
00346         {
00347             children.push_back(static_cast<xercesc::DOMElement*>(p_node));
00348         }
00349     }
00350     return children;
00351 }
00352 
00353 
00354 void XmlTools::FindElements(xercesc::DOMElement* pContextElement,
00355                             const std::vector<std::string>& rNames,
00356                             std::vector<xercesc::DOMElement*>& rResults,
00357                             unsigned depth)
00358 {
00359     xercesc::DOMNodeList* p_child_elts = pContextElement->getElementsByTagName(X(rNames[depth]));
00360     unsigned num_children = p_child_elts->getLength();
00361     for (unsigned i=0; i<num_children; i++)
00362     {
00363         xercesc::DOMElement* p_child_elt = static_cast<xercesc::DOMElement*>(p_child_elts->item(i));
00364         if (depth == rNames.size() - 1)
00365         {
00366             rResults.push_back(p_child_elt);
00367         }
00368         else
00369         {
00370             FindElements(p_child_elt, rNames, rResults, depth+1);
00371         }
00372     }
00373 }
00374 
00375 std::vector<xercesc::DOMElement*> XmlTools::FindElements(xercesc::DOMElement* pContextElement,
00376                                                          const std::string& rPath)
00377 {
00378     std::vector<xercesc::DOMElement*> results;
00379     std::vector<std::string> path;
00380     size_t start_pos = 0;
00381     size_t slash_pos = 0;
00382     while (slash_pos != std::string::npos)
00383     {
00384         slash_pos = rPath.find('/', start_pos);
00385         if (slash_pos == std::string::npos)
00386         {
00387             path.push_back(rPath.substr(start_pos));
00388         }
00389         else
00390         {
00391             path.push_back(rPath.substr(start_pos, slash_pos-start_pos));
00392         }
00393         start_pos = slash_pos + 1;
00394     }
00395     FindElements(pContextElement, path, results);
00396     return results;
00397 }
00398 
00399 
00400 void XmlTools::WrapContentInElement(xercesc::DOMDocument* pDocument,
00401                                     xercesc::DOMElement* pElement,
00402                                     const XMLCh* pNewElementLocalName)
00403 {
00404     const XMLCh* p_namespace_uri = pElement->getNamespaceURI();
00405     const XMLCh* p_prefix = pElement->getPrefix();
00406     const XMLCh* p_qualified_name;
00407     if (p_prefix)
00408     {
00409 #define COVERAGE_IGNORE
00410         // We can't actually cover this code, since versions of the parameters file which need this
00411         // transform didn't use a namespace, so can't have a namespace prefix!
00412         xercesc::QName qname(p_prefix, pNewElementLocalName, 0);
00413         p_qualified_name = qname.getRawName();
00414 #undef COVERAGE_IGNORE
00415     }
00416     else
00417     {
00418         p_qualified_name = pNewElementLocalName;
00419     }
00420     xercesc::DOMElement* p_wrapper_elt = pDocument->createElementNS(p_namespace_uri, p_qualified_name);
00421     // Move all child nodes of pElement to be children of p_wrapper_elt
00422     xercesc::DOMNodeList* p_children = pElement->getChildNodes();
00423     for (unsigned i=0; i<p_children->getLength(); i++)
00424     {
00425         xercesc::DOMNode* p_child = pElement->removeChild(p_children->item(i));
00426         p_wrapper_elt->appendChild(p_child);
00427     }
00428     // Add the wrapper as the sole child of pElement
00429     pElement->appendChild(p_wrapper_elt);
00430 }
00431 
00432 
00433 std::string XmlTools::EscapeSpaces(const std::string& rPath)
00434 {
00435     std::string escaped_path;
00436     for (std::string::const_iterator it = rPath.begin(); it != rPath.end(); ++it)
00437     {
00438         if (*it == ' ')
00439         {
00440             escaped_path += "%20";
00441         }
00442         else
00443         {
00444             escaped_path += *it;
00445         }
00446     }
00447     return escaped_path;
00448 }
Generated on Thu Dec 22 13:00:06 2011 for Chaste by  doxygen 1.6.3