Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

XmlFileReader.h

Go to the documentation of this file.
00001 #ifndef Impala_Persistency_XmlFileReader_h
00002 #define Impala_Persistency_XmlFileReader_h
00003 
00004 // 
00005 // Convenience class, wrapping a Xerces XML DOM parser.
00006 // 
00007 
#include <assert.h>

#include <deque>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/dom/DOMException.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/framework/Wrapper4InputSource.hpp>

#include "Util/IOBufferFile.h"
#include "Persistency/DOMTreeErrorReporter.h"
00025 
00026 
00027 namespace Impala
00028 {
00029 namespace Persistency
00030 {
00031 
00032 class XmlFileReader
00033 {
00034 
00035 public:
00036     typedef XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument DOMDocument;
00037 
00038     XmlFileReader()
00039     {
00040         mParser = 0;
00041     }
00042 
00043     virtual ~XmlFileReader()
00044     {
00045         FinalizeXerces();
00046     }
00047 
00052     DOMDocument*
00053     Read(const std::string& filePath, bool fileRequired = true)
00054     {
00055         if (!FileExists(filePath)) 
00056             if (fileRequired)
00057                 throw std::runtime_error("file missing: " + filePath);
00058             else
00059                 return 0;
00060 
00061         if (!ValidateFile(filePath)) 
00062             throw std::runtime_error("validation failed for file: " + filePath);
00063 
00064         Util::IOBufferFile ioBuf(filePath, true, true);
00065 
00066         return Read(filePath, &ioBuf);
00067     }
00068 
00073     DOMDocument*
00074     Read(const std::string& filePath, Util::IOBuffer* ioBuf)
00075     {
00076         ClearParser(); // clear in case the reader is used more than once
00077 
00078         if (!InitializeXerces()) 
00079             throw std::runtime_error("xerces initialization failed");
00080 
00081         DOMDocument* domDoc = ReadDomDoc(filePath, ioBuf);
00082         return domDoc;
00083     }
00084 
00085     virtual bool FileExists(const std::string& address)
00086     {
00087         std::ifstream xmlFile(address.c_str());
00088         if (xmlFile.is_open())
00089         {
00090             xmlFile.close();
00091             return true;
00092         }
00093         return false;
00094     }
00095 
00096     // is it readable, writable, the right format etc.
00097     virtual bool ValidateFile(const std::string& address)
00098     {
00099         return true;
00100     }
00101 
00102     static std::vector<XERCES_CPP_NAMESPACE_QUALIFIER DOMNode*> FindChildNodesByName(const XERCES_CPP_NAMESPACE_QUALIFIER DOMNode* const node, const char* const tagName)
00103     {
00104         XERCES_CPP_NAMESPACE_QUALIFIER DOMNodeList *children = node->getChildNodes();
00105         XMLCh *xercesTagName = XERCES_CPP_NAMESPACE_QUALIFIER XMLString::transcode(tagName);
00106         std::vector<XERCES_CPP_NAMESPACE_QUALIFIER DOMNode*> result;
00107         for (XMLSize_t i = 0; i < children->getLength(); i++)
00108         {
00109             XERCES_CPP_NAMESPACE_QUALIFIER DOMNode* child = children->item(i);
00110             const XMLCh *xChildName = child->getNodeName();
00111             if (XERCES_CPP_NAMESPACE_QUALIFIER XMLString::equals(child->getNodeName(), xercesTagName))
00112                 result.push_back(child);
00113         }
00114         XERCES_CPP_NAMESPACE_QUALIFIER XMLString::release(&xercesTagName);
00115         return result;
00116     }
00117 
00118     // recursive search
00119     static XERCES_CPP_NAMESPACE_QUALIFIER DOMNodeList *FindElementsByName(const XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument* const doc, const char* const tagName)
00120     {
00121         XMLCh *xercesTagName = XERCES_CPP_NAMESPACE_QUALIFIER XMLString::transcode(tagName);
00122         XERCES_CPP_NAMESPACE_QUALIFIER DOMNodeList *elements = doc->getElementsByTagName(xercesTagName);
00123         XERCES_CPP_NAMESPACE_QUALIFIER XMLString::release(&xercesTagName);
00124         return elements;
00125     }
00126 
00127     static std::string GetAttributeValue(const XERCES_CPP_NAMESPACE_QUALIFIER DOMNamedNodeMap* const attributes, const char* const attributeName)
00128     {
00129         XMLCh *xercesAttribName = XERCES_CPP_NAMESPACE_QUALIFIER XMLString::transcode(attributeName);
00130         XERCES_CPP_NAMESPACE_QUALIFIER DOMNode *attrib = attributes->getNamedItem(xercesAttribName);
00131         XERCES_CPP_NAMESPACE_QUALIFIER XMLString::release(&xercesAttribName);
00132         return (attrib == 0) ? "" : GetNodeValue(attrib);
00133     }
00134 
00135 private:
00136 
00137     XERCES_CPP_NAMESPACE_QUALIFIER XercesDOMParser *mParser;
00138 
00139     bool InitializeXerces()
00140     {
00141         // Initialize the XML4C2 system
00142         try
00143         {
00144             XERCES_CPP_NAMESPACE_QUALIFIER XMLPlatformUtils::Initialize();
00145         }
00146 
00147         catch (const XERCES_CPP_NAMESPACE_QUALIFIER XMLException &/*toCatch*/)
00148         {
00149 //                std::cout << "Error during Xerces-c Initialization.\n"
00150 //                    << "  Exception message:"
00151 //                    << StrX(toCatch.getMessage()) << std::endl;
00152             return false;
00153         }
00154 
00155         return true;
00156     }
00157 
00158     void ClearParser()
00159     {
00160         if (mParser != 0)
00161         {
00162             delete mParser;
00163             mParser = 0;
00164         }
00165     }
00166 
00167     DOMDocument*
00168     ReadDomDoc(const std::string& filePath, Util::IOBuffer* ioBuf = 0)
00169     {
00170         //
00171         //  Create a parser, then attach an error handler to it.
00172         //  The parser will call back to methods of the ErrorHandler if it
00173         //  discovers errors during the course of parsing the XML document.
00174         //
00175         mParser = new XERCES_CPP_NAMESPACE_QUALIFIER XercesDOMParser;
00176         mParser->setValidationScheme(XERCES_CPP_NAMESPACE_QUALIFIER XercesDOMParser::Val_Auto);
00177         mParser->setDoNamespaces(false);
00178         mParser->setDoSchema(false);
00179         mParser->setValidationSchemaFullChecking(false);
00180         mParser->setCreateEntityReferenceNodes(false);
00181 
00182         DOMTreeErrorReporter *errReporter = new DOMTreeErrorReporter();
00183         mParser->setErrorHandler(errReporter);
00184 
00185         //
00186         //  Parse the XML file, catching any XML exceptions that might propagate
00187         //  out of it.
00188         //
00189         bool errorsOccured = false;
00190 
00191         typedef XERCES_CPP_NAMESPACE_QUALIFIER MemBufInputSource MemBufInputSource;
00192         MemBufInputSource memBuf(ioBuf->GetBuffer(), ioBuf->Size(),
00193                                  filePath.c_str(), false);
00194         try
00195         {
00196             mParser->parse(memBuf);
00197             //mParser->parse(filePath.c_str()); // SK: error handling is niet goed, want geen exception wanneer verkeerd adres.
00198         }
00199 
00200         catch (const XERCES_CPP_NAMESPACE_QUALIFIER OutOfMemoryException&)
00201         {
00202             // log something
00203             std::cout << "OutOfMemoryException!!!" << std::endl;
00204             errorsOccured = true;
00205         }
00206 
00207         catch (const XERCES_CPP_NAMESPACE_QUALIFIER XMLException& /*e*/)
00208         {
00209             // log something
00210 //                std::cout << "XMLException occurred while parsing; message: " << StrX(e.getMessage()) << std::endl;
00211             errorsOccured = true;
00212         }
00213 
00214         catch (const XERCES_CPP_NAMESPACE_QUALIFIER DOMException& e)
00215         {
00216             // log something
00217             std::cout << "\nDOM Error during parsing: '" << filePath << "'\n"
00218                 << "DOMException code is:  " << e.code << std::endl;
00219 
00220 //                const unsigned int maxChars = 2047;
00221 //                XMLCh errText[maxChars + 1];
00222 //                if (DOMImplementation::loadDOMExceptionMsg(e.code, errText, maxChars))
00223 //                    std::cout << "Message is: " << StrX(errText) << std::endl;
00224 
00225             errorsOccured = true;
00226         }
00227 
00228         catch (...)
00229         {
00230             std::cout << "An error occurred during parsing\n " << std::endl;
00231             errorsOccured = true;
00232         }
00233 
00234         // If the parse was successful, get the document data from the DOM tree
00235         XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument *repositoryDoc = 0;
00236         if (!errorsOccured && !errReporter->getSawErrors())
00237         {
00238             repositoryDoc = mParser->getDocument();
00239         }
00240 
00241         //
00242         //  Clean up the error handler. The parser does not adopt handlers
00243         //  since they could be many objects or one object installed for multiple
00244         //  handlers.
00245         //
00246         delete errReporter;
00247 
00248         return repositoryDoc;
00249     }
00250 
00251     void FinalizeXerces()
00252     {
00253         ClearParser();
00254         XERCES_CPP_NAMESPACE_QUALIFIER XMLPlatformUtils::Terminate();
00255     }
00256 
00257     static std::string GetNodeValue(const XERCES_CPP_NAMESPACE_QUALIFIER DOMNode* const node)
00258     {
00259         const XMLCh* valueX = node->getNodeValue();
00260         char* valueC = XERCES_CPP_NAMESPACE_QUALIFIER XMLString::transcode(valueX);
00261         const std::string value = std::string(valueC);
00262         XERCES_CPP_NAMESPACE_QUALIFIER XMLString::release(&valueC);
00263         return value;
00264     }
00265 
00266 }; // class
00267 
00268 }//namespace Persistency
00269 }//namespace Impala
00270 
00271 #endif // Impala_Persistency_XmlFileReader_h

Generated on Fri Mar 19 09:31:45 2010 for ImpalaSrc by  doxygen 1.5.1