00001 #ifndef Impala_Persistency_XmlFileReader_h
00002 #define Impala_Persistency_XmlFileReader_h
00003
00004
00005
00006
00007
#include <assert.h>

#include <deque>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/dom/DOMException.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/framework/Wrapper4InputSource.hpp>

#include "Util/IOBufferFile.h"
#include "Persistency/DOMTreeErrorReporter.h"
00025
00026
00027 namespace Impala
00028 {
00029 namespace Persistency
00030 {
00031
00032 class XmlFileReader
00033 {
00034
00035 public:
00036 typedef XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument DOMDocument;
00037
00038 XmlFileReader()
00039 {
00040 mParser = 0;
00041 }
00042
00043 virtual ~XmlFileReader()
00044 {
00045 FinalizeXerces();
00046 }
00047
00052 DOMDocument*
00053 Read(const std::string& filePath, bool fileRequired = true)
00054 {
00055 if (!FileExists(filePath))
00056 if (fileRequired)
00057 throw std::runtime_error("file missing: " + filePath);
00058 else
00059 return 0;
00060
00061 if (!ValidateFile(filePath))
00062 throw std::runtime_error("validation failed for file: " + filePath);
00063
00064 Util::IOBufferFile ioBuf(filePath, true, true);
00065
00066 return Read(filePath, &ioBuf);
00067 }
00068
00073 DOMDocument*
00074 Read(const std::string& filePath, Util::IOBuffer* ioBuf)
00075 {
00076 ClearParser();
00077
00078 if (!InitializeXerces())
00079 throw std::runtime_error("xerces initialization failed");
00080
00081 DOMDocument* domDoc = ReadDomDoc(filePath, ioBuf);
00082 return domDoc;
00083 }
00084
00085 virtual bool FileExists(const std::string& address)
00086 {
00087 std::ifstream xmlFile(address.c_str());
00088 if (xmlFile.is_open())
00089 {
00090 xmlFile.close();
00091 return true;
00092 }
00093 return false;
00094 }
00095
00096
00097 virtual bool ValidateFile(const std::string& address)
00098 {
00099 return true;
00100 }
00101
00102 static std::vector<XERCES_CPP_NAMESPACE_QUALIFIER DOMNode*> FindChildNodesByName(const XERCES_CPP_NAMESPACE_QUALIFIER DOMNode* const node, const char* const tagName)
00103 {
00104 XERCES_CPP_NAMESPACE_QUALIFIER DOMNodeList *children = node->getChildNodes();
00105 XMLCh *xercesTagName = XERCES_CPP_NAMESPACE_QUALIFIER XMLString::transcode(tagName);
00106 std::vector<XERCES_CPP_NAMESPACE_QUALIFIER DOMNode*> result;
00107 for (XMLSize_t i = 0; i < children->getLength(); i++)
00108 {
00109 XERCES_CPP_NAMESPACE_QUALIFIER DOMNode* child = children->item(i);
00110 const XMLCh *xChildName = child->getNodeName();
00111 if (XERCES_CPP_NAMESPACE_QUALIFIER XMLString::equals(child->getNodeName(), xercesTagName))
00112 result.push_back(child);
00113 }
00114 XERCES_CPP_NAMESPACE_QUALIFIER XMLString::release(&xercesTagName);
00115 return result;
00116 }
00117
00118
00119 static XERCES_CPP_NAMESPACE_QUALIFIER DOMNodeList *FindElementsByName(const XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument* const doc, const char* const tagName)
00120 {
00121 XMLCh *xercesTagName = XERCES_CPP_NAMESPACE_QUALIFIER XMLString::transcode(tagName);
00122 XERCES_CPP_NAMESPACE_QUALIFIER DOMNodeList *elements = doc->getElementsByTagName(xercesTagName);
00123 XERCES_CPP_NAMESPACE_QUALIFIER XMLString::release(&xercesTagName);
00124 return elements;
00125 }
00126
00127 static std::string GetAttributeValue(const XERCES_CPP_NAMESPACE_QUALIFIER DOMNamedNodeMap* const attributes, const char* const attributeName)
00128 {
00129 XMLCh *xercesAttribName = XERCES_CPP_NAMESPACE_QUALIFIER XMLString::transcode(attributeName);
00130 XERCES_CPP_NAMESPACE_QUALIFIER DOMNode *attrib = attributes->getNamedItem(xercesAttribName);
00131 XERCES_CPP_NAMESPACE_QUALIFIER XMLString::release(&xercesAttribName);
00132 return (attrib == 0) ? "" : GetNodeValue(attrib);
00133 }
00134
00135 private:
00136
00137 XERCES_CPP_NAMESPACE_QUALIFIER XercesDOMParser *mParser;
00138
00139 bool InitializeXerces()
00140 {
00141
00142 try
00143 {
00144 XERCES_CPP_NAMESPACE_QUALIFIER XMLPlatformUtils::Initialize();
00145 }
00146
00147 catch (const XERCES_CPP_NAMESPACE_QUALIFIER XMLException &)
00148 {
00149
00150
00151
00152 return false;
00153 }
00154
00155 return true;
00156 }
00157
00158 void ClearParser()
00159 {
00160 if (mParser != 0)
00161 {
00162 delete mParser;
00163 mParser = 0;
00164 }
00165 }
00166
00167 DOMDocument*
00168 ReadDomDoc(const std::string& filePath, Util::IOBuffer* ioBuf = 0)
00169 {
00170
00171
00172
00173
00174
00175 mParser = new XERCES_CPP_NAMESPACE_QUALIFIER XercesDOMParser;
00176 mParser->setValidationScheme(XERCES_CPP_NAMESPACE_QUALIFIER XercesDOMParser::Val_Auto);
00177 mParser->setDoNamespaces(false);
00178 mParser->setDoSchema(false);
00179 mParser->setValidationSchemaFullChecking(false);
00180 mParser->setCreateEntityReferenceNodes(false);
00181
00182 DOMTreeErrorReporter *errReporter = new DOMTreeErrorReporter();
00183 mParser->setErrorHandler(errReporter);
00184
00185
00186
00187
00188
00189 bool errorsOccured = false;
00190
00191 typedef XERCES_CPP_NAMESPACE_QUALIFIER MemBufInputSource MemBufInputSource;
00192 MemBufInputSource memBuf(ioBuf->GetBuffer(), ioBuf->Size(),
00193 filePath.c_str(), false);
00194 try
00195 {
00196 mParser->parse(memBuf);
00197
00198 }
00199
00200 catch (const XERCES_CPP_NAMESPACE_QUALIFIER OutOfMemoryException&)
00201 {
00202
00203 std::cout << "OutOfMemoryException!!!" << std::endl;
00204 errorsOccured = true;
00205 }
00206
00207 catch (const XERCES_CPP_NAMESPACE_QUALIFIER XMLException& )
00208 {
00209
00210
00211 errorsOccured = true;
00212 }
00213
00214 catch (const XERCES_CPP_NAMESPACE_QUALIFIER DOMException& e)
00215 {
00216
00217 std::cout << "\nDOM Error during parsing: '" << filePath << "'\n"
00218 << "DOMException code is: " << e.code << std::endl;
00219
00220
00221
00222
00223
00224
00225 errorsOccured = true;
00226 }
00227
00228 catch (...)
00229 {
00230 std::cout << "An error occurred during parsing\n " << std::endl;
00231 errorsOccured = true;
00232 }
00233
00234
00235 XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument *repositoryDoc = 0;
00236 if (!errorsOccured && !errReporter->getSawErrors())
00237 {
00238 repositoryDoc = mParser->getDocument();
00239 }
00240
00241
00242
00243
00244
00245
00246 delete errReporter;
00247
00248 return repositoryDoc;
00249 }
00250
00251 void FinalizeXerces()
00252 {
00253 ClearParser();
00254 XERCES_CPP_NAMESPACE_QUALIFIER XMLPlatformUtils::Terminate();
00255 }
00256
00257 static std::string GetNodeValue(const XERCES_CPP_NAMESPACE_QUALIFIER DOMNode* const node)
00258 {
00259 const XMLCh* valueX = node->getNodeValue();
00260 char* valueC = XERCES_CPP_NAMESPACE_QUALIFIER XMLString::transcode(valueX);
00261 const std::string value = std::string(valueC);
00262 XERCES_CPP_NAMESPACE_QUALIFIER XMLString::release(&valueC);
00263 return value;
00264 }
00265
00266 };
00267
00268 }
00269 }
00270
00271 #endif //