Home || Architecture || Video Search || Visual Search || Scripts || Applications || Important Messages || OGL || Src

Sax2Reader.h

Go to the documentation of this file.
00001 #ifndef Impala_Application_SDash_Sax2Reader_h
00002 #define Impala_Application_SDash_Sax2Reader_h
00003 
00004 #include <xercesc/util/PlatformUtils.hpp>
00005 #include <xercesc/sax2/SAX2XMLReader.hpp>
00006 #include <xercesc/sax2/XMLReaderFactory.hpp>
00007 #if defined(XERCES_NEW_IOSTREAMS)
00008 #include <fstream>
00009 #else
00010 #include <fstream.h>
00011 #endif
00012 #include <xercesc/util/OutOfMemoryException.hpp>
00013 
00014 #include "Application/sdash/StrX.h"
00015 #include "Application/sdash/Sax2Handler.h"
00016 
00017 namespace Impala {
00018 namespace Application {
00019 namespace SDash {
00020 
00021 
00022 class Sax2Reader {
00023 
00024 public:
00025 
00026     // SK: I kept the parameters from the main() of a SAX2 sample:
00027 
00028     //    "Usage:"
00029     //        "    [options] <XML file | List file>"
00030     //        "This program invokes the SAX2XMLReader, and then prints the"
00031     //        "number of elements, attributes, spaces and characters found"
00032     //        "in each XML file, using SAX2 API."
00033     //        "Options:"
00034     //        "    -l          Indicate the input file is a List File that has a list of xml files."
00035     //        "                Default to off (Input file is an XML file)."
00036     //        "    -v=xxx      Validation scheme [always | never | auto*]."
00037     //        "    -f          Enable full schema constraint checking processing. Defaults to off."
00038     //        "    -p          Enable namespace-prefixes feature. Defaults to off."
00039     //        "    -n          Disable namespace processing. Defaults to on."
00040     //        "                NOTE: THIS IS OPPOSITE FROM OTHER SAMPLES."
00041     //        "    -s          Disable schema processing. Defaults to on."
00042     //        "                NOTE: THIS IS OPPOSITE FROM OTHER SAMPLES."
00043     //        "    -i          Disable identity constraint checking. Defaults to on."
00044     //        "                NOTE: THIS IS OPPOSITE FROM OTHER SAMPLES."
00045     //        "    -locale=ll_CC specify the locale, default: en_US."
00046     //        "  * = Default if not provided explicitly."
00047 
00048     Sax2Reader(int argC, const char* argV[], Sax2Handler* handler) : mParser(0)
00049     {
00050         mProperlyConstructed = true;
00051 
00052         mValScheme = SAX2XMLReader::Val_Auto;
00053         mDoNamespaces = true;
00054         mDoSchema = true;
00055         mSchemaFullChecking = false;
00056         mIdentityConstraintChecking = true;
00057         mDoList = false;
00058         mNamespacePrefixes = false;
00059         mRecognizeNEL = false;
00060 
00061         memset(mLocaleStr, 0, sizeof mLocaleStr);
00062 
00063         // check the flags
00064         int argInd;
00065         for (argInd = 0; argInd < argC; argInd++)
00066         {
00067             // Break out on first parm not starting with a dash
00068             if (argV[argInd][0] != '-')
00069                 break;
00070 
00071             if (!strncmp(argV[argInd], "-v=", 3)
00072                   ||  !strncmp(argV[argInd], "-V=", 3))
00073             {
00074                 const char* const parm = &argV[argInd][3];
00075 
00076                 if (!strcmp(parm, "never"))
00077                     mValScheme = SAX2XMLReader::Val_Never;
00078                 else if (!strcmp(parm, "auto"))
00079                     mValScheme = SAX2XMLReader::Val_Auto;
00080                 else if (!strcmp(parm, "always"))
00081                     mValScheme = SAX2XMLReader::Val_Always;
00082                 else
00083                 {
00084                     XERCES_STD_QUALIFIER cerr << "Unknown -v= value: " << parm << XERCES_STD_QUALIFIER endl;
00085                     mProperlyConstructed = false;
00086                     return;
00087                 }
00088             }
00089              else if (!strcmp(argV[argInd], "-n")
00090                   ||  !strcmp(argV[argInd], "-N"))
00091             {
00092                 mDoNamespaces = false;
00093             }
00094              else if (!strcmp(argV[argInd], "-s")
00095                   ||  !strcmp(argV[argInd], "-S"))
00096             {
00097                 mDoSchema = false;
00098             }
00099              else if (!strcmp(argV[argInd], "-f")
00100                   ||  !strcmp(argV[argInd], "-F"))
00101             {
00102                 mSchemaFullChecking = true;
00103             }
00104              else if (!strcmp(argV[argInd], "-i")
00105                   ||  !strcmp(argV[argInd], "-I"))
00106             {
00107                 mIdentityConstraintChecking = false;
00108             }
00109              else if (!strcmp(argV[argInd], "-l")
00110                   ||  !strcmp(argV[argInd], "-L"))
00111             {
00112                 mDoList = true;
00113             }
00114              else if (!strcmp(argV[argInd], "-p")
00115                   ||  !strcmp(argV[argInd], "-P"))
00116             {
00117                 mNamespacePrefixes = true;
00118             }
00119              else if (!strcmp(argV[argInd], "-special:nel"))
00120             {
00121                 // turning this on will lead to non-standard compliance behaviour
00122                 // it will recognize the unicode character 0x85 as new line character
00123                 // instead of regular character as specified in XML 1.0
00124                 // do not turn this on unless really necessary
00125                  mRecognizeNEL = true;
00126             }
00127              else if (!strncmp(argV[argInd], "-locale=", 8))
00128             {
00129                  // Get out the end of line
00130                  strncpy(mLocaleStr, &(argV[argInd][8]), sizeof mLocaleStr);
00131             }                   
00132             else
00133             {
00134                 XERCES_STD_QUALIFIER cerr << "Unknown option '" << argV[argInd]
00135                     << "', ignoring it\n" << XERCES_STD_QUALIFIER endl;
00136             }
00137         }
00138 
00139         //
00140         //  There should be one and only one parameter left, and that
00141         //  should be the file name.
00142         //
00143         if (argInd != argC - 1)
00144         {
00145             XERCES_STD_QUALIFIER cerr << "Invalid parameter count to Sax2Reader::Sax2Reader(..)"
00146                 << XERCES_STD_QUALIFIER endl;
00147             mProperlyConstructed = false;
00148             return;
00149         }
00150         mFileToRead = std::string(argV[argInd]);
00151 
00152         // Initialize the XML4C2 system
00153         try
00154         {
00155             if (strlen(mLocaleStr))
00156             {
00157                 XMLPlatformUtils::Initialize(mLocaleStr);
00158             }
00159             else
00160             {
00161                 XMLPlatformUtils::Initialize();
00162             }
00163 
00164             if (mRecognizeNEL)
00165             {
00166                 XMLPlatformUtils::recognizeNEL(mRecognizeNEL);
00167             }
00168         }
00169 
00170         catch (const XMLException& toCatch)
00171         {
00172             XERCES_STD_QUALIFIER cerr << "Error during initialization! Message:\n"
00173                 << StrX(toCatch.getMessage()) << XERCES_STD_QUALIFIER endl;
00174             mProperlyConstructed = false;
00175             return;
00176         }
00177 
00178         //
00179         //  According to what we were told on
00180         //  the command line, set the parser to validate or not.
00181         //
00182         mParser = XMLReaderFactory::createXMLReader();
00183         mParser->setFeature(XMLUni::fgSAX2CoreNameSpaces, mDoNamespaces);
00184         mParser->setFeature(XMLUni::fgXercesSchema, mDoSchema);
00185         mParser->setFeature(XMLUni::fgXercesSchemaFullChecking, mSchemaFullChecking);
00186         mParser->setFeature(XMLUni::fgXercesIdentityConstraintChecking, mIdentityConstraintChecking);
00187         mParser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, mNamespacePrefixes);
00188 
00189         if (mValScheme == SAX2XMLReader::Val_Auto)
00190         {
00191             mParser->setFeature(XMLUni::fgSAX2CoreValidation, true);
00192             mParser->setFeature(XMLUni::fgXercesDynamic, true);
00193         }
00194         if (mValScheme == SAX2XMLReader::Val_Never)
00195         {
00196             mParser->setFeature(XMLUni::fgSAX2CoreValidation, false);
00197         }
00198         if (mValScheme == SAX2XMLReader::Val_Always)
00199         {
00200             mParser->setFeature(XMLUni::fgSAX2CoreValidation, true);
00201             mParser->setFeature(XMLUni::fgXercesDynamic, false);
00202         }
00203 
00204         mParser->setContentHandler(handler);
00205         mParser->setErrorHandler(handler);
00206         mHandler = handler;
00207 
00208     } //ctor
00209 
00210     virtual ~Sax2Reader()
00211     {
00212         if (mHandler != 0)
00213             delete mHandler;
00214 
00215         //  Deleting the parser must be done prior to calling Terminate, below.
00216         if (mParser != 0)
00217             delete mParser;
00218 
00219         XMLPlatformUtils::Terminate();
00220     }
00221 
00222     virtual bool ProperlyConstructed()
00223     {
00224         return mProperlyConstructed;
00225     }
00226 
00227     Sax2Handler* GetHandler() const
00228     {
00229         return mHandler;
00230     }
00231 
00232     // uses the file name passed into the ctor 
00233     int Read()
00234     {
00235         if (mFileToRead.empty())
00236             return 1;
00237 
00238         const char* fileToParse = 0;
00239         unsigned long duration;
00240         bool errorOccurred = false;
00241 
00242         bool more = true;
00243         XERCES_STD_QUALIFIER ifstream fin;
00244 
00245         // the input is a list file
00246         if (mDoList)
00247             fin.open(mFileToRead.c_str());
00248 
00249         if (fin.fail()) {
00250             XERCES_STD_QUALIFIER cerr <<"Cannot open the list file: " << mFileToRead << XERCES_STD_QUALIFIER endl;
00251             return 2;
00252         }
00253 
00254         while (more)
00255         {
00256             char fURI[1000];
00257             //initialize the array to zeros
00258             memset(fURI,0,sizeof(fURI));
00259 
00260             if (mDoList) {
00261                 if (! fin.eof() ) {
00262                     fin.getline (fURI, sizeof(fURI));
00263                     if (!*fURI)
00264                         continue;
00265                     else {
00266                         fileToParse = fURI;
00267                         XERCES_STD_QUALIFIER cerr << "==Parsing== " << fileToParse << XERCES_STD_QUALIFIER endl;
00268                     }
00269                 }
00270                 else
00271                     break;
00272             }
00273             else {
00274                 fileToParse = mFileToRead.c_str();
00275                 more = false; // not a list, parse just one file
00276             }
00277 
00278             mHandler->resetErrors();
00279 
00280             try
00281             {
00282                 const unsigned long startMillis = XMLPlatformUtils::getCurrentMillis();
00283                 mParser->parse(fileToParse);
00284                 const unsigned long endMillis = XMLPlatformUtils::getCurrentMillis();
00285                 duration = endMillis - startMillis;
00286             }
00287             catch (const OutOfMemoryException&)
00288             {
00289                 XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
00290                 errorOccurred = true;
00291                 continue;
00292             }
00293             catch (const XMLException& e)
00294             {
00295                 XERCES_STD_QUALIFIER cerr << "\nError during parsing: '" << fileToParse << "'\n"
00296                     << "Exception message is:  \n"
00297                     << StrX(e.getMessage()) << "\n" << XERCES_STD_QUALIFIER endl;
00298                 errorOccurred = true;
00299                 continue;
00300             }
00301 
00302             catch (...)
00303             {
00304                 XERCES_STD_QUALIFIER cerr << "\nUnexpected exception during parsing: '" << fileToParse << "'\n";
00305                 errorOccurred = true;
00306                 continue;
00307             }
00308 
00309             if (!mHandler->getSawErrors())
00310             {
00311                 //XERCES_STD_QUALIFIER cout << fileToParse << ": " << duration << " ms" << XERCES_STD_QUALIFIER endl;
00312             }
00313             else
00314                 errorOccurred = true;
00315 
00316         } // while (more)
00317 
00318         if (mDoList)
00319             fin.close();
00320 
00321         if (errorOccurred)
00322             return 3; // if failed on at least one xml file to be parsed 
00323         else
00324             return 0;
00325 
00326     } // Read()
00327 
00328 
00329 protected:
00330 
00331     std::string mFileToRead;
00332 
00333 
00334 private:
00335 
00336     SAX2XMLReader* mParser;
00337     Sax2Handler* mHandler;
00338 
00339     SAX2XMLReader::ValSchemes    mValScheme;
00340     bool                         mDoNamespaces;
00341     bool                         mDoSchema;
00342     bool                         mSchemaFullChecking;
00343     bool                         mIdentityConstraintChecking;
00344     bool                         mDoList;
00345     bool                         mNamespacePrefixes;
00346     bool                         mRecognizeNEL;
00347     char                         mLocaleStr[64];
00348 
00349     bool mProperlyConstructed;
00350 
00351 }; //class
00352 
00353 }
00354 }
00355 }
00356 
00357 #endif

Generated on Fri Mar 19 09:30:38 2010 for ImpalaSrc by  doxygen 1.5.1