Impala Documentation

00001 #ifndef Impala_Core_Histogram_FitWeibullMarginal_h
00002 #define Impala_Core_Histogram_FitWeibullMarginal_h
00003 
00004 #include <cmath>
00005 
00006 namespace Impala
00007 {
00008 namespace Core
00009 {
00010 namespace Histogram
00011 {
00012 
00013 
00014 /* constants: pi, Euler's constant, and log(2) */
00015 #ifndef M_PI
00016 #define M_PI 3.1415926535897932384626433832795029
00017 #endif
00018 #ifndef MMGAMMA
00019 #define MMGAMMA 0.5772156649015328606065120900824024
00020 #endif
00021 #ifndef M_LN2
00022 #define M_LN2 0.6931471805599453094172321214581766
00023 #endif
00024 
00025 
00026 // HistT refers to the (elementary) data type used to store the histogram data,
00027 // not to a "histogram" type
00028 template<class HistT>
00029 class FitWeibullMarginal
00030 {
00031 public:
00032 
00033     /* input: histogram of values minrange..maxrange */
00034     FitWeibullMarginal(HistT* hist, int bins, double minrange, double maxrange)
00035     {
00036         mHist = hist;
00037         mBins = bins;
00038         mMinval = minrange;
00039         mDx = (maxrange-minrange)/bins;
00040         mPrecision = 0.0;
00041         DoFit(hist, bins);
00042     }
00043 
00044     ~FitWeibullMarginal()
00045     {
00046         if (mHist)
00047             delete [] mHist;
00048     }
00049 
00050     double
00051     Mu()
00052     {
00053         return mMu;
00054     }
00055 
00056     double
00057     Beta()
00058     {
00059         return mBeta;
00060     }
00061 
00062     double
00063     Gamma()
00064     {
00065         return mGamma;
00066     }
00067 
00068     double
00069     A2()
00070     {
00071         return AndersonDarling();
00072     }
00073 
00074 private:
00075 
00076     void
00077     DoFit(HistT* orgHist, int orgBins)
00078     {
00079         //std::cout << "org mBins = " << orgBins << std::endl;
00080         //mMu = MuEstNew();
00081         mMu = MuEst();
00082         if (mNorm == 0) // computed by MuEst, histogram is empty
00083         {
00084             mMu = 0;
00085             mBeta = 0;
00086             mHist = 0;
00087             return;
00088         }
00089         //std::cout << "mMu = " << mMu << std::endl;
00090         //std::cout << "mDx = " << mDx << std::endl;
00091         //std::cout << "bin for 0.0 = " << (0.0 - mMinval)/mDx << std::endl;
00092         //std::cout << "double bin0 = " << (mMu - mMinval)/mDx << std::endl;
00093         int bin0 = (int)((mMu - mMinval)/mDx);
00094         //std::cout << "bin0 = " << bin0 << std::endl;
00095         int i;
00096 
00097         // fill new histogram with mirrored data
00098         mBins = bin0>orgBins/2 ? bin0 : orgBins-bin0;
00099         //std::cout << "new mBins = " << mBins << std::endl;
00100         mHist = new HistT [mBins];
00101 
00102         for (i=0; i<mBins; i++)
00103             mHist[i] = 0;
00104         for (i=0; i<orgBins; i++)
00105         {
00106             int j = bin0 > i ? bin0-i-1 : i-bin0;
00107             //std::cout << "  org " << i << " to " << j << std::endl;
00108             mHist[j] += orgHist[i];
00109         }
00110         // remove empty tail
00111         for (i=mBins-1; i>0 && (mHist[i] <= mPrecision); i--)
00112             ;
00113         mBins = i+1;
00114 
00115         // initial estimate of upper and lower bound on gamma (between 0.3 and 50)
00116         double gammal, gammah;
00117         double score;
00118         int its = 0;
00119 
00120         mGamma = 2.0;
00121         score = GammaEst(mGamma);
00122 
00123         if (score > 0.0)
00124         {
00125             while(score > 0.0)
00126             {
00127                 gammah = mGamma;
00128                 mGamma /= 2.0;
00129                 if (mGamma < 0.3)
00130                     break;
00131                 score = GammaEst(mGamma);
00132                 if (its++ > 30)
00133                     break;
00134             }
00135             gammal = mGamma;
00136         }
00137         else
00138         {
00139             while(score < 0.0)
00140             {
00141                 gammal = mGamma;
00142                 mGamma *= 2.0;
00143                 if (mGamma > 50.0)
00144                     break;
00145                 score = GammaEst(mGamma);
00146                 if (its++ > 30)
00147                     break;
00148             }
00149             gammah = mGamma;
00150         }
00151 
00152         // approximate gamma by newton-raphson
00153 
00154         //    double tol = 2.0*TOL*2.0;
00155         //    while ((gammah-gammal) > tol) {
00156 
00157         while (its < 10)
00158         {
00159             mGamma = (gammal+gammah)/2.0;
00160             score = GammaEst(mGamma);
00161             if (score >= 0.0)
00162                 gammah = mGamma;
00163             else
00164                 gammal = mGamma;
00165             if (its++ > 30)
00166             {
00167                 std::cerr << "Weibull MLE::no convergence..." << std::endl;
00168                 break;
00169             }
00170         }
00171 
00172         mGamma = (gammal+gammah)/2.0;
00173         mBeta = BetaEst(mGamma);
00174 
00175     }
00176 
00177     double
00178     MuEst()
00179     {
00180         double sum = 0;
00181         double x = mMinval;
00182 
00183         mNorm = 0;
00184         for (int i=0; i<mBins; i++, x += mDx)
00185         {
00186             if (mHist[i] > mPrecision)
00187             {
00188                 sum += x*mHist[i];
00189                 mNorm += mHist[i];
00190             }
00191         }
00192 
00193         if (mNorm == 0)
00194             return 0;
00195         return sum/mNorm;
00196     }
00197 
00198     double
00199     MuEstNew()
00200     {
00201         mNorm = 0;
00202         for (int i=0 ; i<mBins ; i++)
00203         {
00204             mNorm += mHist[i];
00205             //std::cout << "sum " << i << " = " << mNorm << std::endl;
00206         }
00207         if (mNorm == 0)
00208             return 0;
00209 
00210         double sum = 0;
00211         int i=0;
00212         for (i=0 ; sum < 0.5*mNorm ; i++)
00213             sum += mHist[i];
00214         int medianbin = (i>0) ? i-1 : 0;
00215         return mMinval + (medianbin+0.5)*mDx;
00216     }
00217 
00218     double
00219     BetaEst(double gamma)
00220     {
00221         double sum = 0;
00222         double x = mDx;
00223 
00224         for (int i=0; i<mBins; i++, x+=mDx)
00225             if (mHist[i] > mPrecision)
00226                 sum += pow(x, gamma)*mHist[i];
00227 
00228         if (mNorm == 0)
00229             return 0;
00230         return pow(sum/mNorm, 1./gamma);
00231     }
00232 
00233     double
00234     GammaEst(double gamma)
00235     { 
00236         double sum = 0;
00237 
00238         mBeta = BetaEst(gamma);
00239 
00240         double ddx = mDx/mBeta;
00241         double x = mDx/mBeta;
00242 
00243         for (int i=0; i<mBins; i++, x += ddx)
00244             if (mHist[i] > mPrecision)
00245                 sum += pow(x, gamma)*(1.0-gamma*log(x))*mHist[i];
00246 
00247         return -(sum/mNorm+gamma-1.0+log(gamma)+DiGamma(1./gamma));
00248     }
00249 
00250 
00251     double
00252     AndersonDarling() const
00253     { 
00254         double A2 = 0;
00255         double x;
00256         int i, n=0;
00257 
00258         double Fi = 0.0;
00259 
00260         for (i=0, x = mDx; i<mBins; i++, x += mDx)
00261         {
00262             if (mHist[i] > mPrecision)
00263             {
00264                 double xt = pow(x/mBeta, mGamma);
00265                 double F = exp(-xt);
00266                 double dF = (mGamma/mBeta)*pow(x/mBeta, mGamma-1.0)*F*mDx;
00267             
00268                 Fi += mHist[i]/mNorm;
00269                 F = 1.0-F;
00270                 if ((F>0.0) && (F<1.0))
00271                 {
00272                     A2 += ((Fi-F)*(Fi-F)/(F*(1-F)))*dF;
00273                     n++;
00274                 }
00275             }
00276         }
00277 
00278         return n*A2;
00279     }
00280 
00281     double
00282     gammaFunc(double xx) const
00283     {
00284         double x,y,tmp,ser;
00285         static double cof[6] =
00286             {
00287                 76.18009172947146, -86.50532032941677,
00288                 24.01409824083091, -1.231739572450155,
00289                 0.1208650973866179e-2, -0.5395239384953e-5
00290             };
00291         
00292         int j;
00293         y=x=xx;
00294         tmp=x+5.5;
00295         tmp -= (x+0.5)*log(tmp);
00296         ser=1.000000000190015;
00297         for (j=0;j<=5;j++)
00298             ser += cof[j]/++y;
00299         xx = -tmp+log(2.5066282746310005*ser/x);
00300         return exp(xx);
00301     }
00302 
00303     double
00304     DiGamma(double x) const
00305     {
00306         /* force into the interval 1..3 */
00307         if( x < 0.0 )
00308             return DiGamma(1.0-x)+M_PI/tan(M_PI*(1.0-x)) ;  /* refection formula */
00309         else if( x < 1.0 )
00310             return DiGamma(1.0+x)-1.0/x ;
00311         else if ( x == 1.0)
00312             return -MMGAMMA ;
00313         else if ( x == 2.0)
00314             return 1.0-MMGAMMA ;
00315         else if ( x == 3.0)
00316             return 1.5-MMGAMMA ;
00317         else if ( x > 3.0)
00318             /* duplication formula */
00319             return 0.5*(DiGamma(x/2.0)+DiGamma((x+1.0)/2.0))+M_LN2 ;
00320         else
00321         {
00322             static double Kncoe[] =
00323                 { .30459198558715155634315638246624251,
00324                   .72037977439182833573548891941219706, -.12454959243861367729528855995001087,
00325                   .27769457331927827002810119567456810e-1, -.67762371439822456447373550186163070e-2,
00326                   .17238755142247705209823876688592170e-2, -.44817699064252933515310345718960928e-3,
00327                   .11793660000155572716272710617753373e-3, -.31253894280980134452125172274246963e-4,
00328                   .83173997012173283398932708991137488e-5, -.22191427643780045431149221890172210e-5,
00329                   .59302266729329346291029599913617915e-6, -.15863051191470655433559920279603632e-6,
00330                   .42459203983193603241777510648681429e-7, -.11369129616951114238848106591780146e-7,
00331                   .304502217295931698401459168423403510e-8, -.81568455080753152802915013641723686e-9,
00332                   .21852324749975455125936715817306383e-9, -.58546491441689515680751900276454407e-10,
00333                   .15686348450871204869813586459513648e-10, -.42029496273143231373796179302482033e-11,
00334                   .11261435719264907097227520956710754e-11, -.30174353636860279765375177200637590e-12,
00335                   .80850955256389526647406571868193768e-13, -.21663779809421233144009565199997351e-13,
00336                   .58047634271339391495076374966835526e-14, -.15553767189204733561108869588173845e-14,
00337                   .41676108598040807753707828039353330e-15, -.11167065064221317094734023242188463e-15 } ;
00338 
00339             double Tn_1 = 1.0 ; /* T_{n-1}(x), started at n=1 */
00340             double Tn = x-2.0 ; /* T_{n}(x) , started at n=1 */
00341             double resul = Kncoe[0] + Kncoe[1]*Tn ;
00342 
00343             x -= 2.0 ;
00344 
00345             for(int n = 2 ; n < sizeof(Kncoe)/sizeof(double) ;n++)
00346             {
00347                 const double Tn1 = 2.0 * x * Tn - Tn_1 ;    /* Chebyshev recursion, Eq. 22.7.4 Abramowitz-Stegun */
00348                 resul += Kncoe[n]*Tn1 ;
00349                 Tn_1 = Tn ;
00350                 Tn = Tn1 ;
00351             }
00352             return resul ;
00353         }
00354     }
00355 
00356 
00357     HistT* mHist;
00358     int    mBins;
00359 
00360     double mMinval;
00361     double mDx;
00362     double mNorm;
00363 
00364     double mMu;
00365     double mBeta;
00366     double mGamma;
00367     double mPrecision;
00368 };
00369 
00370 } // namespace Histogram
00371 } // namespace Core
00372 } // namespace Impala
00373 
00374 #endif
FitWeibullMarginal.h