00001 #ifndef Impala_Core_Table_AveragePrecisionSpeedup_h
00002 #define Impala_Core_Table_AveragePrecisionSpeedup_h
00003
00004 #include "Core/Table/AnnotationTable.h"
00005 #include "Core/Table/SimilarityTableSet.h"
00006
00007 namespace Impala
00008 {
00009 namespace Core
00010 {
00011 namespace Table
00012 {
00013
00014
00015
00016
00017 template <class T>
00018 inline Real64
00019 AveragePrecisionSpeedup(T* rank, AnnotationTable* groundTruth, int topN = -1)
00020 {
00021 ILOG_VAR(Impala.Core.Table.AveragePrecisionSpeedup);
00022 Real64 ap = 0;
00023
00024 Real64 pos99 = 0.999;
00025 Real64 pos90 = 0.9;
00026 Real64 pos10 = 0.1;
00027
00028 Real64 curPos = 0;
00029
00030
00031 int status = 1;
00032 Real64 curRef = pos10;
00033
00034 int totalPositive = groundTruth->GetNrPositive();
00035 if ( 0 == totalPositive)
00036 return -1;
00037
00038 int positiveCount = 0;
00039 if (topN == -1)
00040 topN = rank->Size();
00041
00042
00043
00044
00045
00046
00047 for (int i=0 ; i<topN ; i++)
00048 {
00049 Quid q = rank->Get1(i);
00050 if (groundTruth->IsPositive(q))
00051 {
00052 positiveCount++;
00053 double precision = ((double)positiveCount)/((double)(i+1));
00054 ap += precision;
00055
00056
00057
00058 curPos = positiveCount*1.0/totalPositive;
00059
00060 switch (status) {
00061 case 1:
00062
00063 if (curPos > curRef)
00064 {
00065
00066 Real64 ratio = (positiveCount-1)*1.0/topN;
00067 fprintf(stdout," [1]recall=10.0%s %d / %d N= %d ratio= %f speedup= %.3f pos= %d\n", "%", positiveCount-1, totalPositive,topN, ratio, 0.1/ratio,i);
00068 status = 2;
00069 curRef = pos90;
00070
00071 }
00072
00073 break;
00074
00075 case 2:
00076
00077 if (curPos > curRef)
00078 {
00079
00080 Real64 ratio = (positiveCount-1)*1.0/topN;
00081 fprintf(stdout," [2]recall=90.0%s %d / %d N= %d ratio= %f speedup= %.3f pos= %d\n", "%", positiveCount-1, totalPositive,topN, ratio, 0.9/ratio,i);
00082 status = 3;
00083 curRef = pos99;
00084
00085 }
00086 break;
00087
00088 case 3:
00089
00090 if (curPos > curRef)
00091 {
00092
00093 Real64 ratio = (positiveCount-1)*1.0/topN;
00094 fprintf(stdout," [3]recall=99.9%s %d / %d N= %d ratio= %f speedup= %.3f pos= %d\n", "%", positiveCount, totalPositive,topN, ratio, 0.999/ratio,i+1);
00095 status = -1;
00096 curRef = 1e5;
00097
00098 }
00099 break;
00100
00101 default:
00102
00103 break;
00104 }
00105
00106
00107 }
00108 }
00109 if (groundTruth->GetNrPositive() > 0)
00110 ap /= groundTruth->GetNrPositive();
00111 return ap;
00112 }
00113
00114
00122 template <class T>
00123 inline Real64
00124 AveragePrecisionJudgedSpeedup(T* rank, AnnotationTable* groundTruth,
00125 bool skipIsNegative, int topN = -1,
00126 bool apOfFoundOnly = false)
00127 {
00128 ILOG_VAR(Impala.Core.Table.AveragePrecisionJudgedSpeedup);
00129 Real64 ap = 0;
00130
00131 Real64 pos99 = 0.999;
00132 Real64 pos90 = 0.9;
00133 Real64 pos10 = 0.1;
00134
00135 Real64 curPos = 0;
00136
00137
00138 int status = 1;
00139 Real64 curRef = pos10;
00140
00141 int totalPositive = groundTruth->GetNrPositive();
00142 if ( 0 == totalPositive)
00143 return -1;
00144
00145 int positiveCount = 0;
00146 int curRank = 0;
00147 if (topN == -1)
00148 topN = rank->Size();
00149 for (int i=0 ; i<topN ; i++)
00150 {
00151 Quid q = rank->Get1(i);
00152 int qIndex = groundTruth->GetIndex(q);
00153 if (qIndex == groundTruth->Size())
00154 continue;
00155 if (groundTruth->IsPositive(qIndex))
00156 {
00157 positiveCount++;
00158 double precision = ((double)positiveCount)/((double)(curRank+1));
00159 ap += precision;
00160
00161
00162
00163 curPos = positiveCount*1.0/totalPositive;
00164
00165 switch (status) {
00166 case 1:
00167
00168 if (curPos > curRef)
00169 {
00170
00171 Real64 ratio = (positiveCount-1)*1.0/topN;
00172 fprintf(stdout," [1]recall=10.0%s %d / %d N= %d ratio= %f speedup= %.3f pos= %d\n", "%", positiveCount-1, totalPositive,topN, ratio, 0.1/ratio,i);
00173 status = 2;
00174 curRef = pos90;
00175
00176 }
00177
00178 break;
00179
00180 case 2:
00181
00182 if (curPos > curRef)
00183 {
00184
00185 Real64 ratio = (positiveCount-1)*1.0/topN;
00186 fprintf(stdout," [2]recall=90.0%s %d / %d N= %d ratio= %f speedup= %.3f pos= %d\n", "%", positiveCount-1, totalPositive,topN, ratio, 0.9/ratio,i);
00187 status = 3;
00188 curRef = pos99;
00189
00190 }
00191 break;
00192
00193 case 3:
00194
00195 if (curPos > curRef)
00196 {
00197
00198 Real64 ratio = (positiveCount-1)*1.0/topN;
00199 fprintf(stdout," [3]recall=99.9%s %d / %d N= %d ratio= %f speedup= %.3f pos= %d\n", "%", positiveCount, totalPositive,topN, ratio, 0.999/ratio,i+1);
00200 status = -1;
00201 curRef = 1e5;
00202
00203 }
00204 break;
00205
00206 default:
00207
00208 break;
00209 }
00210
00211 }
00212 if (groundTruth->IsPositive(qIndex) || skipIsNegative
00213 || groundTruth->IsNegative(qIndex))
00214 {
00215 curRank++;
00216 }
00217 }
00218 int factor = (apOfFoundOnly) ? positiveCount : groundTruth->GetNrPositive();
00219 if (factor > 0)
00220 ap /= factor;
00221 return ap;
00222 }
00223
00224 template <class T>
00225 void
00226 DumpSimilarityAndTruthData(FILE* fp, T* rank, Core::Table::SimilarityTableSet::SimTableType* sim,
00227 AnnotationTable* groundTruth, Core::Table::QuidTable* qTable,
00228 int topN = -1)
00229
00230 {
00231 ILOG_VAR(Impala.Core.Table.DumpSimilarityAndTruthData);
00232
00233
00234
00235
00236
00237
00238 int positiveCount = 0;
00239 if (topN == -1)
00240 topN = rank->Size();
00241
00242 int size = qTable->Capacity();
00243
00244
00245
00246 for (int i=0 ; i<topN ; i++)
00247 {
00248 Quid q = rank->Get1(i);
00249
00250
00251
00252
00253
00254
00255 int curPos = Column::Find(qTable->GetColumn1(), q);
00256
00257 if (curPos == size)
00258 {
00259
00260 std::cout << "Can NOT find the Quid: " << QuidObj(q) << std::endl;
00261 continue;
00262 }
00263
00264 double prob = sim->Get1(curPos);
00265
00266 if (groundTruth->IsPositive(q))
00267 {
00268
00269 fprintf(fp,"%.17f\t%d\n", prob, 1);
00270 }
00271 else if (groundTruth->IsNegative(q))
00272 {
00273
00274 fprintf(fp,"%.17f\t%d\n", prob, 0);
00275 }
00276 else
00277 {
00278
00279 }
00280 }
00281
00282 int x = 0;
00283 }
00284
00285 }
00286 }
00287 }
00288
00289 #endif