00001 #ifndef Impala_Core_Trec_SearchJudge_h 00002 #define Impala_Core_Trec_SearchJudge_h 00003 00004 #include <vector> 00005 #include "Basis/File.h" 00006 #include "Basis/NativeTypes.h" 00007 #include "Util/StringParser.h" 00008 00009 namespace Impala 00010 { 00011 namespace Core 00012 { 00013 namespace Trec 00014 { 00015 00016 00017 class SearchJudge 00018 { 00019 public: 00020 00021 SearchJudge(String fileName) 00022 { 00023 Load(fileName); 00024 } 00025 00026 void 00027 Load(String fileName) 00028 { 00029 File f(fileName, "r"); 00030 mValid = f.Valid(); 00031 if (! mValid) 00032 return; 00033 int nr = 0; 00034 while (! f.Eof()) 00035 { 00036 String line = f.ReadLine(true); 00037 if (! line[0]) 00038 continue; 00039 Util::StringParser p(line); 00040 mJudgeTopic.push_back(p.GetString(' ')); 00041 int junk = p.GetInt(); 00042 mJudgeShot.push_back(p.GetString(' ')); 00043 mJudgeValue.push_back(p.GetInt(' ', false)); 00044 nr++; 00045 } 00046 std::cout << "done reading " << nr << " qrels" << std::endl; 00047 } 00048 00049 bool 00050 Valid() 00051 { 00052 return mValid; 00053 } 00054 00055 void 00056 Stats() 00057 { 00058 for (int i=0 ; i<mJudgeTopic.size() ; i++) 00059 { 00060 int t = GetTopicIdx(mJudgeTopic[i]); 00061 mTopicNrJudged[t]++; 00062 if (mJudgeValue[i] == 1) 00063 mTopicNrRelevant[t]++; 00064 } 00065 for (int t=0 ; t<mTopicName.size() ; t++) 00066 { 00067 std::cout << "Topic " << mTopicName[t] 00068 << ", nr judged = " << mTopicNrJudged[t] 00069 << ", nr relevant = " << mTopicNrRelevant[t] << std::endl; 00070 } 00071 } 00072 00073 bool 00074 HasTopic(String topic) 00075 { 00076 for (int i=0 ; i<mJudgeTopic.size() ; i++) 00077 if (mJudgeTopic[i] == topic) 00078 return true; 00079 return false; 00080 } 00081 00082 int 00083 GetNrRelevant(String topic) 00084 { 00085 int t = GetTopicIdx(topic); 00086 return mTopicNrRelevant[t]; 00087 } 00088 00089 int 00090 GetNrJudged(String topic) 00091 { 00092 int t = GetTopicIdx(topic); 00093 return mTopicNrJudged[t]; 00094 } 00095 00096 std::vector<String> GetJudgedShots(String topic) 00097 { 00098 std::vector<String> shots; 00099 for (int i=0; i<mJudgeTopic.size(); i++) 00100 if (mJudgeTopic[i] == topic) 00101 shots.push_back(mJudgeShot[i]); 00102 return shots; 00103 } 00104 00105 std::vector<int> GetJudgedValues(String topic) 00106 { 00107 std::vector<int> values; 00108 for (int i=0; i<mJudgeTopic.size(); i++) 00109 if (mJudgeTopic[i] == topic) 00110 values.push_back(mJudgeValue[i]); 00111 return values; 00112 } 00113 00114 int 00115 Judge(String topic, String shot) 00116 { 00117 for (int i=0 ; i<mJudgeTopic.size() ; i++) 00118 if (mJudgeTopic[i] == topic) 00119 if (mJudgeShot[i] == shot) 00120 return mJudgeValue[i]; 00121 return -2; 00122 } 00123 00124 // computes inferred average precision 00125 Real64 00126 Judge(String topic, std::vector<String> shots) 00127 { 00128 int unjudged = 0; 00129 int nonRel = 0; 00130 int rel = 0; 00131 Real64 infAP = 0; 00132 Real64 apAdd = 0; 00133 for (int s=0 ; s<shots.size() ; s++) 00134 { 00135 switch (Judge(topic, shots[s])) 00136 { 00137 case -1: 00138 unjudged++; 00139 break; 00140 case 0: 00141 nonRel++; 00142 break; 00143 case 1: 00144 rel++; 00145 if (s == 0) 00146 { 00147 infAP += 1; 00148 } 00149 else 00150 { 00151 Real64 k = s + 1; 00152 Real64 relMin = rel - 1; 00153 Real64 d = relMin + nonRel + unjudged; 00154 Real64 eps = 0.00001; 00155 apAdd = 1.0 / k + ((k-1) / k) * 00156 (d / (k-1)) * ((relMin + eps) / (relMin + nonRel + 2*eps)); 00157 //std::cout << "apAdd = " << apAdd 00158 // << ", rel = " << rel << ", d = " << d << std::endl; 00159 infAP += apAdd; 00160 } 00161 break; 00162 } 00163 } 00164 infAP /= GetNrRelevant(topic); 00165 return infAP; 00166 } 00167 00168 private: 00169 00170 int 00171 GetTopicIdx(String name) 00172 { 00173 for (int i=0 ; i<mTopicName.size() ; i++) 00174 if (mTopicName[i] == name) 00175 return i; 00176 mTopicName.push_back(name); 00177 mTopicNrJudged.push_back(0); 00178 mTopicNrRelevant.push_back(0); 00179 return mTopicName.size() - 1; 00180 } 00181 00182 bool mValid; 00183 00184 // original data 00185 std::vector<String> mJudgeTopic; 00186 std::vector<String> mJudgeShot; 00187 std::vector<int> mJudgeValue; 00188 00189 // per topic data 00190 std::vector<String> mTopicName; 00191 std::vector<int> mTopicNrJudged; 00192 std::vector<int> mTopicNrRelevant; 00193 00194 }; 00195 00196 } // namespace Trec 00197 } // namespace Core 00198 } // namespace Impala 00199 00200 #endif