00001
00002
00003
00004
00005
00006
00007
00008 #ifndef TAG_H_
00009 #define TAG_H_
00010
00011 #include <string>
00012 #include "Util/XmlDoc.h"
00013 #include "Persistency/XmlFileReader.h"
00014
00015 #include "Photo.h"
00016
00017 namespace Impala {
00018 namespace Application {
00019 namespace TagsLife {
00020
00021 using namespace std;
00022
00023 class Tag : public Util::XmlDoc {
00024 public:
00025 string tag;
00026 int mostUsedWeek, mostUsedCount, mostViewedWeek, mostViewedCount, nrOfPhotos, lastWeek;
00027 time_t firstUse, lastUse;
00028 Tag(string t) {
00029 tag = t;
00030 mostUsedCount = mostViewedCount = nrOfPhotos = lastWeek = 0;
00031 readTag("Data/" + tag + "_analysis.xml");
00032 readGeo("Data/" + tag + "_analysis_geo.xml");
00033 }
00034
00035 Photo* mostPopularPhotos[3];
00036
00037
00038 class BoundingBox {
00039 public:
00040 string boxid;
00041 int count;
00042 float left, right, top, bottom;
00043 float centerLongitude, centerLatitude;
00044
00045 BoundingBox(string id, int c, float l, float r, float t, float b) :
00046 boxid(id), count(c), left(l), right(r), top(t), bottom(b)
00047 {
00048 centerLongitude = (l+r)/2;
00049 centerLatitude = (t+b)/2;
00050 }
00051
00052 };
00053 typedef map<string, BoundingBox*> geoDataType;
00054
00055 class Week {
00056 public:
00057 Week(uint f, uint u, uint v, Photo* p) : flickrWeek(f), usage(u), views(v) {
00058 mostPopularPhoto = p;
00059
00060 }
00061 uint flickrWeek;
00062 uint usage;
00063 uint views;
00064 Photo* mostPopularPhoto;
00065 geoDataType geoData;
00066 };
00067 typedef map<uint, Week*> weekStatsType;
00068 weekStatsType weekStats;
00069
00070 static string formatDate(time_t* date) {
00071 char *months[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"};
00072 tm* dateTm = localtime(date);
00073 string txt = MakeString(months[dateTm->tm_mon]) + " " + MakeString(dateTm->tm_mday) + ", ";
00074 txt+= MakeString(dateTm->tm_year+1900);
00075 return txt;
00076 }
00077 static string flickrWeekToString(int flickrweek) {
00078 static time_t FLICKR_EPOCH = 1072933200;
00079 static time_t MS_IN_WEEK = 604800;
00080 time_t start = FLICKR_EPOCH + flickrweek*MS_IN_WEEK;
00081 time_t end = start + 0.86*MS_IN_WEEK;
00082 string txt = formatDate(&start) + " - " + formatDate(&end);
00083 return txt;
00084 }
00085
00086 geoDataType getGeoData(int minVal, int maxVal) {
00087 geoDataType geoData;
00088 for(int i=minVal; i < maxVal; i++) {
00089 if(weekStats.count(i) == 0) continue;
00090 for(geoDataType::iterator iter = weekStats[i]->geoData.begin();
00091 iter != weekStats[i]->geoData.end(); iter++) {
00092 if(geoData.count(iter->first)) {
00093 geoData[iter->first]->count += iter->second->count;
00094 } else {
00095 geoData[iter->first] = iter->second;
00096 }
00097 }
00098 }
00099 return(geoData);
00100 }
00101
00102 map<string, vector<string> > relatedTags;
00103
00104 int readRelated() {
00105 return readRelated("Data/Flickr." + tag + ".xml");
00106 }
00107
00108 private:
00109 void readTag(string xmlFile) {
00110 Persistency::XmlFileReader reader;
00111 ILOG_INFO("reading XML file: " + xmlFile);
00112 DOMDocument* doc = reader.Read(xmlFile);
00113 DOMNode* root = GetChildNode(doc, "tagdata", true);
00114
00115
00116
00117
00118 nrOfPhotos = atoi(GetAttributeValue(root, "nrofphotos"));
00119 firstUse = atoi(GetAttributeValue(root, "firstusedate"));
00120 lastUse = atoi(GetAttributeValue(root, "lastusedate"));
00121
00122 string s = ctime(&firstUse);
00123
00124 vector<DOMNode*> mostpopularphotos = GetChildNodes(root, "mostpopularphoto");
00125 ILOG_INFO("mostpopularphotos.size = " << mostpopularphotos.size());
00126 for(uint i=0; i < mostpopularphotos.size(); i++) {
00127
00128 DOMNode* photo = GetChildNode(mostpopularphotos[i], "photo", true);
00129 Photo* p = new Photo(photo);
00130 mostPopularPhotos[i] = p;
00131 }
00132 vector<DOMNode*> weekstats = GetChildNodes(root, "weekstats");
00133 ILOG_INFO("weekstats.size = " << weekstats.size());
00134 for(uint i=0; i < weekstats.size(); i++) {
00135 uint flickrWeek = atoi(GetAttributeValue(weekstats[i], "flickrweek"));
00136 uint usage = atoi(GetAttributeValue(weekstats[i], "usage"));
00137 uint views = atoi(GetAttributeValue(weekstats[i], "views"));
00138
00139 if(views > mostViewedCount) {
00140 mostViewedWeek = flickrWeek;
00141 mostViewedCount = views;
00142 }
00143 if(usage > mostUsedCount) {
00144 mostUsedWeek = flickrWeek;
00145 mostUsedCount = usage;
00146 }
00147
00148 DOMNode* photo = GetChildNode(weekstats[i], "mostpopularphoto", true);
00149 Photo* p = new Photo(photo);
00150 Week* w = new Week(flickrWeek, usage, views, p);
00151 weekStats[flickrWeek] = w;
00152 lastWeek = flickrWeek;
00153 }
00154 }
00155
00156 void readGeo(string xmlFile) {
00157 Persistency::XmlFileReader reader;
00158 ILOG_INFO("reading XML file: " + xmlFile);
00159 DOMDocument* doc = reader.Read(xmlFile, false);
00160 if(doc == 0) {
00161 ILOG_ERROR("Failed to read XML file: " + xmlFile);
00162 return;
00163 }
00164 DOMNode* root = GetChildNode(doc, "geodata", true);
00165
00166 vector<DOMNode*> weekstats = GetChildNodes(root, "weekstats");
00167 ILOG_INFO("GEO: weekstats.size = " << weekstats.size());
00168 for(uint i=0; i < weekstats.size(); i++) {
00169 uint flickrWeek = atoi(GetAttributeValue(weekstats[i], "flickrweek"));
00170
00171 vector<DOMNode*> bboxes = GetChildNodes(weekstats[i], "bbox");
00172 geoDataType boxes;
00173 for(uint j=0; j < bboxes.size(); j++) {
00174 uint count = atoi(GetAttributeValue(bboxes[j], "count"));
00175 if(count == 0) continue;
00176 string boxid = GetAttributeValue(bboxes[j], "boxid");
00177 float left = atof(GetAttributeValue(bboxes[j], "left"));
00178 float right = atof(GetAttributeValue(bboxes[j], "right"));
00179 float top = atof(GetAttributeValue(bboxes[j], "top"));
00180 float bottom = atof(GetAttributeValue(bboxes[j], "bottom"));
00181
00182 boxes[boxid] = new BoundingBox(boxid, count, left, right, top, bottom);
00183 }
00184 if(boxes.size() > 0) weekStats[flickrWeek]->geoData = boxes;
00185 }
00186 }
00187
00188 int readRelated(string xmlFile) {
00189 Persistency::XmlFileReader reader;
00190 ILOG_INFO("reading XML file: " + xmlFile);
00191 DOMDocument* doc = reader.Read(xmlFile, false);
00192 if(doc == 0) {
00193 ILOG_ERROR("Failed to read XML file: " + xmlFile);
00194 return -1;
00195 }
00196 DOMNode* root = GetChildNode(GetChildNode(doc,"rsp", true), "tags", true);
00197
00198 vector<DOMNode*> tags = GetChildNodes(root, "tag");
00199 ILOG_INFO("Related: tag.size = " << tags.size());
00200 for(uint i=0; i < tags.size(); i++) {
00201 relatedTags[tag].push_back(GetElementValue(tags[i]));
00202 }
00203
00204 return 0;
00205 }
00206
00207 ILOG_VAR_DEC;
00208 };
00209
00210 ILOG_VAR_INIT(Tag, Impala.Application.TagsLife);
00211
00212 }
00213 }
00214 }
00215
00216 #endif