00001 #ifndef MediaTable_TableDataSourceCSV_h
00002 #define MediaTable_TableDataSourceCSV_h
00003
#include "WritableTableDataSource.h"

#ifdef MEDIATABLE_BASIC
#include "RemoteRetriever.h"
#endif

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
00013
00014 namespace Impala {
00015 namespace Application {
00016 namespace MediaTable {
00017
00018 class TableDataSourceCSV : public WritableTableDataSource
00019 {
00020 public:
00021 TableDataSourceCSV(std::string filename):
00022 WritableTableDataSource()
00023 {
00024 Init();
00025 OpenCSV(filename);
00026 }
00027
00028 ~TableDataSourceCSV()
00029 {
00030 }
00031
00032 void OpenCSV(std::string filename) {
00033 std::ifstream is(filename.c_str());
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 std::vector<int> columnType;
00048 std::vector<int> newColumnType;
00049 std::vector<std::vector<std::string> > data;
00050 if(!is.good()) return;
00051
00052 std::string line, cell;
00053 while(is.good()) {
00054
00055 std::vector<std::string> row;
00056 std::getline(is, line);
00057 if(line.length() == 0) continue;
00058 std::stringstream lineStream(line);
00059
00060 newColumnType = columnType;
00061 while(std::getline(lineStream,cell,',')) {
00062 if(cell.length() > 1 &&
00063 (cell.at(0) == 0x27 || cell.at(0) == 0x22))
00064 {
00065
00066 if(cell.at(cell.length()-1) == '\r')
00067 cell = cell.substr(0, cell.length()-1);
00068 while(cell.at(cell.length()-1) != 0x27 &&
00069 cell.at(cell.length()-1) != 0x22)
00070 {
00071 std::string ncell;
00072 if(!std::getline(lineStream,ncell,','))
00073 {
00074
00075
00076 ILOG_DEBUG("Line " << data.size() + 1 << " has bad quoting.");
00077 row.clear();
00078 break;
00079 } else
00080 cell += ncell;
00081 }
00082 cell = cell.substr(1, cell.length()-2);
00083 }
00084
00085
00086 row.push_back(cell);
00087 if(data.size() == 0) {
00088 newColumnType.push_back(TYPE_INT);
00089 } else {
00090 char result[100];
00091 if(row.size() > newColumnType.size())
00092 {
00093
00094
00095 continue;
00096 }
00097 if(newColumnType[row.size()-1] == TYPE_INT) {
00098 sprintf(result, "%d", atoi(cell.c_str()));
00099 if(std::string(result) != cell)
00100 newColumnType[row.size()-1] = TYPE_DOUBLE;
00101 }
00102 if(newColumnType[row.size()-1] == TYPE_DOUBLE) {
00103 sprintf(result, "%f", atof(cell.c_str()));
00104 if(strncmp(cell.c_str(), result, cell.length()))
00105 {
00106
00107
00108
00109 newColumnType[row.size()-1] = TYPE_IMAGE;
00110 }
00111 }
00112 if(newColumnType[row.size()-1] == TYPE_IMAGE) {
00113 ILOG_DEBUG(cell.substr(0, 7));
00114 ILOG_DEBUG(cell.substr(cell.length()-4));
00115 if ((cell.length() < 13) ||
00116 (cell.substr(0, 7) != "http://") ||
00117 ((cell.substr(cell.length()-4) != ".jpg") &&
00118 (cell.substr(cell.length()-5) != ".jpeg")))
00119 {
00120
00121
00122
00123 newColumnType[row.size()-1] = TYPE_TEXT;
00124 }
00125 }
00126 }
00127 }
00128 if(!row.size()) continue;
00129 if(data.size() && (row.size() != columnType.size())) {
00130 ILOG_DEBUG("Line " << data.size() + 1 << " has wrong number of cells (" <<
00131 row.size() << " should be " << columnType.size() << ")");
00132
00133 } else {
00134
00135 data.push_back(row);
00136
00137 columnType = newColumnType;
00138 }
00139 }
00140 is.close();
00141
00142 ILOG_DEBUG("phase 2");
00143
00144 #ifdef MEDIATABLE_BASIC
00145 int gotcha = 0;
00146 bool isFlickr = false;
00147 for(int col=0; col < columnType.size(); col++) {
00148 if(data[0][col] == "id") gotcha++;
00149 if(data[0][col] == "farm") gotcha++;
00150 if(data[0][col] == "server") gotcha++;
00151 if(data[0][col] == "secret") gotcha++;
00152 }
00153 if(gotcha == 4) {
00154 isFlickr = true;
00155 }
00156 mRemoteRetriever = RemoteRetriever::GetInstance();
00157 std::string id, farm, server, secret;
00158 std::string sizeSuffix = "t";
00159 #endif
00160 ILOG_DEBUG("phase 3");
00161
00162 for(int col=0; col < columnType.size(); col++) {
00163 #ifdef MEDIATABLE_BASIC
00164 if(isFlickr)
00165 {
00166 if(data[0][col] == "id")
00167 columnType[col] = TYPE_TEXT;
00168 if(data[0][col] == "farm" | data[0][col] == "server")
00169 continue;
00170 if(data[0][col] == "secret")
00171 {
00172 AddStaticColumn("thumb", TYPE_IMAGE);
00173
00174 continue;
00175 }
00176 }
00177 #endif
00178 AddStaticColumn(data[0][col], columnType[col]);
00179 ILOG_DEBUG("Added column " << data[0][col] << " of type " << columnType[col]);
00180 }
00181
00182 for(int row=1; row < data.size(); row++) {
00183 for(int col=0; col < columnType.size(); col++) {
00184 #ifdef MEDIATABLE_BASIC
00185 if(isFlickr)
00186 {
00187 if(data[0][col] == "id")
00188 id = data[row][col];
00189 if(data[0][col] == "farm")
00190 {
00191 farm = data[row][col];
00192 continue;
00193 }
00194 if(data[0][col] == "server")
00195 {
00196 server = data[row][col];
00197 continue;
00198 }
00199 if(data[0][col] == "secret")
00200 {
00201 secret = data[row][col];
00202 std::string imageUrl = "http://farm";
00203 imageUrl += farm + ".static.flickr.com/" + server + "/";
00204 imageUrl += id + "_" + secret;
00205
00206 if(sizeSuffix.length() > 0) imageUrl += "_" + sizeSuffix;
00207 AddTextData("thumb", imageUrl + ".jpg");
00208 continue;
00209 }
00210 }
00211 #endif
00212 if(columnType[col] == TYPE_INT) AddIntData(data[0][col], atoi(data[row][col].c_str()));
00213 if(columnType[col] == TYPE_DOUBLE) AddDoubleData(data[0][col], atof(data[row][col].c_str()));
00214 if(columnType[col] == TYPE_IMAGE) AddTextData(data[0][col], data[row][col]);
00215 if(columnType[col] == TYPE_TEXT) AddTextData(data[0][col], data[row][col]);
00216 }
00217 }
00218 }
00219
00220 #ifdef MEDIATABLE_BASIC
00221 Array2dVec3UInt8*
00222 GetImageDataByID(String column, int row)
00223 {
00224 std::string imageUrl = GetTextDataByID(column, row);
00225 Array2dVec3UInt8* ar = 0;
00226 ar = mRemoteRetriever->RetrieveImageData(imageUrl);
00227 return ar;
00228 }
00229
00230 RemoteRetriever* mRemoteRetriever;
00231
00232 #endif
00233
00234 private:
00235
00236 void Init()
00237 {
00238 }
00239
00240 ILOG_VAR_DEC;
00241 };
00242
00243 ILOG_VAR_INIT(TableDataSourceCSV, Application.MediaTable);
00244
00245 }
00246 }
00247 }
00248
00249 #endif // TableDataSourceCSV_h