Definition at line 32 of file TableDataSourceCSV.h. References Impala::Application::MediaTable::WritableTableDataSource::AddDoubleData(), Impala::Application::MediaTable::WritableTableDataSource::AddIntData(), Impala::Application::MediaTable::WritableTableDataSource::AddStaticColumn(), Impala::Application::MediaTable::WritableTableDataSource::AddTextData(), Impala::atof(), Impala::atoi(), Impala::Application::MediaTable::RemoteRetriever::GetInstance(), ILOG_DEBUG, Impala::Application::MediaTable::TableDataSource::TYPE_DOUBLE, Impala::Application::MediaTable::TableDataSource::TYPE_IMAGE, Impala::Application::MediaTable::TableDataSource::TYPE_INT, and Impala::Application::MediaTable::TableDataSource::TYPE_TEXT. Referenced by TableDataSourceCSV(). 00032 { 00033 std::ifstream is(filename.c_str()); 00034 00035 /* From: http://stackoverflow.com/questions/1120140/csv-parser-in-c 00036 * Can also use boost: 00037 std::vector<std::string> vec; 00038 using namespace boost; 00039 tokenizer<escaped_list_separator<char> > tk( 00040 line, escaped_list_separator<char>('\\', ',', '\"')); 00041 for (tokenizer<escaped_list_separator<char> >::iterator i(tk.begin()); 00042 i!=tk.end();++i) 00043 { 00044 vec.push_back(*i); 00045 }*/ 00046 00047 std::vector<int> columnType; 00048 std::vector<int> newColumnType; 00049 std::vector<std::vector<std::string> > data; 00050 if(!is.good()) return; 00051 00052 std::string line, cell; 00053 while(is.good()) { 00054 //ILOG_DEBUG("is good"); 00055 std::vector<std::string> row; 00056 std::getline(is, line); 00057 if(line.length() == 0) continue; 00058 std::stringstream lineStream(line); 00059 //ILOG_INFO("LINE: " << line); 00060 newColumnType = columnType; 00061 while(std::getline(lineStream,cell,',')) { 00062 if(cell.length() > 1 && 00063 (cell.at(0) == 0x27 || cell.at(0) == 0x22)) 00064 { 00065 // Fix CR/LF issue on Unixes 00066 if(cell.at(cell.length()-1) == '\r') 00067 cell = cell.substr(0, cell.length()-1); 00068 while(cell.at(cell.length()-1) != 0x27 && 00069 cell.at(cell.length()-1) != 0x22) 00070 { 00071 std::string ncell; 00072 if(!std::getline(lineStream,ncell,',')) 00073 { 00074 // Error! 00075 // CSV spec states that a new line is also allowed in a field. 00076 ILOG_DEBUG("Line " << data.size() + 1 << " has bad quoting."); 00077 row.clear(); 00078 break; 00079 } else 00080 cell += ncell; 00081 } 00082 cell = cell.substr(1, cell.length()-2); 00083 } 00084 //printf(cell.c_str()); 00085 //printf(","); 00086 row.push_back(cell); 00087 if(data.size() == 0) { 00088 newColumnType.push_back(TYPE_INT); 00089 } else { 00090 char result[100]; 00091 if(row.size() > newColumnType.size()) 00092 { 00093 // skipping overflow row 00094 // ILOG_DEBUG("Skipping overflow row"); 00095 continue; 00096 } 00097 if(newColumnType[row.size()-1] == TYPE_INT) { 00098 sprintf(result, "%d", atoi(cell.c_str())); 00099 if(std::string(result) != cell) 00100 newColumnType[row.size()-1] = TYPE_DOUBLE; 00101 } 00102 if(newColumnType[row.size()-1] == TYPE_DOUBLE) { 00103 sprintf(result, "%f", atof(cell.c_str())); 00104 if(strncmp(cell.c_str(), result, cell.length())) 00105 { 00106 /*ILOG_DEBUG("On row " << data.size()+1 << " and column " << 00107 data[0][row.size()-1] << ": " << 00108 result << " != " << cell);*/ 00109 newColumnType[row.size()-1] = TYPE_IMAGE; 00110 } 00111 } 00112 if(newColumnType[row.size()-1] == TYPE_IMAGE) { 00113 ILOG_DEBUG(cell.substr(0, 7)); 00114 ILOG_DEBUG(cell.substr(cell.length()-4)); 00115 if ((cell.length() < 13) || 00116 (cell.substr(0, 7) != "http://") || 00117 ((cell.substr(cell.length()-4) != ".jpg") && 00118 (cell.substr(cell.length()-5) != ".jpeg"))) 00119 { 00120 /*ILOG_DEBUG("On row " << data.size()+1 << " and column " << 00121 data[0][row.size()-1] << ": " << 00122 result << " != " << cell);*/ 00123 newColumnType[row.size()-1] = TYPE_TEXT; 00124 } 00125 } 00126 } 00127 } 00128 if(!row.size()) continue; 00129 if(data.size() && (row.size() != columnType.size())) { 00130 ILOG_DEBUG("Line " << data.size() + 1 << " has wrong number of cells (" << 00131 row.size() << " should be " << columnType.size() << ")"); 00132 00133 } else { 00134 //printf("\n"); 00135 data.push_back(row); 00136 // Only store columnType for valid rows 00137 columnType = newColumnType; 00138 } 00139 } 00140 is.close(); 00141 00142 ILOG_DEBUG("phase 2"); 00143 00144 #ifdef MEDIATABLE_BASIC 00145 int gotcha = 0; 00146 bool isFlickr = false; 00147 for(int col=0; col < columnType.size(); col++) { 00148 if(data[0][col] == "id") gotcha++; 00149 if(data[0][col] == "farm") gotcha++; 00150 if(data[0][col] == "server") gotcha++; 00151 if(data[0][col] == "secret") gotcha++; 00152 } 00153 if(gotcha == 4) { 00154 isFlickr = true; 00155 } 00156 mRemoteRetriever = RemoteRetriever::GetInstance(); 00157 std::string id, farm, server, secret; 00158 std::string sizeSuffix = "t"; 00159 #endif 00160 ILOG_DEBUG("phase 3"); 00161 00162 for(int col=0; col < columnType.size(); col++) { 00163 #ifdef MEDIATABLE_BASIC 00164 if(isFlickr) 00165 { 00166 if(data[0][col] == "id") 00167 columnType[col] = TYPE_TEXT; 00168 if(data[0][col] == "farm" | data[0][col] == "server") 00169 continue; 00170 if(data[0][col] == "secret") 00171 { 00172 AddStaticColumn("thumb", TYPE_IMAGE); 00173 //AddStaticColumn("photo", TYPE_IMAGE); 00174 continue; 00175 } 00176 } 00177 #endif 00178 AddStaticColumn(data[0][col], columnType[col]); 00179 ILOG_DEBUG("Added column " << data[0][col] << " of type " << columnType[col]); 00180 } 00181 00182 for(int row=1; row < data.size(); row++) { 00183 for(int col=0; col < columnType.size(); col++) { 00184 #ifdef MEDIATABLE_BASIC 00185 if(isFlickr) 00186 { 00187 if(data[0][col] == "id") 00188 id = data[row][col]; 00189 if(data[0][col] == "farm") 00190 { 00191 farm = data[row][col]; 00192 continue; 00193 } 00194 if(data[0][col] == "server") 00195 { 00196 server = data[row][col]; 00197 continue; 00198 } 00199 if(data[0][col] == "secret") 00200 { 00201 secret = data[row][col]; 00202 std::string imageUrl = "http://farm"; 00203 imageUrl += farm + ".static.flickr.com/" + server + "/"; 00204 imageUrl += id + "_" + secret; 00205 //AddTextData("photo", imageUrl + ".jpg"); 00206 if(sizeSuffix.length() > 0) imageUrl += "_" + sizeSuffix; 00207 AddTextData("thumb", imageUrl + ".jpg"); 00208 continue; 00209 } 00210 } 00211 #endif 00212 if(columnType[col] == TYPE_INT) AddIntData(data[0][col], atoi(data[row][col].c_str())); 00213 if(columnType[col] == TYPE_DOUBLE) AddDoubleData(data[0][col], atof(data[row][col].c_str())); 00214 if(columnType[col] == TYPE_IMAGE) AddTextData(data[0][col], data[row][col]); 00215 if(columnType[col] == TYPE_TEXT) AddTextData(data[0][col], data[row][col]); 00216 } 00217 } 00218 }
Here is the call graph for this function:
|