00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "features/SparseFeatures.h"
00012 #include "lib/File.h"
00013 #include "lib/AsciiFile.h"
00014 #include "lib/Mathematics.h"
00015
00016 using namespace shogun;
00017
00018 CAsciiFile::CAsciiFile(FILE* f, const char* name) : CFile(f, name)
00019 {
00020 }
00021
00022 CAsciiFile::CAsciiFile(char* fname, char rw, const char* name) : CFile(fname, rw, name)
00023 {
00024 }
00025
00026 CAsciiFile::~CAsciiFile()
00027 {
00028 }
00029
00030 #define GET_VECTOR(fname, mfname, sg_type) \
00031 void CAsciiFile::fname(sg_type*& vec, int32_t& len) \
00032 { \
00033 vec=NULL; \
00034 len=0; \
00035 int32_t num_feat=0; \
00036 int32_t num_vec=0; \
00037 mfname(vec, num_feat, num_vec); \
00038 if ((num_feat==1) || (num_vec==1)) \
00039 { \
00040 if (num_feat==1) \
00041 len=num_vec; \
00042 else \
00043 len=num_feat; \
00044 } \
00045 else \
00046 { \
00047 delete[] vec; \
00048 vec=NULL; \
00049 len=0; \
00050 SG_ERROR("Could not read vector from" \
00051 " file %s (shape %dx%d found but " \
00052 "vector expected).\n", filename, \
00053 num_vec, num_feat); \
00054 } \
00055 }
00056
00057 GET_VECTOR(get_byte_vector, get_byte_matrix, uint8_t)
00058 GET_VECTOR(get_char_vector, get_char_matrix, char)
00059 GET_VECTOR(get_int_vector, get_int_matrix, int32_t)
00060 GET_VECTOR(get_shortreal_vector, get_shortreal_matrix, float32_t)
00061 GET_VECTOR(get_real_vector, get_real_matrix, float64_t)
00062 GET_VECTOR(get_short_vector, get_short_matrix, int16_t)
00063 GET_VECTOR(get_word_vector, get_word_matrix, uint16_t)
00064 #undef GET_VECTOR
00065
00066 #define GET_MATRIX(fname, conv, sg_type) \
00067 void CAsciiFile::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec) \
00068 { \
00069 struct stat stats; \
00070 if (stat(filename, &stats)!=0) \
00071 SG_ERROR("Could not get file statistics.\n"); \
00072 \
00073 char* data=new char[stats.st_size+1]; \
00074 memset(data, 0, sizeof(char)*(stats.st_size+1)); \
00075 size_t nread=fread(data, sizeof(char), stats.st_size, file); \
00076 if (nread<=0) \
00077 SG_ERROR("Could not read data from %s.\n", filename); \
00078 \
00079 SG_DEBUG("data read from file:\n%s\n", data); \
00080 \
00081 \
00082 int32_t nf=0; \
00083 num_feat=0; \
00084 num_vec=0; \
00085 char* ptr_item=NULL; \
00086 char* ptr_data=data; \
00087 CDynamicArray<char*>* items=new CDynamicArray<char*>(); \
00088 \
00089 while (*ptr_data) \
00090 { \
00091 if (*ptr_data=='\n') \
00092 { \
00093 if (ptr_item) \
00094 nf++; \
00095 \
00096 if (num_feat!=0 && nf!=num_feat) \
00097 SG_ERROR("Number of features mismatches (%d != %d) in vector" \
00098 " %d in file %s.\n", num_feat, nf, num_vec, filename); \
00099 \
00100 append_item(items, ptr_data, ptr_item); \
00101 num_feat=nf; \
00102 num_vec++; \
00103 nf=0; \
00104 ptr_item=NULL; \
00105 } \
00106 else if (!isblank(*ptr_data) && !ptr_item) \
00107 { \
00108 ptr_item=ptr_data; \
00109 } \
00110 else if (isblank(*ptr_data) && ptr_item) \
00111 { \
00112 append_item(items, ptr_data, ptr_item); \
00113 ptr_item=NULL; \
00114 nf++; \
00115 } \
00116 \
00117 ptr_data++; \
00118 } \
00119 \
00120 SG_DEBUG("num feat: %d, num_vec %d\n", num_feat, num_vec); \
00121 delete[] data; \
00122 \
00123 \
00124 matrix=new sg_type[num_vec*num_feat]; \
00125 for (int32_t i=0; i<num_vec; i++) \
00126 { \
00127 for (int32_t j=0; j<num_feat; j++) \
00128 { \
00129 char* item=items->get_element(i*num_feat+j); \
00130 matrix[i*num_feat+j]=conv(item); \
00131 delete[] item; \
00132 } \
00133 } \
00134 delete items; \
00135 }
00136
00137 GET_MATRIX(get_byte_matrix, atoi, uint8_t)
00138 GET_MATRIX(get_char_matrix, atoi, char)
00139 GET_MATRIX(get_int_matrix, atoi, int32_t)
00140 GET_MATRIX(get_uint_matrix, atoi, uint32_t)
00141 GET_MATRIX(get_long_matrix, atoll, int64_t)
00142 GET_MATRIX(get_ulong_matrix, atoll, uint64_t)
00143 GET_MATRIX(get_shortreal_matrix, atof, float32_t)
00144 GET_MATRIX(get_real_matrix, atof, float64_t)
00145 GET_MATRIX(get_longreal_matrix, atof, floatmax_t)
00146 GET_MATRIX(get_short_matrix, atoi, int16_t)
00147 GET_MATRIX(get_word_matrix, atoi, uint16_t)
00148 #undef GET_MATRIX
00149
00150 void CAsciiFile::get_byte_ndarray(uint8_t*& array, int32_t*& dims, int32_t& num_dims)
00151 {
00152 }
00153
00154 void CAsciiFile::get_char_ndarray(char*& array, int32_t*& dims, int32_t& num_dims)
00155 {
00156 }
00157
00158 void CAsciiFile::get_int_ndarray(int32_t*& array, int32_t*& dims, int32_t& num_dims)
00159 {
00160 }
00161
00162 void CAsciiFile::get_shortreal_ndarray(float32_t*& array, int32_t*& dims, int32_t& num_dims)
00163 {
00164 }
00165
00166 void CAsciiFile::get_real_ndarray(float64_t*& array, int32_t*& dims, int32_t& num_dims)
00167 {
00168 }
00169
00170 void CAsciiFile::get_short_ndarray(int16_t*& array, int32_t*& dims, int32_t& num_dims)
00171 {
00172 }
00173
00174 void CAsciiFile::get_word_ndarray(uint16_t*& array, int32_t*& dims, int32_t& num_dims)
00175 {
00176 }
00177
00178 #define GET_SPARSEMATRIX(fname, conv, sg_type) \
00179 void CAsciiFile::fname(TSparse<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec) \
00180 { \
00181 size_t blocksize=1024*1024; \
00182 size_t required_blocksize=blocksize; \
00183 uint8_t* dummy=new uint8_t[blocksize]; \
00184 \
00185 if (file) \
00186 { \
00187 num_vec=0; \
00188 num_feat=0; \
00189 \
00190 SG_INFO("counting line numbers in file %s\n", filename); \
00191 size_t sz=blocksize; \
00192 size_t block_offs=0; \
00193 size_t old_block_offs=0; \
00194 fseek(file, 0, SEEK_END); \
00195 size_t fsize=ftell(file); \
00196 rewind(file); \
00197 \
00198 while (sz == blocksize) \
00199 { \
00200 sz=fread(dummy, sizeof(uint8_t), blocksize, file); \
00201 bool contains_cr=false; \
00202 for (size_t i=0; i<sz; i++) \
00203 { \
00204 block_offs++; \
00205 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) \
00206 { \
00207 num_vec++; \
00208 contains_cr=true; \
00209 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs+1); \
00210 old_block_offs=block_offs; \
00211 } \
00212 } \
00213 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t"); \
00214 } \
00215 \
00216 SG_INFO("found %d feature vectors\n", num_vec); \
00217 delete[] dummy; \
00218 blocksize=required_blocksize; \
00219 dummy = new uint8_t[blocksize+1]; \
00220 matrix=new TSparse<sg_type>[num_vec]; \
00221 \
00222 rewind(file); \
00223 sz=blocksize; \
00224 int32_t lines=0; \
00225 while (sz == blocksize) \
00226 { \
00227 sz=fread(dummy, sizeof(uint8_t), blocksize, file); \
00228 \
00229 size_t old_sz=0; \
00230 for (size_t i=0; i<sz; i++) \
00231 { \
00232 if (i==sz-1 && dummy[i]!='\n' && sz==blocksize) \
00233 { \
00234 size_t len=i-old_sz+1; \
00235 uint8_t* data=&dummy[old_sz]; \
00236 \
00237 for (size_t j=0; j<len; j++) \
00238 dummy[j]=data[j]; \
00239 \
00240 sz=fread(dummy+len, sizeof(uint8_t), blocksize-len, file); \
00241 i=0; \
00242 old_sz=0; \
00243 sz+=len; \
00244 } \
00245 \
00246 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize)) \
00247 { \
00248 \
00249 size_t len=i-old_sz; \
00250 uint8_t* data=&dummy[old_sz]; \
00251 \
00252 int32_t dims=0; \
00253 for (size_t j=0; j<len; j++) \
00254 { \
00255 if (data[j]==':') \
00256 dims++; \
00257 } \
00258 \
00259 if (dims<=0) \
00260 { \
00261 SG_ERROR("Error in line %d - number of" \
00262 " dimensions is %d line is %d characters" \
00263 " long\n line_content:'%.*s'\n", lines, \
00264 dims, len, len, (const char*) data); \
00265 } \
00266 \
00267 TSparseEntry<sg_type>* feat=new TSparseEntry<sg_type>[dims]; \
00268 \
00269 \
00270 size_t j=0; \
00271 for (; j<len; j++) \
00272 { \
00273 if (data[j]==':') \
00274 { \
00275 j=-1; \
00276 break; \
00277 } \
00278 \
00279 if (data[j]==' ') \
00280 { \
00281 data[j]='\0'; \
00282 \
00283 \
00284 break; \
00285 } \
00286 } \
00287 \
00288 int32_t d=0; \
00289 j++; \
00290 uint8_t* start=&data[j]; \
00291 for (; j<len; j++) \
00292 { \
00293 if (data[j]==':') \
00294 { \
00295 data[j]='\0'; \
00296 \
00297 feat[d].feat_index=(int32_t) atoi((const char*) start)-1; \
00298 num_feat=CMath::max(num_feat, feat[d].feat_index+1); \
00299 \
00300 j++; \
00301 start=&data[j]; \
00302 for (; j<len; j++) \
00303 { \
00304 if (data[j]==' ' || data[j]=='\n') \
00305 { \
00306 data[j]='\0'; \
00307 feat[d].entry=(sg_type) conv((const char*) start); \
00308 d++; \
00309 break; \
00310 } \
00311 } \
00312 \
00313 if (j==len) \
00314 { \
00315 data[j]='\0'; \
00316 feat[dims-1].entry=(sg_type) conv((const char*) start); \
00317 } \
00318 \
00319 j++; \
00320 start=&data[j]; \
00321 } \
00322 } \
00323 \
00324 matrix[lines].vec_index=lines; \
00325 matrix[lines].num_feat_entries=dims; \
00326 matrix[lines].features=feat; \
00327 \
00328 old_sz=i+1; \
00329 lines++; \
00330 SG_PROGRESS(lines, 0, num_vec, 1, "LOADING:\t"); \
00331 } \
00332 } \
00333 } \
00334 \
00335 SG_INFO("file successfully read\n"); \
00336 } \
00337 \
00338 delete[] dummy; \
00339 }
00340
00341 GET_SPARSEMATRIX(get_bool_sparsematrix, atoi, bool)
00342 GET_SPARSEMATRIX(get_byte_sparsematrix, atoi, uint8_t)
00343 GET_SPARSEMATRIX(get_char_sparsematrix, atoi, char)
00344 GET_SPARSEMATRIX(get_int_sparsematrix, atoi, int32_t)
00345 GET_SPARSEMATRIX(get_uint_sparsematrix, atoi, uint32_t)
00346 GET_SPARSEMATRIX(get_long_sparsematrix, atoll, int64_t)
00347 GET_SPARSEMATRIX(get_ulong_sparsematrix, atoll, uint64_t)
00348 GET_SPARSEMATRIX(get_shortreal_sparsematrix, atof, float32_t)
00349 GET_SPARSEMATRIX(get_real_sparsematrix, atof, float64_t)
00350 GET_SPARSEMATRIX(get_longreal_sparsematrix, atof, floatmax_t)
00351 GET_SPARSEMATRIX(get_short_sparsematrix, atoi, int16_t)
00352 GET_SPARSEMATRIX(get_word_sparsematrix, atoi, uint16_t)
00353 #undef GET_SPARSEMATRIX
00354
00355
00356 void CAsciiFile::get_byte_string_list(T_STRING<uint8_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00357 {
00358 size_t blocksize=1024*1024;
00359 size_t required_blocksize=0;
00360 uint8_t* dummy=new uint8_t[blocksize];
00361 uint8_t* overflow=NULL;
00362 int32_t overflow_len=0;
00363
00364 if (file)
00365 {
00366 num_str=0;
00367 max_string_len=0;
00368
00369 SG_INFO("counting line numbers in file %s\n", filename);
00370 size_t sz=blocksize;
00371 size_t block_offs=0;
00372 size_t old_block_offs=0;
00373 fseek(file, 0, SEEK_END);
00374 size_t fsize=ftell(file);
00375 rewind(file);
00376
00377 while (sz == blocksize)
00378 {
00379 sz=fread(dummy, sizeof(uint8_t), blocksize, file);
00380 bool contains_cr=false;
00381 for (size_t i=0; i<sz; i++)
00382 {
00383 block_offs++;
00384 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00385 {
00386 num_str++;
00387 contains_cr=true;
00388 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs);
00389 old_block_offs=block_offs;
00390 }
00391 }
00392 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t");
00393 }
00394
00395 SG_INFO("found %d strings\n", num_str);
00396 SG_DEBUG("block_size=%d\n", required_blocksize);
00397 delete[] dummy;
00398 blocksize=required_blocksize;
00399 dummy=new uint8_t[blocksize];
00400 overflow=new uint8_t[blocksize];
00401 strings=new T_STRING<uint8_t>[num_str];
00402
00403 rewind(file);
00404 sz=blocksize;
00405 int32_t lines=0;
00406 size_t old_sz=0;
00407 while (sz == blocksize)
00408 {
00409 sz=fread(dummy, sizeof(uint8_t), blocksize, file);
00410
00411 old_sz=0;
00412 for (size_t i=0; i<sz; i++)
00413 {
00414 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00415 {
00416 int32_t len=i-old_sz;
00417 max_string_len=CMath::max(max_string_len, len+overflow_len);
00418
00419 strings[lines].length=len+overflow_len;
00420 strings[lines].string=new uint8_t[len+overflow_len];
00421
00422 for (int32_t j=0; j<overflow_len; j++)
00423 strings[lines].string[j]=overflow[j];
00424 for (int32_t j=0; j<len; j++)
00425 strings[lines].string[j+overflow_len]=dummy[old_sz+j];
00426
00427
00428 overflow_len=0;
00429
00430
00431 old_sz=i+1;
00432 lines++;
00433 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t");
00434 }
00435 }
00436
00437 for (size_t i=old_sz; i<sz; i++)
00438 overflow[i-old_sz]=dummy[i];
00439
00440 overflow_len=sz-old_sz;
00441 }
00442 SG_INFO("file successfully read\n");
00443 SG_INFO("max_string_length=%d\n", max_string_len);
00444 SG_INFO("num_strings=%d\n", num_str);
00445 }
00446
00447 delete[] dummy;
00448 delete[] overflow;
00449 }
00450
00451 void CAsciiFile::get_char_string_list(T_STRING<char>*& strings, int32_t& num_str, int32_t& max_string_len)
00452 {
00453 size_t blocksize=1024*1024;
00454 size_t required_blocksize=0;
00455 char* dummy=new char[blocksize];
00456 char* overflow=NULL;
00457 int32_t overflow_len=0;
00458
00459 if (file)
00460 {
00461 num_str=0;
00462 max_string_len=0;
00463
00464 SG_INFO("counting line numbers in file %s\n", filename);
00465 size_t sz=blocksize;
00466 size_t block_offs=0;
00467 size_t old_block_offs=0;
00468 fseek(file, 0, SEEK_END);
00469 size_t fsize=ftell(file);
00470 rewind(file);
00471
00472 while (sz == blocksize)
00473 {
00474 sz=fread(dummy, sizeof(char), blocksize, file);
00475 bool contains_cr=false;
00476 for (size_t i=0; i<sz; i++)
00477 {
00478 block_offs++;
00479 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00480 {
00481 num_str++;
00482 contains_cr=true;
00483 required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs);
00484 old_block_offs=block_offs;
00485 }
00486 }
00487 SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t");
00488 }
00489
00490 SG_INFO("found %d strings\n", num_str);
00491 SG_DEBUG("block_size=%d\n", required_blocksize);
00492 delete[] dummy;
00493 blocksize=required_blocksize;
00494 dummy=new char[blocksize];
00495 overflow=new char[blocksize];
00496 strings=new T_STRING<char>[num_str];
00497
00498 rewind(file);
00499 sz=blocksize;
00500 int32_t lines=0;
00501 size_t old_sz=0;
00502 while (sz == blocksize)
00503 {
00504 sz=fread(dummy, sizeof(char), blocksize, file);
00505
00506 old_sz=0;
00507 for (size_t i=0; i<sz; i++)
00508 {
00509 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00510 {
00511 int32_t len=i-old_sz;
00512 max_string_len=CMath::max(max_string_len, len+overflow_len);
00513
00514 strings[lines].length=len+overflow_len;
00515 strings[lines].string=new char[len+overflow_len];
00516
00517 for (int32_t j=0; j<overflow_len; j++)
00518 strings[lines].string[j]=overflow[j];
00519 for (int32_t j=0; j<len; j++)
00520 strings[lines].string[j+overflow_len]=dummy[old_sz+j];
00521
00522
00523 overflow_len=0;
00524
00525
00526 old_sz=i+1;
00527 lines++;
00528 SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t");
00529 }
00530 }
00531
00532 for (size_t i=old_sz; i<sz; i++)
00533 overflow[i-old_sz]=dummy[i];
00534
00535 overflow_len=sz-old_sz;
00536 }
00537 SG_INFO("file successfully read\n");
00538 SG_INFO("max_string_length=%d\n", max_string_len);
00539 SG_INFO("num_strings=%d\n", num_str);
00540 }
00541
00542 delete[] dummy;
00543 delete[] overflow;
00544 }
00545
00546 void CAsciiFile::get_int_string_list(T_STRING<int32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00547 {
00548 strings=NULL;
00549 num_str=0;
00550 max_string_len=0;
00551 }
00552
00553 void CAsciiFile::get_uint_string_list(T_STRING<uint32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00554 {
00555 strings=NULL;
00556 num_str=0;
00557 max_string_len=0;
00558 }
00559
00560 void CAsciiFile::get_short_string_list(T_STRING<int16_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00561 {
00562 strings=NULL;
00563 num_str=0;
00564 max_string_len=0;
00565 }
00566
00567 void CAsciiFile::get_word_string_list(T_STRING<uint16_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00568 {
00569 strings=NULL;
00570 num_str=0;
00571 max_string_len=0;
00572 }
00573
00574 void CAsciiFile::get_long_string_list(T_STRING<int64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00575 {
00576 strings=NULL;
00577 num_str=0;
00578 max_string_len=0;
00579 }
00580
00581 void CAsciiFile::get_ulong_string_list(T_STRING<uint64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00582 {
00583 strings=NULL;
00584 num_str=0;
00585 max_string_len=0;
00586 }
00587
00588 void CAsciiFile::get_shortreal_string_list(T_STRING<float32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00589 {
00590 strings=NULL;
00591 num_str=0;
00592 max_string_len=0;
00593 }
00594
00595 void CAsciiFile::get_real_string_list(T_STRING<float64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00596 {
00597 strings=NULL;
00598 num_str=0;
00599 max_string_len=0;
00600 }
00601
00602 void CAsciiFile::get_longreal_string_list(T_STRING<floatmax_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00603 {
00604 strings=NULL;
00605 num_str=0;
00606 max_string_len=0;
00607 }
00608
00609
00612 #define SET_VECTOR(fname, mfname, sg_type) \
00613 void CAsciiFile::fname(const sg_type* vec, int32_t len) \
00614 { \
00615 mfname(vec, len, 1); \
00616 }
00617 SET_VECTOR(set_byte_vector, set_byte_matrix, uint8_t)
00618 SET_VECTOR(set_char_vector, set_char_matrix, char)
00619 SET_VECTOR(set_int_vector, set_int_matrix, int32_t)
00620 SET_VECTOR(set_shortreal_vector, set_shortreal_matrix, float32_t)
00621 SET_VECTOR(set_real_vector, set_real_matrix, float64_t)
00622 SET_VECTOR(set_short_vector, set_short_matrix, int16_t)
00623 SET_VECTOR(set_word_vector, set_word_matrix, uint16_t)
00624 #undef SET_VECTOR
00625
00626 #define SET_MATRIX(fname, sg_type, fprt_type, type_str) \
00627 void CAsciiFile::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec) \
00628 { \
00629 if (!(file && matrix)) \
00630 SG_ERROR("File or matrix invalid.\n"); \
00631 \
00632 for (int32_t i=0; i<num_vec; i++) \
00633 { \
00634 for (int32_t j=0; j<num_feat; j++) \
00635 { \
00636 sg_type v=matrix[num_feat*i+j]; \
00637 if (j==num_feat-1) \
00638 fprintf(file, type_str "\n", (fprt_type) v); \
00639 else \
00640 fprintf(file, type_str " ", (fprt_type) v); \
00641 } \
00642 } \
00643 }
00644 SET_MATRIX(set_char_matrix, char, char, "%c")
00645 SET_MATRIX(set_byte_matrix, uint8_t, uint8_t, "%u")
00646 SET_MATRIX(set_int_matrix, int32_t, int32_t, "%i")
00647 SET_MATRIX(set_uint_matrix, uint32_t, uint32_t, "%u")
00648 SET_MATRIX(set_long_matrix, int64_t, long long int, "%lli")
00649 SET_MATRIX(set_ulong_matrix, uint64_t, long long unsigned int, "%llu")
00650 SET_MATRIX(set_short_matrix, int16_t, int16_t, "%i")
00651 SET_MATRIX(set_word_matrix, uint16_t, uint16_t, "%u")
00652 SET_MATRIX(set_shortreal_matrix, float32_t, float32_t, "%f")
00653 SET_MATRIX(set_real_matrix, float64_t, float64_t, "%f")
00654 SET_MATRIX(set_longreal_matrix, floatmax_t, floatmax_t, "%Lf")
00655 #undef SET_MATRIX
00656
00657 #define SET_SPARSEMATRIX(fname, sg_type, fprt_type, type_str) \
00658 void CAsciiFile::fname(const TSparse<sg_type>* matrix, int32_t num_feat, int32_t num_vec) \
00659 { \
00660 if (!(file && matrix)) \
00661 SG_ERROR("File or matrix invalid.\n"); \
00662 \
00663 for (int32_t i=0; i<num_vec; i++) \
00664 { \
00665 TSparseEntry<sg_type>* vec = matrix[i].features; \
00666 int32_t len=matrix[i].num_feat_entries; \
00667 \
00668 for (int32_t j=0; j<len; j++) \
00669 { \
00670 if (j<len-1) \
00671 { \
00672 fprintf(file, "%d:" type_str " ", \
00673 (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry); \
00674 } \
00675 else \
00676 { \
00677 fprintf(file, "%d:" type_str "\n", \
00678 (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry); \
00679 } \
00680 } \
00681 } \
00682 }
00683 SET_SPARSEMATRIX(set_bool_sparsematrix, bool, uint8_t, "%u")
00684 SET_SPARSEMATRIX(set_char_sparsematrix, char, char, "%c")
00685 SET_SPARSEMATRIX(set_byte_sparsematrix, uint8_t, uint8_t, "%u")
00686 SET_SPARSEMATRIX(set_int_sparsematrix, int32_t, int32_t, "%i")
00687 SET_SPARSEMATRIX(set_uint_sparsematrix, uint32_t, uint32_t, "%u")
00688 SET_SPARSEMATRIX(set_long_sparsematrix, int64_t, long long int, "%lli")
00689 SET_SPARSEMATRIX(set_ulong_sparsematrix, uint64_t, long long unsigned int, "%llu")
00690 SET_SPARSEMATRIX(set_short_sparsematrix, int16_t, int16_t, "%i")
00691 SET_SPARSEMATRIX(set_word_sparsematrix, uint16_t, uint16_t, "%u")
00692 SET_SPARSEMATRIX(set_shortreal_sparsematrix, float32_t, float32_t, "%f")
00693 SET_SPARSEMATRIX(set_real_sparsematrix, float64_t, float64_t, "%f")
00694 SET_SPARSEMATRIX(set_longreal_sparsematrix, floatmax_t, floatmax_t, "%Lf")
00695 #undef SET_SPARSEMATRIX
00696
00697 void CAsciiFile::set_byte_string_list(const T_STRING<uint8_t>* strings, int32_t num_str)
00698 {
00699 if (!(file && strings))
00700 SG_ERROR("File or strings invalid.\n");
00701
00702 for (int32_t i=0; i<num_str; i++)
00703 {
00704 int32_t len = strings[i].length;
00705 fwrite(strings[i].string, sizeof(uint8_t), len, file);
00706 fprintf(file, "\n");
00707 }
00708 }
00709
00710 void CAsciiFile::set_char_string_list(const T_STRING<char>* strings, int32_t num_str)
00711 {
00712 if (!(file && strings))
00713 SG_ERROR("File or strings invalid.\n");
00714
00715 for (int32_t i=0; i<num_str; i++)
00716 {
00717 int32_t len = strings[i].length;
00718 fwrite(strings[i].string, sizeof(char), len, file);
00719 fprintf(file, "\n");
00720 }
00721 }
00722
00723 void CAsciiFile::set_int_string_list(const T_STRING<int32_t>* strings, int32_t num_str)
00724 {
00725 }
00726
00727 void CAsciiFile::set_uint_string_list(const T_STRING<uint32_t>* strings, int32_t num_str)
00728 {
00729 }
00730
00731 void CAsciiFile::set_short_string_list(const T_STRING<int16_t>* strings, int32_t num_str)
00732 {
00733 }
00734
00735 void CAsciiFile::set_word_string_list(const T_STRING<uint16_t>* strings, int32_t num_str)
00736 {
00737 }
00738
00739 void CAsciiFile::set_long_string_list(const T_STRING<int64_t>* strings, int32_t num_str)
00740 {
00741 }
00742
00743 void CAsciiFile::set_ulong_string_list(const T_STRING<uint64_t>* strings, int32_t num_str)
00744 {
00745 }
00746
00747 void CAsciiFile::set_shortreal_string_list(const T_STRING<float32_t>* strings, int32_t num_str)
00748 {
00749 }
00750
00751 void CAsciiFile::set_real_string_list(const T_STRING<float64_t>* strings, int32_t num_str)
00752 {
00753 }
00754
00755 void CAsciiFile::set_longreal_string_list(const T_STRING<floatmax_t>* strings, int32_t num_str)
00756 {
00757 }
00758
00759 template <class T> void CAsciiFile::append_item(
00760 CDynamicArray<T>* items, char* ptr_data, char* ptr_item)
00761 {
00762 size_t len=(ptr_data-ptr_item)/sizeof(char);
00763 char* item=new char[len+1];
00764 memset(item, 0, sizeof(char)*(len+1));
00765 item=strncpy(item, ptr_item, len);
00766
00767 SG_DEBUG("current %c, len %d, item %s\n", *ptr_data, len, item);
00768 items->append_element(item);
00769 }