AsciiFile.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Soeren Sonnenburg
00008  * Copyright (C) 2010 Berlin Institute of Technology
00009  */
00010 
00011 #include "features/SparseFeatures.h"
00012 #include "lib/File.h"
00013 #include "lib/AsciiFile.h"
00014 #include "lib/Mathematics.h"
00015 
00016 using namespace shogun;
00017 
00018 CAsciiFile::CAsciiFile(FILE* f, const char* name) : CFile(f, name)
00019 {
00020 }
00021 
00022 CAsciiFile::CAsciiFile(char* fname, char rw, const char* name) : CFile(fname, rw, name)
00023 {
00024 }
00025 
00026 CAsciiFile::~CAsciiFile()
00027 {
00028 }
00029 
00030 #define GET_VECTOR(fname, mfname, sg_type) \
00031 void CAsciiFile::fname(sg_type*& vec, int32_t& len) \
00032 {                                                   \
00033     vec=NULL;                                       \
00034     len=0;                                          \
00035     int32_t num_feat=0;                             \
00036     int32_t num_vec=0;                              \
00037     mfname(vec, num_feat, num_vec);                 \
00038     if ((num_feat==1) || (num_vec==1))              \
00039     {                                               \
00040         if (num_feat==1)                            \
00041             len=num_vec;                            \
00042         else                                        \
00043             len=num_feat;                           \
00044     }                                               \
00045     else                                            \
00046     {                                               \
00047         delete[] vec;                               \
00048         vec=NULL;                                   \
00049         len=0;                                      \
00050         SG_ERROR("Could not read vector from"       \
00051                 " file %s (shape %dx%d found but "  \
00052                 "vector expected).\n", filename,    \
00053                 num_vec, num_feat);                 \
00054     }                                               \
00055 }
00056 
00057 GET_VECTOR(get_byte_vector, get_byte_matrix, uint8_t)
00058 GET_VECTOR(get_char_vector, get_char_matrix, char)
00059 GET_VECTOR(get_int_vector, get_int_matrix, int32_t)
00060 GET_VECTOR(get_shortreal_vector, get_shortreal_matrix, float32_t)
00061 GET_VECTOR(get_real_vector, get_real_matrix, float64_t)
00062 GET_VECTOR(get_short_vector, get_short_matrix, int16_t)
00063 GET_VECTOR(get_word_vector, get_word_matrix, uint16_t)
00064 #undef GET_VECTOR
00065 
00066 #define GET_MATRIX(fname, conv, sg_type)                                        \
00067 void CAsciiFile::fname(sg_type*& matrix, int32_t& num_feat, int32_t& num_vec)   \
00068 {                                                                               \
00069     struct stat stats;                                                          \
00070     if (stat(filename, &stats)!=0)                                              \
00071         SG_ERROR("Could not get file statistics.\n");                           \
00072                                                                                 \
00073     char* data=new char[stats.st_size+1];                                       \
00074     memset(data, 0, sizeof(char)*(stats.st_size+1));                            \
00075     size_t nread=fread(data, sizeof(char), stats.st_size, file);                \
00076     if (nread<=0)                                                               \
00077         SG_ERROR("Could not read data from %s.\n", filename);                   \
00078                                                                                 \
00079     SG_DEBUG("data read from file:\n%s\n", data);                               \
00080                                                                                 \
00081     /* determine num_feat and num_vec, populate dynamic array */                \
00082     int32_t nf=0;                                                               \
00083     num_feat=0;                                                                 \
00084     num_vec=0;                                                                  \
00085     char* ptr_item=NULL;                                                        \
00086     char* ptr_data=data;                                                        \
00087     CDynamicArray<char*>* items=new CDynamicArray<char*>();                     \
00088                                                                                 \
00089     while (*ptr_data)                                                           \
00090     {                                                                           \
00091         if (*ptr_data=='\n')                                                    \
00092         {                                                                       \
00093             if (ptr_item)                                                       \
00094                 nf++;                                                           \
00095                                                                                 \
00096             if (num_feat!=0 && nf!=num_feat)                                    \
00097                 SG_ERROR("Number of features mismatches (%d != %d) in vector"   \
00098                         " %d in file %s.\n", num_feat, nf, num_vec, filename);  \
00099                                                                                 \
00100             append_item(items, ptr_data, ptr_item);                             \
00101             num_feat=nf;                                                        \
00102             num_vec++;                                                          \
00103             nf=0;                                                               \
00104             ptr_item=NULL;                                                      \
00105         }                                                                       \
00106         else if (!isblank(*ptr_data) && !ptr_item)                              \
00107         {                                                                       \
00108             ptr_item=ptr_data;                                                  \
00109         }                                                                       \
00110         else if (isblank(*ptr_data) && ptr_item)                                \
00111         {                                                                       \
00112             append_item(items, ptr_data, ptr_item);                             \
00113             ptr_item=NULL;                                                      \
00114             nf++;                                                               \
00115         }                                                                       \
00116                                                                                 \
00117         ptr_data++;                                                             \
00118     }                                                                           \
00119                                                                                 \
00120     SG_DEBUG("num feat: %d, num_vec %d\n", num_feat, num_vec);                  \
00121     delete[] data;                                                              \
00122                                                                                 \
00123     /* now copy data into matrix */                                             \
00124     matrix=new sg_type[num_vec*num_feat];                                       \
00125     for (int32_t i=0; i<num_vec; i++)                                           \
00126     {                                                                           \
00127         for (int32_t j=0; j<num_feat; j++)                                      \
00128         {                                                                       \
00129             char* item=items->get_element(i*num_feat+j);                        \
00130             matrix[i*num_feat+j]=conv(item);                                    \
00131             delete[] item;                                                      \
00132         }                                                                       \
00133     }                                                                           \
00134     delete items;                                                               \
00135 }
00136 
00137 GET_MATRIX(get_byte_matrix, atoi, uint8_t)
00138 GET_MATRIX(get_char_matrix, atoi, char)
00139 GET_MATRIX(get_int_matrix, atoi, int32_t)
00140 GET_MATRIX(get_uint_matrix, atoi, uint32_t)
00141 GET_MATRIX(get_long_matrix, atoll, int64_t)
00142 GET_MATRIX(get_ulong_matrix, atoll, uint64_t)
00143 GET_MATRIX(get_shortreal_matrix, atof, float32_t)
00144 GET_MATRIX(get_real_matrix, atof, float64_t)
00145 GET_MATRIX(get_longreal_matrix, atof, floatmax_t)
00146 GET_MATRIX(get_short_matrix, atoi, int16_t)
00147 GET_MATRIX(get_word_matrix, atoi, uint16_t)
00148 #undef GET_MATRIX
00149 
00150 void CAsciiFile::get_byte_ndarray(uint8_t*& array, int32_t*& dims, int32_t& num_dims)
00151 {
00152 }
00153 
00154 void CAsciiFile::get_char_ndarray(char*& array, int32_t*& dims, int32_t& num_dims)
00155 {
00156 }
00157 
00158 void CAsciiFile::get_int_ndarray(int32_t*& array, int32_t*& dims, int32_t& num_dims)
00159 {
00160 }
00161 
00162 void CAsciiFile::get_shortreal_ndarray(float32_t*& array, int32_t*& dims, int32_t& num_dims)
00163 {
00164 }
00165 
00166 void CAsciiFile::get_real_ndarray(float64_t*& array, int32_t*& dims, int32_t& num_dims)
00167 {
00168 }
00169 
00170 void CAsciiFile::get_short_ndarray(int16_t*& array, int32_t*& dims, int32_t& num_dims)
00171 {
00172 }
00173 
00174 void CAsciiFile::get_word_ndarray(uint16_t*& array, int32_t*& dims, int32_t& num_dims)
00175 {
00176 }
00177 
/*
 * GET_SPARSEMATRIX(fname, conv, sg_type) defines CAsciiFile::fname(), which
 * reads a sparse matrix from the ASCII file, one vector per line.
 *
 * Line format as parsed below: an optional leading token terminated by a
 * single space (the "label part", which is skipped), followed by
 * `index:value` pairs separated by spaces. If a ':' is met before any space
 * the line is treated as having no label. Indices are converted with atoi()
 * and stored minus one (feat_index); values are converted with conv().
 *
 * The file is processed in two passes:
 *   1. Count lines (-> num_vec) in 1 MiB blocks and record the longest line
 *      so that the buffer for pass 2 can hold any complete line.
 *   2. Re-read the file block-wise. When a block ends in the middle of a
 *      line, the partial line is moved to the front of the buffer and the
 *      rest of the buffer is refilled before parsing continues.
 *
 * Outputs (only written when `file` is non-NULL):
 *   matrix   - new[]-allocated array of num_vec TSparse entries; every
 *              entry's `features` array is new[]-allocated as well and is
 *              not released here
 *   num_feat - largest stored feature index plus one
 *   num_vec  - number of lines found in pass 1
 *
 * SG_ERROR is raised for a line that contains no ':' at all.
 *
 * NOTE(review): `contains_cr` is assigned but never read.
 * NOTE(review): `len` is a size_t but is passed for the `%d` / `%.*s`
 *   conversions of the error message - confirm this is safe on LP64.
 * NOTE(review): for a final line without trailing '\n', `len=i-old_sz`
 *   excludes the last byte and `data[j]='\0'` at j==len then overwrites it,
 *   so the last character of the file looks like it is dropped - confirm.
 * NOTE(review): the number of entries is the count of ':' characters on the
 *   line, including any inside the label part - confirm labels never
 *   contain ':'.
 */
00178 #define GET_SPARSEMATRIX(fname, conv, sg_type)                                      \
00179 void CAsciiFile::fname(TSparse<sg_type>*& matrix, int32_t& num_feat, int32_t& num_vec)  \
00180 {   \
00181     size_t blocksize=1024*1024; \
00182     size_t required_blocksize=blocksize;    \
00183     uint8_t* dummy=new uint8_t[blocksize];  \
00184     \
00185     if (file)   \
00186     {   \
00187         num_vec=0;  \
00188         num_feat=0; \
00189     \
00190         SG_INFO("counting line numbers in file %s\n", filename);    \
00191         size_t sz=blocksize;    \
00192         size_t block_offs=0;    \
00193         size_t old_block_offs=0;    \
00194         fseek(file, 0, SEEK_END);   \
00195         size_t fsize=ftell(file);   \
00196         rewind(file);   \
00197     \
00198         while (sz == blocksize) \
00199         {   \
00200             sz=fread(dummy, sizeof(uint8_t), blocksize, file);  \
00201             bool contains_cr=false; \
00202             for (size_t i=0; i<sz; i++) \
00203             {   \
00204                 block_offs++;   \
00205                 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))    \
00206                 {   \
00207                     num_vec++;  \
00208                     contains_cr=true;   \
00209                     required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs+1); \
00210                     old_block_offs=block_offs;  \
00211                 }   \
00212             }   \
00213             SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t");    \
00214         }   \
00215     \
00216         SG_INFO("found %d feature vectors\n", num_vec); \
00217         delete[] dummy; \
00218         blocksize=required_blocksize;   \
00219         dummy = new uint8_t[blocksize+1]; /*allow setting of '\0' at EOL*/  \
00220         matrix=new TSparse<sg_type>[num_vec];   \
00221     \
00222         rewind(file);   \
00223         sz=blocksize;   \
00224         int32_t lines=0;    \
00225         while (sz == blocksize) \
00226         {   \
00227             sz=fread(dummy, sizeof(uint8_t), blocksize, file);  \
00228     \
00229             size_t old_sz=0;    \
00230             for (size_t i=0; i<sz; i++) \
00231             {   \
00232                 if (i==sz-1 && dummy[i]!='\n' && sz==blocksize) \
00233                 {   \
00234                     size_t len=i-old_sz+1;  \
00235                     uint8_t* data=&dummy[old_sz];   \
00236     \
00237                     for (size_t j=0; j<len; j++)    \
00238                         dummy[j]=data[j];   \
00239     \
00240                     sz=fread(dummy+len, sizeof(uint8_t), blocksize-len, file);  \
00241                     i=0;    \
00242                     old_sz=0;   \
00243                     sz+=len;    \
00244                 }   \
00245     \
00246                 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))    \
00247                 {   \
00248     \
00249                     size_t len=i-old_sz;    \
00250                     uint8_t* data=&dummy[old_sz];   \
00251     \
00252                     int32_t dims=0; \
00253                     for (size_t j=0; j<len; j++)    \
00254                     {   \
00255                         if (data[j]==':')   \
00256                             dims++; \
00257                     }   \
00258     \
00259                     if (dims<=0)    \
00260                     {   \
00261                         SG_ERROR("Error in line %d - number of" \
00262                                 " dimensions is %d line is %d characters"   \
00263                                 " long\n line_content:'%.*s'\n", lines, \
00264                                 dims, len, len, (const char*) data);    \
00265                     }   \
00266     \
00267                     TSparseEntry<sg_type>* feat=new TSparseEntry<sg_type>[dims];    \
00268     \
00269                     /* skip label part */   \
00270                     size_t j=0; \
00271                     for (; j<len; j++)  \
00272                     {   \
00273                         if (data[j]==':')   \
00274                         {   \
00275                             j=-1; /* file without label*/   \
00276                             break;  \
00277                         }   \
00278     \
00279                         if (data[j]==' ')   \
00280                         {   \
00281                             data[j]='\0';   \
00282     \
00283                             /* skip label part */   \
00284                             break;  \
00285                         }   \
00286                     }   \
00287     \
00288                     int32_t d=0;    \
00289                     j++;    \
00290                     uint8_t* start=&data[j];    \
00291                     for (; j<len; j++)  \
00292                     {   \
00293                         if (data[j]==':')   \
00294                         {   \
00295                             data[j]='\0';   \
00296     \
00297                             feat[d].feat_index=(int32_t) atoi((const char*) start)-1;   \
00298                             num_feat=CMath::max(num_feat, feat[d].feat_index+1);    \
00299     \
00300                             j++;    \
00301                             start=&data[j]; \
00302                             for (; j<len; j++)  \
00303                             {   \
00304                                 if (data[j]==' ' || data[j]=='\n')  \
00305                                 {   \
00306                                     data[j]='\0';   \
00307                                     feat[d].entry=(sg_type) conv((const char*) start);  \
00308                                     d++;    \
00309                                     break;  \
00310                                 }   \
00311                             }   \
00312     \
00313                             if (j==len) \
00314                             {   \
00315                                 data[j]='\0';   \
00316                                 feat[dims-1].entry=(sg_type) conv((const char*) start); \
00317                             }   \
00318     \
00319                             j++;    \
00320                             start=&data[j]; \
00321                         }   \
00322                     }   \
00323     \
00324                     matrix[lines].vec_index=lines;  \
00325                     matrix[lines].num_feat_entries=dims;    \
00326                     matrix[lines].features=feat;    \
00327     \
00328                     old_sz=i+1; \
00329                     lines++;    \
00330                     SG_PROGRESS(lines, 0, num_vec, 1, "LOADING:\t");    \
00331                 }   \
00332             }   \
00333         }   \
00334     \
00335         SG_INFO("file successfully read\n");    \
00336     }   \
00337     \
00338     delete[] dummy; \
00339 }
00340 
00341 GET_SPARSEMATRIX(get_bool_sparsematrix, atoi, bool)
00342 GET_SPARSEMATRIX(get_byte_sparsematrix, atoi, uint8_t)
00343 GET_SPARSEMATRIX(get_char_sparsematrix, atoi, char)
00344 GET_SPARSEMATRIX(get_int_sparsematrix, atoi, int32_t)
00345 GET_SPARSEMATRIX(get_uint_sparsematrix, atoi, uint32_t)
00346 GET_SPARSEMATRIX(get_long_sparsematrix, atoll, int64_t)
00347 GET_SPARSEMATRIX(get_ulong_sparsematrix, atoll, uint64_t)
00348 GET_SPARSEMATRIX(get_shortreal_sparsematrix, atof, float32_t)
00349 GET_SPARSEMATRIX(get_real_sparsematrix, atof, float64_t)
00350 GET_SPARSEMATRIX(get_longreal_sparsematrix, atof, floatmax_t)
00351 GET_SPARSEMATRIX(get_short_sparsematrix, atoi, int16_t)
00352 GET_SPARSEMATRIX(get_word_sparsematrix, atoi, uint16_t)
00353 #undef GET_SPARSEMATRIX
00354 
00355 
00356 void CAsciiFile::get_byte_string_list(T_STRING<uint8_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00357 {
00358     size_t blocksize=1024*1024;
00359     size_t required_blocksize=0;
00360     uint8_t* dummy=new uint8_t[blocksize];
00361     uint8_t* overflow=NULL;
00362     int32_t overflow_len=0;
00363 
00364     if (file)
00365     {
00366         num_str=0;
00367         max_string_len=0;
00368 
00369         SG_INFO("counting line numbers in file %s\n", filename);
00370         size_t sz=blocksize;
00371         size_t block_offs=0;
00372         size_t old_block_offs=0;
00373         fseek(file, 0, SEEK_END);
00374         size_t fsize=ftell(file);
00375         rewind(file);
00376 
00377         while (sz == blocksize)
00378         {
00379             sz=fread(dummy, sizeof(uint8_t), blocksize, file);
00380             bool contains_cr=false;
00381             for (size_t i=0; i<sz; i++)
00382             {
00383                 block_offs++;
00384                 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00385                 {
00386                     num_str++;
00387                     contains_cr=true;
00388                     required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs);
00389                     old_block_offs=block_offs;
00390                 }
00391             }
00392             SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t");
00393         }
00394 
00395         SG_INFO("found %d strings\n", num_str);
00396         SG_DEBUG("block_size=%d\n", required_blocksize);
00397         delete[] dummy;
00398         blocksize=required_blocksize;
00399         dummy=new uint8_t[blocksize];
00400         overflow=new uint8_t[blocksize];
00401         strings=new T_STRING<uint8_t>[num_str];
00402 
00403         rewind(file);
00404         sz=blocksize;
00405         int32_t lines=0;
00406         size_t old_sz=0;
00407         while (sz == blocksize)
00408         {
00409             sz=fread(dummy, sizeof(uint8_t), blocksize, file);
00410 
00411             old_sz=0;
00412             for (size_t i=0; i<sz; i++)
00413             {
00414                 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00415                 {
00416                     int32_t len=i-old_sz;
00417                     max_string_len=CMath::max(max_string_len, len+overflow_len);
00418 
00419                     strings[lines].length=len+overflow_len;
00420                     strings[lines].string=new uint8_t[len+overflow_len];
00421 
00422                     for (int32_t j=0; j<overflow_len; j++)
00423                         strings[lines].string[j]=overflow[j];
00424                     for (int32_t j=0; j<len; j++)
00425                         strings[lines].string[j+overflow_len]=dummy[old_sz+j];
00426 
00427                     // clear overflow
00428                     overflow_len=0;
00429 
00430                     //CMath::display_vector(strings[lines].string, len);
00431                     old_sz=i+1;
00432                     lines++;
00433                     SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t");
00434                 }
00435             }
00436 
00437             for (size_t i=old_sz; i<sz; i++)
00438                 overflow[i-old_sz]=dummy[i];
00439 
00440             overflow_len=sz-old_sz;
00441         }
00442         SG_INFO("file successfully read\n");
00443         SG_INFO("max_string_length=%d\n", max_string_len);
00444         SG_INFO("num_strings=%d\n", num_str);
00445     }
00446 
00447     delete[] dummy;
00448     delete[] overflow;
00449 }
00450 
00451 void CAsciiFile::get_char_string_list(T_STRING<char>*& strings, int32_t& num_str, int32_t& max_string_len)
00452 {
00453     size_t blocksize=1024*1024;
00454     size_t required_blocksize=0;
00455     char* dummy=new char[blocksize];
00456     char* overflow=NULL;
00457     int32_t overflow_len=0;
00458 
00459     if (file)
00460     {
00461         num_str=0;
00462         max_string_len=0;
00463 
00464         SG_INFO("counting line numbers in file %s\n", filename);
00465         size_t sz=blocksize;
00466         size_t block_offs=0;
00467         size_t old_block_offs=0;
00468         fseek(file, 0, SEEK_END);
00469         size_t fsize=ftell(file);
00470         rewind(file);
00471 
00472         while (sz == blocksize)
00473         {
00474             sz=fread(dummy, sizeof(char), blocksize, file);
00475             bool contains_cr=false;
00476             for (size_t i=0; i<sz; i++)
00477             {
00478                 block_offs++;
00479                 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00480                 {
00481                     num_str++;
00482                     contains_cr=true;
00483                     required_blocksize=CMath::max(required_blocksize, block_offs-old_block_offs);
00484                     old_block_offs=block_offs;
00485                 }
00486             }
00487             SG_PROGRESS(block_offs, 0, fsize, 1, "COUNTING:\t");
00488         }
00489 
00490         SG_INFO("found %d strings\n", num_str);
00491         SG_DEBUG("block_size=%d\n", required_blocksize);
00492         delete[] dummy;
00493         blocksize=required_blocksize;
00494         dummy=new char[blocksize];
00495         overflow=new char[blocksize];
00496         strings=new T_STRING<char>[num_str];
00497 
00498         rewind(file);
00499         sz=blocksize;
00500         int32_t lines=0;
00501         size_t old_sz=0;
00502         while (sz == blocksize)
00503         {
00504             sz=fread(dummy, sizeof(char), blocksize, file);
00505 
00506             old_sz=0;
00507             for (size_t i=0; i<sz; i++)
00508             {
00509                 if (dummy[i]=='\n' || (i==sz-1 && sz<blocksize))
00510                 {
00511                     int32_t len=i-old_sz;
00512                     max_string_len=CMath::max(max_string_len, len+overflow_len);
00513 
00514                     strings[lines].length=len+overflow_len;
00515                     strings[lines].string=new char[len+overflow_len];
00516 
00517                     for (int32_t j=0; j<overflow_len; j++)
00518                         strings[lines].string[j]=overflow[j];
00519                     for (int32_t j=0; j<len; j++)
00520                         strings[lines].string[j+overflow_len]=dummy[old_sz+j];
00521 
00522                     // clear overflow
00523                     overflow_len=0;
00524 
00525                     //CMath::display_vector(strings[lines].string, len);
00526                     old_sz=i+1;
00527                     lines++;
00528                     SG_PROGRESS(lines, 0, num_str, 1, "LOADING:\t");
00529                 }
00530             }
00531 
00532             for (size_t i=old_sz; i<sz; i++)
00533                 overflow[i-old_sz]=dummy[i];
00534 
00535             overflow_len=sz-old_sz;
00536         }
00537         SG_INFO("file successfully read\n");
00538         SG_INFO("max_string_length=%d\n", max_string_len);
00539         SG_INFO("num_strings=%d\n", num_str);
00540     }
00541 
00542     delete[] dummy;
00543     delete[] overflow;
00544 }
00545 
00546 void CAsciiFile::get_int_string_list(T_STRING<int32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00547 {
00548     strings=NULL;
00549     num_str=0;
00550     max_string_len=0;
00551 }
00552 
00553 void CAsciiFile::get_uint_string_list(T_STRING<uint32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00554 {
00555     strings=NULL;
00556     num_str=0;
00557     max_string_len=0;
00558 }
00559 
00560 void CAsciiFile::get_short_string_list(T_STRING<int16_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00561 {
00562     strings=NULL;
00563     num_str=0;
00564     max_string_len=0;
00565 }
00566 
00567 void CAsciiFile::get_word_string_list(T_STRING<uint16_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00568 {
00569     strings=NULL;
00570     num_str=0;
00571     max_string_len=0;
00572 }
00573 
00574 void CAsciiFile::get_long_string_list(T_STRING<int64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00575 {
00576     strings=NULL;
00577     num_str=0;
00578     max_string_len=0;
00579 }
00580 
00581 void CAsciiFile::get_ulong_string_list(T_STRING<uint64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00582 {
00583     strings=NULL;
00584     num_str=0;
00585     max_string_len=0;
00586 }
00587 
00588 void CAsciiFile::get_shortreal_string_list(T_STRING<float32_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00589 {
00590     strings=NULL;
00591     num_str=0;
00592     max_string_len=0;
00593 }
00594 
00595 void CAsciiFile::get_real_string_list(T_STRING<float64_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00596 {
00597     strings=NULL;
00598     num_str=0;
00599     max_string_len=0;
00600 }
00601 
00602 void CAsciiFile::get_longreal_string_list(T_STRING<floatmax_t>*& strings, int32_t& num_str, int32_t& max_string_len)
00603 {
00604     strings=NULL;
00605     num_str=0;
00606     max_string_len=0;
00607 }
00608 
00609 
00612 #define SET_VECTOR(fname, mfname, sg_type)  \
00613 void CAsciiFile::fname(const sg_type* vec, int32_t len) \
00614 {                                                           \
00615     mfname(vec, len, 1);                                    \
00616 }
00617 SET_VECTOR(set_byte_vector, set_byte_matrix, uint8_t)
00618 SET_VECTOR(set_char_vector, set_char_matrix, char)
00619 SET_VECTOR(set_int_vector, set_int_matrix, int32_t)
00620 SET_VECTOR(set_shortreal_vector, set_shortreal_matrix, float32_t)
00621 SET_VECTOR(set_real_vector, set_real_matrix, float64_t)
00622 SET_VECTOR(set_short_vector, set_short_matrix, int16_t)
00623 SET_VECTOR(set_word_vector, set_word_matrix, uint16_t)
00624 #undef SET_VECTOR
00625 
00626 #define SET_MATRIX(fname, sg_type, fprt_type, type_str) \
00627 void CAsciiFile::fname(const sg_type* matrix, int32_t num_feat, int32_t num_vec)    \
00628 {                                                                                   \
00629     if (!(file && matrix))                                                          \
00630         SG_ERROR("File or matrix invalid.\n");                                      \
00631                                                                                     \
00632     for (int32_t i=0; i<num_vec; i++)                                               \
00633     {                                                                               \
00634         for (int32_t j=0; j<num_feat; j++)                                          \
00635         {                                                                           \
00636             sg_type v=matrix[num_feat*i+j];                                         \
00637             if (j==num_feat-1)                                                      \
00638                 fprintf(file, type_str "\n", (fprt_type) v);                        \
00639             else                                                                    \
00640                 fprintf(file, type_str " ", (fprt_type) v);                         \
00641         }                                                                           \
00642     }                                                                               \
00643 }
00644 SET_MATRIX(set_char_matrix, char, char, "%c")
00645 SET_MATRIX(set_byte_matrix, uint8_t, uint8_t, "%u")
00646 SET_MATRIX(set_int_matrix, int32_t, int32_t, "%i")
00647 SET_MATRIX(set_uint_matrix, uint32_t, uint32_t, "%u")
00648 SET_MATRIX(set_long_matrix, int64_t, long long int, "%lli")
00649 SET_MATRIX(set_ulong_matrix, uint64_t, long long unsigned int, "%llu")
00650 SET_MATRIX(set_short_matrix, int16_t, int16_t, "%i")
00651 SET_MATRIX(set_word_matrix, uint16_t, uint16_t, "%u")
00652 SET_MATRIX(set_shortreal_matrix, float32_t, float32_t, "%f")
00653 SET_MATRIX(set_real_matrix, float64_t, float64_t, "%f")
00654 SET_MATRIX(set_longreal_matrix, floatmax_t, floatmax_t, "%Lf")
00655 #undef SET_MATRIX
00656 
00657 #define SET_SPARSEMATRIX(fname, sg_type, fprt_type, type_str) \
00658 void CAsciiFile::fname(const TSparse<sg_type>* matrix, int32_t num_feat, int32_t num_vec)   \
00659 {                                                                                           \
00660     if (!(file && matrix))                                                                  \
00661         SG_ERROR("File or matrix invalid.\n");                                              \
00662                                                                                             \
00663     for (int32_t i=0; i<num_vec; i++)                                                       \
00664     {                                                                                       \
00665         TSparseEntry<sg_type>* vec = matrix[i].features;                                    \
00666         int32_t len=matrix[i].num_feat_entries;                                             \
00667                                                                                             \
00668         for (int32_t j=0; j<len; j++)                                                       \
00669         {                                                                                   \
00670             if (j<len-1)                                                                    \
00671             {                                                                               \
00672                 fprintf(file, "%d:" type_str " ",                                           \
00673                         (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry);           \
00674             }                                                                               \
00675             else                                                                            \
00676             {                                                                               \
00677                 fprintf(file, "%d:" type_str "\n",                                          \
00678                         (int32_t) vec[j].feat_index+1, (fprt_type) vec[j].entry);           \
00679             }                                                                               \
00680         }                                                                                   \
00681     }                                                                                       \
00682 }
00683 SET_SPARSEMATRIX(set_bool_sparsematrix, bool, uint8_t, "%u")
00684 SET_SPARSEMATRIX(set_char_sparsematrix, char, char, "%c")
00685 SET_SPARSEMATRIX(set_byte_sparsematrix, uint8_t, uint8_t, "%u")
00686 SET_SPARSEMATRIX(set_int_sparsematrix, int32_t, int32_t, "%i")
00687 SET_SPARSEMATRIX(set_uint_sparsematrix, uint32_t, uint32_t, "%u")
00688 SET_SPARSEMATRIX(set_long_sparsematrix, int64_t, long long int, "%lli")
00689 SET_SPARSEMATRIX(set_ulong_sparsematrix, uint64_t, long long unsigned int, "%llu")
00690 SET_SPARSEMATRIX(set_short_sparsematrix, int16_t, int16_t, "%i")
00691 SET_SPARSEMATRIX(set_word_sparsematrix, uint16_t, uint16_t, "%u")
00692 SET_SPARSEMATRIX(set_shortreal_sparsematrix, float32_t, float32_t, "%f")
00693 SET_SPARSEMATRIX(set_real_sparsematrix, float64_t, float64_t, "%f")
00694 SET_SPARSEMATRIX(set_longreal_sparsematrix, floatmax_t, floatmax_t, "%Lf")
00695 #undef SET_SPARSEMATRIX
00696 
00697 void CAsciiFile::set_byte_string_list(const T_STRING<uint8_t>* strings, int32_t num_str)
00698 {
00699     if (!(file && strings))
00700         SG_ERROR("File or strings invalid.\n");
00701 
00702     for (int32_t i=0; i<num_str; i++)
00703     {
00704         int32_t len = strings[i].length;
00705         fwrite(strings[i].string, sizeof(uint8_t), len, file);
00706         fprintf(file, "\n");
00707     }
00708 }
00709 
00710 void CAsciiFile::set_char_string_list(const T_STRING<char>* strings, int32_t num_str)
00711 {
00712     if (!(file && strings))
00713         SG_ERROR("File or strings invalid.\n");
00714 
00715     for (int32_t i=0; i<num_str; i++)
00716     {
00717         int32_t len = strings[i].length;
00718         fwrite(strings[i].string, sizeof(char), len, file);
00719         fprintf(file, "\n");
00720     }
00721 }
00722 
/**
 * Empty implementation for int32_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for int32_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_int_string_list(const T_STRING<int32_t>* strings, int32_t num_str)
{
}
00726 
/**
 * Empty implementation for uint32_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for uint32_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_uint_string_list(const T_STRING<uint32_t>* strings, int32_t num_str)
{
}
00730 
/**
 * Empty implementation for int16_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for int16_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_short_string_list(const T_STRING<int16_t>* strings, int32_t num_str)
{
}
00734 
/**
 * Empty implementation for uint16_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for uint16_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_word_string_list(const T_STRING<uint16_t>* strings, int32_t num_str)
{
}
00738 
/**
 * Empty implementation for int64_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for int64_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_long_string_list(const T_STRING<int64_t>* strings, int32_t num_str)
{
}
00742 
/**
 * Empty implementation for uint64_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for uint64_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_ulong_string_list(const T_STRING<uint64_t>* strings, int32_t num_str)
{
}
00746 
/**
 * Empty implementation for float32_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for float32_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_shortreal_string_list(const T_STRING<float32_t>* strings, int32_t num_str)
{
}
00750 
/**
 * Empty implementation for float64_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for float64_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_real_string_list(const T_STRING<float64_t>* strings, int32_t num_str)
{
}
00754 
/**
 * Empty implementation for floatmax_t strings: the body contains no statements,
 * so nothing is written to the file and both arguments are ignored.
 *
 * NOTE(review): presumably a placeholder for floatmax_t string output -- confirm
 * whether callers should get an error instead of a silent no-op.
 */
void CAsciiFile::set_longreal_string_list(const T_STRING<floatmax_t>* strings, int32_t num_str)
{
}
00758 
00759 template <class T> void CAsciiFile::append_item(
00760     CDynamicArray<T>* items, char* ptr_data, char* ptr_item)
00761 {
00762     size_t len=(ptr_data-ptr_item)/sizeof(char);
00763     char* item=new char[len+1];
00764     memset(item, 0, sizeof(char)*(len+1));
00765     item=strncpy(item, ptr_item, len);
00766 
00767     SG_DEBUG("current %c, len %d, item %s\n", *ptr_data, len, item);
00768     items->append_element(item);
00769 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation