BinaryFile.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Soeren Sonnenburg
00008  * Copyright (C) 2010 Berlin Institute of Technology
00009  */
00010 #ifndef __BINARY_FILE_H__
00011 #define __BINARY_FILE_H__
00012 
00013 #include <shogun/lib/config.h>
00014 #include <shogun/lib/common.h>
00015 #include <shogun/base/SGObject.h>
00016 #include <shogun/lib/io.h>
00017 #include <shogun/lib/SimpleFile.h>
00018 
00019 namespace shogun
00020 {
/**
 * File class, declared as a public subclass of CFile, that exposes typed
 * getter and setter member functions for vectors, dense matrices,
 * n-dimensional arrays, sparse matrices and string lists.
 *
 * Only declarations are visible in this header: apart from get_name() and the
 * two private load_data() / save_data() templates, every member function is
 * defined elsewhere.
 *
 * NOTE(review): the on-disk layout is not visible here; presumably a raw
 * binary format, judging by the class name. Confirm against the
 * implementation file.
 */
00027 class CBinaryFile: public CFile
00028 {
00029 public:
          /**
           * Construct from a C stdio FILE handle.
           *
           * @param f    FILE handle to operate on
           * @param name optional name, defaults to NULL (its meaning is not
           *             visible in this header; TODO confirm against CFile)
           */
00035     CBinaryFile(FILE* f, const char* name=NULL);
00036 
          /**
           * Construct from a file name.
           *
           * @param fname name of the file; NOTE(review): taken as non-const
           *              char*, presumably not modified (confirm)
           * @param rw    access mode character, defaults to 'r'; presumably
           *              'r' means read and 'w' means write (TODO confirm)
           * @param name  optional name, defaults to NULL
           */
00043     CBinaryFile(char* fname, char rw='r', const char* name=NULL);
00044 
          /** Virtual destructor. */
00046     virtual ~CBinaryFile();
00047 
          /**
           * Vector getters, one overload name per element type.
           *
           * @param vector non-const reference to the data pointer; presumably
           *               set to the data that was read. Who allocates and who
           *               frees the buffer is not visible here (TODO confirm)
           * @param len    non-const reference; presumably receives the number
           *               of elements (TODO confirm)
           */
00055     virtual void get_byte_vector(uint8_t*& vector, int32_t& len);
00056     virtual void get_char_vector(char*& vector, int32_t& len);
00057     virtual void get_int_vector(int32_t*& vector, int32_t& len);
00058     virtual void get_real_vector(float64_t*& vector, int32_t& len);
00059     virtual void get_shortreal_vector(float32_t*& vector, int32_t& len);
00060     virtual void get_short_vector(int16_t*& vector, int32_t& len);
00061     virtual void get_word_vector(uint16_t*& vector, int32_t& len);
00063 
          /**
           * Dense matrix getters, one overload name per element type.
           *
           * @param matrix   non-const reference to the data pointer;
           *                 presumably set to the data that was read
           *                 (ownership not visible here; TODO confirm)
           * @param num_feat non-const reference; the name suggests the number
           *                 of features (TODO confirm storage order)
           * @param num_vec  non-const reference; the name suggests the number
           *                 of vectors (TODO confirm)
           */
00072     virtual void get_byte_matrix(
00073             uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00074     virtual void get_char_matrix(
00075             char*& matrix, int32_t& num_feat, int32_t& num_vec);
00076     virtual void get_int_matrix(
00077             int32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00078     virtual void get_uint_matrix(
00079             uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00080     virtual void get_long_matrix(
00081             int64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00082     virtual void get_ulong_matrix(
00083             uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00084     virtual void get_shortreal_matrix(
00085             float32_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00086     virtual void get_real_matrix(
00087             float64_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00088     virtual void get_longreal_matrix(
00089             floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00090     virtual void get_short_matrix(
00091             int16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00092     virtual void get_word_matrix(
00093             uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec);
00095 
          /**
           * N-dimensional array getters, one overload name per element type.
           * No matching ndarray setters are declared in this class.
           *
           * @param array    non-const reference to the data pointer;
           *                 presumably set to the data that was read
           *                 (ownership not visible here; TODO confirm)
           * @param dims     non-const reference to an int32_t pointer; the
           *                 name suggests the extent of each dimension
           *                 (TODO confirm)
           * @param num_dims non-const reference; the name suggests the number
           *                 of dimensions (TODO confirm)
           */
00104     virtual void get_byte_ndarray(
00105             uint8_t*& array, int32_t*& dims, int32_t& num_dims);
00106     virtual void get_char_ndarray(
00107             char*& array, int32_t*& dims, int32_t& num_dims);
00108     virtual void get_int_ndarray(
00109             int32_t*& array, int32_t*& dims, int32_t& num_dims);
00110     virtual void get_shortreal_ndarray(
00111             float32_t*& array, int32_t*& dims, int32_t& num_dims);
00112     virtual void get_real_ndarray(
00113             float64_t*& array, int32_t*& dims, int32_t& num_dims);
00114     virtual void get_short_ndarray(
00115             int16_t*& array, int32_t*& dims, int32_t& num_dims);
00116     virtual void get_word_ndarray(
00117             uint16_t*& array, int32_t*& dims, int32_t& num_dims);
00119 
          /**
           * Sparse matrix getters, one overload name per element type.
           *
           * @param matrix   non-const reference to a TSparse pointer;
           *                 presumably set to the data that was read
           *                 (ownership not visible here; TODO confirm)
           * @param num_feat non-const reference; the name suggests the number
           *                 of features (TODO confirm)
           * @param num_vec  non-const reference; the name suggests the number
           *                 of vectors (TODO confirm)
           */
00128     virtual void get_bool_sparsematrix(
00129             TSparse<bool>*& matrix, int32_t& num_feat, int32_t& num_vec);
00130     virtual void get_byte_sparsematrix(
00131             TSparse<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00132     virtual void get_char_sparsematrix(
00133             TSparse<char>*& matrix, int32_t& num_feat, int32_t& num_vec);
00134     virtual void get_int_sparsematrix(
00135             TSparse<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00136     virtual void get_uint_sparsematrix(
00137             TSparse<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00138     virtual void get_long_sparsematrix(
00139             TSparse<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00140     virtual void get_ulong_sparsematrix(
00141             TSparse<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00142     virtual void get_short_sparsematrix(
00143             TSparse<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00144     virtual void get_word_sparsematrix(
00145             TSparse<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00146     virtual void get_shortreal_sparsematrix(
00147             TSparse<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00148     virtual void get_real_sparsematrix(
00149             TSparse<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00150     virtual void get_longreal_sparsematrix(
00151             TSparse<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec);
00153 
00154 
          /**
           * String list getters, one overload name per element type.
           *
           * @param strings        non-const reference to a T_STRING pointer;
           *                       presumably set to the strings that were
           *                       read (ownership not visible here; TODO
           *                       confirm)
           * @param num_str        non-const reference; the name suggests the
           *                       number of strings (TODO confirm)
           * @param max_string_len non-const reference; the name suggests the
           *                       length of the longest string (TODO confirm)
           */
00163     virtual void get_byte_string_list(
00164             T_STRING<uint8_t>*& strings, int32_t& num_str,
00165             int32_t& max_string_len);
00166     virtual void get_char_string_list(
00167             T_STRING<char>*& strings, int32_t& num_str,
00168             int32_t& max_string_len);
00169     virtual void get_int_string_list(
00170             T_STRING<int32_t>*& strings, int32_t& num_str,
00171             int32_t& max_string_len);
00172     virtual void get_uint_string_list(
00173             T_STRING<uint32_t>*& strings, int32_t& num_str,
00174             int32_t& max_string_len);
00175     virtual void get_short_string_list(
00176             T_STRING<int16_t>*& strings, int32_t& num_str,
00177             int32_t& max_string_len);
00178     virtual void get_word_string_list(
00179             T_STRING<uint16_t>*& strings, int32_t& num_str,
00180             int32_t& max_string_len);
00181     virtual void get_long_string_list(
00182             T_STRING<int64_t>*& strings, int32_t& num_str,
00183             int32_t& max_string_len);
00184     virtual void get_ulong_string_list(
00185             T_STRING<uint64_t>*& strings, int32_t& num_str,
00186             int32_t& max_string_len);
00187     virtual void get_shortreal_string_list(
00188             T_STRING<float32_t>*& strings, int32_t& num_str,
00189             int32_t& max_string_len);
00190     virtual void get_real_string_list(
00191             T_STRING<float64_t>*& strings, int32_t& num_str,
00192             int32_t& max_string_len);
00193     virtual void get_longreal_string_list(
00194             T_STRING<floatmax_t>*& strings, int32_t& num_str,
00195             int32_t& max_string_len);
00197 
          /**
           * Vector setters, one overload name per element type. The element
           * types mirror the vector getters declared above.
           *
           * @param vector pointer to const elements (the callee cannot modify
           *               them through this pointer)
           * @param len    passed by value; the name suggests the number of
           *               elements in vector (TODO confirm)
           */
00205     virtual void set_byte_vector(const uint8_t* vector, int32_t len);
00206     virtual void set_char_vector(const char* vector, int32_t len);
00207     virtual void set_int_vector(const int32_t* vector, int32_t len);
00208     virtual void set_shortreal_vector( const float32_t* vector, int32_t len);
00209     virtual void set_real_vector(const float64_t* vector, int32_t len);
00210     virtual void set_short_vector(const int16_t* vector, int32_t len);
00211     virtual void set_word_vector(const uint16_t* vector, int32_t len);
00213 
00214 
          /**
           * Dense matrix setters, one overload name per element type. The
           * element types mirror the dense matrix getters declared above.
           *
           * @param matrix   pointer to const elements
           * @param num_feat passed by value; the name suggests the number of
           *                 features (TODO confirm storage order)
           * @param num_vec  passed by value; the name suggests the number of
           *                 vectors (TODO confirm)
           */
00222     virtual void set_byte_matrix(
00223             const uint8_t* matrix, int32_t num_feat, int32_t num_vec);
00224     virtual void set_char_matrix(
00225             const char* matrix, int32_t num_feat, int32_t num_vec);
00226     virtual void set_int_matrix(
00227             const int32_t* matrix, int32_t num_feat, int32_t num_vec);
00228     virtual void set_uint_matrix(
00229             const uint32_t* matrix, int32_t num_feat, int32_t num_vec);
00230     virtual void set_long_matrix(
00231             const int64_t* matrix, int32_t num_feat, int32_t num_vec);
00232     virtual void set_ulong_matrix(
00233             const uint64_t* matrix, int32_t num_feat, int32_t num_vec);
00234     virtual void set_shortreal_matrix(
00235             const float32_t* matrix, int32_t num_feat, int32_t num_vec);
00236     virtual void set_real_matrix(
00237             const float64_t* matrix, int32_t num_feat, int32_t num_vec);
00238     virtual void set_longreal_matrix(
00239             const floatmax_t* matrix, int32_t num_feat, int32_t num_vec);
00240     virtual void set_short_matrix(
00241             const int16_t* matrix, int32_t num_feat, int32_t num_vec);
00242     virtual void set_word_matrix(
00243             const uint16_t* matrix, int32_t num_feat, int32_t num_vec);
00245 
          /**
           * Sparse matrix setters, one overload name per element type. The
           * element types mirror the sparse matrix getters declared above.
           *
           * @param matrix   pointer to const TSparse entries
           * @param num_feat passed by value; the name suggests the number of
           *                 features (TODO confirm)
           * @param num_vec  passed by value; the name suggests the number of
           *                 vectors (TODO confirm)
           */
00253     virtual void set_bool_sparsematrix(
00254             const TSparse<bool>* matrix, int32_t num_feat, int32_t num_vec);
00255     virtual void set_byte_sparsematrix(
00256             const TSparse<uint8_t>* matrix, int32_t num_feat, int32_t num_vec);
00257     virtual void set_char_sparsematrix(
00258             const TSparse<char>* matrix, int32_t num_feat, int32_t num_vec);
00259     virtual void set_int_sparsematrix(
00260             const TSparse<int32_t>* matrix, int32_t num_feat, int32_t num_vec);
00261     virtual void set_uint_sparsematrix(
00262             const TSparse<uint32_t>* matrix, int32_t num_feat, int32_t num_vec);
00263     virtual void set_long_sparsematrix(
00264             const TSparse<int64_t>* matrix, int32_t num_feat, int32_t num_vec);
00265     virtual void set_ulong_sparsematrix(
00266             const TSparse<uint64_t>* matrix, int32_t num_feat, int32_t num_vec);
00267     virtual void set_short_sparsematrix(
00268             const TSparse<int16_t>* matrix, int32_t num_feat, int32_t num_vec);
00269     virtual void set_word_sparsematrix(
00270             const TSparse<uint16_t>* matrix, int32_t num_feat, int32_t num_vec); 
00271     virtual void set_shortreal_sparsematrix(
00272             const TSparse<float32_t>* matrix, int32_t num_feat, int32_t num_vec);
00273     virtual void set_real_sparsematrix(
00274             const TSparse<float64_t>* matrix, int32_t num_feat, int32_t num_vec);
00275     virtual void set_longreal_sparsematrix(
00276             const TSparse<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec);
00278 
00279 
          /**
           * String list setters, one overload name per element type. Unlike
           * the string list getters, these take no maximum-length argument.
           *
           * @param strings pointer to const T_STRING entries
           * @param num_str passed by value; the name suggests the number of
           *                strings (TODO confirm)
           */
00288     virtual void set_byte_string_list(
00289             const T_STRING<uint8_t>* strings, int32_t num_str);
00290     virtual void set_char_string_list(
00291             const T_STRING<char>* strings, int32_t num_str);
00292     virtual void set_int_string_list(
00293             const T_STRING<int32_t>* strings, int32_t num_str);
00294     virtual void set_uint_string_list(
00295             const T_STRING<uint32_t>* strings, int32_t num_str);
00296     virtual void set_short_string_list(
00297             const T_STRING<int16_t>* strings, int32_t num_str);
00298     virtual void set_word_string_list(
00299             const T_STRING<uint16_t>* strings, int32_t num_str);
00300     virtual void set_long_string_list(
00301             const T_STRING<int64_t>* strings, int32_t num_str);
00302     virtual void set_ulong_string_list(
00303             const T_STRING<uint64_t>* strings, int32_t num_str);
00304     virtual void set_shortreal_string_list(
00305             const T_STRING<float32_t>* strings, int32_t num_str);
00306     virtual void set_real_string_list(
00307             const T_STRING<float64_t>* strings, int32_t num_str);
00308     virtual void set_longreal_string_list(
00309             const T_STRING<floatmax_t>* strings, int32_t num_str);
00311 
          /** @return the string literal "BinaryFile" */
00313     inline virtual const char* get_name() const { return "BinaryFile"; }
00314 
00315 protected:
          /**
           * Read a header and report the data type it describes.
           *
           * @return an SGDataType value; the header layout and the value
           *         reported on failure are not visible in this header
           */
00320     SGDataType read_header();
00321 
          /**
           * Write a header for the given data type.
           *
           * @param datatype data type to record in the header
           */
00326     void write_header(SGDataType datatype);
00327 
          /**
           * Parse the first header.
           *
           * @param type non-const reference; presumably receives the parsed
           *             data type (TODO confirm)
           * @return int32_t whose meaning is not visible in this header
           *         (TODO confirm against the implementation)
           */
00333     int32_t parse_first_header(SGDataType &type);
00334     
          /**
           * Parse the next header.
           *
           * @param type non-const reference; presumably receives the parsed
           *             data type (TODO confirm)
           * @return int32_t whose meaning is not visible in this header
           *         (TODO confirm against the implementation)
           */
00340     int32_t parse_next_header(SGDataType &type);
00341 
00342 private:
          /**
           * Load elements of type DT by delegating to CSimpleFile.
           *
           * Builds a temporary CSimpleFile<DT> from `filename` and `file`
           * (neither is declared in this class; presumably both are members
           * inherited from CFile) and forwards to its load().
           *
           * @param target forwarded unchanged to CSimpleFile<DT>::load()
           * @param num    forwarded by reference to CSimpleFile<DT>::load()
           * @return the value returned by CSimpleFile<DT>::load()
           */
00349     template <class DT> DT* load_data(DT* target, int64_t& num)
00350     {
00351         CSimpleFile<DT> f(filename, file);
00352         return f.load(target, num);
00353     }
00354 
          /**
           * Save elements of type DT by delegating to CSimpleFile.
           *
           * Builds a temporary CSimpleFile<DT> from `filename` and `file`
           * (neither is declared in this class; presumably both are members
           * inherited from CFile) and forwards to its save().
           *
           * @param src forwarded unchanged to CSimpleFile<DT>::save()
           * @param num forwarded by value to CSimpleFile<DT>::save()
           * @return the value returned by CSimpleFile<DT>::save()
           */
00361     template <class DT> bool save_data(DT* src, int64_t num)
00362     {
00363         CSimpleFile<DT> f(filename, file);
00364         return f.save(src, num);
00365     }
00366 };
00367 }
00368 #endif //__BINARY_FILE_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation