00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "features/RealFileFeatures.h"
00012 #include "features/Features.h"
00013 #include "lib/io.h"
00014
00015 #include <stdio.h>
00016 #include <string.h>
00017
00018 using namespace shogun;
00019
00020 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname)
00021 : CSimpleFeatures<float64_t>(size)
00022 {
00023 working_file=fopen(fname, "r");
00024 working_filename=strdup(fname);
00025 ASSERT(working_file);
00026 intlen=0;
00027 doublelen=0;
00028 endian=0;
00029 fourcc=0;
00030 preprocd=0;
00031 labels=NULL;
00032 status=load_base_data();
00033 }
00034
00035 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file)
00036 : CSimpleFeatures<float64_t>(size), working_file(file), working_filename(NULL)
00037 {
00038 ASSERT(working_file);
00039 intlen=0;
00040 doublelen=0;
00041 endian=0;
00042 fourcc=0;
00043 preprocd=0;
00044 labels=NULL;
00045 status=load_base_data();
00046 }
00047
00048 CRealFileFeatures::~CRealFileFeatures()
00049 {
00050 delete[] feature_matrix;
00051 delete[] working_filename;
00052 delete[] labels;
00053 }
00054
00055 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig)
00056 : CSimpleFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status)
00057 {
00058 if (orig.working_filename)
00059 working_filename=strdup(orig.working_filename);
00060 if (orig.labels && get_num_vectors())
00061 {
00062 labels=new int32_t[get_num_vectors()];
00063 memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors());
00064 }
00065 }
00066
00067 float64_t* CRealFileFeatures::compute_feature_vector(
00068 int32_t num, int32_t &len, float64_t* target)
00069 {
00070 ASSERT(num<num_vectors);
00071 len=num_features;
00072 float64_t* featurevector=target;
00073 if (!featurevector)
00074 featurevector=new float64_t[num_features];
00075 ASSERT(working_file);
00076 fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
00077 ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features);
00078 return featurevector;
00079 }
00080
00081 float64_t* CRealFileFeatures::load_feature_matrix()
00082 {
00083 ASSERT(working_file);
00084 fseek(working_file, filepos, SEEK_SET);
00085 delete[] feature_matrix;
00086
00087 SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0);
00088 free_feature_matrix();
00089 feature_matrix=new float64_t[num_features*num_vectors];
00090
00091 SG_INFO( "loading... be patient.\n");
00092
00093 for (int32_t i=0; i<(int32_t) num_vectors; i++)
00094 {
00095 if (!(i % (num_vectors/10+1)))
00096 SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors));
00097 else if (!(i % (num_vectors/200+1)))
00098 SG_PRINT( ".");
00099
00100 ASSERT(fread(&feature_matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features);
00101 }
00102 SG_DONE();
00103
00104 return feature_matrix;
00105 }
00106
00107 int32_t CRealFileFeatures::get_label(int32_t idx)
00108 {
00109 ASSERT(idx<num_vectors);
00110 if (labels)
00111 return labels[idx];
00112 return 0;
00113 }
00114
00115 bool CRealFileFeatures::load_base_data()
00116 {
00117 ASSERT(working_file);
00118 uint32_t num_vec=0;
00119 uint32_t num_feat=0;
00120
00121 ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1);
00122 ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1);
00123 ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1);
00124 ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1);
00125 ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1);
00126 ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1);
00127 ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1);
00128 SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd);
00129 filepos=ftell(working_file);
00130 set_num_vectors(num_vec);
00131 set_num_features(num_feat);
00132 fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET);
00133 delete[] labels;
00134 labels=new int[num_vec];
00135 ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec);
00136 return true;
00137 }