RealFileFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
#include "features/RealFileFeatures.h"
#include "features/Features.h"
#include "lib/io.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
00017 
00018 using namespace shogun;
00019 
00020 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname)
00021 : CSimpleFeatures<float64_t>(size)
00022 {
00023     working_file=fopen(fname, "r");
00024     working_filename=strdup(fname);
00025     ASSERT(working_file);
00026     intlen=0;
00027     doublelen=0;
00028     endian=0;
00029     fourcc=0;
00030     preprocd=0;
00031     labels=NULL;
00032     status=load_base_data();
00033 }
00034 
00035 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file)
00036 : CSimpleFeatures<float64_t>(size), working_file(file), working_filename(NULL)
00037 {
00038     ASSERT(working_file);
00039     intlen=0;
00040     doublelen=0;
00041     endian=0;
00042     fourcc=0;
00043     preprocd=0;
00044     labels=NULL;
00045     status=load_base_data();
00046 }
00047 
00048 CRealFileFeatures::~CRealFileFeatures()
00049 {
00050     delete[] feature_matrix;
00051     delete[] working_filename;
00052     delete[] labels;
00053 }
00054 
00055 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig)
00056 : CSimpleFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status)
00057 {
00058     if (orig.working_filename)
00059         working_filename=strdup(orig.working_filename);
00060     if (orig.labels && get_num_vectors())
00061     {
00062         labels=new int32_t[get_num_vectors()];
00063         memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors());
00064     }
00065 }
00066 
00067 float64_t* CRealFileFeatures::compute_feature_vector(
00068     int32_t num, int32_t &len, float64_t* target)
00069 {
00070     ASSERT(num<num_vectors);
00071     len=num_features;
00072     float64_t* featurevector=target;
00073     if (!featurevector)
00074         featurevector=new float64_t[num_features];
00075     ASSERT(working_file);
00076     fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET);
00077     ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features);
00078     return featurevector;
00079 }
00080 
00081 float64_t* CRealFileFeatures::load_feature_matrix()
00082 {
00083     ASSERT(working_file);
00084     fseek(working_file, filepos, SEEK_SET);
00085     delete[] feature_matrix;
00086 
00087     SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0);
00088     free_feature_matrix();
00089     feature_matrix=new float64_t[num_features*num_vectors];
00090 
00091     SG_INFO( "loading... be patient.\n");
00092 
00093     for (int32_t i=0; i<(int32_t) num_vectors; i++)
00094     {
00095         if (!(i % (num_vectors/10+1)))
00096             SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors));
00097         else if (!(i % (num_vectors/200+1)))
00098             SG_PRINT( ".");
00099 
00100         ASSERT(fread(&feature_matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features);
00101     }
00102     SG_DONE();
00103 
00104     return feature_matrix;
00105 }
00106 
00107 int32_t CRealFileFeatures::get_label(int32_t idx)
00108 {
00109     ASSERT(idx<num_vectors);
00110     if (labels)
00111         return labels[idx];
00112     return 0;
00113 }
00114 
00115 bool CRealFileFeatures::load_base_data()
00116 {
00117     ASSERT(working_file);
00118     uint32_t num_vec=0;
00119     uint32_t num_feat=0;
00120 
00121     ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1);
00122     ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1);
00123     ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1);
00124     ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1);
00125     ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1);
00126     ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1);
00127     ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1);
00128     SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd);
00129     filepos=ftell(working_file);
00130     set_num_vectors(num_vec);
00131     set_num_features(num_feat);
00132     fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET);
00133     delete[] labels;
00134     labels=new int[num_vec];
00135     ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec);
00136     return true;
00137 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation