MindyGramFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2006 Konrad Rieck
00008  * Copyright (C) 2006-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  *
00010  * Indentation: bcpp -f 1 -s -ylcnc -bcl -i 4
00011  */
00012 
00013 #include "lib/config.h"
00014 
00015 #ifdef HAVE_MINDY
00016 
00017 #include "features/Features.h"
00018 #include "features/StringFeatures.h"
00019 #include "features/MindyGramFeatures.h"
00020 #include "lib/common.h"
00021 #include "lib/io.h"
00022 #include "lib/File.h"
00023 
00024 #include <math.h>
00025 #include <mindy.h>
00026 
00030 CMindyGramFeatures::~CMindyGramFeatures()
00031 {
00032     SG_DEBUG( "Destroying Mindy gram features\n");
00033     /* Destroy gram vectors */
00034     for (int32_t i = 0; i < num_vectors; i++)
00035         gram_destroy(vectors[i]);
00036     free(vectors);
00037 
00038     /* Destroy configuration */
00039     micfg_destroy(cfg);
00040 }
00041 
00045 CFeatures *CMindyGramFeatures::duplicate() const
00046 {
00047     return new CMindyGramFeatures(*this);
00048 }
00049 
00055 gram_t *CMindyGramFeatures::get_feature_vector(int32_t i)
00056 {
00057     ASSERT(vectors);
00058     ASSERT(i>=0 && i<num_vectors);
00059 
00060     return vectors[i];
00061 }
00062 
00067 void CMindyGramFeatures::set_feature_vector(int32_t i, gram_t * g)
00068 {
00069     ASSERT(vectors);
00070     ASSERT(i>=0 && i<num_vectors);
00071 
00072     /* Destroy previous gram */
00073     if (vectors[i])
00074         gram_destroy(vectors[i]);
00075 
00076     vectors[i] = g;
00077 }
00078 
00086 uint64_t CMindyGramFeatures::get_feature(int32_t i, int32_t j)
00087 {
00088     ASSERT(vectors && i<num_vectors);
00089     ASSERT(j<(signed) vectors[i]->num);
00090 
00091     return vectors[i]->gram[j];
00092 }
00093 
00099 int32_t CMindyGramFeatures::get_vector_length(int32_t i)
00100 {
00101     ASSERT(vectors && i<num_vectors);
00102     return vectors[i]->num;
00103 }
00104 
00109 void CMindyGramFeatures::trim_max(float64_t max)
00110 {
00111     for (int32_t i = 0; i < num_vectors; i++)
00112         gram_trim_max(vectors[i], max);
00113 } 
00114 
00120 bool CMindyGramFeatures::load(char * fname)
00121 {
00122     SG_INFO( "Loading strings from %s\n", fname);
00123     int64_t len = 0;
00124     char *s, *t;
00125 
00126     CFile f(fname, 'r', F_CHAR);
00127     char *data = f.load_char_data(NULL, len);
00128 
00129     if (!f.is_ok()) {
00130         SG_ERROR( "Reading file failed\n");
00131         return false;
00132     }
00133 
00134     /* Count strings terminated by \n */
00135     num_vectors = 0;
00136     for (int64_t i = 0; i < len; i++)
00137         if (data[i] == '\n')
00138             SG_INFO( "File contains %ld string vectors\n",
00139                     num_vectors);
00140 
00141     vectors = (gram_t **) calloc(num_vectors, sizeof(gram_t *));
00142     if (!vectors) {
00143         SG_ERROR( "Could not allocate memory\n");
00144         return false;
00145     }
00146 
00147     /* Extract grams from strings */
00148     t = s = data;
00149     for (int64_t i = 0; i < num_vectors; i++, t++) {
00150         if (*t != '\n')
00151             continue;
00152 
00153         vectors[i] = gram_extract(cfg, (byte_t *) s, t - s);
00154         s = t + 1;
00155     }
00156 
00157     return true;
00158 }
00159 #endif

SHOGUN Machine Learning Toolbox - Documentation