MindyGramFeatures.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include "lib/config.h"
00014
00015 #ifdef HAVE_MINDY
00016
00017 #include "features/Features.h"
00018 #include "features/StringFeatures.h"
00019 #include "features/MindyGramFeatures.h"
00020 #include "lib/common.h"
00021 #include "lib/io.h"
00022 #include "lib/File.h"
00023
00024 #include <math.h>
00025 #include <mindy.h>
00026
00030 CMindyGramFeatures::~CMindyGramFeatures()
00031 {
00032 SG_DEBUG( "Destroying Mindy gram features\n");
00033
00034 for (int32_t i = 0; i < num_vectors; i++)
00035 gram_destroy(vectors[i]);
00036 free(vectors);
00037
00038
00039 micfg_destroy(cfg);
00040 }
00041
00045 CFeatures *CMindyGramFeatures::duplicate() const
00046 {
00047 return new CMindyGramFeatures(*this);
00048 }
00049
00055 gram_t *CMindyGramFeatures::get_feature_vector(int32_t i)
00056 {
00057 ASSERT(vectors);
00058 ASSERT(i>=0 && i<num_vectors);
00059
00060 return vectors[i];
00061 }
00062
00067 void CMindyGramFeatures::set_feature_vector(int32_t i, gram_t * g)
00068 {
00069 ASSERT(vectors);
00070 ASSERT(i>=0 && i<num_vectors);
00071
00072
00073 if (vectors[i])
00074 gram_destroy(vectors[i]);
00075
00076 vectors[i] = g;
00077 }
00078
00086 uint64_t CMindyGramFeatures::get_feature(int32_t i, int32_t j)
00087 {
00088 ASSERT(vectors && i<num_vectors);
00089 ASSERT(j<(signed) vectors[i]->num);
00090
00091 return vectors[i]->gram[j];
00092 }
00093
00099 int32_t CMindyGramFeatures::get_vector_length(int32_t i)
00100 {
00101 ASSERT(vectors && i<num_vectors);
00102 return vectors[i]->num;
00103 }
00104
00109 void CMindyGramFeatures::trim_max(float64_t max)
00110 {
00111 for (int32_t i = 0; i < num_vectors; i++)
00112 gram_trim_max(vectors[i], max);
00113 }
00114
00120 bool CMindyGramFeatures::load(char * fname)
00121 {
00122 SG_INFO( "Loading strings from %s\n", fname);
00123 int64_t len = 0;
00124 char *s, *t;
00125
00126 CFile f(fname, 'r', F_CHAR);
00127 char *data = f.load_char_data(NULL, len);
00128
00129 if (!f.is_ok()) {
00130 SG_ERROR( "Reading file failed\n");
00131 return false;
00132 }
00133
00134
00135 num_vectors = 0;
00136 for (int64_t i = 0; i < len; i++)
00137 if (data[i] == '\n')
00138 SG_INFO( "File contains %ld string vectors\n",
00139 num_vectors);
00140
00141 vectors = (gram_t **) calloc(num_vectors, sizeof(gram_t *));
00142 if (!vectors) {
00143 SG_ERROR( "Could not allocate memory\n");
00144 return false;
00145 }
00146
00147
00148 t = s = data;
00149 for (int64_t i = 0; i < num_vectors; i++, t++) {
00150 if (*t != '\n')
00151 continue;
00152
00153 vectors[i] = gram_extract(cfg, (byte_t *) s, t - s);
00154 s = t + 1;
00155 }
00156
00157 return true;
00158 }
00159 #endif