#include "lib/config.h"

#ifdef HAVE_MINDY

#include <mindy.h>

#include "lib/common.h"
#include "features/MindyGramFeatures.h"
#include "lib/io.h"
#include "kernel/MindyGramKernel.h"
#include "kernel/SqrtDiagKernelNormalizer.h"

/* Table of tunable kernel parameters: name, Mindy parameter index,
 * default value and a short description. The NULL entry terminates
 * the table. */
param_spec_t p_map[] = {
    { "expo",  SP_EXPO,  2.0,          "Exponent (polynomial, minkowski)" },
    { "shift", SP_SHIFT, 0.0,          "Shift value (polynomial)" },
    { "dist",  SP_DIST,  ST_MINKOWSKI, "Distance name (rbf)" },
    { "width", SP_WIDTH, 1.0,          "Kernel width (rbf)" },
    { NULL },
};

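/**
 * Create a Mindy gram kernel from a similarity measure name.
 * @param ch cache size passed on to CKernel
 * @param meas name of the Mindy similarity measure or coefficient
 * @param w kernel width used when the measure is distance-based
 */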
CMindyGramKernel::CMindyGramKernel(int32_t ch, char *meas, float64_t w)
: CKernel(ch)
{
    /* Initialize attributes */
    measure=meas;
    norm=NO_NORMALIZATION;
    width=w;
    cache=0;

    /* Check whether a similarity coefficient was requested */
    simcof=sico_get_type(measure);

    /* Create the underlying Mindy similarity measure */
    SG_INFO("Initializing Mindy kernel.\n");
    if (simcof==SC_NONE)
        kernel=sm_create(sm_get_type(measure));
    else
        kernel=sm_create(ST_MINKERN);

    SG_INFO("Mindy similarity measure: %s (using %s).\n",
        measure, sm_get_descr(kernel->type));

    /* Linear measures support the linadd optimization */
    if (kernel->type == ST_LINEAR)
    {
        SG_INFO("Optimization supported.\n");
        properties |= KP_LINADD;
    }

    normal=NULL;
    clear_normal();

    set_normalizer(new CSqrtDiagKernelNormalizer());
}

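/**
 * Create a Mindy gram kernel and initialize it on the given features.
 * @param l features of the left-hand side
 * @param r features of the right-hand side
 * @param m name of the Mindy similarity measure or coefficient
 * @param w kernel width used when the measure is distance-based
 */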
CMindyGramKernel::CMindyGramKernel(
    CFeatures* l, CFeatures* r, char *m, float64_t w)
: CKernel(10), measure(m), width(w)
{
    /* Initialize remaining attributes (cf. the other constructor) */
    norm=NO_NORMALIZATION;
    cache=0;

    /* Check whether a similarity coefficient was requested */
    simcof=sico_get_type(measure);

    /* Create the underlying Mindy similarity measure */
    SG_INFO("Initializing Mindy kernel.\n");
    if (simcof==SC_NONE)
        kernel=sm_create(sm_get_type(measure));
    else
        kernel=sm_create(ST_MINKERN);

    SG_INFO("Mindy similarity measure: %s (using %s).\n",
        measure, sm_get_descr(kernel->type));

    /* Linear measures support the linadd optimization */
    if (kernel->type == ST_LINEAR)
    {
        SG_INFO("Optimization supported.\n");
        properties |= KP_LINADD;
    }

    normal=NULL;
    clear_normal();
    set_normalizer(new CSqrtDiagKernelNormalizer());

    init(l, r);
}

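/**
 * Enable an MD5 cache of the given size for gram comparisons.
 * @param c cache size in kilobytes; values <= 0 disable the cache
 */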
void CMindyGramKernel::set_md5cache(int32_t c)
{
    cache = c;
    if (cache <= 0)
        return;

    SG_INFO("Creating MD5 cache of %d kb\n", cache);
    md5_cache_create(cache);
}

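/**
 * Parse a parameter string and configure the kernel accordingly.
 * The string is a list of "name=value" pairs separated by ',' or ';',
 * e.g. (hypothetical values) "expo=3.0,shift=1.0" or "width=2.0".
 * Recognized names are listed in p_map; the resulting values are printed.
 * @param param parameter string
 */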
void CMindyGramKernel::set_param(char *param)
{
    /* Parse the parameter string into p_map and pass values to Mindy */
    parse_params(param);

    /* Report the resulting parameter settings */
    for (int32_t i = 0; p_map[i].name; i++) {
        if (p_map[i].idx != SP_DIST)
            SG_INFO("Param %8s=%8.6f\t %s\n",
                p_map[i].name, p_map[i].val, p_map[i].descr);
        else
            SG_INFO("Param %8s=%s\t %s\n", p_map[i].name,
                sm_get_name((sm_type_t) p_map[i].val),
                p_map[i].descr);
    }
}

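/**
 * Destructor: releases kernel state, the MD5 cache (if any) and the
 * underlying Mindy similarity measure.
 */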
CMindyGramKernel::~CMindyGramKernel()
{
    cleanup();

    if (cache > 0)
        md5_cache_destroy();

    sm_destroy(kernel);
}

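/**
 * Parse a parameter string (see set_param) and apply each recognized
 * value to the Mindy similarity measure via sm_set_param.
 * @param pa parameter string of the form "name=value,..."
 */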
void CMindyGramKernel::parse_params(char *pa)
{
    int32_t i;
    char *t, *p;

    if (strlen(pa) == 0)
        return;

    /* Split the string at ',' or ';' and match each token against p_map */
    while ((t = strsep(&pa, ",;"))) {
        for (i = 0; p_map[i].name; i++) {
            size_t l = strlen(p_map[i].name);
            if (!strncasecmp(t, p_map[i].name, l)) {
                /* Skip the parameter name and the separator character */
                p = t + l + 1;
                if (p_map[i].idx == SP_DIST)
                    p_map[i].val = sm_get_type(p);
                else
                    p_map[i].val = atof(p);
                break;
            }
        }
        if (!p_map[i].name)
            SG_WARNING("Unknown parameter '%s'. Skipping\n", t);
    }

    /* Pass all parameter values on to the Mindy similarity measure */
    for (i = 0; p_map[i].name; i++)
        sm_set_param(kernel, p_map[i].idx, p_map[i].val);
}

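/**
 * Clean up kernel state: discards any linadd optimization and the
 * accumulated normal vector, then delegates to CKernel::cleanup().
 */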
void CMindyGramKernel::cleanup()
{
    delete_optimization();
    clear_normal();

    CKernel::cleanup();
}

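/**
 * Detach the left-hand side features, resetting the kernel cache and
 * any pending optimization.
 */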
void CMindyGramKernel::remove_lhs()
{
    delete_optimization();

#ifdef SVMLIGHT
    if (lhs)
        cache_reset();
#endif

    lhs = NULL;
    rhs = NULL;
}

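/**
 * Detach the right-hand side features and fall back to using the
 * left-hand side on both sides.
 */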
void CMindyGramKernel::remove_rhs()
{
#ifdef SVMLIGHT
    if (rhs)
        cache_reset();
#endif

    if (sdiag_lhs != sdiag_rhs)
        delete[] sdiag_rhs;

    sdiag_rhs = sdiag_lhs;
    rhs = lhs;
}

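/**
 * Initialize the kernel on a pair of MindyGram feature objects.
 * @param l features of the left-hand side
 * @param r features of the right-hand side
 * @return true if the normalizer could be initialized
 */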
bool CMindyGramKernel::init(CFeatures* l, CFeatures* r)
{
    SG_DEBUG("Initializing MindyGramKernel %p %p\n", l, r);

    /* Base class initialization */
    bool result = CKernel::init(l,r);

    /* Both sides must provide MindyGram features over ulong gram indices */
    ASSERT(l->get_feature_class()==C_MINDYGRAM);
    ASSERT(r->get_feature_class()==C_MINDYGRAM);
    ASSERT(l->get_feature_type()==F_ULONG);
    ASSERT(r->get_feature_type()==F_ULONG);

    return init_normalizer();
}

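/**
 * Compute the kernel value for one pair of examples. Distance measures
 * and similarity coefficients are converted to kernel values, either via
 * an RBF transformation (width > 0) or via distance substitution.
 * @param i index into the left-hand side features
 * @param j index into the right-hand side features
 * @return kernel value k(i,j)
 */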
float64_t CMindyGramKernel::compute(int32_t i, int32_t j)
{
    /* Cast to MindyGram features */
    CMindyGramFeatures *lm = (CMindyGramFeatures *) lhs;
    CMindyGramFeatures *rm = (CMindyGramFeatures *) rhs;

    /* Compare the two gram vectors using the Mindy measure */
    float64_t result = gram_cmp(kernel, lm->get_feature_vector(i),
        rm->get_feature_vector(j));

    /* Apply a similarity coefficient if one was requested */
    if (simcof != SC_NONE)
        result = 1 - sico(simcof, result, sdiag_lhs[i], sdiag_rhs[j]);

    /* Convert distances (and coefficients) to kernel values */
    if (sm_get_class(kernel->type) == SC_DIST || simcof != SC_NONE) {
        if (width > 1e-10) {
            /* RBF transformation of the distance */
            result = exp(-result / width);
        } else {
            if (i != j) {
                /* Distance substitution kernel */
                result = 0.5 * (sdiag_lhs[i] + sdiag_rhs[j] - result);
            } else {
                /* Diagonal entry: distance to the empty gram vector */
                gram_t *zero = gram_empty();
                result = gram_cmp(kernel, lm->get_feature_vector(i), zero);
                gram_destroy(zero);
            }
        }
    }

    return result;
}

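/**
 * Add a (normalized) support vector to the normal vector used by the
 * linadd optimization.
 * @param i index of the example on the left-hand side
 * @param w weight of the example (e.g. alpha * y)
 */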
void CMindyGramKernel::add_to_normal(int32_t i, float64_t w)
{
    CMindyGramFeatures *lm = (CMindyGramFeatures *) lhs;

    if (!normal)
        normal = gram_empty();

    gram_add(normal, lm->get_feature_vector(i),
        normalizer->normalize_lhs(w, i));

    set_is_initialized(true);
}

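/**
 * Discard the accumulated normal vector and mark the linadd
 * optimization as uninitialized.
 */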
void CMindyGramKernel::clear_normal()
{
    if (normal)
        gram_destroy(normal);
    normal = NULL;
    set_is_initialized(false);
}

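/**
 * Initialize the linadd optimization from a set of support vectors by
 * accumulating their weighted gram vectors into the normal vector.
 * @param n number of support vectors
 * @param is indices of the support vectors
 * @param ws weights of the support vectors
 * @return true on success
 */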
bool CMindyGramKernel::init_optimization(int32_t n, int32_t *is, float64_t *ws)
{
    /* Discard any previous optimization state */
    delete_optimization();

    if (n <= 0) {
        set_is_initialized(true);
        SG_DEBUG("empty set of SVs\n");
        return true;
    }

    SG_DEBUG("initializing MindyGramKernel optimization\n");
    for (int32_t i = 0; i < n; i++) {
        if ((i % (n / 10 + 1)) == 0)
            SG_PROGRESS(i, 0, n);

        add_to_normal(is[i], ws[i]);
    }
    SG_PRINT("Done.\n");

    set_is_initialized(true);
    return true;
}

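/**
 * Delete the linadd optimization by clearing the normal vector.
 * @return true on success
 */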
bool CMindyGramKernel::delete_optimization()
{
    SG_DEBUG("deleting MindyGramKernel optimization\n");
    clear_normal();
    return true;
}

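/**
 * Compute a kernel value against the accumulated normal vector
 * (linadd optimization).
 * @param i index into the right-hand side features
 * @return normalized kernel value, or -infinity if the optimization
 *         has not been initialized
 */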
float64_t CMindyGramKernel::compute_optimized(int32_t i)
{
    if (!get_is_initialized()) {
        SG_ERROR("MindyGramKernel optimization not initialized\n");
        return -CMath::INFTY;
    }

    CMindyGramFeatures *rm = (CMindyGramFeatures *) rhs;
    float64_t result = gram_cmp(kernel, rm->get_feature_vector(i), normal);

    return normalizer->normalize_rhs(result, i);
}

/* Loading and saving of kernel initialization data is not implemented */
bool CMindyGramKernel::load_init(FILE* src)
{
    return false;
}

bool CMindyGramKernel::save_init(FILE* dest)
{
    return false;
}
#endif /* HAVE_MINDY */