PluginEstimate.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "features/StringFeatures.h"
00014 #include "features/Labels.h"
00015 #include "distributions/LinearHMM.h"
00016 #include "classifier/PluginEstimate.h"
00017 
00018 using namespace shogun;
00019 
00020 CPluginEstimate::CPluginEstimate(float64_t pos_pseudo, float64_t neg_pseudo)
00021 : CClassifier(), m_pos_pseudo(1e-10), m_neg_pseudo(1e-10),
00022     pos_model(NULL), neg_model(NULL), features(NULL)
00023 {
00024 }
00025 
00026 CPluginEstimate::~CPluginEstimate()
00027 {
00028     delete pos_model;
00029     delete neg_model;
00030 
00031     SG_UNREF(features);
00032 }
00033 
00034 bool CPluginEstimate::train(CFeatures* data)
00035 {
00036     ASSERT(labels);
00037     if (data)
00038     {
00039         if (data->get_feature_class() != C_STRING ||
00040                 data->get_feature_type() != F_WORD)
00041         {
00042             SG_ERROR("Features not of class string type word\n");
00043         }
00044 
00045         set_features((CStringFeatures<uint16_t>*) data);
00046     }
00047     ASSERT(features);
00048 
00049     delete pos_model;
00050     delete neg_model;
00051 
00052     pos_model=new CLinearHMM(features);
00053     neg_model=new CLinearHMM(features);
00054 
00055     int32_t* pos_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()];
00056     int32_t* neg_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()];
00057 
00058     ASSERT(labels->get_num_labels()==features->get_num_vectors());
00059 
00060     int32_t pos_idx=0;
00061     int32_t neg_idx=0;
00062 
00063     for (int32_t i=0; i<labels->get_num_labels(); i++)
00064     {
00065         if (labels->get_label(i) > 0)
00066             pos_indizes[pos_idx++]=i;
00067         else
00068             neg_indizes[neg_idx++]=i;
00069     }
00070 
00071     SG_INFO( "training using pseudos %f and %f\n", m_pos_pseudo, m_neg_pseudo);
00072     pos_model->train(pos_indizes, pos_idx, m_pos_pseudo);
00073     neg_model->train(neg_indizes, neg_idx, m_neg_pseudo);
00074 
00075     delete[] pos_indizes;
00076     delete[] neg_indizes;
00077     
00078     return true;
00079 }
00080 
00081 CLabels* CPluginEstimate::classify()
00082 {
00083     ASSERT(features);
00084     CLabels* result=new CLabels(features->get_num_vectors());
00085     ASSERT(result->get_num_labels()==features->get_num_vectors());
00086 
00087     for (int32_t vec=0; vec<features->get_num_vectors(); vec++)
00088         result->set_label(vec, classify_example(vec));
00089 
00090     return result;
00091 }
00092 
00093 CLabels* CPluginEstimate::classify(CFeatures* data)
00094 {
00095     if (!data)
00096         SG_ERROR("No features specified\n");
00097 
00098     if (data->get_feature_class() != C_STRING ||
00099             data->get_feature_type() != F_WORD)
00100     {
00101         SG_ERROR("Features not of class string type word\n");
00102     }
00103 
00104     set_features((CStringFeatures<uint16_t>*) data);
00105     return classify();
00106 }
00107 
00108 float64_t CPluginEstimate::classify_example(int32_t vec_idx)
00109 {
00110     ASSERT(features);
00111 
00112     int32_t len;
00113     bool free_vec;
00114     uint16_t* vector=features->get_feature_vector(vec_idx, len, free_vec);
00115 
00116     if ((!pos_model) || (!neg_model))
00117         SG_ERROR( "model(s) not assigned\n");
00118       
00119     float64_t result=pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len);
00120     features->free_feature_vector(vector, vec_idx, free_vec);
00121     return result;
00122 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation