Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "features/StringFeatures.h"
00014 #include "features/Labels.h"
00015 #include "distributions/LinearHMM.h"
00016 #include "classifier/PluginEstimate.h"
00017
00018 using namespace shogun;
00019
00020 CPluginEstimate::CPluginEstimate(float64_t pos_pseudo, float64_t neg_pseudo)
00021 : CClassifier(), m_pos_pseudo(1e-10), m_neg_pseudo(1e-10),
00022 pos_model(NULL), neg_model(NULL), features(NULL)
00023 {
00024 }
00025
00026 CPluginEstimate::~CPluginEstimate()
00027 {
00028 delete pos_model;
00029 delete neg_model;
00030
00031 SG_UNREF(features);
00032 }
00033
00034 bool CPluginEstimate::train(CFeatures* data)
00035 {
00036 ASSERT(labels);
00037 if (data)
00038 {
00039 if (data->get_feature_class() != C_STRING ||
00040 data->get_feature_type() != F_WORD)
00041 {
00042 SG_ERROR("Features not of class string type word\n");
00043 }
00044
00045 set_features((CStringFeatures<uint16_t>*) data);
00046 }
00047 ASSERT(features);
00048
00049 delete pos_model;
00050 delete neg_model;
00051
00052 pos_model=new CLinearHMM(features);
00053 neg_model=new CLinearHMM(features);
00054
00055 int32_t* pos_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()];
00056 int32_t* neg_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()];
00057
00058 ASSERT(labels->get_num_labels()==features->get_num_vectors());
00059
00060 int32_t pos_idx=0;
00061 int32_t neg_idx=0;
00062
00063 for (int32_t i=0; i<labels->get_num_labels(); i++)
00064 {
00065 if (labels->get_label(i) > 0)
00066 pos_indizes[pos_idx++]=i;
00067 else
00068 neg_indizes[neg_idx++]=i;
00069 }
00070
00071 SG_INFO( "training using pseudos %f and %f\n", m_pos_pseudo, m_neg_pseudo);
00072 pos_model->train(pos_indizes, pos_idx, m_pos_pseudo);
00073 neg_model->train(neg_indizes, neg_idx, m_neg_pseudo);
00074
00075 delete[] pos_indizes;
00076 delete[] neg_indizes;
00077
00078 return true;
00079 }
00080
00081 CLabels* CPluginEstimate::classify()
00082 {
00083 ASSERT(features);
00084 CLabels* result=new CLabels(features->get_num_vectors());
00085 ASSERT(result->get_num_labels()==features->get_num_vectors());
00086
00087 for (int32_t vec=0; vec<features->get_num_vectors(); vec++)
00088 result->set_label(vec, classify_example(vec));
00089
00090 return result;
00091 }
00092
00093 CLabels* CPluginEstimate::classify(CFeatures* data)
00094 {
00095 if (!data)
00096 SG_ERROR("No features specified\n");
00097
00098 if (data->get_feature_class() != C_STRING ||
00099 data->get_feature_type() != F_WORD)
00100 {
00101 SG_ERROR("Features not of class string type word\n");
00102 }
00103
00104 set_features((CStringFeatures<uint16_t>*) data);
00105 return classify();
00106 }
00107
00108 float64_t CPluginEstimate::classify_example(int32_t vec_idx)
00109 {
00110 ASSERT(features);
00111
00112 int32_t len;
00113 bool free_vec;
00114 uint16_t* vector=features->get_feature_vector(vec_idx, len, free_vec);
00115
00116 if ((!pos_model) || (!neg_model))
00117 SG_ERROR( "model(s) not assigned\n");
00118
00119 float64_t result=pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len);
00120 features->free_feature_vector(vector, vec_idx, free_vec);
00121 return result;
00122 }