PluginEstimate.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "features/StringFeatures.h"
00014 #include "features/Labels.h"
00015 #include "distributions/hmm/LinearHMM.h"
00016 #include "classifier/PluginEstimate.h"
00017
00018
00019 CPluginEstimate::CPluginEstimate(float64_t pos_pseudo, float64_t neg_pseudo)
00020 : CClassifier(), m_pos_pseudo(1e-10), m_neg_pseudo(1e-10),
00021 pos_model(NULL), neg_model(NULL), features(NULL)
00022 {
00023 }
00024
00025 CPluginEstimate::~CPluginEstimate()
00026 {
00027 delete pos_model;
00028 delete neg_model;
00029
00030 SG_UNREF(features);
00031 }
00032
00033 bool CPluginEstimate::train()
00034 {
00035 ASSERT(labels);
00036 ASSERT(features);
00037
00038 delete pos_model;
00039 delete neg_model;
00040
00041 pos_model=new CLinearHMM(features);
00042 neg_model=new CLinearHMM(features);
00043
00044 int32_t* pos_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()];
00045 int32_t* neg_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()];
00046
00047 ASSERT(labels->get_num_labels()==features->get_num_vectors());
00048
00049 int32_t pos_idx=0;
00050 int32_t neg_idx=0;
00051
00052 for (int32_t i=0; i<labels->get_num_labels(); i++)
00053 {
00054 if (labels->get_label(i) > 0)
00055 pos_indizes[pos_idx++]=i;
00056 else
00057 neg_indizes[neg_idx++]=i;
00058 }
00059
00060 SG_INFO( "training using pseudos %f and %f\n", m_pos_pseudo, m_neg_pseudo);
00061 pos_model->train(pos_indizes, pos_idx, m_pos_pseudo);
00062 neg_model->train(neg_indizes, neg_idx, m_neg_pseudo);
00063
00064 delete[] pos_indizes;
00065 delete[] neg_indizes;
00066
00067 return true;
00068 }
00069
00070 CLabels* CPluginEstimate::classify(CLabels* result)
00071 {
00072 ASSERT(features);
00073
00074 if (!result)
00075 result=new CLabels(features->get_num_vectors());
00076 ASSERT(result->get_num_labels()==features->get_num_vectors());
00077
00078 for (int32_t vec=0; vec<features->get_num_vectors(); vec++)
00079 result->set_label(vec, classify_example(vec));
00080
00081 return result;
00082 }
00083
00084 float64_t CPluginEstimate::classify_example(int32_t vec_idx)
00085 {
00086 ASSERT(features);
00087
00088 int32_t len;
00089 uint16_t* vector=features->get_feature_vector(vec_idx, len);
00090
00091 if ((!pos_model) || (!neg_model))
00092 SG_ERROR( "model(s) not assigned\n");
00093
00094 float64_t result=pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len);
00095 return result;
00096 }