Labels.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "features/Labels.h"
00013 #include "lib/common.h"
00014 #include "lib/File.h"
00015 #include "lib/io.h"
00016 #include "lib/Mathematics.h"
00017 
00018 #ifdef HAVE_BOOST_SERIALIZATION
00019 #include <boost/serialization/export.hpp>
00020 BOOST_CLASS_EXPORT(shogun::CLabels);
00021 #endif //HAVE_BOOST_SERIALIZATION
00022 
00023 using namespace shogun;
00024 
00025 CLabels::CLabels()
00026 : CSGObject()
00027 {
00028     labels = NULL;
00029     num_labels = 0;
00030     m_confidences=NULL;
00031     m_num_classes=0;
00032 }
00033 
00034 CLabels::CLabels(int32_t num_lab)
00035 : CSGObject(), num_labels(num_lab)
00036 {
00037     labels=new float64_t[num_lab];
00038     for (int32_t i=0; i<num_lab; i++)
00039         labels[i]=0;
00040 
00041     m_num_classes=0;
00042     m_confidences=NULL;
00043 }
00044 
00045 CLabels::CLabels(float64_t* p_labels, int32_t len)
00046 : CSGObject()
00047 {
00048     labels = NULL;
00049     num_labels = 0;
00050 
00051     set_labels(p_labels, len);
00052     
00053     // We don't allocate the confidences matrix, unless it is necessary. 
00054     // For problems with many classes and samples it might get really big.
00055     m_num_classes=get_num_classes();
00056     m_confidences=NULL; 
00057 }
00058 
00059 CLabels::CLabels(float64_t* in_confidences, int32_t in_num_labels, 
00060                  int32_t in_num_classes)
00061 : CSGObject()
00062 {
00063     labels=new float64_t[in_num_labels];
00064     for (int32_t i=0; i<in_num_labels; i++)
00065         labels[i]=0;
00066 
00067     m_num_classes=in_num_classes;
00068     m_confidences=in_confidences;
00069     find_labels();
00070 }
00071 
00072 CLabels::CLabels(CFile* loader)
00073 : CSGObject()
00074 {
00075     num_labels=0;
00076     labels=NULL;
00077     m_num_classes=0;
00078     m_confidences=NULL;
00079 
00080     load(loader);
00081 }
00082 
00083 CLabels::~CLabels()
00084 {
00085     delete[] labels;
00086     delete[] m_confidences;
00087     num_labels=0;
00088     m_num_classes=0;
00089     labels=NULL;
00090     m_confidences=NULL;
00091 }
00092 
00093 void CLabels::set_labels(float64_t* p_labels, int32_t len)
00094 {
00095     ASSERT(len>0);
00096     num_labels=len;
00097 
00098     delete[] labels;
00099     labels=CMath::clone_vector(p_labels, len);
00100 }
00101 
00102 void CLabels::set_confidences(float64_t* in_confidences, int32_t in_num_labels, 
00103                               int32_t in_num_classes)
00104 {
00105     if (num_labels && (num_labels != in_num_labels))
00106     {
00107         SG_ERROR("Shape of confidence matrix mismatch (number of "
00108                 "labels = %d does not match %d\n", num_labels, in_num_labels);
00109     }
00110 
00111     if (m_num_classes && (m_num_classes != in_num_classes))
00112     {
00113         SG_ERROR("Shape of confidence matrix mismatch (number of "
00114                 "num_classes = %d does not match %d\n", m_num_classes, in_num_classes);
00115     }
00116 
00117     delete[] m_confidences;
00118 
00119     num_labels=in_num_labels;
00120     m_num_classes=in_num_classes;
00121     m_confidences=in_confidences;
00122     find_labels();
00123 }
00124 
00125 float64_t* CLabels::get_confidences(int32_t& out_num_labels, int32_t& out_num_classes)
00126 {
00127     out_num_labels=num_labels;
00128     out_num_classes=m_num_classes;
00129     
00130     if (!num_labels || !m_num_classes || !m_confidences)
00131         SG_ERROR("No labels / confidences set\n");
00132 
00133     float64_t* out_conf=new float64_t[num_labels*m_num_classes];
00134     memcpy(out_conf, m_confidences, num_labels*m_num_classes*sizeof(float64_t));
00135     return out_conf;
00136 }
00137 
00138 void CLabels::get_confidences(float64_t** dst, int32_t* out_num_labels, int32_t* out_num_classes)
00139 {
00140     ASSERT(dst && out_num_labels && out_num_classes);
00141 
00142     if (num_labels<=0 || m_num_classes<=0 || !m_confidences)
00143         SG_ERROR("No labels / confidences set\n");
00144 
00145     *dst=NULL;
00146     *out_num_labels=num_labels;
00147     *out_num_classes=m_num_classes;
00148 
00149     float64_t* out_conf= (float64_t*) malloc((size_t) sizeof(float64_t)*num_labels*m_num_classes);
00150     memcpy(out_conf, m_confidences, num_labels*m_num_classes*sizeof(float64_t));
00151     *dst=out_conf;
00152 }
00153 
00154 float64_t* CLabels::get_sample_confidences(const int32_t& in_sample_index, 
00155                                            int32_t& out_num_classes)
00156 {
00157     out_num_classes=m_num_classes;
00158 
00159     if (!(in_sample_index>=0 && in_sample_index<num_labels &&
00160                 m_num_classes && m_confidences))
00161     {
00162         SG_ERROR("No labels / confidences set\n");
00163     }
00164 
00165     float64_t* out_conf=new float64_t[m_num_classes];
00166     for (int32_t n_class=0; n_class<m_num_classes; n_class++)
00167     {
00168         out_conf[n_class]=m_confidences[n_class+in_sample_index*m_num_classes];
00169     }
00170     return out_conf;
00171 }
00172 
00173 void CLabels::find_labels()
00174 {
00175     ASSERT(m_confidences);
00176     ASSERT(labels);
00177     
00178     float64_t max_conf;
00179     int32_t index;
00180     for (int32_t n_samp=0; n_samp<num_labels; n_samp++)
00181     {
00182         max_conf=m_confidences[n_samp];
00183         labels[n_samp]=0;
00184         for (int32_t n_class=1; n_class<m_num_classes; n_class++)
00185         {
00186             index=n_samp+n_class*m_num_classes;
00187             if (m_confidences[index]>max_conf)
00188             {
00189                 max_conf=m_confidences[index];
00190                 labels[n_samp]=n_class;             
00191             }
00192         }
00193     }
00194 }
00195 
00196 bool CLabels::is_two_class_labeling()
00197 {
00198     ASSERT(labels);
00199     bool found_plus_one=false;
00200     bool found_minus_one=false;
00201 
00202     for (int32_t i=0; i<num_labels; i++)
00203     {
00204         if (labels[i]==+1.0)
00205             found_plus_one=true;
00206         else if (labels[i]==-1.0)
00207             found_minus_one=true;
00208         else
00209             SG_ERROR("Not a two class labeling label[%d]=%f (only +1/-1 allowed)\n", i, labels[i]);
00210     }
00211 
00212     if (!found_plus_one)
00213         SG_ERROR("Not a two class labeling - no positively labeled examples found\n");
00214     if (!found_minus_one)
00215         SG_ERROR("Not a two class labeling - no negatively labeled examples found\n");
00216 
00217     return true;
00218 }
00219 
00220 int32_t CLabels::get_num_classes()
00221 {
00222     int32_t n=-1;
00223     int32_t* lab=get_int_labels(n);
00224 
00225     int32_t num_classes=0;
00226     for (int32_t i=0; i<n; i++)
00227         num_classes=CMath::max(num_classes,lab[i]);
00228 
00229     delete[] lab;
00230 
00231     return num_classes+1;
00232 }
00233 
00234 float64_t* CLabels::get_labels(int32_t &len)
00235 {
00236     len=num_labels;
00237 
00238     if (num_labels>0)
00239     {
00240         float64_t* _labels=new float64_t[num_labels] ;
00241         for (int32_t i=0; i<len; i++)
00242             _labels[i]=get_label(i) ;
00243         return _labels ;
00244     }
00245     else 
00246         return NULL;
00247 }
00248 
00249 void CLabels::get_labels(float64_t** p_labels, int32_t* len)
00250 {
00251     ASSERT(p_labels && len);
00252     *p_labels=NULL;
00253     *len=num_labels;
00254 
00255     if (num_labels>0)
00256     {
00257         *p_labels=(float64_t*) malloc(sizeof(float64_t)*num_labels);
00258 
00259         for (int32_t i=0; i<num_labels; i++)
00260             (*p_labels)[i]=get_label(i);
00261     }
00262 }
00263 
00264 int32_t* CLabels::get_int_labels(int32_t &len)
00265 {
00266     len=num_labels;
00267 
00268     if (num_labels>0)
00269     {
00270         int32_t* _labels=new int32_t[num_labels] ;
00271         for (int32_t i=0; i<len; i++)
00272             _labels[i]= (int32_t) get_label(i) ;
00273         return _labels ;
00274     }
00275     else 
00276         return NULL;
00277 }
00278 
00279 void CLabels::set_int_labels(int32_t * mylabels, int32_t len)
00280 {
00281     num_labels = len ;
00282     delete[] labels ;
00283     
00284     labels = new float64_t[num_labels] ;
00285     for (int32_t i=0; i<num_labels; i++)
00286         set_int_label(i, mylabels[i]) ;
00287 }
00288 
00289 void CLabels::load(CFile* loader)
00290 {
00291     delete[] labels;
00292     delete[] m_confidences;
00293     m_confidences = NULL;
00294     num_labels=0;
00295     ASSERT(loader);
00296     loader->get_real_vector(labels, num_labels);
00297     m_num_classes=get_num_classes();
00298 }
00299 
00300 void CLabels::save(CFile* writer)
00301 {
00302     ASSERT(writer);
00303     ASSERT(labels && labels>0);
00304     writer->set_real_vector(labels, num_labels);
00305 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation