AUCKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Gunnar Raetsch
00008  * Written (W) 2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "lib/Mathematics.h"
00014 #include "kernel/AUCKernel.h"
00015 #include "features/SimpleFeatures.h"
00016 #include "lib/io.h"
00017 
00018 CAUCKernel::CAUCKernel(int32_t size, CKernel* s)
00019 : CSimpleKernel<uint16_t>(size), subkernel(s)
00020 {
00021     SG_REF(subkernel);
00022 }
00023 
00024 CAUCKernel::~CAUCKernel()
00025 {
00026     SG_UNREF(subkernel);
00027     cleanup();
00028 }
00029 
00030 CLabels* CAUCKernel::setup_auc_maximization(CLabels* labels)
00031 {
00032     SG_INFO( "setting up AUC maximization\n") ;
00033     ASSERT(labels);
00034     ASSERT(labels->is_two_class_labeling());
00035 
00036     // get the original labels
00037     int32_t num=0;
00038     ASSERT(labels);
00039     int32_t* int_labels=labels->get_int_labels(num);
00040     ASSERT(subkernel->get_num_vec_rhs()==num);
00041 
00042     // count positive and negative
00043     int32_t num_pos=0;
00044     int32_t num_neg=0;
00045 
00046     for (int32_t i=0; i<num; i++)
00047     {
00048         if (int_labels[i]==1)
00049             num_pos++;
00050         else 
00051             num_neg++;
00052     }
00053 
00054     // create AUC features and labels (alternate labels)
00055     int32_t num_auc = num_pos*num_neg;
00056     SG_INFO("num_pos: %i  num_neg: %i  num_auc: %i\n", num_pos, num_neg, num_auc);
00057 
00058     uint16_t* features_auc = new uint16_t[num_auc*2];
00059     int32_t* labels_auc = new int32_t[num_auc];
00060     int32_t n=0 ;
00061 
00062     for (int32_t i=0; i<num; i++)
00063     {
00064         if (int_labels[i]!=1)
00065             continue;
00066 
00067         for (int32_t j=0; j<num; j++)
00068         {
00069             if (int_labels[j]!=-1)
00070                 continue;
00071 
00072             // create about as many positively as negatively labeled examples
00073             if (n%2==0)
00074             {
00075                 features_auc[n*2]=i;
00076                 features_auc[n*2+1]=j;
00077                 labels_auc[n]=1;
00078             }
00079             else
00080             {
00081                 features_auc[n*2]=j;
00082                 features_auc[n*2+1]=i;
00083                 labels_auc[n]=-1;
00084             }
00085 
00086             n++;
00087             ASSERT(n<=num_auc);
00088         }
00089     }
00090 
00091     // create label object and attach it to svm
00092     CLabels* lab_auc = new CLabels(num_auc);
00093     lab_auc->set_int_labels(labels_auc, num_auc);
00094     SG_REF(lab_auc);
00095 
00096     // create feature object
00097     CSimpleFeatures<uint16_t>* f = new CSimpleFeatures<uint16_t>(0);
00098     f->set_feature_matrix(features_auc, 2, num_auc);
00099 
00100     // create AUC kernel and attach the features
00101     init(f,f);
00102 
00103     delete[] int_labels;
00104     delete[] labels_auc;
00105 
00106     return lab_auc;
00107 }
00108 
00109 
00110 bool CAUCKernel::init(CFeatures* l, CFeatures* r)
00111 {
00112     CSimpleKernel<uint16_t>::init(l, r);
00113     init_normalizer();
00114     return true;
00115 }
00116 
00117 bool CAUCKernel::load_init(FILE* src)
00118 {
00119     return false;
00120 }
00121 
00122 bool CAUCKernel::save_init(FILE* dest)
00123 {
00124     return false;
00125 }
00126 
00127 float64_t CAUCKernel::compute(int32_t idx_a, int32_t idx_b)
00128 {
00129   int32_t alen, blen;
00130   bool afree, bfree;
00131 
00132   uint16_t* avec=((CSimpleFeatures<uint16_t>*) lhs)->get_feature_vector(idx_a, alen, afree);
00133   uint16_t* bvec=((CSimpleFeatures<uint16_t>*) rhs)->get_feature_vector(idx_b, blen, bfree);
00134 
00135   ASSERT(alen==2);
00136   ASSERT(blen==2);
00137 
00138   ASSERT(subkernel && subkernel->has_features());
00139 
00140   float64_t k11,k12,k21,k22;
00141   int32_t idx_a1=avec[0], idx_a2=avec[1], idx_b1=bvec[0], idx_b2=bvec[1];
00142 
00143   k11 = subkernel->kernel(idx_a1,idx_b1);
00144   k12 = subkernel->kernel(idx_a1,idx_b2);
00145   k21 = subkernel->kernel(idx_a2,idx_b1);
00146   k22 = subkernel->kernel(idx_a2,idx_b2);
00147 
00148   float64_t result = k11+k22-k21-k12;
00149 
00150   ((CSimpleFeatures<uint16_t>*) lhs)->free_feature_vector(avec, idx_a, afree);
00151   ((CSimpleFeatures<uint16_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree);
00152 
00153   return result;
00154 }

SHOGUN Machine Learning Toolbox - Documentation