PolyFeatures.cpp

Go to the documentation of this file.
00001 #include "features/PolyFeatures.h"
00002 
00003 using namespace shogun;
00004 
00005 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize)
00006     : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL), 
00007         m_normalization_values(NULL)
00008 {
00009     ASSERT(feat);
00010 
00011     m_feat = feat;
00012     SG_REF(m_feat);
00013     m_degree=degree;
00014     m_normalize=normalize;
00015     m_input_dimensions=feat->get_num_features();
00016     m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree);
00017 
00018     store_multi_index();
00019     store_multinomial_coefficients();
00020     if (m_normalize)
00021         store_normalization_values();
00022 }
00023 
00024 CPolyFeatures::~CPolyFeatures()
00025 {
00026     delete[] m_multi_index;
00027     delete[] m_multinomial_coefficients;
00028     delete[] m_normalization_values;
00029     SG_UNREF(m_feat);
00030 }
00031 
00032 float64_t CPolyFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
00033 {
00034     int32_t len1;
00035     bool do_free1;
00036     float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
00037 
00038     int32_t len2;
00039     bool do_free2;
00040     float64_t* vec2 = m_feat->get_feature_vector(vec_idx2, len2, do_free2);
00041 
00042     float64_t sum=0;
00043     int cnt=0;
00044     for (int j=0; j<m_output_dimensions; j++)
00045     {
00046         float64_t out1=m_multinomial_coefficients[j];
00047         float64_t out2=m_multinomial_coefficients[j];
00048         for (int k=0; k<m_degree; k++)
00049         {
00050             out1*=vec1[m_multi_index[cnt]];
00051             out2*=vec2[m_multi_index[cnt]];
00052             cnt++;
00053         }
00054         sum+=out1*out2;
00055     }
00056     m_feat->free_feature_vector(vec1, len1, do_free1);
00057     m_feat->free_feature_vector(vec2, len2, do_free2);
00058 
00059     return sum;
00060 }
00061 
00062 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00063 {
00064     if (vec2_len != m_output_dimensions)
00065         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00066 
00067     int32_t len;
00068     bool do_free;
00069     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00070 
00071     
00072     int cnt=0;
00073     float64_t sum=0;
00074     for (int j=0; j<vec2_len; j++)
00075     {
00076         float64_t output=m_multinomial_coefficients[j];
00077         for (int k=0; k<m_degree; k++)
00078         {
00079             output*=vec[m_multi_index[cnt]];
00080             cnt++;
00081         }
00082         sum+=output*vec2[j];
00083     }
00084     if (m_normalize)
00085         sum = sum/m_normalization_values[vec_idx1];
00086 
00087     m_feat->free_feature_vector(vec, len, do_free);
00088     return sum;
00089 }
00090 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00091 {
00092     if (vec2_len != m_output_dimensions)
00093         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00094 
00095     int32_t len;
00096     bool do_free;
00097     float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00098 
00099     
00100     int cnt=0;
00101     float32_t norm_val=1;
00102     if (m_normalize)
00103         norm_val = m_normalization_values[vec_idx1];
00104     alpha/=norm_val;
00105     for (int j=0; j<vec2_len; j++)
00106     {
00107         float64_t output=m_multinomial_coefficients[j];
00108         for (int k=0; k<m_degree; k++)
00109         {
00110             output*=vec[m_multi_index[cnt]];
00111             cnt++;
00112         }
00113         if (abs_val)
00114             output=CMath::abs(output); 
00115 
00116         vec2[j]+=alpha*output;
00117     }
00118     m_feat->free_feature_vector(vec, len, do_free);
00119 }
00120 void CPolyFeatures::store_normalization_values()
00121 {
00122     delete[] m_normalization_values;
00123 
00124     int32_t num_vec = this->get_num_vectors();
00125 
00126     m_normalization_values=new float32_t[num_vec];
00127     for (int i=0; i<num_vec; i++)
00128     {
00129         float64_t tmp = CMath::sqrt(dot(i,i)); 
00130         if (tmp==0)
00131             // trap division by zero
00132             m_normalization_values[i]=1;
00133         else 
00134             m_normalization_values[i]=tmp;
00135     }
00136         
00137 }
00138 
00139 void CPolyFeatures::store_multi_index()
00140 {
00141     delete[] m_multi_index;
00142 
00143         m_multi_index=new uint16_t[m_output_dimensions*m_degree];
00144 
00145         uint16_t* exponents = new uint16_t[m_input_dimensions];
00146         if (!exponents)
00147         SG_ERROR( "Error allocating mem \n");   
00148     /*copy adress: otherwise it will be overwritten in recursion*/
00149         uint16_t* index = m_multi_index;
00150         enumerate_multi_index(0, &index, exponents, m_degree);
00151 
00152     delete[] exponents;
00153 }
00154 
00155 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
00156 {
00157     if (feat_idx==m_input_dimensions-1 || degree==0)
00158     {
00159         if (feat_idx==m_input_dimensions-1)
00160             exponents[feat_idx] = degree;
00161         if (degree==0)
00162             exponents[feat_idx] = 0;
00163         int32_t i, j;
00164         for (j=0; j<feat_idx+1; j++)
00165             for (i=0; i<exponents[j]; i++)
00166             {
00167                 **index = j;
00168                 (*index)++;
00169             }
00170         exponents[feat_idx] = 0;
00171         return;
00172     }
00173     int32_t k;
00174     for (k=0; k<=degree; k++)
00175     {
00176         exponents[feat_idx] =  k;
00177         enumerate_multi_index(feat_idx+1, index,  exponents, degree-k); 
00178     }
00179     return; 
00180 
00181 }
00182 
00183 void CPolyFeatures::store_multinomial_coefficients()
00184 {
00185     delete[] m_multinomial_coefficients;
00186 
00187     m_multinomial_coefficients = new float64_t[m_output_dimensions];
00188     int32_t* exponents = new int32_t[m_input_dimensions];
00189     if (!exponents)
00190         SG_ERROR( "Error allocating mem \n");
00191     int32_t j=0;
00192     for (j=0; j<m_input_dimensions; j++)
00193         exponents[j] = 0;
00194     int32_t k, cnt=0;
00195     for (j=0; j<m_output_dimensions; j++)
00196     {
00197         for (k=0; k<m_degree; k++)
00198         {
00199             exponents[m_multi_index[cnt]] ++;
00200             cnt++;
00201         }
00202         m_multinomial_coefficients[j] =  sqrt((double) multinomialcoef(exponents, m_input_dimensions));
00203         for (k=0; k<m_input_dimensions; k++)
00204         {
00205             exponents[k]=0;
00206         }
00207     }
00208     delete[] exponents;
00209 }
00210 
00211 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
00212 {
00213         
00214     /* for this problem k is usually small (<=degree), 
00215      * thus it is efficient to 
00216      * to use recursion and prune end recursions*/  
00217     if (n<k) 
00218         return 0;
00219     if (k>n/2)
00220         k = n-k;
00221     if (k<0)
00222         return 0;
00223     if (k==0)
00224         return 1;
00225     if (k==1)
00226         return n;
00227     if (k<4)
00228         return bico2(n-1, k-1)+bico2(n-1, k);
00229 
00230     /* call function as implemented in numerical recipies:
00231      * much more efficient for large binomial coefficients*/
00232     return bico(n, k);
00233     
00234 }
00235 
00236 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
00237 {
00238     if (N==1)
00239         return 1;
00240     if (D==0)
00241         return 1;
00242     int32_t d;
00243     int32_t ret = 0;
00244     for (d=0; d<=D; d++)
00245         ret += calc_feature_space_dimensions(N-1, d);
00246 
00247     return ret;
00248 }
00249 
00250 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
00251 {
00252     int32_t ret = 1, i;
00253     int32_t n = 0;
00254     for (i=0; i<len; i++)
00255     {
00256         n += exps[i];
00257         ret *= bico2(n, exps[i]);
00258     }
00259     return ret; 
00260 }
00261 
00262 /* gammln as implemented in the
00263  * second edition of Numerical Recipes in C */
00264 float64_t CPolyFeatures::gammln(float64_t xx)
00265 {
00266     float64_t x,y,tmp,ser;
00267     static float64_t cof[6]={76.18009172947146,    -86.50532032941677,
00268                           24.01409824083091,    -1.231739572450155,
00269                           0.1208650973866179e-2,-0.5395239384953e-5};
00270     int32_t j;
00271 
00272     y=x=xx;
00273     tmp=x+5.5;
00274     tmp -= (x+0.5)*log(tmp);
00275     ser=1.000000000190015;
00276     for (j=0;j<=5;j++) ser += cof[j]/++y;
00277     return -tmp+log(2.5066282746310005*ser/x);
00278 }
00279 
00280 float64_t CPolyFeatures::factln(int32_t n)
00281 {
00282     static float64_t a[101];
00283 
00284     if (n < 0) SG_ERROR("Negative factorial in routine factln\n");
00285     if (n <= 1) return 0.0;
00286     if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
00287     else return gammln(n+1.0);
00288 }
00289 
00290 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
00291 {
00292     /* use floor to clean roundoff errors*/
00293     return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
00294 }
00295 CFeatures* CPolyFeatures::duplicate() const
00296 {
00297     return new CPolyFeatures(*this);
00298 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation