SparsePolyFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Soeren Sonnenburg
00008  * Copyright (C) 2010 Berlin Institute of Technology
00009  */
00010 #include "features/SparsePolyFeatures.h"
00011 #include "lib/Hash.h"
00012 
00013 using namespace shogun;
00014 
00015 CSparsePolyFeatures::CSparsePolyFeatures(CSparseFeatures<float64_t>* feat, int32_t degree, bool normalize, int32_t hash_bits)
00016     : CDotFeatures(), m_normalization_values(NULL)
00017 {
00018     ASSERT(feat);
00019 
00020     m_feat = feat;
00021     SG_REF(m_feat);
00022     m_degree=degree;
00023     m_normalize=normalize;
00024     m_hash_bits=hash_bits;
00025     mask=(uint32_t) (((uint64_t) 1)<<m_hash_bits)-1;
00026     m_output_dimensions=1<<m_hash_bits;
00027     m_input_dimensions=feat->get_num_features();
00028 
00029     if (m_normalize)
00030         store_normalization_values();
00031 }
00032 
00033 CSparsePolyFeatures::~CSparsePolyFeatures()
00034 {
00035     delete[] m_normalization_values;
00036     SG_UNREF(m_feat);
00037 }
00038 
00039 float64_t CSparsePolyFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
00040 {
00041 
00042     int32_t len1, len2;
00043     bool do_free1, do_free2;
00044     TSparseEntry<float64_t>* vec1 = m_feat->get_sparse_feature_vector(vec_idx1, len1, do_free1);
00045     TSparseEntry<float64_t>* vec2 = m_feat->get_sparse_feature_vector(vec_idx2, len2, do_free2);
00046 
00047     float64_t result=CSparseFeatures<float64_t>::sparse_dot(1, vec1, len1, vec2, len2);
00048     result=CMath::pow(result, m_degree);
00049 
00050     m_feat->free_feature_vector(vec1, len1, do_free1);
00051     m_feat->free_feature_vector(vec2, len2, do_free2);
00052 
00053     return result;
00054 }
00055 
00056 float64_t CSparsePolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00057 {
00058     if (vec2_len != m_output_dimensions)
00059         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00060 
00061     int32_t vlen;
00062     bool do_free;
00063     TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free);
00064 
00065     float64_t result=0;
00066 
00067     if (vec)
00068     {
00069         if (m_degree==2)
00070         {
00071             /* (a+b)^2 = a^2 + 2ab +b^2 */
00072             for (int32_t i=0; i<vlen; i++)
00073             {
00074                 float64_t v1=vec[i].entry;
00075                 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF);
00076 
00077                 for (int32_t j=i; j<vlen; j++)
00078                 {
00079                     float64_t v2=vec[j].entry;
00080                     uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask;
00081                     float64_t v;
00082 
00083                     if (i==j)
00084                         v=v1*v1;
00085                     else
00086                         v=CMath::sqrt(2.0)*v1*v2;
00087 
00088                     result+=v*vec2[h];
00089                 }
00090             }
00091         }
00092         else if (m_degree==3)
00093             SG_NOTIMPLEMENTED;
00094     }
00095     
00096     if (m_normalize)
00097         result/=m_normalization_values[vec_idx1];
00098 
00099     m_feat->free_feature_vector(vec, vlen, do_free);
00100     return result;
00101 }
00102 
00103 void CSparsePolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00104 {
00105     if (vec2_len != m_output_dimensions)
00106         SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00107 
00108     int32_t vlen;
00109     bool do_free;
00110     TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free);
00111 
00112     float64_t norm_val=1.0;
00113     if (m_normalize)
00114         norm_val = m_normalization_values[vec_idx1];
00115     alpha/=norm_val;
00116 
00117     if (m_degree==2)
00118     {
00119         /* (a+b)^2 = a^2 + 2ab +b^2 */
00120         for (int32_t i=0; i<vlen; i++)
00121         {
00122             float64_t v1=vec[i].entry;
00123             uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF);
00124 
00125             for (int32_t j=i; j<vlen; j++)
00126             {
00127                 float64_t v2=vec[j].entry;
00128                 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask;
00129                 float64_t v;
00130 
00131                 if (i==j)
00132                     v=alpha*v1*v1;
00133                 else
00134                     v=alpha*CMath::sqrt(2.0)*v1*v2;
00135 
00136                 if (abs_val)
00137                     vec2[h]+=CMath::abs(v); 
00138                 else
00139                     vec2[h]+=v; 
00140             }
00141         }
00142     }
00143     else if (m_degree==3)
00144         SG_NOTIMPLEMENTED;
00145 
00146     m_feat->free_feature_vector(vec, vlen, do_free);
00147 }
00148 
00149 void CSparsePolyFeatures::store_normalization_values()
00150 {
00151     delete[] m_normalization_values;
00152 
00153     int32_t num_vec = this->get_num_vectors();
00154 
00155     m_normalization_values=new float64_t[num_vec];
00156     for (int i=0; i<num_vec; i++)
00157     {
00158         float64_t val = CMath::sqrt(dot(i,i)); 
00159         if (val==0)
00160             // trap division by zero
00161             m_normalization_values[i]=1.0;
00162         else 
00163             m_normalization_values[i]=val;
00164     }
00165         
00166 }
00167 
00168 CFeatures* CSparsePolyFeatures::duplicate() const
00169 {
00170     return new CSparsePolyFeatures(*this);
00171 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation