00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "features/SparsePolyFeatures.h"
00011 #include "lib/Hash.h"
00012
00013 using namespace shogun;
00014
00015 CSparsePolyFeatures::CSparsePolyFeatures(CSparseFeatures<float64_t>* feat, int32_t degree, bool normalize, int32_t hash_bits)
00016 : CDotFeatures(), m_normalization_values(NULL)
00017 {
00018 ASSERT(feat);
00019
00020 m_feat = feat;
00021 SG_REF(m_feat);
00022 m_degree=degree;
00023 m_normalize=normalize;
00024 m_hash_bits=hash_bits;
00025 mask=(uint32_t) (((uint64_t) 1)<<m_hash_bits)-1;
00026 m_output_dimensions=1<<m_hash_bits;
00027 m_input_dimensions=feat->get_num_features();
00028
00029 if (m_normalize)
00030 store_normalization_values();
00031 }
00032
00033 CSparsePolyFeatures::~CSparsePolyFeatures()
00034 {
00035 delete[] m_normalization_values;
00036 SG_UNREF(m_feat);
00037 }
00038
00039 float64_t CSparsePolyFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
00040 {
00041
00042 int32_t len1, len2;
00043 bool do_free1, do_free2;
00044 TSparseEntry<float64_t>* vec1 = m_feat->get_sparse_feature_vector(vec_idx1, len1, do_free1);
00045 TSparseEntry<float64_t>* vec2 = m_feat->get_sparse_feature_vector(vec_idx2, len2, do_free2);
00046
00047 float64_t result=CSparseFeatures<float64_t>::sparse_dot(1, vec1, len1, vec2, len2);
00048 result=CMath::pow(result, m_degree);
00049
00050 m_feat->free_feature_vector(vec1, len1, do_free1);
00051 m_feat->free_feature_vector(vec2, len2, do_free2);
00052
00053 return result;
00054 }
00055
00056 float64_t CSparsePolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00057 {
00058 if (vec2_len != m_output_dimensions)
00059 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00060
00061 int32_t vlen;
00062 bool do_free;
00063 TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free);
00064
00065 float64_t result=0;
00066
00067 if (vec)
00068 {
00069 if (m_degree==2)
00070 {
00071
00072 for (int32_t i=0; i<vlen; i++)
00073 {
00074 float64_t v1=vec[i].entry;
00075 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF);
00076
00077 for (int32_t j=i; j<vlen; j++)
00078 {
00079 float64_t v2=vec[j].entry;
00080 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask;
00081 float64_t v;
00082
00083 if (i==j)
00084 v=v1*v1;
00085 else
00086 v=CMath::sqrt(2.0)*v1*v2;
00087
00088 result+=v*vec2[h];
00089 }
00090 }
00091 }
00092 else if (m_degree==3)
00093 SG_NOTIMPLEMENTED;
00094 }
00095
00096 if (m_normalize)
00097 result/=m_normalization_values[vec_idx1];
00098
00099 m_feat->free_feature_vector(vec, vlen, do_free);
00100 return result;
00101 }
00102
00103 void CSparsePolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00104 {
00105 if (vec2_len != m_output_dimensions)
00106 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00107
00108 int32_t vlen;
00109 bool do_free;
00110 TSparseEntry<float64_t>* vec = m_feat->get_sparse_feature_vector(vec_idx1, vlen, do_free);
00111
00112 float64_t norm_val=1.0;
00113 if (m_normalize)
00114 norm_val = m_normalization_values[vec_idx1];
00115 alpha/=norm_val;
00116
00117 if (m_degree==2)
00118 {
00119
00120 for (int32_t i=0; i<vlen; i++)
00121 {
00122 float64_t v1=vec[i].entry;
00123 uint32_t seed=CHash::MurmurHash2((uint8_t*) &(vec[i].feat_index), sizeof(int32_t), 0xDEADBEAF);
00124
00125 for (int32_t j=i; j<vlen; j++)
00126 {
00127 float64_t v2=vec[j].entry;
00128 uint32_t h=CHash::MurmurHash2((uint8_t*) &(vec[j].feat_index), sizeof(int32_t), seed) & mask;
00129 float64_t v;
00130
00131 if (i==j)
00132 v=alpha*v1*v1;
00133 else
00134 v=alpha*CMath::sqrt(2.0)*v1*v2;
00135
00136 if (abs_val)
00137 vec2[h]+=CMath::abs(v);
00138 else
00139 vec2[h]+=v;
00140 }
00141 }
00142 }
00143 else if (m_degree==3)
00144 SG_NOTIMPLEMENTED;
00145
00146 m_feat->free_feature_vector(vec, vlen, do_free);
00147 }
00148
00149 void CSparsePolyFeatures::store_normalization_values()
00150 {
00151 delete[] m_normalization_values;
00152
00153 int32_t num_vec = this->get_num_vectors();
00154
00155 m_normalization_values=new float64_t[num_vec];
00156 for (int i=0; i<num_vec; i++)
00157 {
00158 float64_t val = CMath::sqrt(dot(i,i));
00159 if (val==0)
00160
00161 m_normalization_values[i]=1.0;
00162 else
00163 m_normalization_values[i]=val;
00164 }
00165
00166 }
00167
00168 CFeatures* CSparsePolyFeatures::duplicate() const
00169 {
00170 return new CSparsePolyFeatures(*this);
00171 }