00001 #include "features/PolyFeatures.h"
00002
00003 using namespace shogun;
00004
00005 CPolyFeatures::CPolyFeatures(CSimpleFeatures<float64_t>* feat, int32_t degree, bool normalize)
00006 : CDotFeatures(), m_multi_index(NULL), m_multinomial_coefficients(NULL),
00007 m_normalization_values(NULL)
00008 {
00009 ASSERT(feat);
00010
00011 m_feat = feat;
00012 SG_REF(m_feat);
00013 m_degree=degree;
00014 m_normalize=normalize;
00015 m_input_dimensions=feat->get_num_features();
00016 m_output_dimensions=calc_feature_space_dimensions(m_input_dimensions, m_degree);
00017
00018 store_multi_index();
00019 store_multinomial_coefficients();
00020 if (m_normalize)
00021 store_normalization_values();
00022 }
00023
00024 CPolyFeatures::~CPolyFeatures()
00025 {
00026 delete[] m_multi_index;
00027 delete[] m_multinomial_coefficients;
00028 delete[] m_normalization_values;
00029 SG_UNREF(m_feat);
00030 }
00031
00032 float64_t CPolyFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
00033 {
00034 int32_t len1;
00035 bool do_free1;
00036 float64_t* vec1 = m_feat->get_feature_vector(vec_idx1, len1, do_free1);
00037
00038 int32_t len2;
00039 bool do_free2;
00040 float64_t* vec2 = m_feat->get_feature_vector(vec_idx2, len2, do_free2);
00041
00042 float64_t sum=0;
00043 int cnt=0;
00044 for (int j=0; j<m_output_dimensions; j++)
00045 {
00046 float64_t out1=m_multinomial_coefficients[j];
00047 float64_t out2=m_multinomial_coefficients[j];
00048 for (int k=0; k<m_degree; k++)
00049 {
00050 out1*=vec1[m_multi_index[cnt]];
00051 out2*=vec2[m_multi_index[cnt]];
00052 cnt++;
00053 }
00054 sum+=out1*out2;
00055 }
00056 m_feat->free_feature_vector(vec1, len1, do_free1);
00057 m_feat->free_feature_vector(vec2, len2, do_free2);
00058
00059 return sum;
00060 }
00061
00062 float64_t CPolyFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00063 {
00064 if (vec2_len != m_output_dimensions)
00065 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00066
00067 int32_t len;
00068 bool do_free;
00069 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00070
00071
00072 int cnt=0;
00073 float64_t sum=0;
00074 for (int j=0; j<vec2_len; j++)
00075 {
00076 float64_t output=m_multinomial_coefficients[j];
00077 for (int k=0; k<m_degree; k++)
00078 {
00079 output*=vec[m_multi_index[cnt]];
00080 cnt++;
00081 }
00082 sum+=output*vec2[j];
00083 }
00084 if (m_normalize)
00085 sum = sum/m_normalization_values[vec_idx1];
00086
00087 m_feat->free_feature_vector(vec, len, do_free);
00088 return sum;
00089 }
00090 void CPolyFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00091 {
00092 if (vec2_len != m_output_dimensions)
00093 SG_ERROR("Dimensions don't match, vec2_dim=%d, m_output_dimensions=%d\n", vec2_len, m_output_dimensions);
00094
00095 int32_t len;
00096 bool do_free;
00097 float64_t* vec = m_feat->get_feature_vector(vec_idx1, len, do_free);
00098
00099
00100 int cnt=0;
00101 float32_t norm_val=1;
00102 if (m_normalize)
00103 norm_val = m_normalization_values[vec_idx1];
00104 alpha/=norm_val;
00105 for (int j=0; j<vec2_len; j++)
00106 {
00107 float64_t output=m_multinomial_coefficients[j];
00108 for (int k=0; k<m_degree; k++)
00109 {
00110 output*=vec[m_multi_index[cnt]];
00111 cnt++;
00112 }
00113 if (abs_val)
00114 output=CMath::abs(output);
00115
00116 vec2[j]+=alpha*output;
00117 }
00118 m_feat->free_feature_vector(vec, len, do_free);
00119 }
00120 void CPolyFeatures::store_normalization_values()
00121 {
00122 delete[] m_normalization_values;
00123
00124 int32_t num_vec = this->get_num_vectors();
00125
00126 m_normalization_values=new float32_t[num_vec];
00127 for (int i=0; i<num_vec; i++)
00128 {
00129 float64_t tmp = CMath::sqrt(dot(i,i));
00130 if (tmp==0)
00131
00132 m_normalization_values[i]=1;
00133 else
00134 m_normalization_values[i]=tmp;
00135 }
00136
00137 }
00138
00139 void CPolyFeatures::store_multi_index()
00140 {
00141 delete[] m_multi_index;
00142
00143 m_multi_index=new uint16_t[m_output_dimensions*m_degree];
00144
00145 uint16_t* exponents = new uint16_t[m_input_dimensions];
00146 if (!exponents)
00147 SG_ERROR( "Error allocating mem \n");
00148
00149 uint16_t* index = m_multi_index;
00150 enumerate_multi_index(0, &index, exponents, m_degree);
00151
00152 delete[] exponents;
00153 }
00154
00155 void CPolyFeatures::enumerate_multi_index(const int32_t feat_idx, uint16_t** index, uint16_t* exponents, const int32_t degree)
00156 {
00157 if (feat_idx==m_input_dimensions-1 || degree==0)
00158 {
00159 if (feat_idx==m_input_dimensions-1)
00160 exponents[feat_idx] = degree;
00161 if (degree==0)
00162 exponents[feat_idx] = 0;
00163 int32_t i, j;
00164 for (j=0; j<feat_idx+1; j++)
00165 for (i=0; i<exponents[j]; i++)
00166 {
00167 **index = j;
00168 (*index)++;
00169 }
00170 exponents[feat_idx] = 0;
00171 return;
00172 }
00173 int32_t k;
00174 for (k=0; k<=degree; k++)
00175 {
00176 exponents[feat_idx] = k;
00177 enumerate_multi_index(feat_idx+1, index, exponents, degree-k);
00178 }
00179 return;
00180
00181 }
00182
00183 void CPolyFeatures::store_multinomial_coefficients()
00184 {
00185 delete[] m_multinomial_coefficients;
00186
00187 m_multinomial_coefficients = new float64_t[m_output_dimensions];
00188 int32_t* exponents = new int32_t[m_input_dimensions];
00189 if (!exponents)
00190 SG_ERROR( "Error allocating mem \n");
00191 int32_t j=0;
00192 for (j=0; j<m_input_dimensions; j++)
00193 exponents[j] = 0;
00194 int32_t k, cnt=0;
00195 for (j=0; j<m_output_dimensions; j++)
00196 {
00197 for (k=0; k<m_degree; k++)
00198 {
00199 exponents[m_multi_index[cnt]] ++;
00200 cnt++;
00201 }
00202 m_multinomial_coefficients[j] = sqrt((double) multinomialcoef(exponents, m_input_dimensions));
00203 for (k=0; k<m_input_dimensions; k++)
00204 {
00205 exponents[k]=0;
00206 }
00207 }
00208 delete[] exponents;
00209 }
00210
00211 int32_t CPolyFeatures::bico2(int32_t n, int32_t k)
00212 {
00213
00214
00215
00216
00217 if (n<k)
00218 return 0;
00219 if (k>n/2)
00220 k = n-k;
00221 if (k<0)
00222 return 0;
00223 if (k==0)
00224 return 1;
00225 if (k==1)
00226 return n;
00227 if (k<4)
00228 return bico2(n-1, k-1)+bico2(n-1, k);
00229
00230
00231
00232 return bico(n, k);
00233
00234 }
00235
00236 int32_t CPolyFeatures::calc_feature_space_dimensions(int32_t N, int32_t D)
00237 {
00238 if (N==1)
00239 return 1;
00240 if (D==0)
00241 return 1;
00242 int32_t d;
00243 int32_t ret = 0;
00244 for (d=0; d<=D; d++)
00245 ret += calc_feature_space_dimensions(N-1, d);
00246
00247 return ret;
00248 }
00249
00250 int32_t CPolyFeatures::multinomialcoef(int32_t* exps, int32_t len)
00251 {
00252 int32_t ret = 1, i;
00253 int32_t n = 0;
00254 for (i=0; i<len; i++)
00255 {
00256 n += exps[i];
00257 ret *= bico2(n, exps[i]);
00258 }
00259 return ret;
00260 }
00261
00262
00263
00264 float64_t CPolyFeatures::gammln(float64_t xx)
00265 {
00266 float64_t x,y,tmp,ser;
00267 static float64_t cof[6]={76.18009172947146, -86.50532032941677,
00268 24.01409824083091, -1.231739572450155,
00269 0.1208650973866179e-2,-0.5395239384953e-5};
00270 int32_t j;
00271
00272 y=x=xx;
00273 tmp=x+5.5;
00274 tmp -= (x+0.5)*log(tmp);
00275 ser=1.000000000190015;
00276 for (j=0;j<=5;j++) ser += cof[j]/++y;
00277 return -tmp+log(2.5066282746310005*ser/x);
00278 }
00279
00280 float64_t CPolyFeatures::factln(int32_t n)
00281 {
00282 static float64_t a[101];
00283
00284 if (n < 0) SG_ERROR("Negative factorial in routine factln\n");
00285 if (n <= 1) return 0.0;
00286 if (n <= 100) return a[n] ? a[n] : (a[n]=gammln(n+1.0));
00287 else return gammln(n+1.0);
00288 }
00289
00290 int32_t CPolyFeatures::bico(int32_t n, int32_t k)
00291 {
00292
00293 return (int32_t) floor(0.5+exp(factln(n)-factln(k)-factln(n-k)));
00294 }
00295 CFeatures* CPolyFeatures::duplicate() const
00296 {
00297 return new CPolyFeatures(*this);
00298 }