ExplicitSpecFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "features/ExplicitSpecFeatures.h"
00012 #include "lib/io.h"
00013 
00014 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
00015 {
00016     ASSERT(str);
00017 
00018     use_normalization=normalize;
00019     num_strings = str->get_num_vectors();
00020     spec_size = str->get_num_symbols();
00021 
00022     obtain_kmer_spectrum(str);
00023 
00024     SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings);
00025 }
00026 
00027 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig), 
00028     num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
00029 {
00030     k_spectrum= new float64_t*[num_strings];
00031     for (int32_t i=0; i<num_strings; i++)
00032         k_spectrum[i]=CMath::clone_vector(k_spectrum[i], spec_size);
00033 }
00034 
00035 CExplicitSpecFeatures::~CExplicitSpecFeatures()
00036 {
00037     delete_kmer_spectrum();
00038 }
00039 
00040 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
00041 {
00042     ASSERT(vec_idx1 < num_strings);
00043     ASSERT(vec_idx2 < num_strings);
00044     float64_t* vec1=k_spectrum[vec_idx1];
00045     float64_t* vec2=k_spectrum[vec_idx2];
00046 
00047     return CMath::dot(vec1, vec2, spec_size);
00048 }
00049 
00050 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00051 {
00052     ASSERT(vec2_len == spec_size);
00053     ASSERT(vec_idx1 < num_strings);
00054     float64_t* vec1=k_spectrum[vec_idx1];
00055     float64_t result=0;
00056     
00057     for (int32_t i=0; i<spec_size; i++)
00058         result+=vec1[i]*vec2[i];
00059 
00060     return result;
00061 }
00062 
00063 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00064 {
00065     ASSERT(vec2_len == spec_size);
00066     ASSERT(vec_idx1 < num_strings);
00067     float64_t* vec1=k_spectrum[vec_idx1];
00068 
00069     if (abs_val)
00070     {
00071         for (int32_t i=0; i<spec_size; i++)
00072             vec2[i]+=alpha*CMath::abs(vec1[i]);
00073     }
00074     else
00075     {
00076         for (int32_t i=0; i<spec_size; i++)
00077             vec2[i]+=alpha*vec1[i];
00078     }
00079 }
00080 
00081 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
00082 {
00083     k_spectrum= new float64_t*[num_strings];
00084 
00085     for (int32_t i=0; i<num_strings; i++)
00086     {
00087         k_spectrum[i]=new float64_t[spec_size];
00088         memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);
00089 
00090         int32_t len=0;
00091         uint16_t* fv=str->get_feature_vector(i, len);
00092 
00093         for (int32_t j=0; j<len; j++)
00094             k_spectrum[i][fv[j]]++;
00095 
00096         if (use_normalization)
00097         {
00098             float64_t n=0;
00099             for (int32_t j=0; j<spec_size; j++)
00100                 n+=CMath::sq(k_spectrum[i][j]);
00101 
00102             n=CMath::sqrt(n);
00103 
00104             for (int32_t j=0; j<spec_size; j++)
00105                 k_spectrum[i][j]/=n;
00106         }
00107     }
00108 }
00109 
00110 void CExplicitSpecFeatures::delete_kmer_spectrum()
00111 {
00112     for (int32_t i=0; i<num_strings; i++)
00113         delete[] k_spectrum[i];
00114 
00115     delete[] k_spectrum;
00116     k_spectrum=NULL;
00117 }
00118 
00119 CFeatures* CExplicitSpecFeatures::duplicate() const
00120 {
00121     return new CExplicitSpecFeatures(*this);
00122 }

SHOGUN Machine Learning Toolbox - Documentation