ExplicitSpecFeatures.cpp
Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "features/ExplicitSpecFeatures.h"
00012 #include "lib/io.h"
00013
00014 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
00015 {
00016 ASSERT(str);
00017
00018 use_normalization=normalize;
00019 num_strings = str->get_num_vectors();
00020 spec_size = str->get_num_symbols();
00021
00022 obtain_kmer_spectrum(str);
00023
00024 SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings);
00025 }
00026
00027 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig),
00028 num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
00029 {
00030 k_spectrum= new float64_t*[num_strings];
00031 for (int32_t i=0; i<num_strings; i++)
00032 k_spectrum[i]=CMath::clone_vector(k_spectrum[i], spec_size);
00033 }
00034
00035 CExplicitSpecFeatures::~CExplicitSpecFeatures()
00036 {
00037 delete_kmer_spectrum();
00038 }
00039
00040 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
00041 {
00042 ASSERT(vec_idx1 < num_strings);
00043 ASSERT(vec_idx2 < num_strings);
00044 float64_t* vec1=k_spectrum[vec_idx1];
00045 float64_t* vec2=k_spectrum[vec_idx2];
00046
00047 return CMath::dot(vec1, vec2, spec_size);
00048 }
00049
00050 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00051 {
00052 ASSERT(vec2_len == spec_size);
00053 ASSERT(vec_idx1 < num_strings);
00054 float64_t* vec1=k_spectrum[vec_idx1];
00055 float64_t result=0;
00056
00057 for (int32_t i=0; i<spec_size; i++)
00058 result+=vec1[i]*vec2[i];
00059
00060 return result;
00061 }
00062
00063 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00064 {
00065 ASSERT(vec2_len == spec_size);
00066 ASSERT(vec_idx1 < num_strings);
00067 float64_t* vec1=k_spectrum[vec_idx1];
00068
00069 if (abs_val)
00070 {
00071 for (int32_t i=0; i<spec_size; i++)
00072 vec2[i]+=alpha*CMath::abs(vec1[i]);
00073 }
00074 else
00075 {
00076 for (int32_t i=0; i<spec_size; i++)
00077 vec2[i]+=alpha*vec1[i];
00078 }
00079 }
00080
00081 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
00082 {
00083 k_spectrum= new float64_t*[num_strings];
00084
00085 for (int32_t i=0; i<num_strings; i++)
00086 {
00087 k_spectrum[i]=new float64_t[spec_size];
00088 memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);
00089
00090 int32_t len=0;
00091 uint16_t* fv=str->get_feature_vector(i, len);
00092
00093 for (int32_t j=0; j<len; j++)
00094 k_spectrum[i][fv[j]]++;
00095
00096 if (use_normalization)
00097 {
00098 float64_t n=0;
00099 for (int32_t j=0; j<spec_size; j++)
00100 n+=CMath::sq(k_spectrum[i][j]);
00101
00102 n=CMath::sqrt(n);
00103
00104 for (int32_t j=0; j<spec_size; j++)
00105 k_spectrum[i][j]/=n;
00106 }
00107 }
00108 }
00109
00110 void CExplicitSpecFeatures::delete_kmer_spectrum()
00111 {
00112 for (int32_t i=0; i<num_strings; i++)
00113 delete[] k_spectrum[i];
00114
00115 delete[] k_spectrum;
00116 k_spectrum=NULL;
00117 }
00118
00119 CFeatures* CExplicitSpecFeatures::duplicate() const
00120 {
00121 return new CExplicitSpecFeatures(*this);
00122 }