Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "features/ExplicitSpecFeatures.h"
00012 #include "lib/io.h"
00013
00014 using namespace shogun;
00015
00016 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
00017 {
00018 ASSERT(str);
00019
00020 use_normalization=normalize;
00021 num_strings = str->get_num_vectors();
00022 spec_size = str->get_num_symbols();
00023
00024 obtain_kmer_spectrum(str);
00025
00026 SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings);
00027 }
00028
00029 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig),
00030 num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
00031 {
00032 k_spectrum= new float64_t*[num_strings];
00033 for (int32_t i=0; i<num_strings; i++)
00034 k_spectrum[i]=CMath::clone_vector(k_spectrum[i], spec_size);
00035 }
00036
00037 CExplicitSpecFeatures::~CExplicitSpecFeatures()
00038 {
00039 delete_kmer_spectrum();
00040 }
00041
00042 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
00043 {
00044 ASSERT(vec_idx1 < num_strings);
00045 ASSERT(vec_idx2 < num_strings);
00046 float64_t* vec1=k_spectrum[vec_idx1];
00047 float64_t* vec2=k_spectrum[vec_idx2];
00048
00049 return CMath::dot(vec1, vec2, spec_size);
00050 }
00051
00052 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00053 {
00054 ASSERT(vec2_len == spec_size);
00055 ASSERT(vec_idx1 < num_strings);
00056 float64_t* vec1=k_spectrum[vec_idx1];
00057 float64_t result=0;
00058
00059 for (int32_t i=0; i<spec_size; i++)
00060 result+=vec1[i]*vec2[i];
00061
00062 return result;
00063 }
00064
00065 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00066 {
00067 ASSERT(vec2_len == spec_size);
00068 ASSERT(vec_idx1 < num_strings);
00069 float64_t* vec1=k_spectrum[vec_idx1];
00070
00071 if (abs_val)
00072 {
00073 for (int32_t i=0; i<spec_size; i++)
00074 vec2[i]+=alpha*CMath::abs(vec1[i]);
00075 }
00076 else
00077 {
00078 for (int32_t i=0; i<spec_size; i++)
00079 vec2[i]+=alpha*vec1[i];
00080 }
00081 }
00082
00083 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
00084 {
00085 k_spectrum= new float64_t*[num_strings];
00086
00087 for (int32_t i=0; i<num_strings; i++)
00088 {
00089 k_spectrum[i]=new float64_t[spec_size];
00090 memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);
00091
00092 int32_t len=0;
00093 bool free_fv;
00094 uint16_t* fv=str->get_feature_vector(i, len, free_fv);
00095
00096 for (int32_t j=0; j<len; j++)
00097 k_spectrum[i][fv[j]]++;
00098
00099 str->free_feature_vector(fv, i, free_fv);
00100
00101 if (use_normalization)
00102 {
00103 float64_t n=0;
00104 for (int32_t j=0; j<spec_size; j++)
00105 n+=CMath::sq(k_spectrum[i][j]);
00106
00107 n=CMath::sqrt(n);
00108
00109 for (int32_t j=0; j<spec_size; j++)
00110 k_spectrum[i][j]/=n;
00111 }
00112 }
00113 }
00114
00115 void CExplicitSpecFeatures::delete_kmer_spectrum()
00116 {
00117 for (int32_t i=0; i<num_strings; i++)
00118 delete[] k_spectrum[i];
00119
00120 delete[] k_spectrum;
00121 k_spectrum=NULL;
00122 }
00123
00124 CFeatures* CExplicitSpecFeatures::duplicate() const
00125 {
00126 return new CExplicitSpecFeatures(*this);
00127 }