ExplicitSpecFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "features/ExplicitSpecFeatures.h"
00012 #include "lib/io.h"
00013 
00014 using namespace shogun;
00015 
00016 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
00017 {
00018     ASSERT(str);
00019 
00020     use_normalization=normalize;
00021     num_strings = str->get_num_vectors();
00022     spec_size = str->get_num_symbols();
00023 
00024     obtain_kmer_spectrum(str);
00025 
00026     SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings);
00027 }
00028 
00029 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig), 
00030     num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
00031 {
00032     k_spectrum= new float64_t*[num_strings];
00033     for (int32_t i=0; i<num_strings; i++)
00034         k_spectrum[i]=CMath::clone_vector(k_spectrum[i], spec_size);
00035 }
00036 
00037 CExplicitSpecFeatures::~CExplicitSpecFeatures()
00038 {
00039     delete_kmer_spectrum();
00040 }
00041 
00042 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, int32_t vec_idx2)
00043 {
00044     ASSERT(vec_idx1 < num_strings);
00045     ASSERT(vec_idx2 < num_strings);
00046     float64_t* vec1=k_spectrum[vec_idx1];
00047     float64_t* vec2=k_spectrum[vec_idx2];
00048 
00049     return CMath::dot(vec1, vec2, spec_size);
00050 }
00051 
00052 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00053 {
00054     ASSERT(vec2_len == spec_size);
00055     ASSERT(vec_idx1 < num_strings);
00056     float64_t* vec1=k_spectrum[vec_idx1];
00057     float64_t result=0;
00058     
00059     for (int32_t i=0; i<spec_size; i++)
00060         result+=vec1[i]*vec2[i];
00061 
00062     return result;
00063 }
00064 
00065 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00066 {
00067     ASSERT(vec2_len == spec_size);
00068     ASSERT(vec_idx1 < num_strings);
00069     float64_t* vec1=k_spectrum[vec_idx1];
00070 
00071     if (abs_val)
00072     {
00073         for (int32_t i=0; i<spec_size; i++)
00074             vec2[i]+=alpha*CMath::abs(vec1[i]);
00075     }
00076     else
00077     {
00078         for (int32_t i=0; i<spec_size; i++)
00079             vec2[i]+=alpha*vec1[i];
00080     }
00081 }
00082 
00083 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
00084 {
00085     k_spectrum= new float64_t*[num_strings];
00086 
00087     for (int32_t i=0; i<num_strings; i++)
00088     {
00089         k_spectrum[i]=new float64_t[spec_size];
00090         memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);
00091 
00092         int32_t len=0;
00093         bool free_fv;
00094         uint16_t* fv=str->get_feature_vector(i, len, free_fv);
00095 
00096         for (int32_t j=0; j<len; j++)
00097             k_spectrum[i][fv[j]]++;
00098 
00099         str->free_feature_vector(fv, i, free_fv);
00100 
00101         if (use_normalization)
00102         {
00103             float64_t n=0;
00104             for (int32_t j=0; j<spec_size; j++)
00105                 n+=CMath::sq(k_spectrum[i][j]);
00106 
00107             n=CMath::sqrt(n);
00108 
00109             for (int32_t j=0; j<spec_size; j++)
00110                 k_spectrum[i][j]/=n;
00111         }
00112     }
00113 }
00114 
00115 void CExplicitSpecFeatures::delete_kmer_spectrum()
00116 {
00117     for (int32_t i=0; i<num_strings; i++)
00118         delete[] k_spectrum[i];
00119 
00120     delete[] k_spectrum;
00121     k_spectrum=NULL;
00122 }
00123 
00124 CFeatures* CExplicitSpecFeatures::duplicate() const
00125 {
00126     return new CExplicitSpecFeatures(*this);
00127 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation