00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "kernel/RegulatoryModulesStringKernel.h"
00013 #include "features/Features.h"
00014 #include "features/SimpleFeatures.h"
00015 #include "lib/io.h"
00016
00017 using namespace shogun;
00018
00019 CRegulatoryModulesStringKernel::CRegulatoryModulesStringKernel(
00020 int32_t size, float64_t w, int32_t d, int32_t s, int32_t wl)
00021 : CStringKernel<char>(size), width(w), degree(d), shift(s), window(wl),
00022 motif_positions_lhs(NULL), motif_positions_rhs(NULL), position_weights(NULL), weights(NULL)
00023 {
00024 }
00025
00026 CRegulatoryModulesStringKernel::CRegulatoryModulesStringKernel(CStringFeatures<char>* lstr, CStringFeatures<char>* rstr,
00027 CSimpleFeatures<uint16_t>* lpos, CSimpleFeatures<uint16_t>* rpos,
00028 float64_t w, int32_t d, int32_t s, int32_t wl, int32_t size)
00029 : CStringKernel<char>(size), width(w), degree(d), shift(s), window(wl),
00030 motif_positions_lhs(NULL), motif_positions_rhs(NULL), position_weights(NULL), weights(NULL)
00031 {
00032 set_motif_positions(lpos, rpos);
00033 init(lstr,rstr);
00034 }
00035
00036 CRegulatoryModulesStringKernel::~CRegulatoryModulesStringKernel()
00037 {
00038 SG_UNREF(motif_positions_lhs);
00039 SG_UNREF(motif_positions_rhs);
00040 }
00041
00042 bool CRegulatoryModulesStringKernel::init(CFeatures* l, CFeatures* r)
00043 {
00044 ASSERT(motif_positions_lhs);
00045 ASSERT(motif_positions_rhs);
00046
00047 if (l->get_num_vectors() != motif_positions_lhs->get_num_vectors())
00048 SG_ERROR("Number of vectors does not agree (LHS: %d, Motif LHS: %d).\n",
00049 l->get_num_vectors(), motif_positions_lhs->get_num_vectors());
00050 if (r->get_num_vectors() != motif_positions_rhs->get_num_vectors())
00051 SG_ERROR("Number of vectors does not agree (RHS: %d, Motif RHS: %d).\n",
00052 r->get_num_vectors(), motif_positions_rhs->get_num_vectors());
00053
00054 set_wd_weights();
00055 CStringKernel<char>::init(l, r);
00056 return init_normalizer();
00057 }
00058
00059 void CRegulatoryModulesStringKernel::set_motif_positions(
00060 CSimpleFeatures<uint16_t>* positions_lhs, CSimpleFeatures<uint16_t>* positions_rhs)
00061 {
00062 ASSERT(positions_lhs);
00063 ASSERT(positions_rhs);
00064 SG_UNREF(motif_positions_lhs);
00065 SG_UNREF(motif_positions_rhs);
00066 if (positions_lhs->get_num_features() != positions_rhs->get_num_features())
00067 SG_ERROR("Number of dimensions does not agree.\n");
00068
00069 motif_positions_lhs=positions_lhs;
00070 motif_positions_rhs=positions_rhs;
00071 SG_REF(positions_lhs);
00072 SG_REF(positions_rhs);
00073 }
00074
00075 float64_t CRegulatoryModulesStringKernel::compute(int32_t idx_a, int32_t idx_b)
00076 {
00077 ASSERT(motif_positions_lhs);
00078 ASSERT(motif_positions_rhs);
00079
00080 int32_t alen, blen;
00081 bool free_avec, free_bvec;
00082 char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00083 char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00084
00085 int32_t alen_pos, blen_pos;
00086 bool afree_pos, bfree_pos;
00087 uint16_t* positions_a = motif_positions_lhs->get_feature_vector(idx_a, alen_pos, afree_pos);
00088 uint16_t* positions_b = motif_positions_rhs->get_feature_vector(idx_b, blen_pos, bfree_pos);
00089 ASSERT(alen_pos==blen_pos);
00090 int32_t num_pos=alen_pos;
00091
00092
00093 float64_t result_rbf=0;
00094 float64_t result_wds=0;
00095
00096 for (int32_t p=0; p<num_pos; p++)
00097 {
00098 result_rbf+=CMath::sq(positions_a[p]-positions_b[p]);
00099
00100 for (int32_t p2=0; p2<num_pos; p2++)
00101 result_rbf+=CMath::sq( (positions_a[p]-positions_a[p2]) - (positions_b[p]-positions_b[p2]) );
00102
00103 int32_t limit = window;
00104 if (window + positions_a[p] > alen)
00105 limit = alen - positions_a[p];
00106
00107 if (window + positions_b[p] > blen)
00108 limit = CMath::min(limit, blen - positions_b[p]);
00109
00110 result_wds+=compute_wds(&avec[positions_a[p]], &bvec[positions_b[p]],
00111 limit);
00112 }
00113
00114 float64_t result=exp(-result_rbf/width)+result_wds;
00115
00116 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00117 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00118 ((CSimpleFeatures<uint16_t>*) lhs)->free_feature_vector(positions_a, idx_a, afree_pos);
00119 ((CSimpleFeatures<uint16_t>*) rhs)->free_feature_vector(positions_b, idx_b, bfree_pos);
00120
00121 return result;
00122 }
00123
00124 float64_t CRegulatoryModulesStringKernel::compute_wds(
00125 char* avec, char* bvec, int32_t len)
00126 {
00127 float64_t* max_shift_vec = new float64_t[shift];
00128 float64_t sum0=0 ;
00129 for (int32_t i=0; i<shift; i++)
00130 max_shift_vec[i]=0 ;
00131
00132
00133 for (int32_t i=0; i<len; i++)
00134 {
00135 if ((position_weights!=NULL) && (position_weights[i]==0.0))
00136 continue ;
00137
00138 float64_t sumi = 0.0 ;
00139 for (int32_t j=0; (j<degree) && (i+j<len); j++)
00140 {
00141 if (avec[i+j]!=bvec[i+j])
00142 break ;
00143 sumi += weights[j];
00144 }
00145 if (position_weights!=NULL)
00146 sum0 += position_weights[i]*sumi ;
00147 else
00148 sum0 += sumi ;
00149 } ;
00150
00151 for (int32_t i=0; i<len; i++)
00152 {
00153 for (int32_t k=1; (k<=shift) && (i+k<len); k++)
00154 {
00155 if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00156 continue ;
00157
00158 float64_t sumi1 = 0.0 ;
00159
00160 for (int32_t j=0; (j<degree) && (i+j+k<len); j++)
00161 {
00162 if (avec[i+j+k]!=bvec[i+j])
00163 break ;
00164 sumi1 += weights[j];
00165 }
00166 float64_t sumi2 = 0.0 ;
00167
00168 for (int32_t j=0; (j<degree) && (i+j+k<len); j++)
00169 {
00170 if (avec[i+j]!=bvec[i+j+k])
00171 break ;
00172 sumi2 += weights[j];
00173 }
00174 if (position_weights!=NULL)
00175 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00176 else
00177 max_shift_vec[k-1] += sumi1 + sumi2 ;
00178 } ;
00179 }
00180
00181 float64_t result = sum0 ;
00182 for (int32_t i=0; i<shift; i++)
00183 result += max_shift_vec[i]/(2*(i+1)) ;
00184
00185 delete[] max_shift_vec;
00186 return result ;
00187 }
00188
00189 void CRegulatoryModulesStringKernel::set_wd_weights()
00190 {
00191 ASSERT(degree>0);
00192
00193 delete[] weights;
00194 weights=new float64_t[degree];
00195
00196 int32_t i;
00197 float64_t sum=0;
00198 for (i=0; i<degree; i++)
00199 {
00200 weights[i]=degree-i;
00201 sum+=weights[i];
00202 }
00203
00204 for (i=0; i<degree; i++)
00205 weights[i]/=sum;
00206 }