MultitaskKernelPlifNormalizer.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2010 Christian Widmer
00008  * Copyright (C) 2010 Max-Planck-Society
00009  */
00010 
00011 #ifndef _MULTITASKKERNELPLIFNORMALIZER_H___
00012 #define _MULTITASKKERNELPLIFNORMALIZER_H___
00013 
00014 #include "kernel/KernelNormalizer.h"
00015 #include "kernel/MultitaskKernelMklNormalizer.h"
00016 #include "kernel/Kernel.h"
00017 #include <algorithm>
00018 
00019 
00020 
00021 namespace shogun
00022 {
00026 class CMultitaskKernelPlifNormalizer: public CMultitaskKernelMklNormalizer
00027 {
00028 
00029 public:
00030 
00033     CMultitaskKernelPlifNormalizer(std::vector<float64_t> support_, std::vector<int32_t> task_vector)
00034     {
00035 
00036         num_betas = static_cast<int>(support_.size());
00037 
00038         support = support_;
00039 
00040         // init support points values with constant function
00041         betas = std::vector<float64_t>(num_betas);
00042         for (int i=0; i!=num_betas; i++)
00043         {
00044             betas[i] = 1;
00045         }
00046 
00047         num_tasks = get_num_unique_tasks(task_vector);
00048 
00049         // set both sides equally
00050         set_task_vector(task_vector);
00051 
00052         // init distance matrix
00053         distance_matrix = std::vector<float64_t>(num_tasks * num_tasks);
00054 
00055         // init similarity matrix
00056         similarity_matrix = std::vector<float64_t>(num_tasks * num_tasks);
00057 
00058     }
00059 
00060 
00066     inline virtual float64_t normalize(float64_t value, int32_t idx_lhs,
00067             int32_t idx_rhs)
00068     {
00069 
00070         //lookup tasks
00071         int32_t task_idx_lhs = task_vector_lhs[idx_lhs];
00072         int32_t task_idx_rhs = task_vector_rhs[idx_rhs];
00073 
00074         //lookup similarity
00075         float64_t task_similarity = get_task_similarity(task_idx_lhs,
00076                 task_idx_rhs);
00077 
00078         //take task similarity into account
00079         float64_t similarity = (value/scale) * task_similarity;
00080 
00081 
00082         return similarity;
00083 
00084     }
00085 
00091     int32_t get_num_unique_tasks(std::vector<int32_t> vec) {
00092 
00093         //sort
00094         std::sort(vec.begin(), vec.end());
00095 
00096         //reorder tasks with unique prefix
00097         std::vector<int32_t>::iterator endLocation = std::unique(vec.begin(), vec.end());
00098 
00099         //count unique tasks
00100         int32_t num_vec = std::distance(vec.begin(), endLocation);
00101 
00102         return num_vec;
00103 
00104     }
00105 
00106 
00108     virtual ~CMultitaskKernelPlifNormalizer()
00109     {
00110     }
00111 
00112 
00114     void update_cache()
00115     {
00116 
00117 
00118         for (int32_t i=0; i!=num_tasks; i++)
00119         {
00120             for (int32_t j=0; j!=num_tasks; j++)
00121             {
00122 
00123                 float64_t similarity = compute_task_similarity(i, j);
00124                 set_task_similarity(i,j,similarity);
00125 
00126             }
00127 
00128         }
00129     }
00130 
00131 
00133     float64_t compute_task_similarity(int32_t task_a, int32_t task_b)
00134     {
00135 
00136         float64_t distance = get_task_distance(task_a, task_b);
00137         float64_t similarity = -1;
00138 
00139         int32_t upper_bound_idx = -1;
00140 
00141 
00142         // determine interval
00143         for (int i=1; i!=num_betas; i++)
00144         {
00145             if (distance <= support[i])
00146             {
00147                 upper_bound_idx = i;
00148                 break;
00149             }
00150         }
00151 
00152         // perform interpolation (constant for beyond upper bound)
00153         if (upper_bound_idx == -1)
00154         {
00155 
00156             similarity = betas[num_betas-1];
00157 
00158         } else {
00159 
00160             int32_t lower_bound_idx = upper_bound_idx - 1;
00161             float64_t interval_size = support[upper_bound_idx] - support[lower_bound_idx];
00162 
00163             float64_t factor_lower = 1 - (distance - support[lower_bound_idx]) / interval_size;
00164             float64_t factor_upper = 1 - factor_lower;
00165 
00166             similarity = factor_lower*betas[lower_bound_idx] + factor_upper*betas[upper_bound_idx];
00167 
00168         }
00169 
00170         return similarity;
00171 
00172     }
00173 
00174 
00175 public:
00176 
00178     virtual std::vector<int32_t> get_task_vector_lhs() const
00179     {
00180         return task_vector_lhs;
00181     }
00182 
00184     virtual void set_task_vector_lhs(std::vector<int32_t> vec)
00185     {
00186         task_vector_lhs = vec;
00187     }
00188 
00190     virtual std::vector<int32_t> get_task_vector_rhs() const
00191     {
00192         return task_vector_rhs;
00193     }
00194 
00196     virtual void set_task_vector_rhs(std::vector<int32_t> vec)
00197     {
00198         task_vector_rhs = vec;
00199     }
00200 
00202     virtual void set_task_vector(std::vector<int32_t> vec)
00203     {
00204         task_vector_lhs = vec;
00205         task_vector_rhs = vec;
00206     }
00207 
00213     float64_t get_task_distance(int32_t task_lhs, int32_t task_rhs)
00214     {
00215 
00216         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00217         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00218 
00219         return distance_matrix[task_lhs * num_tasks + task_rhs];
00220 
00221     }
00222 
00228     void set_task_distance(int32_t task_lhs, int32_t task_rhs,
00229             float64_t distance)
00230     {
00231 
00232         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00233         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00234 
00235         distance_matrix[task_lhs * num_tasks + task_rhs] = distance;
00236 
00237     }
00238 
00244     float64_t get_task_similarity(int32_t task_lhs, int32_t task_rhs)
00245     {
00246 
00247         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00248         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00249 
00250         return similarity_matrix[task_lhs * num_tasks + task_rhs];
00251 
00252     }
00253 
00259     void set_task_similarity(int32_t task_lhs, int32_t task_rhs,
00260             float64_t similarity)
00261     {
00262 
00263         ASSERT(task_lhs < num_tasks && task_lhs >= 0);
00264         ASSERT(task_rhs < num_tasks && task_rhs >= 0);
00265 
00266         similarity_matrix[task_lhs * num_tasks + task_rhs] = similarity;
00267 
00268     }
00269 
00273     float64_t get_beta(int32_t idx)
00274     {
00275 
00276         return betas[idx];
00277 
00278     }
00279 
00284     void set_beta(int32_t idx, float64_t weight)
00285     {
00286 
00287         betas[idx] = weight;
00288 
00289         update_cache();
00290 
00291     }
00292 
00296     int32_t get_num_betas()
00297     {
00298 
00299         return num_betas;
00300 
00301     }
00302 
00303 
00305     inline virtual const char* get_name() const
00306     {
00307         return "MultitaskKernelNormalizer";
00308     }
00309 
00310 protected:
00311 
00313     int32_t num_tasks;
00314 
00316     std::vector<int32_t> task_vector_lhs;
00317 
00319     std::vector<int32_t> task_vector_rhs;
00320 
00322     std::vector<float64_t> distance_matrix;
00323 
00325     std::vector<float64_t> similarity_matrix;
00326 
00328     int32_t num_betas;
00329 
00331     std::vector<float64_t> betas;
00332 
00334     std::vector<float64_t> support;
00335 
00336 };
00337 }
00338 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation