SHOGUN  v3.2.0
 All  Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros  
TwoDistributionsTestStatistic.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012-2013 Heiko Strathmann
8  */
9 
12 
13 using namespace shogun;
14 
17 {
18  init();
19 }
20 
22  CFeatures* p_and_q,
23  index_t m) : CTestStatistic()
24 {
25  init();
26 
27  m_p_and_q=p_and_q;
29 
30  m_m=m;
31 }
32 
34  CFeatures* p, CFeatures* q) :
36 {
37  init();
38 
41 
42  m_m=p->get_num_vectors();
43 }
44 
46 {
48 }
49 
/* Shared set-up routine invoked at the top of every constructor body visible
 * in this listing. It hands the two members m_p_and_q and m_m to SG_ADD,
 * each with a name and a description string, and then resets both members to
 * their empty defaults.
 *
 * NOTE(review): SG_ADD is a macro defined in SGObject.h; presumably it
 * registers the member as a parameter of the object - confirm there.
 * NOTE(review): listing lines 53 and 55 (the tail of each SG_ADD call) are
 * absent from this extract, so the trailing arguments are not visible here. */
50 void CTwoDistributionsTestStatistic::init()
51 {
 /* m_p_and_q is passed as a CSGObject** and described as the concatenated
  * samples of p and q; m_m is described as the index of the first sample
  * of q */
52  SG_ADD((CSGObject**)&m_p_and_q, "p_and_q", "Concatenated samples p and q",
54  SG_ADD(&m_m, "m", "Index of first sample of q",
56 
 /* start without any features attached and with a zero index */
57  m_p_and_q=NULL;
58  m_m=0;
59 }
60 
62 {
63  SG_DEBUG("entering CTwoDistributionsTestStatistic::bootstrap_null()\n")
64 
65  REQUIRE(m_p_and_q, "CTwoDistributionsTestStatistic::bootstrap_null(): "
66  "No appended features p and q!\n");
67 
68  /* compute bootstrap statistics for null distribution */
70 
71  /* memory for index permutations. Adding of subset has to happen
72  * inside the loop since it may be copied if there already is one set */
73  SGVector<index_t> ind_permutation(2*m_m);
74  ind_permutation.range_fill();
75 
76  for (index_t i=0; i<m_bootstrap_iterations; ++i)
77  {
78  /* idea: merge features of p and q, shuffle, and compute statistic.
79  * This is done using subsets here */
80 
81  /* create index permutation and add as subset. This will mix samples
82  * from p and q */
83  SGVector<int32_t>::permute_vector(ind_permutation);
84 
85  /* compute statistic for this permutation of mixed samples */
86  m_p_and_q->add_subset(ind_permutation);
87  results[i]=compute_statistic();
89  }
90 
91  SG_DEBUG("leaving CTwoDistributionsTestStatistic::bootstrap_null()\n")
92  return results;
93 }
94 
96  float64_t statistic)
97 {
98  float64_t result=0;
99 
101  {
102  /* bootstrap a bunch of MMD values from null distribution */
104 
105  /* find out percentile of parameter "statistic" in null distribution */
106  values.qsort();
107  float64_t i=values.find_position_to_insert(statistic);
108 
109  /* return corresponding p-value */
110  result=1.0-i/values.vlen;
111  }
112  else
113  {
114  SG_ERROR("CTwoDistributionsTestStatistics::compute_p_value(): Unknown"
115  " method to approximate null distribution!\n");
116  }
117 
118  return result;
119 }
120 
122  float64_t alpha)
123 {
124  float64_t result=0;
125 
127  {
128  /* bootstrap a bunch of MMD values from null distribution */
130 
131  /* return value of (1-alpha) quantile */
132  result=values[index_t(CMath::floor(values.vlen*(1-alpha)))];
133  }
134  else
135  {
136  SG_ERROR("CTwoDistributionsTestStatistics::compute_threshold():"
137  "Unknown method to approximate null distribution!\n");
138  }
139 
140  return result;
141 }
142 
144 {
145  /* ref before unref to avoid problems when instances are equal */
146  SG_REF(p_and_q);
148  m_p_and_q=p_and_q;
149 }
150 
152 {
153  SG_REF(m_p_and_q);
154  return m_p_and_q;
155 }
156 
virtual float64_t compute_statistic()=0
virtual float64_t compute_threshold(float64_t alpha)
index_t find_position_to_insert(T element)
Definition: SGVector.cpp:237
int32_t index_t
Definition: common.h:60
#define SG_UNREF(x)
Definition: SGRefObject.h:35
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:131
#define REQUIRE(x,...)
Definition: SGIO.h:208
static float64_t floor(float64_t d)
Definition: Math.h:208
static void permute_vector(SGVector< T > vec)
Definition: SGVector.cpp:736
virtual CFeatures * create_merged_copy(CList *others)
Definition: Features.h:229
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:102
double float64_t
Definition: common.h:48
#define SG_REF(x)
Definition: SGRefObject.h:34
void range_fill(T start=0)
Definition: SGVector.cpp:145
Test statistic base class. Provides an interface for statistical tests via three methods: compute_statistic(), compute_p_value() and compute_threshold().
Definition: TestStatistic.h:47
#define SG_DEBUG(...)
Definition: SGIO.h:109
virtual void remove_subset()
Definition: Features.cpp:313
virtual float64_t compute_p_value(float64_t statistic)
The class Features is the base class of all feature objects.
Definition: Features.h:62
#define SG_ADD(...)
Definition: SGObject.h:71
ENullApproximationMethod m_null_approximation_method
virtual void add_subset(SGVector< index_t > subset)
Definition: Features.cpp:307
index_t vlen
Definition: SGVector.h:706

SHOGUN Machine Learning Toolbox - Documentation