SHOGUN  3.2.1
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义 
DependenceMaximization.cpp
浏览该文件的文档.
1 /*
2  * Copyright (c) The Shogun Machine Learning Toolbox
3  * Written (w) 2014 Soumyajit De
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * The views and conclusions contained in the software and documentation are those
27  * of the authors and should not be interpreted as representing official policies,
28  * either expressed or implied, of the Shogun Development Team.
29  */
30 
31 #include <shogun/lib/SGMatrix.h>
32 #include <shogun/labels/Labels.h>
36 
37 using namespace shogun;
38 
41 {
42  init();
43 }
44 
45 void CDependenceMaximization::init()
46 {
47  SG_ADD((CSGObject**)&m_estimator, "estimator",
48  "the estimator for computing measures", MS_NOT_AVAILABLE);
49  SG_ADD((CSGObject**)&m_labels_feats, "labels_feats",
50  "the features based on labels", MS_NOT_AVAILABLE);
51 
52  m_estimator=NULL;
53  m_labels_feats=NULL;
54 }
55 
57 {
60 }
61 
62 bool CDependenceMaximization::init(CFeatures* features)
63 {
64  REQUIRE(features, "Features are not initialized!\n");
65  REQUIRE(features->get_feature_class()==C_DENSE ||
66  features->get_feature_class()==C_SPARSE,
67  "Only allowed for dense/sparse features! Provided an instance of "
68  "%s which is of class %d!\n",
69  features->get_name(), features->get_feature_class());
70  REQUIRE(features->get_feature_type()==F_DREAL, "Only allowed for "
71  "features of double type! Provided %d!\n",
72  features->get_feature_type());
73 
74  return true;
75 }
76 
78  index_t idx)
79 {
80  SG_DEBUG("Entering!\n");
81 
82  // remove the dimension specified by the index, i.e. get X\X_i
83  // NULL check is handled in CFeatureSelection::get_num_features call
84  index_t num_features=get_num_features(features);
85  REQUIRE(num_features>idx, "Specified dimension to remove (%d) is greater "
86  "than the total number of current features (%d)!\n",
87  idx, num_features);
88 
89  SGVector<index_t> dims(num_features-1);
90  index_t n_dims=0;
91  for (index_t i=0; i<num_features; ++i)
92  {
93  if (i!=idx)
94  dims[n_dims++]=i;
95  }
96 
98  dims.display_vector("dims");
99 
100  // the following already does a SG_REF on the newly created feature
101  SG_DEBUG("Leaving!\n");
102  return features->copy_dimension_subset(dims);
103 }
104 
106  index_t idx)
107 {
108  SG_DEBUG("Entering!\n");
109 
110  // remove the dimension (feat) specified by the index idx
111  CFeatures* reduced_feats=create_transformed_copy(features, idx);
112  ASSERT(reduced_feats);
113 
114  // perform an independence test for X\X_i ~ p and Y ~ q with
115  // H_0: P(X\X_i, Y) = P(X\X_i) * P(Y)
116  // the test statistic can then be used as a measure of dependence
117  // See CIndependenceTest class documentation for details
118  m_estimator->set_p(reduced_feats);
120 
121  SG_DEBUG("statistic = %f!\n", statistic);
122 
123  SG_UNREF(reduced_feats);
124 
125  SG_DEBUG("Leaving!\n");
126  return statistic;
127 }
128 
130  SGVector<index_t> argsorted)
131 {
132  SG_DEBUG("Entering!\n");
133 
134  REQUIRE(m_num_remove>0, "Number or percentage of features to be removed is "
135  "not set! Please use set_num_remove() to set this!\n");
137  "Only N_LARGEST and PERCENTILE_LARGEST removal policy can work "
138  "with %s!\n", get_name());
139  REQUIRE(features, "Features is not intialized!\n");
140  REQUIRE(argsorted.vector, "The argsorted vector is not initialized!\n");
141  REQUIRE(get_num_features(features)==argsorted.vlen,
142  "argsorted vector should be equal to the number of features (%d)! "
143  "But it was %d!\n", argsorted.vlen);
144 
145  // compute a threshold to remove for both the policies
146  index_t threshold=m_num_remove;
148  threshold*=argsorted.vlen*0.01;
149 
150  // make sure that the threshold is valid given the current number of feats
151  REQUIRE(threshold<argsorted.vlen, "The threshold of removal is too high "
152  "(asked to remove %d features out of %d)! Please use a smaller "
153  "number for removal using set_num_remove() call",
154  threshold, argsorted.vlen);
155 
156  // remove the highest rank holders by storing indices
157  SGVector<index_t> inds(argsorted.vlen-threshold);
158  memcpy(inds.vector, argsorted.vector, sizeof(index_t)*inds.vlen);
159 
160  // sorting the indices to get the original order
161  inds.qsort();
163  inds.display_vector("selected feats");
164 
165  // copy rest of the features and SG_UNREF the original feat obj
166  CFeatures* reduced_feats=features->copy_dimension_subset(inds);
167 
168  // add the selected features to the subset
170  m_subset->add_subset(inds);
171 
172  SG_UNREF(features);
173 
174  SG_DEBUG("Leaving!\n");
175  return reduced_feats;
176 }
177 
179 {
180  REQUIRE(policy==N_LARGEST || policy==PERCENTILE_LARGEST,
181  "Only N_LARGEST and PERCENTILE_LARGEST removal policy can work "
182  "with %s!\n", get_name());
183  m_policy=policy;
184 }
185 
187 {
188  // NULL check is handled in base class CFeatureSelection
190 
191  // convert the CLabels object to CDenseFeatures
193 
194  SGMatrix<float64_t> labels_matrix(1, m_labels->get_num_labels());
195  for (index_t i=0; i<labels_matrix.num_cols; ++i)
196  labels_matrix.matrix[i]=m_labels->get_value(i);
197 
198  m_labels_feats=new CDenseFeatures<float64_t>(labels_matrix);
200 
201  // we need to set this to the estimator which is set internally
204 }
virtual const char * get_name() const =0
virtual float64_t get_value(int32_t idx)
Definition: Labels.cpp:59
int32_t index_t
Definition: common.h:62
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43
virtual int32_t get_num_labels() const =0
virtual const char * get_name() const
#define REQUIRE(x,...)
Definition: SGIO.h:206
virtual void set_p(CFeatures *p)
virtual float64_t compute_measures(CFeatures *features, index_t idx)
#define SG_REF(x)
Definition: SGObject.h:51
Template class CFeatureSelection, base class for all feature selection preprocessors which select a s...
void display_vector(const char *name="vector", const char *prefix="") const
Definition: SGVector.cpp:426
virtual void set_labels(CLabels *labels)
index_t vlen
Definition: SGVector.h:637
virtual void add_subset(SGVector< index_t > subset)
Definition: SubsetStack.cpp:80
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
double float64_t
Definition: common.h:50
virtual EFeatureClass get_feature_class() const =0
virtual bool init(CFeatures *features)
EMessageType get_loglevel() const
Definition: SGIO.cpp:285
#define SG_UNREF(x)
Definition: SGObject.h:52
virtual CFeatures * remove_feats(CFeatures *features, SGVector< index_t > ranks)
#define SG_DEBUG(...)
Definition: SGIO.h:107
virtual CFeatures * copy_dimension_subset(SGVector< index_t > dims)
Definition: Features.cpp:348
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
index_t get_num_features(CFeatures *features) const
The class Features is the base class of all feature objects.
Definition: Features.h:68
virtual void set_labels(CLabels *labels)
virtual CFeatures * create_transformed_copy(CFeatures *features, index_t idx)
virtual void set_policy(EFeatureRemovalPolicy policy)
#define SG_ADD(...)
Definition: SGObject.h:81
virtual float64_t compute_statistic()=0
virtual EFeatureType get_feature_type() const =0
virtual void set_q(CFeatures *q)

SHOGUN 机器学习工具包 - 项目文档