faif
Validator.hpp
1 #ifndef FAIF_VALIDATOR_H_
2 #define FAIF_VALIDATOR_H_
3 
4 // File with classes and functions to check classifier quality and error
5 
6 #include <algorithm>
7 #include <cassert>
8 #include <vector>
9 #include <boost/lambda/lambda.hpp>
10 
11 #include <boost/noncopyable.hpp>
12 #include <boost/scoped_ptr.hpp>
13 
14 #include "Classifier.hpp"
15 #include "../utils/Random.hpp"
16 
17 namespace faif {
18  namespace ml {
19 
20  namespace {
21  /** helping functor (predicate), return true if the classifier result equal to given category */
22  template<typename Val>
23  struct CheckExampleFunctor {
24  Classifier<Val>& cl_;
25  CheckExampleFunctor(Classifier<Val>& c) : cl_(c)
26  { }
27 
28  bool operator()(const typename Classifier<Val>::ExampleTrain& example) {
29  return cl_.getCategory(example) == example.getFeature();
30  }
31  private:
32  CheckExampleFunctor& operator=(const CheckExampleFunctor&); //forbidden
33  };
34  } //namespace
35 
36  /**
37  \brief check the classifier
38  \return the number of test examples correctly classified
39 
40  \param test examples
41  \param classifier
42  */
43  template<typename Val>
44  int checkClassifier(const typename Classifier<Val>::ExamplesTrain& test, Classifier<Val>& classifier ) {
45  CheckExampleFunctor<Val> checkFunctor(classifier);
46  return static_cast<int>( std::count_if( test.begin(), test.end(), checkFunctor ) );
47  }
48 
49 
50  /** f. pomocnicza, zwraca liczbe prawidlowo zaklasyfikowanych przykladow z tcollect.
51  Przyklady testujace to <start_idx, end_idx). Reszta - to przyklady trenujace. */
52  template<typename Val>
53  int testRange(std::vector<const typename Classifier<Val>::ExampleTrain*>& tcollect, int start_idx, int end_idx, Classifier<Val>& classifier ) {
54 
55  typename Classifier<Val>::ExamplesTrain test;
56  typename Classifier<Val>::ExamplesTrain train;
57 
58  typename std::vector<const typename Classifier<Val>::ExampleTrain* >::iterator it = tcollect.begin();
59  for(int idx=0; it != tcollect.end(); ++it, ++idx ) {
60  if( idx < start_idx )
61  train.push_back( **it );
62  else if( idx < end_idx )
63  test.push_back( **it );
64  else
65  train.push_back( **it );
66  }
67 
68  classifier.reset();
69  classifier.train(train);
70  return checkClassifier( test, classifier );
71  }
72 
73  namespace {
74 
75  /** pomocniczy funktor */
76  struct ShuffleFunctor {
77  RandomInt& r_;
78  ShuffleFunctor(RandomInt& r)
79  : r_(r) { }
80  int operator()(int){ return r_(); }
81  private:
82  ShuffleFunctor& operator=(const ShuffleFunctor&); //zabronione przypisanie bo skladowe referencyjne
83  };
84 
85  } //namespace
86 
87 
88 
89  /**
90  check the classifier, return the probability of proper classification result.
91  The example set is divided on k sections (randomly), one section is the testing set,
92  the rest k-1 sections are the training set. This test is repeated k times.
93 
94  \param examples training examples (the part is randomly choosen as testing)
95  \param k num sections for cross-validation
96  \param classifier classifier
97  \return the probability of proper classification
98  */
99  template<typename Val>
100  double checkCross( const typename Classifier<Val>::ExamplesTrain& examples, int k, Classifier<Val>& classifier) {
101 
102  typedef typename Classifier<Val>::ExampleTrain ExampleTrain;
103 
104  int n = (int)examples.size();
105 
106  assert( k > 0 );
107  assert( n >= k );
108 
109  typedef std::vector<const ExampleTrain*> TrainCollection;
110  TrainCollection tcollect( n );
111  std::transform( examples.begin(), examples.end(), tcollect.begin(), & boost::lambda::_1 );
112  RandomInt gen(0, n-1);
113  ShuffleFunctor shuffleFunctor( gen );
114  std::random_shuffle( tcollect.begin(), tcollect.end(), shuffleFunctor );
115 
116  int start_idx = 0;
117  int end_idx = 0;
118 
119  int num_proper = 0; //liczba prawidlowo zaklasyfikowanych przykladow testujacych
120 
121  for(int i = 0; i < k; i++ ) {
122  start_idx = end_idx;
123  end_idx = (n * (i + 1) )/k;
124  //k-ty przedzial jest przedzialem testujacym
125  num_proper += testRange<Val>(tcollect, start_idx, end_idx, classifier );
126  }
127 
128  int num_all = static_cast<int>( tcollect.size() );
129  return num_proper /(double)num_all ;
130  }
131 
132  } //namespace ml
133 } //namespace faif
134 
135 #endif //FAIF_VALIDATOR_H_
PointAndFeature< Val, AttrIdd, InitValueId > ExampleTrain
the train example (test example and the category)
Definition: Classifier.hpp:78
Definition: Chain.h:17
virtual void train(const ExamplesTrain &)=0
learn classifier (on the collection of training examples)
int checkClassifier(const typename Classifier< Val >::ExamplesTrain &test, Classifier< Val > &classifier)
check the classifier
Definition: Validator.hpp:44
point and some feature
Definition: Point.hpp:58
inner class - examples train collection
Definition: Classifier.hpp:82
double checkCross(const typename Classifier< Val >::ExamplesTrain &examples, int k, Classifier< Val > &classifier)
Definition: Validator.hpp:100
the uniform distribution for int, in range <min,max>, uses RandomSingleton
Definition: Random.hpp:107
virtual void reset()=0
the classifier will have no knowledge
int testRange(std::vector< const typename Classifier< Val >::ExampleTrain * > &tcollect, int start_idx, int end_idx, Classifier< Val > &classifier)
Definition: Validator.hpp:53
the classifier interface
Definition: Classifier.hpp:43