faif
NaiveBayesian.hpp
1 // The Naive Bayesian classifier
2 
3 #ifndef FAIF_NAIVE_BAYESIAN_HPP_
4 #define FAIF_NAIVE_BAYESIAN_HPP_
5 
6 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
7 //msvc14.0 warnings for Boost.Serialization
8 #pragma warning(disable:4100)
9 #pragma warning(disable:4512)
10 #endif
11 
12 #include <string>
13 #include <memory>
14 #include <algorithm>
15 
16 #include <boost/bind.hpp>
17 #include <boost/serialization/split_member.hpp>
18 #include <boost/serialization/base_object.hpp>
19 #include <boost/serialization/nvp.hpp>
20 #include <boost/serialization/map.hpp>
21 
22 #include "Classifier.hpp"
23 
24 namespace faif {
25  namespace ml {
26 
27  /** \brief Naive Bayesian Classifier.
28 
29  Contains the attributes, attribute values and categories,
30  train examples, test examples and classifier methods.
31  */
32  template<typename Val>
33  class NaiveBayesian : public Classifier<Val> {
34  public:
35  typedef typename Classifier<Val>::AttrValue AttrValue;
36  typedef typename Classifier<Val>::AttrDomain AttrDomain;
37  typedef typename Classifier<Val>::AttrIdd AttrIdd;
39  typedef typename Classifier<Val>::Domains Domains;
40  typedef typename Classifier<Val>::Beliefs Beliefs;
44  public:
45  NaiveBayesian();
46  NaiveBayesian(const Domains& attr_domains, const AttrDomain& category_domain);
47  virtual ~NaiveBayesian() { }
48 
49  /** the clear the learned parameters */
50  virtual void reset();
51 
52  /** \brief learn classifier (on the collection of training examples) */
53  virtual void train(const ExamplesTrain& e) {
54  std::for_each( e.begin(), e.end(), boost::bind( &NaiveBayesian::trainIncremental, this, _1 ) );
55  }
56 
57  /** classify (Naive Bayes Classifier) */
58  virtual AttrIdd getCategory(const ExampleTest&) const;
59 
60  /** \brief classify and return all classes with belief that the example is from given class */
61  virtual Beliefs getCategories(const ExampleTest&) const;
62 
63  /** incremental learn - add training example */
64  void trainIncremental(const ExampleTrain&);
65 
66  /** the ostream method */
67  virtual void write(std::ostream& os) const;
68 
69  /** change the internal obj to classify add return result of classification */
70  AttrIdd switchGetCategory(const ExampleTest& example);
71  /** change the internal obj to classify add return result of classification */
72  Beliefs switchGetCategories(const ExampleTest& example);
73  /** change the internal obj to train and add new example */
74  void switchAddTraining(const ExampleTrain& example);
75  /** change the internal obj to classify, because this object store internal state */
76  void switchLoadSaveState();
77  private:
78  /** change the internal obj to classify if necessary */
79  void loadSaveState() const;
80 
81  /** \brief serialization using boost::serialization */
83 
84  template<class Archive>
85  void save(Archive & ar, const unsigned int /* file_version */) const;
86 
87  template<class Archive>
88  void load(Archive & ar, const unsigned int /* file_version */);
89 
90  template<class Archive>
91  void serialize( Archive &ar, const unsigned int file_version ){
92  boost::serialization::split_member(ar, *this, file_version);
93  }
94  private:
95  /** copy c-tor not allowed */
97  /** assignment not allowed */
98  NaiveBayesian& operator=(const NaiveBayesian&);
99 
100  //forward declaration
101  class NaiveBayesianTraining;
102 
103  std::unique_ptr<NaiveBayesianTraining> impl_;
104 
105  /** \brief internal class to connect category and counter or probability
106 
107  the category collection; connection between category and attribute value.
108  In learining mode used to count train examples,
109  in classifier mode used to calculate probability
110  */
111  template<class T> class CategoryData {
112  public:
113  typedef std::map<AttrIdd,T> AttrData;
114 
115  CategoryData() : data_(0), attrData_() { }
116  CategoryData(const T& d) : data_(d), attrData_() { }
117  CategoryData(const T& d, const AttrData& ad) : data_(d), attrData_(ad) { }
118  CategoryData(const CategoryData& cd) : data_(cd.data_), attrData_(cd.attrData_) { }
119  CategoryData& operator=(const CategoryData& cd) {
120  data_ = cd.data_;
121  attrData_ = cd.attrData_;
122  return *this;
123  }
124  ~CategoryData(){}
125 
126  T data_;
127  AttrData attrData_;
128  private:
129  /** \brief serialization using boost::serialization */
130  friend class boost::serialization::access;
131 
132  template<class Archive>
133  void save(Archive & ar, const unsigned int /* file_version */) const {
134  ar << boost::serialization::make_nvp("Category", data_ );
135  ar << boost::serialization::make_nvp("Data", attrData_ );
136  }
137 
138  template<class Archive>
139  void load(Archive & ar, const unsigned int /* file_version */) {
140  ar >> boost::serialization::make_nvp("Category", data_ );
141  typedef std::map<AttrIddSerialize,T> Map;
142  Map m;
143  ar >> boost::serialization::make_nvp("Data", m );
144  attrData_.clear();
145  for(typename Map::const_iterator ii = m.begin(); ii != m.end(); ++ii) {
146  //transform from loaded std::pair (with not const key) to stored std::pair is required
147  attrData_.insert(typename AttrData::value_type(ii->first, ii->second) );
148  }
149  }
150 
151  template<class Archive>
152  void serialize( Archive &ar, const unsigned int file_version ){
153  boost::serialization::split_member(ar, *this, file_version);
154  }
155  };
156 
157 
158  /**
159  inner class - the learning state of Naive Bayesian Classifier
160  */
161  class NaiveBayesianTraining {
162  public:
163  typedef NaiveBayesian Classifier;
164  /** the data connected with each category */
165  typedef std::map<AttrIdd, CategoryData<int> > CategoryCounters;
166  /** the counter */
167  typedef typename CategoryData<int>::AttrData SimpleCounters;
168 
169  /** for load/save */
170  NaiveBayesianTraining() : parent_(0L) {}
171 
172  NaiveBayesianTraining(NaiveBayesian& parent) : parent_(&parent) {}
173 
174  virtual ~NaiveBayesianTraining() {}
175 
176  /** adds the training example, actualize counters */
177  virtual void addTraining(const ExampleTrain& example);
178 
179  /** classifies the given example. Here the re-load of classifier type and re-calling the method */
180  virtual AttrIdd getCategory(const ExampleTest& example) {
181  return parent_->switchGetCategory(example);
182  }
183 
184  /** classifies the given example. Here the re-load of classifier type and re-calling the method */
185  virtual Beliefs getCategories(const ExampleTest& example) {
186  return parent_->switchGetCategories(example);
187  }
188 
189  /** the load or save state pushes re-load classifier, because internal state is stored in classify */
190  virtual void loadSaveState() {
191  parent_->switchLoadSaveState();
192  }
193 
194  /** the helping string */
195  virtual void write(std::ostream& os) const;
196 
197  /** the counter for given category */
198  int getCategoryCounter(AttrIdd cat_val) const;
199 
200  /** the counter for given category and attribute */
201  int getCategoryValCounter(AttrIdd cat_val, AttrIdd value) const;
202  protected:
203  NaiveBayesian* parent_;
204  private:
205  /** \brief serialization using boost::serialization */
206  friend class boost::serialization::access;
207  template<class Archive>
208  void serialize(Archive & ar, const unsigned int /* file_version */){
209  //state is stored in derived class NaiveBayesianTraining
210  ar & boost::serialization::make_nvp("Parent", parent_ );
211  }
212 
213  private:
214  /** the counters - for each category each non-zero occurence of attribute */
215  CategoryCounters counters_;
216  /** noncopyable */
217  NaiveBayesianTraining(const NaiveBayesianTraining&);
218  /** noncopyable */
219  NaiveBayesianTraining& operator=(const NaiveBayesianTraining&);
220  };
221 
222 
223  /** inner class - the classify state of Naive Bayesian Classifier */
224  class NaiveBayesianClasify : public NaiveBayesianTraining {
225  public:
226  typedef typename CategoryData<Probability>::AttrData Counters;
227 
228  typedef std::map<AttrIdd, CategoryData<Probability> > InternalProbabilities;
229 
230  //for load/save
231  NaiveBayesianClasify() {}
232 
233  NaiveBayesianClasify(NaiveBayesian& parent, const NaiveBayesianTraining& nb_train) : NaiveBayesianTraining(parent) {
234  //calculate probabilities
235  calculate(nb_train);
236  }
237 
238  virtual ~NaiveBayesianClasify() {}
239 
240  /** adds the training example */
241  virtual void addTraining(const ExampleTrain& example) {
242  this->parent_->switchAddTraining(example);
243  }
244 
245  /** classifies the given example. Using Naive Bayesian approach */
246  virtual AttrIdd getCategory(const ExampleTest& example);
247 
248  /** classifies the given example. Using Naive Bayesian approach, return the AttrIdd and belief pairs */
249  virtual Beliefs getCategories(const ExampleTest& example);
250 
251  /** the load or save state pushes re-load classifier, because internal state is stored in classify */
252  virtual void loadSaveState() {
253  //empty operation in this context
254  }
255 
256  /** the helping string */
257  virtual void write(std::ostream& os) const;
258 
259  /** the probability for given category */
260  Probability getCategoryCounter(AttrIdd cat_val) const;
261  /** the log-probability for given category and attribute */
262  Probability getCategoryCounterLog(AttrIdd cat_val) const;
263  /** the probability for given category and attribute */
264  Probability getCategoryValCounter(AttrIdd cat_val, AttrIdd value) const;
265  /** the log-probability for given category and attribute */
266  Probability getCategoryValCounterLog(AttrIdd cat_val, AttrIdd value) const;
267  private:
268  /** \brief serialization using boost::serialization */
269  friend class boost::serialization::access;
270 
271  template<class Archive>
272  void save(Archive & ar, const unsigned int /* file_version */) const;
273 
274  template<class Archive>
275  void load(Archive & ar, const unsigned int /* file_version */);
276 
277  template<class Archive>
278  void serialize( Archive &ar, const unsigned int file_version ){
279  boost::serialization::split_member(ar, *this, file_version);
280  }
281  private:
282  /** the internal probabilities */
283  InternalProbabilities probabl_;
284  /** calculate the probabilities */
285  void calculate(const NaiveBayesianTraining& nb_train);
286  /** calculate log-probability for given example and category */
287  Probability calcProbabilityForExample(const ExampleTest& example, AttrIdd cat_val) const;
288 
289  /** noncopyable */
290  NaiveBayesianClasify(const NaiveBayesianClasify&);
291  /** noncopyable */
292  NaiveBayesianClasify& operator=(const NaiveBayesianClasify&);
293  };
294 
295 
296  }; //class NaiveBayesian
297 
298  //////////////////////////////////////////////////////////////////////////////////////////////////
299  // class NaiveBayesian implementation
300  //////////////////////////////////////////////////////////////////////////////////////////////////
301 
302  template<typename Val>
303  NaiveBayesian<Val>::NaiveBayesian() : Classifier<Val>()
304  {
305  impl_.reset( new NaiveBayesianTraining(*this) );
306  }
307 
308  template<typename Val>
309  NaiveBayesian<Val>::NaiveBayesian(const Domains& attr_domains, const AttrDomain& category_domain)
310  : Classifier<Val>(attr_domains, category_domain)
311  {
312  impl_.reset( new NaiveBayesianTraining(*this) );
313  }
314 
315  /** the clear the learned parameters */
316  template<typename Val>
318  impl_.reset( new NaiveBayesianTraining(*this) );
319  }
320 
321  /** classify (Naive Bayes Classifier) */
322  template<typename Val>
323  typename NaiveBayesian<Val>::AttrIdd
324  NaiveBayesian<Val>::getCategory(const ExampleTest& example) const {
325  return impl_->getCategory(example);
326  }
327 
328  /** \brief classify and return all classes with belief that the example is from each class*/
329  template<typename Val>
330  typename NaiveBayesian<Val>::Beliefs
331  NaiveBayesian<Val>::getCategories(const ExampleTest& example) const {
332  return impl_->getCategories(example);
333  }
334 
335  /** incremental learn - add training example */
336  template<typename Val>
337  void NaiveBayesian<Val>::trainIncremental(const ExampleTrain& example) {
338  impl_->addTraining(example);
339  }
340 
341  /** ostraem method */
342  template<typename Val>
343  void NaiveBayesian<Val>::write(std::ostream& os) const {
345  os << std::endl << "State: ";
346  impl_->write(os);
347  os << std::endl;
348  }
349 
350  /** the internal state - load */
351  template<typename Val>
352  void NaiveBayesian<Val>::loadSaveState() const {
353  impl_->loadSaveState();
354  }
355 
356  /** change the internal obj to classify add return result of classification */
357  template<typename Val>
358  typename NaiveBayesian<Val>::AttrIdd
359  NaiveBayesian<Val>::switchGetCategory(const ExampleTest& example) {
360  NaiveBayesianClasify* classify = new NaiveBayesianClasify(*this, *impl_.get());
361  impl_.reset( classify );
362  return getCategory(example); //re-call the method
363  }
364 
365  /** change the internal obj to classify add return result of classification */
366  template<typename Val>
367  typename NaiveBayesian<Val>::Beliefs
368  NaiveBayesian<Val>::switchGetCategories(const ExampleTest& example) {
369  NaiveBayesianClasify* classify = new NaiveBayesianClasify(*this, *impl_.get());
370  impl_.reset( classify );
371  return getCategories(example); //re-call the method
372  }
373 
374  /** change the internal obj to train, clear the train, and add new example */
375  template<typename Val>
376  void NaiveBayesian<Val>::switchAddTraining(const ExampleTrain& example) {
377  reset();
378  trainIncremental(example); //re-call the method
379  }
380 
381  /** change the internal obj to classify and return the internal state */
382  template<typename Val>
384  NaiveBayesianClasify* classify = new NaiveBayesianClasify(*this, *impl_.get());
385  impl_.reset( classify );
386  }
387 
388 
389  template<typename Val>
390  template<class Archive>
391  void NaiveBayesian<Val>::save(Archive & ar, const unsigned int /* file_version */) const {
392  ar.template register_type<NaiveBayesianClasify>();
393  ar << boost::serialization::make_nvp("NBCBase", boost::serialization::base_object<Classifier<Val> >(*this) );
394  loadSaveState(); //change state to NaiveBayesianClassify, because only there are counters
395  const NaiveBayesianTraining* const t = impl_.get();
396  ar << boost::serialization::make_nvp("NBCImpl",t); //only raw pointer is stored
397  }
398 
399  template<typename Val>
400  template<class Archive>
401  void NaiveBayesian<Val>::load(Archive & ar, const unsigned int /* file_version */) {
402  ar.template register_type<NaiveBayesianClasify>();
403  ar >> boost::serialization::make_nvp("NBCBase", boost::serialization::base_object<Classifier<Val> >(*this) );
404  NaiveBayesianTraining* t;
405  ar >> boost::serialization::make_nvp("NBCImpl",t); //restore raw pointer
406  impl_.reset(t);
407  }
408 
409  //////////////////////////////////////////////////////////////////////////////////////////////////
410  // helping classes to print counters
411  // used by NaiveBayesian::NaiveBayesianTraining and NaiveBayesian::NaiveBayesianClasify
412  //////////////////////////////////////////////////////////////////////////////////////////////////
413 
/** \brief class to show the classifier state, print the attribs and counters

    Function object applied to one attribute domain: for every value of the
    domain it prints "value(counter)," where the counter is taken from
    categories_.getCategoryValCounter() for the fixed category catVal_, and
    ends the line afterwards.
*/
template<class Categories>
struct PrintCountAttrFunctor {

    typedef typename Categories::Classifier Cl;
    typedef typename Cl::AttrIdd AttrIdd;
    typedef typename Cl::AttrDomain AttrDomain;
    typedef typename Cl::Domains Domains;
    typedef typename Cl::ExamplesTrain ExamplesTrain;

    std::ostream& os_;
    AttrIdd catVal_;
    const Categories& categories_;

    PrintCountAttrFunctor(std::ostream& os, AttrIdd cat_val, const Categories& categories)
        : os_(os), catVal_(cat_val), categories_(categories) {
    }
    void operator()(const AttrDomain& attr) {
        for(typename AttrDomain::const_iterator ii = attr.begin(); ii != attr.end(); ++ii ) {
            AttrIdd val = &(*ii);
            // prints the counter of the given value within the given category
            os_ << val->get() << "(" << categories_.getCategoryValCounter(catVal_, val) << "),";
        }
        os_ << std::endl;
    }
private:
    PrintCountAttrFunctor& operator=(const PrintCountAttrFunctor&); //not allowed because of the reference members
};
442 
443  /** \brief print the cauters for given category */
444  template<class Categories>
446 
447  typedef typename Categories::Classifier Cl;
448  typedef typename Cl::AttrIdd AttrIdd;
449  typedef typename Cl::AttrDomain AttrDomain;
450  typedef typename Cl::Domains Domains;
451  typedef typename Cl::ExamplesTrain ExamplesTrain;
452  typedef typename Cl::Value Value;
453 
454  std::ostream& os_;
455  const Domains& attributes_;
456  const Categories& categories_;
457 
458  PrintCountersFunctor( std::ostream& os, const Domains& attrib, const Categories& categories)
459  : os_(os), attributes_(attrib), categories_(categories) {
460  }
461 
462  //void operator()(const ValueNominalString& cat_val) {
463  void operator()(const Value& cat_val) {
464  os_ << cat_val << "(" << categories_.getCategoryCounter(&cat_val) << "):" << std::endl;
465  PrintCountAttrFunctor<Categories> printCountAttr(os_, &cat_val, categories_);
466  std::for_each( attributes_.begin(), attributes_.end(), printCountAttr );
467  }
468  private:
469  PrintCountersFunctor& operator=(const PrintCountersFunctor&); //zabronione przypisanie bo skladowe referencyjne
470  };
471 
472  //////////////////////////////////////////////////////////////////////////////////////////////////
473  // class NaiveBayesian::NaiveBayesianTraining implementation
474  //////////////////////////////////////////////////////////////////////////////////////////////////
475 
476  /** adds the training example, actualize counters */
477  template<typename Val>
478  void NaiveBayesian<Val>::NaiveBayesianTraining::addTraining(const ExampleTrain& example) {
479  AttrIdd cat_val = example.getFeature();
480  typename CategoryCounters::iterator ii = counters_.find(cat_val);
481  if( ii != counters_.end() )
482  ++(*ii).second.data_;
483  else
484  ii = counters_.insert( typename CategoryCounters::value_type(cat_val,1) ).first;
485  assert( ii != counters_.end() );
486  SimpleCounters& count = (*ii).second.attrData_;
487  for(typename ExampleTrain::const_iterator i = example.begin(); i != example.end(); ++i ) {
488  AttrIdd value = *i;
489  typename SimpleCounters::iterator iii = count.find(value);
490  if( iii != count.end() )
491  ++(*iii).second; //zwieksza odpowiedni licznik
492  else
493  count.insert( std::pair<AttrIdd,int>(value,1) );
494  }
495  }
496 
497  /** ostream method */
498  template<typename Val>
499  void NaiveBayesian<Val>::NaiveBayesianTraining::write(std::ostream& os) const {
500  os << "TRAINING:" << std::endl;
501  PrintCountersFunctor<NaiveBayesianTraining> printCounters(os, parent_->getAttrDomains(), *this );
502  std::for_each(parent_->getCategoryDomain().begin(), parent_->getCategoryDomain().end(), printCounters );
503 
504  }
505 
506  /** the counter for given category */
507  template<typename Val>
509  typename CategoryCounters::const_iterator ii = counters_.find(cat_val);
510  if( ii != counters_.end() )
511  return (*ii).second.data_;
512  else
513  return 0;
514  }
515 
516  /** the counter for given category and attribute */
517  template<typename Val>
518  int NaiveBayesian<Val>::NaiveBayesianTraining::getCategoryValCounter(AttrIdd cat_val, AttrIdd value) const {
519  typename CategoryCounters::const_iterator ii = counters_.find(cat_val);
520  if( ii == counters_.end() )
521  return 0;
522  const SimpleCounters& count = (*ii).second.attrData_;
523  //przeszukuje dana kategorie;
524  typename SimpleCounters::const_iterator jj = count.find(value);
525  if( jj != count.end() )
526  return (*jj).second;
527  else
528  return 0;
529  }
530 
531  //////////////////////////////////////////////////////////////////////////////////////////////////
532  // class NaiveBayesian::NaiveBayesianClasify implementation
533  //////////////////////////////////////////////////////////////////////////////////////////////////
534 
535  /** classifies the given example. Using Naive Bayesian approach */
536  template<typename Val>
537  typename NaiveBayesian<Val>::AttrIdd
538  NaiveBayesian<Val>::NaiveBayesianClasify::getCategory(const ExampleTest& example) {
539  if( probabl_.empty() )
540  return AttrDomain::getUnknownId();
541 
542  AttrIdd cat_val_max = probabl_.begin()->first; //init not important
543  Probability max_prob = -std::numeric_limits<Probability>::max();
544  //look the categories and find the max prob of category for given example (compares the log of probability)
545  for(typename InternalProbabilities::const_iterator ii = probabl_.begin(); ii != probabl_.end(); ++ii ) {
546  AttrIdd cat_val = (*ii).first;
547  Probability prob = calcProbabilityForExample(example, cat_val);
548  if( prob > max_prob ) {
549  max_prob = prob;
550  cat_val_max = cat_val;
551  }
552  }
553  return cat_val_max;
554  }
555 
556  /** classifies the given example. Using Naive Bayesian approach, return the AttrIdd and belief pairs */
557  template<typename Val>
558  typename NaiveBayesian<Val>::Beliefs
559  NaiveBayesian<Val>::NaiveBayesianClasify::getCategories(const ExampleTest& example) {
560  Probability sum = 0.0;
561  for(typename InternalProbabilities::const_iterator ii = probabl_.begin(); ii != probabl_.end(); ++ii ) {
562  sum += std::exp( calcProbabilityForExample(example, (*ii).first ) );
563  }
564 
565  Beliefs toRet;
566  for(typename InternalProbabilities::const_iterator ii = probabl_.begin(); ii != probabl_.end(); ++ii ) {
567  AttrIdd cat_val = (*ii).first;
568  Probability prob = exp( calcProbabilityForExample(example, cat_val) ) / sum;
569  toRet.push_back(typename Beliefs::value_type(cat_val, prob));
570  }
571  std::sort( toRet.begin(), toRet.end() );
572  return toRet;
573  }
574 
575  /** ostream method */
576  template<typename Val>
577  void NaiveBayesian<Val>::NaiveBayesianClasify::write(std::ostream& os) const {
578  os << "CLASIFY:" << std::endl;
579  PrintCountersFunctor<NaiveBayesianClasify> printCounters(os, this->parent_->getAttrDomains(), *this );
580  std::for_each(this->parent_->getCategoryDomain().begin(), this->parent_->getCategoryDomain().end(), printCounters );
581  }
582 
583  /** the probability for given category */
584  template<typename Val>
585  Probability NaiveBayesian<Val>::NaiveBayesianClasify::getCategoryCounter(AttrIdd cat_val) const {
586  return std::exp( getCategoryCounterLog(cat_val) );
587  }
588 
589  /** the log-probability for given category and attribute */
590  template<typename Val>
591  Probability NaiveBayesian<Val>::NaiveBayesianClasify::getCategoryCounterLog(AttrIdd cat_val) const {
592  typename InternalProbabilities::const_iterator ii = probabl_.find(cat_val);
593  if( ii != probabl_.end() )
594  return (*ii).second.data_;
595  else
596  return 0.0;
597  }
598 
599  /** the probability for given category and attribute */
600  template<typename Val>
601  Probability NaiveBayesian<Val>::NaiveBayesianClasify::getCategoryValCounter(AttrIdd cat_val, AttrIdd value) const {
602  return exp( getCategoryValCounterLog(cat_val,value) );
603  }
604 
605  /** the log-probability for given category and attribute */
606  template<typename Val>
607  Probability NaiveBayesian<Val>::NaiveBayesianClasify::getCategoryValCounterLog(AttrIdd cat_val, AttrIdd value) const {
608  typename InternalProbabilities::const_iterator ii = probabl_.find(cat_val);
609  if( ii == probabl_.end() )
610  return 0.0;
611 
612  const Counters& counters = (*ii).second.attrData_;
613  //przeszukuje dana kategorie;
614  typename Counters::const_iterator jj = counters.find(value);
615  if( jj != counters.end() )
616  return (*jj).second;
617  else
618  return 0.0;
619  }
620 
621  namespace {
622  /** helping - calculates the probability of given value, laplace smoothing (m-szacowanie) and log */
623  Probability calcProbability( int val_count, int count_all, int val_size) {
624  return std::log((val_count + 1)/ (Probability)(count_all + val_size ));
625  }
626 
627  } //namespace
628 
629 
630  /** calculate the probabilities */
631  template<typename Val>
632  void NaiveBayesian<Val>::NaiveBayesianClasify::calculate(const NaiveBayesianTraining& nb_train) {
633 
634  //calculates the sum of category counters
635  int sumTraining = 0;
636  const AttrDomain& category = this->parent_->getCategoryDomain();
637  for(typename AttrDomain::const_iterator ii = category.begin(); ii != category.end(); ++ii ) {
638  AttrIdd catVal = AttrDomain::getValueId(ii);
639  sumTraining += nb_train.getCategoryCounter(catVal);
640  }
641 
642  //calculate probability fo each attribute value
643  int catSize = category.getSize();
644  for(typename AttrDomain::const_iterator ii = category.begin(); ii != category.end(); ++ii ) {
645  AttrIdd catVal = AttrDomain::getValueId(ii);
646  int catCounter = nb_train.getCategoryCounter(catVal);
647  Probability catProb = calcProbability( catCounter, catSize, sumTraining );
648  Counters counters; //empty counters
649  const Domains& attribs = this->parent_->getAttrDomains();
650  for(typename Domains::const_iterator jj = attribs.begin(); jj != attribs.end(); ++jj) {
651  const AttrDomain& attr = *jj;
652  int valSize = attr.getSize(); //number values for given attribute (for Laplace' smoothing)
653  for(typename AttrDomain::const_iterator kk = attr.begin(); kk != attr.end(); ++kk) {
654  AttrIdd val = AttrDomain::getValueId(kk);
655  Probability valProb = calcProbability( nb_train.getCategoryValCounter(catVal, val), valSize, catCounter );
656  counters.insert( typename Counters::value_type(val, valProb) );
657  }
658  }
659  probabl_.insert( typename InternalProbabilities::value_type(catVal, CategoryData<Probability>(catProb, counters) ) );
660  }
661  }
662 
663  /** calculate log-probability for given example and category */
664  template<typename Val>
665  Probability NaiveBayesian<Val>::NaiveBayesianClasify::calcProbabilityForExample(const ExampleTest& example, AttrIdd cat_val) const {
666  Probability prob = getCategoryCounterLog(cat_val);
667  for(typename ExampleTest::const_iterator ii = example.begin(); ii != example.end(); ++ii )
668  prob += getCategoryValCounterLog(cat_val, *ii );
669  return prob;
670  }
671 
672  /** \brief serialization using boost::serialization */
673  template<typename Val>
674  template<class Archive>
675  void NaiveBayesian<Val>::NaiveBayesianClasify::save(Archive & ar, const unsigned int /* file_version */) const {
676  ar << boost::serialization::make_nvp("Base", boost::serialization::base_object<NaiveBayesianTraining>(*this));
677  ar << boost::serialization::make_nvp("InternalProb",probabl_);
678  }
679 
680  /** \brief serialization using boost::serialization */
681  template<typename Val>
682  template<class Archive>
683  void NaiveBayesian<Val>::NaiveBayesianClasify::load(Archive & ar, const unsigned int /* file_version */) {
684  ar >> boost::serialization::make_nvp("Base", boost::serialization::base_object<NaiveBayesianTraining>(*this));
685  typedef std::map<AttrIddSerialize, CategoryData<Probability> > Map;
686  Map m;
687  ar >> boost::serialization::make_nvp("InternalProb",m);
688  probabl_.clear();
689  for(typename Map::const_iterator ii = m.begin(); ii != m.end(); ++ii) {
690  probabl_.insert( typename InternalProbabilities::value_type(ii->first, ii->second) );
691  }
692  }
693 
694  } //namespace ml
695 } //namespace faif
696 
697 #endif //FAIF_NAIVE_BAYESIAN_HPP_
Val::DomainType::ValueId AttrIdd
attribute id representation in learning
Definition: Classifier.hpp:55
void switchAddTraining(const ExampleTrain &example)
Definition: NaiveBayesian.hpp:376
Definition: Chain.h:17
virtual Beliefs getCategories(const ExampleTest &) const
classify and return all classes with belief that the example is from given class
Definition: NaiveBayesian.hpp:331
AttrIdd switchGetCategory(const ExampleTest &example)
Definition: NaiveBayesian.hpp:359
Val::Value AttrValue
attribute value representation in learning
Definition: Classifier.hpp:49
void trainIncremental(const ExampleTrain &)
Definition: NaiveBayesian.hpp:337
virtual void train(const ExamplesTrain &e)
learn classifier (on the collection of training examples)
Definition: NaiveBayesian.hpp:53
Naive Bayesian Classifier.
Definition: NaiveBayesian.hpp:33
class to show the classifier state, print the attribs and counters
Definition: NaiveBayesian.hpp:416
virtual void reset()
Definition: NaiveBayesian.hpp:317
friend class boost::serialization::access
serialization using boost::serialization
Definition: NaiveBayesian.hpp:82
virtual void write(std::ostream &os) const
Definition: Classifier.hpp:175
point and some feature
Definition: Point.hpp:58
const AttrDomain & getCategoryDomain() const
accessor
Definition: Classifier.hpp:112
Belief< Val >::Beliefs Beliefs
collection of pair (AttrIdd, Probability)
Definition: Classifier.hpp:64
inner class - examples train collection
Definition: Classifier.hpp:82
void switchLoadSaveState()
Definition: NaiveBayesian.hpp:383
virtual void write(std::ostream &os) const
Definition: NaiveBayesian.hpp:343
Point in n-space, each component of the same type.
Definition: Point.hpp:22
Val::DomainType::ValueIdSerialize AttrIddSerialize
for serialization the const interferes
Definition: Classifier.hpp:58
virtual AttrIdd getCategory(const ExampleTest &) const
Definition: NaiveBayesian.hpp:324
Beliefs switchGetCategories(const ExampleTest &example)
Definition: NaiveBayesian.hpp:368
Val::DomainType AttrDomain
the attribute domain for learning
Definition: Classifier.hpp:52
the clasiffier interface
Definition: Classifier.hpp:43
print the cauters for given category
Definition: NaiveBayesian.hpp:445