1 #ifndef FAIF_CLASIFIER_HPP_ 2 #define FAIF_CLASIFIER_HPP_ 10 #include <boost/bind.hpp> 12 #include <boost/serialization/serialization.hpp> 13 #include <boost/serialization/list.hpp> 14 #include <boost/serialization/nvp.hpp> 15 #include <boost/serialization/vector.hpp> 17 #include "../Value.hpp" 18 #include "../Point.hpp" 30 return -(freq * std::log(freq));
42 template<
typename Val>
55 typedef typename Val::DomainType::ValueId
AttrIdd;
64 typedef typename Belief<Val>::Beliefs
Beliefs;
72 static Feature init() {
73 return Val::DomainType::getUnknownId();
85 AttrIdd getMajorCategory()
const;
88 double entropy()
const;
91 friend class boost::serialization::access;
93 template<
class Archive>
94 void serialize( Archive &ar,
const unsigned int file_version ){
95 ar & boost::serialization::make_nvp(
"Examples", boost::serialization::base_object< std::vector<ExampleTrain> >(*
this) );
103 Classifier(
const Domains& attr_domains,
const AttrDomain& category_domain)
104 : domains_(attr_domains), category_(category_domain) {}
115 AttrIdd
getCategoryIdd(
const AttrValue& val)
const {
return category_.find(val); }
118 virtual void reset() = 0;
124 virtual AttrIdd
getCategory(
const ExampleTest& example)
const = 0;
128 virtual Beliefs
getCategories(
const ExampleTest& example)
const = 0;
131 virtual void write(std::ostream& os)
const;
134 friend class boost::serialization::access;
136 template<
class Archive>
137 void save(Archive & ar,
const unsigned int )
const {
141 unsigned int size =
static_cast<unsigned int>(domains_.size());
142 ar & boost::serialization::make_nvp(
"ClassifierDomainsCount", size );
143 for(
typename Domains::const_iterator i = domains_.begin(); i != domains_.end(); ++i) {
144 ar & boost::serialization::make_nvp(
"ClassifierDomain", *i);
146 ar & boost::serialization::make_nvp(
"ClassifierCategory", category_);
149 template<
class Archive>
150 void load(Archive & ar,
const unsigned int ) {
152 ar & boost::serialization::make_nvp(
"ClassifierDomainsCount", size );
154 for(
unsigned int i = 0; i < size; ++i) {
156 AttrDomain& d = domains_.back();
158 ar >> boost::serialization::make_nvp(
"ClassifierDomain", d);
160 ar & boost::serialization::make_nvp(
"ClassifierCategory", category_);
163 template<
class Archive>
164 void serialize( Archive &ar,
const unsigned int file_version ){
165 boost::serialization::split_member(ar, *
this, file_version);
170 AttrDomain category_;
174 template<
typename Val>
176 os <<
"Categories(" << category_.getSize() <<
")" << category_ << std::endl;
177 os <<
"Attributes(" <<
static_cast<int>(domains_.size()) <<
"):";
178 std::copy(domains_.begin(), domains_.end(), std::ostream_iterator<AttrDomain>(os,
"") );
184 template<
typename Val>
185 std::ostream& operator<<(std::ostream& os, const Classifier<Val>& c) {
191 template<
typename It,
typename Val>
198 template<
typename It,
typename Val>
206 template<
typename Val>
214 template<
typename Val>
221 template<
typename Val>
232 template<
typename Val>
242 typedef std::map<AttrIdd,int> Counters;
249 typename ExamplesTrain::const_iterator end) : sum_(0) {
250 std::for_each( beg, end, boost::bind(&TrainExampleCategoryCounters::inc,
this, _1 ) );
254 void inc(
const ExampleTrain& e) {
257 typename Counters::iterator it = counters_.find(e.getFeature() );
258 if(it == counters_.end() ) {
259 counters_.insert( std::make_pair(e.getFeature(), 1 ) );
266 AttrIdd maxCount()
const {
267 typename Counters::const_iterator it =
268 std::max_element( counters_.begin(), counters_.end(),
269 boost::bind(&Counters::value_type::second, _1) < boost::bind(&Counters::value_type::second, _2) );
270 if(it != counters_.end() )
273 return Val::DomainType::getUnknownId();
277 const Counters&
get()
const {
return counters_; }
286 double sum =
static_cast<double>( sum_ );
287 for(
typename Counters::const_iterator i = counters_.begin(); i != counters_.end(); ++i) {
288 entr +=
calcEntropy( static_cast<double>(i->second) / sum );
299 for(
typename Counters::const_iterator i = counters_.begin(); i != counters_.end(); ++i) {
300 histogram.push_back(
typename Beliefs::value_type(i->first,
301 static_cast<Probability>(i->second) / static_cast<Probability>(sum_) ) );
303 std::sort(histogram.begin(), histogram.end());
312 template<
typename Val>
316 return counters.maxCount();
320 template<
typename Val>
329 #endif //FAIF_CLASIFIER_HPP_ Val::DomainType::ValueId AttrIdd
attribute id representation in learning
Definition: Classifier.hpp:55
PointAndFeature< Val, AttrIdd, InitValueId > ExampleTrain
the train example (test example and the category)
Definition: Classifier.hpp:78
const Domains & getAttrDomains() const
accessor
Definition: Classifier.hpp:109
virtual AttrIdd getCategory(const ExampleTest &example) const =0
classify
Val::Value AttrValue
attribute value representation in learning
Definition: Classifier.hpp:49
virtual void train(const ExamplesTrain &)=0
learn classifier (on the collection of training examples)
Definition: Classifier.hpp:233
virtual Beliefs getCategories(const ExampleTest &example) const =0
classify and return all classes with belief that the example is from each class classes are sorted fr...
virtual void write(std::ostream &os) const
Definition: Classifier.hpp:175
point and some feature
Definition: Point.hpp:58
const AttrDomain & getCategoryDomain() const
accessor
Definition: Classifier.hpp:112
Belief< Val >::Beliefs Beliefs
collection of pairs (AttrIdd, Probability)
Definition: Classifier.hpp:64
inner class - examples train collection
Definition: Classifier.hpp:82
double entropy() const
entropy of counters
Definition: Classifier.hpp:283
Point in n-space, each component of the same type.
Definition: Point.hpp:22
Beliefs getHistogram() const
histogram from counters - Beliefs class where each position is counter divided by counters sum...
Definition: Classifier.hpp:297
the value concept
Definition: Value.hpp:41
Val::DomainType::ValueIdSerialize AttrIddSerialize
for serialization the const interferes
Definition: Classifier.hpp:58
double calcEntropy(double freq)
calculate the -x * log(x) value. If x == 0 return 0.
Definition: Classifier.hpp:28
Classifier< Val >::ExampleTest createExampleStrict(const std::vector< std::pair< std::string, typename Classifier< Val >::AttrValue > > &collection, const Classifier< Val > &classifier)
create the test example from collection of pairs: attribute(domain) identifier and attribute value...
Definition: Classifier.hpp:216
TrainExampleCategoryCounters(typename ExamplesTrain::const_iterator beg, typename ExamplesTrain::const_iterator end)
c-tor, counters initialized by train examples collection
Definition: Classifier.hpp:248
virtual void reset()=0
the classifier will have no knowledge
int getSum() const
optimization: instead of accumulating all values from the counters container, keep the integer member ...
Definition: Classifier.hpp:280
Val::DomainType AttrDomain
the attribute domain for learning
Definition: Classifier.hpp:52
TrainExampleCategoryCounters()
c-tor, empty counters
Definition: Classifier.hpp:245
Classifier< Val >::ExampleTest createExample(It begin, It end, const Classifier< Val > &classifier)
create the test example from iterator range or C-like table of values
Definition: Classifier.hpp:193
Point< Val > ExampleTest
the test example (collection of AttrIdd)
Definition: Classifier.hpp:67
AttrIdd getCategoryIdd(const AttrValue &val) const
accessor (helper)
Definition: Classifier.hpp:115
Space< AttrDomain > Domains
Definition: Classifier.hpp:61
the helping inner class to init PointAndFeature structure using getUnknownId method ...
Definition: Classifier.hpp:71
the classifier interface
Definition: Classifier.hpp:43
AttrIdd getMajorCategory() const
the most common category in the training example container
Definition: Classifier.hpp:314
double entropy() const
entropy of set of examples
Definition: Classifier.hpp:321