faif
Discretizer.hpp
1 #ifndef FAIF_TS_DISCRETIZERS
2 #define FAIF_TS_DISCRETIZERS
3 
4 #if defined(_MSC_VER) && (_MSC_VER >= 1700)
5 //msvc14.0 warnings for Boost.Serialization
6 #pragma warning(disable:4100)
7 #pragma warning(disable:4512)
8 #endif
9 
10 #include <ostream>
11 
12 #include <vector>
13 #include <algorithm>
14 #include <numeric>
15 #include <iterator>
16 #include <cmath>
17 
18 #include <boost/concept_check.hpp>
19 #include <boost/scoped_array.hpp>
20 #include <boost/bind.hpp>
21 #include <boost/algorithm/minmax_element.hpp>
22 #include <boost/lambda/bind.hpp>
23 #include <boost/lambda/construct.hpp>
24 #include <boost/lambda/core.hpp>
25 #include <boost/lambda/lambda.hpp>
26 
27 #include <boost/serialization/serialization.hpp>
28 #include <boost/serialization/access.hpp>
29 #include <boost/serialization/base_object.hpp>
30 #include <boost/serialization/nvp.hpp>
31 #include <boost/serialization/utility.hpp>
32 #include <boost/serialization/vector.hpp>
33 
34 namespace faif {
35  namespace timeseries {
36 
37  /** The section representation */
38  template<typename V> class Section : public std::pair<V, V> {
39 
40  BOOST_CONCEPT_ASSERT((boost::EqualityComparable<V>));
41 
42  public:
43  Section() {} //!< required by serialization
44  Section(V min_val, V max_val) : std::pair<V, V>(min_val, max_val) { }
45  V getMin() const { return this->first; }
46  V getMax() const { return this->second; }
47  bool operator==(const Section& s) const { return this->first == s.first && this->second == s.second; }
48 
49  /** \brief serialization using boost::serialization */
50  template<class Archive>
51  void serialize(Archive & ar, const unsigned int /* file_version */){
52  ar & boost::serialization::make_nvp("Base",
53  boost::serialization::base_object<std::pair<V,V> >(*this)
54  );
55  }
56 
57  };
58 
59  template<typename V>
60  inline std::ostream& operator<<(std::ostream& os, const Section<V>& s) {
61  os << "(Min: " << s.getMin() << ", Max:" << s.getMax() << ")";
62  return os;
63  }
64 
65  /** discretizer - collection of sections */
66  template<typename V> class Discretizer : public std::vector<Section<V> > {
67  public:
68  /** calculete the class for given sections */
69  int discretize(V value) const {
70  /** sections are sorted */
71  typename Discretizer<V>::const_iterator it =
72  std::lower_bound( this->begin(), this->end(), value,
73  boost::bind(std::less<V>(), boost::bind(&Section<V>::getMax, _1), _2 ) );
74  if( it == this->end() )
75  return static_cast<int>(this->size()) - 1; //the last class
76  else
77  return static_cast<int>(it - this->begin()); //return the index of founded class
78  }
79 
80 
81  /** \brief serialization using boost::serialization */
82  template<class Archive>
83  void serialize(Archive & ar, const unsigned int /* file_version */){
84  ar & boost::serialization::make_nvp("Base",
85  boost::serialization::base_object<std::vector<Section<V> > >(*this)
86  );
87  }
88  };
89 
90  template<typename V>
91  inline std::ostream& operator<<(std::ostream& os, const Discretizer<V>& d) {
92  std::copy(d.begin(), d.end(), std::ostream_iterator< Section<V> >(os, ",") );
93  return os;
94  }
95 
96  //** creates the discretizer - sections of equal width from sample set of values */
97  template<typename It>
98  Discretizer<typename It::value_type> createEqualWidthSections(It begin, It end, unsigned int num_sections) {
99 
100  BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<It>));
101 
102  typedef typename It::value_type Value;
103  Discretizer<Value> ret;
104  if( begin == end || num_sections == 0)
105  return ret;
106  std::pair<It, It> mm = boost::minmax_element(begin, end );
107  Value length = (*(mm.second) - *(mm.first)) / static_cast<Value>(num_sections);
108  Value curr_min = *(mm.first) - length; //the current value - to generator
109  Value curr_max = *(mm.first); //the current value - to generator
110  typename boost::lambda::var_type<Value>::type vmin(boost::lambda::var(curr_min));
111  typename boost::lambda::var_type<Value>::type vmax(boost::lambda::var(curr_max));
112  std::generate_n(back_inserter(ret), num_sections,
113  boost::lambda::bind(boost::lambda::constructor< Section<Value> >(),
114  vmin += boost::lambda::constant(length),
115  vmax += boost::lambda::constant(length)));
116  return ret;
117  }
118 
119 
120  /** make the discretizer based on sequence of values- use k-means method */
121  template<typename It>
122  Discretizer<typename It::value_type> createKMeansSections( It begin, It end, const unsigned int num_sections ) {
123 
124  BOOST_CONCEPT_ASSERT((boost::ForwardIterator<It>));
125 
126  typedef typename It::value_type Value;
127  typedef typename std::vector<Value> ValVect;
128 
129  if( begin == end || num_sections == 0)
130  return Discretizer<Value>();
131  ValVect values(begin, end);
132  std::sort(values.begin(), values.end() ); //working copy, sorted vector of values
133  const Value& minValue = values.front();
134  const Value& maxValue = values.back();
135  //initialisation - equal lenghth sections
136  Discretizer<Value> current = createEqualWidthSections(values.begin(), values.end(), num_sections );
137 
138  const int MAX_NUMBER_OF_ITERATIONS = 100;
139  const Value MEANS_NOT_CHANGE_EPSILON = 0.001*(maxValue - minValue); //distance between means that alows to stop algorithm
140 
141  for(int i = 0; i < MAX_NUMBER_OF_ITERATIONS; ++i ) { //k-means algorithm
142 
143  // std::cout << "Iteration " << i << " Disc " << current << std::endl;
144 
145  //discretize using 'current'
146  boost::scoped_array< ValVect > result( new ValVect[num_sections] ); //results of discretizers
147 
148  for(typename ValVect::const_iterator it = values.begin(); it != values.end(); ++it) {
149  result[current.discretize(*it)].push_back(*it);
150  }
151  //create new central points
152  boost::scoped_array<Value> means( new Value[num_sections] );
153  Value distance = 0.0; //the sum of distances between new central points (means) and the old central points
154  const ValVect* disRes = result.get(); //iterator
155  Value* mu = means.get(); //iterator
156  for(typename Discretizer<Value>::const_iterator sec = current.begin(); sec != current.end(); ++sec ) {
157  Value secMean = ( sec->getMax() + sec->getMin() ) / 2.0; //central point of current section
158  if(disRes->empty() ) {
159  *mu = secMean; //if no result examples -> central point is not changed
160  }
161  else {
162  Value acc = std::accumulate(disRes->begin(), disRes->end(), 0.0);
163  Value m = acc / static_cast<Value>(disRes->size());
164  distance += std::fabs( secMean - m );
165  *mu = m;
166  }
167  ++disRes;
168  ++mu;
169  }
170  if(distance < MEANS_NOT_CHANGE_EPSILON) //central points are not changed enough
171  break;
172 
173  Discretizer<Value> newDisc; //create new discretizer
174  Value secStart = minValue;
175  for(unsigned int j = 0; j < num_sections - 1; ++j ) {
176  Value secStop = (means[j]+means[j+1])/2.0;
177  newDisc.push_back( Section<Value>(secStart, secStop) );
178  secStart = secStop;
179  }
180  newDisc.push_back( Section<Value>(secStart, maxValue) );
181  current = newDisc;
182  }
183  return current;
184  }
185 
186  } //namespace timeseries
187 } //namespace faif
188 
189 #endif //FAIF_TS_DISCRETIZERS
Definition: Chain.h:17
Definition: Discretizer.hpp:66
Definition: Discretizer.hpp:38
double Value
value - real number
Definition: TimeSeries.hpp:49
int discretize(V value) const
Definition: Discretizer.hpp:69
void serialize(Archive &ar, const unsigned int)
serialization using boost::serialization
Definition: Discretizer.hpp:83
Section()
required by serialization
Definition: Discretizer.hpp:43
void serialize(Archive &ar, const unsigned int)
serialization using boost::serialization
Definition: Discretizer.hpp:51
Discretizer< typename It::value_type > createKMeansSections(It begin, It end, const unsigned int num_sections)
Definition: Discretizer.hpp:122