faif
Predictions.hpp
1 #ifndef FAIF_TS_PREDICTIONS
2 #define FAIF_TS_PREDICTIONS
3 
4 // file with predictions -> calculates the digit time series based on history
5 
6 #include <ostream>
7 #include <vector>
8 
9 #include <boost/noncopyable.hpp>
10 #include <boost/tuple/tuple.hpp>
11 
12 #include "TimeSeries.hpp"
13 
14 namespace faif {
15  namespace timeseries {
16 
17  /* forward declaration */
18  class PredictionVisitor;
19 
20  /** base class to comp block */
21  class Prediction : boost::noncopyable {
22  public:
23  Prediction(const TimeSeriesDigit& history) : history_(history), default_(0, 0.0) { }
24 
25  virtual ~Prediction(){ }
26 
27  /** accessor */
28  const TimeSeriesDigit& getHistory() const { return history_; }
29 
30  /** the visitor pattern for Prediction hierarchy */
31  virtual void accept(PredictionVisitor& v) const = 0;
32 
33  /** find the value for t (the lower_bound) */
35  if( history_.size() == 0)
36  return default_;
37 
38  //TODO: there is workaroud of msvc 9.0 debug (no lower_bound template with different types)
39  TimeValueDigit v(t,0.0); //TODO: temporary object, workaround of bug in msvc 9.0 debug
40 
41  TimeSeriesDigit::const_iterator it =
42  std::lower_bound( history_.begin(), history_.end(), v,
43  boost::bind(&TimeValueDigit::getTime, _1) < boost::bind(&TimeValueDigit::getTime, _2) );
44 
45  if( it != history_.end() ) {
46  return *it;
47  }
48  else {
49  return history_.back(); //history is not empty (it was checked before)
50  }
51 
52  }
53 
54  /** calculate the prediction for period <from, to>,
55  values for negative timestamp are readed from the history
56  */
58  if(from > to)
59  throw PredictionRangeException(from, to); //bad range
60 
61  TimeSeriesDigit out;
62 
63  DigitTime history_last = std::min(to + 1, 0);
64 
65  //copy history (if needed)
66  for(DigitTime t = from; t < history_last; ++t)
67  out.push_back( getHistoricalValue(t) );
68 
69  //calculate prediction (if needed)
70  if(to >= 0) {
71  TimeSeriesDigit pred = doCalcPrediction(to);
72  TimeSeriesDigit::const_iterator first = pred.begin();
73  if(from > 0)
74  advance(first, from);
75  for(;first != pred.end(); ++first)
76  out.push_back( *first );
77  }
78  return out;
79  }
80  private:
81  TimeSeriesDigit history_; // \brief historic data
82  TimeValueDigit default_; //default value
83 
84  /** calculate the prediction for f(t), f(t+1), ... f(t+n), calculates colection of probes.
85  It isa always prediction, because the period is <0,n> */
86  virtual TimeSeriesDigit doCalcPrediction(DigitTime n) = 0;
87  };
88 
89  /* forward declaration */
90  class PredictionAR;
91  /* forward declaration */
92  class PredictionKNN;
93 
94  /** base visitor of prediction hierarchy */
96  public:
97  virtual void visit(const PredictionAR& v) = 0;
98  virtual void visit(const PredictionKNN& v) = 0;
99  };
100 
101 
102  /**
103  \brief the AR parameter collection
104  f(t) = a[0]f(t-1) + a[1]f(t-2) + ... a[n-1]f(t-n)
105  */
106  typedef std::vector<double> ARDef;
107 
108  /**
109  \brief the KNN parameters, k = num_neighbours, ref_size = size of reference block
110  */
111  struct KNNDef : public boost::tuple<int,size_t> {
112 
113  /** c-tor */
114  KNNDef(int k, size_t ref_size) : boost::tuple<int,size_t>(k, ref_size) {}
115  /** accessor */
116  int getK() const { return get<0>(); }
117  /** accessor */
118  size_t getRefSize() const { return get<1>(); }
119  };
120 
121 
122  /** the auto-regressive (AR) computation block
123 
124  f(t+1) = a(t)f(t) + a(t-1)f(t-1) + ... + a(t-n+1)f(t-n+1)
125  or
126  f(t+1) = a[0]f(t) + a[1]f(t-1) + ... a[n-1]f(t-n+1)
127  */
128  class PredictionAR : public Prediction {
129  public:
130  /** c-tor */
131  PredictionAR(const TimeSeriesDigit& history, const ARDef& definition)
132  : Prediction(history), definition_(definition)
133  { }
134 
135  /** d-tor */
136  virtual ~PredictionAR() {}
137 
138  /** visitor pattern */
139  virtual void accept(PredictionVisitor& v) const { v.visit(*this); }
140 
141  /** accessor */
142  const ARDef& getARDef() const { return definition_; }
143  private:
144  ARDef definition_; //opis modelu auto-regresywnego (wspolczynniki)
145  TimeSeriesDigit predictions_; //the prediction
146 
147  /** return the TimeValue for time t. The ranges are not checked (if bad -- assertion) */
148  const TimeValueDigit& get(DigitTime t) const {
149  if(t < 0) {//find in history
150  return getHistoricalValue(t);
151  }
152  else { //return from prediction
153  assert( t < static_cast<int>(predictions_.size()) );
154  return predictions_[t];
155  }
156  }
157 
158  /** prediction calculated by AR (n >= 0) */
159  TimeValueDigit calculateAR(DigitTime time) const {
160  Value val = 0.0;
161  for(DigitTime i=0;i<static_cast<int>(definition_.size());++i) {
162  val += definition_[i] * get(time - i - 1).getValue();
163  }
164  //cout << "calculate AR t=" << time << " val = " << val << endl;
165  return TimeValueDigit(time, val);
166  }
167 
168  /** calculate the prediction for f(t), f(t+1), ... f(t+n), calculates colection of probes */
169  virtual TimeSeriesDigit doCalcPrediction(DigitTime n) {
170  DigitTime last_predicted = -1;
171  if( ! predictions_.empty() )
172  last_predicted = predictions_.back().getTime();
173 
174  if(n > last_predicted) { //check if the prediction should be calculated
175  for(DigitTime t = last_predicted + 1; t <= n; ++t ) {
176  //LOG4CXX_INFO( logger(), "AR::calcPrediction t = " << t << " pred " << calculateAR(t) );
177  predictions_.push_back( calculateAR(t) );
178  }
179  }
180  return predictions_;
181  }
182  };
183 
184  /** to debug */
185  inline std::ostream& operator<<(std::ostream& os, const PredictionAR& ar) {
186  os << "AR:";
187  std::copy(ar.getARDef().begin(), ar.getARDef().end(), std::ostream_iterator<double>(os,",") );
188  os << ';';
189  return os;
190  }
191 
192  namespace {
193 
194  typedef TimeSeriesDigit::const_iterator TSIter;
195 
196  //the distance between two time value digit elements of the same timestamp
197  Value element_distance(const TimeValueDigit& a, const TimeValueDigit& b) {
198  //cout << "element distance " << a.getValue() << "," << b.getValue() << endl;
199  return std::abs(a.getValue() - b.getValue() );
200  }
201 
202  // the sum of element_distances, distance between two timeseries
203  Value series_distance(TSIter begin1, TSIter end1, TSIter begin2, TSIter end2) {
204  if( distance(begin1, end1) > distance(begin2, end2) ) {
205  //cout << "dist " << distance(begin1, end1) << " dist " << distance(begin2, end2) << endl;
206  return 0.0;
207  }
208  //cout << "dist beg1:" << begin1->getValue() << " beg2: " << begin2->getValue() << endl;
209  //cout << "size1 " << distance(begin1,end1) << " size2 " << distance(begin2,end2) << endl;
210  return std::inner_product( begin1, end1, begin2, 0.0, std::plus<double>(), boost::bind(&element_distance, _1, _2 ) );
211  }
212 
213  struct BestRangesFunctor : boost::noncopyable {
214  //initializes the associate table to the best offsets
215  BestRangesFunctor(TSIter begin, TSIter end, TSIter last_begin,
216  const TimeSeriesDigit& reference,
217  int num_neighbours)
218 
219  : NUM_NEIGHBOURS(num_neighbours)
220  {
221  for(; begin != last_begin; ++begin) {
222  Value dist = series_distance( reference.begin(), reference.end(), begin, end );
223  add( dist, begin );
224  }
225  }
226  //adds the new element to collection (if it is good enough)
227  void add(Value dist, TSIter it) {
228  //LOG4CXX_INFO( logger(), "add dist= " << dist << " for time=" << it->getTime() );
229  best_.insert( make_pair(dist, it) );
230  if( best_.size() > NUM_NEIGHBOURS )
231  best_.erase(--best_.end() );
232  }
233 
234  Value avg(DigitTime offset, int reference_length, TSIter last_iter) const {
235  Value v = 0.0;
236  for(BestIter ii = best_.begin(); ii != best_.end(); ++ii ) {
237  TSIter it_best = ii->second;
238  if( distance(it_best, last_iter) > (reference_length + offset) ) {
239  advance(it_best, reference_length + offset );
240  v += it_best->getValue();
241  }
242  }
243  if(!best_.empty())
244  return v/best_.size();
245  else
246  return 0.0;
247 
248  }
249 
250  const unsigned int NUM_NEIGHBOURS; //size of collection
251 
252  //collection: distance(value) and the iterator to the begin of block
253  std::multimap<Value,TSIter> best_;
254  typedef std::multimap<Value,TSIter>::const_iterator BestIter;
255  };
256 
257  inline std::ostream& operator<<(std::ostream& os, const BestRangesFunctor& b) {
258  for( BestRangesFunctor::BestIter ii = b.best_.begin(); ii != b.best_.end(); ++ii ) {
259  os << (ii->second)->getTime() << ',';
260  }
261  return os;
262  }
263 
264  } //namespace
265 
266  /** memory based predictor */
267  class PredictionKNN : public Prediction {
268  public:
269  /* c-tor */
270  PredictionKNN(const TimeSeriesDigit& history, const KNNDef& definition)
271  : Prediction(history), definition_(definition)
272  {
273  const TimeSeriesDigit& historical = getHistory();
274  TimeSeriesDigit::const_iterator reference_begin = historical.end();
275  TimeSeriesDigit::const_iterator reference_end = historical.end();
276 
277  int ref_size = static_cast<int>(definition.getRefSize() );
278 
279  //set the begin iterator to first available for reference block
280  if( distance(historical.begin(), historical.end() ) > ref_size )
281  std::advance( reference_begin, - ref_size);
282  else
283  reference_begin = historical.begin();
284 
285  reference_ = TimeSeriesDigit(reference_begin, reference_end);
286  }
287  /* d-tor */
288  virtual ~PredictionKNN() {}
289  /** accessor */
290  const KNNDef& getKNNDef() const { return definition_; }
291 
292  // \brief visitor pattern
293  virtual void accept(PredictionVisitor& v) const { return v.visit(*this); }
294  private:
295  KNNDef definition_; //KNN definition
296  TimeSeriesDigit reference_; //KNN reference block
297  TimeSeriesDigit predictions_; //the prediction
298 
299  /** return the TimeValue for time t. The ranges are not checked (if bad -- assertion) */
300  const TimeValueDigit& get(DigitTime t) const {
301  if(t < 0)
302  return getHistoricalValue(t);
303  else {
304  assert( t < static_cast<int>(predictions_.size()) );
305  return predictions_[t];
306  }
307  }
308 
309  /** calculate the prediction for f(t), f(t+1), ... f(t+n), calculates colection of probes */
310  virtual TimeSeriesDigit doCalcPrediction(DigitTime n) {
311  //std::cout << " reference: " << reference_ << std::endl;
312  //LOG4CXX_INFO( logger(), "KNN::calcPredictions n=" << n);
313  predictions_.clear();
314 
315  // dostarcza danych historycznych, ktore sa porownywane blokiem referencyjnym
316  const TimeSeriesDigit& ts = getHistory();
317 
318  //musi miec wielkosc bloku (reference_.size) oraz jeszcze przynajmniej jeden pomiar
319  int min_hist_size = static_cast<int>( reference_.size() ) + 1;
320  TSIter last_begin = ts.end();
321  if(min_hist_size < static_cast<int>( ts.size() ) )
322  advance(last_begin, -min_hist_size);
323  else
324  last_begin = ts.begin();
325 
326  // LOG4CXX_INFO( logger(), "distance (begin, last_begin) " << distance(ts.begin(), last_begin ) );
327  // LOG4CXX_INFO( logger(), "distance (last_begin, end) " << distance(last_begin, ts.end() ) );
328 
329  BestRangesFunctor best(ts.begin(), ts.end(), last_begin, reference_, definition_.getK() );
330  //std::cout << " best " << best << std::endl;
331 
332  for(DigitTime t = 0; t <= n; ++t ) {
333  Value v = best.avg(t, static_cast<int>(definition_.getRefSize()), ts.end() );
334  //std::cout << " result: t=" << t << " v=" << v << std::endl;
335  //LOG4CXX_INFO( logger(), "result: t = " << t << " v = " << v );
336  predictions_.push_back( TimeValueDigit(t,v) );
337  }
338  return predictions_;
339  }
340  };
341 
342  /** to debugging */
343  inline std::ostream& operator<<(std::ostream& os, const PredictionKNN& knn) {
344  os << "KNN: k=" << knn.getKNNDef().getK() << " ref_size= " << knn.getKNNDef().getRefSize() << ';';
345  return os;
346  }
347 
348  namespace {
349  // \brief print visitor - used in print operator
350  struct PrintVisitor : public PredictionVisitor {
351  PrintVisitor(std::ostream& os) : os_(os) {}
352  virtual void visit(const PredictionAR& v) { os_ << v; }
353  virtual void visit(const PredictionKNN& v) { os_ << v; }
354  std::ostream& os_;
355  };
356 
357  }//namespace
358 
359  //ostream operator (to debug)
360  inline std::ostream& operator<<(std::ostream& os, const Prediction& pred) {
361  PrintVisitor v(os);
362  pred.accept(v);
363  return os;
364  }
365 
366 
367 
368  } //namespace timeseries
369 } //namespace faif
370 
371 
372 #endif //FAIF_TS_PREDICTIONS
std::vector< double > ARDef
the AR parameter collection f(t) = a[0]f(t-1) + a[1]f(t-2) + ... a[n-1]f(t-n)
Definition: Predictions.hpp:106
virtual void accept(PredictionVisitor &v) const
Definition: Predictions.hpp:139
bad prediction range exception
Definition: TimeseriesExceptions.hpp:11
Definition: Chain.h:17
int getK() const
Definition: Predictions.hpp:116
virtual void accept(PredictionVisitor &v) const =0
const Value & getValue() const
Definition: TimeSeries.hpp:140
int DigitTime
digit time type. DigitTime < 0 past, DigitTime >= 0 future, DigitTime == 0 now.
Definition: TimeSeries.hpp:47
double Value
value - real number
Definition: TimeSeries.hpp:49
const DigitTime & getTime() const
Definition: TimeSeries.hpp:138
PredictionAR(const TimeSeriesDigit &history, const ARDef &definition)
Definition: Predictions.hpp:131
const KNNDef & getKNNDef() const
Definition: Predictions.hpp:290
size_t getRefSize() const
Definition: Predictions.hpp:118
Definition: TimeSeries.hpp:151
virtual ~PredictionAR()
Definition: Predictions.hpp:136
Definition: TimeSeries.hpp:131
const TimeValueDigit & getHistoricalValue(DigitTime t) const
Definition: Predictions.hpp:34
virtual void accept(PredictionVisitor &v) const
Definition: Predictions.hpp:293
Definition: Predictions.hpp:267
Definition: Predictions.hpp:21
const TimeSeriesDigit & getHistory() const
Definition: Predictions.hpp:28
TimeSeriesDigit calculatePrediction(DigitTime from, DigitTime to)
Definition: Predictions.hpp:57
const ARDef & getARDef() const
Definition: Predictions.hpp:142
Definition: Predictions.hpp:128
the KNN parameters, k = num_neighbours, ref_size = size of reference block
Definition: Predictions.hpp:111
KNNDef(int k, size_t ref_size)
Definition: Predictions.hpp:114
Definition: Predictions.hpp:95