faif
RandomCustomDistr.hpp
1 #ifndef RANDOM_CUSTOM_DISTR_H
2 #define RANDOM_CUSTOM_DISTR_H
3 
4 #if defined(_MSC_VER) && (_MSC_VER >= 1400)
5 //msvc9.0 warning, rubbish for boost::random
6 #pragma warning(disable:4512)
7 //msvc9.0 waring for boost::math
8 #pragma warning(disable:4127)
9 #endif
10 
11 #include <cmath>
12 #include <list>
13 #include <vector>
14 #include <map>
15 #include <algorithm>
16 #include <iterator>
17 #include <numeric>
18 #include <limits>
19 #include <ostream>
20 
21 #include <boost/tuple/tuple.hpp>
22 #include <boost/tuple/tuple_comparison.hpp>
23 #include <boost/bind.hpp>
24 #include <boost/math/distributions/normal.hpp>
25 
26 
27 #include "Random.hpp"
28 
29 namespace faif {
30 
31 
32  /** the value in histogram, part of distribution (single range) */
33  class DistrValue {
34  public:
35  /** \brief c-tor */
36  DistrValue(double from, double to, double value)
37  : impl_( (std::min)(from, to), (std::max)(from, to), value)
38  {}
39 
40  /** \brief copy c-tor */
41  DistrValue(const DistrValue& v) : impl_(v.impl_) {}
42  /** \brief assignment */
44  impl_ = v.impl_;
45  return *this;
46  }
47  /** \brief d-tor */
49  /** accessor */
50  double getFrom() const { return boost::get<0>(impl_); }
51  /** accessor */
52  double getTo() const { return boost::get<1>(impl_); }
53  /** accessor */
54  double getValue() const { return boost::get<2>(impl_); }
55  /** mutator */
56  void setValue(double val) { boost::get<2>(impl_) = val; }
57 
58  //comparison
59  bool operator==(const DistrValue& v) const { return impl_ == v.impl_; }
60 
61  //comparison - to sort ranges
62  bool operator<(const DistrValue& v) const {
63  if(getFrom() == v.getFrom() )
64  return getTo() < v.getTo();
65  return getFrom() < v.getFrom();
66  }
67 
68  private:
69  boost::tuple<double, double, double> impl_;
70  };
71 
72  //stream operator (to debug)
73  inline std::ostream& operator<<(std::ostream& os, const DistrValue& v) {
74  os << "[" << v.getFrom() << ", " << v.getTo() << "]:" << v.getValue();
75  return os;
76  }
77 
78  /** \brief collection of histogram values, data for histogram distribution */
79  typedef std::vector<DistrValue> DistrValues;
80 
81  namespace {
82 
83  //the lenght of histogram value
84  double getLength(const DistrValue& v) { return v.getTo() - v.getFrom(); }
85  //the area of range
86  double getArea(const DistrValue& v) { return getLength(v)*v.getValue(); }
87  //the arithmetic mean of range
88  double getMeanTerm(const DistrValue& v) {
89  return 0.5*(v.getFrom() + v.getTo())*getArea(v);
90  }
91  //the term to calcualte variation
92  double getVariationTerm(const DistrValue& v, const double mi) {
93  double a = v.getFrom() - mi;
94  double b = v.getTo() - mi;
95  return v.getValue()* ( pow(b, 3.0) - pow(a, 3.0) )/ 3.0;
96  }
97  }
98 
99  /** modify the values in vector of ranges to make the integral = 1,
100  the ranges are sorted, dense (no gaps), no overlapped (no common parts)
101  */
102  inline DistrValues makeProbabilityDensity(const DistrValues& in) {
103  typedef std::list<DistrValue> Val;
104  Val val(in.begin(), in.end() );
105  val.sort();
106 
107  //remove gaps and overllaped ranges
108  Val::iterator curr = val.begin();
109  Val::iterator next = curr;
110  if( next != val.end() )
111  ++next;
112  while( next != val.end() ) {
113  if(next->getFrom() < curr->getTo() ) { //overlapped ranges
114  if( next->getTo() <= curr->getTo() ) {
115  //remove the next period
116  Val::iterator it = next;
117  ++next;
118  val.erase(it);
119  continue;
120  }
121  else {
122  //change the from parameter for the next period
123  *next = DistrValue( curr->getTo(), next->getTo(), next->getValue() );
124  }
125  }
126  else if(next->getFrom() > curr->getTo() ) { //gap
127  val.insert(next, DistrValue( curr->getTo(), next->getFrom(), 0.0 ) );
128  }
129  //accept
130  curr = next;
131  ++next;
132  }
133  double sum = std::accumulate( val.begin(), val.end(), 0.0,
134  boost::bind( std::plus<double>(), _1,
135  boost::bind(&getArea, _2) ) );
136 
137  if(sum == 0.0) sum = 1.0;
138 
139  //modify values
140  for(Val::iterator i = val.begin(); i != val.end(); ++i ) {
141  i->setValue( i->getValue() / sum );
142  }
143  return DistrValues( val.begin(), val.end() );
144  }
145 
146  //stream operator (to debug)
147  inline std::ostream& operator<<(std::ostream& os, const DistrValues& v) {
148  std::copy(v.begin(), v.end(), std::ostream_iterator<DistrValue>(os,"; ") );
149  return os;
150  }
151 
152  /** \brief the distribution described by histogram (sum of ranges) */
154  public:
155  RandomCustomDistr() : gen_() {
156  values_.push_back( DistrValue(0.0, 1.0, 1.0) );
157  }
158  RandomCustomDistr( const DistrValues& values )
159  : values_( makeProbabilityDensity(values) ), gen_()
160  { }
161 
162  /** assignment */
164  values_ = h.values_;
165  return *this;
166  }
167 
168  RandomCustomDistr(const RandomCustomDistr& h) : values_(h.values_), gen_(h.gen_) {}
169  virtual ~RandomCustomDistr() {}
170 
171  /** accessor */
172  const DistrValues& getValues() const { return values_; }
173 
174  /** \brief the method to generate the random variable with given distribution */
175  double operator()() {
176  return getQuantile( gen_() );
177  }
178 
179  /** method to calc mean */
180  double getMean() const {
181  return std::accumulate( values_.begin(), values_.end(), 0.0,
182  boost::bind( std::plus<double>(), _1,
183  boost::bind(&getMeanTerm, _2) ) );
184  }
185 
186  /** method to calc standard deviation */
187  double getStandardDeviation() const {
188  double mi = getMean();
189  double variation = std::accumulate( values_.begin(), values_.end(), 0.0,
190  boost::bind( std::plus<double>(), _1,
191  boost::bind(&getVariationTerm, _2, mi) ) );
192  return std::sqrt(variation);
193  }
194 
195  /** method to calc quantile */
196  double getQuantile(double k) const {
197  if( values_.empty() )
198  return std::numeric_limits<double>::quiet_NaN();
199 
200  double sum = 0.0;
201  for(DistrValues::const_iterator it = values_.begin(); it != values_.end(); ++it ) {
202  double area = getArea(*it);
203  if( k >= sum && k < sum + area) {
204  //the area > 0 (because if area == 0 the condition is false (k >= sum && k < sum )
205  return getLength(*it) * ( k - sum ) / area + it->getFrom();
206  }
207  sum += area;
208  }
209  //return the last value
210  return values_.back().getTo();
211  }
212  /** method to calc probability density */
213  double getProbabilityDensity(double x) const;
214 
215  /** method to calc distribution */
216  double getDistribution(double x) const;
217  private:
218  DistrValues values_; //probablity distribution given by sum ranges
219  RandomDouble gen_; //generates uniform distribution <0,1)
220  };
221 
222 
223 
224 
225  // \brief performs the simulation, create the random custom distribution based on given values
227  public:
228  /** the epsilon (length of interval in histogram) is dependent of num_steps in Monte Carlo simulation.
229  It is assumed
230  Normal(mi,sigma)(x = mi + 3*sigma - epsilon/2, x = mi + 3*sigma + epsilon/2) >= sqrt(num_steps),
231  so y = (x - mi)/sigma
232  Normal(0,1)(y = 3 - epsil/2, y = 3 + epsil/2) >= sqrt(num_steps),
233  so:
234 
235  epsilon = sigma/(sqrt(N)*pdf(3), where pdf(3) = N(0,1) dla x = 3
236  */
237  static double calculateEpsilon(double sigma, long num_steps) {
238  const double SIG = sigma;
239  const double N = num_steps >= 0 ? static_cast<double>(num_steps) : 1.0;
240  boost::math::normal normal01(0.0,1.0);
241  const double EPSILON = 0.5 * SIG / (boost::math::pdf(normal01, 3.0) * sqrt(N) );
242  return EPSILON;
243  }
244 
245  /** c-tor, the length of interval in histogram */
246  RandomCustomCreator(double e) : epsilon_(e), numValues_(0L) { }
247 
248  /** c-tor */
250  : epsilon_(c.epsilon_), histogram_(c.histogram_), numValues_(c.numValues_)
251  {}
252 
253  /** d-tor */
254  virtual ~RandomCustomCreator() {}
255 
256  //add the value to histogram, increments the proper interval
257  double addValue(double value) {
258  ++numValues_;
259 
260  //the mean of interval for given value
261  double key = floor(value/epsilon_)*epsilon_;
262 
263  Histogram::iterator it = histogram_.find(key);
264  if(it != histogram_.end() )
265  ++(it->second);
266  else
267  histogram_.insert( std::make_pair( key, 1L ) );
268 
269  return value;
270  }
271 
272  /** accessor - return the number of added */
273  long getNumValues() const { return numValues_; }
274 
275  /** accessor - transform internal data to histogram distribution */
277  const double epsilon2 = epsilon_/2.0;
278  DistrValues values;
279  for(Histogram::const_iterator it = histogram_.begin(); it != histogram_.end(); ++it) {
280  values.push_back( DistrValue( it->first - epsilon2,
281  it->first + epsilon2,
282  static_cast<double>(it->second) ) );
283  }
284  return RandomCustomDistr(values);
285  }
286  private:
287  //forbidden the assign operator
289 
290  double epsilon_; //!< the length of the segment
291 
292  typedef std::map<double,long> Histogram;
293  Histogram histogram_; //the interval of variable and the num of simulations in given interval
294  long numValues_; //the number of values
295  };
296 
297  namespace {
298  //the comparison functor
299  bool rangeLessThanValue(const DistrValue& range, const DistrValue& x ) {
300  return range.getTo() <= x.getTo();
301  }
302  }
303 
304  /** method to calc probability density */
305  inline double RandomCustomDistr::getProbabilityDensity(double x) const {
306  //TODO: msvc not implements lower_bound with diff types, so temporary object
307  DistrValue v(x, x, 0.0);
308  DistrValues::const_iterator it =
309  std::lower_bound( values_.begin(), values_.end(), v, rangeLessThanValue );
310  if( it == values_.end() )
311  return 0.0; //above histogram or empty
312  if( it == values_.begin() && x < it->getFrom() )
313  return 0.0; //below histogram
314  return it->getValue();
315  }
316 
317  /** method to calc distribution */
318  inline double RandomCustomDistr::getDistribution(double x) const {
319  //TODO: msvc not implements lower_bound with diff types, so temporary object
320  DistrValue v(x, x, 0.0);
321  DistrValues::const_iterator it =
322  std::lower_bound( values_.begin(), values_.end(), v, rangeLessThanValue );
323  //sum the values before x
324  double sum = std::accumulate( values_.begin(), it, 0.0, boost::bind( std::plus<double>(), _1, boost::bind(&getArea, _2) ) );
325  if( it != values_.end() )
326  if(it->getFrom() <= x && x <= it->getTo() )
327  sum += ( x - it->getFrom() ) * it->getValue();
328  return sum;
329  }
330 
331 } //namespace faif
332 
333 #endif //RANDOM_CUSTOM_DISTR_H
long getNumValues() const
Definition: RandomCustomDistr.hpp:273
double getQuantile(double k) const
Definition: RandomCustomDistr.hpp:196
virtual ~RandomCustomCreator()
Definition: RandomCustomDistr.hpp:254
Definition: Chain.h:17
static double calculateEpsilon(double sigma, long num_steps)
Definition: RandomCustomDistr.hpp:237
DistrValue(double from, double to, double value)
c-tor
Definition: RandomCustomDistr.hpp:36
double getTo() const
Definition: RandomCustomDistr.hpp:52
STL namespace.
double getFrom() const
Definition: RandomCustomDistr.hpp:50
RandomCustomCreator(double e)
Definition: RandomCustomDistr.hpp:246
double getMean() const
Definition: RandomCustomDistr.hpp:180
DistrValue(const DistrValue &v)
copy c-tor
Definition: RandomCustomDistr.hpp:41
Definition: RandomCustomDistr.hpp:226
RandomCustomDistr getRandomCustomDistr() const
Definition: RandomCustomDistr.hpp:276
the uniform distribution for double, in given range, e.g. <0,1), uses RandomSingleton ...
Definition: Random.hpp:74
~DistrValue()
d-tor
Definition: RandomCustomDistr.hpp:48
double getProbabilityDensity(double x) const
Definition: RandomCustomDistr.hpp:305
double getValue() const
Definition: RandomCustomDistr.hpp:54
DistrValue & operator=(const DistrValue &v)
assignment
Definition: RandomCustomDistr.hpp:43
the distribution described by histogram (sum of ranges)
Definition: RandomCustomDistr.hpp:153
Definition: RandomCustomDistr.hpp:33
double getDistribution(double x) const
Definition: RandomCustomDistr.hpp:318
void setValue(double val)
Definition: RandomCustomDistr.hpp:56
double getStandardDeviation() const
Definition: RandomCustomDistr.hpp:187
double operator()()
the method to generate the random variable with given distribution
Definition: RandomCustomDistr.hpp:175
RandomCustomDistr & operator=(const RandomCustomDistr &h)
Definition: RandomCustomDistr.hpp:163
const DistrValues & getValues() const
Definition: RandomCustomDistr.hpp:172
RandomCustomCreator(const RandomCustomCreator &c)
Definition: RandomCustomDistr.hpp:249