Added the running_stats object

--HG--
extra : convert_revision : svn%3Afdd8eb12-d10e-0410-9acb-85c331704f74/trunk%402340
This commit is contained in:
Davis King 2008-06-19 02:21:32 +00:00
parent e8537b1f7b
commit cd9dbd575a
3 changed files with 263 additions and 0 deletions

11
dlib/statistics.h Normal file
View File

@ -0,0 +1,11 @@
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STATISTICs_H_
#define DLIB_STATISTICs_H_
#include "statistics/statistics.h"
#endif // DLIB_STATISTICs_H_

View File

@ -0,0 +1,120 @@
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_STATISTICs_
#define DLIB_STATISTICs_
#include "statistics_abstract.h"
#include <limits>
#include <cmath>
#include "../algs.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <
    typename T
    >
class running_stats
{
    // Computes a running mean and variance over a stream of floating point
    // numbers without storing the individual samples.
    //
    // Internally the object keeps the running mean of the samples (sum), the
    // running mean of the squared samples (sum_sqr) and the number of samples
    // that currently carry weight (n).  Once n reaches maximum_n it stops
    // growing, so every later sample is blended in with the same fixed weight
    // 1/(n+1) and older samples are gradually forgotten.
public:

    // Creates an empty object.  T must be float, double, or long double;
    // anything else is rejected at compile time.
    running_stats()
    {
        clear();

        COMPILE_TIME_ASSERT ((
                is_same_type<float,T>::value ||
                is_same_type<double,T>::value ||
                is_same_type<long double,T>::value
        ));
    }

    // Puts the object back into its initial state: all accumulated statistics
    // are discarded and the sample cap goes back to
    // std::numeric_limits<T>::max(), i.e. a previous set_max_n() is undone.
    void clear()
    {
        sum = 0;
        sum_sqr = 0;
        n = 0;
        maximum_n = std::numeric_limits<T>::max();
    }

    // Sets the largest value current_n() is allowed to grow to.  The current
    // count is left untouched, even if it already exceeds val.
    void set_max_n (
        const T& val
    )
    {
        maximum_n = val;
    }

    // Folds val into the running mean and the running mean of squares.
    void add (
        const T& val
    )
    {
        // Weight of the new sample (div_n) and of everything seen so far
        // (n_div_n).  The two weights always add up to 1.
        const T div_n   = 1/(n+1);
        const T n_div_n = n*div_n;

        sum     = n_div_n*sum     + val*div_n;
        sum_sqr = n_div_n*sum_sqr + val*div_n*val;

        // Stop counting once the cap is hit so later samples keep a constant
        // weight instead of an ever shrinking one.
        if (n < maximum_n)
            ++n;
    }

    // Returns the cap on current_n() (see set_max_n()).
    T max_n (
    ) const
    {
        // Must return the data member; the bare name max_n would refer to
        // this member function itself.
        return maximum_n;
    }

    // Returns the number of samples currently carrying weight in the estimates.
    T current_n (
    ) const
    {
        return n;
    }

    // Returns the running mean (0 if nothing has been added yet).
    T mean (
    ) const
    {
        // sum holds the running mean, not a raw total (see add()).
        return sum;
    }

    // Returns the variance estimate, i.e. (mean of squares - squared mean)
    // scaled by n/(n-1).  The result is never negative.
    // Requires current_n() > 1.
    T variance (
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(current_n() > 1,
            "\tT running_stats::variance"
            << "\n\tcurrent_n() must be greater than 1"
            << "\n\tthis: " << this
            );

        const T temp = n/(n-1);
        const T var = temp*(sum_sqr - sum*sum);

        // Rounding in sum_sqr - sum*sum can leave a tiny negative value when
        // the samples are (nearly) identical.  Clamp it so callers such as
        // scale() never take the square root of a negative number.  A NaN is
        // deliberately passed through unchanged.
        if (var < 0)
            return 0;
        return var;
    }

    // Returns val expressed in standard deviations from the running mean:
    // (val-mean())/std::sqrt(variance()).
    // Requires current_n() > 1.
    T scale (
        const T& val
    ) const
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(current_n() > 1,
            "\tT running_stats::scale"
            << "\n\tcurrent_n() must be greater than 1"
            << "\n\tthis: " << this
            );

        return (val-mean())/std::sqrt(variance());
    }

private:
    T sum;        // running mean of the samples
    T sum_sqr;    // running mean of the squared samples
    T n;          // number of samples carrying weight, capped by maximum_n
    T maximum_n;  // upper bound for n
};
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_STATISTICs_

View File

@ -0,0 +1,132 @@
// Copyright (C) 2008 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_STATISTICs_ABSTRACT_
#ifdef DLIB_STATISTICs_ABSTRACT_
#include <limits>
#include <cmath>
namespace dlib
{
template <
typename T
>
class running_stats
{
/*!
REQUIREMENTS ON T
    - T must be a float, double, or long double type

INITIAL VALUE
    - max_n() == std::numeric_limits<T>::max()
    - mean() == 0
    - current_n() == 0

WHAT THIS OBJECT REPRESENTS
    This object computes the running mean and variance of a stream of
    real numbers.  The individual numbers are not stored.

    As this object accumulates more and more numbers, each new number
    impacts the current mean and variance estimates less and less.  This
    may be what you want, but it might not be.  For example, your stream
    of numbers might be non-stationary, that is, its mean and variance
    might change over time.  To let you use this object on such a stream
    it provides the ability to set a "max_n".  Once current_n() has
    reached max_n() it stops growing, so each new sample has the same
    impact on the mean and variance estimates from then on, and the
    influence of older samples fades as new ones arrive.

    So if you have a highly non-stationary stream of data you might set
    max_n to a small value, while for a very stationary stream you might
    set it to a very large value.
!*/
public:
running_stats(
);
/*!
ensures
    - this object is properly initialized
!*/
void clear(
);
/*!
ensures
    - this object has its initial value
    - clears all memory of any previous data points
    - #max_n() == std::numeric_limits<T>::max(), i.e. a value given
      to set_max_n() earlier is discarded as well
!*/
void set_max_n (
const T& val
);
/*!
ensures
    - #max_n() == val
    - #current_n() == current_n()
      (the current count is not changed, even if it is larger than val)
!*/
T max_n (
) const;
/*!
ensures
    - returns the largest value that add() will let current_n() grow to
!*/
T current_n (
) const;
/*!
ensures
    - returns the number of points given to this object so far or
      max_n(), whichever is smaller.
!*/
void add (
const T& val
);
/*!
ensures
    - updates the mean and variance stored in this object so that
      the new value is factored into them
    - #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1)
    - #variance() == the updated variance that takes this new value into account
    - if (current_n() < max_n()) then
        - #current_n() == current_n() + 1
    - else
        - #current_n() == current_n()
!*/
T mean (
) const;
/*!
ensures
    - returns the running mean of the values presented to this object
      so far.  While current_n() < max_n() all values are weighted
      equally; after that, newer values carry more weight than older
      ones (see add()).
!*/
T variance (
) const;
/*!
requires
    - current_n() > 1
ensures
    - returns the running variance estimate of the values presented
      to this object so far.  The estimate is the mean of the squared
      values minus the squared mean, scaled by
      current_n()/(current_n()-1).
!*/
T scale (
const T& val
) const;
/*!
requires
    - current_n() > 1
ensures
    - returns (val-mean())/std::sqrt(variance())
      (i.e. val expressed in standard deviations from the mean)
!*/
};
}
#endif // DLIB_STATISTICs_ABSTRACT_