Merge pull request #124 from CartoDB/add-kurtosis

Add kurtosis and skewness
This commit is contained in:
Raul Ochoa 2015-09-07 12:11:34 +02:00
commit c6cdaea626
4 changed files with 67 additions and 0 deletions

View File

@ -0,0 +1,47 @@
--
-- Calculate basic statistics of a given dataset
--
-- @param in_array A numeric array holding the dataset
--
-- Returns: the statistic computed by the function called (kurtosis or skewness)
--
-- References: http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
--
-- Calculate excess kurtosis
--
-- Moment-based estimate: sum((e - mean)^4) / (N * s^4) - 3, where s is the
-- population standard deviation (N in the denominator), as the formula
-- requires. The trailing "- 3" makes a normal distribution score 0.
--
-- @param in_array A numeric array; NULL elements are ignored by the aggregates
--
-- Returns: the excess kurtosis, or NULL when it is undefined (empty array,
--   only NULL elements, or zero variance such as a single element or all
--   elements equal)
CREATE OR REPLACE FUNCTION CDB_Kurtosis ( in_array NUMERIC[] ) RETURNS NUMERIC as $$
DECLARE
    mean_val numeric;
    n numeric;
    sd numeric;
    k numeric;
BEGIN
    -- First pass: mean, count and population standard deviation.
    SELECT avg(e), count(e)::numeric, stddev_pop(e)
      INTO mean_val, n, sd
      FROM unnest(in_array) AS t(e);

    -- Second pass: fourth central moment over s^4. NULLIF turns a zero
    -- denominator (zero variance) into NULL instead of a division-by-zero error.
    SELECT sum(power(e - mean_val, 4)) / NULLIF(n * power(sd, 4), 0) - 3
      INTO k
      FROM unnest(in_array) AS t(e);

    RETURN k;
END;
$$ language plpgsql IMMUTABLE;
-- Calculate skewness
--
-- Moment-based estimate: sum((e - mean)^3) / (N * s^3), where s is the
-- population standard deviation (N in the denominator), as the formula
-- requires. Deviations are taken as (e - mean) so that data with a long
-- right tail yields a positive value and a long left tail a negative one.
--
-- @param in_array A numeric array; NULL elements are ignored by the aggregates
--
-- Returns: the skewness, or NULL when it is undefined (empty array, only
--   NULL elements, or zero variance such as a single element or all
--   elements equal)
CREATE OR REPLACE FUNCTION CDB_Skewness ( in_array NUMERIC[] ) RETURNS NUMERIC as $$
DECLARE
    mean_val numeric;
    n numeric;
    sd numeric;
    sk numeric;
BEGIN
    -- First pass: mean, count and population standard deviation.
    SELECT avg(e), count(e)::numeric, stddev_pop(e)
      INTO mean_val, n, sd
      FROM unnest(in_array) AS t(e);

    -- Second pass: third central moment over s^3. NULLIF turns a zero
    -- denominator (zero variance) into NULL instead of a division-by-zero error.
    SELECT sum(power(e - mean_val, 3)) / NULLIF(n * power(sd, 3), 0)
      INTO sk
      FROM unnest(in_array) AS t(e);

    RETURN sk;
END;
$$ language plpgsql IMMUTABLE;

View File

@ -0,0 +1 @@
../scripts-available/CDB_Stats.sql

16
test/CDB_StatsTest.sql Normal file
View File

@ -0,0 +1,16 @@
-- continuous uniform distribution has kurtosis = -6/5, skewness = 0.0
-- http://mathworld.wolfram.com/UniformDistribution.html
--
-- The sample is an evenly spaced grid over (0, 1) instead of random() draws:
-- with 50000 random points the sampling error of both statistics is of the
-- same order as the 1e-2 tolerance, so an unseeded random sample cannot
-- guarantee the fixed expected output.
set client_min_messages to ERROR;
With dist As (
  -- midpoints of 50000 equal-width bins: 0.00001, 0.00003, ..., 0.99999
  SELECT ((i - 0.5) / 50000)::numeric As val
    FROM generate_series(1,50000) As g(i)
)
SELECT
    -- do the computed values match the known values within an absolute tolerance of 1e-2
    abs(CDB_Kurtosis(array_agg(val)) + 1.20) < 1e-2 As kurtosis,
    abs(CDB_Skewness(array_agg(val)) - 0) < 1e-2 As skewness
FROM dist;
set client_min_messages to NOTICE;

View File

@ -0,0 +1,3 @@
SET
t|t
SET