Merge pull request #124 from CartoDB/add-kurtosis
Add kurtosis and skewness
This commit is contained in:
commit
c6cdaea626
47
scripts-available/CDB_Stats.sql
Normal file
47
scripts-available/CDB_Stats.sql
Normal file
@ -0,0 +1,47 @@
|
||||
--
|
||||
-- Calculate basic statistics of a given dataset
|
||||
--
|
||||
-- @param in_array An array of numeric values
|
||||
--
|
||||
-- Returns: the statistic computed by the called function (kurtosis or skewness)
|
||||
--
|
||||
-- References: http://www.itl.nist.gov/div898/handbook/eda/section3/eda35b.htm
|
||||
--
|
||||
|
||||
-- Calculate excess kurtosis
--
-- Fourth central moment divided by the fourth power of the standard
-- deviation, minus 3 (so that a normal distribution scores 0). Following
-- the NIST definition, the standard deviation is the population one
-- (N in the denominator), which is why stddev_pop is used, not stddev.
--
-- @param in_array Values to analyse; NULL elements are ignored by the
--                 aggregates
--
-- Returns: excess kurtosis, or NULL when it is undefined (empty or
--          all-NULL input, or zero standard deviation)
--
CREATE OR REPLACE FUNCTION CDB_Kurtosis ( in_array NUMERIC[] ) RETURNS NUMERIC as $$
DECLARE
    a numeric;  -- mean
    c numeric;  -- count of non-NULL elements
    s numeric;  -- population standard deviation
    k numeric;  -- resulting excess kurtosis
BEGIN
    SELECT avg(e), count(e)::numeric, stddev_pop(e)
      INTO a, c, s
      FROM ( SELECT unnest(in_array) AS e ) x;

    -- NULLIF turns a zero denominator (no spread in the data) into NULL
    -- instead of raising division_by_zero
    SELECT sum(power(e - a, 4)) / NULLIF(c * power(s, 4), 0) - 3
      INTO k
      FROM ( SELECT unnest(in_array) AS e ) x;

    RETURN k;
END;
$$ language plpgsql IMMUTABLE;
|
||||
|
||||
-- Calculate skewness
--
-- Third central moment divided by the third power of the standard
-- deviation. Deviations are taken as (value - mean) so that a long right
-- tail gives a positive result. Following the NIST definition, the
-- standard deviation is the population one (N in the denominator), which
-- is why stddev_pop is used, not stddev.
--
-- @param in_array Values to analyse; NULL elements are ignored by the
--                 aggregates
--
-- Returns: skewness, or NULL when it is undefined (empty or all-NULL
--          input, or zero standard deviation)
--
CREATE OR REPLACE FUNCTION CDB_Skewness ( in_array NUMERIC[] ) RETURNS NUMERIC as $$
DECLARE
    a numeric;   -- mean
    c numeric;   -- count of non-NULL elements
    s numeric;   -- population standard deviation
    sk numeric;  -- resulting skewness
BEGIN
    SELECT avg(e), count(e)::numeric, stddev_pop(e)
      INTO a, c, s
      FROM ( SELECT unnest(in_array) AS e ) x;

    -- The order (e - a) matters for an odd power: (a - e) would flip the sign.
    -- NULLIF turns a zero denominator (no spread in the data) into NULL
    -- instead of raising division_by_zero
    SELECT sum(power(e - a, 3)) / NULLIF(c * power(s, 3), 0)
      INTO sk
      FROM ( SELECT unnest(in_array) AS e ) x;

    RETURN sk;
END;
$$ language plpgsql IMMUTABLE;
|
1
scripts-enabled/CDB_Stats.sql
Symbolic link
1
scripts-enabled/CDB_Stats.sql
Symbolic link
@ -0,0 +1 @@
|
||||
../scripts-available/CDB_Stats.sql
|
16
test/CDB_StatsTest.sql
Normal file
16
test/CDB_StatsTest.sql
Normal file
@ -0,0 +1,16 @@
|
||||
-- continuous uniform distribution has kurtosis = -6/5, skewness = 0.0
|
||||
-- http://mathworld.wolfram.com/UniformDistribution.html
|
||||
set client_min_messages to ERROR;

-- Use the midpoints of 50000 equal-width bins on (0, 1) as a fixed stand-in
-- for a uniform sample. Unseeded random() draws made this check flaky: the
-- sampling error at this size is of the same order as the 1e-2 tolerance.
With dist As (
  SELECT ((i - 0.5) / 50000)::numeric As val
  FROM generate_series(1,50000) As g(i)
)
SELECT
  -- do the computed values match the known uniform values within 1e-2 (absolute)
  abs(CDB_Kurtosis(array_agg(val)) + 1.20) < 1e-2 As kurtosis,
  abs(CDB_Skewness(array_agg(val)) - 0) < 1e-2 As skewness
FROM dist;

set client_min_messages to NOTICE;
|
3
test/CDB_StatsTest_expect
Normal file
3
test/CDB_StatsTest_expect
Normal file
@ -0,0 +1,3 @@
|
||||
SET
|
||||
t|t
|
||||
SET
|
Loading…
Reference in New Issue
Block a user