moves quantile bins to use postgres precentile functions
This commit is contained in:
parent
5243192296
commit
691b9a8312
@ -5,39 +5,15 @@
|
||||
-- bins based on the Quantile method.
|
||||
--
|
||||
-- @param breaks The number of bins you want to find.
|
||||
--
|
||||
--
|
||||
CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$
|
||||
DECLARE
|
||||
element_count INT4;
|
||||
break_size numeric;
|
||||
tmp_val numeric;
|
||||
i INT := 1;
|
||||
reply numeric[];
|
||||
BEGIN
|
||||
-- sort our values
|
||||
SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x;
|
||||
-- get the total size of our data
|
||||
element_count := array_length(in_array, 1);
|
||||
break_size := element_count::numeric / breaks;
|
||||
-- slice our bread
|
||||
LOOP
|
||||
IF i < breaks THEN
|
||||
IF break_size * i % 1 > 0 THEN
|
||||
SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x;
|
||||
ELSE
|
||||
SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x;
|
||||
END IF;
|
||||
ELSIF i = breaks THEN
|
||||
-- select the last value
|
||||
SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x;
|
||||
ELSE
|
||||
EXIT;
|
||||
END IF;
|
||||
|
||||
reply = array_append(reply, tmp_val);
|
||||
i := i+1;
|
||||
END LOOP;
|
||||
RETURN reply;
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE STRICT PARALLEL SAFE;
|
||||
--
|
||||
CREATE OR REPLACE FUNCTION CDB_QuantileBins(in_array numeric[], breaks int)
|
||||
RETURNS numeric[]
|
||||
AS $$
|
||||
SELECT array_agg(p) FROM (
|
||||
SELECT percentile_disc(idx::numeric / breaks::numeric)
|
||||
WITHIN GROUP (ORDER BY x ASC) AS p
|
||||
FROM generate_series(1, breaks) AS idx, unnest(in_array) AS x
|
||||
GROUP BY idx
|
||||
) AS quantiles;
|
||||
$$ language sql;
|
||||
|
@ -1,11 +1,17 @@
|
||||
WITH data AS (
|
||||
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
|
||||
WHERE x % 5 != 0 AND x % 7 != 0
|
||||
SELECT array_agg(x::numeric) AS s
|
||||
FROM generate_series(0, 99) AS x
|
||||
)
|
||||
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data;
|
||||
SELECT unnest(CDB_QuantileBins(s, 10))
|
||||
FROM data;
|
||||
|
||||
WITH data_nulls AS (
|
||||
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
|
||||
WHERE x % 5 != 0 AND x % 7 != 0
|
||||
SELECT array_agg(x::numeric)
|
||||
FROM (
|
||||
SELECT x FROM generate_series(0, 99) AS x
|
||||
UNION ALL
|
||||
SELECT null AS s FROM generate_series(1, 10) AS x
|
||||
) _wrap
|
||||
)
|
||||
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data_nulls;
|
||||
SELECT unnest(CDB_QuantileBins(s, 10))
|
||||
FROM data_nulls;
|
||||
|
@ -1,14 +1,20 @@
|
||||
13
|
||||
9
|
||||
19
|
||||
29
|
||||
43
|
||||
57
|
||||
71
|
||||
86
|
||||
39
|
||||
49
|
||||
59
|
||||
69
|
||||
79
|
||||
89
|
||||
99
|
||||
9
|
||||
19
|
||||
29
|
||||
57
|
||||
87
|
||||
|
||||
|
||||
|
||||
39
|
||||
49
|
||||
59
|
||||
69
|
||||
79
|
||||
89
|
||||
99
|
||||
|
Loading…
Reference in New Issue
Block a user