Merge pull request #316 from CartoDB/quantiles-bins-updates
Updates quantile bins to use native PostgreSQL percentile functions
This commit is contained in:
commit
04dc39bb16
3
Makefile
3
Makefile
@ -1,7 +1,7 @@
|
||||
# cartodb/Makefile
|
||||
|
||||
EXTENSION = cartodb
|
||||
EXTVERSION = 0.21.0
|
||||
EXTVERSION = 0.22.0
|
||||
|
||||
SED = sed
|
||||
AWK = awk
|
||||
@ -85,6 +85,7 @@ UPGRADABLE = \
|
||||
0.19.2 \
|
||||
0.20.0 \
|
||||
0.21.0 \
|
||||
0.22.0 \
|
||||
$(EXTVERSION)dev \
|
||||
$(EXTVERSION)next \
|
||||
$(END)
|
||||
|
1
NEWS.md
1
NEWS.md
@ -1,5 +1,6 @@
|
||||
0.22.0 (yyyy-mm-dd)
|
||||
* Fix: allow older ogr2ogr to work in -append mode (#319)
|
||||
* Refactors CDB_QuantileBins to rely on PostgreSQL function `percentile_disc` #316
|
||||
|
||||
0.21.0 (2018-02-15)
|
||||
* Add optional parameter to limit the number of cells in grid-generation functions #322
|
||||
|
@ -5,39 +5,14 @@
|
||||
-- bins based on the Quantile method.
|
||||
--
|
||||
-- @param breaks The number of bins you want to find.
|
||||
--
|
||||
--
|
||||
-- PL/pgSQL implementation of CDB_QuantileBins.
--
-- Sorts in_array ascending, then walks i = 1 .. breaks and appends one
-- value per step to the result:
--   * for i < breaks, the value found at 1-based position
--     ceil(element_count / breaks * i) of the sorted array, or the average
--     of that element and the next one when element_count / breaks * i is a
--     whole number;
--   * for i = breaks, the maximum of the array.
--
-- @param in_array The values to bin.
-- @param breaks The number of bins you want to find.
-- @return The collected values as numeric[]. Declared STRICT, so PostgreSQL
--         returns NULL without running the body when an argument is NULL.
CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$
|
||||
DECLARE
|
||||
element_count INT4;
|
||||
break_size numeric;
|
||||
tmp_val numeric;
|
||||
i INT := 1;
|
||||
reply numeric[];
|
||||
BEGIN
|
||||
-- sort the values ascending, overwriting the local copy of in_array
|
||||
SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x;
|
||||
-- get the total size of our data
|
||||
element_count := array_length(in_array, 1);
|
||||
-- (possibly fractional) number of elements per bin
break_size := element_count::numeric / breaks;
|
||||
-- pick one break value per iteration
|
||||
LOOP
|
||||
IF i < breaks THEN
|
||||
-- "% 1" extracts the fractional part of break_size * i
IF break_size * i % 1 > 0 THEN
|
||||
-- fractional boundary: take the single element at position ceil(break_size * i)
-- NOTE(review): no ORDER BY here; relies on unnest() emitting elements in array order
SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x;
|
||||
ELSE
|
||||
-- whole-number boundary: average the element at that position and the one after it
SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x;
|
||||
END IF;
|
||||
ELSIF i = breaks THEN
|
||||
-- last bin: use the largest value
|
||||
SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x;
|
||||
ELSE
|
||||
-- i > breaks: every bin has been emitted
EXIT;
|
||||
END IF;
|
||||
|
||||
reply = array_append(reply, tmp_val);
|
||||
i := i+1;
|
||||
END LOOP;
|
||||
RETURN reply;
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE STRICT PARALLEL SAFE;
|
||||
--
|
||||
-- Pure-SQL implementation of CDB_QuantileBins built on the PostgreSQL
-- ordered-set aggregate percentile_disc.
--
-- @param in_array The values to bin.
-- @param breaks The number of bins you want to find.
-- @return The array produced by percentile_disc for the fractions
--         1/breaks, 2/breaks, ..., breaks/breaks over the elements of
--         in_array sorted ascending. Declared STRICT, so PostgreSQL returns
--         NULL without running the body when an argument is NULL.
CREATE OR REPLACE FUNCTION CDB_QuantileBins(in_array numeric[], breaks int)
|
||||
RETURNS numeric[]
|
||||
AS $$
|
||||
SELECT
|
||||
-- breaks::numeric forces numeric (not integer) division, giving fractions in (0, 1]
percentile_disc(Array(SELECT generate_series(1, breaks) / breaks::numeric))
|
||||
WITHIN GROUP (ORDER BY x ASC) AS p
|
||||
FROM
|
||||
-- one row per array element; per the PostgreSQL docs, ordered-set
-- aggregates ignore NULL values in their sorted input
unnest(in_array) AS x;
|
||||
$$ language SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
@ -1,11 +1,23 @@
|
||||
WITH data AS (
|
||||
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
|
||||
WHERE x % 5 != 0 AND x % 7 != 0
|
||||
)
|
||||
SELECT unnest(CDB_JenksBins(s, 7)) FROM data;
|
||||
SELECT Array[0.99, 1.0, 1.01,
|
||||
4.99, 5.01,
|
||||
10.01, 10.01,
|
||||
15.01, 14.99,
|
||||
20.1, 19.9]::numeric[] AS s
|
||||
)
|
||||
-- expectation is: 1, 5, 10, 15, 20
|
||||
-- TODO: fix cdb_jenksbins to match ^^
|
||||
SELECT round(unnest(CDB_JenksBins(s, 5))) FROM data;
|
||||
|
||||
WITH data_nulls AS (
|
||||
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,300) x
|
||||
WHERE x % 5 != 0 AND x % 7 != 0
|
||||
)
|
||||
SELECT unnest(CDB_JenksBins(s, 7)) FROM data_nulls;
|
||||
SELECT Array[0.99, 1.0, 1.01,
|
||||
4.99, 5.01,
|
||||
null, null,
|
||||
10.01, 10.01,
|
||||
15.01, 14.99,
|
||||
null, null,
|
||||
20.1, 19.9]::numeric[] AS s
|
||||
)
|
||||
-- expectation is: 1, 5, 10, 15, 20
|
||||
-- TODO: fix cdb_jenksbins to match ^^
|
||||
SELECT round(unnest(CDB_JenksBins(s, 5))) FROM data_nulls;
|
||||
|
@ -1,14 +1,10 @@
|
||||
43
|
||||
86
|
||||
129
|
||||
172
|
||||
213
|
||||
257
|
||||
299
|
||||
37
|
||||
51
|
||||
97
|
||||
157
|
||||
213
|
||||
241
|
||||
1
|
||||
5
|
||||
10
|
||||
20
|
||||
20
|
||||
1
|
||||
5
|
||||
10
|
||||
20
|
||||
|
||||
|
@ -1,11 +1,17 @@
|
||||
WITH data AS (
|
||||
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
|
||||
WHERE x % 5 != 0 AND x % 7 != 0
|
||||
SELECT array_agg(x::numeric) AS s
|
||||
FROM generate_series(0, 99) AS x
|
||||
)
|
||||
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data;
|
||||
SELECT unnest(CDB_QuantileBins(s, 10))
|
||||
FROM data;
|
||||
|
||||
WITH data_nulls AS (
|
||||
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
|
||||
WHERE x % 5 != 0 AND x % 7 != 0
|
||||
SELECT array_agg(x::numeric) AS s
|
||||
FROM (
|
||||
SELECT x FROM generate_series(0, 99) AS x
|
||||
UNION ALL
|
||||
SELECT null AS x FROM generate_series(1, 10) AS x
|
||||
) _wrap
|
||||
)
|
||||
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data_nulls;
|
||||
SELECT unnest(CDB_QuantileBins(s, 10))
|
||||
FROM data_nulls;
|
||||
|
@ -1,14 +1,20 @@
|
||||
13
|
||||
9
|
||||
19
|
||||
29
|
||||
43
|
||||
57
|
||||
71
|
||||
86
|
||||
39
|
||||
49
|
||||
59
|
||||
69
|
||||
79
|
||||
89
|
||||
99
|
||||
9
|
||||
19
|
||||
29
|
||||
57
|
||||
87
|
||||
|
||||
|
||||
|
||||
39
|
||||
49
|
||||
59
|
||||
69
|
||||
79
|
||||
89
|
||||
99
|
||||
|
Loading…
Reference in New Issue
Block a user