Merge pull request #316 from CartoDB/quantiles-bins-updates

Updates quantile bins to use native PostgreSQL precentile functions
This commit is contained in:
Mario de Frutos 2018-03-19 13:09:13 +01:00 committed by GitHub
commit 04dc39bb16
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 70 additions and 73 deletions

View File

@ -1,7 +1,7 @@
# cartodb/Makefile
EXTENSION = cartodb
EXTVERSION = 0.21.0
EXTVERSION = 0.22.0
SED = sed
AWK = awk
@ -85,6 +85,7 @@ UPGRADABLE = \
0.19.2 \
0.20.0 \
0.21.0 \
0.22.0 \
$(EXTVERSION)dev \
$(EXTVERSION)next \
$(END)

View File

@ -1,5 +1,6 @@
0.22.0 (yyyy-mm-dd)
* Fix: allow older ogr2ogr to work in -append mode (#319)
* Refactors CDB_QuantileBins to rely on PostgreSQL function `percentile_disc` #316
0.21.0 (2018-02-15)
* Add optional parameter to limit the number of cells in grid-generation functions #322

View File

@ -7,37 +7,12 @@
-- @param breaks The number of bins you want to find.
--
--
CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$
DECLARE
element_count INT4;
break_size numeric;
tmp_val numeric;
i INT := 1;
reply numeric[];
BEGIN
-- sort our values
SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x;
-- get the total size of our data
element_count := array_length(in_array, 1);
break_size := element_count::numeric / breaks;
-- slice our bread
LOOP
IF i < breaks THEN
IF break_size * i % 1 > 0 THEN
SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x;
ELSE
SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x;
END IF;
ELSIF i = breaks THEN
-- select the last value
SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x;
ELSE
EXIT;
END IF;
reply = array_append(reply, tmp_val);
i := i+1;
END LOOP;
RETURN reply;
END;
$$ language plpgsql IMMUTABLE STRICT PARALLEL SAFE;
CREATE OR REPLACE FUNCTION CDB_QuantileBins(in_array numeric[], breaks int)
RETURNS numeric[]
AS $$
SELECT
percentile_disc(Array(SELECT generate_series(1, breaks) / breaks::numeric))
WITHIN GROUP (ORDER BY x ASC) AS p
FROM
unnest(in_array) AS x;
$$ language SQL IMMUTABLE STRICT PARALLEL SAFE;

View File

@ -1,11 +1,23 @@
WITH data AS (
SELECT array_agg(x::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
SELECT Array[0.99, 1.0, 1.01,
4.99, 5.01,
10.01, 10.01,
15.01, 14.99,
20.1, 19.9]::numeric[] AS s
)
SELECT unnest(CDB_JenksBins(s, 7)) FROM data;
-- expectation is: 1, 5, 10, 15, 20
-- TODO: fix cdb_jenksbins to match ^^
SELECT round(unnest(CDB_JenksBins(s, 5))) FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,300) x
WHERE x % 5 != 0 AND x % 7 != 0
SELECT Array[0.99, 1.0, 1.01,
4.99, 5.01,
null, null,
10.01, 10.01,
15.01, 14.99,
null, null,
20.1, 19.9]::numeric[] AS s
)
SELECT unnest(CDB_JenksBins(s, 7)) FROM data_nulls;
-- expectation is: 1, 5, 10, 15, 20
-- TODO: fix cdb_jenksbins to match ^^
SELECT round(unnest(CDB_JenksBins(s, 5))) FROM data_nulls;

View File

@ -1,14 +1,10 @@
43
86
129
172
213
257
299
37
51
97
157
213
241
1
5
10
20
20
1
5
10
20

View File

@ -1,11 +1,17 @@
WITH data AS (
SELECT array_agg(x::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
SELECT array_agg(x::numeric) AS s
FROM generate_series(0, 99) AS x
)
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data;
SELECT unnest(CDB_QuantileBins(s, 10))
FROM data;
WITH data_nulls AS (
SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x
WHERE x % 5 != 0 AND x % 7 != 0
SELECT array_agg(x::numeric) AS s
FROM (
SELECT x FROM generate_series(0, 99) AS x
UNION ALL
SELECT null AS x FROM generate_series(1, 10) AS x
) _wrap
)
SELECT unnest(CDB_QuantileBins(s, 7)) FROM data_nulls;
SELECT unnest(CDB_QuantileBins(s, 10))
FROM data_nulls;

View File

@ -1,14 +1,20 @@
13
9
19
29
43
57
71
86
39
49
59
69
79
89
99
9
19
29
57
87
39
49
59
69
79
89
99