diff --git a/Makefile b/Makefile index 2f31953..00d9b05 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # cartodb/Makefile EXTENSION = cartodb -EXTVERSION = 0.21.0 +EXTVERSION = 0.22.0 SED = sed AWK = awk @@ -85,6 +85,7 @@ UPGRADABLE = \ 0.19.2 \ 0.20.0 \ 0.21.0 \ + 0.22.0 \ $(EXTVERSION)dev \ $(EXTVERSION)next \ $(END) diff --git a/NEWS.md b/NEWS.md index a22986a..b1956a7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ 0.22.0 (yyyy-mm-dd) * Fix: allow older ogr2ogr to work in -append mode (#319) +* Refactors CDB_QuantileBins to rely on PostgreSQL function `percentile_disc` #316 0.21.0 (2018-02-15) * Add optional parameter to limit the number of cells in grid-generation functions #322 diff --git a/scripts-available/CDB_QuantileBins.sql b/scripts-available/CDB_QuantileBins.sql index 20fa440..deaa79d 100644 --- a/scripts-available/CDB_QuantileBins.sql +++ b/scripts-available/CDB_QuantileBins.sql @@ -5,39 +5,14 @@ -- bins based on the Quantile method. -- -- @param breaks The number of bins you want to find. --- -- -CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ -DECLARE - element_count INT4; - break_size numeric; - tmp_val numeric; - i INT := 1; - reply numeric[]; -BEGIN - -- sort our values - SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; - -- get the total size of our data - element_count := array_length(in_array, 1); - break_size := element_count::numeric / breaks; - -- slice our bread - LOOP - IF i < breaks THEN - IF break_size * i % 1 > 0 THEN - SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; - ELSE - SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; - END IF; - ELSIF i = breaks THEN - -- select the last value - SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; - ELSE - EXIT; - END IF; - - reply = array_append(reply, tmp_val); - i := i+1; - END LOOP; - RETURN reply; -END; -$$ language plpgsql IMMUTABLE STRICT PARALLEL SAFE; +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins(in_array numeric[], breaks int) +RETURNS numeric[] +AS $$ + SELECT + percentile_disc(Array(SELECT generate_series(1, breaks) / breaks::numeric)) + WITHIN GROUP (ORDER BY x ASC) AS p + FROM + unnest(in_array) AS x; +$$ language SQL IMMUTABLE STRICT PARALLEL SAFE; diff --git a/test/CDB_JenksBinsTest.sql b/test/CDB_JenksBinsTest.sql index b8bfbaa..11d4312 100644 --- a/test/CDB_JenksBinsTest.sql +++ b/test/CDB_JenksBinsTest.sql @@ -1,11 +1,23 @@ WITH data AS ( - SELECT array_agg(x::numeric) s FROM generate_series(1,300) x - WHERE x % 5 != 0 AND x % 7 != 0 - ) -SELECT unnest(CDB_JenksBins(s, 7)) FROM data; + SELECT Array[0.99, 1.0, 1.01, + 4.99, 5.01, + 10.01, 10.01, + 15.01, 14.99, + 20.1, 19.9]::numeric[] AS s +) +-- expectation is: 1, 5, 10, 15, 20 +-- TODO: fix cdb_jenksbins to match ^^ +SELECT round(unnest(CDB_JenksBins(s, 5))) FROM data; WITH data_nulls AS ( - SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,300) x - WHERE x % 5 != 0 AND x % 7 != 0 - ) -SELECT unnest(CDB_JenksBins(s, 7)) FROM data_nulls; + SELECT Array[0.99, 1.0, 1.01, + 4.99, 5.01, + null, null, + 10.01, 10.01, + 15.01, 14.99, + null, null, + 20.1, 19.9]::numeric[] AS s +) +-- expectation is: 1, 5, 10, 15, 20 +-- TODO: fix cdb_jenksbins to match ^^ +SELECT round(unnest(CDB_JenksBins(s, 5))) FROM data_nulls; diff --git a/test/CDB_JenksBinsTest_expect b/test/CDB_JenksBinsTest_expect index 496e6be..8e69822 100644 --- a/test/CDB_JenksBinsTest_expect +++ b/test/CDB_JenksBinsTest_expect @@ -1,14 +1,10 @@ -43 -86 -129 -172 -213 -257 -299 -37 -51 -97 -157 -213 -241 +1 +5 +10 +20 +20 +1 +5 +10 +20 diff --git a/test/CDB_QuantileBinsTest.sql b/test/CDB_QuantileBinsTest.sql index 4a02a0c..342122d 100644 --- a/test/CDB_QuantileBinsTest.sql +++ b/test/CDB_QuantileBinsTest.sql @@ -1,11 +1,17 @@ WITH data AS ( - SELECT array_agg(x::numeric) s FROM generate_series(1,100) x - WHERE x % 5 != 0 AND x % 7 != 0 + SELECT array_agg(x::numeric) AS s + FROM generate_series(0, 99) AS x ) -SELECT unnest(CDB_QuantileBins(s, 7)) FROM data; +SELECT unnest(CDB_QuantileBins(s, 10)) + FROM data; WITH data_nulls AS ( - SELECT array_agg(CASE WHEN x % 2 != 0 THEN x ELSE NULL END::numeric) s FROM generate_series(1,100) x - WHERE x % 5 != 0 AND x % 7 != 0 + SELECT array_agg(x::numeric) AS s + FROM ( + SELECT x FROM generate_series(0, 99) AS x + UNION ALL + SELECT null AS x FROM generate_series(1, 10) AS x + ) _wrap ) -SELECT unnest(CDB_QuantileBins(s, 7)) FROM data_nulls; +SELECT unnest(CDB_QuantileBins(s, 10)) + FROM data_nulls; diff --git a/test/CDB_QuantileBinsTest_expect b/test/CDB_QuantileBinsTest_expect index 9261ae3..fbb4b0e 100644 --- a/test/CDB_QuantileBinsTest_expect +++ b/test/CDB_QuantileBinsTest_expect @@ -1,14 +1,20 @@ -13 +9 +19 29 -43 -57 -71 -86 +39 +49 +59 +69 +79 +89 99 +9 +19 29 -57 -87 - - - +39 +49 +59 +69 +79 +89 99