From cbd3c447b6aab75fad33a3bd9c7d1e6af35a0872 Mon Sep 17 00:00:00 2001 From: sariogonfer Date: Tue, 14 Apr 2015 20:45:37 +0200 Subject: [PATCH 1/4] Update CDB_QuantileBins.sql --- scripts-available/CDB_QuantileBins.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts-available/CDB_QuantileBins.sql b/scripts-available/CDB_QuantileBins.sql index d0a0374..ba73496 100644 --- a/scripts-available/CDB_QuantileBins.sql +++ b/scripts-available/CDB_QuantileBins.sql @@ -18,15 +18,15 @@ BEGIN -- get our unique values SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e GROUP BY e ORDER BY e ASC) x; -- get the total size of our row - element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + element_count := array_length(in_array, 1); break_size := element_count::numeric / breaks; -- slice our bread LOOP IF i > breaks THEN EXIT; END IF; - SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET round(break_size * i)) x; + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET round(break_size * i) - 1) x; reply = array_append(reply, tmp_val); i := i+1; END LOOP; RETURN reply; END; -$$ language plpgsql IMMUTABLE; \ No newline at end of file +$$ language plpgsql IMMUTABLE; From 1552c03dd4444f44c34062a123a2112e469afcdb Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Mon, 27 Apr 2015 17:59:40 -0400 Subject: [PATCH 2/4] removed group by; made binning more reliable --- scripts-available/CDB_QuantileBins.sql | 27 ++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/scripts-available/CDB_QuantileBins.sql b/scripts-available/CDB_QuantileBins.sql index ba73496..23e5dcc 100644 --- a/scripts-available/CDB_QuantileBins.sql +++ b/scripts-available/CDB_QuantileBins.sql @@ -15,18 +15,29 @@ DECLARE i INT := 1; reply numeric[]; BEGIN - -- get our unique values - SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e GROUP BY e ORDER BY e ASC) x; - -- get the total size of our row + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data element_count := array_length(in_array, 1); break_size := element_count::numeric / breaks; -- slice our bread LOOP - IF i > breaks THEN EXIT; END IF; - SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET round(break_size * i) - 1) x; + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + reply = array_append(reply, tmp_val); - i := i+1; - END LOOP; - RETURN reply; + i := i+1; + END LOOP; + RETURN reply; END; $$ language plpgsql IMMUTABLE; From 7d0efa95fb634652053c7cd66735a48947d38eb4 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 28 Apr 2015 09:59:57 -0400 Subject: [PATCH 3/4] updated test --- test/CDB_QuantileBinsTest_expect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CDB_QuantileBinsTest_expect b/test/CDB_QuantileBinsTest_expect index aebc9ad..e50c251 100644 --- a/test/CDB_QuantileBinsTest_expect +++ b/test/CDB_QuantileBinsTest_expect @@ -3,5 +3,5 @@ 43 57 71 -83 +86 99 From d9e254dbd55d699aff6d1ae01c034c237c40a165 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 28 Apr 2015 10:21:34 -0400 Subject: [PATCH 4/4] missed updating value --- test/CDB_QuantileBinsTest_expect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CDB_QuantileBinsTest_expect b/test/CDB_QuantileBinsTest_expect index e50c251..46c2997 100644 --- a/test/CDB_QuantileBinsTest_expect +++ b/test/CDB_QuantileBinsTest_expect @@ -1,4 +1,4 @@ -16 +13 29 43 57