diff --git a/scripts-available/CDB_QuantileBins.sql b/scripts-available/CDB_QuantileBins.sql index d0a0374..23e5dcc 100644 --- a/scripts-available/CDB_QuantileBins.sql +++ b/scripts-available/CDB_QuantileBins.sql @@ -15,18 +15,29 @@ DECLARE i INT := 1; reply numeric[]; BEGIN - -- get our unique values - SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e GROUP BY e ORDER BY e ASC) x; - -- get the total size of our row - element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); break_size := element_count::numeric / breaks; -- slice our bread LOOP - IF i > breaks THEN EXIT; END IF; - SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET round(break_size * i)) x; + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + reply = array_append(reply, tmp_val); - i := i+1; - END LOOP; - RETURN reply; + i := i+1; + END LOOP; + RETURN reply; END; -$$ language plpgsql IMMUTABLE; \ No newline at end of file +$$ language plpgsql IMMUTABLE; diff --git a/test/CDB_QuantileBinsTest_expect b/test/CDB_QuantileBinsTest_expect index aebc9ad..46c2997 100644 --- a/test/CDB_QuantileBinsTest_expect +++ b/test/CDB_QuantileBinsTest_expect @@ -1,7 +1,7 @@ -16 +13 29 43 57 71 -83 +86 99