diff --git a/scripts-available/CDB_QuantileBins.sql b/scripts-available/CDB_QuantileBins.sql index ba73496..23e5dcc 100644 --- a/scripts-available/CDB_QuantileBins.sql +++ b/scripts-available/CDB_QuantileBins.sql @@ -15,18 +15,29 @@ DECLARE i INT := 1; reply numeric[]; BEGIN - -- get our unique values - SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e GROUP BY e ORDER BY e ASC) x; - -- get the total size of our row + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data element_count := array_length(in_array, 1); break_size := element_count::numeric / breaks; -- slice our bread LOOP - IF i > breaks THEN EXIT; END IF; - SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET round(break_size * i) - 1) x; + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + reply = array_append(reply, tmp_val); - i := i+1; - END LOOP; - RETURN reply; + i := i+1; + END LOOP; + RETURN reply; END; $$ language plpgsql IMMUTABLE;