Convert CDB_EqualIntervalBins to a single SQL statement and add float version

This commit is contained in:
Paul Norman 2016-04-20 16:07:00 -07:00
parent 1358964628
commit b19a5fc3dc

View File

@ -1,7 +1,7 @@
-- --
-- Calculate the equal interval bins for a given column -- Calculate the equal interval bins for a given column
-- --
-- @param in_array A numeric array of numbers to determine the best -- @param in_array A numeric or double precision array of numbers to determine the best
-- to determine the bin boundary -- to determine the bin boundary
-- --
-- @param breaks The number of bins you want to find. -- @param breaks The number of bins you want to find.
@ -10,28 +10,22 @@
-- Returns: upper edges of bins -- Returns: upper edges of bins
-- --
-- --
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$
DECLARE WITH stats AS (
diff numeric; SELECT min(e), (max(e)-min(e))/breaks AS del
min_val numeric; FROM (SELECT unnest(in_array) e) AS p)
max_val numeric; SELECT array_agg(bins)
tmp_val numeric; FROM (
i INT := 1; SELECT min + generate_series(1,breaks)*del AS bins
reply numeric[]; FROM stats) q;
BEGIN $$ LANGUAGE SQL IMMUTABLE;
SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL;
diff = (max_val - min_val) / breaks::numeric; CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array double precision[], breaks INT ) RETURNS double precision[] as $$
LOOP WITH stats AS (
IF i < breaks THEN SELECT min(e), (max(e)-min(e))/breaks AS del
tmp_val = min_val + i::numeric * diff; FROM (SELECT unnest(in_array) e) AS p)
reply = array_append(reply, tmp_val); SELECT array_agg(bins)
i := i+1; FROM (
ELSE SELECT min + generate_series(1,breaks)*del AS bins
reply = array_append(reply, max_val); FROM stats) q;
EXIT; $$ LANGUAGE SQL IMMUTABLE;
END IF;
END LOOP;
RETURN reply;
END;
$$ language plpgsql IMMUTABLE;