cartodb-postgresql/scripts-available/CDB_JenksBins.sql

--
-- Determine the Jenks classifications from a numeric array
--
-- @param in_array A numeric array of numbers to determine the best
--            bins based on the Jenks method.
--
-- @param breaks The number of bins you want to find.
--
-- @param iterations The number of different starting positions to test.
--
-- @param invert Optional wheter to return the top of each bin (default)
--               or the bottom. BOOLEAN, default=FALSE.
--
--

CREATE OR REPLACE FUNCTION @extschema@.CDB_JenksBins(in_array NUMERIC[], breaks INT, iterations INT DEFAULT 0, invert BOOLEAN DEFAULT FALSE)
RETURNS NUMERIC[] as
$$
DECLARE
    in_matrix NUMERIC[][];
    in_unique_count BIGINT;

    shuffles INT;
    arr_mean NUMERIC;
    sdam NUMERIC;

    i INT;
    bot INT;
    top INT;

    tops INT[];
    classes INT[][];
    j INT := 1;
    curr_result NUMERIC[];
    best_result NUMERIC[];
    seedtarget TEXT;

BEGIN
    -- We clean the input array (remove NULLs) and create 2 arrays
    -- [1] contains the unique values in in_array
    -- [2] contains the number of appearances of those unique values
    SELECT ARRAY[array_agg(value), array_agg(count)] FROM
    (
        SELECT value, count(1)::numeric as count
        FROM  unnest(in_array) AS value
        WHERE value is NOT NULL
        GROUP BY value
        ORDER BY value
    ) __clean_array_q INTO in_matrix;

    -- Get the number of unique values
    in_unique_count := array_length(in_matrix[1:1], 2);

    IF in_unique_count IS NULL THEN
        RETURN NULL;
    END IF;

    IF in_unique_count <= breaks THEN
        -- There isn't enough distinct values for the requested breaks
        RETURN ARRAY(Select unnest(in_matrix[1:1])) _a;
    END IF;

    -- If not declated explicitly we iterate based on the length of the array
    IF iterations < 1 THEN
        -- This is based on a 'looks fine' heuristic
        iterations := log(in_unique_count)::integer + 1;
    END IF;

    -- We set the number of shuffles per iteration as the number of unique values but
    -- this is just another 'looks fine' heuristic
    shuffles := in_unique_count;

    -- Get the mean value of the whole vector (already ignores NULLs)
    SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x;

    -- Calculate the sum of squared deviations from the array mean (SDAM).
    SELECT sum(((arr_mean - v)^2) * w) INTO sdam FROM (
        SELECT unnest(in_matrix[1:1]) as v, unnest(in_matrix[2:2]) as w
        ) x;

    -- To start, we create ranges with approximately the same amount of different values
    top := 0;
    i := 1;
    LOOP
        bot := top + 1;
        top := ROUND(i * in_unique_count::numeric / breaks::NUMERIC);

        IF i = 1 THEN
            classes = ARRAY[ARRAY[bot,top]];
        ELSE
            classes = ARRAY_CAT(classes, ARRAY[bot,top]);
        END IF;

        i := i + 1;
        IF i > breaks THEN EXIT; END IF;
    END LOOP;

    best_result = @extschema@.CDB_JenksBinsIteration(in_matrix, breaks, classes, invert, sdam, shuffles);

    --set the seed so we can ensure the same results
    SELECT setseed(0.4567) INTO seedtarget;
    --loop through random starting positions
    LOOP
        IF j > iterations-1 THEN  EXIT;  END IF;
        i = 1;
        tops = ARRAY[in_unique_count];
        LOOP
            IF i = breaks THEN  EXIT;  END IF;
            SELECT array_agg(distinct e) INTO tops FROM (
                SELECT unnest(array_cat(tops, ARRAY[trunc(random() * in_unique_count::float8)::int + 1])) as e ORDER BY e
                ) x;
            i = array_length(tops, 1);
        END LOOP;
        top := 0;
        i = 1;
        LOOP
            bot := top + 1;
            top = tops[i];
            IF i = 1 THEN
                classes = ARRAY[ARRAY[bot,top]];
            ELSE
                classes = ARRAY_CAT(classes, ARRAY[bot,top]);
            END IF;

            i := i+1;
            IF i > breaks THEN EXIT; END IF;
        END LOOP;

        curr_result = @extschema@.CDB_JenksBinsIteration(in_matrix, breaks, classes, invert, sdam, shuffles);

        IF curr_result[1] > best_result[1] THEN
            best_result = curr_result;
        END IF;

        j = j+1;
    END LOOP;

    RETURN (best_result)[2:array_upper(best_result, 1)];
END;
$$ LANGUAGE PLPGSQL IMMUTABLE PARALLEL RESTRICTED;


--
-- Perform a single iteration of the Jenks classification
--
-- Returns an array with:
-- - First element: gvf
-- - Second to 2+n: Category limits
DROP FUNCTION IF EXISTS @extschema@.CDB_JenksBinsIteration ( in_matrix NUMERIC[], breaks INT, classes INT[], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT); -- Old signature

CREATE OR REPLACE FUNCTION @extschema@.CDB_JenksBinsIteration ( in_matrix NUMERIC[], breaks INT, classes INT[], invert BOOLEAN, sdam NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$
DECLARE
    i INT;
    iterations INT = 0;

    side INT := 2;

    gvf numeric := 0.0;
    new_gvf numeric;
    arr_gvf numeric[];
    arr_avg numeric[];
    class_avg numeric;
    class_dev numeric;

    class_max_i INT = 0;
    class_min_i INT = 0;
    dev_max numeric;
    dev_min numeric;

    best_classes INT[] = classes;
    best_gvf numeric[];
    best_avg numeric[];
    move_elements INT = 1;

    reply numeric[];

BEGIN

    -- We fill the arrays with the initial values
    i = 0;
    LOOP
        IF i = breaks THEN EXIT; END IF;
        i = i + 1;

        -- Get class mean
        SELECT (sum(v * w) / sum(w)) INTO class_avg FROM (
            SELECT unnest(in_matrix[1:1][classes[i][1]:classes[i][2]]) as v,
                    unnest(in_matrix[2:2][classes[i][1]:classes[i][2]]) as w
            ) x;

        -- Get class deviation
        SELECT sum((class_avg - v)^2 * w) INTO class_dev FROM (
            SELECT unnest(in_matrix[1:1][classes[i][1]:classes[i][2]]) as v,
                    unnest(in_matrix[2:2][classes[i][1]:classes[i][2]]) as w
            ) x;


        IF i = 1 THEN
            arr_avg = ARRAY[class_avg];
            arr_gvf = ARRAY[class_dev];
        ELSE
            arr_avg = array_append(arr_avg, class_avg);
            arr_gvf = array_append(arr_gvf, class_dev);
        END IF;
    END LOOP;

    -- We copy the values to avoid recalculation when a failure happens
    best_avg = arr_avg;
    best_gvf = arr_gvf;

    iterations = 0;
    LOOP
        IF iterations = max_search THEN EXIT; END IF;
        iterations = iterations + 1;

        -- calculate our new GVF
        SELECT sdam - sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x;

        -- Check if any improvement was made
        IF new_gvf <= gvf THEN
            -- If we were moving too many elements, go back and move less
            IF move_elements <= 2 OR class_max_i = class_min_i THEN
                EXIT;
            END IF;

            move_elements = GREATEST(move_elements / 8, 1);

            -- Rollback from saved statuses
            classes = best_classes;
            new_gvf = gvf;

            i = class_min_i;
            LOOP
                arr_avg[i] = best_avg[i];
                arr_gvf[i] = best_gvf[i];

                IF i = class_max_i THEN EXIT; END IF;
                i = i + 1;
            END LOOP;
        END IF;

        -- We search for the classes with the min and max deviation
        i = 1;
        class_min_i = 1;
        class_max_i = 1;
        dev_max = arr_gvf[1];
        dev_min = arr_gvf[1];
        LOOP
            IF i = breaks THEN EXIT; END IF;
            i = i + 1;

            IF arr_gvf[i] < dev_min THEN
                dev_min = arr_gvf[i];
                class_min_i = i;
            ELSE
                IF arr_gvf[i] > dev_max THEN
                    dev_max = arr_gvf[i];
                    class_max_i = i;
                END IF;
            END IF;
        END LOOP;


        -- Save best values for comparison and output
        gvf = new_gvf;
        best_classes = classes;

        -- Limit the moved elements as to not remove everything from class_max_i
        move_elements = LEAST(move_elements, classes[class_max_i][2] - classes[class_max_i][1]);

        -- Move `move_elements` from class_max_i to class_min_i
        IF class_min_i < class_max_i THEN
            i := class_min_i;
            LOOP
                IF i = class_max_i THEN EXIT; END IF;
                classes[i][2] = classes[i][2] + move_elements;
                i := i + 1;
            END LOOP;

            i := class_max_i;
            LOOP
                IF i = class_min_i THEN EXIT; END IF;
                classes[i][1] = classes[i][1] + move_elements;
                i := i - 1;
            END LOOP;
        ELSE
            i := class_min_i;
            LOOP
                IF i = class_max_i THEN EXIT; END IF;
                classes[i][1] = classes[i][1] - move_elements;
                i := i - 1;
            END LOOP;

            i := class_max_i;
            LOOP
                IF i = class_min_i THEN EXIT; END IF;
                classes[i][2] = classes[i][2] - move_elements;
                i := i + 1;
            END LOOP;
        END IF;

        -- Recalculate avg and deviation ONLY for the affected classes
        i = LEAST(class_min_i, class_max_i);
        class_max_i = GREATEST(class_min_i, class_max_i);
        class_min_i = i;
        LOOP
            SELECT (sum(v * w) / sum(w)) INTO class_avg FROM (
                SELECT unnest(in_matrix[1:1][classes[i][1]:classes[i][2]]) as v,
                        unnest(in_matrix[2:2][classes[i][1]:classes[i][2]]) as w
                ) x;

            SELECT sum((class_avg - v)^2 * w) INTO class_dev FROM (
                SELECT unnest(in_matrix[1:1][classes[i][1]:classes[i][2]]) as v,
                        unnest(in_matrix[2:2][classes[i][1]:classes[i][2]]) as w
                ) x;

            -- Save status (in case it's needed for rollback) and store the new one
            best_avg[i] = arr_avg[i];
            arr_avg[i] = class_avg;

            best_gvf[i] = arr_gvf[i];
            arr_gvf[i] = class_dev;

            IF i = class_max_i THEN EXIT; END IF;
            i = i + 1;
        END LOOP;

        move_elements = move_elements * 2;

    END LOOP;

    i = 1;
    LOOP
        IF invert = TRUE THEN
            side = 1; --default returns bottom side of breaks, invert returns top side
        END IF;
        reply = array_append(reply, unnest(in_matrix[1:1][best_classes[i][side]:best_classes[i][side]]));
        i = i+1;
        IF i > breaks THEN  EXIT; END IF;
    END LOOP;

    reply = array_prepend(gvf, reply);
    RETURN reply;

END;
$$ LANGUAGE PLPGSQL IMMUTABLE PARALLEL SAFE;
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`--`
			`-- Determine the Jenks classifications from a numeric array`
			`--`
			`-- @param in_array A numeric array of numbers to determine the best`
			`-- bins based on the Jenks method.`
			`--`
			`-- @param breaks The number of bins you want to find.`
			`--`
			`-- @param iterations The number of different starting positions to test.`
			`--`
			`-- @param invert Optional wheter to return the top of each bin (default)`
			`-- or the bottom. BOOLEAN, default=FALSE.`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`--`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`--`

Fully qualify function calls 2019-05-31 21:29:28 +08:00			`CREATE OR REPLACE FUNCTION @extschema@.CDB_JenksBins(in_array NUMERIC[], breaks INT, iterations INT DEFAULT 0, invert BOOLEAN DEFAULT FALSE)`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`RETURNS NUMERIC[] as`
			`$$`
			`DECLARE`
			`in_matrix NUMERIC[][];`
			`in_unique_count BIGINT;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`shuffles INT;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`arr_mean NUMERIC;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`sdam NUMERIC;`

			`i INT;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`bot INT;`
			`top INT;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`tops INT[];`
			`classes INT[][];`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`j INT := 1;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`curr_result NUMERIC[];`
			`best_result NUMERIC[];`
			`seedtarget TEXT;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`BEGIN`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`-- We clean the input array (remove NULLs) and create 2 arrays`
			`-- [1] contains the unique values in in_array`
			`-- [2] contains the number of appearances of those unique values`
			`SELECT ARRAY[array_agg(value), array_agg(count)] FROM`
			`(`
			`SELECT value, count(1)::numeric as count`
			`FROM unnest(in_array) AS value`
			`WHERE value is NOT NULL`
			`GROUP BY value`
			`ORDER BY value`
			`) __clean_array_q INTO in_matrix;`

			`-- Get the number of unique values`
			`in_unique_count := array_length(in_matrix[1:1], 2);`

			`IF in_unique_count IS NULL THEN`
			`RETURN NULL;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`END IF;`

Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`IF in_unique_count <= breaks THEN`
			`-- There isn't enough distinct values for the requested breaks`
			`RETURN ARRAY(Select unnest(in_matrix[1:1])) _a;`
			`END IF;`

			`-- If not declated explicitly we iterate based on the length of the array`
			`IF iterations < 1 THEN`
			`-- This is based on a 'looks fine' heuristic`
			`iterations := log(in_unique_count)::integer + 1;`
			`END IF;`

			`-- We set the number of shuffles per iteration as the number of unique values but`
			`-- this is just another 'looks fine' heuristic`
			`shuffles := in_unique_count;`

			`-- Get the mean value of the whole vector (already ignores NULLs)`
			`SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x;`

			`-- Calculate the sum of squared deviations from the array mean (SDAM).`
			`SELECT sum(((arr_mean - v)^2) * w) INTO sdam FROM (`
			`SELECT unnest(in_matrix[1:1]) as v, unnest(in_matrix[2:2]) as w`
			`) x;`

			`-- To start, we create ranges with approximately the same amount of different values`
			`top := 0;`
			`i := 1;`
			`LOOP`
			`bot := top + 1;`
			`top := ROUND(i * in_unique_count::numeric / breaks::NUMERIC);`

			`IF i = 1 THEN`
			`classes = ARRAY[ARRAY[bot,top]];`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`ELSE`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`classes = ARRAY_CAT(classes, ARRAY[bot,top]);`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`END IF;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
			`i := i + 1;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`IF i > breaks THEN EXIT; END IF;`
			`END LOOP;`

Fully qualify function calls 2019-05-31 21:29:28 +08:00			`best_result = @extschema@.CDB_JenksBinsIteration(in_matrix, breaks, classes, invert, sdam, shuffles);`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00
			`--set the seed so we can ensure the same results`
			`SELECT setseed(0.4567) INTO seedtarget;`
			`--loop through random starting positions`
			`LOOP`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`IF j > iterations-1 THEN EXIT; END IF;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`i = 1;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`tops = ARRAY[in_unique_count];`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`LOOP`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`IF i = breaks THEN EXIT; END IF;`
			`SELECT array_agg(distinct e) INTO tops FROM (`
			`SELECT unnest(array_cat(tops, ARRAY[trunc(random() * in_unique_count::float8)::int + 1])) as e ORDER BY e`
			`) x;`
			`i = array_length(tops, 1);`
			`END LOOP;`
			`top := 0;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`i = 1;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`LOOP`
			`bot := top + 1;`
			`top = tops[i];`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`IF i = 1 THEN`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`classes = ARRAY[ARRAY[bot,top]];`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`ELSE`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`classes = ARRAY_CAT(classes, ARRAY[bot,top]);`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`END IF;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
			`i := i+1;`
			`IF i > breaks THEN EXIT; END IF;`
			`END LOOP;`

Fully qualify function calls 2019-05-31 21:29:28 +08:00			`curr_result = @extschema@.CDB_JenksBinsIteration(in_matrix, breaks, classes, invert, sdam, shuffles);`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00
			`IF curr_result[1] > best_result[1] THEN`
			`best_result = curr_result;`
			`END IF;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`j = j+1;`
			`END LOOP;`

			`RETURN (best_result)[2:array_upper(best_result, 1)];`
			`END;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`$$ LANGUAGE PLPGSQL IMMUTABLE PARALLEL RESTRICTED;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00

			`--`
			`-- Perform a single iteration of the Jenks classification`
			`--`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`-- Returns an array with:`
			`-- - First element: gvf`
			`-- - Second to 2+n: Category limits`
Fully qualify function calls 2019-05-31 21:29:28 +08:00			`DROP FUNCTION IF EXISTS @extschema@.CDB_JenksBinsIteration ( in_matrix NUMERIC[], breaks INT, classes INT[], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT); -- Old signature`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
Fully qualify function calls 2019-05-31 21:29:28 +08:00			`CREATE OR REPLACE FUNCTION @extschema@.CDB_JenksBinsIteration ( in_matrix NUMERIC[], breaks INT, classes INT[], invert BOOLEAN, sdam NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`DECLARE`
			`i INT;`
			`iterations INT = 0;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00
			`side INT := 2;`

Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`gvf numeric := 0.0;`
			`new_gvf numeric;`
			`arr_gvf numeric[];`
			`arr_avg numeric[];`
			`class_avg numeric;`
			`class_dev numeric;`

Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`class_max_i INT = 0;`
			`class_min_i INT = 0;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`dev_max numeric;`
			`dev_min numeric;`
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`best_classes INT[] = classes;`
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`best_gvf numeric[];`
			`best_avg numeric[];`
			`move_elements INT = 1;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
			`reply numeric[];`

			`BEGIN`

			`-- We fill the arrays with the initial values`
			`i = 0;`
			`LOOP`
			`IF i = breaks THEN EXIT; END IF;`
			`i = i + 1;`

			`-- Get class mean`
			`SELECT (sum(v * w) / sum(w)) INTO class_avg FROM (`
			`SELECT unnest(in_matrix[1:1][classes[i][1]:classes[i][2]]) as v,`
			`unnest(in_matrix[2:2][classes[i][1]:classes[i][2]]) as w`
			`) x;`

			`-- Get class deviation`
			`SELECT sum((class_avg - v)^2 * w) INTO class_dev FROM (`
			`SELECT unnest(in_matrix[1:1][classes[i][1]:classes[i][2]]) as v,`
			`unnest(in_matrix[2:2][classes[i][1]:classes[i][2]]) as w`
			`) x;`


			`IF i = 1 THEN`
			`arr_avg = ARRAY[class_avg];`
			`arr_gvf = ARRAY[class_dev];`
			`ELSE`
			`arr_avg = array_append(arr_avg, class_avg);`
			`arr_gvf = array_append(arr_gvf, class_dev);`
			`END IF;`
			`END LOOP;`

Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`-- We copy the values to avoid recalculation when a failure happens`
			`best_avg = arr_avg;`
			`best_gvf = arr_gvf;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
			`iterations = 0;`
			`LOOP`
			`IF iterations = max_search THEN EXIT; END IF;`
			`iterations = iterations + 1;`

Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`-- calculate our new GVF`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`SELECT sdam - sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x;`

Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`-- Check if any improvement was made`
			`IF new_gvf <= gvf THEN`
			`-- If we were moving too many elements, go back and move less`
			`IF move_elements <= 2 OR class_max_i = class_min_i THEN`
			`EXIT;`
			`END IF;`

			`move_elements = GREATEST(move_elements / 8, 1);`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`-- Rollback from saved statuses`
			`classes = best_classes;`
			`new_gvf = gvf;`

			`i = class_min_i;`
			`LOOP`
			`arr_avg[i] = best_avg[i];`
			`arr_gvf[i] = best_gvf[i];`

			`IF i = class_max_i THEN EXIT; END IF;`
			`i = i + 1;`
			`END LOOP;`
			`END IF;`

			`-- We search for the classes with the min and max deviation`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`i = 1;`
			`class_min_i = 1;`
			`class_max_i = 1;`
			`dev_max = arr_gvf[1];`
			`dev_min = arr_gvf[1];`
			`LOOP`
			`IF i = breaks THEN EXIT; END IF;`
			`i = i + 1;`

			`IF arr_gvf[i] < dev_min THEN`
			`dev_min = arr_gvf[i];`
			`class_min_i = i;`
			`ELSE`
			`IF arr_gvf[i] > dev_max THEN`
			`dev_max = arr_gvf[i];`
			`class_max_i = i;`
			`END IF;`
			`END IF;`
			`END LOOP;`


			`-- Save best values for comparison and output`
			`gvf = new_gvf;`
			`best_classes = classes;`

Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`-- Limit the moved elements as to not remove everything from class_max_i`
			`move_elements = LEAST(move_elements, classes[class_max_i][2] - classes[class_max_i][1]);`

			-- Move `move_elements` from class_max_i to class_min_i
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`IF class_min_i < class_max_i THEN`
			`i := class_min_i;`
			`LOOP`
			`IF i = class_max_i THEN EXIT; END IF;`
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`classes[i][2] = classes[i][2] + move_elements;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`i := i + 1;`
			`END LOOP;`

			`i := class_max_i;`
			`LOOP`
			`IF i = class_min_i THEN EXIT; END IF;`
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`classes[i][1] = classes[i][1] + move_elements;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`i := i - 1;`
			`END LOOP;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`ELSE`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`i := class_min_i;`
			`LOOP`
			`IF i = class_max_i THEN EXIT; END IF;`
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`classes[i][1] = classes[i][1] - move_elements;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`i := i - 1;`
			`END LOOP;`

			`i := class_max_i;`
			`LOOP`
			`IF i = class_min_i THEN EXIT; END IF;`
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`classes[i][2] = classes[i][2] - move_elements;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`i := i + 1;`
			`END LOOP;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`END IF;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`-- Recalculate avg and deviation ONLY for the affected classes`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`i = LEAST(class_min_i, class_max_i);`
			`class_max_i = GREATEST(class_min_i, class_max_i);`
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`class_min_i = i;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`LOOP`
			`SELECT (sum(v * w) / sum(w)) INTO class_avg FROM (`
			`SELECT unnest(in_matrix[1:1][classes[i][1]:classes[i][2]]) as v,`
			`unnest(in_matrix[2:2][classes[i][1]:classes[i][2]]) as w`
			`) x;`

			`SELECT sum((class_avg - v)^2 * w) INTO class_dev FROM (`
			`SELECT unnest(in_matrix[1:1][classes[i][1]:classes[i][2]]) as v,`
			`unnest(in_matrix[2:2][classes[i][1]:classes[i][2]]) as w`
			`) x;`

Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`-- Save status (in case it's needed for rollback) and store the new one`
			`best_avg[i] = arr_avg[i];`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`arr_avg[i] = class_avg;`
Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00
			`best_gvf[i] = arr_gvf[i];`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`arr_gvf[i] = class_dev;`

			`IF i = class_max_i THEN EXIT; END IF;`
			`i = i + 1;`
			`END LOOP;`

Jenks: Iterate increasing the number of moved elements exponentially 2018-09-12 01:22:48 +08:00			`move_elements = move_elements * 2;`

Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`END LOOP;`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00
			`i = 1;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`LOOP`
Copy cartodb lib/sql scripts from CDB_CartodbfyTable branch 2014-05-05 23:13:06 +08:00			`IF invert = TRUE THEN`
			`side = 1; --default returns bottom side of breaks, invert returns top side`
			`END IF;`
Jenks: Fix multiple bugs 2018-09-10 19:16:54 +08:00			`reply = array_append(reply, unnest(in_matrix[1:1][best_classes[i][side]:best_classes[i][side]]));`
			`i = i+1;`
			`IF i > breaks THEN EXIT; END IF;`
			`END LOOP;`

			`reply = array_prepend(gvf, reply);`
			`RETURN reply;`

			`END;`
			`$$ LANGUAGE PLPGSQL IMMUTABLE PARALLEL SAFE;`