commit
c3e12036e7
50
doc/19_contour.md
Normal file
50
doc/19_contour.md
Normal file
@ -0,0 +1,50 @@
|
||||
## Contour maps
|
||||
|
||||
Function to generate a contour map from an scatter dataset of points, using one of three methos:
|
||||
|
||||
* [Nearest neighbor](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
|
||||
* [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system)
|
||||
* [IDW](https://en.wikipedia.org/wiki/Inverse_distance_weighting)
|
||||
|
||||
### CDB_Contour (geom geometry[], values numeric[], resolution integer, buffer numeric, method, classmethod integer, steps integer)
|
||||
|
||||
#### Arguments
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| geom | geometry[] | Array of points's geometries |
|
||||
| values | numeric[] | Array of points' values for the param under study|
|
||||
| buffer | numeric | Value between 0 and 1 for spatial buffer of the set of points
|
||||
| method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW|
|
||||
| classmethod | integer | 0:equals, 1: heads&tails, 2:jenks, 3:quantiles |
|
||||
| steps | integer | Number of steps in the classification|
|
||||
| max_time | integer | Max time in millisecons for processing time
|
||||
|
||||
### Returns
|
||||
Returns a table object
|
||||
|
||||
| Name | Type | Description |
|
||||
|------|------|-------------|
|
||||
| the_geom | geometry | Geometries of the classified contour map|
|
||||
| avg_value | numeric | Avg value of the area|
|
||||
| min_value | numeric | Min value of the area|
|
||||
| max_value | numeric | Max value of the areal|
|
||||
| bin | integer | Index of the class of the area|
|
||||
|
||||
#### Example Usage
|
||||
|
||||
```sql
|
||||
WITH a AS (
|
||||
SELECT
|
||||
ARRAY[800, 700, 600, 500, 400, 300, 200, 100]::numeric[] AS vals,
|
||||
ARRAY[ST_GeomFromText('POINT(2.1744 41.403)',4326),ST_GeomFromText('POINT(2.1228 41.380)',4326),ST_GeomFromText('POINT(2.1511 41.374)',4326),ST_GeomFromText('POINT(2.1528 41.413)',4326),ST_GeomFromText('POINT(2.165 41.391)',4326),ST_GeomFromText('POINT(2.1498 41.371)',4326),ST_GeomFromText('POINT(2.1533 41.368)',4326),ST_GeomFromText('POINT(2.131386 41.41399)',4326)] AS g
|
||||
),
|
||||
b as(
|
||||
SELECT
|
||||
foo.*
|
||||
FROM
|
||||
a,
|
||||
cdb_crankshaft.CDB_contour(a.g, a.vals, 0.0, 1, 3, 5, 60) foo
|
||||
)
|
||||
SELECT bin, avg_value from b order by bin;
|
||||
```
|
205
src/pg/sql/19_contour.sql
Normal file
205
src/pg/sql/19_contour.sql
Normal file
@ -0,0 +1,205 @@
|
||||
CREATE OR REPLACE FUNCTION CDB_Contour(
|
||||
IN geomin geometry[],
|
||||
IN colin numeric[],
|
||||
IN buffer numeric,
|
||||
IN intmethod integer,
|
||||
IN classmethod integer,
|
||||
IN steps integer,
|
||||
IN max_time integer DEFAULT 60000
|
||||
)
|
||||
RETURNS TABLE(
|
||||
the_geom geometry,
|
||||
bin integer,
|
||||
min_value numeric,
|
||||
max_value numeric,
|
||||
avg_value numeric
|
||||
) AS $$
|
||||
DECLARE
|
||||
cell_count integer;
|
||||
tin geometry[];
|
||||
BEGIN
|
||||
-- calc the cell size in web mercator units
|
||||
-- WITH center as (
|
||||
-- SELECT ST_centroid(ST_Collect(geomin)) as c
|
||||
-- )
|
||||
-- SELECT
|
||||
-- round(resolution / cos(ST_y(c) * pi()/180))
|
||||
-- INTO cell
|
||||
-- FROM center;
|
||||
-- raise notice 'Resol: %', cell;
|
||||
|
||||
-- calc the optimal number of cells for the current dataset
|
||||
SELECT
|
||||
CASE intmethod
|
||||
WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073)
|
||||
WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797)
|
||||
WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218)
|
||||
ELSE 10000
|
||||
END INTO cell_count;
|
||||
|
||||
-- we don't have iterative barycentric interpolation in CDB_interpolation,
|
||||
-- and it's a costy function, so let's make a custom one here till
|
||||
-- we update the code
|
||||
-- tin := ARRAY[]::geometry[];
|
||||
IF intmethod=1 THEN
|
||||
WITH
|
||||
a as (SELECT unnest(geomin) AS e),
|
||||
b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
|
||||
c as (SELECT (ST_Dump(t)).geom as v FROM b)
|
||||
SELECT array_agg(v) INTO tin FROM c;
|
||||
END IF;
|
||||
-- Delaunay stuff performed just ONCE!!
|
||||
|
||||
-- magic
|
||||
RETURN QUERY
|
||||
WITH
|
||||
convexhull as (
|
||||
SELECT
|
||||
ST_ConvexHull(ST_Collect(geomin)) as g,
|
||||
buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r
|
||||
),
|
||||
envelope as (
|
||||
SELECT
|
||||
st_expand(a.g, a.r) as e
|
||||
FROM convexhull a
|
||||
),
|
||||
envelope3857 as(
|
||||
SELECT
|
||||
ST_Transform(e, 3857) as geom
|
||||
FROM envelope
|
||||
),
|
||||
resolution as(
|
||||
SELECT
|
||||
round(|/ (
|
||||
ST_area(geom) / cell_count
|
||||
)) as cell
|
||||
FROM envelope3857
|
||||
),
|
||||
grid as(
|
||||
SELECT
|
||||
ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom
|
||||
FROM envelope3857 e, resolution r
|
||||
),
|
||||
interp as(
|
||||
SELECT
|
||||
geom,
|
||||
CASE
|
||||
WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom))
|
||||
ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod)
|
||||
END as val
|
||||
FROM grid
|
||||
),
|
||||
classes as(
|
||||
SELECT CASE
|
||||
WHEN classmethod = 0 THEN
|
||||
cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps)
|
||||
WHEN classmethod = 1 THEN
|
||||
cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps)
|
||||
WHEN classmethod = 2 THEN
|
||||
cdb_crankshaft.CDB_JenksBins(array_agg(val), steps)
|
||||
ELSE
|
||||
cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps)
|
||||
END as b
|
||||
FROM interp
|
||||
where val is not null
|
||||
),
|
||||
classified as(
|
||||
SELECT
|
||||
i.*,
|
||||
width_bucket(i.val, c.b) as bucket
|
||||
FROM interp i left join classes c
|
||||
ON 1=1
|
||||
),
|
||||
classified2 as(
|
||||
SELECT
|
||||
geom,
|
||||
val,
|
||||
CASE
|
||||
WHEN bucket = steps THEN bucket - 1
|
||||
ELSE bucket
|
||||
END as b
|
||||
FROM classified
|
||||
),
|
||||
final as(
|
||||
SELECT
|
||||
st_union(geom) as the_geom,
|
||||
b as bin,
|
||||
min(val) as min_value,
|
||||
max(val) as max_value,
|
||||
avg(val) as avg_value
|
||||
FROM classified2
|
||||
GROUP BY bin
|
||||
)
|
||||
SELECT
|
||||
*
|
||||
FROM final
|
||||
where final.bin is not null
|
||||
;
|
||||
END;
|
||||
$$ language plpgsql;
|
||||
|
||||
|
||||
|
||||
-- =====================================================================
|
||||
-- Interp in grid, so we can use barycentric with a precalculated tin (NNI)
|
||||
-- =====================================================================
|
||||
CREATE OR REPLACE FUNCTION _interp_in_tin(
|
||||
IN geomin geometry[],
|
||||
IN colin numeric[],
|
||||
IN tin geometry[],
|
||||
IN point geometry
|
||||
)
|
||||
RETURNS numeric AS
|
||||
$$
|
||||
DECLARE
|
||||
g geometry;
|
||||
vertex geometry[];
|
||||
sg numeric;
|
||||
sa numeric;
|
||||
sb numeric;
|
||||
sc numeric;
|
||||
va numeric;
|
||||
vb numeric;
|
||||
vc numeric;
|
||||
output numeric;
|
||||
BEGIN
|
||||
-- get the cell the point is within
|
||||
WITH
|
||||
a as (SELECT unnest(tin) as v),
|
||||
b as (SELECT v FROM a WHERE ST_Within(point, v))
|
||||
SELECT v INTO g FROM b;
|
||||
|
||||
-- if we're out of the data realm,
|
||||
-- return null
|
||||
IF g is null THEN
|
||||
RETURN null;
|
||||
END IF;
|
||||
|
||||
-- vertex of the selected cell
|
||||
WITH a AS (
|
||||
SELECT (ST_DumpPoints(g)).geom AS v
|
||||
)
|
||||
SELECT array_agg(v) INTO vertex FROM a;
|
||||
|
||||
-- retrieve the value of each vertex
|
||||
WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
|
||||
SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
|
||||
|
||||
WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
|
||||
|
||||
WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c)
|
||||
SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
|
||||
|
||||
-- calc the areas
|
||||
SELECT
|
||||
ST_area(g),
|
||||
ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))),
|
||||
ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))),
|
||||
ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
|
||||
|
||||
output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1);
|
||||
RETURN output;
|
||||
END;
|
||||
$$
|
||||
language plpgsql;
|
447
src/pg/sql/cdb_utils.sql
Normal file
447
src/pg/sql/cdb_utils.sql
Normal file
@ -0,0 +1,447 @@
|
||||
--
|
||||
-- Fill given extent with a rectangular coverage
|
||||
--
|
||||
-- @param ext Extent to fill. Only rectangles with center point falling
|
||||
-- inside the extent (or at the lower or leftmost edge) will
|
||||
-- be emitted. The returned hexagons will have the same SRID
|
||||
-- as this extent.
|
||||
--
|
||||
-- @param width With of each rectangle
|
||||
--
|
||||
-- @param height Height of each rectangle
|
||||
--
|
||||
-- @param origin Optional origin to allow for exact tiling.
|
||||
-- If omitted the origin will be 0,0.
|
||||
-- The parameter is checked for having the same SRID
|
||||
-- as the extent.
|
||||
--
|
||||
--
|
||||
CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL)
|
||||
RETURNS SETOF GEOMETRY
|
||||
AS $$
|
||||
DECLARE
|
||||
h GEOMETRY; -- rectangle cell
|
||||
hstep FLOAT8; -- horizontal step
|
||||
vstep FLOAT8; -- vertical step
|
||||
hw FLOAT8; -- half width
|
||||
hh FLOAT8; -- half height
|
||||
vstart FLOAT8;
|
||||
hstart FLOAT8;
|
||||
hend FLOAT8;
|
||||
vend FLOAT8;
|
||||
xoff FLOAT8;
|
||||
yoff FLOAT8;
|
||||
xgrd FLOAT8;
|
||||
ygrd FLOAT8;
|
||||
x FLOAT8;
|
||||
y FLOAT8;
|
||||
srid INTEGER;
|
||||
BEGIN
|
||||
|
||||
srid := ST_SRID(ext);
|
||||
|
||||
xoff := 0;
|
||||
yoff := 0;
|
||||
|
||||
IF origin IS NOT NULL THEN
|
||||
IF ST_SRID(origin) != srid THEN
|
||||
RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin);
|
||||
END IF;
|
||||
xoff := ST_X(origin);
|
||||
yoff := ST_Y(origin);
|
||||
END IF;
|
||||
|
||||
--RAISE DEBUG 'X offset: %', xoff;
|
||||
--RAISE DEBUG 'Y offset: %', yoff;
|
||||
|
||||
hw := width/2.0;
|
||||
hh := height/2.0;
|
||||
|
||||
xgrd := hw;
|
||||
ygrd := hh;
|
||||
--RAISE DEBUG 'X grid size: %', xgrd;
|
||||
--RAISE DEBUG 'Y grid size: %', ygrd;
|
||||
|
||||
hstep := width;
|
||||
vstep := height;
|
||||
|
||||
-- Tweak horizontal start on hstep grid from origin
|
||||
hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep;
|
||||
--RAISE DEBUG 'hstart: %', hstart;
|
||||
|
||||
-- Tweak vertical start on vstep grid from origin
|
||||
vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep;
|
||||
--RAISE DEBUG 'vstart: %', vstart;
|
||||
|
||||
hend := ST_XMax(ext);
|
||||
vend := ST_YMax(ext);
|
||||
|
||||
--RAISE DEBUG 'hend: %', hend;
|
||||
--RAISE DEBUG 'vend: %', vend;
|
||||
|
||||
x := hstart;
|
||||
WHILE x < hend LOOP -- over X
|
||||
y := vstart;
|
||||
h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid);
|
||||
WHILE y < vend LOOP -- over Y
|
||||
RETURN NEXT h;
|
||||
h := ST_Translate(h, 0, vstep);
|
||||
y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid
|
||||
END LOOP;
|
||||
x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid
|
||||
END LOOP;
|
||||
|
||||
RETURN;
|
||||
END
|
||||
$$ LANGUAGE 'plpgsql' IMMUTABLE;
|
||||
|
||||
--
|
||||
-- Calculate the equal interval bins for a given column
|
||||
--
|
||||
-- @param in_array A numeric array of numbers to determine the best
|
||||
-- to determine the bin boundary
|
||||
--
|
||||
-- @param breaks The number of bins you want to find.
|
||||
--
|
||||
--
|
||||
-- Returns: upper edges of bins
|
||||
--
|
||||
--
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$
|
||||
DECLARE
|
||||
diff numeric;
|
||||
min_val numeric;
|
||||
max_val numeric;
|
||||
tmp_val numeric;
|
||||
i INT := 1;
|
||||
reply numeric[];
|
||||
BEGIN
|
||||
SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL;
|
||||
diff = (max_val - min_val) / breaks::numeric;
|
||||
LOOP
|
||||
IF i < breaks THEN
|
||||
tmp_val = min_val + i::numeric * diff;
|
||||
reply = array_append(reply, tmp_val);
|
||||
i := i+1;
|
||||
ELSE
|
||||
reply = array_append(reply, max_val);
|
||||
EXIT;
|
||||
END IF;
|
||||
END LOOP;
|
||||
RETURN reply;
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE;
|
||||
|
||||
--
|
||||
-- Determine the Heads/Tails classifications from a numeric array
|
||||
--
|
||||
-- @param in_array A numeric array of numbers to determine the best
|
||||
-- bins based on the Heads/Tails method.
|
||||
--
|
||||
-- @param breaks The number of bins you want to find.
|
||||
--
|
||||
--
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$
|
||||
DECLARE
|
||||
element_count INT4;
|
||||
arr_mean numeric;
|
||||
i INT := 2;
|
||||
reply numeric[];
|
||||
BEGIN
|
||||
-- get the total size of our row
|
||||
element_count := array_upper(in_array, 1) - array_lower(in_array, 1);
|
||||
-- ensure the ordering of in_array
|
||||
SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x;
|
||||
-- stop if no rows
|
||||
IF element_count IS NULL THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
-- stop if our breaks are more than our input array size
|
||||
IF element_count < breaks THEN
|
||||
RETURN in_array;
|
||||
END IF;
|
||||
|
||||
-- get our mean value
|
||||
SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x;
|
||||
|
||||
reply = Array[arr_mean];
|
||||
-- slice our bread
|
||||
LOOP
|
||||
IF i > breaks THEN EXIT; END IF;
|
||||
SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1];
|
||||
IF arr_mean IS NOT NULL THEN
|
||||
reply = array_append(reply, arr_mean);
|
||||
END IF;
|
||||
i := i+1;
|
||||
END LOOP;
|
||||
RETURN reply;
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE;
|
||||
|
||||
--
|
||||
-- Determine the Jenks classifications from a numeric array
|
||||
--
|
||||
-- @param in_array A numeric array of numbers to determine the best
|
||||
-- bins based on the Jenks method.
|
||||
--
|
||||
-- @param breaks The number of bins you want to find.
|
||||
--
|
||||
-- @param iterations The number of different starting positions to test.
|
||||
--
|
||||
-- @param invert Optional wheter to return the top of each bin (default)
|
||||
-- or the bottom. BOOLEAN, default=FALSE.
|
||||
--
|
||||
--
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$
|
||||
DECLARE
|
||||
element_count INT4;
|
||||
arr_mean NUMERIC;
|
||||
bot INT;
|
||||
top INT;
|
||||
tops INT[];
|
||||
classes INT[][];
|
||||
i INT := 1; j INT := 1;
|
||||
curr_result NUMERIC[];
|
||||
best_result NUMERIC[];
|
||||
seedtarget TEXT;
|
||||
quant NUMERIC[];
|
||||
shuffles INT;
|
||||
BEGIN
|
||||
-- get the total size of our row
|
||||
element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1);
|
||||
-- ensure the ordering of in_array
|
||||
SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x;
|
||||
-- stop if no rows
|
||||
IF element_count IS NULL THEN
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
-- stop if our breaks are more than our input array size
|
||||
IF element_count < breaks THEN
|
||||
RETURN in_array;
|
||||
END IF;
|
||||
|
||||
shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int;
|
||||
-- get our mean value
|
||||
SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x;
|
||||
|
||||
-- assume best is actually Quantile
|
||||
SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant;
|
||||
|
||||
-- if data is very very large, just return quant and be done
|
||||
IF element_count > 5000000 THEN
|
||||
RETURN quant;
|
||||
END IF;
|
||||
|
||||
-- change quant into bottom, top markers
|
||||
LOOP
|
||||
IF i = 1 THEN
|
||||
bot = 1;
|
||||
ELSE
|
||||
-- use last top to find this bot
|
||||
bot = top+1;
|
||||
END IF;
|
||||
IF i = breaks THEN
|
||||
top = element_count;
|
||||
ELSE
|
||||
SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i];
|
||||
END IF;
|
||||
IF i = 1 THEN
|
||||
classes = ARRAY[ARRAY[bot,top]];
|
||||
ELSE
|
||||
classes = ARRAY_CAT(classes,ARRAY[bot,top]);
|
||||
END IF;
|
||||
IF i > breaks THEN EXIT; END IF;
|
||||
i = i+1;
|
||||
END LOOP;
|
||||
|
||||
best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles);
|
||||
|
||||
--set the seed so we can ensure the same results
|
||||
SELECT setseed(0.4567) INTO seedtarget;
|
||||
--loop through random starting positions
|
||||
LOOP
|
||||
IF j > iterations-1 THEN EXIT; END IF;
|
||||
i = 1;
|
||||
tops = ARRAY[element_count];
|
||||
LOOP
|
||||
IF i = breaks THEN EXIT; END IF;
|
||||
SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1;
|
||||
i = array_length(tops, 1);
|
||||
END LOOP;
|
||||
i = 1;
|
||||
LOOP
|
||||
IF i > breaks THEN EXIT; END IF;
|
||||
IF i = 1 THEN
|
||||
bot = 1;
|
||||
ELSE
|
||||
bot = top+1;
|
||||
END IF;
|
||||
top = tops[i];
|
||||
IF i = 1 THEN
|
||||
classes = ARRAY[ARRAY[bot,top]];
|
||||
ELSE
|
||||
classes = ARRAY_CAT(classes,ARRAY[bot,top]);
|
||||
END IF;
|
||||
i := i+1;
|
||||
END LOOP;
|
||||
curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles);
|
||||
|
||||
IF curr_result[1] > best_result[1] THEN
|
||||
best_result = curr_result;
|
||||
j = j-1; -- if we found a better result, add one more search
|
||||
END IF;
|
||||
j = j+1;
|
||||
END LOOP;
|
||||
|
||||
RETURN (best_result)[2:array_upper(best_result, 1)];
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
|
||||
--
|
||||
-- Perform a single iteration of the Jenks classification
|
||||
--
|
||||
|
||||
CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$
|
||||
DECLARE
|
||||
tmp_val numeric;
|
||||
new_classes int[][];
|
||||
tmp_class int[];
|
||||
i INT := 1;
|
||||
j INT := 1;
|
||||
side INT := 2;
|
||||
sdam numeric;
|
||||
gvf numeric := 0.0;
|
||||
new_gvf numeric;
|
||||
arr_gvf numeric[];
|
||||
class_avg numeric;
|
||||
class_max_i INT;
|
||||
class_min_i INT;
|
||||
class_max numeric;
|
||||
class_min numeric;
|
||||
reply numeric[];
|
||||
BEGIN
|
||||
|
||||
-- Calculate the sum of squared deviations from the array mean (SDAM).
|
||||
SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x;
|
||||
--Identify the breaks for the lowest GVF
|
||||
LOOP
|
||||
i = 1;
|
||||
LOOP
|
||||
-- get our mean
|
||||
SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x;
|
||||
-- find the deviation
|
||||
SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x;
|
||||
IF i = 1 THEN
|
||||
arr_gvf = ARRAY[tmp_val];
|
||||
-- init our min/max map for later
|
||||
class_max = arr_gvf[i];
|
||||
class_min = arr_gvf[i];
|
||||
class_min_i = 1;
|
||||
class_max_i = 1;
|
||||
ELSE
|
||||
arr_gvf = array_append(arr_gvf, tmp_val);
|
||||
END IF;
|
||||
i := i+1;
|
||||
IF i > breaks THEN EXIT; END IF;
|
||||
END LOOP;
|
||||
-- calculate our new GVF
|
||||
SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x;
|
||||
-- if no improvement was made, exit
|
||||
IF new_gvf < gvf THEN EXIT; END IF;
|
||||
gvf = new_gvf;
|
||||
IF j > max_search THEN EXIT; END IF;
|
||||
j = j+1;
|
||||
i = 1;
|
||||
LOOP
|
||||
--establish directionality (uppward through classes or downward)
|
||||
IF arr_gvf[i] < class_min THEN
|
||||
class_min = arr_gvf[i];
|
||||
class_min_i = i;
|
||||
END IF;
|
||||
IF arr_gvf[i] > class_max THEN
|
||||
class_max = arr_gvf[i];
|
||||
class_max_i = i;
|
||||
END IF;
|
||||
i := i+1;
|
||||
IF i > breaks THEN EXIT; END IF;
|
||||
END LOOP;
|
||||
IF class_max_i > class_min_i THEN
|
||||
class_min_i = class_max_i - 1;
|
||||
ELSE
|
||||
class_min_i = class_max_i + 1;
|
||||
END IF;
|
||||
--Move from higher class to a lower gid order
|
||||
IF class_max_i > class_min_i THEN
|
||||
classes[class_max_i][1] = classes[class_max_i][1] + 1;
|
||||
classes[class_min_i][2] = classes[class_min_i][2] + 1;
|
||||
ELSE -- Move from lower class UP into a higher class by gid
|
||||
classes[class_max_i][2] = classes[class_max_i][2] - 1;
|
||||
classes[class_min_i][1] = classes[class_min_i][1] - 1;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
i = 1;
|
||||
LOOP
|
||||
IF invert = TRUE THEN
|
||||
side = 1; --default returns bottom side of breaks, invert returns top side
|
||||
END IF;
|
||||
reply = array_append(reply, in_array[classes[i][side]]);
|
||||
i = i+1;
|
||||
IF i > breaks THEN EXIT; END IF;
|
||||
END LOOP;
|
||||
|
||||
RETURN array_prepend(gvf, reply);
|
||||
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
--
|
||||
-- Determine the Quantile classifications from a numeric array
|
||||
--
|
||||
-- @param in_array A numeric array of numbers to determine the best
|
||||
-- bins based on the Quantile method.
|
||||
--
|
||||
-- @param breaks The number of bins you want to find.
|
||||
--
|
||||
--
|
||||
CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$
|
||||
DECLARE
|
||||
element_count INT4;
|
||||
break_size numeric;
|
||||
tmp_val numeric;
|
||||
i INT := 1;
|
||||
reply numeric[];
|
||||
BEGIN
|
||||
-- sort our values
|
||||
SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x;
|
||||
-- get the total size of our data
|
||||
element_count := array_length(in_array, 1);
|
||||
break_size := element_count::numeric / breaks;
|
||||
-- slice our bread
|
||||
LOOP
|
||||
IF i < breaks THEN
|
||||
IF break_size * i % 1 > 0 THEN
|
||||
SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x;
|
||||
ELSE
|
||||
SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x;
|
||||
END IF;
|
||||
ELSIF i = breaks THEN
|
||||
-- select the last value
|
||||
SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x;
|
||||
ELSE
|
||||
EXIT;
|
||||
END IF;
|
||||
|
||||
reply = array_append(reply, tmp_val);
|
||||
i := i+1;
|
||||
END LOOP;
|
||||
RETURN reply;
|
||||
END;
|
||||
$$ language plpgsql IMMUTABLE;
|
9
src/pg/test/expected/19_contour_test.out
Normal file
9
src/pg/test/expected/19_contour_test.out
Normal file
@ -0,0 +1,9 @@
|
||||
SET client_min_messages TO WARNING;
|
||||
\set ECHO none
|
||||
bin|avg_value
|
||||
0|280.23070673030491816424178
|
||||
1|413.81702914846213479025305
|
||||
2|479.6334491374486884098328
|
||||
3|529.1545236882183479447113
|
||||
4|614.1132081424930103122037
|
||||
(5 rows)
|
17
src/pg/test/sql/19_contour_test.sql
Normal file
17
src/pg/test/sql/19_contour_test.sql
Normal file
@ -0,0 +1,17 @@
|
||||
SET client_min_messages TO WARNING;
|
||||
\set ECHO none
|
||||
\pset format unaligned
|
||||
|
||||
WITH a AS (
|
||||
SELECT
|
||||
ARRAY[800, 700, 600, 500, 400, 300, 200, 100]::numeric[] AS vals,
|
||||
ARRAY[ST_GeomFromText('POINT(2.1744 41.403)',4326),ST_GeomFromText('POINT(2.1228 41.380)',4326),ST_GeomFromText('POINT(2.1511 41.374)',4326),ST_GeomFromText('POINT(2.1528 41.413)',4326),ST_GeomFromText('POINT(2.165 41.391)',4326),ST_GeomFromText('POINT(2.1498 41.371)',4326),ST_GeomFromText('POINT(2.1533 41.368)',4326),ST_GeomFromText('POINT(2.131386 41.41399)',4326)] AS g
|
||||
),
|
||||
b as(
|
||||
SELECT
|
||||
foo.*
|
||||
FROM
|
||||
a,
|
||||
cdb_crankshaft.CDB_contour(a.g, a.vals, 0.0, 1, 3, 5, 60) foo
|
||||
)
|
||||
SELECT bin, avg_value from b order by bin;
|
Loading…
Reference in New Issue
Block a user