From cb3a49594d75db1c01fb7e1ce4b519f8936c7445 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 12 Jul 2016 15:48:38 +0200 Subject: [PATCH 01/62] add PIA, first commit --- doc/09_voronoi.md | 35 ---- doc/13_PIA.md | 30 ++++ src/pg/sql/09_voronoi.sql | 205 ----------------------- src/pg/sql/13_PIA.sql | 125 ++++++++++++++ src/pg/test/expected/09_voronoi_test.out | 5 - src/pg/test/expected/13_pia_test.out | 5 + src/pg/test/sql/09_voronoi_test.sql | 7 - src/pg/test/sql/13_pia_test.sql | 4 + 8 files changed, 164 insertions(+), 252 deletions(-) delete mode 100644 doc/09_voronoi.md create mode 100644 doc/13_PIA.md delete mode 100644 src/pg/sql/09_voronoi.sql create mode 100644 src/pg/sql/13_PIA.sql delete mode 100644 src/pg/test/expected/09_voronoi_test.out create mode 100644 src/pg/test/expected/13_pia_test.out delete mode 100644 src/pg/test/sql/09_voronoi_test.sql create mode 100644 src/pg/test/sql/13_pia_test.sql diff --git a/doc/09_voronoi.md b/doc/09_voronoi.md deleted file mode 100644 index 331f9ea..0000000 --- a/doc/09_voronoi.md +++ /dev/null @@ -1,35 +0,0 @@ -## Spacial interpolation - -Function to construct the [Voronoi Diagram](https://en.wikipedia.org/wiki/Voronoi_diagram) from a dataset of scatter points, clipped to the significant area - -PostGIS wil include this in future versions ([doc for dev branch](http://postgis.net/docs/manual-dev/ST_Voronoi.html)) and will perform faster for sure, but in the meantime... - - -### CDB_Voronoi (geom geometry[], buffer numeric DEFAULT 0.5, tolerance numeric DEFAULT 1e-9) - -#### Arguments - -| Name | Type | Description | -|------|------|-------------| -| geom | geometry[] | Array of points's geometries | -| buffer | numeric | enlargment ratio for the envelope area used for the restraints| -| tolerance | geometry | The target point to calc the value | - -### Returns - -| Column Name | Type | Description | -|-------------|------|-------------| -| geom | geometry collection | Collection of polygons of the Voronoi cells| - - -#### Example Usage - -```sql -WITH a AS ( - SELECT - ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS geomin -) -SELECT - CDB_voronoi(geomin, 0.2, 1e-9) as result -FROM a; -``` diff --git a/doc/13_PIA.md b/doc/13_PIA.md new file mode 100644 index 0000000..35b980b --- /dev/null +++ b/doc/13_PIA.md @@ -0,0 +1,30 @@ +## Pole of inaccessibility (PIA) + +Function to find the [PIA](https://en.wikipedia.org/wiki/Pole_of_inaccessibility) from a given polygon and tolerance, following the quadtree approach by [Vladimir Agafonkin](https://github.com/mourner) described [here](https://github.com/mapbox/polylabel) + + + +### CDB_PIA (polygon geometry, tolerance numeric DEFAULT 1.0) + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| polygon | geometry | Target polygon | +| tolerance | numeric | Threshold to decide to take a cell into account | + +### Returns + +| Column Name | Type | Description | +|-------------|------|-------------| +| point | geometry| Pole of inaccessibility | + + +#### Example Usage + +```sql +with a as( + select st_geomfromtext('POLYGON((-432540.453078056 4949775.20452642,-432329.947920966 4951361.232584,-431245.028163694 4952223.31516671,-429131.071033529 4951768.00415574,-424622.07505895 4952843.13503987,-423688.327170174 4953499.20752423,-424086.294349759 4954968.38274191,-423068.388925945 4954378.63345336,-423387.653225542 4953355.67417084,-420594.869840519 4953781.00230592,-416026.095299382 4951484.06849063,-412483.018546414 4951024.5410983,-410490.399661215 4954502.24032205,-408186.197521284 4956398.91417441,-407627.262358013 4959300.94633864,-406948.770061627 4959874.85407739,-404949.583326472 4959047.74518163,-402570.908447199 4953743.46829807,-400971.358683991 4952193.11680804,-403533.488084088 4949649.89857885,-406335.177028373 4950193.19571096,-407790.456731515 4952391.46015616,-412060.672398345 4950381.2389307,-410716.93482498 4949156.7509561,-408464.162289794 4943912.8940387,-409350.599394983 4942819.84896006,-408087.791091424 4942451.6711778,-407274.045613725 4940572.4807777,-404446.196589102 4939976.71501489,-402422.964843936 4940450.3670813,-401010.654464241 4939054.8061663,-397647.247369412 4940679.80737878,-395658.413346901 4940528.84765185,-395536.852462953 4938829.79565997,-394268.923462818 4938003.7277717,-393388.720249116 4934757.80596815,-392393.301362444 4934326.71675815,-392573.527618037 4932323.40974412,-393464.640141837 4931903.10653605,-393085.597275686 4931094.7353605,-398426.261165985 4929156.87541607,-398261.174361137 4926238.00816416,-394045.059966834 4925765.18668498,-392982.960705174 4926391.81893628,-393090.272694301 4927176.84692181,-391648.240010564 4924626.06386961,-391889.914625075 4923086.14787613,-394345.177314013 4923235.086036,-395550.878718795 4917812.79243978,-399009.463978251 4912927.7157945,-398948.794855767 4911941.91010796,-398092.636652078 4911806.57392519,-401991.601817112 4911722.9204501,-406225.972607907 4914505.47286319,-411104.994569885 4912569.26941163,-412925.513522316 4913030.3608866,-414630.148884835 4914436.69169949,-414207.691417276 4919205.78028405,-418306.141109809 4917994.9580478,-424184.700779621 4918938.12432889,-426816.961458921 4923664.37379373,-420956.324227126 4923381.98014807,-420186.661267781 4924286.48693378,-420943.411166194 4926812.76394433,-419779.45457046 4928527.43466337,-419768.767899344 4930681.94459216,-421911.668097113 4930432.40620397,-423482.386112205 4933451.28047252,-427272.814773717 4934151.56473242,-427144.908678797 4939731.77191996,-428982.125554848 4940522.84445172,-428986.133056516 4942437.17281266,-431237.792396792 4947309.68284815,-432476.889648814 4947791.74800037,-432540.453078056 4949775.20452642))', 3857) as g +) +SELECT st_astext(CDB_PIA(g)) from a; +``` diff --git a/src/pg/sql/09_voronoi.sql b/src/pg/sql/09_voronoi.sql deleted file mode 100644 index 51d22d4..0000000 --- a/src/pg/sql/09_voronoi.sql +++ /dev/null @@ -1,205 +0,0 @@ -CREATE OR REPLACE FUNCTION CDB_voronoi( - IN geomin geometry[], - IN buffer numeric DEFAULT 0.5, - IN tolerance numeric DEFAULT 1e-9 - ) -RETURNS geometry AS $$ -DECLARE - geomout geometry; -BEGIN - WITH - convexhull_1 as ( - SELECT - ST_ConvexHull(ST_Collect(geomin)) as g, - buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin))) as r - ), - clipper as( - SELECT - st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g - FROM convexhull_1 a - ), - env0 as ( - SELECT - (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e - FROM convexhull_1 a - ), - env as ( - SELECT - array_agg(env0.e) as e - FROM env0 - ), - sample AS ( - SELECT - ST_Collect(geomin || env.e) as geom - FROM env - ), - convexhull as ( - SELECT - ST_ConvexHull(ST_Collect(geomin)) as cg - ), - tin as ( - SELECT - ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd - FROM - sample - ), - tin_polygons as ( - SELECT - (gd).Path as id, - (gd).Geom as pg, - ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct - FROM tin - ), - tin_lines as ( - SELECT - id, - ST_ExteriorRing(pg) as lg - FROM tin_polygons - ), - tin_nodes as ( - SELECT - id, - ST_PointN(lg,1) p1, - ST_PointN(lg,2) p2, - ST_PointN(lg,3) p3 - FROM tin_lines - ), - tin_edges AS ( - SELECT - p.id, - UNNEST(ARRAY[ - ST_MakeLine(n.p1,n.p2) , - ST_MakeLine(n.p2,n.p3) , - ST_MakeLine(n.p3,n.p1)]) as Edge, - ST_Force_2D(_Find_Circle(n.p1,n.p2,n.p3)) as ct , - CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN - TRUE - ELSE FALSE END AS ctx, - p.pg, - ST_within(p.ct, convexhull.cg) as ctin - FROM - tin_polygons p, - tin_nodes n, - convexhull - WHERE p.id = n.id - ), - voro_nodes as ( - SELECT - CASE WHEN x.ctx = TRUE THEN - ST_Centroid(x.edge) - ELSE - x.ct - END as xct, - CASE WHEN y.id is null THEN - CASE WHEN x.ctin = TRUE THEN - ST_SetSRID(ST_MakePoint( - ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), - ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) - ), ST_SRID(x.ct)) - END - ELSE - y.ct - END as yct - FROM - tin_edges x - LEFT OUTER JOIN - tin_edges y - ON x.id <> y.id AND ST_Equals(x.edge, y.edge) - ), - voro_edges as( - SELECT - ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v - FROM - voro_nodes - ), - voro_cells as( - SELECT - ST_Polygonize( - ST_Node( - ST_LineMerge( - ST_Union(v, ST_ExteriorRing( - ST_Convexhull(v) - ) - ) - ) - ) - ) as g - FROM - voro_edges - ), - voro_set as( - SELECT - (st_dump(v.g)).geom as g - FROM voro_cells v - ) - SELECT - st_collect(ST_intersection(c.g, v.g)) - -- ST_intersection(c.g, v.g) - INTO geomout - FROM - voro_set v, - clipper c; - RETURN geomout; -END; -$$ language plpgsql IMMUTABLE; - -/** ---------------------------------------------------------------------------------------- - * @function : FindCircle - * @precis : Function that determines if three points form a circle. If so a table containing - * centre and radius is returned. If not, a null table is returned. - * @version : 1.0 - * @param : p_pt1 : First point in curve - * @param : p_pt2 : Second point in curve - * @param : p_pt3 : Third point in curve - * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle - * or NULL if three points do not form a circle. - * @history : Simon Greener - Feb 2012 - Original coding. - * @copyright : Simon Greener @ 2012 - * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/) -**/ -CREATE OR REPLACE FUNCTION _Find_Circle(p_pt1 geometry, p_pt2 geometry, p_pt3 geometry) - RETURNS geometry AS -$BODY$ -DECLARE - v_Centre geometry; - v_radius NUMERIC; - v_CX NUMERIC; - v_CY NUMERIC; - v_dA NUMERIC; - v_dB NUMERIC; - v_dC NUMERIC; - v_dD NUMERIC; - v_dE NUMERIC; - v_dF NUMERIC; - v_dG NUMERIC; -BEGIN - IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN - RAISE EXCEPTION 'All supplied points must be not null.'; - RETURN NULL; - END IF; - IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR - ST_GeometryType(p_pt1) <> 'ST_Point' OR - ST_GeometryType(p_pt1) <> 'ST_Point' ) THEN - RAISE EXCEPTION 'All supplied geometries must be points.'; - RETURN NULL; - END IF; - v_dA := ST_X(p_pt2) - ST_X(p_pt1); - v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); - v_dC := ST_X(p_pt3) - ST_X(p_pt1); - v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); - v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); - v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); - v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); - -- If v_dG is zero then the three points are collinear and no finite-radius - -- circle through them exists. - IF ( v_dG = 0 ) THEN - RETURN NULL; - ELSE - v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; - v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; - v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); - END IF; - RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); -END; -$BODY$ - LANGUAGE plpgsql VOLATILE STRICT; diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql new file mode 100644 index 0000000..7bb5b7b --- /dev/null +++ b/src/pg/sql/13_PIA.sql @@ -0,0 +1,125 @@ +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +CREATE OR REPLACE FUNCTION CDB_PIA( + IN polygon geometry, + IN tolerance numeric DEFAULT 1.0 + ) +RETURNS geometry AS $$ +DECLARE + env geometry[]; + cells geometry[]; + best_c geometry; + best_d numeric; + test_d numeric; + test_mx numeric; + test_h numeric; + test_tol numeric; + test_cells geometry[]; + width numeric; + height numeric; + h numeric; + i integer; + n integer; + sqr numeric; + p geometry; +BEGIN + sqr := |/2; + + -- grid #0 cell size + height := ST_YMax(polygon) - ST_YMin(polygon); + width := ST_XMax(polygon) - ST_XMin(polygon); + SELECT 0.5*LEAST(height, width) INTO h; + + -- grid #0 + with c1 as( + SELECT CDB_RectangleGrid(polygon, h, h) as cell + ) + -- ,c2 as( + -- SELECT cell FROM c1 WHERE ST_Intersects(cell, polygon) + -- ) + SELECT array_agg(cell) INTO cells FROM c1; + + -- 1st guess: centroid + SELECT _Signed_Dist(polygon, ST_Centroid(Polygon)) INTO best_d; + + -- looping the loop + n := array_length(cells,1); + i := 1; + LOOP + + EXIT WHEN i > n; + + -- cell side size + SELECT ST_XMax(cells[i]) - ST_XMin(cells[i]) INTO test_h; + + -- check distance + SELECT _Signed_Dist(polygon, ST_Centroid(cells[i])) INTO test_d; + IF test_d > best_d THEN + best_d := test_d; + best_c := cells[i]; + END IF; + + -- longest distance within the cell + SELECT test_d + (test_h * sqr) INTO test_mx; + + -- if the cell has no chance to contains the desired point, continue + SELECT test_mx - best_d INTO test_tol; + IF test_tol <= tolerance THEN + i := i+1; + CONTINUE; + END IF; + + -- resample the cell + with c1 as( + SELECT CDB_RectangleGrid(cells[i], test_h/2, test_h/2) as cell + ) + -- , c2 as( + -- SELECT cell FROM c1 WHERE ST_Intersects(cell, polygon) + -- ) + SELECT array_agg(cell) INTO test_cells FROM c1; + + -- concat the new cells to the former array + cells := cells || test_cells; + + -- prepare next iteration + n := array_length(cells,1); + i := i+1; + + END LOOP; + + RETURN ST_Centroid(best_c); + +END; +$$ language plpgsql IMMUTABLE; + + +-- signed distance point to polygon with holes +-- negative is the point is out the polygon +CREATE OR REPLACE FUNCTION _Signed_Dist( + IN polygon geometry, + IN point geometry + ) +RETURNS numeric AS $$ +DECLARE + i integer; + within integer; + holes integer; + dist numeric; +BEGIN + dist := 1e999; + SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist; + SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within; + SELECT ST_NumInteriorRings(polygon) INTO holes; + IF holes > 0 THEN + FOR i IN 1..holes + LOOP + SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist; + END LOOP; + END IF; + dist := dist * within::numeric; + RETURN dist; +END; +$$ language plpgsql IMMUTABLE; diff --git a/src/pg/test/expected/09_voronoi_test.out b/src/pg/test/expected/09_voronoi_test.out deleted file mode 100644 index 5d5a1e5..0000000 --- a/src/pg/test/expected/09_voronoi_test.out +++ /dev/null @@ -1,5 +0,0 @@ - result ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - 010600000014000000010300000001000000220000000422561581F40040EBF03C9CE6B244401D29CDF9CAF4004061DE46FEFBB24440848FC014D1F4004035F438A1FDB2444020D799D546F5004007ABC8021BB344401C0109CB4DF50040041E4FA21CB344403F4952DDD2F500402AE532C239B344400CDE54ABDAF50040C1CCDB5D3BB34440D68493EA6EF60040DABF173458B34440E6CC058F77F60040C04272CB59B344406F3396D21AF7004066AE1E5076B34440A1AE194B24F700408521BBE277B34440EF5C3A66D6F700400AAE060E94B34440BD6836B0E0F700401AB3769B95B34440A2521472A1F800402D89A865B1B3444055E0B68AACF80040F72C7FEDB2B34440C8C77ABE7BF900408F13F94ECEB344407622B9A287F90040CDEBCAD0CFB34440D813960F65FA0040C55E0BC2EAB34440E1B52DBC71FA004092A76E3DECB344401A9A70255DFB004066E612B706B44440C411E8966AFB00403A9F9F2B08B44440025208BC63FC00405DB3652622B444406F35B0EE71FC004094BBB59323B44440F86B618B78FD0040C1757E083DB44440DE5B557B87FD00409CA82D6E3EB44440130D9A479BFE00409D94FE5557B444409AC5C1F0AAFE0040ACE4AAB358B44440111CFFA0CBFF00401833B00771B4444058940FFFDBFF004023C5F95C72B444409F1A2244090101407E2A88168AB4444006B29E521A010140CC6F11638BB444406B3F740A62010140A30D8BAB90B4444016C7433955FD0040FB1809D6ECB244400422561581F40040EBF03C9CE6B244400103000000010000003D0000009161176DCA000140467D4FCDB8AF444057940FFFDBFF0040AB6D0594CBAF4440111CFFA0CBFF0040B6FF4EE9CCAF44409AC5C1F0AAFE0040224E543DE5AF4440130D9A479BFE0040319E009BE6AF4440DE5B557B87FD0040328AD182FFAF4440F86B618B78FD00400DBD80E800B044406F35B0EE71FC00403A77495D1AB04440025208BC63FC0040717F99CA1BB04440C411E8966AFB004094935FC535B044401A9A70255DFB0040684CEC3937B04440E1B52DBC71FA00403C8B90B351B04440D813960F65FA004009D4F32E53B044407622B9A287F90040014734206EB04440C9C77ABE7BF900403F1F06A26FB0444055E0B68AACF80040D70580038BB04440A1521472A1F80040A1A9568B8CB04440BD6836B0E0F70040B47F8855A8B04440EF5C3A66D6F70040C484F8E2A9B04440A0AE194B24F700404911440EC6B044406E3396D21AF700406884E0A0C7B04440E6CC058F77F600400EF08C25E4B04440D68493EA6EF60040F472E7BCE5B044400CDE54ABDAF500400D66239302B144403F4952DDD2F50040A44DCC2E04B144401C0109CB4DF50040CA14B04E21B1444021D799D546F50040C78736EE22B14440848FC014D1F40040993EC64F40B144401B29CDF9CAF400406D54B8F241B14440DF86ABAA64F40040DD15E68D5FB144402958E06B5FF40040EFF5D03361B14440C92982AA08F400407D117F007FB14440CA4C4F4904F40040A712EFA880B1444033DA7C2DBDF30040F145F29E9EB14440F2A915ABB9F30040320E7349A0B14440812F4D4882F3004037C29460BEB14440C0F3A7A57FF300409B66B10CC0B144404A4A180B58F3004026F0B13CDEB1444009F4ED4856F30040CF14F5E9DFB14440596772813EF300404EF78D2AFEB144401C5E3EA03DF30040A5EF81D8FFB144400FB35BB235F30040EC2168211EB244400FB35BB235F30040E21097CF1FB244401C5E3EA03DF3004029437D183EB24440596772813EF30040803B71C63FB2444009F4ED4856F30040FF1D0A075EB244404A4A180B58F30040A8424DB45FB24440C0F3A7A57FF3004033CC4DE47DB24440812F4D4882F3004097706A907FB24440F2A915ABB9F300409C248CA79DB2444033DA7C2DBDF30040DDEC0C529FB24440CA4C4F4904F4004027201048BDB24440C92982AA08F40040512180F0BEB244402958E06B5FF40040DF3C2EBDDCB24440E086ABAA64F40040F11C1963DEB244400422561581F40040EBF03C9CE6B2444016C7433955FD0040FB1809D6ECB24440BA703922B01F0140157201FC61B24440D83423AD0219014015C2F40C6AB0444025E06E858E14014003F2824594AF44409161176DCA000140467D4FCDB8AF4440010300000001000000200000001B476FC4EF16014083AEB518A6AE4440993DF9654F150140E2378E67B2AE4440B5B9DB48371501408D1DD225B3AE4440408AD43488130140A3EC9FFCC0AE444070FD9F7E70130140C36669C7C1AE44402B105CE4C81101400795FD7DD0AE4440EF82909BB1110140820C1555D1AE4440B7402FEF11100140861067E7E0AE44403EBD2E1AFB0F0140BD8E91CAE1AE4440791AA3CD630E014025A25C34F2AE4440C1ECAF724D0E014024E15B23F3AE4440B42CA1F5BE0C0140382C206004AF4440E22BDC1AA90C0140BFA7B25A05AF44401B4587DA230B0140267DB66517AF44404F24EE850E0B0140AB84976B18AF44409BCE07ED9209014010ADE83F2BAF4440C17973247E0901408676D0502CAF4440C7ED0A9B0C080140AB8B45E93FAF444002EE2D64F80701403D47E90441AF4440A35E904F91060140411E235C55AF4440AE4BF5AF7D060140C70A358256AF4440EB20927221050140162DA0926BAF4440191B9A6F0E05014074ADCFC26CAF444020F6E768BD03014020E0A58682AF4440AB2DC907AB03014050919FC083AF444076BD2B9465020140466AE9319AAF44406104F0D953020140413A57759BAF444006B29E521A01014001C3ED8DB2AF44409D1A224409010140500877DAB3AF44409161176DCA000140467D4FCDB8AF444025E06E858E14014003F2824594AF44401B476FC4EF16014083AEB518A6AE44400103000000010000002F0000006B3F740A62010140A30D8BAB90B444406004F0D9530201408CF8A77BA2B4444077BD2B946502014088C815BFA3B44440AD2DC907AB0301407EA15F30BAB4444020F6E768BD030140AE52596ABBB44440181B9A6F0E0501405A852F2ED1B44440DD20927221050140B7055F5ED2B44440A04BF5AF7D0601400728CA6EE7B44440B15E904F910601408E14DC94E8B4444010EE2D64F807014091EB15ECFCB44440B8ED0A9B0C08014022A7B907FEB44440B37973247E09014048BC2EA011B54440ACCE07ED92090140C08516B112B544406024EE850E0B014024AE678525B544401C4587DA230B0140A8B5488B26B54440E22BDC1AA90C01400F8B4C9638B54440B42CA1F5BE0C01409606DF9039B54440C2ECAF724D0E0140AA51A3CD4AB544407A1AA3CD630E0140A990A2BC4BB544403EBD2E1AFB0F014011A46D265CB54440B7402FEF11100140482298095DB54440EF82909BB11101404C26EA9B6CB544402B105CE4C8110140C79D01736DB5444071FD9F7E701301400BCC95297CB54440418AD434881301402B465FF47CB54440B5B9DB483715014041152DCB8AB54440993DF9654F150140ECFA70898BB544401DA7977D051701406339AD7C98B54440C5E301FB1D1701403F62372E99B54440FC441F9EDA180140DD40553AA5B54440EF951F75F31801409901F5DEA5B544403B5ED829B61A01407E0CA700B1B544401F909F53CF1A014031442F98B1B54440794A669E971C01409A4A68CCBBB54440FB780E14B11C01401C6FAF56BCB5444096AACD777E1E01408859A39AC5B54440E2235C32981E0140D4828317C6B54440419698306A2001405717A868CEB5444031C6FF2884200140F709FFD7CEB544404E33FB415A220140709E0C34D6B5444070901C7174220140D6D4BB95D6B5444074EC9A4DB12301400391B3DCDAB544409AEC9FA7012301409E077767EDB444408D12585F28210140C7A69D936DB24440BA703922B01F0140157201FC61B2444016C7433955FD0040FB1809D6ECB244406B3F740A62010140A30D8BAB90B44440010300000001000000220000006C6873F224290140640589D054AE4440C72CC8DC5928014004E6574656AE4440A5A8BB343F28014008597E7E56AE4440107488D45F26014032803BF05AAE44407D61884D4526014002AE4E365BAE44409AA8A68268240140E89064A260AE44406EA7F8234E240140D44351F660AE44406F901C7174220140F85D435B67AE44404D33FB415A2201405E94F2BC67AE444031C6FF2884200140D72800196FAE4440419698306A200140771B57886FAE4440E2235C32981E0140FAAF7BD977AE444097AACD777E1E014046D95B5678AE4440FB780E14B11C0140B2C34F9A81AE4440784A669E971C014034E8962482AE44401E909F53CF1A01409DEECF588CAE4440395ED829B61A0140502658F08CAE4440278677A0731901403CACA8E794AE444088C82E251F19014014C8BBF200AF4440258677A0731901403CACA8E794AE4440EE951F75F318014035310A1298AE4440FC441F9EDA180140F1F1A9B698AE4440C4E301FB1D1701408FD0C7C2A4AE44401CA7977D051701406BF95174A5AE44401B476FC4EF16014083AEB518A6AE444025E06E858E14014003F2824594AF4440D83423AD0219014015C2F40C6AB04440E43423AD0219014015C2F40C6AB0444032772D211F34014047E17A14AEAF44401B6338B19A390140BCB71C1188AF44400F9C33A2B4370140736891ED7CAF44400B6238B19A390140A8B71C1188AF4440A0ABADD85F3601409EEFA7C64BAF44406C6873F224290140640589D054AE444001030000000100000004000000E43423AD0219014015C2F40C6AB04440D83423AD0219014015C2F40C6AB04440BA703922B01F0140157201FC61B24440E43423AD0219014015C2F40C6AB044400103000000010000000A000000E43423AD0219014015C2F40C6AB04440BA703922B01F0140157201FC61B244408312585F28210140B9A69D936DB2444069DF5D3F49270140A80F6D4D69B244406671FB2ACF5201408029E4EA2FB04440551DBFA9615201407723356D19B044400B6238B19A390140A8B71C1188AF44401B6338B19A390140BCB71C1188AF444032772D211F34014047E17A14AEAF4440E43423AD0219014015C2F40C6AB0444001030000000100000004000000278677A0731901403CACA8E794AE4440258677A0731901403CACA8E794AE444088C82E251F19014014C8BBF200AF4440278677A0731901403CACA8E794AE4440010300000001000000320000009D9022A60E510140E7A48BF4E6AE4440F002F3808E500140BD8E91CAE1AE4440777FF2AB77500140861067E7E0AE44403F3D91FFD74E0140820C1555D1AE444003B0C5B6C04E01400795FD7DD0AE4440BEC2811C194D0140C36669C7C1AE4440EE354D66014D0140A3EC9FFCC0AE44407A064652524B01408D1DD225B3AE4440968228353A4B0140E2378E67B2AE444012198A1D844901406BF95174A5AE44406ADC1FA06B4901408FD0C7C2A4AE4440337B02FDAE470140F1F1A9B698AE4440402A02269647014035310A1298AE4440F4614971D3450140502658F08CAE444010308247BA4501409DEECF588CAE4440B675BBFCF143014034E8962482AE444034471387D8430140B2C34F9A81AE4440991554230B42014046D95B5678AE44404D9CC568F1410140FAAF7BD977AE4440ED29896A1F400140771B57886FAE4440FDF9217205400140D72800196FAE4440E18C26592F3E01405E94F2BC67AE4440BF2F052A153E0140F85D435B67AE4440C01829773B3C0140D44351F660AE444094177B18213C0140E89064A260AE4440B15E994D443A014002AE4E365BAE44401E4C99C6293A014032803BF05AAE4440891766664A38014008597E7E56AE4440679359BE2F38014004E6574656AE44400C533E4C4E3601405D5E2BD052AE4440CE0C748A33360140A10A01A652AE444025746B8A503401400A10582C50AE44409B2939B635340140A66A351050AE4440B001ABAC51320140DBB1BD934EAE44403B7C6BCD363201404771AA854EAE4440C457083F523001409646CC064EAE44406A68195C373001409646CC064EAE4440F443B6CD522E01404771AA854EAE44407FBE76EE372E0140DBB1BD934EAE44409396E8E4532C0140A66A351050AE4440094CB610392C01400A10582C50AE444060B3AD10562A0140A10A01A652AE4440226DE34E3B2A01405D5E2BD052AE44406C6873F224290140640589D054AE4440A0ABADD85F3601409EEFA7C64BAF44400B6238B19A390140A8B71C1188AF4440551DBFA9615201407723356D19B044404E1DBFA9615201407123356D19B044401184E6662B51014014C8BBF200AF44409D9022A60E510140E7A48BF4E6AE4440010300000001000000040000008312585F28210140B9A69D936DB24440BA703922B01F0140157201FC61B244408D12585F28210140C7A69D936DB244408312585F28210140B9A69D936DB244400103000000010000003B00000074EC9A4DB12301400391B3DCDAB544406FA7F8234E240140FAEEADFADCB544409BA8A68268240140E6A19A4EDDB544407E61884D45260140CC84B0BAE2B54440117488D45F2601409CB2C300E3B54440A6A8BB343F280140C6D98072E7B54440C82CC8DC59280140CA4CA7AAE7B54440236DE34E3B2A014071D4D320EBB5444061B3AD10562A01402D28FE4AEBB544400A4CB610392C0140C422A7C4EDB544409496E8E4532C014028C8C9E0EDB544407FBE76EE372E0140F380415DEFB54440F443B6CD522E014087C1546BEFB544406B68195C3730014038EC32EAEFB54440C557083F5230014038EC32EAEFB544403B7C6BCD3632014087C1546BEFB54440B001ABAC51320140F380415DEFB544409C2939B63534014028C8C9E0EDB5444026746B8A50340140C422A7C4EDB54440CE0C748A333601402D28FE4AEBB544400C533E4C4E36014071D4D320EBB54440689359BE2F380140CA4CA7AAE7B544408A1766664A380140C6D98072E7B544401F4C99C6293A01409CB2C300E3B54440B25E994D443A0140CC84B0BAE2B54440F7FB7763483A014006AECBAEE2B54440BC3741C7BA3A0140321ED64C17B54440F8FB7763483A014006AECBAEE2B5444095177B18213C0140E6A19A4EDDB54440C11829773B3C0140FAEEADFADCB54440C02F052A153E0140D6D4BB95D6B54440E28C26592F3E0140709E0C34D6B54440FEF9217205400140F709FFD7CEB54440EE29896A1F4001405717A868CEB544404D9CC568F1410140D4828317C6B54440981554230B4201408859A39AC5B5444034471387D84301401C6FAF56BCB54440B775BBFCF14301409A4A68CCBBB5444011308247BA45014031442F98B1B54440F6614971D34501407E0CA700B1B54440412A0226964701409901F5DEA5B54440337B02FDAE470140DD40553AA5B544406BDC1FA06B4901403F62372E99B5444013198A1D844901406339AD7C98B54440968228353A4B0140ECFA70898BB544407A064652524B014041152DCB8AB54440EF354D66014D01402B465FF47CB54440BFC2811C194D01400BCC95297CB5444004B0C5B6C04E0140C79D01736DB54440403D91FFD74E01404C26EA9B6CB54440F5FFE537095001402251D82C61B54440082885A17E510140453402BC8DB444400AECA544B94A0140EEAAFDBAA3B3444047DE5D3F49270140B70F6D4D69B2444069DF5D3F49270140A80F6D4D69B244408312585F28210140B9A69D936DB244408D12585F28210140C7A69D936DB244409AEC9FA7012301409E077767EDB4444074EC9A4DB12301400391B3DCDAB544400103000000010000001B0000003BC1E63C426D01405C8F2BE4F2B144402DCC3352336D0140D014F5E9DFB14440E2750990316D014026F0B13CDEB1444077CC79F5096D01409B66B10CC0B14440B190D452076D014037C29460BEB144403E160CF0CF6C0140330E7349A0B14440F5E5A46DCC6C0140F245F29E9EB144406F73D251856C0140A712EFA880B1444065969FF0806C01407D117F007FB144400368412F2A6C0140EFF5D03361B144404B3976F0246C0140DD15E68D5FB14440199754A1BE6B01406E54B8F241B144408E5FD20DBE6B0140603F2ECB41B14440A7306186B86B01409A3EC64F40B1444018E987C5426B0140C78736EE22B1444009BF18D03B6B0140CA14B04E21B14440F276CFBDB66A0140A44DCC2E04B144401EE2CCEFAE6A01400D66239302B144405B3B8EB01A6A0140F572E7BCE5B044404AF31B0C126A01400EF08C25E4B0444003A2D519B269014006981D63D3B044406571FB2ACF5201408029E4EA2FB044406EDF5D3F49270140A80F6D4D69B244408104C58F31370140EF7C3F355EB2444049DE5D3F49270140B80F6D4D69B2444010ECA544B94A0140EFAAFDBAA3B344403BC1E63C426D01405C8F2BE4F2B144400103000000010000000400000069DF5D3F49270140A80F6D4D69B2444047DE5D3F49270140B70F6D4D69B244408204C58F31370140EE7C3F355EB2444069DF5D3F49270140A80F6D4D69B244400103000000010000002E000000F9A1D519B269014006981D63D3B04440C08C8BC86E6901406884E0A0C7B044408E110850656901404911440EC6B044403F63E734B3680140C484F8E2A9B044407157EBEAA8680140B47F8855A8B044408D6D0D29E8670140A1A9568B8CB04440D9DF6A10DD670140D70580038BB0444065F8A6DC0D6701403F1F06A26FB04440B89D68F801670140014734206EB0444056AC8B8B2466014009D4F32E53B044404D0AF4DE176601403C8B90B351B044401426B1752C650140684CEC3937B044406AAE39041F65014094935FC535B044402C6E19DF25640140717F99CA1BB04440BF8A71AC176401403A77495D1AB044403654C00F116301400DBD80E800B044405064CC1F02630140328AD182FFAF44401BB38753EE610140319E009BE6AF444094FA5FAADE610140224E543DE5AF44401DA422FABD600140B6FF4EE9CCAF4440D72B129CAD600140AB6D0594CBAF444091A5FF56805F0140500877DAB3AF4440280E83486F5F014001C3ED8DB2AF4440CDBB31C1355E0140413A57759BAF4440B802F606245E0140466AE9319AAF444083925893DE5C014050919FC083AF44400ECA3932CC5C014020E0A58682AF444015A5872B7B5B014074ADCFC26CAF4440439F8F28685B0140162DA0926BAF444080742CEB0B5A0140C70A358256AF44408B61914BF8590140411E235C55AF44402CD2F336915801403D47E90441AF444067D216007D580140AB8B45E93FAF44406D46AE760B5701408676D0502CAF444093F119AEF656014010ADE83F2BAF4440DF9B33157B550140AB84976B18AF4440137B9AC065550140267DB66517AF44404C944580E0530140BFA7B25A05AF44407A9380A5CA530140382C206004AF44406DD371283C52014024E15B23F3AE4440B5A57ECD2552014025A25C34F2AE44409D9022A60E510140E7A48BF4E6AE44401184E6662B51014014C8BBF200AF44404E1DBFA9615201407123356D19B044406671FB2ACF5201408029E4EA2FB04440F9A1D519B269014006981D63D3B044400103000000010000001C000000F5FFE537095001402251D82C61B54440787FF2AB77500140482298095DB54440F102F3808E50014011A46D265CB54440B6A57ECD25520140A990A2BC4BB544406ED371283C520140AA51A3CD4AB544407B9380A5CA5301409606DF9039B544404D944580E05301400F8B4C9638B54440137B9AC065550140A8B5488B26B54440DE9B33157B55014023AE678525B5444093F119AEF6560140BE8516B112B544406E46AE760B57014048BC2EA011B5444068D216007D58014023A7B907FEB444402DD2F3369158014091EB15ECFCB444408C61914BF85901408D14DC94E8B4444081742CEB0B5A01400728CA6EE7B44440449F8F28685B0140B8055F5ED2B4444016A5872B7B5B01405A852F2ED1B444400FCA3932CC5C0140AE52596ABBB4444084925893DE5C01407EA15F30BAB44440B902F606245E014088C815BFA3B44440CFBB31C1355E01408CF8A77BA2B44440290E83486F5F0140CC6F11638BB4444090A5FF56805F01407E2A88168AB44440D72B129CAD60014023C5F95C72B4444028A422FABD6001401733B00771B444404337B5C0D4600140C753561C6FB44440082885A17E510140453402BC8DB44440F5FFE537095001402251D82C61B5444001030000000100000004000000F7FB7763483A014006AECBAEE2B54440F8FB7763483A014006AECBAEE2B54440BC3741C7BA3A0140321ED64C17B54440F7FB7763483A014006AECBAEE2B54440010300000001000000040000001B6338B19A390140BCB71C1188AF44400B6238B19A390140A8B71C1188AF44400F9C33A2B4370140736891ED7CAF44401B6338B19A390140BCB71C1188AF44400103000000010000002F0000001F0DC6E8536D0140E21097CF1FB244401F0DC6E8536D0140EC2168211EB244401262E3FA4B6D0140A5EF81D8FFB14440D558AF194B6D01404EF78D2AFEB1444035C1E63C426D01405B8F2BE4F2B144400AECA544B94A0140EEAAFDBAA3B34440082885A17E510140453402BC8DB444404337B5C0D4600140C753561C6FB444409FFA5FAADE610140AAE4AAB358B4444013B38753EE6101409D94FE5557B444404864CC1F026301409DA82D6E3EB444403754C00F11630140C1757E083DB44440C08A71AC1764014094BBB59323B444402D6E19DF256401405DB3652622B444406BAE39041F6501403A9F9F2B08B444401C26B1752C65014065E612B706B44440540AF4DE1766014090A76E3DECB3444050AC8B8B24660140C45E0BC2EAB34440B39D68F801670140CDEBCAD0CFB3444067F8A6DC0D6701408F13F94ECEB34440DADF6A10DD670140F72C7FEDB2B344408D6D0D29E86701402D89A865B1B344407157EBEAA86801401AB3769B95B344403F63E734B36801400AAE060E94B344408E110850656901408521BBE277B34440C08C8BC86E69014066AE1E5076B3444048F31B0C126A0140C04272CB59B344405A3B8EB01A6A0140D9BF173458B3444024E2CCEFAE6A0140BFCCDB5D3BB34440EF76CFBDB66A014029E532C239B3444013BF18D03B6B0140031E4FA21CB344400DE987C5426B014006ABC8021BB34440A9306186B86B014035F438A1FDB24440139754A1BE6B014061DE46FEFBB244404F3976F0246C0140F11C1963DEB244400568412F2A6C0140DF3C2EBDDCB2444065969FF0806C0140512180F0BEB244406473D251856C014027201048BDB24440FBE5A46DCC6C0140DDEC0C529FB244403C160CF0CF6C01409C248CA79DB24440AE90D452076D014097706A907FB244406FCC79F5096D014033CC4DE47DB24440E5750990316D0140A8424DB45FB2444026CC3352336D0140FF1D0A075EB24440D558AF194B6D0140803B71C63FB244401262E3FA4B6D014029437D183EB244401F0DC6E8536D0140E21097CF1FB24440010300000001000000040000006671FB2ACF5201408029E4EA2FB044404E1DBFA9615201407123356D19B04440551DBFA9615201407723356D19B044406671FB2ACF5201408029E4EA2FB04440010300000001000000040000008B5FD20DBE6B01405F3F2ECB41B14440885FD20DBE6B01405F3F2ECB41B144405C4CEEA3C66601401E9F5F633EB144408B5FD20DBE6B01405F3F2ECB41B14440 -(1 row) - diff --git a/src/pg/test/expected/13_pia_test.out b/src/pg/test/expected/13_pia_test.out new file mode 100644 index 0000000..27e38a6 --- /dev/null +++ b/src/pg/test/expected/13_pia_test.out @@ -0,0 +1,5 @@ + st_astext +------------------------------------------- + POINT(-409081.241940808 4930027.50443989) +(1 row) + diff --git a/src/pg/test/sql/09_voronoi_test.sql b/src/pg/test/sql/09_voronoi_test.sql deleted file mode 100644 index 8e27849..0000000 --- a/src/pg/test/sql/09_voronoi_test.sql +++ /dev/null @@ -1,7 +0,0 @@ -WITH a AS ( - SELECT - ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS geomin -) -SELECT - CDB_voronoi(geomin, 0.2, 1e-9) as result -FROM a; diff --git a/src/pg/test/sql/13_pia_test.sql b/src/pg/test/sql/13_pia_test.sql new file mode 100644 index 0000000..5d0b6f0 --- /dev/null +++ b/src/pg/test/sql/13_pia_test.sql @@ -0,0 +1,4 @@ +with a as( + select st_geomfromtext('POLYGON((-432540.453078056 4949775.20452642,-432329.947920966 4951361.232584,-431245.028163694 4952223.31516671,-429131.071033529 4951768.00415574,-424622.07505895 4952843.13503987,-423688.327170174 4953499.20752423,-424086.294349759 4954968.38274191,-423068.388925945 4954378.63345336,-423387.653225542 4953355.67417084,-420594.869840519 4953781.00230592,-416026.095299382 4951484.06849063,-412483.018546414 4951024.5410983,-410490.399661215 4954502.24032205,-408186.197521284 4956398.91417441,-407627.262358013 4959300.94633864,-406948.770061627 4959874.85407739,-404949.583326472 4959047.74518163,-402570.908447199 4953743.46829807,-400971.358683991 4952193.11680804,-403533.488084088 4949649.89857885,-406335.177028373 4950193.19571096,-407790.456731515 4952391.46015616,-412060.672398345 4950381.2389307,-410716.93482498 4949156.7509561,-408464.162289794 4943912.8940387,-409350.599394983 4942819.84896006,-408087.791091424 4942451.6711778,-407274.045613725 4940572.4807777,-404446.196589102 4939976.71501489,-402422.964843936 4940450.3670813,-401010.654464241 4939054.8061663,-397647.247369412 4940679.80737878,-395658.413346901 4940528.84765185,-395536.852462953 4938829.79565997,-394268.923462818 4938003.7277717,-393388.720249116 4934757.80596815,-392393.301362444 4934326.71675815,-392573.527618037 4932323.40974412,-393464.640141837 4931903.10653605,-393085.597275686 4931094.7353605,-398426.261165985 4929156.87541607,-398261.174361137 4926238.00816416,-394045.059966834 4925765.18668498,-392982.960705174 4926391.81893628,-393090.272694301 4927176.84692181,-391648.240010564 4924626.06386961,-391889.914625075 4923086.14787613,-394345.177314013 4923235.086036,-395550.878718795 4917812.79243978,-399009.463978251 4912927.7157945,-398948.794855767 4911941.91010796,-398092.636652078 4911806.57392519,-401991.601817112 4911722.9204501,-406225.972607907 4914505.47286319,-411104.994569885 4912569.26941163,-412925.513522316 4913030.3608866,-414630.148884835 4914436.69169949,-414207.691417276 4919205.78028405,-418306.141109809 4917994.9580478,-424184.700779621 4918938.12432889,-426816.961458921 4923664.37379373,-420956.324227126 4923381.98014807,-420186.661267781 4924286.48693378,-420943.411166194 4926812.76394433,-419779.45457046 4928527.43466337,-419768.767899344 4930681.94459216,-421911.668097113 4930432.40620397,-423482.386112205 4933451.28047252,-427272.814773717 4934151.56473242,-427144.908678797 4939731.77191996,-428982.125554848 4940522.84445172,-428986.133056516 4942437.17281266,-431237.792396792 4947309.68284815,-432476.889648814 4947791.74800037,-432540.453078056 4949775.20452642))', 3857) as g +) +SELECT st_astext(CDB_PIA(g)) from a; From 0eca26d515b0720bf05ab73b20af29a0c9f4c43d Mon Sep 17 00:00:00 2001 From: abelvm Date: Thu, 14 Jul 2016 15:07:45 +0200 Subject: [PATCH 02/62] performance x20 --- src/pg/sql/13_PIA.sql | 40 ++++++++++++++-------------- src/pg/test/expected/13_pia_test.out | 4 +-- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index 7bb5b7b..e5e8754 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -3,6 +3,11 @@ -- https://sites.google.com/site/polesofinaccessibility/ -- Requires: https://github.com/CartoDB/cartodb-postgresql +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + CREATE OR REPLACE FUNCTION CDB_PIA( IN polygon geometry, IN tolerance numeric DEFAULT 1.0 @@ -11,12 +16,12 @@ RETURNS geometry AS $$ DECLARE env geometry[]; cells geometry[]; + cell geometry; best_c geometry; best_d numeric; test_d numeric; test_mx numeric; test_h numeric; - test_tol numeric; test_cells geometry[]; width numeric; height numeric; @@ -31,19 +36,16 @@ BEGIN -- grid #0 cell size height := ST_YMax(polygon) - ST_YMin(polygon); width := ST_XMax(polygon) - ST_XMin(polygon); - SELECT 0.5*LEAST(height, width) INTO h; + h := 0.5*LEAST(height, width); -- grid #0 with c1 as( - SELECT CDB_RectangleGrid(polygon, h, h) as cell + SELECT CDB_RectangleGrid(polygon, h, h) as c ) - -- ,c2 as( - -- SELECT cell FROM c1 WHERE ST_Intersects(cell, polygon) - -- ) - SELECT array_agg(cell) INTO cells FROM c1; + SELECT array_agg(c) INTO cells FROM c1; -- 1st guess: centroid - SELECT _Signed_Dist(polygon, ST_Centroid(Polygon)) INTO best_d; + best_d := _Signed_Dist(polygon, ST_Centroid(Polygon)); -- looping the loop n := array_length(cells,1); @@ -52,41 +54,39 @@ BEGIN EXIT WHEN i > n; - -- cell side size - SELECT ST_XMax(cells[i]) - ST_XMin(cells[i]) INTO test_h; + cell := cells[i]; + i := i+1; + + -- cell side size, it's square + test_h := ST_XMax(cell) - ST_XMin(cell) ; -- check distance - SELECT _Signed_Dist(polygon, ST_Centroid(cells[i])) INTO test_d; + test_d := _Signed_Dist(polygon, ST_Centroid(cell)); IF test_d > best_d THEN best_d := test_d; best_c := cells[i]; END IF; -- longest distance within the cell - SELECT test_d + (test_h * sqr) INTO test_mx; + test_mx := test_d + (test_h/2 * sqr); -- if the cell has no chance to contains the desired point, continue - SELECT test_mx - best_d INTO test_tol; - IF test_tol <= tolerance THEN - i := i+1; - CONTINUE; - END IF; + CONTINUE WHEN test_mx - best_d <= tolerance; -- resample the cell with c1 as( - SELECT CDB_RectangleGrid(cells[i], test_h/2, test_h/2) as cell + SELECT CDB_RectangleGrid(cell, test_h/2, test_h/2) as c ) -- , c2 as( -- SELECT cell FROM c1 WHERE ST_Intersects(cell, polygon) -- ) - SELECT array_agg(cell) INTO test_cells FROM c1; + SELECT array_agg(c) INTO test_cells FROM c1; -- concat the new cells to the former array cells := cells || test_cells; -- prepare next iteration n := array_length(cells,1); - i := i+1; END LOOP; diff --git a/src/pg/test/expected/13_pia_test.out b/src/pg/test/expected/13_pia_test.out index 27e38a6..037d339 100644 --- a/src/pg/test/expected/13_pia_test.out +++ b/src/pg/test/expected/13_pia_test.out @@ -1,5 +1,5 @@ - st_astext + st_astext ------------------------------------------- - POINT(-409081.241940808 4930027.50443989) + POINT(-409081.865887381 4930017.52075948) (1 row) From 15aad5a695ae97757ee28bf37fd3e85fd4aa9473 Mon Sep 17 00:00:00 2001 From: abelvm Date: Thu, 14 Jul 2016 15:46:09 +0200 Subject: [PATCH 03/62] clean --- src/pg/sql/13_PIA.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index e5e8754..c395720 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -77,9 +77,6 @@ BEGIN with c1 as( SELECT CDB_RectangleGrid(cell, test_h/2, test_h/2) as c ) - -- , c2 as( - -- SELECT cell FROM c1 WHERE ST_Intersects(cell, polygon) - -- ) SELECT array_agg(c) INTO test_cells FROM c1; -- concat the new cells to the former array From 5f339a01f79e78a854e77dfe151c408046b8c27b Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 9 Aug 2016 12:09:19 -0400 Subject: [PATCH 04/62] recommit from postgresql repo --- .brackets.json | 3 + doc/14_densify.md | 35 +++++++++++ doc/15_tinmap.md | 36 +++++++++++ src/pg/sql/14_densify.sql | 67 ++++++++++++++++++++ src/pg/sql/15_tinmap.sql | 43 +++++++++++++ src/pg/test/expected/14_densify_test.out | 47 ++++++++++++++ src/pg/test/expected/15_tinmap_test.out | 80 ++++++++++++++++++++++++ src/pg/test/sql/14_densify_test.sql | 6 ++ src/pg/test/sql/15_tinmap_test.sql | 6 ++ 9 files changed, 323 insertions(+) create mode 100644 .brackets.json create mode 100644 doc/14_densify.md create mode 100644 doc/15_tinmap.md create mode 100644 src/pg/sql/14_densify.sql create mode 100644 src/pg/sql/15_tinmap.sql create mode 100644 src/pg/test/expected/14_densify_test.out create mode 100644 src/pg/test/expected/15_tinmap_test.out create mode 100644 src/pg/test/sql/14_densify_test.sql create mode 100644 src/pg/test/sql/15_tinmap_test.sql diff --git a/.brackets.json b/.brackets.json new file mode 100644 index 0000000..abceceb --- /dev/null +++ b/.brackets.json @@ -0,0 +1,3 @@ +{ + "sbruchmann.staticpreview.basepath": "/home/carto/Projects/crankshaft/" +} \ No newline at end of file diff --git a/doc/14_densify.md b/doc/14_densify.md new file mode 100644 index 0000000..2cec7e6 --- /dev/null +++ b/doc/14_densify.md @@ -0,0 +1,35 @@ +## Densify function + +Iterative densification of a set of scattered points using Delaunay triangulation. The new points are located at the centroids of the grid cells and have as assigned value the barycentric average value of the cell's vertex. + +### CDB_Densify(geomin geometry[], colin numeric[], iterations integer) + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| geomin | geometry[] | Array of points geometries | +| colin | numeric[] | Array of points' values | +| iterations | integer | Number of iterations | + +### Returns + +Returns a table object + +| Name | Type | Description | +|------|------|-------------| +| geomout | geometry | Geometries of new dataset of points| +| colout | numeric | Values of points| + +#### Example Usage + +```sql +with data as ( + select + ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin, + ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)'),ST_GeomFromText('POINT(2.1528 41.4136)'),ST_GeomFromText('POINT(2.165 41.3917)'),ST_GeomFromText('POINT(2.1498 41.3713)'),ST_GeomFromText('POINT(2.1533 41.3683)'),ST_GeomFromText('POINT(2.131386 41.413998)')] as geomin +) +select CDB_Densify(geomin, colin, 2) from data; +``` + + diff --git a/doc/15_tinmap.md b/doc/15_tinmap.md new file mode 100644 index 0000000..7bbd165 --- /dev/null +++ b/doc/15_tinmap.md @@ -0,0 +1,36 @@ +## TINMAP function + +Generates a fake contour map, in the form of a TIN map, from a set of scattered points.Depends on **CDB_Densify**. + +Its iterative nature let's the user smooth the final result as much as desired, but with a exponential time cost increase + +### CDB_TINmap(geomin geometry[], colin numeric[], iterations integer) + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| geomin | geometry[] | Array of points geometries | +| colin | numeric[] | Array of points' values | +| iterations | integer | Number of iterations | + +### Returns + +Returns a table object + +| Name | Type | Description | +|------|------|-------------| +| geomout | geometry | Geometries of new dataset of polygons| +| colout | numeric | Values of each cell| + +#### Example Usage + +```sql +with data as ( + select + ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin, + ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)'),ST_GeomFromText('POINT(2.1528 41.4136)'),ST_GeomFromText('POINT(2.165 41.3917)'),ST_GeomFromText('POINT(2.1498 41.3713)'),ST_GeomFromText('POINT(2.1533 41.3683)'),ST_GeomFromText('POINT(2.131386 41.413998)')] as geomin +) +select CDB_TINmap(geomin, colin, 2) from data; +``` + diff --git a/src/pg/sql/14_densify.sql b/src/pg/sql/14_densify.sql new file mode 100644 index 0000000..47a200e --- /dev/null +++ b/src/pg/sql/14_densify.sql @@ -0,0 +1,67 @@ +-- +-- Iterative densification of a set of points using Delaunay triangulation +-- the new points have as assigned value the average value of the 3 vertex (centroid) +-- +-- @param geomin - array of geometries (points) +-- +-- @param colin - array of numeric values in that points +-- +-- @param iterations - integer, number of iterations +-- +-- +-- Returns: TABLE(geomout geometry, colout numeric) +-- +-- +CREATE OR REPLACE FUNCTION CDB_Densify( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + geotemp geometry[]; + coltemp numeric[]; + i integer; + gs geometry[]; + g geometry; + vertex geometry[]; + va numeric; + vb numeric; + vc numeric; + center geometry; + centerval numeric; + tmp integer; +BEGIN + geotemp := geomin; + coltemp := colin; + FOR i IN 1..iterations + LOOP + -- generate TIN + WITH a as (SELECT unnest(geotemp) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + -- loop cells + FOREACH g IN ARRAY gs + LOOP + -- append centroid + SELECT ST_Centroid(g) INTO center; + geotemp := array_append(geotemp, center); + -- retrieve the value of each vertex + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + END LOOP; + RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; diff --git a/src/pg/sql/15_tinmap.sql b/src/pg/sql/15_tinmap.sql new file mode 100644 index 0000000..973eede --- /dev/null +++ b/src/pg/sql/15_tinmap.sql @@ -0,0 +1,43 @@ +CREATE OR REPLACE FUNCTION CDB_TINmap( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + p geometry[]; + vals numeric[]; + gs geometry[]; + g geometry; + vertex geometry[]; + centerval numeric; + va numeric; + vb numeric; + vc numeric; + coltemp numeric[]; +BEGIN + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM CDB_Densify(geomin, colin, iterations) dens; + WITH a as (SELECT unnest(p) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + FOREACH g IN ARRAY gs + LOOP + -- retrieve the vertex of each triangle + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; diff --git a/src/pg/test/expected/14_densify_test.out b/src/pg/test/expected/14_densify_test.out new file mode 100644 index 0000000..fef9228 --- /dev/null +++ b/src/pg/test/expected/14_densify_test.out @@ -0,0 +1,47 @@ + cdb_densify +----------------------------------------------------------------- + (01010000001361C3D32B6501403255302AA9B34440,7.0) + (01010000002497FF907EFB0040F085C954C1B04440,8.0) + (0101000000A167B3EA73350140E4141DC9E5AF4440,1.0) + (010100000062A1D634EF38014014D044D8F0B44440,2.0) + (010100000052B81E85EB510140EEEBC03923B24440,3.0) + (0101000000C286A757CA3201409D8026C286AF4440,5.0) + (01010000007DD0B359F5390140F38E537424AF4440,6.0) + (0101000000D237691A140D0140014EEFE2FDB44440,4.0) + (01010000003312B4DCAC14014047F8F1AAE1B14440,4.3333333333333333) + (010100000048C0FBBD27290140A9BBC5D646B34440,2.3333333333333333) + (01010000001DEBE2361A400140F79A0B4953B24440,2.0000000000000000) + (010100000098933D2F02500140115B676994B34440,4.0000000000000000) + (01010000004BA3DC90595001405C456C9DA5B14440,5.3333333333333333) + (0101000000D1FA8198714001404285107D64B04440,3.3333333333333333) + (01010000004AEA043411360140D2B687AA85AF4440,4.0000000000000000) + (0101000000CCA4736BBF2201402B8716D9CEAF4440,6.3333333333333333) + (0101000000832C1EF13E2101402609AF4A0FB04440,4.6666666666666667) + (010100000063A009D8BF090140BEEE38F68AB24440,5.4444444444444444) + (010100000019AE5D3CF8180140A5008D2162B34440,3.5555555555555555) + (01010000007E88BE590E250140EB9DA83067B44440,2.7777777777777778) + (0101000000C05105B65D3B014044A2D0B2EEB34440,2.7777777777777778) + (010100000005329D125F4F01401C8049790FB44440,4.3333333333333333) + (010100000054E45F2DB3570140BBDE724420B34440,4.6666666666666667) + (0101000000E53EEA4DD05701407FD7C9557BB24440,5.1111111111111111) + (01010000004A9CC694D34F01402B63A5BE7BB14440,6.1111111111111111) + (0101000000DD24068195430140317345DA64B04440,4.8888888888888889) + (010100000033E768B7D23A014002994E89AFAF4440,4.4444444444444444) + (010100000083C0CAA145360140CAEC55A065AF4440,5.0000000000000000) + (010100000004549A09D52F01403E3230057EAF4440,5.7777777777777778) + (010100000024040D72661D0140B0DEBBE0E6AF4440,6.7777777777777778) + (01010000007CCD85A42915014017B22F2835B04440,6.3333333333333333) + (0101000000F5F145CA781001401F2DCE18E6B04440,5.6666666666666667) + (0101000000F10DE756572701402134E4146CB14440,3.6666666666666667) + (01010000008894DB45FA290140F8C4EB987EB24440,2.8888888888888889) + (0101000000AABF5E61C13D0140E6E512830FB34440,2.7777777777777778) + (0101000000581215F9574B0140FDF5BB4EAEB24440,3.0000000000000000) + (0101000000EA6C9F19754B0140C0EE126009B24440,3.4444444444444444) + (01010000001383C0CAA145014088CC827674B14440,3.5555555555555555) + (0101000000D0B02B40EE350140C90D9905EDB04440,3.3333333333333333) + (010100000051DA1B7C613201406F36F4DA1DB04440,3.0000000000000000) + (0101000000EA6E133D52390140FD1AE7FAEFAF4440,2.7777777777777778) + (01010000003A487527C5340140C76EEE11A6AF4440,3.3333333333333333) + (0101000000BADB448F542E01403AB4C876BEAF4440,4.1111111111111111) + (0101000000FB12176D7B280140BDE1A0F9EBAF4440,4.0000000000000000) +(44 rows) diff --git a/src/pg/test/expected/15_tinmap_test.out b/src/pg/test/expected/15_tinmap_test.out new file mode 100644 index 0000000..e11e28c --- /dev/null +++ b/src/pg/test/expected/15_tinmap_test.out @@ -0,0 +1,80 @@ + cdb_tinmap +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + (01030000000100000004000000D237691A140D0140014EEFE2FDB444402497FF907EFB0040F085C954C1B0444063A009D8BF090140BEEE38F68AB24440D237691A140D0140014EEFE2FDB44440,5.8148148148148148) + (01030000000100000004000000D237691A140D0140014EEFE2FDB4444063A009D8BF090140BEEE38F68AB2444019AE5D3CF8180140A5008D2162B34440D237691A140D0140014EEFE2FDB44440,4.3333333333333333) + (01030000000100000004000000D237691A140D0140014EEFE2FDB4444019AE5D3CF8180140A5008D2162B344407E88BE590E250140EB9DA83067B44440D237691A140D0140014EEFE2FDB44440,3.4444444444444444) + (01030000000100000004000000D237691A140D0140014EEFE2FDB444407E88BE590E250140EB9DA83067B4444062A1D634EF38014014D044D8F0B44440D237691A140D0140014EEFE2FDB44440,2.9259259259259259) + (0103000000010000000400000062A1D634EF38014014D044D8F0B444407E88BE590E250140EB9DA83067B44440C05105B65D3B014044A2D0B2EEB3444062A1D634EF38014014D044D8F0B44440,2.5185185185185185) + (0103000000010000000400000062A1D634EF38014014D044D8F0B44440C05105B65D3B014044A2D0B2EEB3444005329D125F4F01401C8049790FB4444062A1D634EF38014014D044D8F0B44440,3.0370370370370370) + (0103000000010000000400000062A1D634EF38014014D044D8F0B4444005329D125F4F01401C8049790FB444401361C3D32B6501403255302AA9B3444062A1D634EF38014014D044D8F0B44440,4.4444444444444444) + (010300000001000000040000001361C3D32B6501403255302AA9B3444005329D125F4F01401C8049790FB4444098933D2F02500140115B676994B344401361C3D32B6501403255302AA9B34440,5.1111111111111111) + (010300000001000000040000001361C3D32B6501403255302AA9B3444098933D2F02500140115B676994B3444054E45F2DB3570140BBDE724420B344401361C3D32B6501403255302AA9B34440,5.2222222222222222) + (010300000001000000040000001361C3D32B6501403255302AA9B3444054E45F2DB3570140BBDE724420B34440E53EEA4DD05701407FD7C9557BB244401361C3D32B6501403255302AA9B34440,5.5925925925925926) + (010300000001000000040000001361C3D32B6501403255302AA9B34440E53EEA4DD05701407FD7C9557BB244404A9CC694D34F01402B63A5BE7BB144401361C3D32B6501403255302AA9B34440,6.0740740740740741) + (010300000001000000040000007DD0B359F5390140F38E537424AF44404A9CC694D34F01402B63A5BE7BB14440DD24068195430140317345DA64B044407DD0B359F5390140F38E537424AF4440,5.6666666666666667) + (010300000001000000040000007DD0B359F5390140F38E537424AF4440DD24068195430140317345DA64B0444033E768B7D23A014002994E89AFAF44407DD0B359F5390140F38E537424AF4440,5.1111111111111111) + (010300000001000000040000007DD0B359F5390140F38E537424AF444033E768B7D23A014002994E89AFAF44404AEA043411360140D2B687AA85AF44407DD0B359F5390140F38E537424AF4440,4.8148148148148148) + (010300000001000000040000007DD0B359F5390140F38E537424AF44404AEA043411360140D2B687AA85AF4440C286A757CA3201409D8026C286AF44407DD0B359F5390140F38E537424AF4440,5.0000000000000000) + (010300000001000000040000007DD0B359F5390140F38E537424AF4440C286A757CA3201409D8026C286AF444004549A09D52F01403E3230057EAF44407DD0B359F5390140F38E537424AF4440,5.5925925925925926) + (010300000001000000040000007DD0B359F5390140F38E537424AF444004549A09D52F01403E3230057EAF4440CCA4736BBF2201402B8716D9CEAF44407DD0B359F5390140F38E537424AF4440,6.0370370370370370) + (010300000001000000040000007DD0B359F5390140F38E537424AF4440CCA4736BBF2201402B8716D9CEAF444024040D72661D0140B0DEBBE0E6AF44407DD0B359F5390140F38E537424AF4440,6.3703703703703704) + (0103000000010000000400000024040D72661D0140B0DEBBE0E6AF4440CCA4736BBF2201402B8716D9CEAF4440832C1EF13E2101402609AF4A0FB0444024040D72661D0140B0DEBBE0E6AF4440,5.9259259259259259) + (0103000000010000000400000024040D72661D0140B0DEBBE0E6AF4440832C1EF13E2101402609AF4A0FB044407CCD85A42915014017B22F2835B0444024040D72661D0140B0DEBBE0E6AF4440,5.9259259259259259) + (0103000000010000000400000024040D72661D0140B0DEBBE0E6AF44407CCD85A42915014017B22F2835B044402497FF907EFB0040F085C954C1B0444024040D72661D0140B0DEBBE0E6AF4440,7.0370370370370370) + (010300000001000000040000002497FF907EFB0040F085C954C1B044407CCD85A42915014017B22F2835B04440F5F145CA781001401F2DCE18E6B044402497FF907EFB0040F085C954C1B04440,6.6666666666666667) + (010300000001000000040000002497FF907EFB0040F085C954C1B04440F5F145CA781001401F2DCE18E6B0444063A009D8BF090140BEEE38F68AB244402497FF907EFB0040F085C954C1B04440,6.3703703703703704) + (0103000000010000000400000063A009D8BF090140BEEE38F68AB24440F5F145CA781001401F2DCE18E6B044403312B4DCAC14014047F8F1AAE1B1444063A009D8BF090140BEEE38F68AB24440,5.1481481481481481) + (0103000000010000000400000063A009D8BF090140BEEE38F68AB244403312B4DCAC14014047F8F1AAE1B1444019AE5D3CF8180140A5008D2162B3444063A009D8BF090140BEEE38F68AB24440,4.4444444444444444) + (0103000000010000000400000019AE5D3CF8180140A5008D2162B344403312B4DCAC14014047F8F1AAE1B144408894DB45FA290140F8C4EB987EB2444019AE5D3CF8180140A5008D2162B34440,3.5925925925925926) + (0103000000010000000400000019AE5D3CF8180140A5008D2162B344408894DB45FA290140F8C4EB987EB2444048C0FBBD27290140A9BBC5D646B3444019AE5D3CF8180140A5008D2162B34440,2.9259259259259259) + (0103000000010000000400000019AE5D3CF8180140A5008D2162B3444048C0FBBD27290140A9BBC5D646B344407E88BE590E250140EB9DA83067B4444019AE5D3CF8180140A5008D2162B34440,2.8888888888888889) + (010300000001000000040000007E88BE590E250140EB9DA83067B4444048C0FBBD27290140A9BBC5D646B34440C05105B65D3B014044A2D0B2EEB344407E88BE590E250140EB9DA83067B44440,2.6296296296296296) + (01030000000100000004000000C05105B65D3B014044A2D0B2EEB3444048C0FBBD27290140A9BBC5D646B34440AABF5E61C13D0140E6E512830FB34440C05105B65D3B014044A2D0B2EEB34440,2.6296296296296296) + (01030000000100000004000000C05105B65D3B014044A2D0B2EEB34440AABF5E61C13D0140E6E512830FB3444098933D2F02500140115B676994B34440C05105B65D3B014044A2D0B2EEB34440,3.1851851851851852) + (01030000000100000004000000C05105B65D3B014044A2D0B2EEB3444098933D2F02500140115B676994B3444005329D125F4F01401C8049790FB44440C05105B65D3B014044A2D0B2EEB34440,3.7037037037037037) + (0103000000010000000400000098933D2F02500140115B676994B34440AABF5E61C13D0140E6E512830FB34440581215F9574B0140FDF5BB4EAEB2444098933D2F02500140115B676994B34440,3.2592592592592593) + (0103000000010000000400000098933D2F02500140115B676994B34440581215F9574B0140FDF5BB4EAEB2444054E45F2DB3570140BBDE724420B3444098933D2F02500140115B676994B34440,3.8888888888888889) + (0103000000010000000400000054E45F2DB3570140BBDE724420B34440581215F9574B0140FDF5BB4EAEB24440E53EEA4DD05701407FD7C9557BB2444054E45F2DB3570140BBDE724420B34440,4.2592592592592593) + (01030000000100000004000000E53EEA4DD05701407FD7C9557BB24440581215F9574B0140FDF5BB4EAEB2444052B81E85EB510140EEEBC03923B24440E53EEA4DD05701407FD7C9557BB24440,3.7037037037037037) + (01030000000100000004000000E53EEA4DD05701407FD7C9557BB2444052B81E85EB510140EEEBC03923B244404BA3DC90595001405C456C9DA5B14440E53EEA4DD05701407FD7C9557BB24440,4.4814814814814815) + (01030000000100000004000000E53EEA4DD05701407FD7C9557BB244404BA3DC90595001405C456C9DA5B144404A9CC694D34F01402B63A5BE7BB14440E53EEA4DD05701407FD7C9557BB24440,5.5185185185185185) + (010300000001000000040000004A9CC694D34F01402B63A5BE7BB144404BA3DC90595001405C456C9DA5B144401383C0CAA145014088CC827674B144404A9CC694D34F01402B63A5BE7BB14440,5.0000000000000000) + (010300000001000000040000004A9CC694D34F01402B63A5BE7BB144401383C0CAA145014088CC827674B14440DD24068195430140317345DA64B044404A9CC694D34F01402B63A5BE7BB14440,4.8518518518518518) + (01030000000100000004000000DD24068195430140317345DA64B044401383C0CAA145014088CC827674B14440D1FA8198714001404285107D64B04440DD24068195430140317345DA64B04440,3.9259259259259259) + (01030000000100000004000000DD24068195430140317345DA64B04440D1FA8198714001404285107D64B0444033E768B7D23A014002994E89AFAF4440DD24068195430140317345DA64B04440,4.2222222222222222) + (0103000000010000000400000033E768B7D23A014002994E89AFAF4440D1FA8198714001404285107D64B04440EA6E133D52390140FD1AE7FAEFAF444033E768B7D23A014002994E89AFAF4440,3.5185185185185185) + (0103000000010000000400000033E768B7D23A014002994E89AFAF4440EA6E133D52390140FD1AE7FAEFAF4440A167B3EA73350140E4141DC9E5AF444033E768B7D23A014002994E89AFAF4440,2.7407407407407407) + (0103000000010000000400000033E768B7D23A014002994E89AFAF4440A167B3EA73350140E4141DC9E5AF44403A487527C5340140C76EEE11A6AF444033E768B7D23A014002994E89AFAF4440,2.9259259259259259) + (0103000000010000000400000033E768B7D23A014002994E89AFAF44403A487527C5340140C76EEE11A6AF44404AEA043411360140D2B687AA85AF444033E768B7D23A014002994E89AFAF4440,3.9259259259259259) + (010300000001000000040000004AEA043411360140D2B687AA85AF44403A487527C5340140C76EEE11A6AF4440C286A757CA3201409D8026C286AF44404AEA043411360140D2B687AA85AF4440,4.1111111111111111) + (01030000000100000004000000C286A757CA3201409D8026C286AF44403A487527C5340140C76EEE11A6AF4440BADB448F542E01403AB4C876BEAF4440C286A757CA3201409D8026C286AF4440,4.1481481481481481) + (01030000000100000004000000C286A757CA3201409D8026C286AF4440BADB448F542E01403AB4C876BEAF444004549A09D52F01403E3230057EAF4440C286A757CA3201409D8026C286AF4440,4.9629629629629630) + (0103000000010000000400000004549A09D52F01403E3230057EAF4440BADB448F542E01403AB4C876BEAF4440FB12176D7B280140BDE1A0F9EBAF444004549A09D52F01403E3230057EAF4440,4.6296296296296296) + (0103000000010000000400000004549A09D52F01403E3230057EAF4440FB12176D7B280140BDE1A0F9EBAF4440CCA4736BBF2201402B8716D9CEAF444004549A09D52F01403E3230057EAF4440,5.3703703703703704) + (01030000000100000004000000CCA4736BBF2201402B8716D9CEAF4440FB12176D7B280140BDE1A0F9EBAF4440832C1EF13E2101402609AF4A0FB04440CCA4736BBF2201402B8716D9CEAF4440,5.0000000000000000) + (01030000000100000004000000832C1EF13E2101402609AF4A0FB04440FB12176D7B280140BDE1A0F9EBAF4440F10DE756572701402134E4146CB14440832C1EF13E2101402609AF4A0FB04440,4.1111111111111111) + (01030000000100000004000000832C1EF13E2101402609AF4A0FB04440F10DE756572701402134E4146CB14440F5F145CA781001401F2DCE18E6B04440832C1EF13E2101402609AF4A0FB04440,4.6666666666666667) + (01030000000100000004000000832C1EF13E2101402609AF4A0FB04440F5F145CA781001401F2DCE18E6B044407CCD85A42915014017B22F2835B04440832C1EF13E2101402609AF4A0FB04440,5.5555555555555556) + (01030000000100000004000000F5F145CA781001401F2DCE18E6B04440F10DE756572701402134E4146CB144403312B4DCAC14014047F8F1AAE1B14440F5F145CA781001401F2DCE18E6B04440,4.5555555555555556) + (010300000001000000040000003312B4DCAC14014047F8F1AAE1B14440F10DE756572701402134E4146CB144408894DB45FA290140F8C4EB987EB244403312B4DCAC14014047F8F1AAE1B14440,3.6296296296296296) + (010300000001000000040000008894DB45FA290140F8C4EB987EB24440F10DE756572701402134E4146CB144401DEBE2361A400140F79A0B4953B244408894DB45FA290140F8C4EB987EB24440,2.8518518518518519) + (010300000001000000040000008894DB45FA290140F8C4EB987EB244401DEBE2361A400140F79A0B4953B24440AABF5E61C13D0140E6E512830FB344408894DB45FA290140F8C4EB987EB24440,2.5555555555555556) + (010300000001000000040000008894DB45FA290140F8C4EB987EB24440AABF5E61C13D0140E6E512830FB3444048C0FBBD27290140A9BBC5D646B344408894DB45FA290140F8C4EB987EB24440,2.6666666666666667) + (01030000000100000004000000AABF5E61C13D0140E6E512830FB344401DEBE2361A400140F79A0B4953B24440581215F9574B0140FDF5BB4EAEB24440AABF5E61C13D0140E6E512830FB34440,2.5925925925925926) + (01030000000100000004000000581215F9574B0140FDF5BB4EAEB244401DEBE2361A400140F79A0B4953B24440EA6C9F19754B0140C0EE126009B24440581215F9574B0140FDF5BB4EAEB24440,2.8148148148148148) + (01030000000100000004000000581215F9574B0140FDF5BB4EAEB24440EA6C9F19754B0140C0EE126009B2444052B81E85EB510140EEEBC03923B24440581215F9574B0140FDF5BB4EAEB24440,3.1481481481481481) + (0103000000010000000400000052B81E85EB510140EEEBC03923B24440EA6C9F19754B0140C0EE126009B244404BA3DC90595001405C456C9DA5B1444052B81E85EB510140EEEBC03923B24440,3.9259259259259259) + (010300000001000000040000004BA3DC90595001405C456C9DA5B14440EA6C9F19754B0140C0EE126009B244401383C0CAA145014088CC827674B144404BA3DC90595001405C456C9DA5B14440,4.1111111111111111) + (010300000001000000040000001383C0CAA145014088CC827674B14440EA6C9F19754B0140C0EE126009B244401DEBE2361A400140F79A0B4953B244401383C0CAA145014088CC827674B14440,3.0000000000000000) + (010300000001000000040000001383C0CAA145014088CC827674B144401DEBE2361A400140F79A0B4953B24440D0B02B40EE350140C90D9905EDB044401383C0CAA145014088CC827674B14440,2.9629629629629629) + (010300000001000000040000001383C0CAA145014088CC827674B14440D0B02B40EE350140C90D9905EDB04440D1FA8198714001404285107D64B044401383C0CAA145014088CC827674B14440,3.4074074074074074) + (01030000000100000004000000D1FA8198714001404285107D64B04440D0B02B40EE350140C90D9905EDB0444051DA1B7C613201406F36F4DA1DB04440D1FA8198714001404285107D64B04440,3.2222222222222222) + (01030000000100000004000000D1FA8198714001404285107D64B0444051DA1B7C613201406F36F4DA1DB04440EA6E133D52390140FD1AE7FAEFAF4440D1FA8198714001404285107D64B04440,3.0370370370370370) + (01030000000100000004000000EA6E133D52390140FD1AE7FAEFAF444051DA1B7C613201406F36F4DA1DB04440A167B3EA73350140E4141DC9E5AF4440EA6E133D52390140FD1AE7FAEFAF4440,2.2592592592592593) + (01030000000100000004000000A167B3EA73350140E4141DC9E5AF444051DA1B7C613201406F36F4DA1DB04440BADB448F542E01403AB4C876BEAF4440A167B3EA73350140E4141DC9E5AF4440,2.7037037037037037) + (01030000000100000004000000A167B3EA73350140E4141DC9E5AF4440BADB448F542E01403AB4C876BEAF44403A487527C5340140C76EEE11A6AF4440A167B3EA73350140E4141DC9E5AF4440,2.8148148148148148) + (01030000000100000004000000BADB448F542E01403AB4C876BEAF444051DA1B7C613201406F36F4DA1DB04440FB12176D7B280140BDE1A0F9EBAF4440BADB448F542E01403AB4C876BEAF4440,3.7037037037037037) + (01030000000100000004000000FB12176D7B280140BDE1A0F9EBAF444051DA1B7C613201406F36F4DA1DB04440F10DE756572701402134E4146CB14440FB12176D7B280140BDE1A0F9EBAF4440,3.5555555555555556) + (01030000000100000004000000F10DE756572701402134E4146CB1444051DA1B7C613201406F36F4DA1DB04440D0B02B40EE350140C90D9905EDB04440F10DE756572701402134E4146CB14440,3.3333333333333333) + (01030000000100000004000000F10DE756572701402134E4146CB14440D0B02B40EE350140C90D9905EDB044401DEBE2361A400140F79A0B4953B24440F10DE756572701402134E4146CB14440,3.0000000000000000) +(77 rows) diff --git a/src/pg/test/sql/14_densify_test.sql b/src/pg/test/sql/14_densify_test.sql new file mode 100644 index 0000000..3684295 --- /dev/null +++ b/src/pg/test/sql/14_densify_test.sql @@ -0,0 +1,6 @@ +with data as ( + select + ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin, + ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)'),ST_GeomFromText('POINT(2.1528 41.4136)'),ST_GeomFromText('POINT(2.165 41.3917)'),ST_GeomFromText('POINT(2.1498 41.3713)'),ST_GeomFromText('POINT(2.1533 41.3683)'),ST_GeomFromText('POINT(2.131386 41.413998)')] as geomin +) +select CDB_Densify(geomin, colin, 2) from data; diff --git a/src/pg/test/sql/15_tinmap_test.sql b/src/pg/test/sql/15_tinmap_test.sql new file mode 100644 index 0000000..9393a4c --- /dev/null +++ b/src/pg/test/sql/15_tinmap_test.sql @@ -0,0 +1,6 @@ +with data as ( + select + ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin, + ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)'),ST_GeomFromText('POINT(2.1528 41.4136)'),ST_GeomFromText('POINT(2.165 41.3917)'),ST_GeomFromText('POINT(2.1498 41.3713)'),ST_GeomFromText('POINT(2.1533 41.3683)'),ST_GeomFromText('POINT(2.131386 41.413998)')] as geomin +) +select CDB_TINmap(geomin, colin, 2) from data; From 3becd449c20b281f4eb9659e68e978ae6ffc8d71 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 9 Aug 2016 13:02:23 -0400 Subject: [PATCH 05/62] check tests --- src/pg/test/sql/13_pia_test.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/test/sql/13_pia_test.sql b/src/pg/test/sql/13_pia_test.sql index 5d0b6f0..3af1e7e 100644 --- a/src/pg/test/sql/13_pia_test.sql +++ b/src/pg/test/sql/13_pia_test.sql @@ -1,4 +1,4 @@ with a as( select st_geomfromtext('POLYGON((-432540.453078056 4949775.20452642,-432329.947920966 4951361.232584,-431245.028163694 4952223.31516671,-429131.071033529 4951768.00415574,-424622.07505895 4952843.13503987,-423688.327170174 4953499.20752423,-424086.294349759 4954968.38274191,-423068.388925945 4954378.63345336,-423387.653225542 4953355.67417084,-420594.869840519 4953781.00230592,-416026.095299382 4951484.06849063,-412483.018546414 4951024.5410983,-410490.399661215 4954502.24032205,-408186.197521284 4956398.91417441,-407627.262358013 4959300.94633864,-406948.770061627 4959874.85407739,-404949.583326472 4959047.74518163,-402570.908447199 4953743.46829807,-400971.358683991 4952193.11680804,-403533.488084088 4949649.89857885,-406335.177028373 4950193.19571096,-407790.456731515 4952391.46015616,-412060.672398345 4950381.2389307,-410716.93482498 4949156.7509561,-408464.162289794 4943912.8940387,-409350.599394983 4942819.84896006,-408087.791091424 4942451.6711778,-407274.045613725 4940572.4807777,-404446.196589102 4939976.71501489,-402422.964843936 4940450.3670813,-401010.654464241 4939054.8061663,-397647.247369412 4940679.80737878,-395658.413346901 4940528.84765185,-395536.852462953 4938829.79565997,-394268.923462818 4938003.7277717,-393388.720249116 4934757.80596815,-392393.301362444 4934326.71675815,-392573.527618037 4932323.40974412,-393464.640141837 4931903.10653605,-393085.597275686 4931094.7353605,-398426.261165985 4929156.87541607,-398261.174361137 4926238.00816416,-394045.059966834 4925765.18668498,-392982.960705174 4926391.81893628,-393090.272694301 4927176.84692181,-391648.240010564 4924626.06386961,-391889.914625075 4923086.14787613,-394345.177314013 4923235.086036,-395550.878718795 4917812.79243978,-399009.463978251 4912927.7157945,-398948.794855767 4911941.91010796,-398092.636652078 4911806.57392519,-401991.601817112 4911722.9204501,-406225.972607907 4914505.47286319,-411104.994569885 4912569.26941163,-412925.513522316 4913030.3608866,-414630.148884835 4914436.69169949,-414207.691417276 4919205.78028405,-418306.141109809 4917994.9580478,-424184.700779621 4918938.12432889,-426816.961458921 4923664.37379373,-420956.324227126 4923381.98014807,-420186.661267781 4924286.48693378,-420943.411166194 4926812.76394433,-419779.45457046 4928527.43466337,-419768.767899344 4930681.94459216,-421911.668097113 4930432.40620397,-423482.386112205 4933451.28047252,-427272.814773717 4934151.56473242,-427144.908678797 4939731.77191996,-428982.125554848 4940522.84445172,-428986.133056516 4942437.17281266,-431237.792396792 4947309.68284815,-432476.889648814 4947791.74800037,-432540.453078056 4949775.20452642))', 3857) as g ) -SELECT st_astext(CDB_PIA(g)) from a; +SELECT st_astext(cdb_crankshaft.CDB_PIA(g)) from a; From 358202d324e1deef8f0714f896ff7226257b4b73 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 9 Aug 2016 13:03:24 -0400 Subject: [PATCH 06/62] check tests --- src/pg/sql/15_tinmap.sql | 2 +- src/pg/test/sql/14_densify_test.sql | 2 +- src/pg/test/sql/15_tinmap_test.sql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pg/sql/15_tinmap.sql b/src/pg/sql/15_tinmap.sql index 973eede..493c9cf 100644 --- a/src/pg/sql/15_tinmap.sql +++ b/src/pg/sql/15_tinmap.sql @@ -16,7 +16,7 @@ DECLARE vc numeric; coltemp numeric[]; BEGIN - SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM CDB_Densify(geomin, colin, iterations) dens; + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens; WITH a as (SELECT unnest(p) AS e), b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), c as (SELECT (ST_Dump(t)).geom AS v FROM b) diff --git a/src/pg/test/sql/14_densify_test.sql b/src/pg/test/sql/14_densify_test.sql index 3684295..ea72de4 100644 --- a/src/pg/test/sql/14_densify_test.sql +++ b/src/pg/test/sql/14_densify_test.sql @@ -3,4 +3,4 @@ with data as ( ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin, ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)'),ST_GeomFromText('POINT(2.1528 41.4136)'),ST_GeomFromText('POINT(2.165 41.3917)'),ST_GeomFromText('POINT(2.1498 41.3713)'),ST_GeomFromText('POINT(2.1533 41.3683)'),ST_GeomFromText('POINT(2.131386 41.413998)')] as geomin ) -select CDB_Densify(geomin, colin, 2) from data; +select cdb_crankshaft.CDB_Densify(geomin, colin, 2) from data; diff --git a/src/pg/test/sql/15_tinmap_test.sql b/src/pg/test/sql/15_tinmap_test.sql index 9393a4c..3db265b 100644 --- a/src/pg/test/sql/15_tinmap_test.sql +++ b/src/pg/test/sql/15_tinmap_test.sql @@ -3,4 +3,4 @@ with data as ( ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin, ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)'),ST_GeomFromText('POINT(2.1528 41.4136)'),ST_GeomFromText('POINT(2.165 41.3917)'),ST_GeomFromText('POINT(2.1498 41.3713)'),ST_GeomFromText('POINT(2.1533 41.3683)'),ST_GeomFromText('POINT(2.131386 41.413998)')] as geomin ) -select CDB_TINmap(geomin, colin, 2) from data; +select cdb_crankshaft.CDB_TINmap(geomin, colin, 2) from data; From 2d8d5e8c2ea91e862bebb67eda518f87358e2b26 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 9 Aug 2016 14:09:45 -0400 Subject: [PATCH 07/62] check tests --- src/pg/test/sql/07_gravity_test.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/test/sql/07_gravity_test.sql b/src/pg/test/sql/07_gravity_test.sql index a86bb23..e6a9301 100644 --- a/src/pg/test/sql/07_gravity_test.sql +++ b/src/pg/test/sql/07_gravity_test.sql @@ -18,4 +18,4 @@ SELECT FROM t, s, - CDB_Gravity(t.id, t.g, t.w, s.id, s.g, s.p, 2, 100000, 3) g; + cdb_crankshaft.CDB_Gravity(t.id, t.g, t.w, s.id, s.g, s.p, 2, 100000, 3) g; From d88c967bb72ee885edf93e6938a1a57bd7e76d36 Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 12:13:55 -0400 Subject: [PATCH 08/62] fixed tests --- src/pg/test/expected/14_densify_test.out | 3 +++ src/pg/test/expected/15_tinmap_test.out | 3 +++ src/pg/test/sql/14_densify_test.sql | 3 +++ src/pg/test/sql/15_tinmap_test.sql | 3 +++ 4 files changed, 12 insertions(+) diff --git a/src/pg/test/expected/14_densify_test.out b/src/pg/test/expected/14_densify_test.out index fef9228..ad26b55 100644 --- a/src/pg/test/expected/14_densify_test.out +++ b/src/pg/test/expected/14_densify_test.out @@ -1,3 +1,6 @@ +SET client_min_messages TO WARNING; +\set ECHO none + cdb_densify ----------------------------------------------------------------- (01010000001361C3D32B6501403255302AA9B34440,7.0) diff --git a/src/pg/test/expected/15_tinmap_test.out b/src/pg/test/expected/15_tinmap_test.out index e11e28c..8396078 100644 --- a/src/pg/test/expected/15_tinmap_test.out +++ b/src/pg/test/expected/15_tinmap_test.out @@ -1,3 +1,6 @@ +SET client_min_messages TO WARNING; +\set ECHO none + cdb_tinmap --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- (01030000000100000004000000D237691A140D0140014EEFE2FDB444402497FF907EFB0040F085C954C1B0444063A009D8BF090140BEEE38F68AB24440D237691A140D0140014EEFE2FDB44440,5.8148148148148148) diff --git a/src/pg/test/sql/14_densify_test.sql b/src/pg/test/sql/14_densify_test.sql index ea72de4..9b716f8 100644 --- a/src/pg/test/sql/14_densify_test.sql +++ b/src/pg/test/sql/14_densify_test.sql @@ -1,3 +1,6 @@ +SET client_min_messages TO WARNING; +\set ECHO none + with data as ( select ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin, diff --git a/src/pg/test/sql/15_tinmap_test.sql b/src/pg/test/sql/15_tinmap_test.sql index 3db265b..dcb7fe4 100644 --- a/src/pg/test/sql/15_tinmap_test.sql +++ b/src/pg/test/sql/15_tinmap_test.sql @@ -1,3 +1,6 @@ +SET client_min_messages TO WARNING; +\set ECHO none + with data as ( select ARRAY[7.0,8.0,1.0,2.0,3.0,5.0,6.0,4.0] as colin, From 667bdb79a2a0c814f9c784843275719379718c67 Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 12:16:36 -0400 Subject: [PATCH 09/62] fixed tests --- src/pg/test/expected/13_pia_test.out | 3 +++ src/pg/test/sql/13_pia_test.sql | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/pg/test/expected/13_pia_test.out b/src/pg/test/expected/13_pia_test.out index 037d339..31aa78e 100644 --- a/src/pg/test/expected/13_pia_test.out +++ b/src/pg/test/expected/13_pia_test.out @@ -1,3 +1,6 @@ +SET client_min_messages TO WARNING; +\set ECHO none + st_astext ------------------------------------------- POINT(-409081.865887381 4930017.52075948) diff --git a/src/pg/test/sql/13_pia_test.sql b/src/pg/test/sql/13_pia_test.sql index 3af1e7e..8b37082 100644 --- a/src/pg/test/sql/13_pia_test.sql +++ b/src/pg/test/sql/13_pia_test.sql @@ -1,3 +1,6 @@ +SET client_min_messages TO WARNING; +\set ECHO none + with a as( select st_geomfromtext('POLYGON((-432540.453078056 4949775.20452642,-432329.947920966 4951361.232584,-431245.028163694 4952223.31516671,-429131.071033529 4951768.00415574,-424622.07505895 4952843.13503987,-423688.327170174 4953499.20752423,-424086.294349759 4954968.38274191,-423068.388925945 4954378.63345336,-423387.653225542 4953355.67417084,-420594.869840519 4953781.00230592,-416026.095299382 4951484.06849063,-412483.018546414 4951024.5410983,-410490.399661215 4954502.24032205,-408186.197521284 4956398.91417441,-407627.262358013 4959300.94633864,-406948.770061627 4959874.85407739,-404949.583326472 4959047.74518163,-402570.908447199 4953743.46829807,-400971.358683991 4952193.11680804,-403533.488084088 4949649.89857885,-406335.177028373 4950193.19571096,-407790.456731515 4952391.46015616,-412060.672398345 4950381.2389307,-410716.93482498 4949156.7509561,-408464.162289794 4943912.8940387,-409350.599394983 4942819.84896006,-408087.791091424 4942451.6711778,-407274.045613725 4940572.4807777,-404446.196589102 4939976.71501489,-402422.964843936 4940450.3670813,-401010.654464241 4939054.8061663,-397647.247369412 4940679.80737878,-395658.413346901 4940528.84765185,-395536.852462953 4938829.79565997,-394268.923462818 4938003.7277717,-393388.720249116 4934757.80596815,-392393.301362444 4934326.71675815,-392573.527618037 4932323.40974412,-393464.640141837 4931903.10653605,-393085.597275686 4931094.7353605,-398426.261165985 4929156.87541607,-398261.174361137 4926238.00816416,-394045.059966834 4925765.18668498,-392982.960705174 4926391.81893628,-393090.272694301 4927176.84692181,-391648.240010564 4924626.06386961,-391889.914625075 4923086.14787613,-394345.177314013 4923235.086036,-395550.878718795 4917812.79243978,-399009.463978251 4912927.7157945,-398948.794855767 4911941.91010796,-398092.636652078 4911806.57392519,-401991.601817112 4911722.9204501,-406225.972607907 4914505.47286319,-411104.994569885 4912569.26941163,-412925.513522316 4913030.3608866,-414630.148884835 4914436.69169949,-414207.691417276 4919205.78028405,-418306.141109809 4917994.9580478,-424184.700779621 4918938.12432889,-426816.961458921 4923664.37379373,-420956.324227126 4923381.98014807,-420186.661267781 4924286.48693378,-420943.411166194 4926812.76394433,-419779.45457046 4928527.43466337,-419768.767899344 4930681.94459216,-421911.668097113 4930432.40620397,-423482.386112205 4933451.28047252,-427272.814773717 4934151.56473242,-427144.908678797 4939731.77191996,-428982.125554848 4940522.84445172,-428986.133056516 4942437.17281266,-431237.792396792 4947309.68284815,-432476.889648814 4947791.74800037,-432540.453078056 4949775.20452642))', 3857) as g ) From 4986743ce94b323d14b0df603b674ae498f3947b Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 13:52:00 -0400 Subject: [PATCH 10/62] tests spaces --- src/pg/test/expected/14_densify_test.out | 1 - src/pg/test/expected/15_tinmap_test.out | 1 - 2 files changed, 2 deletions(-) diff --git a/src/pg/test/expected/14_densify_test.out b/src/pg/test/expected/14_densify_test.out index ad26b55..cc9001a 100644 --- a/src/pg/test/expected/14_densify_test.out +++ b/src/pg/test/expected/14_densify_test.out @@ -1,6 +1,5 @@ SET client_min_messages TO WARNING; \set ECHO none - cdb_densify ----------------------------------------------------------------- (01010000001361C3D32B6501403255302AA9B34440,7.0) diff --git a/src/pg/test/expected/15_tinmap_test.out b/src/pg/test/expected/15_tinmap_test.out index 8396078..bf12b57 100644 --- a/src/pg/test/expected/15_tinmap_test.out +++ b/src/pg/test/expected/15_tinmap_test.out @@ -1,6 +1,5 @@ SET client_min_messages TO WARNING; \set ECHO none - cdb_tinmap --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- (01030000000100000004000000D237691A140D0140014EEFE2FDB444402497FF907EFB0040F085C954C1B0444063A009D8BF090140BEEE38F68AB24440D237691A140D0140014EEFE2FDB44440,5.8148148148148148) From 01b0efd82b60ee09a9c783cbc2300a18f1ae2ae2 Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 13:55:22 -0400 Subject: [PATCH 11/62] tests spaces --- src/pg/test/expected/13_pia_test.out | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pg/test/expected/13_pia_test.out b/src/pg/test/expected/13_pia_test.out index 31aa78e..cff1550 100644 --- a/src/pg/test/expected/13_pia_test.out +++ b/src/pg/test/expected/13_pia_test.out @@ -1,6 +1,5 @@ SET client_min_messages TO WARNING; \set ECHO none - st_astext ------------------------------------------- POINT(-409081.865887381 4930017.52075948) From 7c2d160da0e0dc18218d68818d9c43e41fc08c5c Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 13:58:37 -0400 Subject: [PATCH 12/62] tests rectangle grid --- src/pg/sql/13_PIA.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index c395720..a0ed670 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -40,7 +40,7 @@ BEGIN -- grid #0 with c1 as( - SELECT CDB_RectangleGrid(polygon, h, h) as c + SELECT CDB_RectangleGrid(polygon, h::float, h::float) as c ) SELECT array_agg(c) INTO cells FROM c1; From b80324c0d17317ade7438ae25c6333b7958aeace Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 14:06:28 -0400 Subject: [PATCH 13/62] tests rectangle grid --- src/pg/sql/13_PIA.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index a0ed670..8ab3532 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -40,7 +40,7 @@ BEGIN -- grid #0 with c1 as( - SELECT CDB_RectangleGrid(polygon, h::float, h::float) as c + SELECT CDB_RectangleGrid(polygon, h::float8, h::float8) as c ) SELECT array_agg(c) INTO cells FROM c1; From 592eb68d42b716e782e453bb3756b713c8f56aaa Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 14:15:22 -0400 Subject: [PATCH 14/62] tests rectangle grid --- src/pg/sql/13_PIA.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index 8ab3532..c395720 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -40,7 +40,7 @@ BEGIN -- grid #0 with c1 as( - SELECT CDB_RectangleGrid(polygon, h::float8, h::float8) as c + SELECT CDB_RectangleGrid(polygon, h, h) as c ) SELECT array_agg(c) INTO cells FROM c1; From 60eb316cbe5216138dd674d26eb0908fd418c268 Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 14:32:59 -0400 Subject: [PATCH 15/62] spaces again --- src/pg/test/expected/14_densify_test.out | 2 +- src/pg/test/expected/15_tinmap_test.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pg/test/expected/14_densify_test.out b/src/pg/test/expected/14_densify_test.out index cc9001a..5583338 100644 --- a/src/pg/test/expected/14_densify_test.out +++ b/src/pg/test/expected/14_densify_test.out @@ -1,6 +1,6 @@ SET client_min_messages TO WARNING; \set ECHO none - cdb_densify + cdb_densify ----------------------------------------------------------------- (01010000001361C3D32B6501403255302AA9B34440,7.0) (01010000002497FF907EFB0040F085C954C1B04440,8.0) diff --git a/src/pg/test/expected/15_tinmap_test.out b/src/pg/test/expected/15_tinmap_test.out index bf12b57..f7fd670 100644 --- a/src/pg/test/expected/15_tinmap_test.out +++ b/src/pg/test/expected/15_tinmap_test.out @@ -1,6 +1,6 @@ SET client_min_messages TO WARNING; \set ECHO none - cdb_tinmap + cdb_tinmap --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- (01030000000100000004000000D237691A140D0140014EEFE2FDB444402497FF907EFB0040F085C954C1B0444063A009D8BF090140BEEE38F68AB24440D237691A140D0140014EEFE2FDB44440,5.8148148148148148) (01030000000100000004000000D237691A140D0140014EEFE2FDB4444063A009D8BF090140BEEE38F68AB2444019AE5D3CF8180140A5008D2162B34440D237691A140D0140014EEFE2FDB44440,4.3333333333333333) From 66f4dfa4eaf33a1328e5ec0b9b12acafbb0c8e8c Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 10 Aug 2016 14:39:31 -0400 Subject: [PATCH 16/62] spaces again --- src/pg/test/expected/14_densify_test.out | 1 + src/pg/test/expected/15_tinmap_test.out | 1 + 2 files changed, 2 insertions(+) diff --git a/src/pg/test/expected/14_densify_test.out b/src/pg/test/expected/14_densify_test.out index 5583338..549652b 100644 --- a/src/pg/test/expected/14_densify_test.out +++ b/src/pg/test/expected/14_densify_test.out @@ -47,3 +47,4 @@ SET client_min_messages TO WARNING; (0101000000BADB448F542E01403AB4C876BEAF4440,4.1111111111111111) (0101000000FB12176D7B280140BDE1A0F9EBAF4440,4.0000000000000000) (44 rows) + diff --git a/src/pg/test/expected/15_tinmap_test.out b/src/pg/test/expected/15_tinmap_test.out index f7fd670..af0221f 100644 --- a/src/pg/test/expected/15_tinmap_test.out +++ b/src/pg/test/expected/15_tinmap_test.out @@ -80,3 +80,4 @@ SET client_min_messages TO WARNING; (01030000000100000004000000F10DE756572701402134E4146CB1444051DA1B7C613201406F36F4DA1DB04440D0B02B40EE350140C90D9905EDB04440F10DE756572701402134E4146CB14440,3.3333333333333333) (01030000000100000004000000F10DE756572701402134E4146CB14440D0B02B40EE350140C90D9905EDB044401DEBE2361A400140F79A0B4953B24440F10DE756572701402134E4146CB14440,3.0000000000000000) (77 rows) + From 5b6d2c568b9799b7ebc899c79b7a46999219d47a Mon Sep 17 00:00:00 2001 From: abelvm Date: Mon, 15 Aug 2016 17:15:36 -0400 Subject: [PATCH 17/62] first commit --- doc/19_contour.md | 47 ++++++ src/pg/sql/19_contour.sql | 191 +++++++++++++++++++++++ src/pg/test/expected/19_contour_test.out | 2 + src/pg/test/sql/19_contour_test.sql | 14 ++ 4 files changed, 254 insertions(+) create mode 100644 doc/19_contour.md create mode 100644 src/pg/sql/19_contour.sql create mode 100644 src/pg/test/expected/19_contour_test.out create mode 100644 src/pg/test/sql/19_contour_test.sql diff --git a/doc/19_contour.md b/doc/19_contour.md new file mode 100644 index 0000000..f34f7c0 --- /dev/null +++ b/doc/19_contour.md @@ -0,0 +1,47 @@ +## Contour maps + +Function to generate a contour map from an scatter dataset of points, using one of three methos: + +* [Nearest neighbor](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) +* [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system) +* [IDW](https://en.wikipedia.org/wiki/Inverse_distance_weighting) + +### CDB_Contour (geom geometry[], values numeric[], resolution integer, buffer numeric, method, classmethod integer, steps integer) + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| geom | geometry[] | Array of points's geometries | +| values | numeric[] | Array of points' values for the param under study| +| resolution | integer | Size in meters of the cells in the grid| +| buffer | numeric | Value between 0 and 1 for spatial buffer of the set of points +| method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW| +| classmethod | integer | 0:equals, 1: heads&tails, 2:jenks, 3:quantiles | +| steps | integer | Number of steps in the classification| + +### Returns +Returns a table object + +| Name | Type | Description | +|------|------|-------------| +| the_geom | geometry | Geometries of the classified contour map| +| avg_value | numeric | Avg value of the area| +| min_value | numeric | Min value of the area| +| max_value | numeric | Max value of the areal| +| bin | integer | Index of the class of the area| + +#### Example Usage + +```sql +WITH a AS ( + SELECT + ARRAY[800, 700, 600, 500, 400, 300, 200, 100]::numeric[] AS vals, + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g +) +SELECT + foo.* +FROM + a, + CDB_contour(a.g, a.vals, 500, 0.0, 1,3,5) foo; +``` diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql new file mode 100644 index 0000000..5118972 --- /dev/null +++ b/src/pg/sql/19_contour.sql @@ -0,0 +1,191 @@ +CREATE OR REPLACE FUNCTION CDB_Contour( + IN geomin geometry[], + IN colin numeric[], + IN resolution integer, + IN buffer numeric, + IN intmethod integer, + IN classmethod integer, + IN steps integer + ) +RETURNS TABLE( + the_geom geometry, + bin integer, + min_value numeric, + max_value numeric, + avg_value numeric +) AS $$ +DECLARE + cell numeric; + tin geometry[]; +BEGIN + -- calc the cell size in web mercator units + WITH center as ( + SELECT ST_centroid(ST_Collect(geomin)) as c + ) + SELECT + round(resolution / cos(ST_y(c) * pi()/180)) + INTO cell + FROM center; + -- raise notice 'Resol: %', cell; + + -- we don't have iterative barycentric interpolation in CDB_interpolation, + -- and it's a costy function, so let's make a custom one here till + -- we update the code + -- tin := ARRAY[]::geometry[]; + IF intmethod=1 THEN + WITH + a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b) + SELECT array_agg(v) INTO tin FROM c; + END IF; + -- Delaunay stuff performed just ONCE!! + + -- magic + RETURN QUERY + WITH + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r + ), + envelope as ( + SELECT + st_expand(a.g, a.r) as e + FROM convexhull a + ), + envelope3857 as( + SELECT + ST_Transform(e, 3857) as geom + FROM envelope + ), + grid as( + SELECT + ST_Transform(CDB_RectangleGrid(geom, cell, cell), 4326) as geom + FROM envelope3857 + ), + interp as( + SELECT + geom, + CASE + WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) + ELSE db_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + END as val + FROM grid + ), + classes as( + SELECT CASE + WHEN classmethod = 0 THEN + CDB_EqualIntervalBins(array_agg(val), steps) + WHEN classmethod = 1 THEN + CDB_HeadsTailsBins(array_agg(val), steps) + WHEN classmethod = 2 THEN + CDB_JenksBins(array_agg(val), steps) + ELSE + CDB_QuantileBins(array_agg(val), steps) + END as b + FROM interp + where val is not null + ), + classified as( + SELECT + i.*, + width_bucket(i.val, c.b) as bucket + FROM interp i left join classes c + ON 1=1 + ), + classified2 as( + SELECT + geom, + val, + CASE + WHEN bucket = steps THEN bucket - 1 + ELSE bucket + END as b + FROM classified + ), + final as( + SELECT + st_union(geom) as the_geom, + b as bin, + min(val) as min_value, + max(val) as max_value, + avg(val) as avg_value + FROM classified2 + GROUP BY bin + ) + SELECT + * + FROM final + where final.bin is not null + ; +END; +$$ language plpgsql; + + + +-- ===================================================================== +-- Interp in grid, so we can use barycentric with a precalculated tin (NNI) +-- ===================================================================== +CREATE OR REPLACE FUNCTION cdb_crankshaft._interp_in_tin( + IN geomin geometry[], + IN colin numeric[], + IN tin geometry[], + IN point geometry + ) +RETURNS numeric AS +$$ +DECLARE + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- get the cell the point is within + WITH + a as (SELECT unnest(tin) as v), + b as (SELECT v FROM a WHERE ST_Within(point, v)) + SELECT v INTO g FROM b; + + -- if we're out of the data realm, + -- return null + IF g is null THEN + RETURN null; + END IF; + + -- vertex of the selected cell + WITH a AS ( + SELECT (ST_DumpPoints(g)).geom AS v + ) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + -- calc the areas + SELECT + ST_area(g), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1); + RETURN output; +END; +$$ +language plpgsql; + + diff --git a/src/pg/test/expected/19_contour_test.out b/src/pg/test/expected/19_contour_test.out new file mode 100644 index 0000000..da0e94b --- /dev/null +++ b/src/pg/test/expected/19_contour_test.out @@ -0,0 +1,2 @@ +SET client_min_messages TO WARNING; +\set ECHO none diff --git a/src/pg/test/sql/19_contour_test.sql b/src/pg/test/sql/19_contour_test.sql new file mode 100644 index 0000000..fa32609 --- /dev/null +++ b/src/pg/test/sql/19_contour_test.sql @@ -0,0 +1,14 @@ +SET client_min_messages TO WARNING; +\set ECHO none +\pset format unaligned + +WITH a AS ( + SELECT + ARRAY[800, 700, 600, 500, 400, 300, 200, 100]::numeric[] AS vals, + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g +) +SELECT + foo.* +FROM + a, + CDB_contour(a.g, a.vals, 500, 0.0, 1,3,5) foo; From cb39d1830da659c47b3f48f2ab081f065f7ef247 Mon Sep 17 00:00:00 2001 From: abelvm Date: Mon, 15 Aug 2016 17:23:52 -0400 Subject: [PATCH 18/62] 2nd commit --- src/pg/sql/19_contour.sql | 2 +- src/pg/test/sql/19_contour_test.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index 5118972..8c614b6 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -69,7 +69,7 @@ BEGIN geom, CASE WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) - ELSE db_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) END as val FROM grid ), diff --git a/src/pg/test/sql/19_contour_test.sql b/src/pg/test/sql/19_contour_test.sql index fa32609..92438cc 100644 --- a/src/pg/test/sql/19_contour_test.sql +++ b/src/pg/test/sql/19_contour_test.sql @@ -11,4 +11,4 @@ SELECT foo.* FROM a, - CDB_contour(a.g, a.vals, 500, 0.0, 1,3,5) foo; + cdb_crankshaft.CDB_contour(a.g, a.vals, 500, 0.0, 1,3,5) foo; From 8a529fc956cce95b025e562fcc68c6e2b103f010 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 11:16:51 -0400 Subject: [PATCH 19/62] add CDB_dependencies --- src/pg/sql/13_PIA.sql | 103 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 3 deletions(-) diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index c395720..e59a3b8 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -40,12 +40,12 @@ BEGIN -- grid #0 with c1 as( - SELECT CDB_RectangleGrid(polygon, h, h) as c + SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c ) SELECT array_agg(c) INTO cells FROM c1; -- 1st guess: centroid - best_d := _Signed_Dist(polygon, ST_Centroid(Polygon)); + best_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(Polygon)); -- looping the loop n := array_length(cells,1); @@ -75,7 +75,7 @@ BEGIN -- resample the cell with c1 as( - SELECT CDB_RectangleGrid(cell, test_h/2, test_h/2) as c + SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c ) SELECT array_agg(c) INTO test_cells FROM c1; @@ -120,3 +120,100 @@ BEGIN RETURN dist; END; $$ language plpgsql IMMUTABLE; + +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; From 2c373dd9c797662c03992f8aefe0705fe072a70e Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 11:28:34 -0400 Subject: [PATCH 20/62] add CDB_dependencies --- src/pg/sql/13_PIA.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index e59a3b8..be16871 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -61,7 +61,7 @@ BEGIN test_h := ST_XMax(cell) - ST_XMin(cell) ; -- check distance - test_d := _Signed_Dist(polygon, ST_Centroid(cell)); + test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell)); IF test_d > best_d THEN best_d := test_d; best_c := cells[i]; From 250f932915fe9d3dc7cb735ee827cdb7051f60a6 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 11:35:15 -0400 Subject: [PATCH 21/62] add CDB_dependencies --- src/pg/test/expected/13_pia_test.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/test/expected/13_pia_test.out b/src/pg/test/expected/13_pia_test.out index cff1550..d51c9c8 100644 --- a/src/pg/test/expected/13_pia_test.out +++ b/src/pg/test/expected/13_pia_test.out @@ -1,6 +1,6 @@ SET client_min_messages TO WARNING; \set ECHO none - st_astext + st_astext ------------------------------------------- POINT(-409081.865887381 4930017.52075948) (1 row) From edeac1838913c06dfe1616c20dc1e9ce8c1b6e07 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 13:02:28 -0400 Subject: [PATCH 22/62] changed test --- src/pg/test/sql/19_contour_test.sql | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pg/test/sql/19_contour_test.sql b/src/pg/test/sql/19_contour_test.sql index 92438cc..da82327 100644 --- a/src/pg/test/sql/19_contour_test.sql +++ b/src/pg/test/sql/19_contour_test.sql @@ -6,9 +6,12 @@ WITH a AS ( SELECT ARRAY[800, 700, 600, 500, 400, 300, 200, 100]::numeric[] AS vals, ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g -) +), +b as( SELECT foo.* FROM a, - cdb_crankshaft.CDB_contour(a.g, a.vals, 500, 0.0, 1,3,5) foo; + CDB_contour(a.g, a.vals, 500, 0.0, 1, 3, 5) foo +) +SELECT bin, avg_value from b order by bin; From bae3362b59a9fa365e98a77e1199818f5f7ba6a5 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 13:15:13 -0400 Subject: [PATCH 23/62] added CDB stuff --- src/pg/sql/19_contour.sql | 459 +++++++++++++++++++++++++++++++++++++- 1 file changed, 453 insertions(+), 6 deletions(-) diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index 8c614b6..528ad9a 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -61,7 +61,7 @@ BEGIN ), grid as( SELECT - ST_Transform(CDB_RectangleGrid(geom, cell, cell), 4326) as geom + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(geom, cell, cell), 4326) as geom FROM envelope3857 ), interp as( @@ -76,13 +76,13 @@ BEGIN classes as( SELECT CASE WHEN classmethod = 0 THEN - CDB_EqualIntervalBins(array_agg(val), steps) + cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps) WHEN classmethod = 1 THEN - CDB_HeadsTailsBins(array_agg(val), steps) + cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps) WHEN classmethod = 2 THEN - CDB_JenksBins(array_agg(val), steps) + cdb_crankshaft.CDB_JenksBins(array_agg(val), steps) ELSE - CDB_QuantileBins(array_agg(val), steps) + cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps) END as b FROM interp where val is not null @@ -127,7 +127,7 @@ $$ language plpgsql; -- ===================================================================== -- Interp in grid, so we can use barycentric with a precalculated tin (NNI) -- ===================================================================== -CREATE OR REPLACE FUNCTION cdb_crankshaft._interp_in_tin( +CREATE OR REPLACE FUNCTION _interp_in_tin( IN geomin geometry[], IN colin numeric[], IN tin geometry[], @@ -189,3 +189,450 @@ $$ language plpgsql; +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; From ed6ecd0fef4346abe0facfe62a5ca51367da854f Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 13:29:55 -0400 Subject: [PATCH 24/62] added schema --- src/pg/test/sql/19_contour_test.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/test/sql/19_contour_test.sql b/src/pg/test/sql/19_contour_test.sql index da82327..889f48a 100644 --- a/src/pg/test/sql/19_contour_test.sql +++ b/src/pg/test/sql/19_contour_test.sql @@ -12,6 +12,6 @@ SELECT foo.* FROM a, - CDB_contour(a.g, a.vals, 500, 0.0, 1, 3, 5) foo + cdb_crankshaft.CDB_contour(a.g, a.vals, 500, 0.0, 1, 3, 5) foo ) SELECT bin, avg_value from b order by bin; From 0ea060a69816a2318a77d8c7e23579ed90601a92 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 13:56:01 -0400 Subject: [PATCH 25/62] added srid to tests --- src/pg/test/sql/19_contour_test.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/test/sql/19_contour_test.sql b/src/pg/test/sql/19_contour_test.sql index 889f48a..eb1da6c 100644 --- a/src/pg/test/sql/19_contour_test.sql +++ b/src/pg/test/sql/19_contour_test.sql @@ -5,7 +5,7 @@ SET client_min_messages TO WARNING; WITH a AS ( SELECT ARRAY[800, 700, 600, 500, 400, 300, 200, 100]::numeric[] AS vals, - ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)',4326),ST_GeomFromText('POINT(2.1228 41.380)',4326),ST_GeomFromText('POINT(2.1511 41.374)',4326),ST_GeomFromText('POINT(2.1528 41.413)',4326),ST_GeomFromText('POINT(2.165 41.391)',4326),ST_GeomFromText('POINT(2.1498 41.371)',4326),ST_GeomFromText('POINT(2.1533 41.368)',4326),ST_GeomFromText('POINT(2.131386 41.41399)',4326)] AS g ), b as( SELECT From f6cedd7c866af3223d158924141a2bbc1232ec4b Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 14:05:53 -0400 Subject: [PATCH 26/62] fixed spaces --- src/pg/test/expected/19_contour_test.out | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/pg/test/expected/19_contour_test.out b/src/pg/test/expected/19_contour_test.out index da0e94b..ef53315 100644 --- a/src/pg/test/expected/19_contour_test.out +++ b/src/pg/test/expected/19_contour_test.out @@ -1,2 +1,10 @@ SET client_min_messages TO WARNING; \set ECHO none + bin|avg_value + 0|288.072587252400038769154 + 1|419.135632263450792294773 + 2|476.5442571943126671224 + 3|524.0779810136330772726 + 4|611.26099562647644421297 + (5 rows) + From b7a412fa4dc49c4720fa4ce781c3262237c1b667 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 16 Aug 2016 14:17:14 -0400 Subject: [PATCH 27/62] fixed spaces --- src/pg/test/expected/19_contour_test.out | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/pg/test/expected/19_contour_test.out b/src/pg/test/expected/19_contour_test.out index ef53315..ff25bdf 100644 --- a/src/pg/test/expected/19_contour_test.out +++ b/src/pg/test/expected/19_contour_test.out @@ -1,10 +1,9 @@ SET client_min_messages TO WARNING; \set ECHO none - bin|avg_value - 0|288.072587252400038769154 - 1|419.135632263450792294773 - 2|476.5442571943126671224 - 3|524.0779810136330772726 - 4|611.26099562647644421297 - (5 rows) - +bin|avg_value +0|288.072587252400038769154 +1|419.135632263450792294773 +2|476.5442571943126671224 +3|524.0779810136330772726 +4|611.26099562647644421297 +(5 rows) From 0f47659cf9ee25a6b6bb548707a60d1d8f2c983f Mon Sep 17 00:00:00 2001 From: abelvm Date: Thu, 18 Aug 2016 16:13:56 -0400 Subject: [PATCH 28/62] smart resolution guessing --- doc/19_contour.md | 11 +++++--- src/pg/sql/19_contour.sql | 40 ++++++++++++++++++++--------- src/pg/test/sql/19_contour_test.sql | 2 +- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/doc/19_contour.md b/doc/19_contour.md index f34f7c0..d6629e4 100644 --- a/doc/19_contour.md +++ b/doc/19_contour.md @@ -14,11 +14,11 @@ Function to generate a contour map from an scatter dataset of points, using one |------|------|-------------| | geom | geometry[] | Array of points's geometries | | values | numeric[] | Array of points' values for the param under study| -| resolution | integer | Size in meters of the cells in the grid| | buffer | numeric | Value between 0 and 1 for spatial buffer of the set of points | method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW| | classmethod | integer | 0:equals, 1: heads&tails, 2:jenks, 3:quantiles | | steps | integer | Number of steps in the classification| +| max_time | integer | Max time in millisecons for processing time ### Returns Returns a table object @@ -37,11 +37,14 @@ Returns a table object WITH a AS ( SELECT ARRAY[800, 700, 600, 500, 400, 300, 200, 100]::numeric[] AS vals, - ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g -) + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)',4326),ST_GeomFromText('POINT(2.1228 41.380)',4326),ST_GeomFromText('POINT(2.1511 41.374)',4326),ST_GeomFromText('POINT(2.1528 41.413)',4326),ST_GeomFromText('POINT(2.165 41.391)',4326),ST_GeomFromText('POINT(2.1498 41.371)',4326),ST_GeomFromText('POINT(2.1533 41.368)',4326),ST_GeomFromText('POINT(2.131386 41.41399)',4326)] AS g +), +b as( SELECT foo.* FROM a, - CDB_contour(a.g, a.vals, 500, 0.0, 1,3,5) foo; + cdb_crankshaft.CDB_contour(a.g, a.vals, 0.0, 1, 3, 5, 60) foo +) +SELECT bin, avg_value from b order by bin; ``` diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index 528ad9a..c0a64c2 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -1,11 +1,11 @@ CREATE OR REPLACE FUNCTION CDB_Contour( IN geomin geometry[], IN colin numeric[], - IN resolution integer, IN buffer numeric, IN intmethod integer, IN classmethod integer, - IN steps integer + IN steps integer, + IN max_time integer DEFAULT 60000 ) RETURNS TABLE( the_geom geometry, @@ -15,19 +15,28 @@ RETURNS TABLE( avg_value numeric ) AS $$ DECLARE - cell numeric; + cell_count integer; tin geometry[]; BEGIN -- calc the cell size in web mercator units - WITH center as ( - SELECT ST_centroid(ST_Collect(geomin)) as c - ) - SELECT - round(resolution / cos(ST_y(c) * pi()/180)) - INTO cell - FROM center; + -- WITH center as ( + -- SELECT ST_centroid(ST_Collect(geomin)) as c + -- ) + -- SELECT + -- round(resolution / cos(ST_y(c) * pi()/180)) + -- INTO cell + -- FROM center; -- raise notice 'Resol: %', cell; + -- calc the optimal number of cells for the current dataset + SELECT + CASE intmethod + WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) + WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797) + WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218) + ELSE 10000 + END INTO cell_count; + -- we don't have iterative barycentric interpolation in CDB_interpolation, -- and it's a costy function, so let's make a custom one here till -- we update the code @@ -59,10 +68,17 @@ BEGIN ST_Transform(e, 3857) as geom FROM envelope ), + resolution as( + SELECT + round(|/ ( + ST_area(geom) / cell_count + )) as cell + FROM envelope3857 + ), grid as( SELECT - ST_Transform(cdb_crankshaft.CDB_RectangleGrid(geom, cell, cell), 4326) as geom - FROM envelope3857 + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom + FROM envelope3857 e, resolution r ), interp as( SELECT diff --git a/src/pg/test/sql/19_contour_test.sql b/src/pg/test/sql/19_contour_test.sql index eb1da6c..70e5cf9 100644 --- a/src/pg/test/sql/19_contour_test.sql +++ b/src/pg/test/sql/19_contour_test.sql @@ -12,6 +12,6 @@ SELECT foo.* FROM a, - cdb_crankshaft.CDB_contour(a.g, a.vals, 500, 0.0, 1, 3, 5) foo + cdb_crankshaft.CDB_contour(a.g, a.vals, 0.0, 1, 3, 5, 60) foo ) SELECT bin, avg_value from b order by bin; From fa14b8d9bbdbd878c5eb0bfdc0d9ef2827ba02fc Mon Sep 17 00:00:00 2001 From: abelvm Date: Thu, 18 Aug 2016 16:33:38 -0400 Subject: [PATCH 29/62] SRG test --- src/pg/test/expected/19_contour_test.out | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pg/test/expected/19_contour_test.out b/src/pg/test/expected/19_contour_test.out index ff25bdf..1404c96 100644 --- a/src/pg/test/expected/19_contour_test.out +++ b/src/pg/test/expected/19_contour_test.out @@ -1,9 +1,9 @@ SET client_min_messages TO WARNING; \set ECHO none bin|avg_value -0|288.072587252400038769154 -1|419.135632263450792294773 -2|476.5442571943126671224 -3|524.0779810136330772726 -4|611.26099562647644421297 +0|280.23070673030491816424178 +1|413.81702914846213479025305 +2|479.6334491374486884098328 +3|529.1545236882183479447113 +4|614.1132081424930103122037 (5 rows) From ef436546b4f113eb425c967e30fd577d07076f69 Mon Sep 17 00:00:00 2001 From: abelvm Date: Mon, 29 Aug 2016 11:26:02 -0400 Subject: [PATCH 30/62] add cdb_utils --- src/pg/sql/19_contour.sql | 449 -------------------------------------- src/pg/sql/cdb_utils.sql | 447 +++++++++++++++++++++++++++++++++++++ 2 files changed, 447 insertions(+), 449 deletions(-) create mode 100644 src/pg/sql/cdb_utils.sql diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index c0a64c2..759b151 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -203,452 +203,3 @@ BEGIN END; $$ language plpgsql; - - --- --- Fill given extent with a rectangular coverage --- --- @param ext Extent to fill. Only rectangles with center point falling --- inside the extent (or at the lower or leftmost edge) will --- be emitted. The returned hexagons will have the same SRID --- as this extent. --- --- @param width With of each rectangle --- --- @param height Height of each rectangle --- --- @param origin Optional origin to allow for exact tiling. --- If omitted the origin will be 0,0. --- The parameter is checked for having the same SRID --- as the extent. --- --- -CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) -RETURNS SETOF GEOMETRY -AS $$ -DECLARE - h GEOMETRY; -- rectangle cell - hstep FLOAT8; -- horizontal step - vstep FLOAT8; -- vertical step - hw FLOAT8; -- half width - hh FLOAT8; -- half height - vstart FLOAT8; - hstart FLOAT8; - hend FLOAT8; - vend FLOAT8; - xoff FLOAT8; - yoff FLOAT8; - xgrd FLOAT8; - ygrd FLOAT8; - x FLOAT8; - y FLOAT8; - srid INTEGER; -BEGIN - - srid := ST_SRID(ext); - - xoff := 0; - yoff := 0; - - IF origin IS NOT NULL THEN - IF ST_SRID(origin) != srid THEN - RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); - END IF; - xoff := ST_X(origin); - yoff := ST_Y(origin); - END IF; - - --RAISE DEBUG 'X offset: %', xoff; - --RAISE DEBUG 'Y offset: %', yoff; - - hw := width/2.0; - hh := height/2.0; - - xgrd := hw; - ygrd := hh; - --RAISE DEBUG 'X grid size: %', xgrd; - --RAISE DEBUG 'Y grid size: %', ygrd; - - hstep := width; - vstep := height; - - -- Tweak horizontal start on hstep grid from origin - hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; - --RAISE DEBUG 'hstart: %', hstart; - - -- Tweak vertical start on vstep grid from origin - vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; - --RAISE DEBUG 'vstart: %', vstart; - - hend := ST_XMax(ext); - vend := ST_YMax(ext); - - --RAISE DEBUG 'hend: %', hend; - --RAISE DEBUG 'vend: %', vend; - - x := hstart; - WHILE x < hend LOOP -- over X - y := vstart; - h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); - WHILE y < vend LOOP -- over Y - RETURN NEXT h; - h := ST_Translate(h, 0, vstep); - y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid - END LOOP; - x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid - END LOOP; - - RETURN; -END -$$ LANGUAGE 'plpgsql' IMMUTABLE; - --- --- Calculate the equal interval bins for a given column --- --- @param in_array A numeric array of numbers to determine the best --- to determine the bin boundary --- --- @param breaks The number of bins you want to find. --- --- --- Returns: upper edges of bins --- --- - -CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ -DECLARE - diff numeric; - min_val numeric; - max_val numeric; - tmp_val numeric; - i INT := 1; - reply numeric[]; -BEGIN - SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; - diff = (max_val - min_val) / breaks::numeric; - LOOP - IF i < breaks THEN - tmp_val = min_val + i::numeric * diff; - reply = array_append(reply, tmp_val); - i := i+1; - ELSE - reply = array_append(reply, max_val); - EXIT; - END IF; - END LOOP; - RETURN reply; -END; -$$ language plpgsql IMMUTABLE; - --- --- Determine the Heads/Tails classifications from a numeric array --- --- @param in_array A numeric array of numbers to determine the best --- bins based on the Heads/Tails method. --- --- @param breaks The number of bins you want to find. --- --- - -CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ -DECLARE - element_count INT4; - arr_mean numeric; - i INT := 2; - reply numeric[]; -BEGIN - -- get the total size of our row - element_count := array_upper(in_array, 1) - array_lower(in_array, 1); - -- ensure the ordering of in_array - SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; - -- stop if no rows - IF element_count IS NULL THEN - RETURN NULL; - END IF; - -- stop if our breaks are more than our input array size - IF element_count < breaks THEN - RETURN in_array; - END IF; - - -- get our mean value - SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; - - reply = Array[arr_mean]; - -- slice our bread - LOOP - IF i > breaks THEN EXIT; END IF; - SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; - IF arr_mean IS NOT NULL THEN - reply = array_append(reply, arr_mean); - END IF; - i := i+1; - END LOOP; - RETURN reply; -END; -$$ language plpgsql IMMUTABLE; - --- --- Determine the Jenks classifications from a numeric array --- --- @param in_array A numeric array of numbers to determine the best --- bins based on the Jenks method. --- --- @param breaks The number of bins you want to find. --- --- @param iterations The number of different starting positions to test. --- --- @param invert Optional wheter to return the top of each bin (default) --- or the bottom. BOOLEAN, default=FALSE. --- --- - - -CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ -DECLARE - element_count INT4; - arr_mean NUMERIC; - bot INT; - top INT; - tops INT[]; - classes INT[][]; - i INT := 1; j INT := 1; - curr_result NUMERIC[]; - best_result NUMERIC[]; - seedtarget TEXT; - quant NUMERIC[]; - shuffles INT; -BEGIN - -- get the total size of our row - element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); - -- ensure the ordering of in_array - SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; - -- stop if no rows - IF element_count IS NULL THEN - RETURN NULL; - END IF; - -- stop if our breaks are more than our input array size - IF element_count < breaks THEN - RETURN in_array; - END IF; - - shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; - -- get our mean value - SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; - - -- assume best is actually Quantile - SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; - - -- if data is very very large, just return quant and be done - IF element_count > 5000000 THEN - RETURN quant; - END IF; - - -- change quant into bottom, top markers - LOOP - IF i = 1 THEN - bot = 1; - ELSE - -- use last top to find this bot - bot = top+1; - END IF; - IF i = breaks THEN - top = element_count; - ELSE - SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; - END IF; - IF i = 1 THEN - classes = ARRAY[ARRAY[bot,top]]; - ELSE - classes = ARRAY_CAT(classes,ARRAY[bot,top]); - END IF; - IF i > breaks THEN EXIT; END IF; - i = i+1; - END LOOP; - - best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); - - --set the seed so we can ensure the same results - SELECT setseed(0.4567) INTO seedtarget; - --loop through random starting positions - LOOP - IF j > iterations-1 THEN EXIT; END IF; - i = 1; - tops = ARRAY[element_count]; - LOOP - IF i = breaks THEN EXIT; END IF; - SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; - i = array_length(tops, 1); - END LOOP; - i = 1; - LOOP - IF i > breaks THEN EXIT; END IF; - IF i = 1 THEN - bot = 1; - ELSE - bot = top+1; - END IF; - top = tops[i]; - IF i = 1 THEN - classes = ARRAY[ARRAY[bot,top]]; - ELSE - classes = ARRAY_CAT(classes,ARRAY[bot,top]); - END IF; - i := i+1; - END LOOP; - curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); - - IF curr_result[1] > best_result[1] THEN - best_result = curr_result; - j = j-1; -- if we found a better result, add one more search - END IF; - j = j+1; - END LOOP; - - RETURN (best_result)[2:array_upper(best_result, 1)]; -END; -$$ language plpgsql IMMUTABLE; - - - --- --- Perform a single iteration of the Jenks classification --- - -CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ -DECLARE - tmp_val numeric; - new_classes int[][]; - tmp_class int[]; - i INT := 1; - j INT := 1; - side INT := 2; - sdam numeric; - gvf numeric := 0.0; - new_gvf numeric; - arr_gvf numeric[]; - class_avg numeric; - class_max_i INT; - class_min_i INT; - class_max numeric; - class_min numeric; - reply numeric[]; -BEGIN - - -- Calculate the sum of squared deviations from the array mean (SDAM). - SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; - --Identify the breaks for the lowest GVF - LOOP - i = 1; - LOOP - -- get our mean - SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; - -- find the deviation - SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; - IF i = 1 THEN - arr_gvf = ARRAY[tmp_val]; - -- init our min/max map for later - class_max = arr_gvf[i]; - class_min = arr_gvf[i]; - class_min_i = 1; - class_max_i = 1; - ELSE - arr_gvf = array_append(arr_gvf, tmp_val); - END IF; - i := i+1; - IF i > breaks THEN EXIT; END IF; - END LOOP; - -- calculate our new GVF - SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; - -- if no improvement was made, exit - IF new_gvf < gvf THEN EXIT; END IF; - gvf = new_gvf; - IF j > max_search THEN EXIT; END IF; - j = j+1; - i = 1; - LOOP - --establish directionality (uppward through classes or downward) - IF arr_gvf[i] < class_min THEN - class_min = arr_gvf[i]; - class_min_i = i; - END IF; - IF arr_gvf[i] > class_max THEN - class_max = arr_gvf[i]; - class_max_i = i; - END IF; - i := i+1; - IF i > breaks THEN EXIT; END IF; - END LOOP; - IF class_max_i > class_min_i THEN - class_min_i = class_max_i - 1; - ELSE - class_min_i = class_max_i + 1; - END IF; - --Move from higher class to a lower gid order - IF class_max_i > class_min_i THEN - classes[class_max_i][1] = classes[class_max_i][1] + 1; - classes[class_min_i][2] = classes[class_min_i][2] + 1; - ELSE -- Move from lower class UP into a higher class by gid - classes[class_max_i][2] = classes[class_max_i][2] - 1; - classes[class_min_i][1] = classes[class_min_i][1] - 1; - END IF; - END LOOP; - - i = 1; - LOOP - IF invert = TRUE THEN - side = 1; --default returns bottom side of breaks, invert returns top side - END IF; - reply = array_append(reply, in_array[classes[i][side]]); - i = i+1; - IF i > breaks THEN EXIT; END IF; - END LOOP; - - RETURN array_prepend(gvf, reply); - -END; -$$ language plpgsql IMMUTABLE; - - --- --- Determine the Quantile classifications from a numeric array --- --- @param in_array A numeric array of numbers to determine the best --- bins based on the Quantile method. --- --- @param breaks The number of bins you want to find. --- --- -CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ -DECLARE - element_count INT4; - break_size numeric; - tmp_val numeric; - i INT := 1; - reply numeric[]; -BEGIN - -- sort our values - SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; - -- get the total size of our data - element_count := array_length(in_array, 1); - break_size := element_count::numeric / breaks; - -- slice our bread - LOOP - IF i < breaks THEN - IF break_size * i % 1 > 0 THEN - SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; - ELSE - SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; - END IF; - ELSIF i = breaks THEN - -- select the last value - SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; - ELSE - EXIT; - END IF; - - reply = array_append(reply, tmp_val); - i := i+1; - END LOOP; - RETURN reply; -END; -$$ language plpgsql IMMUTABLE; diff --git a/src/pg/sql/cdb_utils.sql b/src/pg/sql/cdb_utils.sql new file mode 100644 index 0000000..8f87084 --- /dev/null +++ b/src/pg/sql/cdb_utils.sql @@ -0,0 +1,447 @@ +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; From 605e6e88aeec027b3b729a4ad990d7df0d4c6b43 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 30 Aug 2016 05:47:20 -0400 Subject: [PATCH 31/62] remove redundant function --- src/pg/sql/13_PIA.sql | 97 ------------------------------------------- 1 file changed, 97 deletions(-) diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index be16871..36682b3 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -120,100 +120,3 @@ BEGIN RETURN dist; END; $$ language plpgsql IMMUTABLE; - --- --- Fill given extent with a rectangular coverage --- --- @param ext Extent to fill. Only rectangles with center point falling --- inside the extent (or at the lower or leftmost edge) will --- be emitted. The returned hexagons will have the same SRID --- as this extent. --- --- @param width With of each rectangle --- --- @param height Height of each rectangle --- --- @param origin Optional origin to allow for exact tiling. --- If omitted the origin will be 0,0. --- The parameter is checked for having the same SRID --- as the extent. --- --- -CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) -RETURNS SETOF GEOMETRY -AS $$ -DECLARE - h GEOMETRY; -- rectangle cell - hstep FLOAT8; -- horizontal step - vstep FLOAT8; -- vertical step - hw FLOAT8; -- half width - hh FLOAT8; -- half height - vstart FLOAT8; - hstart FLOAT8; - hend FLOAT8; - vend FLOAT8; - xoff FLOAT8; - yoff FLOAT8; - xgrd FLOAT8; - ygrd FLOAT8; - x FLOAT8; - y FLOAT8; - srid INTEGER; -BEGIN - - srid := ST_SRID(ext); - - xoff := 0; - yoff := 0; - - IF origin IS NOT NULL THEN - IF ST_SRID(origin) != srid THEN - RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); - END IF; - xoff := ST_X(origin); - yoff := ST_Y(origin); - END IF; - - --RAISE DEBUG 'X offset: %', xoff; - --RAISE DEBUG 'Y offset: %', yoff; - - hw := width/2.0; - hh := height/2.0; - - xgrd := hw; - ygrd := hh; - --RAISE DEBUG 'X grid size: %', xgrd; - --RAISE DEBUG 'Y grid size: %', ygrd; - - hstep := width; - vstep := height; - - -- Tweak horizontal start on hstep grid from origin - hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; - --RAISE DEBUG 'hstart: %', hstart; - - -- Tweak vertical start on vstep grid from origin - vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; - --RAISE DEBUG 'vstart: %', vstart; - - hend := ST_XMax(ext); - vend := ST_YMax(ext); - - --RAISE DEBUG 'hend: %', hend; - --RAISE DEBUG 'vend: %', vend; - - x := hstart; - WHILE x < hend LOOP -- over X - y := vstart; - h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); - WHILE y < vend LOOP -- over Y - RETURN NEXT h; - h := ST_Translate(h, 0, vstep); - y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid - END LOOP; - x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid - END LOOP; - - RETURN; -END -$$ LANGUAGE 'plpgsql' IMMUTABLE; From 72ee1a6d4839d1035ba42fa89547cf16b74ffec3 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 30 Aug 2016 05:50:20 -0400 Subject: [PATCH 32/62] missed files --- doc/09_voronoi.md | 38 ++++ src/pg/sql/09_voronoi.sql | 236 +++++++++++++++++++++++ src/pg/test/expected/09_voronoi_test.out | 7 + src/pg/test/sql/09_voronoi_test.sql | 15 ++ 4 files changed, 296 insertions(+) create mode 100644 doc/09_voronoi.md create mode 100644 src/pg/sql/09_voronoi.sql create mode 100644 src/pg/test/expected/09_voronoi_test.out create mode 100644 src/pg/test/sql/09_voronoi_test.sql diff --git a/doc/09_voronoi.md b/doc/09_voronoi.md new file mode 100644 index 0000000..1a19103 --- /dev/null +++ b/doc/09_voronoi.md @@ -0,0 +1,38 @@ +## Voronoi + +Function to construct the [Voronoi Diagram](https://en.wikipedia.org/wiki/Voronoi_diagram) from a dataset of scatter points, clipped to the significant area + +PostGIS wil include this in future versions ([doc for dev branch](http://postgis.net/docs/manual-dev/ST_Voronoi.html)) and will perform faster for sure, but in the meantime... + + +### CDB_Voronoi (geom geometry[], buffer numeric DEFAULT 0.5, tolerance numeric DEFAULT 1e-9) + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| geom | geometry[] | Array of points's geometries | +| buffer | numeric | enlargment ratio for the envelope area used for the restraints| +| tolerance | numeric | Delaunay tolerance, optional | + +### Returns + +| Column Name | Type | Description | +|-------------|------|-------------| +| geom | geometry collection | Collection of polygons of the Voronoi cells| + + +#### Example Usage + +```sql +WITH a AS ( + SELECT + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)', 4326),ST_GeomFromText('POINT(2.1228 41.380)', 4326),ST_GeomFromText('POINT(2.1511 41.374)', 4326),ST_GeomFromText('POINT(2.1528 41.413)', 4326),ST_GeomFromText('POINT(2.165 41.391)', 4326),ST_GeomFromText('POINT(2.1498 41.371)', 4326),ST_GeomFromText('POINT(2.1533 41.368)', 4326),ST_GeomFromText('POINT(2.131386 41.41399)', 4326)] AS geomin +) +SELECT + st_transform( + (st_dump(CDB_voronoi(geomin, 0.2, 1e-9) + )).geom + , 3857) as the_geom_webmercator +FROM a; +``` diff --git a/src/pg/sql/09_voronoi.sql b/src/pg/sql/09_voronoi.sql new file mode 100644 index 0000000..361525c --- /dev/null +++ b/src/pg/sql/09_voronoi.sql @@ -0,0 +1,236 @@ +-- ============================================================================================= +-- +-- CDB_Voronoi +-- +-- ============================================================================================= +CREATE OR REPLACE FUNCTION CDB_voronoi( + IN geomin geometry[], + IN buffer numeric DEFAULT 0.5, + IN tolerance numeric DEFAULT 1e-9 + ) +RETURNS geometry AS $$ +DECLARE + geomout geometry; +BEGIN + -- we need to make the geometry calculations in (pseudo)meters!!! + with a as ( + SELECT unnest(geomin) as g1 + ), + b as( + SELECT st_transform(g1, 3857) g2 from a + ) + SELECT array_agg(g2) INTO geomin from b; + + WITH + convexhull_1 as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r + ), + clipper as( + SELECT + st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g + FROM convexhull_1 a + ), + env0 as ( + SELECT + (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e + FROM convexhull_1 a + ), + env as ( + SELECT + array_agg(env0.e) as e + FROM env0 + ), + sample AS ( + SELECT + ST_Collect(geomin || env.e) as geom + FROM env + ), + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as cg + ), + tin as ( + SELECT + ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd + FROM + sample + ), + tin_polygons as ( + SELECT + (gd).Path as id, + (gd).Geom as pg, + ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct + FROM tin + ), + tin_lines as ( + SELECT + id, + ST_ExteriorRing(pg) as lg + FROM tin_polygons + ), + tin_nodes as ( + SELECT + id, + ST_PointN(lg,1) p1, + ST_PointN(lg,2) p2, + ST_PointN(lg,3) p3 + FROM tin_lines + ), + tin_edges AS ( + SELECT + p.id, + UNNEST(ARRAY[ + ST_MakeLine(n.p1,n.p2) , + ST_MakeLine(n.p2,n.p3) , + ST_MakeLine(n.p3,n.p1)]) as Edge, + ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct, + CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN + TRUE + ELSE FALSE END AS ctx, + p.pg, + ST_within(p.ct, convexhull.cg) as ctin + FROM + tin_polygons p, + tin_nodes n, + convexhull + WHERE p.id = n.id + ), + voro_nodes as ( + SELECT + CASE WHEN x.ctx = TRUE THEN + ST_Centroid(x.edge) + ELSE + x.ct + END as xct, + CASE WHEN y.id is null THEN + CASE WHEN x.ctin = TRUE THEN + ST_SetSRID(ST_MakePoint( + ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), + ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) + ), ST_SRID(x.ct)) + END + ELSE + y.ct + END as yct + FROM + tin_edges x + LEFT OUTER JOIN + tin_edges y + ON x.id <> y.id AND ST_Equals(x.edge, y.edge) + ), + voro_edges as( + SELECT + ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v + FROM + voro_nodes + ), + voro_cells as( + SELECT + ST_Polygonize( + ST_Node( + ST_LineMerge( + ST_Union(v, ST_ExteriorRing( + ST_Convexhull(v) + ) + ) + ) + ) + ) as g + FROM + voro_edges + ), + voro_set as( + SELECT + (st_dump(v.g)).geom as g + FROM voro_cells v + ), + clipped_voro as( + SELECT + ST_intersection(c.g, v.g) as g + FROM + voro_set v, + clipper c + WHERE + ST_GeometryType(v.g) = 'ST_Polygon' + ) + SELECT + st_collect( + ST_Transform( + ST_ConvexHull(g), + 4326 + ) + ) + INTO geomout + FROM + clipped_voro; + RETURN geomout; +END; +$$ language plpgsql IMMUTABLE; + +/** ---------------------------------------------------------------------------------------- + * @function : FindCircle + * @precis : Function that determines if three points form a circle. If so a table containing + * centre and radius is returned. If not, a null table is returned. + * @version : 1.0.1 + * @param : p_pt1 : First point in curve + * @param : p_pt2 : Second point in curve + * @param : p_pt3 : Third point in curve + * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle + * or NULL if three points do not form a circle. + * @history : Simon Greener - Feb 2012 - Original coding. + * Rafa de la Torre - Aug 2016 - Small fix for type checking + * @copyright : Simon Greener @ 2012 + * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/) +**/ +CREATE OR REPLACE FUNCTION _Find_Circle( + IN p_pt1 geometry, + IN p_pt2 geometry, + IN p_pt3 geometry) + RETURNS geometry AS +$BODY$ +DECLARE + v_Centre geometry; + v_radius NUMERIC; + v_CX NUMERIC; + v_CY NUMERIC; + v_dA NUMERIC; + v_dB NUMERIC; + v_dC NUMERIC; + v_dD NUMERIC; + v_dE NUMERIC; + v_dF NUMERIC; + v_dG NUMERIC; +BEGIN + IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN + RAISE EXCEPTION 'All supplied points must be not null.'; + RETURN NULL; + END IF; + IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR + ST_GeometryType(p_pt2) <> 'ST_Point' OR + ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN + RAISE EXCEPTION 'All supplied geometries must be points.'; + RETURN NULL; + END IF; + v_dA := ST_X(p_pt2) - ST_X(p_pt1); + v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); + v_dC := ST_X(p_pt3) - ST_X(p_pt1); + v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); + v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); + v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); + v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); + -- If v_dG is zero then the three points are collinear and no finite-radius + -- circle through them exists. + IF ( v_dG = 0 ) THEN + RETURN NULL; + ELSE + v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; + v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; + v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); + END IF; + RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); +END; +$BODY$ + LANGUAGE plpgsql VOLATILE STRICT; + diff --git a/src/pg/test/expected/09_voronoi_test.out b/src/pg/test/expected/09_voronoi_test.out new file mode 100644 index 0000000..58b9e50 --- /dev/null +++ b/src/pg/test/expected/09_voronoi_test.out @@ -0,0 +1,7 @@ +SET client_min_messages TO WARNING; +\set ECHO none + avg_area +---------------------- + 0.000178661700690617 +(1 row) + diff --git a/src/pg/test/sql/09_voronoi_test.sql b/src/pg/test/sql/09_voronoi_test.sql new file mode 100644 index 0000000..032eeeb --- /dev/null +++ b/src/pg/test/sql/09_voronoi_test.sql @@ -0,0 +1,15 @@ +SET client_min_messages TO WARNING; +\set ECHO none + +WITH a AS ( + SELECT + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)', 4326),ST_GeomFromText('POINT(2.1228 41.380)', 4326),ST_GeomFromText('POINT(2.1511 41.374)', 4326),ST_GeomFromText('POINT(2.1528 41.413)', 4326),ST_GeomFromText('POINT(2.165 41.391)', 4326),ST_GeomFromText('POINT(2.1498 41.371)', 4326),ST_GeomFromText('POINT(2.1533 41.368)', 4326),ST_GeomFromText('POINT(2.131386 41.41399)', 4326)] AS geomin +), +b as( + SELECT + (st_dump(cdb_crankshaft.CDB_voronoi(geomin, 0.2, 1e-9))).geom as result + FROM a +) +SELECT + avg(st_area(result)) as avg_area +FROM b; From 37eb85350d4f5d374cf72674dd254dec324a5735 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 30 Aug 2016 05:55:34 -0400 Subject: [PATCH 33/62] fix projection --- doc/13_PIA.md | 5 ++++- src/pg/sql/13_PIA.sql | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/13_PIA.md b/doc/13_PIA.md index 35b980b..c8986e2 100644 --- a/doc/13_PIA.md +++ b/doc/13_PIA.md @@ -25,6 +25,9 @@ Function to find the [PIA](https://en.wikipedia.org/wiki/Pole_of_inaccessibility ```sql with a as( select st_geomfromtext('POLYGON((-432540.453078056 4949775.20452642,-432329.947920966 4951361.232584,-431245.028163694 4952223.31516671,-429131.071033529 4951768.00415574,-424622.07505895 4952843.13503987,-423688.327170174 4953499.20752423,-424086.294349759 4954968.38274191,-423068.388925945 4954378.63345336,-423387.653225542 4953355.67417084,-420594.869840519 4953781.00230592,-416026.095299382 4951484.06849063,-412483.018546414 4951024.5410983,-410490.399661215 4954502.24032205,-408186.197521284 4956398.91417441,-407627.262358013 4959300.94633864,-406948.770061627 4959874.85407739,-404949.583326472 4959047.74518163,-402570.908447199 4953743.46829807,-400971.358683991 4952193.11680804,-403533.488084088 4949649.89857885,-406335.177028373 4950193.19571096,-407790.456731515 4952391.46015616,-412060.672398345 4950381.2389307,-410716.93482498 4949156.7509561,-408464.162289794 4943912.8940387,-409350.599394983 4942819.84896006,-408087.791091424 4942451.6711778,-407274.045613725 4940572.4807777,-404446.196589102 4939976.71501489,-402422.964843936 4940450.3670813,-401010.654464241 4939054.8061663,-397647.247369412 4940679.80737878,-395658.413346901 4940528.84765185,-395536.852462953 4938829.79565997,-394268.923462818 4938003.7277717,-393388.720249116 4934757.80596815,-392393.301362444 4934326.71675815,-392573.527618037 4932323.40974412,-393464.640141837 4931903.10653605,-393085.597275686 4931094.7353605,-398426.261165985 4929156.87541607,-398261.174361137 4926238.00816416,-394045.059966834 4925765.18668498,-392982.960705174 4926391.81893628,-393090.272694301 4927176.84692181,-391648.240010564 4924626.06386961,-391889.914625075 4923086.14787613,-394345.177314013 4923235.086036,-395550.878718795 4917812.79243978,-399009.463978251 4912927.7157945,-398948.794855767 4911941.91010796,-398092.636652078 4911806.57392519,-401991.601817112 4911722.9204501,-406225.972607907 4914505.47286319,-411104.994569885 4912569.26941163,-412925.513522316 4913030.3608866,-414630.148884835 4914436.69169949,-414207.691417276 4919205.78028405,-418306.141109809 4917994.9580478,-424184.700779621 4918938.12432889,-426816.961458921 4923664.37379373,-420956.324227126 4923381.98014807,-420186.661267781 4924286.48693378,-420943.411166194 4926812.76394433,-419779.45457046 4928527.43466337,-419768.767899344 4930681.94459216,-421911.668097113 4930432.40620397,-423482.386112205 4933451.28047252,-427272.814773717 4934151.56473242,-427144.908678797 4939731.77191996,-428982.125554848 4940522.84445172,-428986.133056516 4942437.17281266,-431237.792396792 4947309.68284815,-432476.889648814 4947791.74800037,-432540.453078056 4949775.20452642))', 3857) as g +), +b as ( + select ST_Transform(g, 4326) as g from a ) -SELECT st_astext(CDB_PIA(g)) from a; +SELECT st_astext(CDB_PIA(g)) from b; ``` diff --git a/src/pg/sql/13_PIA.sql b/src/pg/sql/13_PIA.sql index 36682b3..d9a224d 100644 --- a/src/pg/sql/13_PIA.sql +++ b/src/pg/sql/13_PIA.sql @@ -32,6 +32,7 @@ DECLARE p geometry; BEGIN sqr := |/2; + polygon := ST_Transform(polygon, 3857); -- grid #0 cell size height := ST_YMax(polygon) - ST_YMin(polygon); @@ -87,7 +88,7 @@ BEGIN END LOOP; - RETURN ST_Centroid(best_c); + RETURN ST_transform(ST_Centroid(best_c), 4326); END; $$ language plpgsql IMMUTABLE; From 2b75da1db21232ad4a9435d534460e141a99085b Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 30 Aug 2016 06:02:42 -0400 Subject: [PATCH 34/62] fix test --- src/pg/test/expected/13_pia_test.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/test/expected/13_pia_test.out b/src/pg/test/expected/13_pia_test.out index d51c9c8..2367e20 100644 --- a/src/pg/test/expected/13_pia_test.out +++ b/src/pg/test/expected/13_pia_test.out @@ -2,6 +2,6 @@ SET client_min_messages TO WARNING; \set ECHO none st_astext ------------------------------------------- - POINT(-409081.865887381 4930017.52075948) + POINT(-3.67484492582767 40.4395084885993) (1 row) From 457f54007e603e97c039d5723c50b49f3c251c3c Mon Sep 17 00:00:00 2001 From: Carla Date: Tue, 30 Aug 2016 12:27:10 +0200 Subject: [PATCH 35/62] crankshaft typo --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 6f09042..96f5fb4 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -15,7 +15,7 @@ shall be performed by the designated *Release Manager*. 1. Generate an upgrade path from the previous to the next release by copying the generated release file. E.g: ```shell - cp release/cranckshaft--X.Y.Z.sql release/cranckshaft--A.B.C--X.Y.Z.sql + cp release/crankshaft--X.Y.Z.sql release/crankshaft--A.B.C--X.Y.Z.sql ``` NOTE: you can rely on this thanks to the compatibility checks. From a42eb400d8fd5199b98b5c27a0b24ff7c94e6de3 Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Tue, 30 Aug 2016 12:30:39 +0200 Subject: [PATCH 36/62] Release crankshaft 0.4.0 --- release/crankshaft--0.3.1--0.4.0.sql | 1948 +++++++++++++++++ release/crankshaft--0.4.0.sql | 1948 +++++++++++++++++ release/crankshaft.control | 2 +- .../0.4.0/crankshaft/crankshaft/__init__.py | 5 + .../crankshaft/clustering/__init__.py | 3 + .../crankshaft/clustering/kmeans.py | 18 + .../crankshaft/crankshaft/clustering/moran.py | 262 +++ .../crankshaft/pysal_utils/__init__.py | 2 + .../crankshaft/pysal_utils/pysal_utils.py | 188 ++ .../crankshaft/crankshaft/random_seeds.py | 11 + .../crankshaft/segmentation/__init__.py | 1 + .../crankshaft/segmentation/segmentation.py | 176 ++ .../space_time_dynamics/__init__.py | 2 + .../crankshaft/space_time_dynamics/markov.py | 189 ++ release/python/0.4.0/crankshaft/setup.py | 49 + release/python/0.4.0/crankshaft/setup.py-r | 49 + .../crankshaft/test/fixtures/kmeans.json | 1 + .../crankshaft/test/fixtures/markov.json | 1 + .../0.4.0/crankshaft/test/fixtures/moran.json | 52 + .../crankshaft/test/fixtures/neighbors.json | 54 + .../test/fixtures/neighbors_markov.json | 1 + .../python/0.4.0/crankshaft/test/helper.py | 13 + .../python/0.4.0/crankshaft/test/mock_plpy.py | 52 + .../crankshaft/test/test_cluster_kmeans.py | 38 + .../crankshaft/test/test_clustering_moran.py | 88 + .../0.4.0/crankshaft/test/test_pysal_utils.py | 142 ++ .../crankshaft/test/test_segmentation.py | 64 + .../test/test_space_time_dynamics.py | 324 +++ src/pg/crankshaft.control | 2 +- 29 files changed, 5683 insertions(+), 2 deletions(-) create mode 100644 release/crankshaft--0.3.1--0.4.0.sql create mode 100644 release/crankshaft--0.4.0.sql create mode 100644 release/python/0.4.0/crankshaft/crankshaft/__init__.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/clustering/__init__.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/clustering/kmeans.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/clustering/moran.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/pysal_utils/pysal_utils.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/random_seeds.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/segmentation/__init__.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/segmentation/segmentation.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/space_time_dynamics/__init__.py create mode 100644 release/python/0.4.0/crankshaft/crankshaft/space_time_dynamics/markov.py create mode 100644 release/python/0.4.0/crankshaft/setup.py create mode 100644 release/python/0.4.0/crankshaft/setup.py-r create mode 100644 release/python/0.4.0/crankshaft/test/fixtures/kmeans.json create mode 100644 release/python/0.4.0/crankshaft/test/fixtures/markov.json create mode 100644 release/python/0.4.0/crankshaft/test/fixtures/moran.json create mode 100644 release/python/0.4.0/crankshaft/test/fixtures/neighbors.json create mode 100644 release/python/0.4.0/crankshaft/test/fixtures/neighbors_markov.json create mode 100644 release/python/0.4.0/crankshaft/test/helper.py create mode 100644 release/python/0.4.0/crankshaft/test/mock_plpy.py create mode 100644 release/python/0.4.0/crankshaft/test/test_cluster_kmeans.py create mode 100644 release/python/0.4.0/crankshaft/test/test_clustering_moran.py create mode 100644 release/python/0.4.0/crankshaft/test/test_pysal_utils.py create mode 100644 release/python/0.4.0/crankshaft/test/test_segmentation.py create mode 100644 release/python/0.4.0/crankshaft/test/test_space_time_dynamics.py diff --git a/release/crankshaft--0.3.1--0.4.0.sql b/release/crankshaft--0.3.1--0.4.0.sql new file mode 100644 index 0000000..0619987 --- /dev/null +++ b/release/crankshaft--0.3.1--0.4.0.sql @@ -0,0 +1,1948 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.4.0'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. +CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION + CDB_PyAggS(current_state Numeric[], current_row Numeric[]) + returns NUMERIC[] as $$ + BEGIN + if array_upper(current_state,1) is null then + RAISE NOTICE 'setting state %',array_upper(current_row,1); + current_state[1] = array_upper(current_row,1); + end if; + return array_cat(current_state,current_row) ; + END + $$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_pyagg' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) ( + SFUNC = CDB_PyAggS, + STYPE = Numeric[], + INITCOND = "{}" + ); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment( + target NUMERIC[], + features NUMERIC[], + target_features NUMERIC[], + target_ids NUMERIC[], + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC) +AS $$ + import numpy as np + import plpy + + from crankshaft.segmentation import create_and_predict_segment_agg + model_params = {'n_estimators': n_estimators, + 'max_depth': max_depth, + 'subsample': subsample, + 'learning_rate': learning_rate, + 'min_samples_leaf': min_samples_leaf} + + def unpack2D(data): + dimension = data.pop(0) + a = np.array(data, dtype=float) + return a.reshape(len(a)/dimension, dimension) + + return create_and_predict_segment_agg(np.array(target, dtype=float), + unpack2D(features), + unpack2D(target_features), + target_ids, + model_params) + +$$ LANGUAGE plpythonu; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment ( + query TEXT, + variable_name TEXT, + target_table TEXT, + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC) +AS $$ + from crankshaft.segmentation import create_and_predict_segment + model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf} + return create_and_predict_segment(query,variable_name,target_table, model_params) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; +-- 0: nearest neighbor +-- 1: barymetric +-- 2: IDW + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN query text, + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + output numeric; +BEGIN + EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; +END; +$$ +language plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN geomin geometry[], + IN colin numeric[], + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + gs2 geometry[]; + vs2 numeric[]; + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + output := -999.999; + -- nearest + IF method = 0 THEN + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v) + SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1; + RETURN output; + + -- barymetric + ELSIF method = 1 THEN + WITH a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b), + d as (SELECT v FROM c WHERE ST_Within(point, v)) + SELECT v INTO g FROM d; + IF g is null THEN + -- out of the realm of the input data + RETURN -888.888; + END IF; + -- vertex of the selected cell + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg); + RETURN output; + + -- IDW + -- p1: limit the number of neighbors, 0->no limit + -- p2: order of distance decay, 0-> order 1 + ELSIF method = 2 THEN + + IF p2 = 0 THEN + p2 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g) + SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b; + IF p1::integer>0 THEN + gs2:=gs; + vs2:=vs; + FOR i IN 1..p1 + LOOP + gs2 := gs2 || gs[i]; + vs2 := vs2 || vs[i]; + END LOOP; + ELSE + gs2:=gs; + vs2:=vs; + END IF; + + WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v), + b as ( + SELECT + (1/ST_distance(point, a.g)^p2::integer) as k, + (a.v/ST_distance(point, a.g)^p2::integer) as f + FROM a + ) + SELECT sum(b.f)/sum(b.k) INTO output FROM b; + RETURN output; + + END IF; + + RETURN -777.777; + +END; +$$ +language plpgsql IMMUTABLE; +-- ============================================================================================= +-- +-- CDB_Voronoi +-- +-- ============================================================================================= +CREATE OR REPLACE FUNCTION CDB_voronoi( + IN geomin geometry[], + IN buffer numeric DEFAULT 0.5, + IN tolerance numeric DEFAULT 1e-9 + ) +RETURNS geometry AS $$ +DECLARE + geomout geometry; +BEGIN + -- we need to make the geometry calculations in (pseudo)meters!!! + with a as ( + SELECT unnest(geomin) as g1 + ), + b as( + SELECT st_transform(g1, 3857) g2 from a + ) + SELECT array_agg(g2) INTO geomin from b; + + WITH + convexhull_1 as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r + ), + clipper as( + SELECT + st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g + FROM convexhull_1 a + ), + env0 as ( + SELECT + (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e + FROM convexhull_1 a + ), + env as ( + SELECT + array_agg(env0.e) as e + FROM env0 + ), + sample AS ( + SELECT + ST_Collect(geomin || env.e) as geom + FROM env + ), + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as cg + ), + tin as ( + SELECT + ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd + FROM + sample + ), + tin_polygons as ( + SELECT + (gd).Path as id, + (gd).Geom as pg, + ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct + FROM tin + ), + tin_lines as ( + SELECT + id, + ST_ExteriorRing(pg) as lg + FROM tin_polygons + ), + tin_nodes as ( + SELECT + id, + ST_PointN(lg,1) p1, + ST_PointN(lg,2) p2, + ST_PointN(lg,3) p3 + FROM tin_lines + ), + tin_edges AS ( + SELECT + p.id, + UNNEST(ARRAY[ + ST_MakeLine(n.p1,n.p2) , + ST_MakeLine(n.p2,n.p3) , + ST_MakeLine(n.p3,n.p1)]) as Edge, + ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct, + CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN + TRUE + ELSE FALSE END AS ctx, + p.pg, + ST_within(p.ct, convexhull.cg) as ctin + FROM + tin_polygons p, + tin_nodes n, + convexhull + WHERE p.id = n.id + ), + voro_nodes as ( + SELECT + CASE WHEN x.ctx = TRUE THEN + ST_Centroid(x.edge) + ELSE + x.ct + END as xct, + CASE WHEN y.id is null THEN + CASE WHEN x.ctin = TRUE THEN + ST_SetSRID(ST_MakePoint( + ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), + ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) + ), ST_SRID(x.ct)) + END + ELSE + y.ct + END as yct + FROM + tin_edges x + LEFT OUTER JOIN + tin_edges y + ON x.id <> y.id AND ST_Equals(x.edge, y.edge) + ), + voro_edges as( + SELECT + ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v + FROM + voro_nodes + ), + voro_cells as( + SELECT + ST_Polygonize( + ST_Node( + ST_LineMerge( + ST_Union(v, ST_ExteriorRing( + ST_Convexhull(v) + ) + ) + ) + ) + ) as g + FROM + voro_edges + ), + voro_set as( + SELECT + (st_dump(v.g)).geom as g + FROM voro_cells v + ), + clipped_voro as( + SELECT + ST_intersection(c.g, v.g) as g + FROM + voro_set v, + clipper c + WHERE + ST_GeometryType(v.g) = 'ST_Polygon' + ) + SELECT + st_collect( + ST_Transform( + ST_ConvexHull(g), + 4326 + ) + ) + INTO geomout + FROM + clipped_voro; + RETURN geomout; +END; +$$ language plpgsql IMMUTABLE; + +/** ---------------------------------------------------------------------------------------- + * @function : FindCircle + * @precis : Function that determines if three points form a circle. If so a table containing + * centre and radius is returned. If not, a null table is returned. + * @version : 1.0.1 + * @param : p_pt1 : First point in curve + * @param : p_pt2 : Second point in curve + * @param : p_pt3 : Third point in curve + * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle + * or NULL if three points do not form a circle. + * @history : Simon Greener - Feb 2012 - Original coding. + * Rafa de la Torre - Aug 2016 - Small fix for type checking + * @copyright : Simon Greener @ 2012 + * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/) +**/ +CREATE OR REPLACE FUNCTION _Find_Circle( + IN p_pt1 geometry, + IN p_pt2 geometry, + IN p_pt3 geometry) + RETURNS geometry AS +$BODY$ +DECLARE + v_Centre geometry; + v_radius NUMERIC; + v_CX NUMERIC; + v_CY NUMERIC; + v_dA NUMERIC; + v_dB NUMERIC; + v_dC NUMERIC; + v_dD NUMERIC; + v_dE NUMERIC; + v_dF NUMERIC; + v_dG NUMERIC; +BEGIN + IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN + RAISE EXCEPTION 'All supplied points must be not null.'; + RETURN NULL; + END IF; + IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR + ST_GeometryType(p_pt2) <> 'ST_Point' OR + ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN + RAISE EXCEPTION 'All supplied geometries must be points.'; + RETURN NULL; + END IF; + v_dA := ST_X(p_pt2) - ST_X(p_pt1); + v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); + v_dC := ST_X(p_pt3) - ST_X(p_pt1); + v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); + v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); + v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); + v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); + -- If v_dG is zero then the three points are collinear and no finite-radius + -- circle through them exists. + IF ( v_dG = 0 ) THEN + RETURN NULL; + ELSE + v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; + v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; + v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); + END IF; + RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); +END; +$BODY$ + LANGUAGE plpgsql VOLATILE STRICT; + +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_weightedmean' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) ( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" + ); + END IF; +END +$$ LANGUAGE plpgsql; +-- Spatial Markov + +-- input table format: +-- id | geom | date_1 | date_2 | date_3 +-- 1 | Pt1 | 12.3 | 13.1 | 14.2 +-- 2 | Pt2 | 11.0 | 13.2 | 12.5 +-- ... +-- Sample Function call: +-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate', +-- Array['date_1', 'date_2', 'date_3']) + +CREATE OR REPLACE FUNCTION + CDB_SpatialMarkovTrend ( + subquery TEXT, + time_cols TEXT[], + num_classes INT DEFAULT 7, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT) +AS $$ + + from crankshaft.space_time_dynamics import spatial_markov_trend + + ## TODO: use named parameters or a dictionary + return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- input table format: identical to above but in a predictable format +-- Sample function call: +-- SELECT cdb_spatial_markov('SELECT * FROM real_estate', +-- 'date_1') + + +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col_min text, +-- time_col_max text, +-- date_format text, -- '_YYYY_MM_DD' +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- +-- -- input table format: +-- -- id | geom | date | measurement +-- -- 1 | Pt1 | 12/3 | 13.2 +-- -- 2 | Pt2 | 11/5 | 11.3 +-- -- 3 | Pt1 | 11/13 | 12.9 +-- -- 4 | Pt3 | 12/19 | 10.1 +-- -- ... +-- +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col text, +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +CREATE OR REPLACE FUNCTION CDB_PIA( + IN polygon geometry, + IN tolerance numeric DEFAULT 1.0 + ) +RETURNS geometry AS $$ +DECLARE + env geometry[]; + cells geometry[]; + cell geometry; + best_c geometry; + best_d numeric; + test_d numeric; + test_mx numeric; + test_h numeric; + test_cells geometry[]; + width numeric; + height numeric; + h numeric; + i integer; + n integer; + sqr numeric; + p geometry; +BEGIN + sqr := |/2; + polygon := ST_Transform(polygon, 3857); + + -- grid #0 cell size + height := ST_YMax(polygon) - ST_YMin(polygon); + width := ST_XMax(polygon) - ST_XMin(polygon); + h := 0.5*LEAST(height, width); + + -- grid #0 + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c + ) + SELECT array_agg(c) INTO cells FROM c1; + + -- 1st guess: centroid + best_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(Polygon)); + + -- looping the loop + n := array_length(cells,1); + i := 1; + LOOP + + EXIT WHEN i > n; + + cell := cells[i]; + i := i+1; + + -- cell side size, it's square + test_h := ST_XMax(cell) - ST_XMin(cell) ; + + -- check distance + test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell)); + IF test_d > best_d THEN + best_d := test_d; + best_c := cells[i]; + END IF; + + -- longest distance within the cell + test_mx := test_d + (test_h/2 * sqr); + + -- if the cell has no chance to contains the desired point, continue + CONTINUE WHEN test_mx - best_d <= tolerance; + + -- resample the cell + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c + ) + SELECT array_agg(c) INTO test_cells FROM c1; + + -- concat the new cells to the former array + cells := cells || test_cells; + + -- prepare next iteration + n := array_length(cells,1); + + END LOOP; + + RETURN ST_transform(ST_Centroid(best_c), 4326); + +END; +$$ language plpgsql IMMUTABLE; + + +-- signed distance point to polygon with holes +-- negative is the point is out the polygon +CREATE OR REPLACE FUNCTION _Signed_Dist( + IN polygon geometry, + IN point geometry + ) +RETURNS numeric AS $$ +DECLARE + i integer; + within integer; + holes integer; + dist numeric; +BEGIN + dist := 1e999; + SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist; + SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within; + SELECT ST_NumInteriorRings(polygon) INTO holes; + IF holes > 0 THEN + FOR i IN 1..holes + LOOP + SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist; + END LOOP; + END IF; + dist := dist * within::numeric; + RETURN dist; +END; +$$ language plpgsql IMMUTABLE; +-- +-- Iterative densification of a set of points using Delaunay triangulation +-- the new points have as assigned value the average value of the 3 vertex (centroid) +-- +-- @param geomin - array of geometries (points) +-- +-- @param colin - array of numeric values in that points +-- +-- @param iterations - integer, number of iterations +-- +-- +-- Returns: TABLE(geomout geometry, colout numeric) +-- +-- +CREATE OR REPLACE FUNCTION CDB_Densify( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + geotemp geometry[]; + coltemp numeric[]; + i integer; + gs geometry[]; + g geometry; + vertex geometry[]; + va numeric; + vb numeric; + vc numeric; + center geometry; + centerval numeric; + tmp integer; +BEGIN + geotemp := geomin; + coltemp := colin; + FOR i IN 1..iterations + LOOP + -- generate TIN + WITH a as (SELECT unnest(geotemp) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + -- loop cells + FOREACH g IN ARRAY gs + LOOP + -- append centroid + SELECT ST_Centroid(g) INTO center; + geotemp := array_append(geotemp, center); + -- retrieve the value of each vertex + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + END LOOP; + RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_TINmap( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + p geometry[]; + vals numeric[]; + gs geometry[]; + g geometry; + vertex geometry[]; + centerval numeric; + va numeric; + vb numeric; + vc numeric; + coltemp numeric[]; +BEGIN + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens; + WITH a as (SELECT unnest(p) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + FOREACH g IN ARRAY gs + LOOP + -- retrieve the vertex of each triangle + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_Contour( + IN geomin geometry[], + IN colin numeric[], + IN buffer numeric, + IN intmethod integer, + IN classmethod integer, + IN steps integer, + IN max_time integer DEFAULT 60000 + ) +RETURNS TABLE( + the_geom geometry, + bin integer, + min_value numeric, + max_value numeric, + avg_value numeric +) AS $$ +DECLARE + cell_count integer; + tin geometry[]; +BEGIN + -- calc the cell size in web mercator units + -- WITH center as ( + -- SELECT ST_centroid(ST_Collect(geomin)) as c + -- ) + -- SELECT + -- round(resolution / cos(ST_y(c) * pi()/180)) + -- INTO cell + -- FROM center; + -- raise notice 'Resol: %', cell; + + -- calc the optimal number of cells for the current dataset + SELECT + CASE intmethod + WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) + WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797) + WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218) + ELSE 10000 + END INTO cell_count; + + -- we don't have iterative barycentric interpolation in CDB_interpolation, + -- and it's a costy function, so let's make a custom one here till + -- we update the code + -- tin := ARRAY[]::geometry[]; + IF intmethod=1 THEN + WITH + a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b) + SELECT array_agg(v) INTO tin FROM c; + END IF; + -- Delaunay stuff performed just ONCE!! + + -- magic + RETURN QUERY + WITH + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r + ), + envelope as ( + SELECT + st_expand(a.g, a.r) as e + FROM convexhull a + ), + envelope3857 as( + SELECT + ST_Transform(e, 3857) as geom + FROM envelope + ), + resolution as( + SELECT + round(|/ ( + ST_area(geom) / cell_count + )) as cell + FROM envelope3857 + ), + grid as( + SELECT + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom + FROM envelope3857 e, resolution r + ), + interp as( + SELECT + geom, + CASE + WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) + ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + END as val + FROM grid + ), + classes as( + SELECT CASE + WHEN classmethod = 0 THEN + cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps) + WHEN classmethod = 1 THEN + cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps) + WHEN classmethod = 2 THEN + cdb_crankshaft.CDB_JenksBins(array_agg(val), steps) + ELSE + cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps) + END as b + FROM interp + where val is not null + ), + classified as( + SELECT + i.*, + width_bucket(i.val, c.b) as bucket + FROM interp i left join classes c + ON 1=1 + ), + classified2 as( + SELECT + geom, + val, + CASE + WHEN bucket = steps THEN bucket - 1 + ELSE bucket + END as b + FROM classified + ), + final as( + SELECT + st_union(geom) as the_geom, + b as bin, + min(val) as min_value, + max(val) as max_value, + avg(val) as avg_value + FROM classified2 + GROUP BY bin + ) + SELECT + * + FROM final + where final.bin is not null + ; +END; +$$ language plpgsql; + + + +-- ===================================================================== +-- Interp in grid, so we can use barycentric with a precalculated tin (NNI) +-- ===================================================================== +CREATE OR REPLACE FUNCTION _interp_in_tin( + IN geomin geometry[], + IN colin numeric[], + IN tin geometry[], + IN point geometry + ) +RETURNS numeric AS +$$ +DECLARE + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- get the cell the point is within + WITH + a as (SELECT unnest(tin) as v), + b as (SELECT v FROM a WHERE ST_Within(point, v)) + SELECT v INTO g FROM b; + + -- if we're out of the data realm, + -- return null + IF g is null THEN + RETURN null; + END IF; + + -- vertex of the selected cell + WITH a AS ( + SELECT (ST_DumpPoints(g)).geom AS v + ) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + -- calc the areas + SELECT + ST_area(g), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1); + RETURN output; +END; +$$ +language plpgsql; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. +-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; diff --git a/release/crankshaft--0.4.0.sql b/release/crankshaft--0.4.0.sql new file mode 100644 index 0000000..0619987 --- /dev/null +++ b/release/crankshaft--0.4.0.sql @@ -0,0 +1,1948 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.4.0'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. +CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION + CDB_PyAggS(current_state Numeric[], current_row Numeric[]) + returns NUMERIC[] as $$ + BEGIN + if array_upper(current_state,1) is null then + RAISE NOTICE 'setting state %',array_upper(current_row,1); + current_state[1] = array_upper(current_row,1); + end if; + return array_cat(current_state,current_row) ; + END + $$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_pyagg' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) ( + SFUNC = CDB_PyAggS, + STYPE = Numeric[], + INITCOND = "{}" + ); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment( + target NUMERIC[], + features NUMERIC[], + target_features NUMERIC[], + target_ids NUMERIC[], + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC) +AS $$ + import numpy as np + import plpy + + from crankshaft.segmentation import create_and_predict_segment_agg + model_params = {'n_estimators': n_estimators, + 'max_depth': max_depth, + 'subsample': subsample, + 'learning_rate': learning_rate, + 'min_samples_leaf': min_samples_leaf} + + def unpack2D(data): + dimension = data.pop(0) + a = np.array(data, dtype=float) + return a.reshape(len(a)/dimension, dimension) + + return create_and_predict_segment_agg(np.array(target, dtype=float), + unpack2D(features), + unpack2D(target_features), + target_ids, + model_params) + +$$ LANGUAGE plpythonu; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment ( + query TEXT, + variable_name TEXT, + target_table TEXT, + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC) +AS $$ + from crankshaft.segmentation import create_and_predict_segment + model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf} + return create_and_predict_segment(query,variable_name,target_table, model_params) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; +-- 0: nearest neighbor +-- 1: barymetric +-- 2: IDW + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN query text, + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + output numeric; +BEGIN + EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; +END; +$$ +language plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN geomin geometry[], + IN colin numeric[], + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + gs2 geometry[]; + vs2 numeric[]; + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + output := -999.999; + -- nearest + IF method = 0 THEN + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v) + SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1; + RETURN output; + + -- barymetric + ELSIF method = 1 THEN + WITH a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b), + d as (SELECT v FROM c WHERE ST_Within(point, v)) + SELECT v INTO g FROM d; + IF g is null THEN + -- out of the realm of the input data + RETURN -888.888; + END IF; + -- vertex of the selected cell + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg); + RETURN output; + + -- IDW + -- p1: limit the number of neighbors, 0->no limit + -- p2: order of distance decay, 0-> order 1 + ELSIF method = 2 THEN + + IF p2 = 0 THEN + p2 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g) + SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b; + IF p1::integer>0 THEN + gs2:=gs; + vs2:=vs; + FOR i IN 1..p1 + LOOP + gs2 := gs2 || gs[i]; + vs2 := vs2 || vs[i]; + END LOOP; + ELSE + gs2:=gs; + vs2:=vs; + END IF; + + WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v), + b as ( + SELECT + (1/ST_distance(point, a.g)^p2::integer) as k, + (a.v/ST_distance(point, a.g)^p2::integer) as f + FROM a + ) + SELECT sum(b.f)/sum(b.k) INTO output FROM b; + RETURN output; + + END IF; + + RETURN -777.777; + +END; +$$ +language plpgsql IMMUTABLE; +-- ============================================================================================= +-- +-- CDB_Voronoi +-- +-- ============================================================================================= +CREATE OR REPLACE FUNCTION CDB_voronoi( + IN geomin geometry[], + IN buffer numeric DEFAULT 0.5, + IN tolerance numeric DEFAULT 1e-9 + ) +RETURNS geometry AS $$ +DECLARE + geomout geometry; +BEGIN + -- we need to make the geometry calculations in (pseudo)meters!!! + with a as ( + SELECT unnest(geomin) as g1 + ), + b as( + SELECT st_transform(g1, 3857) g2 from a + ) + SELECT array_agg(g2) INTO geomin from b; + + WITH + convexhull_1 as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r + ), + clipper as( + SELECT + st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g + FROM convexhull_1 a + ), + env0 as ( + SELECT + (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e + FROM convexhull_1 a + ), + env as ( + SELECT + array_agg(env0.e) as e + FROM env0 + ), + sample AS ( + SELECT + ST_Collect(geomin || env.e) as geom + FROM env + ), + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as cg + ), + tin as ( + SELECT + ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd + FROM + sample + ), + tin_polygons as ( + SELECT + (gd).Path as id, + (gd).Geom as pg, + ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct + FROM tin + ), + tin_lines as ( + SELECT + id, + ST_ExteriorRing(pg) as lg + FROM tin_polygons + ), + tin_nodes as ( + SELECT + id, + ST_PointN(lg,1) p1, + ST_PointN(lg,2) p2, + ST_PointN(lg,3) p3 + FROM tin_lines + ), + tin_edges AS ( + SELECT + p.id, + UNNEST(ARRAY[ + ST_MakeLine(n.p1,n.p2) , + ST_MakeLine(n.p2,n.p3) , + ST_MakeLine(n.p3,n.p1)]) as Edge, + ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct, + CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN + TRUE + ELSE FALSE END AS ctx, + p.pg, + ST_within(p.ct, convexhull.cg) as ctin + FROM + tin_polygons p, + tin_nodes n, + convexhull + WHERE p.id = n.id + ), + voro_nodes as ( + SELECT + CASE WHEN x.ctx = TRUE THEN + ST_Centroid(x.edge) + ELSE + x.ct + END as xct, + CASE WHEN y.id is null THEN + CASE WHEN x.ctin = TRUE THEN + ST_SetSRID(ST_MakePoint( + ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), + ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) + ), ST_SRID(x.ct)) + END + ELSE + y.ct + END as yct + FROM + tin_edges x + LEFT OUTER JOIN + tin_edges y + ON x.id <> y.id AND ST_Equals(x.edge, y.edge) + ), + voro_edges as( + SELECT + ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v + FROM + voro_nodes + ), + voro_cells as( + SELECT + ST_Polygonize( + ST_Node( + ST_LineMerge( + ST_Union(v, ST_ExteriorRing( + ST_Convexhull(v) + ) + ) + ) + ) + ) as g + FROM + voro_edges + ), + voro_set as( + SELECT + (st_dump(v.g)).geom as g + FROM voro_cells v + ), + clipped_voro as( + SELECT + ST_intersection(c.g, v.g) as g + FROM + voro_set v, + clipper c + WHERE + ST_GeometryType(v.g) = 'ST_Polygon' + ) + SELECT + st_collect( + ST_Transform( + ST_ConvexHull(g), + 4326 + ) + ) + INTO geomout + FROM + clipped_voro; + RETURN geomout; +END; +$$ language plpgsql IMMUTABLE; + +/** ---------------------------------------------------------------------------------------- + * @function : FindCircle + * @precis : Function that determines if three points form a circle. If so a table containing + * centre and radius is returned. If not, a null table is returned. + * @version : 1.0.1 + * @param : p_pt1 : First point in curve + * @param : p_pt2 : Second point in curve + * @param : p_pt3 : Third point in curve + * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle + * or NULL if three points do not form a circle. + * @history : Simon Greener - Feb 2012 - Original coding. + * Rafa de la Torre - Aug 2016 - Small fix for type checking + * @copyright : Simon Greener @ 2012 + * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/) +**/ +CREATE OR REPLACE FUNCTION _Find_Circle( + IN p_pt1 geometry, + IN p_pt2 geometry, + IN p_pt3 geometry) + RETURNS geometry AS +$BODY$ +DECLARE + v_Centre geometry; + v_radius NUMERIC; + v_CX NUMERIC; + v_CY NUMERIC; + v_dA NUMERIC; + v_dB NUMERIC; + v_dC NUMERIC; + v_dD NUMERIC; + v_dE NUMERIC; + v_dF NUMERIC; + v_dG NUMERIC; +BEGIN + IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN + RAISE EXCEPTION 'All supplied points must be not null.'; + RETURN NULL; + END IF; + IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR + ST_GeometryType(p_pt2) <> 'ST_Point' OR + ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN + RAISE EXCEPTION 'All supplied geometries must be points.'; + RETURN NULL; + END IF; + v_dA := ST_X(p_pt2) - ST_X(p_pt1); + v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); + v_dC := ST_X(p_pt3) - ST_X(p_pt1); + v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); + v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); + v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); + v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); + -- If v_dG is zero then the three points are collinear and no finite-radius + -- circle through them exists. + IF ( v_dG = 0 ) THEN + RETURN NULL; + ELSE + v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; + v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; + v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); + END IF; + RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); +END; +$BODY$ + LANGUAGE plpgsql VOLATILE STRICT; + +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_weightedmean' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) ( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" + ); + END IF; +END +$$ LANGUAGE plpgsql; +-- Spatial Markov + +-- input table format: +-- id | geom | date_1 | date_2 | date_3 +-- 1 | Pt1 | 12.3 | 13.1 | 14.2 +-- 2 | Pt2 | 11.0 | 13.2 | 12.5 +-- ... +-- Sample Function call: +-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate', +-- Array['date_1', 'date_2', 'date_3']) + +CREATE OR REPLACE FUNCTION + CDB_SpatialMarkovTrend ( + subquery TEXT, + time_cols TEXT[], + num_classes INT DEFAULT 7, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT) +AS $$ + + from crankshaft.space_time_dynamics import spatial_markov_trend + + ## TODO: use named parameters or a dictionary + return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- input table format: identical to above but in a predictable format +-- Sample function call: +-- SELECT cdb_spatial_markov('SELECT * FROM real_estate', +-- 'date_1') + + +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col_min text, +-- time_col_max text, +-- date_format text, -- '_YYYY_MM_DD' +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- +-- -- input table format: +-- -- id | geom | date | measurement +-- -- 1 | Pt1 | 12/3 | 13.2 +-- -- 2 | Pt2 | 11/5 | 11.3 +-- -- 3 | Pt1 | 11/13 | 12.9 +-- -- 4 | Pt3 | 12/19 | 10.1 +-- -- ... +-- +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col text, +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +CREATE OR REPLACE FUNCTION CDB_PIA( + IN polygon geometry, + IN tolerance numeric DEFAULT 1.0 + ) +RETURNS geometry AS $$ +DECLARE + env geometry[]; + cells geometry[]; + cell geometry; + best_c geometry; + best_d numeric; + test_d numeric; + test_mx numeric; + test_h numeric; + test_cells geometry[]; + width numeric; + height numeric; + h numeric; + i integer; + n integer; + sqr numeric; + p geometry; +BEGIN + sqr := |/2; + polygon := ST_Transform(polygon, 3857); + + -- grid #0 cell size + height := ST_YMax(polygon) - ST_YMin(polygon); + width := ST_XMax(polygon) - ST_XMin(polygon); + h := 0.5*LEAST(height, width); + + -- grid #0 + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c + ) + SELECT array_agg(c) INTO cells FROM c1; + + -- 1st guess: centroid + best_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(Polygon)); + + -- looping the loop + n := array_length(cells,1); + i := 1; + LOOP + + EXIT WHEN i > n; + + cell := cells[i]; + i := i+1; + + -- cell side size, it's square + test_h := ST_XMax(cell) - ST_XMin(cell) ; + + -- check distance + test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell)); + IF test_d > best_d THEN + best_d := test_d; + best_c := cells[i]; + END IF; + + -- longest distance within the cell + test_mx := test_d + (test_h/2 * sqr); + + -- if the cell has no chance to contains the desired point, continue + CONTINUE WHEN test_mx - best_d <= tolerance; + + -- resample the cell + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c + ) + SELECT array_agg(c) INTO test_cells FROM c1; + + -- concat the new cells to the former array + cells := cells || test_cells; + + -- prepare next iteration + n := array_length(cells,1); + + END LOOP; + + RETURN ST_transform(ST_Centroid(best_c), 4326); + +END; +$$ language plpgsql IMMUTABLE; + + +-- signed distance point to polygon with holes +-- negative is the point is out the polygon +CREATE OR REPLACE FUNCTION _Signed_Dist( + IN polygon geometry, + IN point geometry + ) +RETURNS numeric AS $$ +DECLARE + i integer; + within integer; + holes integer; + dist numeric; +BEGIN + dist := 1e999; + SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist; + SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within; + SELECT ST_NumInteriorRings(polygon) INTO holes; + IF holes > 0 THEN + FOR i IN 1..holes + LOOP + SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist; + END LOOP; + END IF; + dist := dist * within::numeric; + RETURN dist; +END; +$$ language plpgsql IMMUTABLE; +-- +-- Iterative densification of a set of points using Delaunay triangulation +-- the new points have as assigned value the average value of the 3 vertex (centroid) +-- +-- @param geomin - array of geometries (points) +-- +-- @param colin - array of numeric values in that points +-- +-- @param iterations - integer, number of iterations +-- +-- +-- Returns: TABLE(geomout geometry, colout numeric) +-- +-- +CREATE OR REPLACE FUNCTION CDB_Densify( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + geotemp geometry[]; + coltemp numeric[]; + i integer; + gs geometry[]; + g geometry; + vertex geometry[]; + va numeric; + vb numeric; + vc numeric; + center geometry; + centerval numeric; + tmp integer; +BEGIN + geotemp := geomin; + coltemp := colin; + FOR i IN 1..iterations + LOOP + -- generate TIN + WITH a as (SELECT unnest(geotemp) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + -- loop cells + FOREACH g IN ARRAY gs + LOOP + -- append centroid + SELECT ST_Centroid(g) INTO center; + geotemp := array_append(geotemp, center); + -- retrieve the value of each vertex + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + END LOOP; + RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_TINmap( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + p geometry[]; + vals numeric[]; + gs geometry[]; + g geometry; + vertex geometry[]; + centerval numeric; + va numeric; + vb numeric; + vc numeric; + coltemp numeric[]; +BEGIN + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens; + WITH a as (SELECT unnest(p) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + FOREACH g IN ARRAY gs + LOOP + -- retrieve the vertex of each triangle + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_Contour( + IN geomin geometry[], + IN colin numeric[], + IN buffer numeric, + IN intmethod integer, + IN classmethod integer, + IN steps integer, + IN max_time integer DEFAULT 60000 + ) +RETURNS TABLE( + the_geom geometry, + bin integer, + min_value numeric, + max_value numeric, + avg_value numeric +) AS $$ +DECLARE + cell_count integer; + tin geometry[]; +BEGIN + -- calc the cell size in web mercator units + -- WITH center as ( + -- SELECT ST_centroid(ST_Collect(geomin)) as c + -- ) + -- SELECT + -- round(resolution / cos(ST_y(c) * pi()/180)) + -- INTO cell + -- FROM center; + -- raise notice 'Resol: %', cell; + + -- calc the optimal number of cells for the current dataset + SELECT + CASE intmethod + WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) + WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797) + WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218) + ELSE 10000 + END INTO cell_count; + + -- we don't have iterative barycentric interpolation in CDB_interpolation, + -- and it's a costy function, so let's make a custom one here till + -- we update the code + -- tin := ARRAY[]::geometry[]; + IF intmethod=1 THEN + WITH + a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b) + SELECT array_agg(v) INTO tin FROM c; + END IF; + -- Delaunay stuff performed just ONCE!! + + -- magic + RETURN QUERY + WITH + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r + ), + envelope as ( + SELECT + st_expand(a.g, a.r) as e + FROM convexhull a + ), + envelope3857 as( + SELECT + ST_Transform(e, 3857) as geom + FROM envelope + ), + resolution as( + SELECT + round(|/ ( + ST_area(geom) / cell_count + )) as cell + FROM envelope3857 + ), + grid as( + SELECT + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom + FROM envelope3857 e, resolution r + ), + interp as( + SELECT + geom, + CASE + WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) + ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + END as val + FROM grid + ), + classes as( + SELECT CASE + WHEN classmethod = 0 THEN + cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps) + WHEN classmethod = 1 THEN + cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps) + WHEN classmethod = 2 THEN + cdb_crankshaft.CDB_JenksBins(array_agg(val), steps) + ELSE + cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps) + END as b + FROM interp + where val is not null + ), + classified as( + SELECT + i.*, + width_bucket(i.val, c.b) as bucket + FROM interp i left join classes c + ON 1=1 + ), + classified2 as( + SELECT + geom, + val, + CASE + WHEN bucket = steps THEN bucket - 1 + ELSE bucket + END as b + FROM classified + ), + final as( + SELECT + st_union(geom) as the_geom, + b as bin, + min(val) as min_value, + max(val) as max_value, + avg(val) as avg_value + FROM classified2 + GROUP BY bin + ) + SELECT + * + FROM final + where final.bin is not null + ; +END; +$$ language plpgsql; + + + +-- ===================================================================== +-- Interp in grid, so we can use barycentric with a precalculated tin (NNI) +-- ===================================================================== +CREATE OR REPLACE FUNCTION _interp_in_tin( + IN geomin geometry[], + IN colin numeric[], + IN tin geometry[], + IN point geometry + ) +RETURNS numeric AS +$$ +DECLARE + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- get the cell the point is within + WITH + a as (SELECT unnest(tin) as v), + b as (SELECT v FROM a WHERE ST_Within(point, v)) + SELECT v INTO g FROM b; + + -- if we're out of the data realm, + -- return null + IF g is null THEN + RETURN null; + END IF; + + -- vertex of the selected cell + WITH a AS ( + SELECT (ST_DumpPoints(g)).geom AS v + ) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + -- calc the areas + SELECT + ST_area(g), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1); + RETURN output; +END; +$$ +language plpgsql; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. +-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; diff --git a/release/crankshaft.control b/release/crankshaft.control index bf7cf0f..7df077c 100644 --- a/release/crankshaft.control +++ b/release/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.3.1' +default_version = '0.4.0' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft diff --git a/release/python/0.4.0/crankshaft/crankshaft/__init__.py b/release/python/0.4.0/crankshaft/crankshaft/__init__.py new file mode 100644 index 0000000..4e06bc5 --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/__init__.py @@ -0,0 +1,5 @@ +"""Import all modules""" +import crankshaft.random_seeds +import crankshaft.clustering +import crankshaft.space_time_dynamics +import crankshaft.segmentation diff --git a/release/python/0.4.0/crankshaft/crankshaft/clustering/__init__.py b/release/python/0.4.0/crankshaft/crankshaft/clustering/__init__.py new file mode 100644 index 0000000..ed34fe0 --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/clustering/__init__.py @@ -0,0 +1,3 @@ +"""Import all functions from for clustering""" +from moran import * +from kmeans import * diff --git a/release/python/0.4.0/crankshaft/crankshaft/clustering/kmeans.py b/release/python/0.4.0/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..4134062 --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,18 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/release/python/0.4.0/crankshaft/crankshaft/clustering/moran.py b/release/python/0.4.0/crankshaft/crankshaft/clustering/moran.py new file mode 100644 index 0000000..3282f5f --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/clustering/moran.py @@ -0,0 +1,262 @@ +""" +Moran's I geostatistics (global clustering & outliers presence) +""" + +# TODO: Fill in local neighbors which have null/NoneType values with the +# average of the their neighborhood + +import pysal as ps +import plpy +from collections import OrderedDict + +# crankshaft module +import crankshaft.pysal_utils as pu + +# High level interface --------------------------------------- + +def moran(subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. + Andy Eschbacher + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + attr_vals = pu.get_attributes(result) + + ## calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) + + return zip([moran_global.I], [moran_global.EI]) + +def moran_local(subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(5) + + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) + Andy Eschbacher + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", numerator), + ("attr2", denominator) + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) + + return zip([lisa_rate.I], [lisa_rate.EI]) + +def moran_local_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = OrderedDict([("id_col", id_col), + ("numerator", numerator), + ("denominator", denominator), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(5) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_local_bv(subquery, attr1, attr2, + permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ + plpy.notice('** Constructing query') + + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr1), + ("attr2", attr2), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(4) + except plpy.SPIError: + plpy.error("Error: areas of interest query failed, " \ + "check input parameters") + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(4) + + ## collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + plpy.notice("len of Is: %d" % len(lisa.Is)) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + plpy.notice('** Finished calculations') + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + +# Low level functions ---------------------------------------- + +def map_quads(coord): + """ + Map a quadrant number to Moran's I designation + HH=1, LH=2, LL=3, HL=4 + Input: + @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') + """ + if coord == 1: + return 'HH' + elif coord == 2: + return 'LH' + elif coord == 3: + return 'LL' + elif coord == 4: + return 'HL' + else: + return None + +def quad_position(quads): + """ + Produce Moran's I classification based of n + Input: + @param quads ndarray: an array of quads classified by + 1-4 (PySAL default) + Output: + @param list: an array of quads classied by 'HH', 'LL', etc. + """ + return [map_quads(q) for q in quads] diff --git a/release/python/0.4.0/crankshaft/crankshaft/pysal_utils/__init__.py b/release/python/0.4.0/crankshaft/crankshaft/pysal_utils/__init__.py new file mode 100644 index 0000000..fdf073b --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/pysal_utils/__init__.py @@ -0,0 +1,2 @@ +"""Import all functions for pysal_utils""" +from crankshaft.pysal_utils.pysal_utils import * diff --git a/release/python/0.4.0/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/release/python/0.4.0/crankshaft/crankshaft/pysal_utils/pysal_utils.py new file mode 100644 index 0000000..4622925 --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -0,0 +1,188 @@ +""" + Utilities module for generic PySAL functionality, mainly centered on + translating queries into numpy arrays or PySAL weights objects +""" + +import numpy as np +import pysal as ps + +def construct_neighbor_query(w_type, query_vals): + """Return query (a string) used for finding neighbors + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query + """ + + if w_type.lower() == 'knn': + return knn(query_vals) + else: + return queen(query_vals) + +## Build weight object +def get_weight(query_res, w_type='knn', num_ngbrs=5): + """ + Construct PySAL weight from return value of query + @param query_res dict-like: query results with attributes and neighbors + """ + # if w_type.lower() == 'knn': + # row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs + # weights = {x['id']: row_normed_weights for x in query_res} + # else: + # weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) + # if len(x['neighbors']) > 0 + # else [] for x in query_res} + + neighbors = {x['id']: x['neighbors'] for x in query_res} + print 'len of neighbors: %d' % len(neighbors) + + built_weight = ps.W(neighbors) + built_weight.transform = 'r' + + return built_weight + +def query_attr_select(params): + """ + Create portion of SELECT statement for attributes inolved in query. + @param params: dict of information used in query (column names, + table name, etc.) + """ + + attr_string = "" + template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, " + + if 'time_cols' in params: + ## if markov analysis + attrs = params['time_cols'] + + for idx, val in enumerate(attrs): + attr_string += template % {"col": val, "alias_num": idx + 1} + else: + ## if moran's analysis + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')] + + for idx, val in enumerate(sorted(attrs)): + attr_string += template % {"col": params[val], "alias_num": idx + 1} + + return attr_string + +def query_attr_where(params): + """ + Construct where conditions when building neighbors query + Create portion of WHERE clauses for weeding out NULL-valued geometries + Input: dict of params: + {'subquery': ..., + 'numerator': 'data1', + 'denominator': 'data2', + '': ...} + Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL' + Input: + {'subquery': ..., + 'time_cols': ['time1', 'time2', 'time3'], + 'etc': ...} + Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT + NULL AND idx_replace."time3" IS NOT NULL' + """ + attr_string = [] + template = "idx_replace.\"%s\" IS NOT NULL" + + if 'time_cols' in params: + ## markov where clauses + attrs = params['time_cols'] + # add values to template + for attr in attrs: + attr_string.append(template % attr) + else: + ## moran where clauses + + # get keys + attrs = sorted([k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')]) + # add values to template + for attr in attrs: + attr_string.append(template % params[attr]) + + if len(attrs) == 2: + attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]]) + + out = " AND ".join(attr_string) + + return out + +def knn(params): + """SQL query for k-nearest neighbors. + @param vars: dict of values to fill template + """ + + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE " \ + "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "%(attr_where_j)s " \ + "ORDER BY " \ + "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs})" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## SQL query for finding queens neighbors (all contiguous polygons) +def queen(params): + """SQL query for queen neighbors. + @param params dict: information to fill query + """ + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## to add more weight methods open a ticket or pull request + +def get_attributes(query_res, attr_num=1): + """ + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) + """ + return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) + +def empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/release/python/0.4.0/crankshaft/crankshaft/random_seeds.py b/release/python/0.4.0/crankshaft/crankshaft/random_seeds.py new file mode 100644 index 0000000..31958cb --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/random_seeds.py @@ -0,0 +1,11 @@ +"""Random seed generator used for non-deterministic functions in crankshaft""" +import random +import numpy + +def set_random_seeds(value): + """ + Set the seeds of the RNGs (Random Number Generators) + used internally. + """ + random.seed(value) + numpy.random.seed(value) diff --git a/release/python/0.4.0/crankshaft/crankshaft/segmentation/__init__.py b/release/python/0.4.0/crankshaft/crankshaft/segmentation/__init__.py new file mode 100644 index 0000000..b825e85 --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/segmentation/__init__.py @@ -0,0 +1 @@ +from segmentation import * diff --git a/release/python/0.4.0/crankshaft/crankshaft/segmentation/segmentation.py b/release/python/0.4.0/crankshaft/crankshaft/segmentation/segmentation.py new file mode 100644 index 0000000..ed61139 --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/segmentation/segmentation.py @@ -0,0 +1,176 @@ +""" +Segmentation creation and prediction +""" + +import sklearn +import numpy as np +import plpy +from sklearn.ensemble import GradientBoostingRegressor +from sklearn import metrics +from sklearn.cross_validation import train_test_split + +# Lower level functions +#---------------------- + +def replace_nan_with_mean(array): + """ + Input: + @param array: an array of floats which may have null-valued entries + Output: + array with nans filled in with the mean of the dataset + """ + # returns an array of rows and column indices + indices = np.where(np.isnan(array)) + + # iterate through entries which have nan values + for row, col in zip(*indices): + array[row, col] = np.mean(array[~np.isnan(array[:, col]), col]) + + return array + +def get_data(variable, feature_columns, query): + """ + Fetch data from the database, clean, and package into + numpy arrays + Input: + @param variable: name of the target variable + @param feature_columns: list of column names + @param query: subquery that data is pulled from for the packaging + Output: + prepared data, packaged into NumPy arrays + """ + + columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns]) + + try: + data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format( + variable=variable, + columns=columns, + query=query)) + except Exception, e: + plpy.error('Failed to access data to build segmentation model: %s' % e) + + # extract target data from plpy object + target = np.array(data[0]['target']) + + # put n feature data arrays into an n x m array of arrays + features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns]) + + return replace_nan_with_mean(target), replace_nan_with_mean(features) + +# High level interface +# -------------------- + +def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters): + """ + Version of create_and_predict_segment that works on arrays that come stright form the SQL calling + the function. + + Input: + @param target: The 1D array of lenth NSamples containing the target variable we want the model to predict + @param features: Thw 2D array of size NSamples * NFeatures that form the imput to the model + @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from + @param model_parameters: A dictionary containing parameters for the model. + """ + + clean_target = replace_nan_with_mean(target) + clean_features = replace_nan_with_mean(features) + target_features = replace_nan_with_mean(target_features) + + model, accuracy = train_model(clean_target, clean_features, model_parameters, 0.2) + prediction = model.predict(target_features) + accuracy_array = [accuracy]*prediction.shape[0] + return zip(target_ids, prediction, np.full(prediction.shape, accuracy_array)) + + + +def create_and_predict_segment(query, variable, target_query, model_params): + """ + generate a segment with machine learning + Stuart Lynn + """ + + ## fetch column names + try: + columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))[0].keys() + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + ## extract column names to be used in building the segmentation model + feature_columns = set(columns) - set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator']) + ## get data from database + target, features = get_data(variable, feature_columns, query) + + model, accuracy = train_model(target, features, model_params, 0.2) + cartodb_ids, result = predict_segment(model, feature_columns, target_query) + accuracy_array = [accuracy]*result.shape[0] + return zip(cartodb_ids, result, accuracy_array) + + +def train_model(target, features, model_params, test_split): + """ + Train the Gradient Boosting model on the provided data and calculate the accuracy of the model + Input: + @param target: 1D Array of the variable that the model is to be trianed to predict + @param features: 2D Array NSamples * NFeatures to use in trining the model + @param model_params: A dictionary of model parameters, the full specification can be found on the + scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html) + @parma test_split: The fraction of the data to be withheld for testing the model / calculating the accuray + """ + features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=test_split) + model = GradientBoostingRegressor(**model_params) + model.fit(features_train, target_train) + accuracy = calculate_model_accuracy(model, features, target) + return model, accuracy + +def calculate_model_accuracy(model, features, target): + """ + Calculate the mean squared error of the model prediction + Input: + @param model: model trained from input features + @param features: features to make a prediction from + @param target: target to compare prediction to + Output: + mean squared error of the model prection compared to the target + """ + prediction = model.predict(features) + return metrics.mean_squared_error(prediction, target) + +def predict_segment(model, features, target_query): + """ + Use the provided model to predict the values for the new feature set + Input: + @param model: The pretrained model + @features: A list of features to use in the model prediction (list of column names) + @target_query: The query to run to obtain the data to predict on and the cartdb_ids associated with it. + """ + + batch_size = 1000 + joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features]) + + try: + cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format( + joined_features=joined_features, + target_query=target_query)) + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + results = [] + + while True: + rows = cursor.fetch(batch_size) + if not rows: + break + batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows]) + + #Need to fix this. Should be global mean. This will cause weird effects + batch = replace_nan_with_mean(batch) + prediction = model.predict(batch) + results.append(prediction) + + try: + cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids'] + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + return cartodb_ids, np.concatenate(results) diff --git a/release/python/0.4.0/crankshaft/crankshaft/space_time_dynamics/__init__.py b/release/python/0.4.0/crankshaft/crankshaft/space_time_dynamics/__init__.py new file mode 100644 index 0000000..a439286 --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/space_time_dynamics/__init__.py @@ -0,0 +1,2 @@ +"""Import all functions from clustering libraries.""" +from markov import * diff --git a/release/python/0.4.0/crankshaft/crankshaft/space_time_dynamics/markov.py b/release/python/0.4.0/crankshaft/crankshaft/space_time_dynamics/markov.py new file mode 100644 index 0000000..bbf524d --- /dev/null +++ b/release/python/0.4.0/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -0,0 +1,189 @@ +""" +Spatial dynamics measurements using Spatial Markov +""" + + +import numpy as np +import pysal as ps +import plpy +import crankshaft.pysal_utils as pu + +def spatial_markov_trend(subquery, time_cols, num_classes=7, + w_type='knn', num_ngbrs=5, permutations=0, + geom_col='the_geom', id_col='cartodb_id'): + """ + Predict the trends of a unit based on: + 1. history of its transitions to different classes (e.g., 1st quantile -> 2nd quantile) + 2. average class of its neighbors + + Inputs: + @param subquery string: e.g., SELECT the_geom, cartodb_id, + interesting_time_column FROM table_name + @param time_cols list of strings: list of strings of column names + @param num_classes (optional): number of classes to break distribution + of values into. Currently uses quantile bins. + @param w_type string (optional): weight type ('knn' or 'queen') + @param num_ngbrs int (optional): number of neighbors (if knn type) + @param permutations int (optional): number of permutations for test + stats + @param geom_col string (optional): name of column which contains the + geometries + @param id_col string (optional): name of column which has the ids of + the table + + Outputs: + @param trend_up float: probablity that a geom will move to a higher + class + @param trend_down float: probablity that a geom will move to a lower + class + @param trend float: (trend_up - trend_down) / trend_static + @param volatility float: a measure of the volatility based on + probability stddev(prob array) + """ + + if len(time_cols) < 2: + plpy.error('More than one time column needs to be passed') + + qvals = {"id_col": id_col, + "time_cols": time_cols, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + try: + query_result = plpy.execute( + pu.construct_neighbor_query(w_type, qvals) + ) + if len(query_result) == 0: + return zip([None], [None], [None], [None], [None]) + except plpy.SPIError, err: + plpy.debug('Query failed with exception %s: %s' % (err, pu.construct_neighbor_query(w_type, qvals))) + plpy.error('Query failed, check the input parameters') + return zip([None], [None], [None], [None], [None]) + + ## build weight + weights = pu.get_weight(query_result, w_type) + weights.transform = 'r' + + ## prep time data + t_data = get_time_data(query_result, time_cols) + + plpy.debug('shape of t_data %d, %d' % t_data.shape) + plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape) + plpy.debug('first num elements: %f' % t_data[0, 0]) + + sp_markov_result = ps.Spatial_Markov(t_data, + weights, + k=num_classes, + fixed=False, + permutations=permutations) + + ## get lag classes + lag_classes = ps.Quantiles( + ps.lag_spatial(weights, t_data[:, -1]), + k=num_classes).yb + + ## look up probablity distribution for each unit according to class and lag class + prob_dist = get_prob_dist(sp_markov_result.P, + lag_classes, + sp_markov_result.classes[:, -1]) + + ## find the ups and down and overall distribution of each cell + trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, + sp_markov_result.classes[:, -1]) + + ## output the results + return zip(trend, trend_up, trend_down, volatility, weights.id_order) + +def get_time_data(markov_data, time_cols): + """ + Extract the time columns and bin appropriately + """ + num_attrs = len(time_cols) + return np.array([[x['attr' + str(i)] for x in markov_data] + for i in range(1, num_attrs+1)], dtype=float).transpose() + +## not currently used +def rebin_data(time_data, num_time_per_bin): + """ + Convert an n x l matrix into an (n/m) x l matrix where the values are + reduced (averaged) for the intervening states: + 1 2 3 4 1.5 3.5 + 5 6 7 8 -> 5.5 7.5 + 9 8 7 6 8.5 6.5 + 5 4 3 2 4.5 2.5 + + if m = 2, the 4 x 4 matrix is transformed to a 2 x 4 matrix. + + This process effectively resamples the data at a longer time span n + units longer than the input data. + For cases when there is a remainder (remainder(5/3) = 2), the remaining + two columns are binned together as the last time period, while the + first three are binned together for the first period. + + Input: + @param time_data n x l ndarray: measurements of an attribute at + different time intervals + @param num_time_per_bin int: number of columns to average into a new + column + Output: + ceil(n / m) x l ndarray of resampled time series + """ + + if time_data.shape[1] % num_time_per_bin == 0: + ## if fit is perfect, then use it + n_max = time_data.shape[1] / num_time_per_bin + else: + ## fit remainders into an additional column + n_max = time_data.shape[1] / num_time_per_bin + 1 + + return np.array([time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1) + for i in range(n_max)]).T + +def get_prob_dist(transition_matrix, lag_indices, unit_indices): + """ + Given an array of transition matrices, look up the probability + associated with the arrangements passed + + Input: + @param transition_matrix ndarray[k,k,k]: + @param lag_indices ndarray: + @param unit_indices ndarray: + + Output: + Array of probability distributions + """ + + return np.array([transition_matrix[(lag_indices[i], unit_indices[i])] + for i in range(len(lag_indices))]) + +def get_prob_stats(prob_dist, unit_indices): + """ + get the statistics of the probability distributions + + Outputs: + @param trend_up ndarray(float): sum of probabilities for upward + movement (relative to the unit index of that prob) + @param trend_down ndarray(float): sum of probabilities for downward + movement (relative to the unit index of that prob) + @param trend ndarray(float): difference of upward and downward + movements + """ + + num_elements = len(unit_indices) + trend_up = np.empty(num_elements, dtype=float) + trend_down = np.empty(num_elements, dtype=float) + trend = np.empty(num_elements, dtype=float) + + for i in range(num_elements): + trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum() + trend_down[i] = prob_dist[i, :unit_indices[i]].sum() + if prob_dist[i, unit_indices[i]] > 0.0: + trend[i] = (trend_up[i] - trend_down[i]) / prob_dist[i, unit_indices[i]] + else: + trend[i] = None + + ## calculate volatility of distribution + volatility = prob_dist.std(axis=1) + + return trend_up, trend_down, trend, volatility diff --git a/release/python/0.4.0/crankshaft/setup.py b/release/python/0.4.0/crankshaft/setup.py new file mode 100644 index 0000000..cd8ad99 --- /dev/null +++ b/release/python/0.4.0/crankshaft/setup.py @@ -0,0 +1,49 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.0', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation. + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.4.0/crankshaft/setup.py-r b/release/python/0.4.0/crankshaft/setup.py-r new file mode 100644 index 0000000..cd8ad99 --- /dev/null +++ b/release/python/0.4.0/crankshaft/setup.py-r @@ -0,0 +1,49 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.0', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation. + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.4.0/crankshaft/test/fixtures/kmeans.json b/release/python/0.4.0/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git a/release/python/0.4.0/crankshaft/test/fixtures/markov.json b/release/python/0.4.0/crankshaft/test/fixtures/markov.json new file mode 100644 index 0000000..d60e4e0 --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/fixtures/markov.json @@ -0,0 +1 @@ +[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]] diff --git a/release/python/0.4.0/crankshaft/test/fixtures/moran.json b/release/python/0.4.0/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..2f75cf1 --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], +[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], +[0.5724288737143592, "LL"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], +[0.18562939195219, "LL"], +[0.3010757406693439, "LL"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], +[0.18562939195219, "LL"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/release/python/0.4.0/crankshaft/test/fixtures/neighbors.json b/release/python/0.4.0/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/release/python/0.4.0/crankshaft/test/fixtures/neighbors_markov.json b/release/python/0.4.0/crankshaft/test/fixtures/neighbors_markov.json new file mode 100644 index 0000000..45a20e7 --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/fixtures/neighbors_markov.json @@ -0,0 +1 @@ +[{"neighbors": [10, 7, 21, 23, 1], "y1995": 0.87654416055651474, "y1997": 0.85637566664752718, "y1996": 0.8631470006766887, "y1999": 0.84461540228037335, "y1998": 0.84811668329242784, "y2006": 0.86302631339545688, "y2007": 0.86148266513456728, "y2004": 0.86416611731111015, "y2005": 0.87119374831581786, "y2002": 0.85012592862683589, "y2003": 0.8550965633336135, "y2000": 0.83271652434603094, "y2001": 0.83786313566577242, "id": 0, "y2008": 0.86252252380501315, "y2009": 0.86746356478544273}, {"neighbors": [5, 7, 22, 29, 3], "y1995": 0.91889509774542122, "y1997": 0.92333257900976462, "y1996": 0.91757931190043385, "y1999": 0.92552387732371888, "y1998": 0.92517289327379471, "y2006": 0.91706053906277052, "y2007": 0.90139504820726424, "y2004": 0.89815175749309051, "y2005": 0.91832090781161113, "y2002": 0.89431990798552208, "y2003": 0.88924793576523797, "y2000": 0.90746978227271013, "y2001": 0.89830489127332913, "id": 1, "y2008": 0.87897455159080617, "y2009": 0.86216858051752643}, {"neighbors": [11, 8, 13, 18, 17], "y1995": 0.82591007476914713, "y1997": 0.81989792988843901, "y1996": 0.82548595539161707, "y1999": 0.81731522200916285, "y1998": 0.81503235035017918, "y2006": 0.81814804358939286, "y2007": 0.83675961003285626, "y2004": 0.82668195534569056, "y2005": 0.82373723764184559, "y2002": 0.80849979516360859, "y2003": 0.82258550658074148, "y2000": 0.78964559168205917, "y2001": 0.8058444152731008, "id": 2, "y2008": 0.8357419865626442, "y2009": 0.84647177436289112}, {"neighbors": [4, 14, 9, 5, 12], "y1995": 1.0908817638059434, "y1997": 1.0845641754849344, "y1996": 1.0853768890893893, "y1999": 1.098988414417104, "y1998": 1.0841540389418189, "y2006": 1.1316479722785828, "y2007": 1.1295850763954971, "y2004": 1.1139980568106316, "y2005": 1.1216802898290368, "y2002": 1.1116069731657288, "y2003": 1.1088862051501811, "y2000": 1.1450694824791507, "y2001": 1.1215113292620285, "id": 3, "y2008": 1.1137181812756343, "y2009": 1.0993677488645406}, {"neighbors": [14, 3, 9, 31, 12], "y1995": 1.1073144618319228, "y1997": 1.1328363804627946, "y1996": 1.1137394350312471, "y1999": 1.1591002514611153, "y1998": 1.144725587086376, "y2006": 1.1173646811350333, "y2007": 1.1086324218539598, "y2004": 1.1102496406140896, "y2005": 1.11943471361418, "y2002": 1.1475230282561595, "y2003": 1.1184328424005199, "y2000": 1.1689820101690329, "y2001": 1.1721248787169682, "id": 4, "y2008": 1.0964251552643696, "y2009": 1.0776233718455337}, {"neighbors": [29, 1, 22, 7, 4], "y1995": 1.422697571371182, "y1997": 1.4427350196405593, "y1996": 1.4211843379728528, "y1999": 1.4440068434166562, "y1998": 1.4357757095632602, "y2006": 1.4405276647793266, "y2007": 1.4524121586440921, "y2004": 1.4059372049179741, "y2005": 1.4078864636665769, "y2002": 1.4197822680667809, "y2003": 1.3909220829548647, "y2000": 1.4418473669388905, "y2001": 1.4478283203013527, "id": 5, "y2008": 1.4330609762040207, "y2009": 1.4174430982377491}, {"neighbors": [12, 47, 9, 25, 20], "y1995": 1.1307388498039153, "y1997": 1.1107470843142355, "y1996": 1.1311051255854685, "y1999": 1.130881491772973, "y1998": 1.1336463608751246, "y2006": 1.1088003408832796, "y2007": 1.0840170924825394, "y2004": 1.1244623853593112, "y2005": 1.1167100811401538, "y2002": 1.1306293052597198, "y2003": 1.1194498381213465, "y2000": 1.1088813841947593, "y2001": 1.1185662918783175, "id": 6, "y2008": 1.0695920556329086, "y2009": 1.0787522517402164}, {"neighbors": [21, 1, 22, 10, 0], "y1995": 1.0470612357366649, "y1997": 1.0425337165747406, "y1996": 1.0451683097376836, "y1999": 1.0207254480945218, "y1998": 1.0323998680588111, "y2006": 1.0405109962442973, "y2007": 1.0174964540280445, "y2004": 1.0140090547678748, "y2005": 1.0317674181861733, "y2002": 0.99669586934394627, "y2003": 0.99327675611171373, "y2000": 0.99854316295509526, "y2001": 0.98802579761429143, "id": 7, "y2008": 0.9936394033949828, "y2009": 0.98279746069218921}, {"neighbors": [11, 13, 17, 18, 15], "y1995": 0.98996985668705595, "y1997": 0.99491000469481983, "y1996": 1.0014356415938011, "y1999": 1.0045584503565237, "y1998": 1.0018840754492748, "y2006": 0.92232873520447411, "y2007": 0.91284090705064902, "y2004": 0.93694786512729977, "y2005": 0.94308212820743131, "y2002": 0.96834820215592055, "y2003": 0.95335147249088092, "y2000": 0.99127006477048718, "y2001": 0.97925917470464008, "id": 8, "y2008": 0.89689832627117483, "y2009": 0.88928857608264111}, {"neighbors": [12, 6, 4, 3, 14], "y1995": 0.87418390853652306, "y1997": 0.84425695187978567, "y1996": 0.86416601430334228, "y1999": 0.83903043942542854, "y1998": 0.8404493987171674, "y2006": 0.87204140839730271, "y2007": 0.86633032299764789, "y2004": 0.86981997840756087, "y2005": 0.86837929279319737, "y2002": 0.86107306112852877, "y2003": 0.85007719735663123, "y2000": 0.85787080050645603, "y2001": 0.86036185149249467, "id": 9, "y2008": 0.84946077011565357, "y2009": 0.83287145944123797}, {"neighbors": [0, 7, 21, 23, 22], "y1995": 1.1419611801631209, "y1997": 1.1489271154554144, "y1996": 1.146602624490825, "y1999": 1.1443662376135306, "y1998": 1.1490959392942743, "y2006": 1.1049125811637337, "y2007": 1.1105984164317646, "y2004": 1.1119989015058092, "y2005": 1.1025779214946556, "y2002": 1.1259666377127024, "y2003": 1.1221399558345004, "y2000": 1.144501826035474, "y2001": 1.1234975172649961, "id": 10, "y2008": 1.1050979494645479, "y2009": 1.1002009697391872}, {"neighbors": [8, 13, 18, 17, 2], "y1995": 0.97282462974938089, "y1997": 0.96252588061647382, "y1996": 0.96700147279313231, "y1999": 0.96057686787383312, "y1998": 0.96538780087103548, "y2006": 0.91010201260822066, "y2007": 0.89280392121658247, "y2004": 0.94103988614185807, "y2005": 0.9212251863828258, "y2002": 0.94804194711420009, "y2003": 0.9543028555845573, "y2000": 0.95831051250950716, "y2001": 0.94480908623936988, "id": 11, "y2008": 0.89298242828382146, "y2009": 0.89165384824292859}, {"neighbors": [33, 9, 6, 25, 31], "y1995": 0.94325467991401402, "y1997": 0.96455242154753429, "y1996": 0.96436902092427723, "y1999": 0.94117647058823528, "y1998": 0.95243008993884537, "y2006": 0.9346681464882507, "y2007": 0.94281559150403071, "y2004": 0.96918424441756057, "y2005": 0.94781280876672958, "y2002": 0.95388717527096822, "y2003": 0.94597005193649519, "y2000": 0.94809269652332606, "y2001": 0.93539181553564288, "id": 12, "y2008": 0.965203150896216, "y2009": 0.967154410723015}, {"neighbors": [18, 17, 11, 8, 19], "y1995": 0.97478408425654373, "y1997": 0.98712808751954773, "y1996": 0.98169225257738801, "y1999": 0.985598971191053, "y1998": 0.98474769442356791, "y2006": 0.98416665248276058, "y2007": 0.98423613480079708, "y2004": 0.97399471186978948, "y2005": 0.96910087128357136, "y2002": 0.9820996926750224, "y2003": 0.98776529543110569, "y2000": 0.98687072733199255, "y2001": 0.99237486444837619, "id": 13, "y2008": 0.99823861244053191, "y2009": 0.99545704236827348}, {"neighbors": [4, 31, 3, 29, 12], "y1995": 0.85570268988941878, "y1997": 0.85986131704895119, "y1996": 0.85575915188345031, "y1999": 0.85380119644969055, "y1998": 0.85693406055397725, "y2006": 0.82803647591954255, "y2007": 0.81987360180979219, "y2004": 0.83998883284341452, "y2005": 0.83478547261894065, "y2002": 0.85472102128186755, "y2003": 0.84564834502399988, "y2000": 0.86191535266765262, "y2001": 0.84981450830432048, "id": 14, "y2008": 0.82265395167873867, "y2009": 0.83994039782937002}, {"neighbors": [19, 8, 17, 16, 13], "y1995": 0.87022046646521634, "y1997": 0.85961813213722393, "y1996": 0.85996258309339635, "y1999": 0.8394713575455558, "y1998": 0.85689572413110093, "y2006": 0.94202108334913126, "y2007": 0.94222309998743192, "y2004": 0.86763340229291142, "y2005": 0.89179316746010362, "y2002": 0.86776297543511893, "y2003": 0.86720209304280604, "y2000": 0.82785596604704892, "y2001": 0.86008789452656809, "id": 15, "y2008": 0.93902708112840494, "y2009": 0.94479183757120588}, {"neighbors": [28, 26, 15, 19, 32], "y1995": 0.90134907329491731, "y1997": 0.90403990934606904, "y1996": 0.904077381347274, "y1999": 0.90399237579083946, "y1998": 0.90201769385650832, "y2006": 0.91108803862404764, "y2007": 0.90543476309316473, "y2004": 0.94338264626469681, "y2005": 0.91981795862151561, "y2002": 0.93695966482853577, "y2003": 0.94242697007039, "y2000": 0.90906631602055099, "y2001": 0.92693339421265908, "id": 16, "y2008": 0.91737137682250491, "y2009": 0.94793657442067902}, {"neighbors": [13, 18, 11, 19, 8], "y1995": 1.1977611005602815, "y1997": 1.1843915817489725, "y1996": 1.1822256425225894, "y1999": 1.1928672308275252, "y1998": 1.1826786457339149, "y2006": 1.2392938410349985, "y2007": 1.2341867605077472, "y2004": 1.2385704217423759, "y2005": 1.2441989281116201, "y2002": 1.2262477774195681, "y2003": 1.2239707531714479, "y2000": 1.2017286912636342, "y2001": 1.2132869128474402, "id": 17, "y2008": 1.2362673914436095, "y2009": 1.2675439750795283}, {"neighbors": [13, 17, 11, 8, 19], "y1995": 1.2491967813733067, "y1997": 1.2699116090397236, "y1996": 1.2575477330927329, "y1999": 1.3062566740535762, "y1998": 1.2802065055312271, "y2006": 1.3210776560048689, "y2007": 1.329362443219563, "y2004": 1.3054484140490119, "y2005": 1.3030330249408666, "y2002": 1.3257518058685978, "y2003": 1.3079549159235695, "y2000": 1.3479002255103918, "y2001": 1.3439986302151703, "id": 18, "y2008": 1.3300124123891741, "y2009": 1.3328846185074705}, {"neighbors": [26, 17, 28, 15, 16], "y1995": 1.0676800411188558, "y1997": 1.0363730321443168, "y1996": 1.0379927554499979, "y1999": 1.0329609259280523, "y1998": 1.027684488045026, "y2006": 0.94241549375546196, "y2007": 0.92754546923532677, "y2004": 0.99614160423102482, "y2005": 0.97356208269708677, "y2002": 1.0274762326434594, "y2003": 1.0316273366809443, "y2000": 1.0505901631347052, "y2001": 1.0340505678899605, "id": 19, "y2008": 0.92549226593721745, "y2009": 0.92138101880290568}, {"neighbors": [30, 25, 24, 37, 47], "y1995": 1.0947561397632881, "y1997": 1.1165429913770684, "y1996": 1.1152679554712275, "y1999": 1.1314326394231322, "y1998": 1.1310394841195361, "y2006": 1.1090538904302065, "y2007": 1.1057776900012568, "y2004": 1.1402994437897009, "y2005": 1.1197940058085571, "y2002": 1.133670175399079, "y2003": 1.139822558851451, "y2000": 1.1388962186541665, "y2001": 1.1244221220249986, "id": 20, "y2008": 1.1116682481010467, "y2009": 1.0998515545336902}, {"neighbors": [23, 22, 7, 10, 34], "y1995": 0.76530058421804126, "y1997": 0.76542450966153397, "y1996": 0.76612841163904621, "y1999": 0.76014283909933289, "y1998": 0.7672268310234307, "y2006": 0.76842416021983684, "y2007": 0.77487117798086069, "y2004": 0.76533287692895391, "y2005": 0.78205934309410463, "y2002": 0.76156903267949927, "y2003": 0.76651951668098528, "y2000": 0.74480073263159763, "y2001": 0.76098396210261965, "id": 21, "y2008": 0.77768682781054099, "y2009": 0.78801192267396702}, {"neighbors": [21, 34, 5, 7, 29], "y1995": 0.98391336093764348, "y1997": 0.98295341320156315, "y1996": 0.98075815675295552, "y1999": 0.96913802803963667, "y1998": 0.97386015032669815, "y2006": 0.93965462091114671, "y2007": 0.93069644684632924, "y2004": 0.9635616201227476, "y2005": 0.94745351657235244, "y2002": 0.97209860866113018, "y2003": 0.97441312580606143, "y2000": 0.97370819354423843, "y2001": 0.96419154157867693, "id": 22, "y2008": 0.94020973488297466, "y2009": 0.94358232339833159}, {"neighbors": [21, 10, 22, 34, 7], "y1995": 0.83561828119099946, "y1997": 0.81738501913392403, "y1996": 0.82298088022609361, "y1999": 0.80904800725677739, "y1998": 0.81748588141426259, "y2006": 0.87170334233473346, "y2007": 0.8786379876833581, "y2004": 0.85954307066870839, "y2005": 0.86790023653402792, "y2002": 0.83451612857812574, "y2003": 0.85175031934895873, "y2000": 0.80071489233375537, "y2001": 0.83358255807316928, "id": 23, "y2008": 0.87497981001981484, "y2009": 0.87888675419592222}, {"neighbors": [27, 20, 30, 32, 47], "y1995": 0.98845573274970278, "y1997": 0.99665282989553183, "y1996": 1.0209242772035507, "y1999": 0.99386618594343845, "y1998": 0.99141823200404444, "y2006": 0.97906748937234156, "y2007": 0.9932312332800689, "y2004": 1.0111665058188304, "y2005": 0.9998802359352077, "y2002": 0.99669586934394627, "y2003": 1.0255909749831356, "y2000": 0.98733194819247994, "y2001": 0.99644997431653437, "id": 24, "y2008": 1.0020493856497013, "y2009": 0.99602148231561483}, {"neighbors": [20, 33, 6, 30, 12], "y1995": 1.1493091345649815, "y1997": 1.143009615936718, "y1996": 1.1524194939429724, "y1999": 1.1398468268822266, "y1998": 1.1426554202510555, "y2006": 1.0889107875354573, "y2007": 1.0860369499254896, "y2004": 1.0856975145267398, "y2005": 1.1244348633192611, "y2002": 1.0423089214343333, "y2003": 1.0557727834721793, "y2000": 1.0831239730629278, "y2001": 1.0519262599166714, "id": 25, "y2008": 1.0599731384290745, "y2009": 1.0216094265950888}, {"neighbors": [28, 19, 16, 32, 17], "y1995": 1.1136826889802023, "y1997": 1.1189343096757198, "y1996": 1.1057147027213501, "y1999": 1.1432271991365353, "y1998": 1.1377866945457653, "y2006": 1.1268023587150906, "y2007": 1.1235793669317915, "y2004": 1.1482023546040769, "y2005": 1.1238659840114973, "y2002": 1.1600919581655105, "y2003": 1.1446778932605579, "y2000": 1.1825702862895446, "y2001": 1.1622624279436105, "id": 26, "y2008": 1.115925801617498, "y2009": 1.1257082797404696}, {"neighbors": [32, 24, 36, 16, 28], "y1995": 1.303794309231981, "y1997": 1.3120636604057812, "y1996": 1.3075218596998686, "y1999": 1.3062566740535762, "y1998": 1.3153226688859194, "y2006": 1.2865667454509278, "y2007": 1.2973409698906584, "y2004": 1.2683078569016086, "y2005": 1.2617743046198988, "y2002": 1.2920319347677043, "y2003": 1.2718351646774422, "y2000": 1.3121023910310281, "y2001": 1.2998915587009874, "id": 27, "y2008": 1.2939020510829768, "y2009": 1.2934544564717687}, {"neighbors": [26, 16, 19, 32, 27], "y1995": 0.83953719020532513, "y1997": 0.82006005316292385, "y1996": 0.82701447583159737, "y1999": 0.80294863992835086, "y1998": 0.8118887636743225, "y2006": 0.8389109342655191, "y2007": 0.84349246817602375, "y2004": 0.83108634437662732, "y2005": 0.84373783646216949, "y2002": 0.82596790474192727, "y2003": 0.82435704751379402, "y2000": 0.78772975118465016, "y2001": 0.82848010958278628, "id": 28, "y2008": 0.85637272428125033, "y2009": 0.86539395164519117}, {"neighbors": [5, 39, 22, 14, 31], "y1995": 1.2345008725695852, "y1997": 1.2353793515744536, "y1996": 1.2426021999018138, "y1999": 1.2452262575926329, "y1998": 1.2358129278404693, "y2006": 1.2365329681906834, "y2007": 1.2796200872578414, "y2004": 1.1967443443492951, "y2005": 1.2153657295128597, "y2002": 1.1937780418204111, "y2003": 1.1835533748469893, "y2000": 1.2256766974812463, "y2001": 1.2112664802237314, "id": 29, "y2008": 1.2796839248335934, "y2009": 1.2590773758694083}, {"neighbors": [37, 20, 24, 25, 27], "y1995": 0.97696620404861145, "y1997": 0.98035944080980575, "y1996": 0.9740071914763756, "y1999": 0.95543282313901556, "y1998": 0.97581530789338955, "y2006": 0.92100464312607799, "y2007": 0.9147530387633086, "y2004": 0.9298883479571457, "y2005": 0.93442917452618346, "y2002": 0.93679072759857129, "y2003": 0.92540049332494034, "y2000": 0.96480308308405971, "y2001": 0.9468637634838194, "id": 30, "y2008": 0.90249622070947177, "y2009": 0.90213630440783921}, {"neighbors": [35, 14, 33, 12, 4], "y1995": 0.84986885942491119, "y1997": 0.84295996568390696, "y1996": 0.89868510090623221, "y1999": 0.85659367787716301, "y1998": 0.87280533962476625, "y2006": 0.92562487931452408, "y2007": 0.96635366357254426, "y2004": 0.92698332540482575, "y2005": 0.94745351657235244, "y2002": 0.90448992922937876, "y2003": 0.95495898185605821, "y2000": 0.88937573313051443, "y2001": 0.89440100450887505, "id": 31, "y2008": 1.025203118044723, "y2009": 1.0394296020754366}, {"neighbors": [36, 27, 28, 16, 26], "y1995": 1.0192280751235561, "y1997": 1.0097442843101825, "y1996": 1.0025820319237864, "y1999": 0.99765073314119712, "y1998": 1.0030341681355639, "y2006": 0.94779637858468868, "y2007": 0.93759089358493275, "y2004": 0.97583768316642261, "y2005": 0.96101679691008712, "y2002": 0.99747298060178258, "y2003": 0.99550758543481688, "y2000": 1.0075901875261932, "y2001": 0.99192968437874551, "id": 32, "y2008": 0.93353431146829191, "y2009": 0.94121705123804411}, {"neighbors": [44, 25, 12, 35, 31], "y1995": 0.86367410708901315, "y1997": 0.85544345781923936, "y1996": 0.85558931627900803, "y1999": 0.84336613427334628, "y1998": 0.85103025143102673, "y2006": 0.89455097373003656, "y2007": 0.88283929116469462, "y2004": 0.85951183386707053, "y2005": 0.87194227372077004, "y2002": 0.84667960913556228, "y2003": 0.84374557883664714, "y2000": 0.83434853662160158, "y2001": 0.85813595114434105, "id": 33, "y2008": 0.90349490610221961, "y2009": 0.9060067497610369}, {"neighbors": [22, 39, 21, 29, 23], "y1995": 1.0094753356447226, "y1997": 1.0069881886439402, "y1996": 1.0041105523637666, "y1999": 0.99291086334982948, "y1998": 0.99513686502304577, "y2006": 0.96382634438484593, "y2007": 0.95011400973122428, "y2004": 0.975119236728752, "y2005": 0.96134614808826613, "y2002": 0.99291167539274383, "y2003": 0.98983209318633369, "y2000": 1.0058162611397035, "y2001": 0.98850522230466298, "id": 34, "y2008": 0.94346860300667812, "y2009": 0.9463776450423077}, {"neighbors": [31, 38, 44, 33, 14], "y1995": 1.0571257066143651, "y1997": 1.0575301194645879, "y1996": 1.0545941857842291, "y1999": 1.0510385688532684, "y1998": 1.0488078570498685, "y2006": 1.0247627521629479, "y2007": 1.0234752320591773, "y2004": 1.0329697933620496, "y2005": 1.0219168238570018, "y2002": 1.0420048344203974, "y2003": 1.0402553971511816, "y2000": 1.0480002306104303, "y2001": 1.030249414987729, "id": 35, "y2008": 1.0251768368501768, "y2009": 1.0435957064486703}, {"neighbors": [32, 43, 27, 28, 42], "y1995": 1.070841888164505, "y1997": 1.0793762307014196, "y1996": 1.0666949726007404, "y1999": 1.0794043012481198, "y1998": 1.0738798776109699, "y2006": 1.087727556316465, "y2007": 1.0885954360198933, "y2004": 1.1032213602455734, "y2005": 1.0916793915985508, "y2002": 1.0938347765734742, "y2003": 1.1052447043433509, "y2000": 1.0531800956589803, "y2001": 1.0745277096056161, "id": 36, "y2008": 1.0917733838297285, "y2009": 1.1096083021948762}, {"neighbors": [30, 40, 20, 42, 41], "y1995": 0.8671922185905101, "y1997": 0.86675155621455668, "y1996": 0.86628895935887062, "y1999": 0.86511809486628932, "y1998": 0.86425631732335095, "y2006": 0.84488343470424199, "y2007": 0.83374328958471722, "y2004": 0.84517414191529749, "y2005": 0.84843857600526962, "y2002": 0.85411284725399572, "y2003": 0.84886336375435456, "y2000": 0.86287327291635718, "y2001": 0.8516979624450659, "id": 37, "y2008": 0.82812044014430564, "y2009": 0.82878598934619596}, {"neighbors": [35, 31, 45, 39, 44], "y1995": 0.8838921149583755, "y1997": 0.90282398478743275, "y1996": 0.92288667453925455, "y1999": 0.92023285988219217, "y1998": 0.91229185518735723, "y2006": 0.93869676706720051, "y2007": 0.96947770975097391, "y2004": 0.99223700402629367, "y2005": 0.97984969609868555, "y2002": 0.93682451504456421, "y2003": 0.98655146182882891, "y2000": 0.92652175166361039, "y2001": 0.94278865361566122, "id": 38, "y2008": 1.0036262573224608, "y2009": 0.98102350657197357}, {"neighbors": [29, 34, 38, 22, 35], "y1995": 0.970820642185237, "y1997": 0.94534081352108112, "y1996": 0.95320232993219844, "y1999": 0.93967000034446724, "y1998": 0.94215592860799646, "y2006": 0.91035556215514757, "y2007": 0.90430364292511256, "y2004": 0.92879505989982103, "y2005": 0.9211054223180335, "y2002": 0.93412151936513388, "y2003": 0.93501274320242933, "y2000": 0.93092108910210503, "y2001": 0.92662519262599163, "id": 39, "y2008": 0.89994694483851023, "y2009": 0.9007386435858511}, {"neighbors": [41, 37, 42, 30, 45], "y1995": 0.95861858457245008, "y1997": 0.98254810501535106, "y1996": 0.95774543235102894, "y1999": 0.98684823919808018, "y1998": 0.98919471947721893, "y2006": 0.97163003599581876, "y2007": 0.97007020126757271, "y2004": 0.9493488753775261, "y2005": 0.97152609359561659, "y2002": 0.95601578436851964, "y2003": 0.94905384541254967, "y2000": 0.98882204635713133, "y2001": 0.97662233890759653, "id": 40, "y2008": 0.97158948117089283, "y2009": 0.95884908006927827}, {"neighbors": [40, 45, 44, 37, 42], "y1995": 0.83980438854721107, "y1997": 0.85746999875029983, "y1996": 0.84726737166133714, "y1999": 0.85567509846023126, "y1998": 0.85467221160427542, "y2006": 0.8333891885768886, "y2007": 0.83511679264592342, "y2004": 0.81743586206088703, "y2005": 0.83550405700769481, "y2002": 0.84502402428191115, "y2003": 0.82645665158259707, "y2000": 0.84818516243622177, "y2001": 0.85265681182580899, "id": 41, "y2008": 0.82136617314598481, "y2009": 0.80921873783836296}, {"neighbors": [43, 40, 46, 37, 36], "y1995": 0.95118156405662746, "y1997": 0.94688098462868708, "y1996": 0.9466212002600608, "y1999": 0.95124410099780687, "y1998": 0.95085829660091703, "y2006": 0.96895367966714574, "y2007": 0.9700163384024274, "y2004": 0.97583768316642261, "y2005": 0.95571723704302525, "y2002": 0.96804411514198463, "y2003": 0.97136213864358201, "y2000": 0.95440787445922959, "y2001": 0.96364362764682376, "id": 42, "y2008": 0.97082732652905901, "y2009": 0.9878236640328002}, {"neighbors": [36, 42, 32, 27, 46], "y1995": 1.0891004415267045, "y1997": 1.0849289528525252, "y1996": 1.0824896838138709, "y1999": 1.0945424900391545, "y1998": 1.0865692335830259, "y2006": 1.1450297539219478, "y2007": 1.1447474729339102, "y2004": 1.1334273474293739, "y2005": 1.1468606844516303, "y2002": 1.1229257675733433, "y2003": 1.1302103089739621, "y2000": 1.1055818811158884, "y2001": 1.1214085953998059, "id": 43, "y2008": 1.1408403740471014, "y2009": 1.1614292649793569}, {"neighbors": [33, 41, 45, 35, 40], "y1995": 1.0633603345917013, "y1997": 1.0869149629649646, "y1996": 1.0736582323828732, "y1999": 1.1166986255755473, "y1998": 1.0976484597942771, "y2006": 1.0839806574563229, "y2007": 1.0983176831786272, "y2004": 1.0927882684985315, "y2005": 1.0700320368873319, "y2002": 1.0881584856466706, "y2003": 1.0804431312806149, "y2000": 1.1185670222649935, "y2001": 1.0976428286056732, "id": 44, "y2008": 1.0929823187788443, "y2009": 1.0917612486217978}, {"neighbors": [41, 44, 40, 35, 33], "y1995": 0.79772064970019041, "y1997": 0.7858115114280021, "y1996": 0.78829195801876151, "y1999": 0.77035744221561353, "y1998": 0.77615921755360906, "y2006": 0.79949806580432425, "y2007": 0.80172181625581262, "y2004": 0.79603865293896003, "y2005": 0.78966436120841943, "y2002": 0.81437881076636964, "y2003": 0.80788827809912023, "y2000": 0.77751193519846906, "y2001": 0.79902973574567659, "id": 45, "y2008": 0.82168154748053679, "y2009": 0.85587910681858015}, {"neighbors": [42, 43, 40, 36, 37], "y1995": 1.0052446952315301, "y1997": 1.0047589936197736, "y1996": 1.0000769567582628, "y1999": 1.0063956091903872, "y1998": 1.0061394183885444, "y2006": 0.97292595590233411, "y2007": 0.96519561197191939, "y2004": 0.99030032232474696, "y2005": 0.97682565346267858, "y2002": 1.0081498135355325, "y2003": 1.0057431552702318, "y2000": 1.0016297948675874, "y2001": 0.99860738542320637, "id": 46, "y2008": 0.9617340332161447, "y2009": 0.95890283625473927}, {"neighbors": [20, 6, 24, 25, 30], "y1995": 0.95808418788867844, "y1997": 0.9654440995572009, "y1996": 0.93825679674127938, "y1999": 0.96987289157318213, "y1998": 0.95561201303757848, "y2006": 1.1704973973021624, "y2007": 1.1702515395802287, "y2004": 1.0533361880299275, "y2005": 1.0983262971945267, "y2002": 1.0078119390756035, "y2003": 1.0348423554112989, "y2000": 0.96608031008233231, "y2001": 0.99727184521431422, "id": 47, "y2008": 1.1873055260044207, "y2009": 1.1424264534188653}] diff --git a/release/python/0.4.0/crankshaft/test/helper.py b/release/python/0.4.0/crankshaft/test/helper.py new file mode 100644 index 0000000..7d28b94 --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/helper.py @@ -0,0 +1,13 @@ +import unittest + +from mock_plpy import MockPlPy +plpy = MockPlPy() + +import sys +sys.modules['plpy'] = plpy + +import os + +def fixture_file(name): + dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir, 'fixtures', name) diff --git a/release/python/0.4.0/crankshaft/test/mock_plpy.py b/release/python/0.4.0/crankshaft/test/mock_plpy.py new file mode 100644 index 0000000..a982ebe --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/mock_plpy.py @@ -0,0 +1,52 @@ +import re + +class MockCursor: + def __init__(self, data): + self.cursor_pos = 0 + self.data = data + + def fetch(self, batch_size): + batch = self.data[self.cursor_pos : self.cursor_pos + batch_size] + self.cursor_pos += batch_size + return batch + + +class MockPlPy: + def __init__(self): + self._reset() + + def _reset(self): + self.infos = [] + self.notices = [] + self.debugs = [] + self.logs = [] + self.warnings = [] + self.errors = [] + self.fatals = [] + self.executes = [] + self.results = [] + self.prepares = [] + self.results = [] + + def _define_result(self, query, result): + pattern = re.compile(query, re.IGNORECASE | re.MULTILINE) + self.results.append([pattern, result]) + + def notice(self, msg): + self.notices.append(msg) + + def debug(self, msg): + self.notices.append(msg) + + def info(self, msg): + self.infos.append(msg) + + def cursor(self, query): + data = self.execute(query) + return MockCursor(data) + + def execute(self, query): # TODO: additional arguments + for result in self.results: + if result[0].match(query): + return result[1] + return [] diff --git a/release/python/0.4.0/crankshaft/test/test_cluster_kmeans.py b/release/python/0.4.0/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + diff --git a/release/python/0.4.0/crankshaft/test/test_clustering_moran.py b/release/python/0.4.0/crankshaft/test/test_clustering_moran.py new file mode 100644 index 0000000..2b683cf --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/test_clustering_moran.py @@ -0,0 +1,88 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class MoranTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.params_markov = {"id_col": "cartodb_id", + "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads(open(fixture_file('moran.json')).read()) + + def test_map_quads(self): + """Test map_quads""" + self.assertEqual(cc.map_quads(1), 'HH') + self.assertEqual(cc.map_quads(2), 'LH') + self.assertEqual(cc.map_quads(3), 'LL') + self.assertEqual(cc.map_quads(4), 'HL') + self.assertEqual(cc.map_quads(33), None) + self.assertEqual(cc.map_quads('andy'), None) + + def test_quad_position(self): + """Test lisa_sig_vals""" + + quads = np.array([1, 2, 3, 4], np.int) + + ans = np.array(['HH', 'LH', 'LL', 'HL']) + test_ans = cc.quad_position(quads) + + self.assertTrue((test_ans == ans).all()) + + def test_moran_local(self): + """Test Moran's I local""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None? ', result == None + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1235) + result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None?', result == None + result_moran = result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) diff --git a/release/python/0.4.0/crankshaft/test/test_pysal_utils.py b/release/python/0.4.0/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..171fdbc --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,142 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + self.params_array = {"id_col": "cartodb_id", + "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans = "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " + + ans_array = "i.\"_2013_dec\"::numeric As attr1, " \ + "i.\"_2014_jan\"::numeric As attr2, " \ + "i.\"_2014_feb\"::numeric As attr3, " + + self.assertEqual(pu.query_attr_select(self.params), ans) + self.assertEqual(pu.query_attr_select(self.params_array), ans_array) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans = "idx_replace.\"andy\" IS NOT NULL AND " \ + "idx_replace.\"jay_z\" IS NOT NULL AND " \ + "idx_replace.\"jay_z\" <> 0" + + ans_array = "idx_replace.\"_2013_dec\" IS NOT NULL AND " \ + "idx_replace.\"_2014_jan\" IS NOT NULL AND " \ + "idx_replace.\"_2014_feb\" IS NOT NULL" + + self.assertEqual(pu.query_attr_where(self.params), ans) + self.assertEqual(pu.query_attr_where(self.params_array), ans_array) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + ans_array = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"_2013_dec\"::numeric As attr1, " \ + "i.\"_2014_jan\"::numeric As attr2, " \ + "i.\"_2014_feb\"::numeric As attr3, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"_2013_dec\" IS NOT NULL AND " \ + "j.\"_2014_jan\" IS NOT NULL AND " \ + "j.\"_2014_feb\" IS NOT NULL " \ + "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"_2013_dec\" IS NOT NULL AND " \ + "i.\"_2014_jan\" IS NOT NULL AND " \ + "i.\"_2014_feb\" IS NOT NULL "\ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params), ans) + self.assertEqual(pu.knn(self.params_array), ans_array) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params), ans) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + pu.knn(self.params)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) diff --git a/release/python/0.4.0/crankshaft/test/test_segmentation.py b/release/python/0.4.0/crankshaft/test/test_segmentation.py new file mode 100644 index 0000000..d02e8b1 --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/test_segmentation.py @@ -0,0 +1,64 @@ +import unittest +import numpy as np +from helper import plpy, fixture_file +import crankshaft.segmentation as segmentation +import json + +class SegmentationTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + + def generate_random_data(self,n_samples,random_state, row_type=False): + x1 = random_state.uniform(size=n_samples) + x2 = random_state.uniform(size=n_samples) + x3 = random_state.randint(0, 4, size=n_samples) + + y = x1+x2*x2+x3 + cartodb_id = range(len(x1)) + + if row_type: + return [ {'features': vals} for vals in zip(x1,x2,x3)], y + else: + return [dict( zip(['x1','x2','x3','target', 'cartodb_id'],[x1,x2,x3,y,cartodb_id]))] + + def test_replace_nan_with_mean(self): + test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan]) + + def test_create_and_predict_segment(self): + n_samples = 1000 + + random_state_train = np.random.RandomState(13) + random_state_test = np.random.RandomState(134) + training_data = self.generate_random_data(n_samples, random_state_train) + test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True) + + + ids = [{'cartodb_ids': range(len(test_data))}] + rows = [{'x1': 0,'x2':0,'x3':0,'y':0,'cartodb_id':0}] + + plpy._define_result('select \* from \(select \* from training\) a limit 1',rows) + plpy._define_result('.*from \(select \* from training\) as a' ,training_data) + plpy._define_result('select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',ids) + plpy._define_result('.*select \* from test.*' ,test_data) + + model_parameters = {'n_estimators': 1200, + 'max_depth': 3, + 'subsample' : 0.5, + 'learning_rate': 0.01, + 'min_samples_leaf': 1} + + result = segmentation.create_and_predict_segment( + 'select * from training', + 'target', + 'select * from test', + model_parameters) + + prediction = [r[1] for r in result] + + accuracy =np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y)))) + + self.assertEqual(len(result),len(test_data)) + self.assertTrue( result[0][2] < 0.01) + self.assertTrue( accuracy < 0.5*np.mean(test_y) ) diff --git a/release/python/0.4.0/crankshaft/test/test_space_time_dynamics.py b/release/python/0.4.0/crankshaft/test/test_space_time_dynamics.py new file mode 100644 index 0000000..54ffc9d --- /dev/null +++ b/release/python/0.4.0/crankshaft/test/test_space_time_dynamics.py @@ -0,0 +1,324 @@ +import unittest +import numpy as np + +import unittest + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.space_time_dynamics as std +from crankshaft import random_seeds +import json + +class SpaceTimeTests(unittest.TestCase): + """Testing class for Markov Functions.""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors_markov.json')).read()) + self.markov_data = json.loads(open(fixture_file('markov.json')).read()) + + self.time_data = np.array([i * np.ones(10, dtype=float) for i in range(10)]).T + + self.transition_matrix = np.array([ + [[ 0.96341463, 0.0304878 , 0.00609756, 0. , 0. ], + [ 0.06040268, 0.83221477, 0.10738255, 0. , 0. ], + [ 0. , 0.14 , 0.74 , 0.12 , 0. ], + [ 0. , 0.03571429, 0.32142857, 0.57142857, 0.07142857], + [ 0. , 0. , 0. , 0.16666667, 0.83333333]], + [[ 0.79831933, 0.16806723, 0.03361345, 0. , 0. ], + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0.00537634, 0.06989247, 0.8655914 , 0.05913978, 0. ], + [ 0. , 0. , 0.06372549, 0.90196078, 0.03431373], + [ 0. , 0. , 0. , 0.19444444, 0.80555556]], + [[ 0.84693878, 0.15306122, 0. , 0. , 0. ], + [ 0.08133971, 0.78947368, 0.1291866 , 0. , 0. ], + [ 0.00518135, 0.0984456 , 0.79274611, 0.0984456 , 0.00518135], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0. , 0. , 0. , 0.10204082, 0.89795918]], + [[ 0.8852459 , 0.09836066, 0. , 0.01639344, 0. ], + [ 0.03875969, 0.81395349, 0.13953488, 0. , 0.00775194], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0.02339181, 0.12865497, 0.75438596, 0.09356725], + [ 0. , 0. , 0. , 0.09661836, 0.90338164]], + [[ 0.33333333, 0.66666667, 0. , 0. , 0. ], + [ 0.0483871 , 0.77419355, 0.16129032, 0.01612903, 0. ], + [ 0.01149425, 0.16091954, 0.74712644, 0.08045977, 0. ], + [ 0. , 0.01036269, 0.06217617, 0.89637306, 0.03108808], + [ 0. , 0. , 0. , 0.02352941, 0.97647059]]] + ) + + def test_spatial_markov(self): + """Test Spatial Markov.""" + data = [ { 'id': d['id'], + 'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009'], + 'neighbors': d['neighbors'] } for d in self.neighbors_data] + print(str(data[0])) + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + + result = std.spatial_markov_trend('subquery', ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009'], 5, 'knn', 5, 0, 'the_geom', 'cartodb_id') + + self.assertTrue(result != None) + result = [(row[0], row[1], row[2], row[3], row[4]) for row in result] + print result[0] + expected = self.markov_data + for ([res_trend, res_up, res_down, res_vol, res_id], + [exp_trend, exp_up, exp_down, exp_vol, exp_id] + ) in zip(result, expected): + self.assertAlmostEqual(res_trend, exp_trend) + + def test_get_time_data(self): + """Test get_time_data""" + data = [ { 'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009'] } for d in self.neighbors_data] + + result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009']) + + ## expected was prepared from PySAL example: + ### f = ps.open(ps.examples.get_path("usjoin.csv")) + ### pci = np.array([f.by_col[str(y)] for y in range(1995, 2010)]).transpose() + ### rpci = pci / (pci.mean(axis = 0)) + + expected = np.array([[ 0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154, 0.83271652 + , 0.83786314, 0.85012593, 0.85509656, 0.86416612, 0.87119375, 0.86302631 + , 0.86148267, 0.86252252, 0.86746356], + [ 0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388, 0.90746978 + , 0.89830489, 0.89431991, 0.88924794, 0.89815176, 0.91832091, 0.91706054 + , 0.90139505, 0.87897455, 0.86216858], + [ 0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522, 0.78964559 + , 0.80584442, 0.8084998, 0.82258551, 0.82668196, 0.82373724, 0.81814804 + , 0.83675961, 0.83574199, 0.84647177], + [ 1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841, 1.14506948 + , 1.12151133, 1.11160697, 1.10888621, 1.11399806, 1.12168029, 1.13164797 + , 1.12958508, 1.11371818, 1.09936775], + [ 1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025, 1.16898201 + , 1.17212488, 1.14752303, 1.11843284, 1.11024964, 1.11943471, 1.11736468 + , 1.10863242, 1.09642516, 1.07762337], + [ 1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684, 1.44184737 + , 1.44782832, 1.41978227, 1.39092208, 1.4059372, 1.40788646, 1.44052766 + , 1.45241216, 1.43306098, 1.4174431 ], + [ 1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149, 1.10888138 + , 1.11856629, 1.13062931, 1.11944984, 1.12446239, 1.11671008, 1.10880034 + , 1.08401709, 1.06959206, 1.07875225], + [ 1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545, 0.99854316 + , 0.9880258, 0.99669587, 0.99327676, 1.01400905, 1.03176742, 1.040511 + , 1.01749645, 0.9936394, 0.98279746], + [ 0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845, 0.99127006 + , 0.97925917, 0.9683482, 0.95335147, 0.93694787, 0.94308213, 0.92232874 + , 0.91284091, 0.89689833, 0.88928858], + [ 0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044, 0.8578708 + , 0.86036185, 0.86107306, 0.8500772, 0.86981998, 0.86837929, 0.87204141 + , 0.86633032, 0.84946077, 0.83287146], + [ 1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624, 1.14450183 + , 1.12349752, 1.12596664, 1.12213996, 1.1119989, 1.10257792, 1.10491258 + , 1.11059842, 1.10509795, 1.10020097], + [ 0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687, 0.95831051 + , 0.94480909, 0.94804195, 0.95430286, 0.94103989, 0.92122519, 0.91010201 + , 0.89280392, 0.89298243, 0.89165385], + [ 0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647, 0.9480927 + , 0.93539182, 0.95388718, 0.94597005, 0.96918424, 0.94781281, 0.93466815 + , 0.94281559, 0.96520315, 0.96715441], + [ 0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897, 0.98687073 + , 0.99237486, 0.98209969, 0.9877653, 0.97399471, 0.96910087, 0.98416665 + , 0.98423613, 0.99823861, 0.99545704], + [ 0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012, 0.86191535 + , 0.84981451, 0.85472102, 0.84564835, 0.83998883, 0.83478547, 0.82803648 + , 0.8198736, 0.82265395, 0.8399404 ], + [ 0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136, 0.82785597 + , 0.86008789, 0.86776298, 0.86720209, 0.8676334, 0.89179317, 0.94202108 + , 0.9422231, 0.93902708, 0.94479184], + [ 0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238, 0.90906632 + , 0.92693339, 0.93695966, 0.94242697, 0.94338265, 0.91981796, 0.91108804 + , 0.90543476, 0.91737138, 0.94793657], + [ 1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723, 1.20172869 + , 1.21328691, 1.22624778, 1.22397075, 1.23857042, 1.24419893, 1.23929384 + , 1.23418676, 1.23626739, 1.26754398], + [ 1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667, 1.34790023 + , 1.34399863, 1.32575181, 1.30795492, 1.30544841, 1.30303302, 1.32107766 + , 1.32936244, 1.33001241, 1.33288462], + [ 1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093, 1.05059016 + , 1.03405057, 1.02747623, 1.03162734, 0.9961416, 0.97356208, 0.94241549 + , 0.92754547, 0.92549227, 0.92138102], + [ 1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264, 1.13889622 + , 1.12442212, 1.13367018, 1.13982256, 1.14029944, 1.11979401, 1.10905389 + , 1.10577769, 1.11166825, 1.09985155], + [ 0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284, 0.74480073 + , 0.76098396, 0.76156903, 0.76651952, 0.76533288, 0.78205934, 0.76842416 + , 0.77487118, 0.77768683, 0.78801192], + [ 0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803, 0.97370819 + , 0.96419154, 0.97209861, 0.97441313, 0.96356162, 0.94745352, 0.93965462 + , 0.93069645, 0.94020973, 0.94358232], + [ 0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801, 0.80071489 + , 0.83358256, 0.83451613, 0.85175032, 0.85954307, 0.86790024, 0.87170334 + , 0.87863799, 0.87497981, 0.87888675], + [ 0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619, 0.98733195 + , 0.99644997, 0.99669587, 1.02559097, 1.01116651, 0.99988024, 0.97906749 + , 0.99323123, 1.00204939, 0.99602148], + [ 1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683, 1.08312397 + , 1.05192626, 1.04230892, 1.05577278, 1.08569751, 1.12443486, 1.08891079 + , 1.08603695, 1.05997314, 1.02160943], + [ 1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272, 1.18257029 + , 1.16226243, 1.16009196, 1.14467789, 1.14820235, 1.12386598, 1.12680236 + , 1.12357937, 1.1159258, 1.12570828], + [ 1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667, 1.31210239 + , 1.29989156, 1.29203193, 1.27183516, 1.26830786, 1.2617743, 1.28656675 + , 1.29734097, 1.29390205, 1.29345446], + [ 0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864, 0.78772975 + , 0.82848011, 0.8259679, 0.82435705, 0.83108634, 0.84373784, 0.83891093 + , 0.84349247, 0.85637272, 0.86539395], + [ 1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626, 1.2256767 + , 1.21126648, 1.19377804, 1.18355337, 1.19674434, 1.21536573, 1.23653297 + , 1.27962009, 1.27968392, 1.25907738], + [ 0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282, 0.96480308 + , 0.94686376, 0.93679073, 0.92540049, 0.92988835, 0.93442917, 0.92100464 + , 0.91475304, 0.90249622, 0.9021363 ], + [ 0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368, 0.88937573 + , 0.894401, 0.90448993, 0.95495898, 0.92698333, 0.94745352, 0.92562488 + , 0.96635366, 1.02520312, 1.0394296 ], + [ 1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073, 1.00759019 + , 0.99192968, 0.99747298, 0.99550759, 0.97583768, 0.9610168, 0.94779638 + , 0.93759089, 0.93353431, 0.94121705], + [ 0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613, 0.83434854 + , 0.85813595, 0.84667961, 0.84374558, 0.85951183, 0.87194227, 0.89455097 + , 0.88283929, 0.90349491, 0.90600675], + [ 1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086, 1.00581626 + , 0.98850522, 0.99291168, 0.98983209, 0.97511924, 0.96134615, 0.96382634 + , 0.95011401, 0.9434686, 0.94637765], + [ 1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857, 1.04800023 + , 1.03024941, 1.04200483, 1.0402554, 1.03296979, 1.02191682, 1.02476275 + , 1.02347523, 1.02517684, 1.04359571], + [ 1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043, 1.0531801 + , 1.07452771, 1.09383478, 1.1052447, 1.10322136, 1.09167939, 1.08772756 + , 1.08859544, 1.09177338, 1.1096083 ], + [ 0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809, 0.86287327 + , 0.85169796, 0.85411285, 0.84886336, 0.84517414, 0.84843858, 0.84488343 + , 0.83374329, 0.82812044, 0.82878599], + [ 0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286, 0.92652175 + , 0.94278865, 0.93682452, 0.98655146, 0.992237, 0.9798497, 0.93869677 + , 0.96947771, 1.00362626, 0.98102351], + [ 0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967, 0.93092109 + , 0.92662519, 0.93412152, 0.93501274, 0.92879506, 0.92110542, 0.91035556 + , 0.90430364, 0.89994694, 0.90073864], + [ 0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824, 0.98882205 + , 0.97662234, 0.95601578, 0.94905385, 0.94934888, 0.97152609, 0.97163004 + , 0.9700702, 0.97158948, 0.95884908], + [ 0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751, 0.84818516 + , 0.85265681, 0.84502402, 0.82645665, 0.81743586, 0.83550406, 0.83338919 + , 0.83511679, 0.82136617, 0.80921874], + [ 0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441, 0.95440787 + , 0.96364363, 0.96804412, 0.97136214, 0.97583768, 0.95571724, 0.96895368 + , 0.97001634, 0.97082733, 0.98782366], + [ 1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249, 1.10558188 + , 1.1214086, 1.12292577, 1.13021031, 1.13342735, 1.14686068, 1.14502975 + , 1.14474747, 1.14084037, 1.16142926], + [ 1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863, 1.11856702 + , 1.09764283, 1.08815849, 1.08044313, 1.09278827, 1.07003204, 1.08398066 + , 1.09831768, 1.09298232, 1.09176125], + [ 0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744, 0.77751194 + , 0.79902974, 0.81437881, 0.80788828, 0.79603865, 0.78966436, 0.79949807 + , 0.80172182, 0.82168155, 0.85587911], + [ 1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561, 1.00162979 + , 0.99860739, 1.00814981, 1.00574316, 0.99030032, 0.97682565, 0.97292596 + , 0.96519561, 0.96173403, 0.95890284], + [ 0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289, 0.96608031 + , 0.99727185, 1.00781194, 1.03484236, 1.05333619, 1.0983263, 1.1704974 + , 1.17025154, 1.18730553, 1.14242645]]) + + self.assertTrue(np.allclose(result, expected)) + self.assertTrue(type(result) == type(expected)) + self.assertTrue(result.shape == expected.shape) + + def test_rebin_data(self): + """Test rebin_data""" + ## sample in double the time (even case since 10 % 2 = 0): + ## (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2 + ## = 0.5, 2.5, 4.5, 6.5, 8.5 + ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float) + for i in range(0, 10, 2)]).T + + self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 2), ans_even)) + + ## sample in triple the time (uneven since 10 % 3 = 1): + ## (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1 + ## = 1, 4, 7, 9 + ans_odd = np.array([i * np.ones(10, dtype=float) + for i in (1, 4, 7, 9)]).T + self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 3), ans_odd)) + + def test_get_prob_dist(self): + """Test get_prob_dist""" + lag_indices = np.array([1, 2, 3, 4]) + unit_indices = np.array([1, 3, 2, 4]) + answer = np.array([ + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0. , 0. , 0.02352941, 0.97647059] + ]) + result = std.get_prob_dist(self.transition_matrix, lag_indices, unit_indices) + + self.assertTrue(np.array_equal(result, answer)) + + def test_get_prob_stats(self): + """Test get_prob_stats""" + + probs = np.array([ + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0. , 0. , 0.02352941, 0.97647059] + ]) + unit_indices = np.array([1, 3, 2, 4]) + answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.]) + answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941]) + answer_trend = np.array([-0.03301887 / 0.88207547, -0.05882353 / 0.87058824, 0.02475248 / 0.77722772, -0.02352941 / 0.97647059]) + answer_volatility = np.array([ 0.34221495, 0.33705421, 0.29226542, 0.38834223]) + + result = std.get_prob_stats(probs, unit_indices) + result_up = result[0] + result_down = result[1] + result_trend = result[2] + result_volatility = result[3] + + self.assertTrue(np.allclose(result_up, answer_up)) + self.assertTrue(np.allclose(result_down, answer_down)) + self.assertTrue(np.allclose(result_trend, answer_trend)) + self.assertTrue(np.allclose(result_volatility, answer_volatility)) diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index bf7cf0f..7df077c 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.3.1' +default_version = '0.4.0' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft From 6ed99011030ad7dcb37f3dfbe5eef7754c312e80 Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Tue, 30 Aug 2016 12:51:57 +0200 Subject: [PATCH 37/62] Update NEWS.md for version 0.4.0 --- NEWS.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/NEWS.md b/NEWS.md index c9187f7..791cc50 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +0.4.0 (2016-08-30) +------------------ +* Add CDB_Contour +* Add CDB_PIA +* Add CDB_Densify +* Add CDB_TINmap + 0.3.1 (2016-08-18) ------------------ * Fix Voronoi projection issue From 22cdc53c49a290febdcc32aa574c2b005b2464d3 Mon Sep 17 00:00:00 2001 From: Carla Date: Tue, 30 Aug 2016 13:38:09 +0200 Subject: [PATCH 38/62] Fix typos --- doc/19_contour.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/19_contour.md b/doc/19_contour.md index d6629e4..2e5b440 100644 --- a/doc/19_contour.md +++ b/doc/19_contour.md @@ -1,6 +1,6 @@ ## Contour maps -Function to generate a contour map from an scatter dataset of points, using one of three methos: +Function to generate a contour map from an scatter dataset of points, using one of these three methods: * [Nearest neighbor](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) * [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system) @@ -18,7 +18,7 @@ Function to generate a contour map from an scatter dataset of points, using one | method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW| | classmethod | integer | 0:equals, 1: heads&tails, 2:jenks, 3:quantiles | | steps | integer | Number of steps in the classification| -| max_time | integer | Max time in millisecons for processing time +| max_time | integer | Max time in milliseconds for processing time ### Returns Returns a table object From 2a665a60db16f7da675c62e2cc1fa564930daa44 Mon Sep 17 00:00:00 2001 From: Carla Date: Tue, 30 Aug 2016 13:44:58 +0200 Subject: [PATCH 39/62] Fix typo --- doc/15_tinmap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/15_tinmap.md b/doc/15_tinmap.md index 7bbd165..240acbe 100644 --- a/doc/15_tinmap.md +++ b/doc/15_tinmap.md @@ -2,7 +2,7 @@ Generates a fake contour map, in the form of a TIN map, from a set of scattered points.Depends on **CDB_Densify**. -Its iterative nature let's the user smooth the final result as much as desired, but with a exponential time cost increase +Its iterative nature lets the user smooth the final result as much as desired, but with a exponential time cost increase. ### CDB_TINmap(geomin geometry[], colin numeric[], iterations integer) From 2261d11de055f1dfacb1ff9da96a579e2b6c4e53 Mon Sep 17 00:00:00 2001 From: Carla Date: Tue, 30 Aug 2016 14:25:24 +0200 Subject: [PATCH 40/62] Reordering NEWS.md update step so that if somebody needs to go to a specific tag, the NEWS.md file is updated according to the version and not afterwards. --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 96f5fb4..3ed577a 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -20,6 +20,7 @@ shall be performed by the designated *Release Manager*. NOTE: you can rely on this thanks to the compatibility checks. TODO: automate this step [#94](https://github.com/CartoDB/crankshaft/issues/94) +2. Update the [NEWS.md](https://github.com/CartoDB/crankshaft/blob/master/NEWS.md) file 1. Commit and push the generated files. 1. Tag the release: @@ -29,7 +30,6 @@ shall be performed by the designated *Release Manager*. ``` 1. Deploy and test in staging 1. Deploy and test in production -2. Update the [NEWS.md](https://github.com/CartoDB/crankshaft/blob/master/NEWS.md) file 1. Merge back into develop From bbdd4de6ee305308d2d131d8bf5dc5e8f475a068 Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 2 Sep 2016 12:23:47 +0200 Subject: [PATCH 41/62] smart guessing optional --- doc/19_contour.md | 2 +- src/pg/sql/19_contour.sql | 23 ++++++++++------------- src/pg/test/sql/19_contour_test.sql | 2 +- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/doc/19_contour.md b/doc/19_contour.md index d6629e4..1463918 100644 --- a/doc/19_contour.md +++ b/doc/19_contour.md @@ -18,7 +18,7 @@ Function to generate a contour map from an scatter dataset of points, using one | method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW| | classmethod | integer | 0:equals, 1: heads&tails, 2:jenks, 3:quantiles | | steps | integer | Number of steps in the classification| -| max_time | integer | Max time in millisecons for processing time +| resolution | integer | if <= 0: max processing time in seconds (smart resolution) , if >0: resolution in meters ### Returns Returns a table object diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index 759b151..2dd9f62 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -5,7 +5,7 @@ CREATE OR REPLACE FUNCTION CDB_Contour( IN intmethod integer, IN classmethod integer, IN steps integer, - IN max_time integer DEFAULT 60000 + IN resolution integer DEFAULT -90 ) RETURNS TABLE( the_geom geometry, @@ -17,18 +17,11 @@ RETURNS TABLE( DECLARE cell_count integer; tin geometry[]; + max_time integer; BEGIN - -- calc the cell size in web mercator units - -- WITH center as ( - -- SELECT ST_centroid(ST_Collect(geomin)) as c - -- ) - -- SELECT - -- round(resolution / cos(ST_y(c) * pi()/180)) - -- INTO cell - -- FROM center; - -- raise notice 'Resol: %', cell; -- calc the optimal number of cells for the current dataset + max_time := -1 * resolution; SELECT CASE intmethod WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) @@ -70,9 +63,13 @@ BEGIN ), resolution as( SELECT - round(|/ ( - ST_area(geom) / cell_count - )) as cell + CASE WHEN resolution <= 0 THEN + round(|/ ( + ST_area(geom) / abs(cell_count) + )) + ELSE + resolution + END AS cell FROM envelope3857 ), grid as( diff --git a/src/pg/test/sql/19_contour_test.sql b/src/pg/test/sql/19_contour_test.sql index 70e5cf9..9225612 100644 --- a/src/pg/test/sql/19_contour_test.sql +++ b/src/pg/test/sql/19_contour_test.sql @@ -12,6 +12,6 @@ SELECT foo.* FROM a, - cdb_crankshaft.CDB_contour(a.g, a.vals, 0.0, 1, 3, 5, 60) foo + cdb_crankshaft.CDB_contour(a.g, a.vals, 0.0, 1, 3, 5, -60) foo ) SELECT bin, avg_value from b order by bin; From 90f36b40585835c1b60ad4491c52a2af8a0a9af9 Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 2 Sep 2016 12:35:54 +0200 Subject: [PATCH 42/62] drop --- src/pg/sql/19_contour.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index 2dd9f62..784779e 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -1,3 +1,5 @@ +DROP FUNCTION cdb_contour(geometry[],numeric[],numeric,integer,integer,integer,integer); + CREATE OR REPLACE FUNCTION CDB_Contour( IN geomin geometry[], IN colin numeric[], From 036a33aceddb3bfa041f1cb405d72b467653d3e1 Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 2 Sep 2016 12:58:13 +0200 Subject: [PATCH 43/62] drop --- src/pg/sql/19_contour.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index 784779e..ae48eb5 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -1,4 +1,4 @@ -DROP FUNCTION cdb_contour(geometry[],numeric[],numeric,integer,integer,integer,integer); +DROP FUNCTION IF EXISTS cdb_contour(geometry[],numeric[],numeric,integer,integer,integer,integer); CREATE OR REPLACE FUNCTION CDB_Contour( IN geomin geometry[], From b8ce37eb60c5da80d966330982d2dd952562464d Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 2 Sep 2016 14:37:52 +0200 Subject: [PATCH 44/62] back to the original signature --- src/pg/sql/19_contour.sql | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index ae48eb5..5c2cfc2 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -1,5 +1,3 @@ -DROP FUNCTION IF EXISTS cdb_contour(geometry[],numeric[],numeric,integer,integer,integer,integer); - CREATE OR REPLACE FUNCTION CDB_Contour( IN geomin geometry[], IN colin numeric[], @@ -7,7 +5,7 @@ CREATE OR REPLACE FUNCTION CDB_Contour( IN intmethod integer, IN classmethod integer, IN steps integer, - IN resolution integer DEFAULT -90 + IN max_time integer DEFAULT -90 ) RETURNS TABLE( the_geom geometry, @@ -19,11 +17,14 @@ RETURNS TABLE( DECLARE cell_count integer; tin geometry[]; - max_time integer; + resolution integer; BEGIN - -- calc the optimal number of cells for the current dataset + -- nasty trick to override issue #121 + resolution := max_time; max_time := -1 * resolution; + + -- calc the optimal number of cells for the current dataset SELECT CASE intmethod WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) From 139e86d414ce7b227ecb806862c0f45c1fd6b6bb Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 2 Sep 2016 14:43:48 +0200 Subject: [PATCH 45/62] back to the original signature --- src/pg/sql/19_contour.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index 5c2cfc2..18430c0 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -5,7 +5,7 @@ CREATE OR REPLACE FUNCTION CDB_Contour( IN intmethod integer, IN classmethod integer, IN steps integer, - IN max_time integer DEFAULT -90 + IN max_time integer DEFAULT 60000 ) RETURNS TABLE( the_geom geometry, From a530de80f106ab1ff1eef56272c8c1005aec9530 Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 2 Sep 2016 14:57:21 +0200 Subject: [PATCH 46/62] back to the original signature --- doc/19_contour.md | 2 +- src/pg/sql/19_contour.sql | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/19_contour.md b/doc/19_contour.md index b29ed89..fdee52e 100644 --- a/doc/19_contour.md +++ b/doc/19_contour.md @@ -18,7 +18,7 @@ Function to generate a contour map from an scatter dataset of points, using one | method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW| | classmethod | integer | 0:equals, 1: heads&tails, 2:jenks, 3:quantiles | | steps | integer | Number of steps in the classification| -| resolution | integer | if <= 0: max processing time in seconds (smart resolution) , if >0: resolution in meters +| max_time | integer | if <= 0: max processing time in seconds (smart resolution) , if >0: resolution in meters ### Returns Returns a table object diff --git a/src/pg/sql/19_contour.sql b/src/pg/sql/19_contour.sql index 18430c0..78f0cfd 100644 --- a/src/pg/sql/19_contour.sql +++ b/src/pg/sql/19_contour.sql @@ -21,6 +21,9 @@ DECLARE BEGIN -- nasty trick to override issue #121 + IF max_time = 0 THEN + max_time = -90; + END IF; resolution := max_time; max_time := -1 * resolution; From a6440f2ef7d98c7f2477806b9c8ce5efb9b165b8 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 2 Sep 2016 16:39:35 -0400 Subject: [PATCH 47/62] reports error string --- .../crankshaft/crankshaft/clustering/moran.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 3282f5f..a524892 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -38,10 +38,9 @@ def moran(subquery, attr_name, if len(result) == 0: return pu.empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) - except plpy.SPIError: - plpy.error('Error: areas of interest query failed, check input parameters') + except plpy.SPIError, e: + plpy.error('Query failed: %s' % e) plpy.notice('** Query failed: "%s"' % query) - plpy.notice('** Error: %s' % plpy.SPIError) return pu.empty_zipped_array(2) ## collect attributes @@ -79,8 +78,8 @@ def moran_local(subquery, attr, # if there are no neighbors, exit if len(result) == 0: return pu.empty_zipped_array(5) - except plpy.SPIError: - plpy.error('Error: areas of interest query failed, check input parameters') + except plpy.SPIError, e: + plpy.error('Query failed: %s' % e) plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(5) @@ -119,10 +118,9 @@ def moran_rate(subquery, numerator, denominator, if len(result) == 0: return pu.empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) - except plpy.SPIError: - plpy.error('Error: areas of interest query failed, check input parameters') + except plpy.SPIError, e: + plpy.error('Query failed: %s' % e) plpy.notice('** Query failed: "%s"' % query) - plpy.notice('** Error: %s' % plpy.SPIError) return pu.empty_zipped_array(2) ## collect attributes @@ -160,10 +158,9 @@ def moran_local_rate(subquery, numerator, denominator, # if there are no neighbors, exit if len(result) == 0: return pu.empty_zipped_array(5) - except plpy.SPIError: - plpy.error('Error: areas of interest query failed, check input parameters') + except plpy.SPIError, e: + plpy.error('Query failed: %s' % e) plpy.notice('** Query failed: "%s"' % query) - plpy.notice('** Error: %s' % plpy.SPIError) return pu.empty_zipped_array(5) ## collect attributes From feab6f177e44eea131f421345505431ee756e414 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 2 Sep 2016 17:52:41 -0400 Subject: [PATCH 48/62] clearer error message --- src/py/crankshaft/crankshaft/clustering/moran.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index a524892..8c5d909 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -39,7 +39,7 @@ def moran(subquery, attr_name, return pu.empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError, e: - plpy.error('Query failed: %s' % e) + plpy.error('Analysis failed: %s' % e) plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(2) @@ -79,7 +79,7 @@ def moran_local(subquery, attr, if len(result) == 0: return pu.empty_zipped_array(5) except plpy.SPIError, e: - plpy.error('Query failed: %s' % e) + plpy.error('Analysis failed: %s' % e) plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(5) @@ -119,7 +119,7 @@ def moran_rate(subquery, numerator, denominator, return pu.empty_zipped_array(2) plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError, e: - plpy.error('Query failed: %s' % e) + plpy.error('Analysis failed: %s' % e) plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(2) @@ -159,7 +159,7 @@ def moran_local_rate(subquery, numerator, denominator, if len(result) == 0: return pu.empty_zipped_array(5) except plpy.SPIError, e: - plpy.error('Query failed: %s' % e) + plpy.error('Analysis failed: %s' % e) plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(5) From 6d6d7ef2ba56af7fa7cff77c4c25f2f59602ab9b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 2 Sep 2016 17:58:19 -0400 Subject: [PATCH 49/62] removing notices --- src/py/crankshaft/crankshaft/clustering/moran.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 8c5d909..7a97233 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -30,17 +30,13 @@ def moran(subquery, attr_name, query = pu.construct_neighbor_query(w_type, qvals) - plpy.notice('** Query: %s' % query) - try: result = plpy.execute(query) # if there are no neighbors, exit if len(result) == 0: return pu.empty_zipped_array(2) - plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError, e: plpy.error('Analysis failed: %s' % e) - plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(2) ## collect attributes @@ -80,7 +76,6 @@ def moran_local(subquery, attr, return pu.empty_zipped_array(5) except plpy.SPIError, e: plpy.error('Analysis failed: %s' % e) - plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(5) attr_vals = pu.get_attributes(result) @@ -110,17 +105,13 @@ def moran_rate(subquery, numerator, denominator, query = pu.construct_neighbor_query(w_type, qvals) - plpy.notice('** Query: %s' % query) - try: result = plpy.execute(query) # if there are no neighbors, exit if len(result) == 0: return pu.empty_zipped_array(2) - plpy.notice('** Query returned with %d rows' % len(result)) except plpy.SPIError, e: plpy.error('Analysis failed: %s' % e) - plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(2) ## collect attributes @@ -160,7 +151,6 @@ def moran_local_rate(subquery, numerator, denominator, return pu.empty_zipped_array(5) except plpy.SPIError, e: plpy.error('Analysis failed: %s' % e) - plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(5) ## collect attributes @@ -183,7 +173,6 @@ def moran_local_bv(subquery, attr1, attr2, """ Moran's I (local) Bivariate (untested) """ - plpy.notice('** Constructing query') qvals = OrderedDict([("id_col", id_col), ("attr1", attr1), @@ -202,7 +191,6 @@ def moran_local_bv(subquery, attr1, attr2, except plpy.SPIError: plpy.error("Error: areas of interest query failed, " \ "check input parameters") - plpy.notice('** Query failed: "%s"' % query) return pu.empty_zipped_array(4) ## collect attributes @@ -216,13 +204,9 @@ def moran_local_bv(subquery, attr1, attr2, lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, permutations=permutations) - plpy.notice("len of Is: %d" % len(lisa.Is)) - # find clustering of significance lisa_sig = quad_position(lisa.q) - plpy.notice('** Finished calculations') - return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) # Low level functions ---------------------------------------- From c229a8549101a127fa0abdcdde6c53833874d00a Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 2 Sep 2016 18:02:56 -0400 Subject: [PATCH 50/62] adding better reporting to markov --- src/py/crankshaft/crankshaft/space_time_dynamics/markov.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py index bbf524d..ae788d7 100644 --- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py +++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -56,9 +56,9 @@ def spatial_markov_trend(subquery, time_cols, num_classes=7, ) if len(query_result) == 0: return zip([None], [None], [None], [None], [None]) - except plpy.SPIError, err: + except plpy.SPIError, e: plpy.debug('Query failed with exception %s: %s' % (err, pu.construct_neighbor_query(w_type, qvals))) - plpy.error('Query failed, check the input parameters') + plpy.error('Analysis failed: %s' % e) return zip([None], [None], [None], [None], [None]) ## build weight From 2ff20e596ee8f8ec0dbc30b53a25777262b27694 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 20 Sep 2016 12:57:41 +0200 Subject: [PATCH 51/62] add NN(s) support --- doc/08_interpolation.md | 6 +++--- src/pg/sql/08_interpolation.sql | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/doc/08_interpolation.md b/doc/08_interpolation.md index 22fc1bc..87b5124 100644 --- a/doc/08_interpolation.md +++ b/doc/08_interpolation.md @@ -2,7 +2,7 @@ Function to interpolate a numeric attribute of a point in a scatter dataset of points, using one of three methos: -* [Nearest neighbor](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) +* [Nearest neighbor(s)](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) * [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system) * [IDW](https://en.wikipedia.org/wiki/Inverse_distance_weighting) @@ -15,7 +15,7 @@ Function to interpolate a numeric attribute of a point in a scatter dataset of p | query | text | query that returns at least `the_geom` and a numeric value as `attrib` | | point | geometry | The target point to calc the value | | method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW| -| p1 | integer | IDW: limit the number of neighbors, 0->no limit| +| p1 | integer | limit the number of neighbors, IDW: 0->no limit, NN: 0-> closest one| | p2 | integer | IDW: order of distance decay, 0-> order 1| ### CDB_SpatialInterpolation (geom geometry[], values numeric[], point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, ps integer DEFAULT 0) @@ -28,7 +28,7 @@ Function to interpolate a numeric attribute of a point in a scatter dataset of p | values | numeric[] | Array of points' values for the param under study| | point | geometry | The target point to calc the value | | method | integer | 0:nearest neighbor, 1: barycentric, 2: IDW| -| p1 | integer | IDW: limit the number of neighbors, 0->no limit| +| p1 | integer | limit the number of neighbors, IDW: 0->no limit, NN: 0-> closest one| | p2 | integer | IDW: order of distance decay, 0-> order 1| ### Returns diff --git a/src/pg/sql/08_interpolation.sql b/src/pg/sql/08_interpolation.sql index 0937f09..608583d 100644 --- a/src/pg/sql/08_interpolation.sql +++ b/src/pg/sql/08_interpolation.sql @@ -1,4 +1,4 @@ --- 0: nearest neighbor +-- 0: nearest neighbor (s) -- 1: barymetric -- 2: IDW @@ -51,11 +51,17 @@ DECLARE output numeric; BEGIN output := -999.999; - -- nearest + + -- nearest neighbors + -- p1: limit the number of neighbors, 0-> closest one IF method = 0 THEN + IF p1 = 0 THEN + p1 := 1; + END IF; + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v) - SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1; + SELECT avg(a.v) INTO output FROM a ORDER BY point<->a.g LIMIT p1::integer; RETURN output; -- barymetric @@ -121,6 +127,11 @@ BEGIN SELECT sum(b.f)/sum(b.k) INTO output FROM b; RETURN output; + -- krigin + ELSIF method = 3 THEN + + + END IF; RETURN -777.777; From 4902f6a9d487f9f9754c54353fec83741e7a8f4a Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 20 Sep 2016 13:37:50 +0200 Subject: [PATCH 52/62] add NN(s) support --- src/pg/sql/08_interpolation.sql | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/pg/sql/08_interpolation.sql b/src/pg/sql/08_interpolation.sql index 608583d..d13d8e7 100644 --- a/src/pg/sql/08_interpolation.sql +++ b/src/pg/sql/08_interpolation.sql @@ -1,6 +1,8 @@ --- 0: nearest neighbor (s) +-- 0: nearest neighbor(s) -- 1: barymetric -- 2: IDW +-- 3: krigin + CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( IN query text, @@ -60,8 +62,9 @@ BEGIN p1 := 1; END IF; - WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v) - SELECT avg(a.v) INTO output FROM a ORDER BY point<->a.g LIMIT p1::integer; + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.v as v FROM a ORDER BY point<->a.g LIMIT p1::integer) + SELECT avg(b.v) INTO output FROM b; RETURN output; -- barymetric From 37e4fc7cad6510a80281832ee25513387a1f85aa Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 20 Sep 2016 13:57:58 +0200 Subject: [PATCH 53/62] add NN(s) support --- src/pg/test/expected/08_interpolation_test.out | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index 1e4502a..e2c9287 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -1,7 +1,7 @@ SET client_min_messages TO WARNING; \set ECHO none - nn | nni | idw ------+--------------------------+----------------- - 200 | 238.41059602632179224595 | 341.46260750526 + nn | nni | idw +----------------------+--------------------------+----------------- + 200.0000000000000000 | 238.41059602632179224595 | 341.46260750526 (1 row) From 754e66c02cc4159be161098cc8389da7d48179f6 Mon Sep 17 00:00:00 2001 From: abelvm Date: Wed, 21 Sep 2016 14:24:35 +0200 Subject: [PATCH 54/62] leftovers and docs --- doc/08_interpolation.md | 3 +++ src/pg/sql/08_interpolation.sql | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/08_interpolation.md b/doc/08_interpolation.md index 87b5124..c17269e 100644 --- a/doc/08_interpolation.md +++ b/doc/08_interpolation.md @@ -37,6 +37,9 @@ Function to interpolate a numeric attribute of a point in a scatter dataset of p |-------------|------|-------------| | value | numeric | Interpolated value at the given point, `-888.888` if the given point is out of the boundaries of the source points set | +Default values: +* -888.888: when using Barycentric, the target point is out of the realm of the input points +* -777.777: asking for a method not available #### Example Usage diff --git a/src/pg/sql/08_interpolation.sql b/src/pg/sql/08_interpolation.sql index d13d8e7..cb0a20a 100644 --- a/src/pg/sql/08_interpolation.sql +++ b/src/pg/sql/08_interpolation.sql @@ -1,7 +1,7 @@ -- 0: nearest neighbor(s) -- 1: barymetric -- 2: IDW --- 3: krigin +-- 3: krigin ---> TO DO CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( @@ -52,7 +52,7 @@ DECLARE vc numeric; output numeric; BEGIN - output := -999.999; + -- output := -999.999; -- nearest neighbors -- p1: limit the number of neighbors, 0-> closest one @@ -133,7 +133,7 @@ BEGIN -- krigin ELSIF method = 3 THEN - + -- TO DO END IF; From 84896e0634e5500cf6f4a5b4c7816fa0475e3a30 Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Wed, 21 Sep 2016 17:47:55 +0200 Subject: [PATCH 55/62] Release 0.4.1 and update NEWS.md --- NEWS.md | 6 + release/crankshaft--0.4.0--0.4.1.sql | 1965 +++++++++++++++++ release/crankshaft--0.4.1.sql | 1965 +++++++++++++++++ release/crankshaft.control | 2 +- .../0.4.1/crankshaft/crankshaft/__init__.py | 5 + .../crankshaft/clustering/__init__.py | 3 + .../crankshaft/clustering/kmeans.py | 18 + .../crankshaft/crankshaft/clustering/moran.py | 243 ++ .../crankshaft/pysal_utils/__init__.py | 2 + .../crankshaft/pysal_utils/pysal_utils.py | 188 ++ .../crankshaft/crankshaft/random_seeds.py | 11 + .../crankshaft/segmentation/__init__.py | 1 + .../crankshaft/segmentation/segmentation.py | 176 ++ .../space_time_dynamics/__init__.py | 2 + .../crankshaft/space_time_dynamics/markov.py | 189 ++ release/python/0.4.1/crankshaft/setup.py | 49 + release/python/0.4.1/crankshaft/setup.py-r | 49 + .../crankshaft/test/fixtures/kmeans.json | 1 + .../crankshaft/test/fixtures/markov.json | 1 + .../0.4.1/crankshaft/test/fixtures/moran.json | 52 + .../crankshaft/test/fixtures/neighbors.json | 54 + .../test/fixtures/neighbors_markov.json | 1 + .../python/0.4.1/crankshaft/test/helper.py | 13 + .../python/0.4.1/crankshaft/test/mock_plpy.py | 52 + .../crankshaft/test/test_cluster_kmeans.py | 38 + .../crankshaft/test/test_clustering_moran.py | 88 + .../0.4.1/crankshaft/test/test_pysal_utils.py | 142 ++ .../crankshaft/test/test_segmentation.py | 64 + .../test/test_space_time_dynamics.py | 324 +++ src/pg/crankshaft.control | 2 +- 30 files changed, 5704 insertions(+), 2 deletions(-) create mode 100644 release/crankshaft--0.4.0--0.4.1.sql create mode 100644 release/crankshaft--0.4.1.sql create mode 100644 release/python/0.4.1/crankshaft/crankshaft/__init__.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/clustering/__init__.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/clustering/kmeans.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/clustering/moran.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/pysal_utils/pysal_utils.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/random_seeds.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/segmentation/__init__.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/segmentation/segmentation.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/space_time_dynamics/__init__.py create mode 100644 release/python/0.4.1/crankshaft/crankshaft/space_time_dynamics/markov.py create mode 100644 release/python/0.4.1/crankshaft/setup.py create mode 100644 release/python/0.4.1/crankshaft/setup.py-r create mode 100644 release/python/0.4.1/crankshaft/test/fixtures/kmeans.json create mode 100644 release/python/0.4.1/crankshaft/test/fixtures/markov.json create mode 100644 release/python/0.4.1/crankshaft/test/fixtures/moran.json create mode 100644 release/python/0.4.1/crankshaft/test/fixtures/neighbors.json create mode 100644 release/python/0.4.1/crankshaft/test/fixtures/neighbors_markov.json create mode 100644 release/python/0.4.1/crankshaft/test/helper.py create mode 100644 release/python/0.4.1/crankshaft/test/mock_plpy.py create mode 100644 release/python/0.4.1/crankshaft/test/test_cluster_kmeans.py create mode 100644 release/python/0.4.1/crankshaft/test/test_clustering_moran.py create mode 100644 release/python/0.4.1/crankshaft/test/test_pysal_utils.py create mode 100644 release/python/0.4.1/crankshaft/test/test_segmentation.py create mode 100644 release/python/0.4.1/crankshaft/test/test_space_time_dynamics.py diff --git a/NEWS.md b/NEWS.md index 791cc50..1769902 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +0.4.1 (2016-09-21) +------------------ +* Let the user set the resolution in CDB_Contour function +* Add Nearest Neighbors method to CDB_SpatialInterpolation +* Improve error reporting for moran and markov functions + 0.4.0 (2016-08-30) ------------------ * Add CDB_Contour diff --git a/release/crankshaft--0.4.0--0.4.1.sql b/release/crankshaft--0.4.0--0.4.1.sql new file mode 100644 index 0000000..eac53fe --- /dev/null +++ b/release/crankshaft--0.4.0--0.4.1.sql @@ -0,0 +1,1965 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.4.1'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. +CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION + CDB_PyAggS(current_state Numeric[], current_row Numeric[]) + returns NUMERIC[] as $$ + BEGIN + if array_upper(current_state,1) is null then + RAISE NOTICE 'setting state %',array_upper(current_row,1); + current_state[1] = array_upper(current_row,1); + end if; + return array_cat(current_state,current_row) ; + END + $$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_pyagg' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) ( + SFUNC = CDB_PyAggS, + STYPE = Numeric[], + INITCOND = "{}" + ); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment( + target NUMERIC[], + features NUMERIC[], + target_features NUMERIC[], + target_ids NUMERIC[], + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC) +AS $$ + import numpy as np + import plpy + + from crankshaft.segmentation import create_and_predict_segment_agg + model_params = {'n_estimators': n_estimators, + 'max_depth': max_depth, + 'subsample': subsample, + 'learning_rate': learning_rate, + 'min_samples_leaf': min_samples_leaf} + + def unpack2D(data): + dimension = data.pop(0) + a = np.array(data, dtype=float) + return a.reshape(len(a)/dimension, dimension) + + return create_and_predict_segment_agg(np.array(target, dtype=float), + unpack2D(features), + unpack2D(target_features), + target_ids, + model_params) + +$$ LANGUAGE plpythonu; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment ( + query TEXT, + variable_name TEXT, + target_table TEXT, + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC) +AS $$ + from crankshaft.segmentation import create_and_predict_segment + model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf} + return create_and_predict_segment(query,variable_name,target_table, model_params) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; +-- 0: nearest neighbor(s) +-- 1: barymetric +-- 2: IDW +-- 3: krigin ---> TO DO + + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN query text, + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + output numeric; +BEGIN + EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; +END; +$$ +language plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN geomin geometry[], + IN colin numeric[], + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + gs2 geometry[]; + vs2 numeric[]; + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- output := -999.999; + + -- nearest neighbors + -- p1: limit the number of neighbors, 0-> closest one + IF method = 0 THEN + + IF p1 = 0 THEN + p1 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.v as v FROM a ORDER BY point<->a.g LIMIT p1::integer) + SELECT avg(b.v) INTO output FROM b; + RETURN output; + + -- barymetric + ELSIF method = 1 THEN + WITH a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b), + d as (SELECT v FROM c WHERE ST_Within(point, v)) + SELECT v INTO g FROM d; + IF g is null THEN + -- out of the realm of the input data + RETURN -888.888; + END IF; + -- vertex of the selected cell + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg); + RETURN output; + + -- IDW + -- p1: limit the number of neighbors, 0->no limit + -- p2: order of distance decay, 0-> order 1 + ELSIF method = 2 THEN + + IF p2 = 0 THEN + p2 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g) + SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b; + IF p1::integer>0 THEN + gs2:=gs; + vs2:=vs; + FOR i IN 1..p1 + LOOP + gs2 := gs2 || gs[i]; + vs2 := vs2 || vs[i]; + END LOOP; + ELSE + gs2:=gs; + vs2:=vs; + END IF; + + WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v), + b as ( + SELECT + (1/ST_distance(point, a.g)^p2::integer) as k, + (a.v/ST_distance(point, a.g)^p2::integer) as f + FROM a + ) + SELECT sum(b.f)/sum(b.k) INTO output FROM b; + RETURN output; + + -- krigin + ELSIF method = 3 THEN + + -- TO DO + + END IF; + + RETURN -777.777; + +END; +$$ +language plpgsql IMMUTABLE; +-- ============================================================================================= +-- +-- CDB_Voronoi +-- +-- ============================================================================================= +CREATE OR REPLACE FUNCTION CDB_voronoi( + IN geomin geometry[], + IN buffer numeric DEFAULT 0.5, + IN tolerance numeric DEFAULT 1e-9 + ) +RETURNS geometry AS $$ +DECLARE + geomout geometry; +BEGIN + -- we need to make the geometry calculations in (pseudo)meters!!! + with a as ( + SELECT unnest(geomin) as g1 + ), + b as( + SELECT st_transform(g1, 3857) g2 from a + ) + SELECT array_agg(g2) INTO geomin from b; + + WITH + convexhull_1 as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r + ), + clipper as( + SELECT + st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g + FROM convexhull_1 a + ), + env0 as ( + SELECT + (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e + FROM convexhull_1 a + ), + env as ( + SELECT + array_agg(env0.e) as e + FROM env0 + ), + sample AS ( + SELECT + ST_Collect(geomin || env.e) as geom + FROM env + ), + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as cg + ), + tin as ( + SELECT + ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd + FROM + sample + ), + tin_polygons as ( + SELECT + (gd).Path as id, + (gd).Geom as pg, + ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct + FROM tin + ), + tin_lines as ( + SELECT + id, + ST_ExteriorRing(pg) as lg + FROM tin_polygons + ), + tin_nodes as ( + SELECT + id, + ST_PointN(lg,1) p1, + ST_PointN(lg,2) p2, + ST_PointN(lg,3) p3 + FROM tin_lines + ), + tin_edges AS ( + SELECT + p.id, + UNNEST(ARRAY[ + ST_MakeLine(n.p1,n.p2) , + ST_MakeLine(n.p2,n.p3) , + ST_MakeLine(n.p3,n.p1)]) as Edge, + ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct, + CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN + TRUE + ELSE FALSE END AS ctx, + p.pg, + ST_within(p.ct, convexhull.cg) as ctin + FROM + tin_polygons p, + tin_nodes n, + convexhull + WHERE p.id = n.id + ), + voro_nodes as ( + SELECT + CASE WHEN x.ctx = TRUE THEN + ST_Centroid(x.edge) + ELSE + x.ct + END as xct, + CASE WHEN y.id is null THEN + CASE WHEN x.ctin = TRUE THEN + ST_SetSRID(ST_MakePoint( + ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), + ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) + ), ST_SRID(x.ct)) + END + ELSE + y.ct + END as yct + FROM + tin_edges x + LEFT OUTER JOIN + tin_edges y + ON x.id <> y.id AND ST_Equals(x.edge, y.edge) + ), + voro_edges as( + SELECT + ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v + FROM + voro_nodes + ), + voro_cells as( + SELECT + ST_Polygonize( + ST_Node( + ST_LineMerge( + ST_Union(v, ST_ExteriorRing( + ST_Convexhull(v) + ) + ) + ) + ) + ) as g + FROM + voro_edges + ), + voro_set as( + SELECT + (st_dump(v.g)).geom as g + FROM voro_cells v + ), + clipped_voro as( + SELECT + ST_intersection(c.g, v.g) as g + FROM + voro_set v, + clipper c + WHERE + ST_GeometryType(v.g) = 'ST_Polygon' + ) + SELECT + st_collect( + ST_Transform( + ST_ConvexHull(g), + 4326 + ) + ) + INTO geomout + FROM + clipped_voro; + RETURN geomout; +END; +$$ language plpgsql IMMUTABLE; + +/** ---------------------------------------------------------------------------------------- + * @function : FindCircle + * @precis : Function that determines if three points form a circle. If so a table containing + * centre and radius is returned. If not, a null table is returned. + * @version : 1.0.1 + * @param : p_pt1 : First point in curve + * @param : p_pt2 : Second point in curve + * @param : p_pt3 : Third point in curve + * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle + * or NULL if three points do not form a circle. + * @history : Simon Greener - Feb 2012 - Original coding. + * Rafa de la Torre - Aug 2016 - Small fix for type checking + * @copyright : Simon Greener @ 2012 + * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/) +**/ +CREATE OR REPLACE FUNCTION _Find_Circle( + IN p_pt1 geometry, + IN p_pt2 geometry, + IN p_pt3 geometry) + RETURNS geometry AS +$BODY$ +DECLARE + v_Centre geometry; + v_radius NUMERIC; + v_CX NUMERIC; + v_CY NUMERIC; + v_dA NUMERIC; + v_dB NUMERIC; + v_dC NUMERIC; + v_dD NUMERIC; + v_dE NUMERIC; + v_dF NUMERIC; + v_dG NUMERIC; +BEGIN + IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN + RAISE EXCEPTION 'All supplied points must be not null.'; + RETURN NULL; + END IF; + IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR + ST_GeometryType(p_pt2) <> 'ST_Point' OR + ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN + RAISE EXCEPTION 'All supplied geometries must be points.'; + RETURN NULL; + END IF; + v_dA := ST_X(p_pt2) - ST_X(p_pt1); + v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); + v_dC := ST_X(p_pt3) - ST_X(p_pt1); + v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); + v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); + v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); + v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); + -- If v_dG is zero then the three points are collinear and no finite-radius + -- circle through them exists. + IF ( v_dG = 0 ) THEN + RETURN NULL; + ELSE + v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; + v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; + v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); + END IF; + RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); +END; +$BODY$ + LANGUAGE plpgsql VOLATILE STRICT; + +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_weightedmean' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) ( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" + ); + END IF; +END +$$ LANGUAGE plpgsql; +-- Spatial Markov + +-- input table format: +-- id | geom | date_1 | date_2 | date_3 +-- 1 | Pt1 | 12.3 | 13.1 | 14.2 +-- 2 | Pt2 | 11.0 | 13.2 | 12.5 +-- ... +-- Sample Function call: +-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate', +-- Array['date_1', 'date_2', 'date_3']) + +CREATE OR REPLACE FUNCTION + CDB_SpatialMarkovTrend ( + subquery TEXT, + time_cols TEXT[], + num_classes INT DEFAULT 7, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT) +AS $$ + + from crankshaft.space_time_dynamics import spatial_markov_trend + + ## TODO: use named parameters or a dictionary + return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- input table format: identical to above but in a predictable format +-- Sample function call: +-- SELECT cdb_spatial_markov('SELECT * FROM real_estate', +-- 'date_1') + + +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col_min text, +-- time_col_max text, +-- date_format text, -- '_YYYY_MM_DD' +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- +-- -- input table format: +-- -- id | geom | date | measurement +-- -- 1 | Pt1 | 12/3 | 13.2 +-- -- 2 | Pt2 | 11/5 | 11.3 +-- -- 3 | Pt1 | 11/13 | 12.9 +-- -- 4 | Pt3 | 12/19 | 10.1 +-- -- ... +-- +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col text, +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +CREATE OR REPLACE FUNCTION CDB_PIA( + IN polygon geometry, + IN tolerance numeric DEFAULT 1.0 + ) +RETURNS geometry AS $$ +DECLARE + env geometry[]; + cells geometry[]; + cell geometry; + best_c geometry; + best_d numeric; + test_d numeric; + test_mx numeric; + test_h numeric; + test_cells geometry[]; + width numeric; + height numeric; + h numeric; + i integer; + n integer; + sqr numeric; + p geometry; +BEGIN + sqr := |/2; + polygon := ST_Transform(polygon, 3857); + + -- grid #0 cell size + height := ST_YMax(polygon) - ST_YMin(polygon); + width := ST_XMax(polygon) - ST_XMin(polygon); + h := 0.5*LEAST(height, width); + + -- grid #0 + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c + ) + SELECT array_agg(c) INTO cells FROM c1; + + -- 1st guess: centroid + best_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(Polygon)); + + -- looping the loop + n := array_length(cells,1); + i := 1; + LOOP + + EXIT WHEN i > n; + + cell := cells[i]; + i := i+1; + + -- cell side size, it's square + test_h := ST_XMax(cell) - ST_XMin(cell) ; + + -- check distance + test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell)); + IF test_d > best_d THEN + best_d := test_d; + best_c := cells[i]; + END IF; + + -- longest distance within the cell + test_mx := test_d + (test_h/2 * sqr); + + -- if the cell has no chance to contains the desired point, continue + CONTINUE WHEN test_mx - best_d <= tolerance; + + -- resample the cell + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c + ) + SELECT array_agg(c) INTO test_cells FROM c1; + + -- concat the new cells to the former array + cells := cells || test_cells; + + -- prepare next iteration + n := array_length(cells,1); + + END LOOP; + + RETURN ST_transform(ST_Centroid(best_c), 4326); + +END; +$$ language plpgsql IMMUTABLE; + + +-- signed distance point to polygon with holes +-- negative is the point is out the polygon +CREATE OR REPLACE FUNCTION _Signed_Dist( + IN polygon geometry, + IN point geometry + ) +RETURNS numeric AS $$ +DECLARE + i integer; + within integer; + holes integer; + dist numeric; +BEGIN + dist := 1e999; + SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist; + SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within; + SELECT ST_NumInteriorRings(polygon) INTO holes; + IF holes > 0 THEN + FOR i IN 1..holes + LOOP + SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist; + END LOOP; + END IF; + dist := dist * within::numeric; + RETURN dist; +END; +$$ language plpgsql IMMUTABLE; +-- +-- Iterative densification of a set of points using Delaunay triangulation +-- the new points have as assigned value the average value of the 3 vertex (centroid) +-- +-- @param geomin - array of geometries (points) +-- +-- @param colin - array of numeric values in that points +-- +-- @param iterations - integer, number of iterations +-- +-- +-- Returns: TABLE(geomout geometry, colout numeric) +-- +-- +CREATE OR REPLACE FUNCTION CDB_Densify( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + geotemp geometry[]; + coltemp numeric[]; + i integer; + gs geometry[]; + g geometry; + vertex geometry[]; + va numeric; + vb numeric; + vc numeric; + center geometry; + centerval numeric; + tmp integer; +BEGIN + geotemp := geomin; + coltemp := colin; + FOR i IN 1..iterations + LOOP + -- generate TIN + WITH a as (SELECT unnest(geotemp) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + -- loop cells + FOREACH g IN ARRAY gs + LOOP + -- append centroid + SELECT ST_Centroid(g) INTO center; + geotemp := array_append(geotemp, center); + -- retrieve the value of each vertex + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + END LOOP; + RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_TINmap( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + p geometry[]; + vals numeric[]; + gs geometry[]; + g geometry; + vertex geometry[]; + centerval numeric; + va numeric; + vb numeric; + vc numeric; + coltemp numeric[]; +BEGIN + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens; + WITH a as (SELECT unnest(p) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + FOREACH g IN ARRAY gs + LOOP + -- retrieve the vertex of each triangle + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_Contour( + IN geomin geometry[], + IN colin numeric[], + IN buffer numeric, + IN intmethod integer, + IN classmethod integer, + IN steps integer, + IN max_time integer DEFAULT 60000 + ) +RETURNS TABLE( + the_geom geometry, + bin integer, + min_value numeric, + max_value numeric, + avg_value numeric +) AS $$ +DECLARE + cell_count integer; + tin geometry[]; + resolution integer; +BEGIN + + -- nasty trick to override issue #121 + IF max_time = 0 THEN + max_time = -90; + END IF; + resolution := max_time; + max_time := -1 * resolution; + + -- calc the optimal number of cells for the current dataset + SELECT + CASE intmethod + WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) + WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797) + WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218) + ELSE 10000 + END INTO cell_count; + + -- we don't have iterative barycentric interpolation in CDB_interpolation, + -- and it's a costy function, so let's make a custom one here till + -- we update the code + -- tin := ARRAY[]::geometry[]; + IF intmethod=1 THEN + WITH + a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b) + SELECT array_agg(v) INTO tin FROM c; + END IF; + -- Delaunay stuff performed just ONCE!! + + -- magic + RETURN QUERY + WITH + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r + ), + envelope as ( + SELECT + st_expand(a.g, a.r) as e + FROM convexhull a + ), + envelope3857 as( + SELECT + ST_Transform(e, 3857) as geom + FROM envelope + ), + resolution as( + SELECT + CASE WHEN resolution <= 0 THEN + round(|/ ( + ST_area(geom) / abs(cell_count) + )) + ELSE + resolution + END AS cell + FROM envelope3857 + ), + grid as( + SELECT + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom + FROM envelope3857 e, resolution r + ), + interp as( + SELECT + geom, + CASE + WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) + ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + END as val + FROM grid + ), + classes as( + SELECT CASE + WHEN classmethod = 0 THEN + cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps) + WHEN classmethod = 1 THEN + cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps) + WHEN classmethod = 2 THEN + cdb_crankshaft.CDB_JenksBins(array_agg(val), steps) + ELSE + cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps) + END as b + FROM interp + where val is not null + ), + classified as( + SELECT + i.*, + width_bucket(i.val, c.b) as bucket + FROM interp i left join classes c + ON 1=1 + ), + classified2 as( + SELECT + geom, + val, + CASE + WHEN bucket = steps THEN bucket - 1 + ELSE bucket + END as b + FROM classified + ), + final as( + SELECT + st_union(geom) as the_geom, + b as bin, + min(val) as min_value, + max(val) as max_value, + avg(val) as avg_value + FROM classified2 + GROUP BY bin + ) + SELECT + * + FROM final + where final.bin is not null + ; +END; +$$ language plpgsql; + + + +-- ===================================================================== +-- Interp in grid, so we can use barycentric with a precalculated tin (NNI) +-- ===================================================================== +CREATE OR REPLACE FUNCTION _interp_in_tin( + IN geomin geometry[], + IN colin numeric[], + IN tin geometry[], + IN point geometry + ) +RETURNS numeric AS +$$ +DECLARE + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- get the cell the point is within + WITH + a as (SELECT unnest(tin) as v), + b as (SELECT v FROM a WHERE ST_Within(point, v)) + SELECT v INTO g FROM b; + + -- if we're out of the data realm, + -- return null + IF g is null THEN + RETURN null; + END IF; + + -- vertex of the selected cell + WITH a AS ( + SELECT (ST_DumpPoints(g)).geom AS v + ) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + -- calc the areas + SELECT + ST_area(g), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1); + RETURN output; +END; +$$ +language plpgsql; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. +-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; diff --git a/release/crankshaft--0.4.1.sql b/release/crankshaft--0.4.1.sql new file mode 100644 index 0000000..eac53fe --- /dev/null +++ b/release/crankshaft--0.4.1.sql @@ -0,0 +1,1965 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.4.1'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. +CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION + CDB_PyAggS(current_state Numeric[], current_row Numeric[]) + returns NUMERIC[] as $$ + BEGIN + if array_upper(current_state,1) is null then + RAISE NOTICE 'setting state %',array_upper(current_row,1); + current_state[1] = array_upper(current_row,1); + end if; + return array_cat(current_state,current_row) ; + END + $$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_pyagg' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) ( + SFUNC = CDB_PyAggS, + STYPE = Numeric[], + INITCOND = "{}" + ); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment( + target NUMERIC[], + features NUMERIC[], + target_features NUMERIC[], + target_ids NUMERIC[], + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC) +AS $$ + import numpy as np + import plpy + + from crankshaft.segmentation import create_and_predict_segment_agg + model_params = {'n_estimators': n_estimators, + 'max_depth': max_depth, + 'subsample': subsample, + 'learning_rate': learning_rate, + 'min_samples_leaf': min_samples_leaf} + + def unpack2D(data): + dimension = data.pop(0) + a = np.array(data, dtype=float) + return a.reshape(len(a)/dimension, dimension) + + return create_and_predict_segment_agg(np.array(target, dtype=float), + unpack2D(features), + unpack2D(target_features), + target_ids, + model_params) + +$$ LANGUAGE plpythonu; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment ( + query TEXT, + variable_name TEXT, + target_table TEXT, + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC) +AS $$ + from crankshaft.segmentation import create_and_predict_segment + model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf} + return create_and_predict_segment(query,variable_name,target_table, model_params) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; +-- 0: nearest neighbor(s) +-- 1: barymetric +-- 2: IDW +-- 3: krigin ---> TO DO + + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN query text, + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + output numeric; +BEGIN + EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; +END; +$$ +language plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN geomin geometry[], + IN colin numeric[], + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + gs2 geometry[]; + vs2 numeric[]; + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- output := -999.999; + + -- nearest neighbors + -- p1: limit the number of neighbors, 0-> closest one + IF method = 0 THEN + + IF p1 = 0 THEN + p1 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.v as v FROM a ORDER BY point<->a.g LIMIT p1::integer) + SELECT avg(b.v) INTO output FROM b; + RETURN output; + + -- barymetric + ELSIF method = 1 THEN + WITH a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b), + d as (SELECT v FROM c WHERE ST_Within(point, v)) + SELECT v INTO g FROM d; + IF g is null THEN + -- out of the realm of the input data + RETURN -888.888; + END IF; + -- vertex of the selected cell + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg); + RETURN output; + + -- IDW + -- p1: limit the number of neighbors, 0->no limit + -- p2: order of distance decay, 0-> order 1 + ELSIF method = 2 THEN + + IF p2 = 0 THEN + p2 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g) + SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b; + IF p1::integer>0 THEN + gs2:=gs; + vs2:=vs; + FOR i IN 1..p1 + LOOP + gs2 := gs2 || gs[i]; + vs2 := vs2 || vs[i]; + END LOOP; + ELSE + gs2:=gs; + vs2:=vs; + END IF; + + WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v), + b as ( + SELECT + (1/ST_distance(point, a.g)^p2::integer) as k, + (a.v/ST_distance(point, a.g)^p2::integer) as f + FROM a + ) + SELECT sum(b.f)/sum(b.k) INTO output FROM b; + RETURN output; + + -- krigin + ELSIF method = 3 THEN + + -- TO DO + + END IF; + + RETURN -777.777; + +END; +$$ +language plpgsql IMMUTABLE; +-- ============================================================================================= +-- +-- CDB_Voronoi +-- +-- ============================================================================================= +CREATE OR REPLACE FUNCTION CDB_voronoi( + IN geomin geometry[], + IN buffer numeric DEFAULT 0.5, + IN tolerance numeric DEFAULT 1e-9 + ) +RETURNS geometry AS $$ +DECLARE + geomout geometry; +BEGIN + -- we need to make the geometry calculations in (pseudo)meters!!! + with a as ( + SELECT unnest(geomin) as g1 + ), + b as( + SELECT st_transform(g1, 3857) g2 from a + ) + SELECT array_agg(g2) INTO geomin from b; + + WITH + convexhull_1 as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r + ), + clipper as( + SELECT + st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g + FROM convexhull_1 a + ), + env0 as ( + SELECT + (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e + FROM convexhull_1 a + ), + env as ( + SELECT + array_agg(env0.e) as e + FROM env0 + ), + sample AS ( + SELECT + ST_Collect(geomin || env.e) as geom + FROM env + ), + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as cg + ), + tin as ( + SELECT + ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd + FROM + sample + ), + tin_polygons as ( + SELECT + (gd).Path as id, + (gd).Geom as pg, + ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct + FROM tin + ), + tin_lines as ( + SELECT + id, + ST_ExteriorRing(pg) as lg + FROM tin_polygons + ), + tin_nodes as ( + SELECT + id, + ST_PointN(lg,1) p1, + ST_PointN(lg,2) p2, + ST_PointN(lg,3) p3 + FROM tin_lines + ), + tin_edges AS ( + SELECT + p.id, + UNNEST(ARRAY[ + ST_MakeLine(n.p1,n.p2) , + ST_MakeLine(n.p2,n.p3) , + ST_MakeLine(n.p3,n.p1)]) as Edge, + ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct, + CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN + TRUE + ELSE FALSE END AS ctx, + p.pg, + ST_within(p.ct, convexhull.cg) as ctin + FROM + tin_polygons p, + tin_nodes n, + convexhull + WHERE p.id = n.id + ), + voro_nodes as ( + SELECT + CASE WHEN x.ctx = TRUE THEN + ST_Centroid(x.edge) + ELSE + x.ct + END as xct, + CASE WHEN y.id is null THEN + CASE WHEN x.ctin = TRUE THEN + ST_SetSRID(ST_MakePoint( + ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), + ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) + ), ST_SRID(x.ct)) + END + ELSE + y.ct + END as yct + FROM + tin_edges x + LEFT OUTER JOIN + tin_edges y + ON x.id <> y.id AND ST_Equals(x.edge, y.edge) + ), + voro_edges as( + SELECT + ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v + FROM + voro_nodes + ), + voro_cells as( + SELECT + ST_Polygonize( + ST_Node( + ST_LineMerge( + ST_Union(v, ST_ExteriorRing( + ST_Convexhull(v) + ) + ) + ) + ) + ) as g + FROM + voro_edges + ), + voro_set as( + SELECT + (st_dump(v.g)).geom as g + FROM voro_cells v + ), + clipped_voro as( + SELECT + ST_intersection(c.g, v.g) as g + FROM + voro_set v, + clipper c + WHERE + ST_GeometryType(v.g) = 'ST_Polygon' + ) + SELECT + st_collect( + ST_Transform( + ST_ConvexHull(g), + 4326 + ) + ) + INTO geomout + FROM + clipped_voro; + RETURN geomout; +END; +$$ language plpgsql IMMUTABLE; + +/** ---------------------------------------------------------------------------------------- + * @function : FindCircle + * @precis : Function that determines if three points form a circle. If so a table containing + * centre and radius is returned. If not, a null table is returned. + * @version : 1.0.1 + * @param : p_pt1 : First point in curve + * @param : p_pt2 : Second point in curve + * @param : p_pt3 : Third point in curve + * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle + * or NULL if three points do not form a circle. + * @history : Simon Greener - Feb 2012 - Original coding. + * Rafa de la Torre - Aug 2016 - Small fix for type checking + * @copyright : Simon Greener @ 2012 + * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/) +**/ +CREATE OR REPLACE FUNCTION _Find_Circle( + IN p_pt1 geometry, + IN p_pt2 geometry, + IN p_pt3 geometry) + RETURNS geometry AS +$BODY$ +DECLARE + v_Centre geometry; + v_radius NUMERIC; + v_CX NUMERIC; + v_CY NUMERIC; + v_dA NUMERIC; + v_dB NUMERIC; + v_dC NUMERIC; + v_dD NUMERIC; + v_dE NUMERIC; + v_dF NUMERIC; + v_dG NUMERIC; +BEGIN + IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN + RAISE EXCEPTION 'All supplied points must be not null.'; + RETURN NULL; + END IF; + IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR + ST_GeometryType(p_pt2) <> 'ST_Point' OR + ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN + RAISE EXCEPTION 'All supplied geometries must be points.'; + RETURN NULL; + END IF; + v_dA := ST_X(p_pt2) - ST_X(p_pt1); + v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); + v_dC := ST_X(p_pt3) - ST_X(p_pt1); + v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); + v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); + v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); + v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); + -- If v_dG is zero then the three points are collinear and no finite-radius + -- circle through them exists. + IF ( v_dG = 0 ) THEN + RETURN NULL; + ELSE + v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; + v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; + v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); + END IF; + RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); +END; +$BODY$ + LANGUAGE plpgsql VOLATILE STRICT; + +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_weightedmean' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) ( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" + ); + END IF; +END +$$ LANGUAGE plpgsql; +-- Spatial Markov + +-- input table format: +-- id | geom | date_1 | date_2 | date_3 +-- 1 | Pt1 | 12.3 | 13.1 | 14.2 +-- 2 | Pt2 | 11.0 | 13.2 | 12.5 +-- ... +-- Sample Function call: +-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate', +-- Array['date_1', 'date_2', 'date_3']) + +CREATE OR REPLACE FUNCTION + CDB_SpatialMarkovTrend ( + subquery TEXT, + time_cols TEXT[], + num_classes INT DEFAULT 7, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT) +AS $$ + + from crankshaft.space_time_dynamics import spatial_markov_trend + + ## TODO: use named parameters or a dictionary + return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- input table format: identical to above but in a predictable format +-- Sample function call: +-- SELECT cdb_spatial_markov('SELECT * FROM real_estate', +-- 'date_1') + + +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col_min text, +-- time_col_max text, +-- date_format text, -- '_YYYY_MM_DD' +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- +-- -- input table format: +-- -- id | geom | date | measurement +-- -- 1 | Pt1 | 12/3 | 13.2 +-- -- 2 | Pt2 | 11/5 | 11.3 +-- -- 3 | Pt1 | 11/13 | 12.9 +-- -- 4 | Pt3 | 12/19 | 10.1 +-- -- ... +-- +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col text, +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +CREATE OR REPLACE FUNCTION CDB_PIA( + IN polygon geometry, + IN tolerance numeric DEFAULT 1.0 + ) +RETURNS geometry AS $$ +DECLARE + env geometry[]; + cells geometry[]; + cell geometry; + best_c geometry; + best_d numeric; + test_d numeric; + test_mx numeric; + test_h numeric; + test_cells geometry[]; + width numeric; + height numeric; + h numeric; + i integer; + n integer; + sqr numeric; + p geometry; +BEGIN + sqr := |/2; + polygon := ST_Transform(polygon, 3857); + + -- grid #0 cell size + height := ST_YMax(polygon) - ST_YMin(polygon); + width := ST_XMax(polygon) - ST_XMin(polygon); + h := 0.5*LEAST(height, width); + + -- grid #0 + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c + ) + SELECT array_agg(c) INTO cells FROM c1; + + -- 1st guess: centroid + best_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(Polygon)); + + -- looping the loop + n := array_length(cells,1); + i := 1; + LOOP + + EXIT WHEN i > n; + + cell := cells[i]; + i := i+1; + + -- cell side size, it's square + test_h := ST_XMax(cell) - ST_XMin(cell) ; + + -- check distance + test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell)); + IF test_d > best_d THEN + best_d := test_d; + best_c := cells[i]; + END IF; + + -- longest distance within the cell + test_mx := test_d + (test_h/2 * sqr); + + -- if the cell has no chance to contains the desired point, continue + CONTINUE WHEN test_mx - best_d <= tolerance; + + -- resample the cell + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c + ) + SELECT array_agg(c) INTO test_cells FROM c1; + + -- concat the new cells to the former array + cells := cells || test_cells; + + -- prepare next iteration + n := array_length(cells,1); + + END LOOP; + + RETURN ST_transform(ST_Centroid(best_c), 4326); + +END; +$$ language plpgsql IMMUTABLE; + + +-- signed distance point to polygon with holes +-- negative is the point is out the polygon +CREATE OR REPLACE FUNCTION _Signed_Dist( + IN polygon geometry, + IN point geometry + ) +RETURNS numeric AS $$ +DECLARE + i integer; + within integer; + holes integer; + dist numeric; +BEGIN + dist := 1e999; + SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist; + SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within; + SELECT ST_NumInteriorRings(polygon) INTO holes; + IF holes > 0 THEN + FOR i IN 1..holes + LOOP + SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist; + END LOOP; + END IF; + dist := dist * within::numeric; + RETURN dist; +END; +$$ language plpgsql IMMUTABLE; +-- +-- Iterative densification of a set of points using Delaunay triangulation +-- the new points have as assigned value the average value of the 3 vertex (centroid) +-- +-- @param geomin - array of geometries (points) +-- +-- @param colin - array of numeric values in that points +-- +-- @param iterations - integer, number of iterations +-- +-- +-- Returns: TABLE(geomout geometry, colout numeric) +-- +-- +CREATE OR REPLACE FUNCTION CDB_Densify( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + geotemp geometry[]; + coltemp numeric[]; + i integer; + gs geometry[]; + g geometry; + vertex geometry[]; + va numeric; + vb numeric; + vc numeric; + center geometry; + centerval numeric; + tmp integer; +BEGIN + geotemp := geomin; + coltemp := colin; + FOR i IN 1..iterations + LOOP + -- generate TIN + WITH a as (SELECT unnest(geotemp) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + -- loop cells + FOREACH g IN ARRAY gs + LOOP + -- append centroid + SELECT ST_Centroid(g) INTO center; + geotemp := array_append(geotemp, center); + -- retrieve the value of each vertex + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + END LOOP; + RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_TINmap( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + p geometry[]; + vals numeric[]; + gs geometry[]; + g geometry; + vertex geometry[]; + centerval numeric; + va numeric; + vb numeric; + vc numeric; + coltemp numeric[]; +BEGIN + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens; + WITH a as (SELECT unnest(p) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + FOREACH g IN ARRAY gs + LOOP + -- retrieve the vertex of each triangle + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_Contour( + IN geomin geometry[], + IN colin numeric[], + IN buffer numeric, + IN intmethod integer, + IN classmethod integer, + IN steps integer, + IN max_time integer DEFAULT 60000 + ) +RETURNS TABLE( + the_geom geometry, + bin integer, + min_value numeric, + max_value numeric, + avg_value numeric +) AS $$ +DECLARE + cell_count integer; + tin geometry[]; + resolution integer; +BEGIN + + -- nasty trick to override issue #121 + IF max_time = 0 THEN + max_time = -90; + END IF; + resolution := max_time; + max_time := -1 * resolution; + + -- calc the optimal number of cells for the current dataset + SELECT + CASE intmethod + WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) + WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797) + WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218) + ELSE 10000 + END INTO cell_count; + + -- we don't have iterative barycentric interpolation in CDB_interpolation, + -- and it's a costy function, so let's make a custom one here till + -- we update the code + -- tin := ARRAY[]::geometry[]; + IF intmethod=1 THEN + WITH + a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b) + SELECT array_agg(v) INTO tin FROM c; + END IF; + -- Delaunay stuff performed just ONCE!! + + -- magic + RETURN QUERY + WITH + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r + ), + envelope as ( + SELECT + st_expand(a.g, a.r) as e + FROM convexhull a + ), + envelope3857 as( + SELECT + ST_Transform(e, 3857) as geom + FROM envelope + ), + resolution as( + SELECT + CASE WHEN resolution <= 0 THEN + round(|/ ( + ST_area(geom) / abs(cell_count) + )) + ELSE + resolution + END AS cell + FROM envelope3857 + ), + grid as( + SELECT + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom + FROM envelope3857 e, resolution r + ), + interp as( + SELECT + geom, + CASE + WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) + ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + END as val + FROM grid + ), + classes as( + SELECT CASE + WHEN classmethod = 0 THEN + cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps) + WHEN classmethod = 1 THEN + cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps) + WHEN classmethod = 2 THEN + cdb_crankshaft.CDB_JenksBins(array_agg(val), steps) + ELSE + cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps) + END as b + FROM interp + where val is not null + ), + classified as( + SELECT + i.*, + width_bucket(i.val, c.b) as bucket + FROM interp i left join classes c + ON 1=1 + ), + classified2 as( + SELECT + geom, + val, + CASE + WHEN bucket = steps THEN bucket - 1 + ELSE bucket + END as b + FROM classified + ), + final as( + SELECT + st_union(geom) as the_geom, + b as bin, + min(val) as min_value, + max(val) as max_value, + avg(val) as avg_value + FROM classified2 + GROUP BY bin + ) + SELECT + * + FROM final + where final.bin is not null + ; +END; +$$ language plpgsql; + + + +-- ===================================================================== +-- Interp in grid, so we can use barycentric with a precalculated tin (NNI) +-- ===================================================================== +CREATE OR REPLACE FUNCTION _interp_in_tin( + IN geomin geometry[], + IN colin numeric[], + IN tin geometry[], + IN point geometry + ) +RETURNS numeric AS +$$ +DECLARE + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- get the cell the point is within + WITH + a as (SELECT unnest(tin) as v), + b as (SELECT v FROM a WHERE ST_Within(point, v)) + SELECT v INTO g FROM b; + + -- if we're out of the data realm, + -- return null + IF g is null THEN + RETURN null; + END IF; + + -- vertex of the selected cell + WITH a AS ( + SELECT (ST_DumpPoints(g)).geom AS v + ) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + -- calc the areas + SELECT + ST_area(g), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1); + RETURN output; +END; +$$ +language plpgsql; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. +-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; diff --git a/release/crankshaft.control b/release/crankshaft.control index 7df077c..b3992a6 100644 --- a/release/crankshaft.control +++ b/release/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.4.0' +default_version = '0.4.1' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft diff --git a/release/python/0.4.1/crankshaft/crankshaft/__init__.py b/release/python/0.4.1/crankshaft/crankshaft/__init__.py new file mode 100644 index 0000000..4e06bc5 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/__init__.py @@ -0,0 +1,5 @@ +"""Import all modules""" +import crankshaft.random_seeds +import crankshaft.clustering +import crankshaft.space_time_dynamics +import crankshaft.segmentation diff --git a/release/python/0.4.1/crankshaft/crankshaft/clustering/__init__.py b/release/python/0.4.1/crankshaft/crankshaft/clustering/__init__.py new file mode 100644 index 0000000..ed34fe0 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/clustering/__init__.py @@ -0,0 +1,3 @@ +"""Import all functions from for clustering""" +from moran import * +from kmeans import * diff --git a/release/python/0.4.1/crankshaft/crankshaft/clustering/kmeans.py b/release/python/0.4.1/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..4134062 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,18 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/release/python/0.4.1/crankshaft/crankshaft/clustering/moran.py b/release/python/0.4.1/crankshaft/crankshaft/clustering/moran.py new file mode 100644 index 0000000..7a97233 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/clustering/moran.py @@ -0,0 +1,243 @@ +""" +Moran's I geostatistics (global clustering & outliers presence) +""" + +# TODO: Fill in local neighbors which have null/NoneType values with the +# average of the their neighborhood + +import pysal as ps +import plpy +from collections import OrderedDict + +# crankshaft module +import crankshaft.pysal_utils as pu + +# High level interface --------------------------------------- + +def moran(subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. + Andy Eschbacher + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(2) + + ## collect attributes + attr_vals = pu.get_attributes(result) + + ## calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) + + return zip([moran_global.I], [moran_global.EI]) + +def moran_local(subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(5) + + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) + Andy Eschbacher + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", numerator), + ("attr2", denominator) + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(2) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) + + return zip([lisa_rate.I], [lisa_rate.EI]) + +def moran_local_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = OrderedDict([("id_col", id_col), + ("numerator", numerator), + ("denominator", denominator), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(5) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_local_bv(subquery, attr1, attr2, + permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ + + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr1), + ("attr2", attr2), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(4) + except plpy.SPIError: + plpy.error("Error: areas of interest query failed, " \ + "check input parameters") + return pu.empty_zipped_array(4) + + ## collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + +# Low level functions ---------------------------------------- + +def map_quads(coord): + """ + Map a quadrant number to Moran's I designation + HH=1, LH=2, LL=3, HL=4 + Input: + @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') + """ + if coord == 1: + return 'HH' + elif coord == 2: + return 'LH' + elif coord == 3: + return 'LL' + elif coord == 4: + return 'HL' + else: + return None + +def quad_position(quads): + """ + Produce Moran's I classification based of n + Input: + @param quads ndarray: an array of quads classified by + 1-4 (PySAL default) + Output: + @param list: an array of quads classied by 'HH', 'LL', etc. + """ + return [map_quads(q) for q in quads] diff --git a/release/python/0.4.1/crankshaft/crankshaft/pysal_utils/__init__.py b/release/python/0.4.1/crankshaft/crankshaft/pysal_utils/__init__.py new file mode 100644 index 0000000..fdf073b --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/pysal_utils/__init__.py @@ -0,0 +1,2 @@ +"""Import all functions for pysal_utils""" +from crankshaft.pysal_utils.pysal_utils import * diff --git a/release/python/0.4.1/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/release/python/0.4.1/crankshaft/crankshaft/pysal_utils/pysal_utils.py new file mode 100644 index 0000000..4622925 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -0,0 +1,188 @@ +""" + Utilities module for generic PySAL functionality, mainly centered on + translating queries into numpy arrays or PySAL weights objects +""" + +import numpy as np +import pysal as ps + +def construct_neighbor_query(w_type, query_vals): + """Return query (a string) used for finding neighbors + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query + """ + + if w_type.lower() == 'knn': + return knn(query_vals) + else: + return queen(query_vals) + +## Build weight object +def get_weight(query_res, w_type='knn', num_ngbrs=5): + """ + Construct PySAL weight from return value of query + @param query_res dict-like: query results with attributes and neighbors + """ + # if w_type.lower() == 'knn': + # row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs + # weights = {x['id']: row_normed_weights for x in query_res} + # else: + # weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) + # if len(x['neighbors']) > 0 + # else [] for x in query_res} + + neighbors = {x['id']: x['neighbors'] for x in query_res} + print 'len of neighbors: %d' % len(neighbors) + + built_weight = ps.W(neighbors) + built_weight.transform = 'r' + + return built_weight + +def query_attr_select(params): + """ + Create portion of SELECT statement for attributes inolved in query. + @param params: dict of information used in query (column names, + table name, etc.) + """ + + attr_string = "" + template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, " + + if 'time_cols' in params: + ## if markov analysis + attrs = params['time_cols'] + + for idx, val in enumerate(attrs): + attr_string += template % {"col": val, "alias_num": idx + 1} + else: + ## if moran's analysis + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')] + + for idx, val in enumerate(sorted(attrs)): + attr_string += template % {"col": params[val], "alias_num": idx + 1} + + return attr_string + +def query_attr_where(params): + """ + Construct where conditions when building neighbors query + Create portion of WHERE clauses for weeding out NULL-valued geometries + Input: dict of params: + {'subquery': ..., + 'numerator': 'data1', + 'denominator': 'data2', + '': ...} + Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL' + Input: + {'subquery': ..., + 'time_cols': ['time1', 'time2', 'time3'], + 'etc': ...} + Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT + NULL AND idx_replace."time3" IS NOT NULL' + """ + attr_string = [] + template = "idx_replace.\"%s\" IS NOT NULL" + + if 'time_cols' in params: + ## markov where clauses + attrs = params['time_cols'] + # add values to template + for attr in attrs: + attr_string.append(template % attr) + else: + ## moran where clauses + + # get keys + attrs = sorted([k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')]) + # add values to template + for attr in attrs: + attr_string.append(template % params[attr]) + + if len(attrs) == 2: + attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]]) + + out = " AND ".join(attr_string) + + return out + +def knn(params): + """SQL query for k-nearest neighbors. + @param vars: dict of values to fill template + """ + + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE " \ + "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "%(attr_where_j)s " \ + "ORDER BY " \ + "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs})" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## SQL query for finding queens neighbors (all contiguous polygons) +def queen(params): + """SQL query for queen neighbors. + @param params dict: information to fill query + """ + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## to add more weight methods open a ticket or pull request + +def get_attributes(query_res, attr_num=1): + """ + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) + """ + return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) + +def empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/release/python/0.4.1/crankshaft/crankshaft/random_seeds.py b/release/python/0.4.1/crankshaft/crankshaft/random_seeds.py new file mode 100644 index 0000000..31958cb --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/random_seeds.py @@ -0,0 +1,11 @@ +"""Random seed generator used for non-deterministic functions in crankshaft""" +import random +import numpy + +def set_random_seeds(value): + """ + Set the seeds of the RNGs (Random Number Generators) + used internally. + """ + random.seed(value) + numpy.random.seed(value) diff --git a/release/python/0.4.1/crankshaft/crankshaft/segmentation/__init__.py b/release/python/0.4.1/crankshaft/crankshaft/segmentation/__init__.py new file mode 100644 index 0000000..b825e85 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/segmentation/__init__.py @@ -0,0 +1 @@ +from segmentation import * diff --git a/release/python/0.4.1/crankshaft/crankshaft/segmentation/segmentation.py b/release/python/0.4.1/crankshaft/crankshaft/segmentation/segmentation.py new file mode 100644 index 0000000..ed61139 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/segmentation/segmentation.py @@ -0,0 +1,176 @@ +""" +Segmentation creation and prediction +""" + +import sklearn +import numpy as np +import plpy +from sklearn.ensemble import GradientBoostingRegressor +from sklearn import metrics +from sklearn.cross_validation import train_test_split + +# Lower level functions +#---------------------- + +def replace_nan_with_mean(array): + """ + Input: + @param array: an array of floats which may have null-valued entries + Output: + array with nans filled in with the mean of the dataset + """ + # returns an array of rows and column indices + indices = np.where(np.isnan(array)) + + # iterate through entries which have nan values + for row, col in zip(*indices): + array[row, col] = np.mean(array[~np.isnan(array[:, col]), col]) + + return array + +def get_data(variable, feature_columns, query): + """ + Fetch data from the database, clean, and package into + numpy arrays + Input: + @param variable: name of the target variable + @param feature_columns: list of column names + @param query: subquery that data is pulled from for the packaging + Output: + prepared data, packaged into NumPy arrays + """ + + columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns]) + + try: + data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format( + variable=variable, + columns=columns, + query=query)) + except Exception, e: + plpy.error('Failed to access data to build segmentation model: %s' % e) + + # extract target data from plpy object + target = np.array(data[0]['target']) + + # put n feature data arrays into an n x m array of arrays + features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns]) + + return replace_nan_with_mean(target), replace_nan_with_mean(features) + +# High level interface +# -------------------- + +def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters): + """ + Version of create_and_predict_segment that works on arrays that come stright form the SQL calling + the function. + + Input: + @param target: The 1D array of lenth NSamples containing the target variable we want the model to predict + @param features: Thw 2D array of size NSamples * NFeatures that form the imput to the model + @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from + @param model_parameters: A dictionary containing parameters for the model. + """ + + clean_target = replace_nan_with_mean(target) + clean_features = replace_nan_with_mean(features) + target_features = replace_nan_with_mean(target_features) + + model, accuracy = train_model(clean_target, clean_features, model_parameters, 0.2) + prediction = model.predict(target_features) + accuracy_array = [accuracy]*prediction.shape[0] + return zip(target_ids, prediction, np.full(prediction.shape, accuracy_array)) + + + +def create_and_predict_segment(query, variable, target_query, model_params): + """ + generate a segment with machine learning + Stuart Lynn + """ + + ## fetch column names + try: + columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))[0].keys() + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + ## extract column names to be used in building the segmentation model + feature_columns = set(columns) - set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator']) + ## get data from database + target, features = get_data(variable, feature_columns, query) + + model, accuracy = train_model(target, features, model_params, 0.2) + cartodb_ids, result = predict_segment(model, feature_columns, target_query) + accuracy_array = [accuracy]*result.shape[0] + return zip(cartodb_ids, result, accuracy_array) + + +def train_model(target, features, model_params, test_split): + """ + Train the Gradient Boosting model on the provided data and calculate the accuracy of the model + Input: + @param target: 1D Array of the variable that the model is to be trianed to predict + @param features: 2D Array NSamples * NFeatures to use in trining the model + @param model_params: A dictionary of model parameters, the full specification can be found on the + scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html) + @parma test_split: The fraction of the data to be withheld for testing the model / calculating the accuray + """ + features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=test_split) + model = GradientBoostingRegressor(**model_params) + model.fit(features_train, target_train) + accuracy = calculate_model_accuracy(model, features, target) + return model, accuracy + +def calculate_model_accuracy(model, features, target): + """ + Calculate the mean squared error of the model prediction + Input: + @param model: model trained from input features + @param features: features to make a prediction from + @param target: target to compare prediction to + Output: + mean squared error of the model prection compared to the target + """ + prediction = model.predict(features) + return metrics.mean_squared_error(prediction, target) + +def predict_segment(model, features, target_query): + """ + Use the provided model to predict the values for the new feature set + Input: + @param model: The pretrained model + @features: A list of features to use in the model prediction (list of column names) + @target_query: The query to run to obtain the data to predict on and the cartdb_ids associated with it. + """ + + batch_size = 1000 + joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features]) + + try: + cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format( + joined_features=joined_features, + target_query=target_query)) + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + results = [] + + while True: + rows = cursor.fetch(batch_size) + if not rows: + break + batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows]) + + #Need to fix this. Should be global mean. This will cause weird effects + batch = replace_nan_with_mean(batch) + prediction = model.predict(batch) + results.append(prediction) + + try: + cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids'] + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + return cartodb_ids, np.concatenate(results) diff --git a/release/python/0.4.1/crankshaft/crankshaft/space_time_dynamics/__init__.py b/release/python/0.4.1/crankshaft/crankshaft/space_time_dynamics/__init__.py new file mode 100644 index 0000000..a439286 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/space_time_dynamics/__init__.py @@ -0,0 +1,2 @@ +"""Import all functions from clustering libraries.""" +from markov import * diff --git a/release/python/0.4.1/crankshaft/crankshaft/space_time_dynamics/markov.py b/release/python/0.4.1/crankshaft/crankshaft/space_time_dynamics/markov.py new file mode 100644 index 0000000..ae788d7 --- /dev/null +++ b/release/python/0.4.1/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -0,0 +1,189 @@ +""" +Spatial dynamics measurements using Spatial Markov +""" + + +import numpy as np +import pysal as ps +import plpy +import crankshaft.pysal_utils as pu + +def spatial_markov_trend(subquery, time_cols, num_classes=7, + w_type='knn', num_ngbrs=5, permutations=0, + geom_col='the_geom', id_col='cartodb_id'): + """ + Predict the trends of a unit based on: + 1. history of its transitions to different classes (e.g., 1st quantile -> 2nd quantile) + 2. average class of its neighbors + + Inputs: + @param subquery string: e.g., SELECT the_geom, cartodb_id, + interesting_time_column FROM table_name + @param time_cols list of strings: list of strings of column names + @param num_classes (optional): number of classes to break distribution + of values into. Currently uses quantile bins. + @param w_type string (optional): weight type ('knn' or 'queen') + @param num_ngbrs int (optional): number of neighbors (if knn type) + @param permutations int (optional): number of permutations for test + stats + @param geom_col string (optional): name of column which contains the + geometries + @param id_col string (optional): name of column which has the ids of + the table + + Outputs: + @param trend_up float: probablity that a geom will move to a higher + class + @param trend_down float: probablity that a geom will move to a lower + class + @param trend float: (trend_up - trend_down) / trend_static + @param volatility float: a measure of the volatility based on + probability stddev(prob array) + """ + + if len(time_cols) < 2: + plpy.error('More than one time column needs to be passed') + + qvals = {"id_col": id_col, + "time_cols": time_cols, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + try: + query_result = plpy.execute( + pu.construct_neighbor_query(w_type, qvals) + ) + if len(query_result) == 0: + return zip([None], [None], [None], [None], [None]) + except plpy.SPIError, e: + plpy.debug('Query failed with exception %s: %s' % (err, pu.construct_neighbor_query(w_type, qvals))) + plpy.error('Analysis failed: %s' % e) + return zip([None], [None], [None], [None], [None]) + + ## build weight + weights = pu.get_weight(query_result, w_type) + weights.transform = 'r' + + ## prep time data + t_data = get_time_data(query_result, time_cols) + + plpy.debug('shape of t_data %d, %d' % t_data.shape) + plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape) + plpy.debug('first num elements: %f' % t_data[0, 0]) + + sp_markov_result = ps.Spatial_Markov(t_data, + weights, + k=num_classes, + fixed=False, + permutations=permutations) + + ## get lag classes + lag_classes = ps.Quantiles( + ps.lag_spatial(weights, t_data[:, -1]), + k=num_classes).yb + + ## look up probablity distribution for each unit according to class and lag class + prob_dist = get_prob_dist(sp_markov_result.P, + lag_classes, + sp_markov_result.classes[:, -1]) + + ## find the ups and down and overall distribution of each cell + trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, + sp_markov_result.classes[:, -1]) + + ## output the results + return zip(trend, trend_up, trend_down, volatility, weights.id_order) + +def get_time_data(markov_data, time_cols): + """ + Extract the time columns and bin appropriately + """ + num_attrs = len(time_cols) + return np.array([[x['attr' + str(i)] for x in markov_data] + for i in range(1, num_attrs+1)], dtype=float).transpose() + +## not currently used +def rebin_data(time_data, num_time_per_bin): + """ + Convert an n x l matrix into an (n/m) x l matrix where the values are + reduced (averaged) for the intervening states: + 1 2 3 4 1.5 3.5 + 5 6 7 8 -> 5.5 7.5 + 9 8 7 6 8.5 6.5 + 5 4 3 2 4.5 2.5 + + if m = 2, the 4 x 4 matrix is transformed to a 2 x 4 matrix. + + This process effectively resamples the data at a longer time span n + units longer than the input data. + For cases when there is a remainder (remainder(5/3) = 2), the remaining + two columns are binned together as the last time period, while the + first three are binned together for the first period. + + Input: + @param time_data n x l ndarray: measurements of an attribute at + different time intervals + @param num_time_per_bin int: number of columns to average into a new + column + Output: + ceil(n / m) x l ndarray of resampled time series + """ + + if time_data.shape[1] % num_time_per_bin == 0: + ## if fit is perfect, then use it + n_max = time_data.shape[1] / num_time_per_bin + else: + ## fit remainders into an additional column + n_max = time_data.shape[1] / num_time_per_bin + 1 + + return np.array([time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1) + for i in range(n_max)]).T + +def get_prob_dist(transition_matrix, lag_indices, unit_indices): + """ + Given an array of transition matrices, look up the probability + associated with the arrangements passed + + Input: + @param transition_matrix ndarray[k,k,k]: + @param lag_indices ndarray: + @param unit_indices ndarray: + + Output: + Array of probability distributions + """ + + return np.array([transition_matrix[(lag_indices[i], unit_indices[i])] + for i in range(len(lag_indices))]) + +def get_prob_stats(prob_dist, unit_indices): + """ + get the statistics of the probability distributions + + Outputs: + @param trend_up ndarray(float): sum of probabilities for upward + movement (relative to the unit index of that prob) + @param trend_down ndarray(float): sum of probabilities for downward + movement (relative to the unit index of that prob) + @param trend ndarray(float): difference of upward and downward + movements + """ + + num_elements = len(unit_indices) + trend_up = np.empty(num_elements, dtype=float) + trend_down = np.empty(num_elements, dtype=float) + trend = np.empty(num_elements, dtype=float) + + for i in range(num_elements): + trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum() + trend_down[i] = prob_dist[i, :unit_indices[i]].sum() + if prob_dist[i, unit_indices[i]] > 0.0: + trend[i] = (trend_up[i] - trend_down[i]) / prob_dist[i, unit_indices[i]] + else: + trend[i] = None + + ## calculate volatility of distribution + volatility = prob_dist.std(axis=1) + + return trend_up, trend_down, trend, volatility diff --git a/release/python/0.4.1/crankshaft/setup.py b/release/python/0.4.1/crankshaft/setup.py new file mode 100644 index 0000000..cd8ad99 --- /dev/null +++ b/release/python/0.4.1/crankshaft/setup.py @@ -0,0 +1,49 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.0', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation. + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.4.1/crankshaft/setup.py-r b/release/python/0.4.1/crankshaft/setup.py-r new file mode 100644 index 0000000..cd8ad99 --- /dev/null +++ b/release/python/0.4.1/crankshaft/setup.py-r @@ -0,0 +1,49 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.0', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation. + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.4.1/crankshaft/test/fixtures/kmeans.json b/release/python/0.4.1/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git a/release/python/0.4.1/crankshaft/test/fixtures/markov.json b/release/python/0.4.1/crankshaft/test/fixtures/markov.json new file mode 100644 index 0000000..d60e4e0 --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/fixtures/markov.json @@ -0,0 +1 @@ +[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]] diff --git a/release/python/0.4.1/crankshaft/test/fixtures/moran.json b/release/python/0.4.1/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..2f75cf1 --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], +[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], +[0.5724288737143592, "LL"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], +[0.18562939195219, "LL"], +[0.3010757406693439, "LL"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], +[0.18562939195219, "LL"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/release/python/0.4.1/crankshaft/test/fixtures/neighbors.json b/release/python/0.4.1/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/release/python/0.4.1/crankshaft/test/fixtures/neighbors_markov.json b/release/python/0.4.1/crankshaft/test/fixtures/neighbors_markov.json new file mode 100644 index 0000000..45a20e7 --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/fixtures/neighbors_markov.json @@ -0,0 +1 @@ +[{"neighbors": [10, 7, 21, 23, 1], "y1995": 0.87654416055651474, "y1997": 0.85637566664752718, "y1996": 0.8631470006766887, "y1999": 0.84461540228037335, "y1998": 0.84811668329242784, "y2006": 0.86302631339545688, "y2007": 0.86148266513456728, "y2004": 0.86416611731111015, "y2005": 0.87119374831581786, "y2002": 0.85012592862683589, "y2003": 0.8550965633336135, "y2000": 0.83271652434603094, "y2001": 0.83786313566577242, "id": 0, "y2008": 0.86252252380501315, "y2009": 0.86746356478544273}, {"neighbors": [5, 7, 22, 29, 3], "y1995": 0.91889509774542122, "y1997": 0.92333257900976462, "y1996": 0.91757931190043385, "y1999": 0.92552387732371888, "y1998": 0.92517289327379471, "y2006": 0.91706053906277052, "y2007": 0.90139504820726424, "y2004": 0.89815175749309051, "y2005": 0.91832090781161113, "y2002": 0.89431990798552208, "y2003": 0.88924793576523797, "y2000": 0.90746978227271013, "y2001": 0.89830489127332913, "id": 1, "y2008": 0.87897455159080617, "y2009": 0.86216858051752643}, {"neighbors": [11, 8, 13, 18, 17], "y1995": 0.82591007476914713, "y1997": 0.81989792988843901, "y1996": 0.82548595539161707, "y1999": 0.81731522200916285, "y1998": 0.81503235035017918, "y2006": 0.81814804358939286, "y2007": 0.83675961003285626, "y2004": 0.82668195534569056, "y2005": 0.82373723764184559, "y2002": 0.80849979516360859, "y2003": 0.82258550658074148, "y2000": 0.78964559168205917, "y2001": 0.8058444152731008, "id": 2, "y2008": 0.8357419865626442, "y2009": 0.84647177436289112}, {"neighbors": [4, 14, 9, 5, 12], "y1995": 1.0908817638059434, "y1997": 1.0845641754849344, "y1996": 1.0853768890893893, "y1999": 1.098988414417104, "y1998": 1.0841540389418189, "y2006": 1.1316479722785828, "y2007": 1.1295850763954971, "y2004": 1.1139980568106316, "y2005": 1.1216802898290368, "y2002": 1.1116069731657288, "y2003": 1.1088862051501811, "y2000": 1.1450694824791507, "y2001": 1.1215113292620285, "id": 3, "y2008": 1.1137181812756343, "y2009": 1.0993677488645406}, {"neighbors": [14, 3, 9, 31, 12], "y1995": 1.1073144618319228, "y1997": 1.1328363804627946, "y1996": 1.1137394350312471, "y1999": 1.1591002514611153, "y1998": 1.144725587086376, "y2006": 1.1173646811350333, "y2007": 1.1086324218539598, "y2004": 1.1102496406140896, "y2005": 1.11943471361418, "y2002": 1.1475230282561595, "y2003": 1.1184328424005199, "y2000": 1.1689820101690329, "y2001": 1.1721248787169682, "id": 4, "y2008": 1.0964251552643696, "y2009": 1.0776233718455337}, {"neighbors": [29, 1, 22, 7, 4], "y1995": 1.422697571371182, "y1997": 1.4427350196405593, "y1996": 1.4211843379728528, "y1999": 1.4440068434166562, "y1998": 1.4357757095632602, "y2006": 1.4405276647793266, "y2007": 1.4524121586440921, "y2004": 1.4059372049179741, "y2005": 1.4078864636665769, "y2002": 1.4197822680667809, "y2003": 1.3909220829548647, "y2000": 1.4418473669388905, "y2001": 1.4478283203013527, "id": 5, "y2008": 1.4330609762040207, "y2009": 1.4174430982377491}, {"neighbors": [12, 47, 9, 25, 20], "y1995": 1.1307388498039153, "y1997": 1.1107470843142355, "y1996": 1.1311051255854685, "y1999": 1.130881491772973, "y1998": 1.1336463608751246, "y2006": 1.1088003408832796, "y2007": 1.0840170924825394, "y2004": 1.1244623853593112, "y2005": 1.1167100811401538, "y2002": 1.1306293052597198, "y2003": 1.1194498381213465, "y2000": 1.1088813841947593, "y2001": 1.1185662918783175, "id": 6, "y2008": 1.0695920556329086, "y2009": 1.0787522517402164}, {"neighbors": [21, 1, 22, 10, 0], "y1995": 1.0470612357366649, "y1997": 1.0425337165747406, "y1996": 1.0451683097376836, "y1999": 1.0207254480945218, "y1998": 1.0323998680588111, "y2006": 1.0405109962442973, "y2007": 1.0174964540280445, "y2004": 1.0140090547678748, "y2005": 1.0317674181861733, "y2002": 0.99669586934394627, "y2003": 0.99327675611171373, "y2000": 0.99854316295509526, "y2001": 0.98802579761429143, "id": 7, "y2008": 0.9936394033949828, "y2009": 0.98279746069218921}, {"neighbors": [11, 13, 17, 18, 15], "y1995": 0.98996985668705595, "y1997": 0.99491000469481983, "y1996": 1.0014356415938011, "y1999": 1.0045584503565237, "y1998": 1.0018840754492748, "y2006": 0.92232873520447411, "y2007": 0.91284090705064902, "y2004": 0.93694786512729977, "y2005": 0.94308212820743131, "y2002": 0.96834820215592055, "y2003": 0.95335147249088092, "y2000": 0.99127006477048718, "y2001": 0.97925917470464008, "id": 8, "y2008": 0.89689832627117483, "y2009": 0.88928857608264111}, {"neighbors": [12, 6, 4, 3, 14], "y1995": 0.87418390853652306, "y1997": 0.84425695187978567, "y1996": 0.86416601430334228, "y1999": 0.83903043942542854, "y1998": 0.8404493987171674, "y2006": 0.87204140839730271, "y2007": 0.86633032299764789, "y2004": 0.86981997840756087, "y2005": 0.86837929279319737, "y2002": 0.86107306112852877, "y2003": 0.85007719735663123, "y2000": 0.85787080050645603, "y2001": 0.86036185149249467, "id": 9, "y2008": 0.84946077011565357, "y2009": 0.83287145944123797}, {"neighbors": [0, 7, 21, 23, 22], "y1995": 1.1419611801631209, "y1997": 1.1489271154554144, "y1996": 1.146602624490825, "y1999": 1.1443662376135306, "y1998": 1.1490959392942743, "y2006": 1.1049125811637337, "y2007": 1.1105984164317646, "y2004": 1.1119989015058092, "y2005": 1.1025779214946556, "y2002": 1.1259666377127024, "y2003": 1.1221399558345004, "y2000": 1.144501826035474, "y2001": 1.1234975172649961, "id": 10, "y2008": 1.1050979494645479, "y2009": 1.1002009697391872}, {"neighbors": [8, 13, 18, 17, 2], "y1995": 0.97282462974938089, "y1997": 0.96252588061647382, "y1996": 0.96700147279313231, "y1999": 0.96057686787383312, "y1998": 0.96538780087103548, "y2006": 0.91010201260822066, "y2007": 0.89280392121658247, "y2004": 0.94103988614185807, "y2005": 0.9212251863828258, "y2002": 0.94804194711420009, "y2003": 0.9543028555845573, "y2000": 0.95831051250950716, "y2001": 0.94480908623936988, "id": 11, "y2008": 0.89298242828382146, "y2009": 0.89165384824292859}, {"neighbors": [33, 9, 6, 25, 31], "y1995": 0.94325467991401402, "y1997": 0.96455242154753429, "y1996": 0.96436902092427723, "y1999": 0.94117647058823528, "y1998": 0.95243008993884537, "y2006": 0.9346681464882507, "y2007": 0.94281559150403071, "y2004": 0.96918424441756057, "y2005": 0.94781280876672958, "y2002": 0.95388717527096822, "y2003": 0.94597005193649519, "y2000": 0.94809269652332606, "y2001": 0.93539181553564288, "id": 12, "y2008": 0.965203150896216, "y2009": 0.967154410723015}, {"neighbors": [18, 17, 11, 8, 19], "y1995": 0.97478408425654373, "y1997": 0.98712808751954773, "y1996": 0.98169225257738801, "y1999": 0.985598971191053, "y1998": 0.98474769442356791, "y2006": 0.98416665248276058, "y2007": 0.98423613480079708, "y2004": 0.97399471186978948, "y2005": 0.96910087128357136, "y2002": 0.9820996926750224, "y2003": 0.98776529543110569, "y2000": 0.98687072733199255, "y2001": 0.99237486444837619, "id": 13, "y2008": 0.99823861244053191, "y2009": 0.99545704236827348}, {"neighbors": [4, 31, 3, 29, 12], "y1995": 0.85570268988941878, "y1997": 0.85986131704895119, "y1996": 0.85575915188345031, "y1999": 0.85380119644969055, "y1998": 0.85693406055397725, "y2006": 0.82803647591954255, "y2007": 0.81987360180979219, "y2004": 0.83998883284341452, "y2005": 0.83478547261894065, "y2002": 0.85472102128186755, "y2003": 0.84564834502399988, "y2000": 0.86191535266765262, "y2001": 0.84981450830432048, "id": 14, "y2008": 0.82265395167873867, "y2009": 0.83994039782937002}, {"neighbors": [19, 8, 17, 16, 13], "y1995": 0.87022046646521634, "y1997": 0.85961813213722393, "y1996": 0.85996258309339635, "y1999": 0.8394713575455558, "y1998": 0.85689572413110093, "y2006": 0.94202108334913126, "y2007": 0.94222309998743192, "y2004": 0.86763340229291142, "y2005": 0.89179316746010362, "y2002": 0.86776297543511893, "y2003": 0.86720209304280604, "y2000": 0.82785596604704892, "y2001": 0.86008789452656809, "id": 15, "y2008": 0.93902708112840494, "y2009": 0.94479183757120588}, {"neighbors": [28, 26, 15, 19, 32], "y1995": 0.90134907329491731, "y1997": 0.90403990934606904, "y1996": 0.904077381347274, "y1999": 0.90399237579083946, "y1998": 0.90201769385650832, "y2006": 0.91108803862404764, "y2007": 0.90543476309316473, "y2004": 0.94338264626469681, "y2005": 0.91981795862151561, "y2002": 0.93695966482853577, "y2003": 0.94242697007039, "y2000": 0.90906631602055099, "y2001": 0.92693339421265908, "id": 16, "y2008": 0.91737137682250491, "y2009": 0.94793657442067902}, {"neighbors": [13, 18, 11, 19, 8], "y1995": 1.1977611005602815, "y1997": 1.1843915817489725, "y1996": 1.1822256425225894, "y1999": 1.1928672308275252, "y1998": 1.1826786457339149, "y2006": 1.2392938410349985, "y2007": 1.2341867605077472, "y2004": 1.2385704217423759, "y2005": 1.2441989281116201, "y2002": 1.2262477774195681, "y2003": 1.2239707531714479, "y2000": 1.2017286912636342, "y2001": 1.2132869128474402, "id": 17, "y2008": 1.2362673914436095, "y2009": 1.2675439750795283}, {"neighbors": [13, 17, 11, 8, 19], "y1995": 1.2491967813733067, "y1997": 1.2699116090397236, "y1996": 1.2575477330927329, "y1999": 1.3062566740535762, "y1998": 1.2802065055312271, "y2006": 1.3210776560048689, "y2007": 1.329362443219563, "y2004": 1.3054484140490119, "y2005": 1.3030330249408666, "y2002": 1.3257518058685978, "y2003": 1.3079549159235695, "y2000": 1.3479002255103918, "y2001": 1.3439986302151703, "id": 18, "y2008": 1.3300124123891741, "y2009": 1.3328846185074705}, {"neighbors": [26, 17, 28, 15, 16], "y1995": 1.0676800411188558, "y1997": 1.0363730321443168, "y1996": 1.0379927554499979, "y1999": 1.0329609259280523, "y1998": 1.027684488045026, "y2006": 0.94241549375546196, "y2007": 0.92754546923532677, "y2004": 0.99614160423102482, "y2005": 0.97356208269708677, "y2002": 1.0274762326434594, "y2003": 1.0316273366809443, "y2000": 1.0505901631347052, "y2001": 1.0340505678899605, "id": 19, "y2008": 0.92549226593721745, "y2009": 0.92138101880290568}, {"neighbors": [30, 25, 24, 37, 47], "y1995": 1.0947561397632881, "y1997": 1.1165429913770684, "y1996": 1.1152679554712275, "y1999": 1.1314326394231322, "y1998": 1.1310394841195361, "y2006": 1.1090538904302065, "y2007": 1.1057776900012568, "y2004": 1.1402994437897009, "y2005": 1.1197940058085571, "y2002": 1.133670175399079, "y2003": 1.139822558851451, "y2000": 1.1388962186541665, "y2001": 1.1244221220249986, "id": 20, "y2008": 1.1116682481010467, "y2009": 1.0998515545336902}, {"neighbors": [23, 22, 7, 10, 34], "y1995": 0.76530058421804126, "y1997": 0.76542450966153397, "y1996": 0.76612841163904621, "y1999": 0.76014283909933289, "y1998": 0.7672268310234307, "y2006": 0.76842416021983684, "y2007": 0.77487117798086069, "y2004": 0.76533287692895391, "y2005": 0.78205934309410463, "y2002": 0.76156903267949927, "y2003": 0.76651951668098528, "y2000": 0.74480073263159763, "y2001": 0.76098396210261965, "id": 21, "y2008": 0.77768682781054099, "y2009": 0.78801192267396702}, {"neighbors": [21, 34, 5, 7, 29], "y1995": 0.98391336093764348, "y1997": 0.98295341320156315, "y1996": 0.98075815675295552, "y1999": 0.96913802803963667, "y1998": 0.97386015032669815, "y2006": 0.93965462091114671, "y2007": 0.93069644684632924, "y2004": 0.9635616201227476, "y2005": 0.94745351657235244, "y2002": 0.97209860866113018, "y2003": 0.97441312580606143, "y2000": 0.97370819354423843, "y2001": 0.96419154157867693, "id": 22, "y2008": 0.94020973488297466, "y2009": 0.94358232339833159}, {"neighbors": [21, 10, 22, 34, 7], "y1995": 0.83561828119099946, "y1997": 0.81738501913392403, "y1996": 0.82298088022609361, "y1999": 0.80904800725677739, "y1998": 0.81748588141426259, "y2006": 0.87170334233473346, "y2007": 0.8786379876833581, "y2004": 0.85954307066870839, "y2005": 0.86790023653402792, "y2002": 0.83451612857812574, "y2003": 0.85175031934895873, "y2000": 0.80071489233375537, "y2001": 0.83358255807316928, "id": 23, "y2008": 0.87497981001981484, "y2009": 0.87888675419592222}, {"neighbors": [27, 20, 30, 32, 47], "y1995": 0.98845573274970278, "y1997": 0.99665282989553183, "y1996": 1.0209242772035507, "y1999": 0.99386618594343845, "y1998": 0.99141823200404444, "y2006": 0.97906748937234156, "y2007": 0.9932312332800689, "y2004": 1.0111665058188304, "y2005": 0.9998802359352077, "y2002": 0.99669586934394627, "y2003": 1.0255909749831356, "y2000": 0.98733194819247994, "y2001": 0.99644997431653437, "id": 24, "y2008": 1.0020493856497013, "y2009": 0.99602148231561483}, {"neighbors": [20, 33, 6, 30, 12], "y1995": 1.1493091345649815, "y1997": 1.143009615936718, "y1996": 1.1524194939429724, "y1999": 1.1398468268822266, "y1998": 1.1426554202510555, "y2006": 1.0889107875354573, "y2007": 1.0860369499254896, "y2004": 1.0856975145267398, "y2005": 1.1244348633192611, "y2002": 1.0423089214343333, "y2003": 1.0557727834721793, "y2000": 1.0831239730629278, "y2001": 1.0519262599166714, "id": 25, "y2008": 1.0599731384290745, "y2009": 1.0216094265950888}, {"neighbors": [28, 19, 16, 32, 17], "y1995": 1.1136826889802023, "y1997": 1.1189343096757198, "y1996": 1.1057147027213501, "y1999": 1.1432271991365353, "y1998": 1.1377866945457653, "y2006": 1.1268023587150906, "y2007": 1.1235793669317915, "y2004": 1.1482023546040769, "y2005": 1.1238659840114973, "y2002": 1.1600919581655105, "y2003": 1.1446778932605579, "y2000": 1.1825702862895446, "y2001": 1.1622624279436105, "id": 26, "y2008": 1.115925801617498, "y2009": 1.1257082797404696}, {"neighbors": [32, 24, 36, 16, 28], "y1995": 1.303794309231981, "y1997": 1.3120636604057812, "y1996": 1.3075218596998686, "y1999": 1.3062566740535762, "y1998": 1.3153226688859194, "y2006": 1.2865667454509278, "y2007": 1.2973409698906584, "y2004": 1.2683078569016086, "y2005": 1.2617743046198988, "y2002": 1.2920319347677043, "y2003": 1.2718351646774422, "y2000": 1.3121023910310281, "y2001": 1.2998915587009874, "id": 27, "y2008": 1.2939020510829768, "y2009": 1.2934544564717687}, {"neighbors": [26, 16, 19, 32, 27], "y1995": 0.83953719020532513, "y1997": 0.82006005316292385, "y1996": 0.82701447583159737, "y1999": 0.80294863992835086, "y1998": 0.8118887636743225, "y2006": 0.8389109342655191, "y2007": 0.84349246817602375, "y2004": 0.83108634437662732, "y2005": 0.84373783646216949, "y2002": 0.82596790474192727, "y2003": 0.82435704751379402, "y2000": 0.78772975118465016, "y2001": 0.82848010958278628, "id": 28, "y2008": 0.85637272428125033, "y2009": 0.86539395164519117}, {"neighbors": [5, 39, 22, 14, 31], "y1995": 1.2345008725695852, "y1997": 1.2353793515744536, "y1996": 1.2426021999018138, "y1999": 1.2452262575926329, "y1998": 1.2358129278404693, "y2006": 1.2365329681906834, "y2007": 1.2796200872578414, "y2004": 1.1967443443492951, "y2005": 1.2153657295128597, "y2002": 1.1937780418204111, "y2003": 1.1835533748469893, "y2000": 1.2256766974812463, "y2001": 1.2112664802237314, "id": 29, "y2008": 1.2796839248335934, "y2009": 1.2590773758694083}, {"neighbors": [37, 20, 24, 25, 27], "y1995": 0.97696620404861145, "y1997": 0.98035944080980575, "y1996": 0.9740071914763756, "y1999": 0.95543282313901556, "y1998": 0.97581530789338955, "y2006": 0.92100464312607799, "y2007": 0.9147530387633086, "y2004": 0.9298883479571457, "y2005": 0.93442917452618346, "y2002": 0.93679072759857129, "y2003": 0.92540049332494034, "y2000": 0.96480308308405971, "y2001": 0.9468637634838194, "id": 30, "y2008": 0.90249622070947177, "y2009": 0.90213630440783921}, {"neighbors": [35, 14, 33, 12, 4], "y1995": 0.84986885942491119, "y1997": 0.84295996568390696, "y1996": 0.89868510090623221, "y1999": 0.85659367787716301, "y1998": 0.87280533962476625, "y2006": 0.92562487931452408, "y2007": 0.96635366357254426, "y2004": 0.92698332540482575, "y2005": 0.94745351657235244, "y2002": 0.90448992922937876, "y2003": 0.95495898185605821, "y2000": 0.88937573313051443, "y2001": 0.89440100450887505, "id": 31, "y2008": 1.025203118044723, "y2009": 1.0394296020754366}, {"neighbors": [36, 27, 28, 16, 26], "y1995": 1.0192280751235561, "y1997": 1.0097442843101825, "y1996": 1.0025820319237864, "y1999": 0.99765073314119712, "y1998": 1.0030341681355639, "y2006": 0.94779637858468868, "y2007": 0.93759089358493275, "y2004": 0.97583768316642261, "y2005": 0.96101679691008712, "y2002": 0.99747298060178258, "y2003": 0.99550758543481688, "y2000": 1.0075901875261932, "y2001": 0.99192968437874551, "id": 32, "y2008": 0.93353431146829191, "y2009": 0.94121705123804411}, {"neighbors": [44, 25, 12, 35, 31], "y1995": 0.86367410708901315, "y1997": 0.85544345781923936, "y1996": 0.85558931627900803, "y1999": 0.84336613427334628, "y1998": 0.85103025143102673, "y2006": 0.89455097373003656, "y2007": 0.88283929116469462, "y2004": 0.85951183386707053, "y2005": 0.87194227372077004, "y2002": 0.84667960913556228, "y2003": 0.84374557883664714, "y2000": 0.83434853662160158, "y2001": 0.85813595114434105, "id": 33, "y2008": 0.90349490610221961, "y2009": 0.9060067497610369}, {"neighbors": [22, 39, 21, 29, 23], "y1995": 1.0094753356447226, "y1997": 1.0069881886439402, "y1996": 1.0041105523637666, "y1999": 0.99291086334982948, "y1998": 0.99513686502304577, "y2006": 0.96382634438484593, "y2007": 0.95011400973122428, "y2004": 0.975119236728752, "y2005": 0.96134614808826613, "y2002": 0.99291167539274383, "y2003": 0.98983209318633369, "y2000": 1.0058162611397035, "y2001": 0.98850522230466298, "id": 34, "y2008": 0.94346860300667812, "y2009": 0.9463776450423077}, {"neighbors": [31, 38, 44, 33, 14], "y1995": 1.0571257066143651, "y1997": 1.0575301194645879, "y1996": 1.0545941857842291, "y1999": 1.0510385688532684, "y1998": 1.0488078570498685, "y2006": 1.0247627521629479, "y2007": 1.0234752320591773, "y2004": 1.0329697933620496, "y2005": 1.0219168238570018, "y2002": 1.0420048344203974, "y2003": 1.0402553971511816, "y2000": 1.0480002306104303, "y2001": 1.030249414987729, "id": 35, "y2008": 1.0251768368501768, "y2009": 1.0435957064486703}, {"neighbors": [32, 43, 27, 28, 42], "y1995": 1.070841888164505, "y1997": 1.0793762307014196, "y1996": 1.0666949726007404, "y1999": 1.0794043012481198, "y1998": 1.0738798776109699, "y2006": 1.087727556316465, "y2007": 1.0885954360198933, "y2004": 1.1032213602455734, "y2005": 1.0916793915985508, "y2002": 1.0938347765734742, "y2003": 1.1052447043433509, "y2000": 1.0531800956589803, "y2001": 1.0745277096056161, "id": 36, "y2008": 1.0917733838297285, "y2009": 1.1096083021948762}, {"neighbors": [30, 40, 20, 42, 41], "y1995": 0.8671922185905101, "y1997": 0.86675155621455668, "y1996": 0.86628895935887062, "y1999": 0.86511809486628932, "y1998": 0.86425631732335095, "y2006": 0.84488343470424199, "y2007": 0.83374328958471722, "y2004": 0.84517414191529749, "y2005": 0.84843857600526962, "y2002": 0.85411284725399572, "y2003": 0.84886336375435456, "y2000": 0.86287327291635718, "y2001": 0.8516979624450659, "id": 37, "y2008": 0.82812044014430564, "y2009": 0.82878598934619596}, {"neighbors": [35, 31, 45, 39, 44], "y1995": 0.8838921149583755, "y1997": 0.90282398478743275, "y1996": 0.92288667453925455, "y1999": 0.92023285988219217, "y1998": 0.91229185518735723, "y2006": 0.93869676706720051, "y2007": 0.96947770975097391, "y2004": 0.99223700402629367, "y2005": 0.97984969609868555, "y2002": 0.93682451504456421, "y2003": 0.98655146182882891, "y2000": 0.92652175166361039, "y2001": 0.94278865361566122, "id": 38, "y2008": 1.0036262573224608, "y2009": 0.98102350657197357}, {"neighbors": [29, 34, 38, 22, 35], "y1995": 0.970820642185237, "y1997": 0.94534081352108112, "y1996": 0.95320232993219844, "y1999": 0.93967000034446724, "y1998": 0.94215592860799646, "y2006": 0.91035556215514757, "y2007": 0.90430364292511256, "y2004": 0.92879505989982103, "y2005": 0.9211054223180335, "y2002": 0.93412151936513388, "y2003": 0.93501274320242933, "y2000": 0.93092108910210503, "y2001": 0.92662519262599163, "id": 39, "y2008": 0.89994694483851023, "y2009": 0.9007386435858511}, {"neighbors": [41, 37, 42, 30, 45], "y1995": 0.95861858457245008, "y1997": 0.98254810501535106, "y1996": 0.95774543235102894, "y1999": 0.98684823919808018, "y1998": 0.98919471947721893, "y2006": 0.97163003599581876, "y2007": 0.97007020126757271, "y2004": 0.9493488753775261, "y2005": 0.97152609359561659, "y2002": 0.95601578436851964, "y2003": 0.94905384541254967, "y2000": 0.98882204635713133, "y2001": 0.97662233890759653, "id": 40, "y2008": 0.97158948117089283, "y2009": 0.95884908006927827}, {"neighbors": [40, 45, 44, 37, 42], "y1995": 0.83980438854721107, "y1997": 0.85746999875029983, "y1996": 0.84726737166133714, "y1999": 0.85567509846023126, "y1998": 0.85467221160427542, "y2006": 0.8333891885768886, "y2007": 0.83511679264592342, "y2004": 0.81743586206088703, "y2005": 0.83550405700769481, "y2002": 0.84502402428191115, "y2003": 0.82645665158259707, "y2000": 0.84818516243622177, "y2001": 0.85265681182580899, "id": 41, "y2008": 0.82136617314598481, "y2009": 0.80921873783836296}, {"neighbors": [43, 40, 46, 37, 36], "y1995": 0.95118156405662746, "y1997": 0.94688098462868708, "y1996": 0.9466212002600608, "y1999": 0.95124410099780687, "y1998": 0.95085829660091703, "y2006": 0.96895367966714574, "y2007": 0.9700163384024274, "y2004": 0.97583768316642261, "y2005": 0.95571723704302525, "y2002": 0.96804411514198463, "y2003": 0.97136213864358201, "y2000": 0.95440787445922959, "y2001": 0.96364362764682376, "id": 42, "y2008": 0.97082732652905901, "y2009": 0.9878236640328002}, {"neighbors": [36, 42, 32, 27, 46], "y1995": 1.0891004415267045, "y1997": 1.0849289528525252, "y1996": 1.0824896838138709, "y1999": 1.0945424900391545, "y1998": 1.0865692335830259, "y2006": 1.1450297539219478, "y2007": 1.1447474729339102, "y2004": 1.1334273474293739, "y2005": 1.1468606844516303, "y2002": 1.1229257675733433, "y2003": 1.1302103089739621, "y2000": 1.1055818811158884, "y2001": 1.1214085953998059, "id": 43, "y2008": 1.1408403740471014, "y2009": 1.1614292649793569}, {"neighbors": [33, 41, 45, 35, 40], "y1995": 1.0633603345917013, "y1997": 1.0869149629649646, "y1996": 1.0736582323828732, "y1999": 1.1166986255755473, "y1998": 1.0976484597942771, "y2006": 1.0839806574563229, "y2007": 1.0983176831786272, "y2004": 1.0927882684985315, "y2005": 1.0700320368873319, "y2002": 1.0881584856466706, "y2003": 1.0804431312806149, "y2000": 1.1185670222649935, "y2001": 1.0976428286056732, "id": 44, "y2008": 1.0929823187788443, "y2009": 1.0917612486217978}, {"neighbors": [41, 44, 40, 35, 33], "y1995": 0.79772064970019041, "y1997": 0.7858115114280021, "y1996": 0.78829195801876151, "y1999": 0.77035744221561353, "y1998": 0.77615921755360906, "y2006": 0.79949806580432425, "y2007": 0.80172181625581262, "y2004": 0.79603865293896003, "y2005": 0.78966436120841943, "y2002": 0.81437881076636964, "y2003": 0.80788827809912023, "y2000": 0.77751193519846906, "y2001": 0.79902973574567659, "id": 45, "y2008": 0.82168154748053679, "y2009": 0.85587910681858015}, {"neighbors": [42, 43, 40, 36, 37], "y1995": 1.0052446952315301, "y1997": 1.0047589936197736, "y1996": 1.0000769567582628, "y1999": 1.0063956091903872, "y1998": 1.0061394183885444, "y2006": 0.97292595590233411, "y2007": 0.96519561197191939, "y2004": 0.99030032232474696, "y2005": 0.97682565346267858, "y2002": 1.0081498135355325, "y2003": 1.0057431552702318, "y2000": 1.0016297948675874, "y2001": 0.99860738542320637, "id": 46, "y2008": 0.9617340332161447, "y2009": 0.95890283625473927}, {"neighbors": [20, 6, 24, 25, 30], "y1995": 0.95808418788867844, "y1997": 0.9654440995572009, "y1996": 0.93825679674127938, "y1999": 0.96987289157318213, "y1998": 0.95561201303757848, "y2006": 1.1704973973021624, "y2007": 1.1702515395802287, "y2004": 1.0533361880299275, "y2005": 1.0983262971945267, "y2002": 1.0078119390756035, "y2003": 1.0348423554112989, "y2000": 0.96608031008233231, "y2001": 0.99727184521431422, "id": 47, "y2008": 1.1873055260044207, "y2009": 1.1424264534188653}] diff --git a/release/python/0.4.1/crankshaft/test/helper.py b/release/python/0.4.1/crankshaft/test/helper.py new file mode 100644 index 0000000..7d28b94 --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/helper.py @@ -0,0 +1,13 @@ +import unittest + +from mock_plpy import MockPlPy +plpy = MockPlPy() + +import sys +sys.modules['plpy'] = plpy + +import os + +def fixture_file(name): + dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir, 'fixtures', name) diff --git a/release/python/0.4.1/crankshaft/test/mock_plpy.py b/release/python/0.4.1/crankshaft/test/mock_plpy.py new file mode 100644 index 0000000..a982ebe --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/mock_plpy.py @@ -0,0 +1,52 @@ +import re + +class MockCursor: + def __init__(self, data): + self.cursor_pos = 0 + self.data = data + + def fetch(self, batch_size): + batch = self.data[self.cursor_pos : self.cursor_pos + batch_size] + self.cursor_pos += batch_size + return batch + + +class MockPlPy: + def __init__(self): + self._reset() + + def _reset(self): + self.infos = [] + self.notices = [] + self.debugs = [] + self.logs = [] + self.warnings = [] + self.errors = [] + self.fatals = [] + self.executes = [] + self.results = [] + self.prepares = [] + self.results = [] + + def _define_result(self, query, result): + pattern = re.compile(query, re.IGNORECASE | re.MULTILINE) + self.results.append([pattern, result]) + + def notice(self, msg): + self.notices.append(msg) + + def debug(self, msg): + self.notices.append(msg) + + def info(self, msg): + self.infos.append(msg) + + def cursor(self, query): + data = self.execute(query) + return MockCursor(data) + + def execute(self, query): # TODO: additional arguments + for result in self.results: + if result[0].match(query): + return result[1] + return [] diff --git a/release/python/0.4.1/crankshaft/test/test_cluster_kmeans.py b/release/python/0.4.1/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + diff --git a/release/python/0.4.1/crankshaft/test/test_clustering_moran.py b/release/python/0.4.1/crankshaft/test/test_clustering_moran.py new file mode 100644 index 0000000..2b683cf --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/test_clustering_moran.py @@ -0,0 +1,88 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class MoranTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.params_markov = {"id_col": "cartodb_id", + "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads(open(fixture_file('moran.json')).read()) + + def test_map_quads(self): + """Test map_quads""" + self.assertEqual(cc.map_quads(1), 'HH') + self.assertEqual(cc.map_quads(2), 'LH') + self.assertEqual(cc.map_quads(3), 'LL') + self.assertEqual(cc.map_quads(4), 'HL') + self.assertEqual(cc.map_quads(33), None) + self.assertEqual(cc.map_quads('andy'), None) + + def test_quad_position(self): + """Test lisa_sig_vals""" + + quads = np.array([1, 2, 3, 4], np.int) + + ans = np.array(['HH', 'LH', 'LL', 'HL']) + test_ans = cc.quad_position(quads) + + self.assertTrue((test_ans == ans).all()) + + def test_moran_local(self): + """Test Moran's I local""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None? ', result == None + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1235) + result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None?', result == None + result_moran = result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) diff --git a/release/python/0.4.1/crankshaft/test/test_pysal_utils.py b/release/python/0.4.1/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..171fdbc --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,142 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + self.params_array = {"id_col": "cartodb_id", + "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans = "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " + + ans_array = "i.\"_2013_dec\"::numeric As attr1, " \ + "i.\"_2014_jan\"::numeric As attr2, " \ + "i.\"_2014_feb\"::numeric As attr3, " + + self.assertEqual(pu.query_attr_select(self.params), ans) + self.assertEqual(pu.query_attr_select(self.params_array), ans_array) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans = "idx_replace.\"andy\" IS NOT NULL AND " \ + "idx_replace.\"jay_z\" IS NOT NULL AND " \ + "idx_replace.\"jay_z\" <> 0" + + ans_array = "idx_replace.\"_2013_dec\" IS NOT NULL AND " \ + "idx_replace.\"_2014_jan\" IS NOT NULL AND " \ + "idx_replace.\"_2014_feb\" IS NOT NULL" + + self.assertEqual(pu.query_attr_where(self.params), ans) + self.assertEqual(pu.query_attr_where(self.params_array), ans_array) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + ans_array = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"_2013_dec\"::numeric As attr1, " \ + "i.\"_2014_jan\"::numeric As attr2, " \ + "i.\"_2014_feb\"::numeric As attr3, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"_2013_dec\" IS NOT NULL AND " \ + "j.\"_2014_jan\" IS NOT NULL AND " \ + "j.\"_2014_feb\" IS NOT NULL " \ + "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"_2013_dec\" IS NOT NULL AND " \ + "i.\"_2014_jan\" IS NOT NULL AND " \ + "i.\"_2014_feb\" IS NOT NULL "\ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params), ans) + self.assertEqual(pu.knn(self.params_array), ans_array) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params), ans) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + pu.knn(self.params)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) diff --git a/release/python/0.4.1/crankshaft/test/test_segmentation.py b/release/python/0.4.1/crankshaft/test/test_segmentation.py new file mode 100644 index 0000000..d02e8b1 --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/test_segmentation.py @@ -0,0 +1,64 @@ +import unittest +import numpy as np +from helper import plpy, fixture_file +import crankshaft.segmentation as segmentation +import json + +class SegmentationTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + + def generate_random_data(self,n_samples,random_state, row_type=False): + x1 = random_state.uniform(size=n_samples) + x2 = random_state.uniform(size=n_samples) + x3 = random_state.randint(0, 4, size=n_samples) + + y = x1+x2*x2+x3 + cartodb_id = range(len(x1)) + + if row_type: + return [ {'features': vals} for vals in zip(x1,x2,x3)], y + else: + return [dict( zip(['x1','x2','x3','target', 'cartodb_id'],[x1,x2,x3,y,cartodb_id]))] + + def test_replace_nan_with_mean(self): + test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan]) + + def test_create_and_predict_segment(self): + n_samples = 1000 + + random_state_train = np.random.RandomState(13) + random_state_test = np.random.RandomState(134) + training_data = self.generate_random_data(n_samples, random_state_train) + test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True) + + + ids = [{'cartodb_ids': range(len(test_data))}] + rows = [{'x1': 0,'x2':0,'x3':0,'y':0,'cartodb_id':0}] + + plpy._define_result('select \* from \(select \* from training\) a limit 1',rows) + plpy._define_result('.*from \(select \* from training\) as a' ,training_data) + plpy._define_result('select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',ids) + plpy._define_result('.*select \* from test.*' ,test_data) + + model_parameters = {'n_estimators': 1200, + 'max_depth': 3, + 'subsample' : 0.5, + 'learning_rate': 0.01, + 'min_samples_leaf': 1} + + result = segmentation.create_and_predict_segment( + 'select * from training', + 'target', + 'select * from test', + model_parameters) + + prediction = [r[1] for r in result] + + accuracy =np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y)))) + + self.assertEqual(len(result),len(test_data)) + self.assertTrue( result[0][2] < 0.01) + self.assertTrue( accuracy < 0.5*np.mean(test_y) ) diff --git a/release/python/0.4.1/crankshaft/test/test_space_time_dynamics.py b/release/python/0.4.1/crankshaft/test/test_space_time_dynamics.py new file mode 100644 index 0000000..54ffc9d --- /dev/null +++ b/release/python/0.4.1/crankshaft/test/test_space_time_dynamics.py @@ -0,0 +1,324 @@ +import unittest +import numpy as np + +import unittest + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.space_time_dynamics as std +from crankshaft import random_seeds +import json + +class SpaceTimeTests(unittest.TestCase): + """Testing class for Markov Functions.""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors_markov.json')).read()) + self.markov_data = json.loads(open(fixture_file('markov.json')).read()) + + self.time_data = np.array([i * np.ones(10, dtype=float) for i in range(10)]).T + + self.transition_matrix = np.array([ + [[ 0.96341463, 0.0304878 , 0.00609756, 0. , 0. ], + [ 0.06040268, 0.83221477, 0.10738255, 0. , 0. ], + [ 0. , 0.14 , 0.74 , 0.12 , 0. ], + [ 0. , 0.03571429, 0.32142857, 0.57142857, 0.07142857], + [ 0. , 0. , 0. , 0.16666667, 0.83333333]], + [[ 0.79831933, 0.16806723, 0.03361345, 0. , 0. ], + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0.00537634, 0.06989247, 0.8655914 , 0.05913978, 0. ], + [ 0. , 0. , 0.06372549, 0.90196078, 0.03431373], + [ 0. , 0. , 0. , 0.19444444, 0.80555556]], + [[ 0.84693878, 0.15306122, 0. , 0. , 0. ], + [ 0.08133971, 0.78947368, 0.1291866 , 0. , 0. ], + [ 0.00518135, 0.0984456 , 0.79274611, 0.0984456 , 0.00518135], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0. , 0. , 0. , 0.10204082, 0.89795918]], + [[ 0.8852459 , 0.09836066, 0. , 0.01639344, 0. ], + [ 0.03875969, 0.81395349, 0.13953488, 0. , 0.00775194], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0.02339181, 0.12865497, 0.75438596, 0.09356725], + [ 0. , 0. , 0. , 0.09661836, 0.90338164]], + [[ 0.33333333, 0.66666667, 0. , 0. , 0. ], + [ 0.0483871 , 0.77419355, 0.16129032, 0.01612903, 0. ], + [ 0.01149425, 0.16091954, 0.74712644, 0.08045977, 0. ], + [ 0. , 0.01036269, 0.06217617, 0.89637306, 0.03108808], + [ 0. , 0. , 0. , 0.02352941, 0.97647059]]] + ) + + def test_spatial_markov(self): + """Test Spatial Markov.""" + data = [ { 'id': d['id'], + 'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009'], + 'neighbors': d['neighbors'] } for d in self.neighbors_data] + print(str(data[0])) + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + + result = std.spatial_markov_trend('subquery', ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009'], 5, 'knn', 5, 0, 'the_geom', 'cartodb_id') + + self.assertTrue(result != None) + result = [(row[0], row[1], row[2], row[3], row[4]) for row in result] + print result[0] + expected = self.markov_data + for ([res_trend, res_up, res_down, res_vol, res_id], + [exp_trend, exp_up, exp_down, exp_vol, exp_id] + ) in zip(result, expected): + self.assertAlmostEqual(res_trend, exp_trend) + + def test_get_time_data(self): + """Test get_time_data""" + data = [ { 'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009'] } for d in self.neighbors_data] + + result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009']) + + ## expected was prepared from PySAL example: + ### f = ps.open(ps.examples.get_path("usjoin.csv")) + ### pci = np.array([f.by_col[str(y)] for y in range(1995, 2010)]).transpose() + ### rpci = pci / (pci.mean(axis = 0)) + + expected = np.array([[ 0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154, 0.83271652 + , 0.83786314, 0.85012593, 0.85509656, 0.86416612, 0.87119375, 0.86302631 + , 0.86148267, 0.86252252, 0.86746356], + [ 0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388, 0.90746978 + , 0.89830489, 0.89431991, 0.88924794, 0.89815176, 0.91832091, 0.91706054 + , 0.90139505, 0.87897455, 0.86216858], + [ 0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522, 0.78964559 + , 0.80584442, 0.8084998, 0.82258551, 0.82668196, 0.82373724, 0.81814804 + , 0.83675961, 0.83574199, 0.84647177], + [ 1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841, 1.14506948 + , 1.12151133, 1.11160697, 1.10888621, 1.11399806, 1.12168029, 1.13164797 + , 1.12958508, 1.11371818, 1.09936775], + [ 1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025, 1.16898201 + , 1.17212488, 1.14752303, 1.11843284, 1.11024964, 1.11943471, 1.11736468 + , 1.10863242, 1.09642516, 1.07762337], + [ 1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684, 1.44184737 + , 1.44782832, 1.41978227, 1.39092208, 1.4059372, 1.40788646, 1.44052766 + , 1.45241216, 1.43306098, 1.4174431 ], + [ 1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149, 1.10888138 + , 1.11856629, 1.13062931, 1.11944984, 1.12446239, 1.11671008, 1.10880034 + , 1.08401709, 1.06959206, 1.07875225], + [ 1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545, 0.99854316 + , 0.9880258, 0.99669587, 0.99327676, 1.01400905, 1.03176742, 1.040511 + , 1.01749645, 0.9936394, 0.98279746], + [ 0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845, 0.99127006 + , 0.97925917, 0.9683482, 0.95335147, 0.93694787, 0.94308213, 0.92232874 + , 0.91284091, 0.89689833, 0.88928858], + [ 0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044, 0.8578708 + , 0.86036185, 0.86107306, 0.8500772, 0.86981998, 0.86837929, 0.87204141 + , 0.86633032, 0.84946077, 0.83287146], + [ 1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624, 1.14450183 + , 1.12349752, 1.12596664, 1.12213996, 1.1119989, 1.10257792, 1.10491258 + , 1.11059842, 1.10509795, 1.10020097], + [ 0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687, 0.95831051 + , 0.94480909, 0.94804195, 0.95430286, 0.94103989, 0.92122519, 0.91010201 + , 0.89280392, 0.89298243, 0.89165385], + [ 0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647, 0.9480927 + , 0.93539182, 0.95388718, 0.94597005, 0.96918424, 0.94781281, 0.93466815 + , 0.94281559, 0.96520315, 0.96715441], + [ 0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897, 0.98687073 + , 0.99237486, 0.98209969, 0.9877653, 0.97399471, 0.96910087, 0.98416665 + , 0.98423613, 0.99823861, 0.99545704], + [ 0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012, 0.86191535 + , 0.84981451, 0.85472102, 0.84564835, 0.83998883, 0.83478547, 0.82803648 + , 0.8198736, 0.82265395, 0.8399404 ], + [ 0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136, 0.82785597 + , 0.86008789, 0.86776298, 0.86720209, 0.8676334, 0.89179317, 0.94202108 + , 0.9422231, 0.93902708, 0.94479184], + [ 0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238, 0.90906632 + , 0.92693339, 0.93695966, 0.94242697, 0.94338265, 0.91981796, 0.91108804 + , 0.90543476, 0.91737138, 0.94793657], + [ 1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723, 1.20172869 + , 1.21328691, 1.22624778, 1.22397075, 1.23857042, 1.24419893, 1.23929384 + , 1.23418676, 1.23626739, 1.26754398], + [ 1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667, 1.34790023 + , 1.34399863, 1.32575181, 1.30795492, 1.30544841, 1.30303302, 1.32107766 + , 1.32936244, 1.33001241, 1.33288462], + [ 1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093, 1.05059016 + , 1.03405057, 1.02747623, 1.03162734, 0.9961416, 0.97356208, 0.94241549 + , 0.92754547, 0.92549227, 0.92138102], + [ 1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264, 1.13889622 + , 1.12442212, 1.13367018, 1.13982256, 1.14029944, 1.11979401, 1.10905389 + , 1.10577769, 1.11166825, 1.09985155], + [ 0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284, 0.74480073 + , 0.76098396, 0.76156903, 0.76651952, 0.76533288, 0.78205934, 0.76842416 + , 0.77487118, 0.77768683, 0.78801192], + [ 0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803, 0.97370819 + , 0.96419154, 0.97209861, 0.97441313, 0.96356162, 0.94745352, 0.93965462 + , 0.93069645, 0.94020973, 0.94358232], + [ 0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801, 0.80071489 + , 0.83358256, 0.83451613, 0.85175032, 0.85954307, 0.86790024, 0.87170334 + , 0.87863799, 0.87497981, 0.87888675], + [ 0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619, 0.98733195 + , 0.99644997, 0.99669587, 1.02559097, 1.01116651, 0.99988024, 0.97906749 + , 0.99323123, 1.00204939, 0.99602148], + [ 1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683, 1.08312397 + , 1.05192626, 1.04230892, 1.05577278, 1.08569751, 1.12443486, 1.08891079 + , 1.08603695, 1.05997314, 1.02160943], + [ 1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272, 1.18257029 + , 1.16226243, 1.16009196, 1.14467789, 1.14820235, 1.12386598, 1.12680236 + , 1.12357937, 1.1159258, 1.12570828], + [ 1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667, 1.31210239 + , 1.29989156, 1.29203193, 1.27183516, 1.26830786, 1.2617743, 1.28656675 + , 1.29734097, 1.29390205, 1.29345446], + [ 0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864, 0.78772975 + , 0.82848011, 0.8259679, 0.82435705, 0.83108634, 0.84373784, 0.83891093 + , 0.84349247, 0.85637272, 0.86539395], + [ 1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626, 1.2256767 + , 1.21126648, 1.19377804, 1.18355337, 1.19674434, 1.21536573, 1.23653297 + , 1.27962009, 1.27968392, 1.25907738], + [ 0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282, 0.96480308 + , 0.94686376, 0.93679073, 0.92540049, 0.92988835, 0.93442917, 0.92100464 + , 0.91475304, 0.90249622, 0.9021363 ], + [ 0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368, 0.88937573 + , 0.894401, 0.90448993, 0.95495898, 0.92698333, 0.94745352, 0.92562488 + , 0.96635366, 1.02520312, 1.0394296 ], + [ 1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073, 1.00759019 + , 0.99192968, 0.99747298, 0.99550759, 0.97583768, 0.9610168, 0.94779638 + , 0.93759089, 0.93353431, 0.94121705], + [ 0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613, 0.83434854 + , 0.85813595, 0.84667961, 0.84374558, 0.85951183, 0.87194227, 0.89455097 + , 0.88283929, 0.90349491, 0.90600675], + [ 1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086, 1.00581626 + , 0.98850522, 0.99291168, 0.98983209, 0.97511924, 0.96134615, 0.96382634 + , 0.95011401, 0.9434686, 0.94637765], + [ 1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857, 1.04800023 + , 1.03024941, 1.04200483, 1.0402554, 1.03296979, 1.02191682, 1.02476275 + , 1.02347523, 1.02517684, 1.04359571], + [ 1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043, 1.0531801 + , 1.07452771, 1.09383478, 1.1052447, 1.10322136, 1.09167939, 1.08772756 + , 1.08859544, 1.09177338, 1.1096083 ], + [ 0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809, 0.86287327 + , 0.85169796, 0.85411285, 0.84886336, 0.84517414, 0.84843858, 0.84488343 + , 0.83374329, 0.82812044, 0.82878599], + [ 0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286, 0.92652175 + , 0.94278865, 0.93682452, 0.98655146, 0.992237, 0.9798497, 0.93869677 + , 0.96947771, 1.00362626, 0.98102351], + [ 0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967, 0.93092109 + , 0.92662519, 0.93412152, 0.93501274, 0.92879506, 0.92110542, 0.91035556 + , 0.90430364, 0.89994694, 0.90073864], + [ 0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824, 0.98882205 + , 0.97662234, 0.95601578, 0.94905385, 0.94934888, 0.97152609, 0.97163004 + , 0.9700702, 0.97158948, 0.95884908], + [ 0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751, 0.84818516 + , 0.85265681, 0.84502402, 0.82645665, 0.81743586, 0.83550406, 0.83338919 + , 0.83511679, 0.82136617, 0.80921874], + [ 0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441, 0.95440787 + , 0.96364363, 0.96804412, 0.97136214, 0.97583768, 0.95571724, 0.96895368 + , 0.97001634, 0.97082733, 0.98782366], + [ 1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249, 1.10558188 + , 1.1214086, 1.12292577, 1.13021031, 1.13342735, 1.14686068, 1.14502975 + , 1.14474747, 1.14084037, 1.16142926], + [ 1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863, 1.11856702 + , 1.09764283, 1.08815849, 1.08044313, 1.09278827, 1.07003204, 1.08398066 + , 1.09831768, 1.09298232, 1.09176125], + [ 0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744, 0.77751194 + , 0.79902974, 0.81437881, 0.80788828, 0.79603865, 0.78966436, 0.79949807 + , 0.80172182, 0.82168155, 0.85587911], + [ 1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561, 1.00162979 + , 0.99860739, 1.00814981, 1.00574316, 0.99030032, 0.97682565, 0.97292596 + , 0.96519561, 0.96173403, 0.95890284], + [ 0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289, 0.96608031 + , 0.99727185, 1.00781194, 1.03484236, 1.05333619, 1.0983263, 1.1704974 + , 1.17025154, 1.18730553, 1.14242645]]) + + self.assertTrue(np.allclose(result, expected)) + self.assertTrue(type(result) == type(expected)) + self.assertTrue(result.shape == expected.shape) + + def test_rebin_data(self): + """Test rebin_data""" + ## sample in double the time (even case since 10 % 2 = 0): + ## (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2 + ## = 0.5, 2.5, 4.5, 6.5, 8.5 + ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float) + for i in range(0, 10, 2)]).T + + self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 2), ans_even)) + + ## sample in triple the time (uneven since 10 % 3 = 1): + ## (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1 + ## = 1, 4, 7, 9 + ans_odd = np.array([i * np.ones(10, dtype=float) + for i in (1, 4, 7, 9)]).T + self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 3), ans_odd)) + + def test_get_prob_dist(self): + """Test get_prob_dist""" + lag_indices = np.array([1, 2, 3, 4]) + unit_indices = np.array([1, 3, 2, 4]) + answer = np.array([ + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0. , 0. , 0.02352941, 0.97647059] + ]) + result = std.get_prob_dist(self.transition_matrix, lag_indices, unit_indices) + + self.assertTrue(np.array_equal(result, answer)) + + def test_get_prob_stats(self): + """Test get_prob_stats""" + + probs = np.array([ + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0. , 0. , 0.02352941, 0.97647059] + ]) + unit_indices = np.array([1, 3, 2, 4]) + answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.]) + answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941]) + answer_trend = np.array([-0.03301887 / 0.88207547, -0.05882353 / 0.87058824, 0.02475248 / 0.77722772, -0.02352941 / 0.97647059]) + answer_volatility = np.array([ 0.34221495, 0.33705421, 0.29226542, 0.38834223]) + + result = std.get_prob_stats(probs, unit_indices) + result_up = result[0] + result_down = result[1] + result_trend = result[2] + result_volatility = result[3] + + self.assertTrue(np.allclose(result_up, answer_up)) + self.assertTrue(np.allclose(result_down, answer_down)) + self.assertTrue(np.allclose(result_trend, answer_trend)) + self.assertTrue(np.allclose(result_volatility, answer_volatility)) diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index 7df077c..b3992a6 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.4.0' +default_version = '0.4.1' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft From 2ed9479e8f833b78667e21ae2cfb1b9689a24bf7 Mon Sep 17 00:00:00 2001 From: Carla Date: Wed, 21 Sep 2016 18:23:27 +0200 Subject: [PATCH 56/62] Typos in function names --- doc/02_moran.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/02_moran.md b/doc/02_moran.md index 2d9ae99..e83c2f1 100644 --- a/doc/02_moran.md +++ b/doc/02_moran.md @@ -37,7 +37,7 @@ SELECT aoi.quads, aoi.significance, c.num_cyclists_per_total_population -FROM CDB_GetAreasOfInterestLocal('SELECT * FROM commute_data' +FROM CDB_AreasOfInterestLocal('SELECT * FROM commute_data' 'num_cyclists_per_total_population') As aoi JOIN commute_data As c ON c.cartodb_id = aoi.rowid; @@ -113,7 +113,7 @@ SELECT aoi.quads, aoi.significance, c.cyclists_per_total_population -FROM CDB_GetAreasOfInterestLocalRate('SELECT * FROM commute_data' +FROM CDB_AreasOfInterestLocalRate('SELECT * FROM commute_data' 'num_cyclists', 'total_population') As aoi JOIN commute_data As c From 24b9cda5aa00fc18e51954d15404abd90d2481a8 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 21 Sep 2016 12:55:21 -0400 Subject: [PATCH 57/62] import correct function from python lib --- src/pg/sql/10_moran.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index 3be31a2..070392d 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -10,7 +10,7 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, significance NUMERIC) AS $$ - from crankshaft.clustering import moran_local + from crankshaft.clustering import moran # TODO: use named parameters or a dictionary return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) $$ LANGUAGE plpythonu; From dc150e693697b276e8a12ec8b0a05dac00b9ca98 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 21 Sep 2016 12:55:58 -0400 Subject: [PATCH 58/62] add test for moran global --- src/pg/test/expected/02_moran_test.out | 6 ++++++ src/pg/test/sql/02_moran_test.sql | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/src/pg/test/expected/02_moran_test.out b/src/pg/test/expected/02_moran_test.out index 3ba5bfd..7897622 100644 --- a/src/pg/test/expected/02_moran_test.out +++ b/src/pg/test/expected/02_moran_test.out @@ -5,6 +5,12 @@ SET client_min_messages TO WARNING; \set ECHO none _cdb_random_seeds +(1 row) +moran|significance +0.3399|-0.0196 +(1 row) +_cdb_random_seeds + (1 row) code|quads 01|HH diff --git a/src/pg/test/sql/02_moran_test.sql b/src/pg/test/sql/02_moran_test.sql index 5545a4a..d2cfaef 100644 --- a/src/pg/test/sql/02_moran_test.sql +++ b/src/pg/test/sql/02_moran_test.sql @@ -6,6 +6,14 @@ -- Areas of Interest functions perform some nondeterministic computations -- (to estimate the significance); we will set the seeds for the RNGs -- that affect those results to have repeateble results + +-- Moran's I Global +SELECT cdb_crankshaft._cdb_random_seeds(1234); + +SELECT round(moran, 4) As moran, round(significance, 4) As significance + FROM cdb_crankshaft.CDB_AreasOfInterestGlobal('SELECT * FROM ppoints', 'value') m(moran, significance); + +-- Moran's I Local SELECT cdb_crankshaft._cdb_random_seeds(1234); SELECT ppoints.code, m.quads From e924abbaccb14b10c5b43e910595448b96e210bd Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 21 Sep 2016 13:09:42 -0400 Subject: [PATCH 59/62] formatted for pep8 --- .../crankshaft/crankshaft/clustering/moran.py | 23 +++++---- .../crankshaft/pysal_utils/pysal_utils.py | 37 +++++++++----- .../crankshaft/test/test_clustering_moran.py | 48 +++++++++++++------ 3 files changed, 73 insertions(+), 35 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 7a97233..4e7086e 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -14,6 +14,7 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- + def moran(subquery, attr_name, w_type, num_ngbrs, permutations, geom_col, id_col): """ @@ -39,18 +40,19 @@ def moran(subquery, attr_name, plpy.error('Analysis failed: %s' % e) return pu.empty_zipped_array(2) - ## collect attributes + # collect attributes attr_vals = pu.get_attributes(result) - ## calculate weights + # calculate weights weight = pu.get_weight(result, w_type, num_ngbrs) - ## calculate moran global + # calculate moran global moran_global = ps.esda.moran.Moran(attr_vals, weight, permutations=permutations) return zip([moran_global.I], [moran_global.EI]) + def moran_local(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col): """ @@ -90,6 +92,7 @@ def moran_local(subquery, attr, return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + def moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col): """ @@ -114,18 +117,19 @@ def moran_rate(subquery, numerator, denominator, plpy.error('Analysis failed: %s' % e) return pu.empty_zipped_array(2) - ## collect attributes + # collect attributes numer = pu.get_attributes(result, 1) denom = pu.get_attributes(result, 2) weight = pu.get_weight(result, w_type, num_ngbrs) - ## calculate moran global rate + # calculate moran global rate lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, permutations=permutations) return zip([lisa_rate.I], [lisa_rate.EI]) + def moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col): """ @@ -153,7 +157,7 @@ def moran_local_rate(subquery, numerator, denominator, plpy.error('Analysis failed: %s' % e) return pu.empty_zipped_array(5) - ## collect attributes + # collect attributes numer = pu.get_attributes(result, 1) denom = pu.get_attributes(result, 2) @@ -168,6 +172,7 @@ def moran_local_rate(subquery, numerator, denominator, return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + def moran_local_bv(subquery, attr1, attr2, permutations, geom_col, id_col, w_type, num_ngbrs): """ @@ -189,11 +194,11 @@ def moran_local_bv(subquery, attr1, attr2, if len(result) == 0: return pu.empty_zipped_array(4) except plpy.SPIError: - plpy.error("Error: areas of interest query failed, " \ + plpy.error("Error: areas of interest query failed, " "check input parameters") return pu.empty_zipped_array(4) - ## collect attributes + # collect attributes attr1_vals = pu.get_attributes(result, 1) attr2_vals = pu.get_attributes(result, 2) @@ -211,6 +216,7 @@ def moran_local_bv(subquery, attr1, attr2, # Low level functions ---------------------------------------- + def map_quads(coord): """ Map a quadrant number to Moran's I designation @@ -231,6 +237,7 @@ def map_quads(coord): else: return None + def quad_position(quads): """ Produce Moran's I classification based of n diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index 4622925..6dbcbd8 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -6,6 +6,7 @@ import numpy as np import pysal as ps + def construct_neighbor_query(w_type, query_vals): """Return query (a string) used for finding neighbors @param w_type text: type of neighbors to calculate ('knn' or 'queen') @@ -17,7 +18,8 @@ def construct_neighbor_query(w_type, query_vals): else: return queen(query_vals) -## Build weight object + +# Build weight object def get_weight(query_res, w_type='knn', num_ngbrs=5): """ Construct PySAL weight from return value of query @@ -39,6 +41,7 @@ def get_weight(query_res, w_type='knn', num_ngbrs=5): return built_weight + def query_attr_select(params): """ Create portion of SELECT statement for attributes inolved in query. @@ -50,21 +53,24 @@ def query_attr_select(params): template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, " if 'time_cols' in params: - ## if markov analysis + # if markov analysis attrs = params['time_cols'] for idx, val in enumerate(attrs): attr_string += template % {"col": val, "alias_num": idx + 1} else: - ## if moran's analysis + # if moran's analysis attrs = [k for k in params - if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')] + if k not in ('id_col', 'geom_col', 'subquery', + 'num_ngbrs', 'subquery')] for idx, val in enumerate(sorted(attrs)): - attr_string += template % {"col": params[val], "alias_num": idx + 1} + attr_string += template % {"col": params[val], + "alias_num": idx + 1} return attr_string + def query_attr_where(params): """ Construct where conditions when building neighbors query @@ -74,7 +80,8 @@ def query_attr_where(params): 'numerator': 'data1', 'denominator': 'data2', '': ...} - Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL' + Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" + IS NOT NULL' Input: {'subquery': ..., 'time_cols': ['time1', 'time2', 'time3'], @@ -86,17 +93,18 @@ def query_attr_where(params): template = "idx_replace.\"%s\" IS NOT NULL" if 'time_cols' in params: - ## markov where clauses + # markov where clauses attrs = params['time_cols'] # add values to template for attr in attrs: attr_string.append(template % attr) else: - ## moran where clauses + # moran where clauses # get keys attrs = sorted([k for k in params - if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs', 'subquery')]) + if k not in ('id_col', 'geom_col', 'subquery', + 'num_ngbrs', 'subquery')]) # add values to template for attr in attrs: attr_string.append(template % params[attr]) @@ -108,6 +116,7 @@ def query_attr_where(params): return out + def knn(params): """SQL query for k-nearest neighbors. @param vars: dict of values to fill template @@ -139,7 +148,8 @@ def knn(params): return query.format(**params) -## SQL query for finding queens neighbors (all contiguous polygons) + +# SQL query for finding queens neighbors (all contiguous polygons) def queen(params): """SQL query for queen neighbors. @param params dict: information to fill query @@ -167,14 +177,17 @@ def queen(params): return query.format(**params) -## to add more weight methods open a ticket or pull request +# to add more weight methods open a ticket or pull request + def get_attributes(query_res, attr_num=1): """ @param query_res: query results with attributes and neighbors @param attr_num: attribute number (1, 2, ...) """ - return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) + return np.array([x['attr' + str(attr_num)] for x in query_res], + dtype=np.float) + def empty_zipped_array(num_nones): """ diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 2b683cf..cb54902 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -14,6 +14,7 @@ import crankshaft.pysal_utils as pu from crankshaft import random_seeds import json + class MoranTest(unittest.TestCase): """Testing class for Moran's I functions""" @@ -26,12 +27,15 @@ class MoranTest(unittest.TestCase): "geom_col": "the_geom", "num_ngbrs": 321} self.params_markov = {"id_col": "cartodb_id", - "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], + "time_cols": ["_2013_dec", "_2014_jan", + "_2014_feb"], "subquery": "SELECT * FROM a_list", "geom_col": "the_geom", "num_ngbrs": 321} - self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read()) - self.moran_data = json.loads(open(fixture_file('moran.json')).read()) + self.neighbors_data = json.loads( + open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads( + open(fixture_file('moran.json')).read()) def test_map_quads(self): """Test map_quads""" @@ -54,35 +58,49 @@ class MoranTest(unittest.TestCase): def test_moran_local(self): """Test Moran's I local""" - data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + data = [{'id': d['id'], + 'attr1': d['value'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] + plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = cc.moran_local('subquery', 'value', + 'knn', 5, 99, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] - expected = self.moran_data - for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + zipped_values = zip(result, self.moran_data) + + for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values: self.assertAlmostEqual(res_val, exp_val) self.assertEqual(res_quad, exp_quad) def test_moran_local_rate(self): """Test Moran's I rate""" - data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] + data = [{'id': d['id'], + 'attr1': d['value'], + 'attr2': 1, + 'neighbors': d['neighbors']} for d in self.neighbors_data] + plpy._define_result('select', data) random_seeds.set_random_seeds(1234) - result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') - print 'result == None? ', result == None + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', + 'knn', 5, 99, 'the_geom', 'cartodb_id') result = [(row[0], row[1]) for row in result] - expected = self.moran_data - for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + + zipped_values = zip(result, self.moran_data) + + for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values: self.assertAlmostEqual(res_val, exp_val) def test_moran(self): """Test Moran's I global""" - data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + data = [{'id': d['id'], + 'attr1': d['value'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] plpy._define_result('select', data) random_seeds.set_random_seeds(1235) - result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') - print 'result == None?', result == None + result = cc.moran('table', 'value', + 'knn', 5, 99, 'the_geom', 'cartodb_id') + result_moran = result[0][0] expected_moran = np.array([row[0] for row in self.moran_data]).mean() self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) From 375f765531fb93518135cec70613536fba337da0 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 21 Sep 2016 15:57:49 -0400 Subject: [PATCH 60/62] more pep8 updates --- .../crankshaft/pysal_utils/pysal_utils.py | 66 ++++++++++--------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index 6dbcbd8..358270f 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -50,7 +50,7 @@ def query_attr_select(params): """ attr_string = "" - template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, " + template = 'i."%(col)s"::numeric As attr%(alias_num)s, ' if 'time_cols' in params: # if markov analysis @@ -90,7 +90,7 @@ def query_attr_where(params): NULL AND idx_replace."time3" IS NOT NULL' """ attr_string = [] - template = "idx_replace.\"%s\" IS NOT NULL" + template = 'idx_replace."%s" IS NOT NULL' if 'time_cols' in params: # markov where clauses @@ -110,7 +110,7 @@ def query_attr_where(params): attr_string.append(template % params[attr]) if len(attrs) == 2: - attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]]) + attr_string.append('idx_replace."%s" <> 0' % params[attrs[1]]) out = " AND ".join(attr_string) @@ -129,22 +129,23 @@ def knn(params): "attr_where_i": attr_where.replace("idx_replace", "i"), "attr_where_j": attr_where.replace("idx_replace", "j")} - query = "SELECT " \ - "i.\"{id_col}\" As id, " \ - "%(attr_select)s" \ - "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ - "FROM ({subquery}) As j " \ - "WHERE " \ - "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ - "%(attr_where_j)s " \ - "ORDER BY " \ - "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ - "LIMIT {num_ngbrs})" \ - ") As neighbors " \ - "FROM ({subquery}) As i " \ - "WHERE " \ - "%(attr_where_i)s " \ - "ORDER BY i.\"{id_col}\" ASC;" % replacements + query = ''' + SELECT + i."{id_col}" As id, + %(attr_select)s + (SELECT ARRAY(SELECT j."{id_col}" + FROM ({subquery}) As j + WHERE + i."{id_col}" <> j."{id_col}" AND + %(attr_where_j)s + ORDER BY + j."{geom_col}" <-> i."{geom_col}" ASC + LIMIT {num_ngbrs}) + ) As neighbors + FROM ({subquery}) As i + WHERE %(attr_where_i)s + ORDER BY i."{id_col}" ASC; + ''' % replacements return query.format(**params) @@ -161,19 +162,20 @@ def queen(params): "attr_where_i": attr_where.replace("idx_replace", "i"), "attr_where_j": attr_where.replace("idx_replace", "j")} - query = "SELECT " \ - "i.\"{id_col}\" As id, " \ - "%(attr_select)s" \ - "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ - "FROM ({subquery}) As j " \ - "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ - "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ - "%(attr_where_j)s)" \ - ") As neighbors " \ - "FROM ({subquery}) As i " \ - "WHERE " \ - "%(attr_where_i)s " \ - "ORDER BY i.\"{id_col}\" ASC;" % replacements + query = ''' + SELECT + i."{id_col}" As id, + %(attr_select)s + (SELECT ARRAY(SELECT j."{id_col}" + FROM ({subquery}) As j + WHERE i."{id_col}" <> j."{id_col}" AND + ST_Touches(i."{geom_col}", j."{geom_col}") AND + %(attr_where_j)s + ) As neighbors + FROM ({subquery}) As i + WHERE %(attr_where_i)s + ORDER BY i."{id_col}" ASC; + ''' % replacements return query.format(**params) From c52eb507ead71a2acc7e44dd79e7e941de43cd5b Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 21 Sep 2016 16:43:30 -0400 Subject: [PATCH 61/62] revert to old query styles even though it breaks pep8 --- .../crankshaft/pysal_utils/pysal_utils.py | 66 +++++++++---------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py index 358270f..6dbcbd8 100644 --- a/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py +++ b/src/py/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -50,7 +50,7 @@ def query_attr_select(params): """ attr_string = "" - template = 'i."%(col)s"::numeric As attr%(alias_num)s, ' + template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, " if 'time_cols' in params: # if markov analysis @@ -90,7 +90,7 @@ def query_attr_where(params): NULL AND idx_replace."time3" IS NOT NULL' """ attr_string = [] - template = 'idx_replace."%s" IS NOT NULL' + template = "idx_replace.\"%s\" IS NOT NULL" if 'time_cols' in params: # markov where clauses @@ -110,7 +110,7 @@ def query_attr_where(params): attr_string.append(template % params[attr]) if len(attrs) == 2: - attr_string.append('idx_replace."%s" <> 0' % params[attrs[1]]) + attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]]) out = " AND ".join(attr_string) @@ -129,23 +129,22 @@ def knn(params): "attr_where_i": attr_where.replace("idx_replace", "i"), "attr_where_j": attr_where.replace("idx_replace", "j")} - query = ''' - SELECT - i."{id_col}" As id, - %(attr_select)s - (SELECT ARRAY(SELECT j."{id_col}" - FROM ({subquery}) As j - WHERE - i."{id_col}" <> j."{id_col}" AND - %(attr_where_j)s - ORDER BY - j."{geom_col}" <-> i."{geom_col}" ASC - LIMIT {num_ngbrs}) - ) As neighbors - FROM ({subquery}) As i - WHERE %(attr_where_i)s - ORDER BY i."{id_col}" ASC; - ''' % replacements + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE " \ + "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "%(attr_where_j)s " \ + "ORDER BY " \ + "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs})" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements return query.format(**params) @@ -162,20 +161,19 @@ def queen(params): "attr_where_i": attr_where.replace("idx_replace", "i"), "attr_where_j": attr_where.replace("idx_replace", "j")} - query = ''' - SELECT - i."{id_col}" As id, - %(attr_select)s - (SELECT ARRAY(SELECT j."{id_col}" - FROM ({subquery}) As j - WHERE i."{id_col}" <> j."{id_col}" AND - ST_Touches(i."{geom_col}", j."{geom_col}") AND - %(attr_where_j)s - ) As neighbors - FROM ({subquery}) As i - WHERE %(attr_where_i)s - ORDER BY i."{id_col}" ASC; - ''' % replacements + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements return query.format(**params) From b68f1c53b681e73dce8f81906d819bb6d3ad8618 Mon Sep 17 00:00:00 2001 From: Carla Iriberri Date: Thu, 22 Sep 2016 11:11:58 +0200 Subject: [PATCH 62/62] Release 0.4.2 --- NEWS.md | 4 + release/crankshaft--0.4.1--0.4.2.sql | 1965 +++++++++++++++++ release/crankshaft--0.4.2.sql | 1965 +++++++++++++++++ release/crankshaft.control | 2 +- .../0.4.2/crankshaft/crankshaft/__init__.py | 5 + .../crankshaft/clustering/__init__.py | 3 + .../crankshaft/clustering/kmeans.py | 18 + .../crankshaft/crankshaft/clustering/moran.py | 250 +++ .../crankshaft/pysal_utils/__init__.py | 2 + .../crankshaft/pysal_utils/pysal_utils.py | 201 ++ .../crankshaft/crankshaft/random_seeds.py | 11 + .../crankshaft/segmentation/__init__.py | 1 + .../crankshaft/segmentation/segmentation.py | 176 ++ .../space_time_dynamics/__init__.py | 2 + .../crankshaft/space_time_dynamics/markov.py | 189 ++ release/python/0.4.2/crankshaft/setup.py | 49 + release/python/0.4.2/crankshaft/setup.py-r | 49 + .../crankshaft/test/fixtures/kmeans.json | 1 + .../crankshaft/test/fixtures/markov.json | 1 + .../0.4.2/crankshaft/test/fixtures/moran.json | 52 + .../crankshaft/test/fixtures/neighbors.json | 54 + .../test/fixtures/neighbors_markov.json | 1 + .../python/0.4.2/crankshaft/test/helper.py | 13 + .../python/0.4.2/crankshaft/test/mock_plpy.py | 52 + .../crankshaft/test/test_cluster_kmeans.py | 38 + .../crankshaft/test/test_clustering_moran.py | 106 + .../0.4.2/crankshaft/test/test_pysal_utils.py | 142 ++ .../crankshaft/test/test_segmentation.py | 64 + .../test/test_space_time_dynamics.py | 324 +++ src/pg/crankshaft.control | 2 +- 30 files changed, 5740 insertions(+), 2 deletions(-) create mode 100644 release/crankshaft--0.4.1--0.4.2.sql create mode 100644 release/crankshaft--0.4.2.sql create mode 100644 release/python/0.4.2/crankshaft/crankshaft/__init__.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/clustering/__init__.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/clustering/kmeans.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/clustering/moran.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/pysal_utils/pysal_utils.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/random_seeds.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/segmentation/__init__.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/segmentation/segmentation.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/__init__.py create mode 100644 release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/markov.py create mode 100644 release/python/0.4.2/crankshaft/setup.py create mode 100644 release/python/0.4.2/crankshaft/setup.py-r create mode 100644 release/python/0.4.2/crankshaft/test/fixtures/kmeans.json create mode 100644 release/python/0.4.2/crankshaft/test/fixtures/markov.json create mode 100644 release/python/0.4.2/crankshaft/test/fixtures/moran.json create mode 100644 release/python/0.4.2/crankshaft/test/fixtures/neighbors.json create mode 100644 release/python/0.4.2/crankshaft/test/fixtures/neighbors_markov.json create mode 100644 release/python/0.4.2/crankshaft/test/helper.py create mode 100644 release/python/0.4.2/crankshaft/test/mock_plpy.py create mode 100644 release/python/0.4.2/crankshaft/test/test_cluster_kmeans.py create mode 100644 release/python/0.4.2/crankshaft/test/test_clustering_moran.py create mode 100644 release/python/0.4.2/crankshaft/test/test_pysal_utils.py create mode 100644 release/python/0.4.2/crankshaft/test/test_segmentation.py create mode 100644 release/python/0.4.2/crankshaft/test/test_space_time_dynamics.py diff --git a/NEWS.md b/NEWS.md index 1769902..db04a2a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +0.4.2 (2016-09-22) +------------------ +* Bugfix for cdb_areasofinterestglobal: import correct modules + 0.4.1 (2016-09-21) ------------------ * Let the user set the resolution in CDB_Contour function diff --git a/release/crankshaft--0.4.1--0.4.2.sql b/release/crankshaft--0.4.1--0.4.2.sql new file mode 100644 index 0000000..4c68ffc --- /dev/null +++ b/release/crankshaft--0.4.1--0.4.2.sql @@ -0,0 +1,1965 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.4.2'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. +CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION + CDB_PyAggS(current_state Numeric[], current_row Numeric[]) + returns NUMERIC[] as $$ + BEGIN + if array_upper(current_state,1) is null then + RAISE NOTICE 'setting state %',array_upper(current_row,1); + current_state[1] = array_upper(current_row,1); + end if; + return array_cat(current_state,current_row) ; + END + $$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_pyagg' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) ( + SFUNC = CDB_PyAggS, + STYPE = Numeric[], + INITCOND = "{}" + ); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment( + target NUMERIC[], + features NUMERIC[], + target_features NUMERIC[], + target_ids NUMERIC[], + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC) +AS $$ + import numpy as np + import plpy + + from crankshaft.segmentation import create_and_predict_segment_agg + model_params = {'n_estimators': n_estimators, + 'max_depth': max_depth, + 'subsample': subsample, + 'learning_rate': learning_rate, + 'min_samples_leaf': min_samples_leaf} + + def unpack2D(data): + dimension = data.pop(0) + a = np.array(data, dtype=float) + return a.reshape(len(a)/dimension, dimension) + + return create_and_predict_segment_agg(np.array(target, dtype=float), + unpack2D(features), + unpack2D(target_features), + target_ids, + model_params) + +$$ LANGUAGE plpythonu; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment ( + query TEXT, + variable_name TEXT, + target_table TEXT, + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC) +AS $$ + from crankshaft.segmentation import create_and_predict_segment + model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf} + return create_and_predict_segment(query,variable_name,target_table, model_params) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; +-- 0: nearest neighbor(s) +-- 1: barymetric +-- 2: IDW +-- 3: krigin ---> TO DO + + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN query text, + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + output numeric; +BEGIN + EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; +END; +$$ +language plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN geomin geometry[], + IN colin numeric[], + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + gs2 geometry[]; + vs2 numeric[]; + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- output := -999.999; + + -- nearest neighbors + -- p1: limit the number of neighbors, 0-> closest one + IF method = 0 THEN + + IF p1 = 0 THEN + p1 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.v as v FROM a ORDER BY point<->a.g LIMIT p1::integer) + SELECT avg(b.v) INTO output FROM b; + RETURN output; + + -- barymetric + ELSIF method = 1 THEN + WITH a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b), + d as (SELECT v FROM c WHERE ST_Within(point, v)) + SELECT v INTO g FROM d; + IF g is null THEN + -- out of the realm of the input data + RETURN -888.888; + END IF; + -- vertex of the selected cell + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg); + RETURN output; + + -- IDW + -- p1: limit the number of neighbors, 0->no limit + -- p2: order of distance decay, 0-> order 1 + ELSIF method = 2 THEN + + IF p2 = 0 THEN + p2 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g) + SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b; + IF p1::integer>0 THEN + gs2:=gs; + vs2:=vs; + FOR i IN 1..p1 + LOOP + gs2 := gs2 || gs[i]; + vs2 := vs2 || vs[i]; + END LOOP; + ELSE + gs2:=gs; + vs2:=vs; + END IF; + + WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v), + b as ( + SELECT + (1/ST_distance(point, a.g)^p2::integer) as k, + (a.v/ST_distance(point, a.g)^p2::integer) as f + FROM a + ) + SELECT sum(b.f)/sum(b.k) INTO output FROM b; + RETURN output; + + -- krigin + ELSIF method = 3 THEN + + -- TO DO + + END IF; + + RETURN -777.777; + +END; +$$ +language plpgsql IMMUTABLE; +-- ============================================================================================= +-- +-- CDB_Voronoi +-- +-- ============================================================================================= +CREATE OR REPLACE FUNCTION CDB_voronoi( + IN geomin geometry[], + IN buffer numeric DEFAULT 0.5, + IN tolerance numeric DEFAULT 1e-9 + ) +RETURNS geometry AS $$ +DECLARE + geomout geometry; +BEGIN + -- we need to make the geometry calculations in (pseudo)meters!!! + with a as ( + SELECT unnest(geomin) as g1 + ), + b as( + SELECT st_transform(g1, 3857) g2 from a + ) + SELECT array_agg(g2) INTO geomin from b; + + WITH + convexhull_1 as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r + ), + clipper as( + SELECT + st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g + FROM convexhull_1 a + ), + env0 as ( + SELECT + (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e + FROM convexhull_1 a + ), + env as ( + SELECT + array_agg(env0.e) as e + FROM env0 + ), + sample AS ( + SELECT + ST_Collect(geomin || env.e) as geom + FROM env + ), + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as cg + ), + tin as ( + SELECT + ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd + FROM + sample + ), + tin_polygons as ( + SELECT + (gd).Path as id, + (gd).Geom as pg, + ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct + FROM tin + ), + tin_lines as ( + SELECT + id, + ST_ExteriorRing(pg) as lg + FROM tin_polygons + ), + tin_nodes as ( + SELECT + id, + ST_PointN(lg,1) p1, + ST_PointN(lg,2) p2, + ST_PointN(lg,3) p3 + FROM tin_lines + ), + tin_edges AS ( + SELECT + p.id, + UNNEST(ARRAY[ + ST_MakeLine(n.p1,n.p2) , + ST_MakeLine(n.p2,n.p3) , + ST_MakeLine(n.p3,n.p1)]) as Edge, + ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct, + CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN + TRUE + ELSE FALSE END AS ctx, + p.pg, + ST_within(p.ct, convexhull.cg) as ctin + FROM + tin_polygons p, + tin_nodes n, + convexhull + WHERE p.id = n.id + ), + voro_nodes as ( + SELECT + CASE WHEN x.ctx = TRUE THEN + ST_Centroid(x.edge) + ELSE + x.ct + END as xct, + CASE WHEN y.id is null THEN + CASE WHEN x.ctin = TRUE THEN + ST_SetSRID(ST_MakePoint( + ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), + ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) + ), ST_SRID(x.ct)) + END + ELSE + y.ct + END as yct + FROM + tin_edges x + LEFT OUTER JOIN + tin_edges y + ON x.id <> y.id AND ST_Equals(x.edge, y.edge) + ), + voro_edges as( + SELECT + ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v + FROM + voro_nodes + ), + voro_cells as( + SELECT + ST_Polygonize( + ST_Node( + ST_LineMerge( + ST_Union(v, ST_ExteriorRing( + ST_Convexhull(v) + ) + ) + ) + ) + ) as g + FROM + voro_edges + ), + voro_set as( + SELECT + (st_dump(v.g)).geom as g + FROM voro_cells v + ), + clipped_voro as( + SELECT + ST_intersection(c.g, v.g) as g + FROM + voro_set v, + clipper c + WHERE + ST_GeometryType(v.g) = 'ST_Polygon' + ) + SELECT + st_collect( + ST_Transform( + ST_ConvexHull(g), + 4326 + ) + ) + INTO geomout + FROM + clipped_voro; + RETURN geomout; +END; +$$ language plpgsql IMMUTABLE; + +/** ---------------------------------------------------------------------------------------- + * @function : FindCircle + * @precis : Function that determines if three points form a circle. If so a table containing + * centre and radius is returned. If not, a null table is returned. + * @version : 1.0.1 + * @param : p_pt1 : First point in curve + * @param : p_pt2 : Second point in curve + * @param : p_pt3 : Third point in curve + * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle + * or NULL if three points do not form a circle. + * @history : Simon Greener - Feb 2012 - Original coding. + * Rafa de la Torre - Aug 2016 - Small fix for type checking + * @copyright : Simon Greener @ 2012 + * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/) +**/ +CREATE OR REPLACE FUNCTION _Find_Circle( + IN p_pt1 geometry, + IN p_pt2 geometry, + IN p_pt3 geometry) + RETURNS geometry AS +$BODY$ +DECLARE + v_Centre geometry; + v_radius NUMERIC; + v_CX NUMERIC; + v_CY NUMERIC; + v_dA NUMERIC; + v_dB NUMERIC; + v_dC NUMERIC; + v_dD NUMERIC; + v_dE NUMERIC; + v_dF NUMERIC; + v_dG NUMERIC; +BEGIN + IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN + RAISE EXCEPTION 'All supplied points must be not null.'; + RETURN NULL; + END IF; + IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR + ST_GeometryType(p_pt2) <> 'ST_Point' OR + ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN + RAISE EXCEPTION 'All supplied geometries must be points.'; + RETURN NULL; + END IF; + v_dA := ST_X(p_pt2) - ST_X(p_pt1); + v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); + v_dC := ST_X(p_pt3) - ST_X(p_pt1); + v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); + v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); + v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); + v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); + -- If v_dG is zero then the three points are collinear and no finite-radius + -- circle through them exists. + IF ( v_dG = 0 ) THEN + RETURN NULL; + ELSE + v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; + v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; + v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); + END IF; + RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); +END; +$BODY$ + LANGUAGE plpgsql VOLATILE STRICT; + +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_weightedmean' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) ( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" + ); + END IF; +END +$$ LANGUAGE plpgsql; +-- Spatial Markov + +-- input table format: +-- id | geom | date_1 | date_2 | date_3 +-- 1 | Pt1 | 12.3 | 13.1 | 14.2 +-- 2 | Pt2 | 11.0 | 13.2 | 12.5 +-- ... +-- Sample Function call: +-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate', +-- Array['date_1', 'date_2', 'date_3']) + +CREATE OR REPLACE FUNCTION + CDB_SpatialMarkovTrend ( + subquery TEXT, + time_cols TEXT[], + num_classes INT DEFAULT 7, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT) +AS $$ + + from crankshaft.space_time_dynamics import spatial_markov_trend + + ## TODO: use named parameters or a dictionary + return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- input table format: identical to above but in a predictable format +-- Sample function call: +-- SELECT cdb_spatial_markov('SELECT * FROM real_estate', +-- 'date_1') + + +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col_min text, +-- time_col_max text, +-- date_format text, -- '_YYYY_MM_DD' +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- +-- -- input table format: +-- -- id | geom | date | measurement +-- -- 1 | Pt1 | 12/3 | 13.2 +-- -- 2 | Pt2 | 11/5 | 11.3 +-- -- 3 | Pt1 | 11/13 | 12.9 +-- -- 4 | Pt3 | 12/19 | 10.1 +-- -- ... +-- +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col text, +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +CREATE OR REPLACE FUNCTION CDB_PIA( + IN polygon geometry, + IN tolerance numeric DEFAULT 1.0 + ) +RETURNS geometry AS $$ +DECLARE + env geometry[]; + cells geometry[]; + cell geometry; + best_c geometry; + best_d numeric; + test_d numeric; + test_mx numeric; + test_h numeric; + test_cells geometry[]; + width numeric; + height numeric; + h numeric; + i integer; + n integer; + sqr numeric; + p geometry; +BEGIN + sqr := |/2; + polygon := ST_Transform(polygon, 3857); + + -- grid #0 cell size + height := ST_YMax(polygon) - ST_YMin(polygon); + width := ST_XMax(polygon) - ST_XMin(polygon); + h := 0.5*LEAST(height, width); + + -- grid #0 + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c + ) + SELECT array_agg(c) INTO cells FROM c1; + + -- 1st guess: centroid + best_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(Polygon)); + + -- looping the loop + n := array_length(cells,1); + i := 1; + LOOP + + EXIT WHEN i > n; + + cell := cells[i]; + i := i+1; + + -- cell side size, it's square + test_h := ST_XMax(cell) - ST_XMin(cell) ; + + -- check distance + test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell)); + IF test_d > best_d THEN + best_d := test_d; + best_c := cells[i]; + END IF; + + -- longest distance within the cell + test_mx := test_d + (test_h/2 * sqr); + + -- if the cell has no chance to contains the desired point, continue + CONTINUE WHEN test_mx - best_d <= tolerance; + + -- resample the cell + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c + ) + SELECT array_agg(c) INTO test_cells FROM c1; + + -- concat the new cells to the former array + cells := cells || test_cells; + + -- prepare next iteration + n := array_length(cells,1); + + END LOOP; + + RETURN ST_transform(ST_Centroid(best_c), 4326); + +END; +$$ language plpgsql IMMUTABLE; + + +-- signed distance point to polygon with holes +-- negative is the point is out the polygon +CREATE OR REPLACE FUNCTION _Signed_Dist( + IN polygon geometry, + IN point geometry + ) +RETURNS numeric AS $$ +DECLARE + i integer; + within integer; + holes integer; + dist numeric; +BEGIN + dist := 1e999; + SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist; + SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within; + SELECT ST_NumInteriorRings(polygon) INTO holes; + IF holes > 0 THEN + FOR i IN 1..holes + LOOP + SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist; + END LOOP; + END IF; + dist := dist * within::numeric; + RETURN dist; +END; +$$ language plpgsql IMMUTABLE; +-- +-- Iterative densification of a set of points using Delaunay triangulation +-- the new points have as assigned value the average value of the 3 vertex (centroid) +-- +-- @param geomin - array of geometries (points) +-- +-- @param colin - array of numeric values in that points +-- +-- @param iterations - integer, number of iterations +-- +-- +-- Returns: TABLE(geomout geometry, colout numeric) +-- +-- +CREATE OR REPLACE FUNCTION CDB_Densify( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + geotemp geometry[]; + coltemp numeric[]; + i integer; + gs geometry[]; + g geometry; + vertex geometry[]; + va numeric; + vb numeric; + vc numeric; + center geometry; + centerval numeric; + tmp integer; +BEGIN + geotemp := geomin; + coltemp := colin; + FOR i IN 1..iterations + LOOP + -- generate TIN + WITH a as (SELECT unnest(geotemp) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + -- loop cells + FOREACH g IN ARRAY gs + LOOP + -- append centroid + SELECT ST_Centroid(g) INTO center; + geotemp := array_append(geotemp, center); + -- retrieve the value of each vertex + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + END LOOP; + RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_TINmap( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + p geometry[]; + vals numeric[]; + gs geometry[]; + g geometry; + vertex geometry[]; + centerval numeric; + va numeric; + vb numeric; + vc numeric; + coltemp numeric[]; +BEGIN + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens; + WITH a as (SELECT unnest(p) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + FOREACH g IN ARRAY gs + LOOP + -- retrieve the vertex of each triangle + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_Contour( + IN geomin geometry[], + IN colin numeric[], + IN buffer numeric, + IN intmethod integer, + IN classmethod integer, + IN steps integer, + IN max_time integer DEFAULT 60000 + ) +RETURNS TABLE( + the_geom geometry, + bin integer, + min_value numeric, + max_value numeric, + avg_value numeric +) AS $$ +DECLARE + cell_count integer; + tin geometry[]; + resolution integer; +BEGIN + + -- nasty trick to override issue #121 + IF max_time = 0 THEN + max_time = -90; + END IF; + resolution := max_time; + max_time := -1 * resolution; + + -- calc the optimal number of cells for the current dataset + SELECT + CASE intmethod + WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) + WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797) + WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218) + ELSE 10000 + END INTO cell_count; + + -- we don't have iterative barycentric interpolation in CDB_interpolation, + -- and it's a costy function, so let's make a custom one here till + -- we update the code + -- tin := ARRAY[]::geometry[]; + IF intmethod=1 THEN + WITH + a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b) + SELECT array_agg(v) INTO tin FROM c; + END IF; + -- Delaunay stuff performed just ONCE!! + + -- magic + RETURN QUERY + WITH + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r + ), + envelope as ( + SELECT + st_expand(a.g, a.r) as e + FROM convexhull a + ), + envelope3857 as( + SELECT + ST_Transform(e, 3857) as geom + FROM envelope + ), + resolution as( + SELECT + CASE WHEN resolution <= 0 THEN + round(|/ ( + ST_area(geom) / abs(cell_count) + )) + ELSE + resolution + END AS cell + FROM envelope3857 + ), + grid as( + SELECT + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom + FROM envelope3857 e, resolution r + ), + interp as( + SELECT + geom, + CASE + WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) + ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + END as val + FROM grid + ), + classes as( + SELECT CASE + WHEN classmethod = 0 THEN + cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps) + WHEN classmethod = 1 THEN + cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps) + WHEN classmethod = 2 THEN + cdb_crankshaft.CDB_JenksBins(array_agg(val), steps) + ELSE + cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps) + END as b + FROM interp + where val is not null + ), + classified as( + SELECT + i.*, + width_bucket(i.val, c.b) as bucket + FROM interp i left join classes c + ON 1=1 + ), + classified2 as( + SELECT + geom, + val, + CASE + WHEN bucket = steps THEN bucket - 1 + ELSE bucket + END as b + FROM classified + ), + final as( + SELECT + st_union(geom) as the_geom, + b as bin, + min(val) as min_value, + max(val) as max_value, + avg(val) as avg_value + FROM classified2 + GROUP BY bin + ) + SELECT + * + FROM final + where final.bin is not null + ; +END; +$$ language plpgsql; + + + +-- ===================================================================== +-- Interp in grid, so we can use barycentric with a precalculated tin (NNI) +-- ===================================================================== +CREATE OR REPLACE FUNCTION _interp_in_tin( + IN geomin geometry[], + IN colin numeric[], + IN tin geometry[], + IN point geometry + ) +RETURNS numeric AS +$$ +DECLARE + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- get the cell the point is within + WITH + a as (SELECT unnest(tin) as v), + b as (SELECT v FROM a WHERE ST_Within(point, v)) + SELECT v INTO g FROM b; + + -- if we're out of the data realm, + -- return null + IF g is null THEN + RETURN null; + END IF; + + -- vertex of the selected cell + WITH a AS ( + SELECT (ST_DumpPoints(g)).geom AS v + ) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + -- calc the areas + SELECT + ST_area(g), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1); + RETURN output; +END; +$$ +language plpgsql; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. +-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; diff --git a/release/crankshaft--0.4.2.sql b/release/crankshaft--0.4.2.sql new file mode 100644 index 0000000..4c68ffc --- /dev/null +++ b/release/crankshaft--0.4.2.sql @@ -0,0 +1,1965 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.4.2'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. +CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION + CDB_PyAggS(current_state Numeric[], current_row Numeric[]) + returns NUMERIC[] as $$ + BEGIN + if array_upper(current_state,1) is null then + RAISE NOTICE 'setting state %',array_upper(current_row,1); + current_state[1] = array_upper(current_row,1); + end if; + return array_cat(current_state,current_row) ; + END + $$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_pyagg' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_PyAgg(NUMERIC[]) ( + SFUNC = CDB_PyAggS, + STYPE = Numeric[], + INITCOND = "{}" + ); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment( + target NUMERIC[], + features NUMERIC[], + target_features NUMERIC[], + target_ids NUMERIC[], + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE(cartodb_id NUMERIC, prediction NUMERIC, accuracy NUMERIC) +AS $$ + import numpy as np + import plpy + + from crankshaft.segmentation import create_and_predict_segment_agg + model_params = {'n_estimators': n_estimators, + 'max_depth': max_depth, + 'subsample': subsample, + 'learning_rate': learning_rate, + 'min_samples_leaf': min_samples_leaf} + + def unpack2D(data): + dimension = data.pop(0) + a = np.array(data, dtype=float) + return a.reshape(len(a)/dimension, dimension) + + return create_and_predict_segment_agg(np.array(target, dtype=float), + unpack2D(features), + unpack2D(target_features), + target_ids, + model_params) + +$$ LANGUAGE plpythonu; + +CREATE OR REPLACE FUNCTION + CDB_CreateAndPredictSegment ( + query TEXT, + variable_name TEXT, + target_table TEXT, + n_estimators INTEGER DEFAULT 1200, + max_depth INTEGER DEFAULT 3, + subsample DOUBLE PRECISION DEFAULT 0.5, + learning_rate DOUBLE PRECISION DEFAULT 0.01, + min_samples_leaf INTEGER DEFAULT 1) +RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC) +AS $$ + from crankshaft.segmentation import create_and_predict_segment + model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf} + return create_and_predict_segment(query,variable_name,target_table, model_params) +$$ LANGUAGE plpythonu; +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; +-- 0: nearest neighbor(s) +-- 1: barymetric +-- 2: IDW +-- 3: krigin ---> TO DO + + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN query text, + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + output numeric; +BEGIN + EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; +END; +$$ +language plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation( + IN geomin geometry[], + IN colin numeric[], + IN point geometry, + IN method integer DEFAULT 1, + IN p1 numeric DEFAULT 0, + IN p2 numeric DEFAULT 0 + ) +RETURNS numeric AS +$$ +DECLARE + gs geometry[]; + vs numeric[]; + gs2 geometry[]; + vs2 numeric[]; + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- output := -999.999; + + -- nearest neighbors + -- p1: limit the number of neighbors, 0-> closest one + IF method = 0 THEN + + IF p1 = 0 THEN + p1 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.v as v FROM a ORDER BY point<->a.g LIMIT p1::integer) + SELECT avg(b.v) INTO output FROM b; + RETURN output; + + -- barymetric + ELSIF method = 1 THEN + WITH a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b), + d as (SELECT v FROM c WHERE ST_Within(point, v)) + SELECT v INTO g FROM d; + IF g is null THEN + -- out of the realm of the input data + RETURN -888.888; + END IF; + -- vertex of the selected cell + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg); + RETURN output; + + -- IDW + -- p1: limit the number of neighbors, 0->no limit + -- p2: order of distance decay, 0-> order 1 + ELSIF method = 2 THEN + + IF p2 = 0 THEN + p2 := 1; + END IF; + + WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v), + b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g) + SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b; + IF p1::integer>0 THEN + gs2:=gs; + vs2:=vs; + FOR i IN 1..p1 + LOOP + gs2 := gs2 || gs[i]; + vs2 := vs2 || vs[i]; + END LOOP; + ELSE + gs2:=gs; + vs2:=vs; + END IF; + + WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v), + b as ( + SELECT + (1/ST_distance(point, a.g)^p2::integer) as k, + (a.v/ST_distance(point, a.g)^p2::integer) as f + FROM a + ) + SELECT sum(b.f)/sum(b.k) INTO output FROM b; + RETURN output; + + -- krigin + ELSIF method = 3 THEN + + -- TO DO + + END IF; + + RETURN -777.777; + +END; +$$ +language plpgsql IMMUTABLE; +-- ============================================================================================= +-- +-- CDB_Voronoi +-- +-- ============================================================================================= +CREATE OR REPLACE FUNCTION CDB_voronoi( + IN geomin geometry[], + IN buffer numeric DEFAULT 0.5, + IN tolerance numeric DEFAULT 1e-9 + ) +RETURNS geometry AS $$ +DECLARE + geomout geometry; +BEGIN + -- we need to make the geometry calculations in (pseudo)meters!!! + with a as ( + SELECT unnest(geomin) as g1 + ), + b as( + SELECT st_transform(g1, 3857) g2 from a + ) + SELECT array_agg(g2) INTO geomin from b; + + WITH + convexhull_1 as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ (st_area(ST_ConvexHull(ST_Collect(geomin)))/PI()) as r + ), + clipper as( + SELECT + st_buffer(ST_MinimumBoundingCircle(a.g), buffer*a.r) as g + FROM convexhull_1 a + ), + env0 as ( + SELECT + (st_dumppoints(st_expand(a.g, buffer*a.r))).geom as e + FROM convexhull_1 a + ), + env as ( + SELECT + array_agg(env0.e) as e + FROM env0 + ), + sample AS ( + SELECT + ST_Collect(geomin || env.e) as geom + FROM env + ), + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as cg + ), + tin as ( + SELECT + ST_Dump(ST_DelaunayTriangles(geom, tolerance, 0)) as gd + FROM + sample + ), + tin_polygons as ( + SELECT + (gd).Path as id, + (gd).Geom as pg, + ST_Centroid(ST_MinimumBoundingCircle((gd).Geom, 180)) as ct + FROM tin + ), + tin_lines as ( + SELECT + id, + ST_ExteriorRing(pg) as lg + FROM tin_polygons + ), + tin_nodes as ( + SELECT + id, + ST_PointN(lg,1) p1, + ST_PointN(lg,2) p2, + ST_PointN(lg,3) p3 + FROM tin_lines + ), + tin_edges AS ( + SELECT + p.id, + UNNEST(ARRAY[ + ST_MakeLine(n.p1,n.p2) , + ST_MakeLine(n.p2,n.p3) , + ST_MakeLine(n.p3,n.p1)]) as Edge, + ST_Force2D(cdb_crankshaft._Find_Circle(n.p1,n.p2,n.p3)) as ct, + CASE WHEN st_distance(p.ct, ST_ExteriorRing(p.pg)) < tolerance THEN + TRUE + ELSE FALSE END AS ctx, + p.pg, + ST_within(p.ct, convexhull.cg) as ctin + FROM + tin_polygons p, + tin_nodes n, + convexhull + WHERE p.id = n.id + ), + voro_nodes as ( + SELECT + CASE WHEN x.ctx = TRUE THEN + ST_Centroid(x.edge) + ELSE + x.ct + END as xct, + CASE WHEN y.id is null THEN + CASE WHEN x.ctin = TRUE THEN + ST_SetSRID(ST_MakePoint( + ST_X(x.ct) + ((ST_X(ST_Centroid(x.edge)) - ST_X(x.ct)) * (1+buffer)), + ST_Y(x.ct) + ((ST_Y(ST_Centroid(x.edge)) - ST_Y(x.ct)) * (1+buffer)) + ), ST_SRID(x.ct)) + END + ELSE + y.ct + END as yct + FROM + tin_edges x + LEFT OUTER JOIN + tin_edges y + ON x.id <> y.id AND ST_Equals(x.edge, y.edge) + ), + voro_edges as( + SELECT + ST_LineMerge(ST_Collect(ST_MakeLine(xct, yct))) as v + FROM + voro_nodes + ), + voro_cells as( + SELECT + ST_Polygonize( + ST_Node( + ST_LineMerge( + ST_Union(v, ST_ExteriorRing( + ST_Convexhull(v) + ) + ) + ) + ) + ) as g + FROM + voro_edges + ), + voro_set as( + SELECT + (st_dump(v.g)).geom as g + FROM voro_cells v + ), + clipped_voro as( + SELECT + ST_intersection(c.g, v.g) as g + FROM + voro_set v, + clipper c + WHERE + ST_GeometryType(v.g) = 'ST_Polygon' + ) + SELECT + st_collect( + ST_Transform( + ST_ConvexHull(g), + 4326 + ) + ) + INTO geomout + FROM + clipped_voro; + RETURN geomout; +END; +$$ language plpgsql IMMUTABLE; + +/** ---------------------------------------------------------------------------------------- + * @function : FindCircle + * @precis : Function that determines if three points form a circle. If so a table containing + * centre and radius is returned. If not, a null table is returned. + * @version : 1.0.1 + * @param : p_pt1 : First point in curve + * @param : p_pt2 : Second point in curve + * @param : p_pt3 : Third point in curve + * @return : geometry : In which X,Y ordinates are the centre X, Y and the Z being the radius of found circle + * or NULL if three points do not form a circle. + * @history : Simon Greener - Feb 2012 - Original coding. + * Rafa de la Torre - Aug 2016 - Small fix for type checking + * @copyright : Simon Greener @ 2012 + * Licensed under a Creative Commons Attribution-Share Alike 2.5 Australia License. (http://creativecommons.org/licenses/by-sa/2.5/au/) +**/ +CREATE OR REPLACE FUNCTION _Find_Circle( + IN p_pt1 geometry, + IN p_pt2 geometry, + IN p_pt3 geometry) + RETURNS geometry AS +$BODY$ +DECLARE + v_Centre geometry; + v_radius NUMERIC; + v_CX NUMERIC; + v_CY NUMERIC; + v_dA NUMERIC; + v_dB NUMERIC; + v_dC NUMERIC; + v_dD NUMERIC; + v_dE NUMERIC; + v_dF NUMERIC; + v_dG NUMERIC; +BEGIN + IF ( p_pt1 IS NULL OR p_pt2 IS NULL OR p_pt3 IS NULL ) THEN + RAISE EXCEPTION 'All supplied points must be not null.'; + RETURN NULL; + END IF; + IF ( ST_GeometryType(p_pt1) <> 'ST_Point' OR + ST_GeometryType(p_pt2) <> 'ST_Point' OR + ST_GeometryType(p_pt3) <> 'ST_Point' ) THEN + RAISE EXCEPTION 'All supplied geometries must be points.'; + RETURN NULL; + END IF; + v_dA := ST_X(p_pt2) - ST_X(p_pt1); + v_dB := ST_Y(p_pt2) - ST_Y(p_pt1); + v_dC := ST_X(p_pt3) - ST_X(p_pt1); + v_dD := ST_Y(p_pt3) - ST_Y(p_pt1); + v_dE := v_dA * (ST_X(p_pt1) + ST_X(p_pt2)) + v_dB * (ST_Y(p_pt1) + ST_Y(p_pt2)); + v_dF := v_dC * (ST_X(p_pt1) + ST_X(p_pt3)) + v_dD * (ST_Y(p_pt1) + ST_Y(p_pt3)); + v_dG := 2.0 * (v_dA * (ST_Y(p_pt3) - ST_Y(p_pt2)) - v_dB * (ST_X(p_pt3) - ST_X(p_pt2))); + -- If v_dG is zero then the three points are collinear and no finite-radius + -- circle through them exists. + IF ( v_dG = 0 ) THEN + RETURN NULL; + ELSE + v_CX := (v_dD * v_dE - v_dB * v_dF) / v_dG; + v_CY := (v_dA * v_dF - v_dC * v_dE) / v_dG; + v_Radius := SQRT(POWER(ST_X(p_pt1) - v_CX,2) + POWER(ST_Y(p_pt1) - v_CY,2) ); + END IF; + RETURN ST_SetSRID(ST_MakePoint(v_CX, v_CY, v_radius),ST_Srid(p_pt1)); +END; +$BODY$ + LANGUAGE plpgsql VOLATILE STRICT; + +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +-- Create aggregate if it did not exist +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT * + FROM pg_catalog.pg_proc p + LEFT JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'cdb_crankshaft' + AND p.proname = 'cdb_weightedmean' + AND p.proisagg) + THEN + CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC) ( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" + ); + END IF; +END +$$ LANGUAGE plpgsql; +-- Spatial Markov + +-- input table format: +-- id | geom | date_1 | date_2 | date_3 +-- 1 | Pt1 | 12.3 | 13.1 | 14.2 +-- 2 | Pt2 | 11.0 | 13.2 | 12.5 +-- ... +-- Sample Function call: +-- SELECT CDB_SpatialMarkov('SELECT * FROM real_estate', +-- Array['date_1', 'date_2', 'date_3']) + +CREATE OR REPLACE FUNCTION + CDB_SpatialMarkovTrend ( + subquery TEXT, + time_cols TEXT[], + num_classes INT DEFAULT 7, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (trend NUMERIC, trend_up NUMERIC, trend_down NUMERIC, volatility NUMERIC, rowid INT) +AS $$ + + from crankshaft.space_time_dynamics import spatial_markov_trend + + ## TODO: use named parameters or a dictionary + return spatial_markov_trend(subquery, time_cols, num_classes, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- input table format: identical to above but in a predictable format +-- Sample function call: +-- SELECT cdb_spatial_markov('SELECT * FROM real_estate', +-- 'date_1') + + +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col_min text, +-- time_col_max text, +-- date_format text, -- '_YYYY_MM_DD' +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- +-- -- input table format: +-- -- id | geom | date | measurement +-- -- 1 | Pt1 | 12/3 | 13.2 +-- -- 2 | Pt2 | 11/5 | 11.3 +-- -- 3 | Pt1 | 11/13 | 12.9 +-- -- 4 | Pt3 | 12/19 | 10.1 +-- -- ... +-- +-- CREATE OR REPLACE FUNCTION +-- cdb_spatial_markov ( +-- subquery TEXT, +-- time_col text, +-- num_time_per_bin INT DEFAULT 1, +-- permutations INT DEFAULT 99, +-- geom_column TEXT DEFAULT 'the_geom', +-- id_col TEXT DEFAULT 'cartodb_id', +-- w_type TEXT DEFAULT 'knn', +-- num_ngbrs int DEFAULT 5) +-- RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +-- AS $$ +-- plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') +-- from crankshaft.clustering import moran_local +-- # TODO: use named parameters or a dictionary +-- return spatial_markov(subquery, time_cols, permutations, geom_column, id_col, w_type, num_ngbrs) +-- $$ LANGUAGE plpythonu; +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +-- Based on: +-- https://github.com/mapbox/polylabel/blob/master/index.js +-- https://sites.google.com/site/polesofinaccessibility/ +-- Requires: https://github.com/CartoDB/cartodb-postgresql + +CREATE OR REPLACE FUNCTION CDB_PIA( + IN polygon geometry, + IN tolerance numeric DEFAULT 1.0 + ) +RETURNS geometry AS $$ +DECLARE + env geometry[]; + cells geometry[]; + cell geometry; + best_c geometry; + best_d numeric; + test_d numeric; + test_mx numeric; + test_h numeric; + test_cells geometry[]; + width numeric; + height numeric; + h numeric; + i integer; + n integer; + sqr numeric; + p geometry; +BEGIN + sqr := |/2; + polygon := ST_Transform(polygon, 3857); + + -- grid #0 cell size + height := ST_YMax(polygon) - ST_YMin(polygon); + width := ST_XMax(polygon) - ST_XMin(polygon); + h := 0.5*LEAST(height, width); + + -- grid #0 + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(polygon, h, h) as c + ) + SELECT array_agg(c) INTO cells FROM c1; + + -- 1st guess: centroid + best_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(Polygon)); + + -- looping the loop + n := array_length(cells,1); + i := 1; + LOOP + + EXIT WHEN i > n; + + cell := cells[i]; + i := i+1; + + -- cell side size, it's square + test_h := ST_XMax(cell) - ST_XMin(cell) ; + + -- check distance + test_d := cdb_crankshaft._Signed_Dist(polygon, ST_Centroid(cell)); + IF test_d > best_d THEN + best_d := test_d; + best_c := cells[i]; + END IF; + + -- longest distance within the cell + test_mx := test_d + (test_h/2 * sqr); + + -- if the cell has no chance to contains the desired point, continue + CONTINUE WHEN test_mx - best_d <= tolerance; + + -- resample the cell + with c1 as( + SELECT cdb_crankshaft.CDB_RectangleGrid(cell, test_h/2, test_h/2) as c + ) + SELECT array_agg(c) INTO test_cells FROM c1; + + -- concat the new cells to the former array + cells := cells || test_cells; + + -- prepare next iteration + n := array_length(cells,1); + + END LOOP; + + RETURN ST_transform(ST_Centroid(best_c), 4326); + +END; +$$ language plpgsql IMMUTABLE; + + +-- signed distance point to polygon with holes +-- negative is the point is out the polygon +CREATE OR REPLACE FUNCTION _Signed_Dist( + IN polygon geometry, + IN point geometry + ) +RETURNS numeric AS $$ +DECLARE + i integer; + within integer; + holes integer; + dist numeric; +BEGIN + dist := 1e999; + SELECT LEAST(dist, ST_distance(point, ST_ExteriorRing(polygon))::numeric) INTO dist; + SELECT CASE WHEN ST_Within(point,polygon) THEN 1 ELSE -1 END INTO within; + SELECT ST_NumInteriorRings(polygon) INTO holes; + IF holes > 0 THEN + FOR i IN 1..holes + LOOP + SELECT LEAST(dist, ST_distance(point, ST_InteriorRingN(polygon, i))::numeric) INTO dist; + END LOOP; + END IF; + dist := dist * within::numeric; + RETURN dist; +END; +$$ language plpgsql IMMUTABLE; +-- +-- Iterative densification of a set of points using Delaunay triangulation +-- the new points have as assigned value the average value of the 3 vertex (centroid) +-- +-- @param geomin - array of geometries (points) +-- +-- @param colin - array of numeric values in that points +-- +-- @param iterations - integer, number of iterations +-- +-- +-- Returns: TABLE(geomout geometry, colout numeric) +-- +-- +CREATE OR REPLACE FUNCTION CDB_Densify( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + geotemp geometry[]; + coltemp numeric[]; + i integer; + gs geometry[]; + g geometry; + vertex geometry[]; + va numeric; + vb numeric; + vc numeric; + center geometry; + centerval numeric; + tmp integer; +BEGIN + geotemp := geomin; + coltemp := colin; + FOR i IN 1..iterations + LOOP + -- generate TIN + WITH a as (SELECT unnest(geotemp) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + -- loop cells + FOREACH g IN ARRAY gs + LOOP + -- append centroid + SELECT ST_Centroid(g) INTO center; + geotemp := array_append(geotemp, center); + -- retrieve the value of each vertex + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(geotemp) as geo, unnest(coltemp) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + END LOOP; + RETURN QUERY SELECT unnest(geotemp ) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_TINmap( + IN geomin geometry[], + IN colin numeric[], + IN iterations integer + ) +RETURNS TABLE(geomout geometry, colout numeric) AS $$ +DECLARE + p geometry[]; + vals numeric[]; + gs geometry[]; + g geometry; + vertex geometry[]; + centerval numeric; + va numeric; + vb numeric; + vc numeric; + coltemp numeric[]; +BEGIN + SELECT array_agg(dens.geomout), array_agg(dens.colout) INTO p, vals FROM cdb_crankshaft.CDB_Densify(geomin, colin, iterations) dens; + WITH a as (SELECT unnest(p) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom AS v FROM b) + SELECT array_agg(v) INTO gs FROM c; + FOREACH g IN ARRAY gs + LOOP + -- retrieve the vertex of each triangle + WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v) + SELECT array_agg(v) INTO vertex FROM a; + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + WITH a AS(SELECT unnest(p) as geo, unnest(vals) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + -- calc the value at the center + centerval := (va + vb + vc) / 3; + -- append the value + coltemp := array_append(coltemp, centerval); + END LOOP; + RETURN QUERY SELECT unnest(gs) as geomout, unnest(coltemp ) as colout; +END; +$$ language plpgsql IMMUTABLE; +CREATE OR REPLACE FUNCTION CDB_Contour( + IN geomin geometry[], + IN colin numeric[], + IN buffer numeric, + IN intmethod integer, + IN classmethod integer, + IN steps integer, + IN max_time integer DEFAULT 60000 + ) +RETURNS TABLE( + the_geom geometry, + bin integer, + min_value numeric, + max_value numeric, + avg_value numeric +) AS $$ +DECLARE + cell_count integer; + tin geometry[]; + resolution integer; +BEGIN + + -- nasty trick to override issue #121 + IF max_time = 0 THEN + max_time = -90; + END IF; + resolution := max_time; + max_time := -1 * resolution; + + -- calc the optimal number of cells for the current dataset + SELECT + CASE intmethod + WHEN 0 THEN round(3.7745903782 * max_time - 9.4399210051 * array_length(geomin,1) - 1350.8778213073) + WHEN 1 THEN round(2.2855592156 * max_time - 87.285217133 * array_length(geomin,1) + 17255.7085601797) + WHEN 2 THEN round(0.9799471999 * max_time - 127.0334085369 * array_length(geomin,1) + 22707.9579721218) + ELSE 10000 + END INTO cell_count; + + -- we don't have iterative barycentric interpolation in CDB_interpolation, + -- and it's a costy function, so let's make a custom one here till + -- we update the code + -- tin := ARRAY[]::geometry[]; + IF intmethod=1 THEN + WITH + a as (SELECT unnest(geomin) AS e), + b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a), + c as (SELECT (ST_Dump(t)).geom as v FROM b) + SELECT array_agg(v) INTO tin FROM c; + END IF; + -- Delaunay stuff performed just ONCE!! + + -- magic + RETURN QUERY + WITH + convexhull as ( + SELECT + ST_ConvexHull(ST_Collect(geomin)) as g, + buffer * |/ st_area(ST_ConvexHull(ST_Collect(geomin)))/PI() as r + ), + envelope as ( + SELECT + st_expand(a.g, a.r) as e + FROM convexhull a + ), + envelope3857 as( + SELECT + ST_Transform(e, 3857) as geom + FROM envelope + ), + resolution as( + SELECT + CASE WHEN resolution <= 0 THEN + round(|/ ( + ST_area(geom) / abs(cell_count) + )) + ELSE + resolution + END AS cell + FROM envelope3857 + ), + grid as( + SELECT + ST_Transform(cdb_crankshaft.CDB_RectangleGrid(e.geom, r.cell, r.cell), 4326) as geom + FROM envelope3857 e, resolution r + ), + interp as( + SELECT + geom, + CASE + WHEN intmethod=1 THEN cdb_crankshaft._interp_in_tin(geomin, colin, tin, ST_Centroid(geom)) + ELSE cdb_crankshaft.CDB_SpatialInterpolation(geomin, colin, ST_Centroid(geom), intmethod) + END as val + FROM grid + ), + classes as( + SELECT CASE + WHEN classmethod = 0 THEN + cdb_crankshaft.CDB_EqualIntervalBins(array_agg(val), steps) + WHEN classmethod = 1 THEN + cdb_crankshaft.CDB_HeadsTailsBins(array_agg(val), steps) + WHEN classmethod = 2 THEN + cdb_crankshaft.CDB_JenksBins(array_agg(val), steps) + ELSE + cdb_crankshaft.CDB_QuantileBins(array_agg(val), steps) + END as b + FROM interp + where val is not null + ), + classified as( + SELECT + i.*, + width_bucket(i.val, c.b) as bucket + FROM interp i left join classes c + ON 1=1 + ), + classified2 as( + SELECT + geom, + val, + CASE + WHEN bucket = steps THEN bucket - 1 + ELSE bucket + END as b + FROM classified + ), + final as( + SELECT + st_union(geom) as the_geom, + b as bin, + min(val) as min_value, + max(val) as max_value, + avg(val) as avg_value + FROM classified2 + GROUP BY bin + ) + SELECT + * + FROM final + where final.bin is not null + ; +END; +$$ language plpgsql; + + + +-- ===================================================================== +-- Interp in grid, so we can use barycentric with a precalculated tin (NNI) +-- ===================================================================== +CREATE OR REPLACE FUNCTION _interp_in_tin( + IN geomin geometry[], + IN colin numeric[], + IN tin geometry[], + IN point geometry + ) +RETURNS numeric AS +$$ +DECLARE + g geometry; + vertex geometry[]; + sg numeric; + sa numeric; + sb numeric; + sc numeric; + va numeric; + vb numeric; + vc numeric; + output numeric; +BEGIN + -- get the cell the point is within + WITH + a as (SELECT unnest(tin) as v), + b as (SELECT v FROM a WHERE ST_Within(point, v)) + SELECT v INTO g FROM b; + + -- if we're out of the data realm, + -- return null + IF g is null THEN + RETURN null; + END IF; + + -- vertex of the selected cell + WITH a AS ( + SELECT (ST_DumpPoints(g)).geom AS v + ) + SELECT array_agg(v) INTO vertex FROM a; + + -- retrieve the value of each vertex + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]); + + WITH a AS(SELECT unnest(geomin) as geo, unnest(colin) as c) + SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]); + + -- calc the areas + SELECT + ST_area(g), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), + ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc; + + output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg,1); + RETURN output; +END; +$$ +language plpgsql; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. +-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; +-- +-- Fill given extent with a rectangular coverage +-- +-- @param ext Extent to fill. Only rectangles with center point falling +-- inside the extent (or at the lower or leftmost edge) will +-- be emitted. The returned hexagons will have the same SRID +-- as this extent. +-- +-- @param width With of each rectangle +-- +-- @param height Height of each rectangle +-- +-- @param origin Optional origin to allow for exact tiling. +-- If omitted the origin will be 0,0. +-- The parameter is checked for having the same SRID +-- as the extent. +-- +-- +CREATE OR REPLACE FUNCTION CDB_RectangleGrid(ext GEOMETRY, width FLOAT8, height FLOAT8, origin GEOMETRY DEFAULT NULL) +RETURNS SETOF GEOMETRY +AS $$ +DECLARE + h GEOMETRY; -- rectangle cell + hstep FLOAT8; -- horizontal step + vstep FLOAT8; -- vertical step + hw FLOAT8; -- half width + hh FLOAT8; -- half height + vstart FLOAT8; + hstart FLOAT8; + hend FLOAT8; + vend FLOAT8; + xoff FLOAT8; + yoff FLOAT8; + xgrd FLOAT8; + ygrd FLOAT8; + x FLOAT8; + y FLOAT8; + srid INTEGER; +BEGIN + + srid := ST_SRID(ext); + + xoff := 0; + yoff := 0; + + IF origin IS NOT NULL THEN + IF ST_SRID(origin) != srid THEN + RAISE EXCEPTION 'SRID mismatch between extent (%) and origin (%)', srid, ST_SRID(origin); + END IF; + xoff := ST_X(origin); + yoff := ST_Y(origin); + END IF; + + --RAISE DEBUG 'X offset: %', xoff; + --RAISE DEBUG 'Y offset: %', yoff; + + hw := width/2.0; + hh := height/2.0; + + xgrd := hw; + ygrd := hh; + --RAISE DEBUG 'X grid size: %', xgrd; + --RAISE DEBUG 'Y grid size: %', ygrd; + + hstep := width; + vstep := height; + + -- Tweak horizontal start on hstep grid from origin + hstart := xoff + ceil((ST_XMin(ext)-xoff)/hstep)*hstep; + --RAISE DEBUG 'hstart: %', hstart; + + -- Tweak vertical start on vstep grid from origin + vstart := yoff + ceil((ST_Ymin(ext)-yoff)/vstep)*vstep; + --RAISE DEBUG 'vstart: %', vstart; + + hend := ST_XMax(ext); + vend := ST_YMax(ext); + + --RAISE DEBUG 'hend: %', hend; + --RAISE DEBUG 'vend: %', vend; + + x := hstart; + WHILE x < hend LOOP -- over X + y := vstart; + h := ST_MakeEnvelope(x-hw, y-hh, x+hw, y+hh, srid); + WHILE y < vend LOOP -- over Y + RETURN NEXT h; + h := ST_Translate(h, 0, vstep); + y := yoff + round(((y + vstep)-yoff)/ygrd)*ygrd; -- round to grid + END LOOP; + x := xoff + round(((x + hstep)-xoff)/xgrd)*xgrd; -- round to grid + END LOOP; + + RETURN; +END +$$ LANGUAGE 'plpgsql' IMMUTABLE; + +-- +-- Calculate the equal interval bins for a given column +-- +-- @param in_array A numeric array of numbers to determine the best +-- to determine the bin boundary +-- +-- @param breaks The number of bins you want to find. +-- +-- +-- Returns: upper edges of bins +-- +-- + +CREATE OR REPLACE FUNCTION CDB_EqualIntervalBins ( in_array NUMERIC[], breaks INT ) RETURNS NUMERIC[] as $$ +DECLARE + diff numeric; + min_val numeric; + max_val numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + SELECT min(e), max(e) INTO min_val, max_val FROM ( SELECT unnest(in_array) e ) x WHERE e IS NOT NULL; + diff = (max_val - min_val) / breaks::numeric; + LOOP + IF i < breaks THEN + tmp_val = min_val + i::numeric * diff; + reply = array_append(reply, tmp_val); + i := i+1; + ELSE + reply = array_append(reply, max_val); + EXIT; + END IF; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Heads/Tails classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Heads/Tails method. +-- +-- @param breaks The number of bins you want to find. +-- +-- + +CREATE OR REPLACE FUNCTION CDB_HeadsTailsBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean numeric; + i INT := 2; + reply numeric[]; +BEGIN + -- get the total size of our row + element_count := array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + reply = Array[arr_mean]; + -- slice our bread + LOOP + IF i > breaks THEN EXIT; END IF; + SELECT avg(e) INTO arr_mean FROM ( SELECT unnest(in_array) e) x WHERE e > reply[i-1]; + IF arr_mean IS NOT NULL THEN + reply = array_append(reply, arr_mean); + END IF; + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; + +-- +-- Determine the Jenks classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Jenks method. +-- +-- @param breaks The number of bins you want to find. +-- +-- @param iterations The number of different starting positions to test. +-- +-- @param invert Optional wheter to return the top of each bin (default) +-- or the bottom. BOOLEAN, default=FALSE. +-- +-- + + +CREATE OR REPLACE FUNCTION CDB_JenksBins ( in_array NUMERIC[], breaks INT, iterations INT DEFAULT 5, invert BOOLEAN DEFAULT FALSE) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + arr_mean NUMERIC; + bot INT; + top INT; + tops INT[]; + classes INT[][]; + i INT := 1; j INT := 1; + curr_result NUMERIC[]; + best_result NUMERIC[]; + seedtarget TEXT; + quant NUMERIC[]; + shuffles INT; +BEGIN + -- get the total size of our row + element_count := array_length(in_array, 1); --array_upper(in_array, 1) - array_lower(in_array, 1); + -- ensure the ordering of in_array + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e) x; + -- stop if no rows + IF element_count IS NULL THEN + RETURN NULL; + END IF; + -- stop if our breaks are more than our input array size + IF element_count < breaks THEN + RETURN in_array; + END IF; + + shuffles := LEAST(GREATEST(floor(2500000.0/(element_count::float*iterations::float)), 1), 750)::int; + -- get our mean value + SELECT avg(v) INTO arr_mean FROM ( SELECT unnest(in_array) as v ) x; + + -- assume best is actually Quantile + SELECT cdb_crankshaft.CDB_QuantileBins(in_array, breaks) INTO quant; + + -- if data is very very large, just return quant and be done + IF element_count > 5000000 THEN + RETURN quant; + END IF; + + -- change quant into bottom, top markers + LOOP + IF i = 1 THEN + bot = 1; + ELSE + -- use last top to find this bot + bot = top+1; + END IF; + IF i = breaks THEN + top = element_count; + ELSE + SELECT count(*) INTO top FROM ( SELECT unnest(in_array) as v) x WHERE v <= quant[i]; + END IF; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + IF i > breaks THEN EXIT; END IF; + i = i+1; + END LOOP; + + best_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + --set the seed so we can ensure the same results + SELECT setseed(0.4567) INTO seedtarget; + --loop through random starting positions + LOOP + IF j > iterations-1 THEN EXIT; END IF; + i = 1; + tops = ARRAY[element_count]; + LOOP + IF i = breaks THEN EXIT; END IF; + SELECT array_agg(distinct e) INTO tops FROM (SELECT unnest(array_cat(tops, ARRAY[floor(random()*element_count::float)::int])) as e ORDER BY e) x WHERE e != 1; + i = array_length(tops, 1); + END LOOP; + i = 1; + LOOP + IF i > breaks THEN EXIT; END IF; + IF i = 1 THEN + bot = 1; + ELSE + bot = top+1; + END IF; + top = tops[i]; + IF i = 1 THEN + classes = ARRAY[ARRAY[bot,top]]; + ELSE + classes = ARRAY_CAT(classes,ARRAY[bot,top]); + END IF; + i := i+1; + END LOOP; + curr_result = cdb_crankshaft.CDB_JenksBinsIteration( in_array, breaks, classes, invert, element_count, arr_mean, shuffles); + + IF curr_result[1] > best_result[1] THEN + best_result = curr_result; + j = j-1; -- if we found a better result, add one more search + END IF; + j = j+1; + END LOOP; + + RETURN (best_result)[2:array_upper(best_result, 1)]; +END; +$$ language plpgsql IMMUTABLE; + + + +-- +-- Perform a single iteration of the Jenks classification +-- + +CREATE OR REPLACE FUNCTION CDB_JenksBinsIteration ( in_array NUMERIC[], breaks INT, classes INT[][], invert BOOLEAN, element_count INT4, arr_mean NUMERIC, max_search INT DEFAULT 50) RETURNS NUMERIC[] as $$ +DECLARE + tmp_val numeric; + new_classes int[][]; + tmp_class int[]; + i INT := 1; + j INT := 1; + side INT := 2; + sdam numeric; + gvf numeric := 0.0; + new_gvf numeric; + arr_gvf numeric[]; + class_avg numeric; + class_max_i INT; + class_min_i INT; + class_max numeric; + class_min numeric; + reply numeric[]; +BEGIN + + -- Calculate the sum of squared deviations from the array mean (SDAM). + SELECT sum((arr_mean - e)^2) INTO sdam FROM ( SELECT unnest(in_array) as e ) x; + --Identify the breaks for the lowest GVF + LOOP + i = 1; + LOOP + -- get our mean + SELECT avg(e) INTO class_avg FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e) x; + -- find the deviation + SELECT sum((class_avg-e)^2) INTO tmp_val FROM ( SELECT unnest(in_array[classes[i][1]:classes[i][2]]) as e ) x; + IF i = 1 THEN + arr_gvf = ARRAY[tmp_val]; + -- init our min/max map for later + class_max = arr_gvf[i]; + class_min = arr_gvf[i]; + class_min_i = 1; + class_max_i = 1; + ELSE + arr_gvf = array_append(arr_gvf, tmp_val); + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + -- calculate our new GVF + SELECT sdam-sum(e) INTO new_gvf FROM ( SELECT unnest(arr_gvf) as e ) x; + -- if no improvement was made, exit + IF new_gvf < gvf THEN EXIT; END IF; + gvf = new_gvf; + IF j > max_search THEN EXIT; END IF; + j = j+1; + i = 1; + LOOP + --establish directionality (uppward through classes or downward) + IF arr_gvf[i] < class_min THEN + class_min = arr_gvf[i]; + class_min_i = i; + END IF; + IF arr_gvf[i] > class_max THEN + class_max = arr_gvf[i]; + class_max_i = i; + END IF; + i := i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + IF class_max_i > class_min_i THEN + class_min_i = class_max_i - 1; + ELSE + class_min_i = class_max_i + 1; + END IF; + --Move from higher class to a lower gid order + IF class_max_i > class_min_i THEN + classes[class_max_i][1] = classes[class_max_i][1] + 1; + classes[class_min_i][2] = classes[class_min_i][2] + 1; + ELSE -- Move from lower class UP into a higher class by gid + classes[class_max_i][2] = classes[class_max_i][2] - 1; + classes[class_min_i][1] = classes[class_min_i][1] - 1; + END IF; + END LOOP; + + i = 1; + LOOP + IF invert = TRUE THEN + side = 1; --default returns bottom side of breaks, invert returns top side + END IF; + reply = array_append(reply, in_array[classes[i][side]]); + i = i+1; + IF i > breaks THEN EXIT; END IF; + END LOOP; + + RETURN array_prepend(gvf, reply); + +END; +$$ language plpgsql IMMUTABLE; + + +-- +-- Determine the Quantile classifications from a numeric array +-- +-- @param in_array A numeric array of numbers to determine the best +-- bins based on the Quantile method. +-- +-- @param breaks The number of bins you want to find. +-- +-- +CREATE OR REPLACE FUNCTION CDB_QuantileBins ( in_array NUMERIC[], breaks INT) RETURNS NUMERIC[] as $$ +DECLARE + element_count INT4; + break_size numeric; + tmp_val numeric; + i INT := 1; + reply numeric[]; +BEGIN + -- sort our values + SELECT array_agg(e) INTO in_array FROM (SELECT unnest(in_array) e ORDER BY e ASC) x; + -- get the total size of our data + element_count := array_length(in_array, 1); + break_size := element_count::numeric / breaks; + -- slice our bread + LOOP + IF i < breaks THEN + IF break_size * i % 1 > 0 THEN + SELECT e INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 1 OFFSET ceil(break_size * i) - 1) x; + ELSE + SELECT avg(e) INTO tmp_val FROM ( SELECT unnest(in_array) e LIMIT 2 OFFSET ceil(break_size * i) - 1 ) x; + END IF; + ELSIF i = breaks THEN + -- select the last value + SELECT max(e) INTO tmp_val FROM ( SELECT unnest(in_array) e ) x; + ELSE + EXIT; + END IF; + + reply = array_append(reply, tmp_val); + i := i+1; + END LOOP; + RETURN reply; +END; +$$ language plpgsql IMMUTABLE; diff --git a/release/crankshaft.control b/release/crankshaft.control index b3992a6..1e02d92 100644 --- a/release/crankshaft.control +++ b/release/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.4.1' +default_version = '0.4.2' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft diff --git a/release/python/0.4.2/crankshaft/crankshaft/__init__.py b/release/python/0.4.2/crankshaft/crankshaft/__init__.py new file mode 100644 index 0000000..4e06bc5 --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/__init__.py @@ -0,0 +1,5 @@ +"""Import all modules""" +import crankshaft.random_seeds +import crankshaft.clustering +import crankshaft.space_time_dynamics +import crankshaft.segmentation diff --git a/release/python/0.4.2/crankshaft/crankshaft/clustering/__init__.py b/release/python/0.4.2/crankshaft/crankshaft/clustering/__init__.py new file mode 100644 index 0000000..ed34fe0 --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/clustering/__init__.py @@ -0,0 +1,3 @@ +"""Import all functions from for clustering""" +from moran import * +from kmeans import * diff --git a/release/python/0.4.2/crankshaft/crankshaft/clustering/kmeans.py b/release/python/0.4.2/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..4134062 --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,18 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/release/python/0.4.2/crankshaft/crankshaft/clustering/moran.py b/release/python/0.4.2/crankshaft/crankshaft/clustering/moran.py new file mode 100644 index 0000000..4e7086e --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/clustering/moran.py @@ -0,0 +1,250 @@ +""" +Moran's I geostatistics (global clustering & outliers presence) +""" + +# TODO: Fill in local neighbors which have null/NoneType values with the +# average of the their neighborhood + +import pysal as ps +import plpy +from collections import OrderedDict + +# crankshaft module +import crankshaft.pysal_utils as pu + +# High level interface --------------------------------------- + + +def moran(subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. + Andy Eschbacher + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(2) + + # collect attributes + attr_vals = pu.get_attributes(result) + + # calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) + + return zip([moran_global.I], [moran_global.EI]) + + +def moran_local(subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(5) + + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + + +def moran_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) + Andy Eschbacher + """ + qvals = OrderedDict([("id_col", id_col), + ("attr1", numerator), + ("attr2", denominator) + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(2) + + # collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) + + return zip([lisa_rate.I], [lisa_rate.EI]) + + +def moran_local_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = OrderedDict([("id_col", id_col), + ("numerator", numerator), + ("denominator", denominator), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError, e: + plpy.error('Analysis failed: %s' % e) + return pu.empty_zipped_array(5) + + # collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + + +def moran_local_bv(subquery, attr1, attr2, + permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ + + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr1), + ("attr2", attr2), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(4) + except plpy.SPIError: + plpy.error("Error: areas of interest query failed, " + "check input parameters") + return pu.empty_zipped_array(4) + + # collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + +# Low level functions ---------------------------------------- + + +def map_quads(coord): + """ + Map a quadrant number to Moran's I designation + HH=1, LH=2, LL=3, HL=4 + Input: + @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') + """ + if coord == 1: + return 'HH' + elif coord == 2: + return 'LH' + elif coord == 3: + return 'LL' + elif coord == 4: + return 'HL' + else: + return None + + +def quad_position(quads): + """ + Produce Moran's I classification based of n + Input: + @param quads ndarray: an array of quads classified by + 1-4 (PySAL default) + Output: + @param list: an array of quads classied by 'HH', 'LL', etc. + """ + return [map_quads(q) for q in quads] diff --git a/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/__init__.py b/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/__init__.py new file mode 100644 index 0000000..fdf073b --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/__init__.py @@ -0,0 +1,2 @@ +"""Import all functions for pysal_utils""" +from crankshaft.pysal_utils.pysal_utils import * diff --git a/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/pysal_utils.py new file mode 100644 index 0000000..6dbcbd8 --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -0,0 +1,201 @@ +""" + Utilities module for generic PySAL functionality, mainly centered on + translating queries into numpy arrays or PySAL weights objects +""" + +import numpy as np +import pysal as ps + + +def construct_neighbor_query(w_type, query_vals): + """Return query (a string) used for finding neighbors + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query + """ + + if w_type.lower() == 'knn': + return knn(query_vals) + else: + return queen(query_vals) + + +# Build weight object +def get_weight(query_res, w_type='knn', num_ngbrs=5): + """ + Construct PySAL weight from return value of query + @param query_res dict-like: query results with attributes and neighbors + """ + # if w_type.lower() == 'knn': + # row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs + # weights = {x['id']: row_normed_weights for x in query_res} + # else: + # weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) + # if len(x['neighbors']) > 0 + # else [] for x in query_res} + + neighbors = {x['id']: x['neighbors'] for x in query_res} + print 'len of neighbors: %d' % len(neighbors) + + built_weight = ps.W(neighbors) + built_weight.transform = 'r' + + return built_weight + + +def query_attr_select(params): + """ + Create portion of SELECT statement for attributes inolved in query. + @param params: dict of information used in query (column names, + table name, etc.) + """ + + attr_string = "" + template = "i.\"%(col)s\"::numeric As attr%(alias_num)s, " + + if 'time_cols' in params: + # if markov analysis + attrs = params['time_cols'] + + for idx, val in enumerate(attrs): + attr_string += template % {"col": val, "alias_num": idx + 1} + else: + # if moran's analysis + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', + 'num_ngbrs', 'subquery')] + + for idx, val in enumerate(sorted(attrs)): + attr_string += template % {"col": params[val], + "alias_num": idx + 1} + + return attr_string + + +def query_attr_where(params): + """ + Construct where conditions when building neighbors query + Create portion of WHERE clauses for weeding out NULL-valued geometries + Input: dict of params: + {'subquery': ..., + 'numerator': 'data1', + 'denominator': 'data2', + '': ...} + Output: 'idx_replace."data1" IS NOT NULL AND idx_replace."data2" + IS NOT NULL' + Input: + {'subquery': ..., + 'time_cols': ['time1', 'time2', 'time3'], + 'etc': ...} + Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT + NULL AND idx_replace."time3" IS NOT NULL' + """ + attr_string = [] + template = "idx_replace.\"%s\" IS NOT NULL" + + if 'time_cols' in params: + # markov where clauses + attrs = params['time_cols'] + # add values to template + for attr in attrs: + attr_string.append(template % attr) + else: + # moran where clauses + + # get keys + attrs = sorted([k for k in params + if k not in ('id_col', 'geom_col', 'subquery', + 'num_ngbrs', 'subquery')]) + # add values to template + for attr in attrs: + attr_string.append(template % params[attr]) + + if len(attrs) == 2: + attr_string.append("idx_replace.\"%s\" <> 0" % params[attrs[1]]) + + out = " AND ".join(attr_string) + + return out + + +def knn(params): + """SQL query for k-nearest neighbors. + @param vars: dict of values to fill template + """ + + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE " \ + "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "%(attr_where_j)s " \ + "ORDER BY " \ + "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs})" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + + +# SQL query for finding queens neighbors (all contiguous polygons) +def queen(params): + """SQL query for queen neighbors. + @param params dict: information to fill query + """ + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +# to add more weight methods open a ticket or pull request + + +def get_attributes(query_res, attr_num=1): + """ + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) + """ + return np.array([x['attr' + str(attr_num)] for x in query_res], + dtype=np.float) + + +def empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/release/python/0.4.2/crankshaft/crankshaft/random_seeds.py b/release/python/0.4.2/crankshaft/crankshaft/random_seeds.py new file mode 100644 index 0000000..31958cb --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/random_seeds.py @@ -0,0 +1,11 @@ +"""Random seed generator used for non-deterministic functions in crankshaft""" +import random +import numpy + +def set_random_seeds(value): + """ + Set the seeds of the RNGs (Random Number Generators) + used internally. + """ + random.seed(value) + numpy.random.seed(value) diff --git a/release/python/0.4.2/crankshaft/crankshaft/segmentation/__init__.py b/release/python/0.4.2/crankshaft/crankshaft/segmentation/__init__.py new file mode 100644 index 0000000..b825e85 --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/segmentation/__init__.py @@ -0,0 +1 @@ +from segmentation import * diff --git a/release/python/0.4.2/crankshaft/crankshaft/segmentation/segmentation.py b/release/python/0.4.2/crankshaft/crankshaft/segmentation/segmentation.py new file mode 100644 index 0000000..ed61139 --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/segmentation/segmentation.py @@ -0,0 +1,176 @@ +""" +Segmentation creation and prediction +""" + +import sklearn +import numpy as np +import plpy +from sklearn.ensemble import GradientBoostingRegressor +from sklearn import metrics +from sklearn.cross_validation import train_test_split + +# Lower level functions +#---------------------- + +def replace_nan_with_mean(array): + """ + Input: + @param array: an array of floats which may have null-valued entries + Output: + array with nans filled in with the mean of the dataset + """ + # returns an array of rows and column indices + indices = np.where(np.isnan(array)) + + # iterate through entries which have nan values + for row, col in zip(*indices): + array[row, col] = np.mean(array[~np.isnan(array[:, col]), col]) + + return array + +def get_data(variable, feature_columns, query): + """ + Fetch data from the database, clean, and package into + numpy arrays + Input: + @param variable: name of the target variable + @param feature_columns: list of column names + @param query: subquery that data is pulled from for the packaging + Output: + prepared data, packaged into NumPy arrays + """ + + columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns]) + + try: + data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format( + variable=variable, + columns=columns, + query=query)) + except Exception, e: + plpy.error('Failed to access data to build segmentation model: %s' % e) + + # extract target data from plpy object + target = np.array(data[0]['target']) + + # put n feature data arrays into an n x m array of arrays + features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns]) + + return replace_nan_with_mean(target), replace_nan_with_mean(features) + +# High level interface +# -------------------- + +def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters): + """ + Version of create_and_predict_segment that works on arrays that come stright form the SQL calling + the function. + + Input: + @param target: The 1D array of lenth NSamples containing the target variable we want the model to predict + @param features: Thw 2D array of size NSamples * NFeatures that form the imput to the model + @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from + @param model_parameters: A dictionary containing parameters for the model. + """ + + clean_target = replace_nan_with_mean(target) + clean_features = replace_nan_with_mean(features) + target_features = replace_nan_with_mean(target_features) + + model, accuracy = train_model(clean_target, clean_features, model_parameters, 0.2) + prediction = model.predict(target_features) + accuracy_array = [accuracy]*prediction.shape[0] + return zip(target_ids, prediction, np.full(prediction.shape, accuracy_array)) + + + +def create_and_predict_segment(query, variable, target_query, model_params): + """ + generate a segment with machine learning + Stuart Lynn + """ + + ## fetch column names + try: + columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1 '.format(query=query))[0].keys() + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + ## extract column names to be used in building the segmentation model + feature_columns = set(columns) - set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator']) + ## get data from database + target, features = get_data(variable, feature_columns, query) + + model, accuracy = train_model(target, features, model_params, 0.2) + cartodb_ids, result = predict_segment(model, feature_columns, target_query) + accuracy_array = [accuracy]*result.shape[0] + return zip(cartodb_ids, result, accuracy_array) + + +def train_model(target, features, model_params, test_split): + """ + Train the Gradient Boosting model on the provided data and calculate the accuracy of the model + Input: + @param target: 1D Array of the variable that the model is to be trianed to predict + @param features: 2D Array NSamples * NFeatures to use in trining the model + @param model_params: A dictionary of model parameters, the full specification can be found on the + scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html) + @parma test_split: The fraction of the data to be withheld for testing the model / calculating the accuray + """ + features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=test_split) + model = GradientBoostingRegressor(**model_params) + model.fit(features_train, target_train) + accuracy = calculate_model_accuracy(model, features, target) + return model, accuracy + +def calculate_model_accuracy(model, features, target): + """ + Calculate the mean squared error of the model prediction + Input: + @param model: model trained from input features + @param features: features to make a prediction from + @param target: target to compare prediction to + Output: + mean squared error of the model prection compared to the target + """ + prediction = model.predict(features) + return metrics.mean_squared_error(prediction, target) + +def predict_segment(model, features, target_query): + """ + Use the provided model to predict the values for the new feature set + Input: + @param model: The pretrained model + @features: A list of features to use in the model prediction (list of column names) + @target_query: The query to run to obtain the data to predict on and the cartdb_ids associated with it. + """ + + batch_size = 1000 + joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features]) + + try: + cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format( + joined_features=joined_features, + target_query=target_query)) + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + results = [] + + while True: + rows = cursor.fetch(batch_size) + if not rows: + break + batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows]) + + #Need to fix this. Should be global mean. This will cause weird effects + batch = replace_nan_with_mean(batch) + prediction = model.predict(batch) + results.append(prediction) + + try: + cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids'] + except Exception, e: + plpy.error('Failed to build segmentation model: %s' % e) + + return cartodb_ids, np.concatenate(results) diff --git a/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/__init__.py b/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/__init__.py new file mode 100644 index 0000000..a439286 --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/__init__.py @@ -0,0 +1,2 @@ +"""Import all functions from clustering libraries.""" +from markov import * diff --git a/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/markov.py b/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/markov.py new file mode 100644 index 0000000..ae788d7 --- /dev/null +++ b/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -0,0 +1,189 @@ +""" +Spatial dynamics measurements using Spatial Markov +""" + + +import numpy as np +import pysal as ps +import plpy +import crankshaft.pysal_utils as pu + +def spatial_markov_trend(subquery, time_cols, num_classes=7, + w_type='knn', num_ngbrs=5, permutations=0, + geom_col='the_geom', id_col='cartodb_id'): + """ + Predict the trends of a unit based on: + 1. history of its transitions to different classes (e.g., 1st quantile -> 2nd quantile) + 2. average class of its neighbors + + Inputs: + @param subquery string: e.g., SELECT the_geom, cartodb_id, + interesting_time_column FROM table_name + @param time_cols list of strings: list of strings of column names + @param num_classes (optional): number of classes to break distribution + of values into. Currently uses quantile bins. + @param w_type string (optional): weight type ('knn' or 'queen') + @param num_ngbrs int (optional): number of neighbors (if knn type) + @param permutations int (optional): number of permutations for test + stats + @param geom_col string (optional): name of column which contains the + geometries + @param id_col string (optional): name of column which has the ids of + the table + + Outputs: + @param trend_up float: probablity that a geom will move to a higher + class + @param trend_down float: probablity that a geom will move to a lower + class + @param trend float: (trend_up - trend_down) / trend_static + @param volatility float: a measure of the volatility based on + probability stddev(prob array) + """ + + if len(time_cols) < 2: + plpy.error('More than one time column needs to be passed') + + qvals = {"id_col": id_col, + "time_cols": time_cols, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + try: + query_result = plpy.execute( + pu.construct_neighbor_query(w_type, qvals) + ) + if len(query_result) == 0: + return zip([None], [None], [None], [None], [None]) + except plpy.SPIError, e: + plpy.debug('Query failed with exception %s: %s' % (err, pu.construct_neighbor_query(w_type, qvals))) + plpy.error('Analysis failed: %s' % e) + return zip([None], [None], [None], [None], [None]) + + ## build weight + weights = pu.get_weight(query_result, w_type) + weights.transform = 'r' + + ## prep time data + t_data = get_time_data(query_result, time_cols) + + plpy.debug('shape of t_data %d, %d' % t_data.shape) + plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape) + plpy.debug('first num elements: %f' % t_data[0, 0]) + + sp_markov_result = ps.Spatial_Markov(t_data, + weights, + k=num_classes, + fixed=False, + permutations=permutations) + + ## get lag classes + lag_classes = ps.Quantiles( + ps.lag_spatial(weights, t_data[:, -1]), + k=num_classes).yb + + ## look up probablity distribution for each unit according to class and lag class + prob_dist = get_prob_dist(sp_markov_result.P, + lag_classes, + sp_markov_result.classes[:, -1]) + + ## find the ups and down and overall distribution of each cell + trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, + sp_markov_result.classes[:, -1]) + + ## output the results + return zip(trend, trend_up, trend_down, volatility, weights.id_order) + +def get_time_data(markov_data, time_cols): + """ + Extract the time columns and bin appropriately + """ + num_attrs = len(time_cols) + return np.array([[x['attr' + str(i)] for x in markov_data] + for i in range(1, num_attrs+1)], dtype=float).transpose() + +## not currently used +def rebin_data(time_data, num_time_per_bin): + """ + Convert an n x l matrix into an (n/m) x l matrix where the values are + reduced (averaged) for the intervening states: + 1 2 3 4 1.5 3.5 + 5 6 7 8 -> 5.5 7.5 + 9 8 7 6 8.5 6.5 + 5 4 3 2 4.5 2.5 + + if m = 2, the 4 x 4 matrix is transformed to a 2 x 4 matrix. + + This process effectively resamples the data at a longer time span n + units longer than the input data. + For cases when there is a remainder (remainder(5/3) = 2), the remaining + two columns are binned together as the last time period, while the + first three are binned together for the first period. + + Input: + @param time_data n x l ndarray: measurements of an attribute at + different time intervals + @param num_time_per_bin int: number of columns to average into a new + column + Output: + ceil(n / m) x l ndarray of resampled time series + """ + + if time_data.shape[1] % num_time_per_bin == 0: + ## if fit is perfect, then use it + n_max = time_data.shape[1] / num_time_per_bin + else: + ## fit remainders into an additional column + n_max = time_data.shape[1] / num_time_per_bin + 1 + + return np.array([time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1) + for i in range(n_max)]).T + +def get_prob_dist(transition_matrix, lag_indices, unit_indices): + """ + Given an array of transition matrices, look up the probability + associated with the arrangements passed + + Input: + @param transition_matrix ndarray[k,k,k]: + @param lag_indices ndarray: + @param unit_indices ndarray: + + Output: + Array of probability distributions + """ + + return np.array([transition_matrix[(lag_indices[i], unit_indices[i])] + for i in range(len(lag_indices))]) + +def get_prob_stats(prob_dist, unit_indices): + """ + get the statistics of the probability distributions + + Outputs: + @param trend_up ndarray(float): sum of probabilities for upward + movement (relative to the unit index of that prob) + @param trend_down ndarray(float): sum of probabilities for downward + movement (relative to the unit index of that prob) + @param trend ndarray(float): difference of upward and downward + movements + """ + + num_elements = len(unit_indices) + trend_up = np.empty(num_elements, dtype=float) + trend_down = np.empty(num_elements, dtype=float) + trend = np.empty(num_elements, dtype=float) + + for i in range(num_elements): + trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum() + trend_down[i] = prob_dist[i, :unit_indices[i]].sum() + if prob_dist[i, unit_indices[i]] > 0.0: + trend[i] = (trend_up[i] - trend_down[i]) / prob_dist[i, unit_indices[i]] + else: + trend[i] = None + + ## calculate volatility of distribution + volatility = prob_dist.std(axis=1) + + return trend_up, trend_down, trend, volatility diff --git a/release/python/0.4.2/crankshaft/setup.py b/release/python/0.4.2/crankshaft/setup.py new file mode 100644 index 0000000..cd8ad99 --- /dev/null +++ b/release/python/0.4.2/crankshaft/setup.py @@ -0,0 +1,49 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.0', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation. + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.4.2/crankshaft/setup.py-r b/release/python/0.4.2/crankshaft/setup.py-r new file mode 100644 index 0000000..cd8ad99 --- /dev/null +++ b/release/python/0.4.2/crankshaft/setup.py-r @@ -0,0 +1,49 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.0', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation. + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.4.2/crankshaft/test/fixtures/kmeans.json b/release/python/0.4.2/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git a/release/python/0.4.2/crankshaft/test/fixtures/markov.json b/release/python/0.4.2/crankshaft/test/fixtures/markov.json new file mode 100644 index 0000000..d60e4e0 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/fixtures/markov.json @@ -0,0 +1 @@ +[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]] diff --git a/release/python/0.4.2/crankshaft/test/fixtures/moran.json b/release/python/0.4.2/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..2f75cf1 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], +[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], +[0.5724288737143592, "LL"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], +[0.18562939195219, "LL"], +[0.3010757406693439, "LL"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], +[0.18562939195219, "LL"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/release/python/0.4.2/crankshaft/test/fixtures/neighbors.json b/release/python/0.4.2/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/release/python/0.4.2/crankshaft/test/fixtures/neighbors_markov.json b/release/python/0.4.2/crankshaft/test/fixtures/neighbors_markov.json new file mode 100644 index 0000000..45a20e7 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/fixtures/neighbors_markov.json @@ -0,0 +1 @@ +[{"neighbors": [10, 7, 21, 23, 1], "y1995": 0.87654416055651474, "y1997": 0.85637566664752718, "y1996": 0.8631470006766887, "y1999": 0.84461540228037335, "y1998": 0.84811668329242784, "y2006": 0.86302631339545688, "y2007": 0.86148266513456728, "y2004": 0.86416611731111015, "y2005": 0.87119374831581786, "y2002": 0.85012592862683589, "y2003": 0.8550965633336135, "y2000": 0.83271652434603094, "y2001": 0.83786313566577242, "id": 0, "y2008": 0.86252252380501315, "y2009": 0.86746356478544273}, {"neighbors": [5, 7, 22, 29, 3], "y1995": 0.91889509774542122, "y1997": 0.92333257900976462, "y1996": 0.91757931190043385, "y1999": 0.92552387732371888, "y1998": 0.92517289327379471, "y2006": 0.91706053906277052, "y2007": 0.90139504820726424, "y2004": 0.89815175749309051, "y2005": 0.91832090781161113, "y2002": 0.89431990798552208, "y2003": 0.88924793576523797, "y2000": 0.90746978227271013, "y2001": 0.89830489127332913, "id": 1, "y2008": 0.87897455159080617, "y2009": 0.86216858051752643}, {"neighbors": [11, 8, 13, 18, 17], "y1995": 0.82591007476914713, "y1997": 0.81989792988843901, "y1996": 0.82548595539161707, "y1999": 0.81731522200916285, "y1998": 0.81503235035017918, "y2006": 0.81814804358939286, "y2007": 0.83675961003285626, "y2004": 0.82668195534569056, "y2005": 0.82373723764184559, "y2002": 0.80849979516360859, "y2003": 0.82258550658074148, "y2000": 0.78964559168205917, "y2001": 0.8058444152731008, "id": 2, "y2008": 0.8357419865626442, "y2009": 0.84647177436289112}, {"neighbors": [4, 14, 9, 5, 12], "y1995": 1.0908817638059434, "y1997": 1.0845641754849344, "y1996": 1.0853768890893893, "y1999": 1.098988414417104, "y1998": 1.0841540389418189, "y2006": 1.1316479722785828, "y2007": 1.1295850763954971, "y2004": 1.1139980568106316, "y2005": 1.1216802898290368, "y2002": 1.1116069731657288, "y2003": 1.1088862051501811, "y2000": 1.1450694824791507, "y2001": 1.1215113292620285, "id": 3, "y2008": 1.1137181812756343, "y2009": 1.0993677488645406}, {"neighbors": [14, 3, 9, 31, 12], "y1995": 1.1073144618319228, "y1997": 1.1328363804627946, "y1996": 1.1137394350312471, "y1999": 1.1591002514611153, "y1998": 1.144725587086376, "y2006": 1.1173646811350333, "y2007": 1.1086324218539598, "y2004": 1.1102496406140896, "y2005": 1.11943471361418, "y2002": 1.1475230282561595, "y2003": 1.1184328424005199, "y2000": 1.1689820101690329, "y2001": 1.1721248787169682, "id": 4, "y2008": 1.0964251552643696, "y2009": 1.0776233718455337}, {"neighbors": [29, 1, 22, 7, 4], "y1995": 1.422697571371182, "y1997": 1.4427350196405593, "y1996": 1.4211843379728528, "y1999": 1.4440068434166562, "y1998": 1.4357757095632602, "y2006": 1.4405276647793266, "y2007": 1.4524121586440921, "y2004": 1.4059372049179741, "y2005": 1.4078864636665769, "y2002": 1.4197822680667809, "y2003": 1.3909220829548647, "y2000": 1.4418473669388905, "y2001": 1.4478283203013527, "id": 5, "y2008": 1.4330609762040207, "y2009": 1.4174430982377491}, {"neighbors": [12, 47, 9, 25, 20], "y1995": 1.1307388498039153, "y1997": 1.1107470843142355, "y1996": 1.1311051255854685, "y1999": 1.130881491772973, "y1998": 1.1336463608751246, "y2006": 1.1088003408832796, "y2007": 1.0840170924825394, "y2004": 1.1244623853593112, "y2005": 1.1167100811401538, "y2002": 1.1306293052597198, "y2003": 1.1194498381213465, "y2000": 1.1088813841947593, "y2001": 1.1185662918783175, "id": 6, "y2008": 1.0695920556329086, "y2009": 1.0787522517402164}, {"neighbors": [21, 1, 22, 10, 0], "y1995": 1.0470612357366649, "y1997": 1.0425337165747406, "y1996": 1.0451683097376836, "y1999": 1.0207254480945218, "y1998": 1.0323998680588111, "y2006": 1.0405109962442973, "y2007": 1.0174964540280445, "y2004": 1.0140090547678748, "y2005": 1.0317674181861733, "y2002": 0.99669586934394627, "y2003": 0.99327675611171373, "y2000": 0.99854316295509526, "y2001": 0.98802579761429143, "id": 7, "y2008": 0.9936394033949828, "y2009": 0.98279746069218921}, {"neighbors": [11, 13, 17, 18, 15], "y1995": 0.98996985668705595, "y1997": 0.99491000469481983, "y1996": 1.0014356415938011, "y1999": 1.0045584503565237, "y1998": 1.0018840754492748, "y2006": 0.92232873520447411, "y2007": 0.91284090705064902, "y2004": 0.93694786512729977, "y2005": 0.94308212820743131, "y2002": 0.96834820215592055, "y2003": 0.95335147249088092, "y2000": 0.99127006477048718, "y2001": 0.97925917470464008, "id": 8, "y2008": 0.89689832627117483, "y2009": 0.88928857608264111}, {"neighbors": [12, 6, 4, 3, 14], "y1995": 0.87418390853652306, "y1997": 0.84425695187978567, "y1996": 0.86416601430334228, "y1999": 0.83903043942542854, "y1998": 0.8404493987171674, "y2006": 0.87204140839730271, "y2007": 0.86633032299764789, "y2004": 0.86981997840756087, "y2005": 0.86837929279319737, "y2002": 0.86107306112852877, "y2003": 0.85007719735663123, "y2000": 0.85787080050645603, "y2001": 0.86036185149249467, "id": 9, "y2008": 0.84946077011565357, "y2009": 0.83287145944123797}, {"neighbors": [0, 7, 21, 23, 22], "y1995": 1.1419611801631209, "y1997": 1.1489271154554144, "y1996": 1.146602624490825, "y1999": 1.1443662376135306, "y1998": 1.1490959392942743, "y2006": 1.1049125811637337, "y2007": 1.1105984164317646, "y2004": 1.1119989015058092, "y2005": 1.1025779214946556, "y2002": 1.1259666377127024, "y2003": 1.1221399558345004, "y2000": 1.144501826035474, "y2001": 1.1234975172649961, "id": 10, "y2008": 1.1050979494645479, "y2009": 1.1002009697391872}, {"neighbors": [8, 13, 18, 17, 2], "y1995": 0.97282462974938089, "y1997": 0.96252588061647382, "y1996": 0.96700147279313231, "y1999": 0.96057686787383312, "y1998": 0.96538780087103548, "y2006": 0.91010201260822066, "y2007": 0.89280392121658247, "y2004": 0.94103988614185807, "y2005": 0.9212251863828258, "y2002": 0.94804194711420009, "y2003": 0.9543028555845573, "y2000": 0.95831051250950716, "y2001": 0.94480908623936988, "id": 11, "y2008": 0.89298242828382146, "y2009": 0.89165384824292859}, {"neighbors": [33, 9, 6, 25, 31], "y1995": 0.94325467991401402, "y1997": 0.96455242154753429, "y1996": 0.96436902092427723, "y1999": 0.94117647058823528, "y1998": 0.95243008993884537, "y2006": 0.9346681464882507, "y2007": 0.94281559150403071, "y2004": 0.96918424441756057, "y2005": 0.94781280876672958, "y2002": 0.95388717527096822, "y2003": 0.94597005193649519, "y2000": 0.94809269652332606, "y2001": 0.93539181553564288, "id": 12, "y2008": 0.965203150896216, "y2009": 0.967154410723015}, {"neighbors": [18, 17, 11, 8, 19], "y1995": 0.97478408425654373, "y1997": 0.98712808751954773, "y1996": 0.98169225257738801, "y1999": 0.985598971191053, "y1998": 0.98474769442356791, "y2006": 0.98416665248276058, "y2007": 0.98423613480079708, "y2004": 0.97399471186978948, "y2005": 0.96910087128357136, "y2002": 0.9820996926750224, "y2003": 0.98776529543110569, "y2000": 0.98687072733199255, "y2001": 0.99237486444837619, "id": 13, "y2008": 0.99823861244053191, "y2009": 0.99545704236827348}, {"neighbors": [4, 31, 3, 29, 12], "y1995": 0.85570268988941878, "y1997": 0.85986131704895119, "y1996": 0.85575915188345031, "y1999": 0.85380119644969055, "y1998": 0.85693406055397725, "y2006": 0.82803647591954255, "y2007": 0.81987360180979219, "y2004": 0.83998883284341452, "y2005": 0.83478547261894065, "y2002": 0.85472102128186755, "y2003": 0.84564834502399988, "y2000": 0.86191535266765262, "y2001": 0.84981450830432048, "id": 14, "y2008": 0.82265395167873867, "y2009": 0.83994039782937002}, {"neighbors": [19, 8, 17, 16, 13], "y1995": 0.87022046646521634, "y1997": 0.85961813213722393, "y1996": 0.85996258309339635, "y1999": 0.8394713575455558, "y1998": 0.85689572413110093, "y2006": 0.94202108334913126, "y2007": 0.94222309998743192, "y2004": 0.86763340229291142, "y2005": 0.89179316746010362, "y2002": 0.86776297543511893, "y2003": 0.86720209304280604, "y2000": 0.82785596604704892, "y2001": 0.86008789452656809, "id": 15, "y2008": 0.93902708112840494, "y2009": 0.94479183757120588}, {"neighbors": [28, 26, 15, 19, 32], "y1995": 0.90134907329491731, "y1997": 0.90403990934606904, "y1996": 0.904077381347274, "y1999": 0.90399237579083946, "y1998": 0.90201769385650832, "y2006": 0.91108803862404764, "y2007": 0.90543476309316473, "y2004": 0.94338264626469681, "y2005": 0.91981795862151561, "y2002": 0.93695966482853577, "y2003": 0.94242697007039, "y2000": 0.90906631602055099, "y2001": 0.92693339421265908, "id": 16, "y2008": 0.91737137682250491, "y2009": 0.94793657442067902}, {"neighbors": [13, 18, 11, 19, 8], "y1995": 1.1977611005602815, "y1997": 1.1843915817489725, "y1996": 1.1822256425225894, "y1999": 1.1928672308275252, "y1998": 1.1826786457339149, "y2006": 1.2392938410349985, "y2007": 1.2341867605077472, "y2004": 1.2385704217423759, "y2005": 1.2441989281116201, "y2002": 1.2262477774195681, "y2003": 1.2239707531714479, "y2000": 1.2017286912636342, "y2001": 1.2132869128474402, "id": 17, "y2008": 1.2362673914436095, "y2009": 1.2675439750795283}, {"neighbors": [13, 17, 11, 8, 19], "y1995": 1.2491967813733067, "y1997": 1.2699116090397236, "y1996": 1.2575477330927329, "y1999": 1.3062566740535762, "y1998": 1.2802065055312271, "y2006": 1.3210776560048689, "y2007": 1.329362443219563, "y2004": 1.3054484140490119, "y2005": 1.3030330249408666, "y2002": 1.3257518058685978, "y2003": 1.3079549159235695, "y2000": 1.3479002255103918, "y2001": 1.3439986302151703, "id": 18, "y2008": 1.3300124123891741, "y2009": 1.3328846185074705}, {"neighbors": [26, 17, 28, 15, 16], "y1995": 1.0676800411188558, "y1997": 1.0363730321443168, "y1996": 1.0379927554499979, "y1999": 1.0329609259280523, "y1998": 1.027684488045026, "y2006": 0.94241549375546196, "y2007": 0.92754546923532677, "y2004": 0.99614160423102482, "y2005": 0.97356208269708677, "y2002": 1.0274762326434594, "y2003": 1.0316273366809443, "y2000": 1.0505901631347052, "y2001": 1.0340505678899605, "id": 19, "y2008": 0.92549226593721745, "y2009": 0.92138101880290568}, {"neighbors": [30, 25, 24, 37, 47], "y1995": 1.0947561397632881, "y1997": 1.1165429913770684, "y1996": 1.1152679554712275, "y1999": 1.1314326394231322, "y1998": 1.1310394841195361, "y2006": 1.1090538904302065, "y2007": 1.1057776900012568, "y2004": 1.1402994437897009, "y2005": 1.1197940058085571, "y2002": 1.133670175399079, "y2003": 1.139822558851451, "y2000": 1.1388962186541665, "y2001": 1.1244221220249986, "id": 20, "y2008": 1.1116682481010467, "y2009": 1.0998515545336902}, {"neighbors": [23, 22, 7, 10, 34], "y1995": 0.76530058421804126, "y1997": 0.76542450966153397, "y1996": 0.76612841163904621, "y1999": 0.76014283909933289, "y1998": 0.7672268310234307, "y2006": 0.76842416021983684, "y2007": 0.77487117798086069, "y2004": 0.76533287692895391, "y2005": 0.78205934309410463, "y2002": 0.76156903267949927, "y2003": 0.76651951668098528, "y2000": 0.74480073263159763, "y2001": 0.76098396210261965, "id": 21, "y2008": 0.77768682781054099, "y2009": 0.78801192267396702}, {"neighbors": [21, 34, 5, 7, 29], "y1995": 0.98391336093764348, "y1997": 0.98295341320156315, "y1996": 0.98075815675295552, "y1999": 0.96913802803963667, "y1998": 0.97386015032669815, "y2006": 0.93965462091114671, "y2007": 0.93069644684632924, "y2004": 0.9635616201227476, "y2005": 0.94745351657235244, "y2002": 0.97209860866113018, "y2003": 0.97441312580606143, "y2000": 0.97370819354423843, "y2001": 0.96419154157867693, "id": 22, "y2008": 0.94020973488297466, "y2009": 0.94358232339833159}, {"neighbors": [21, 10, 22, 34, 7], "y1995": 0.83561828119099946, "y1997": 0.81738501913392403, "y1996": 0.82298088022609361, "y1999": 0.80904800725677739, "y1998": 0.81748588141426259, "y2006": 0.87170334233473346, "y2007": 0.8786379876833581, "y2004": 0.85954307066870839, "y2005": 0.86790023653402792, "y2002": 0.83451612857812574, "y2003": 0.85175031934895873, "y2000": 0.80071489233375537, "y2001": 0.83358255807316928, "id": 23, "y2008": 0.87497981001981484, "y2009": 0.87888675419592222}, {"neighbors": [27, 20, 30, 32, 47], "y1995": 0.98845573274970278, "y1997": 0.99665282989553183, "y1996": 1.0209242772035507, "y1999": 0.99386618594343845, "y1998": 0.99141823200404444, "y2006": 0.97906748937234156, "y2007": 0.9932312332800689, "y2004": 1.0111665058188304, "y2005": 0.9998802359352077, "y2002": 0.99669586934394627, "y2003": 1.0255909749831356, "y2000": 0.98733194819247994, "y2001": 0.99644997431653437, "id": 24, "y2008": 1.0020493856497013, "y2009": 0.99602148231561483}, {"neighbors": [20, 33, 6, 30, 12], "y1995": 1.1493091345649815, "y1997": 1.143009615936718, "y1996": 1.1524194939429724, "y1999": 1.1398468268822266, "y1998": 1.1426554202510555, "y2006": 1.0889107875354573, "y2007": 1.0860369499254896, "y2004": 1.0856975145267398, "y2005": 1.1244348633192611, "y2002": 1.0423089214343333, "y2003": 1.0557727834721793, "y2000": 1.0831239730629278, "y2001": 1.0519262599166714, "id": 25, "y2008": 1.0599731384290745, "y2009": 1.0216094265950888}, {"neighbors": [28, 19, 16, 32, 17], "y1995": 1.1136826889802023, "y1997": 1.1189343096757198, "y1996": 1.1057147027213501, "y1999": 1.1432271991365353, "y1998": 1.1377866945457653, "y2006": 1.1268023587150906, "y2007": 1.1235793669317915, "y2004": 1.1482023546040769, "y2005": 1.1238659840114973, "y2002": 1.1600919581655105, "y2003": 1.1446778932605579, "y2000": 1.1825702862895446, "y2001": 1.1622624279436105, "id": 26, "y2008": 1.115925801617498, "y2009": 1.1257082797404696}, {"neighbors": [32, 24, 36, 16, 28], "y1995": 1.303794309231981, "y1997": 1.3120636604057812, "y1996": 1.3075218596998686, "y1999": 1.3062566740535762, "y1998": 1.3153226688859194, "y2006": 1.2865667454509278, "y2007": 1.2973409698906584, "y2004": 1.2683078569016086, "y2005": 1.2617743046198988, "y2002": 1.2920319347677043, "y2003": 1.2718351646774422, "y2000": 1.3121023910310281, "y2001": 1.2998915587009874, "id": 27, "y2008": 1.2939020510829768, "y2009": 1.2934544564717687}, {"neighbors": [26, 16, 19, 32, 27], "y1995": 0.83953719020532513, "y1997": 0.82006005316292385, "y1996": 0.82701447583159737, "y1999": 0.80294863992835086, "y1998": 0.8118887636743225, "y2006": 0.8389109342655191, "y2007": 0.84349246817602375, "y2004": 0.83108634437662732, "y2005": 0.84373783646216949, "y2002": 0.82596790474192727, "y2003": 0.82435704751379402, "y2000": 0.78772975118465016, "y2001": 0.82848010958278628, "id": 28, "y2008": 0.85637272428125033, "y2009": 0.86539395164519117}, {"neighbors": [5, 39, 22, 14, 31], "y1995": 1.2345008725695852, "y1997": 1.2353793515744536, "y1996": 1.2426021999018138, "y1999": 1.2452262575926329, "y1998": 1.2358129278404693, "y2006": 1.2365329681906834, "y2007": 1.2796200872578414, "y2004": 1.1967443443492951, "y2005": 1.2153657295128597, "y2002": 1.1937780418204111, "y2003": 1.1835533748469893, "y2000": 1.2256766974812463, "y2001": 1.2112664802237314, "id": 29, "y2008": 1.2796839248335934, "y2009": 1.2590773758694083}, {"neighbors": [37, 20, 24, 25, 27], "y1995": 0.97696620404861145, "y1997": 0.98035944080980575, "y1996": 0.9740071914763756, "y1999": 0.95543282313901556, "y1998": 0.97581530789338955, "y2006": 0.92100464312607799, "y2007": 0.9147530387633086, "y2004": 0.9298883479571457, "y2005": 0.93442917452618346, "y2002": 0.93679072759857129, "y2003": 0.92540049332494034, "y2000": 0.96480308308405971, "y2001": 0.9468637634838194, "id": 30, "y2008": 0.90249622070947177, "y2009": 0.90213630440783921}, {"neighbors": [35, 14, 33, 12, 4], "y1995": 0.84986885942491119, "y1997": 0.84295996568390696, "y1996": 0.89868510090623221, "y1999": 0.85659367787716301, "y1998": 0.87280533962476625, "y2006": 0.92562487931452408, "y2007": 0.96635366357254426, "y2004": 0.92698332540482575, "y2005": 0.94745351657235244, "y2002": 0.90448992922937876, "y2003": 0.95495898185605821, "y2000": 0.88937573313051443, "y2001": 0.89440100450887505, "id": 31, "y2008": 1.025203118044723, "y2009": 1.0394296020754366}, {"neighbors": [36, 27, 28, 16, 26], "y1995": 1.0192280751235561, "y1997": 1.0097442843101825, "y1996": 1.0025820319237864, "y1999": 0.99765073314119712, "y1998": 1.0030341681355639, "y2006": 0.94779637858468868, "y2007": 0.93759089358493275, "y2004": 0.97583768316642261, "y2005": 0.96101679691008712, "y2002": 0.99747298060178258, "y2003": 0.99550758543481688, "y2000": 1.0075901875261932, "y2001": 0.99192968437874551, "id": 32, "y2008": 0.93353431146829191, "y2009": 0.94121705123804411}, {"neighbors": [44, 25, 12, 35, 31], "y1995": 0.86367410708901315, "y1997": 0.85544345781923936, "y1996": 0.85558931627900803, "y1999": 0.84336613427334628, "y1998": 0.85103025143102673, "y2006": 0.89455097373003656, "y2007": 0.88283929116469462, "y2004": 0.85951183386707053, "y2005": 0.87194227372077004, "y2002": 0.84667960913556228, "y2003": 0.84374557883664714, "y2000": 0.83434853662160158, "y2001": 0.85813595114434105, "id": 33, "y2008": 0.90349490610221961, "y2009": 0.9060067497610369}, {"neighbors": [22, 39, 21, 29, 23], "y1995": 1.0094753356447226, "y1997": 1.0069881886439402, "y1996": 1.0041105523637666, "y1999": 0.99291086334982948, "y1998": 0.99513686502304577, "y2006": 0.96382634438484593, "y2007": 0.95011400973122428, "y2004": 0.975119236728752, "y2005": 0.96134614808826613, "y2002": 0.99291167539274383, "y2003": 0.98983209318633369, "y2000": 1.0058162611397035, "y2001": 0.98850522230466298, "id": 34, "y2008": 0.94346860300667812, "y2009": 0.9463776450423077}, {"neighbors": [31, 38, 44, 33, 14], "y1995": 1.0571257066143651, "y1997": 1.0575301194645879, "y1996": 1.0545941857842291, "y1999": 1.0510385688532684, "y1998": 1.0488078570498685, "y2006": 1.0247627521629479, "y2007": 1.0234752320591773, "y2004": 1.0329697933620496, "y2005": 1.0219168238570018, "y2002": 1.0420048344203974, "y2003": 1.0402553971511816, "y2000": 1.0480002306104303, "y2001": 1.030249414987729, "id": 35, "y2008": 1.0251768368501768, "y2009": 1.0435957064486703}, {"neighbors": [32, 43, 27, 28, 42], "y1995": 1.070841888164505, "y1997": 1.0793762307014196, "y1996": 1.0666949726007404, "y1999": 1.0794043012481198, "y1998": 1.0738798776109699, "y2006": 1.087727556316465, "y2007": 1.0885954360198933, "y2004": 1.1032213602455734, "y2005": 1.0916793915985508, "y2002": 1.0938347765734742, "y2003": 1.1052447043433509, "y2000": 1.0531800956589803, "y2001": 1.0745277096056161, "id": 36, "y2008": 1.0917733838297285, "y2009": 1.1096083021948762}, {"neighbors": [30, 40, 20, 42, 41], "y1995": 0.8671922185905101, "y1997": 0.86675155621455668, "y1996": 0.86628895935887062, "y1999": 0.86511809486628932, "y1998": 0.86425631732335095, "y2006": 0.84488343470424199, "y2007": 0.83374328958471722, "y2004": 0.84517414191529749, "y2005": 0.84843857600526962, "y2002": 0.85411284725399572, "y2003": 0.84886336375435456, "y2000": 0.86287327291635718, "y2001": 0.8516979624450659, "id": 37, "y2008": 0.82812044014430564, "y2009": 0.82878598934619596}, {"neighbors": [35, 31, 45, 39, 44], "y1995": 0.8838921149583755, "y1997": 0.90282398478743275, "y1996": 0.92288667453925455, "y1999": 0.92023285988219217, "y1998": 0.91229185518735723, "y2006": 0.93869676706720051, "y2007": 0.96947770975097391, "y2004": 0.99223700402629367, "y2005": 0.97984969609868555, "y2002": 0.93682451504456421, "y2003": 0.98655146182882891, "y2000": 0.92652175166361039, "y2001": 0.94278865361566122, "id": 38, "y2008": 1.0036262573224608, "y2009": 0.98102350657197357}, {"neighbors": [29, 34, 38, 22, 35], "y1995": 0.970820642185237, "y1997": 0.94534081352108112, "y1996": 0.95320232993219844, "y1999": 0.93967000034446724, "y1998": 0.94215592860799646, "y2006": 0.91035556215514757, "y2007": 0.90430364292511256, "y2004": 0.92879505989982103, "y2005": 0.9211054223180335, "y2002": 0.93412151936513388, "y2003": 0.93501274320242933, "y2000": 0.93092108910210503, "y2001": 0.92662519262599163, "id": 39, "y2008": 0.89994694483851023, "y2009": 0.9007386435858511}, {"neighbors": [41, 37, 42, 30, 45], "y1995": 0.95861858457245008, "y1997": 0.98254810501535106, "y1996": 0.95774543235102894, "y1999": 0.98684823919808018, "y1998": 0.98919471947721893, "y2006": 0.97163003599581876, "y2007": 0.97007020126757271, "y2004": 0.9493488753775261, "y2005": 0.97152609359561659, "y2002": 0.95601578436851964, "y2003": 0.94905384541254967, "y2000": 0.98882204635713133, "y2001": 0.97662233890759653, "id": 40, "y2008": 0.97158948117089283, "y2009": 0.95884908006927827}, {"neighbors": [40, 45, 44, 37, 42], "y1995": 0.83980438854721107, "y1997": 0.85746999875029983, "y1996": 0.84726737166133714, "y1999": 0.85567509846023126, "y1998": 0.85467221160427542, "y2006": 0.8333891885768886, "y2007": 0.83511679264592342, "y2004": 0.81743586206088703, "y2005": 0.83550405700769481, "y2002": 0.84502402428191115, "y2003": 0.82645665158259707, "y2000": 0.84818516243622177, "y2001": 0.85265681182580899, "id": 41, "y2008": 0.82136617314598481, "y2009": 0.80921873783836296}, {"neighbors": [43, 40, 46, 37, 36], "y1995": 0.95118156405662746, "y1997": 0.94688098462868708, "y1996": 0.9466212002600608, "y1999": 0.95124410099780687, "y1998": 0.95085829660091703, "y2006": 0.96895367966714574, "y2007": 0.9700163384024274, "y2004": 0.97583768316642261, "y2005": 0.95571723704302525, "y2002": 0.96804411514198463, "y2003": 0.97136213864358201, "y2000": 0.95440787445922959, "y2001": 0.96364362764682376, "id": 42, "y2008": 0.97082732652905901, "y2009": 0.9878236640328002}, {"neighbors": [36, 42, 32, 27, 46], "y1995": 1.0891004415267045, "y1997": 1.0849289528525252, "y1996": 1.0824896838138709, "y1999": 1.0945424900391545, "y1998": 1.0865692335830259, "y2006": 1.1450297539219478, "y2007": 1.1447474729339102, "y2004": 1.1334273474293739, "y2005": 1.1468606844516303, "y2002": 1.1229257675733433, "y2003": 1.1302103089739621, "y2000": 1.1055818811158884, "y2001": 1.1214085953998059, "id": 43, "y2008": 1.1408403740471014, "y2009": 1.1614292649793569}, {"neighbors": [33, 41, 45, 35, 40], "y1995": 1.0633603345917013, "y1997": 1.0869149629649646, "y1996": 1.0736582323828732, "y1999": 1.1166986255755473, "y1998": 1.0976484597942771, "y2006": 1.0839806574563229, "y2007": 1.0983176831786272, "y2004": 1.0927882684985315, "y2005": 1.0700320368873319, "y2002": 1.0881584856466706, "y2003": 1.0804431312806149, "y2000": 1.1185670222649935, "y2001": 1.0976428286056732, "id": 44, "y2008": 1.0929823187788443, "y2009": 1.0917612486217978}, {"neighbors": [41, 44, 40, 35, 33], "y1995": 0.79772064970019041, "y1997": 0.7858115114280021, "y1996": 0.78829195801876151, "y1999": 0.77035744221561353, "y1998": 0.77615921755360906, "y2006": 0.79949806580432425, "y2007": 0.80172181625581262, "y2004": 0.79603865293896003, "y2005": 0.78966436120841943, "y2002": 0.81437881076636964, "y2003": 0.80788827809912023, "y2000": 0.77751193519846906, "y2001": 0.79902973574567659, "id": 45, "y2008": 0.82168154748053679, "y2009": 0.85587910681858015}, {"neighbors": [42, 43, 40, 36, 37], "y1995": 1.0052446952315301, "y1997": 1.0047589936197736, "y1996": 1.0000769567582628, "y1999": 1.0063956091903872, "y1998": 1.0061394183885444, "y2006": 0.97292595590233411, "y2007": 0.96519561197191939, "y2004": 0.99030032232474696, "y2005": 0.97682565346267858, "y2002": 1.0081498135355325, "y2003": 1.0057431552702318, "y2000": 1.0016297948675874, "y2001": 0.99860738542320637, "id": 46, "y2008": 0.9617340332161447, "y2009": 0.95890283625473927}, {"neighbors": [20, 6, 24, 25, 30], "y1995": 0.95808418788867844, "y1997": 0.9654440995572009, "y1996": 0.93825679674127938, "y1999": 0.96987289157318213, "y1998": 0.95561201303757848, "y2006": 1.1704973973021624, "y2007": 1.1702515395802287, "y2004": 1.0533361880299275, "y2005": 1.0983262971945267, "y2002": 1.0078119390756035, "y2003": 1.0348423554112989, "y2000": 0.96608031008233231, "y2001": 0.99727184521431422, "id": 47, "y2008": 1.1873055260044207, "y2009": 1.1424264534188653}] diff --git a/release/python/0.4.2/crankshaft/test/helper.py b/release/python/0.4.2/crankshaft/test/helper.py new file mode 100644 index 0000000..7d28b94 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/helper.py @@ -0,0 +1,13 @@ +import unittest + +from mock_plpy import MockPlPy +plpy = MockPlPy() + +import sys +sys.modules['plpy'] = plpy + +import os + +def fixture_file(name): + dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir, 'fixtures', name) diff --git a/release/python/0.4.2/crankshaft/test/mock_plpy.py b/release/python/0.4.2/crankshaft/test/mock_plpy.py new file mode 100644 index 0000000..a982ebe --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/mock_plpy.py @@ -0,0 +1,52 @@ +import re + +class MockCursor: + def __init__(self, data): + self.cursor_pos = 0 + self.data = data + + def fetch(self, batch_size): + batch = self.data[self.cursor_pos : self.cursor_pos + batch_size] + self.cursor_pos += batch_size + return batch + + +class MockPlPy: + def __init__(self): + self._reset() + + def _reset(self): + self.infos = [] + self.notices = [] + self.debugs = [] + self.logs = [] + self.warnings = [] + self.errors = [] + self.fatals = [] + self.executes = [] + self.results = [] + self.prepares = [] + self.results = [] + + def _define_result(self, query, result): + pattern = re.compile(query, re.IGNORECASE | re.MULTILINE) + self.results.append([pattern, result]) + + def notice(self, msg): + self.notices.append(msg) + + def debug(self, msg): + self.notices.append(msg) + + def info(self, msg): + self.infos.append(msg) + + def cursor(self, query): + data = self.execute(query) + return MockCursor(data) + + def execute(self, query): # TODO: additional arguments + for result in self.results: + if result[0].match(query): + return result[1] + return [] diff --git a/release/python/0.4.2/crankshaft/test/test_cluster_kmeans.py b/release/python/0.4.2/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + diff --git a/release/python/0.4.2/crankshaft/test/test_clustering_moran.py b/release/python/0.4.2/crankshaft/test/test_clustering_moran.py new file mode 100644 index 0000000..cb54902 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/test_clustering_moran.py @@ -0,0 +1,106 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + + +class MoranTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.params_markov = {"id_col": "cartodb_id", + "time_cols": ["_2013_dec", "_2014_jan", + "_2014_feb"], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads( + open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads( + open(fixture_file('moran.json')).read()) + + def test_map_quads(self): + """Test map_quads""" + self.assertEqual(cc.map_quads(1), 'HH') + self.assertEqual(cc.map_quads(2), 'LH') + self.assertEqual(cc.map_quads(3), 'LL') + self.assertEqual(cc.map_quads(4), 'HL') + self.assertEqual(cc.map_quads(33), None) + self.assertEqual(cc.map_quads('andy'), None) + + def test_quad_position(self): + """Test lisa_sig_vals""" + + quads = np.array([1, 2, 3, 4], np.int) + + ans = np.array(['HH', 'LH', 'LL', 'HL']) + test_ans = cc.quad_position(quads) + + self.assertTrue((test_ans == ans).all()) + + def test_moran_local(self): + """Test Moran's I local""" + data = [{'id': d['id'], + 'attr1': d['value'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] + + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local('subquery', 'value', + 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + zipped_values = zip(result, self.moran_data) + + for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values: + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [{'id': d['id'], + 'attr1': d['value'], + 'attr2': 1, + 'neighbors': d['neighbors']} for d in self.neighbors_data] + + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', + 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + + zipped_values = zip(result, self.moran_data) + + for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values: + self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{'id': d['id'], + 'attr1': d['value'], + 'neighbors': d['neighbors']} for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1235) + result = cc.moran('table', 'value', + 'knn', 5, 99, 'the_geom', 'cartodb_id') + + result_moran = result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) diff --git a/release/python/0.4.2/crankshaft/test/test_pysal_utils.py b/release/python/0.4.2/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..171fdbc --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,142 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + self.params_array = {"id_col": "cartodb_id", + "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans = "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " + + ans_array = "i.\"_2013_dec\"::numeric As attr1, " \ + "i.\"_2014_jan\"::numeric As attr2, " \ + "i.\"_2014_feb\"::numeric As attr3, " + + self.assertEqual(pu.query_attr_select(self.params), ans) + self.assertEqual(pu.query_attr_select(self.params_array), ans_array) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans = "idx_replace.\"andy\" IS NOT NULL AND " \ + "idx_replace.\"jay_z\" IS NOT NULL AND " \ + "idx_replace.\"jay_z\" <> 0" + + ans_array = "idx_replace.\"_2013_dec\" IS NOT NULL AND " \ + "idx_replace.\"_2014_jan\" IS NOT NULL AND " \ + "idx_replace.\"_2014_feb\" IS NOT NULL" + + self.assertEqual(pu.query_attr_where(self.params), ans) + self.assertEqual(pu.query_attr_where(self.params_array), ans_array) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + ans_array = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"_2013_dec\"::numeric As attr1, " \ + "i.\"_2014_jan\"::numeric As attr2, " \ + "i.\"_2014_feb\"::numeric As attr3, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"_2013_dec\" IS NOT NULL AND " \ + "j.\"_2014_jan\" IS NOT NULL AND " \ + "j.\"_2014_feb\" IS NOT NULL " \ + "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"_2013_dec\" IS NOT NULL AND " \ + "i.\"_2014_jan\" IS NOT NULL AND " \ + "i.\"_2014_feb\" IS NOT NULL "\ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params), ans) + self.assertEqual(pu.knn(self.params_array), ans_array) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params), ans) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + pu.knn(self.params)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) diff --git a/release/python/0.4.2/crankshaft/test/test_segmentation.py b/release/python/0.4.2/crankshaft/test/test_segmentation.py new file mode 100644 index 0000000..d02e8b1 --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/test_segmentation.py @@ -0,0 +1,64 @@ +import unittest +import numpy as np +from helper import plpy, fixture_file +import crankshaft.segmentation as segmentation +import json + +class SegmentationTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + + def generate_random_data(self,n_samples,random_state, row_type=False): + x1 = random_state.uniform(size=n_samples) + x2 = random_state.uniform(size=n_samples) + x3 = random_state.randint(0, 4, size=n_samples) + + y = x1+x2*x2+x3 + cartodb_id = range(len(x1)) + + if row_type: + return [ {'features': vals} for vals in zip(x1,x2,x3)], y + else: + return [dict( zip(['x1','x2','x3','target', 'cartodb_id'],[x1,x2,x3,y,cartodb_id]))] + + def test_replace_nan_with_mean(self): + test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan]) + + def test_create_and_predict_segment(self): + n_samples = 1000 + + random_state_train = np.random.RandomState(13) + random_state_test = np.random.RandomState(134) + training_data = self.generate_random_data(n_samples, random_state_train) + test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True) + + + ids = [{'cartodb_ids': range(len(test_data))}] + rows = [{'x1': 0,'x2':0,'x3':0,'y':0,'cartodb_id':0}] + + plpy._define_result('select \* from \(select \* from training\) a limit 1',rows) + plpy._define_result('.*from \(select \* from training\) as a' ,training_data) + plpy._define_result('select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',ids) + plpy._define_result('.*select \* from test.*' ,test_data) + + model_parameters = {'n_estimators': 1200, + 'max_depth': 3, + 'subsample' : 0.5, + 'learning_rate': 0.01, + 'min_samples_leaf': 1} + + result = segmentation.create_and_predict_segment( + 'select * from training', + 'target', + 'select * from test', + model_parameters) + + prediction = [r[1] for r in result] + + accuracy =np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y)))) + + self.assertEqual(len(result),len(test_data)) + self.assertTrue( result[0][2] < 0.01) + self.assertTrue( accuracy < 0.5*np.mean(test_y) ) diff --git a/release/python/0.4.2/crankshaft/test/test_space_time_dynamics.py b/release/python/0.4.2/crankshaft/test/test_space_time_dynamics.py new file mode 100644 index 0000000..54ffc9d --- /dev/null +++ b/release/python/0.4.2/crankshaft/test/test_space_time_dynamics.py @@ -0,0 +1,324 @@ +import unittest +import numpy as np + +import unittest + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.space_time_dynamics as std +from crankshaft import random_seeds +import json + +class SpaceTimeTests(unittest.TestCase): + """Testing class for Markov Functions.""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'], + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors_markov.json')).read()) + self.markov_data = json.loads(open(fixture_file('markov.json')).read()) + + self.time_data = np.array([i * np.ones(10, dtype=float) for i in range(10)]).T + + self.transition_matrix = np.array([ + [[ 0.96341463, 0.0304878 , 0.00609756, 0. , 0. ], + [ 0.06040268, 0.83221477, 0.10738255, 0. , 0. ], + [ 0. , 0.14 , 0.74 , 0.12 , 0. ], + [ 0. , 0.03571429, 0.32142857, 0.57142857, 0.07142857], + [ 0. , 0. , 0. , 0.16666667, 0.83333333]], + [[ 0.79831933, 0.16806723, 0.03361345, 0. , 0. ], + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0.00537634, 0.06989247, 0.8655914 , 0.05913978, 0. ], + [ 0. , 0. , 0.06372549, 0.90196078, 0.03431373], + [ 0. , 0. , 0. , 0.19444444, 0.80555556]], + [[ 0.84693878, 0.15306122, 0. , 0. , 0. ], + [ 0.08133971, 0.78947368, 0.1291866 , 0. , 0. ], + [ 0.00518135, 0.0984456 , 0.79274611, 0.0984456 , 0.00518135], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0. , 0. , 0. , 0.10204082, 0.89795918]], + [[ 0.8852459 , 0.09836066, 0. , 0.01639344, 0. ], + [ 0.03875969, 0.81395349, 0.13953488, 0. , 0.00775194], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0.02339181, 0.12865497, 0.75438596, 0.09356725], + [ 0. , 0. , 0. , 0.09661836, 0.90338164]], + [[ 0.33333333, 0.66666667, 0. , 0. , 0. ], + [ 0.0483871 , 0.77419355, 0.16129032, 0.01612903, 0. ], + [ 0.01149425, 0.16091954, 0.74712644, 0.08045977, 0. ], + [ 0. , 0.01036269, 0.06217617, 0.89637306, 0.03108808], + [ 0. , 0. , 0. , 0.02352941, 0.97647059]]] + ) + + def test_spatial_markov(self): + """Test Spatial Markov.""" + data = [ { 'id': d['id'], + 'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009'], + 'neighbors': d['neighbors'] } for d in self.neighbors_data] + print(str(data[0])) + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + + result = std.spatial_markov_trend('subquery', ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009'], 5, 'knn', 5, 0, 'the_geom', 'cartodb_id') + + self.assertTrue(result != None) + result = [(row[0], row[1], row[2], row[3], row[4]) for row in result] + print result[0] + expected = self.markov_data + for ([res_trend, res_up, res_down, res_vol, res_id], + [exp_trend, exp_up, exp_down, exp_vol, exp_id] + ) in zip(result, expected): + self.assertAlmostEqual(res_trend, exp_trend) + + def test_get_time_data(self): + """Test get_time_data""" + data = [ { 'attr1': d['y1995'], + 'attr2': d['y1996'], + 'attr3': d['y1997'], + 'attr4': d['y1998'], + 'attr5': d['y1999'], + 'attr6': d['y2000'], + 'attr7': d['y2001'], + 'attr8': d['y2002'], + 'attr9': d['y2003'], + 'attr10': d['y2004'], + 'attr11': d['y2005'], + 'attr12': d['y2006'], + 'attr13': d['y2007'], + 'attr14': d['y2008'], + 'attr15': d['y2009'] } for d in self.neighbors_data] + + result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998', 'y1999', 'y2000', 'y2001', 'y2002', 'y2003', 'y2004', 'y2005', 'y2006', 'y2007', 'y2008', 'y2009']) + + ## expected was prepared from PySAL example: + ### f = ps.open(ps.examples.get_path("usjoin.csv")) + ### pci = np.array([f.by_col[str(y)] for y in range(1995, 2010)]).transpose() + ### rpci = pci / (pci.mean(axis = 0)) + + expected = np.array([[ 0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154, 0.83271652 + , 0.83786314, 0.85012593, 0.85509656, 0.86416612, 0.87119375, 0.86302631 + , 0.86148267, 0.86252252, 0.86746356], + [ 0.9188951, 0.91757931, 0.92333258, 0.92517289, 0.92552388, 0.90746978 + , 0.89830489, 0.89431991, 0.88924794, 0.89815176, 0.91832091, 0.91706054 + , 0.90139505, 0.87897455, 0.86216858], + [ 0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522, 0.78964559 + , 0.80584442, 0.8084998, 0.82258551, 0.82668196, 0.82373724, 0.81814804 + , 0.83675961, 0.83574199, 0.84647177], + [ 1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841, 1.14506948 + , 1.12151133, 1.11160697, 1.10888621, 1.11399806, 1.12168029, 1.13164797 + , 1.12958508, 1.11371818, 1.09936775], + [ 1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025, 1.16898201 + , 1.17212488, 1.14752303, 1.11843284, 1.11024964, 1.11943471, 1.11736468 + , 1.10863242, 1.09642516, 1.07762337], + [ 1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684, 1.44184737 + , 1.44782832, 1.41978227, 1.39092208, 1.4059372, 1.40788646, 1.44052766 + , 1.45241216, 1.43306098, 1.4174431 ], + [ 1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149, 1.10888138 + , 1.11856629, 1.13062931, 1.11944984, 1.12446239, 1.11671008, 1.10880034 + , 1.08401709, 1.06959206, 1.07875225], + [ 1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545, 0.99854316 + , 0.9880258, 0.99669587, 0.99327676, 1.01400905, 1.03176742, 1.040511 + , 1.01749645, 0.9936394, 0.98279746], + [ 0.98996986, 1.00143564, 0.99491, 1.00188408, 1.00455845, 0.99127006 + , 0.97925917, 0.9683482, 0.95335147, 0.93694787, 0.94308213, 0.92232874 + , 0.91284091, 0.89689833, 0.88928858], + [ 0.87418391, 0.86416601, 0.84425695, 0.8404494, 0.83903044, 0.8578708 + , 0.86036185, 0.86107306, 0.8500772, 0.86981998, 0.86837929, 0.87204141 + , 0.86633032, 0.84946077, 0.83287146], + [ 1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624, 1.14450183 + , 1.12349752, 1.12596664, 1.12213996, 1.1119989, 1.10257792, 1.10491258 + , 1.11059842, 1.10509795, 1.10020097], + [ 0.97282463, 0.96700147, 0.96252588, 0.9653878, 0.96057687, 0.95831051 + , 0.94480909, 0.94804195, 0.95430286, 0.94103989, 0.92122519, 0.91010201 + , 0.89280392, 0.89298243, 0.89165385], + [ 0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647, 0.9480927 + , 0.93539182, 0.95388718, 0.94597005, 0.96918424, 0.94781281, 0.93466815 + , 0.94281559, 0.96520315, 0.96715441], + [ 0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897, 0.98687073 + , 0.99237486, 0.98209969, 0.9877653, 0.97399471, 0.96910087, 0.98416665 + , 0.98423613, 0.99823861, 0.99545704], + [ 0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012, 0.86191535 + , 0.84981451, 0.85472102, 0.84564835, 0.83998883, 0.83478547, 0.82803648 + , 0.8198736, 0.82265395, 0.8399404 ], + [ 0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136, 0.82785597 + , 0.86008789, 0.86776298, 0.86720209, 0.8676334, 0.89179317, 0.94202108 + , 0.9422231, 0.93902708, 0.94479184], + [ 0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238, 0.90906632 + , 0.92693339, 0.93695966, 0.94242697, 0.94338265, 0.91981796, 0.91108804 + , 0.90543476, 0.91737138, 0.94793657], + [ 1.1977611, 1.18222564, 1.18439158, 1.18267865, 1.19286723, 1.20172869 + , 1.21328691, 1.22624778, 1.22397075, 1.23857042, 1.24419893, 1.23929384 + , 1.23418676, 1.23626739, 1.26754398], + [ 1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667, 1.34790023 + , 1.34399863, 1.32575181, 1.30795492, 1.30544841, 1.30303302, 1.32107766 + , 1.32936244, 1.33001241, 1.33288462], + [ 1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093, 1.05059016 + , 1.03405057, 1.02747623, 1.03162734, 0.9961416, 0.97356208, 0.94241549 + , 0.92754547, 0.92549227, 0.92138102], + [ 1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264, 1.13889622 + , 1.12442212, 1.13367018, 1.13982256, 1.14029944, 1.11979401, 1.10905389 + , 1.10577769, 1.11166825, 1.09985155], + [ 0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284, 0.74480073 + , 0.76098396, 0.76156903, 0.76651952, 0.76533288, 0.78205934, 0.76842416 + , 0.77487118, 0.77768683, 0.78801192], + [ 0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803, 0.97370819 + , 0.96419154, 0.97209861, 0.97441313, 0.96356162, 0.94745352, 0.93965462 + , 0.93069645, 0.94020973, 0.94358232], + [ 0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801, 0.80071489 + , 0.83358256, 0.83451613, 0.85175032, 0.85954307, 0.86790024, 0.87170334 + , 0.87863799, 0.87497981, 0.87888675], + [ 0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619, 0.98733195 + , 0.99644997, 0.99669587, 1.02559097, 1.01116651, 0.99988024, 0.97906749 + , 0.99323123, 1.00204939, 0.99602148], + [ 1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683, 1.08312397 + , 1.05192626, 1.04230892, 1.05577278, 1.08569751, 1.12443486, 1.08891079 + , 1.08603695, 1.05997314, 1.02160943], + [ 1.11368269, 1.1057147, 1.11893431, 1.13778669, 1.1432272, 1.18257029 + , 1.16226243, 1.16009196, 1.14467789, 1.14820235, 1.12386598, 1.12680236 + , 1.12357937, 1.1159258, 1.12570828], + [ 1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667, 1.31210239 + , 1.29989156, 1.29203193, 1.27183516, 1.26830786, 1.2617743, 1.28656675 + , 1.29734097, 1.29390205, 1.29345446], + [ 0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864, 0.78772975 + , 0.82848011, 0.8259679, 0.82435705, 0.83108634, 0.84373784, 0.83891093 + , 0.84349247, 0.85637272, 0.86539395], + [ 1.23450087, 1.2426022, 1.23537935, 1.23581293, 1.24522626, 1.2256767 + , 1.21126648, 1.19377804, 1.18355337, 1.19674434, 1.21536573, 1.23653297 + , 1.27962009, 1.27968392, 1.25907738], + [ 0.9769662, 0.97400719, 0.98035944, 0.97581531, 0.95543282, 0.96480308 + , 0.94686376, 0.93679073, 0.92540049, 0.92988835, 0.93442917, 0.92100464 + , 0.91475304, 0.90249622, 0.9021363 ], + [ 0.84986886, 0.8986851, 0.84295997, 0.87280534, 0.85659368, 0.88937573 + , 0.894401, 0.90448993, 0.95495898, 0.92698333, 0.94745352, 0.92562488 + , 0.96635366, 1.02520312, 1.0394296 ], + [ 1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073, 1.00759019 + , 0.99192968, 0.99747298, 0.99550759, 0.97583768, 0.9610168, 0.94779638 + , 0.93759089, 0.93353431, 0.94121705], + [ 0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613, 0.83434854 + , 0.85813595, 0.84667961, 0.84374558, 0.85951183, 0.87194227, 0.89455097 + , 0.88283929, 0.90349491, 0.90600675], + [ 1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086, 1.00581626 + , 0.98850522, 0.99291168, 0.98983209, 0.97511924, 0.96134615, 0.96382634 + , 0.95011401, 0.9434686, 0.94637765], + [ 1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857, 1.04800023 + , 1.03024941, 1.04200483, 1.0402554, 1.03296979, 1.02191682, 1.02476275 + , 1.02347523, 1.02517684, 1.04359571], + [ 1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043, 1.0531801 + , 1.07452771, 1.09383478, 1.1052447, 1.10322136, 1.09167939, 1.08772756 + , 1.08859544, 1.09177338, 1.1096083 ], + [ 0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809, 0.86287327 + , 0.85169796, 0.85411285, 0.84886336, 0.84517414, 0.84843858, 0.84488343 + , 0.83374329, 0.82812044, 0.82878599], + [ 0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286, 0.92652175 + , 0.94278865, 0.93682452, 0.98655146, 0.992237, 0.9798497, 0.93869677 + , 0.96947771, 1.00362626, 0.98102351], + [ 0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967, 0.93092109 + , 0.92662519, 0.93412152, 0.93501274, 0.92879506, 0.92110542, 0.91035556 + , 0.90430364, 0.89994694, 0.90073864], + [ 0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824, 0.98882205 + , 0.97662234, 0.95601578, 0.94905385, 0.94934888, 0.97152609, 0.97163004 + , 0.9700702, 0.97158948, 0.95884908], + [ 0.83980439, 0.84726737, 0.85747, 0.85467221, 0.8556751, 0.84818516 + , 0.85265681, 0.84502402, 0.82645665, 0.81743586, 0.83550406, 0.83338919 + , 0.83511679, 0.82136617, 0.80921874], + [ 0.95118156, 0.9466212, 0.94688098, 0.9508583, 0.9512441, 0.95440787 + , 0.96364363, 0.96804412, 0.97136214, 0.97583768, 0.95571724, 0.96895368 + , 0.97001634, 0.97082733, 0.98782366], + [ 1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249, 1.10558188 + , 1.1214086, 1.12292577, 1.13021031, 1.13342735, 1.14686068, 1.14502975 + , 1.14474747, 1.14084037, 1.16142926], + [ 1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863, 1.11856702 + , 1.09764283, 1.08815849, 1.08044313, 1.09278827, 1.07003204, 1.08398066 + , 1.09831768, 1.09298232, 1.09176125], + [ 0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744, 0.77751194 + , 0.79902974, 0.81437881, 0.80788828, 0.79603865, 0.78966436, 0.79949807 + , 0.80172182, 0.82168155, 0.85587911], + [ 1.0052447, 1.00007696, 1.00475899, 1.00613942, 1.00639561, 1.00162979 + , 0.99860739, 1.00814981, 1.00574316, 0.99030032, 0.97682565, 0.97292596 + , 0.96519561, 0.96173403, 0.95890284], + [ 0.95808419, 0.9382568, 0.9654441, 0.95561201, 0.96987289, 0.96608031 + , 0.99727185, 1.00781194, 1.03484236, 1.05333619, 1.0983263, 1.1704974 + , 1.17025154, 1.18730553, 1.14242645]]) + + self.assertTrue(np.allclose(result, expected)) + self.assertTrue(type(result) == type(expected)) + self.assertTrue(result.shape == expected.shape) + + def test_rebin_data(self): + """Test rebin_data""" + ## sample in double the time (even case since 10 % 2 = 0): + ## (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2 + ## = 0.5, 2.5, 4.5, 6.5, 8.5 + ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float) + for i in range(0, 10, 2)]).T + + self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 2), ans_even)) + + ## sample in triple the time (uneven since 10 % 3 = 1): + ## (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1 + ## = 1, 4, 7, 9 + ans_odd = np.array([i * np.ones(10, dtype=float) + for i in (1, 4, 7, 9)]).T + self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 3), ans_odd)) + + def test_get_prob_dist(self): + """Test get_prob_dist""" + lag_indices = np.array([1, 2, 3, 4]) + unit_indices = np.array([1, 3, 2, 4]) + answer = np.array([ + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0. , 0. , 0.02352941, 0.97647059] + ]) + result = std.get_prob_dist(self.transition_matrix, lag_indices, unit_indices) + + self.assertTrue(np.array_equal(result, answer)) + + def test_get_prob_stats(self): + """Test get_prob_stats""" + + probs = np.array([ + [ 0.0754717 , 0.88207547, 0.04245283, 0. , 0. ], + [ 0. , 0. , 0.09411765, 0.87058824, 0.03529412], + [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ], + [ 0. , 0. , 0. , 0.02352941, 0.97647059] + ]) + unit_indices = np.array([1, 3, 2, 4]) + answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.]) + answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941]) + answer_trend = np.array([-0.03301887 / 0.88207547, -0.05882353 / 0.87058824, 0.02475248 / 0.77722772, -0.02352941 / 0.97647059]) + answer_volatility = np.array([ 0.34221495, 0.33705421, 0.29226542, 0.38834223]) + + result = std.get_prob_stats(probs, unit_indices) + result_up = result[0] + result_down = result[1] + result_trend = result[2] + result_volatility = result[3] + + self.assertTrue(np.allclose(result_up, answer_up)) + self.assertTrue(np.allclose(result_down, answer_down)) + self.assertTrue(np.allclose(result_trend, answer_trend)) + self.assertTrue(np.allclose(result_volatility, answer_volatility)) diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index b3992a6..1e02d92 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.4.1' +default_version = '0.4.2' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft