From 0400b1a88000c77fde34a731d60e3be04da96b01 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 13:23:56 -0400 Subject: [PATCH 01/38] adding template for code reviews --- .github/PULL_REQUEST_TEMPLATE.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..941542d --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,6 @@ + +- [ ] All declared geometries are `geometry(Geometry, 4326)` for general geoms, or `geometry(Point, 4326)` +- [ ] Include python is activated for new functions. Include this before importing modules: `plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')` +- [ ] Docs for public-facing functions are written +- [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore `_`. + From ca5175f15b5dc1a2ad5822056eb8e29f9db1716d Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 16:26:43 -0400 Subject: [PATCH 02/38] adding reference to subquery argument requirement --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 941542d..882cece 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,4 +3,5 @@ - [ ] Include python is activated for new functions. Include this before importing modules: `plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')` - [ ] Docs for public-facing functions are written - [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore `_`. 
+- [ ] If appropriate, new functions accepts an arbitrary query as an input (see [Crankshaft Issue #6](https://github.com/CartoDB/crankshaft/issues/6) for more information) From 4782d39849da034d272f7b4666cc27c9a4a1c41c Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 3 Jun 2016 10:36:21 -0400 Subject: [PATCH 03/38] stubbing out gravity model --- doc/07_gravity.md | 40 +++++++++++ src/pg/sql/07_gravity.sql | 84 ++++++++++++++++++++++++ src/pg/test/expected/07_gravity_test.out | 0 src/pg/test/sql/07_gravity_test.sql | 1 + 4 files changed, 125 insertions(+) create mode 100644 doc/07_gravity.md create mode 100644 src/pg/sql/07_gravity.sql create mode 100644 src/pg/test/expected/07_gravity_test.out create mode 100644 src/pg/test/sql/07_gravity_test.sql diff --git a/doc/07_gravity.md b/doc/07_gravity.md new file mode 100644 index 0000000..0ce2532 --- /dev/null +++ b/doc/07_gravity.md @@ -0,0 +1,40 @@ +## Gravity Model + +### CDB_Gravity() + +The Gravity Model is derived from newtons law of gravity and is used to estimate the degree of interaction between two places + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| t_id | bigint[] | | +| t_geom | geometry[] | | +| t_weight | numeric[] | | +| s_id | bigint[] | | +| s_geom | geometry[] | | +| s_pop | numeric[] | | +| target | bigint | | +| radius | integer | | +| minval | numeric | | + + +#### Returns + +| Column Name | Type | Description | +|-------------|------|-------------| +| the_geom | Numeric | | +| source_id | bigint | | +| target_id | bigint | | +| dist | Numeric | | +| n | Numeric | | +| hpop | NUMERIC | | + + +#### Example Usage + +```sql +SELECT CDB_GRAVITY (); +``` + + diff --git a/src/pg/sql/07_gravity.sql b/src/pg/sql/07_gravity.sql new file mode 100644 index 0000000..7c4e220 --- /dev/null +++ b/src/pg/sql/07_gravity.sql @@ -0,0 +1,84 @@ +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + 
p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; diff --git a/src/pg/test/expected/07_gravity_test.out b/src/pg/test/expected/07_gravity_test.out new file mode 100644 index 0000000..e69de29 diff --git a/src/pg/test/sql/07_gravity_test.sql b/src/pg/test/sql/07_gravity_test.sql new file mode 100644 index 0000000..22d22dd --- /dev/null +++ b/src/pg/test/sql/07_gravity_test.sql @@ -0,0 +1 @@ +select * form CDB_Gravity() From 73d38bbbaac66c8dacd9f90cc6acca6a92a4a171 Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 3 Jun 2016 17:32:39 +0200 Subject: [PATCH 04/38] filling the gaps --- doc/07_gravity.md | 68 ++++++++++++++++-------- src/pg/test/expected/07_gravity_test.out | 11 ++++ src/pg/test/sql/07_gravity_test.sql | 22 +++++++- 3 files changed, 78 insertions(+), 23 deletions(-) diff --git a/doc/07_gravity.md b/doc/07_gravity.md index 0ce2532..9c8d5e4 100644 --- a/doc/07_gravity.md +++ b/doc/07_gravity.md @@ -1,40 +1,64 @@ -## Gravity Model +## Gravity Model -### CDB_Gravity() +### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307) -The Gravity Model is derived from newtons law of gravity and is used to estimate the degree of interaction between two places +Gravity Models are derived from Newton's Law of Gravity and are used to predict the interaction between a group of populated areas (sources) and a specific target among a group of potential targets, in terms of an attraction factor (weight) -#### Arguments +**CDB_Gravity** is based on the model defined in *Huff's Law of Shopper attraction (1963)* -| Name | Type | Description | +#### Arguments + +| Name | Type | Description | |------|------|-------------| -| t_id | bigint[] | | -| t_geom | geometry[] | | -| t_weight | numeric[] | | -| s_id | bigint[] | | -| s_geom | geometry[] | | -| s_pop | numeric[] | | -| target | bigint | | -| radius | integer | | -| minval | numeric | | +| t_id | bigint[] | Array of targets ID | +| t_geom | geometry[] | Array of targets' geometries | +| t_weight | numeric[] | Array of targets's weights | +| s_id | bigint[] | Array of sources ID | +| s_geom | geometry[] | Array of sources' geometries | +| s_pop | numeric[] | Array of sources's population | +| target | bigint | ID of the target under study | +| radius | integer | Radius in meters around the target under study that will be taken into account| +| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | -#### Returns +#### Returns | Column Name | Type | Description | |-------------|------|-------------| -| the_geom | Numeric | | -| source_id | bigint | | -| target_id | bigint | | -| dist | Numeric | | -| n | Numeric | | -| hpop | NUMERIC | | +| the_geom | geometry | Geometries of the sources within the radius | +| source_id | bigint | ID of the source | +| target_id | bigint | Target ID from input | +| dist | numeric | Distance in meters source to target (if not points, distance between centroids) | +| h | numeric | Probability of patronage | +| hpop | numeric | Patronaging population | #### Example Usage ```sql -SELECT CDB_GRAVITY (); +with t as ( +SELECT + array_agg(cartodb_id::bigint) as id, + array_agg(the_geom) as g, + array_agg(coalesce(gla,0)::numeric) as w 
+FROM + abel.centros_comerciales_de_madrid +WHERE not no_cc +), +s as ( +SELECT + array_agg(cartodb_id::bigint) as id, + array_agg(center) as g, + array_agg(coalesce(t1_1, 0)::numeric) as p +FROM + sscc_madrid +) +select + g.the_geom, + trunc(g.h,2) as h, + round(g.hpop) as hpop, + trunc(g.dist/1000,2) as dist_km +FROM t, s, CDB_Gravity1(t.id, t.g, t.w, s.id, s.g, s.p, newmall_ID, 100000, 5000) g ``` diff --git a/src/pg/test/expected/07_gravity_test.out b/src/pg/test/expected/07_gravity_test.out index e69de29..c101b24 100644 --- a/src/pg/test/expected/07_gravity_test.out +++ b/src/pg/test/expected/07_gravity_test.out @@ -0,0 +1,11 @@ + the_geom | h | hpop | dist +--------------------------------------------+-------------------------+--------------------------+---------------- + 01010000001361C3D32B650140DD24068195B34440 | 1.51078258369747945249 | 12.08626066957983561994 | 4964.714459152 + 01010000002497FF907EFB0040713D0AD7A3B04440 | 98.29730954183620807430 | 688.08116679285345652007 | 99.955141922 + 0101000000A167B3EA733501401D5A643BDFAF4440 | 63.70532894711274639196 | 382.23197368267647835174 | 2488.330566505 + 010100000062A1D634EF380140BE9F1A2FDDB44440 | 35.35415870080995954879 | 176.77079350404979774397 | 4359.370460594 + 010100000052B81E85EB510140355EBA490CB24440 | 33.12290506987740864904 | 132.49162027950963459615 | 3703.664449828 + 0101000000C286A757CA320140736891ED7CAF4440 | 65.45251754279248087849 | 196.35755262837744263547 | 2512.092358644 + 01010000007DD0B359F5390140C976BE9F1AAF4440 | 62.83927792471345639225 | 125.67855584942691278449 | 2926.25725244 + 0101000000D237691A140D01407E6FD39FFDB44440 | 53.54905726651871279586 | 53.54905726651871279586 | 3744.515577777 +(8 rows) diff --git a/src/pg/test/sql/07_gravity_test.sql b/src/pg/test/sql/07_gravity_test.sql index 22d22dd..a86bb23 100644 --- a/src/pg/test/sql/07_gravity_test.sql +++ b/src/pg/test/sql/07_gravity_test.sql @@ -1 +1,21 @@ -select * form CDB_Gravity() +WITH t AS ( + SELECT + ARRAY[1,2,3] AS id, + ARRAY[7.0,8.0,3.0] AS w, + ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)')] AS g +), +s AS ( + SELECT + ARRAY[10,20,30,40,50,60,70,80] AS id, + ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS p, + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g +) +SELECT + g.the_geom, + g.h, + g.hpop, + g.dist +FROM + t, + s, + CDB_Gravity(t.id, t.g, t.w, s.id, s.g, s.p, 2, 100000, 3) g; From 5183f5ff92628f289d75477a7da2cb9f75260710 Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 3 Jun 2016 18:22:26 +0200 Subject: [PATCH 05/38] added function overload with subqueries input --- doc/07_gravity.md | 20 +++++++++++++++++--- src/pg/sql/07_gravity.sql | 31 +++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/doc/07_gravity.md b/doc/07_gravity.md index 9c8d5e4..e4e439e 100644 --- a/doc/07_gravity.md +++ b/doc/07_gravity.md @@ -1,11 +1,11 @@ ## Gravity Model -### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307) - Gravity Models are derived from Newton's Law of Gravity and are used to predict the interaction between a 
group of populated areas (sources) and a specific target among a group of potential targets, in terms of an attraction factor (weight) **CDB_Gravity** is based on the model defined in *Huff's Law of Shopper attraction (1963)* +### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307) + #### Arguments | Name | Type | Description | @@ -20,8 +20,22 @@ Gravity Models are derived from Newton's Law of Gravity and are used to predict | radius | integer | Radius in meters around the target under study that will be taken into account| | minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | +### CDB_Gravity( target_query text, weight_column text, source_query text, pop_column text, target bigint, radius integer, minval numeric DEFAULT -10e307) -#### Returns +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| target_query | text | Query that defines targets | +| weight_column | text | Column name of weights | +| source_query | text | Query that defines sources | +| pop_column | text | Column name of population | +| target | bigint | cartodb_id of the target under study | +| radius | integer | Radius in meters around the target under study that will be taken into account| +| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | + + +### Returns | Column Name | Type | Description | |-------------|------|-------------| diff --git a/src/pg/sql/07_gravity.sql b/src/pg/sql/07_gravity.sql index 7c4e220..47e5b8e 100644 --- a/src/pg/sql/07_gravity.sql +++ b/src/pg/sql/07_gravity.sql @@ -1,3 +1,34 @@ +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + CREATE OR REPLACE FUNCTION CDB_Gravity( IN t_id bigint[], IN t_geom geometry[], From 4e86965f033f14252ded35534bbd2c911e104da7 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Jun 2016 19:58:32 +0000 Subject: [PATCH 06/38] KMeans clustering and weighted centroid analysis --- doc/11_kmeans.md | 62 +++++++++++++++++++ src/pg/sql/11_kmeans.sql | 31 ++++++++++ src/pg/test/expected/05_kmeans_test.out | 10 +++ src/pg/test/sql/05_kmeans_test.sql | 6 ++ .../crankshaft/clustering/__init__.py | 1 + .../crankshaft/clustering/kmeans.py | 17 +++++ src/py/crankshaft/test/fixtures/kmeans.json | 1 + src/py/crankshaft/test/test_cluster_kmeans.py | 38 ++++++++++++ 8 files changed, 166 insertions(+) create mode 100644 doc/11_kmeans.md create mode 100644 src/pg/sql/11_kmeans.sql create mode 100644 src/pg/test/expected/05_kmeans_test.out create mode 100644 
src/pg/test/sql/05_kmeans_test.sql
 create mode 100644 src/py/crankshaft/crankshaft/clustering/kmeans.py
 create mode 100644 src/py/crankshaft/test/fixtures/kmeans.json
 create mode 100644 src/py/crankshaft/test/test_cluster_kmeans.py

diff --git a/doc/11_kmeans.md b/doc/11_kmeans.md
new file mode 100644
index 0000000..6153010
--- /dev/null
+++ b/doc/11_kmeans.md
@@ -0,0 +1,62 @@
+## K-Means Functions
+
+### CDB_KMeans(subquery text, no_clusters INTEGER)
+
+This function attempts to find n clusters within the input data. It will return a table of CartoDB ids and
+the number of the cluster each point in the input was assigned to.
+
+#### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
+| no\_clusters | INTEGER | The number of clusters to find |
+
+#### Returns
+
+A table with the following columns.
+
+| Column Name | Type | Description |
+|-------------|------|-------------|
+| cartodb\_id | INTEGER | The CartoDB id of the row in the input table. |
+| cluster\_no | INTEGER | The cluster that this point belongs to. |
+
+#### Example Usage
+
+```sql
+SELECT
+  customers.*,
+  km.cluster_no
+  FROM cdb_crankshaft.CDB_KMeans('SELECT * FROM customers', 6) km, customers
+  WHERE customers.cartodb_id = km.cartodb_id
+```
+
+### CDB_WeightedMean(subquery text, weight_column text, category_column text)
+
+Function that computes the weighted centroid of each cluster (category), weighted by the given weight column.
+
+### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column and the columns specified as the weight and category columns |
+| weight\_column | TEXT | The name of the column to use as a weight |
+| category\_column | TEXT | The name of the column to use as a category |
+
+### Returns
+
+A table with the following columns.
+ +| Column Name | Type | Description | +|-------------|------|-------------| +| the\_geom | GEOMETRY | A point for the weighted cluster center | +| class | INTEGER | The cluster class | + +### Example Usage + +```sql +SELECT ST_TRANSFORM(the_geom, 3857) as the_geom_webmercator, class +FROM cdb_weighted_mean('SELECT *, customer_value FROM customers','customer_value','cluster_no') +``` diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql new file mode 100644 index 0000000..73e2f1d --- /dev/null +++ b/src/pg/sql/11_kmeans.sql @@ -0,0 +1,31 @@ +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + import plpy + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + +CREATE OR REPLACE FUNCTION CDB_WeightedMean(query text, weight_column text, category_column text default null ) +RETURNS table (the_geom geometry,class integer ) as $$ +BEGIN + +RETURN QUERY + EXECUTE format( $string$ + select ST_SETSRID(st_makepoint(cx, cy),4326) the_geom, class from ( + select + %I as class, + sum(st_x(the_geom)*%I)/sum(%I) cx, + sum(st_y(the_geom)*%I)/sum(%I) cy + from (%s) a + group by %I + ) q + + $string$, category_column, weight_column,weight_column,weight_column,weight_column,query, category_column + ) + using the_geom + RETURN; +END +$$ LANGUAGE plpgsql; diff --git a/src/pg/test/expected/05_kmeans_test.out b/src/pg/test/expected/05_kmeans_test.out new file mode 100644 index 0000000..4e6db09 --- /dev/null +++ b/src/pg/test/expected/05_kmeans_test.out @@ -0,0 +1,10 @@ +\pset format unaligned +\set ECHO all +SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2); +clusters +2 +(1 row) +SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' ); +clusters +52 +(1 row) diff --git a/src/pg/test/sql/05_kmeans_test.sql b/src/pg/test/sql/05_kmeans_test.sql new file mode 100644 index 0000000..a400e5e --- /dev/null +++ b/src/pg/test/sql/05_kmeans_test.sql @@ -0,0 +1,6 @@ +\pset format unaligned +\set ECHO all + +SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2); + +SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' ); diff --git a/src/py/crankshaft/crankshaft/clustering/__init__.py b/src/py/crankshaft/crankshaft/clustering/__init__.py index 0df080f..338e8ea 100644 --- a/src/py/crankshaft/crankshaft/clustering/__init__.py +++ b/src/py/crankshaft/crankshaft/clustering/__init__.py @@ -1 +1,2 @@ from moran import * +from kmeans import * diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..3d9ed58 --- /dev/null +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,17 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, 
n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/src/py/crankshaft/test/fixtures/kmeans.json b/src/py/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/src/py/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git a/src/py/crankshaft/test/test_cluster_kmeans.py b/src/py/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/src/py/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + From e95c40c2f9bea57156449276d80603646cd4317d Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 9 Jun 2016 11:27:15 +0200 Subject: [PATCH 07/38] Ignore idea based configurations --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.gitignore b/.gitignore index 1161ea2..a09b4fe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ envs/ *.pyc .DS_Store +.idea/ From 7f3b23f67a958faa9162efef2362cc13a1e665ff Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 10 Jun 2016 13:06:49 +0000 Subject: [PATCH 08/38] reworking CDB_WeightedMean to be an aggregate function --- src/pg/sql/11_kmeans.sql | 60 +++++++++++++++++-------- src/pg/test/expected/05_kmeans_test.out | 2 +- src/pg/test/sql/05_kmeans_test.sql | 2 +- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 73e2f1d..87f07ea 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -8,24 +8,46 @@ RETURNS table (cartodb_id integer, cluster_no integer) as $$ $$ language plpythonu; -CREATE OR REPLACE FUNCTION CDB_WeightedMean(query text, weight_column text, category_column text default null ) -RETURNS table (the_geom geometry,class integer ) as $$ -BEGIN -RETURN QUERY - EXECUTE format( $string$ - select ST_SETSRID(st_makepoint(cx, cy),4326) the_geom, class from ( - select - %I as class, - sum(st_x(the_geom)*%I)/sum(%I) cx, - sum(st_y(the_geom)*%I)/sum(%I) cy - from (%s) a - group by %I - ) q - - $string$, category_column, weight_column,weight_column,weight_column,weight_column,query, category_column - ) - using the_geom - RETURN; -END +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END $$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE AGGREGATE CDB_WeightedMean(the_geom geometry(Point, 4326), weight NUMERIC)( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" +); + + diff --git a/src/pg/test/expected/05_kmeans_test.out b/src/pg/test/expected/05_kmeans_test.out index 4e6db09..8c6ffa1 100644 --- a/src/pg/test/expected/05_kmeans_test.out +++ b/src/pg/test/expected/05_kmeans_test.out @@ -4,7 +4,7 @@ SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('se clusters 2 (1 row) -SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' ); +SELECT count(*) clusters from (select cdb_crankshaft.CDB_WeightedMean(the_geom, value::NUMERIC), code from ppoints group by code) p; clusters 52 (1 row) diff --git a/src/pg/test/sql/05_kmeans_test.sql b/src/pg/test/sql/05_kmeans_test.sql index a400e5e..2298b85 100644 --- a/src/pg/test/sql/05_kmeans_test.sql +++ b/src/pg/test/sql/05_kmeans_test.sql @@ -3,4 +3,4 @@ SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2); -SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' ); +SELECT count(*) clusters from (select cdb_crankshaft.CDB_WeightedMean(the_geom, value::NUMERIC), code from 
ppoints group by code) p; From 9d3de5a8ef13be63539248f3e4d82d7b4b68df9d Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 10 Jun 2016 13:12:55 +0000 Subject: [PATCH 09/38] adding not null filter for geom on kmeans --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 3d9ed58..4134062 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -5,6 +5,7 @@ def kmeans(query, no_clusters, no_init=20): data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, array_agg(ST_X(the_geom) order by cartodb_id) xs, array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null '''.format(query=query)) xs = data[0]['xs'] From 1a4944b9600250a972458bfe1952f79ffce76ff2 Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 10 Jun 2016 13:16:16 +0000 Subject: [PATCH 10/38] adding sklearn as a dep --- src/py/crankshaft/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 8d5e622..baa88e3 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,9 +40,9 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['pysal==1.9.1'], + install_requires=['pysal==1.9.1', 'sklearn==0.17.1'], - requires=['pysal', 'numpy' ], + requires=['pysal', 'numpy', 'sklearn' ], test_suite='test' ) From 889cd5c5791d2f87e35b3e510b7c7ac14eac9fcf Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Fri, 10 Jun 2016 17:47:46 +0200 Subject: [PATCH 11/38] Fix scikit-learn dep name --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index baa88e3..68f9e17 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,7 +40,7 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. 
- install_requires=['pysal==1.9.1', 'sklearn==0.17.1'], + install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], requires=['pysal', 'numpy', 'sklearn' ], From b33ba2d2949ab0bef092f25acf82d2308775a2a5 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Fri, 10 Jun 2016 18:24:43 +0200 Subject: [PATCH 12/38] Do not use names for the aggregate params --- src/pg/sql/11_kmeans.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 87f07ea..a27f803 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -43,11 +43,9 @@ BEGIN END $$ LANGUAGE plpgsql; -CREATE AGGREGATE CDB_WeightedMean(the_geom geometry(Point, 4326), weight NUMERIC)( +CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)( SFUNC = CDB_WeightedMeanS, FINALFUNC = CDB_WeightedMeanF, STYPE = Numeric[], INITCOND = "{0.0,0.0,0.0}" ); - - From 1e8bc12e0a6ea2ffefe580b63133b88f4db045a7 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Mon, 13 Jun 2016 12:17:46 +0200 Subject: [PATCH 13/38] Declare scipy as dep --- src/py/crankshaft/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 68f9e17..e787d32 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,9 +40,9 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'], - requires=['pysal', 'numpy', 'sklearn' ], + requires=['scipy', 'pysal', 'numpy', 'sklearn'], test_suite='test' ) From c870f68c77652a11f8401bbbb981797694174288 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Mon, 13 Jun 2016 13:05:50 +0200 Subject: [PATCH 14/38] Revert "Declare scipy as dep" This reverts commit 1e8bc12e0a6ea2ffefe580b63133b88f4db045a7. --- src/py/crankshaft/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index e787d32..68f9e17 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,9 +40,9 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], - requires=['scipy', 'pysal', 'numpy', 'sklearn'], + requires=['pysal', 'numpy', 'sklearn' ], test_suite='test' ) From fd1862167c123ad7e59906801027e06c88fbf90e Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Mon, 13 Jun 2016 13:06:21 +0200 Subject: [PATCH 15/38] Remove trailing space --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 68f9e17..04822dd 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -42,7 +42,7 @@ setup( # provisioned in the production servers. 
install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'],
 
-    requires=['pysal', 'numpy', 'sklearn' ],
+    requires=['pysal', 'numpy', 'sklearn'],
 
     test_suite='test'
 )

From 9db4b7f5192c3f946bc6c6dab4a956e3e9a39d16 Mon Sep 17 00:00:00 2001
From: abelvm
Date: Tue, 14 Jun 2016 17:55:45 +0200
Subject: [PATCH 16/38] first commit

---
 doc/08_interpolation.md | 51 +++++++
 src/pg/sql/08_interpolation.sql | 127 ++++++++++++++++++
 .../test/expected/08_interpolation_test.out | 4 +
 src/pg/test/sql/08_interpolation_test.sql | 6 +
 4 files changed, 188 insertions(+)
 create mode 100644 doc/08_interpolation.md
 create mode 100644 src/pg/sql/08_interpolation.sql
 create mode 100644 src/pg/test/expected/08_interpolation_test.out
 create mode 100644 src/pg/test/sql/08_interpolation_test.sql

diff --git a/doc/08_interpolation.md b/doc/08_interpolation.md
new file mode 100644
index 0000000..22fc1bc
--- /dev/null
+++ b/doc/08_interpolation.md
@@ -0,0 +1,51 @@
+## Spatial interpolation
+
+Function to interpolate the value of a numeric attribute at a given point from a scattered dataset of points, using one of three methods:
+
+* [Nearest neighbor](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
+* [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system)
+* [IDW](https://en.wikipedia.org/wiki/Inverse_distance_weighting)
+
+### CDB_SpatialInterpolation (query text, point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, p2 integer DEFAULT 0)
+
+#### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| query | text | Query that returns at least `the_geom` and a numeric value as `attrib` |
+| point | geometry | The target point at which to calculate the value |
+| method | integer | 0: nearest neighbor, 1: barycentric, 2: IDW|
+| p1 | integer | IDW: limit the number of neighbors, 0->no limit|
+| p2 | integer | IDW: order of distance decay, 0-> order 1|
+
+### CDB_SpatialInterpolation (geom geometry[], values numeric[], point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, p2 integer DEFAULT 0)
+
+#### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| geom | geometry[] | Array of the points' geometries |
+| values | numeric[] | Array of the points' values for the parameter under study|
+| point | geometry | The target point at which to calculate the value |
+| method | integer | 0: nearest neighbor, 1: barycentric, 2: IDW|
+| p1 | integer | IDW: limit the number of neighbors, 0->no limit|
+| p2 | integer | IDW: order of distance decay, 0-> order 1|
+
+### Returns
+
+| Column Name | Type | Description |
+|-------------|------|-------------|
+| value | numeric | Interpolated value at the given point, `-888.888` if the given point is outside the boundaries of the source point set |
+
+#### Example Usage
+
+```sql
+with a as (
+    select
+        array_agg(the_geom) as geomin,
+        array_agg(temp::numeric) as colin
+    from table_4804232032
+)
+SELECT CDB_SpatialInterpolation(geomin, colin, CDB_latlng(41.38, 2.15),1) FROM a;
+```
diff --git a/src/pg/sql/08_interpolation.sql b/src/pg/sql/08_interpolation.sql
new file mode 100644
index 0000000..04f1584
--- /dev/null
+++ b/src/pg/sql/08_interpolation.sql
@@ -0,0 +1,127 @@
+-- 0: nearest neighbor
+-- 1: barycentric
+-- 2: IDW
+
+CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
+    IN query text,
+    IN point geometry,
+    IN method integer DEFAULT 1,
+    IN p1 numeric DEFAULT 0,
+    IN p2 numeric DEFAULT 0
+    )
+RETURNS numeric AS
+$$
+DECLARE
+    gs geometry[];
+    vs numeric[];
+BEGIN
+    EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a'
INTO gs, vs;
+    RETURN CDB_SpatialInterpolation(gs, vs, point, method, p1, p2);
+END;
+$$
+language plpgsql IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
+    IN geomin geometry[],
+    IN colin numeric[],
+    IN point geometry,
+    IN method integer DEFAULT 1,
+    IN p1 numeric DEFAULT 0,
+    IN p2 numeric DEFAULT 0
+    )
+RETURNS numeric AS
+$$
+DECLARE
+    gs geometry[];
+    vs numeric[];
+    gs2 geometry[];
+    vs2 numeric[];
+    g geometry;
+    vertex geometry[];
+    sg numeric;
+    sa numeric;
+    sb numeric;
+    sc numeric;
+    va numeric;
+    vb numeric;
+    vc numeric;
+    output numeric;
+BEGIN
+    output := -999.999;
+    -- nearest
+    IF method = 0 THEN
+
+        WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
+        SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
+        RETURN output;
+
+    -- barycentric
+    ELSIF method = 1 THEN
+        WITH a as (SELECT unnest(geomin) AS e),
+        b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
+        c as (SELECT (ST_Dump(t)).geom as v FROM b),
+        d as (SELECT v FROM c WHERE ST_Within(point, v))
+        SELECT v INTO g FROM d;
+        IF g is null THEN
+            -- out of the realm of the input data
+            RETURN -888.888;
+        END IF;
+        -- vertex of the selected cell
+        WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
+        SELECT array_agg(v) INTO vertex FROM a;
+
+        -- retrieve the value of each vertex
+        WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
+        SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
+        WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
+        SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
+        WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
+        SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
+
+        SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
+
+        output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg);
+        RETURN output;
+
+    -- IDW
+    -- p1: limit the number of neighbors, 0->no limit
+    -- p2: order of distance decay, 0-> order 1
+    ELSIF method = 2 THEN
+
+        IF p2 = 0 THEN
+            p2 := 1;
+        END IF;
+
+        WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
+        b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
+        SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
+        IF p1::integer>0 THEN
+            gs2:= ARRAY[]::geometry[];
+            vs2:= ARRAY[]::numeric[];
+            FOR i IN 1..p1
+            LOOP
+                gs2 := gs2 || gs[i];
+                vs2 := vs2 || vs[i];
+            END LOOP;
+        ELSE
+            gs2:=gs;
+            vs2:=vs;
+        END IF;
+
+        WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
+        b as (
+            SELECT
+                (1/ST_distance(point, a.g)^p2::integer) as k,
+                (a.v/ST_distance(point, a.g)^p2::integer) as f
+            FROM a
+        )
+        SELECT sum(b.f)/sum(b.k) INTO output FROM b;
+        RETURN output;
+
+    END IF;
+
+    RETURN -777.777;
+
+END;
+$$
+language plpgsql IMMUTABLE;
diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out
new file mode 100644
index 0000000..42d24cb
--- /dev/null
+++ b/src/pg/test/expected/08_interpolation_test.out
@@ -0,0 +1,4 @@
+ cdb_spatialinterpolation
+--------------------------
+ 780.79470198683925288365
+(1 row)
diff --git a/src/pg/test/sql/08_interpolation_test.sql b/src/pg/test/sql/08_interpolation_test.sql
new file mode 100644
index 0000000..c8db89d
--- /dev/null
+++ b/src/pg/test/sql/08_interpolation_test.sql
@@ -0,0 +1,6 @@
+WITH a AS (
+    SELECT
+
ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g +) +SELECT CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'),1) FROM a; From 5a2319db72c2a04aa3a7ef1a2cf6fa3263ecfd86 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 14 Jun 2016 18:01:03 +0200 Subject: [PATCH 17/38] remove garbage --- doc/07_gravity.md | 78 --------------- src/pg/sql/07_gravity.sql | 115 ----------------------- src/pg/test/expected/07_gravity_test.out | 11 --- src/pg/test/sql/07_gravity_test.sql | 21 ----- 4 files changed, 225 deletions(-) delete mode 100644 doc/07_gravity.md delete mode 100644 src/pg/sql/07_gravity.sql delete mode 100644 src/pg/test/expected/07_gravity_test.out delete mode 100644 src/pg/test/sql/07_gravity_test.sql diff --git a/doc/07_gravity.md b/doc/07_gravity.md deleted file mode 100644 index e4e439e..0000000 --- a/doc/07_gravity.md +++ /dev/null @@ -1,78 +0,0 @@ -## Gravity Model - -Gravity Models are derived from Newton's Law of Gravity and are used to predict the interaction between a group of populated areas (sources) and a specific target among a group of potential targets, in terms of an attraction factor (weight) - -**CDB_Gravity** is based on the model defined in *Huff's Law of Shopper attraction (1963)* - -### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307) - -#### Arguments - -| Name | Type | Description | -|------|------|-------------| -| t_id | bigint[] | Array of targets ID | -| t_geom | geometry[] | Array of targets' geometries | -| t_weight | numeric[] | Array of targets's weights | -| s_id | bigint[] | Array of sources ID | -| s_geom | geometry[] | Array of sources' geometries | -| s_pop | numeric[] | Array of sources's population | -| target | bigint | ID of the target under study | -| radius | integer | Radius in meters around the target under study that will be taken into account| -| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | - -### CDB_Gravity( target_query text, weight_column text, source_query text, pop_column text, target bigint, radius integer, minval numeric DEFAULT -10e307) - -#### Arguments - -| Name | Type | Description | -|------|------|-------------| -| target_query | text | Query that defines targets | -| weight_column | text | Column name of weights | -| source_query | text | Query that defines sources | -| pop_column | text | Column name of population | -| target | bigint | cartodb_id of the target under study | -| radius | integer | Radius in meters around the target under study that will be taken into account| -| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | - - -### Returns - -| Column Name | Type | Description | -|-------------|------|-------------| -| the_geom | geometry | Geometries of the sources within the radius | -| source_id | bigint | ID of the source | -| target_id | bigint | Target ID from input | -| dist | numeric | Distance in meters source to target (if not points, distance between centroids) | -| h | numeric | Probability of patronage | -| hpop | numeric | 
Patronaging population | - - -#### Example Usage - -```sql -with t as ( -SELECT - array_agg(cartodb_id::bigint) as id, - array_agg(the_geom) as g, - array_agg(coalesce(gla,0)::numeric) as w -FROM - abel.centros_comerciales_de_madrid -WHERE not no_cc -), -s as ( -SELECT - array_agg(cartodb_id::bigint) as id, - array_agg(center) as g, - array_agg(coalesce(t1_1, 0)::numeric) as p -FROM - sscc_madrid -) -select - g.the_geom, - trunc(g.h,2) as h, - round(g.hpop) as hpop, - trunc(g.dist/1000,2) as dist_km -FROM t, s, CDB_Gravity1(t.id, t.g, t.w, s.id, s.g, s.p, newmall_ID, 100000, 5000) g -``` - - diff --git a/src/pg/sql/07_gravity.sql b/src/pg/sql/07_gravity.sql deleted file mode 100644 index 47e5b8e..0000000 --- a/src/pg/sql/07_gravity.sql +++ /dev/null @@ -1,115 +0,0 @@ -CREATE OR REPLACE FUNCTION CDB_Gravity( - IN target_query text, - IN weight_column text, - IN source_query text, - IN pop_column text, - IN target bigint, - IN radius integer, - IN minval numeric DEFAULT -10e307 - ) -RETURNS TABLE( - the_geom geometry, - source_id bigint, - target_id bigint, - dist numeric, - h numeric, - hpop numeric) AS $$ -DECLARE - t_id bigint[]; - t_geom geometry[]; - t_weight numeric[]; - s_id bigint[]; - s_geom geometry[]; - s_pop numeric[]; -BEGIN - EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; - EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; - RETURN QUERY - SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; -END; -$$ language plpgsql; - -CREATE OR REPLACE FUNCTION CDB_Gravity( - IN t_id bigint[], - IN t_geom geometry[], - IN t_weight numeric[], - IN s_id bigint[], - IN s_geom geometry[], - IN s_pop numeric[], - IN target bigint, - IN radius integer, - IN minval numeric DEFAULT -10e307 - ) -RETURNS TABLE( - the_geom geometry, - source_id bigint, - target_id bigint, - dist numeric, - h numeric, - hpop numeric) AS $$ -DECLARE - t_type text; - s_type text; - t_center geometry[]; - s_center geometry[]; -BEGIN - t_type := GeometryType(t_geom[1]); - s_type := GeometryType(s_geom[1]); - IF t_type = 'POINT' THEN - t_center := t_geom; - ELSE - WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; - END IF; - IF s_type = 'POINT' THEN - s_center := s_geom; - ELSE - WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; - END IF; - RETURN QUERY - with target0 as( - SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td - ), - source0 as( - SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp - ), - prev0 as( - SELECT - source0.sg, - source0.sd as sourc_id, - coalesce(source0.sp,0) as sp, - target.td as targ_id, - coalesce(target.tw,0) as tw, - GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance - FROM source0 - CROSS JOIN LATERAL - ( - SELECT - * - FROM target0 - WHERE tw > minval - AND ST_DWithin(geography(source0.sc), geography(tc), radius) - ) AS target - ), - deno as( - SELECT - sourc_id, - sum(tw/distance) as h_deno - FROM - prev0 - GROUP BY sourc_id - ) - SELECT - p.sg as the_geom, - p.sourc_id as source_id, - p.targ_id as target_id, - case when p.distance > 1 then p.distance else 0.0 end as dist, - 100*(p.tw/p.distance)/d.h_deno as h, - 
p.sp*(p.tw/p.distance)/d.h_deno as hpop - FROM - prev0 p, - deno d - WHERE - p.targ_id = target AND - p.sourc_id = d.sourc_id; -END; -$$ language plpgsql; diff --git a/src/pg/test/expected/07_gravity_test.out b/src/pg/test/expected/07_gravity_test.out deleted file mode 100644 index c101b24..0000000 --- a/src/pg/test/expected/07_gravity_test.out +++ /dev/null @@ -1,11 +0,0 @@ - the_geom | h | hpop | dist ---------------------------------------------+-------------------------+--------------------------+---------------- - 01010000001361C3D32B650140DD24068195B34440 | 1.51078258369747945249 | 12.08626066957983561994 | 4964.714459152 - 01010000002497FF907EFB0040713D0AD7A3B04440 | 98.29730954183620807430 | 688.08116679285345652007 | 99.955141922 - 0101000000A167B3EA733501401D5A643BDFAF4440 | 63.70532894711274639196 | 382.23197368267647835174 | 2488.330566505 - 010100000062A1D634EF380140BE9F1A2FDDB44440 | 35.35415870080995954879 | 176.77079350404979774397 | 4359.370460594 - 010100000052B81E85EB510140355EBA490CB24440 | 33.12290506987740864904 | 132.49162027950963459615 | 3703.664449828 - 0101000000C286A757CA320140736891ED7CAF4440 | 65.45251754279248087849 | 196.35755262837744263547 | 2512.092358644 - 01010000007DD0B359F5390140C976BE9F1AAF4440 | 62.83927792471345639225 | 125.67855584942691278449 | 2926.25725244 - 0101000000D237691A140D01407E6FD39FFDB44440 | 53.54905726651871279586 | 53.54905726651871279586 | 3744.515577777 -(8 rows) diff --git a/src/pg/test/sql/07_gravity_test.sql b/src/pg/test/sql/07_gravity_test.sql deleted file mode 100644 index a86bb23..0000000 --- a/src/pg/test/sql/07_gravity_test.sql +++ /dev/null @@ -1,21 +0,0 @@ -WITH t AS ( - SELECT - ARRAY[1,2,3] AS id, - ARRAY[7.0,8.0,3.0] AS w, - ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)')] AS g -), -s AS ( - SELECT - ARRAY[10,20,30,40,50,60,70,80] AS id, - ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS p, - ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g -) -SELECT - g.the_geom, - g.h, - g.hpop, - g.dist -FROM - t, - s, - CDB_Gravity(t.id, t.g, t.w, s.id, s.g, s.p, 2, 100000, 3) g; From 7b98415da318e5dd5119e7c10b5b0b2ca54f3c8d Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 14 Jun 2016 18:06:23 +0200 Subject: [PATCH 18/38] Remove virtualenv activation #60 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- src/pg/sql/02_py.sql | 23 ----------------------- src/pg/sql/03_random_seeds.sql | 1 - src/pg/sql/10_moran.sql | 4 ---- 4 files changed, 1 insertion(+), 29 deletions(-) delete mode 100644 src/pg/sql/02_py.sql diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 882cece..9bb2e75 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ - [ ] All declared geometries are `geometry(Geometry, 4326)` for general geoms, or `geometry(Point, 4326)` -- [ ] Include python is activated for new functions. Include this before importing modules: `plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')` +- [ ] Existing functions in crankshaft python library called from the extension are kept at least from version N to version N+1 (to avoid breakage during upgrades). 
- [ ] Docs for public-facing functions are written - [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore `_`. - [ ] If appropriate, new functions accepts an arbitrary query as an input (see [Crankshaft Issue #6](https://github.com/CartoDB/crankshaft/issues/6) for more information) diff --git a/src/pg/sql/02_py.sql b/src/pg/sql/02_py.sql deleted file mode 100644 index 7da5f47..0000000 --- a/src/pg/sql/02_py.sql +++ /dev/null @@ -1,23 +0,0 @@ -CREATE OR REPLACE FUNCTION _cdb_crankshaft_virtualenvs_path() -RETURNS text -AS $$ - BEGIN - -- RETURN '/opt/virtualenvs/crankshaft'; - RETURN '@@VIRTUALENV_PATH@@'; - END; -$$ language plpgsql IMMUTABLE STRICT; - --- Use the crankshaft python module -CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py() -RETURNS VOID -AS $$ - import os - # plpy.notice('%',str(os.environ)) - # activate virtualenv - crankshaft_version = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_internal_version()')[0]['_cdb_crankshaft_internal_version'] - base_path = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_virtualenvs_path()')[0]['_cdb_crankshaft_virtualenvs_path'] - default_venv_path = os.path.join(base_path, crankshaft_version) - venv_path = os.environ.get('CRANKSHAFT_VENV', default_venv_path) - activate_path = venv_path + '/bin/activate_this.py' - exec(open(activate_path).read(), dict(__file__=activate_path)) -$$ LANGUAGE plpythonu; diff --git a/src/pg/sql/03_random_seeds.sql b/src/pg/sql/03_random_seeds.sql index 9a0cca6..2b62be3 100644 --- a/src/pg/sql/03_random_seeds.sql +++ b/src/pg/sql/03_random_seeds.sql @@ -4,7 +4,6 @@ CREATE OR REPLACE FUNCTION _cdb_random_seeds (seed_value INTEGER) RETURNS VOID AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft import random_seeds random_seeds.set_random_seeds(seed_value) $$ LANGUAGE plpythonu; diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index a336867..3be31a2 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -10,7 +10,6 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, significance NUMERIC) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) @@ -28,7 +27,6 @@ CREATE OR REPLACE FUNCTION id_col TEXT) RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) @@ -122,7 +120,6 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran FLOAT, significance FLOAT) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) @@ -143,7 +140,6 @@ CREATE OR REPLACE FUNCTION RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local_rate # TODO: use named parameters or a 
dictionary return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) From 0acae8240f777e042f59dfcf3f0a3e1430dcb984 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 14 Jun 2016 18:23:30 +0200 Subject: [PATCH 19/38] Remove virtualenv stuff from Makefiles #60 --- Makefile | 6 ------ src/pg/Makefile | 7 +------ src/py/Makefile | 11 +++-------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 6c3e219..ef9415b 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,6 @@ PYP_DIR = src/py # Generate and install developmet versions of the extension # and python package. # The extension is named 'dev' with a 'current' alias for easily upgrading. -# The Python package is installed in a virtual environment envs/dev/ # Requires sudo. install: ## Generate and install development version of the extension; requires sudo. $(MAKE) -C $(PYP_DIR) install @@ -29,7 +28,6 @@ release: ## Generate a new release of the extension. Only for telease manager $(MAKE) -C $(PYP_DIR) release # Install the current release. -# The Python package is installed in a virtual environment envs/X.Y.Z/ # Requires sudo. # Use the RELEASE_VERSION environment variable to deploy a specific version: # sudo make deploy RELEASE_VERSION=1.0.0 @@ -52,10 +50,6 @@ clean-release: ## clean up current release rm -rf release/python/$(RELEASE_VERSION) rm -f release/$(RELEASE_VERSION)--*.sql -# Cleanup all virtual environments -clean-environments: ## clean up all virtual environments - rm -rf envs/* - clean-all: clean-dev clean-release clean-environments help: diff --git a/src/pg/Makefile b/src/pg/Makefile index 8a745c4..178ed08 100644 --- a/src/pg/Makefile +++ b/src/pg/Makefile @@ -7,7 +7,6 @@ include ../../Makefile.global # requires sudo. In additionof the current development version # named 'dev', an alias 'current' is generating for ease of # update (upgrade to 'current', then to 'dev'). -# the python module is installed in a virtualenv in envs/dev/ # * test runs the tests for the currently generated Development # extension. 
@@ -18,11 +17,8 @@ DATA = $(EXTENSION)--dev.sql \ SOURCES_DATA_DIR = sql SOURCES_DATA = $(wildcard $(SOURCES_DATA_DIR)/*.sql) -VIRTUALENV_PATH = $(realpath ../../envs) -ESC_VIRVIRTUALENV_PATH = $(subst /,\/,$(VIRTUALENV_PATH)) -REPLACEMENTS = -e 's/@@VERSION@@/$(EXTVERSION)/g' \ - -e 's/@@VIRTUALENV_PATH@@/$(ESC_VIRVIRTUALENV_PATH)/g' +REPLACEMENTS = -e 's/@@VERSION@@/$(EXTVERSION)/g' $(DATA): $(SOURCES_DATA) $(SED) $(REPLACEMENTS) $(SOURCES_DATA_DIR)/*.sql > $@ @@ -54,7 +50,6 @@ release: ../../release/$(EXTENSION).control $(SOURCES_DATA) $(SED) $(REPLACEMENTS) $(SOURCES_DATA_DIR)/*.sql > ../../release/$(EXTENSION)--$(EXTVERSION).sql # Install the current relese into the PostgreSQL extensions directory -# and the Python package in a virtual environment envs/X.Y.Z deploy: $(INSTALL_DATA) ../../release/$(EXTENSION).control '$(DESTDIR)$(datadir)/extension/' $(INSTALL_DATA) ../../release/*.sql '$(DESTDIR)$(datadir)/extension/' diff --git a/src/py/Makefile b/src/py/Makefile index 90b22b8..403c5a1 100644 --- a/src/py/Makefile +++ b/src/py/Makefile @@ -2,14 +2,11 @@ include ../../Makefile.global # Install the package locally for development install: - virtualenv --system-site-packages ../../envs/dev - # source ../../envs/dev/bin/activate - ../../envs/dev/bin/pip install -I ./crankshaft - ../../envs/dev/bin/pip install -I nose + pip install ./crankshaft # Test develpment install test: - ../../envs/dev/bin/nosetests crankshaft/test/ + nosetests crankshaft/test/ release: ../../release/$(EXTENSION).control $(SOURCES_DATA) mkdir -p ../../release/python/$(EXTVERSION) @@ -17,6 +14,4 @@ release: ../../release/$(EXTENSION).control $(SOURCES_DATA) $(SED) -i -r 's/version='"'"'[0-9]+\.[0-9]+\.[0-9]+'"'"'/version='"'"'$(EXTVERSION)'"'"'/g' ../../release/python/$(EXTVERSION)/$(PACKAGE)/setup.py deploy: - virtualenv --system-site-packages $(VIRTUALENV_PATH)/$(RELEASE_VERSION) - $(VIRTUALENV_PATH)/$(RELEASE_VERSION)/bin/pip install -I -U ../../release/python/$(RELEASE_VERSION)/$(PACKAGE) - $(VIRTUALENV_PATH)/$(RELEASE_VERSION)/bin/pip install -I nose + pip install --upgrade ../../release/python/$(RELEASE_VERSION)/$(PACKAGE) From 75531b671e247b507d0a11d6f2fdced5ef3a8084 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 14 Jun 2016 18:24:43 +0200 Subject: [PATCH 20/38] Remove virtualenv references from READMEs #60 --- README.md | 3 +-- src/py/README.md | 17 +---------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 68a64fb..0ff9090 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,10 @@ CartoDB Spatial Analysis extension for PostgreSQL. * - *src/pg* contains the PostgreSQL extension source code * - *src/py* Python module source code * *release* reseleased versions -* *env* base directory for Python virtual environments ## Requirements -* pip, virtualenv, PostgreSQL +* pip, PostgreSQL * python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/master/src/py/README.md)) # Working Process -- Quickstart Guide diff --git a/src/py/README.md b/src/py/README.md index 29a3145..8fcfcb7 100644 --- a/src/py/README.md +++ b/src/py/README.md @@ -10,7 +10,6 @@ nosetests test/ ## Notes about Python dependencies * This extension is targeted at production databases. Therefore certain restrictions must be assumed about the production environment vs other experimental environments. 
-* We're using `pip` and `virtualenv` to generate a suitable isolated environment for python code that has all the dependencies * Every dependency should be: - Added to the `setup.py` file - Installed through it @@ -30,21 +29,7 @@ PySAL 1.10 or later, so we'll stick to 1.9.1. apt-get install -y python-scipy ``` -We'll use virtual environments to install our packages, -but configued to use also system modules so that the -mentioned scipy and numpy are used. - - # Create a virtual environment for python - $ virtualenv --system-site-packages dev - - # Activate the virtualenv - $ source dev/bin/activate - - # Install all the requirements - # expect this to take a while, as it will trigger a few compilations - (dev) $ pip install -I ./crankshaft - -#### Test the libraries with that virtual env +#### Test the libraries ##### Test numpy library dependency: From a8943bae985acc4d960d7cb614c5e6ad4bb68ed1 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 14 Jun 2016 18:27:35 +0200 Subject: [PATCH 21/38] Remove reference to clean-environments #60 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef9415b..50f690c 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ clean-release: ## clean up current release rm -rf release/python/$(RELEASE_VERSION) rm -f release/$(RELEASE_VERSION)--*.sql -clean-all: clean-dev clean-release clean-environments +clean-all: clean-dev clean-release help: @IFS=$$'\n' ; \ From d08a2b6d2d756be58a16e80bf4ded3d134dfb97a Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 14:12:28 +0200 Subject: [PATCH 22/38] Remove _cdb_crankshaft_activate_py activation call from kmeans function --- src/pg/sql/11_kmeans.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index a27f803..125aac3 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -1,8 +1,6 @@ CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) RETURNS table (cartodb_id integer, cluster_no integer) as $$ - import plpy - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import kmeans return kmeans(query,no_clusters,no_init) From 8b5e9102345fc2a7218961ef26c033715a441d6b Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 14:16:32 +0200 Subject: [PATCH 23/38] Release 0.0.3 --- NEWS.md | 5 + release/crankshaft--0.0.2--0.0.3.sql | 413 ++++++++++++++++++ release/crankshaft--0.0.3--0.0.2.sql | 209 +++++++++ release/crankshaft--0.0.3.sql | 403 +++++++++++++++++ release/crankshaft.control | 2 +- .../0.0.3/crankshaft/crankshaft/__init__.py | 2 + .../crankshaft/clustering/__init__.py | 2 + .../crankshaft/clustering/kmeans.py | 18 + .../crankshaft/crankshaft/clustering/moran.py | 260 +++++++++++ .../crankshaft/pysal_utils/__init__.py | 1 + .../crankshaft/pysal_utils/pysal_utils.py | 152 +++++++ .../crankshaft/crankshaft/random_seeds.py | 10 + release/python/0.0.3/crankshaft/setup.py | 48 ++ .../crankshaft/test/fixtures/kmeans.json | 1 + .../0.0.3/crankshaft/test/fixtures/moran.json | 52 +++ .../crankshaft/test/fixtures/neighbors.json | 54 +++ .../python/0.0.3/crankshaft/test/helper.py | 13 + .../python/0.0.3/crankshaft/test/mock_plpy.py | 34 ++ .../crankshaft/test/test_cluster_kmeans.py | 38 ++ .../crankshaft/test/test_clustering_moran.py | 83 ++++ .../0.0.3/crankshaft/test/test_pysal_utils.py | 107 +++++ src/pg/crankshaft.control | 2 +- 22 files changed, 1907 insertions(+), 2 deletions(-) 
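The release ships paired upgrade and downgrade scripts, so a minimal application sketch (assuming the generated files are already deployed to the PostgreSQL extension directory) is the standard extension-update path:

    ALTER EXTENSION crankshaft UPDATE TO '0.0.3';  -- applies crankshaft--0.0.2--0.0.3.sql
    ALTER EXTENSION crankshaft UPDATE TO '0.0.2';  -- rolls back via crankshaft--0.0.3--0.0.2.sql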
create mode 100644 release/crankshaft--0.0.2--0.0.3.sql create mode 100644 release/crankshaft--0.0.3--0.0.2.sql create mode 100644 release/crankshaft--0.0.3.sql create mode 100644 release/python/0.0.3/crankshaft/crankshaft/__init__.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/clustering/__init__.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/clustering/kmeans.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/pysal_utils/pysal_utils.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/random_seeds.py create mode 100644 release/python/0.0.3/crankshaft/setup.py create mode 100644 release/python/0.0.3/crankshaft/test/fixtures/kmeans.json create mode 100644 release/python/0.0.3/crankshaft/test/fixtures/moran.json create mode 100644 release/python/0.0.3/crankshaft/test/fixtures/neighbors.json create mode 100644 release/python/0.0.3/crankshaft/test/helper.py create mode 100644 release/python/0.0.3/crankshaft/test/mock_plpy.py create mode 100644 release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py create mode 100644 release/python/0.0.3/crankshaft/test/test_clustering_moran.py create mode 100644 release/python/0.0.3/crankshaft/test/test_pysal_utils.py diff --git a/NEWS.md b/NEWS.md index 0b8c2da..ed66fd9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +0.0.3 (2016-06-16) +------------------ +* Adds new functions: kmeans, weighted centroids. +* Replaces moran functions with new areas of interest naming. + 0.0.2 (2016-03-16) ------------------ * New versioning approach using per-version Python virtual environments diff --git a/release/crankshaft--0.0.2--0.0.3.sql b/release/crankshaft--0.0.2--0.0.3.sql new file mode 100644 index 0000000..8a865d5 --- /dev/null +++ b/release/crankshaft--0.0.2--0.0.3.sql @@ -0,0 +1,413 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit + +-- [MANUALLY] DROP FUNCTIONS REMOVED SINCE 0.0.2 version + +DROP FUNCTION IF EXISTS cdb_moran_local(TEXT, TEXT, float, INT, INT, TEXT, TEXT, TEXT); +DROP FUNCTION IF EXISTS cdb_moran_local_rate(TEXT, TEXT, TEXT, FLOAT, INT, INT, TEXT, TEXT, TEXT); +DROP FUNCTION IF EXISTS _cdb_crankshaft_virtualenvs_path(); +DROP FUNCTION IF EXISTS _cdb_crankshaft_activate_py(); + +-- [END MANUALLY] DROP FUNCTIONS REMOVED SINCE 0.0.2 version + +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() + RETURNS text AS $$ + SELECT '0.0.3'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() + RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. 
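+-- A minimal usage sketch, assuming a table my_table with a numeric column
+-- my_column (illustrative names only): seeding first keeps permutation-based
+-- significance values reproducible within a session.
+--   SELECT cdb_crankshaft._cdb_random_seeds(1234);
+--   SELECT * FROM cdb_crankshaft.CDB_AreasOfInterestLocal('SELECT * FROM my_table', 'my_column');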
+CREATE OR REPLACE FUNCTION + _cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, 
quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + 
denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) + RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) + RETURNS Numeric[] AS + $$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) + RETURNS GEOMETRY AS + $$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)( +SFUNC = CDB_WeightedMeanS, +FINALFUNC = CDB_WeightedMeanF, +STYPE = Numeric[], +INITCOND = "{0.0,0.0,0.0}" +); +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. 
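+-- A minimal example call, assuming a polygon table census_blocks with a
+-- numeric population column (illustrative names only):
+--   SELECT cdb_crankshaft.cdb_overlap_sum(
+--     ST_SetSRID(ST_MakeEnvelope(-73.99, 40.73, -73.98, 40.74), 4326),
+--     'census_blocks', 'population');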
+-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS + $$ + DECLARE + result numeric; + qualified_name text; + BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; + END; + $$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) + RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; diff --git a/release/crankshaft--0.0.3--0.0.2.sql b/release/crankshaft--0.0.3--0.0.2.sql new file mode 100644 index 0000000..a2ccd2f --- /dev/null +++ b/release/crankshaft--0.0.3--0.0.2.sql @@ -0,0 +1,209 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. 
\quit + +-- [MANUALLY] DROP FUNCTIONS INTRODUCED IN 0.0.3 version + +DROP FUNCTION IF EXISTS CDB_AreasOfInterestGlobal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS _CDB_AreasOfInterestLocal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_AreasOfInterestLocal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialHotspots(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialColdspots(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialOutliers(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_AreasOfInterestGlobalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_AreasOfInterestLocalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS _CDB_AreasOfInterestLocalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialHotspotsRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialColdspotsRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialOutliersRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_KMeans(text,integer,integer); +DROP AGGREGATE IF EXISTS CDB_WeightedMean(geometry(Point, 4326), NUMERIC); +DROP FUNCTION IF EXISTS CDB_WeightedMeanS(Numeric[], GEOMETRY(Point, 4326), NUMERIC); +DROP FUNCTION IF EXISTS CDB_WeightedMeanF(Numeric[]); + + +-- [END MANUALLY] DROP FUNCTIONS INTRODUCED IN 0.0.3 version + +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.2'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +CREATE OR REPLACE FUNCTION _cdb_crankshaft_virtualenvs_path() +RETURNS text +AS $$ + BEGIN + -- RETURN '/opt/virtualenvs/crankshaft'; + RETURN '/home/ubuntu/crankshaft/envs'; + END; +$$ language plpgsql IMMUTABLE STRICT; + +-- Use the crankshaft python module +CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py() +RETURNS VOID +AS $$ + import os + # plpy.notice('%',str(os.environ)) + # activate virtualenv + crankshaft_version = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_internal_version()')[0]['_cdb_crankshaft_internal_version'] + base_path = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_virtualenvs_path()')[0]['_cdb_crankshaft_virtualenvs_path'] + default_venv_path = os.path.join(base_path, crankshaft_version) + venv_path = os.environ.get('CRANKSHAFT_VENV', default_venv_path) + activate_path = venv_path + '/bin/activate_this.py' + exec(open(activate_path).read(), dict(__file__=activate_path)) +$$ LANGUAGE plpythonu; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. 
+CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +-- Moran's I +CREATE OR REPLACE FUNCTION + cdb_moran_local ( + t TEXT, + attr TEXT, + significance float DEFAULT 0.05, + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_column TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn') +RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate +CREATE OR REPLACE FUNCTION + cdb_moran_local_rate(t TEXT, + numerator TEXT, + denominator TEXT, + significance FLOAT DEFAULT 0.05, + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_column TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn') +RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric) +AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type) +$$ LANGUAGE plpythonu; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. +-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. 
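+-- A minimal example call, assuming a polygon table my_polygons (illustrative
+-- name only): roughly 100 points are generated inside each input polygon.
+--   SELECT cdb_crankshaft.cdb_dot_density(the_geom, 100) AS dots FROM my_polygons;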
+-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; diff --git a/release/crankshaft--0.0.3.sql b/release/crankshaft--0.0.3.sql new file mode 100644 index 0000000..caacd75 --- /dev/null +++ b/release/crankshaft--0.0.3.sql @@ -0,0 +1,403 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.3'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. 
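+-- A minimal reproducibility sketch, assuming a point table my_table
+-- (illustrative name only): seeding first is intended to make the k-means
+-- cluster assignment stable across runs within a session.
+--   SELECT cdb_crankshaft._cdb_random_seeds(10);
+--   SELECT * FROM cdb_crankshaft.CDB_KMeans('SELECT * FROM my_table', 5);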
+CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, 
quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + 
denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" +); +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. 
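+-- A minimal example with the optional schema_name argument, assuming a table
+-- public.census_blocks with a numeric population column (illustrative names only):
+--   SELECT cdb_crankshaft.cdb_overlap_sum(
+--     ST_SetSRID(ST_MakeEnvelope(-73.99, 40.73, -73.98, 40.74), 4326),
+--     'census_blocks', 'population', 'public');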
+-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. 
+-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; diff --git a/release/crankshaft.control b/release/crankshaft.control index 49c0d22..2029b7e 100644 --- a/release/crankshaft.control +++ b/release/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.0.2' +default_version = '0.0.3' requires = 'plpythonu, postgis, cartodb' superuser = true schema = cdb_crankshaft diff --git a/release/python/0.0.3/crankshaft/crankshaft/__init__.py b/release/python/0.0.3/crankshaft/crankshaft/__init__.py new file mode 100644 index 0000000..d07e330 --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/__init__.py @@ -0,0 +1,2 @@ +import random_seeds +import clustering diff --git a/release/python/0.0.3/crankshaft/crankshaft/clustering/__init__.py b/release/python/0.0.3/crankshaft/crankshaft/clustering/__init__.py new file mode 100644 index 0000000..338e8ea --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/clustering/__init__.py @@ -0,0 +1,2 @@ +from moran import * +from kmeans import * diff --git a/release/python/0.0.3/crankshaft/crankshaft/clustering/kmeans.py b/release/python/0.0.3/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..4134062 --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,18 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py b/release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py new file mode 100644 index 0000000..39b3ff6 --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py @@ -0,0 +1,260 @@ +""" +Moran's I geostatistics (global clustering & outliers presence) +""" + +# TODO: Fill in local neighbors which have null/NoneType values with the +# average of the their neighborhood + +import pysal as ps +import plpy + +# crankshaft module +import crankshaft.pysal_utils as pu + +# High level interface --------------------------------------- + +def moran(subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. 
+ Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": attr_name, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + attr_vals = pu.get_attributes(result) + + ## calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) + + return zip([moran_global.I], [moran_global.EI]) + +def moran_local(subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = {"id_col": id_col, + "attr1": attr, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(5) + + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) + Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": numerator, + "attr2": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) + + return zip([lisa_rate.I], [lisa_rate.EI]) + +def moran_local_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors + + query = 
pu.construct_neighbor_query(w_type, + {"id_col": id_col, + "numerator": numerator, + "denominator": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs}) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(5) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) + + # find units of significance + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_local_bv(subquery, attr1, attr2, + permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ + plpy.notice('** Constructing query') + + qvals = {"num_ngbrs": num_ngbrs, + "attr1": attr1, + "attr2": attr2, + "subquery": subquery, + "geom_col": geom_col, + "id_col": id_col} + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(4) + except plpy.SPIError: + plpy.error("Error: areas of interest query failed, " \ + "check input parameters") + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(4) + + ## collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + plpy.notice("len of Is: %d" % len(lisa.Is)) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + plpy.notice('** Finished calculations') + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + +# Low level functions ---------------------------------------- + +def map_quads(coord): + """ + Map a quadrant number to Moran's I designation + HH=1, LH=2, LL=3, HL=4 + Input: + @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') + """ + if coord == 1: + return 'HH' + elif coord == 2: + return 'LH' + elif coord == 3: + return 'LL' + elif coord == 4: + return 'HL' + else: + return None + +def quad_position(quads): + """ + Produce Moran's I classification based of n + Input: + @param quads ndarray: an array of quads classified by + 1-4 (PySAL default) + Output: + @param list: an array of quads classied by 'HH', 'LL', etc. 
+ """ + return [map_quads(q) for q in quads] diff --git a/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/__init__.py b/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/__init__.py new file mode 100644 index 0000000..835880d --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/__init__.py @@ -0,0 +1 @@ +from pysal_utils import * diff --git a/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/pysal_utils.py new file mode 100644 index 0000000..02b5e35 --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -0,0 +1,152 @@ +""" + Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects +""" + +import numpy as np +import pysal as ps + +def construct_neighbor_query(w_type, query_vals): + """Return query (a string) used for finding neighbors + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query + """ + + if w_type.lower() == 'knn': + return knn(query_vals) + else: + return queen(query_vals) + +## Build weight object +def get_weight(query_res, w_type='knn', num_ngbrs=5): + """ + Construct PySAL weight from return value of query + @param query_res: query results with attributes and neighbors + """ + if w_type.lower() == 'knn': + row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs + weights = {x['id']: row_normed_weights for x in query_res} + else: + weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) + if len(x['neighbors']) > 0 + else [] for x in query_res} + + neighbors = {x['id']: x['neighbors'] for x in query_res} + + return ps.W(neighbors, weights) + +def query_attr_select(params): + """ + Create portion of SELECT statement for attributes inolved in query. + @param params: dict of information used in query (column names, + table name, etc.) + """ + + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')] + + template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, " + + attr_string = "" + + for idx, val in enumerate(sorted(attrs)): + attr_string += template % {"col": val, "alias_num": idx + 1} + + return attr_string + +def query_attr_where(params): + """ + Create portion of WHERE clauses for weeding out NULL-valued geometries + """ + attrs = sorted([k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]) + + attr_string = [] + + for attr in attrs: + attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr) + + if len(attrs) == 2: + attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1]) + + out = " AND ".join(attr_string) + + return out + +def knn(params): + """SQL query for k-nearest neighbors. 
+ @param vars: dict of values to fill template + """ + + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE " \ + "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "%(attr_where_j)s " \ + "ORDER BY " \ + "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs})" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## SQL query for finding queens neighbors (all contiguous polygons) +def queen(params): + """SQL query for queen neighbors. + @param params dict: information to fill query + """ + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## to add more weight methods open a ticket or pull request + +def get_attributes(query_res, attr_num=1): + """ + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) + """ + return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) + +def empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/release/python/0.0.3/crankshaft/crankshaft/random_seeds.py b/release/python/0.0.3/crankshaft/crankshaft/random_seeds.py new file mode 100644 index 0000000..b7c8eed --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/random_seeds.py @@ -0,0 +1,10 @@ +import random +import numpy + +def set_random_seeds(value): + """ + Set the seeds of the RNGs (Random Number Generators) + used internally. 
+ """ + random.seed(value) + numpy.random.seed(value) diff --git a/release/python/0.0.3/crankshaft/setup.py b/release/python/0.0.3/crankshaft/setup.py new file mode 100644 index 0000000..33a3b62 --- /dev/null +++ b/release/python/0.0.3/crankshaft/setup.py @@ -0,0 +1,48 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.3', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.0.3/crankshaft/test/fixtures/kmeans.json b/release/python/0.0.3/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git a/release/python/0.0.3/crankshaft/test/fixtures/moran.json 
b/release/python/0.0.3/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..2f75cf1 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], +[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], +[0.5724288737143592, "LL"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], +[0.18562939195219, "LL"], +[0.3010757406693439, "LL"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], +[0.18562939195219, "LL"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/release/python/0.0.3/crankshaft/test/fixtures/neighbors.json b/release/python/0.0.3/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 
8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/release/python/0.0.3/crankshaft/test/helper.py b/release/python/0.0.3/crankshaft/test/helper.py new file mode 100644 index 0000000..7d28b94 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/helper.py @@ -0,0 +1,13 @@ +import unittest + +from mock_plpy import MockPlPy +plpy = MockPlPy() + +import sys +sys.modules['plpy'] = plpy + +import os + +def fixture_file(name): + dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir, 'fixtures', name) diff --git a/release/python/0.0.3/crankshaft/test/mock_plpy.py b/release/python/0.0.3/crankshaft/test/mock_plpy.py new file mode 100644 index 0000000..63c88f6 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/mock_plpy.py @@ -0,0 +1,34 @@ +import re + +class MockPlPy: + def __init__(self): + self._reset() + + def _reset(self): + self.infos = [] + self.notices = [] + self.debugs = [] + self.logs = [] + self.warnings = [] + self.errors = [] + self.fatals = [] + self.executes = [] + self.results = [] + self.prepares = [] + self.results = [] + + def _define_result(self, query, result): + pattern = re.compile(query, re.IGNORECASE | re.MULTILINE) + self.results.append([pattern, result]) + + def notice(self, msg): + self.notices.append(msg) + + def info(self, msg): + self.infos.append(msg) + + def execute(self, query): # TODO: additional arguments + for result in self.results: + if result[0].match(query): + return result[1] + return [] diff --git a/release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py b/release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# 
sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + diff --git a/release/python/0.0.3/crankshaft/test/test_clustering_moran.py b/release/python/0.0.3/crankshaft/test/test_clustering_moran.py new file mode 100644 index 0000000..393e93b --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/test_clustering_moran.py @@ -0,0 +1,83 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class MoranTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads(open(fixture_file('moran.json')).read()) + + def test_map_quads(self): + """Test map_quads""" + self.assertEqual(cc.map_quads(1), 'HH') + self.assertEqual(cc.map_quads(2), 'LH') + self.assertEqual(cc.map_quads(3), 'LL') + self.assertEqual(cc.map_quads(4), 'HL') + self.assertEqual(cc.map_quads(33), None) + self.assertEqual(cc.map_quads('andy'), None) + + def test_quad_position(self): + """Test lisa_sig_vals""" + + quads = np.array([1, 2, 3, 4], np.int) + + ans = np.array(['HH', 'LH', 'LL', 'HL']) + test_ans = cc.quad_position(quads) + + self.assertTrue((test_ans == ans).all()) + + def test_moran_local(self): + """Test Moran's I local""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None? 
', result == None + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1235) + result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None?', result == None + result_moran = result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) diff --git a/release/python/0.0.3/crankshaft/test/test_pysal_utils.py b/release/python/0.0.3/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..4ea0d9b --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,107 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans = "i.\"{attr1}\"::numeric As attr1, " \ + "i.\"{attr2}\"::numeric As attr2, " + + self.assertEqual(pu.query_attr_select(self.params), ans) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" <> 0" + + self.assertEqual(pu.query_attr_where(self.params), ans) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params), ans) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params), ans) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + 
pu.knn(self.params)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index 49c0d22..2029b7e 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.0.2' +default_version = '0.0.3' requires = 'plpythonu, postgis, cartodb' superuser = true schema = cdb_crankshaft From 1e19f468ebfc0626c37b85fd88ad0d1da1531771 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 16:23:43 +0200 Subject: [PATCH 24/38] Declare numpy dep --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 04822dd..f072f17 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,7 +40,7 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['numpy==1.11.0', 'pysal==1.9.1', 'scikit-learn==0.17.1'], requires=['pysal', 'numpy', 'sklearn'], From 237aa1c5818f003ffb459817ea5e72392c765c5c Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 16:34:45 +0200 Subject: [PATCH 25/38] Declare scipy as dep --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index f072f17..266b6f1 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,7 +40,7 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. 
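# Editorial sketch, not part of the patch: this hunk and the follow-ups in
# patches 25 and 28 progressively pin numpy, scipy, joblib, pysal and
# scikit-learn to whatever is provisioned on the production servers. A quick
# way to check that a target environment actually matches a pin set; the
# versions below mirror the final pins introduced in patch 28, and everything
# else here is an assumption rather than project code:
import pkg_resources

PINNED = {
    'joblib': '0.8.3',
    'numpy': '1.6.1',
    'scipy': '0.14.0',
    'pysal': '1.11.2',
    'scikit-learn': '0.14.1',
}

for name, wanted in PINNED.items():
    try:
        # look up the installed distribution and its version string
        found = pkg_resources.get_distribution(name).version
    except pkg_resources.DistributionNotFound:
        print('%s is not installed (pinned to %s)' % (name, wanted))
        continue
    if found != wanted:
        print('%s is %s but setup.py pins %s' % (name, found, wanted))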
- install_requires=['numpy==1.11.0', 'pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['numpy==1.11.0', 'scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'], requires=['pysal', 'numpy', 'sklearn'], From 3480a0d252b1b7f9e79397b126b08f65837d3036 Mon Sep 17 00:00:00 2001 From: Luis Bosque Date: Thu, 16 Jun 2016 16:56:16 +0200 Subject: [PATCH 26/38] Allow passing options to pip install --- src/py/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/Makefile b/src/py/Makefile index 403c5a1..b584645 100644 --- a/src/py/Makefile +++ b/src/py/Makefile @@ -13,5 +13,5 @@ release: ../../release/$(EXTENSION).control $(SOURCES_DATA) cp -r ./$(PACKAGE) ../../release/python/$(EXTVERSION)/ $(SED) -i -r 's/version='"'"'[0-9]+\.[0-9]+\.[0-9]+'"'"'/version='"'"'$(EXTVERSION)'"'"'/g' ../../release/python/$(EXTVERSION)/$(PACKAGE)/setup.py -deploy: - pip install --upgrade ../../release/python/$(RELEASE_VERSION)/$(PACKAGE) +deploy: + pip install $(RUN_OPTIONS) --upgrade ../../release/python/$(RELEASE_VERSION)/$(PACKAGE) From 1db938c450634532133190b3c28425e7313acc72 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 19:07:42 +0200 Subject: [PATCH 27/38] Removes cartodb-extension-dep --- CONTRIBUTING.md | 1 - src/pg/crankshaft.control | 2 +- src/pg/test/expected/01_install_test.out | 1 - src/pg/test/sql/01_install_test.sql | 1 - src/pg/test/sql/90_permissions.sql | 2 +- 5 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f642d45..42385dc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,7 +60,6 @@ it can be installed directly with: * `CREATE EXTENSION IF NOT EXISTS plpythonu;` `CREATE EXTENSION IF NOT EXISTS postgis;` - `CREATE EXTENSION IF NOT EXISTS cartodb;` `CREATE EXTENSION crankshaft WITH VERSION 'dev';` Note: the development extension uses the development python virtual diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index 2029b7e..e71321f 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' default_version = '0.0.3' -requires = 'plpythonu, postgis, cartodb' +requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft diff --git a/src/pg/test/expected/01_install_test.out b/src/pg/test/expected/01_install_test.out index e40d267..e84a48a 100644 --- a/src/pg/test/expected/01_install_test.out +++ b/src/pg/test/expected/01_install_test.out @@ -1,6 +1,5 @@ -- Install dependencies CREATE EXTENSION plpythonu; CREATE EXTENSION postgis; -CREATE EXTENSION cartodb; -- Install the extension CREATE EXTENSION crankshaft VERSION 'dev'; diff --git a/src/pg/test/sql/01_install_test.sql b/src/pg/test/sql/01_install_test.sql index fc3ea80..bbce805 100644 --- a/src/pg/test/sql/01_install_test.sql +++ b/src/pg/test/sql/01_install_test.sql @@ -1,7 +1,6 @@ -- Install dependencies CREATE EXTENSION plpythonu; CREATE EXTENSION postgis; -CREATE EXTENSION cartodb; -- Install the extension CREATE EXTENSION crankshaft VERSION 'dev'; diff --git a/src/pg/test/sql/90_permissions.sql b/src/pg/test/sql/90_permissions.sql index 187f795..1e9ea99 100644 --- a/src/pg/test/sql/90_permissions.sql +++ b/src/pg/test/sql/90_permissions.sql @@ -4,7 +4,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SET ROLE test_regular_user; -- Add to the search path the schema -SET search_path TO public,cartodb,cdb_crankshaft; +SET search_path TO public,cdb_crankshaft; -- Exercise public functions SELECT ppoints.code, m.quads From 
f5fb4499db226521adb952b7524449ae15ddcc3a Mon Sep 17 00:00:00 2001 From: Luis Bosque Date: Mon, 20 Jun 2016 09:44:52 +0200 Subject: [PATCH 28/38] Set final dependencies versions --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 266b6f1..abd4dae 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,7 +40,7 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['numpy==1.11.0', 'scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], requires=['pysal', 'numpy', 'sklearn'], From 01fc2c1dd1087e58679cc2728ee88a0df5702ea6 Mon Sep 17 00:00:00 2001 From: Luis Bosque Date: Mon, 20 Jun 2016 10:04:22 +0200 Subject: [PATCH 29/38] Release 0.0.4 --- NEWS.md | 5 + release/crankshaft--0.0.3--0.0.4.sql | 8 + release/crankshaft--0.0.4--0.0.3.sql | 8 + release/crankshaft--0.0.4.sql | 403 ++++++++++++++++++ release/crankshaft.control | 4 +- .../0.0.4/crankshaft/crankshaft/__init__.py | 2 + .../crankshaft/clustering/__init__.py | 2 + .../crankshaft/clustering/kmeans.py | 18 + .../crankshaft/crankshaft/clustering/moran.py | 260 +++++++++++ .../crankshaft/pysal_utils/__init__.py | 1 + .../crankshaft/pysal_utils/pysal_utils.py | 152 +++++++ .../crankshaft/crankshaft/random_seeds.py | 10 + release/python/0.0.4/crankshaft/setup.py | 48 +++ .../crankshaft/test/fixtures/kmeans.json | 1 + .../0.0.4/crankshaft/test/fixtures/moran.json | 52 +++ .../crankshaft/test/fixtures/neighbors.json | 54 +++ .../python/0.0.4/crankshaft/test/helper.py | 13 + .../python/0.0.4/crankshaft/test/mock_plpy.py | 34 ++ .../crankshaft/test/test_cluster_kmeans.py | 38 ++ .../crankshaft/test/test_clustering_moran.py | 83 ++++ .../0.0.4/crankshaft/test/test_pysal_utils.py | 107 +++++ src/pg/crankshaft.control | 2 +- 22 files changed, 1302 insertions(+), 3 deletions(-) create mode 100644 release/crankshaft--0.0.3--0.0.4.sql create mode 100644 release/crankshaft--0.0.4--0.0.3.sql create mode 100644 release/crankshaft--0.0.4.sql create mode 100644 release/python/0.0.4/crankshaft/crankshaft/__init__.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/clustering/__init__.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/clustering/kmeans.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/pysal_utils/pysal_utils.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/random_seeds.py create mode 100644 release/python/0.0.4/crankshaft/setup.py create mode 100644 release/python/0.0.4/crankshaft/test/fixtures/kmeans.json create mode 100644 release/python/0.0.4/crankshaft/test/fixtures/moran.json create mode 100644 release/python/0.0.4/crankshaft/test/fixtures/neighbors.json create mode 100644 release/python/0.0.4/crankshaft/test/helper.py create mode 100644 release/python/0.0.4/crankshaft/test/mock_plpy.py create mode 100644 release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py create mode 100644 release/python/0.0.4/crankshaft/test/test_clustering_moran.py create mode 100644 release/python/0.0.4/crankshaft/test/test_pysal_utils.py diff --git a/NEWS.md b/NEWS.md index ed66fd9..c011a0d 100644 --- a/NEWS.md +++ b/NEWS.md 
@@ -1,3 +1,8 @@ +0.0.4 (2016-06-20) +------------------ +* Remove cartodb extension dependency from tests +* Declare all correct dependencies with correct versions in setup.py + 0.0.3 (2016-06-16) ------------------ * Adds new functions: kmeans, weighted centroids. diff --git a/release/crankshaft--0.0.3--0.0.4.sql b/release/crankshaft--0.0.3--0.0.4.sql new file mode 100644 index 0000000..69038a3 --- /dev/null +++ b/release/crankshaft--0.0.3--0.0.4.sql @@ -0,0 +1,8 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.4'::text; +$$ language 'sql' STABLE STRICT; diff --git a/release/crankshaft--0.0.4--0.0.3.sql b/release/crankshaft--0.0.4--0.0.3.sql new file mode 100644 index 0000000..bd8ed82 --- /dev/null +++ b/release/crankshaft--0.0.4--0.0.3.sql @@ -0,0 +1,8 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.3'::text; +$$ language 'sql' STABLE STRICT; diff --git a/release/crankshaft--0.0.4.sql b/release/crankshaft--0.0.4.sql new file mode 100644 index 0000000..c855958 --- /dev/null +++ b/release/crankshaft--0.0.4.sql @@ -0,0 +1,403 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.4'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. 
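-- Editorial note (not part of the generated script): the seeding function
-- defined below is what makes the permutation-based Moran significance values
-- reproducible. The test suite calls it before exercising the public
-- functions, e.g.:
--   SELECT cdb_crankshaft._cdb_random_seeds(1234);
-- followed by a call such as (the table and column names here are hypothetical):
--   SELECT * FROM cdb_crankshaft.CDB_AreasOfInterestLocal('SELECT * FROM mytable', 'mycolumn');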
+CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, 
quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + 
denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" +); +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. 
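-- Example call (editorial sketch, not part of the patch; the table and column
-- names below are hypothetical, and the function is assumed to be reachable
-- through the cdb_crankshaft schema):
--   SELECT cdb_crankshaft.cdb_overlap_sum(p.the_geom, 'census_blocks', 'population')
--   FROM my_polygons p;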
+-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. 
+-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; diff --git a/release/crankshaft.control b/release/crankshaft.control index 2029b7e..01088b1 100644 --- a/release/crankshaft.control +++ b/release/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.0.3' -requires = 'plpythonu, postgis, cartodb' +default_version = '0.0.4' +requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft diff --git a/release/python/0.0.4/crankshaft/crankshaft/__init__.py b/release/python/0.0.4/crankshaft/crankshaft/__init__.py new file mode 100644 index 0000000..d07e330 --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/__init__.py @@ -0,0 +1,2 @@ +import random_seeds +import clustering diff --git a/release/python/0.0.4/crankshaft/crankshaft/clustering/__init__.py b/release/python/0.0.4/crankshaft/crankshaft/clustering/__init__.py new file mode 100644 index 0000000..338e8ea --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/clustering/__init__.py @@ -0,0 +1,2 @@ +from moran import * +from kmeans import * diff --git a/release/python/0.0.4/crankshaft/crankshaft/clustering/kmeans.py b/release/python/0.0.4/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..4134062 --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,18 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py b/release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py new file mode 100644 index 0000000..39b3ff6 --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py @@ -0,0 +1,260 @@ +""" +Moran's I geostatistics (global clustering & outliers presence) +""" + +# TODO: Fill in local neighbors which have null/NoneType values with the +# average of the their neighborhood + +import pysal as ps +import plpy + +# crankshaft module +import crankshaft.pysal_utils as pu + +# High level interface --------------------------------------- + +def moran(subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. 
+ Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": attr_name, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + attr_vals = pu.get_attributes(result) + + ## calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) + + return zip([moran_global.I], [moran_global.EI]) + +def moran_local(subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = {"id_col": id_col, + "attr1": attr, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(5) + + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) + Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": numerator, + "attr2": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) + + return zip([lisa_rate.I], [lisa_rate.EI]) + +def moran_local_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors + + query = 
pu.construct_neighbor_query(w_type, + {"id_col": id_col, + "numerator": numerator, + "denominator": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs}) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(5) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) + + # find units of significance + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_local_bv(subquery, attr1, attr2, + permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ + plpy.notice('** Constructing query') + + qvals = {"num_ngbrs": num_ngbrs, + "attr1": attr1, + "attr2": attr2, + "subquery": subquery, + "geom_col": geom_col, + "id_col": id_col} + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(4) + except plpy.SPIError: + plpy.error("Error: areas of interest query failed, " \ + "check input parameters") + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(4) + + ## collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + plpy.notice("len of Is: %d" % len(lisa.Is)) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + plpy.notice('** Finished calculations') + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + +# Low level functions ---------------------------------------- + +def map_quads(coord): + """ + Map a quadrant number to Moran's I designation + HH=1, LH=2, LL=3, HL=4 + Input: + @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') + """ + if coord == 1: + return 'HH' + elif coord == 2: + return 'LH' + elif coord == 3: + return 'LL' + elif coord == 4: + return 'HL' + else: + return None + +def quad_position(quads): + """ + Produce Moran's I classification based of n + Input: + @param quads ndarray: an array of quads classified by + 1-4 (PySAL default) + Output: + @param list: an array of quads classied by 'HH', 'LL', etc. 
+ """ + return [map_quads(q) for q in quads] diff --git a/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/__init__.py b/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/__init__.py new file mode 100644 index 0000000..835880d --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/__init__.py @@ -0,0 +1 @@ +from pysal_utils import * diff --git a/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/pysal_utils.py new file mode 100644 index 0000000..02b5e35 --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -0,0 +1,152 @@ +""" + Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects +""" + +import numpy as np +import pysal as ps + +def construct_neighbor_query(w_type, query_vals): + """Return query (a string) used for finding neighbors + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query + """ + + if w_type.lower() == 'knn': + return knn(query_vals) + else: + return queen(query_vals) + +## Build weight object +def get_weight(query_res, w_type='knn', num_ngbrs=5): + """ + Construct PySAL weight from return value of query + @param query_res: query results with attributes and neighbors + """ + if w_type.lower() == 'knn': + row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs + weights = {x['id']: row_normed_weights for x in query_res} + else: + weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) + if len(x['neighbors']) > 0 + else [] for x in query_res} + + neighbors = {x['id']: x['neighbors'] for x in query_res} + + return ps.W(neighbors, weights) + +def query_attr_select(params): + """ + Create portion of SELECT statement for attributes inolved in query. + @param params: dict of information used in query (column names, + table name, etc.) + """ + + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')] + + template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, " + + attr_string = "" + + for idx, val in enumerate(sorted(attrs)): + attr_string += template % {"col": val, "alias_num": idx + 1} + + return attr_string + +def query_attr_where(params): + """ + Create portion of WHERE clauses for weeding out NULL-valued geometries + """ + attrs = sorted([k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]) + + attr_string = [] + + for attr in attrs: + attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr) + + if len(attrs) == 2: + attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1]) + + out = " AND ".join(attr_string) + + return out + +def knn(params): + """SQL query for k-nearest neighbors. 
+ @param vars: dict of values to fill template + """ + + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE " \ + "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "%(attr_where_j)s " \ + "ORDER BY " \ + "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs})" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## SQL query for finding queens neighbors (all contiguous polygons) +def queen(params): + """SQL query for queen neighbors. + @param params dict: information to fill query + """ + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## to add more weight methods open a ticket or pull request + +def get_attributes(query_res, attr_num=1): + """ + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) + """ + return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) + +def empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/release/python/0.0.4/crankshaft/crankshaft/random_seeds.py b/release/python/0.0.4/crankshaft/crankshaft/random_seeds.py new file mode 100644 index 0000000..b7c8eed --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/random_seeds.py @@ -0,0 +1,10 @@ +import random +import numpy + +def set_random_seeds(value): + """ + Set the seeds of the RNGs (Random Number Generators) + used internally. 
+ """ + random.seed(value) + numpy.random.seed(value) diff --git a/release/python/0.0.4/crankshaft/setup.py b/release/python/0.0.4/crankshaft/setup.py new file mode 100644 index 0000000..32d1ead --- /dev/null +++ b/release/python/0.0.4/crankshaft/setup.py @@ -0,0 +1,48 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.4', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.0.4/crankshaft/test/fixtures/kmeans.json b/release/python/0.0.4/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git 
a/release/python/0.0.4/crankshaft/test/fixtures/moran.json b/release/python/0.0.4/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..2f75cf1 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], +[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], +[0.5724288737143592, "LL"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], +[0.18562939195219, "LL"], +[0.3010757406693439, "LL"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], +[0.18562939195219, "LL"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/release/python/0.0.4/crankshaft/test/fixtures/neighbors.json b/release/python/0.0.4/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 
34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/release/python/0.0.4/crankshaft/test/helper.py b/release/python/0.0.4/crankshaft/test/helper.py new file mode 100644 index 0000000..7d28b94 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/helper.py @@ -0,0 +1,13 @@ +import unittest + +from mock_plpy import MockPlPy +plpy = MockPlPy() + +import sys +sys.modules['plpy'] = plpy + +import os + +def fixture_file(name): + dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir, 'fixtures', name) diff --git a/release/python/0.0.4/crankshaft/test/mock_plpy.py b/release/python/0.0.4/crankshaft/test/mock_plpy.py new file mode 100644 index 0000000..63c88f6 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/mock_plpy.py @@ -0,0 +1,34 @@ +import re + +class MockPlPy: + def __init__(self): + self._reset() + + def _reset(self): + self.infos = [] + self.notices = [] + self.debugs = [] + self.logs = [] + self.warnings = [] + self.errors = [] + self.fatals = [] + self.executes = [] + self.results = [] + self.prepares = [] + self.results = [] + + def _define_result(self, query, result): + pattern = re.compile(query, re.IGNORECASE | re.MULTILINE) + self.results.append([pattern, result]) + + def notice(self, msg): + self.notices.append(msg) + + def info(self, msg): + self.infos.append(msg) + + def execute(self, query): # TODO: additional arguments + for result in self.results: + if result[0].match(query): + return result[1] + return [] diff --git a/release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py b/release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy 
import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for the k-means clustering function""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + diff --git a/release/python/0.0.4/crankshaft/test/test_clustering_moran.py b/release/python/0.0.4/crankshaft/test/test_clustering_moran.py new file mode 100644 index 0000000..393e93b --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/test_clustering_moran.py @@ -0,0 +1,83 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class MoranTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads(open(fixture_file('moran.json')).read()) + + def test_map_quads(self): + """Test map_quads""" + self.assertEqual(cc.map_quads(1), 'HH') + self.assertEqual(cc.map_quads(2), 'LH') + self.assertEqual(cc.map_quads(3), 'LL') + self.assertEqual(cc.map_quads(4), 'HL') + self.assertEqual(cc.map_quads(33), None) + self.assertEqual(cc.map_quads('andy'), None) + + def test_quad_position(self): + """Test lisa_sig_vals""" + + quads = np.array([1, 2, 3, 4], np.int) + + ans = np.array(['HH', 'LH', 'LL', 'HL']) + test_ans = cc.quad_position(quads) + + self.assertTrue((test_ans == ans).all()) + + def test_moran_local(self): + """Test Moran's I local""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None? 
', result == None + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1235) + result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None?', result == None + result_moran = result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) diff --git a/release/python/0.0.4/crankshaft/test/test_pysal_utils.py b/release/python/0.0.4/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..4ea0d9b --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,107 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans = "i.\"{attr1}\"::numeric As attr1, " \ + "i.\"{attr2}\"::numeric As attr2, " + + self.assertEqual(pu.query_attr_select(self.params), ans) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" <> 0" + + self.assertEqual(pu.query_attr_where(self.params), ans) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params), ans) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params), ans) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + 
pu.knn(self.params)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index e71321f..01088b1 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.0.3' +default_version = '0.0.4' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft From 1912d57891d539a15da5df8511f184e8e0c20771 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 21 Jun 2016 17:31:17 -0400 Subject: [PATCH 30/38] replacing dict with ordered dict --- .../crankshaft/crankshaft/clustering/moran.py | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 39b3ff6..103670f 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -7,6 +7,7 @@ Moran's I geostatistics (global clustering & outliers presence) import pysal as ps import plpy +from collections import OrderedDict # crankshaft module import crankshaft.pysal_utils as pu @@ -21,11 +22,11 @@ def moran(subquery, attr_name, core clusters with PySAL. Andy Eschbacher """ - qvals = {"id_col": id_col, - "attr1": attr_name, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) query = pu.construct_neighbor_query(w_type, qvals) @@ -65,11 +66,11 @@ def moran_local(subquery, attr, # geometries with attributes that are null are ignored # resulting in a collection of not as near neighbors - qvals = {"id_col": id_col, - "attr1": attr, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) query = pu.construct_neighbor_query(w_type, qvals) @@ -101,12 +102,12 @@ def moran_rate(subquery, numerator, denominator, Moran's I Rate (global) Andy Eschbacher """ - qvals = {"id_col": id_col, - "attr1": numerator, - "attr2": denominator, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} + qvals = OrderedDict([("id_col", id_col), + ("attr1", numerator), + ("attr2", denominator), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) query = pu.construct_neighbor_query(w_type, qvals) @@ -145,13 +146,14 @@ def moran_local_rate(subquery, numerator, denominator, # geometries with values that are null are ignored # resulting in a collection of not as near neighbors - query = pu.construct_neighbor_query(w_type, - {"id_col": id_col, - "numerator": numerator, - "denominator": denominator, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs}) + qvals = OrderedDict([("id_col", id_col), + ("numerator", numerator), + ("denominator", denominator), + ("geom_col": geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = 
pu.construct_neighbor_query(w_type, qvals) try: result = plpy.execute(query) @@ -186,12 +188,12 @@ def moran_local_bv(subquery, attr1, attr2, """ plpy.notice('** Constructing query') - qvals = {"num_ngbrs": num_ngbrs, - "attr1": attr1, - "attr2": attr2, - "subquery": subquery, - "geom_col": geom_col, - "id_col": id_col} + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr1), + ("attr2", attr2), + ("geom_col": geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) query = pu.construct_neighbor_query(w_type, qvals) From 7c4314a4113baf852e260af4995e77ca8f8a4a9e Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 21 Jun 2016 17:38:49 -0400 Subject: [PATCH 31/38] fix tuple colon --- src/py/crankshaft/crankshaft/clustering/moran.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 103670f..08fe127 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -149,7 +149,7 @@ def moran_local_rate(subquery, numerator, denominator, qvals = OrderedDict([("id_col", id_col), ("numerator", numerator), ("denominator", denominator), - ("geom_col": geom_col), + ("geom_col", geom_col), ("subquery", subquery), ("num_ngbrs", num_ngbrs)]) @@ -191,7 +191,7 @@ def moran_local_bv(subquery, attr1, attr2, qvals = OrderedDict([("id_col", id_col), ("attr1", attr1), ("attr2", attr2), - ("geom_col": geom_col), + ("geom_col", geom_col), ("subquery", subquery), ("num_ngbrs", num_ngbrs)]) From b62d7b32efdb5c96f642a4947fee12f51dd489b9 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 21 Jun 2016 17:41:52 -0400 Subject: [PATCH 32/38] fix variable name --- src/py/crankshaft/crankshaft/clustering/moran.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 08fe127..4bced89 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -67,7 +67,7 @@ def moran_local(subquery, attr, # resulting in a collection of not as near neighbors qvals = OrderedDict([("id_col", id_col), - ("attr1", attr_name), + ("attr1", attr), ("geom_col", geom_col), ("subquery", subquery), ("num_ngbrs", num_ngbrs)]) From 81d7af9e9aeaaedda2e12b3e454d61b0a28286f3 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 22 Jun 2016 15:21:09 -0400 Subject: [PATCH 33/38] fixes return problem --- src/pg/sql/08_interpolation.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pg/sql/08_interpolation.sql b/src/pg/sql/08_interpolation.sql index 04f1584..76fad01 100644 --- a/src/pg/sql/08_interpolation.sql +++ b/src/pg/sql/08_interpolation.sql @@ -14,9 +14,12 @@ $$ DECLARE gs geometry[]; vs numeric[]; + output numeric; BEGIN EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; - RETURN QUERY SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) FROM a; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; END; $$ language plpgsql IMMUTABLE; From 6f72075999b3d3887018d9574e4e25abd5214873 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 22 Jun 2016 16:50:10 -0400 Subject: [PATCH 34/38] altering test outputs for less formatting --- src/pg/test/expected/08_interpolation_test.out | 6 ++---- src/pg/test/sql/08_interpolation_test.sql | 5 ++++- 2 
files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index 42d24cb..b927f63 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -1,4 +1,2 @@ - cdb_spatialinterpolation --------------------------- - 780.79470198683925288365 -(1 row) +cdb_spatialinterpolation +t diff --git a/src/pg/test/sql/08_interpolation_test.sql b/src/pg/test/sql/08_interpolation_test.sql index c8db89d..43e7ee9 100644 --- a/src/pg/test/sql/08_interpolation_test.sql +++ b/src/pg/test/sql/08_interpolation_test.sql @@ -1,6 +1,9 @@ +\pset format unaligned +\set ECHO all + WITH a AS ( SELECT ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g ) -SELECT CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'),1) FROM a; +SELECT (cdb_crankshaft.CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'), 1) - 780.79470198683925288365) / 780.79470198683925288365 < 0.001 FROM a; From 6a9045ba62551e4db3637ad99acad52d591092bd Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 22 Jun 2016 16:56:35 -0400 Subject: [PATCH 35/38] updating test outputs --- src/pg/test/expected/08_interpolation_test.out | 8 ++++++++ src/pg/test/sql/08_interpolation_test.sql | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index b927f63..49566db 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -1,2 +1,10 @@ +\pset format unaligned +\set ECHO all + +WITH a AS ( + SELECT + ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g +) cdb_spatialinterpolation t diff --git a/src/pg/test/sql/08_interpolation_test.sql b/src/pg/test/sql/08_interpolation_test.sql index 43e7ee9..ba8968f 100644 --- a/src/pg/test/sql/08_interpolation_test.sql +++ b/src/pg/test/sql/08_interpolation_test.sql @@ -6,4 +6,4 @@ WITH a AS ( ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g ) -SELECT (cdb_crankshaft.CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'), 1) - 780.79470198683925288365) / 780.79470198683925288365 < 0.001 FROM a; +SELECT (cdb_crankshaft.CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'), 1) - 780.79470198683925288365) / 780.79470198683925288365 < 0.001 As cdb_spatialinterpolation FROM a; From 3f210c2a71b02b5b8b6a527b79514a9c19260ce3 Mon Sep 17 00:00:00 2001 
From: Andy Eschbacher Date: Wed, 22 Jun 2016 17:08:50 -0400 Subject: [PATCH 36/38] reducing amt of text in outputs --- src/pg/test/expected/08_interpolation_test.out | 10 ++-------- src/pg/test/sql/08_interpolation_test.sql | 3 ++- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index 49566db..bb8c73e 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -1,10 +1,4 @@ -\pset format unaligned -\set ECHO all - -WITH a AS ( - SELECT - ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, - ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g -) +SET client_min_messages TO WARNING; +\set ECHO none cdb_spatialinterpolation t diff --git a/src/pg/test/sql/08_interpolation_test.sql b/src/pg/test/sql/08_interpolation_test.sql index ba8968f..bd9c729 100644 --- a/src/pg/test/sql/08_interpolation_test.sql +++ b/src/pg/test/sql/08_interpolation_test.sql @@ -1,5 +1,6 @@ +SET client_min_messages TO WARNING; +\set ECHO none \pset format unaligned -\set ECHO all WITH a AS ( SELECT From 2fa087bb62544cda84931f8c39233074572c9564 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 22 Jun 2016 17:11:51 -0400 Subject: [PATCH 37/38] adding row info :/ --- src/pg/test/expected/08_interpolation_test.out | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index bb8c73e..635ca2a 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -2,3 +2,4 @@ SET client_min_messages TO WARNING; \set ECHO none cdb_spatialinterpolation t +(1 row) From faa899cf8707adab4fc49834afa0a4d566abb481 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Thu, 23 Jun 2016 10:11:59 +0200 Subject: [PATCH 38/38] Fix installation for development mode --- src/py/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/Makefile b/src/py/Makefile index b584645..cc3c67e 100644 --- a/src/py/Makefile +++ b/src/py/Makefile @@ -2,7 +2,7 @@ include ../../Makefile.global # Install the package locally for development install: - pip install ./crankshaft + pip install --upgrade ./crankshaft # Test develpment install test: