From 0400b1a88000c77fde34a731d60e3be04da96b01 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 13:23:56 -0400 Subject: [PATCH 01/38] adding template for code reviews --- .github/PULL_REQUEST_TEMPLATE.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..941542d --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,6 @@ + +- [ ] All declared geometries are `geometry(Geometry, 4326)` for general geoms, or `geometry(Point, 4326)` +- [ ] Include python is activated for new functions. Include this before importing modules: `plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')` +- [ ] Docs for public-facing functions are written +- [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore `_`. + From ca5175f15b5dc1a2ad5822056eb8e29f9db1716d Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Fri, 20 May 2016 16:26:43 -0400 Subject: [PATCH 02/38] adding reference to subquery argument requirement --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 941542d..882cece 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,4 +3,5 @@ - [ ] Include python is activated for new functions. Include this before importing modules: `plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')` - [ ] Docs for public-facing functions are written - [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore `_`. 
+- [ ] If appropriate, new functions accepts an arbitrary query as an input (see [Crankshaft Issue #6](https://github.com/CartoDB/crankshaft/issues/6) for more information) From 4782d39849da034d272f7b4666cc27c9a4a1c41c Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 3 Jun 2016 10:36:21 -0400 Subject: [PATCH 03/38] stubbing out gravity model --- doc/07_gravity.md | 40 +++++++++++ src/pg/sql/07_gravity.sql | 84 ++++++++++++++++++++++++ src/pg/test/expected/07_gravity_test.out | 0 src/pg/test/sql/07_gravity_test.sql | 1 + 4 files changed, 125 insertions(+) create mode 100644 doc/07_gravity.md create mode 100644 src/pg/sql/07_gravity.sql create mode 100644 src/pg/test/expected/07_gravity_test.out create mode 100644 src/pg/test/sql/07_gravity_test.sql diff --git a/doc/07_gravity.md b/doc/07_gravity.md new file mode 100644 index 0000000..0ce2532 --- /dev/null +++ b/doc/07_gravity.md @@ -0,0 +1,40 @@ +## Gravity Model + +### CDB_Gravity() + +The Gravity Model is derived from newtons law of gravity and is used to estimate the degree of interaction between two places + +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| t_id | bigint[] | | +| t_geom | geometry[] | | +| t_weight | numeric[] | | +| s_id | bigint[] | | +| s_geom | geometry[] | | +| s_pop | numeric[] | | +| target | bigint | | +| radius | integer | | +| minval | numeric | | + + +#### Returns + +| Column Name | Type | Description | +|-------------|------|-------------| +| the_geom | Numeric | | +| source_id | bigint | | +| target_id | bigint | | +| dist | Numeric | | +| n | Numeric | | +| hpop | NUMERIC | | + + +#### Example Usage + +```sql +SELECT CDB_GRAVITY (); +``` + + diff --git a/src/pg/sql/07_gravity.sql b/src/pg/sql/07_gravity.sql new file mode 100644 index 0000000..7c4e220 --- /dev/null +++ b/src/pg/sql/07_gravity.sql @@ -0,0 +1,84 @@ +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN t_id bigint[], + IN t_geom geometry[], + IN t_weight numeric[], + IN s_id bigint[], + IN s_geom geometry[], + IN s_pop numeric[], + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_type text; + s_type text; + t_center geometry[]; + s_center geometry[]; +BEGIN + t_type := GeometryType(t_geom[1]); + s_type := GeometryType(s_geom[1]); + IF t_type = 'POINT' THEN + t_center := t_geom; + ELSE + WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; + END IF; + IF s_type = 'POINT' THEN + s_center := s_geom; + ELSE + WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; + END IF; + RETURN QUERY + with target0 as( + SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td + ), + source0 as( + SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp + ), + prev0 as( + SELECT + source0.sg, + source0.sd as sourc_id, + coalesce(source0.sp,0) as sp, + target.td as targ_id, + coalesce(target.tw,0) as tw, + GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance + FROM source0 + CROSS JOIN LATERAL + ( + SELECT + * + FROM target0 + WHERE tw > minval + AND ST_DWithin(geography(source0.sc), geography(tc), radius) + ) AS target + ), + deno as( + SELECT + sourc_id, + sum(tw/distance) as h_deno + FROM + prev0 + GROUP BY sourc_id + ) + SELECT + p.sg as the_geom, + p.sourc_id as source_id, + 
p.targ_id as target_id, + case when p.distance > 1 then p.distance else 0.0 end as dist, + 100*(p.tw/p.distance)/d.h_deno as h, + p.sp*(p.tw/p.distance)/d.h_deno as hpop + FROM + prev0 p, + deno d + WHERE + p.targ_id = target AND + p.sourc_id = d.sourc_id; +END; +$$ language plpgsql; diff --git a/src/pg/test/expected/07_gravity_test.out b/src/pg/test/expected/07_gravity_test.out new file mode 100644 index 0000000..e69de29 diff --git a/src/pg/test/sql/07_gravity_test.sql b/src/pg/test/sql/07_gravity_test.sql new file mode 100644 index 0000000..22d22dd --- /dev/null +++ b/src/pg/test/sql/07_gravity_test.sql @@ -0,0 +1 @@ +select * form CDB_Gravity() From 73d38bbbaac66c8dacd9f90cc6acca6a92a4a171 Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 3 Jun 2016 17:32:39 +0200 Subject: [PATCH 04/38] filling the gaps --- doc/07_gravity.md | 68 ++++++++++++++++-------- src/pg/test/expected/07_gravity_test.out | 11 ++++ src/pg/test/sql/07_gravity_test.sql | 22 +++++++- 3 files changed, 78 insertions(+), 23 deletions(-) diff --git a/doc/07_gravity.md b/doc/07_gravity.md index 0ce2532..9c8d5e4 100644 --- a/doc/07_gravity.md +++ b/doc/07_gravity.md @@ -1,40 +1,64 @@ -## Gravity Model +## Gravity Model -### CDB_Gravity() +### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307) -The Gravity Model is derived from newtons law of gravity and is used to estimate the degree of interaction between two places +Gravity Models are derived from Newton's Law of Gravity and are used to predict the interaction between a group of populated areas (sources) and a specific target among a group of potential targets, in terms of an attraction factor (weight) -#### Arguments +**CDB_Gravity** is based on the model defined in *Huff's Law of Shopper attraction (1963)* -| Name | Type | Description | +#### Arguments + +| Name | Type | Description | |------|------|-------------| -| t_id | bigint[] | | -| t_geom | geometry[] | | -| t_weight | numeric[] | | -| s_id | bigint[] | | -| s_geom | geometry[] | | -| s_pop | numeric[] | | -| target | bigint | | -| radius | integer | | -| minval | numeric | | +| t_id | bigint[] | Array of targets ID | +| t_geom | geometry[] | Array of targets' geometries | +| t_weight | numeric[] | Array of targets's weights | +| s_id | bigint[] | Array of sources ID | +| s_geom | geometry[] | Array of sources' geometries | +| s_pop | numeric[] | Array of sources's population | +| target | bigint | ID of the target under study | +| radius | integer | Radius in meters around the target under study that will be taken into account| +| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | -#### Returns +#### Returns | Column Name | Type | Description | |-------------|------|-------------| -| the_geom | Numeric | | -| source_id | bigint | | -| target_id | bigint | | -| dist | Numeric | | -| n | Numeric | | -| hpop | NUMERIC | | +| the_geom | geometry | Geometries of the sources within the radius | +| source_id | bigint | ID of the source | +| target_id | bigint | Target ID from input | +| dist | numeric | Distance in meters source to target (if not points, distance between centroids) | +| h | numeric | Probability of patronage | +| hpop | numeric | Patronaging population | #### Example Usage ```sql -SELECT CDB_GRAVITY (); +with t as ( +SELECT + array_agg(cartodb_id::bigint) as id, + array_agg(the_geom) as g, + array_agg(coalesce(gla,0)::numeric) as w 
+FROM + abel.centros_comerciales_de_madrid +WHERE not no_cc +), +s as ( +SELECT + array_agg(cartodb_id::bigint) as id, + array_agg(center) as g, + array_agg(coalesce(t1_1, 0)::numeric) as p +FROM + sscc_madrid +) +select + g.the_geom, + trunc(g.h,2) as h, + round(g.hpop) as hpop, + trunc(g.dist/1000,2) as dist_km +FROM t, s, CDB_Gravity1(t.id, t.g, t.w, s.id, s.g, s.p, newmall_ID, 100000, 5000) g ``` diff --git a/src/pg/test/expected/07_gravity_test.out b/src/pg/test/expected/07_gravity_test.out index e69de29..c101b24 100644 --- a/src/pg/test/expected/07_gravity_test.out +++ b/src/pg/test/expected/07_gravity_test.out @@ -0,0 +1,11 @@ + the_geom | h | hpop | dist +--------------------------------------------+-------------------------+--------------------------+---------------- + 01010000001361C3D32B650140DD24068195B34440 | 1.51078258369747945249 | 12.08626066957983561994 | 4964.714459152 + 01010000002497FF907EFB0040713D0AD7A3B04440 | 98.29730954183620807430 | 688.08116679285345652007 | 99.955141922 + 0101000000A167B3EA733501401D5A643BDFAF4440 | 63.70532894711274639196 | 382.23197368267647835174 | 2488.330566505 + 010100000062A1D634EF380140BE9F1A2FDDB44440 | 35.35415870080995954879 | 176.77079350404979774397 | 4359.370460594 + 010100000052B81E85EB510140355EBA490CB24440 | 33.12290506987740864904 | 132.49162027950963459615 | 3703.664449828 + 0101000000C286A757CA320140736891ED7CAF4440 | 65.45251754279248087849 | 196.35755262837744263547 | 2512.092358644 + 01010000007DD0B359F5390140C976BE9F1AAF4440 | 62.83927792471345639225 | 125.67855584942691278449 | 2926.25725244 + 0101000000D237691A140D01407E6FD39FFDB44440 | 53.54905726651871279586 | 53.54905726651871279586 | 3744.515577777 +(8 rows) diff --git a/src/pg/test/sql/07_gravity_test.sql b/src/pg/test/sql/07_gravity_test.sql index 22d22dd..a86bb23 100644 --- a/src/pg/test/sql/07_gravity_test.sql +++ b/src/pg/test/sql/07_gravity_test.sql @@ -1 +1,21 @@ -select * form CDB_Gravity() +WITH t AS ( + SELECT + ARRAY[1,2,3] AS id, + ARRAY[7.0,8.0,3.0] AS w, + ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)')] AS g +), +s AS ( + SELECT + ARRAY[10,20,30,40,50,60,70,80] AS id, + ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS p, + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g +) +SELECT + g.the_geom, + g.h, + g.hpop, + g.dist +FROM + t, + s, + CDB_Gravity(t.id, t.g, t.w, s.id, s.g, s.p, 2, 100000, 3) g; From 5183f5ff92628f289d75477a7da2cb9f75260710 Mon Sep 17 00:00:00 2001 From: abelvm Date: Fri, 3 Jun 2016 18:22:26 +0200 Subject: [PATCH 05/38] added function overload with subqueries input --- doc/07_gravity.md | 20 +++++++++++++++++--- src/pg/sql/07_gravity.sql | 31 +++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/doc/07_gravity.md b/doc/07_gravity.md index 9c8d5e4..e4e439e 100644 --- a/doc/07_gravity.md +++ b/doc/07_gravity.md @@ -1,11 +1,11 @@ ## Gravity Model -### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307) - Gravity Models are derived from Newton's Law of Gravity and are used to predict the interaction between a 
group of populated areas (sources) and a specific target among a group of potential targets, in terms of an attraction factor (weight) **CDB_Gravity** is based on the model defined in *Huff's Law of Shopper attraction (1963)* +### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307) + #### Arguments | Name | Type | Description | @@ -20,8 +20,22 @@ Gravity Models are derived from Newton's Law of Gravity and are used to predict | radius | integer | Radius in meters around the target under study that will be taken into account| | minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | +### CDB_Gravity( target_query text, weight_column text, source_query text, pop_column text, target bigint, radius integer, minval numeric DEFAULT -10e307) -#### Returns +#### Arguments + +| Name | Type | Description | +|------|------|-------------| +| target_query | text | Query that defines targets | +| weight_column | text | Column name of weights | +| source_query | text | Query that defines sources | +| pop_column | text | Column name of population | +| target | bigint | cartodb_id of the target under study | +| radius | integer | Radius in meters around the target under study that will be taken into account| +| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | + + +### Returns | Column Name | Type | Description | |-------------|------|-------------| diff --git a/src/pg/sql/07_gravity.sql b/src/pg/sql/07_gravity.sql index 7c4e220..47e5b8e 100644 --- a/src/pg/sql/07_gravity.sql +++ b/src/pg/sql/07_gravity.sql @@ -1,3 +1,34 @@ +CREATE OR REPLACE FUNCTION CDB_Gravity( + IN target_query text, + IN weight_column text, + IN source_query text, + IN pop_column text, + IN target bigint, + IN radius integer, + IN minval numeric DEFAULT -10e307 + ) +RETURNS TABLE( + the_geom geometry, + source_id bigint, + target_id bigint, + dist numeric, + h numeric, + hpop numeric) AS $$ +DECLARE + t_id bigint[]; + t_geom geometry[]; + t_weight numeric[]; + s_id bigint[]; + s_geom geometry[]; + s_pop numeric[]; +BEGIN + EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; + EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; + RETURN QUERY + SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; +END; +$$ language plpgsql; + CREATE OR REPLACE FUNCTION CDB_Gravity( IN t_id bigint[], IN t_geom geometry[], From 4e86965f033f14252ded35534bbd2c911e104da7 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Jun 2016 19:58:32 +0000 Subject: [PATCH 06/38] KMeans clustering and weighted centroid analysis --- doc/11_kmeans.md | 62 +++++++++++++++++++ src/pg/sql/11_kmeans.sql | 31 ++++++++++ src/pg/test/expected/05_kmeans_test.out | 10 +++ src/pg/test/sql/05_kmeans_test.sql | 6 ++ .../crankshaft/clustering/__init__.py | 1 + .../crankshaft/clustering/kmeans.py | 17 +++++ src/py/crankshaft/test/fixtures/kmeans.json | 1 + src/py/crankshaft/test/test_cluster_kmeans.py | 38 ++++++++++++ 8 files changed, 166 insertions(+) create mode 100644 doc/11_kmeans.md create mode 100644 src/pg/sql/11_kmeans.sql create mode 100644 src/pg/test/expected/05_kmeans_test.out create mode 100644 
src/pg/test/sql/05_kmeans_test.sql
 create mode 100644 src/py/crankshaft/crankshaft/clustering/kmeans.py
 create mode 100644 src/py/crankshaft/test/fixtures/kmeans.json
 create mode 100644 src/py/crankshaft/test/test_cluster_kmeans.py

diff --git a/doc/11_kmeans.md b/doc/11_kmeans.md
new file mode 100644
index 0000000..6153010
--- /dev/null
+++ b/doc/11_kmeans.md
@@ -0,0 +1,62 @@
+## K-Means Functions
+
+### CDB_KMeans(subquery text, no_clusters INTEGER)
+
+This function attempts to find n clusters within the input data. It will return a table of CartoDB ids and
+the number of the cluster each point in the input was assigned to.
+
+#### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
+| no\_clusters | INTEGER | The number of clusters to find |
+
+#### Returns
+
+A table with the following columns.
+
+| Column Name | Type | Description |
+|-------------|------|-------------|
+| cartodb\_id | INTEGER | The CartoDB id of the row in the input table. |
+| cluster\_no | INTEGER | The cluster that this point belongs to. |
+
+#### Example Usage
+
+```sql
+SELECT
+  customers.*,
+  km.cluster_no
+  FROM cdb_crankshaft.CDB_KMeans('SELECT * FROM customers', 6) km, customers
+  WHERE customers.cartodb_id = km.cartodb_id
+```
+
+### CDB_WeightedMean(subquery text, weight_column text, category_column text)
+
+Function that computes the weighted centroid of each cluster (category), weighted by the given weight column.
+
+### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column and the columns specified as the weight and category columns |
+| weight\_column | TEXT | The name of the column to use as a weight |
+| category\_column | TEXT | The name of the column to use as a category |
+
+### Returns
+
+A table with the following columns.
+ +| Column Name | Type | Description | +|-------------|------|-------------| +| the\_geom | GEOMETRY | A point for the weighted cluster center | +| class | INTEGER | The cluster class | + +### Example Usage + +```sql +SELECT ST_TRANSFORM(the_geom, 3857) as the_geom_webmercator, class +FROM cdb_weighted_mean('SELECT *, customer_value FROM customers','customer_value','cluster_no') +``` diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql new file mode 100644 index 0000000..73e2f1d --- /dev/null +++ b/src/pg/sql/11_kmeans.sql @@ -0,0 +1,31 @@ +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + import plpy + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + +CREATE OR REPLACE FUNCTION CDB_WeightedMean(query text, weight_column text, category_column text default null ) +RETURNS table (the_geom geometry,class integer ) as $$ +BEGIN + +RETURN QUERY + EXECUTE format( $string$ + select ST_SETSRID(st_makepoint(cx, cy),4326) the_geom, class from ( + select + %I as class, + sum(st_x(the_geom)*%I)/sum(%I) cx, + sum(st_y(the_geom)*%I)/sum(%I) cy + from (%s) a + group by %I + ) q + + $string$, category_column, weight_column,weight_column,weight_column,weight_column,query, category_column + ) + using the_geom + RETURN; +END +$$ LANGUAGE plpgsql; diff --git a/src/pg/test/expected/05_kmeans_test.out b/src/pg/test/expected/05_kmeans_test.out new file mode 100644 index 0000000..4e6db09 --- /dev/null +++ b/src/pg/test/expected/05_kmeans_test.out @@ -0,0 +1,10 @@ +\pset format unaligned +\set ECHO all +SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2); +clusters +2 +(1 row) +SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' ); +clusters +52 +(1 row) diff --git a/src/pg/test/sql/05_kmeans_test.sql b/src/pg/test/sql/05_kmeans_test.sql new file mode 100644 index 0000000..a400e5e --- /dev/null +++ b/src/pg/test/sql/05_kmeans_test.sql @@ -0,0 +1,6 @@ +\pset format unaligned +\set ECHO all + +SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2); + +SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' ); diff --git a/src/py/crankshaft/crankshaft/clustering/__init__.py b/src/py/crankshaft/crankshaft/clustering/__init__.py index 0df080f..338e8ea 100644 --- a/src/py/crankshaft/crankshaft/clustering/__init__.py +++ b/src/py/crankshaft/crankshaft/clustering/__init__.py @@ -1 +1,2 @@ from moran import * +from kmeans import * diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..3d9ed58 --- /dev/null +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,17 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, 
n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/src/py/crankshaft/test/fixtures/kmeans.json b/src/py/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/src/py/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git a/src/py/crankshaft/test/test_cluster_kmeans.py b/src/py/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/src/py/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + From e95c40c2f9bea57156449276d80603646cd4317d Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 9 Jun 2016 11:27:15 +0200 Subject: [PATCH 07/38] Ignore idea based configurations --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.gitignore b/.gitignore index 1161ea2..a09b4fe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ envs/ *.pyc .DS_Store +.idea/ From 7f3b23f67a958faa9162efef2362cc13a1e665ff Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 10 Jun 2016 13:06:49 +0000 Subject: [PATCH 08/38] reworking CDB_WeightedMean to be an aggregate function --- src/pg/sql/11_kmeans.sql | 60 +++++++++++++++++-------- src/pg/test/expected/05_kmeans_test.out | 2 +- src/pg/test/sql/05_kmeans_test.sql | 2 +- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 73e2f1d..87f07ea 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -8,24 +8,46 @@ RETURNS table (cartodb_id integer, cluster_no integer) as $$ $$ language plpythonu; -CREATE OR REPLACE FUNCTION CDB_WeightedMean(query text, weight_column text, category_column text default null ) -RETURNS table (the_geom geometry,class integer ) as $$ -BEGIN -RETURN QUERY - EXECUTE format( $string$ - select ST_SETSRID(st_makepoint(cx, cy),4326) the_geom, class from ( - select - %I as class, - sum(st_x(the_geom)*%I)/sum(%I) cx, - sum(st_y(the_geom)*%I)/sum(%I) cy - from (%s) a - group by %I - ) q - - $string$, category_column, weight_column,weight_column,weight_column,weight_column,query, category_column - ) - using the_geom - RETURN; -END +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END $$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE AGGREGATE CDB_WeightedMean(the_geom geometry(Point, 4326), weight NUMERIC)( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" +); + + diff --git a/src/pg/test/expected/05_kmeans_test.out b/src/pg/test/expected/05_kmeans_test.out index 4e6db09..8c6ffa1 100644 --- a/src/pg/test/expected/05_kmeans_test.out +++ b/src/pg/test/expected/05_kmeans_test.out @@ -4,7 +4,7 @@ SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('se clusters 2 (1 row) -SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' ); +SELECT count(*) clusters from (select cdb_crankshaft.CDB_WeightedMean(the_geom, value::NUMERIC), code from ppoints group by code) p; clusters 52 (1 row) diff --git a/src/pg/test/sql/05_kmeans_test.sql b/src/pg/test/sql/05_kmeans_test.sql index a400e5e..2298b85 100644 --- a/src/pg/test/sql/05_kmeans_test.sql +++ b/src/pg/test/sql/05_kmeans_test.sql @@ -3,4 +3,4 @@ SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2); -SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' ); +SELECT count(*) clusters from (select cdb_crankshaft.CDB_WeightedMean(the_geom, value::NUMERIC), code from 
ppoints group by code) p; From 9d3de5a8ef13be63539248f3e4d82d7b4b68df9d Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 10 Jun 2016 13:12:55 +0000 Subject: [PATCH 09/38] adding not null filter for geom on kmeans --- src/py/crankshaft/crankshaft/clustering/kmeans.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 3d9ed58..4134062 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -5,6 +5,7 @@ def kmeans(query, no_clusters, no_init=20): data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, array_agg(ST_X(the_geom) order by cartodb_id) xs, array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null '''.format(query=query)) xs = data[0]['xs'] From 1a4944b9600250a972458bfe1952f79ffce76ff2 Mon Sep 17 00:00:00 2001 From: Stuart Lynn Date: Fri, 10 Jun 2016 13:16:16 +0000 Subject: [PATCH 10/38] adding sklearn as a dep --- src/py/crankshaft/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 8d5e622..baa88e3 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,9 +40,9 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['pysal==1.9.1'], + install_requires=['pysal==1.9.1', 'sklearn==0.17.1'], - requires=['pysal', 'numpy' ], + requires=['pysal', 'numpy', 'sklearn' ], test_suite='test' ) From 889cd5c5791d2f87e35b3e510b7c7ac14eac9fcf Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Fri, 10 Jun 2016 17:47:46 +0200 Subject: [PATCH 11/38] Fix scikit-learn dep name --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index baa88e3..68f9e17 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,7 +40,7 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. 
- install_requires=['pysal==1.9.1', 'sklearn==0.17.1'], + install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], requires=['pysal', 'numpy', 'sklearn' ], From b33ba2d2949ab0bef092f25acf82d2308775a2a5 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Fri, 10 Jun 2016 18:24:43 +0200 Subject: [PATCH 12/38] Do not use names for the aggregate params --- src/pg/sql/11_kmeans.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index 87f07ea..a27f803 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -43,11 +43,9 @@ BEGIN END $$ LANGUAGE plpgsql; -CREATE AGGREGATE CDB_WeightedMean(the_geom geometry(Point, 4326), weight NUMERIC)( +CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)( SFUNC = CDB_WeightedMeanS, FINALFUNC = CDB_WeightedMeanF, STYPE = Numeric[], INITCOND = "{0.0,0.0,0.0}" ); - - From 1e8bc12e0a6ea2ffefe580b63133b88f4db045a7 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Mon, 13 Jun 2016 12:17:46 +0200 Subject: [PATCH 13/38] Declare scipy as dep --- src/py/crankshaft/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 68f9e17..e787d32 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,9 +40,9 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'], - requires=['pysal', 'numpy', 'sklearn' ], + requires=['scipy', 'pysal', 'numpy', 'sklearn'], test_suite='test' ) From c870f68c77652a11f8401bbbb981797694174288 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Mon, 13 Jun 2016 13:05:50 +0200 Subject: [PATCH 14/38] Revert "Declare scipy as dep" This reverts commit 1e8bc12e0a6ea2ffefe580b63133b88f4db045a7. --- src/py/crankshaft/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index e787d32..68f9e17 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,9 +40,9 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], - requires=['scipy', 'pysal', 'numpy', 'sklearn'], + requires=['pysal', 'numpy', 'sklearn' ], test_suite='test' ) From fd1862167c123ad7e59906801027e06c88fbf90e Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Mon, 13 Jun 2016 13:06:21 +0200 Subject: [PATCH 15/38] Remove trailing space --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 68f9e17..04822dd 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -42,7 +42,7 @@ setup( # provisioned in the production servers. 
install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'],
 
-    requires=['pysal', 'numpy', 'sklearn' ],
+    requires=['pysal', 'numpy', 'sklearn'],
 
     test_suite='test'
 )

From 9db4b7f5192c3f946bc6c6dab4a956e3e9a39d16 Mon Sep 17 00:00:00 2001
From: abelvm
Date: Tue, 14 Jun 2016 17:55:45 +0200
Subject: [PATCH 16/38] first commit

---
 doc/08_interpolation.md | 51 +++++++
 src/pg/sql/08_interpolation.sql | 127 ++++++++++++++++++
 .../test/expected/08_interpolation_test.out | 4 +
 src/pg/test/sql/08_interpolation_test.sql | 6 +
 4 files changed, 188 insertions(+)
 create mode 100644 doc/08_interpolation.md
 create mode 100644 src/pg/sql/08_interpolation.sql
 create mode 100644 src/pg/test/expected/08_interpolation_test.out
 create mode 100644 src/pg/test/sql/08_interpolation_test.sql

diff --git a/doc/08_interpolation.md b/doc/08_interpolation.md
new file mode 100644
index 0000000..22fc1bc
--- /dev/null
+++ b/doc/08_interpolation.md
@@ -0,0 +1,51 @@
+## Spatial interpolation
+
+Function to interpolate the value of a numeric attribute at a given point from a scattered dataset of points, using one of three methods:
+
+* [Nearest neighbor](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
+* [Barycentric](https://en.wikipedia.org/wiki/Barycentric_coordinate_system)
+* [IDW](https://en.wikipedia.org/wiki/Inverse_distance_weighting)
+
+### CDB_SpatialInterpolation (query text, point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, p2 integer DEFAULT 0)
+
+#### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| query | text | Query that returns at least `the_geom` and a numeric value as `attrib` |
+| point | geometry | The target point at which to calculate the value |
+| method | integer | 0: nearest neighbor, 1: barycentric, 2: IDW|
+| p1 | integer | IDW: limit the number of neighbors, 0->no limit|
+| p2 | integer | IDW: order of distance decay, 0-> order 1|
+
+### CDB_SpatialInterpolation (geom geometry[], values numeric[], point geometry, method integer DEFAULT 1, p1 integer DEFAULT 0, p2 integer DEFAULT 0)
+
+#### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| geom | geometry[] | Array of the points' geometries |
+| values | numeric[] | Array of the points' values for the parameter under study|
+| point | geometry | The target point at which to calculate the value |
+| method | integer | 0: nearest neighbor, 1: barycentric, 2: IDW|
+| p1 | integer | IDW: limit the number of neighbors, 0->no limit|
+| p2 | integer | IDW: order of distance decay, 0-> order 1|
+
+### Returns
+
+| Column Name | Type | Description |
+|-------------|------|-------------|
+| value | numeric | Interpolated value at the given point, `-888.888` if the given point is outside the boundaries of the source point set |
+
+#### Example Usage
+
+```sql
+with a as (
+    select
+        array_agg(the_geom) as geomin,
+        array_agg(temp::numeric) as colin
+    from table_4804232032
+)
+SELECT CDB_SpatialInterpolation(geomin, colin, CDB_latlng(41.38, 2.15),1) FROM a;
+```
diff --git a/src/pg/sql/08_interpolation.sql b/src/pg/sql/08_interpolation.sql
new file mode 100644
index 0000000..04f1584
--- /dev/null
+++ b/src/pg/sql/08_interpolation.sql
@@ -0,0 +1,127 @@
+-- 0: nearest neighbor
+-- 1: barycentric
+-- 2: IDW
+
+CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
+    IN query text,
+    IN point geometry,
+    IN method integer DEFAULT 1,
+    IN p1 numeric DEFAULT 0,
+    IN p2 numeric DEFAULT 0
+    )
+RETURNS numeric AS
+$$
+DECLARE
+    gs geometry[];
+    vs numeric[];
+BEGIN
+    EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a'
INTO gs, vs;
+    RETURN CDB_SpatialInterpolation(gs, vs, point, method, p1, p2);
+END;
+$$
+language plpgsql IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION CDB_SpatialInterpolation(
+    IN geomin geometry[],
+    IN colin numeric[],
+    IN point geometry,
+    IN method integer DEFAULT 1,
+    IN p1 numeric DEFAULT 0,
+    IN p2 numeric DEFAULT 0
+    )
+RETURNS numeric AS
+$$
+DECLARE
+    gs geometry[];
+    vs numeric[];
+    gs2 geometry[];
+    vs2 numeric[];
+    g geometry;
+    vertex geometry[];
+    sg numeric;
+    sa numeric;
+    sb numeric;
+    sc numeric;
+    va numeric;
+    vb numeric;
+    vc numeric;
+    output numeric;
+BEGIN
+    output := -999.999;
+    -- nearest
+    IF method = 0 THEN
+
+        WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v)
+        SELECT a.v INTO output FROM a ORDER BY point<->a.g LIMIT 1;
+        RETURN output;
+
+    -- barycentric
+    ELSIF method = 1 THEN
+        WITH a as (SELECT unnest(geomin) AS e),
+        b as (SELECT ST_DelaunayTriangles(ST_Collect(a.e),0.001, 0) AS t FROM a),
+        c as (SELECT (ST_Dump(t)).geom as v FROM b),
+        d as (SELECT v FROM c WHERE ST_Within(point, v))
+        SELECT v INTO g FROM d;
+        IF g is null THEN
+            -- out of the realm of the input data
+            RETURN -888.888;
+        END IF;
+        -- vertex of the selected cell
+        WITH a AS (SELECT (ST_DumpPoints(g)).geom AS v)
+        SELECT array_agg(v) INTO vertex FROM a;
+
+        -- retrieve the value of each vertex
+        WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
+        SELECT c INTO va FROM a WHERE ST_Equals(geo, vertex[1]);
+        WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
+        SELECT c INTO vb FROM a WHERE ST_Equals(geo, vertex[2]);
+        WITH a AS(SELECT unnest(vertex) as geo, unnest(colin) as c)
+        SELECT c INTO vc FROM a WHERE ST_Equals(geo, vertex[3]);
+
+        SELECT ST_area(g), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[2], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point, vertex[1], vertex[3], point]))), ST_area(ST_MakePolygon(ST_MakeLine(ARRAY[point,vertex[1],vertex[2], point]))) INTO sg, sa, sb, sc;
+
+        output := (coalesce(sa,0) * coalesce(va,0) + coalesce(sb,0) * coalesce(vb,0) + coalesce(sc,0) * coalesce(vc,0)) / coalesce(sg);
+        RETURN output;
+
+    -- IDW
+    -- p1: limit the number of neighbors, 0->no limit
+    -- p2: order of distance decay, 0-> order 1
+    ELSIF method = 2 THEN
+
+        IF p2 = 0 THEN
+            p2 := 1;
+        END IF;
+
+        WITH a as (SELECT unnest(geomin) as g, unnest(colin) as v),
+        b as (SELECT a.g, a.v FROM a ORDER BY point<->a.g)
+        SELECT array_agg(b.g), array_agg(b.v) INTO gs, vs FROM b;
+        IF p1::integer>0 THEN
+            gs2:= ARRAY[]::geometry[];
+            vs2:= ARRAY[]::numeric[];
+            FOR i IN 1..p1
+            LOOP
+                gs2 := gs2 || gs[i];
+                vs2 := vs2 || vs[i];
+            END LOOP;
+        ELSE
+            gs2:=gs;
+            vs2:=vs;
+        END IF;
+
+        WITH a as (SELECT unnest(gs2) as g, unnest(vs2) as v),
+        b as (
+            SELECT
+                (1/ST_distance(point, a.g)^p2::integer) as k,
+                (a.v/ST_distance(point, a.g)^p2::integer) as f
+            FROM a
+        )
+        SELECT sum(b.f)/sum(b.k) INTO output FROM b;
+        RETURN output;
+
+    END IF;
+
+    RETURN -777.777;
+
+END;
+$$
+language plpgsql IMMUTABLE;
diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out
new file mode 100644
index 0000000..42d24cb
--- /dev/null
+++ b/src/pg/test/expected/08_interpolation_test.out
@@ -0,0 +1,4 @@
+ cdb_spatialinterpolation
+--------------------------
+ 780.79470198683925288365
+(1 row)
diff --git a/src/pg/test/sql/08_interpolation_test.sql b/src/pg/test/sql/08_interpolation_test.sql
new file mode 100644
index 0000000..c8db89d
--- /dev/null
+++ b/src/pg/test/sql/08_interpolation_test.sql
@@ -0,0 +1,6 @@
+WITH a AS (
+    SELECT
+
ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g +) +SELECT CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'),1) FROM a; From 5a2319db72c2a04aa3a7ef1a2cf6fa3263ecfd86 Mon Sep 17 00:00:00 2001 From: abelvm Date: Tue, 14 Jun 2016 18:01:03 +0200 Subject: [PATCH 17/38] remove garbage --- doc/07_gravity.md | 78 --------------- src/pg/sql/07_gravity.sql | 115 ----------------------- src/pg/test/expected/07_gravity_test.out | 11 --- src/pg/test/sql/07_gravity_test.sql | 21 ----- 4 files changed, 225 deletions(-) delete mode 100644 doc/07_gravity.md delete mode 100644 src/pg/sql/07_gravity.sql delete mode 100644 src/pg/test/expected/07_gravity_test.out delete mode 100644 src/pg/test/sql/07_gravity_test.sql diff --git a/doc/07_gravity.md b/doc/07_gravity.md deleted file mode 100644 index e4e439e..0000000 --- a/doc/07_gravity.md +++ /dev/null @@ -1,78 +0,0 @@ -## Gravity Model - -Gravity Models are derived from Newton's Law of Gravity and are used to predict the interaction between a group of populated areas (sources) and a specific target among a group of potential targets, in terms of an attraction factor (weight) - -**CDB_Gravity** is based on the model defined in *Huff's Law of Shopper attraction (1963)* - -### CDB_Gravity(t_id bigint[], t_geom geometry[], t_weight numeric[], s_id bigint[], s_geom geometry[], s_pop numeric[], target bigint, radius integer, minval numeric DEFAULT -10e307) - -#### Arguments - -| Name | Type | Description | -|------|------|-------------| -| t_id | bigint[] | Array of targets ID | -| t_geom | geometry[] | Array of targets' geometries | -| t_weight | numeric[] | Array of targets's weights | -| s_id | bigint[] | Array of sources ID | -| s_geom | geometry[] | Array of sources' geometries | -| s_pop | numeric[] | Array of sources's population | -| target | bigint | ID of the target under study | -| radius | integer | Radius in meters around the target under study that will be taken into account| -| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | - -### CDB_Gravity( target_query text, weight_column text, source_query text, pop_column text, target bigint, radius integer, minval numeric DEFAULT -10e307) - -#### Arguments - -| Name | Type | Description | -|------|------|-------------| -| target_query | text | Query that defines targets | -| weight_column | text | Column name of weights | -| source_query | text | Query that defines sources | -| pop_column | text | Column name of population | -| target | bigint | cartodb_id of the target under study | -| radius | integer | Radius in meters around the target under study that will be taken into account| -| minval (optional) | numeric | Lowest accepted value of weight, defaults to numeric min_value | - - -### Returns - -| Column Name | Type | Description | -|-------------|------|-------------| -| the_geom | geometry | Geometries of the sources within the radius | -| source_id | bigint | ID of the source | -| target_id | bigint | Target ID from input | -| dist | numeric | Distance in meters source to target (if not points, distance between centroids) | -| h | numeric | Probability of patronage | -| hpop | numeric | 
Patronaging population | - - -#### Example Usage - -```sql -with t as ( -SELECT - array_agg(cartodb_id::bigint) as id, - array_agg(the_geom) as g, - array_agg(coalesce(gla,0)::numeric) as w -FROM - abel.centros_comerciales_de_madrid -WHERE not no_cc -), -s as ( -SELECT - array_agg(cartodb_id::bigint) as id, - array_agg(center) as g, - array_agg(coalesce(t1_1, 0)::numeric) as p -FROM - sscc_madrid -) -select - g.the_geom, - trunc(g.h,2) as h, - round(g.hpop) as hpop, - trunc(g.dist/1000,2) as dist_km -FROM t, s, CDB_Gravity1(t.id, t.g, t.w, s.id, s.g, s.p, newmall_ID, 100000, 5000) g -``` - - diff --git a/src/pg/sql/07_gravity.sql b/src/pg/sql/07_gravity.sql deleted file mode 100644 index 47e5b8e..0000000 --- a/src/pg/sql/07_gravity.sql +++ /dev/null @@ -1,115 +0,0 @@ -CREATE OR REPLACE FUNCTION CDB_Gravity( - IN target_query text, - IN weight_column text, - IN source_query text, - IN pop_column text, - IN target bigint, - IN radius integer, - IN minval numeric DEFAULT -10e307 - ) -RETURNS TABLE( - the_geom geometry, - source_id bigint, - target_id bigint, - dist numeric, - h numeric, - hpop numeric) AS $$ -DECLARE - t_id bigint[]; - t_geom geometry[]; - t_weight numeric[]; - s_id bigint[]; - s_geom geometry[]; - s_pop numeric[]; -BEGIN - EXECUTE 'WITH foo as('+target_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || weight_column || ') FROM foo' INTO t_id, t_geom, t_weight; - EXECUTE 'WITH foo as('+source_query+') SELECT array_agg(cartodb_id), array_agg(the_geom), array_agg(' || pop_column || ') FROM foo' INTO s_id, s_geom, s_pop; - RETURN QUERY - SELECT g.* FROM t, s, CDB_Gravity(t_id, t_geom, t_weight, s_id, s_geom, s_pop, target, radius, minval) g; -END; -$$ language plpgsql; - -CREATE OR REPLACE FUNCTION CDB_Gravity( - IN t_id bigint[], - IN t_geom geometry[], - IN t_weight numeric[], - IN s_id bigint[], - IN s_geom geometry[], - IN s_pop numeric[], - IN target bigint, - IN radius integer, - IN minval numeric DEFAULT -10e307 - ) -RETURNS TABLE( - the_geom geometry, - source_id bigint, - target_id bigint, - dist numeric, - h numeric, - hpop numeric) AS $$ -DECLARE - t_type text; - s_type text; - t_center geometry[]; - s_center geometry[]; -BEGIN - t_type := GeometryType(t_geom[1]); - s_type := GeometryType(s_geom[1]); - IF t_type = 'POINT' THEN - t_center := t_geom; - ELSE - WITH tmp as (SELECT unnest(t_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO t_center FROM tmp; - END IF; - IF s_type = 'POINT' THEN - s_center := s_geom; - ELSE - WITH tmp as (SELECT unnest(s_geom) as g) SELECT array_agg(ST_Centroid(g)) INTO s_center FROM tmp; - END IF; - RETURN QUERY - with target0 as( - SELECT unnest(t_center) as tc, unnest(t_weight) as tw, unnest(t_id) as td - ), - source0 as( - SELECT unnest(s_center) as sc, unnest(s_id) as sd, unnest (s_geom) as sg, unnest(s_pop) as sp - ), - prev0 as( - SELECT - source0.sg, - source0.sd as sourc_id, - coalesce(source0.sp,0) as sp, - target.td as targ_id, - coalesce(target.tw,0) as tw, - GREATEST(1.0,ST_Distance(geography(target.tc), geography(source0.sc)))::numeric as distance - FROM source0 - CROSS JOIN LATERAL - ( - SELECT - * - FROM target0 - WHERE tw > minval - AND ST_DWithin(geography(source0.sc), geography(tc), radius) - ) AS target - ), - deno as( - SELECT - sourc_id, - sum(tw/distance) as h_deno - FROM - prev0 - GROUP BY sourc_id - ) - SELECT - p.sg as the_geom, - p.sourc_id as source_id, - p.targ_id as target_id, - case when p.distance > 1 then p.distance else 0.0 end as dist, - 100*(p.tw/p.distance)/d.h_deno as h, - 
p.sp*(p.tw/p.distance)/d.h_deno as hpop - FROM - prev0 p, - deno d - WHERE - p.targ_id = target AND - p.sourc_id = d.sourc_id; -END; -$$ language plpgsql; diff --git a/src/pg/test/expected/07_gravity_test.out b/src/pg/test/expected/07_gravity_test.out deleted file mode 100644 index c101b24..0000000 --- a/src/pg/test/expected/07_gravity_test.out +++ /dev/null @@ -1,11 +0,0 @@ - the_geom | h | hpop | dist ---------------------------------------------+-------------------------+--------------------------+---------------- - 01010000001361C3D32B650140DD24068195B34440 | 1.51078258369747945249 | 12.08626066957983561994 | 4964.714459152 - 01010000002497FF907EFB0040713D0AD7A3B04440 | 98.29730954183620807430 | 688.08116679285345652007 | 99.955141922 - 0101000000A167B3EA733501401D5A643BDFAF4440 | 63.70532894711274639196 | 382.23197368267647835174 | 2488.330566505 - 010100000062A1D634EF380140BE9F1A2FDDB44440 | 35.35415870080995954879 | 176.77079350404979774397 | 4359.370460594 - 010100000052B81E85EB510140355EBA490CB24440 | 33.12290506987740864904 | 132.49162027950963459615 | 3703.664449828 - 0101000000C286A757CA320140736891ED7CAF4440 | 65.45251754279248087849 | 196.35755262837744263547 | 2512.092358644 - 01010000007DD0B359F5390140C976BE9F1AAF4440 | 62.83927792471345639225 | 125.67855584942691278449 | 2926.25725244 - 0101000000D237691A140D01407E6FD39FFDB44440 | 53.54905726651871279586 | 53.54905726651871279586 | 3744.515577777 -(8 rows) diff --git a/src/pg/test/sql/07_gravity_test.sql b/src/pg/test/sql/07_gravity_test.sql deleted file mode 100644 index a86bb23..0000000 --- a/src/pg/test/sql/07_gravity_test.sql +++ /dev/null @@ -1,21 +0,0 @@ -WITH t AS ( - SELECT - ARRAY[1,2,3] AS id, - ARRAY[7.0,8.0,3.0] AS w, - ARRAY[ST_GeomFromText('POINT(2.1744 41.4036)'),ST_GeomFromText('POINT(2.1228 41.3809)'),ST_GeomFromText('POINT(2.1511 41.3742)')] AS g -), -s AS ( - SELECT - ARRAY[10,20,30,40,50,60,70,80] AS id, - ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS p, - ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g -) -SELECT - g.the_geom, - g.h, - g.hpop, - g.dist -FROM - t, - s, - CDB_Gravity(t.id, t.g, t.w, s.id, s.g, s.p, 2, 100000, 3) g; From 7b98415da318e5dd5119e7c10b5b0b2ca54f3c8d Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 14 Jun 2016 18:06:23 +0200 Subject: [PATCH 18/38] Remove virtualenv activation #60 --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- src/pg/sql/02_py.sql | 23 ----------------------- src/pg/sql/03_random_seeds.sql | 1 - src/pg/sql/10_moran.sql | 4 ---- 4 files changed, 1 insertion(+), 29 deletions(-) delete mode 100644 src/pg/sql/02_py.sql diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 882cece..9bb2e75 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ - [ ] All declared geometries are `geometry(Geometry, 4326)` for general geoms, or `geometry(Point, 4326)` -- [ ] Include python is activated for new functions. Include this before importing modules: `plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')` +- [ ] Existing functions in crankshaft python library called from the extension are kept at least from version N to version N+1 (to avoid breakage during upgrades). 
- [ ] Docs for public-facing functions are written - [ ] New functions follow the naming conventions: `CDB_NameOfFunction`. Where internal functions begin with an underscore `_`. - [ ] If appropriate, new functions accepts an arbitrary query as an input (see [Crankshaft Issue #6](https://github.com/CartoDB/crankshaft/issues/6) for more information) diff --git a/src/pg/sql/02_py.sql b/src/pg/sql/02_py.sql deleted file mode 100644 index 7da5f47..0000000 --- a/src/pg/sql/02_py.sql +++ /dev/null @@ -1,23 +0,0 @@ -CREATE OR REPLACE FUNCTION _cdb_crankshaft_virtualenvs_path() -RETURNS text -AS $$ - BEGIN - -- RETURN '/opt/virtualenvs/crankshaft'; - RETURN '@@VIRTUALENV_PATH@@'; - END; -$$ language plpgsql IMMUTABLE STRICT; - --- Use the crankshaft python module -CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py() -RETURNS VOID -AS $$ - import os - # plpy.notice('%',str(os.environ)) - # activate virtualenv - crankshaft_version = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_internal_version()')[0]['_cdb_crankshaft_internal_version'] - base_path = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_virtualenvs_path()')[0]['_cdb_crankshaft_virtualenvs_path'] - default_venv_path = os.path.join(base_path, crankshaft_version) - venv_path = os.environ.get('CRANKSHAFT_VENV', default_venv_path) - activate_path = venv_path + '/bin/activate_this.py' - exec(open(activate_path).read(), dict(__file__=activate_path)) -$$ LANGUAGE plpythonu; diff --git a/src/pg/sql/03_random_seeds.sql b/src/pg/sql/03_random_seeds.sql index 9a0cca6..2b62be3 100644 --- a/src/pg/sql/03_random_seeds.sql +++ b/src/pg/sql/03_random_seeds.sql @@ -4,7 +4,6 @@ CREATE OR REPLACE FUNCTION _cdb_random_seeds (seed_value INTEGER) RETURNS VOID AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft import random_seeds random_seeds.set_random_seeds(seed_value) $$ LANGUAGE plpythonu; diff --git a/src/pg/sql/10_moran.sql b/src/pg/sql/10_moran.sql index a336867..3be31a2 100644 --- a/src/pg/sql/10_moran.sql +++ b/src/pg/sql/10_moran.sql @@ -10,7 +10,6 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran NUMERIC, significance NUMERIC) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) @@ -28,7 +27,6 @@ CREATE OR REPLACE FUNCTION id_col TEXT) RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) @@ -122,7 +120,6 @@ CREATE OR REPLACE FUNCTION id_col TEXT DEFAULT 'cartodb_id') RETURNS TABLE (moran FLOAT, significance FLOAT) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local # TODO: use named parameters or a dictionary return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) @@ -143,7 +140,6 @@ CREATE OR REPLACE FUNCTION RETURNS TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) AS $$ - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import moran_local_rate # TODO: use named parameters or a 
dictionary return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) From 0acae8240f777e042f59dfcf3f0a3e1430dcb984 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 14 Jun 2016 18:23:30 +0200 Subject: [PATCH 19/38] Remove virtualenv stuff from Makefiles #60 --- Makefile | 6 ------ src/pg/Makefile | 7 +------ src/py/Makefile | 11 +++-------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 6c3e219..ef9415b 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,6 @@ PYP_DIR = src/py # Generate and install developmet versions of the extension # and python package. # The extension is named 'dev' with a 'current' alias for easily upgrading. -# The Python package is installed in a virtual environment envs/dev/ # Requires sudo. install: ## Generate and install development version of the extension; requires sudo. $(MAKE) -C $(PYP_DIR) install @@ -29,7 +28,6 @@ release: ## Generate a new release of the extension. Only for telease manager $(MAKE) -C $(PYP_DIR) release # Install the current release. -# The Python package is installed in a virtual environment envs/X.Y.Z/ # Requires sudo. # Use the RELEASE_VERSION environment variable to deploy a specific version: # sudo make deploy RELEASE_VERSION=1.0.0 @@ -52,10 +50,6 @@ clean-release: ## clean up current release rm -rf release/python/$(RELEASE_VERSION) rm -f release/$(RELEASE_VERSION)--*.sql -# Cleanup all virtual environments -clean-environments: ## clean up all virtual environments - rm -rf envs/* - clean-all: clean-dev clean-release clean-environments help: diff --git a/src/pg/Makefile b/src/pg/Makefile index 8a745c4..178ed08 100644 --- a/src/pg/Makefile +++ b/src/pg/Makefile @@ -7,7 +7,6 @@ include ../../Makefile.global # requires sudo. In additionof the current development version # named 'dev', an alias 'current' is generating for ease of # update (upgrade to 'current', then to 'dev'). -# the python module is installed in a virtualenv in envs/dev/ # * test runs the tests for the currently generated Development # extension. 
@@ -18,11 +17,8 @@ DATA = $(EXTENSION)--dev.sql \ SOURCES_DATA_DIR = sql SOURCES_DATA = $(wildcard $(SOURCES_DATA_DIR)/*.sql) -VIRTUALENV_PATH = $(realpath ../../envs) -ESC_VIRVIRTUALENV_PATH = $(subst /,\/,$(VIRTUALENV_PATH)) -REPLACEMENTS = -e 's/@@VERSION@@/$(EXTVERSION)/g' \ - -e 's/@@VIRTUALENV_PATH@@/$(ESC_VIRVIRTUALENV_PATH)/g' +REPLACEMENTS = -e 's/@@VERSION@@/$(EXTVERSION)/g' $(DATA): $(SOURCES_DATA) $(SED) $(REPLACEMENTS) $(SOURCES_DATA_DIR)/*.sql > $@ @@ -54,7 +50,6 @@ release: ../../release/$(EXTENSION).control $(SOURCES_DATA) $(SED) $(REPLACEMENTS) $(SOURCES_DATA_DIR)/*.sql > ../../release/$(EXTENSION)--$(EXTVERSION).sql # Install the current relese into the PostgreSQL extensions directory -# and the Python package in a virtual environment envs/X.Y.Z deploy: $(INSTALL_DATA) ../../release/$(EXTENSION).control '$(DESTDIR)$(datadir)/extension/' $(INSTALL_DATA) ../../release/*.sql '$(DESTDIR)$(datadir)/extension/' diff --git a/src/py/Makefile b/src/py/Makefile index 90b22b8..403c5a1 100644 --- a/src/py/Makefile +++ b/src/py/Makefile @@ -2,14 +2,11 @@ include ../../Makefile.global # Install the package locally for development install: - virtualenv --system-site-packages ../../envs/dev - # source ../../envs/dev/bin/activate - ../../envs/dev/bin/pip install -I ./crankshaft - ../../envs/dev/bin/pip install -I nose + pip install ./crankshaft # Test develpment install test: - ../../envs/dev/bin/nosetests crankshaft/test/ + nosetests crankshaft/test/ release: ../../release/$(EXTENSION).control $(SOURCES_DATA) mkdir -p ../../release/python/$(EXTVERSION) @@ -17,6 +14,4 @@ release: ../../release/$(EXTENSION).control $(SOURCES_DATA) $(SED) -i -r 's/version='"'"'[0-9]+\.[0-9]+\.[0-9]+'"'"'/version='"'"'$(EXTVERSION)'"'"'/g' ../../release/python/$(EXTVERSION)/$(PACKAGE)/setup.py deploy: - virtualenv --system-site-packages $(VIRTUALENV_PATH)/$(RELEASE_VERSION) - $(VIRTUALENV_PATH)/$(RELEASE_VERSION)/bin/pip install -I -U ../../release/python/$(RELEASE_VERSION)/$(PACKAGE) - $(VIRTUALENV_PATH)/$(RELEASE_VERSION)/bin/pip install -I nose + pip install --upgrade ../../release/python/$(RELEASE_VERSION)/$(PACKAGE) From 75531b671e247b507d0a11d6f2fdced5ef3a8084 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 14 Jun 2016 18:24:43 +0200 Subject: [PATCH 20/38] Remove virtualenv references from READMEs #60 --- README.md | 3 +-- src/py/README.md | 17 +---------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 68a64fb..0ff9090 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,10 @@ CartoDB Spatial Analysis extension for PostgreSQL. * - *src/pg* contains the PostgreSQL extension source code * - *src/py* Python module source code * *release* reseleased versions -* *env* base directory for Python virtual environments ## Requirements -* pip, virtualenv, PostgreSQL +* pip, PostgreSQL * python-scipy system package (see [src/py/README.md](https://github.com/CartoDB/crankshaft/blob/master/src/py/README.md)) # Working Process -- Quickstart Guide diff --git a/src/py/README.md b/src/py/README.md index 29a3145..8fcfcb7 100644 --- a/src/py/README.md +++ b/src/py/README.md @@ -10,7 +10,6 @@ nosetests test/ ## Notes about Python dependencies * This extension is targeted at production databases. Therefore certain restrictions must be assumed about the production environment vs other experimental environments. 
-* We're using `pip` and `virtualenv` to generate a suitable isolated environment for python code that has all the dependencies * Every dependency should be: - Added to the `setup.py` file - Installed through it @@ -30,21 +29,7 @@ PySAL 1.10 or later, so we'll stick to 1.9.1. apt-get install -y python-scipy ``` -We'll use virtual environments to install our packages, -but configued to use also system modules so that the -mentioned scipy and numpy are used. - - # Create a virtual environment for python - $ virtualenv --system-site-packages dev - - # Activate the virtualenv - $ source dev/bin/activate - - # Install all the requirements - # expect this to take a while, as it will trigger a few compilations - (dev) $ pip install -I ./crankshaft - -#### Test the libraries with that virtual env +#### Test the libraries ##### Test numpy library dependency: From a8943bae985acc4d960d7cb614c5e6ad4bb68ed1 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Tue, 14 Jun 2016 18:27:35 +0200 Subject: [PATCH 21/38] Remove reference to clean-environments #60 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ef9415b..50f690c 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ clean-release: ## clean up current release rm -rf release/python/$(RELEASE_VERSION) rm -f release/$(RELEASE_VERSION)--*.sql -clean-all: clean-dev clean-release clean-environments +clean-all: clean-dev clean-release help: @IFS=$$'\n' ; \ From d08a2b6d2d756be58a16e80bf4ded3d134dfb97a Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 14:12:28 +0200 Subject: [PATCH 22/38] Remove _cdb_crankshaft_activate_py activation call from kmeans function --- src/pg/sql/11_kmeans.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql index a27f803..125aac3 100644 --- a/src/pg/sql/11_kmeans.sql +++ b/src/pg/sql/11_kmeans.sql @@ -1,8 +1,6 @@ CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) RETURNS table (cartodb_id integer, cluster_no integer) as $$ - import plpy - plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') from crankshaft.clustering import kmeans return kmeans(query,no_clusters,no_init) From 8b5e9102345fc2a7218961ef26c033715a441d6b Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 14:16:32 +0200 Subject: [PATCH 23/38] Release 0.0.3 --- NEWS.md | 5 + release/crankshaft--0.0.2--0.0.3.sql | 413 ++++++++++++++++++ release/crankshaft--0.0.3--0.0.2.sql | 209 +++++++++ release/crankshaft--0.0.3.sql | 403 +++++++++++++++++ release/crankshaft.control | 2 +- .../0.0.3/crankshaft/crankshaft/__init__.py | 2 + .../crankshaft/clustering/__init__.py | 2 + .../crankshaft/clustering/kmeans.py | 18 + .../crankshaft/crankshaft/clustering/moran.py | 260 +++++++++++ .../crankshaft/pysal_utils/__init__.py | 1 + .../crankshaft/pysal_utils/pysal_utils.py | 152 +++++++ .../crankshaft/crankshaft/random_seeds.py | 10 + release/python/0.0.3/crankshaft/setup.py | 48 ++ .../crankshaft/test/fixtures/kmeans.json | 1 + .../0.0.3/crankshaft/test/fixtures/moran.json | 52 +++ .../crankshaft/test/fixtures/neighbors.json | 54 +++ .../python/0.0.3/crankshaft/test/helper.py | 13 + .../python/0.0.3/crankshaft/test/mock_plpy.py | 34 ++ .../crankshaft/test/test_cluster_kmeans.py | 38 ++ .../crankshaft/test/test_clustering_moran.py | 83 ++++ .../0.0.3/crankshaft/test/test_pysal_utils.py | 107 +++++ src/pg/crankshaft.control | 2 +- 22 files changed, 1907 insertions(+), 2 deletions(-) 
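The release ships paired upgrade and downgrade scripts, so a minimal application sketch (assuming the generated files are already deployed to the PostgreSQL extension directory) is the standard extension-update path:

    ALTER EXTENSION crankshaft UPDATE TO '0.0.3';  -- applies crankshaft--0.0.2--0.0.3.sql
    ALTER EXTENSION crankshaft UPDATE TO '0.0.2';  -- rolls back via crankshaft--0.0.3--0.0.2.sql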
create mode 100644 release/crankshaft--0.0.2--0.0.3.sql create mode 100644 release/crankshaft--0.0.3--0.0.2.sql create mode 100644 release/crankshaft--0.0.3.sql create mode 100644 release/python/0.0.3/crankshaft/crankshaft/__init__.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/clustering/__init__.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/clustering/kmeans.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/pysal_utils/pysal_utils.py create mode 100644 release/python/0.0.3/crankshaft/crankshaft/random_seeds.py create mode 100644 release/python/0.0.3/crankshaft/setup.py create mode 100644 release/python/0.0.3/crankshaft/test/fixtures/kmeans.json create mode 100644 release/python/0.0.3/crankshaft/test/fixtures/moran.json create mode 100644 release/python/0.0.3/crankshaft/test/fixtures/neighbors.json create mode 100644 release/python/0.0.3/crankshaft/test/helper.py create mode 100644 release/python/0.0.3/crankshaft/test/mock_plpy.py create mode 100644 release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py create mode 100644 release/python/0.0.3/crankshaft/test/test_clustering_moran.py create mode 100644 release/python/0.0.3/crankshaft/test/test_pysal_utils.py diff --git a/NEWS.md b/NEWS.md index 0b8c2da..ed66fd9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +0.0.3 (2016-06-16) +------------------ +* Adds new functions: kmeans, weighted centroids. +* Replaces moran functions with new areas of interest naming. + 0.0.2 (2016-03-16) ------------------ * New versioning approach using per-version Python virtual environments diff --git a/release/crankshaft--0.0.2--0.0.3.sql b/release/crankshaft--0.0.2--0.0.3.sql new file mode 100644 index 0000000..8a865d5 --- /dev/null +++ b/release/crankshaft--0.0.2--0.0.3.sql @@ -0,0 +1,413 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit + +-- [MANUALLY] DROP FUNCTIONS REMOVED SINCE 0.0.2 version + +DROP FUNCTION IF EXISTS cdb_moran_local(TEXT, TEXT, float, INT, INT, TEXT, TEXT, TEXT); +DROP FUNCTION IF EXISTS cdb_moran_local_rate(TEXT, TEXT, TEXT, FLOAT, INT, INT, TEXT, TEXT, TEXT); +DROP FUNCTION IF EXISTS _cdb_crankshaft_virtualenvs_path(); +DROP FUNCTION IF EXISTS _cdb_crankshaft_activate_py(); + +-- [END MANUALLY] DROP FUNCTIONS REMOVED SINCE 0.0.2 version + +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() + RETURNS text AS $$ + SELECT '0.0.3'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() + RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. 
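+-- A minimal usage sketch, assuming a table my_table with a numeric column
+-- my_column (illustrative names only): seeding first keeps permutation-based
+-- significance values reproducible within a session.
+--   SELECT cdb_crankshaft._cdb_random_seeds(1234);
+--   SELECT * FROM cdb_crankshaft.CDB_AreasOfInterestLocal('SELECT * FROM my_table', 'my_column');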
+CREATE OR REPLACE FUNCTION + _cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, 
quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + 
denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS + TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) + RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) + RETURNS Numeric[] AS + $$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) + RETURNS GEOMETRY AS + $$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)( +SFUNC = CDB_WeightedMeanS, +FINALFUNC = CDB_WeightedMeanF, +STYPE = Numeric[], +INITCOND = "{0.0,0.0,0.0}" +); +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. 
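+-- A minimal example call, assuming a polygon table census_blocks with a
+-- numeric population column (illustrative names only):
+--   SELECT cdb_crankshaft.cdb_overlap_sum(
+--     ST_SetSRID(ST_MakeEnvelope(-73.99, 40.73, -73.98, 40.74), 4326),
+--     'census_blocks', 'population');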
+-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS + $$ + DECLARE + result numeric; + qualified_name text; + BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; + END; + $$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) + RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; diff --git a/release/crankshaft--0.0.3--0.0.2.sql b/release/crankshaft--0.0.3--0.0.2.sql new file mode 100644 index 0000000..a2ccd2f --- /dev/null +++ b/release/crankshaft--0.0.3--0.0.2.sql @@ -0,0 +1,209 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. 
\quit + +-- [MANUALLY] DROP FUNCTIONS INTRODUCED IN 0.0.3 version + +DROP FUNCTION IF EXISTS CDB_AreasOfInterestGlobal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS _CDB_AreasOfInterestLocal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_AreasOfInterestLocal(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialHotspots(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialColdspots(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialOutliers(TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_AreasOfInterestGlobalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_AreasOfInterestLocalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS _CDB_AreasOfInterestLocalRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialHotspotsRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialColdspotsRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_GetSpatialOutliersRate(TEXT,TEXT,TEXT,TEXT,INT,INT,TEXT,TEXT); +DROP FUNCTION IF EXISTS CDB_KMeans(text,integer,integer); +DROP AGGREGATE IF EXISTS CDB_WeightedMean(geometry(Point, 4326), NUMERIC); +DROP FUNCTION IF EXISTS CDB_WeightedMeanS(Numeric[], GEOMETRY(Point, 4326), NUMERIC); +DROP FUNCTION IF EXISTS CDB_WeightedMeanF(Numeric[]); + + +-- [END MANUALLY] DROP FUNCTIONS INTRODUCED IN 0.0.3 version + +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.2'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +CREATE OR REPLACE FUNCTION _cdb_crankshaft_virtualenvs_path() +RETURNS text +AS $$ + BEGIN + -- RETURN '/opt/virtualenvs/crankshaft'; + RETURN '/home/ubuntu/crankshaft/envs'; + END; +$$ language plpgsql IMMUTABLE STRICT; + +-- Use the crankshaft python module +CREATE OR REPLACE FUNCTION _cdb_crankshaft_activate_py() +RETURNS VOID +AS $$ + import os + # plpy.notice('%',str(os.environ)) + # activate virtualenv + crankshaft_version = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_internal_version()')[0]['_cdb_crankshaft_internal_version'] + base_path = plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_virtualenvs_path()')[0]['_cdb_crankshaft_virtualenvs_path'] + default_venv_path = os.path.join(base_path, crankshaft_version) + venv_path = os.environ.get('CRANKSHAFT_VENV', default_venv_path) + activate_path = venv_path + '/bin/activate_this.py' + exec(open(activate_path).read(), dict(__file__=activate_path)) +$$ LANGUAGE plpythonu; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. 
+CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +-- Moran's I +CREATE OR REPLACE FUNCTION + cdb_moran_local ( + t TEXT, + attr TEXT, + significance float DEFAULT 0.05, + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_column TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn') +RETURNS TABLE (moran FLOAT, quads TEXT, significance FLOAT, ids INT) +AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(t, attr, significance, num_ngbrs, permutations, geom_column, id_col, w_type) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate +CREATE OR REPLACE FUNCTION + cdb_moran_local_rate(t TEXT, + numerator TEXT, + denominator TEXT, + significance FLOAT DEFAULT 0.05, + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_column TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id', + w_type TEXT DEFAULT 'knn') +RETURNS TABLE(moran FLOAT, quads TEXT, significance FLOAT, ids INT, y numeric) +AS $$ + plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()') + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(t, numerator, denominator, significance, num_ngbrs, permutations, geom_column, id_col, w_type) +$$ LANGUAGE plpythonu; +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. +-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. 
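+-- A minimal example call, assuming a polygon table my_polygons (illustrative
+-- name only): roughly 100 points are generated inside each input polygon.
+--   SELECT cdb_crankshaft.cdb_dot_density(the_geom, 100) AS dots FROM my_polygons;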
+-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. +-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; diff --git a/release/crankshaft--0.0.3.sql b/release/crankshaft--0.0.3.sql new file mode 100644 index 0000000..caacd75 --- /dev/null +++ b/release/crankshaft--0.0.3.sql @@ -0,0 +1,403 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.3'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. 
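+-- A minimal reproducibility sketch, assuming a point table my_table
+-- (illustrative name only): seeding first is intended to make the k-means
+-- cluster assignment stable across runs within a session.
+--   SELECT cdb_crankshaft._cdb_random_seeds(10);
+--   SELECT * FROM cdb_crankshaft.CDB_KMeans('SELECT * FROM my_table', 5);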
+CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, 
quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + 
denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" +); +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. 
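+-- A minimal example with the optional schema_name argument, assuming a table
+-- public.census_blocks with a numeric population column (illustrative names only):
+--   SELECT cdb_crankshaft.cdb_overlap_sum(
+--     ST_SetSRID(ST_MakeEnvelope(-73.99, 40.73, -73.98, 40.74), 4326),
+--     'census_blocks', 'population', 'public');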
+-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. 
+-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; diff --git a/release/crankshaft.control b/release/crankshaft.control index 49c0d22..2029b7e 100644 --- a/release/crankshaft.control +++ b/release/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.0.2' +default_version = '0.0.3' requires = 'plpythonu, postgis, cartodb' superuser = true schema = cdb_crankshaft diff --git a/release/python/0.0.3/crankshaft/crankshaft/__init__.py b/release/python/0.0.3/crankshaft/crankshaft/__init__.py new file mode 100644 index 0000000..d07e330 --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/__init__.py @@ -0,0 +1,2 @@ +import random_seeds +import clustering diff --git a/release/python/0.0.3/crankshaft/crankshaft/clustering/__init__.py b/release/python/0.0.3/crankshaft/crankshaft/clustering/__init__.py new file mode 100644 index 0000000..338e8ea --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/clustering/__init__.py @@ -0,0 +1,2 @@ +from moran import * +from kmeans import * diff --git a/release/python/0.0.3/crankshaft/crankshaft/clustering/kmeans.py b/release/python/0.0.3/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..4134062 --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,18 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py b/release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py new file mode 100644 index 0000000..39b3ff6 --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/clustering/moran.py @@ -0,0 +1,260 @@ +""" +Moran's I geostatistics (global clustering & outliers presence) +""" + +# TODO: Fill in local neighbors which have null/NoneType values with the +# average of the their neighborhood + +import pysal as ps +import plpy + +# crankshaft module +import crankshaft.pysal_utils as pu + +# High level interface --------------------------------------- + +def moran(subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. 
+ Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": attr_name, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + attr_vals = pu.get_attributes(result) + + ## calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) + + return zip([moran_global.I], [moran_global.EI]) + +def moran_local(subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = {"id_col": id_col, + "attr1": attr, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(5) + + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) + Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": numerator, + "attr2": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) + + return zip([lisa_rate.I], [lisa_rate.EI]) + +def moran_local_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors + + query = 
pu.construct_neighbor_query(w_type, + {"id_col": id_col, + "numerator": numerator, + "denominator": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs}) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(5) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) + + # find units of significance + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_local_bv(subquery, attr1, attr2, + permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ + plpy.notice('** Constructing query') + + qvals = {"num_ngbrs": num_ngbrs, + "attr1": attr1, + "attr2": attr2, + "subquery": subquery, + "geom_col": geom_col, + "id_col": id_col} + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(4) + except plpy.SPIError: + plpy.error("Error: areas of interest query failed, " \ + "check input parameters") + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(4) + + ## collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + plpy.notice("len of Is: %d" % len(lisa.Is)) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + plpy.notice('** Finished calculations') + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + +# Low level functions ---------------------------------------- + +def map_quads(coord): + """ + Map a quadrant number to Moran's I designation + HH=1, LH=2, LL=3, HL=4 + Input: + @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') + """ + if coord == 1: + return 'HH' + elif coord == 2: + return 'LH' + elif coord == 3: + return 'LL' + elif coord == 4: + return 'HL' + else: + return None + +def quad_position(quads): + """ + Produce Moran's I classification based of n + Input: + @param quads ndarray: an array of quads classified by + 1-4 (PySAL default) + Output: + @param list: an array of quads classied by 'HH', 'LL', etc. 
+ """ + return [map_quads(q) for q in quads] diff --git a/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/__init__.py b/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/__init__.py new file mode 100644 index 0000000..835880d --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/__init__.py @@ -0,0 +1 @@ +from pysal_utils import * diff --git a/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/pysal_utils.py new file mode 100644 index 0000000..02b5e35 --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -0,0 +1,152 @@ +""" + Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects +""" + +import numpy as np +import pysal as ps + +def construct_neighbor_query(w_type, query_vals): + """Return query (a string) used for finding neighbors + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query + """ + + if w_type.lower() == 'knn': + return knn(query_vals) + else: + return queen(query_vals) + +## Build weight object +def get_weight(query_res, w_type='knn', num_ngbrs=5): + """ + Construct PySAL weight from return value of query + @param query_res: query results with attributes and neighbors + """ + if w_type.lower() == 'knn': + row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs + weights = {x['id']: row_normed_weights for x in query_res} + else: + weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) + if len(x['neighbors']) > 0 + else [] for x in query_res} + + neighbors = {x['id']: x['neighbors'] for x in query_res} + + return ps.W(neighbors, weights) + +def query_attr_select(params): + """ + Create portion of SELECT statement for attributes inolved in query. + @param params: dict of information used in query (column names, + table name, etc.) + """ + + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')] + + template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, " + + attr_string = "" + + for idx, val in enumerate(sorted(attrs)): + attr_string += template % {"col": val, "alias_num": idx + 1} + + return attr_string + +def query_attr_where(params): + """ + Create portion of WHERE clauses for weeding out NULL-valued geometries + """ + attrs = sorted([k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]) + + attr_string = [] + + for attr in attrs: + attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr) + + if len(attrs) == 2: + attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1]) + + out = " AND ".join(attr_string) + + return out + +def knn(params): + """SQL query for k-nearest neighbors. 
+ @param vars: dict of values to fill template + """ + + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE " \ + "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "%(attr_where_j)s " \ + "ORDER BY " \ + "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs})" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## SQL query for finding queens neighbors (all contiguous polygons) +def queen(params): + """SQL query for queen neighbors. + @param params dict: information to fill query + """ + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## to add more weight methods open a ticket or pull request + +def get_attributes(query_res, attr_num=1): + """ + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) + """ + return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) + +def empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/release/python/0.0.3/crankshaft/crankshaft/random_seeds.py b/release/python/0.0.3/crankshaft/crankshaft/random_seeds.py new file mode 100644 index 0000000..b7c8eed --- /dev/null +++ b/release/python/0.0.3/crankshaft/crankshaft/random_seeds.py @@ -0,0 +1,10 @@ +import random +import numpy + +def set_random_seeds(value): + """ + Set the seeds of the RNGs (Random Number Generators) + used internally. 
+ """ + random.seed(value) + numpy.random.seed(value) diff --git a/release/python/0.0.3/crankshaft/setup.py b/release/python/0.0.3/crankshaft/setup.py new file mode 100644 index 0000000..33a3b62 --- /dev/null +++ b/release/python/0.0.3/crankshaft/setup.py @@ -0,0 +1,48 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.3', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.0.3/crankshaft/test/fixtures/kmeans.json b/release/python/0.0.3/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git a/release/python/0.0.3/crankshaft/test/fixtures/moran.json 
b/release/python/0.0.3/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..2f75cf1 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], +[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], +[0.5724288737143592, "LL"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], +[0.18562939195219, "LL"], +[0.3010757406693439, "LL"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], +[0.18562939195219, "LL"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/release/python/0.0.3/crankshaft/test/fixtures/neighbors.json b/release/python/0.0.3/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 
8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/release/python/0.0.3/crankshaft/test/helper.py b/release/python/0.0.3/crankshaft/test/helper.py new file mode 100644 index 0000000..7d28b94 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/helper.py @@ -0,0 +1,13 @@ +import unittest + +from mock_plpy import MockPlPy +plpy = MockPlPy() + +import sys +sys.modules['plpy'] = plpy + +import os + +def fixture_file(name): + dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir, 'fixtures', name) diff --git a/release/python/0.0.3/crankshaft/test/mock_plpy.py b/release/python/0.0.3/crankshaft/test/mock_plpy.py new file mode 100644 index 0000000..63c88f6 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/mock_plpy.py @@ -0,0 +1,34 @@ +import re + +class MockPlPy: + def __init__(self): + self._reset() + + def _reset(self): + self.infos = [] + self.notices = [] + self.debugs = [] + self.logs = [] + self.warnings = [] + self.errors = [] + self.fatals = [] + self.executes = [] + self.results = [] + self.prepares = [] + self.results = [] + + def _define_result(self, query, result): + pattern = re.compile(query, re.IGNORECASE | re.MULTILINE) + self.results.append([pattern, result]) + + def notice(self, msg): + self.notices.append(msg) + + def info(self, msg): + self.infos.append(msg) + + def execute(self, query): # TODO: additional arguments + for result in self.results: + if result[0].match(query): + return result[1] + return [] diff --git a/release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py b/release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# 
sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + diff --git a/release/python/0.0.3/crankshaft/test/test_clustering_moran.py b/release/python/0.0.3/crankshaft/test/test_clustering_moran.py new file mode 100644 index 0000000..393e93b --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/test_clustering_moran.py @@ -0,0 +1,83 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class MoranTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads(open(fixture_file('moran.json')).read()) + + def test_map_quads(self): + """Test map_quads""" + self.assertEqual(cc.map_quads(1), 'HH') + self.assertEqual(cc.map_quads(2), 'LH') + self.assertEqual(cc.map_quads(3), 'LL') + self.assertEqual(cc.map_quads(4), 'HL') + self.assertEqual(cc.map_quads(33), None) + self.assertEqual(cc.map_quads('andy'), None) + + def test_quad_position(self): + """Test lisa_sig_vals""" + + quads = np.array([1, 2, 3, 4], np.int) + + ans = np.array(['HH', 'LH', 'LL', 'HL']) + test_ans = cc.quad_position(quads) + + self.assertTrue((test_ans == ans).all()) + + def test_moran_local(self): + """Test Moran's I local""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None? 
', result == None + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1235) + result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None?', result == None + result_moran = result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) diff --git a/release/python/0.0.3/crankshaft/test/test_pysal_utils.py b/release/python/0.0.3/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..4ea0d9b --- /dev/null +++ b/release/python/0.0.3/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,107 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans = "i.\"{attr1}\"::numeric As attr1, " \ + "i.\"{attr2}\"::numeric As attr2, " + + self.assertEqual(pu.query_attr_select(self.params), ans) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" <> 0" + + self.assertEqual(pu.query_attr_where(self.params), ans) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params), ans) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params), ans) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + 
pu.knn(self.params)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index 49c0d22..2029b7e 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.0.2' +default_version = '0.0.3' requires = 'plpythonu, postgis, cartodb' superuser = true schema = cdb_crankshaft From 1e19f468ebfc0626c37b85fd88ad0d1da1531771 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 16:23:43 +0200 Subject: [PATCH 24/38] Declare numpy dep --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 04822dd..f072f17 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,7 +40,7 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['numpy==1.11.0', 'pysal==1.9.1', 'scikit-learn==0.17.1'], requires=['pysal', 'numpy', 'sklearn'], From 237aa1c5818f003ffb459817ea5e72392c765c5c Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 16:34:45 +0200 Subject: [PATCH 25/38] Declare scipy as dep --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index f072f17..266b6f1 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,7 +40,7 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. 
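# Editorial sketch, not part of the patch: this hunk and the follow-ups in
# patches 25 and 28 progressively pin numpy, scipy, joblib, pysal and
# scikit-learn to whatever is provisioned on the production servers. A quick
# way to check that a target environment actually matches a pin set; the
# versions below mirror the final pins introduced in patch 28, and everything
# else here is an assumption rather than project code:
import pkg_resources

PINNED = {
    'joblib': '0.8.3',
    'numpy': '1.6.1',
    'scipy': '0.14.0',
    'pysal': '1.11.2',
    'scikit-learn': '0.14.1',
}

for name, wanted in PINNED.items():
    try:
        # look up the installed distribution and its version string
        found = pkg_resources.get_distribution(name).version
    except pkg_resources.DistributionNotFound:
        print('%s is not installed (pinned to %s)' % (name, wanted))
        continue
    if found != wanted:
        print('%s is %s but setup.py pins %s' % (name, found, wanted))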
- install_requires=['numpy==1.11.0', 'pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['numpy==1.11.0', 'scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'], requires=['pysal', 'numpy', 'sklearn'], From 3480a0d252b1b7f9e79397b126b08f65837d3036 Mon Sep 17 00:00:00 2001 From: Luis Bosque Date: Thu, 16 Jun 2016 16:56:16 +0200 Subject: [PATCH 26/38] Allow passing options to pip install --- src/py/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/Makefile b/src/py/Makefile index 403c5a1..b584645 100644 --- a/src/py/Makefile +++ b/src/py/Makefile @@ -13,5 +13,5 @@ release: ../../release/$(EXTENSION).control $(SOURCES_DATA) cp -r ./$(PACKAGE) ../../release/python/$(EXTVERSION)/ $(SED) -i -r 's/version='"'"'[0-9]+\.[0-9]+\.[0-9]+'"'"'/version='"'"'$(EXTVERSION)'"'"'/g' ../../release/python/$(EXTVERSION)/$(PACKAGE)/setup.py -deploy: - pip install --upgrade ../../release/python/$(RELEASE_VERSION)/$(PACKAGE) +deploy: + pip install $(RUN_OPTIONS) --upgrade ../../release/python/$(RELEASE_VERSION)/$(PACKAGE) From 1db938c450634532133190b3c28425e7313acc72 Mon Sep 17 00:00:00 2001 From: Raul Ochoa Date: Thu, 16 Jun 2016 19:07:42 +0200 Subject: [PATCH 27/38] Removes cartodb-extension-dep --- CONTRIBUTING.md | 1 - src/pg/crankshaft.control | 2 +- src/pg/test/expected/01_install_test.out | 1 - src/pg/test/sql/01_install_test.sql | 1 - src/pg/test/sql/90_permissions.sql | 2 +- 5 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f642d45..42385dc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,7 +60,6 @@ it can be installed directly with: * `CREATE EXTENSION IF NOT EXISTS plpythonu;` `CREATE EXTENSION IF NOT EXISTS postgis;` - `CREATE EXTENSION IF NOT EXISTS cartodb;` `CREATE EXTENSION crankshaft WITH VERSION 'dev';` Note: the development extension uses the development python virtual diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index 2029b7e..e71321f 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' default_version = '0.0.3' -requires = 'plpythonu, postgis, cartodb' +requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft diff --git a/src/pg/test/expected/01_install_test.out b/src/pg/test/expected/01_install_test.out index e40d267..e84a48a 100644 --- a/src/pg/test/expected/01_install_test.out +++ b/src/pg/test/expected/01_install_test.out @@ -1,6 +1,5 @@ -- Install dependencies CREATE EXTENSION plpythonu; CREATE EXTENSION postgis; -CREATE EXTENSION cartodb; -- Install the extension CREATE EXTENSION crankshaft VERSION 'dev'; diff --git a/src/pg/test/sql/01_install_test.sql b/src/pg/test/sql/01_install_test.sql index fc3ea80..bbce805 100644 --- a/src/pg/test/sql/01_install_test.sql +++ b/src/pg/test/sql/01_install_test.sql @@ -1,7 +1,6 @@ -- Install dependencies CREATE EXTENSION plpythonu; CREATE EXTENSION postgis; -CREATE EXTENSION cartodb; -- Install the extension CREATE EXTENSION crankshaft VERSION 'dev'; diff --git a/src/pg/test/sql/90_permissions.sql b/src/pg/test/sql/90_permissions.sql index 187f795..1e9ea99 100644 --- a/src/pg/test/sql/90_permissions.sql +++ b/src/pg/test/sql/90_permissions.sql @@ -4,7 +4,7 @@ SELECT cdb_crankshaft._cdb_random_seeds(1234); SET ROLE test_regular_user; -- Add to the search path the schema -SET search_path TO public,cartodb,cdb_crankshaft; +SET search_path TO public,cdb_crankshaft; -- Exercise public functions SELECT ppoints.code, m.quads From 
f5fb4499db226521adb952b7524449ae15ddcc3a Mon Sep 17 00:00:00 2001 From: Luis Bosque Date: Mon, 20 Jun 2016 09:44:52 +0200 Subject: [PATCH 28/38] Set final dependencies versions --- src/py/crankshaft/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py index 266b6f1..abd4dae 100644 --- a/src/py/crankshaft/setup.py +++ b/src/py/crankshaft/setup.py @@ -40,7 +40,7 @@ setup( # The choice of component versions is dictated by what's # provisioned in the production servers. - install_requires=['numpy==1.11.0', 'scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'], + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], requires=['pysal', 'numpy', 'sklearn'], From 01fc2c1dd1087e58679cc2728ee88a0df5702ea6 Mon Sep 17 00:00:00 2001 From: Luis Bosque Date: Mon, 20 Jun 2016 10:04:22 +0200 Subject: [PATCH 29/38] Release 0.0.4 --- NEWS.md | 5 + release/crankshaft--0.0.3--0.0.4.sql | 8 + release/crankshaft--0.0.4--0.0.3.sql | 8 + release/crankshaft--0.0.4.sql | 403 ++++++++++++++++++ release/crankshaft.control | 4 +- .../0.0.4/crankshaft/crankshaft/__init__.py | 2 + .../crankshaft/clustering/__init__.py | 2 + .../crankshaft/clustering/kmeans.py | 18 + .../crankshaft/crankshaft/clustering/moran.py | 260 +++++++++++ .../crankshaft/pysal_utils/__init__.py | 1 + .../crankshaft/pysal_utils/pysal_utils.py | 152 +++++++ .../crankshaft/crankshaft/random_seeds.py | 10 + release/python/0.0.4/crankshaft/setup.py | 48 +++ .../crankshaft/test/fixtures/kmeans.json | 1 + .../0.0.4/crankshaft/test/fixtures/moran.json | 52 +++ .../crankshaft/test/fixtures/neighbors.json | 54 +++ .../python/0.0.4/crankshaft/test/helper.py | 13 + .../python/0.0.4/crankshaft/test/mock_plpy.py | 34 ++ .../crankshaft/test/test_cluster_kmeans.py | 38 ++ .../crankshaft/test/test_clustering_moran.py | 83 ++++ .../0.0.4/crankshaft/test/test_pysal_utils.py | 107 +++++ src/pg/crankshaft.control | 2 +- 22 files changed, 1302 insertions(+), 3 deletions(-) create mode 100644 release/crankshaft--0.0.3--0.0.4.sql create mode 100644 release/crankshaft--0.0.4--0.0.3.sql create mode 100644 release/crankshaft--0.0.4.sql create mode 100644 release/python/0.0.4/crankshaft/crankshaft/__init__.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/clustering/__init__.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/clustering/kmeans.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/pysal_utils/__init__.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/pysal_utils/pysal_utils.py create mode 100644 release/python/0.0.4/crankshaft/crankshaft/random_seeds.py create mode 100644 release/python/0.0.4/crankshaft/setup.py create mode 100644 release/python/0.0.4/crankshaft/test/fixtures/kmeans.json create mode 100644 release/python/0.0.4/crankshaft/test/fixtures/moran.json create mode 100644 release/python/0.0.4/crankshaft/test/fixtures/neighbors.json create mode 100644 release/python/0.0.4/crankshaft/test/helper.py create mode 100644 release/python/0.0.4/crankshaft/test/mock_plpy.py create mode 100644 release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py create mode 100644 release/python/0.0.4/crankshaft/test/test_clustering_moran.py create mode 100644 release/python/0.0.4/crankshaft/test/test_pysal_utils.py diff --git a/NEWS.md b/NEWS.md index ed66fd9..c011a0d 100644 --- a/NEWS.md +++ b/NEWS.md 
@@ -1,3 +1,8 @@ +0.0.4 (2016-06-20) +------------------ +* Remove cartodb extension dependency from tests +* Declare all correct dependencies with correct versions in setup.py + 0.0.3 (2016-06-16) ------------------ * Adds new functions: kmeans, weighted centroids. diff --git a/release/crankshaft--0.0.3--0.0.4.sql b/release/crankshaft--0.0.3--0.0.4.sql new file mode 100644 index 0000000..69038a3 --- /dev/null +++ b/release/crankshaft--0.0.3--0.0.4.sql @@ -0,0 +1,8 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.4'::text; +$$ language 'sql' STABLE STRICT; diff --git a/release/crankshaft--0.0.4--0.0.3.sql b/release/crankshaft--0.0.4--0.0.3.sql new file mode 100644 index 0000000..bd8ed82 --- /dev/null +++ b/release/crankshaft--0.0.4--0.0.3.sql @@ -0,0 +1,8 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.3'::text; +$$ language 'sql' STABLE STRICT; diff --git a/release/crankshaft--0.0.4.sql b/release/crankshaft--0.0.4.sql new file mode 100644 index 0000000..c855958 --- /dev/null +++ b/release/crankshaft--0.0.4.sql @@ -0,0 +1,403 @@ +--DO NOT MODIFY THIS FILE, IT IS GENERATED AUTOMATICALLY FROM SOURCES +-- Complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION crankshaft" to load this file. \quit +-- Version number of the extension release +CREATE OR REPLACE FUNCTION cdb_crankshaft_version() +RETURNS text AS $$ + SELECT '0.0.4'::text; +$$ language 'sql' STABLE STRICT; + +-- Internal identifier of the installed extension instence +-- e.g. 'dev' for current development version +CREATE OR REPLACE FUNCTION _cdb_crankshaft_internal_version() +RETURNS text AS $$ + SELECT installed_version FROM pg_available_extensions where name='crankshaft' and pg_available_extensions IS NOT NULL; +$$ language 'sql' STABLE STRICT; +-- Internal function. +-- Set the seeds of the RNGs (Random Number Generators) +-- used internally. 
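-- Editorial note (not part of the generated script): the seeding function
-- defined below is what makes the permutation-based Moran significance values
-- reproducible. The test suite calls it before exercising the public
-- functions, e.g.:
--   SELECT cdb_crankshaft._cdb_random_seeds(1234);
-- followed by a call such as (the table and column names here are hypothetical):
--   SELECT * FROM cdb_crankshaft.CDB_AreasOfInterestLocal('SELECT * FROM mytable', 'mycolumn');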
+CREATE OR REPLACE FUNCTION +_cdb_random_seeds (seed_value INTEGER) RETURNS VOID +AS $$ + from crankshaft import random_seeds + random_seeds.set_random_seeds(seed_value) +$$ LANGUAGE plpythonu; +-- Moran's I Global Measure (public-facing) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, significance NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_local(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocal( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspots( + subquery TEXT, + column_name TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, column_name, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspots( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliers( + subquery TEXT, + attr TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') + RETURNS TABLE (moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, 
quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocal(subquery, attr, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Global Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestGlobalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS TABLE (moran FLOAT, significance FLOAT) +AS $$ + from crankshaft.clustering import moran_local + # TODO: use named parameters or a dictionary + return moran_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + + +-- Moran's I Local Rate (internal function) +CREATE OR REPLACE FUNCTION + _CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT, + num_ngbrs INT, + permutations INT, + geom_col TEXT, + id_col TEXT) +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + from crankshaft.clustering import moran_local_rate + # TODO: use named parameters or a dictionary + return moran_local_rate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) +$$ LANGUAGE plpythonu; + +-- Moran's I Local Rate (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_AreasOfInterestLocalRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for HH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialHotspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HH', 'HL'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LL and LH (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialColdspotsRate( + subquery TEXT, + numerator TEXT, + denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('LL', 'LH'); + +$$ LANGUAGE SQL; + +-- Moran's I Local Rate only for LH and HL (public-facing function) +CREATE OR REPLACE FUNCTION + CDB_GetSpatialOutliersRate( + subquery TEXT, + numerator TEXT, + 
denominator TEXT, + w_type TEXT DEFAULT 'knn', + num_ngbrs INT DEFAULT 5, + permutations INT DEFAULT 99, + geom_col TEXT DEFAULT 'the_geom', + id_col TEXT DEFAULT 'cartodb_id') +RETURNS +TABLE(moran NUMERIC, quads TEXT, significance NUMERIC, rowid INT, vals NUMERIC) +AS $$ + + SELECT moran, quads, significance, rowid, vals + FROM cdb_crankshaft._CDB_AreasOfInterestLocalRate(subquery, numerator, denominator, w_type, num_ngbrs, permutations, geom_col, id_col) + WHERE quads IN ('HL', 'LH'); + +$$ LANGUAGE SQL; +CREATE OR REPLACE FUNCTION CDB_KMeans(query text, no_clusters integer,no_init integer default 20) +RETURNS table (cartodb_id integer, cluster_no integer) as $$ + + from crankshaft.clustering import kmeans + return kmeans(query,no_clusters,no_init) + +$$ language plpythonu; + + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC) +RETURNS Numeric[] AS +$$ +DECLARE + newX NUMERIC; + newY NUMERIC; + newW NUMERIC; +BEGIN + IF weight IS NULL OR the_geom IS NULL THEN + newX = state[1]; + newY = state[2]; + newW = state[3]; + ELSE + newX = state[1] + ST_X(the_geom)*weight; + newY = state[2] + ST_Y(the_geom)*weight; + newW = state[3] + weight; + END IF; + RETURN Array[newX,newY,newW]; + +END +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[]) +RETURNS GEOMETRY AS +$$ +BEGIN + IF state[3] = 0 THEN + RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326); + ELSE + RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326); + END IF; +END +$$ LANGUAGE plpgsql; + +CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)( + SFUNC = CDB_WeightedMeanS, + FINALFUNC = CDB_WeightedMeanF, + STYPE = Numeric[], + INITCOND = "{0.0,0.0,0.0}" +); +-- Function by Stuart Lynn for a simple interpolation of a value +-- from a polygon table over an arbitrary polygon +-- (weighted by the area proportion overlapped) +-- Aereal weighting is a very simple form of aereal interpolation. +-- +-- Parameters: +-- * geom a Polygon geometry which defines the area where a value will be +-- estimated as the area-weighted sum of a given table/column +-- * target_table_name table name of the table that provides the values +-- * target_column column name of the column that provides the values +-- * schema_name optional parameter to defina the schema the target table +-- belongs to, which is necessary if its not in the search_path. +-- Note that target_table_name should never include the schema in it. 
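-- Example call (editorial sketch, not part of the patch; the table and column
-- names below are hypothetical, and the function is assumed to be reachable
-- through the cdb_crankshaft schema):
--   SELECT cdb_crankshaft.cdb_overlap_sum(p.the_geom, 'census_blocks', 'population')
--   FROM my_polygons p;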
+-- Return value: +-- Aereal-weighted interpolation of the column values over the geometry +CREATE OR REPLACE +FUNCTION cdb_overlap_sum(geom geometry, target_table_name text, target_column text, schema_name text DEFAULT NULL) + RETURNS numeric AS +$$ +DECLARE + result numeric; + qualified_name text; +BEGIN + IF schema_name IS NULL THEN + qualified_name := Format('%I', target_table_name); + ELSE + qualified_name := Format('%I.%s', schema_name, target_table_name); + END IF; + EXECUTE Format(' + SELECT sum(%I*ST_Area(St_Intersection($1, a.the_geom))/ST_Area(a.the_geom)) + FROM %s AS a + WHERE $1 && a.the_geom + ', target_column, qualified_name) + USING geom + INTO result; + RETURN result; +END; +$$ LANGUAGE plpgsql; +-- +-- Creates N points randomly distributed arround the polygon +-- +-- @param g - the geometry to be turned in to points +-- +-- @param no_points - the number of points to generate +-- +-- @params max_iter_per_point - the function generates points in the polygon's bounding box +-- and discards points which don't lie in the polygon. max_iter_per_point specifies how many +-- misses per point the funciton accepts before giving up. +-- +-- Returns: Multipoint with the requested points +CREATE OR REPLACE FUNCTION cdb_dot_density(geom geometry , no_points Integer, max_iter_per_point Integer DEFAULT 1000) +RETURNS GEOMETRY AS $$ +DECLARE + extent GEOMETRY; + test_point Geometry; + width NUMERIC; + height NUMERIC; + x0 NUMERIC; + y0 NUMERIC; + xp NUMERIC; + yp NUMERIC; + no_left INTEGER; + remaining_iterations INTEGER; + points GEOMETRY[]; + bbox_line GEOMETRY; + intersection_line GEOMETRY; +BEGIN + extent := ST_Envelope(geom); + width := ST_XMax(extent) - ST_XMIN(extent); + height := ST_YMax(extent) - ST_YMIN(extent); + x0 := ST_XMin(extent); + y0 := ST_YMin(extent); + no_left := no_points; + + LOOP + if(no_left=0) THEN + EXIT; + END IF; + yp = y0 + height*random(); + bbox_line = ST_MakeLine( + ST_SetSRID(ST_MakePoint(yp, x0),4326), + ST_SetSRID(ST_MakePoint(yp, x0+width),4326) + ); + intersection_line = ST_Intersection(bbox_line,geom); + test_point = ST_LineInterpolatePoint(st_makeline(st_linemerge(intersection_line)),random()); + points := points || test_point; + no_left = no_left - 1 ; + END LOOP; + RETURN ST_Collect(points); +END; +$$ +LANGUAGE plpgsql VOLATILE; +-- Make sure by default there are no permissions for publicuser +-- NOTE: this happens at extension creation time, as part of an implicit transaction. 
+-- REVOKE ALL PRIVILEGES ON SCHEMA cdb_crankshaft FROM PUBLIC, publicuser CASCADE; + +-- Grant permissions on the schema to publicuser (but just the schema) +GRANT USAGE ON SCHEMA cdb_crankshaft TO publicuser; + +-- Revoke execute permissions on all functions in the schema by default +-- REVOKE EXECUTE ON ALL FUNCTIONS IN SCHEMA cdb_crankshaft FROM PUBLIC, publicuser; diff --git a/release/crankshaft.control b/release/crankshaft.control index 2029b7e..01088b1 100644 --- a/release/crankshaft.control +++ b/release/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.0.3' -requires = 'plpythonu, postgis, cartodb' +default_version = '0.0.4' +requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft diff --git a/release/python/0.0.4/crankshaft/crankshaft/__init__.py b/release/python/0.0.4/crankshaft/crankshaft/__init__.py new file mode 100644 index 0000000..d07e330 --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/__init__.py @@ -0,0 +1,2 @@ +import random_seeds +import clustering diff --git a/release/python/0.0.4/crankshaft/crankshaft/clustering/__init__.py b/release/python/0.0.4/crankshaft/crankshaft/clustering/__init__.py new file mode 100644 index 0000000..338e8ea --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/clustering/__init__.py @@ -0,0 +1,2 @@ +from moran import * +from kmeans import * diff --git a/release/python/0.0.4/crankshaft/crankshaft/clustering/kmeans.py b/release/python/0.0.4/crankshaft/crankshaft/clustering/kmeans.py new file mode 100644 index 0000000..4134062 --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/clustering/kmeans.py @@ -0,0 +1,18 @@ +from sklearn.cluster import KMeans +import plpy + +def kmeans(query, no_clusters, no_init=20): + data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids, + array_agg(ST_X(the_geom) order by cartodb_id) xs, + array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a + where the_geom is not null + '''.format(query=query)) + + xs = data[0]['xs'] + ys = data[0]['ys'] + ids = data[0]['ids'] + + km = KMeans(n_clusters= no_clusters, n_init=no_init) + labels = km.fit_predict(zip(xs,ys)) + return zip(ids,labels) + diff --git a/release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py b/release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py new file mode 100644 index 0000000..39b3ff6 --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/clustering/moran.py @@ -0,0 +1,260 @@ +""" +Moran's I geostatistics (global clustering & outliers presence) +""" + +# TODO: Fill in local neighbors which have null/NoneType values with the +# average of the their neighborhood + +import pysal as ps +import plpy + +# crankshaft module +import crankshaft.pysal_utils as pu + +# High level interface --------------------------------------- + +def moran(subquery, attr_name, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I (global) + Implementation building neighbors with a PostGIS database and Moran's I + core clusters with PySAL. 
+ Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": attr_name, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + attr_vals = pu.get_attributes(result) + + ## calculate weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global + moran_global = ps.esda.moran.Moran(attr_vals, weight, + permutations=permutations) + + return zip([moran_global.I], [moran_global.EI]) + +def moran_local(subquery, attr, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I implementation for PL/Python + Andy Eschbacher + """ + + # geometries with attributes that are null are ignored + # resulting in a collection of not as near neighbors + + qvals = {"id_col": id_col, + "attr1": attr, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(5) + + attr_vals = pu.get_attributes(result) + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local(attr_vals, weight, + permutations=permutations) + + # find quadrants for each geometry + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Rate (global) + Andy Eschbacher + """ + qvals = {"id_col": id_col, + "attr1": numerator, + "attr2": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs} + + query = pu.construct_neighbor_query(w_type, qvals) + + plpy.notice('** Query: %s' % query) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(2) + plpy.notice('** Query returned with %d rows' % len(result)) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(2) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + ## calculate moran global rate + lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight, + permutations=permutations) + + return zip([lisa_rate.I], [lisa_rate.EI]) + +def moran_local_rate(subquery, numerator, denominator, + w_type, num_ngbrs, permutations, geom_col, id_col): + """ + Moran's I Local Rate + Andy Eschbacher + """ + # geometries with values that are null are ignored + # resulting in a collection of not as near neighbors + + query = 
pu.construct_neighbor_query(w_type, + {"id_col": id_col, + "numerator": numerator, + "denominator": denominator, + "geom_col": geom_col, + "subquery": subquery, + "num_ngbrs": num_ngbrs}) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(5) + except plpy.SPIError: + plpy.error('Error: areas of interest query failed, check input parameters') + plpy.notice('** Query failed: "%s"' % query) + plpy.notice('** Error: %s' % plpy.SPIError) + return pu.empty_zipped_array(5) + + ## collect attributes + numer = pu.get_attributes(result, 1) + denom = pu.get_attributes(result, 2) + + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight, + permutations=permutations) + + # find units of significance + quads = quad_position(lisa.q) + + return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y) + +def moran_local_bv(subquery, attr1, attr2, + permutations, geom_col, id_col, w_type, num_ngbrs): + """ + Moran's I (local) Bivariate (untested) + """ + plpy.notice('** Constructing query') + + qvals = {"num_ngbrs": num_ngbrs, + "attr1": attr1, + "attr2": attr2, + "subquery": subquery, + "geom_col": geom_col, + "id_col": id_col} + + query = pu.construct_neighbor_query(w_type, qvals) + + try: + result = plpy.execute(query) + # if there are no neighbors, exit + if len(result) == 0: + return pu.empty_zipped_array(4) + except plpy.SPIError: + plpy.error("Error: areas of interest query failed, " \ + "check input parameters") + plpy.notice('** Query failed: "%s"' % query) + return pu.empty_zipped_array(4) + + ## collect attributes + attr1_vals = pu.get_attributes(result, 1) + attr2_vals = pu.get_attributes(result, 2) + + # create weights + weight = pu.get_weight(result, w_type, num_ngbrs) + + # calculate LISA values + lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight, + permutations=permutations) + + plpy.notice("len of Is: %d" % len(lisa.Is)) + + # find clustering of significance + lisa_sig = quad_position(lisa.q) + + plpy.notice('** Finished calculations') + + return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order) + +# Low level functions ---------------------------------------- + +def map_quads(coord): + """ + Map a quadrant number to Moran's I designation + HH=1, LH=2, LL=3, HL=4 + Input: + @param coord (int): quadrant of a specific measurement + Output: + classification (one of 'HH', 'LH', 'LL', or 'HL') + """ + if coord == 1: + return 'HH' + elif coord == 2: + return 'LH' + elif coord == 3: + return 'LL' + elif coord == 4: + return 'HL' + else: + return None + +def quad_position(quads): + """ + Produce Moran's I classification based of n + Input: + @param quads ndarray: an array of quads classified by + 1-4 (PySAL default) + Output: + @param list: an array of quads classied by 'HH', 'LL', etc. 
+ """ + return [map_quads(q) for q in quads] diff --git a/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/__init__.py b/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/__init__.py new file mode 100644 index 0000000..835880d --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/__init__.py @@ -0,0 +1 @@ +from pysal_utils import * diff --git a/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/pysal_utils.py b/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/pysal_utils.py new file mode 100644 index 0000000..02b5e35 --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/pysal_utils/pysal_utils.py @@ -0,0 +1,152 @@ +""" + Utilities module for generic PySAL functionality, mainly centered on translating queries into numpy arrays or PySAL weights objects +""" + +import numpy as np +import pysal as ps + +def construct_neighbor_query(w_type, query_vals): + """Return query (a string) used for finding neighbors + @param w_type text: type of neighbors to calculate ('knn' or 'queen') + @param query_vals dict: values used to construct the query + """ + + if w_type.lower() == 'knn': + return knn(query_vals) + else: + return queen(query_vals) + +## Build weight object +def get_weight(query_res, w_type='knn', num_ngbrs=5): + """ + Construct PySAL weight from return value of query + @param query_res: query results with attributes and neighbors + """ + if w_type.lower() == 'knn': + row_normed_weights = [1.0 / float(num_ngbrs)] * num_ngbrs + weights = {x['id']: row_normed_weights for x in query_res} + else: + weights = {x['id']: [1.0 / len(x['neighbors'])] * len(x['neighbors']) + if len(x['neighbors']) > 0 + else [] for x in query_res} + + neighbors = {x['id']: x['neighbors'] for x in query_res} + + return ps.W(neighbors, weights) + +def query_attr_select(params): + """ + Create portion of SELECT statement for attributes inolved in query. + @param params: dict of information used in query (column names, + table name, etc.) + """ + + attrs = [k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')] + + template = "i.\"{%(col)s}\"::numeric As attr%(alias_num)s, " + + attr_string = "" + + for idx, val in enumerate(sorted(attrs)): + attr_string += template % {"col": val, "alias_num": idx + 1} + + return attr_string + +def query_attr_where(params): + """ + Create portion of WHERE clauses for weeding out NULL-valued geometries + """ + attrs = sorted([k for k in params + if k not in ('id_col', 'geom_col', 'subquery', 'num_ngbrs')]) + + attr_string = [] + + for attr in attrs: + attr_string.append("idx_replace.\"{%s}\" IS NOT NULL" % attr) + + if len(attrs) == 2: + attr_string.append("idx_replace.\"{%s}\" <> 0" % attrs[1]) + + out = " AND ".join(attr_string) + + return out + +def knn(params): + """SQL query for k-nearest neighbors. 
+ @param vars: dict of values to fill template + """ + + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE " \ + "i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "%(attr_where_j)s " \ + "ORDER BY " \ + "j.\"{geom_col}\" <-> i.\"{geom_col}\" ASC " \ + "LIMIT {num_ngbrs})" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## SQL query for finding queens neighbors (all contiguous polygons) +def queen(params): + """SQL query for queen neighbors. + @param params dict: information to fill query + """ + attr_select = query_attr_select(params) + attr_where = query_attr_where(params) + + replacements = {"attr_select": attr_select, + "attr_where_i": attr_where.replace("idx_replace", "i"), + "attr_where_j": attr_where.replace("idx_replace", "j")} + + query = "SELECT " \ + "i.\"{id_col}\" As id, " \ + "%(attr_select)s" \ + "(SELECT ARRAY(SELECT j.\"{id_col}\" " \ + "FROM ({subquery}) As j " \ + "WHERE i.\"{id_col}\" <> j.\"{id_col}\" AND " \ + "ST_Touches(i.\"{geom_col}\", j.\"{geom_col}\") AND " \ + "%(attr_where_j)s)" \ + ") As neighbors " \ + "FROM ({subquery}) As i " \ + "WHERE " \ + "%(attr_where_i)s " \ + "ORDER BY i.\"{id_col}\" ASC;" % replacements + + return query.format(**params) + +## to add more weight methods open a ticket or pull request + +def get_attributes(query_res, attr_num=1): + """ + @param query_res: query results with attributes and neighbors + @param attr_num: attribute number (1, 2, ...) + """ + return np.array([x['attr' + str(attr_num)] for x in query_res], dtype=np.float) + +def empty_zipped_array(num_nones): + """ + prepare return values for cases of empty weights objects (no neighbors) + Input: + @param num_nones int: number of columns (e.g., 4) + Output: + [(None, None, None, None)] + """ + + return [tuple([None] * num_nones)] diff --git a/release/python/0.0.4/crankshaft/crankshaft/random_seeds.py b/release/python/0.0.4/crankshaft/crankshaft/random_seeds.py new file mode 100644 index 0000000..b7c8eed --- /dev/null +++ b/release/python/0.0.4/crankshaft/crankshaft/random_seeds.py @@ -0,0 +1,10 @@ +import random +import numpy + +def set_random_seeds(value): + """ + Set the seeds of the RNGs (Random Number Generators) + used internally. 
+ """ + random.seed(value) + numpy.random.seed(value) diff --git a/release/python/0.0.4/crankshaft/setup.py b/release/python/0.0.4/crankshaft/setup.py new file mode 100644 index 0000000..32d1ead --- /dev/null +++ b/release/python/0.0.4/crankshaft/setup.py @@ -0,0 +1,48 @@ + +""" +CartoDB Spatial Analysis Python Library +See: +https://github.com/CartoDB/crankshaft +""" + +from setuptools import setup, find_packages + +setup( + name='crankshaft', + + version='0.0.4', + + description='CartoDB Spatial Analysis Python Library', + + url='https://github.com/CartoDB/crankshaft', + + author='Data Services Team - CartoDB', + author_email='dataservices@cartodb.com', + + license='MIT', + + classifiers=[ + 'Development Status :: 3 - Alpha', + 'Intended Audience :: Mapping comunity', + 'Topic :: Maps :: Mapping Tools', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: Python :: 2.7', + ], + + keywords='maps mapping tools spatial analysis geostatistics', + + packages=find_packages(exclude=['contrib', 'docs', 'tests']), + + extras_require={ + 'dev': ['unittest'], + 'test': ['unittest', 'nose', 'mock'], + }, + + # The choice of component versions is dictated by what's + # provisioned in the production servers. + install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'], + + requires=['pysal', 'numpy', 'sklearn'], + + test_suite='test' +) diff --git a/release/python/0.0.4/crankshaft/test/fixtures/kmeans.json b/release/python/0.0.4/crankshaft/test/fixtures/kmeans.json new file mode 100644 index 0000000..8f31c79 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/fixtures/kmeans.json @@ -0,0 +1 @@ +[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}] \ No newline at end of file diff --git 
a/release/python/0.0.4/crankshaft/test/fixtures/moran.json b/release/python/0.0.4/crankshaft/test/fixtures/moran.json new file mode 100644 index 0000000..2f75cf1 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/fixtures/moran.json @@ -0,0 +1,52 @@ +[[0.9319096128346788, "HH"], +[-1.135787401862846, "HL"], +[0.11732030672508517, "LL"], +[0.6152779669180425, "LL"], +[-0.14657336660125297, "LH"], +[0.6967858120189607, "LL"], +[0.07949310115714454, "HH"], +[0.4703198759258987, "HH"], +[0.4421125200498064, "HH"], +[0.5724288737143592, "LL"], +[0.8970743435692062, "LL"], +[0.18327334401918674, "LL"], +[-0.01466729201304962, "HL"], +[0.3481559372544409, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329988, "HH"], +[0.4373841193538136, "HH"], +[0.15971286468915544, "LL"], +[1.0543588860308968, "HH"], +[1.7372866900020818, "HH"], +[1.091998586053999, "LL"], +[0.1171572584252222, "HH"], +[0.08438455015300014, "LL"], +[0.06547094736902978, "LL"], +[0.15482141569329985, "HH"], +[1.1627044812890683, "HH"], +[0.06547094736902978, "LL"], +[0.795275137550483, "HH"], +[0.18562939195219, "LL"], +[0.3010757406693439, "LL"], +[2.8205795942839376, "HH"], +[0.11259190602909264, "LL"], +[-0.07116352791516614, "HL"], +[-0.09945240794119009, "LH"], +[0.18562939195219, "LL"], +[0.1832733440191868, "LL"], +[-0.39054253768447705, "HL"], +[-0.1672071289487642, "HL"], +[0.3337669247916343, "HH"], +[0.2584386102554792, "HH"], +[-0.19733845476322634, "HL"], +[-0.9379282899805409, "LH"], +[-0.028770969951095866, "LH"], +[0.051367269430983485, "LL"], +[-0.2172548045913472, "LH"], +[0.05136726943098351, "LL"], +[0.04191046803899837, "LL"], +[0.7482357030403517, "HH"], +[-0.014585767863118111, "LH"], +[0.5410013139159929, "HH"], +[1.0223932668429925, "LL"], +[1.4179402898927476, "LL"]] \ No newline at end of file diff --git a/release/python/0.0.4/crankshaft/test/fixtures/neighbors.json b/release/python/0.0.4/crankshaft/test/fixtures/neighbors.json new file mode 100644 index 0000000..055b359 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/fixtures/neighbors.json @@ -0,0 +1,54 @@ +[ + {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5}, + {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7}, + {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2}, + {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1}, + {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3}, + {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05}, + {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4}, + {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7}, + {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5}, + {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04}, + {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08}, + {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2}, + {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4}, + {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2}, + {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3}, + {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4}, + {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6}, + {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3}, + {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7}, + {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8}, + {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1}, + {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4}, + {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1}, + {"neighbors": [33, 49, 
34, 47, 27], "id": 24, "value": 0.3}, + {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4}, + {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6}, + {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3}, + {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8}, + {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3}, + {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1}, + {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9}, + {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3}, + {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4}, + {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3}, + {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3}, + {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2}, + {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5}, + {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4}, + {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6}, + {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5}, + {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4}, + {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2}, + {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3}, + {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2}, + {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3}, + {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2}, + {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3}, + {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5}, + {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2}, + {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6}, + {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01}, + {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01} + ] diff --git a/release/python/0.0.4/crankshaft/test/helper.py b/release/python/0.0.4/crankshaft/test/helper.py new file mode 100644 index 0000000..7d28b94 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/helper.py @@ -0,0 +1,13 @@ +import unittest + +from mock_plpy import MockPlPy +plpy = MockPlPy() + +import sys +sys.modules['plpy'] = plpy + +import os + +def fixture_file(name): + dir = os.path.dirname(os.path.realpath(__file__)) + return os.path.join(dir, 'fixtures', name) diff --git a/release/python/0.0.4/crankshaft/test/mock_plpy.py b/release/python/0.0.4/crankshaft/test/mock_plpy.py new file mode 100644 index 0000000..63c88f6 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/mock_plpy.py @@ -0,0 +1,34 @@ +import re + +class MockPlPy: + def __init__(self): + self._reset() + + def _reset(self): + self.infos = [] + self.notices = [] + self.debugs = [] + self.logs = [] + self.warnings = [] + self.errors = [] + self.fatals = [] + self.executes = [] + self.results = [] + self.prepares = [] + self.results = [] + + def _define_result(self, query, result): + pattern = re.compile(query, re.IGNORECASE | re.MULTILINE) + self.results.append([pattern, result]) + + def notice(self, msg): + self.notices.append(msg) + + def info(self, msg): + self.infos.append(msg) + + def execute(self, query): # TODO: additional arguments + for result in self.results: + if result[0].match(query): + return result[1] + return [] diff --git a/release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py b/release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py new file mode 100644 index 0000000..aba8e07 --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/test_cluster_kmeans.py @@ -0,0 +1,38 @@ +import unittest +import numpy as np + + +# from mock_plpy 
import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file +import numpy as np +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class KMeansTest(unittest.TestCase): + """Testing class for the k-means clustering function""" + + def setUp(self): + plpy._reset() + self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read()) + self.params = {"subquery": "select * from table", + "no_clusters": "10" + } + + def test_kmeans(self): + data = self.cluster_data + plpy._define_result('select' ,data) + clusters = cc.kmeans('subquery', 2) + labels = [a[1] for a in clusters] + c1 = [a for a in clusters if a[1]==0] + c2 = [a for a in clusters if a[1]==1] + + self.assertEqual(len(np.unique(labels)),2) + self.assertEqual(len(c1),20) + self.assertEqual(len(c2),20) + diff --git a/release/python/0.0.4/crankshaft/test/test_clustering_moran.py b/release/python/0.0.4/crankshaft/test/test_clustering_moran.py new file mode 100644 index 0000000..393e93b --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/test_clustering_moran.py @@ -0,0 +1,83 @@ +import unittest +import numpy as np + + +# from mock_plpy import MockPlPy +# plpy = MockPlPy() +# +# import sys +# sys.modules['plpy'] = plpy +from helper import plpy, fixture_file + +import crankshaft.clustering as cc +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds +import json + +class MoranTest(unittest.TestCase): + """Testing class for Moran's I functions""" + + def setUp(self): + plpy._reset() + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + self.neighbors_data = json.loads(open(fixture_file('neighbors.json')).read()) + self.moran_data = json.loads(open(fixture_file('moran.json')).read()) + + def test_map_quads(self): + """Test map_quads""" + self.assertEqual(cc.map_quads(1), 'HH') + self.assertEqual(cc.map_quads(2), 'LH') + self.assertEqual(cc.map_quads(3), 'LL') + self.assertEqual(cc.map_quads(4), 'HL') + self.assertEqual(cc.map_quads(33), None) + self.assertEqual(cc.map_quads('andy'), None) + + def test_quad_position(self): + """Test lisa_sig_vals""" + + quads = np.array([1, 2, 3, 4], np.int) + + ans = np.array(['HH', 'LH', 'LL', 'HL']) + test_ans = cc.quad_position(quads) + + self.assertTrue((test_ans == ans).all()) + + def test_moran_local(self): + """Test Moran's I local""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local('subquery', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + self.assertEqual(res_quad, exp_quad) + + def test_moran_local_rate(self): + """Test Moran's I rate""" + data = [ { 'id': d['id'], 'attr1': d['value'], 'attr2': 1, 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1234) + result = cc.moran_local_rate('subquery', 'numerator', 'denominator', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None? 
', result == None + result = [(row[0], row[1]) for row in result] + expected = self.moran_data + for ([res_val, res_quad], [exp_val, exp_quad]) in zip(result, expected): + self.assertAlmostEqual(res_val, exp_val) + + def test_moran(self): + """Test Moran's I global""" + data = [{ 'id': d['id'], 'attr1': d['value'], 'neighbors': d['neighbors'] } for d in self.neighbors_data] + plpy._define_result('select', data) + random_seeds.set_random_seeds(1235) + result = cc.moran('table', 'value', 'knn', 5, 99, 'the_geom', 'cartodb_id') + print 'result == None?', result == None + result_moran = result[0][0] + expected_moran = np.array([row[0] for row in self.moran_data]).mean() + self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2) diff --git a/release/python/0.0.4/crankshaft/test/test_pysal_utils.py b/release/python/0.0.4/crankshaft/test/test_pysal_utils.py new file mode 100644 index 0000000..4ea0d9b --- /dev/null +++ b/release/python/0.0.4/crankshaft/test/test_pysal_utils.py @@ -0,0 +1,107 @@ +import unittest + +import crankshaft.pysal_utils as pu +from crankshaft import random_seeds + + +class PysalUtilsTest(unittest.TestCase): + """Testing class for utility functions related to PySAL integrations""" + + def setUp(self): + self.params = {"id_col": "cartodb_id", + "attr1": "andy", + "attr2": "jay_z", + "subquery": "SELECT * FROM a_list", + "geom_col": "the_geom", + "num_ngbrs": 321} + + def test_query_attr_select(self): + """Test query_attr_select""" + + ans = "i.\"{attr1}\"::numeric As attr1, " \ + "i.\"{attr2}\"::numeric As attr2, " + + self.assertEqual(pu.query_attr_select(self.params), ans) + + def test_query_attr_where(self): + """Test pu.query_attr_where""" + + ans = "idx_replace.\"{attr1}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" IS NOT NULL AND " \ + "idx_replace.\"{attr2}\" <> 0" + + self.assertEqual(pu.query_attr_where(self.params), ans) + + def test_knn(self): + """Test knn neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0 " \ + "ORDER BY " \ + "j.\"the_geom\" <-> i.\"the_geom\" ASC " \ + "LIMIT 321)) As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.knn(self.params), ans) + + def test_queen(self): + """Test queen neighbors constructor""" + + ans = "SELECT i.\"cartodb_id\" As id, " \ + "i.\"andy\"::numeric As attr1, " \ + "i.\"jay_z\"::numeric As attr2, " \ + "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \ + "FROM (SELECT * FROM a_list) As j " \ + "WHERE " \ + "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \ + "ST_Touches(i.\"the_geom\", " \ + "j.\"the_geom\") AND " \ + "j.\"andy\" IS NOT NULL AND " \ + "j.\"jay_z\" IS NOT NULL AND " \ + "j.\"jay_z\" <> 0)" \ + ") As neighbors " \ + "FROM (SELECT * FROM a_list) As i " \ + "WHERE i.\"andy\" IS NOT NULL AND " \ + "i.\"jay_z\" IS NOT NULL AND " \ + "i.\"jay_z\" <> 0 " \ + "ORDER BY i.\"cartodb_id\" ASC;" + + self.assertEqual(pu.queen(self.params), ans) + + def test_construct_neighbor_query(self): + """Test construct_neighbor_query""" + + # Compare to raw knn query + self.assertEqual(pu.construct_neighbor_query('knn', self.params), + 
pu.knn(self.params)) + + def test_get_attributes(self): + """Test get_attributes""" + + ## need to add tests + + self.assertEqual(True, True) + + def test_get_weight(self): + """Test get_weight""" + + self.assertEqual(True, True) + + def test_empty_zipped_array(self): + """Test empty_zipped_array""" + ans2 = [(None, None)] + ans4 = [(None, None, None, None)] + self.assertEqual(pu.empty_zipped_array(2), ans2) + self.assertEqual(pu.empty_zipped_array(4), ans4) diff --git a/src/pg/crankshaft.control b/src/pg/crankshaft.control index e71321f..01088b1 100644 --- a/src/pg/crankshaft.control +++ b/src/pg/crankshaft.control @@ -1,5 +1,5 @@ comment = 'CartoDB Spatial Analysis extension' -default_version = '0.0.3' +default_version = '0.0.4' requires = 'plpythonu, postgis' superuser = true schema = cdb_crankshaft From 1912d57891d539a15da5df8511f184e8e0c20771 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 21 Jun 2016 17:31:17 -0400 Subject: [PATCH 30/38] replacing dict with ordered dict --- .../crankshaft/crankshaft/clustering/moran.py | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 39b3ff6..103670f 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -7,6 +7,7 @@ Moran's I geostatistics (global clustering & outliers presence) import pysal as ps import plpy +from collections import OrderedDict # crankshaft module import crankshaft.pysal_utils as pu @@ -21,11 +22,11 @@ def moran(subquery, attr_name, core clusters with PySAL. Andy Eschbacher """ - qvals = {"id_col": id_col, - "attr1": attr_name, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) query = pu.construct_neighbor_query(w_type, qvals) @@ -65,11 +66,11 @@ def moran_local(subquery, attr, # geometries with attributes that are null are ignored # resulting in a collection of not as near neighbors - qvals = {"id_col": id_col, - "attr1": attr, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr_name), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) query = pu.construct_neighbor_query(w_type, qvals) @@ -101,12 +102,12 @@ def moran_rate(subquery, numerator, denominator, Moran's I Rate (global) Andy Eschbacher """ - qvals = {"id_col": id_col, - "attr1": numerator, - "attr2": denominator, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs} + qvals = OrderedDict([("id_col", id_col), + ("attr1", numerator), + ("attr2", denominator), + ("geom_col", geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) query = pu.construct_neighbor_query(w_type, qvals) @@ -145,13 +146,14 @@ def moran_local_rate(subquery, numerator, denominator, # geometries with values that are null are ignored # resulting in a collection of not as near neighbors - query = pu.construct_neighbor_query(w_type, - {"id_col": id_col, - "numerator": numerator, - "denominator": denominator, - "geom_col": geom_col, - "subquery": subquery, - "num_ngbrs": num_ngbrs}) + qvals = OrderedDict([("id_col", id_col), + ("numerator", numerator), + ("denominator", denominator), + ("geom_col": geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) + + query = 
pu.construct_neighbor_query(w_type, qvals) try: result = plpy.execute(query) @@ -186,12 +188,12 @@ def moran_local_bv(subquery, attr1, attr2, """ plpy.notice('** Constructing query') - qvals = {"num_ngbrs": num_ngbrs, - "attr1": attr1, - "attr2": attr2, - "subquery": subquery, - "geom_col": geom_col, - "id_col": id_col} + qvals = OrderedDict([("id_col", id_col), + ("attr1", attr1), + ("attr2", attr2), + ("geom_col": geom_col), + ("subquery", subquery), + ("num_ngbrs", num_ngbrs)]) query = pu.construct_neighbor_query(w_type, qvals) From 7c4314a4113baf852e260af4995e77ca8f8a4a9e Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 21 Jun 2016 17:38:49 -0400 Subject: [PATCH 31/38] fix tuple colon --- src/py/crankshaft/crankshaft/clustering/moran.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 103670f..08fe127 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -149,7 +149,7 @@ def moran_local_rate(subquery, numerator, denominator, qvals = OrderedDict([("id_col", id_col), ("numerator", numerator), ("denominator", denominator), - ("geom_col": geom_col), + ("geom_col", geom_col), ("subquery", subquery), ("num_ngbrs", num_ngbrs)]) @@ -191,7 +191,7 @@ def moran_local_bv(subquery, attr1, attr2, qvals = OrderedDict([("id_col", id_col), ("attr1", attr1), ("attr2", attr2), - ("geom_col": geom_col), + ("geom_col", geom_col), ("subquery", subquery), ("num_ngbrs", num_ngbrs)]) From b62d7b32efdb5c96f642a4947fee12f51dd489b9 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Tue, 21 Jun 2016 17:41:52 -0400 Subject: [PATCH 32/38] fix variable name --- src/py/crankshaft/crankshaft/clustering/moran.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index 08fe127..4bced89 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -67,7 +67,7 @@ def moran_local(subquery, attr, # resulting in a collection of not as near neighbors qvals = OrderedDict([("id_col", id_col), - ("attr1", attr_name), + ("attr1", attr), ("geom_col", geom_col), ("subquery", subquery), ("num_ngbrs", num_ngbrs)]) From 81d7af9e9aeaaedda2e12b3e454d61b0a28286f3 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 22 Jun 2016 15:21:09 -0400 Subject: [PATCH 33/38] fixes return problem --- src/pg/sql/08_interpolation.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pg/sql/08_interpolation.sql b/src/pg/sql/08_interpolation.sql index 04f1584..76fad01 100644 --- a/src/pg/sql/08_interpolation.sql +++ b/src/pg/sql/08_interpolation.sql @@ -14,9 +14,12 @@ $$ DECLARE gs geometry[]; vs numeric[]; + output numeric; BEGIN EXECUTE 'WITH a AS('||query||') SELECT array_agg(the_geom), array_agg(attrib) FROM a' INTO gs, vs; - RETURN QUERY SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) FROM a; + SELECT CDB_SpatialInterpolation(gs, vs, point, method, p1,p2) INTO output FROM a; + + RETURN output; END; $$ language plpgsql IMMUTABLE; From 6f72075999b3d3887018d9574e4e25abd5214873 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 22 Jun 2016 16:50:10 -0400 Subject: [PATCH 34/38] altering test outputs for less formatting --- src/pg/test/expected/08_interpolation_test.out | 6 ++---- src/pg/test/sql/08_interpolation_test.sql | 5 ++++- 2 
files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index 42d24cb..b927f63 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -1,4 +1,2 @@ - cdb_spatialinterpolation --------------------------- - 780.79470198683925288365 -(1 row) +cdb_spatialinterpolation +t diff --git a/src/pg/test/sql/08_interpolation_test.sql b/src/pg/test/sql/08_interpolation_test.sql index c8db89d..43e7ee9 100644 --- a/src/pg/test/sql/08_interpolation_test.sql +++ b/src/pg/test/sql/08_interpolation_test.sql @@ -1,6 +1,9 @@ +\pset format unaligned +\set ECHO all + WITH a AS ( SELECT ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g ) -SELECT CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'),1) FROM a; +SELECT (cdb_crankshaft.CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'), 1) - 780.79470198683925288365) / 780.79470198683925288365 < 0.001 FROM a; From 6a9045ba62551e4db3637ad99acad52d591092bd Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 22 Jun 2016 16:56:35 -0400 Subject: [PATCH 35/38] updating test outputs --- src/pg/test/expected/08_interpolation_test.out | 8 ++++++++ src/pg/test/sql/08_interpolation_test.sql | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index b927f63..49566db 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -1,2 +1,10 @@ +\pset format unaligned +\set ECHO all + +WITH a AS ( + SELECT + ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, + ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g +) cdb_spatialinterpolation t diff --git a/src/pg/test/sql/08_interpolation_test.sql b/src/pg/test/sql/08_interpolation_test.sql index 43e7ee9..ba8968f 100644 --- a/src/pg/test/sql/08_interpolation_test.sql +++ b/src/pg/test/sql/08_interpolation_test.sql @@ -6,4 +6,4 @@ WITH a AS ( ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g ) -SELECT (cdb_crankshaft.CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'), 1) - 780.79470198683925288365) / 780.79470198683925288365 < 0.001 FROM a; +SELECT (cdb_crankshaft.CDB_SpatialInterpolation(g, vals, ST_GeomFromText('POINT(2.154 41.37)'), 1) - 780.79470198683925288365) / 780.79470198683925288365 < 0.001 As cdb_spatialinterpolation FROM a; From 3f210c2a71b02b5b8b6a527b79514a9c19260ce3 Mon Sep 17 00:00:00 2001 
From: Andy Eschbacher Date: Wed, 22 Jun 2016 17:08:50 -0400 Subject: [PATCH 36/38] reducing amt of text in outputs --- src/pg/test/expected/08_interpolation_test.out | 10 ++-------- src/pg/test/sql/08_interpolation_test.sql | 3 ++- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index 49566db..bb8c73e 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -1,10 +1,4 @@ -\pset format unaligned -\set ECHO all - -WITH a AS ( - SELECT - ARRAY[800, 700, 600, 500, 400, 300, 200, 100] AS vals, - ARRAY[ST_GeomFromText('POINT(2.1744 41.403)'),ST_GeomFromText('POINT(2.1228 41.380)'),ST_GeomFromText('POINT(2.1511 41.374)'),ST_GeomFromText('POINT(2.1528 41.413)'),ST_GeomFromText('POINT(2.165 41.391)'),ST_GeomFromText('POINT(2.1498 41.371)'),ST_GeomFromText('POINT(2.1533 41.368)'),ST_GeomFromText('POINT(2.131386 41.41399)')] AS g -) +SET client_min_messages TO WARNING; +\set ECHO none cdb_spatialinterpolation t diff --git a/src/pg/test/sql/08_interpolation_test.sql b/src/pg/test/sql/08_interpolation_test.sql index ba8968f..bd9c729 100644 --- a/src/pg/test/sql/08_interpolation_test.sql +++ b/src/pg/test/sql/08_interpolation_test.sql @@ -1,5 +1,6 @@ +SET client_min_messages TO WARNING; +\set ECHO none \pset format unaligned -\set ECHO all WITH a AS ( SELECT From 2fa087bb62544cda84931f8c39233074572c9564 Mon Sep 17 00:00:00 2001 From: Andy Eschbacher Date: Wed, 22 Jun 2016 17:11:51 -0400 Subject: [PATCH 37/38] adding row info :/ --- src/pg/test/expected/08_interpolation_test.out | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pg/test/expected/08_interpolation_test.out b/src/pg/test/expected/08_interpolation_test.out index bb8c73e..635ca2a 100644 --- a/src/pg/test/expected/08_interpolation_test.out +++ b/src/pg/test/expected/08_interpolation_test.out @@ -2,3 +2,4 @@ SET client_min_messages TO WARNING; \set ECHO none cdb_spatialinterpolation t +(1 row) From faa899cf8707adab4fc49834afa0a4d566abb481 Mon Sep 17 00:00:00 2001 From: Rafa de la Torre Date: Thu, 23 Jun 2016 10:11:59 +0200 Subject: [PATCH 38/38] Fix installation for development mode --- src/py/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/py/Makefile b/src/py/Makefile index b584645..cc3c67e 100644 --- a/src/py/Makefile +++ b/src/py/Makefile @@ -2,7 +2,7 @@ include ../../Makefile.global # Install the package locally for development install: - pip install ./crankshaft + pip install --upgrade ./crankshaft # Test develpment install test: