From 4e86965f033f14252ded35534bbd2c911e104da7 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@cdb-dev.localhost.lan>
Date: Tue, 7 Jun 2016 19:58:32 +0000
Subject: [PATCH 1/9] KMeans clustering and weighted centroid analysis

---
 doc/11_kmeans.md                              | 62 +++++++++++++++++++
 src/pg/sql/11_kmeans.sql                      | 31 ++++++++++
 src/pg/test/expected/05_kmeans_test.out       | 10 +++
 src/pg/test/sql/05_kmeans_test.sql            |  6 ++
 .../crankshaft/clustering/__init__.py         |  1 +
 .../crankshaft/clustering/kmeans.py           | 17 +++++
 src/py/crankshaft/test/fixtures/kmeans.json   |  1 +
 src/py/crankshaft/test/test_cluster_kmeans.py | 38 ++++++++++++
 8 files changed, 166 insertions(+)
 create mode 100644 doc/11_kmeans.md
 create mode 100644 src/pg/sql/11_kmeans.sql
 create mode 100644 src/pg/test/expected/05_kmeans_test.out
 create mode 100644 src/pg/test/sql/05_kmeans_test.sql
 create mode 100644 src/py/crankshaft/crankshaft/clustering/kmeans.py
 create mode 100644 src/py/crankshaft/test/fixtures/kmeans.json
 create mode 100644 src/py/crankshaft/test/test_cluster_kmeans.py

diff --git a/doc/11_kmeans.md b/doc/11_kmeans.md
new file mode 100644
index 0000000..6153010
--- /dev/null
+++ b/doc/11_kmeans.md
@@ -0,0 +1,62 @@
+## K-Means Functions
+
+### CDB_KMeans(subquery text, no_clusters INTEGER)
+
+This function attempts to find n clusters within the input data. It will return a table to CartoDB ids and 
+the number of the cluster each point in the input was assigend to.
+
+
+#### Arguments
+
+| Name | Type | Description |
+|------|------|-------------|
+| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column name `the_geom` and id column name `cartodb_id` unless otherwise specified in the input arguments |
+| no\_clusters | INTEGER | The number of clusters to try and find |
+
+#### Returns
+
+A table with the following columns.
+
+| Column Name | Type | Description |
+|-------------|------|-------------|
+| cartodb\_id | INTEGER | The CartoDB id of the row in the input table.|
+| cluster\_no | INTEGER | The cluster that this point belongs to. |
+
+
+#### Example Usage
+
+```sql
+SELECT 
+    customers.*, 
+    km.cluster_no 
+    FROM cdb_crankshaft.CDB_Kmeans('SELECT * from customers' , 6) km, customers_3
+    WHERE customers.cartodb_id = km.cartodb_id
+```
+
+### CDB_WeightedMean(subquery text, weight_column text, category_column text)
+
+Function that computes the weighted centroid of a number of clusters by some weight column.
+
+### Arguments 
+
+| Name | Type | Description |
+|------|------|-------------|
+| subquery | TEXT | SQL query that exposes the data to be analyzed (e.g., `SELECT * FROM interesting_table`). This query must have the geometry column and the columns specified as the weight and category columns|
+| weight\_column | TEXT | The name of the column to use as a weight |
+| category\_column | TEXT | The name of the column to use as a category |
+
+### Returns 
+
+A table with the following columns.
+
+| Column Name | Type | Description |
+|-------------|------|-------------|
+| the\_geom | GEOMETRY | A point for the weighted cluster center |
+| class | INTEGER | The cluster class | 
+
+### Example Usage 
+
+```sql 
+SELECT ST_TRANSFORM(the_geom, 3857) as the_geom_webmercator, class 
+FROM cdb_weighted_mean('SELECT *, customer_value FROM customers','customer_value','cluster_no')
+```
diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql
new file mode 100644
index 0000000..73e2f1d
--- /dev/null
+++ b/src/pg/sql/11_kmeans.sql
@@ -0,0 +1,31 @@
+CREATE OR REPLACE FUNCTION  CDB_KMeans(query text, no_clusters integer,no_init integer default 20)
+RETURNS table (cartodb_id integer, cluster_no integer) as $$
+    
+    import plpy 
+    plpy.execute('SELECT cdb_crankshaft._cdb_crankshaft_activate_py()')
+    from crankshaft.clustering import kmeans
+    return kmeans(query,no_clusters,no_init)
+
+$$ language plpythonu;
+
+CREATE OR REPLACE FUNCTION CDB_WeightedMean(query text, weight_column text, category_column text default null )
+RETURNS table (the_geom geometry,class integer ) as $$
+BEGIN
+
+RETURN QUERY
+    EXECUTE format( $string$
+        select ST_SETSRID(st_makepoint(cx, cy),4326) the_geom, class  from (
+            select  
+                   %I as class,
+                   sum(st_x(the_geom)*%I)/sum(%I) cx,
+                   sum(st_y(the_geom)*%I)/sum(%I) cy
+                   from (%s) a
+                   group by %I
+            ) q          
+     
+        $string$, category_column, weight_column,weight_column,weight_column,weight_column,query, category_column 
+    )
+    using the_geom
+    RETURN;
+END 
+$$ LANGUAGE plpgsql;
diff --git a/src/pg/test/expected/05_kmeans_test.out b/src/pg/test/expected/05_kmeans_test.out
new file mode 100644
index 0000000..4e6db09
--- /dev/null
+++ b/src/pg/test/expected/05_kmeans_test.out
@@ -0,0 +1,10 @@
+\pset format unaligned
+\set ECHO all
+SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2);
+clusters
+2
+(1 row)
+SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' );
+clusters
+52
+(1 row)
diff --git a/src/pg/test/sql/05_kmeans_test.sql b/src/pg/test/sql/05_kmeans_test.sql
new file mode 100644
index 0000000..a400e5e
--- /dev/null
+++ b/src/pg/test/sql/05_kmeans_test.sql
@@ -0,0 +1,6 @@
+\pset format unaligned
+\set ECHO all
+
+SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2);
+
+SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' );
diff --git a/src/py/crankshaft/crankshaft/clustering/__init__.py b/src/py/crankshaft/crankshaft/clustering/__init__.py
index 0df080f..338e8ea 100644
--- a/src/py/crankshaft/crankshaft/clustering/__init__.py
+++ b/src/py/crankshaft/crankshaft/clustering/__init__.py
@@ -1 +1,2 @@
 from moran import *
+from kmeans import *
diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py
new file mode 100644
index 0000000..3d9ed58
--- /dev/null
+++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py
@@ -0,0 +1,17 @@
+from sklearn.cluster import KMeans
+import plpy
+
+def kmeans(query, no_clusters, no_init=20):
+    data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
+        array_agg(ST_X(the_geom) order by cartodb_id) xs,
+        array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
+    '''.format(query=query))
+
+    xs  = data[0]['xs']
+    ys  = data[0]['ys']
+    ids = data[0]['ids']
+
+    km = KMeans(n_clusters= no_clusters, n_init=no_init)
+    labels = km.fit_predict(zip(xs,ys))
+    return zip(ids,labels)
+
diff --git a/src/py/crankshaft/test/fixtures/kmeans.json b/src/py/crankshaft/test/fixtures/kmeans.json
new file mode 100644
index 0000000..8f31c79
--- /dev/null
+++ b/src/py/crankshaft/test/fixtures/kmeans.json
@@ -0,0 +1 @@
+[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
\ No newline at end of file
diff --git a/src/py/crankshaft/test/test_cluster_kmeans.py b/src/py/crankshaft/test/test_cluster_kmeans.py
new file mode 100644
index 0000000..aba8e07
--- /dev/null
+++ b/src/py/crankshaft/test/test_cluster_kmeans.py
@@ -0,0 +1,38 @@
+import unittest
+import numpy as np
+
+
+# from mock_plpy import MockPlPy
+# plpy = MockPlPy()
+#
+# import sys
+# sys.modules['plpy'] = plpy
+from helper import plpy, fixture_file
+import numpy as np
+import crankshaft.clustering as cc
+import crankshaft.pysal_utils as pu
+from crankshaft import random_seeds
+import json
+
+class KMeansTest(unittest.TestCase):
+    """Testing class for Moran's I functions"""
+
+    def setUp(self):
+        plpy._reset()
+        self.cluster_data = json.loads(open(fixture_file('kmeans.json')).read())
+        self.params = {"subquery": "select * from table",
+                       "no_clusters": "10"
+                       }
+
+    def test_kmeans(self):
+        data = self.cluster_data
+        plpy._define_result('select' ,data)
+        clusters = cc.kmeans('subquery', 2)
+        labels  = [a[1] for a in clusters]
+        c1 = [a for a in clusters if a[1]==0]
+        c2 = [a for a in clusters if a[1]==1]
+
+        self.assertEqual(len(np.unique(labels)),2)
+        self.assertEqual(len(c1),20)
+        self.assertEqual(len(c2),20)
+

From 7f3b23f67a958faa9162efef2362cc13a1e665ff Mon Sep 17 00:00:00 2001
From: Stuart Lynn <stuart.lynn@gmail.com>
Date: Fri, 10 Jun 2016 13:06:49 +0000
Subject: [PATCH 2/9] reworking CDB_WeightedMean to be an aggregate function

---
 src/pg/sql/11_kmeans.sql                | 60 +++++++++++++++++--------
 src/pg/test/expected/05_kmeans_test.out |  2 +-
 src/pg/test/sql/05_kmeans_test.sql      |  2 +-
 3 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql
index 73e2f1d..87f07ea 100644
--- a/src/pg/sql/11_kmeans.sql
+++ b/src/pg/sql/11_kmeans.sql
@@ -8,24 +8,46 @@ RETURNS table (cartodb_id integer, cluster_no integer) as $$
 
 $$ language plpythonu;
 
-CREATE OR REPLACE FUNCTION CDB_WeightedMean(query text, weight_column text, category_column text default null )
-RETURNS table (the_geom geometry,class integer ) as $$
-BEGIN
 
-RETURN QUERY
-    EXECUTE format( $string$
-        select ST_SETSRID(st_makepoint(cx, cy),4326) the_geom, class  from (
-            select  
-                   %I as class,
-                   sum(st_x(the_geom)*%I)/sum(%I) cx,
-                   sum(st_y(the_geom)*%I)/sum(%I) cy
-                   from (%s) a
-                   group by %I
-            ) q          
-     
-        $string$, category_column, weight_column,weight_column,weight_column,weight_column,query, category_column 
-    )
-    using the_geom
-    RETURN;
-END 
+CREATE OR REPLACE FUNCTION CDB_WeightedMeanS(state Numeric[],the_geom GEOMETRY(Point, 4326), weight NUMERIC)
+RETURNS Numeric[] AS 
+$$
+DECLARE 
+    newX NUMERIC;
+    newY NUMERIC;
+    newW NUMERIC;
+BEGIN
+    IF weight IS NULL OR the_geom IS NULL THEN 
+        newX = state[1];
+        newY = state[2];
+        newW = state[3];
+    ELSE
+        newX = state[1] + ST_X(the_geom)*weight;
+        newY = state[2] + ST_Y(the_geom)*weight;
+        newW = state[3] + weight;
+    END IF;
+    RETURN Array[newX,newY,newW];
+
+END
 $$ LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION CDB_WeightedMeanF(state Numeric[])
+RETURNS GEOMETRY AS 
+$$
+BEGIN
+    IF state[3] = 0 THEN 
+        RETURN ST_SetSRID(ST_MakePoint(state[1],state[2]), 4326);
+    ELSE 
+        RETURN ST_SETSRID(ST_MakePoint(state[1]/state[3], state[2]/state[3]),4326);
+    END IF;
+END
+$$ LANGUAGE plpgsql;
+
+CREATE AGGREGATE CDB_WeightedMean(the_geom geometry(Point, 4326), weight NUMERIC)(
+    SFUNC = CDB_WeightedMeanS,
+    FINALFUNC = CDB_WeightedMeanF,
+    STYPE = Numeric[],
+    INITCOND = "{0.0,0.0,0.0}" 
+);
+
+
diff --git a/src/pg/test/expected/05_kmeans_test.out b/src/pg/test/expected/05_kmeans_test.out
index 4e6db09..8c6ffa1 100644
--- a/src/pg/test/expected/05_kmeans_test.out
+++ b/src/pg/test/expected/05_kmeans_test.out
@@ -4,7 +4,7 @@ SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('se
 clusters
 2
 (1 row)
-SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' );
+SELECT count(*) clusters from (select  cdb_crankshaft.CDB_WeightedMean(the_geom, value::NUMERIC), code from ppoints group by code) p;
 clusters
 52
 (1 row)
diff --git a/src/pg/test/sql/05_kmeans_test.sql b/src/pg/test/sql/05_kmeans_test.sql
index a400e5e..2298b85 100644
--- a/src/pg/test/sql/05_kmeans_test.sql
+++ b/src/pg/test/sql/05_kmeans_test.sql
@@ -3,4 +3,4 @@
 
 SELECT count(DISTINCT cluster_no) as clusters from cdb_crankshaft.cdb_kmeans('select * from ppoints', 2);
 
-SELECT count(*) clusters from cdb_crankshaft.cdb_WeightedMean( 'select *, code::INTEGER as cluster from ppoints' , 'value', 'cluster' );
+SELECT count(*) clusters from (select  cdb_crankshaft.CDB_WeightedMean(the_geom, value::NUMERIC), code from ppoints group by code) p;

From 9d3de5a8ef13be63539248f3e4d82d7b4b68df9d Mon Sep 17 00:00:00 2001
From: Stuart Lynn <stuart.lynn@gmail.com>
Date: Fri, 10 Jun 2016 13:12:55 +0000
Subject: [PATCH 3/9] adding not null filter for geom on kmeans

---
 src/py/crankshaft/crankshaft/clustering/kmeans.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py
index 3d9ed58..4134062 100644
--- a/src/py/crankshaft/crankshaft/clustering/kmeans.py
+++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py
@@ -5,6 +5,7 @@ def kmeans(query, no_clusters, no_init=20):
     data = plpy.execute('''select array_agg(cartodb_id order by cartodb_id) as ids,
         array_agg(ST_X(the_geom) order by cartodb_id) xs,
         array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
+        where the_geom is not null
     '''.format(query=query))
 
     xs  = data[0]['xs']

From 1a4944b9600250a972458bfe1952f79ffce76ff2 Mon Sep 17 00:00:00 2001
From: Stuart Lynn <stuart.lynn@gmail.com>
Date: Fri, 10 Jun 2016 13:16:16 +0000
Subject: [PATCH 4/9] adding sklearn as a dep

---
 src/py/crankshaft/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py
index 8d5e622..baa88e3 100644
--- a/src/py/crankshaft/setup.py
+++ b/src/py/crankshaft/setup.py
@@ -40,9 +40,9 @@ setup(
 
     # The choice of component versions is dictated by what's
     # provisioned in the production servers.
-    install_requires=['pysal==1.9.1'],
+    install_requires=['pysal==1.9.1', 'sklearn==0.17.1'],
 
-    requires=['pysal', 'numpy' ],
+    requires=['pysal', 'numpy', 'sklearn' ],
 
     test_suite='test'
 )

From 889cd5c5791d2f87e35b3e510b7c7ac14eac9fcf Mon Sep 17 00:00:00 2001
From: Raul Ochoa <rochoaf@gmail.com>
Date: Fri, 10 Jun 2016 17:47:46 +0200
Subject: [PATCH 5/9] Fix scikit-learn dep name

---
 src/py/crankshaft/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py
index baa88e3..68f9e17 100644
--- a/src/py/crankshaft/setup.py
+++ b/src/py/crankshaft/setup.py
@@ -40,7 +40,7 @@ setup(
 
     # The choice of component versions is dictated by what's
     # provisioned in the production servers.
-    install_requires=['pysal==1.9.1', 'sklearn==0.17.1'],
+    install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'],
 
     requires=['pysal', 'numpy', 'sklearn' ],
 

From b33ba2d2949ab0bef092f25acf82d2308775a2a5 Mon Sep 17 00:00:00 2001
From: Raul Ochoa <rochoaf@gmail.com>
Date: Fri, 10 Jun 2016 18:24:43 +0200
Subject: [PATCH 6/9] Do not use names for the aggregate params

---
 src/pg/sql/11_kmeans.sql | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/pg/sql/11_kmeans.sql b/src/pg/sql/11_kmeans.sql
index 87f07ea..a27f803 100644
--- a/src/pg/sql/11_kmeans.sql
+++ b/src/pg/sql/11_kmeans.sql
@@ -43,11 +43,9 @@ BEGIN
 END
 $$ LANGUAGE plpgsql;
 
-CREATE AGGREGATE CDB_WeightedMean(the_geom geometry(Point, 4326), weight NUMERIC)(
+CREATE AGGREGATE CDB_WeightedMean(geometry(Point, 4326), NUMERIC)(
     SFUNC = CDB_WeightedMeanS,
     FINALFUNC = CDB_WeightedMeanF,
     STYPE = Numeric[],
     INITCOND = "{0.0,0.0,0.0}" 
 );
-
-

From 1e8bc12e0a6ea2ffefe580b63133b88f4db045a7 Mon Sep 17 00:00:00 2001
From: Raul Ochoa <rochoaf@gmail.com>
Date: Mon, 13 Jun 2016 12:17:46 +0200
Subject: [PATCH 7/9] Declare scipy as dep

---
 src/py/crankshaft/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py
index 68f9e17..e787d32 100644
--- a/src/py/crankshaft/setup.py
+++ b/src/py/crankshaft/setup.py
@@ -40,9 +40,9 @@ setup(
 
     # The choice of component versions is dictated by what's
     # provisioned in the production servers.
-    install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'],
+    install_requires=['scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'],
 
-    requires=['pysal', 'numpy', 'sklearn' ],
+    requires=['scipy', 'pysal', 'numpy', 'sklearn'],
 
     test_suite='test'
 )

From c870f68c77652a11f8401bbbb981797694174288 Mon Sep 17 00:00:00 2001
From: Raul Ochoa <rochoaf@gmail.com>
Date: Mon, 13 Jun 2016 13:05:50 +0200
Subject: [PATCH 8/9] Revert "Declare scipy as dep"

This reverts commit 1e8bc12e0a6ea2ffefe580b63133b88f4db045a7.
---
 src/py/crankshaft/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py
index e787d32..68f9e17 100644
--- a/src/py/crankshaft/setup.py
+++ b/src/py/crankshaft/setup.py
@@ -40,9 +40,9 @@ setup(
 
     # The choice of component versions is dictated by what's
     # provisioned in the production servers.
-    install_requires=['scipy==0.17.1', 'pysal==1.9.1', 'scikit-learn==0.17.1'],
+    install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'],
 
-    requires=['scipy', 'pysal', 'numpy', 'sklearn'],
+    requires=['pysal', 'numpy', 'sklearn' ],
 
     test_suite='test'
 )

From fd1862167c123ad7e59906801027e06c88fbf90e Mon Sep 17 00:00:00 2001
From: Raul Ochoa <rochoaf@gmail.com>
Date: Mon, 13 Jun 2016 13:06:21 +0200
Subject: [PATCH 9/9] Remove trailing space

---
 src/py/crankshaft/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/py/crankshaft/setup.py b/src/py/crankshaft/setup.py
index 68f9e17..04822dd 100644
--- a/src/py/crankshaft/setup.py
+++ b/src/py/crankshaft/setup.py
@@ -42,7 +42,7 @@ setup(
     # provisioned in the production servers.
     install_requires=['pysal==1.9.1', 'scikit-learn==0.17.1'],
 
-    requires=['pysal', 'numpy', 'sklearn' ],
+    requires=['pysal', 'numpy', 'sklearn'],
 
     test_suite='test'
 )