diff --git a/src/py/crankshaft/crankshaft/__init__.py b/src/py/crankshaft/crankshaft/__init__.py index 4e06bc5..a03b040 100644 --- a/src/py/crankshaft/crankshaft/__init__.py +++ b/src/py/crankshaft/crankshaft/__init__.py @@ -3,3 +3,4 @@ import crankshaft.random_seeds import crankshaft.clustering import crankshaft.space_time_dynamics import crankshaft.segmentation +import query_runner diff --git a/src/py/crankshaft/crankshaft/clustering/kmeans.py b/src/py/crankshaft/crankshaft/clustering/kmeans.py index 48b9bd3..06c6527 100644 --- a/src/py/crankshaft/crankshaft/clustering/kmeans.py +++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py @@ -1,36 +1,7 @@ from sklearn.cluster import KMeans -import plpy import numpy as np - -class QueryRunner: - def get_moran(self, query): - """fetch data for moran's i analyses""" - try: - result = plpy.execute(query) - # if there are no neighbors, exit - if len(result) == 0: - return pu.empty_zipped_array(2) - except plpy.SPIError, e: - plpy.error('Analysis failed: %s' % e) - return pu.empty_zipped_array(2) - - def get_columns(self, query, standarize): - """fetch data for non-spatial kmeans""" - try: - db_resp = plpy.execute(query) - except plpy.SPIError, err: - plpy.error('Analysis failed: %s' % err) - - return db_resp - - def get_result(self, query): - """fetch data for spatial kmeans""" - try: - data = plpy.execute(query) - except plpy.SPIError, err: - plpy.error("Analysis failed: %s" % err) - return data +from crankshaft.query_runner import QueryRunner class Kmeans: @@ -52,7 +23,7 @@ class Kmeans: "FROM ({query}) As a " "WHERE the_geom IS NOT NULL").format(query=query) - data = self.query_runner.get_result(full_query) + data = self.query_runner.get_spatial_kmeans(full_query) # Unpack query response xs = data[0]['xs'] @@ -92,7 +63,7 @@ class Kmeans: cols=', '.join(['array_agg({0}) As col{1}'.format(val, idx) for idx, val in enumerate(colnames)])) - db_resp = self.query_runner.get_columns(full_query, standarize) + db_resp = 
self.query_runner.get_nonspatial_kmeans(full_query, standarize) # fill array with values for k-means clustering if standarize: diff --git a/src/py/crankshaft/crankshaft/clustering/moran.py b/src/py/crankshaft/crankshaft/clustering/moran.py index ee82932..d2c99d6 100644 --- a/src/py/crankshaft/crankshaft/clustering/moran.py +++ b/src/py/crankshaft/crankshaft/clustering/moran.py @@ -8,6 +8,7 @@ Moran's I geostatistics (global clustering & outliers presence) import pysal as ps import plpy from collections import OrderedDict +from crankshaft.query_runner import QueryRunner # crankshaft module import crankshaft.pysal_utils as pu @@ -15,15 +16,6 @@ import crankshaft.pysal_utils as pu # High level interface --------------------------------------- -class QueryRunner: - def get_result(self, query): - try: - data = plpy.execute(query) - except plpy.SPIError, err: - plpy.error("k-means (spatial) cluster analysis failed: %s" % err) - return data - - class Moran: def __init__(self, query_runner=None): if query_runner is None: @@ -47,7 +39,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) # collect attributes attr_vals = pu.get_attributes(result) @@ -79,7 +71,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) attr_vals = pu.get_attributes(result) weight = pu.get_weight(result, w_type, num_ngbrs) @@ -108,7 +100,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) # collect attributes numer = pu.get_attributes(result, 1) @@ -140,7 +132,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) # collect attributes numer = pu.get_attributes(result, 1) @@ 
-173,7 +165,7 @@ class Moran: query = pu.construct_neighbor_query(w_type, qvals) - result = self.query_runner.get_result(query) + result = self.query_runner.get_moran(query) # collect attributes attr1_vals = pu.get_attributes(result, 1)
diff --git a/src/py/crankshaft/crankshaft/query_runner.py b/src/py/crankshaft/crankshaft/query_runner.py
new file mode 100644
index 0000000..5775e72
--- /dev/null
+++ b/src/py/crankshaft/crankshaft/query_runner.py
@@ -0,0 +1,53 @@
+"""class for fetching data"""
+import plpy
+
+import crankshaft.pysal_utils as pu
+
+
+class QueryRunner:
+    """Executes analysis queries through plpy and hands back the rows."""
+
+    def get_markov(self, query):
+        """fetch data for markov analyses"""
+        try:
+            data = plpy.execute(query)
+
+            if len(data) == 0:
+                return pu.empty_zipped_array(4)
+
+            return data
+        except plpy.SPIError as err:
+            plpy.error('Analysis failed: %s' % err)
+
+    def get_moran(self, query):
+        """fetch data for moran's i analyses"""
+        try:
+            data = plpy.execute(query)
+            # if there are no neighbors, exit
+            if len(data) == 0:
+                return pu.empty_zipped_array(2)
+            return data
+        except plpy.SPIError as err:
+            plpy.error('Analysis failed: %s' % err)
+            # fallback kept in case plpy.error does not raise
+            return pu.empty_zipped_array(2)
+
+    def get_nonspatial_kmeans(self, query, standarize=None):
+        """fetch data for non-spatial kmeans
+
+        `standarize` is not used here; it is accepted because the
+        non-spatial k-means caller passes it alongside the query.
+        """
+        try:
+            data = plpy.execute(query)
+            return data
+        except plpy.SPIError as err:
+            plpy.error('Analysis failed: %s' % err)
+
+    def get_spatial_kmeans(self, query):
+        """fetch data for spatial kmeans"""
+        try:
+            data = plpy.execute(query)
+            return data
+        except plpy.SPIError as err:
+            plpy.error("Analysis failed: %s" % err)
diff --git a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py index 7984e0c..ea8dd32 100644 --- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py +++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py @@ -7,19 +7,7 @@ import numpy as np import pysal as ps import plpy import crankshaft.pysal_utils as pu - - -class QueryRunner: - def get_result(self, query): - try: - data = plpy.execute(query) - - if len(data) == 0: - return 
zip([None], [None], [None], [None], [None]) - - return data - except plpy.SPIError, err: - plpy.error('Analysis failed: %s' % err) +from crankshaft.query_runner import QueryRunner class Markov: @@ -74,7 +62,7 @@ class Markov: query = pu.construct_neighbor_query(w_type, qvals) - query_result = self.query_runner.get_result(query) + query_result = self.query_runner.get_markov(query) # build weight weights = pu.get_weight(query_result, w_type) diff --git a/src/py/crankshaft/test/test_clustering_kmeans.py b/src/py/crankshaft/test/test_clustering_kmeans.py index 830ee9d..9fd2504 100644 --- a/src/py/crankshaft/test/test_clustering_kmeans.py +++ b/src/py/crankshaft/test/test_clustering_kmeans.py @@ -9,7 +9,7 @@ import numpy as np # sys.modules['plpy'] = plpy from helper import fixture_file from crankshaft.clustering import Kmeans -from crankshaft.clustering import QueryRunner +from crankshaft.query_runner import QueryRunner import crankshaft.clustering as cc from crankshaft import random_seeds @@ -21,10 +21,10 @@ class FakeQueryRunner(QueryRunner): def __init__(self, mocked_result): self.mocked_result = mocked_result - def get_result(self, query): + def get_spatial_kmeans(self, query): return self.mocked_result - def get_columns(self, query, standarize): + def get_nonspatial_kmeans(self, query, standarize): return self.mocked_result diff --git a/src/py/crankshaft/test/test_clustering_moran.py b/src/py/crankshaft/test/test_clustering_moran.py index 0a320fb..37cf7d0 100644 --- a/src/py/crankshaft/test/test_clustering_moran.py +++ b/src/py/crankshaft/test/test_clustering_moran.py @@ -1,12 +1,6 @@ import unittest import numpy as np - -# from mock_plpy import MockPlPy -# plpy = MockPlPy() -# -# import sys -# sys.modules['plpy'] = plpy from helper import fixture_file from crankshaft.clustering import Moran from crankshaft.clustering import QueryRunner @@ -17,14 +11,11 @@ from collections import OrderedDict class FakeQueryRunner(QueryRunner): - def __init__(self, mocked_result): 
- self.mocked_result = mocked_result + def __init__(self, mock_data): + self.mock_result = mock_data - def get_result(self, query): - return self.mocked_result - - def get_columns(self, query): - return self.mocked_result + def get_moran(self, query): + return self.mock_result class MoranTest(unittest.TestCase): diff --git a/src/py/crankshaft/test/test_space_time_dynamics.py b/src/py/crankshaft/test/test_space_time_dynamics.py index 21f3afc..e58c7d4 100644 --- a/src/py/crankshaft/test/test_space_time_dynamics.py +++ b/src/py/crankshaft/test/test_space_time_dynamics.py @@ -4,17 +4,12 @@ import numpy as np import unittest -# from mock_plpy import MockPlPy -# plpy = MockPlPy() -# -# import sys -# sys.modules['plpy'] = plpy from helper import fixture_file from crankshaft.space_time_dynamics import Markov import crankshaft.space_time_dynamics as std from crankshaft import random_seeds -from crankshaft.clustering import QueryRunner +from crankshaft.query_runner import QueryRunner import json @@ -22,7 +17,7 @@ class FakeQueryRunner(QueryRunner): def __init__(self, data): self.mock_result = data - def get_result(self, query): + def get_markov(self, query): return self.mock_result @@ -30,7 +25,6 @@ class SpaceTimeTests(unittest.TestCase): """Testing class for Markov Functions.""" def setUp(self): - # plpy._reset() self.params = {"id_col": "cartodb_id", "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'], "subquery": "SELECT * FROM a_list",