separates out query runner

2016-11-19 14:20:06 +00:00 · 2016-11-19 14:20:06 +00:00 · c8f5448b7c
commit c8f5448b7c
parent 224fbc2fc5
8 changed files with 64 additions and 84 deletions
--- a/src/py/crankshaft/crankshaft/__init__.py
+++ b/src/py/crankshaft/crankshaft/__init__.py
@ -3,3 +3,4 @@ import crankshaft.random_seeds
 import crankshaft.clustering
 import crankshaft.space_time_dynamics
 import crankshaft.segmentation
+import query_runner
--- a/src/py/crankshaft/crankshaft/clustering/kmeans.py
+++ b/src/py/crankshaft/crankshaft/clustering/kmeans.py
@ -1,36 +1,7 @@
 from sklearn.cluster import KMeans
-import plpy
 import numpy as np

-
-class QueryRunner:
-    def get_moran(self, query):
-        """fetch data for moran's i analyses"""
-        try:
-            result = plpy.execute(query)
-            # if there are no neighbors, exit
-            if len(result) == 0:
-                return pu.empty_zipped_array(2)
-        except plpy.SPIError, e:
-            plpy.error('Analysis failed: %s' % e)
-            return pu.empty_zipped_array(2)
-
-    def get_columns(self, query, standarize):
-        """fetch data for non-spatial kmeans"""
-        try:
-            db_resp = plpy.execute(query)
-        except plpy.SPIError, err:
-            plpy.error('Analysis failed: %s' % err)
-
-        return db_resp
-
-    def get_result(self, query):
-        """fetch data for spatial kmeans"""
-        try:
-            data = plpy.execute(query)
-        except plpy.SPIError, err:
-            plpy.error("Analysis failed: %s" % err)
-        return data
+from crankshaft.query_runner import QueryRunner


 class Kmeans:
@ -52,7 +23,7 @@ class Kmeans:
                      "FROM ({query}) As a "
                      "WHERE the_geom IS NOT NULL").format(query=query)

-        data = self.query_runner.get_result(full_query)
+        data = self.query_runner.get_spatial_kmeans(full_query)

        # Unpack query response
        xs = data[0]['xs']
@ -92,7 +63,7 @@ class Kmeans:
                   cols=', '.join(['array_agg({0}) As col{1}'.format(val, idx)
                                   for idx, val in enumerate(colnames)]))

-        db_resp = self.query_runner.get_columns(full_query, standarize)
+        db_resp = self.query_runner.get_nonspatial_kmeans(full_query, standarize)

        # fill array with values for k-means clustering
        if standarize:
--- a/src/py/crankshaft/crankshaft/clustering/moran.py
+++ b/src/py/crankshaft/crankshaft/clustering/moran.py
@ -8,6 +8,7 @@ Moran's I geostatistics (global clustering & outliers presence)
 import pysal as ps
 import plpy
 from collections import OrderedDict
+from crankshaft.query_runner import QueryRunner

 # crankshaft module
 import crankshaft.pysal_utils as pu
@ -15,15 +16,6 @@ import crankshaft.pysal_utils as pu
 # High level interface ---------------------------------------


-class QueryRunner:
-    def get_result(self, query):
-        try:
-            data = plpy.execute(query)
-        except plpy.SPIError, err:
-            plpy.error("k-means (spatial) cluster analysis failed: %s" % err)
-        return data
-
-
 class Moran:
    def __init__(self, query_runner=None):
        if query_runner is None:
@ -47,7 +39,7 @@ class Moran:

        query = pu.construct_neighbor_query(w_type, qvals)

-        result = self.query_runner.get_result(query)
+        result = self.query_runner.get_moran(query)

        # collect attributes
        attr_vals = pu.get_attributes(result)
@ -79,7 +71,7 @@ class Moran:

        query = pu.construct_neighbor_query(w_type, qvals)

-        result = self.query_runner.get_result(query)
+        result = self.query_runner.get_moran(query)

        attr_vals = pu.get_attributes(result)
        weight = pu.get_weight(result, w_type, num_ngbrs)
@ -108,7 +100,7 @@ class Moran:

        query = pu.construct_neighbor_query(w_type, qvals)

-        result = self.query_runner.get_result(query)
+        result = self.query_runner.get_moran(query)

        # collect attributes
        numer = pu.get_attributes(result, 1)
@ -140,7 +132,7 @@ class Moran:

        query = pu.construct_neighbor_query(w_type, qvals)

-        result = self.query_runner.get_result(query)
+        result = self.query_runner.get_moran(query)

        # collect attributes
        numer = pu.get_attributes(result, 1)
@ -173,7 +165,7 @@ class Moran:

        query = pu.construct_neighbor_query(w_type, qvals)

-        result = self.query_runner.get_result(query)
+        result = self.query_runner.get_moran(query)

        # collect attributes
        attr1_vals = pu.get_attributes(result, 1)
--- a/src/py/crankshaft/crankshaft/query_runner.py
+++ b/src/py/crankshaft/crankshaft/query_runner.py
@ -0,0 +1,55 @
+"""class for fetching data"""
+import plpy
+
+import crankshaft.pysal_utils as pu
+
+
+class QueryRunner:
+    """Run analysis queries through plpy, one method per analysis type."""
+
+    def get_markov(self, query):
+        """fetch data for markov analyses"""
+        try:
+            data = plpy.execute(query)
+
+            # no rows: hand back the placeholder built by pysal_utils
+            if len(data) == 0:
+                return pu.empty_zipped_array(4)
+
+            return data
+        except plpy.SPIError as err:
+            plpy.error('Analysis failed: %s' % err)
+
+    def get_moran(self, query):
+        """fetch data for moran's i analyses"""
+        try:
+            data = plpy.execute(query)
+            # if there are no neighbors, exit
+            if len(data) == 0:
+                return pu.empty_zipped_array(2)
+            return data
+        except plpy.SPIError as err:
+            # report the caught exception (this previously referenced an
+            # undefined name `e`, which raised NameError in the handler)
+            plpy.error('Analysis failed: %s' % err)
+            return pu.empty_zipped_array(2)
+
+    def get_nonspatial_kmeans(self, query, standarize=None):
+        """fetch data for non-spatial kmeans
+
+        `standarize` is accepted because Kmeans passes it along with the
+        query; it is not used when running the query.
+        """
+        try:
+            data = plpy.execute(query)
+            return data
+        except plpy.SPIError as err:
+            plpy.error('Analysis failed: %s' % err)
+
+    def get_spatial_kmeans(self, query):
+        """fetch data for spatial kmeans"""
+        try:
+            data = plpy.execute(query)
+            return data
+        except plpy.SPIError as err:
+            plpy.error("Analysis failed: %s" % err)
--- a/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py
+++ b/src/py/crankshaft/crankshaft/space_time_dynamics/markov.py
@ -7,19 +7,7 @@ import numpy as np
 import pysal as ps
 import plpy
 import crankshaft.pysal_utils as pu
-
-
-class QueryRunner:
-    def get_result(self, query):
-        try:
-            data = plpy.execute(query)
-
-            if len(data) == 0:
-                return zip([None], [None], [None], [None], [None])
-
-            return data
-        except plpy.SPIError, err:
-            plpy.error('Analysis failed: %s' % err)
+from crankshaft.query_runner import QueryRunner


 class Markov:
@ -74,7 +62,7 @@ class Markov:

        query = pu.construct_neighbor_query(w_type, qvals)

-        query_result = self.query_runner.get_result(query)
+        query_result = self.query_runner.get_markov(query)

        # build weight
        weights = pu.get_weight(query_result, w_type)
--- a/src/py/crankshaft/test/test_clustering_kmeans.py
+++ b/src/py/crankshaft/test/test_clustering_kmeans.py
@ -9,7 +9,7 @@ import numpy as np
 # sys.modules['plpy'] = plpy
 from helper import fixture_file
 from crankshaft.clustering import Kmeans
-from crankshaft.clustering import QueryRunner
+from crankshaft.query_runner import QueryRunner
 import crankshaft.clustering as cc

 from crankshaft import random_seeds
@ -21,10 +21,10 @@ class FakeQueryRunner(QueryRunner):
    def __init__(self, mocked_result):
        self.mocked_result = mocked_result

-    def get_result(self, query):
+    def get_spatial_kmeans(self, query):
        return self.mocked_result

-    def get_columns(self, query, standarize):
+    def get_nonspatial_kmeans(self, query, standarize):
        return self.mocked_result


--- a/src/py/crankshaft/test/test_clustering_moran.py
+++ b/src/py/crankshaft/test/test_clustering_moran.py
@ -1,12 +1,6 @@
 import unittest
 import numpy as np

-
-# from mock_plpy import MockPlPy
-# plpy = MockPlPy()
-#
-# import sys
-# sys.modules['plpy'] = plpy
 from helper import fixture_file
 from crankshaft.clustering import Moran
 from crankshaft.clustering import QueryRunner
@ -17,14 +11,11 @@ from collections import OrderedDict


 class FakeQueryRunner(QueryRunner):
-    def __init__(self, mocked_result):
-        self.mocked_result = mocked_result
+    def __init__(self, mock_data):
+        self.mock_result = mock_data

-    def get_result(self, query):
-        return self.mocked_result
-
-    def get_columns(self, query):
-        return self.mocked_result
+    def get_moran(self, query):
+        return self.mock_result


 class MoranTest(unittest.TestCase):
--- a/src/py/crankshaft/test/test_space_time_dynamics.py
+++ b/src/py/crankshaft/test/test_space_time_dynamics.py
@ -4,17 +4,12 @@ import numpy as np
 import unittest


-# from mock_plpy import MockPlPy
-# plpy = MockPlPy()
-#
-# import sys
-# sys.modules['plpy'] = plpy
 from helper import fixture_file

 from crankshaft.space_time_dynamics import Markov
 import crankshaft.space_time_dynamics as std
 from crankshaft import random_seeds
-from crankshaft.clustering import QueryRunner
+from crankshaft.query_runner import QueryRunner
 import json


@ -22,7 +17,7 @@ class FakeQueryRunner(QueryRunner):
    def __init__(self, data):
        self.mock_result = data

-    def get_result(self, query):
+    def get_markov(self, query):
        return self.mock_result


@ -30,7 +25,6 @@ class SpaceTimeTests(unittest.TestCase):
    """Testing class for Markov Functions."""

    def setUp(self):
-        # plpy._reset()
        self.params = {"id_col": "cartodb_id",
                       "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
                       "subquery": "SELECT * FROM a_list",