Release 0.4.2

2016-09-22 11:11:58 +02:00 · 2016-09-22 11:11:58 +02:00 · b68f1c53b6
commit b68f1c53b6
parent a665e41a83
30 changed files with 5740 additions and 2 deletions
--- a/NEWS.md
+++ b/NEWS.md
@ -1,3 +1,7 @@
 0.4.2 (2016-09-22)
 ------------------
 * Bugfix for cdb_areasofinterestglobal: import correct modules
 0.4.1 (2016-09-21)
 ------------------
 * Let the user set the resolution in CDB_Contour function
--- a/release/crankshaft--0.4.1--0.4.2.sql
+++ b/release/crankshaft--0.4.1--0.4.2.sql
--- a/release/crankshaft--0.4.2.sql
+++ b/release/crankshaft--0.4.2.sql
--- a/release/crankshaft.control
+++ b/release/crankshaft.control
@ -1,5 +1,5 @@
 comment = 'CartoDB Spatial Analysis extension'
-default_version = '0.4.1'
+default_version = '0.4.2'
 requires = 'plpythonu, postgis'
 superuser = true
 schema = cdb_crankshaft
--- a/release/python/0.4.2/crankshaft/crankshaft/init.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/init.py
@ -0,0 +1,5 @@
 """Import all modules"""
 import crankshaft.random_seeds
 import crankshaft.clustering
 import crankshaft.space_time_dynamics
 import crankshaft.segmentation
--- a/release/python/0.4.2/crankshaft/crankshaft/clustering/init.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/clustering/init.py
@ -0,0 +1,3 @@
 """Import all functions from for clustering"""
 from moran import *
 from kmeans import *
--- a/release/python/0.4.2/crankshaft/crankshaft/clustering/kmeans.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/clustering/kmeans.py
@ -0,0 +1,18 @@
 from sklearn.cluster import KMeans
 import plpy
 def kmeans(query, no_clusters, no_init=20):
    """
    Cluster the point geometries returned by `query` with k-means.

    The ids and the X/Y coordinates of every row with a non-null
    `the_geom` are aggregated (ordered by cartodb_id) in one SQL call
    and handed to scikit-learn's KMeans.

        @param query string: subquery exposing cartodb_id and the_geom
        @param no_clusters int: number of clusters (KMeans n_clusters)
        @param no_init int: number of initialisations (KMeans n_init)
    Returns the ids zipped with the cluster label assigned to each one.
    """
    sql = '''select array_agg(cartodb_id order by cartodb_id) as ids,
        array_agg(ST_X(the_geom) order by cartodb_id) xs,
        array_agg(ST_Y(the_geom) order by cartodb_id) ys from ({query}) a
        where the_geom is not null
    '''.format(query=query)
    aggregated = plpy.execute(sql)[0]
    xs = aggregated['xs']
    ys = aggregated['ys']
    ids = aggregated['ids']
    model = KMeans(n_clusters=no_clusters, n_init=no_init)
    # one (x, y) pair per geometry, in the same order as ids
    labels = model.fit_predict(zip(xs, ys))
    return zip(ids, labels)
--- a/release/python/0.4.2/crankshaft/crankshaft/clustering/moran.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/clustering/moran.py
@ -0,0 +1,250 @@
 """
 Moran's I geostatistics (global clustering & outliers presence)
 """
 # TODO: Fill in local neighbors which have null/NoneType values with the
 #       average of their neighborhood
 import pysal as ps
 import plpy
 from collections import OrderedDict
 # crankshaft module
 import crankshaft.pysal_utils as pu
 # High level interface ---------------------------------------
 def moran(subquery, attr_name,
           w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Global Moran's I for a single attribute.

    Neighbors are computed in the database (query built by
    pu.construct_neighbor_query) and the statistic itself by PySAL.

    Returns a one-element zip of (I, EI); when the neighbor query
    yields no rows, pu.empty_zipped_array(2) is returned instead.
    Andy Eschbacher
    """
    params = OrderedDict([("id_col", id_col),
                          ("attr1", attr_name),
                          ("geom_col", geom_col),
                          ("subquery", subquery),
                          ("num_ngbrs", num_ngbrs)])
    neighbor_sql = pu.construct_neighbor_query(w_type, params)

    try:
        rows = plpy.execute(neighbor_sql)
        # nothing to analyse without neighbors
        if len(rows) == 0:
            return pu.empty_zipped_array(2)
    except plpy.SPIError as err:
        # plpy.error normally raises; the return is only a fallback
        plpy.error('Analysis failed: %s' % err)
        return pu.empty_zipped_array(2)

    values = pu.get_attributes(rows)
    w = pu.get_weight(rows, w_type, num_ngbrs)

    stat = ps.esda.moran.Moran(values, w, permutations=permutations)
    return zip([stat.I], [stat.EI])
 def moran_local(subquery, attr,
                 w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Local Moran's I (LISA) for PL/Python.

    Rows whose attribute is null are filtered out by the neighbor
    query, so a geometry's neighbors may not be its nearest ones.

    Returns a zip of (local I, quadrant label, simulated p-value,
    id, attribute value); when the neighbor query yields no rows,
    pu.empty_zipped_array(5) is returned instead.
    Andy Eschbacher
    """
    params = OrderedDict([("id_col", id_col),
                          ("attr1", attr),
                          ("geom_col", geom_col),
                          ("subquery", subquery),
                          ("num_ngbrs", num_ngbrs)])
    neighbor_sql = pu.construct_neighbor_query(w_type, params)

    try:
        rows = plpy.execute(neighbor_sql)
        # nothing to analyse without neighbors
        if len(rows) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError as err:
        # plpy.error normally raises; the return is only a fallback
        plpy.error('Analysis failed: %s' % err)
        return pu.empty_zipped_array(5)

    values = pu.get_attributes(rows)
    w = pu.get_weight(rows, w_type, num_ngbrs)

    local_stat = ps.esda.moran.Moran_Local(values, w,
                                           permutations=permutations)

    # translate PySAL's numeric quadrants into 'HH', 'LH', 'LL', 'HL'
    labels = quad_position(local_stat.q)
    return zip(local_stat.Is, labels, local_stat.p_sim,
               w.id_order, local_stat.y)
 def moran_rate(subquery, numerator, denominator,
                w_type, num_ngbrs, permutations, geom_col, id_col):
    """
    Moran's I Rate (global)

    Neighbors are computed in the database (query built by
    pu.construct_neighbor_query); the rate statistic is computed by
    PySAL's Moran_Rate from the numerator and denominator columns.

    Returns a one-element zip of (I, EI); when the neighbor query
    yields no rows, pu.empty_zipped_array(2) is returned instead.
    Andy Eschbacher
    """
    # Every pair needs its trailing comma: without it the following
    # tuple is parsed as a call on the previous one (TypeError).
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", numerator),
                         ("attr2", denominator),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])
    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(2)
    except plpy.SPIError as e:
        # plpy.error normally raises; the return is only a fallback
        plpy.error('Analysis failed: %s' % e)
        return pu.empty_zipped_array(2)

    # collect attributes (attr1 = numerator, attr2 = denominator)
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate moran global rate
    lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                         permutations=permutations)
    return zip([lisa_rate.I], [lisa_rate.EI])
 def moran_local_rate(subquery, numerator, denominator,
                      w_type, num_ngbrs, permutations, geom_col, id_col):
    """
        Moran's I Local Rate

        Rows whose values are null are filtered out by the neighbor
        query, so a geometry's neighbors may not be its nearest ones.

        Returns a zip of (local I, quadrant label, simulated p-value,
        id, rate value); when the neighbor query yields no rows,
        pu.empty_zipped_array(5) is returned instead.
        Andy Eschbacher
    """
    # The query builders number the attribute columns by the *sorted*
    # dict keys.  With keys "numerator"/"denominator" the denominator
    # sorts first and becomes attr1, and the "<> 0" guard lands on the
    # numerator.  Using attr1/attr2 (as moran_rate does) keeps
    # attr1 = numerator and attr2 = denominator.
    qvals = OrderedDict([("id_col", id_col),
                         ("attr1", numerator),
                         ("attr2", denominator),
                         ("geom_col", geom_col),
                         ("subquery", subquery),
                         ("num_ngbrs", num_ngbrs)])
    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        result = plpy.execute(query)
        # if there are no neighbors, exit
        if len(result) == 0:
            return pu.empty_zipped_array(5)
    except plpy.SPIError as e:
        # plpy.error normally raises; the return is only a fallback
        plpy.error('Analysis failed: %s' % e)
        return pu.empty_zipped_array(5)

    # collect attributes
    numer = pu.get_attributes(result, 1)
    denom = pu.get_attributes(result, 2)
    weight = pu.get_weight(result, w_type, num_ngbrs)

    # calculate LISA values
    lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                          permutations=permutations)

    # find quadrants for each geometry
    quads = quad_position(lisa.q)
    return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
 def moran_local_bv(subquery, attr1, attr2,
                    permutations, geom_col, id_col, w_type, num_ngbrs):
    """
        Bivariate local Moran's I (untested)

        Returns a zip of (local I, quadrant label, simulated p-value,
        id); when the neighbor query yields no rows,
        pu.empty_zipped_array(4) is returned instead.
    """
    params = OrderedDict([("id_col", id_col),
                          ("attr1", attr1),
                          ("attr2", attr2),
                          ("geom_col", geom_col),
                          ("subquery", subquery),
                          ("num_ngbrs", num_ngbrs)])
    neighbor_sql = pu.construct_neighbor_query(w_type, params)

    try:
        rows = plpy.execute(neighbor_sql)
        # nothing to analyse without neighbors
        if len(rows) == 0:
            return pu.empty_zipped_array(4)
    except plpy.SPIError:
        # plpy.error normally raises; the return is only a fallback
        plpy.error("Error: areas of interest query failed, check input parameters")
        return pu.empty_zipped_array(4)

    first_vals = pu.get_attributes(rows, 1)
    second_vals = pu.get_attributes(rows, 2)
    w = pu.get_weight(rows, w_type, num_ngbrs)

    local_stat = ps.esda.moran.Moran_Local_BV(first_vals, second_vals, w,
                                              permutations=permutations)

    # translate PySAL's numeric quadrants into 'HH', 'LH', 'LL', 'HL'
    labels = quad_position(local_stat.q)
    return zip(local_stat.Is, labels, local_stat.p_sim, w.id_order)
 # Low level functions ----------------------------------------
 def map_quads(coord):
    """
        Translate a quadrant number into its Moran's I designation
        (1 -> 'HH', 2 -> 'LH', 3 -> 'LL', 4 -> 'HL').
        Input:
        @param coord (int): quadrant of a specific measurement
        Output:
            one of 'HH', 'LH', 'LL', 'HL', or None for any other value
    """
    designations = ('HH', 'LH', 'LL', 'HL')
    # quadrant numbers are 1-based
    for number, label in enumerate(designations, 1):
        if coord == number:
            return label
    return None
 def quad_position(quads):
    """
        Label every quadrant number with its Moran's I designation.
        Input:
        @param quads ndarray: quadrant numbers, 1-4 (PySAL default)
        Output:
        list with one entry per input, each the result of map_quads
          ('HH', 'LH', 'LL', 'HL' or None)
    """
    labels = []
    for quadrant in quads:
        labels.append(map_quads(quadrant))
    return labels
--- a/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/init.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/init.py
@ -0,0 +1,2 @@
 """Import all functions for pysal_utils"""
 from crankshaft.pysal_utils.pysal_utils import *
--- a/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/pysal_utils.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/pysal_utils/pysal_utils.py
@ -0,0 +1,201 @@
 """
    Utilities module for generic PySAL functionality, mainly centered on
      translating queries into numpy arrays or PySAL weights objects
 """
 import numpy as np
 import pysal as ps
 def construct_neighbor_query(w_type, query_vals):
    """Build the SQL string used for finding neighbors.

        'knn' (compared case-insensitively) selects the k-nearest
        neighbors query; every other value selects the queen query.
        @param w_type text: type of neighbors to calculate ('knn' or 'queen')
        @param query_vals dict: values used to construct the query
    """
    builder = knn if w_type.lower() == 'knn' else queen
    return builder(query_vals)
 # Build weight object
 def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
        Construct a PySAL weights object from the neighbor query results.

        @param query_res dict-like: rows that each expose an 'id' and a
          'neighbors' entry
        @param w_type text: kept for interface compatibility; not used
        @param num_ngbrs int: kept for interface compatibility; not used
        Output:
        ps.W built from the id -> neighbors mapping, with its transform
          set to 'r' (row-standardized in PySAL)
    """
    neighbors = {x['id']: x['neighbors'] for x in query_res}

    built_weight = ps.W(neighbors)
    built_weight.transform = 'r'

    return built_weight
 def query_attr_select(params):
    """
        Build the attribute portion of the SELECT list for the neighbor
        queries: one `i."<column>"::numeric As attr<N>, ` fragment per
        attribute column, numbered from 1.

        With a 'time_cols' entry (markov analysis) the columns are taken
        from that list in order.  Otherwise (moran's analysis) every key
        other than id_col, geom_col, subquery and num_ngbrs names an
        attribute column, and the keys are processed in sorted order.
        @param params: dict of information used in query (column names,
                       table name, etc.)
    """
    fragment = "i.\"%(col)s\"::numeric As attr%(alias_num)s, "
    non_attr_keys = ('id_col', 'geom_col', 'subquery',
                     'num_ngbrs', 'subquery')

    if 'time_cols' in params:
        columns = params['time_cols']
    else:
        attr_keys = sorted(k for k in params if k not in non_attr_keys)
        columns = [params[key] for key in attr_keys]

    return "".join(fragment % {"col": column, "alias_num": number}
                   for number, column in enumerate(columns, 1))
 def query_attr_where(params):
    """
        Build the WHERE conditions that weed out rows with NULL-valued
        attributes in the neighbor queries.  Every condition refers to
        the placeholder alias `idx_replace`, which callers substitute.

        Markov input (has 'time_cols'):
            {'subquery': ..., 'time_cols': ['time1', 'time2']}
        gives
            'idx_replace."time1" IS NOT NULL AND idx_replace."time2"
             IS NOT NULL'

        Moran input: every key other than id_col, geom_col, subquery and
        num_ngbrs names an attribute column; keys are processed in
        sorted order.  When there are exactly two such keys, an extra
        `idx_replace."<col>" <> 0` condition is appended for the column
        of the second key:
            {'subquery': ..., 'attr1': 'data1', 'attr2': 'data2'}
        gives
            'idx_replace."data1" IS NOT NULL AND idx_replace."data2"
             IS NOT NULL AND idx_replace."data2" <> 0'
    """
    not_null = "idx_replace.\"%s\" IS NOT NULL"

    if 'time_cols' in params:
        conditions = [not_null % column for column in params['time_cols']]
        return " AND ".join(conditions)

    keys = sorted([k for k in params
                   if k not in ('id_col', 'geom_col', 'subquery',
                                'num_ngbrs', 'subquery')])
    conditions = [not_null % params[key] for key in keys]

    if len(keys) == 2:
        # two attributes: also require the second one to be non-zero
        conditions.append("idx_replace.\"%s\" <> 0" % params[keys[1]])

    return " AND ".join(conditions)
 def knn(params):
    """SQL query for k-nearest neighbors.

        For every row of the subquery that passes the attribute filter,
        selects its id, its attribute columns and an array holding the
        ids of the {num_ngbrs} other rows closest to it (ordered with
        the `<->` operator on the geometry column).
        @param params: dict of values to fill template
    """
    where_clause = query_attr_where(params)
    pieces = {"attr_select": query_attr_select(params),
              "attr_where_i": where_clause.replace("idx_replace", "i"),
              "attr_where_j": where_clause.replace("idx_replace", "j")}

    template = (
        'SELECT '
        'i."{id_col}" As id, '
        '%(attr_select)s'
        '(SELECT ARRAY(SELECT j."{id_col}" '
        'FROM ({subquery}) As j '
        'WHERE '
        'i."{id_col}" <> j."{id_col}" AND '
        '%(attr_where_j)s '
        'ORDER BY '
        'j."{geom_col}" <-> i."{geom_col}" ASC '
        'LIMIT {num_ngbrs})'
        ') As neighbors '
        'FROM ({subquery}) As i '
        'WHERE '
        '%(attr_where_i)s '
        'ORDER BY i."{id_col}" ASC;'
    )

    # %-substitution inserts the attribute fragments first; str.format
    # then fills the {placeholders} from params
    return (template % pieces).format(**params)
 # SQL query for finding queens neighbors (all contiguous polygons)
 def queen(params):
    """SQL query for queen neighbors.

        For every row of the subquery that passes the attribute filter,
        selects its id, its attribute columns and an array holding the
        ids of the other rows whose geometry satisfies ST_Touches with
        it.
        @param params dict: information to fill query
    """
    where_clause = query_attr_where(params)
    pieces = {"attr_select": query_attr_select(params),
              "attr_where_i": where_clause.replace("idx_replace", "i"),
              "attr_where_j": where_clause.replace("idx_replace", "j")}

    template = (
        'SELECT '
        'i."{id_col}" As id, '
        '%(attr_select)s'
        '(SELECT ARRAY(SELECT j."{id_col}" '
        'FROM ({subquery}) As j '
        'WHERE i."{id_col}" <> j."{id_col}" AND '
        'ST_Touches(i."{geom_col}", j."{geom_col}") AND '
        '%(attr_where_j)s)'
        ') As neighbors '
        'FROM ({subquery}) As i '
        'WHERE '
        '%(attr_where_i)s '
        'ORDER BY i."{id_col}" ASC;'
    )

    # %-substitution inserts the attribute fragments first; str.format
    # then fills the {placeholders} from params
    return (template % pieces).format(**params)
 # to add more weight methods open a ticket or pull request
 def get_attributes(query_res, attr_num=1):
    """
        Collect one attribute column from the query results as floats.

        @param query_res: query results with attributes and neighbors;
          each row is indexed by the key 'attr<attr_num>'
        @param attr_num: attribute number (1, 2, ...)
        Output:
        1-D float ndarray with one entry per row (None becomes NaN)
    """
    key = 'attr' + str(attr_num)
    # builtin float is the same dtype the np.float alias stood for, and
    # also works on NumPy releases where that alias no longer exists
    return np.array([x[key] for x in query_res], dtype=float)
 def empty_zipped_array(num_nones):
    """
        Placeholder result for analyses that produced nothing
        (empty weights objects / no neighbors).
        Input:
        @param num_nones int: number of columns (e.g., 4)
        Output:
        a single-row list such as [(None, None, None, None)]
    """
    blank_row = (None,) * num_nones
    return [blank_row]
--- a/release/python/0.4.2/crankshaft/crankshaft/random_seeds.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/random_seeds.py
@ -0,0 +1,11 @@
 """Random seed generator used for non-deterministic functions in crankshaft"""
 import random
 import numpy
 def set_random_seeds(value):
    """
    Seed both random number generators used internally -- the
    standard library `random` module and `numpy.random` -- with
    the same value.
    """
    for seed_generator in (random.seed, numpy.random.seed):
        seed_generator(value)
--- a/release/python/0.4.2/crankshaft/crankshaft/segmentation/init.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/segmentation/init.py
@ -0,0 +1 @@
 from segmentation import * 
--- a/release/python/0.4.2/crankshaft/crankshaft/segmentation/segmentation.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/segmentation/segmentation.py
@ -0,0 +1,176 @@
 """
 Segmentation creation and prediction
 """
 import sklearn
 import numpy as np
 import plpy
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn import metrics
 from sklearn.cross_validation import train_test_split
 # Lower level functions
 #----------------------
 def replace_nan_with_mean(array):
    """
        Fill NaN entries in place with the mean of the non-NaN values.
        Input:
            @param array: a 1-D or 2-D array of floats which may have
              null-valued (NaN) entries
        Output:
            the same array object; a 2-D array has each NaN replaced by
            the mean of its column, a 1-D array by the mean of the
            whole vector.  A column (or vector) made only of NaNs has
            no defined mean and stays NaN.
    """
    nan_mask = np.isnan(array)
    if not nan_mask.any():
        # nothing to fill; hand the input back untouched
        return array

    if nan_mask.ndim == 1:
        # 1-D input (e.g. a target vector): np.where gives a single
        # index array, so there is no (row, col) pair to unpack
        array[nan_mask] = np.mean(array[~nan_mask])
        return array

    # 2-D input: fill column by column, computing each mean only once
    for col in np.unique(np.where(nan_mask)[1]):
        missing = nan_mask[:, col]
        array[missing, col] = np.mean(array[~missing, col])
    return array
 def get_data(variable, feature_columns, query):
    """
        Fetch data from the database, clean, and package into
          numpy arrays
        Input:
            @param variable: name of the target variable
            @param feature_columns: list of column names
            @param query: subquery that data is pulled from for the packaging
        Output:
            (target, features): a 1-D float array and a 2-D float array
            with one column per feature, NaNs replaced by replace_nan_with_mean
    """
    columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns])

    try:
        data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format(
            variable=variable,
            columns=columns,
            query=query))
    except Exception as e:
        plpy.error('Failed to access data to build segmentation model: %s' % e)

    # float dtype turns SQL NULLs (None) into NaN; without it NumPy
    # builds an object array on which np.isnan raises TypeError
    target = np.array(data[0]['target'], dtype=float)

    # put n feature data arrays into an n x m array of arrays
    features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns])

    # clean the target through a single-column 2-D view so the NaN
    # filling works per column; ravel() restores the 1-D shape
    clean_target = replace_nan_with_mean(target.reshape(-1, 1)).ravel()

    return clean_target, replace_nan_with_mean(features)
 # High level interface
 # --------------------
 def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters):
    """
    Version of create_and_predict_segment that works on arrays that come
    straight from the SQL calling the function.

        Input:
            @param target: The 1D array of length NSamples containing the target variable we want the model to predict
            @param features: The 2D array of size NSamples * NFeatures that forms the input to the model
            @param target_features: The feature array the trained model predicts on
            @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from
            @param model_parameters: A dictionary containing parameters for the model.
        Output:
            zip of (target id, prediction, model accuracy), the accuracy
            value being repeated for every row
    """
    clean_target = replace_nan_with_mean(target)
    clean_features = replace_nan_with_mean(features)
    target_features = replace_nan_with_mean(target_features)

    model, accuracy = train_model(clean_target, clean_features, model_parameters, 0.2)
    prediction = model.predict(target_features)

    # one copy of the accuracy per predicted row; a plain list is enough
    # for zip and avoids np.full, which the pinned numpy does not provide
    accuracy_array = [accuracy] * prediction.shape[0]
    return zip(target_ids, prediction, accuracy_array)
 def create_and_predict_segment(query, variable, target_query, model_params):
    """
    Build a segmentation model from `query` and apply it to `target_query`.

    Every column of `query` except the target variable, cartodb_id,
    the_geom and the_geom_webmercator is used as a feature.

    Returns a zip of (cartodb_id, prediction, model accuracy), the
    accuracy value being repeated for every row.
    Stuart Lynn
    """
    # a single row is enough to discover the column names
    try:
        sample = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1  '.format(query=query))
        columns = sample[0].keys()
    except Exception as err:
        plpy.error('Failed to build segmentation model: %s' % err)

    # everything that is neither the target nor a system column
    excluded = set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator'])
    feature_columns = set(columns) - excluded

    target, features = get_data(variable, feature_columns, query)
    model, accuracy = train_model(target, features, model_params, 0.2)
    cartodb_ids, result = predict_segment(model, feature_columns, target_query)

    repeated_accuracy = [accuracy] * result.shape[0]
    return zip(cartodb_ids, result, repeated_accuracy)
 def train_model(target, features, model_params, test_split):
    """
        Train the Gradient Boosting model on the provided data and calculate the accuracy of the model
        Input:
            @param target: 1D Array of the variable that the model is to be trained to predict
            @param features: 2D Array NSamples * NFeatures to use in training the model
            @param model_params: A dictionary of model parameters, the full specification can be found on the
                scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html)
            @param test_split: The fraction of the data to be withheld for testing the model / calculating the accuracy
        Output:
            (model, accuracy): the fitted regressor and the value returned by
            calculate_model_accuracy on the withheld rows
    """
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=test_split)
    model = GradientBoostingRegressor(**model_params)
    model.fit(features_train, target_train)
    # score on the withheld rows only: scoring on rows the model was
    # fitted on would make the reported error look better than it is
    accuracy = calculate_model_accuracy(model, features_test, target_test)
    return model, accuracy
 def calculate_model_accuracy(model, features, target):
    """
        Mean squared error between the model's prediction and the target
        Input:
            @param model: model trained from input features
            @param features: features to make a prediction from
            @param target: target to compare prediction to
        Output:
            mean squared error of the model prediction compared to the target
    """
    return metrics.mean_squared_error(model.predict(features), target)
 def predict_segment(model, features, target_query):
    """
    Use the provided model to predict the values for the new feature set
        Input:
            @param model: The pretrained model
            @param features: A list of features to use in the model prediction (list of column names)
            @param target_query: The query to run to obtain the data to predict on and the cartodb_ids associated with it.
        Output:
            (cartodb_ids, predictions): the ids ordered by cartodb_id and
            an array with the prediction for each of them
    """
    batch_size = 1000
    joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])

    try:
        # rows must come back in cartodb_id order: the predictions are
        # later paired with ids aggregated ORDER BY cartodb_id
        cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a ORDER BY cartodb_id'.format(
            joined_features=joined_features,
            target_query=target_query))
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    results = []

    # predict in batches so the whole target table is never held at once
    while True:
        rows = cursor.fetch(batch_size)
        if not rows:
            break
        batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows])

        # Need to fix this. Should be global mean. This will cause weird effects
        # (NaNs are filled with the mean of the current batch only)
        batch = replace_nan_with_mean(batch)
        prediction = model.predict(batch)
        results.append(prediction)

    try:
        cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
    except Exception as e:
        plpy.error('Failed to build segmentation model: %s' % e)

    return cartodb_ids, np.concatenate(results)
--- a/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/init.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/init.py
@ -0,0 +1,2 @@
 """Import all functions from clustering libraries."""
 from markov import *
--- a/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/markov.py
+++ b/release/python/0.4.2/crankshaft/crankshaft/space_time_dynamics/markov.py
@ -0,0 +1,189 @@
 """
 Spatial dynamics measurements using Spatial Markov
 """
 import numpy as np
 import pysal as ps
 import plpy
 import crankshaft.pysal_utils as pu
 def spatial_markov_trend(subquery, time_cols, num_classes=7,
                          w_type='knn', num_ngbrs=5, permutations=0,
                          geom_col='the_geom', id_col='cartodb_id'):
    """
        Predict the trends of a unit based on:
        1. history of its transitions to different classes (e.g., 1st quantile -> 2nd quantile)
        2. average class of its neighbors

        Inputs:
        @param subquery string: e.g., SELECT the_geom, cartodb_id,
          interesting_time_column FROM table_name
        @param time_cols list of strings: list of strings of column names
        @param num_classes (optional): number of classes to break distribution
          of values into. Currently uses quantile bins.
        @param w_type string (optional): weight type ('knn' or 'queen')
        @param num_ngbrs int (optional): number of neighbors (if knn type)
        @param permutations int (optional): number of permutations for test
          stats
        @param geom_col string (optional): name of column which contains the
          geometries
        @param id_col string (optional): name of column which has the ids of
          the table

        Outputs (one tuple per unit, in this order):
        trend float: (trend_up - trend_down) / trend_static
        trend_up float: probability that a geom will move to a higher
          class
        trend_down float: probability that a geom will move to a lower
          class
        volatility float: a measure of the volatility based on
          probability stddev(prob array)
        id: the unit's id, taken from the weights' id_order
    """
    if len(time_cols) < 2:
        plpy.error('More than one time column needs to be passed')

    qvals = {"id_col": id_col,
             "time_cols": time_cols,
             "geom_col": geom_col,
             "subquery": subquery,
             "num_ngbrs": num_ngbrs}

    # built once so the failure message can show exactly what was run
    query = pu.construct_neighbor_query(w_type, qvals)

    try:
        query_result = plpy.execute(query)
        if len(query_result) == 0:
            return zip([None], [None], [None], [None], [None])
    except plpy.SPIError as e:
        plpy.debug('Query failed with exception %s: %s' % (e, query))
        # plpy.error normally raises; the return is only a fallback
        plpy.error('Analysis failed: %s' % e)
        return zip([None], [None], [None], [None], [None])

    ## build weight
    weights = pu.get_weight(query_result, w_type)
    weights.transform = 'r'

    ## prep time data
    t_data = get_time_data(query_result, time_cols)

    plpy.debug('shape of t_data %d, %d' % t_data.shape)
    plpy.debug('number of weight objects: %d, %d' % (weights.sparse).shape)
    plpy.debug('first num elements: %f' % t_data[0, 0])

    sp_markov_result = ps.Spatial_Markov(t_data,
                                         weights,
                                         k=num_classes,
                                         fixed=False,
                                         permutations=permutations)

    ## get lag classes
    lag_classes = ps.Quantiles(
        ps.lag_spatial(weights, t_data[:, -1]),
        k=num_classes).yb

    ## look up probability distribution for each unit according to class and lag class
    prob_dist = get_prob_dist(sp_markov_result.P,
                              lag_classes,
                              sp_markov_result.classes[:, -1])

    ## find the ups and down and overall distribution of each cell
    trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist,
                                                             sp_markov_result.classes[:, -1])

    ## output the results
    return zip(trend, trend_up, trend_down, volatility, weights.id_order)
 def get_time_data(markov_data, time_cols):
    """
        Pull the attr1..attrN values (N = number of time columns) out of
        the query rows and return them as a float array with one row per
        query row and one column per time column.
    """
    attr_keys = ['attr' + str(number)
                 for number in range(1, len(time_cols) + 1)]
    # gathered column by column, then flipped so rows are units
    per_column = [[row[key] for row in markov_data] for key in attr_keys]
    return np.array(per_column, dtype=float).transpose()
 ## not currently used
 def rebin_data(time_data, num_time_per_bin):
    """
        Convert an n x l matrix into an n x ceil(l / m) matrix where each
         group of m consecutive columns (time steps) is reduced
         (averaged) into one column:

          1 2 3 4    1.5 3.5
          5 6 7 8 -> 5.5 7.5
          9 8 7 6    8.5 6.5
          5 4 3 2    4.5 2.5

          if m = 2, the 4 x 4 matrix is transformed to a 4 x 2 matrix.

        This process effectively resamples the data at a time span m
         times longer than that of the input data.

        For cases when there is a remainder (remainder(5/3) = 2), the remaining
         two columns are binned together as the last time period, while the
         first three are binned together for the first period.

        Input:
          @param time_data n x l  ndarray: measurements of an attribute at
           different time intervals
          @param num_time_per_bin int: number of columns to average into a new
           column
        Output:
          n x ceil(l / m) ndarray of resampled time series
    """
    num_cols = time_data.shape[1]
    # ceiling division with '//' so the bin count stays an int whether
    # or not '/' means true division; a partial last bin gets its own
    # column
    n_max = (num_cols + num_time_per_bin - 1) // num_time_per_bin

    return np.array([time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1)
                     for i in range(n_max)]).T
 def get_prob_dist(transition_matrix, lag_indices, unit_indices):
    """
        Look up, for every position i, the entry of `transition_matrix`
        addressed by (lag_indices[i], unit_indices[i]).

        Input:
        @param transition_matrix ndarray[k,k,k]:
        @param lag_indices ndarray:
        @param unit_indices ndarray:

        Output:
        Array of probability distributions, one per position
    """
    selected = []
    for position, lag_class in enumerate(lag_indices):
        selected.append(transition_matrix[lag_class, unit_indices[position]])
    return np.array(selected)
 def get_prob_stats(prob_dist, unit_indices):
    """
        Summarise each row of the probability distributions relative to
        the class index of its unit.

        Outputs (returned in this order):
            trend_up ndarray(float): sum of the probabilities above the
               unit's own class index
            trend_down ndarray(float): sum of the probabilities below the
               unit's own class index
            trend ndarray(float): (trend_up - trend_down) divided by the
               probability at the unit's own class index; where that
               probability is not positive, None is assigned (stored as
               NaN in the float array)
            volatility ndarray(float): standard deviation of each row
    """
    count = len(unit_indices)
    trend_up = np.empty(count, dtype=float)
    trend_down = np.empty(count, dtype=float)
    trend = np.empty(count, dtype=float)

    for i, own_class in enumerate(unit_indices):
        trend_up[i] = prob_dist[i, (own_class + 1):].sum()
        trend_down[i] = prob_dist[i, :own_class].sum()

        stay_prob = prob_dist[i, own_class]
        if stay_prob > 0.0:
            trend[i] = (trend_up[i] - trend_down[i]) / stay_prob
        else:
            trend[i] = None

    # spread of each row's probabilities
    volatility = prob_dist.std(axis=1)

    return trend_up, trend_down, trend, volatility
--- a/release/python/0.4.2/crankshaft/setup.py
+++ b/release/python/0.4.2/crankshaft/setup.py
@ -0,0 +1,49 @@
 """
 CartoDB Spatial Analysis Python Library
 See:
 https://github.com/CartoDB/crankshaft
 """
 from setuptools import setup, find_packages
 # Package metadata and dependency declaration for the crankshaft library.
 setup(
    name='crankshaft',
    # NOTE(review): this file lives under release/python/0.4.2 but declares
    # version 0.0.0 -- confirm whether the version is meant to track the
    # release number.
    version='0.0.0',
    description='CartoDB Spatial Analysis Python Library',
    url='https://github.com/CartoDB/crankshaft',
    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',
    license='MIT',
    # NOTE(review): 'Mapping comunity' looks like a misspelling of
    # 'community'; confirm before changing, the string is package metadata.
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Mapping comunity',
        'Topic :: Maps :: Mapping Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2.7',
    ],
    keywords='maps mapping tools spatial analysis geostatistics',
    # Every package found by setuptools is shipped except the excluded
    # directories.
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    # NOTE(review): 'unittest' is the name of a standard-library module;
    # confirm that listing it as an installable extra is intended.
    extras_require={
        'dev': ['unittest'],
        'test': ['unittest', 'nose', 'mock'],
    },
    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
    # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
    install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'],
    requires=['pysal', 'numpy', 'sklearn'],
    test_suite='test'
 )
--- a/release/python/0.4.2/crankshaft/setup.py-r
+++ b/release/python/0.4.2/crankshaft/setup.py-r
@ -0,0 +1,49 @@
 """
 CartoDB Spatial Analysis Python Library
 See:
 https://github.com/CartoDB/crankshaft
 """
 from setuptools import setup, find_packages
 # Package metadata and dependency declaration for the crankshaft library.
 # NOTE(review): this file is named setup.py-r and repeats the content of the
 # neighbouring setup.py; presumably a leftover backup copy -- confirm whether
 # it should be part of the release.
 setup(
    name='crankshaft',
    # NOTE(review): this file lives under release/python/0.4.2 but declares
    # version 0.0.0 -- confirm whether the version is meant to track the
    # release number.
    version='0.0.0',
    description='CartoDB Spatial Analysis Python Library',
    url='https://github.com/CartoDB/crankshaft',
    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',
    license='MIT',
    # NOTE(review): 'Mapping comunity' looks like a misspelling of
    # 'community'; confirm before changing, the string is package metadata.
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Mapping comunity',
        'Topic :: Maps :: Mapping Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2.7',
    ],
    keywords='maps mapping tools spatial analysis geostatistics',
    # Every package found by setuptools is shipped except the excluded
    # directories.
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    # NOTE(review): 'unittest' is the name of a standard-library module;
    # confirm that listing it as an installable extra is intended.
    extras_require={
        'dev': ['unittest'],
        'test': ['unittest', 'nose', 'mock'],
    },
    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
    # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
    install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.11.2', 'scikit-learn==0.14.1'],
    requires=['pysal', 'numpy', 'sklearn'],
    test_suite='test'
 )
--- a/release/python/0.4.2/crankshaft/test/fixtures/kmeans.json
+++ b/release/python/0.4.2/crankshaft/test/fixtures/kmeans.json
@ -0,0 +1 @@
 [{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
--- a/release/python/0.4.2/crankshaft/test/fixtures/markov.json
+++ b/release/python/0.4.2/crankshaft/test/fixtures/markov.json
@ -0,0 +1 @@
 [[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 
0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]
--- a/release/python/0.4.2/crankshaft/test/fixtures/moran.json
+++ b/release/python/0.4.2/crankshaft/test/fixtures/moran.json
@ -0,0 +1,52 @@
 [[0.9319096128346788, "HH"],
 [-1.135787401862846, "HL"],
 [0.11732030672508517, "LL"],
 [0.6152779669180425, "LL"],
 [-0.14657336660125297, "LH"],
 [0.6967858120189607, "LL"],
 [0.07949310115714454, "HH"],
 [0.4703198759258987, "HH"],
 [0.4421125200498064, "HH"],
 [0.5724288737143592, "LL"],
 [0.8970743435692062, "LL"],
 [0.18327334401918674, "LL"],
 [-0.01466729201304962, "HL"],
 [0.3481559372544409, "LL"],
 [0.06547094736902978, "LL"],
 [0.15482141569329988, "HH"],
 [0.4373841193538136, "HH"],
 [0.15971286468915544, "LL"],
 [1.0543588860308968, "HH"],
 [1.7372866900020818, "HH"],
 [1.091998586053999, "LL"],
 [0.1171572584252222, "HH"],
 [0.08438455015300014, "LL"],
 [0.06547094736902978, "LL"],
 [0.15482141569329985, "HH"],
 [1.1627044812890683, "HH"],
 [0.06547094736902978, "LL"],
 [0.795275137550483, "HH"],
 [0.18562939195219, "LL"],
 [0.3010757406693439, "LL"],
 [2.8205795942839376, "HH"],
 [0.11259190602909264, "LL"],
 [-0.07116352791516614, "HL"],
 [-0.09945240794119009, "LH"],
 [0.18562939195219, "LL"],
 [0.1832733440191868, "LL"],
 [-0.39054253768447705, "HL"],
 [-0.1672071289487642, "HL"],
 [0.3337669247916343, "HH"],
 [0.2584386102554792, "HH"],
 [-0.19733845476322634, "HL"],
 [-0.9379282899805409, "LH"],
 [-0.028770969951095866, "LH"],
 [0.051367269430983485, "LL"],
 [-0.2172548045913472, "LH"],
 [0.05136726943098351, "LL"],
 [0.04191046803899837, "LL"],
 [0.7482357030403517, "HH"],
 [-0.014585767863118111, "LH"],
 [0.5410013139159929, "HH"],
 [1.0223932668429925, "LL"],
 [1.4179402898927476, "LL"]]
--- a/release/python/0.4.2/crankshaft/test/fixtures/neighbors.json
+++ b/release/python/0.4.2/crankshaft/test/fixtures/neighbors.json
@ -0,0 +1,54 @@
 [
    {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
    {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
    {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
    {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
    {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
    {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
    {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
    {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
    {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
    {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
    {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
    {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
    {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
    {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
    {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
    {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
    {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
    {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
    {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
    {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
    {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
    {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
    {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
    {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
    {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
    {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
    {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
    {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
    {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
    {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
    {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
    {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
    {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
    {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
    {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
    {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
    {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
    {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
    {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
    {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
    {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
    {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
    {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
    {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
    {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
    {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
    {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
    {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
    {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
    {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
    {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
    {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
  ]
--- a/release/python/0.4.2/crankshaft/test/fixtures/neighbors_markov.json
+++ b/release/python/0.4.2/crankshaft/test/fixtures/neighbors_markov.json
--- a/release/python/0.4.2/crankshaft/test/helper.py
+++ b/release/python/0.4.2/crankshaft/test/helper.py
@ -0,0 +1,13 @@
 # Shared test helper: builds a mock ``plpy`` object and registers it as a
 # module so the code under test can be imported without PostgreSQL.
 import unittest
 from mock_plpy import MockPlPy
 # Single mock instance; test modules import it from this helper to define
 # query results and to reset state between tests.
 plpy = MockPlPy()
 import sys
 # Register the mock under the module name 'plpy' so that any later
 # ``import plpy`` resolves to it. This has to run before the modules that
 # import plpy are themselves imported.
 sys.modules['plpy'] = plpy
 import os
 def fixture_file(name):
    """Return the path of `name` inside the 'fixtures' folder that sits
    next to this file (symlinks in this file's path are resolved)."""
    this_file = os.path.realpath(__file__)
    return os.path.join(os.path.dirname(this_file), 'fixtures', name)
--- a/release/python/0.4.2/crankshaft/test/mock_plpy.py
+++ b/release/python/0.4.2/crankshaft/test/mock_plpy.py
@ -0,0 +1,52 @@
 import re
 class MockCursor:
    """Cursor stand-in that pages through an in-memory sequence."""
    def __init__(self, data):
        # cursor_pos is the index of the next element to hand out
        self.cursor_pos = 0
        self.data = data
    def fetch(self, batch_size):
        """Return the next `batch_size` items and advance the position.

        The position moves by `batch_size` even when fewer items are left.
        """
        first = self.cursor_pos
        last = first + batch_size
        page = self.data[first:last]
        self.cursor_pos = last
        return page
 class MockPlPy:
    """Minimal in-memory replacement for the ``plpy`` module used in tests.

    Messages are collected in per-level lists, and query results are served
    from (compiled pattern, result) pairs registered with `_define_result`.
    """
    def __init__(self):
        self._reset()
    def _reset(self):
        """Clear all recorded messages and all registered query results."""
        self.infos = []
        self.notices = []
        self.debugs = []
        self.logs = []
        self.warnings = []
        self.errors = []
        self.fatals = []
        self.executes = []
        self.results = []
        self.prepares = []
    def _define_result(self, query, result):
        """Register `result` as the answer for queries matching `query`.

        `query` is a regular expression, compiled case-insensitively and in
        multi-line mode.
        """
        pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([pattern, result])
    def notice(self, msg):
        """Record a notice-level message."""
        self.notices.append(msg)
    def debug(self, msg):
        """Record a debug-level message."""
        # Previously appended to self.notices, leaving self.debugs unused.
        self.debugs.append(msg)
    def info(self, msg):
        """Record an info-level message."""
        self.infos.append(msg)
    def cursor(self, query):
        """Return a MockCursor over the result registered for `query`."""
        data = self.execute(query)
        return MockCursor(data)
    def execute(self, query): # TODO: additional arguments
        """Return the first registered result whose pattern matches `query`.

        Patterns are tried in registration order with `match`, so they are
        anchored at the start of the query. Returns [] when nothing matches.
        """
        for pattern, result in self.results:
            if pattern.match(query):
                return result
        return []
--- a/release/python/0.4.2/crankshaft/test/test_cluster_kmeans.py
+++ b/release/python/0.4.2/crankshaft/test/test_cluster_kmeans.py
@ -0,0 +1,38 @@
 import unittest
 import numpy as np
 # from mock_plpy import MockPlPy
 # plpy = MockPlPy()
 #
 # import sys
 # sys.modules['plpy'] = plpy
 from helper import plpy, fixture_file
 import numpy as np
 import crankshaft.clustering as cc
 import crankshaft.pysal_utils as pu
 from crankshaft import random_seeds
 import json
 class KMeansTest(unittest.TestCase):
    """Testing class for k-means clustering functions"""
    def setUp(self):
        """Reset the mock plpy and load the k-means fixture."""
        plpy._reset()
        # Close the fixture file deterministically instead of leaking it.
        with open(fixture_file('kmeans.json')) as fixture:
            self.cluster_data = json.loads(fixture.read())
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"
                       }
    def test_kmeans(self):
        """Test that kmeans splits the fixture into two clusters of 20"""
        data = self.cluster_data
        # Any query starting with 'select' is answered with the fixture rows.
        plpy._define_result('select', data)
        clusters = cc.kmeans('subquery', 2)
        # Element 1 of each returned row is read as the cluster label.
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]
        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
--- a/release/python/0.4.2/crankshaft/test/test_clustering_moran.py
+++ b/release/python/0.4.2/crankshaft/test/test_clustering_moran.py
@ -0,0 +1,106 @@
 import unittest
 import numpy as np
 # from mock_plpy import MockPlPy
 # plpy = MockPlPy()
 #
 # import sys
 # sys.modules['plpy'] = plpy
 from helper import plpy, fixture_file
 import crankshaft.clustering as cc
 import crankshaft.pysal_utils as pu
 from crankshaft import random_seeds
 import json
 class MoranTest(unittest.TestCase):
    """Testing class for Moran's I functions"""
    def setUp(self):
        """Reset the mock plpy and load the neighbor and Moran fixtures."""
        plpy._reset()
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.params_markov = {"id_col": "cartodb_id",
                              "time_cols": ["_2013_dec", "_2014_jan",
                                            "_2014_feb"],
                              "subquery": "SELECT * FROM a_list",
                              "geom_col": "the_geom",
                              "num_ngbrs": 321}
        # Close the fixture files deterministically instead of leaking them.
        with open(fixture_file('neighbors.json')) as neighbors_file:
            self.neighbors_data = json.loads(neighbors_file.read())
        with open(fixture_file('moran.json')) as moran_file:
            self.moran_data = json.loads(moran_file.read())
    def test_map_quads(self):
        """Test map_quads"""
        self.assertEqual(cc.map_quads(1), 'HH')
        self.assertEqual(cc.map_quads(2), 'LH')
        self.assertEqual(cc.map_quads(3), 'LL')
        self.assertEqual(cc.map_quads(4), 'HL')
        # Anything outside 1..4 is expected to map to None.
        self.assertEqual(cc.map_quads(33), None)
        self.assertEqual(cc.map_quads('andy'), None)
    def test_quad_position(self):
        """Test quad_position"""
        # The builtin int replaces the np.int alias, which newer NumPy
        # releases no longer provide; the resulting dtype is the same.
        quads = np.array([1, 2, 3, 4], int)
        ans = np.array(['HH', 'LH', 'LL', 'HL'])
        test_ans = cc.quad_position(quads)
        self.assertTrue((test_ans == ans).all())
    def test_moran_local(self):
        """Test Moran's I local"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        plpy._define_result('select', data)
        # Fixed seed so the result can be compared with the stored fixture.
        random_seeds.set_random_seeds(1234)
        result = cc.moran_local('subquery', 'value',
                                'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
            self.assertEqual(res_quad, exp_quad)
    def test_moran_local_rate(self):
        """Test Moran's I rate"""
        # attr2 (the denominator) is 1 for every row.
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'attr2': 1,
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        plpy._define_result('select', data)
        random_seeds.set_random_seeds(1234)
        result = cc.moran_local_rate('subquery', 'numerator', 'denominator',
                                     'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)
        # Only the values are compared here; the quadrant labels are not.
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
    def test_moran(self):
        """Test Moran's I global"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        plpy._define_result('select', data)
        random_seeds.set_random_seeds(1235)
        result = cc.moran('table', 'value',
                          'knn', 5, 99, 'the_geom', 'cartodb_id')
        result_moran = result[0][0]
        # The global statistic is compared with the mean of the local
        # fixture values, within a tolerance of 0.1.
        expected_moran = np.array([row[0] for row in self.moran_data]).mean()
        self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
--- a/release/python/0.4.2/crankshaft/test/test_pysal_utils.py
+++ b/release/python/0.4.2/crankshaft/test/test_pysal_utils.py
@ -0,0 +1,142 @@
 import unittest
 import crankshaft.pysal_utils as pu
 from crankshaft import random_seeds
 class PysalUtilsTest(unittest.TestCase):
    """Testing class for utility functions related to PySAL integrations"""
    def setUp(self):
        """Build two parameter sets: one with attr1/attr2 columns and one
        with a list of time columns."""
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.params_array = {"id_col": "cartodb_id",
                             "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"],
                             "subquery": "SELECT * FROM a_list",
                             "geom_col": "the_geom",
                             "num_ngbrs": 321}
    def test_query_attr_select(self):
        """Test query_attr_select"""
        # Expected SELECT fragments; each ends with a trailing ", ".
        ans = "i.\"andy\"::numeric As attr1, " \
              "i.\"jay_z\"::numeric As attr2, "
        ans_array = "i.\"_2013_dec\"::numeric As attr1, " \
                    "i.\"_2014_jan\"::numeric As attr2, " \
                    "i.\"_2014_feb\"::numeric As attr3, "
        self.assertEqual(pu.query_attr_select(self.params), ans)
        self.assertEqual(pu.query_attr_select(self.params_array), ans_array)
    def test_query_attr_where(self):
        """Test pu.query_attr_where"""
        # For the attr1/attr2 form the expected filter also excludes rows
        # where the second attribute is zero.
        ans = "idx_replace.\"andy\" IS NOT NULL AND " \
              "idx_replace.\"jay_z\" IS NOT NULL AND " \
              "idx_replace.\"jay_z\" <> 0"
        ans_array = "idx_replace.\"_2013_dec\" IS NOT NULL AND " \
                    "idx_replace.\"_2014_jan\" IS NOT NULL AND " \
                    "idx_replace.\"_2014_feb\" IS NOT NULL"
        self.assertEqual(pu.query_attr_where(self.params), ans)
        self.assertEqual(pu.query_attr_where(self.params_array), ans_array)
    def test_knn(self):
        """Test knn neighbors constructor"""
        # Expected full k-nearest-neighbors query for the attr1/attr2 form;
        # the comparison is an exact string match, whitespace included.
        ans = "SELECT i.\"cartodb_id\" As id, " \
                     "i.\"andy\"::numeric As attr1, " \
                     "i.\"jay_z\"::numeric As attr2, " \
                     "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
                                   "FROM (SELECT * FROM a_list) As j " \
                                   "WHERE " \
                                    "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
                                    "j.\"andy\" IS NOT NULL AND " \
                                    "j.\"jay_z\" IS NOT NULL AND " \
                                    "j.\"jay_z\" <> 0 " \
                                   "ORDER BY " \
                                    "j.\"the_geom\" <-> i.\"the_geom\" ASC " \
                      "LIMIT 321)) As neighbors " \
              "FROM (SELECT * FROM a_list) As i " \
              "WHERE i.\"andy\" IS NOT NULL AND " \
                    "i.\"jay_z\" IS NOT NULL AND " \
                    "i.\"jay_z\" <> 0 " \
              "ORDER BY i.\"cartodb_id\" ASC;"
        # Expected query for the time-column form.
        ans_array = "SELECT i.\"cartodb_id\" As id, " \
              "i.\"_2013_dec\"::numeric As attr1, " \
              "i.\"_2014_jan\"::numeric As attr2, " \
              "i.\"_2014_feb\"::numeric As attr3, " \
              "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
                            "FROM (SELECT * FROM a_list) As j " \
                            "WHERE i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
                                  "j.\"_2013_dec\" IS NOT NULL AND " \
                                  "j.\"_2014_jan\" IS NOT NULL AND " \
                                  "j.\"_2014_feb\" IS NOT NULL " \
                            "ORDER BY j.\"the_geom\" <-> i.\"the_geom\" ASC " \
                            "LIMIT 321)) As neighbors " \
              "FROM (SELECT * FROM a_list) As i " \
              "WHERE i.\"_2013_dec\" IS NOT NULL AND " \
                    "i.\"_2014_jan\" IS NOT NULL AND " \
                    "i.\"_2014_feb\" IS NOT NULL "\
              "ORDER BY i.\"cartodb_id\" ASC;"
        self.assertEqual(pu.knn(self.params), ans)
        self.assertEqual(pu.knn(self.params_array), ans_array)
    def test_queen(self):
        """Test queen neighbors constructor"""
        # Expected queen-contiguity query: neighbors are selected with
        # ST_Touches instead of a distance ordering and LIMIT.
        ans = "SELECT i.\"cartodb_id\" As id, " \
                     "i.\"andy\"::numeric As attr1, " \
                     "i.\"jay_z\"::numeric As attr2, " \
                     "(SELECT ARRAY(SELECT j.\"cartodb_id\" " \
                                   "FROM (SELECT * FROM a_list) As j " \
                                   "WHERE " \
                                   "i.\"cartodb_id\" <> j.\"cartodb_id\" AND " \
                                   "ST_Touches(i.\"the_geom\", " \
                                              "j.\"the_geom\") AND " \
                                   "j.\"andy\" IS NOT NULL AND " \
                                   "j.\"jay_z\" IS NOT NULL AND " \
                                   "j.\"jay_z\" <> 0)" \
                                  ") As neighbors " \
              "FROM (SELECT * FROM a_list) As i " \
              "WHERE i.\"andy\" IS NOT NULL AND " \
                    "i.\"jay_z\" IS NOT NULL AND " \
                    "i.\"jay_z\" <> 0 " \
              "ORDER BY i.\"cartodb_id\" ASC;"
        self.assertEqual(pu.queen(self.params), ans)
    def test_construct_neighbor_query(self):
        """Test construct_neighbor_query"""
        # Compare to raw knn query
        self.assertEqual(pu.construct_neighbor_query('knn', self.params),
                         pu.knn(self.params))
    def test_get_attributes(self):
        """Test get_attributes"""
        # TODO: placeholder -- no real assertions on get_attributes yet
        self.assertEqual(True, True)
    def test_get_weight(self):
        """Test get_weight"""
        # TODO: placeholder -- no real assertions on get_weight yet
        self.assertEqual(True, True)
    def test_empty_zipped_array(self):
        """Test empty_zipped_array"""
        # Expected: a one-element list holding a tuple of N None values.
        ans2 = [(None, None)]
        ans4 = [(None, None, None, None)]
        self.assertEqual(pu.empty_zipped_array(2), ans2)
        self.assertEqual(pu.empty_zipped_array(4), ans4)
--- a/release/python/0.4.2/crankshaft/test/test_segmentation.py
+++ b/release/python/0.4.2/crankshaft/test/test_segmentation.py
@ -0,0 +1,64 @@
 import unittest
 import numpy as np
 from helper import plpy, fixture_file
 import crankshaft.segmentation as segmentation
 import json
 class SegmentationTest(unittest.TestCase):
    """Testing class for segmentation functions"""
    def setUp(self):
        """Reset the mock plpy before every test."""
        plpy._reset()
    def generate_random_data(self, n_samples, random_state, row_type=False):
        """Draw a synthetic regression data set with y = x1 + x2*x2 + x3.

        With row_type=True, returns a tuple (rows, y) where rows is a list
        of {'features': (x1, x2, x3)} dicts. Otherwise returns a one-element
        list holding a dict of whole columns keyed 'x1', 'x2', 'x3',
        'target' and 'cartodb_id'.
        """
        # The draw order (x1, x2, x3) fixes the values for a given seed.
        x1 = random_state.uniform(size=n_samples)
        x2 = random_state.uniform(size=n_samples)
        x3 = random_state.randint(0, 4, size=n_samples)
        y = x1 + x2 * x2 + x3
        cartodb_id = range(len(x1))
        if row_type:
            return [{'features': vals} for vals in zip(x1, x2, x3)], y
        else:
            return [dict(zip(['x1', 'x2', 'x3', 'target', 'cartodb_id'],
                             [x1, x2, x3, y, cartodb_id]))]
    def test_replace_nan_with_mean(self):
        """Placeholder for a replace_nan_with_mean test"""
        # TODO: this test builds its input but asserts nothing yet.
        test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
    def test_create_and_predict_segment(self):
        """Test create_and_predict_segment on synthetic data"""
        n_samples = 1000
        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples, random_state_train)
        test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True)
        ids = [{'cartodb_ids': range(len(test_data))}]
        rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]
        # Raw strings hold the same pattern text as before, without relying
        # on Python passing unknown escapes such as '\*' through unchanged.
        plpy._define_result(r'select \* from  \(select \* from training\) a  limit 1', rows)
        plpy._define_result(r'.*from \(select \* from training\) as a', training_data)
        plpy._define_result(r'select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a', ids)
        plpy._define_result(r'.*select \* from test.*', test_data)
        model_parameters = {'n_estimators': 1200,
                            'max_depth': 3,
                            'subsample': 0.5,
                            'learning_rate': 0.01,
                            'min_samples_leaf': 1}
        result = segmentation.create_and_predict_segment(
                'select * from training',
                'target',
                'select * from test',
                model_parameters)
        # Element 1 of each result row is read as the prediction.
        prediction = [r[1] for r in result]
        # Root-mean-square error of the predictions against the true targets.
        rmse = np.sqrt(np.mean(np.square(np.array(prediction) - np.array(test_y))))
        self.assertEqual(len(result), len(test_data))
        # NOTE(review): element 2 is presumably an error/accuracy figure
        # reported by the model -- confirm against create_and_predict_segment.
        self.assertTrue(result[0][2] < 0.01)
        self.assertTrue(rmse < 0.5 * np.mean(test_y))
--- a/release/python/0.4.2/crankshaft/test/test_space_time_dynamics.py
+++ b/release/python/0.4.2/crankshaft/test/test_space_time_dynamics.py
@ -0,0 +1,324 @@
 import unittest
 import numpy as np
 import unittest
 # from mock_plpy import MockPlPy
 # plpy = MockPlPy()
 #
 # import sys
 # sys.modules['plpy'] = plpy
 from helper import plpy, fixture_file
 import crankshaft.space_time_dynamics as std
 from crankshaft import random_seeds
 import json
 class SpaceTimeTests(unittest.TestCase):
    """Testing class for Markov Functions."""

    # Yearly columns read from the neighbors fixture; position N in this
    # list is exposed to the code under test as key 'attrN' (1-based).
    TIME_COLS = ['y1995', 'y1996', 'y1997', 'y1998', 'y1999',
                 'y2000', 'y2001', 'y2002', 'y2003', 'y2004',
                 'y2005', 'y2006', 'y2007', 'y2008', 'y2009']

    def _load_json_fixture(self, name):
        """Parse the JSON fixture called `name`, closing the file afterwards."""
        with open(fixture_file(name)) as handle:
            return json.load(handle)

    def _attr_row(self, record):
        """Map each TIME_COLS value of `record` to 'attr1' .. 'attr15'."""
        return dict(('attr%d' % position, record[column])
                    for position, column in enumerate(self.TIME_COLS, 1))

    def setUp(self):
        """Reset the plpy mock and load the fixtures shared by the tests."""
        plpy._reset()
        self.params = {"id_col": "cartodb_id",
                       "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.neighbors_data = self._load_json_fixture('neighbors_markov.json')
        self.markov_data = self._load_json_fixture('markov.json')
        # 10 x 10 array whose column i is filled with the value i.
        self.time_data = np.array([i * np.ones(10, dtype=float) for i in range(10)]).T
        # Five 5 x 5 matrices; consumed by test_get_prob_dist.
        self.transition_matrix = np.array([
                [[ 0.96341463, 0.0304878 , 0.00609756, 0.        , 0.        ],
                 [ 0.06040268, 0.83221477, 0.10738255, 0.        , 0.        ],
                 [ 0.        , 0.14      , 0.74      , 0.12      , 0.        ],
                 [ 0.        , 0.03571429, 0.32142857, 0.57142857, 0.07142857],
                 [ 0.        , 0.        , 0.        , 0.16666667, 0.83333333]],
                [[ 0.79831933, 0.16806723, 0.03361345, 0.        , 0.        ],
                 [ 0.0754717 , 0.88207547, 0.04245283, 0.        , 0.        ],
                 [ 0.00537634, 0.06989247, 0.8655914 , 0.05913978, 0.        ],
                 [ 0.        , 0.        , 0.06372549, 0.90196078, 0.03431373],
                 [ 0.        , 0.        , 0.        , 0.19444444, 0.80555556]],
                [[ 0.84693878, 0.15306122, 0.        , 0.        , 0.        ],
                 [ 0.08133971, 0.78947368, 0.1291866 , 0.        , 0.        ],
                 [ 0.00518135, 0.0984456 , 0.79274611, 0.0984456 , 0.00518135],
                 [ 0.        , 0.        , 0.09411765, 0.87058824, 0.03529412],
                 [ 0.        , 0.        , 0.        , 0.10204082, 0.89795918]],
                [[ 0.8852459 , 0.09836066, 0.        , 0.01639344, 0.        ],
                 [ 0.03875969, 0.81395349, 0.13953488, 0.        , 0.00775194],
                 [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ],
                 [ 0.        , 0.02339181, 0.12865497, 0.75438596, 0.09356725],
                 [ 0.        , 0.        , 0.        , 0.09661836, 0.90338164]],
                [[ 0.33333333, 0.66666667, 0.        , 0.        , 0.        ],
                 [ 0.0483871 , 0.77419355, 0.16129032, 0.01612903, 0.        ],
                 [ 0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.        ],
                 [ 0.        , 0.01036269, 0.06217617, 0.89637306, 0.03108808],
                 [ 0.        , 0.        , 0.        , 0.02352941, 0.97647059]]]
                 )

    def test_spatial_markov(self):
        """Test Spatial Markov."""
        data = []
        for record in self.neighbors_data:
            row = self._attr_row(record)
            row['id'] = record['id']
            row['neighbors'] = record['neighbors']
            data.append(row)

        plpy._define_result('select', data)
        random_seeds.set_random_seeds(1234)

        result = std.spatial_markov_trend('subquery', list(self.TIME_COLS),
                                          5, 'knn', 5, 0,
                                          'the_geom', 'cartodb_id')
        self.assertIsNotNone(result)

        # Keep the first five fields of every returned row.
        result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
        expected = self.markov_data

        # Only the first field (trend) is compared against the fixture.
        for res_row, exp_row in zip(result, expected):
            self.assertAlmostEqual(res_row[0], exp_row[0])

    def test_get_time_data(self):
        """Test get_time_data"""
        data = [self._attr_row(record) for record in self.neighbors_data]
        result = std.get_time_data(data, list(self.TIME_COLS))
        ## expected was prepared from PySAL example:
        ### f = ps.open(ps.examples.get_path("usjoin.csv"))
        ### pci = np.array([f.by_col[str(y)] for y in range(1995, 2010)]).transpose()
        ### rpci = pci / (pci.mean(axis = 0))
        expected = np.array([[ 0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154,  0.83271652
        ,  0.83786314, 0.85012593, 0.85509656, 0.86416612, 0.87119375, 0.86302631
        ,  0.86148267, 0.86252252, 0.86746356],
         [ 0.9188951,  0.91757931, 0.92333258, 0.92517289, 0.92552388, 0.90746978
        ,  0.89830489, 0.89431991, 0.88924794, 0.89815176, 0.91832091, 0.91706054
        ,  0.90139505, 0.87897455, 0.86216858],
         [ 0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522, 0.78964559
        ,  0.80584442, 0.8084998,  0.82258551, 0.82668196, 0.82373724, 0.81814804
        ,  0.83675961, 0.83574199, 0.84647177],
         [ 1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841, 1.14506948
        ,  1.12151133, 1.11160697, 1.10888621, 1.11399806, 1.12168029, 1.13164797
        ,  1.12958508, 1.11371818, 1.09936775],
         [ 1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025, 1.16898201
        ,  1.17212488, 1.14752303, 1.11843284, 1.11024964, 1.11943471, 1.11736468
        ,  1.10863242, 1.09642516, 1.07762337],
         [ 1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684, 1.44184737
        ,  1.44782832, 1.41978227, 1.39092208, 1.4059372,  1.40788646, 1.44052766
        ,  1.45241216, 1.43306098, 1.4174431 ],
         [ 1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149, 1.10888138
        ,  1.11856629, 1.13062931, 1.11944984, 1.12446239, 1.11671008, 1.10880034
        ,  1.08401709, 1.06959206, 1.07875225],
         [ 1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545, 0.99854316
        ,  0.9880258,  0.99669587, 0.99327676, 1.01400905, 1.03176742, 1.040511
        ,  1.01749645, 0.9936394,  0.98279746],
         [ 0.98996986, 1.00143564, 0.99491,  1.00188408, 1.00455845, 0.99127006
        ,  0.97925917, 0.9683482,  0.95335147, 0.93694787, 0.94308213, 0.92232874
        ,  0.91284091, 0.89689833, 0.88928858],
         [ 0.87418391, 0.86416601, 0.84425695, 0.8404494,  0.83903044, 0.8578708
        ,  0.86036185, 0.86107306, 0.8500772,  0.86981998, 0.86837929, 0.87204141
        ,  0.86633032, 0.84946077, 0.83287146],
         [ 1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624, 1.14450183
        ,  1.12349752, 1.12596664, 1.12213996, 1.1119989,  1.10257792, 1.10491258
        ,  1.11059842, 1.10509795, 1.10020097],
         [ 0.97282463, 0.96700147, 0.96252588, 0.9653878,  0.96057687, 0.95831051
        ,  0.94480909, 0.94804195, 0.95430286, 0.94103989, 0.92122519, 0.91010201
        ,  0.89280392, 0.89298243, 0.89165385],
         [ 0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647, 0.9480927
        ,  0.93539182, 0.95388718, 0.94597005, 0.96918424, 0.94781281, 0.93466815
        ,  0.94281559, 0.96520315, 0.96715441],
         [ 0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897, 0.98687073
        ,  0.99237486, 0.98209969, 0.9877653,  0.97399471, 0.96910087, 0.98416665
        ,  0.98423613, 0.99823861, 0.99545704],
         [ 0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012,  0.86191535
        ,  0.84981451, 0.85472102, 0.84564835, 0.83998883, 0.83478547, 0.82803648
        ,  0.8198736,  0.82265395, 0.8399404 ],
         [ 0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136, 0.82785597
        ,  0.86008789, 0.86776298, 0.86720209, 0.8676334,  0.89179317, 0.94202108
        ,  0.9422231,  0.93902708, 0.94479184],
         [ 0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238, 0.90906632
        ,  0.92693339, 0.93695966, 0.94242697, 0.94338265, 0.91981796, 0.91108804
        ,  0.90543476, 0.91737138, 0.94793657],
         [ 1.1977611,  1.18222564, 1.18439158, 1.18267865, 1.19286723, 1.20172869
        ,  1.21328691, 1.22624778, 1.22397075, 1.23857042, 1.24419893, 1.23929384
        ,  1.23418676, 1.23626739, 1.26754398],
         [ 1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667, 1.34790023
        ,  1.34399863, 1.32575181, 1.30795492, 1.30544841, 1.30303302, 1.32107766
        ,  1.32936244, 1.33001241, 1.33288462],
         [ 1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093, 1.05059016
        ,  1.03405057, 1.02747623, 1.03162734, 0.9961416,  0.97356208, 0.94241549
        ,  0.92754547, 0.92549227, 0.92138102],
         [ 1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264, 1.13889622
        ,  1.12442212, 1.13367018, 1.13982256, 1.14029944, 1.11979401, 1.10905389
        ,  1.10577769, 1.11166825, 1.09985155],
         [ 0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284, 0.74480073
        ,  0.76098396, 0.76156903, 0.76651952, 0.76533288, 0.78205934, 0.76842416
        ,  0.77487118, 0.77768683, 0.78801192],
         [ 0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803, 0.97370819
        ,  0.96419154, 0.97209861, 0.97441313, 0.96356162, 0.94745352, 0.93965462
        ,  0.93069645, 0.94020973, 0.94358232],
         [ 0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801, 0.80071489
        ,  0.83358256, 0.83451613, 0.85175032, 0.85954307, 0.86790024, 0.87170334
        ,  0.87863799, 0.87497981, 0.87888675],
         [ 0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619, 0.98733195
        ,  0.99644997, 0.99669587, 1.02559097, 1.01116651, 0.99988024, 0.97906749
        ,  0.99323123, 1.00204939, 0.99602148],
         [ 1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683, 1.08312397
        ,  1.05192626, 1.04230892, 1.05577278, 1.08569751, 1.12443486, 1.08891079
        ,  1.08603695, 1.05997314, 1.02160943],
         [ 1.11368269, 1.1057147,  1.11893431, 1.13778669, 1.1432272,  1.18257029
        ,  1.16226243, 1.16009196, 1.14467789, 1.14820235, 1.12386598, 1.12680236
        ,  1.12357937, 1.1159258,  1.12570828],
         [ 1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667, 1.31210239
        ,  1.29989156, 1.29203193, 1.27183516, 1.26830786, 1.2617743,  1.28656675
        ,  1.29734097, 1.29390205, 1.29345446],
         [ 0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864, 0.78772975
        ,  0.82848011, 0.8259679,  0.82435705, 0.83108634, 0.84373784, 0.83891093
        ,  0.84349247, 0.85637272, 0.86539395],
         [ 1.23450087, 1.2426022,  1.23537935, 1.23581293, 1.24522626, 1.2256767
        ,  1.21126648, 1.19377804, 1.18355337, 1.19674434, 1.21536573, 1.23653297
        ,  1.27962009, 1.27968392, 1.25907738],
         [ 0.9769662,  0.97400719, 0.98035944, 0.97581531, 0.95543282, 0.96480308
        ,  0.94686376, 0.93679073, 0.92540049, 0.92988835, 0.93442917, 0.92100464
        ,  0.91475304, 0.90249622, 0.9021363 ],
         [ 0.84986886, 0.8986851,  0.84295997, 0.87280534, 0.85659368, 0.88937573
        ,  0.894401, 0.90448993, 0.95495898, 0.92698333, 0.94745352, 0.92562488
        ,  0.96635366, 1.02520312, 1.0394296 ],
         [ 1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073, 1.00759019
        ,  0.99192968, 0.99747298, 0.99550759, 0.97583768, 0.9610168,  0.94779638
        ,  0.93759089, 0.93353431, 0.94121705],
         [ 0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613, 0.83434854
        ,  0.85813595, 0.84667961, 0.84374558, 0.85951183, 0.87194227, 0.89455097
        ,  0.88283929, 0.90349491, 0.90600675],
         [ 1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086, 1.00581626
        ,  0.98850522, 0.99291168, 0.98983209, 0.97511924, 0.96134615, 0.96382634
        ,  0.95011401, 0.9434686,  0.94637765],
         [ 1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857, 1.04800023
        ,  1.03024941, 1.04200483, 1.0402554,  1.03296979, 1.02191682, 1.02476275
        ,  1.02347523, 1.02517684, 1.04359571],
         [ 1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043,  1.0531801
        ,  1.07452771, 1.09383478, 1.1052447,  1.10322136, 1.09167939, 1.08772756
        ,  1.08859544, 1.09177338, 1.1096083 ],
         [ 0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809, 0.86287327
        ,  0.85169796, 0.85411285, 0.84886336, 0.84517414, 0.84843858, 0.84488343
        ,  0.83374329, 0.82812044, 0.82878599],
         [ 0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286, 0.92652175
        ,  0.94278865, 0.93682452, 0.98655146, 0.992237, 0.9798497,  0.93869677
        ,  0.96947771, 1.00362626, 0.98102351],
         [ 0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967,  0.93092109
        ,  0.92662519, 0.93412152, 0.93501274, 0.92879506, 0.92110542, 0.91035556
        ,  0.90430364, 0.89994694, 0.90073864],
         [ 0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824, 0.98882205
        ,  0.97662234, 0.95601578, 0.94905385, 0.94934888, 0.97152609, 0.97163004
        ,  0.9700702,  0.97158948, 0.95884908],
         [ 0.83980439, 0.84726737, 0.85747,  0.85467221, 0.8556751,  0.84818516
        ,  0.85265681, 0.84502402, 0.82645665, 0.81743586, 0.83550406, 0.83338919
        ,  0.83511679, 0.82136617, 0.80921874],
         [ 0.95118156, 0.9466212,  0.94688098, 0.9508583,  0.9512441,  0.95440787
        ,  0.96364363, 0.96804412, 0.97136214, 0.97583768, 0.95571724, 0.96895368
        ,  0.97001634, 0.97082733, 0.98782366],
         [ 1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249, 1.10558188
        ,  1.1214086,  1.12292577, 1.13021031, 1.13342735, 1.14686068, 1.14502975
        ,  1.14474747, 1.14084037, 1.16142926],
         [ 1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863, 1.11856702
        ,  1.09764283, 1.08815849, 1.08044313, 1.09278827, 1.07003204, 1.08398066
        ,  1.09831768, 1.09298232, 1.09176125],
         [ 0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744, 0.77751194
        ,  0.79902974, 0.81437881, 0.80788828, 0.79603865, 0.78966436, 0.79949807
        ,  0.80172182, 0.82168155, 0.85587911],
         [ 1.0052447,  1.00007696, 1.00475899, 1.00613942, 1.00639561, 1.00162979
        ,  0.99860739, 1.00814981, 1.00574316, 0.99030032, 0.97682565, 0.97292596
        ,  0.96519561, 0.96173403, 0.95890284],
         [ 0.95808419, 0.9382568,  0.9654441,  0.95561201, 0.96987289, 0.96608031
        ,  0.99727185, 1.00781194, 1.03484236, 1.05333619, 1.0983263,  1.1704974
        ,  1.17025154, 1.18730553, 1.14242645]])
        # Check type and shape first so a mismatch is reported with the
        # offending values rather than a bare "False is not true".
        self.assertEqual(type(result), type(expected))
        self.assertEqual(result.shape, expected.shape)
        self.assertTrue(np.allclose(result, expected))

    def test_rebin_data(self):
        """Test rebin_data"""
        ## sample in double the time (even case since 10 % 2 = 0):
        ##   (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2
        ## = 0.5,     2.5,     4.5,     6.5,     8.5
        ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float)
                             for i in range(0, 10, 2)]).T
        self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 2), ans_even))

        ## sample in triple the time (uneven since 10 % 3 = 1):
        ##   (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1
        ## = 1,         4,         7,         9
        ans_odd = np.array([i * np.ones(10, dtype=float)
                            for i in (1, 4, 7, 9)]).T
        self.assertTrue(np.array_equal(std.rebin_data(self.time_data, 3), ans_odd))

    def test_get_prob_dist(self):
        """Test get_prob_dist"""
        lag_indices = np.array([1, 2, 3, 4])
        unit_indices = np.array([1, 3, 2, 4])
        # Row k equals self.transition_matrix[lag_indices[k], unit_indices[k]].
        answer = np.array([
            [ 0.0754717 , 0.88207547, 0.04245283, 0.        , 0.        ],
            [ 0.        , 0.        , 0.09411765, 0.87058824, 0.03529412],
            [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ],
            [ 0.        , 0.        , 0.        , 0.02352941, 0.97647059]
        ])
        result = std.get_prob_dist(self.transition_matrix, lag_indices, unit_indices)
        self.assertTrue(np.array_equal(result, answer))

    def test_get_prob_stats(self):
        """Test get_prob_stats"""
        probs = np.array([
            [ 0.0754717 , 0.88207547, 0.04245283, 0.        , 0.        ],
            [ 0.        , 0.        , 0.09411765, 0.87058824, 0.03529412],
            [ 0.0049505 , 0.09405941, 0.77722772, 0.11881188, 0.0049505 ],
            [ 0.        , 0.        , 0.        , 0.02352941, 0.97647059]
        ])
        unit_indices = np.array([1, 3, 2, 4])
        answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.])
        answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941])
        answer_trend = np.array([-0.03301887 / 0.88207547, -0.05882353 / 0.87058824,  0.02475248 / 0.77722772, -0.02352941 / 0.97647059])
        answer_volatility = np.array([ 0.34221495,  0.33705421,  0.29226542,  0.38834223])

        result = std.get_prob_stats(probs, unit_indices)

        # The result is indexed positionally: up, down, trend, volatility.
        self.assertTrue(np.allclose(result[0], answer_up))
        self.assertTrue(np.allclose(result[1], answer_down))
        self.assertTrue(np.allclose(result[2], answer_trend))
        self.assertTrue(np.allclose(result[3], answer_volatility))
--- a/src/pg/crankshaft.control
+++ b/src/pg/crankshaft.control
@ -1,5 +1,5 @@
 comment = 'CartoDB Spatial Analysis extension'
-default_version = '0.4.1'
+default_version = '0.4.2'
 requires = 'plpythonu, postgis'
 superuser = true
 schema = cdb_crankshaft
		`@ -0,0 +1,2 @@`
							`"""Import all functions for pysal_utils"""`
							`from crankshaft.pysal_utils.pysal_utils import *`
		`@ -0,0 +1,2 @@`
							`"""Import all functions from clustering libraries."""`
							`from markov import *`
		`@ -0,0 +1 @@`
							[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
		`@ -0,0 +1 @@`
							[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 
0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]