Release 0.7.0

2018-02-23 15:45:12 +01:00 · 2018-02-23 15:45:12 +01:00 · a1198627b5
commit a1198627b5
parent 58e238b9a5
60 changed files with 15909 additions and 3 deletions
--- a/NEWS.md
+++ b/NEWS.md
@ -1,5 +1,14 @@
 0.7.0 (2018-mm-dd)
 ------------------
 * Updated Moran and Markov documentation [#179](https://github.com/CartoDB/crankshaft/pull/179) [#155](https://github.com/CartoDB/crankshaft/pull/155)
 * Updated examples in documentation [#193](https://github.com/CartoDB/crankshaft/pull/193)
 * Better error management for empty values [#157](https://github.com/CartoDB/crankshaft/pull/157)
 * Added nonspatial kmeans with class framework [#150](https://github.com/CartoDB/crankshaft/pull/150)
 * Added multipolygons and geometry collections support to PIA analyssis [#165](https://github.com/CartoDB/crankshaft/pull/165)
 0.6.1 (2017-11-23)
-* Add VOLATILITY and PARALLEL categories to PostgreSQL functions
+------------------
 * Added VOLATILITY and PARALLEL categories to PostgreSQL functions [#183](https://github.com/CartoDB/crankshaft/pull/183)
 0.6.0 (2017-11-08)
 ------------------
--- a/release/crankshaft--0.6.1--0.7.0.sql
+++ b/release/crankshaft--0.6.1--0.7.0.sql
--- a/release/crankshaft--0.7.0.sql
+++ b/release/crankshaft--0.7.0.sql
--- a/release/crankshaft.control
+++ b/release/crankshaft.control
@ -1,5 +1,5 @@
 comment = 'CartoDB Spatial Analysis extension'
-default_version = '0.6.1'
+default_version = '0.7.0'
 requires = 'plpythonu, postgis'
 superuser = true
 schema = cdb_crankshaft
--- a/release/python/0.7.0/crankshaft/crankshaft/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/init.py
@ -0,0 +1,7 @@
 """Import all modules"""
 import crankshaft.random_seeds
 import crankshaft.clustering
 import crankshaft.space_time_dynamics
 import crankshaft.segmentation
 import crankshaft.regression
 import analysis_data_provider
--- a/release/python/0.7.0/crankshaft/crankshaft/analysis_data_provider.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/analysis_data_provider.py
@ -0,0 +1,98 @@
 """class for fetching data"""
 import plpy
 import pysal_utils as pu
 NULL_VALUE_ERROR = ('No usable data passed to analysis. Check your input rows '
                    'for null values and fill in appropriately.')
 def verify_data(func):
    """decorator to verify data result before returning to algorithm"""
    def wrapper(*args, **kwargs):
        """Error checking"""
        try:
            data = func(*args, **kwargs)
            if not data:
                plpy.error(NULL_VALUE_ERROR)
            else:
                return data
        except Exception as err:
            plpy.error('Analysis failed: {}'.format(err))
        return []
    return wrapper
 class AnalysisDataProvider(object):
    @verify_data
    def get_getis(self, w_type, params):
        """fetch data for getis ord's g"""
        query = pu.construct_neighbor_query(w_type, params)
        return plpy.execute(query)
    @verify_data
    def get_markov(self, w_type, params):
        """fetch data for spatial markov"""
        query = pu.construct_neighbor_query(w_type, params)
        return plpy.execute(query)
    @verify_data
    def get_moran(self, w_type, params):
        """fetch data for moran's i analyses"""
        query = pu.construct_neighbor_query(w_type, params)
        return plpy.execute(query)
    @verify_data
    def get_nonspatial_kmeans(self, params):
        """
            Fetch data for non-spatial k-means.
            Inputs - a dict (params) with the following keys:
                colnames: a (text) list of column names (e.g.,
                          `['andy', 'cookie']`)
                id_col: the name of the id column (e.g., `'cartodb_id'`)
                subquery: the subquery for exposing the data (e.g.,
                          SELECT * FROM favorite_things)
            Output:
                A SQL query for packaging the data for consumption within
                `KMeans().nonspatial`. Format will be a list of length one,
                with the first element a dict with keys ('rowid', 'attr1',
                'attr2', ...)
        """
        agg_cols = ', '.join([
            'array_agg({0}) As arr_col{1}'.format(val, idx+1)
            for idx, val in enumerate(params['colnames'])
        ])
        query = '''
            SELECT {cols}, array_agg({id_col}) As rowid
            FROM ({subquery}) As a
        '''.format(subquery=params['subquery'],
                   id_col=params['id_col'],
                   cols=agg_cols).strip()
        return plpy.execute(query)
    @verify_data
    def get_spatial_kmeans(self, params):
        """fetch data for spatial kmeans"""
        query = '''
                SELECT
                  array_agg("{id_col}" ORDER BY "{id_col}") as ids,
                  array_agg(ST_X("{geom_col}") ORDER BY "{id_col}") As xs,
                  array_agg(ST_Y("{geom_col}") ORDER BY "{id_col}") As ys
                FROM ({subquery}) As a
                WHERE "{geom_col}" IS NOT NULL
                '''.format(**params)
        return plpy.execute(query)
    @verify_data
    def get_gwr(self, params):
        """fetch data for gwr analysis"""
        query = pu.gwr_query(params)
        return plpy.execute(query)
    @verify_data
    def get_gwr_predict(self, params):
        """fetch data for gwr predict"""
        query = pu.gwr_predict_query(params)
        return plpy.execute(query)
--- a/release/python/0.7.0/crankshaft/crankshaft/clustering/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/clustering/init.py
@ -0,0 +1,4 @@
 """Import all functions from for clustering"""
 from moran import *
 from kmeans import *
 from getis import *
--- a/release/python/0.7.0/crankshaft/crankshaft/clustering/getis.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/clustering/getis.py
@ -0,0 +1,50 @@
 """
 Getis-Ord's G geostatistics (hotspot/coldspot analysis)
 """
 import pysal as ps
 from collections import OrderedDict
 # crankshaft modules
 import crankshaft.pysal_utils as pu
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 # High level interface ---------------------------------------
 class Getis(object):
    def __init__(self, data_provider=None):
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider
    def getis_ord(self, subquery, attr,
                  w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Getis-Ord's G*
        Implementation building neighbors with a PostGIS database and PySAL's
          Getis-Ord's G* hotspot/coldspot module.
        Andy Eschbacher
        """
        # geometries with attributes that are null are ignored
        # resulting in a collection of not as near neighbors if kNN is chosen
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])
        result = self.data_provider.get_getis(w_type, params)
        attr_vals = pu.get_attributes(result)
        # build PySAL weight object
        weight = pu.get_weight(result, w_type, num_ngbrs)
        # calculate Getis-Ord's G* z- and p-values
        getis = ps.esda.getisord.G_Local(attr_vals, weight,
                                         star=True, permutations=permutations)
        return zip(getis.z_sim, getis.p_sim, getis.p_z_sim, weight.id_order)
--- a/release/python/0.7.0/crankshaft/crankshaft/clustering/kmeans.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/clustering/kmeans.py
@ -0,0 +1,113 @@
 from sklearn.cluster import KMeans
 import numpy as np
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 class Kmeans(object):
    def __init__(self, data_provider=None):
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider
    def spatial(self, query, no_clusters, no_init=20):
        """
            find centers based on clusters of latitude/longitude pairs
            query: SQL query that has a WGS84 geometry (the_geom)
        """
        params = {"subquery": query,
                  "geom_col": "the_geom",
                  "id_col": "cartodb_id"}
        result = self.data_provider.get_spatial_kmeans(params)
        # Unpack query response
        xs = result[0]['xs']
        ys = result[0]['ys']
        ids = result[0]['ids']
        km = KMeans(n_clusters=no_clusters, n_init=no_init)
        labels = km.fit_predict(zip(xs, ys))
        return zip(ids, labels)
    def nonspatial(self, subquery, colnames, no_clusters=5,
                   standardize=True, id_col='cartodb_id'):
        """
        Arguments:
            query (string): A SQL query to retrieve the data required to do the
                            k-means clustering analysis, like so:
                            SELECT * FROM iris_flower_data
            colnames (list): a list of the column names which contain the data
                             of interest, like so: ['sepal_width',
                                                    'petal_width',
                                                    'sepal_length',
                                                    'petal_length']
            no_clusters (int): number of clusters (greater than zero)
            id_col (string): name of the input id_column
        Returns:
            A list of tuples with the following columns:
            cluster labels: a label for the cluster that the row belongs to
            centers: center of the cluster that this row belongs to
            silhouettes: silhouette measure for this value
            rowid: row that these values belong to (corresponds to the value in
                   `id_col`)
        """
        import json
        from sklearn import metrics
        params = {
            "colnames": colnames,
            "subquery": subquery,
            "id_col": id_col
        }
        data = self.data_provider.get_nonspatial_kmeans(params)
        # fill array with values for k-means clustering
        if standardize:
            cluster_columns = _scale_data(
              _extract_columns(data))
        else:
            cluster_columns = _extract_columns(data)
        kmeans = KMeans(n_clusters=no_clusters,
                        random_state=0).fit(cluster_columns)
        centers = [json.dumps(dict(zip(colnames, c)))
                   for c in kmeans.cluster_centers_[kmeans.labels_]]
        silhouettes = metrics.silhouette_samples(cluster_columns,
                                                 kmeans.labels_,
                                                 metric='sqeuclidean')
        return zip(kmeans.labels_,
                   centers,
                   silhouettes,
                   [kmeans.inertia_] * kmeans.labels_.shape[0],
                   data[0]['rowid'])
 # -- Preprocessing steps
 def _extract_columns(data):
    """
        Extract the features from the query and pack them into a NumPy array
        data (list of dicts): result of the kmeans request
    """
    # number of columns minus rowid column
    n_cols = len(data[0]) - 1
    return np.array([data[0]['arr_col{0}'.format(i+1)]
                     for i in xrange(n_cols)],
                    dtype=float).T
 def _scale_data(features):
    """
        Scale all input columns to center on 0 with a standard devation of 1
        features (numpy matrix): features of dimension (n_features, n_samples)
    """
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    return scaler.fit_transform(features)
--- a/release/python/0.7.0/crankshaft/crankshaft/clustering/moran.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/clustering/moran.py
@ -0,0 +1,208 @@
 """
 Moran's I geostatistics (global clustering & outliers presence)
 """
 # TODO: Fill in local neighbors which have null/NoneType values with the
 #       average of the their neighborhood
 import pysal as ps
 from collections import OrderedDict
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 # crankshaft module
 import crankshaft.pysal_utils as pu
 # High level interface ---------------------------------------
 class Moran(object):
    def __init__(self, data_provider=None):
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider
    def global_stat(self, subquery, attr_name,
                    w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I (global)
        Implementation building neighbors with a PostGIS database and Moran's I
         core clusters with PySAL.
        Andy Eschbacher
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr_name),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])
        result = self.data_provider.get_moran(w_type, params)
        # collect attributes
        attr_vals = pu.get_attributes(result)
        # calculate weights
        weight = pu.get_weight(result, w_type, num_ngbrs)
        # calculate moran global
        moran_global = ps.esda.moran.Moran(attr_vals, weight,
                                           permutations=permutations)
        return zip([moran_global.I], [moran_global.EI])
    def local_stat(self, subquery, attr,
                   w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I implementation for PL/Python
        Andy Eschbacher
        """
        # geometries with attributes that are null are ignored
        # resulting in a collection of not as near neighbors
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])
        result = self.data_provider.get_moran(w_type, params)
        attr_vals = pu.get_attributes(result)
        weight = pu.get_weight(result, w_type, num_ngbrs)
        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local(attr_vals, weight,
                                         permutations=permutations)
        # find quadrants for each geometry
        quads = quad_position(lisa.q)
        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
    def global_rate_stat(self, subquery, numerator, denominator,
                         w_type, num_ngbrs, permutations, geom_col, id_col):
        """
        Moran's I Rate (global)
        Andy Eschbacher
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", numerator),
                              ("attr2", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])
        result = self.data_provider.get_moran(w_type, params)
        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)
        weight = pu.get_weight(result, w_type, num_ngbrs)
        # calculate moran global rate
        lisa_rate = ps.esda.moran.Moran_Rate(numer, denom, weight,
                                             permutations=permutations)
        return zip([lisa_rate.I], [lisa_rate.EI])
    def local_rate_stat(self, subquery, numerator, denominator,
                        w_type, num_ngbrs, permutations, geom_col, id_col):
        """
            Moran's I Local Rate
            Andy Eschbacher
        """
        # geometries with values that are null are ignored
        # resulting in a collection of not as near neighbors
        params = OrderedDict([("id_col", id_col),
                              ("numerator", numerator),
                              ("denominator", denominator),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])
        result = self.data_provider.get_moran(w_type, params)
        # collect attributes
        numer = pu.get_attributes(result, 1)
        denom = pu.get_attributes(result, 2)
        weight = pu.get_weight(result, w_type, num_ngbrs)
        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_Rate(numer, denom, weight,
                                              permutations=permutations)
        # find quadrants for each geometry
        quads = quad_position(lisa.q)
        return zip(lisa.Is, quads, lisa.p_sim, weight.id_order, lisa.y)
    def local_bivariate_stat(self, subquery, attr1, attr2,
                             permutations, geom_col, id_col,
                             w_type, num_ngbrs):
        """
            Moran's I (local) Bivariate (untested)
        """
        params = OrderedDict([("id_col", id_col),
                              ("attr1", attr1),
                              ("attr2", attr2),
                              ("geom_col", geom_col),
                              ("subquery", subquery),
                              ("num_ngbrs", num_ngbrs)])
        result = self.data_provider.get_moran(w_type, params)
        # collect attributes
        attr1_vals = pu.get_attributes(result, 1)
        attr2_vals = pu.get_attributes(result, 2)
        # create weights
        weight = pu.get_weight(result, w_type, num_ngbrs)
        # calculate LISA values
        lisa = ps.esda.moran.Moran_Local_BV(attr1_vals, attr2_vals, weight,
                                            permutations=permutations)
        # find clustering of significance
        lisa_sig = quad_position(lisa.q)
        return zip(lisa.Is, lisa_sig, lisa.p_sim, weight.id_order)
 # Low level functions ----------------------------------------
 def map_quads(coord):
    """
        Map a quadrant number to Moran's I designation
        HH=1, LH=2, LL=3, HL=4
        Input:
        @param coord (int): quadrant of a specific measurement
        Output:
            classification (one of 'HH', 'LH', 'LL', or 'HL')
    """
    if coord == 1:
        return 'HH'
    elif coord == 2:
        return 'LH'
    elif coord == 3:
        return 'LL'
    elif coord == 4:
        return 'HL'
    else:
        return None
 def quad_position(quads):
    """
        Produce Moran's I classification based of n
        Input:
        @param quads ndarray: an array of quads classified by
          1-4 (PySAL default)
        Output:
        @param list: an array of quads classied by 'HH', 'LL', etc.
    """
    return [map_quads(q) for q in quads]
--- a/release/python/0.7.0/crankshaft/crankshaft/pysal_utils/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/pysal_utils/init.py
@ -0,0 +1,2 @@
 """Import all functions for pysal_utils"""
 from crankshaft.pysal_utils.pysal_utils import *
--- a/release/python/0.7.0/crankshaft/crankshaft/pysal_utils/pysal_utils.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/pysal_utils/pysal_utils.py
@ -0,0 +1,251 @@
 """
    Utilities module for generic PySAL functionality, mainly centered on
      translating queries into numpy arrays or PySAL weights objects
 """
 import numpy as np
 import pysal as ps
 def construct_neighbor_query(w_type, query_vals):
    """Return query (a string) used for finding neighbors
        @param w_type text: type of neighbors to calculate ('knn' or 'queen')
        @param query_vals dict: values used to construct the query
    """
    if w_type.lower() == 'knn':
        return knn(query_vals)
    else:
        return queen(query_vals)
 # Build weight object
 def get_weight(query_res, w_type='knn', num_ngbrs=5):
    """
        Construct PySAL weight from return value of query
        @param query_res dict-like: query results with attributes and neighbors
    """
    neighbors = {x['id']: x['neighbors'] for x in query_res}
    print 'len of neighbors: %d' % len(neighbors)
    built_weight = ps.W(neighbors)
    built_weight.transform = 'r'
    return built_weight
 def query_attr_select(params, table_ref=True):
    """
        Create portion of SELECT statement for attributes inolved in query.
        Defaults to order in the params
        @param params: dict of information used in query (column names,
                       table name, etc.)
            Example:
            OrderedDict([('numerator', 'price'),
                         ('denominator', 'sq_meters'),
                         ('subquery', 'SELECT * FROM interesting_data')])
        Output:
          "i.\"price\"::numeric As attr1, " \
          "i.\"sq_meters\"::numeric As attr2, "
    """
    attr_string = ""
    template = "\"%(col)s\"::numeric As attr%(alias_num)s, "
    if table_ref:
        template = "i." + template
    if ('time_cols' in params) or ('ind_vars' in params):
        # if markov or gwr analysis
        attrs = (params['time_cols'] if 'time_cols' in params
                 else params['ind_vars'])
        if 'ind_vars' in params:
            template = "array_agg(\"%(col)s\"::numeric) As attr%(alias_num)s, "
        for idx, val in enumerate(attrs):
            attr_string += template % {"col": val, "alias_num": idx + 1}
    else:
        # if moran's analysis
        attrs = [k for k in params
                 if k not in ('id_col', 'geom_col', 'subquery',
                              'num_ngbrs', 'subquery')]
        for idx, val in enumerate(attrs):
            attr_string += template % {"col": params[val],
                                       "alias_num": idx + 1}
    return attr_string
 def query_attr_where(params, table_ref=True):
    """
      Construct where conditions when building neighbors query
        Create portion of WHERE clauses for weeding out NULL-valued geometries
        Input: dict of params:
            {'subquery': ...,
             'numerator': 'data1',
             'denominator': 'data2',
             '': ...}
        Output:
          'idx_replace."data1" IS NOT NULL AND idx_replace."data2" IS NOT NULL'
        Input:
        {'subquery': ...,
         'time_cols': ['time1', 'time2', 'time3'],
         'etc': ...}
        Output: 'idx_replace."time1" IS NOT NULL AND idx_replace."time2" IS NOT
          NULL AND idx_replace."time3" IS NOT NULL'
    """
    attr_string = []
    template = "\"%s\" IS NOT NULL"
    if table_ref:
        template = "idx_replace." + template
    if ('time_cols' in params) or ('ind_vars' in params):
        # markov or gwr where clauses
        attrs = (params['time_cols'] if 'time_cols' in params
                 else params['ind_vars'])
        # add values to template
        for attr in attrs:
            attr_string.append(template % attr)
    else:
        # moran where clauses
        # get keys
        attrs = [k for k in params
                 if k not in ('id_col', 'geom_col', 'subquery',
                              'num_ngbrs', 'subquery')]
        # add values to template
        for attr in attrs:
            attr_string.append(template % params[attr])
        if 'denominator' in attrs:
            attr_string.append(
              "idx_replace.\"%s\" <> 0" % params['denominator'])
    out = " AND ".join(attr_string)
    return out
 def knn(params):
    """SQL query for k-nearest neighbors.
        @param vars: dict of values to fill template
    """
    attr_select = query_attr_select(params, table_ref=True)
    attr_where = query_attr_where(params, table_ref=True)
    replacements = {"attr_select": attr_select,
                    "attr_where_i": attr_where.replace("idx_replace", "i"),
                    "attr_where_j": attr_where.replace("idx_replace", "j")}
    query = '''
            SELECT
              i."{id_col}" As id,
              %(attr_select)s
              (SELECT ARRAY(SELECT j."{id_col}"
                FROM ({subquery}) As j
                WHERE i."{id_col}" <> j."{id_col}" AND
                      %(attr_where_j)s AND
                      j."{geom_col}" IS NOT NULL
                ORDER BY j."{geom_col}" <-> i."{geom_col}" ASC
                LIMIT {num_ngbrs})) As neighbors
             FROM ({subquery}) As i
            WHERE %(attr_where_i)s AND i."{geom_col}" IS NOT NULL
            ORDER BY i."{id_col}" ASC;
            ''' % replacements
    return query.format(**params)
 # SQL query for finding queens neighbors (all contiguous polygons)
 def queen(params):
    """SQL query for queen neighbors.
        @param params dict: information to fill query
    """
    attr_select = query_attr_select(params)
    attr_where = query_attr_where(params)
    replacements = {"attr_select": attr_select,
                    "attr_where_i": attr_where.replace("idx_replace", "i"),
                    "attr_where_j": attr_where.replace("idx_replace", "j")}
    query = '''
            SELECT
              i."{id_col}" As id,
              %(attr_select)s
              (SELECT ARRAY(SELECT j."{id_col}"
                 FROM ({subquery}) As j
                WHERE i."{id_col}" <> j."{id_col}" AND
                      ST_Touches(i."{geom_col}", j."{geom_col}") AND
                      %(attr_where_j)s)) As neighbors
            FROM ({subquery}) As i
            WHERE
                %(attr_where_i)s
            ORDER BY i."{id_col}" ASC;
            ''' % replacements
    return query.format(**params)
 def gwr_query(params):
    """
    GWR query
    """
    replacements = {"ind_vars_select": query_attr_select(params,
                                                         table_ref=None),
                    "ind_vars_where": query_attr_where(params,
                                                       table_ref=None)}
    query = '''
      SELECT
        array_agg(ST_X(ST_Centroid("{geom_col}"))) As x,
        array_agg(ST_Y(ST_Centroid("{geom_col}"))) As y,
        array_agg("{dep_var}") As dep_var,
        %(ind_vars_select)s
        array_agg("{id_col}") As rowid
      FROM ({subquery}) As q
      WHERE
        "{dep_var}" IS NOT NULL AND
        %(ind_vars_where)s
        ''' % replacements
    return query.format(**params).strip()
 def gwr_predict_query(params):
    """
    GWR query
    """
    replacements = {"ind_vars_select": query_attr_select(params,
                                                         table_ref=None),
                    "ind_vars_where": query_attr_where(params,
                                                       table_ref=None)}
    query = '''
      SELECT
        array_agg(ST_X(ST_Centroid({geom_col}))) As x,
        array_agg(ST_Y(ST_Centroid({geom_col}))) As y,
        array_agg({dep_var}) As dep_var,
        %(ind_vars_select)s
        array_agg({id_col}) As rowid
      FROM ({subquery}) As q
      WHERE
        %(ind_vars_where)s
        ''' % replacements
    return query.format(**params).strip()
 # to add more weight methods open a ticket or pull request
 def get_attributes(query_res, attr_num=1):
    """
        @param query_res: query results with attributes and neighbors
        @param attr_num: attribute number (1, 2, ...)
    """
    return np.array([x['attr' + str(attr_num)] for x in query_res],
                    dtype=np.float)
--- a/release/python/0.7.0/crankshaft/crankshaft/random_seeds.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/random_seeds.py
@ -0,0 +1,12 @@
 """Random seed generator used for non-deterministic functions in crankshaft"""
 import random
 import numpy
 def set_random_seeds(value):
    """
    Set the seeds of the RNGs (Random Number Generators)
    used internally.
    """
    random.seed(value)
    numpy.random.seed(value)
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/init.py
@ -0,0 +1,3 @@
 from crankshaft.regression.gwr import *
 from crankshaft.regression.glm import *
 from crankshaft.regression.gwr_cs import *
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/GLM_validate_estimation.ipynb
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/GLM_validate_estimation.ipynb
@ -0,0 +1,444 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Import GLM and pysal\n",
    "import os\n",
    "import numpy as np\n",
    "os.chdir('/Users/toshan/dev/pysal/pysal/contrib/glm')\n",
    "from glm import GLM\n",
    "import pysal\n",
    "import pandas as pd\n",
    "import statsmodels.formula.api as smf\n",
    "import statsmodels.api as sm\n",
    "from family import Gaussian, Binomial, Poisson, QuasiPoisson\n",
    "\n",
    "from statsmodels.api import families"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Prepare some test data - columbus example\n",
    "db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')\n",
    "y = np.array(db.by_col(\"HOVAL\"))\n",
    "y = np.reshape(y, (49,1))\n",
    "X = []\n",
    "#X.append(np.ones(len(y)))\n",
    "X.append(db.by_col(\"INC\"))\n",
    "X.append(db.by_col(\"CRIME\"))\n",
    "X = np.array(X).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 46.42818268]\n",
      " [  0.62898397]\n",
      " [ -0.48488854]]\n"
     ]
    }
   ],
   "source": [
    "#First fit pysal OLS model\n",
    "from pysal.spreg import ols\n",
    "OLS = ols.OLS(y, X)\n",
    "print OLS.betas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'family.Gaussian'>\n",
      "<class 'family.Gaussian'>\n",
      "<class 'family.Gaussian'>\n",
      "[ 46.42818268   0.62898397  -0.48488854]\n",
      "[ 46.42818268   0.62898397  -0.48488854]\n"
     ]
    }
   ],
   "source": [
    "#Then fit Gaussian GLM\n",
    "\n",
    "#create Gaussian GLM model object\n",
    "model = GLM(y, X, Gaussian())\n",
    "model\n",
    "\n",
    "#Fit model to estimate coefficients and return GLMResults object\n",
    "results = model.fit()\n",
    "\n",
    "#Check coefficients - R betas [46.4282, 0.6290, -0.4849]\n",
    "print results.params\n",
    "\n",
    "# Gaussian GLM results from statsmodels\n",
    "sm_model = smf.GLM(y, sm.add_constant(X), family=families.Gaussian())\n",
    "sm_results = sm_model.fit()\n",
    "print sm_results.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 2\n",
      "<class 'family.Gaussian'>\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "<class 'family.Gaussian'>\n",
      "<class 'family.Gaussian'>\n",
      "<class 'family.Gaussian'>\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "print results.df_model, sm_results.df_model\n",
    "print np.allclose(results.aic, sm_results.aic)\n",
    "print np.allclose(results.bic, sm_results.bic)\n",
    "print np.allclose(results.deviance, sm_results.deviance)\n",
    "print np.allclose(results.df_model, sm_results.df_model)\n",
    "print np.allclose(results.df_resid, sm_results.df_resid)\n",
    "print np.allclose(results.llf, sm_results.llf)\n",
    "print np.allclose(results.mu, sm_results.mu)\n",
    "print np.allclose(results.n, sm_results.nobs)\n",
    "print np.allclose(results.null, sm_results.null)\n",
    "print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
    "print np.allclose(results.params, sm_results.params)\n",
    "print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
    "print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
    "print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
    "print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
    "print np.allclose(results.resid_response, sm_results.resid_response)\n",
    "print np.allclose(results.resid_working, sm_results.resid_working)\n",
    "print np.allclose(results.scale, sm_results.scale)\n",
    "print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
    "print np.allclose(results.cov_params(), sm_results.cov_params())\n",
    "print np.allclose(results.bse, sm_results.bse)\n",
    "print np.allclose(results.conf_int(), sm_results.conf_int())\n",
    "print np.allclose(results.pvalues, sm_results.pvalues)\n",
    "print np.allclose(results.tvalues, sm_results.tvalues)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'family.Poisson'>\n",
      "<class 'family.Poisson'>\n",
      "<class 'family.Poisson'>\n",
      "[ 3.92159085  0.01183491 -0.01371397]\n",
      "[ 3.92159085  0.01183491 -0.01371397]\n"
     ]
    }
   ],
   "source": [
    "#Now fit a Poisson GLM \n",
    "\n",
    "poisson_y = np.round(y).astype(int)\n",
    "\n",
    "#create Poisson GLM model object\n",
    "model = GLM(poisson_y, X, Poisson())\n",
    "model\n",
    "\n",
    "#Fit model to estimate coefficients and return GLMResults object\n",
    "results = model.fit()\n",
    "\n",
    "#Check coefficients - R betas [3.91926, 0.01198, -0.01371]\n",
    "print results.params.T\n",
    "\n",
    "# Poisson GLM results from statsmodels\n",
    "sm_results = smf.GLM(poisson_y, sm.add_constant(X), family=families.Poisson()).fit()\n",
    "print sm_results.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'family.Poisson'>\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "<class 'family.Poisson'>\n",
      "<class 'family.Poisson'>\n",
      "<class 'family.Poisson'>\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "[ 0.13049161  0.00511599  0.00193769] [ 0.13049161  0.00511599  0.00193769]\n"
     ]
    }
   ],
   "source": [
    "print np.allclose(results.aic, sm_results.aic)\n",
    "print np.allclose(results.bic, sm_results.bic)\n",
    "print np.allclose(results.deviance, sm_results.deviance)\n",
    "print np.allclose(results.df_model, sm_results.df_model)\n",
    "print np.allclose(results.df_resid, sm_results.df_resid)\n",
    "print np.allclose(results.llf, sm_results.llf)\n",
    "print np.allclose(results.mu, sm_results.mu)\n",
    "print np.allclose(results.n, sm_results.nobs)\n",
    "print np.allclose(results.null, sm_results.null)\n",
    "print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
    "print np.allclose(results.params, sm_results.params)\n",
    "print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
    "print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
    "print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
    "print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
    "print np.allclose(results.resid_response, sm_results.resid_response)\n",
    "print np.allclose(results.resid_working, sm_results.resid_working)\n",
    "print np.allclose(results.scale, sm_results.scale)\n",
    "print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
    "print np.allclose(results.cov_params(), sm_results.cov_params())\n",
    "print np.allclose(results.bse, sm_results.bse)\n",
    "print np.allclose(results.conf_int(), sm_results.conf_int())\n",
    "print np.allclose(results.pvalues, sm_results.pvalues)\n",
    "print np.allclose(results.tvalues, sm_results.tvalues)\n",
    "print results.bse, sm_results.bse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[-5.33638276  0.0287754 ]\n",
      "[-5.33638276  0.0287754 ]\n"
     ]
    }
   ],
   "source": [
    "#Now fit a binomial GLM\n",
    "londonhp = pd.read_csv('/Users/toshan/projects/londonhp.csv')\n",
    "#londonhp = pd.read_csv('/Users/qszhao/Dropbox/pysal/pysal/contrib/gwr/londonhp.csv')\n",
    "y = londonhp['BATH2'].values\n",
    "y = np.reshape(y, (316,1))\n",
    "X = londonhp['FLOORSZ'].values\n",
    "X = np.reshape(X, (316,1))\n",
    "\n",
    "#create logistic GLM model object\n",
    "model = GLM(y, X, Binomial())\n",
    "model\n",
    "\n",
    "#Fit model to estimate coefficients and return GLMResults object\n",
    "results = model.fit()\n",
    "\n",
    "#Check coefficients - R betas [-5.33638, 0.02878]\n",
    "print results.params.T\n",
    "\n",
    "# Logistic GLM results from statsmodels\n",
    "sm_results = smf.GLM(y, sm.add_constant(X), family=families.Binomial()).fit()\n",
    "print sm_results.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 1\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "print results.df_model, sm_results.df_model\n",
    "print np.allclose(results.aic, sm_results.aic)\n",
    "print np.allclose(results.bic, sm_results.bic)\n",
    "print np.allclose(results.deviance, sm_results.deviance)\n",
    "print np.allclose(results.df_model, sm_results.df_model)\n",
    "print np.allclose(results.df_resid, sm_results.df_resid)\n",
    "print np.allclose(results.llf, sm_results.llf)\n",
    "print np.allclose(results.mu, sm_results.mu)\n",
    "print np.allclose(results.n, sm_results.nobs)\n",
    "print np.allclose(results.null, sm_results.null)\n",
    "print np.allclose(results.null_deviance, sm_results.null_deviance)\n",
    "print np.allclose(results.params, sm_results.params)\n",
    "print np.allclose(results.pearson_chi2, sm_results.pearson_chi2)\n",
    "print np.allclose(results.resid_anscombe, sm_results.resid_anscombe)\n",
    "print np.allclose(results.resid_deviance, sm_results.resid_deviance)\n",
    "print np.allclose(results.resid_pearson, sm_results.resid_pearson)\n",
    "print np.allclose(results.resid_response, sm_results.resid_response)\n",
    "print np.allclose(results.resid_working, sm_results.resid_working)\n",
    "print np.allclose(results.scale, sm_results.scale)\n",
    "print np.allclose(results.normalized_cov_params, sm_results.normalized_cov_params)\n",
    "print np.allclose(results.cov_params(), sm_results.cov_params())\n",
    "print np.allclose(results.bse, sm_results.bse)\n",
    "print np.allclose(results.conf_int(), sm_results.conf_int())\n",
    "print np.allclose(results.pvalues, sm_results.pvalues)\n",
    "print np.allclose(results.tvalues, sm_results.tvalues)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'family.QuasiPoisson'>\n",
      "<class 'family.QuasiPoisson'>\n",
      "<class 'family.QuasiPoisson'>\n"
     ]
    }
   ],
   "source": [
    "#create QUasiPoisson GLM model object\n",
    "model = GLM(poisson_y, X, QuasiPoisson())\n",
    "model\n",
    "\n",
    "#Fit model to estimate coefficients and return GLMResults object\n",
    "results = model.fit()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
 }
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/init.py
@ -0,0 +1,4 @@
 import glm
 import family
 import utils
 import iwls
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/base.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/base.py
@ -0,0 +1,959 @@
 from __future__ import print_function
 import numpy as np
 from scipy import stats
 from utils import cache_readonly
 class Results(object):
    """
    Class to contain model results
    Parameters
    ----------
    model : class instance
        the previously specified model instance
    params : array
        parameter estimates from the fit model
    """
    def __init__(self, model, params, **kwd):
        self.__dict__.update(kwd)
        self.initialize(model, params, **kwd)
        self._data_attr = []
    def initialize(self, model, params, **kwd):
        self.params = params
        self.model = model
        if hasattr(model, 'k_constant'):
            self.k_constant = model.k_constant
    def predict(self, exog=None, transform=True, *args, **kwargs):
        """
        Call self.model.predict with self.params as the first argument.
        Parameters
        ----------
        exog : array-like, optional
            The values for which you want to predict.
        transform : bool, optional
            If the model was fit via a formula, do you want to pass
            exog through the formula. Default is True. E.g., if you fit
            a model y ~ log(x1) + log(x2), and transform is True, then
            you can pass a data structure that contains x1 and x2 in
            their original form. Otherwise, you'd need to log the data
            first.
        args, kwargs :
            Some models can take additional arguments or keywords, see the
            predict method of the model for the details.
        Returns
        -------
        prediction : ndarray or pandas.Series
            See self.model.predict
        """
        if transform and hasattr(self.model, 'formula') and exog is not None:
            from patsy import dmatrix
            exog = dmatrix(self.model.data.design_info.builder,
                           exog)
        if exog is not None:
            exog = np.asarray(exog)
            if exog.ndim == 1 and (self.model.exog.ndim == 1 or
                                   self.model.exog.shape[1] == 1):
                exog = exog[:, None]
            exog = np.atleast_2d(exog)  # needed in count model shape[1]
        return self.model.predict(self.params, exog, *args, **kwargs)
 #TODO: public method?
 class LikelihoodModelResults(Results):
    """
    Class to contain results from likelihood models
    Parameters
    -----------
    model : LikelihoodModel instance or subclass instance
        LikelihoodModelResults holds a reference to the model that is fit.
    params : 1d array_like
        parameter estimates from estimated model
    normalized_cov_params : 2d array
       Normalized (before scaling) covariance of params. (dot(X.T,X))**-1
    scale : float
        For (some subset of models) scale will typically be the
        mean square error from the estimated model (sigma^2)
    Returns
    -------
    **Attributes**
    mle_retvals : dict
        Contains the values returned from the chosen optimization method if
        full_output is True during the fit.  Available only if the model
        is fit by maximum likelihood.  See notes below for the output from
        the different methods.
    mle_settings : dict
        Contains the arguments passed to the chosen optimization method.
        Available if the model is fit by maximum likelihood.  See
        LikelihoodModel.fit for more information.
    model : model instance
        LikelihoodResults contains a reference to the model that is fit.
    params : ndarray
        The parameters estimated for the model.
    scale : float
        The scaling factor of the model given during instantiation.
    tvalues : array
        The t-values of the standard errors.
    Notes
    -----
    The covariance of params is given by scale times normalized_cov_params.
    Return values by solver if full_output is True during fit:
        'newton'
            fopt : float
                The value of the (negative) loglikelihood at its
                minimum.
            iterations : int
                Number of iterations performed.
            score : ndarray
                The score vector at the optimum.
            Hessian : ndarray
                The Hessian at the optimum.
            warnflag : int
                1 if maxiter is exceeded. 0 if successful convergence.
            converged : bool
                True: converged. False: did not converge.
            allvecs : list
                List of solutions at each iteration.
        'nm'
            fopt : float
                The value of the (negative) loglikelihood at its
                minimum.
            iterations : int
                Number of iterations performed.
            warnflag : int
                1: Maximum number of function evaluations made.
                2: Maximum number of iterations reached.
            converged : bool
                True: converged. False: did not converge.
            allvecs : list
                List of solutions at each iteration.
        'bfgs'
            fopt : float
                Value of the (negative) loglikelihood at its minimum.
            gopt : float
                Value of gradient at minimum, which should be near 0.
            Hinv : ndarray
                value of the inverse Hessian matrix at minimum.  Note
                that this is just an approximation and will often be
                different from the value of the analytic Hessian.
            fcalls : int
                Number of calls to loglike.
            gcalls : int
                Number of calls to gradient/score.
            warnflag : int
                1: Maximum number of iterations exceeded. 2: Gradient
                and/or function calls are not changing.
            converged : bool
                True: converged.  False: did not converge.
            allvecs : list
                Results at each iteration.
        'lbfgs'
            fopt : float
                Value of the (negative) loglikelihood at its minimum.
            gopt : float
                Value of gradient at minimum, which should be near 0.
            fcalls : int
                Number of calls to loglike.
            warnflag : int
                Warning flag:
                - 0 if converged
                - 1 if too many function evaluations or too many iterations
                - 2 if stopped for another reason
            converged : bool
                True: converged.  False: did not converge.
        'powell'
            fopt : float
                Value of the (negative) loglikelihood at its minimum.
            direc : ndarray
                Current direction set.
            iterations : int
                Number of iterations performed.
            fcalls : int
                Number of calls to loglike.
            warnflag : int
                1: Maximum number of function evaluations. 2: Maximum number
                of iterations.
            converged : bool
                True : converged. False: did not converge.
            allvecs : list
                Results at each iteration.
        'cg'
            fopt : float
                Value of the (negative) loglikelihood at its minimum.
            fcalls : int
                Number of calls to loglike.
            gcalls : int
                Number of calls to gradient/score.
            warnflag : int
                1: Maximum number of iterations exceeded. 2: Gradient and/
                or function calls not changing.
            converged : bool
                True: converged. False: did not converge.
            allvecs : list
                Results at each iteration.
        'ncg'
            fopt : float
                Value of the (negative) loglikelihood at its minimum.
            fcalls : int
                Number of calls to loglike.
            gcalls : int
                Number of calls to gradient/score.
            hcalls : int
                Number of calls to hessian.
            warnflag : int
                1: Maximum number of iterations exceeded.
            converged : bool
                True: converged. False: did not converge.
            allvecs : list
                Results at each iteration.
        """
    # by default we use normal distribution
    # can be overwritten by instances or subclasses
    use_t = False
    def __init__(self, model, params, normalized_cov_params=None, scale=1.,
                 **kwargs):
        super(LikelihoodModelResults, self).__init__(model, params)
        self.normalized_cov_params = normalized_cov_params
        self.scale = scale
        # robust covariance
        # We put cov_type in kwargs so subclasses can decide in fit whether to
        # use this generic implementation
        if 'use_t' in kwargs:
            use_t = kwargs['use_t']
            if use_t is not None:
                self.use_t = use_t
        if 'cov_type' in kwargs:
            cov_type = kwargs.get('cov_type', 'nonrobust')
            cov_kwds = kwargs.get('cov_kwds', {})
            if cov_type == 'nonrobust':
                self.cov_type = 'nonrobust'
                self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
                                 'covariance matrix of the errors is correctly ' +
                                 'specified.'}
            else:
                from statsmodels.base.covtype import get_robustcov_results
                if cov_kwds is None:
                    cov_kwds = {}
                use_t = self.use_t
                # TODO: we shouldn't need use_t in get_robustcov_results
                get_robustcov_results(self, cov_type=cov_type, use_self=True,
                                           use_t=use_t, **cov_kwds)
    def normalized_cov_params(self):
        raise NotImplementedError
    def _get_robustcov_results(self, cov_type='nonrobust', use_self=True,
                                   use_t=None, **cov_kwds):
        from statsmodels.base.covtype import get_robustcov_results
        if cov_kwds is None:
            cov_kwds = {}
        if cov_type == 'nonrobust':
            self.cov_type = 'nonrobust'
            self.cov_kwds = {'description' : 'Standard Errors assume that the ' +
                             'covariance matrix of the errors is correctly ' +
                             'specified.'}
        else:
            # TODO: we shouldn't need use_t in get_robustcov_results
            get_robustcov_results(self, cov_type=cov_type, use_self=True,
                                       use_t=use_t, **cov_kwds)
    @cache_readonly
    def llf(self):
        return self.model.loglike(self.params)
    @cache_readonly
    def bse(self):
        return np.sqrt(np.diag(self.cov_params()))
    @cache_readonly
    def tvalues(self):
        """
        Return the t-statistic for a given parameter estimate.
        """
        return self.params / self.bse
    @cache_readonly
    def pvalues(self):
        if self.use_t:
            df_resid = getattr(self, 'df_resid_inference', self.df_resid)
            return stats.t.sf(np.abs(self.tvalues), df_resid)*2
        else:
            return stats.norm.sf(np.abs(self.tvalues))*2
    def cov_params(self, r_matrix=None, column=None, scale=None, cov_p=None,
            other=None):
        """
        Returns the variance/covariance matrix.
        The variance/covariance matrix can be of a linear contrast
        of the estimates of params or all params multiplied by scale which
        will usually be an estimate of sigma^2.  Scale is assumed to be
        a scalar.
        Parameters
        ----------
        r_matrix : array-like
            Can be 1d, or 2d.  Can be used alone or with other.
        column :  array-like, optional
            Must be used on its own.  Can be 0d or 1d see below.
        scale : float, optional
            Can be specified or not.  Default is None, which means that
            the scale argument is taken from the model.
        other : array-like, optional
            Can be used when r_matrix is specified.
        Returns
        -------
        cov : ndarray
            covariance matrix of the parameter estimates or of linear
            combination of parameter estimates. See Notes.
        Notes
        -----
        (The below are assumed to be in matrix notation.)
        If no argument is specified returns the covariance matrix of a model
        ``(scale)*(X.T X)^(-1)``
        If contrast is specified it pre and post-multiplies as follows
        ``(scale) * r_matrix (X.T X)^(-1) r_matrix.T``
        If contrast and other are specified returns
        ``(scale) * r_matrix (X.T X)^(-1) other.T``
        If column is specified returns
        ``(scale) * (X.T X)^(-1)[column,column]`` if column is 0d
        OR
        ``(scale) * (X.T X)^(-1)[column][:,column]`` if column is 1d
        """
        if (hasattr(self, 'mle_settings') and
                self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
            dot_fun = nan_dot
        else:
            dot_fun = np.dot
        if (cov_p is None and self.normalized_cov_params is None and
            not hasattr(self, 'cov_params_default')):
            raise ValueError('need covariance of parameters for computing '
                             '(unnormalized) covariances')
        if column is not None and (r_matrix is not None or other is not None):
            raise ValueError('Column should be specified without other '
                             'arguments.')
        if other is not None and r_matrix is None:
            raise ValueError('other can only be specified with r_matrix')
        if cov_p is None:
            if hasattr(self, 'cov_params_default'):
                cov_p = self.cov_params_default
            else:
                if scale is None:
                    scale = self.scale
                cov_p = self.normalized_cov_params * scale
        if column is not None:
            column = np.asarray(column)
            if column.shape == ():
                return cov_p[column, column]
            else:
                #return cov_p[column][:, column]
                return cov_p[column[:, None], column]
        elif r_matrix is not None:
            r_matrix = np.asarray(r_matrix)
            if r_matrix.shape == ():
                raise ValueError("r_matrix should be 1d or 2d")
            if other is None:
                other = r_matrix
            else:
                other = np.asarray(other)
            tmp = dot_fun(r_matrix, dot_fun(cov_p, np.transpose(other)))
            return tmp
        else:  # if r_matrix is None and column is None:
            return cov_p
    #TODO: make sure this works as needed for GLMs
    def t_test(self, r_matrix, cov_p=None, scale=None,
               use_t=None):
        """
        Compute a t-test for a each linear hypothesis of the form Rb = q
        Parameters
        ----------
        r_matrix : array-like, str, tuple
            - array : If an array is given, a p x k 2d array or length k 1d
              array specifying the linear restrictions. It is assumed
              that the linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q). If q is given,
              can be either a scalar or a length p row vector.
        cov_p : array-like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            An optional `scale` to use.  Default is the scale specified
            by the model fit.
        use_t : bool, optional
            If use_t is None, then the default of the model is used.
            If use_t is True, then the p-values are based on the t
            distribution.
            If use_t is False, then the p-values are based on the normal
            distribution.
        Returns
        -------
        res : ContrastResults instance
            The results for the test are attributes of this results instance.
            The available results have the same elements as the parameter table
            in `summary()`.
        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> r = np.zeros_like(results.params)
        >>> r[5:] = [1,-1]
        >>> print(r)
        [ 0.  0.  0.  0.  0.  1. -1.]
        r tests that the coefficients on the 5th and 6th independent
        variable are the same.
        >>> T_test = results.t_test(r)
        >>> print(T_test)
        <T contrast: effect=-1829.2025687192481, sd=455.39079425193762,
        t=-4.0167754636411717, p=0.0015163772380899498, df_denom=9>
        >>> T_test.effect
        -1829.2025687192481
        >>> T_test.sd
        455.39079425193762
        >>> T_test.tvalue
        -4.0167754636411717
        >>> T_test.pvalue
        0.0015163772380899498
        Alternatively, you can specify the hypothesis tests using a string
        >>> from statsmodels.formula.api import ols
        >>> dta = sm.datasets.longley.load_pandas().data
        >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
        >>> results = ols(formula, dta).fit()
        >>> hypotheses = 'GNPDEFL = GNP, UNEMP = 2, YEAR/1829 = 1'
        >>> t_test = results.t_test(hypotheses)
        >>> print(t_test)
        See Also
        ---------
        tvalues : individual t statistics
        f_test : for F tests
        patsy.DesignInfo.linear_constraint
        """
        from patsy import DesignInfo
        names = self.model.data.param_names
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants
        num_ttests = r_matrix.shape[0]
        num_params = r_matrix.shape[1]
        if (cov_p is None and self.normalized_cov_params is None and
            not hasattr(self, 'cov_params_default')):
            raise ValueError('Need covariance of parameters for computing '
                             'T statistics')
        if num_params != self.params.shape[0]:
            raise ValueError('r_matrix and params are not aligned')
        if q_matrix is None:
            q_matrix = np.zeros(num_ttests)
        else:
            q_matrix = np.asarray(q_matrix)
            q_matrix = q_matrix.squeeze()
        if q_matrix.size > 1:
            if q_matrix.shape[0] != num_ttests:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")
        if use_t is None:
            #switch to use_t false if undefined
            use_t = (hasattr(self, 'use_t') and self.use_t)
        _t = _sd = None
        _effect = np.dot(r_matrix, self.params)
        # nan_dot multiplies with the convention nan * 0 = 0
        # Perform the test
        if num_ttests > 1:
            _sd = np.sqrt(np.diag(self.cov_params(
                r_matrix=r_matrix, cov_p=cov_p)))
        else:
            _sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p))
        _t = (_effect - q_matrix) * recipr(_sd)
        df_resid = getattr(self, 'df_resid_inference', self.df_resid)
        if use_t:
            return ContrastResults(effect=_effect, t=_t, sd=_sd,
                                   df_denom=df_resid)
        else:
            return ContrastResults(effect=_effect, statistic=_t, sd=_sd,
                                   df_denom=df_resid,
                                   distribution='norm')
    def f_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None):
        """
        Compute the F-test for a joint linear hypothesis.
        This is a special case of `wald_test` that always uses the F
        distribution.
        Parameters
        ----------
        r_matrix : array-like, str, or tuple
            - array : An r x k array where r is the number of restrictions to
              test and k is the number of regressors. It is assumed
              that the linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q), ``q`` can be
              either a scalar or a length k row vector.
        cov_p : array-like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            Default is 1.0 for no scaling.
        invcov : array-like, optional
            A q x q array to specify an inverse covariance matrix based on a
            restrictions matrix.
        Returns
        -------
        res : ContrastResults instance
            The results for the test are attributes of this results instance.
        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> A = np.identity(len(results.params))
        >>> A = A[1:,:]
        This tests that each coefficient is jointly statistically
        significantly different from zero.
        >>> print(results.f_test(A))
        <F contrast: F=330.28533923463488, p=4.98403052872e-10,
         df_denom=9, df_num=6>
        Compare this to
        >>> results.fvalue
        330.2853392346658
        >>> results.f_pvalue
        4.98403096572e-10
        >>> B = np.array(([0,0,1,-1,0,0,0],[0,0,0,0,0,1,-1]))
        This tests that the coefficient on the 2nd and 3rd regressors are
        equal and jointly that the coefficient on the 5th and 6th regressors
        are equal.
        >>> print(results.f_test(B))
        <F contrast: F=9.740461873303655, p=0.00560528853174, df_denom=9,
         df_num=2>
        Alternatively, you can specify the hypothesis tests using a string
        >>> from statsmodels.datasets import longley
        >>> from statsmodels.formula.api import ols
        >>> dta = longley.load_pandas().data
        >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
        >>> results = ols(formula, dta).fit()
        >>> hypotheses = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
        >>> f_test = results.f_test(hypotheses)
        >>> print(f_test)
        See Also
        --------
        statsmodels.stats.contrast.ContrastResults
        wald_test
        t_test
        patsy.DesignInfo.linear_constraint
        Notes
        -----
        The matrix `r_matrix` is assumed to be non-singular. More precisely,
        r_matrix (pX pX.T) r_matrix.T
        is assumed invertible. Here, pX is the generalized inverse of the
        design matrix of the model. There can be problems in non-OLS models
        where the rank of the covariance of the noise is not full.
        """
        res = self.wald_test(r_matrix, cov_p=cov_p, scale=scale,
                             invcov=invcov, use_f=True)
        return res
    #TODO: untested for GLMs?
    def wald_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None,
                  use_f=None):
        """
        Compute a Wald-test for a joint linear hypothesis.
        Parameters
        ----------
        r_matrix : array-like, str, or tuple
            - array : An r x k array where r is the number of restrictions to
              test and k is the number of regressors. It is assumed that the
              linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q), ``q`` can be
              either a scalar or a length p row vector.
        cov_p : array-like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        scale : float, optional
            Default is 1.0 for no scaling.
        invcov : array-like, optional
            A q x q array to specify an inverse covariance matrix based on a
            restrictions matrix.
        use_f : bool
            If True, then the F-distribution is used. If False, then the
            asymptotic distribution, chisquare is used. If use_f is None, then
            the F distribution is used if the model specifies that use_t is True.
            The test statistic is proportionally adjusted for the distribution
            by the number of constraints in the hypothesis.
        Returns
        -------
        res : ContrastResults instance
            The results for the test are attributes of this results instance.
        See also
        --------
        statsmodels.stats.contrast.ContrastResults
        f_test
        t_test
        patsy.DesignInfo.linear_constraint
        Notes
        -----
        The matrix `r_matrix` is assumed to be non-singular. More precisely,
        r_matrix (pX pX.T) r_matrix.T
        is assumed invertible. Here, pX is the generalized inverse of the
        design matrix of the model. There can be problems in non-OLS models
        where the rank of the covariance of the noise is not full.
        """
        if use_f is None:
            #switch to use_t false if undefined
            use_f = (hasattr(self, 'use_t') and self.use_t)
        from patsy import DesignInfo
        names = self.model.data.param_names
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants
        if (self.normalized_cov_params is None and cov_p is None and
                invcov is None and not hasattr(self, 'cov_params_default')):
            raise ValueError('need covariance of parameters for computing '
                             'F statistics')
        cparams = np.dot(r_matrix, self.params[:, None])
        J = float(r_matrix.shape[0])  # number of restrictions
        if q_matrix is None:
            q_matrix = np.zeros(J)
        else:
            q_matrix = np.asarray(q_matrix)
        if q_matrix.ndim == 1:
            q_matrix = q_matrix[:, None]
            if q_matrix.shape[0] != J:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")
        Rbq = cparams - q_matrix
        if invcov is None:
            cov_p = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
            if np.isnan(cov_p).max():
                raise ValueError("r_matrix performs f_test for using "
                                 "dimensions that are asymptotically "
                                 "non-normal")
            invcov = np.linalg.inv(cov_p)
        if (hasattr(self, 'mle_settings') and
                self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
            F = nan_dot(nan_dot(Rbq.T, invcov), Rbq)
        else:
            F = np.dot(np.dot(Rbq.T, invcov), Rbq)
        df_resid = getattr(self, 'df_resid_inference', self.df_resid)
        if use_f:
            F /= J
            return ContrastResults(F=F, df_denom=df_resid,
                                   df_num=invcov.shape[0])
        else:
            return ContrastResults(chi2=F, df_denom=J, statistic=F,
                                   distribution='chi2', distargs=(J,))
    def wald_test_terms(self, skip_single=False, extra_constraints=None,
                   combine_terms=None):
        """
        Compute a sequence of Wald tests for terms over multiple columns
        This computes joined Wald tests for the hypothesis that all
        coefficients corresponding to a `term` are zero.
        `Terms` are defined by the underlying formula or by string matching.
        Parameters
        ----------
        skip_single : boolean
            If true, then terms that consist only of a single column and,
            therefore, refers only to a single parameter is skipped.
            If false, then all terms are included.
        extra_constraints : ndarray
            not tested yet
        combine_terms : None or list of strings
            Each string in this list is matched to the name of the terms or
            the name of the exogenous variables. All columns whose name
            includes that string are combined in one joint test.
        Returns
        -------
        test_result : result instance
            The result instance contains `table` which is a pandas DataFrame
            with the test results: test statistic, degrees of freedom and
            pvalues.
        Examples
        --------
        >>> res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                          data).fit()
        >>> res_ols.wald_test_terms()
        <class 'statsmodels.stats.contrast.WaldTestResults'>
                                                  F                P>F  df constraint  df denom
        Intercept                        279.754525  2.37985521351e-22              1        51
        C(Duration, Sum)                   5.367071    0.0245738436636              1        51
        C(Weight, Sum)                    12.432445  3.99943118767e-05              2        51
        C(Duration, Sum):C(Weight, Sum)    0.176002      0.83912310946              2        51
        >>> res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
                                           data).fit(cov_type='HC0')
        >>> wt = res_poi.wald_test_terms(skip_single=False,
                                         combine_terms=['Duration', 'Weight'])
        >>> print(wt)
                                    chi2             P>chi2  df constraint
        Intercept              15.695625  7.43960374424e-05              1
        C(Weight)              16.132616  0.000313940174705              2
        C(Duration)             1.009147     0.315107378931              1
        C(Weight):C(Duration)   0.216694     0.897315972824              2
        Duration               11.187849     0.010752286833              3
        Weight                 30.263368  4.32586407145e-06              4
        """
        # lazy import
        from collections import defaultdict
        result = self
        if extra_constraints is None:
            extra_constraints = []
        if combine_terms is None:
            combine_terms = []
        design_info = getattr(result.model.data.orig_exog, 'design_info', None)
        if design_info is None and extra_constraints is None:
            raise ValueError('no constraints, nothing to do')
        identity = np.eye(len(result.params))
        constraints = []
        combined = defaultdict(list)
        if design_info is not None:
            for term in design_info.terms:
                cols = design_info.slice(term)
                name = term.name()
                constraint_matrix = identity[cols]
                # check if in combined
                for cname in combine_terms:
                    if cname in name:
                        combined[cname].append(constraint_matrix)
                k_constraint = constraint_matrix.shape[0]
                if skip_single:
                    if k_constraint == 1:
                        continue
                constraints.append((name, constraint_matrix))
            combined_constraints = []
            for cname in combine_terms:
                combined_constraints.append((cname, np.vstack(combined[cname])))
        else:
            # check by exog/params names if there is no formula info
            for col, name in enumerate(result.model.exog_names):
                constraint_matrix = identity[col]
                # check if in combined
                for cname in combine_terms:
                    if cname in name:
                        combined[cname].append(constraint_matrix)
                if skip_single:
                    continue
                constraints.append((name, constraint_matrix))
            combined_constraints = []
            for cname in combine_terms:
                combined_constraints.append((cname, np.vstack(combined[cname])))
        use_t = result.use_t
        distribution = ['chi2', 'F'][use_t]
        res_wald = []
        index = []
        for name, constraint in constraints + combined_constraints + extra_constraints:
            wt = result.wald_test(constraint)
            row = [wt.statistic.item(), wt.pvalue, constraint.shape[0]]
            if use_t:
                row.append(wt.df_denom)
            res_wald.append(row)
            index.append(name)
        # distribution nerutral names
        col_names = ['statistic', 'pvalue', 'df_constraint']
        if use_t:
            col_names.append('df_denom')
        # TODO: maybe move DataFrame creation to results class
        from pandas import DataFrame
        table = DataFrame(res_wald, index=index, columns=col_names)
        res = WaldTestResults(None, distribution, None, table=table)
        # TODO: remove temp again, added for testing
        res.temp = constraints + combined_constraints + extra_constraints
        return res
    def conf_int(self, alpha=.05, cols=None, method='default'):
        """
        Returns the confidence interval of the fitted parameters.
        Parameters
        ----------
        alpha : float, optional
            The significance level for the confidence interval.
            ie., The default `alpha` = .05 returns a 95% confidence interval.
        cols : array-like, optional
            `cols` specifies which confidence intervals to return
        method : string
            Not Implemented Yet
            Method to estimate the confidence_interval.
            "Default" : uses self.bse which is based on inverse Hessian for MLE
            "hjjh" :
            "jac" :
            "boot-bse"
            "boot_quant"
            "profile"
        Returns
        --------
        conf_int : array
            Each row contains [lower, upper] limits of the confidence interval
            for the corresponding parameter. The first column contains all
            lower, the second column contains all upper limits.
        Examples
        --------
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> results.conf_int()
        array([[-5496529.48322745, -1467987.78596704],
               [    -177.02903529,      207.15277984],
               [      -0.1115811 ,        0.03994274],
               [      -3.12506664,       -0.91539297],
               [      -1.5179487 ,       -0.54850503],
               [      -0.56251721,        0.460309  ],
               [     798.7875153 ,     2859.51541392]])
        >>> results.conf_int(cols=(2,3))
        array([[-0.1115811 ,  0.03994274],
               [-3.12506664, -0.91539297]])
        Notes
        -----
        The confidence interval is based on the standard normal distribution.
        Models wish to use a different distribution should overwrite this
        method.
        """
        bse = self.bse
        if self.use_t:
            dist = stats.t
            df_resid = getattr(self, 'df_resid_inference', self.df_resid)
            q = dist.ppf(1 - alpha / 2, df_resid)
        else:
            dist = stats.norm
            q = dist.ppf(1 - alpha / 2)
        if cols is None:
            lower = self.params - q * bse
            upper = self.params + q * bse
        else:
            cols = np.asarray(cols)
            lower = self.params[cols] - q * bse[cols]
            upper = self.params[cols] + q * bse[cols]
        return np.asarray(lzip(lower, upper))
    def save(self, fname, remove_data=False):
        '''
        save a pickle of this instance
        Parameters
        ----------
        fname : string or filehandle
            fname can be a string to a file path or filename, or a filehandle.
        remove_data : bool
            If False (default), then the instance is pickled without changes.
            If True, then all arrays with length nobs are set to None before
            pickling. See the remove_data method.
            In some cases not all arrays will be set to None.
        Notes
        -----
        If remove_data is true and the model result does not implement a
        remove_data method then this will raise an exception.
        '''
        from statsmodels.iolib.smpickle import save_pickle
        if remove_data:
            self.remove_data()
        save_pickle(self, fname)
    @classmethod
    def load(cls, fname):
        '''
        load a pickle, (class method)
        Parameters
        ----------
        fname : string or filehandle
            fname can be a string to a file path or filename, or a filehandle.
        Returns
        -------
        unpickled instance
        '''
        from statsmodels.iolib.smpickle import load_pickle
        return load_pickle(fname)
    def remove_data(self):
        '''remove data arrays, all nobs arrays from result and model
        This reduces the size of the instance, so it can be pickled with less
        memory. Currently tested for use with predict from an unpickled
        results and model instance.
        .. warning:: Since data and some intermediate results have been removed
           calculating new statistics that require them will raise exceptions.
           The exception will occur the first time an attribute is accessed
           that has been set to None.
        Not fully tested for time series models, tsa, and might delete too much
        for prediction or not all that would be possible.
        The list of arrays to delete is maintained as an attribute of the
        result and model instance, except for cached values. These lists could
        be changed before calling remove_data.
        '''
        def wipe(obj, att):
            #get to last element in attribute path
            p = att.split('.')
            att_ = p.pop(-1)
            try:
                obj_ = reduce(getattr, [obj] + p)
                #print(repr(obj), repr(att))
                #print(hasattr(obj_, att_))
                if hasattr(obj_, att_):
                    #print('removing3', att_)
                    setattr(obj_, att_, None)
            except AttributeError:
                pass
        model_attr = ['model.' + i for i in self.model._data_attr]
        for att in self._data_attr + model_attr:
            #print('removing', att)
            wipe(self, att)
        data_in_cache = getattr(self, 'data_in_cache', [])
        data_in_cache += ['fittedvalues', 'resid', 'wresid']
        for key in data_in_cache:
            try:
                self._cache[key] = None
            except (AttributeError, KeyError):
                pass
 def lzip(*args, **kwargs):
    return list(zip(*args, **kwargs))
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/family.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/family.py
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/glm.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/glm.py
@ -0,0 +1,326 @@
 import numpy as np
 import numpy.linalg as la
 from pysal.spreg.utils import RegressionPropsY, spdot
 import pysal.spreg.user_output as USER
 from utils import cache_readonly
 from base import LikelihoodModelResults
 import family
 from iwls import iwls
 __all__ = ['GLM']
 class GLM(RegressionPropsY):
    """
    Generalised linear models. Can currently estimate Guassian, Poisson and
    Logisitc regression coefficients. GLM object prepares model input and fit
    method performs estimation which then returns a GLMResults object.
    Parameters
    ----------
        y             : array
                        n*1, dependent variable.
        X             : array
                        n*k, independent variable, exlcuding the constant.
        family        : string
                        Model type: 'Gaussian', 'Poisson', 'Binomial'
    Attributes
    ----------
        y             : array
                        n*1, dependent variable.
        X             : array
                        n*k, independent variable, including constant.
        family        : string
                        Model type: 'Gaussian', 'Poisson', 'logistic'
        n             : integer
                        Number of observations
        k             : integer
                        Number of independent variables
        df_model      : float
                        k-1, where k is the number of variables (including
                        intercept)
        df_residual   : float
                        observations minus variables (n-k)
        mean_y        : float
                        Mean of y
        std_y         : float
                        Standard deviation of y
        fit_params     : dict
                        Parameters passed into fit method to define estimation
                        routine.
        normalized_cov_params   : array
                                k*k, approximates [X.T*X]-1
    """
    def __init__(self, y, X, family=family.Gaussian(), constant=True):
        """
        Initialize class
        """
        self.n = USER.check_arrays(y, X)
        USER.check_y(y, self.n)
        self.y = y
        if constant:
            self.X = USER.check_constant(X)
        else:
            self.X = X
        self.family = family
        self.k = self.X.shape[1]
        self.fit_params = {}
    def fit(self, ini_betas=None, tol=1.0e-6, max_iter=200, solve='iwls'):
        """
        Method that fits a model with a particular estimation routine.
        Parameters
        ----------
        ini_betas     : array
                        k*1, initial coefficient values, including constant.
                        Default is None, which calculates initial values during
                        estimation.
        tol:            float
                        Tolerence for estimation convergence.
        max_iter       : integer
                        Maximum number of iterations if convergence not
                        achieved.
        solve         :string
                       Technique to solve MLE equations.
                       'iwls' = iteratively (re)weighted least squares (default)
        """
        self.fit_params['ini_betas'] = ini_betas
        self.fit_params['tol'] = tol
        self.fit_params['max_iter'] = max_iter
        self.fit_params['solve']=solve
        if solve.lower() == 'iwls':
            params, predy, w, n_iter = iwls(self.y, self.X, self.family,
                    ini_betas=ini_betas, tol=tol, max_iter=max_iter)
            self.fit_params['n_iter'] = n_iter
        return GLMResults(self, params.flatten(), predy, w)
    @cache_readonly
    def df_model(self):
        return self.X.shape[1] - 1
    @cache_readonly
    def df_resid(self):
        return self.n - self.df_model - 1
 class GLMResults(LikelihoodModelResults):
    """
    Results of estimated GLM and diagnostics.
    Parameters
    ----------
        model         : GLM object
                        Pointer to GLM object with estimation parameters.
        params         : array
                        k*1, estimared coefficients
        mu         : array
                        n*1, predicted y values.
        w             : array
                        n*1, final weight used for iwls
    Attributes
    ----------
        model         : GLM Object
                        Points to GLM object for which parameters have been
                        estimated.
        y             : array
                        n*1, dependent variable.
        x             : array
                        n*k, independent variable, including constant.
        family        : string
                        Model type: 'Gaussian', 'Poisson', 'Logistic'
        n             : integer
                        Number of observations
        k             : integer
                        Number of independent variables
        df_model      : float
                        k-1, where k is the number of variables (including
                        intercept)
        df_residual   : float
                        observations minus variables (n-k)
        fit_params    : dict
                        parameters passed into fit method to define estimation
                        routine.
        scale         : float
                        sigma squared used for subsequent computations.
        params         : array
                        n*k, estimared beta coefficients
        w             : array
                        n*1, final weight values of x
        mu            : array
                        n*1, predicted value of y (i.e., fittedvalues)
        cov_params    : array
                        Variance covariance matrix (kxk) of betas which has been
                        appropriately scaled by sigma-squared
        bse           : array
                        k*1, standard errors of betas
        pvalues       : array
                        k*1, two-tailed pvalues of parameters
        tvalues       : array
                        k*1, the tvalues of the standard errors
        null          : array
                        n*1, predicted values of y for null model
        deviance      : float
                        value of the deviance function evalued at params;
                        see family.py for distribution-specific deviance
        null_deviance : float
                        value of the deviance function for the model fit with
                        a constant as the only regressor
        llf           : float
                        value of the loglikelihood function evalued at params;
                        see family.py for distribution-specific loglikelihoods
        llnull       : float
                        value of log-likelihood function evaluated at null
        aic           : float 
                        AIC
        bic           : float 
                        BIC
        D2            : float
                        percent deviance explained
        adj_D2        : float
                        adjusted percent deviance explained
        pseudo_R2       : float
                        McFadden's pseudo R2  (coefficient of determination) 
        adj_pseudoR2    : float
                        adjusted McFadden's pseudo R2
        resid_response          : array
                                  response residuals; defined as y-mu
        resid_pearson           : array
                                  Pearson residuals; defined as (y-mu)/sqrt(VAR(mu))
                                  where VAR is the distribution specific variance
                                  function; see family.py and varfuncs.py for more information.
        resid_working           : array
                                  Working residuals; the working residuals are defined as
                                  resid_response/link'(mu); see links.py for the
                                  derivatives of the link functions.
        resid_anscombe          : array
                                 Anscombe residuals; see family.py for 
                                 distribution-specific Anscombe residuals.
        resid_deviance          : array
                                 deviance residuals; see family.py for 
                                 distribution-specific deviance residuals.
        pearson_chi2            : float
                                  chi-Squared statistic is defined as the sum 
                                  of the squares of the Pearson residuals
        normalized_cov_params   : array
                                k*k, approximates [X.T*X]-1
    """
    def __init__(self, model, params, mu, w):
        self.model = model
        self.n = model.n
        self.y = model.y.T.flatten()
        self.X = model.X
        self.k = model.k
        self.family = model.family
        self.fit_params = model.fit_params
        self.params = params
        self.w = w
        self.mu = mu.flatten()
        self._cache = {}
    @cache_readonly
    def df_model(self):
        return self.model.df_model
    @cache_readonly
    def df_resid(self):
        return self.model.df_resid
    @cache_readonly
    def normalized_cov_params(self):
        return la.inv(spdot(self.w.T, self.w))
    @cache_readonly
    def resid_response(self):
        return (self.y-self.mu)
    @cache_readonly
    def resid_pearson(self):
        return  ((self.y-self.mu) /
                np.sqrt(self.family.variance(self.mu)))
    @cache_readonly
    def resid_working(self):
        return (self.resid_response / self.family.link.deriv(self.mu))
    @cache_readonly
    def resid_anscombe(self):
        return (self.family.resid_anscombe(self.y, self.mu))
    @cache_readonly
    def resid_deviance(self):
        return (self.family.resid_dev(self.y, self.mu))
    @cache_readonly
    def pearson_chi2(self):
        chisq = (self.y - self.mu)**2 / self.family.variance(self.mu)
        chisqsum = np.sum(chisq)
        return chisqsum
    @cache_readonly
    def null(self):
        y = np.reshape(self.y, (-1,1))
        model = self.model
        X = np.ones((len(y), 1))
        null_mod =  GLM(y, X, family=self.family, constant=False)
        return null_mod.fit().mu
    @cache_readonly
    def scale(self):
        if isinstance(self.family, (family.Binomial, family.Poisson)):
            return 1.
        else:
            return (((np.power(self.resid_response, 2) /
                         self.family.variance(self.mu))).sum() /
                        (self.df_resid))
    @cache_readonly
    def deviance(self):
        return self.family.deviance(self.y, self.mu)
    @cache_readonly
    def null_deviance(self):
        return self.family.deviance(self.y, self.null)
    @cache_readonly
    def llnull(self):
        return self.family.loglike(self.y, self.null, scale=self.scale)
    @cache_readonly
    def llf(self):
        return self.family.loglike(self.y, self.mu, scale=self.scale)
    @cache_readonly
    def aic(self):
        if isinstance(self.family, family.QuasiPoisson):
        	return np.nan
        else:
            return -2 * self.llf + 2*(self.df_model+1)
    @cache_readonly
    def bic(self):
        return (self.deviance -
                (self.model.n - self.df_model - 1) *
                np.log(self.model.n))
    @cache_readonly
    def D2(self):
        return 1 - (self.deviance / self.null_deviance)
    @cache_readonly
    def adj_D2(self):
        return 1.0 - (float(self.n) - 1.0)/(float(self.n) - float(self.k)) * (1.0-self.D2)
    @cache_readonly
    def pseudoR2(self):
        return 1 - (self.llf/self.llnull)
    @cache_readonly
    def adj_pseudoR2(self):
        return 1 - ((self.llf-self.k)/self.llnull)
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/iwls.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/iwls.py
@ -0,0 +1,84 @@
 import numpy as np
 import numpy.linalg as la
 from scipy import sparse as sp
 from scipy.sparse import linalg as spla
 from pysal.spreg.utils import spdot, spmultiply
 from family import Binomial, Poisson
 def _compute_betas(y, x):
    """
    compute MLE coefficients using iwls routine
    Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying relationships.
    """
    xT = x.T
    xtx = spdot(xT, x)
    xtx_inv = la.inv(xtx)
    xtx_inv = sp.csr_matrix(xtx_inv)
    xTy = spdot(xT, y, array_out=False)
    betas = spdot(xtx_inv, xTy)
    return betas
 def _compute_betas_gwr(y, x, wi):
    """
    compute MLE coefficients using iwls routine
    Methods: p189, Iteratively (Re)weighted Least Squares (IWLS),
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying relationships.
    """
    xT = (x * wi).T
    xtx = np.dot(xT, x)
    xtx_inv = la.inv(xtx)
    xtx_inv_xt = np.dot(xtx_inv, xT)
    betas = np.dot(xtx_inv_xt, y)
    return betas, xtx_inv_xt
 def iwls(y, x, family, offset=1.0, ini_betas=None, tol=1.0e-8, max_iter=200, wi=None):
    """
    Iteratively re-weighted least squares estimation routine
    """
    n_iter = 0
    diff = 1.0e6
    if ini_betas is None:
        betas = np.zeros((x.shape[1], 1), np.float)
    else:
        betas = ini_betas
    if isinstance(family, Binomial):
        y = family.link._clean(y)
    if isinstance(family, Poisson):
    	y_off = y/offset
        y_off = family.starting_mu(y_off)
        v = family.predict(y_off)
        mu = family.starting_mu(y)
    else:
        mu = family.starting_mu(y)
        v = family.predict(mu)
    while diff > tol and n_iter < max_iter:
    	n_iter += 1
        w = family.weights(mu)
        z = v + (family.link.deriv(mu)*(y-mu))
        w = np.sqrt(w)
        if type(x) != np.ndarray:
        	w = sp.csr_matrix(w)
        	z = sp.csr_matrix(z)
        wx = spmultiply(x, w, array_out=False)
        wz = spmultiply(z, w, array_out=False)
        if wi is None:
            n_betas = _compute_betas(wz, wx)
        else:
            n_betas, xtx_inv_xt = _compute_betas_gwr(wz, wx, wi)
        v = spdot(x, n_betas)
        mu  = family.fitted(v)
        if isinstance(family, Poisson):
            mu = mu * offset
        diff = min(abs(n_betas-betas))
        betas = n_betas
    if wi is None:
        return betas, mu, wx, n_iter
    else:
        return betas, mu, v, w, z, xtx_inv_xt, n_iter
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/links.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/links.py
@ -0,0 +1,953 @@
 '''
 Defines the link functions to be used with GLM and GEE families.
 '''
 import numpy as np
 import scipy.stats
 FLOAT_EPS = np.finfo(float).eps
 class Link(object):
    """
    A generic link function for one-parameter exponential family.
    `Link` does nothing, but lays out the methods expected of any subclass.
    """
    def __call__(self, p):
        """
        Return the value of the link function.  This is just a placeholder.
        Parameters
        ----------
        p : array-like
            Probabilities
        Returns
        -------
        g(p) : array-like
            The value of the link function g(p) = z
        """
        return NotImplementedError
    def inverse(self, z):
        """
        Inverse of the link function.  Just a placeholder.
        Parameters
        ----------
        z : array-like
            `z` is usually the linear predictor of the transformed variable
            in the IRLS algorithm for GLM.
        Returns
        -------
        g^(-1)(z) : array
            The value of the inverse of the link function g^(-1)(z) = p
        """
        return NotImplementedError
    def deriv(self, p):
        """
        Derivative of the link function g'(p).  Just a placeholder.
        Parameters
        ----------
        p : array-like
        Returns
        -------
        g'(p) : array
            The value of the derivative of the link function g'(p)
        """
        return NotImplementedError
    def deriv2(self, p):
        """Second derivative of the link function g''(p)
        implemented through numerical differentiation
        """
        from statsmodels.tools.numdiff import approx_fprime_cs
        # TODO: workaround proplem with numdiff for 1d
        return np.diag(approx_fprime_cs(p, self.deriv))
    def inverse_deriv(self, z):
        """
        Derivative of the inverse link function g^(-1)(z).
        Notes
        -----
        This reference implementation gives the correct result but is
        inefficient, so it can be overriden in subclasses.
        Parameters
        ----------
        z : array-like
            `z` is usually the linear predictor for a GLM or GEE model.
        Returns
        -------
        g'^(-1)(z) : array
            The value of the derivative of the inverse of the link function
        """
        return 1 / self.deriv(self.inverse(z))
 class Logit(Link):
    """
    The logit transform
    Notes
    -----
    call and derivative use a private method _clean to make trim p by
    machine epsilon so that p is in (0,1)
    Alias of Logit:
    logit = Logit()
    """
    def _clean(self, p):
        """
        Clip logistic values to range (eps, 1-eps)
        Parameters
        -----------
        p : array-like
            Probabilities
        Returns
        --------
        pclip : array
            Clipped probabilities
        """
        return np.clip(p, FLOAT_EPS, 1. - FLOAT_EPS)
    def __call__(self, p):
        """
        The logit transform
        Parameters
        ----------
        p : array-like
            Probabilities
        Returns
        -------
        z : array
            Logit transform of `p`
        Notes
        -----
        g(p) = log(p / (1 - p))
        """
        p = self._clean(p)
        return np.log(p / (1. - p))
    def inverse(self, z):
        """
        Inverse of the logit transform
        Parameters
        ----------
        z : array-like
            The value of the logit transform at `p`
        Returns
        -------
        p : array
            Probabilities
        Notes
        -----
        g^(-1)(z) = exp(z)/(1+exp(z))
        """
        z = np.asarray(z)
        t = np.exp(-z)
        return 1. / (1. + t)
    def deriv(self, p):
        """
        Derivative of the logit transform
        Parameters
        ----------
        p: array-like
            Probabilities
        Returns
        -------
        g'(p) : array
            Value of the derivative of logit transform at `p`
        Notes
        -----
        g'(p) = 1 / (p * (1 - p))
        Alias for `Logit`:
        logit = Logit()
        """
        p = self._clean(p)
        return 1. / (p * (1 - p))
    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the logit transform
        Parameters
        ----------
        z : array-like
            `z` is usually the linear predictor for a GLM or GEE model.
        Returns
        -------
        g'^(-1)(z) : array
            The value of the derivative of the inverse of the logit function
        """
        t = np.exp(z)
        return t/(1 + t)**2
    def deriv2(self, p):
        """
        Second derivative of the logit function.
        Parameters
        ----------
        p : array-like
            probabilities
        Returns
        -------
        g''(z) : array
            The value of the second derivative of the logit function
        """
        v = p * (1 - p)
        return (2*p - 1) / v**2
 class logit(Logit):
    pass
 class Power(Link):
    """
    The power transform
    Parameters
    ----------
    power : float
        The exponent of the power transform
    Notes
    -----
    Aliases of Power:
    inverse = Power(power=-1)
    sqrt = Power(power=.5)
    inverse_squared = Power(power=-2.)
    identity = Power(power=1.)
    """
    def __init__(self, power=1.):
        self.power = power
    def __call__(self, p):
        """
        Power transform link function
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        z : array-like
            Power transform of x
        Notes
        -----
        g(p) = x**self.power
        """
        z = np.power(p, self.power)
        return z
    def inverse(self, z):
        """
        Inverse of the power transform link function
        Parameters
        ----------
        `z` : array-like
            Value of the transformed mean parameters at `p`
        Returns
        -------
        `p` : array
            Mean parameters
        Notes
        -----
        g^(-1)(z`) = `z`**(1/`power`)
        """
        p = np.power(z, 1. / self.power)
        return p
    def deriv(self, p):
        """
        Derivative of the power transform
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        --------
        g'(p) : array
            Derivative of power transform of `p`
        Notes
        -----
        g'(`p`) = `power` * `p`**(`power` - 1)
        """
        return self.power * np.power(p, self.power - 1)
    def deriv2(self, p):
        """
        Second derivative of the power transform
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        --------
        g''(p) : array
            Second derivative of the power transform of `p`
        Notes
        -----
        g''(`p`) = `power` * (`power` - 1) * `p`**(`power` - 2)
        """
        return self.power * (self.power - 1) * np.power(p, self.power - 2)
    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the power transform
        Parameters
        ----------
        z : array-like
            `z` is usually the linear predictor for a GLM or GEE model.
        Returns
        -------
        g^(-1)'(z) : array
            The value of the derivative of the inverse of the power transform
        function
        """
        return np.power(z, (1 - self.power)/self.power) / self.power
 class inverse_power(Power):
    """
    The inverse transform
    Notes
    -----
    g(p) = 1/p
    Alias of statsmodels.family.links.Power(power=-1.)
    """
    def __init__(self):
        super(inverse_power, self).__init__(power=-1.)
 class sqrt(Power):
    """
    The square-root transform
    Notes
    -----
    g(`p`) = sqrt(`p`)
    Alias of statsmodels.family.links.Power(power=.5)
    """
    def __init__(self):
        super(sqrt, self).__init__(power=.5)
 class inverse_squared(Power):
    """
    The inverse squared transform
    Notes
    -----
    g(`p`) = 1/(`p`\ \*\*2)
    Alias of statsmodels.family.links.Power(power=2.)
    """
    def __init__(self):
        super(inverse_squared, self).__init__(power=-2.)
 class identity(Power):
    """
    The identity transform
    Notes
    -----
    g(`p`) = `p`
    Alias of statsmodels.family.links.Power(power=1.)
    """
    def __init__(self):
        super(identity, self).__init__(power=1.)
 class Log(Link):
    """
    The log transform
    Notes
    -----
    call and derivative call a private method _clean to trim the data by
    machine epsilon so that p is in (0,1). log is an alias of Log.
    """
    def _clean(self, x):
        return np.clip(x, FLOAT_EPS, np.inf)
    def __call__(self, p, **extra):
        """
        Log transform link function
        Parameters
        ----------
        x : array-like
            Mean parameters
        Returns
        -------
        z : array
            log(x)
        Notes
        -----
        g(p) = log(p)
        """
        x = self._clean(p)
        return np.log(x)
    def inverse(self, z):
        """
        Inverse of log transform link function
        Parameters
        ----------
        z : array
            The inverse of the link function at `p`
        Returns
        -------
        p : array
            The mean probabilities given the value of the inverse `z`
        Notes
        -----
        g^{-1}(z) = exp(z)
        """
        return np.exp(z)
    def deriv(self, p):
        """
        Derivative of log transform link function
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        g'(p) : array
            derivative of log transform of x
        Notes
        -----
        g'(x) = 1/x
        """
        p = self._clean(p)
        return 1. / p
    def deriv2(self, p):
        """
        Second derivative of the log transform link function
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        g''(p) : array
            Second derivative of log transform of x
        Notes
        -----
        g''(x) = -1/x^2
        """
        p = self._clean(p)
        return -1. / p**2
    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the log transform link function
        Parameters
        ----------
        z : array
            The inverse of the link function at `p`
        Returns
        -------
        g^(-1)'(z) : array
            The value of the derivative of the inverse of the log function,
            the exponential function
        """
        return np.exp(z)
 class log(Log):
    """
    The log transform
    Notes
    -----
    log is a an alias of Log.
    """
    pass
 # TODO: the CDFLink is untested
 class CDFLink(Logit):
    """
    The use the CDF of a scipy.stats distribution
    CDFLink is a subclass of logit in order to use its _clean method
    for the link and its derivative.
    Parameters
    ----------
    dbn : scipy.stats distribution
        Default is dbn=scipy.stats.norm
    Notes
    -----
    The CDF link is untested.
    """
    def __init__(self, dbn=scipy.stats.norm):
        self.dbn = dbn
    def __call__(self, p):
        """
        CDF link function
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        z : array
            (ppf) inverse of CDF transform of p
        Notes
        -----
        g(`p`) = `dbn`.ppf(`p`)
        """
        p = self._clean(p)
        return self.dbn.ppf(p)
    def inverse(self, z):
        """
        The inverse of the CDF link
        Parameters
        ----------
        z : array-like
            The value of the inverse of the link function at `p`
        Returns
        -------
        p : array
            Mean probabilities.  The value of the inverse of CDF link of `z`
        Notes
        -----
        g^(-1)(`z`) = `dbn`.cdf(`z`)
        """
        return self.dbn.cdf(z)
    def deriv(self, p):
        """
        Derivative of CDF link
        Parameters
        ----------
        p : array-like
            mean parameters
        Returns
        -------
        g'(p) : array
            The derivative of CDF transform at `p`
        Notes
        -----
        g'(`p`) = 1./ `dbn`.pdf(`dbn`.ppf(`p`))
        """
        p = self._clean(p)
        return 1. / self.dbn.pdf(self.dbn.ppf(p))
    def deriv2(self, p):
        """
        Second derivative of the link function g''(p)
        implemented through numerical differentiation
        """
        from statsmodels.tools.numdiff import approx_fprime
        p = np.atleast_1d(p)
        # Note: special function for norm.ppf does not support complex
        return np.diag(approx_fprime(p, self.deriv, centered=True))
    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the CDF transformation link function
        Parameters
        ----------
        z : array
            The inverse of the link function at `p`
        Returns
        -------
        g^(-1)'(z) : array
            The value of the derivative of the inverse of the logit function
        """
        return 1/self.deriv(self.inverse(z))
 class probit(CDFLink):
    """
    The probit (standard normal CDF) transform
    Notes
    --------
    g(p) = scipy.stats.norm.ppf(p)
    probit is an alias of CDFLink.
    """
    pass
 class cauchy(CDFLink):
    """
    The Cauchy (standard Cauchy CDF) transform
    Notes
    -----
    g(p) = scipy.stats.cauchy.ppf(p)
    cauchy is an alias of CDFLink with dbn=scipy.stats.cauchy
    """
    def __init__(self):
        super(cauchy, self).__init__(dbn=scipy.stats.cauchy)
    def deriv2(self, p):
        """
        Second derivative of the Cauchy link function.
        Parameters
        ----------
        p: array-like
            Probabilities
        Returns
        -------
        g''(p) : array
            Value of the second derivative of Cauchy link function at `p`
        """
        a = np.pi * (p - 0.5)
        d2 = 2 * np.pi**2 * np.sin(a) / np.cos(a)**3
        return d2
 class CLogLog(Logit):
    """
    The complementary log-log transform
    CLogLog inherits from Logit in order to have access to its _clean method
    for the link and its derivative.
    Notes
    -----
    CLogLog is untested.
    """
    def __call__(self, p):
        """
        C-Log-Log transform link function
        Parameters
        ----------
        p : array
            Mean parameters
        Returns
        -------
        z : array
            The CLogLog transform of `p`
        Notes
        -----
        g(p) = log(-log(1-p))
        """
        p = self._clean(p)
        return np.log(-np.log(1 - p))
    def inverse(self, z):
        """
        Inverse of C-Log-Log transform link function
        Parameters
        ----------
        z : array-like
            The value of the inverse of the CLogLog link function at `p`
        Returns
        -------
        p : array
            Mean parameters
        Notes
        -----
        g^(-1)(`z`) = 1-exp(-exp(`z`))
        """
        return 1 - np.exp(-np.exp(z))
    def deriv(self, p):
        """
        Derivative of C-Log-Log transform link function
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        g'(p) : array
            The derivative of the CLogLog transform link function
        Notes
        -----
        g'(p) = - 1 / ((p-1)*log(1-p))
        """
        p = self._clean(p)
        return 1. / ((p - 1) * (np.log(1 - p)))
    def deriv2(self, p):
        """
        Second derivative of the C-Log-Log ink function
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        g''(p) : array
            The second derivative of the CLogLog link function
        """
        p = self._clean(p)
        fl = np.log(1 - p)
        d2 = -1 / ((1 - p)**2 * fl)
        d2 *= 1 + 1 / fl
        return d2
    def inverse_deriv(self, z):
        """
        Derivative of the inverse of the C-Log-Log transform link function
        Parameters
        ----------
        z : array-like
            The value of the inverse of the CLogLog link function at `p`
        Returns
        -------
        g^(-1)'(z) : array
            The derivative of the inverse of the CLogLog link function
        """
        return np.exp(z - np.exp(z))
 class cloglog(CLogLog):
    """
    The CLogLog transform link function.
    Notes
    -----
    g(`p`) = log(-log(1-`p`))
    cloglog is an alias for CLogLog
    cloglog = CLogLog()
    """
    pass
 class NegativeBinomial(object):
    '''
    The negative binomial link function
    Parameters
    ----------
    alpha : float, optional
        Alpha is the ancillary parameter of the Negative Binomial link
        function. It is assumed to be nonstochastic.  The default value is 1.
        Permissible values are usually assumed to be in (.01, 2).
    '''
    def __init__(self, alpha=1.):
        self.alpha = alpha
    def _clean(self, x):
        return np.clip(x, FLOAT_EPS, np.inf)
    def __call__(self, p):
        '''
        Negative Binomial transform link function
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        z : array
            The negative binomial transform of `p`
        Notes
        -----
        g(p) = log(p/(p + 1/alpha))
        '''
        p = self._clean(p)
        return np.log(p/(p + 1/self.alpha))
    def inverse(self, z):
        '''
        Inverse of the negative binomial transform
        Parameters
        -----------
        z : array-like
            The value of the inverse of the negative binomial link at `p`.
        Returns
        -------
        p : array
            Mean parameters
        Notes
        -----
        g^(-1)(z) = exp(z)/(alpha*(1-exp(z)))
        '''
        return -1/(self.alpha * (1 - np.exp(-z)))
    def deriv(self, p):
        '''
        Derivative of the negative binomial transform
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        g'(p) : array
            The derivative of the negative binomial transform link function
        Notes
        -----
        g'(x) = 1/(x+alpha*x^2)
        '''
        return 1/(p + self.alpha * p**2)
    def deriv2(self,p):
        '''
        Second derivative of the negative binomial link function.
        Parameters
        ----------
        p : array-like
            Mean parameters
        Returns
        -------
        g''(p) : array
            The second derivative of the negative binomial transform link
            function
        Notes
        -----
        g''(x) = -(1+2*alpha*x)/(x+alpha*x^2)^2
        '''
        numer = -(1 + 2 * self.alpha * p)
        denom = (p + self.alpha * p**2)**2
        return numer / denom
    def inverse_deriv(self, z):
        '''
        Derivative of the inverse of the negative binomial transform
        Parameters
        -----------
        z : array-like
            Usually the linear predictor for a GLM or GEE model
        Returns
        -------
        g^(-1)'(z) : array
            The value of the derivative of the inverse of the negative
            binomial link
        '''
        t = np.exp(z)
        return t / (self.alpha * (1-t)**2)
 class nbinom(NegativeBinomial):
    """
    The negative binomial link function.
    Notes
    -----
    g(p) = log(p/(p + 1/alpha))
    nbinom is an alias of NegativeBinomial.
    nbinom = NegativeBinomial(alpha=1.)
    """
    pass
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/tests/test_glm.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/tests/test_glm.py
@ -0,0 +1,993 @@
 """
 Tests for generalized linear models. Majority of code either directly borrowed
 or closely adapted from statsmodels package. Model results verfiied using glm
 function in R and GLM function in statsmodels.
 """
 __author__ = 'Taylor Oshan tayoshan@gmail.com'
 from pysal.contrib.glm.glm import GLM
 from pysal.contrib.glm.family import Gaussian, Poisson, Binomial, QuasiPoisson
 import numpy as np
 import pysal
 import unittest
 import math
 class TestGaussian(unittest.TestCase):
    """
    Tests for Poisson GLM
    """
    def setUp(self):
        db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')
        y = np.array(db.by_col("HOVAL"))
        self.y = np.reshape(y, (49,1))
        X = []
        X.append(db.by_col("INC"))
        X.append(db.by_col("CRIME"))
        self.X = np.array(X).T
    def testIWLS(self):
        model = GLM(self.y, self.X, family=Gaussian())
        results = model.fit()
        self.assertEqual(results.n, 49)
        self.assertEqual(results.df_model, 2)
        self.assertEqual(results.df_resid, 46)
        self.assertEqual(results.aic, 408.73548964604873)
        self.assertEqual(results.bic, 10467.991340493107)
        self.assertEqual(results.deviance, 10647.015074206196)
        self.assertEqual(results.llf, -201.36774482302437)
        self.assertEqual(results.null_deviance, 16367.794631703124)
        self.assertEqual(results.scale, 231.45684943926514)
        np.testing.assert_allclose(results.params, [ 46.42818268,   0.62898397,
            -0.48488854])
        np.testing.assert_allclose(results.bse, [ 13.19175703,   0.53591045,
            0.18267291])
        np.testing.assert_allclose(results.cov_params(), 
                [[  1.74022453e+02,  -6.52060364e+00,  -2.15109867e+00],
                [ -6.52060364e+00,   2.87200008e-01,   6.80956787e-02],
                [ -2.15109867e+00,   6.80956787e-02,   3.33693910e-02]])
        np.testing.assert_allclose(results.tvalues, [ 3.51948437,  1.17367365,
            -2.65440864])
        np.testing.assert_allclose(results.pvalues, [ 0.00043239,  0.24052577,
            0.00794475], atol=1.0e-8)
        np.testing.assert_allclose(results.conf_int(), 
                [[ 20.57281401,  72.28355135],
                [ -0.42138121,   1.67934915],
                [ -0.84292086,  -0.12685622]])
        np.testing.assert_allclose(results.normalized_cov_params, 
                [[  7.51857004e-01,  -2.81720055e-02,  -9.29373521e-03],
                [ -2.81720055e-02,   1.24083607e-03,   2.94204638e-04],
                [ -9.29373521e-03,   2.94204638e-04,   1.44171110e-04]])
        np.testing.assert_allclose(results.mu, 
                [ 51.08752105,  50.66601521,  41.61367567,  33.53969014,
                28.90638232,  43.87074227,  51.64910882,  34.92671563,
                42.69267622,  38.49449134,  20.92815471,  25.25228436,
                29.78223486,  25.02403635,  29.07959539,  24.63352275,
                34.71372149,  33.40443052,  27.29864225,  65.86219802,
                33.69854751,  37.44976435,  50.01304928,  36.81219959,
                22.02674837,  31.64775955,  27.63563294,  23.7697291 ,
                22.43119725,  21.76987089,  48.51169321,  49.05891819,
                32.31656426,  44.20550354,  35.49244888,  51.27811308,
                36.55047181,  27.37048914,  48.78812922,  57.31744163,
                51.22914162,  54.70515578,  37.06622277,  44.5075759 ,
                41.24328983,  49.93821824,  44.85644299,  40.93838609,  47.32045464])
        self.assertEqual(results.pearson_chi2, 10647.015074206196)
        np.testing.assert_allclose(results.resid_response, 
                [ 29.37948195,  -6.09901421, -15.26367567,  -0.33968914,
                -5.68138232, -15.12074227,  23.35089118,   2.19828437,
                9.90732178,  57.90551066,  -1.22815371,  -5.35228436,
                11.91776614,  17.87596565, -11.07959539,  -5.83352375,
                7.03627851,  26.59556948,   3.30135775,  15.40479998,
                -13.72354751,  -6.99976335,  -2.28004728,  16.38780141,
                -4.12674837, -11.34776055,   6.46436506,  -0.9197291 ,
                10.06880275,   0.73012911, -16.71169421,  -8.75891919,
                -8.71656426, -15.75550254,  -8.49244888, -14.97811408,
                6.74952719,  -4.67048814,  -9.18813122,   4.63255937,
                -9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
                -13.51028983,  26.16177976,  -2.35644299, -14.13838709, -11.52045564])
        np.testing.assert_allclose(results.resid_working, 
                [ 29.37948195,  -6.09901421, -15.26367567,  -0.33968914,
                -5.68138232, -15.12074227,  23.35089118,   2.19828437,
                9.90732178,  57.90551066,  -1.22815371,  -5.35228436,
                11.91776614,  17.87596565, -11.07959539,  -5.83352375,
                7.03627851,  26.59556948,   3.30135775,  15.40479998,
                -13.72354751,  -6.99976335,  -2.28004728,  16.38780141,
                -4.12674837, -11.34776055,   6.46436506,  -0.9197291 ,
                10.06880275,   0.73012911, -16.71169421,  -8.75891919,
                -8.71656426, -15.75550254,  -8.49244888, -14.97811408,
                6.74952719,  -4.67048814,  -9.18813122,   4.63255937,
                -9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
                -13.51028983,  26.16177976,  -2.35644299, -14.13838709, -11.52045564])
        np.testing.assert_allclose(results.resid_pearson, 
                [ 29.37948195,  -6.09901421, -15.26367567,  -0.33968914,
                -5.68138232, -15.12074227,  23.35089118,   2.19828437,
                9.90732178,  57.90551066,  -1.22815371,  -5.35228436,
                11.91776614,  17.87596565, -11.07959539,  -5.83352375,
                7.03627851,  26.59556948,   3.30135775,  15.40479998,
                -13.72354751,  -6.99976335,  -2.28004728,  16.38780141,
                -4.12674837, -11.34776055,   6.46436506,  -0.9197291 ,
                10.06880275,   0.73012911, -16.71169421,  -8.75891919,
                -8.71656426, -15.75550254,  -8.49244888, -14.97811408,
                6.74952719,  -4.67048814,  -9.18813122,   4.63255937,
                -9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
                -13.51028983,  26.16177976,  -2.35644299, -14.13838709, -11.52045564])
        np.testing.assert_allclose(results.resid_anscombe, 
                [ 29.37948195,  -6.09901421, -15.26367567,  -0.33968914,
                -5.68138232, -15.12074227,  23.35089118,   2.19828437,
                9.90732178,  57.90551066,  -1.22815371,  -5.35228436,
                11.91776614,  17.87596565, -11.07959539,  -5.83352375,
                7.03627851,  26.59556948,   3.30135775,  15.40479998,
                -13.72354751,  -6.99976335,  -2.28004728,  16.38780141,
                -4.12674837, -11.34776055,   6.46436506,  -0.9197291 ,
                10.06880275,   0.73012911, -16.71169421,  -8.75891919,
                -8.71656426, -15.75550254,  -8.49244888, -14.97811408,
                6.74952719,  -4.67048814,  -9.18813122,   4.63255937,
                -9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
                -13.51028983,  26.16177976,  -2.35644299, -14.13838709, -11.52045564])
        np.testing.assert_allclose(results.resid_deviance, 
                [ 29.37948195,  -6.09901421, -15.26367567,  -0.33968914,
                -5.68138232, -15.12074227,  23.35089118,   2.19828437,
                9.90732178,  57.90551066,  -1.22815371,  -5.35228436,
                11.91776614,  17.87596565, -11.07959539,  -5.83352375,
                7.03627851,  26.59556948,   3.30135775,  15.40479998,
                -13.72354751,  -6.99976335,  -2.28004728,  16.38780141,
                -4.12674837, -11.34776055,   6.46436506,  -0.9197291 ,
                10.06880275,   0.73012911, -16.71169421,  -8.75891919,
                -8.71656426, -15.75550254,  -8.49244888, -14.97811408,
                6.74952719,  -4.67048814,  -9.18813122,   4.63255937,
                -9.12914362, -10.37215578, -11.36622177, -11.0075759 ,
                -13.51028983,  26.16177976,  -2.35644299, -14.13838709, -11.52045564])
        np.testing.assert_allclose(results.null, 
                [ 38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,
                38.43622447,  38.43622447,  38.43622447,  38.43622447,  38.43622447])
        self.assertAlmostEqual(results.D2, .349514377851)
        self.assertAlmostEqual(results.adj_D2, 0.32123239427957673)
 class TestPoisson(unittest.TestCase):
    def setUp(self):
        db = pysal.open(pysal.examples.get_path('columbus.dbf'),'r')
        y = np.array(db.by_col("HOVAL"))
        y = np.reshape(y, (49,1))
        self.y = np.round(y).astype(int)
        X = []
        X.append(db.by_col("INC"))
        X.append(db.by_col("CRIME"))
        self.X = np.array(X).T
    def testIWLS(self):
        model = GLM(self.y, self.X, family=Poisson())
        results = model.fit()
        self.assertEqual(results.n, 49)
        self.assertEqual(results.df_model, 2)
        self.assertEqual(results.df_resid, 46)
        self.assertAlmostEqual(results.aic, 500.85184179938756)
        self.assertAlmostEqual(results.bic, 51.436404535087661)
        self.assertAlmostEqual(results.deviance, 230.46013824817649)
        self.assertAlmostEqual(results.llf, -247.42592089969378)
        self.assertAlmostEqual(results.null_deviance, 376.97293610347361)
        self.assertEqual(results.scale, 1.0)
        np.testing.assert_allclose(results.params, [ 3.92159085,  0.01183491,
            -0.01371397], atol=1.0e-8)
        np.testing.assert_allclose(results.bse, [ 0.13049161,  0.00511599,
            0.00193769], atol=1.0e-8)
        np.testing.assert_allclose(results.cov_params(), 
                [[  1.70280610e-02,  -6.18628383e-04,  -2.21386966e-04],
                [ -6.18628383e-04,   2.61733917e-05,   6.77496445e-06],
                [ -2.21386966e-04,   6.77496445e-06,   3.75463502e-06]])
        np.testing.assert_allclose(results.tvalues, [ 30.0524361 ,   2.31331634,
            -7.07748998])
        np.testing.assert_allclose(results.pvalues, [  2.02901657e-198,
            2.07052532e-002,   1.46788805e-012])
        np.testing.assert_allclose(results.conf_int(), 
                [[  3.66583199e+00,   4.17734972e+00],
                [  1.80774841e-03,   2.18620753e-02],
                [ -1.75117666e-02,  -9.91616901e-03]])
        np.testing.assert_allclose(results.normalized_cov_params, 
                [[  1.70280610e-02,  -6.18628383e-04,  -2.21386966e-04],
                [ -6.18628383e-04,   2.61733917e-05,   6.77496445e-06],
                [ -2.21386966e-04,   6.77496445e-06,   3.75463502e-06]])
        np.testing.assert_allclose(results.mu, 
                [ 51.26831574,  50.15022766,  40.06142973,  34.13799739,
                28.76119226,  42.6836241 ,  55.64593703,  34.08277997,
                40.90389582,  37.19727958,  23.47459217,  26.12384057,
                29.78303507,  25.96888223,  29.14073823,  26.04369592,
                34.18996367,  32.28924005,  27.42284396,  72.69207879,
                33.05316347,  36.52276972,  49.2551479 ,  35.33439632,
                24.07252457,  31.67153709,  27.81699478,  25.38021219,
                24.31759259,  23.13586161,  48.40724678,  48.57969818,
                31.92596006,  43.3679231 ,  34.32925819,  51.78908089,
                34.49778584,  27.56236198,  48.34273194,  57.50829097,
                50.66038226,  54.68701352,  35.77103116,  43.21886784,
                40.07615759,  49.98658004,  43.13352883,  40.28520774,  46.28910294])
        self.assertAlmostEqual(results.pearson_chi2, 264.62262932090221)
        np.testing.assert_allclose(results.resid_response, 
                [ 28.73168426,  -5.15022766, -14.06142973,  -1.13799739,
                -5.76119226, -13.6836241 ,  19.35406297,   2.91722003,
                12.09610418,  58.80272042,  -3.47459217,  -6.12384057,
                12.21696493,  17.03111777, -11.14073823,  -7.04369592,
                7.81003633,  27.71075995,   3.57715604,   8.30792121,
                -13.05316347,  -6.52276972,  -1.2551479 ,  17.66560368,
                -6.07252457, -11.67153709,   6.18300522,  -2.38021219,
                7.68240741,  -1.13586161, -16.40724678,  -8.57969818,
                -7.92596006, -15.3679231 ,  -7.32925819, -15.78908089,
                8.50221416,  -4.56236198,  -8.34273194,   4.49170903,
                -8.66038226, -10.68701352,  -9.77103116,  -9.21886784,
                -12.07615759,  26.01341996,  -1.13352883, -13.28520774, -10.28910294])
        np.testing.assert_allclose(results.resid_working, 
                [ 1473.02506034,  -258.28508941,  -563.32097891,   -38.84895192,
                -165.69875817,  -584.06666725,  1076.97496919,    99.42696848,
                494.77778514,  2187.30123163,   -81.56463405,  -159.97823479,
                363.858295  ,   442.27909165,  -324.64933645,  -183.44387481,
                267.02485844,   894.75938   ,    98.09579187,   603.9200634 ,
                -431.44834594,  -238.2296165 ,   -61.82249568,   624.20344168,
                -146.18099686,  -369.65551968,   171.99262399,   -60.41029031,
                186.81765356,   -26.27913713,  -794.22964417,  -416.79914795,
                -253.04388425,  -666.47490701,  -251.6079969 ,  -817.70198717,
                293.30756327,  -125.74947222,  -403.31045369,   258.31051005,
                -438.73827602,  -584.440853  ,  -349.51985996,  -398.42903071,
                -483.96599444,  1300.32189904,   -48.89309853,  -535.19735391,
                -476.27334527])
        np.testing.assert_allclose(results.resid_pearson, 
                [ 4.01269878, -0.72726045, -2.221602  , -0.19477008, -1.07425881,
                -2.09445239,  2.59451042,  0.49969118,  1.89131202,  9.64143836,
                -0.71714142, -1.19813392,  2.23861212,  3.34207756, -2.0637814 ,
                -1.3802231 ,  1.33568403,  4.87662684,  0.68309584,  0.97442591,
                -2.27043598, -1.07931992, -0.17884182,  2.97186889, -1.23768025,
                -2.07392709,  1.1723155 , -0.47246327,  1.55789092, -0.23614708,
                -2.35819937, -1.23096188, -1.40274877, -2.33362391, -1.25091503,
                -2.19400568,  1.44755952, -0.8690235 , -1.19989348,  0.59230634,
                -1.21675413, -1.44515442, -1.63370888, -1.40229988, -1.90759306,
                3.67934693, -0.17259375, -2.09312684, -1.51230062])
        np.testing.assert_allclose(results.resid_anscombe, 
                [ 3.70889134, -0.74031295, -2.37729865, -0.19586855, -1.11374751,
                -2.22611959,  2.46352013,  0.49282126,  1.80857757,  8.06444452,
                -0.73610811, -1.25061371,  2.10820431,  3.05467547, -2.22437611,
                -1.45136173,  1.28939698,  4.35942058,  0.66904552,  0.95674923,
                -2.45438937, -1.11429881, -0.17961012,  2.76715848, -1.29658591,
                -2.22816691,  1.13269136, -0.48017382,  1.48562248, -0.23812278,
                -2.51664399, -1.2703721 , -1.4683091 , -2.49907536, -1.30026484,
                -2.32398309,  1.39380683, -0.89495368, -1.23735395,  0.58485202,
                -1.25435224, -1.4968484 , -1.71888038, -1.45756652, -2.01906267,
                3.41729922, -0.17335867, -2.22921828, -1.57470549])
        np.testing.assert_allclose(results.resid_deviance, 
                [ 3.70529668, -0.74027329, -2.37536322, -0.19586751, -1.11349765,
                -2.22466106,  2.46246446,  0.4928057 ,  1.80799655,  8.02696525,
                -0.73602255, -1.25021555,  2.10699958,  3.05084608, -2.22214376,
                -1.45072221,  1.28913747,  4.35106213,  0.6689982 ,  0.95669662,
                -2.45171913, -1.11410444, -0.17960956,  2.76494217, -1.29609865,
                -2.22612429,  1.13247453, -0.48015254,  1.48508549, -0.23812   ,
                -2.51476072, -1.27015583, -1.46777697, -2.49699318, -1.29992892,
                -2.32263069,  1.39348459, -0.89482132, -1.23715363,  0.58483655,
                -1.25415329, -1.49653039, -1.7181055 , -1.45719072, -2.01791949,
                3.41437156, -0.1733581 , -2.22765605, -1.57426046])
        np.testing.assert_allclose(results.null, 
                [ 38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,  38.42857143])
        self.assertAlmostEqual(results.D2, .388656011675)
        self.assertAlmostEqual(results.adj_D2, 0.36207583826952761)#.375648692774)
    def testQuasi(self):
        model = GLM(self.y, self.X, family=QuasiPoisson())
        results = model.fit()
        self.assertEqual(results.n, 49)
        self.assertEqual(results.df_model, 2)
        self.assertEqual(results.df_resid, 46)
        self.assertTrue(math.isnan(results.aic))
        self.assertAlmostEqual(results.bic, 51.436404535087661)
        self.assertAlmostEqual(results.deviance, 230.46013824817649)
        self.assertTrue(math.isnan(results.llf))
        self.assertAlmostEqual(results.null_deviance, 376.97293610347361)
        self.assertAlmostEqual(results.scale, 5.7526658548022223)
        np.testing.assert_allclose(results.params, [ 3.92159085,  0.01183491,
            -0.01371397], atol=1.0e-8)
        np.testing.assert_allclose(results.bse, [ 0.31298042,  0.01227057,
            0.00464749], atol=1.0e-8)
        np.testing.assert_allclose(results.cov_params(), 
                [[  9.79567451e-02,  -3.55876238e-03,  -1.27356524e-03],
                [ -3.55876238e-03,   1.50566777e-04,   3.89741067e-05],
                [ -1.27356524e-03,   3.89741067e-05,   2.15991606e-05]])
        np.testing.assert_allclose(results.tvalues, [ 12.52982796,   0.96449604,
            -2.95083339])
        np.testing.assert_allclose(results.pvalues, [  5.12737770e-36,
            3.34797291e-01,   3.16917819e-03])
        np.testing.assert_allclose(results.conf_int(), 
                [[ 3.3081605 ,  4.53502121],
                [-0.01221495,  0.03588478],
                [-0.02282288, -0.00460506]], atol=1.0e-8)
        np.testing.assert_allclose(results.normalized_cov_params, 
                [[  1.70280610e-02,  -6.18628383e-04,  -2.21386966e-04],
                [ -6.18628383e-04,   2.61733917e-05,   6.77496445e-06],
                [ -2.21386966e-04,   6.77496445e-06,   3.75463502e-06]])
        np.testing.assert_allclose(results.mu, 
                [ 51.26831574,  50.15022766,  40.06142973,  34.13799739,
                28.76119226,  42.6836241 ,  55.64593703,  34.08277997,
                40.90389582,  37.19727958,  23.47459217,  26.12384057,
                29.78303507,  25.96888223,  29.14073823,  26.04369592,
                34.18996367,  32.28924005,  27.42284396,  72.69207879,
                33.05316347,  36.52276972,  49.2551479 ,  35.33439632,
                24.07252457,  31.67153709,  27.81699478,  25.38021219,
                24.31759259,  23.13586161,  48.40724678,  48.57969818,
                31.92596006,  43.3679231 ,  34.32925819,  51.78908089,
                34.49778584,  27.56236198,  48.34273194,  57.50829097,
                50.66038226,  54.68701352,  35.77103116,  43.21886784,
                40.07615759,  49.98658004,  43.13352883,  40.28520774,  46.28910294])
        self.assertAlmostEqual(results.pearson_chi2, 264.62262932090221)
        np.testing.assert_allclose(results.resid_response, 
                [ 28.73168426,  -5.15022766, -14.06142973,  -1.13799739,
                -5.76119226, -13.6836241 ,  19.35406297,   2.91722003,
                12.09610418,  58.80272042,  -3.47459217,  -6.12384057,
                12.21696493,  17.03111777, -11.14073823,  -7.04369592,
                7.81003633,  27.71075995,   3.57715604,   8.30792121,
                -13.05316347,  -6.52276972,  -1.2551479 ,  17.66560368,
                -6.07252457, -11.67153709,   6.18300522,  -2.38021219,
                7.68240741,  -1.13586161, -16.40724678,  -8.57969818,
                -7.92596006, -15.3679231 ,  -7.32925819, -15.78908089,
                8.50221416,  -4.56236198,  -8.34273194,   4.49170903,
                -8.66038226, -10.68701352,  -9.77103116,  -9.21886784,
                -12.07615759,  26.01341996,  -1.13352883, -13.28520774, -10.28910294])
        np.testing.assert_allclose(results.resid_working, 
                [ 1473.02506034,  -258.28508941,  -563.32097891,   -38.84895192,
                -165.69875817,  -584.06666725,  1076.97496919,    99.42696848,
                494.77778514,  2187.30123163,   -81.56463405,  -159.97823479,
                363.858295  ,   442.27909165,  -324.64933645,  -183.44387481,
                267.02485844,   894.75938   ,    98.09579187,   603.9200634 ,
                -431.44834594,  -238.2296165 ,   -61.82249568,   624.20344168,
                -146.18099686,  -369.65551968,   171.99262399,   -60.41029031,
                186.81765356,   -26.27913713,  -794.22964417,  -416.79914795,
                -253.04388425,  -666.47490701,  -251.6079969 ,  -817.70198717,
                293.30756327,  -125.74947222,  -403.31045369,   258.31051005,
                -438.73827602,  -584.440853  ,  -349.51985996,  -398.42903071,
                -483.96599444,  1300.32189904,   -48.89309853,  -535.19735391,
                -476.27334527])
        np.testing.assert_allclose(results.resid_pearson, 
                [ 4.01269878, -0.72726045, -2.221602  , -0.19477008, -1.07425881,
                -2.09445239,  2.59451042,  0.49969118,  1.89131202,  9.64143836,
                -0.71714142, -1.19813392,  2.23861212,  3.34207756, -2.0637814 ,
                -1.3802231 ,  1.33568403,  4.87662684,  0.68309584,  0.97442591,
                -2.27043598, -1.07931992, -0.17884182,  2.97186889, -1.23768025,
                -2.07392709,  1.1723155 , -0.47246327,  1.55789092, -0.23614708,
                -2.35819937, -1.23096188, -1.40274877, -2.33362391, -1.25091503,
                -2.19400568,  1.44755952, -0.8690235 , -1.19989348,  0.59230634,
                -1.21675413, -1.44515442, -1.63370888, -1.40229988, -1.90759306,
                3.67934693, -0.17259375, -2.09312684, -1.51230062])
        np.testing.assert_allclose(results.resid_anscombe, 
                [ 3.70889134, -0.74031295, -2.37729865, -0.19586855, -1.11374751,
                -2.22611959,  2.46352013,  0.49282126,  1.80857757,  8.06444452,
                -0.73610811, -1.25061371,  2.10820431,  3.05467547, -2.22437611,
                -1.45136173,  1.28939698,  4.35942058,  0.66904552,  0.95674923,
                -2.45438937, -1.11429881, -0.17961012,  2.76715848, -1.29658591,
                -2.22816691,  1.13269136, -0.48017382,  1.48562248, -0.23812278,
                -2.51664399, -1.2703721 , -1.4683091 , -2.49907536, -1.30026484,
                -2.32398309,  1.39380683, -0.89495368, -1.23735395,  0.58485202,
                -1.25435224, -1.4968484 , -1.71888038, -1.45756652, -2.01906267,
                3.41729922, -0.17335867, -2.22921828, -1.57470549])
        np.testing.assert_allclose(results.resid_deviance, 
                [ 3.70529668, -0.74027329, -2.37536322, -0.19586751, -1.11349765,
                -2.22466106,  2.46246446,  0.4928057 ,  1.80799655,  8.02696525,
                -0.73602255, -1.25021555,  2.10699958,  3.05084608, -2.22214376,
                -1.45072221,  1.28913747,  4.35106213,  0.6689982 ,  0.95669662,
                -2.45171913, -1.11410444, -0.17960956,  2.76494217, -1.29609865,
                -2.22612429,  1.13247453, -0.48015254,  1.48508549, -0.23812   ,
                -2.51476072, -1.27015583, -1.46777697, -2.49699318, -1.29992892,
                -2.32263069,  1.39348459, -0.89482132, -1.23715363,  0.58483655,
                -1.25415329, -1.49653039, -1.7181055 , -1.45719072, -2.01791949,
                3.41437156, -0.1733581 , -2.22765605, -1.57426046])
        np.testing.assert_allclose(results.null, 
                [ 38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,
                38.42857143,  38.42857143,  38.42857143,  38.42857143,  38.42857143])
        self.assertAlmostEqual(results.D2, .388656011675)
        self.assertAlmostEqual(results.adj_D2, 0.36207583826952761)
 class TestBinomial(unittest.TestCase):
    def setUp(self):
        #London house price data
        #y: 'BATH2'
        y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
            0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
       	self.y = y.reshape((316,1))
        #X: 'FLOORSZ'
       	X = np.array([ 77,  75,  64,  95, 107, 100,  81, 151,  98, 260, 171, 161,  91,
            80,  50,  85,  52,  69,  60,  84, 155,  97,  69, 126,  90,  43,
            51,  41, 140,  80,  52,  86,  66,  60,  40, 155, 138,  97, 115,
            148, 206,  60,  53,  96,  88, 160,  31,  43, 154,  60, 131,  60,
            46,  61, 125, 150,  76,  92,  96, 100, 105,  72,  48,  41,  72,
            65,  60,  65,  98,  33, 144, 111,  91, 108,  38,  48,  95,  63,
            98, 129, 108,  51, 131,  66,  48, 127,  76,  68,  52,  64,  57,
            121,  67,  76, 112,  96,  90,  53,  93,  64,  97,  58,  44, 157,
            53,  70,  71, 167,  47,  70,  96,  77,  75,  71,  67,  47,  71,
            90,  69,  64,  65,  95,  60,  60,  65,  54, 121, 105,  50,  85,
            69,  69,  62,  65,  93,  93,  70,  62, 155,  68, 117,  80,  80,
            75,  98, 114,  86,  70,  50,  51, 163, 124,  59,  95,  51,  63,
            85,  53,  46, 102, 114,  83,  47,  40,  63, 123, 100,  63, 110,
            79,  98,  99, 120,  52,  48,  37,  81,  30,  88,  50,  35, 116,
            67,  45,  80,  86, 109,  59,  75,  60,  71, 141, 121,  50, 168,
            90,  51, 133,  75, 133, 127,  37,  68, 105,  61, 123, 151, 110,
            77, 220,  94,  77,  70, 100,  98, 126,  55, 105,  60, 176, 104,
            68,  62,  70,  48, 102,  80,  97,  66,  80, 102, 160,  55,  60,
            71, 125,  85,  85, 190, 137,  48,  41,  42,  51,  57,  60, 114,
            88,  84, 108,  66,  85,  42,  98,  90, 127, 100,  55,  76,  82,
            63,  80,  71,  76, 121, 109,  92, 160, 109, 185, 100,  90,  90,
            86,  88,  95, 116, 135,  61,  74,  60, 235,  76,  66, 100,  49,
            50,  37, 100,  88,  90,  52,  95,  81,  79,  96,  75,  91,  86,
            83, 180, 108,  80,  96,  49, 117, 117,  86,  46,  66,  95,  57,
            120, 137,  68, 240])
        self.X = X.reshape((316,1))
    def testIWLS(self):
        model = GLM(self.y, self.X, family=Binomial())
        results = model.fit()
        self.assertEqual(results.n, 316)
        self.assertEqual(results.df_model, 1)
        self.assertEqual(results.df_resid, 314)
        self.assertEqual(results.aic, 155.19347530342466)
        self.assertEqual(results.bic, -1656.1095797628657)
        self.assertEqual(results.deviance, 151.19347530342466)
        self.assertEqual(results.llf, -75.596737651712331)
        self.assertEqual(results.null_deviance, 189.16038985881212)
        self.assertEqual(results.scale, 1.0)
        np.testing.assert_allclose(results.params, [-5.33638276,  0.0287754 ])
        np.testing.assert_allclose(results.bse, [ 0.64499904,  0.00518312],
                atol=1.0e-8)
        np.testing.assert_allclose(results.cov_params(), 
            [[  4.16023762e-01,  -3.14338457e-03],
            [ -3.14338457e-03,   2.68646833e-05]])
        np.testing.assert_allclose(results.tvalues, [-8.27347396,  5.55175826])
        np.testing.assert_allclose(results.pvalues, [  1.30111233e-16,
            2.82810512e-08])
        np.testing.assert_allclose(results.conf_int(), 
            [[-6.60055765, -4.07220787],
            [ 0.01861668,  0.03893412]], atol=1.0e-8)
        np.testing.assert_allclose(results.normalized_cov_params, 
            [[  4.16023762e-01,  -3.14338457e-03],
            [ -3.14338457e-03,   2.68646833e-05]])
        np.testing.assert_allclose(results.mu, 
            [ 0.04226237,  0.03999333,  0.02946178,  0.0689636 ,  0.09471181,
            0.07879431,  0.04717464,  0.27065598,  0.07471691,  0.89522144,
            0.39752487,  0.33102718,  0.06192993,  0.04589793,  0.01988679,
            0.0526265 ,  0.02104007,  0.03386636,  0.02634295,  0.05121018,
            0.29396682,  0.07275173,  0.03386636,  0.15307528,  0.06027915,
            0.01631789,  0.02045547,  0.01541937,  0.2128508 ,  0.04589793,
            0.02104007,  0.05407977,  0.0311527 ,  0.02634295,  0.01498855,
            0.29396682,  0.20336776,  0.07275173,  0.11637537,  0.25395607,
            0.64367488,  0.02634295,  0.02164101,  0.07083428,  0.05710047,
            0.32468619,  0.01160845,  0.01631789,  0.28803008,  0.02634295,
            0.17267234,  0.02634295,  0.01776301,  0.02709115,  0.14938186,
            0.26501331,  0.04111287,  0.06362285,  0.07083428,  0.07879431,
            0.08989109,  0.03680743,  0.0187955 ,  0.01541937,  0.03680743,
            0.03029581,  0.02634295,  0.03029581,  0.07471691,  0.01228768,
            0.23277197,  0.10505173,  0.06192993,  0.09720799,  0.01416217,
            0.0187955 ,  0.0689636 ,  0.02865003,  0.07471691,  0.16460503,
            0.09720799,  0.02045547,  0.17267234,  0.0311527 ,  0.0187955 ,
            0.15684317,  0.04111287,  0.03293737,  0.02104007,  0.02946178,
            0.02421701,  0.1353385 ,  0.03203302,  0.04111287,  0.10778798,
            0.07083428,  0.06027915,  0.02164101,  0.06535882,  0.02946178,
            0.07275173,  0.02490638,  0.01678627,  0.30605146,  0.02164101,
            0.03482061,  0.03580075,  0.37030921,  0.0182721 ,  0.03482061,
            0.07083428,  0.04226237,  0.03999333,  0.03580075,  0.03203302,
            0.0182721 ,  0.03580075,  0.06027915,  0.03386636,  0.02946178,
            0.03029581,  0.0689636 ,  0.02634295,  0.02634295,  0.03029581,
            0.02225873,  0.1353385 ,  0.08989109,  0.01988679,  0.0526265 ,
            0.03386636,  0.03386636,  0.02786   ,  0.03029581,  0.06535882,
            0.06535882,  0.03482061,  0.02786   ,  0.29396682,  0.03293737,
            0.12242534,  0.04589793,  0.04589793,  0.03999333,  0.07471691,
            0.11344884,  0.05407977,  0.03482061,  0.01988679,  0.02045547,
            0.34389327,  0.14576223,  0.02561486,  0.0689636 ,  0.02045547,
            0.02865003,  0.0526265 ,  0.02164101,  0.01776301,  0.08307425,
            0.11344884,  0.04982997,  0.0182721 ,  0.01498855,  0.02865003,
            0.14221564,  0.07879431,  0.02865003,  0.10237696,  0.04465416,
            0.07471691,  0.07673078,  0.13200634,  0.02104007,  0.0187955 ,
            0.01376599,  0.04717464,  0.01128289,  0.05710047,  0.01988679,
            0.01300612,  0.11936722,  0.03203302,  0.01726786,  0.04589793,
            0.05407977,  0.09976271,  0.02561486,  0.03999333,  0.02634295,
            0.03580075,  0.21771181,  0.1353385 ,  0.01988679,  0.37704374,
            0.06027915,  0.02045547,  0.18104935,  0.03999333,  0.18104935,
            0.15684317,  0.01376599,  0.03293737,  0.08989109,  0.02709115,
            0.14221564,  0.27065598,  0.10237696,  0.04226237,  0.72991785,
            0.06713876,  0.04226237,  0.03482061,  0.07879431,  0.07471691,
            0.15307528,  0.02289366,  0.08989109,  0.02634295,  0.43243779,
            0.08756457,  0.03293737,  0.02786   ,  0.03482061,  0.0187955 ,
            0.08307425,  0.04589793,  0.07275173,  0.0311527 ,  0.04589793,
            0.08307425,  0.32468619,  0.02289366,  0.02634295,  0.03580075,
            0.14938186,  0.0526265 ,  0.0526265 ,  0.53268924,  0.19874565,
            0.0187955 ,  0.01541937,  0.01586237,  0.02045547,  0.02421701,
            0.02634295,  0.11344884,  0.05710047,  0.05121018,  0.09720799,
            0.0311527 ,  0.0526265 ,  0.01586237,  0.07471691,  0.06027915,
            0.15684317,  0.07879431,  0.02289366,  0.04111287,  0.04848506,
            0.02865003,  0.04589793,  0.03580075,  0.04111287,  0.1353385 ,
            0.09976271,  0.06362285,  0.32468619,  0.09976271,  0.49676673,
            0.07879431,  0.06027915,  0.06027915,  0.05407977,  0.05710047,
            0.0689636 ,  0.11936722,  0.18973955,  0.02709115,  0.03890304,
            0.02634295,  0.80625182,  0.04111287,  0.0311527 ,  0.07879431,
            0.0193336 ,  0.01988679,  0.01376599,  0.07879431,  0.05710047,
            0.06027915,  0.02104007,  0.0689636 ,  0.04717464,  0.04465416,
            0.07083428,  0.03999333,  0.06192993,  0.05407977,  0.04982997,
            0.46087756,  0.09720799,  0.04589793,  0.07083428,  0.0193336 ,
            0.12242534,  0.12242534,  0.05407977,  0.01776301,  0.0311527 ,
            0.0689636 ,  0.02421701,  0.13200634,  0.19874565,  0.03293737,
            0.82774282], atol=1.0e-8)
        self.assertAlmostEqual(results.pearson_chi2, 271.21110541713801)
        np.testing.assert_allclose(results.resid_response, 
            [-0.04226237, -0.03999333, -0.02946178, -0.0689636 , -0.09471181,
            -0.07879431, -0.04717464, -0.27065598, -0.07471691,  0.10477856,
            -0.39752487,  0.66897282, -0.06192993, -0.04589793, -0.01988679,
            -0.0526265 , -0.02104007, -0.03386636, -0.02634295, -0.05121018,
            -0.29396682,  0.92724827, -0.03386636, -0.15307528, -0.06027915,
            -0.01631789, -0.02045547, -0.01541937, -0.2128508 , -0.04589793,
            -0.02104007, -0.05407977, -0.0311527 , -0.02634295, -0.01498855,
            -0.29396682,  0.79663224, -0.07275173, -0.11637537,  0.74604393,
            -0.64367488, -0.02634295, -0.02164101, -0.07083428, -0.05710047,
            -0.32468619, -0.01160845, -0.01631789, -0.28803008, -0.02634295,
            -0.17267234, -0.02634295, -0.01776301, -0.02709115,  0.85061814,
            0.73498669, -0.04111287, -0.06362285, -0.07083428, -0.07879431,
            0.91010891, -0.03680743, -0.0187955 , -0.01541937, -0.03680743,
            -0.03029581, -0.02634295, -0.03029581, -0.07471691, -0.01228768,
            0.76722803, -0.10505173, -0.06192993, -0.09720799, -0.01416217,
            -0.0187955 , -0.0689636 , -0.02865003, -0.07471691, -0.16460503,
            -0.09720799, -0.02045547,  0.82732766, -0.0311527 , -0.0187955 ,
            -0.15684317, -0.04111287, -0.03293737, -0.02104007, -0.02946178,
            -0.02421701, -0.1353385 , -0.03203302, -0.04111287, -0.10778798,
            -0.07083428, -0.06027915, -0.02164101, -0.06535882, -0.02946178,
            -0.07275173, -0.02490638, -0.01678627, -0.30605146, -0.02164101,
            -0.03482061, -0.03580075,  0.62969079, -0.0182721 , -0.03482061,
            -0.07083428, -0.04226237, -0.03999333, -0.03580075, -0.03203302,
            -0.0182721 , -0.03580075, -0.06027915, -0.03386636, -0.02946178,
            -0.03029581, -0.0689636 , -0.02634295, -0.02634295, -0.03029581,
            -0.02225873, -0.1353385 , -0.08989109, -0.01988679, -0.0526265 ,
            -0.03386636, -0.03386636, -0.02786   , -0.03029581, -0.06535882,
            -0.06535882, -0.03482061, -0.02786   , -0.29396682, -0.03293737,
            -0.12242534, -0.04589793, -0.04589793, -0.03999333, -0.07471691,
            -0.11344884, -0.05407977, -0.03482061, -0.01988679, -0.02045547,
            0.65610673,  0.85423777, -0.02561486, -0.0689636 , -0.02045547,
            -0.02865003, -0.0526265 , -0.02164101, -0.01776301, -0.08307425,
            -0.11344884, -0.04982997, -0.0182721 , -0.01498855, -0.02865003,
            -0.14221564, -0.07879431, -0.02865003, -0.10237696, -0.04465416,
            -0.07471691, -0.07673078, -0.13200634, -0.02104007, -0.0187955 ,
            -0.01376599, -0.04717464, -0.01128289,  0.94289953, -0.01988679,
            -0.01300612, -0.11936722, -0.03203302, -0.01726786, -0.04589793,
            -0.05407977, -0.09976271, -0.02561486, -0.03999333, -0.02634295,
            -0.03580075, -0.21771181,  0.8646615 , -0.01988679,  0.62295626,
            -0.06027915, -0.02045547, -0.18104935,  0.96000667, -0.18104935,
            -0.15684317, -0.01376599, -0.03293737, -0.08989109, -0.02709115,
            -0.14221564,  0.72934402, -0.10237696, -0.04226237, -0.72991785,
            -0.06713876, -0.04226237, -0.03482061, -0.07879431, -0.07471691,
            -0.15307528,  0.97710634,  0.91010891, -0.02634295, -0.43243779,
            -0.08756457, -0.03293737, -0.02786   , -0.03482061, -0.0187955 ,
             0.91692575, -0.04589793, -0.07275173, -0.0311527 , -0.04589793,
            -0.08307425,  0.67531381, -0.02289366, -0.02634295, -0.03580075,
            -0.14938186, -0.0526265 , -0.0526265 ,  0.46731076, -0.19874565,
            -0.0187955 , -0.01541937, -0.01586237, -0.02045547, -0.02421701,
            -0.02634295, -0.11344884, -0.05710047, -0.05121018, -0.09720799,
            0.9688473 , -0.0526265 , -0.01586237, -0.07471691, -0.06027915,
            -0.15684317, -0.07879431, -0.02289366, -0.04111287, -0.04848506,
            -0.02865003, -0.04589793, -0.03580075, -0.04111287, -0.1353385 ,
            -0.09976271, -0.06362285,  0.67531381, -0.09976271, -0.49676673,
            -0.07879431, -0.06027915, -0.06027915, -0.05407977, -0.05710047,
            -0.0689636 , -0.11936722, -0.18973955, -0.02709115, -0.03890304,
            -0.02634295,  0.19374818, -0.04111287, -0.0311527 , -0.07879431,
            -0.0193336 , -0.01988679, -0.01376599, -0.07879431,  0.94289953,
            -0.06027915, -0.02104007, -0.0689636 , -0.04717464, -0.04465416,
            0.92916572, -0.03999333, -0.06192993, -0.05407977, -0.04982997,
            -0.46087756, -0.09720799, -0.04589793, -0.07083428, -0.0193336 ,
            -0.12242534, -0.12242534, -0.05407977, -0.01776301, -0.0311527 ,
            -0.0689636 , -0.02421701, -0.13200634, -0.19874565, -0.03293737,
            -0.82774282], atol=1.0e-8)
        np.testing.assert_allclose(results.resid_working, 
            [ -1.71062283e-03,  -1.53549840e-03,  -8.42423701e-04,
            -4.42798906e-03,  -8.12073047e-03,  -5.71934606e-03,
            -2.12046213e-03,  -5.34278480e-02,  -5.16550074e-03,
            9.82823035e-03,  -9.52067472e-02,   1.48142818e-01,
            -3.59779501e-03,  -2.00993083e-03,  -3.87619325e-04,
            -2.62379729e-03,  -4.33370579e-04,  -1.10808799e-03,
            -6.75670103e-04,  -2.48818484e-03,  -6.10129090e-02,
            6.25511612e-02,  -1.10808799e-03,  -1.98451739e-02,
            -3.41454749e-03,  -2.61928659e-04,  -4.09867263e-04,
            -2.34090923e-04,  -3.56621577e-02,  -2.00993083e-03,
            -4.33370579e-04,  -2.76645832e-03,  -9.40257152e-04,
            -6.75670103e-04,  -2.21289369e-04,  -6.10129090e-02,
            1.29061842e-01,  -4.90775251e-03,  -1.19671283e-02,
            1.41347263e-01,  -1.47631680e-01,  -6.75670103e-04,
            -4.58198217e-04,  -4.66208406e-03,  -3.07429001e-03,
            -7.11923401e-02,  -1.33191898e-04,  -2.61928659e-04,
            -5.90659690e-02,  -6.75670103e-04,  -2.46673839e-02,
            -6.75670103e-04,  -3.09919962e-04,  -7.14047519e-04,
            1.08085429e-01,   1.43161630e-01,  -1.62077632e-03,
            -3.79032977e-03,  -4.66208406e-03,  -5.71934606e-03,
            7.44566288e-02,  -1.30492035e-03,  -3.46630910e-04,
            -2.34090923e-04,  -1.30492035e-03,  -8.90029618e-04,
            -6.75670103e-04,  -8.90029618e-04,  -5.16550074e-03,
            -1.49131762e-04,   1.37018624e-01,  -9.87652847e-03,
            -3.59779501e-03,  -8.53083698e-03,  -1.97726627e-04,
            -3.46630910e-04,  -4.42798906e-03,  -7.97307494e-04,
            -5.16550074e-03,  -2.26348718e-02,  -8.53083698e-03,
            -4.09867263e-04,   1.18189219e-01,  -9.40257152e-04,
            -3.46630910e-04,  -2.07414715e-02,  -1.62077632e-03,
            -1.04913757e-03,  -4.33370579e-04,  -8.42423701e-04,
            -5.72261321e-04,  -1.58375811e-02,  -9.93244730e-04,
            -1.62077632e-03,  -1.03659408e-02,  -4.66208406e-03,
            -3.41454749e-03,  -4.58198217e-04,  -3.99257703e-03,
            -8.42423701e-04,  -4.90775251e-03,  -6.04877746e-04,
            -2.77048947e-04,  -6.50004229e-02,  -4.58198217e-04,
            -1.17025566e-03,  -1.23580799e-03,   1.46831486e-01,
            -3.27769165e-04,  -1.17025566e-03,  -4.66208406e-03,
            -1.71062283e-03,  -1.53549840e-03,  -1.23580799e-03,
            -9.93244730e-04,  -3.27769165e-04,  -1.23580799e-03,
            -3.41454749e-03,  -1.10808799e-03,  -8.42423701e-04,
            -8.90029618e-04,  -4.42798906e-03,  -6.75670103e-04,
            -6.75670103e-04,  -8.90029618e-04,  -4.84422741e-04,
            -1.58375811e-02,  -7.35405096e-03,  -3.87619325e-04,
            -2.62379729e-03,  -1.10808799e-03,  -1.10808799e-03,
            -7.54555329e-04,  -8.90029618e-04,  -3.99257703e-03,
            -3.99257703e-03,  -1.17025566e-03,  -7.54555329e-04,
            -6.10129090e-02,  -1.04913757e-03,  -1.31530576e-02,
            -2.00993083e-03,  -2.00993083e-03,  -1.53549840e-03,
            -5.16550074e-03,  -1.14104800e-02,  -2.76645832e-03,
            -1.17025566e-03,  -3.87619325e-04,  -4.09867263e-04,
            1.48037813e-01,   1.06365931e-01,  -6.39314594e-04,
            -4.42798906e-03,  -4.09867263e-04,  -7.97307494e-04,
            -2.62379729e-03,  -4.58198217e-04,  -3.09919962e-04,
            -6.32800839e-03,  -1.14104800e-02,  -2.35929680e-03,
            -3.27769165e-04,  -2.21289369e-04,  -7.97307494e-04,
            -1.73489362e-02,  -5.71934606e-03,  -7.97307494e-04,
            -9.40802551e-03,  -1.90495384e-03,  -5.16550074e-03,
            -5.43585191e-03,  -1.51253748e-02,  -4.33370579e-04,
            -3.46630910e-04,  -1.86893696e-04,  -2.12046213e-03,
            -1.25867293e-04,   5.07657192e-02,  -3.87619325e-04,
            -1.66959104e-04,  -1.25477263e-02,  -9.93244730e-04,
            -2.93030065e-04,  -2.00993083e-03,  -2.76645832e-03,
            -8.95970087e-03,  -6.39314594e-04,  -1.53549840e-03,
            -6.75670103e-04,  -1.23580799e-03,  -3.70792339e-02,
            1.01184411e-01,  -3.87619325e-04,   1.46321062e-01,
            -3.41454749e-03,  -4.09867263e-04,  -2.68442736e-02,
            3.68583645e-02,  -2.68442736e-02,  -2.07414715e-02,
            -1.86893696e-04,  -1.04913757e-03,  -7.35405096e-03,
            -7.14047519e-04,  -1.73489362e-02,   1.43973473e-01,
            -9.40802551e-03,  -1.71062283e-03,  -1.43894386e-01,
            -4.20497779e-03,  -1.71062283e-03,  -1.17025566e-03,
            -5.71934606e-03,  -5.16550074e-03,  -1.98451739e-02,
            2.18574168e-02,   7.44566288e-02,  -6.75670103e-04,
            -1.06135519e-01,  -6.99614755e-03,  -1.04913757e-03,
            -7.54555329e-04,  -1.17025566e-03,  -3.46630910e-04,
            6.98449121e-02,  -2.00993083e-03,  -4.90775251e-03,
            -9.40257152e-04,  -2.00993083e-03,  -6.32800839e-03,
            1.48072729e-01,  -5.12120512e-04,  -6.75670103e-04,
            -1.23580799e-03,  -1.89814939e-02,  -2.62379729e-03,
            -2.62379729e-03,   1.16328328e-01,  -3.16494123e-02,
            -3.46630910e-04,  -2.34090923e-04,  -2.47623705e-04,
            -4.09867263e-04,  -5.72261321e-04,  -6.75670103e-04,
            -1.14104800e-02,  -3.07429001e-03,  -2.48818484e-03,
            -8.53083698e-03,   2.92419496e-02,  -2.62379729e-03,
            -2.47623705e-04,  -5.16550074e-03,  -3.41454749e-03,
            -2.07414715e-02,  -5.71934606e-03,  -5.12120512e-04,
            -1.62077632e-03,  -2.23682205e-03,  -7.97307494e-04,
            -2.00993083e-03,  -1.23580799e-03,  -1.62077632e-03,
            -1.58375811e-02,  -8.95970087e-03,  -3.79032977e-03,
            1.48072729e-01,  -8.95970087e-03,  -1.24186489e-01,
            -5.71934606e-03,  -3.41454749e-03,  -3.41454749e-03,
            -2.76645832e-03,  -3.07429001e-03,  -4.42798906e-03,
            -1.25477263e-02,  -2.91702648e-02,  -7.14047519e-04,
            -1.45456868e-03,  -6.75670103e-04,   3.02653681e-02,
            -1.62077632e-03,  -9.40257152e-04,  -5.71934606e-03,
            -3.66561274e-04,  -3.87619325e-04,  -1.86893696e-04,
            -5.71934606e-03,   5.07657192e-02,  -3.41454749e-03,
            -4.33370579e-04,  -4.42798906e-03,  -2.12046213e-03,
            -1.90495384e-03,   6.11546973e-02,  -1.53549840e-03,
            -3.59779501e-03,  -2.76645832e-03,  -2.35929680e-03,
            -1.14513988e-01,  -8.53083698e-03,  -2.00993083e-03,
            -4.66208406e-03,  -3.66561274e-04,  -1.31530576e-02,
            -1.31530576e-02,  -2.76645832e-03,  -3.09919962e-04,
            -9.40257152e-04,  -4.42798906e-03,  -5.72261321e-04,
            -1.51253748e-02,  -3.16494123e-02,  -1.04913757e-03,
            -1.18023417e-01])
        np.testing.assert_allclose(results.resid_pearson, 
            [-0.21006498, -0.20410641, -0.17423009, -0.27216147, -0.3234511 ,
            -0.29246179, -0.22250903, -0.60917574, -0.28416602,  0.3421141 ,
            -0.81229277,  1.42158361, -0.25694055, -0.21933056, -0.142444  ,
            -0.23569027, -0.14660243, -0.18722578, -0.16448609, -0.2323235 ,
            -0.64526275,  3.57006696, -0.18722578, -0.42513819, -0.25327023,
            -0.12879668, -0.14450826, -0.12514332, -0.5200069 , -0.21933056,
            -0.14660243, -0.23910582, -0.17931646, -0.16448609, -0.12335569,
            -0.64526275,  1.97919183, -0.28010679, -0.36290807,  1.71396874,
            -1.3440334 , -0.16448609, -0.14872695, -0.27610555, -0.24608613,
            -0.69339243, -0.1083734 , -0.12879668, -0.63604537, -0.16448609,
            -0.45684893, -0.16448609, -0.13447767, -0.16686977,  2.3862634 ,
            1.66535145, -0.20706426, -0.26066405, -0.27610555, -0.29246179,
            3.18191348, -0.19548397, -0.13840353, -0.12514332, -0.19548397,
            -0.17675498, -0.16448609, -0.17675498, -0.28416602, -0.11153719,
            1.81550268, -0.34261205, -0.25694055, -0.32813846, -0.11985666,
            -0.13840353, -0.27216147, -0.17174127, -0.28416602, -0.44389026,
            -0.32813846, -0.14450826,  2.18890738, -0.17931646, -0.13840353,
            -0.43129917, -0.20706426, -0.18455132, -0.14660243, -0.17423009,
            -0.1575374 , -0.39562855, -0.18191506, -0.20706426, -0.34757708,
            -0.27610555, -0.25327023, -0.14872695, -0.26444152, -0.17423009,
            -0.28010679, -0.15982038, -0.13066317, -0.66410018, -0.14872695,
            -0.189939  , -0.19269154,  1.30401147, -0.13642648, -0.189939  ,
            -0.27610555, -0.21006498, -0.20410641, -0.19269154, -0.18191506,
            -0.13642648, -0.19269154, -0.25327023, -0.18722578, -0.17423009,
            -0.17675498, -0.27216147, -0.16448609, -0.16448609, -0.17675498,
            -0.15088226, -0.39562855, -0.3142763 , -0.142444  , -0.23569027,
            -0.18722578, -0.18722578, -0.169288  , -0.17675498, -0.26444152,
            -0.26444152, -0.189939  , -0.169288  , -0.64526275, -0.18455132,
            -0.3735026 , -0.21933056, -0.21933056, -0.20410641, -0.28416602,
            -0.35772404, -0.23910582, -0.189939  , -0.142444  , -0.14450826,
            1.38125991,  2.42084442, -0.16213645, -0.27216147, -0.14450826,
            -0.17174127, -0.23569027, -0.14872695, -0.13447767, -0.30099975,
            -0.35772404, -0.22900483, -0.13642648, -0.12335569, -0.17174127,
            -0.4071783 , -0.29246179, -0.17174127, -0.33771794, -0.21619749,
            -0.28416602, -0.28828407, -0.38997712, -0.14660243, -0.13840353,
            -0.11814455, -0.22250903, -0.10682532,  4.06361781, -0.142444  ,
            -0.11479334, -0.36816723, -0.18191506, -0.1325567 , -0.21933056,
            -0.23910582, -0.33289374, -0.16213645, -0.20410641, -0.16448609,
            -0.19269154, -0.52754269,  2.52762346, -0.142444  ,  1.28538406,
            -0.25327023, -0.14450826, -0.47018591,  4.89940505, -0.47018591,
            -0.43129917, -0.11814455, -0.18455132, -0.3142763 , -0.16686977,
            -0.4071783 ,  1.64156241, -0.33771794, -0.21006498, -1.6439517 ,
            -0.26827373, -0.21006498, -0.189939  , -0.29246179, -0.28416602,
            -0.42513819,  6.53301013,  3.18191348, -0.16448609, -0.87288109,
            -0.30978696, -0.18455132, -0.169288  , -0.189939  , -0.13840353,
             3.32226189, -0.21933056, -0.28010679, -0.17931646, -0.21933056,
            -0.30099975,  1.44218477, -0.1530688 , -0.16448609, -0.19269154,
            -0.41906522, -0.23569027, -0.23569027,  0.93662539, -0.4980393 ,
            -0.13840353, -0.12514332, -0.12695686, -0.14450826, -0.1575374 ,
            -0.16448609, -0.35772404, -0.24608613, -0.2323235 , -0.32813846,
            5.57673284, -0.23569027, -0.12695686, -0.28416602, -0.25327023,
            -0.43129917, -0.29246179, -0.1530688 , -0.20706426, -0.22573357,
            -0.17174127, -0.21933056, -0.19269154, -0.20706426, -0.39562855,
            -0.33289374, -0.26066405,  1.44218477, -0.33289374, -0.99355423,
            -0.29246179, -0.25327023, -0.25327023, -0.23910582, -0.24608613,
            -0.27216147, -0.36816723, -0.48391225, -0.16686977, -0.20119082,
            -0.16448609,  0.49021146, -0.20706426, -0.17931646, -0.29246179,
            -0.14040923, -0.142444  , -0.11814455, -0.29246179,  4.06361781,
            -0.25327023, -0.14660243, -0.27216147, -0.22250903, -0.21619749,
            3.6218033 , -0.20410641, -0.25694055, -0.23910582, -0.22900483,
            -0.92458976, -0.32813846, -0.21933056, -0.27610555, -0.14040923,
            -0.3735026 , -0.3735026 , -0.23910582, -0.13447767, -0.17931646,
            -0.27216147, -0.1575374 , -0.38997712, -0.4980393 , -0.18455132,
            -2.19209332])
        np.testing.assert_allclose(results.resid_anscombe, 
            [-0.31237627, -0.3036605 , -0.25978208, -0.40240831, -0.47552289,
            -0.43149255, -0.33053793, -0.85617194, -0.41962951,  0.50181328,
            -1.0954382 ,  1.66940149, -0.38048321, -0.3259044 , -0.21280762,
            -0.34971301, -0.21896842, -0.27890356, -0.2454118 , -0.34482158,
            -0.90063409,  2.80452413, -0.27890356, -0.61652596, -0.37518169,
            -0.19255932, -0.2158664 , -0.18713159, -0.74270558, -0.3259044 ,
            -0.21896842, -0.35467084, -0.2672722 , -0.2454118 , -0.18447466,
            -0.90063409,  2.05763941, -0.41381347, -0.53089521,  1.88552083,
            -1.60654218, -0.2454118 , -0.22211425, -0.40807333, -0.3647888 ,
            -0.95861559, -0.16218047, -0.19255932, -0.88935802, -0.2454118 ,
            -0.65930821, -0.2454118 , -0.20099345, -0.24892975,  2.28774016,
            1.85167195, -0.30798858, -0.38585584, -0.40807333, -0.43149255,
            2.65398426, -0.2910267 , -0.20681747, -0.18713159, -0.2910267 ,
            -0.26350118, -0.2454118 , -0.26350118, -0.41962951, -0.16689207,
             1.95381191, -0.50251231, -0.38048321, -0.48214234, -0.17927213,
            -0.20681747, -0.40240831, -0.25611424, -0.41962951, -0.64189694,
            -0.48214234, -0.2158664 ,  2.18071204, -0.2672722 , -0.20681747,
            -0.62488429, -0.30798858, -0.27497271, -0.21896842, -0.25978208,
            -0.23514749, -0.57618899, -0.27109582, -0.30798858, -0.50947546,
            -0.40807333, -0.37518169, -0.22211425, -0.39130036, -0.25978208,
            -0.41381347, -0.2385213 , -0.19533116, -0.92350689, -0.22211425,
            -0.28288904, -0.28692985,  1.5730846 , -0.20388497, -0.28288904,
            -0.40807333, -0.31237627, -0.3036605 , -0.28692985, -0.27109582,
            -0.20388497, -0.28692985, -0.37518169, -0.27890356, -0.25978208,
            -0.26350118, -0.40240831, -0.2454118 , -0.2454118 , -0.26350118,
            -0.22530448, -0.57618899, -0.46253505, -0.21280762, -0.34971301,
            -0.27890356, -0.27890356, -0.25249702, -0.26350118, -0.39130036,
            -0.39130036, -0.28288904, -0.25249702, -0.90063409, -0.27497271,
            -0.5456246 , -0.3259044 , -0.3259044 , -0.3036605 , -0.41962951,
            -0.52366614, -0.35467084, -0.28288904, -0.21280762, -0.2158664 ,
            1.63703418,  2.30570989, -0.24194253, -0.40240831, -0.2158664 ,
            -0.25611424, -0.34971301, -0.22211425, -0.20099345, -0.44366892,
            -0.52366614, -0.33999576, -0.20388497, -0.18447466, -0.25611424,
            -0.59203547, -0.43149255, -0.25611424, -0.49563627, -0.32133344,
            -0.41962951, -0.42552227, -0.56840788, -0.21896842, -0.20681747,
            -0.17672552, -0.33053793, -0.15987433,  2.9768074 , -0.21280762,
            -0.17173916, -0.53821445, -0.27109582, -0.19814236, -0.3259044 ,
            -0.35467084, -0.48884654, -0.24194253, -0.3036605 , -0.2454118 ,
            -0.28692985, -0.75249089,  2.35983933, -0.21280762,  1.55726719,
            -0.37518169, -0.2158664 , -0.67712261,  3.23165236, -0.67712261,
            -0.62488429, -0.17672552, -0.27497271, -0.46253505, -0.24892975,
            -0.59203547,  1.83482464, -0.49563627, -0.31237627, -1.83652534,
            -0.39681759, -0.31237627, -0.28288904, -0.43149255, -0.41962951,
            -0.61652596,  3.63983609,  2.65398426, -0.2454118 , -1.16171662,
            -0.45616505, -0.27497271, -0.25249702, -0.28288904, -0.20681747,
            2.71015945, -0.3259044 , -0.41381347, -0.2672722 , -0.3259044 ,
            -0.44366892,  1.68567947, -0.22853969, -0.2454118 , -0.28692985,
            -0.60826548, -0.34971301, -0.34971301,  1.2290223 , -0.71397735,
            -0.20681747, -0.18713159, -0.1898263 , -0.2158664 , -0.23514749,
            -0.2454118 , -0.52366614, -0.3647888 , -0.34482158, -0.48214234,
            3.41271513, -0.34971301, -0.1898263 , -0.41962951, -0.37518169,
            -0.62488429, -0.43149255, -0.22853969, -0.30798858, -0.3352348 ,
            -0.25611424, -0.3259044 , -0.28692985, -0.30798858, -0.57618899,
            -0.48884654, -0.38585584,  1.68567947, -0.48884654, -1.28709718,
            -0.43149255, -0.37518169, -0.37518169, -0.35467084, -0.3647888 ,
            -0.40240831, -0.53821445, -0.69534436, -0.24892975, -0.29939131,
            -0.2454118 ,  0.70366797, -0.30798858, -0.2672722 , -0.43149255,
            -0.2097915 , -0.21280762, -0.17672552, -0.43149255,  2.9768074 ,
            -0.37518169, -0.21896842, -0.40240831, -0.33053793, -0.32133344,
            2.82351017, -0.3036605 , -0.38048321, -0.35467084, -0.33999576,
            -1.21650102, -0.48214234, -0.3259044 , -0.40807333, -0.2097915 ,
            -0.5456246 , -0.5456246 , -0.35467084, -0.20099345, -0.2672722 ,
            -0.40240831, -0.23514749, -0.56840788, -0.71397735, -0.27497271,
            -2.18250381])
        np.testing.assert_allclose(results.resid_deviance, 
            [-0.29387552, -0.2857098 , -0.24455876, -0.37803944, -0.44609851,
            -0.40514674, -0.31088148, -0.79449324, -0.39409528,  0.47049798,
            -1.00668653,  1.48698001, -0.35757692, -0.30654405, -0.20043547,
            -0.32882173, -0.20622595, -0.26249995, -0.23106769, -0.32424676,
            -0.83437766,  2.28941155, -0.26249995, -0.57644334, -0.35262564,
            -0.18139734, -0.20331052, -0.17629229, -0.69186337, -0.30654405,
            -0.20622595, -0.33345774, -0.251588  , -0.23106769, -0.17379306,
            -0.83437766,  1.78479093, -0.38867448, -0.4974393 ,  1.65565332,
            -1.43660134, -0.23106769, -0.20918228, -0.38332275, -0.34291558,
            -0.88609006, -0.15281596, -0.18139734, -0.82428104, -0.23106769,
            -0.61571821, -0.23106769, -0.18932865, -0.234371  ,  1.94999969,
            1.62970871, -0.2897651 , -0.36259328, -0.38332275, -0.40514674,
            2.19506559, -0.27386827, -0.19480442, -0.17629229, -0.27386827,
            -0.24804925, -0.23106769, -0.24804925, -0.39409528, -0.15725009,
            1.7074519 , -0.47114617, -0.35757692, -0.4522457 , -0.16889886,
            -0.19480442, -0.37803944, -0.24111595, -0.39409528, -0.59975102,
            -0.4522457 , -0.20331052,  1.87422489, -0.251588  , -0.19480442,
            -0.5841272 , -0.2897651 , -0.25881274, -0.20622595, -0.24455876,
            -0.22142749, -0.53929061, -0.25517563, -0.2897651 , -0.47760126,
            -0.38332275, -0.35262564, -0.20918228, -0.36767536, -0.24455876,
            -0.38867448, -0.2245965 , -0.18400413, -0.85481866, -0.20918228,
            -0.26623785, -0.27002708,  1.40955093, -0.19204738, -0.26623785,
            -0.38332275, -0.29387552, -0.2857098 , -0.27002708, -0.25517563,
            -0.19204738, -0.27002708, -0.35262564, -0.26249995, -0.24455876,
            -0.24804925, -0.37803944, -0.23106769, -0.23106769, -0.24804925,
            -0.21218006, -0.53929061, -0.43402996, -0.20043547, -0.32882173,
            -0.26249995, -0.26249995, -0.23772023, -0.24804925, -0.36767536,
            -0.36767536, -0.26623785, -0.23772023, -0.83437766, -0.25881274,
            -0.51106408, -0.30654405, -0.30654405, -0.2857098 , -0.39409528,
            -0.49074728, -0.33345774, -0.26623785, -0.20043547, -0.20331052,
            1.46111186,  1.96253843, -0.22780971, -0.37803944, -0.20331052,
            -0.24111595, -0.32882173, -0.20918228, -0.18932865, -0.41648237,
            -0.49074728, -0.31973217, -0.19204738, -0.17379306, -0.24111595,
            -0.55389988, -0.40514674, -0.24111595, -0.46476893, -0.30226435,
            -0.39409528, -0.39958581, -0.53211065, -0.20622595, -0.19480442,
            -0.16650295, -0.31088148, -0.15064545,  2.39288231, -0.20043547,
            -0.16181126, -0.5042114 , -0.25517563, -0.18664773, -0.30654405,
            -0.33345774, -0.45846897, -0.22780971, -0.2857098 , -0.23106769,
            -0.27002708, -0.7007597 ,  1.99998811, -0.20043547,  1.39670618,
            -0.35262564, -0.20331052, -0.63203077,  2.53733821, -0.63203077,
            -0.5841272 , -0.16650295, -0.25881274, -0.43402996, -0.234371  ,
            -0.55389988,  1.61672923, -0.46476893, -0.29387552, -1.61804148,
            -0.37282386, -0.29387552, -0.26623785, -0.40514674, -0.39409528,
            -0.57644334,  2.74841605,  2.19506559, -0.23106769, -1.06433539,
            -0.42810736, -0.25881274, -0.23772023, -0.26623785, -0.19480442,
            2.23070414, -0.30654405, -0.38867448, -0.251588  , -0.30654405,
            -0.41648237,  1.49993075, -0.21521982, -0.23106769, -0.27002708,
            -0.5688444 , -0.32882173, -0.32882173,  1.12233423, -0.66569789,
            -0.19480442, -0.17629229, -0.17882689, -0.20331052, -0.22142749,
            -0.23106769, -0.49074728, -0.34291558, -0.32424676, -0.4522457 ,
            2.63395309, -0.32882173, -0.17882689, -0.39409528, -0.35262564,
            -0.5841272 , -0.40514674, -0.21521982, -0.2897651 , -0.3152773 ,
            -0.24111595, -0.30654405, -0.27002708, -0.2897651 , -0.53929061,
            -0.45846897, -0.36259328,  1.49993075, -0.45846897, -1.17192274,
            -0.40514674, -0.35262564, -0.35262564, -0.33345774, -0.34291558,
            -0.37803944, -0.5042114 , -0.64869028, -0.234371  , -0.28170899,
            -0.23106769,  0.65629132, -0.2897651 , -0.251588  , -0.40514674,
            -0.19760028, -0.20043547, -0.16650295, -0.40514674,  2.39288231,
            -0.35262564, -0.20622595, -0.37803944, -0.31088148, -0.30226435,
            2.30104857, -0.2857098 , -0.35757692, -0.33345774, -0.31973217,
            -1.11158678, -0.4522457 , -0.30654405, -0.38332275, -0.19760028,
            -0.51106408, -0.51106408, -0.33345774, -0.18932865, -0.251588  ,
            -0.37803944, -0.22142749, -0.53211065, -0.66569789, -0.25881274,
            -1.87550882])
        np.testing.assert_allclose(results.null, 
            [ 0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759,  0.08860759,  0.08860759,  0.08860759,  0.08860759,
            0.08860759])
        self.assertAlmostEqual(results.D2, .200712816165)
        self.assertAlmostEqual(results.adj_D2, 0.19816731557930456)
 if __name__ == '__main__':
 	unittest.main()
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/utils.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/utils.py
@ -0,0 +1,350 @@
 from __future__ import absolute_import, print_function
 import numpy as np
 import warnings
 def _bit_length_26(x):
    if x == 0:
        return 0
    elif x == 1:
        return 1
    else:
        return len(bin(x)) - 2
 try:
    from scipy.lib._version import NumpyVersion
 except ImportError:
    import re
    string_types = basestring 
    class NumpyVersion():
        """Parse and compare numpy version strings.
        Numpy has the following versioning scheme (numbers given are examples; they
        can be >9) in principle):
        - Released version: '1.8.0', '1.8.1', etc.
        - Alpha: '1.8.0a1', '1.8.0a2', etc.
        - Beta: '1.8.0b1', '1.8.0b2', etc.
        - Release candidates: '1.8.0rc1', '1.8.0rc2', etc.
        - Development versions: '1.8.0.dev-f1234afa' (git commit hash appended)
        - Development versions after a1: '1.8.0a1.dev-f1234afa',
                                        '1.8.0b2.dev-f1234afa',
                                        '1.8.1rc1.dev-f1234afa', etc.
        - Development versions (no git hash available): '1.8.0.dev-Unknown'
        Comparing needs to be done against a valid version string or other
        `NumpyVersion` instance.
        Parameters
        ----------
        vstring : str
            Numpy version string (``np.__version__``).
        Notes
        -----
        All dev versions of the same (pre-)release compare equal.
        Examples
        --------
        >>> from scipy.lib._version import NumpyVersion
        >>> if NumpyVersion(np.__version__) < '1.7.0':
        ...     print('skip')
        skip
        >>> NumpyVersion('1.7')  # raises ValueError, add ".0"
        """
        def __init__(self, vstring):
            self.vstring = vstring
            ver_main = re.match(r'\d[.]\d+[.]\d+', vstring)
            if not ver_main:
                raise ValueError("Not a valid numpy version string")
            self.version = ver_main.group()
            self.major, self.minor, self.bugfix = [int(x) for x in
                                                   self.version.split('.')]
            if len(vstring) == ver_main.end():
                self.pre_release = 'final'
            else:
                alpha = re.match(r'a\d', vstring[ver_main.end():])
                beta = re.match(r'b\d', vstring[ver_main.end():])
                rc = re.match(r'rc\d', vstring[ver_main.end():])
                pre_rel = [m for m in [alpha, beta, rc] if m is not None]
                if pre_rel:
                    self.pre_release = pre_rel[0].group()
                else:
                    self.pre_release = ''
            self.is_devversion = bool(re.search(r'.dev-', vstring))
        def _compare_version(self, other):
            """Compare major.minor.bugfix"""
            if self.major == other.major:
                if self.minor == other.minor:
                    if self.bugfix == other.bugfix:
                        vercmp = 0
                    elif self.bugfix > other.bugfix:
                        vercmp = 1
                    else:
                        vercmp = -1
                elif self.minor > other.minor:
                    vercmp = 1
                else:
                    vercmp = -1
            elif self.major > other.major:
                vercmp = 1
            else:
                vercmp = -1
            return vercmp
        def _compare_pre_release(self, other):
            """Compare alpha/beta/rc/final."""
            if self.pre_release == other.pre_release:
                vercmp = 0
            elif self.pre_release == 'final':
                vercmp = 1
            elif other.pre_release == 'final':
                vercmp = -1
            elif self.pre_release > other.pre_release:
                vercmp = 1
            else:
                vercmp = -1
            return vercmp
        def _compare(self, other):
            if not isinstance(other, (string_types, NumpyVersion)):
                raise ValueError("Invalid object to compare with NumpyVersion.")
            if isinstance(other, string_types):
                other = NumpyVersion(other)
            vercmp = self._compare_version(other)
            if vercmp == 0:
                # Same x.y.z version, check for alpha/beta/rc
                vercmp = self._compare_pre_release(other)
                if vercmp == 0:
                    # Same version and same pre-release, check if dev version
                    if self.is_devversion is other.is_devversion:
                        vercmp = 0
                    elif self.is_devversion:
                        vercmp = -1
                    else:
                        vercmp = 1
            return vercmp
        def __lt__(self, other):
            return self._compare(other) < 0
        def __le__(self, other):
            return self._compare(other) <= 0
        def __eq__(self, other):
            return self._compare(other) == 0
        def __ne__(self, other):
            return self._compare(other) != 0
        def __gt__(self, other):
            return self._compare(other) > 0
        def __ge__(self, other):
            return self._compare(other) >= 0
        def __repr(self):
            return "NumpyVersion(%s)" % self.vstring
 def _next_regular(target):
    """
    Find the next regular number greater than or equal to target.
    Regular numbers are composites of the prime factors 2, 3, and 5.
    Also known as 5-smooth numbers or Hamming numbers, these are the optimal
    size for inputs to FFTPACK.
    Target must be a positive integer.
    """
    if target <= 6:
        return target
    # Quickly check if it's already a power of 2
    if not (target & (target - 1)):
        return target
    match = float('inf')  # Anything found will be smaller
    p5 = 1
    while p5 < target:
        p35 = p5
        while p35 < target:
            # Ceiling integer division, avoiding conversion to float
            # (quotient = ceil(target / p35))
            quotient = -(-target // p35)
            # Quickly find next power of 2 >= quotient
            try:
                p2 = 2 ** ((quotient - 1).bit_length())
            except AttributeError:
                # Fallback for Python <2.7
                p2 = 2 ** _bit_length_26(quotient - 1)
            N = p2 * p35
            if N == target:
                return N
            elif N < match:
                match = N
            p35 *= 3
            if p35 == target:
                return p35
        if p35 < match:
            match = p35
        p5 *= 5
        if p5 == target:
            return p5
    if p5 < match:
        match = p5
    return match
 if NumpyVersion(np.__version__) >= '1.7.1':
    np_matrix_rank = np.linalg.matrix_rank
 else:
    def np_matrix_rank(M, tol=None):
        """
        Return matrix rank of array using SVD method
        Rank of the array is the number of SVD singular values of the array that are
        greater than `tol`.
        Parameters
        ----------
        M : {(M,), (M, N)} array_like
            array of <=2 dimensions
        tol : {None, float}, optional
        threshold below which SVD values are considered zero. If `tol` is
        None, and ``S`` is an array with singular values for `M`, and
        ``eps`` is the epsilon value for datatype of ``S``, then `tol` is
        set to ``S.max() * max(M.shape) * eps``.
        Notes
        -----
        The default threshold to detect rank deficiency is a test on the magnitude
        of the singular values of `M`.  By default, we identify singular values less
        than ``S.max() * max(M.shape) * eps`` as indicating rank deficiency (with
        the symbols defined above). This is the algorithm MATLAB uses [1].  It also
        appears in *Numerical recipes* in the discussion of SVD solutions for linear
        least squares [2].
        This default threshold is designed to detect rank deficiency accounting for
        the numerical errors of the SVD computation.  Imagine that there is a column
        in `M` that is an exact (in floating point) linear combination of other
        columns in `M`. Computing the SVD on `M` will not produce a singular value
        exactly equal to 0 in general: any difference of the smallest SVD value from
        0 will be caused by numerical imprecision in the calculation of the SVD.
        Our threshold for small SVD values takes this numerical imprecision into
        account, and the default threshold will detect such numerical rank
        deficiency.  The threshold may declare a matrix `M` rank deficient even if
        the linear combination of some columns of `M` is not exactly equal to
        another column of `M` but only numerically very close to another column of
        `M`.
        We chose our default threshold because it is in wide use.  Other thresholds
        are possible.  For example, elsewhere in the 2007 edition of *Numerical
        recipes* there is an alternative threshold of ``S.max() *
        np.finfo(M.dtype).eps / 2. * np.sqrt(m + n + 1.)``. The authors describe
        this threshold as being based on "expected roundoff error" (p 71).
        The thresholds above deal with floating point roundoff error in the
        calculation of the SVD.  However, you may have more information about the
        sources of error in `M` that would make you consider other tolerance values
        to detect *effective* rank deficiency.  The most useful measure of the
        tolerance depends on the operations you intend to use on your matrix.  For
        example, if your data come from uncertain measurements with uncertainties
        greater than floating point epsilon, choosing a tolerance near that
        uncertainty may be preferable.  The tolerance may be absolute if the
        uncertainties are absolute rather than relative.
        References
        ----------
        .. [1] MATLAB reference documention, "Rank"
            http://www.mathworks.com/help/techdoc/ref/rank.html
        .. [2] W. H. Press, S. A. Teukolsky, W. T. Vetterling and B. P. Flannery,
            "Numerical Recipes (3rd edition)", Cambridge University Press, 2007,
            page 795.
        Examples
        --------
        >>> from numpy.linalg import matrix_rank
        >>> matrix_rank(np.eye(4)) # Full rank matrix
        4
        >>> I=np.eye(4); I[-1,-1] = 0. # rank deficient matrix
        >>> matrix_rank(I)
        3
        >>> matrix_rank(np.ones((4,))) # 1 dimension - rank 1 unless all 0
        1
        >>> matrix_rank(np.zeros((4,)))
        0
        """
        M = np.asarray(M)
        if M.ndim > 2:
            raise TypeError('array should have 2 or fewer dimensions')
        if M.ndim < 2:
            return int(not all(M == 0))
        S = np.linalg.svd(M, compute_uv=False)
        if tol is None:
            tol = S.max() * max(M.shape) * np.finfo(S.dtype).eps
        return np.sum(S > tol)
 class CacheWriteWarning(UserWarning):
    pass
 class CachedAttribute(object):
    def __init__(self, func, cachename=None, resetlist=None):
        self.fget = func
        self.name = func.__name__
        self.cachename = cachename or '_cache'
        self.resetlist = resetlist or ()
    def __get__(self, obj, type=None):
        if obj is None:
            return self.fget
        # Get the cache or set a default one if needed
        _cachename = self.cachename
        _cache = getattr(obj, _cachename, None)
        if _cache is None:
            setattr(obj, _cachename, resettable_cache())
            _cache = getattr(obj, _cachename)
        # Get the name of the attribute to set and cache
        name = self.name
        _cachedval = _cache.get(name, None)
        # print("[_cachedval=%s]" % _cachedval)
        if _cachedval is None:
            # Call the "fget" function
            _cachedval = self.fget(obj)
            # Set the attribute in obj
            # print("Setting %s in cache to %s" % (name, _cachedval))
            try:
                _cache[name] = _cachedval
            except KeyError:
                setattr(_cache, name, _cachedval)
            # Update the reset list if needed (and possible)
            resetlist = self.resetlist
            if resetlist is not ():
                try:
                    _cache._resetdict[name] = self.resetlist
                except AttributeError:
                    pass
        # else:
        # print("Reading %s from cache (%s)" % (name, _cachedval))
        return _cachedval
    def __set__(self, obj, value):
        errmsg = "The attribute '%s' cannot be overwritten" % self.name
        warnings.warn(errmsg, CacheWriteWarning)
 class _cache_readonly(object):
    """
    Decorator for CachedAttribute
    """
    def __init__(self, cachename=None, resetlist=None):
        self.func = None
        self.cachename = cachename
        self.resetlist = resetlist or None
    def __call__(self, func):
        return CachedAttribute(func,
                               cachename=self.cachename,
                               resetlist=self.resetlist)
 cache_readonly = _cache_readonly()
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/glm/varfuncs.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/glm/varfuncs.py
@ -0,0 +1,284 @@
 """
 Variance functions for use with the link functions in statsmodels.family.links
 """
 __docformat__ = 'restructuredtext'
 import numpy as np
 FLOAT_EPS = np.finfo(float).eps
 class VarianceFunction(object):
    """
    Relates the variance of a random variable to its mean. Defaults to 1.
    Methods
    -------
    call
        Returns an array of ones that is the same shape as `mu`
    Notes
    -----
    After a variance function is initialized, its call method can be used.
    Alias for VarianceFunction:
    constant = VarianceFunction()
    See also
    --------
    statsmodels.family.family
    """
    def __call__(self, mu):
        """
        Default variance function
        Parameters
        -----------
        mu : array-like
            mean parameters
        Returns
        -------
        v : array
            ones(mu.shape)
        """
        mu = np.asarray(mu)
        return np.ones(mu.shape, np.float64)
    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)
        """
        from statsmodels.tools.numdiff import approx_fprime_cs
        # TODO: diag workaround proplem with numdiff for 1d
        return np.diag(approx_fprime_cs(mu, self))
 constant = VarianceFunction()
 constant.__doc__ = """
 The call method of constant returns a constant variance, i.e., a vector of ones.
 constant is an alias of VarianceFunction()
 """
 class Power(object):
    """
    Power variance function
    Parameters
    ----------
    power : float
        exponent used in power variance function
    Methods
    -------
    call
        Returns the power variance
    Formulas
    --------
    V(mu) = numpy.fabs(mu)**power
    Notes
    -----
    Aliases for Power:
    mu = Power()
    mu_squared = Power(power=2)
    mu_cubed = Power(power=3)
    """
    def __init__(self, power=1.):
        self.power = power
    def __call__(self, mu):
        """
        Power variance function
        Parameters
        ----------
        mu : array-like
            mean parameters
        Returns
        -------
        variance : array
            numpy.fabs(mu)**self.power
        """
        return np.power(np.fabs(mu), self.power)
    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)
        """
        from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime
        #return approx_fprime_cs(mu, self)  # TODO fix breaks in `fabs
        # TODO: diag is workaround problem with numdiff for 1d
        return np.diag(approx_fprime(mu, self))
 mu = Power()
 mu.__doc__ = """
 Returns np.fabs(mu)
 Notes
 -----
 This is an alias of Power()
 """
 mu_squared = Power(power=2)
 mu_squared.__doc__ = """
 Returns np.fabs(mu)**2
 Notes
 -----
 This is an alias of statsmodels.family.links.Power(power=2)
 """
 mu_cubed = Power(power=3)
 mu_cubed.__doc__ = """
 Returns np.fabs(mu)**3
 Notes
 -----
 This is an alias of statsmodels.family.links.Power(power=3)
 """
 class Binomial(object):
    """
    Binomial variance function
    Parameters
    ----------
    n : int, optional
        The number of trials for a binomial variable.  The default is 1 for
        p in (0,1)
    Methods
    -------
    call
        Returns the binomial variance
    Formulas
    --------
    V(mu) = p * (1 - p) * n
    where p = mu / n
    Notes
    -----
    Alias for Binomial:
    binary = Binomial()
    A private method _clean trims the data by machine epsilon so that p is
    in (0,1)
    """
    def __init__(self, n=1):
        self.n = n
    def _clean(self, p):
        return np.clip(p, FLOAT_EPS, 1 - FLOAT_EPS)
    def __call__(self, mu):
        """
        Binomial variance function
        Parameters
        -----------
        mu : array-like
            mean parameters
        Returns
        -------
        variance : array
           variance = mu/n * (1 - mu/n) * self.n
        """
        p = self._clean(mu / self.n)
        return p * (1 - p) * self.n
    #TODO: inherit from super
    def deriv(self, mu):
        """
        Derivative of the variance function v'(mu)
        """
        from statsmodels.tools.numdiff import approx_fprime_cs, approx_fprime
        # TODO: diag workaround proplem with numdiff for 1d
        return np.diag(approx_fprime_cs(mu, self))
 binary = Binomial()
 binary.__doc__ = """
 The binomial variance function for n = 1
 Notes
 -----
 This is an alias of Binomial(n=1)
 """
 class NegativeBinomial(object):
    '''
    Negative binomial variance function
    Parameters
    ----------
    alpha : float
        The ancillary parameter for the negative binomial variance function.
        `alpha` is assumed to be nonstochastic.  The default is 1.
    Methods
    -------
    call
        Returns the negative binomial variance
    Formulas
    --------
    V(mu) = mu + alpha*mu**2
    Notes
    -----
    Alias for NegativeBinomial:
    nbinom = NegativeBinomial()
    A private method _clean trims the data by machine epsilon so that p is
    in (0,inf)
    '''
    def __init__(self, alpha=1.):
        self.alpha = alpha
    def _clean(self, p):
        return np.clip(p, FLOAT_EPS, np.inf)
    def __call__(self, mu):
        """
        Negative binomial variance function
        Parameters
        ----------
        mu : array-like
            mean parameters
        Returns
        -------
        variance : array
            variance = mu + alpha*mu**2
        """
        p = self._clean(mu)
        return p + self.alpha*p**2
    def deriv(self, mu):
        """
        Derivative of the negative binomial variance function.
        """
        p = self._clean(mu)
        return 1 + 2 * self.alpha * p
 nbinom = NegativeBinomial()
 nbinom.__doc__ = """
 Negative Binomial variance function.
 Notes
 -----
 This is an alias of NegativeBinomial(alpha=1.)
 """
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/init.py
@ -0,0 +1 @@
 from base import *
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/init.py
@ -0,0 +1,4 @@
 import gwr
 import sel_bw
 import diagnostics
 import kernels
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/diagnostics.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/diagnostics.py
@ -0,0 +1,81 @@
 """
 Diagnostics for estimated gwr modesl
 """
 __author__ = "Taylor Oshan tayoshan@gmail.com"
 import numpy as np
 from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
 def get_AICc(gwr):
    """
    Get AICc value
    Gaussian: p61, (2.33), Fotheringham, Brunsdon and Charlton (2002)
    GWGLM: AICc=AIC+2k(k+1)/(n-k-1), Nakaya et al. (2005): p2704, (36)
    """
    n = gwr.n
    k = gwr.tr_S
    if isinstance(gwr.family, Gaussian):
        aicc = -2.0*gwr.llf + 2.0*n*(k + 1.0)/(n-k-2.0)  
    elif isinstance(gwr.family, (Poisson, Binomial)):
        aicc = get_AIC(gwr) + 2.0 * k * (k+1.0) / (n - k - 1.0) 
    return aicc
 def get_AIC(gwr):
    """
    Get AIC calue
    Gaussian: p96, (4.22), Fotheringham, Brunsdon and Charlton (2002)
    GWGLM:  AIC(G)=D(G) + 2K(G), where D and K denote the deviance and the effective
    number of parameters in the model with bandwidth G, respectively.
    """   
    k = gwr.tr_S
    #deviance = -2*log-likelihood
    y = gwr.y
    mu = gwr.mu
    if isinstance(gwr.family, Gaussian):
        aic = -2.0 * gwr.llf + 2.0 * (k+1)
    elif isinstance(gwr.family, (Poisson, Binomial)):
        aic = np.sum(gwr.family.resid_dev(y, mu)**2) + 2.0 * k
    return aic 
 def get_BIC(gwr):
    """
    Get BIC value
    Gaussian: p61 (2.34), Fotheringham, Brunsdon and Charlton (2002)
    BIC = -2log(L)+klog(n)
    GWGLM: BIC = dev + tr_S * log(n)
    """
    n = gwr.n      # (scalar) number of observations
    k = gwr.tr_S  
    y = gwr.y
    mu = gwr.mu
    if isinstance(gwr.family, Gaussian):
        bic = -2.0 * gwr.llf + (k+1) * np.log(n) 
    elif isinstance(gwr.family, (Poisson, Binomial)):
        bic = np.sum(gwr.family.resid_dev(y, mu)**2) + k * np.log(n)
    return bic
 def get_CV(gwr):
    """
    Get CV value
    Gaussian only
    Methods: p60, (2.31) or p212 (9.4)
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying relationships.
    Modification: sum of residual squared is divided by n according to GWR4 results
    """
    aa = gwr.resid_response.reshape((-1,1))/(1.0-gwr.influ)
    cv = np.sum(aa**2)/gwr.n
    return cv
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/gwr.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/gwr.py
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/kernels.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/kernels.py
@ -0,0 +1,120 @@
 # GWR kernel function specifications
 __author__ = "Taylor Oshan tayoshan@gmail.com" 
 #from pysal.weights.Distance import Kernel 
 import scipy
 from scipy.spatial.kdtree import KDTree
 import numpy as np
 #adaptive specifications should be parameterized with nn-1 to match original gwr
 #implementation. That is, pysal counts self neighbors with knn automatically.
 def fix_gauss(coords, bw, points=None):
    w = _Kernel(coords, function='gwr_gaussian', bandwidth=bw,
            truncate=False, points=points)
    return w.kernel
 def adapt_gauss(coords, nn, points=None):
    w = _Kernel(coords, fixed=False, k=nn-1, function='gwr_gaussian',
            truncate=False, points=points)
    return w.kernel
 def fix_bisquare(coords, bw, points=None):
    w = _Kernel(coords, function='bisquare', bandwidth=bw, points=points)
    return w.kernel
 def adapt_bisquare(coords, nn, points=None):
    w = _Kernel(coords, fixed=False, k=nn-1, function='bisquare', points=points)
    return w.kernel
 def fix_exp(coords, bw, points=None):
    w = _Kernel(coords, function='exponential', bandwidth=bw,
            truncate=False, points=points)
    return w.kernel
 def adapt_exp(coords, nn, points=None):
    w = _Kernel(coords, fixed=False, k=nn-1, function='exponential',
            truncate=False, points=points)
    return w.kernel
 from scipy.spatial.distance import cdist
 class _Kernel(object):
    """
    """
    def __init__(self, data, bandwidth=None, fixed=True, k=None,
                 function='triangular', eps=1.0000001, ids=None, truncate=True, 
                 points=None): #Added truncate flag
        if issubclass(type(data), scipy.spatial.KDTree):
            self.data = data.data
            data = self.data
        else:
            self.data = data
        if k is not None:
            self.k = int(k) + 1
        else:
            self.k = k
        if points is None:
            self.dmat = cdist(self.data, self.data)
        else:
            self.points = points
            self.dmat = cdist(self.points, self.data)
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        self.trunc = truncate
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()
        self.kernel = self._kernel_funcs(self.dmat/self.bandwidth)
        if self.trunc:
            mask = np.repeat(self.bandwidth, len(self.data), axis=1)
            self.kernel[(self.dmat >= mask)] = 0
    def _set_bw(self):
        if self.k is not None:
            dmat = np.sort(self.dmat)[:,:self.k]
        else:
            dmat = self.dmat
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(self.data)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
    def _kernel_funcs(self, zs):
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            return 1 - zs 
        elif self.function == 'uniform':
            return np.ones(zi.shape) * 0.5 
        elif self.function == 'quadratic':
            return (3. / 4) * (1 - zs ** 2) 
        elif self.function == 'quartic':
            return (15. / 16) * (1 - zs ** 2) ** 2 
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c ** (-0.5)
            return c * np.exp(-(zs ** 2) / 2.)
        elif self.function == 'gwr_gaussian':
            return np.exp(-0.5*(zs)**2)
        elif self.function == 'bisquare':
            return (1-(zs)**2)**2
        elif self.function =='exponential':
            return np.exp(-zs)
        else:
            print('Unsupported kernel function', self.function)
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/search.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/search.py
@ -0,0 +1,208 @@
 #Bandwidth optimization methods
 __author__ = "Taylor Oshan"
 import numpy as np
 def golden_section(a, c, delta, function, tol, max_iter, int_score=False):
    """
    Golden section search routine
    Method: p212, 9.6.4
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying relationships.
    Parameters
    ----------
    a               : float
                      initial max search section value
    b               : float
                      initial min search section value
    delta           : float
                      constant used to determine width of search sections
    function        : function
                      obejective function to be evaluated at different section
                      values
    int_score       : boolean
                      False for float score, True for integer score
    tol             : float
                      tolerance used to determine convergence
    max_iter        : integer
                      maximum iterations if no convergence to tolerance
    Returns
    -------
    opt_val         : float
                      optimal value
    opt_score       : kernel
                      optimal score
    output          : list of tuples
                      searching history
    """
    b = a + delta * np.abs(c-a)
    d = c - delta * np.abs(c-a)
    score = 0.0
    diff = 1.0e9
    iters  = 0
    output = []
    while np.abs(diff) > tol and iters < max_iter:
        iters += 1
        if int_score:
        	b = np.round(b)
        	d = np.round(d)
        score_a = function(a)
        score_b = function(b)
        score_c = function(c)
        score_d = function(d)
        if score_b <= score_d:
            opt_val = b
            opt_score = score_b
            c = d
            d = b
            b = a + delta * np.abs(c-a)
            #if int_score:
                #b = np.round(b)
        else:
            opt_val = d
            opt_score = score_d
            a = b
            b = d
            d = c - delta * np.abs(c-a)
            #if int_score:
                #d = np.round(b)
        #if int_score:
        #	opt_val = np.round(opt_val)
        output.append((opt_val, opt_score))
        diff = score_b - score_d
        score = opt_score
    return np.round(opt_val, 2), opt_score, output
 def equal_interval(l_bound, u_bound, interval, function, int_score=False):
    """
    Interval search, using interval as stepsize
    Parameters
    ----------
    l_bound         : float
                      initial min search section value
    u_bound         : float
                      initial max search section value
    interval        : float
                      constant used to determine width of search sections
    function        : function
                      obejective function to be evaluated at different section
                      values
    int_score       : boolean
                      False for float score, True for integer score
    Returns
    -------
    opt_val         : float
                      optimal value
    opt_score       : kernel
                      optimal score
    output          : list of tuples
                      searching history
    """
    a = l_bound
    c = u_bound
    b = a + interval
    if int_score:
        a = np.round(a,0)
        c = np.round(c,0)
        b = np.round(b,0)
    output = []
    score_a = function(a)
    score_c = function(c)
    output.append((a,score_a))
    output.append((c,score_c))
    if score_a < score_c:
        opt_val = a
        opt_score = score_a
    else:
        opt_val = c
        opt_score = score_c
    while b < c:
        score_b = function(b)
        output.append((b,score_b))
        if score_b < opt_score:
            opt_val = b
            opt_score = score_b
        b = b + interval
    return opt_val, opt_score, output
 def flexible_bw(init, y, X, n, k, family, tol, max_iter, rss_score,
        gwr_func, bw_func, sel_func):
    if init:
        bw = sel_func(bw_func(y, X))
        print bw
        optim_model = gwr_func(y, X, bw)
        err = optim_model.resid_response.reshape((-1,1))
        est = optim_model.params
    else:
        model = GLM(y, X, family=self.family, constant=False).fit()
        err = model.resid_response.reshape((-1,1))
        est = np.repeat(model.params.T, n, axis=0)
    XB = np.multiply(est, X)
    if rss_score:
        rss = np.sum((err)**2)
    iters = 0
    scores = []
    delta = 1e6
    BWs = []
    VALs = []
    while delta > tol and iters < max_iter:
        iters += 1
        new_XB = np.zeros_like(X)
        bws = []
        vals = []
        ests = np.zeros_like(X)
        f_XB = XB.copy()
        f_err = err.copy()
        for i in range(k):
            temp_y = XB[:,i].reshape((-1,1))
            temp_y = temp_y + err
            temp_X = X[:,i].reshape((-1,1))
            bw_class = bw_func(temp_y, temp_X)
            bw = sel_func(bw_class)
            optim_model = gwr_func(temp_y, temp_X, bw)
            err = optim_model.resid_response.reshape((-1,1))
            est = optim_model.params.reshape((-1,))
            new_XB[:,i] = np.multiply(est, temp_X.reshape((-1,)))
            bws.append(bw)
            ests[:,i] = est
            vals.append(bw_class.bw[1])
        predy = np.sum(np.multiply(ests, X), axis=1).reshape((-1,1))
        num = np.sum((new_XB - XB)**2)/n
        den = np.sum(np.sum(new_XB, axis=1)**2)
        score = (num/den)**0.5
        XB = new_XB
        if rss_score:
            new_rss = np.sum((y - predy)**2)
            score = np.abs((new_rss - rss)/new_rss)
            rss = new_rss
        print score
        scores.append(score)
        delta = score
        BWs.append(bws) 
        VALs.append(vals)
    opt_bws = BWs[-1]
    return opt_bws, np.array(BWs), np.array(VALs), np.array(scores), f_XB, f_err
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/sel_bw.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/sel_bw.py
@ -0,0 +1,286 @@
 # GWR Bandwidth selection class
 #Thinking about removing the search method and just having optimization begin in
 #class __init__
 #x_glob and offset parameters dont yet do anything; former is for semiparametric
 #GWR and later is for offset variable for Poisson model
 __author__ = "Taylor Oshan Tayoshan@gmail.com"
 from kernels import *
 from search import golden_section, equal_interval, flexible_bw
 from gwr import GWR
 from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
 import pysal.spreg.user_output as USER
 from diagnostics import get_AICc, get_AIC, get_BIC, get_CV
 from scipy.spatial.distance import pdist, squareform
 from pysal.common import KDTree
 import numpy as np
 kernels = {1: fix_gauss, 2: adapt_gauss, 3: fix_bisquare, 4:
        adapt_bisquare, 5: fix_exp, 6:adapt_exp}
 getDiag = {'AICc': get_AICc,'AIC':get_AIC, 'BIC': get_BIC, 'CV': get_CV}
 class Sel_BW(object):
    """
    Select bandwidth for kernel
    Methods: p211 - p213, bandwidth selection
    Fotheringham, A. S., Brunsdon, C., & Charlton, M. (2002).
    Geographically weighted regression: the analysis of spatially varying relationships.
    Parameters
    ----------
    y              : array
                     n*1, dependent variable.
    x_glob         : array
                     n*k1, fixed independent variable.
    x_loc          : array
                     n*k2, local independent variable, including constant.
    coords         : list of tuples
                     (x,y) of points used in bandwidth selection
    family         : string
                     GWR model type: 'Gaussian', 'logistic, 'Poisson''
    offset         : array
                     n*1, offset variable for Poisson model
    kernel         : string
                     kernel function: 'gaussian', 'bisquare', 'exponetial'
    fixed          : boolean
                     True for fixed bandwidth and False for adaptive (NN)
    fb             : True for flexible (mutliple covaraite-specific) bandwidths
                     False for a traditional (same for  all covariates)
                     bandwdith; defualt is False.
    constant       : boolean
                     True to include intercept (default) in model and False to exclude
                     intercept.
    Attributes
    ----------
    y              : array
                     n*1, dependent variable.
    x_glob         : array
                     n*k1, fixed independent variable.
    x_loc          : array
                     n*k2, local independent variable, including constant.
    coords         : list of tuples
                     (x,y) of points used in bandwidth selection
    family         : string
                     GWR model type: 'Gaussian', 'logistic, 'Poisson''
    kernel         : string
                     type of kernel used and wether fixed or adaptive
    criterion      : string
                     bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV'
    search         : string
                     bw search method: 'golden', 'interval'
    bw_min         : float
                     min value used in bandwidth search
    bw_max         : float
                     max value used in bandwidth search
    interval       : float
                     interval increment used in interval search
    tol            : float
                     tolerance used to determine convergence
    max_iter       : integer
                     max interations if no convergence to tol
    fb             : True for flexible (mutliple covaraite-specific) bandwidths
                     False for a traditional (same for  all covariates)
                     bandwdith; defualt is False.
    constant       : boolean
                     True to include intercept (default) in model and False to exclude
                     intercept.
    """
    def __init__(self, coords, y, x_loc, x_glob=None, family=Gaussian(),
            offset=None, kernel='bisquare', fixed=False, fb=False, constant=True):
        self.coords = coords
        self.y = y
        self.x_loc = x_loc
        if x_glob is not None:
            self.x_glob = x_glob
        else:
            self.x_glob = []
        self.family=family
        self.fixed = fixed
        self.kernel = kernel
        if offset is None:
        	self.offset = np.ones((len(y), 1))
        else:
            self.offset = offset * 1.0
        self.fb = fb
        self.constant = constant
    def search(self, search='golden_section', criterion='AICc', bw_min=0.0, 
            bw_max=0.0, interval=0.0, tol=1.0e-6, max_iter=200, init_fb=True,
            tol_fb=1.0e-5, rss_score=False, max_iter_fb=200):
        """
        Parameters
        ----------
        criterion      : string
                         bw selection criterion: 'AICc', 'AIC', 'BIC', 'CV'
        search         : string
                         bw search method: 'golden', 'interval'
        bw_min         : float
                         min value used in bandwidth search
        bw_max         : float
                         max value used in bandwidth search
        interval       : float
                         interval increment used in interval search
        tol            : float
                         tolerance used to determine convergence
        max_iter       : integer
                         max iterations if no convergence to tol
        init_fb        : True to initialize flexible bandwidth search with
                         esitmates from a traditional GWR and False to
                         initialize flexible bandwidth search with global
                         regression estimates
        tol_fb         : convergence tolerence for the flexible bandwidth
                         backfitting algorithm; a larger tolerance may stop the
                         algorith faster though it may result in a less optimal
                         model
        max_iter_fb    : max iterations if no convergence to tol for flexible
                         bandwidth backfittign algorithm
        rss_score      : True to use the residual sum of sqaures to evaluate
                         each iteration of the flexible bandwidth backfitting
                         routine and False to use a smooth function; default is
                         False
        Returns
        -------
        bw             : scalar or array
                         optimal bandwidth value or values; returns scalar for
                         fb=False and array for fb=True; ordering of bandwidths
                         matches the ordering of the covariates (columns) of the
                         designs matrix, X
        """     
        self.search = search
        self.criterion = criterion
        self.bw_min = bw_min
        self.bw_max = bw_max
        self.interval = interval
        self.tol = tol
        self.max_iter = max_iter
        self.init_fb = init_fb
        self.tol_fb = tol_fb
        self.rss_score = rss_score
        self.max_iter_fb = max_iter_fb
        if self.fixed:
            if self.kernel == 'gaussian':
                ktype = 1
            elif self.kernel == 'bisquare':
                ktype = 3
            elif self.kernel == 'exponential':
                ktype = 5
            else:
                raise TypeError('Unsupported kernel function ', self.kernel)
        else:
            if self.kernel == 'gaussian':
            	ktype = 2
            elif self.kernel == 'bisquare':
                ktype = 4
            elif self.kernel == 'exponential':
                ktype = 6
            else:
                raise TypeError('Unsupported kernel function ', self.kernel)
        function = lambda bw: getDiag[criterion](
                GWR(self.coords, self.y, self.x_loc, bw, family=self.family,
                    kernel=self.kernel, fixed=self.fixed, offset=self.offset).fit())
        if ktype % 2 == 0:
            int_score = True
        else:
            int_score = False
        self.int_score = int_score
        if self.fb:
            self._fbw()
            print self.bw[1]
            self.XB = self.bw[4]
            self.err = self.bw[5]
        else:
            self._bw()
        return self.bw[0]
    def _bw(self):
        gwr_func = lambda bw: getDiag[self.criterion](
                GWR(self.coords, self.y, self.x_loc, bw, family=self.family,
                    kernel=self.kernel, fixed=self.fixed, constant=self.constant).fit())
        if self.search == 'golden_section':
            a,c = self._init_section(self.x_glob, self.x_loc, self.coords,
                    self.constant)
            delta = 0.38197 #1 - (np.sqrt(5.0)-1.0)/2.0
            self.bw = golden_section(a, c, delta, gwr_func, self.tol, 
                    self.max_iter, self.int_score)
        elif self.search == 'interval':
            self.bw = equal_interval(self.bw_min, self.bw_max, self.interval,
                    gwr_func, self.int_score)
        else:
            raise TypeError('Unsupported computational search method ', search)
    def _fbw(self):
        y = self.y
        if self.constant:
        	X = USER.check_constant(self.x_loc)
        else:
            X = self.x_loc
        n, k = X.shape
        family = self.family
        offset = self.offset
        kernel = self.kernel
        fixed = self.fixed
        coords = self.coords
        search = self.search
        criterion = self.criterion
        bw_min = self.bw_min
        bw_max = self.bw_max
        interval = self.interval
        tol = self.tol
        max_iter = self.max_iter
        gwr_func = lambda y, X, bw: GWR(coords, y, X, bw, family=family, 
                kernel=kernel, fixed=fixed, offset=offset, constant=False).fit()
        bw_func = lambda y, X: Sel_BW(coords, y, X, x_glob=[], family=family,
                kernel=kernel, fixed=fixed, offset=offset, constant=False)
        sel_func = lambda bw_func: bw_func.search(search=search, 
                        criterion=criterion, bw_min=bw_min, bw_max=bw_max, 
                        interval=interval, tol=tol, max_iter=max_iter)
        self.bw = flexible_bw(self.init_fb, y, X, n, k, family, self.tol_fb,
               self.max_iter_fb, self.rss_score, gwr_func, bw_func, sel_func)
    def _init_section(self, x_glob, x_loc, coords, constant):
        if len(x_glob) > 0:
            n_glob = x_glob.shape[1]
        else:
            n_glob = 0
        if len(x_loc) > 0:
            n_loc = x_loc.shape[1]
        else:
            n_loc = 0
        if constant:
            n_vars = n_glob + n_loc + 1
        else:
            n_vars = n_glob + n_loc
        n = np.array(coords).shape[0]
        if self.int_score:
            a = 40 + 2 * n_vars
            c = n
        else:
            nn = 40 + 2 * n_vars
            sq_dists = squareform(pdist(coords))
            sort_dists = np.sort(sq_dists, axis=1)
            min_dists = sort_dists[:,nn-1]
            max_dists = sort_dists[:,-1]
            a = np.min(min_dists)/2.0
            c = np.max(max_dists)/2.0
        if a < self.bw_min:
            a = self.bw_min
        if c > self.bw_max and self.bw_max > 0:
            c = self.bw_max
        return a, c
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/tests/test_gwr.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/tests/test_gwr.py
@ -0,0 +1,853 @@
 """
 GWR is tested against results from GWR4
 """
 import unittest
 import pickle as pk
 from crankshaft.regression.gwr.gwr import GWR, FBGWR
 from crankshaft.regression.gwr.sel_bw import Sel_BW
 from crankshaft.regression.gwr.diagnostics import get_AICc, get_AIC, get_BIC, get_CV
 from crankshaft.regression.glm.family import Gaussian, Poisson, Binomial
 import numpy as np
 import pysal
 class TestGWRGaussian(unittest.TestCase):
    def setUp(self):
        data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
        self.coords = zip(data.by_col('X'), data.by_col('Y'))
        self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
        rural  = np.array(data.by_col('PctRural')).reshape((-1,1))
        pov = np.array(data.by_col('PctPov')).reshape((-1,1)) 
        black = np.array(data.by_col('PctBlack')).reshape((-1,1))
        self.X = np.hstack([rural, pov, black])
        self.BS_F = pysal.open(pysal.examples.get_path('georgia_BS_F_listwise.csv'))
        self.BS_NN = pysal.open(pysal.examples.get_path('georgia_BS_NN_listwise.csv'))
        self.GS_F = pysal.open(pysal.examples.get_path('georgia_GS_F_listwise.csv'))
        self.GS_NN = pysal.open(pysal.examples.get_path('georgia_GS_NN_listwise.csv'))
        self.FB = pk.load(open(pysal.examples.get_path('FB.p'), 'r'))
        self.XB = pk.load(open(pysal.examples.get_path('XB.p'), 'r'))
        self.err = pk.load(open(pysal.examples.get_path('err.p'), 'r'))
    def test_BS_F(self):
        est_Int = self.BS_F.by_col(' est_Intercept')
        se_Int = self.BS_F.by_col(' se_Intercept')
        t_Int = self.BS_F.by_col(' t_Intercept')
        est_rural = self.BS_F.by_col(' est_PctRural')
        se_rural = self.BS_F.by_col(' se_PctRural')
        t_rural = self.BS_F.by_col(' t_PctRural')
        est_pov = self.BS_F.by_col(' est_PctPov')
        se_pov = self.BS_F.by_col(' se_PctPov')
        t_pov = self.BS_F.by_col(' t_PctPov')
        est_black = self.BS_F.by_col(' est_PctBlack')
        se_black = self.BS_F.by_col(' se_PctBlack')
        t_black = self.BS_F.by_col(' t_PctBlack')
        yhat = self.BS_F.by_col(' yhat')
        res = np.array(self.BS_F.by_col(' residual'))
        std_res = np.array(self.BS_F.by_col(' std_residual')).reshape((-1,1))
        localR2 = np.array(self.BS_F.by_col(' localR2')).reshape((-1,1))
        inf = np.array(self.BS_F.by_col(' influence')).reshape((-1,1))
        cooksD = np.array(self.BS_F.by_col(' CooksD')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=209267.689, fixed=True)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        CV = get_CV(rslt)
        self.assertAlmostEquals(np.floor(AICc), 894.0)
        self.assertAlmostEquals(np.floor(AIC), 890.0)
        self.assertAlmostEquals(np.floor(BIC), 944.0)
        self.assertAlmostEquals(np.round(CV,2), 18.25)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
        np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
        np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
        np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
        np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
        np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
        np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
        np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
        np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
        np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
        np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
        np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
        np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
        np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
        np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
    def test_BS_NN(self):
        est_Int = self.BS_NN.by_col(' est_Intercept')
        se_Int = self.BS_NN.by_col(' se_Intercept')
        t_Int = self.BS_NN.by_col(' t_Intercept')
        est_rural = self.BS_NN.by_col(' est_PctRural')
        se_rural = self.BS_NN.by_col(' se_PctRural')
        t_rural = self.BS_NN.by_col(' t_PctRural')
        est_pov = self.BS_NN.by_col(' est_PctPov')
        se_pov = self.BS_NN.by_col(' se_PctPov')
        t_pov = self.BS_NN.by_col(' t_PctPov')
        est_black = self.BS_NN.by_col(' est_PctBlack')
        se_black = self.BS_NN.by_col(' se_PctBlack')
        t_black = self.BS_NN.by_col(' t_PctBlack')
        yhat = self.BS_NN.by_col(' yhat')
        res = np.array(self.BS_NN.by_col(' residual'))
        std_res = np.array(self.BS_NN.by_col(' std_residual')).reshape((-1,1))
        localR2 = np.array(self.BS_NN.by_col(' localR2')).reshape((-1,1))
        inf = np.array(self.BS_NN.by_col(' influence')).reshape((-1,1))
        cooksD = np.array(self.BS_NN.by_col(' CooksD')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=90.000, fixed=False)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        CV = get_CV(rslt)
        self.assertAlmostEquals(np.floor(AICc), 896.0)
        self.assertAlmostEquals(np.floor(AIC), 892.0)
        self.assertAlmostEquals(np.floor(BIC), 941.0)
        self.assertAlmostEquals(np.around(CV, 2), 19.19)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
        np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
        np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
        np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
        np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
        np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
        np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
        np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
        np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
        np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
        np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
        np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
        np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
        np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
        np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
    def test_GS_F(self):
        est_Int = self.GS_F.by_col(' est_Intercept')
        se_Int = self.GS_F.by_col(' se_Intercept')
        t_Int = self.GS_F.by_col(' t_Intercept')
        est_rural = self.GS_F.by_col(' est_PctRural')
        se_rural = self.GS_F.by_col(' se_PctRural')
        t_rural = self.GS_F.by_col(' t_PctRural')
        est_pov = self.GS_F.by_col(' est_PctPov')
        se_pov = self.GS_F.by_col(' se_PctPov')
        t_pov = self.GS_F.by_col(' t_PctPov')
        est_black = self.GS_F.by_col(' est_PctBlack')
        se_black = self.GS_F.by_col(' se_PctBlack')
        t_black = self.GS_F.by_col(' t_PctBlack')
        yhat = self.GS_F.by_col(' yhat')
        res = np.array(self.GS_F.by_col(' residual'))
        std_res = np.array(self.GS_F.by_col(' std_residual')).reshape((-1,1))
        localR2 = np.array(self.GS_F.by_col(' localR2')).reshape((-1,1))
        inf = np.array(self.GS_F.by_col(' influence')).reshape((-1,1))
        cooksD = np.array(self.GS_F.by_col(' CooksD')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=87308.298,
                kernel='gaussian', fixed=True)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        CV = get_CV(rslt)
        self.assertAlmostEquals(np.floor(AICc), 895.0)
        self.assertAlmostEquals(np.floor(AIC), 890.0)
        self.assertAlmostEquals(np.floor(BIC), 943.0)
        self.assertAlmostEquals(np.around(CV, 2), 18.21)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
        np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
        np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
        np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
        np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
        np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
        np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
        np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
        np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
        np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
        np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
        np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
        np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
        np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
        np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
    def test_GS_NN(self):
        est_Int = self.GS_NN.by_col(' est_Intercept')
        se_Int = self.GS_NN.by_col(' se_Intercept')
        t_Int = self.GS_NN.by_col(' t_Intercept')
        est_rural = self.GS_NN.by_col(' est_PctRural')
        se_rural = self.GS_NN.by_col(' se_PctRural')
        t_rural = self.GS_NN.by_col(' t_PctRural')
        est_pov = self.GS_NN.by_col(' est_PctPov')
        se_pov = self.GS_NN.by_col(' se_PctPov')
        t_pov = self.GS_NN.by_col(' t_PctPov')
        est_black = self.GS_NN.by_col(' est_PctBlack')
        se_black = self.GS_NN.by_col(' se_PctBlack')
        t_black = self.GS_NN.by_col(' t_PctBlack')
        yhat = self.GS_NN.by_col(' yhat')
        res = np.array(self.GS_NN.by_col(' residual'))
        std_res = np.array(self.GS_NN.by_col(' std_residual')).reshape((-1,1))
        localR2 = np.array(self.GS_NN.by_col(' localR2')).reshape((-1,1))
        inf = np.array(self.GS_NN.by_col(' influence')).reshape((-1,1))
        cooksD = np.array(self.GS_NN.by_col(' CooksD')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=49.000,
                kernel='gaussian', fixed=False)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        CV = get_CV(rslt)
        self.assertAlmostEquals(np.floor(AICc),  896)
        self.assertAlmostEquals(np.floor(AIC), 894.0)
        self.assertAlmostEquals(np.floor(BIC), 922.0)
        self.assertAlmostEquals(np.around(CV, 2), 17.91)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-04)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-04)
        np.testing.assert_allclose(est_rural, rslt.params[:,1], rtol=1e-04)
        np.testing.assert_allclose(se_rural, rslt.bse[:,1], rtol=1e-04)
        np.testing.assert_allclose(t_rural, rslt.tvalues[:,1], rtol=1e-04)
        np.testing.assert_allclose(est_pov, rslt.params[:,2], rtol=1e-04)
        np.testing.assert_allclose(se_pov, rslt.bse[:,2], rtol=1e-04)
        np.testing.assert_allclose(t_pov, rslt.tvalues[:,2], rtol=1e-04)
        np.testing.assert_allclose(est_black, rslt.params[:,3], rtol=1e-02)
        np.testing.assert_allclose(se_black, rslt.bse[:,3], rtol=1e-02)
        np.testing.assert_allclose(t_black, rslt.tvalues[:,3], rtol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
        np.testing.assert_allclose(res, rslt.resid_response, rtol=1e-04)
        np.testing.assert_allclose(std_res, rslt.std_res, rtol=1e-04)
        np.testing.assert_allclose(localR2, rslt.localR2, rtol=1e-05)
        np.testing.assert_allclose(inf, rslt.influ, rtol=1e-04)
        np.testing.assert_allclose(cooksD, rslt.cooksD, rtol=1e-00)
    def test_FBGWR(self):
        model = FBGWR(self.coords, self.y, self.X, [157.0, 65.0, 52.0],
                XB=self.XB, err=self.err, constant=False)
        rslt = model.fit()
        np.testing.assert_allclose(rslt.predy, self.FB['predy'], atol=1e-07)
        np.testing.assert_allclose(rslt.params, self.FB['params'], atol=1e-07)
        np.testing.assert_allclose(rslt.resid_response, self.FB['u'], atol=1e-05)
        np.testing.assert_almost_equal(rslt.resid_ss, 6339.3497144025841)
    def test_Prediction(self):
        coords =np.array(self.coords)
        index = np.arange(len(self.y))
        #train = index[0:-10]
        test = index[-10:]
        #y_train = self.y[train]
        #X_train = self.X[train]
        #coords_train = list(coords[train])
        #y_test = self.y[test]
        X_test = self.X[test]
        coords_test = list(coords[test])
        model = GWR(self.coords, self.y, self.X, 93, family=Gaussian(),
                fixed=False, kernel='bisquare')
        results = model.predict(coords_test, X_test)
        params = np.array([22.77198, -0.10254,    -0.215093,   -0.01405,
            19.10531,    -0.094177,   -0.232529,   0.071913,
            19.743421,   -0.080447,   -0.30893,    0.083206,
            17.505759,   -0.078919,   -0.187955,   0.051719,
            27.747402,   -0.165335,   -0.208553,   0.004067,
            26.210627,   -0.138398,   -0.360514,   0.072199,
            18.034833,   -0.077047,   -0.260556,   0.084319,
            28.452802,   -0.163408,   -0.14097,    -0.063076,
            22.353095,   -0.103046,   -0.226654,   0.002992,
            18.220508,   -0.074034,   -0.309812,   0.108636]).reshape((10,4))
        np.testing.assert_allclose(params, results.params, rtol=1e-03)
        bse = np.array([2.080166,    0.021462,    0.102954,    0.049627,
            2.536355,    0.022111,    0.123857,    0.051917,
            1.967813,    0.019716,    0.102562,    0.054918,
            2.463219,    0.021745,    0.110297,    0.044189,
            1.556056,    0.019513,    0.12764,     0.040315,
            1.664108,    0.020114,    0.131208,    0.041613,
            2.5835,      0.021481,    0.113158,    0.047243,
            1.709483,    0.019752,    0.116944,    0.043636,
            1.958233,    0.020947,    0.09974,     0.049821,
            2.276849,    0.020122,    0.107867,    0.047842]).reshape((10,4))
        np.testing.assert_allclose(bse, results.bse, rtol=1e-03)
        tvalues = np.array([10.947193,   -4.777659,   -2.089223,   -0.283103,
            7.532584,    -4.259179,   -1.877395,   1.385161,
            10.033179,   -4.080362,   -3.012133,   1.515096,
            7.106862,    -3.629311,   -1.704079,   1.17042,
            17.831878,   -8.473156,   -1.633924,   0.100891,
            15.750552,   -6.880725,   -2.74765,    1.734978,
            6.980774,    -3.586757,   -2.302575,   1.784818,
            16.644095,   -8.273001,   -1.205451,   -1.445501,
            11.414933,   -4.919384,   -2.272458,   0.060064,
            8.00251, -3.679274,   -2.872176,   2.270738]).reshape((10,4))
        np.testing.assert_allclose(tvalues, results.tvalues, rtol=1e-03)
        localR2 = np.array([[ 0.53068693],
                            [ 0.59582647],
                            [ 0.59700925],
                            [ 0.45769954],
                            [ 0.54634509],
                            [ 0.5494828 ],
                            [ 0.55159604],
                            [ 0.55634237],
                            [ 0.53903842],
                            [ 0.55884954]])
        np.testing.assert_allclose(localR2, results.localR2, rtol=1e-05)
 class TestGWRPoisson(unittest.TestCase):
    def setUp(self):
        data = pysal.open(pysal.examples.get_path('Tokyomortality.csv'), mode='Ur')
        self.coords = zip(data.by_col('X_CENTROID'), data.by_col('Y_CENTROID'))
        self.y = np.array(data.by_col('db2564')).reshape((-1,1))
        self.off = np.array(data.by_col('eb2564')).reshape((-1,1))
        OCC  = np.array(data.by_col('OCC_TEC')).reshape((-1,1))
        OWN = np.array(data.by_col('OWNH')).reshape((-1,1)) 
        POP = np.array(data.by_col('POP65')).reshape((-1,1))
        UNEMP = np.array(data.by_col('UNEMP')).reshape((-1,1))
        self.X = np.hstack([OCC,OWN,POP,UNEMP])
        self.BS_F = pysal.open(pysal.examples.get_path('tokyo_BS_F_listwise.csv'))
        self.BS_NN = pysal.open(pysal.examples.get_path('tokyo_BS_NN_listwise.csv'))
        self.GS_F = pysal.open(pysal.examples.get_path('tokyo_GS_F_listwise.csv'))
        self.GS_NN = pysal.open(pysal.examples.get_path('tokyo_GS_NN_listwise.csv'))
        self.BS_NN_OFF = pysal.open(pysal.examples.get_path('tokyo_BS_NN_OFF_listwise.csv'))
    def test_BS_F(self):
        est_Int = self.BS_F.by_col(' est_Intercept')
        se_Int = self.BS_F.by_col(' se_Intercept')
        t_Int = self.BS_F.by_col(' t_Intercept')
        est_OCC = self.BS_F.by_col(' est_OCC_TEC')
        se_OCC = self.BS_F.by_col(' se_OCC_TEC')
        t_OCC = self.BS_F.by_col(' t_OCC_TEC')
        est_OWN = self.BS_F.by_col(' est_OWNH')
        se_OWN = self.BS_F.by_col(' se_OWNH')
        t_OWN = self.BS_F.by_col(' t_OWNH')
        est_POP = self.BS_F.by_col(' est_POP65')
        se_POP = self.BS_F.by_col(' se_POP65')
        t_POP = self.BS_F.by_col(' t_POP65')
        est_UNEMP = self.BS_F.by_col(' est_UNEMP')
        se_UNEMP = self.BS_F.by_col(' se_UNEMP')
        t_UNEMP = self.BS_F.by_col(' t_UNEMP')
        yhat = self.BS_F.by_col(' yhat')
        pdev = np.array(self.BS_F.by_col(' localpdev')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=26029.625, family=Poisson(), 
                kernel='bisquare', fixed=True)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 13294.0)
        self.assertAlmostEquals(np.floor(AIC), 13247.0)
        self.assertAlmostEquals(np.floor(BIC), 13485.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-05)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-03)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-03)
        np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-04)
        np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
        np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
        np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
        np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-03)
        np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-03)
        np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-04)
        np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
        np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
        np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04)
        np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
        np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-05)
        np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
    def test_BS_NN(self):
        est_Int = self.BS_NN.by_col(' est_Intercept')
        se_Int = self.BS_NN.by_col(' se_Intercept')
        t_Int = self.BS_NN.by_col(' t_Intercept')
        est_OCC = self.BS_NN.by_col(' est_OCC_TEC')
        se_OCC = self.BS_NN.by_col(' se_OCC_TEC')
        t_OCC = self.BS_NN.by_col(' t_OCC_TEC')
        est_OWN = self.BS_NN.by_col(' est_OWNH')
        se_OWN = self.BS_NN.by_col(' se_OWNH')
        t_OWN = self.BS_NN.by_col(' t_OWNH')
        est_POP = self.BS_NN.by_col(' est_POP65')
        se_POP = self.BS_NN.by_col(' se_POP65')
        t_POP = self.BS_NN.by_col(' t_POP65')
        est_UNEMP = self.BS_NN.by_col(' est_UNEMP')
        se_UNEMP = self.BS_NN.by_col(' se_UNEMP')
        t_UNEMP = self.BS_NN.by_col(' t_UNEMP')
        yhat = self.BS_NN.by_col(' yhat')
        pdev = np.array(self.BS_NN.by_col(' localpdev')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(), 
                kernel='bisquare', fixed=False)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 13285)
        self.assertAlmostEquals(np.floor(AIC), 13259.0)
        self.assertAlmostEquals(np.floor(BIC), 13442.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
        np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
        np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
        np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
        np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
        np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
        np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
        np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-03)
        np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
        np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
        np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04)
        np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
        np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
        np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
    def test_BS_NN_Offset(self):
        est_Int = self.BS_NN_OFF.by_col(' est_Intercept')
        se_Int = self.BS_NN_OFF.by_col(' se_Intercept')
        t_Int = self.BS_NN_OFF.by_col(' t_Intercept')
        est_OCC = self.BS_NN_OFF.by_col(' est_OCC_TEC')
        se_OCC = self.BS_NN_OFF.by_col(' se_OCC_TEC')
        t_OCC = self.BS_NN_OFF.by_col(' t_OCC_TEC')
        est_OWN = self.BS_NN_OFF.by_col(' est_OWNH')
        se_OWN = self.BS_NN_OFF.by_col(' se_OWNH')
        t_OWN = self.BS_NN_OFF.by_col(' t_OWNH')
        est_POP = self.BS_NN_OFF.by_col(' est_POP65')
        se_POP = self.BS_NN_OFF.by_col(' se_POP65')
        t_POP = self.BS_NN_OFF.by_col(' t_POP65')
        est_UNEMP = self.BS_NN_OFF.by_col(' est_UNEMP')
        se_UNEMP = self.BS_NN_OFF.by_col(' se_UNEMP')
        t_UNEMP = self.BS_NN_OFF.by_col(' t_UNEMP')
        yhat = self.BS_NN_OFF.by_col(' yhat')
        pdev = np.array(self.BS_NN_OFF.by_col(' localpdev')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=100, offset=self.off, family=Poisson(), 
                kernel='bisquare', fixed=False)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 367.0)
        self.assertAlmostEquals(np.floor(AIC), 361.0)
        self.assertAlmostEquals(np.floor(BIC), 451.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-02,
                atol=1e-02)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02, atol=1e-02)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-01,
                atol=1e-02)
        np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03,
                atol=1e-02)
        np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02, atol=1e-02)
        np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-01,
                atol=1e-02)
        np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04,
                atol=1e-02)
        np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02, atol=1e-02)
        np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-01,
                atol=1e-02)
        np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-03,
                atol=1e-02)
        np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02, atol=1e-02)
        np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-01,
                atol=1e-02)
        np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-04,
                atol=1e-02)
        np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02,
                atol=1e-02)
        np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-01,
                atol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-03, atol=1e-02)
        np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-04, atol=1e-02)
    def test_GS_F(self):
        est_Int = self.GS_F.by_col(' est_Intercept')
        se_Int = self.GS_F.by_col(' se_Intercept')
        t_Int = self.GS_F.by_col(' t_Intercept')
        est_OCC = self.GS_F.by_col(' est_OCC_TEC')
        se_OCC = self.GS_F.by_col(' se_OCC_TEC')
        t_OCC = self.GS_F.by_col(' t_OCC_TEC')
        est_OWN = self.GS_F.by_col(' est_OWNH')
        se_OWN = self.GS_F.by_col(' se_OWNH')
        t_OWN = self.GS_F.by_col(' t_OWNH')
        est_POP = self.GS_F.by_col(' est_POP65')
        se_POP = self.GS_F.by_col(' se_POP65')
        t_POP = self.GS_F.by_col(' t_POP65')
        est_UNEMP = self.GS_F.by_col(' est_UNEMP')
        se_UNEMP = self.GS_F.by_col(' se_UNEMP')
        t_UNEMP = self.GS_F.by_col(' t_UNEMP')
        yhat = self.GS_F.by_col(' yhat')
        pdev = np.array(self.GS_F.by_col(' localpdev')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=8764.474, family=Poisson(), 
                kernel='gaussian', fixed=True)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 11283.0)
        self.assertAlmostEquals(np.floor(AIC), 11211.0)
        self.assertAlmostEquals(np.floor(BIC), 11497.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-03)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
        np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
        np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
        np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
        np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-03)
        np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
        np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
        np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-02)
        np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
        np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
        np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-02)
        np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
        np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
        np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
    def test_GS_NN(self):
        est_Int = self.GS_NN.by_col(' est_Intercept')
        se_Int = self.GS_NN.by_col(' se_Intercept')
        t_Int = self.GS_NN.by_col(' t_Intercept')
        est_OCC = self.GS_NN.by_col(' est_OCC_TEC')
        se_OCC = self.GS_NN.by_col(' se_OCC_TEC')
        t_OCC = self.GS_NN.by_col(' t_OCC_TEC')
        est_OWN = self.GS_NN.by_col(' est_OWNH')
        se_OWN = self.GS_NN.by_col(' se_OWNH')
        t_OWN = self.GS_NN.by_col(' t_OWNH')
        est_POP = self.GS_NN.by_col(' est_POP65')
        se_POP = self.GS_NN.by_col(' se_POP65')
        t_POP = self.GS_NN.by_col(' t_POP65')
        est_UNEMP = self.GS_NN.by_col(' est_UNEMP')
        se_UNEMP = self.GS_NN.by_col(' se_UNEMP')
        t_UNEMP = self.GS_NN.by_col(' t_UNEMP')
        yhat = self.GS_NN.by_col(' yhat')
        pdev = np.array(self.GS_NN.by_col(' localpdev')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=50, family=Poisson(), 
                kernel='gaussian', fixed=False)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 21070.0)
        self.assertAlmostEquals(np.floor(AIC), 21069.0)
        self.assertAlmostEquals(np.floor(BIC), 21111.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-04)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-02)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-02)
        np.testing.assert_allclose(est_OCC, rslt.params[:,1], rtol=1e-03)
        np.testing.assert_allclose(se_OCC, rslt.bse[:,1], rtol=1e-02)
        np.testing.assert_allclose(t_OCC, rslt.tvalues[:,1], rtol=1e-02)
        np.testing.assert_allclose(est_OWN, rslt.params[:,2], rtol=1e-04)
        np.testing.assert_allclose(se_OWN, rslt.bse[:,2], rtol=1e-02)
        np.testing.assert_allclose(t_OWN, rslt.tvalues[:,2], rtol=1e-02)
        np.testing.assert_allclose(est_POP, rslt.params[:,3], rtol=1e-02)
        np.testing.assert_allclose(se_POP, rslt.bse[:,3], rtol=1e-02)
        np.testing.assert_allclose(t_POP, rslt.tvalues[:,3], rtol=1e-02)
        np.testing.assert_allclose(est_UNEMP, rslt.params[:,4], rtol=1e-02)
        np.testing.assert_allclose(se_UNEMP, rslt.bse[:,4], rtol=1e-02)
        np.testing.assert_allclose(t_UNEMP, rslt.tvalues[:,4], rtol=1e-02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-04)
        np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
 class TestGWRBinomial(unittest.TestCase):
    def setUp(self):
        data = pysal.open(pysal.examples.get_path('landslides.csv'))
        self.coords = zip(data.by_col('X'), data.by_col('Y'))
        self.y = np.array(data.by_col('Landslid')).reshape((-1,1))
        ELEV  = np.array(data.by_col('Elev')).reshape((-1,1))
        SLOPE = np.array(data.by_col('Slope')).reshape((-1,1)) 
        SIN = np.array(data.by_col('SinAspct')).reshape((-1,1))
        COS = np.array(data.by_col('CosAspct')).reshape((-1,1))
        SOUTH = np.array(data.by_col('AbsSouth')).reshape((-1,1))
        DIST = np.array(data.by_col('DistStrm')).reshape((-1,1))
        self.X = np.hstack([ELEV, SLOPE, SIN, COS, SOUTH, DIST])
        self.BS_F = pysal.open(pysal.examples.get_path('clearwater_BS_F_listwise.csv'))
        self.BS_NN = pysal.open(pysal.examples.get_path('clearwater_BS_NN_listwise.csv'))
        self.GS_F = pysal.open(pysal.examples.get_path('clearwater_GS_F_listwise.csv'))
        self.GS_NN = pysal.open(pysal.examples.get_path('clearwater_GS_NN_listwise.csv'))
    def test_BS_F(self):
        est_Int = self.BS_F.by_col(' est_Intercept')
        se_Int = self.BS_F.by_col(' se_Intercept')
        t_Int = self.BS_F.by_col(' t_Intercept')
        est_elev = self.BS_F.by_col(' est_Elev')
        se_elev = self.BS_F.by_col(' se_Elev')
        t_elev = self.BS_F.by_col(' t_Elev')
        est_slope = self.BS_F.by_col(' est_Slope')
        se_slope = self.BS_F.by_col(' se_Slope')
        t_slope = self.BS_F.by_col(' t_Slope')
        est_sin = self.BS_F.by_col(' est_SinAspct')
        se_sin = self.BS_F.by_col(' se_SinAspct')
        t_sin = self.BS_F.by_col(' t_SinAspct')
        est_cos = self.BS_F.by_col(' est_CosAspct')
        se_cos = self.BS_F.by_col(' se_CosAspct')
        t_cos = self.BS_F.by_col(' t_CosAspct')
        est_south = self.BS_F.by_col(' est_AbsSouth')
        se_south = self.BS_F.by_col(' se_AbsSouth')
        t_south = self.BS_F.by_col(' t_AbsSouth')
        est_strm = self.BS_F.by_col(' est_DistStrm')
        se_strm = self.BS_F.by_col(' se_DistStrm')
        t_strm = self.BS_F.by_col(' t_DistStrm') 
        yhat = self.BS_F.by_col(' yhat')
        pdev = np.array(self.BS_F.by_col(' localpdev')).reshape((-1,1))
        model = GWR(self.coords, self.y, self.X, bw=19642.170, family=Binomial(), 
                kernel='bisquare', fixed=True)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 275.0)
        self.assertAlmostEquals(np.floor(AIC), 271.0)
        self.assertAlmostEquals(np.floor(BIC), 349.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
        np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
        np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
        np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
        np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
        np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
        np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
        np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
        np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
        np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
        np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
        np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
        np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
        np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
        np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
        np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
        np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
        np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
        np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
        #This test fails - likely due to compound rounding errors
        #Has been tested using statsmodels.family calculations and
        #code from Jing's python version, which both yield the same
        #np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
    def test_BS_NN(self):
        est_Int = self.BS_NN.by_col(' est_Intercept')
        se_Int = self.BS_NN.by_col(' se_Intercept')
        t_Int = self.BS_NN.by_col(' t_Intercept')
        est_elev = self.BS_NN.by_col(' est_Elev')
        se_elev = self.BS_NN.by_col(' se_Elev')
        t_elev = self.BS_NN.by_col(' t_Elev')
        est_slope = self.BS_NN.by_col(' est_Slope')
        se_slope = self.BS_NN.by_col(' se_Slope')
        t_slope = self.BS_NN.by_col(' t_Slope')
        est_sin = self.BS_NN.by_col(' est_SinAspct')
        se_sin = self.BS_NN.by_col(' se_SinAspct')
        t_sin = self.BS_NN.by_col(' t_SinAspct')
        est_cos = self.BS_NN.by_col(' est_CosAspct')
        se_cos = self.BS_NN.by_col(' se_CosAspct')
        t_cos = self.BS_NN.by_col(' t_CosAspct')
        est_south = self.BS_NN.by_col(' est_AbsSouth')
        se_south = self.BS_NN.by_col(' se_AbsSouth')
        t_south = self.BS_NN.by_col(' t_AbsSouth')
        est_strm = self.BS_NN.by_col(' est_DistStrm')
        se_strm = self.BS_NN.by_col(' se_DistStrm')
        t_strm = self.BS_NN.by_col(' t_DistStrm') 
        yhat = self.BS_NN.by_col(' yhat')
        pdev = self.BS_NN.by_col(' localpdev')
        model = GWR(self.coords, self.y, self.X, bw=158, family=Binomial(), 
                kernel='bisquare', fixed=False)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 277.0)
        self.assertAlmostEquals(np.floor(AIC), 271.0)
        self.assertAlmostEquals(np.floor(BIC), 358.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
        np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
        np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
        np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
        np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
        np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
        np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
        np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
        np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
        np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
        np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
        np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
        np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
        np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
        np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
        np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
        np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e03)
        np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
        np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e03)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
        #This test fails - likely due to compound rounding errors
        #Has been tested using statsmodels.family calculations and
        #code from Jing's python version, which both yield the same
        #np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
    def test_GS_F(self):
        est_Int = self.GS_F.by_col(' est_Intercept')
        se_Int = self.GS_F.by_col(' se_Intercept')
        t_Int = self.GS_F.by_col(' t_Intercept')
        est_elev = self.GS_F.by_col(' est_Elev')
        se_elev = self.GS_F.by_col(' se_Elev')
        t_elev = self.GS_F.by_col(' t_Elev')
        est_slope = self.GS_F.by_col(' est_Slope')
        se_slope = self.GS_F.by_col(' se_Slope')
        t_slope = self.GS_F.by_col(' t_Slope')
        est_sin = self.GS_F.by_col(' est_SinAspct')
        se_sin = self.GS_F.by_col(' se_SinAspct')
        t_sin = self.GS_F.by_col(' t_SinAspct')
        est_cos = self.GS_F.by_col(' est_CosAspct')
        se_cos = self.GS_F.by_col(' se_CosAspct')
        t_cos = self.GS_F.by_col(' t_CosAspct')
        est_south = self.GS_F.by_col(' est_AbsSouth')
        se_south = self.GS_F.by_col(' se_AbsSouth')
        t_south = self.GS_F.by_col(' t_AbsSouth')
        est_strm = self.GS_F.by_col(' est_DistStrm')
        se_strm = self.GS_F.by_col(' se_DistStrm')
        t_strm = self.GS_F.by_col(' t_DistStrm') 
        yhat = self.GS_F.by_col(' yhat')
        pdev = self.GS_F.by_col(' localpdev')
        model = GWR(self.coords, self.y, self.X, bw=8929.061, family=Binomial(), 
                kernel='gaussian', fixed=True)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 276.0)
        self.assertAlmostEquals(np.floor(AIC), 272.0)
        self.assertAlmostEquals(np.floor(BIC), 341.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
        np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
        np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
        np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
        np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
        np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
        np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
        np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
        np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
        np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
        np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
        np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
        np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
        np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
        np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
        np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
        np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
        np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
        np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-01)
        #This test fails - likely due to compound rounding errors
        #Has been tested using statsmodels.family calculations and
        #code from Jing's python version, which both yield the same
        #np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
    def test_GS_NN(self):
        est_Int = self.GS_NN.by_col(' est_Intercept')
        se_Int = self.GS_NN.by_col(' se_Intercept')
        t_Int = self.GS_NN.by_col(' t_Intercept')
        est_elev = self.GS_NN.by_col(' est_Elev')
        se_elev = self.GS_NN.by_col(' se_Elev')
        t_elev = self.GS_NN.by_col(' t_Elev')
        est_slope = self.GS_NN.by_col(' est_Slope')
        se_slope = self.GS_NN.by_col(' se_Slope')
        t_slope = self.GS_NN.by_col(' t_Slope')
        est_sin = self.GS_NN.by_col(' est_SinAspct')
        se_sin = self.GS_NN.by_col(' se_SinAspct')
        t_sin = self.GS_NN.by_col(' t_SinAspct')
        est_cos = self.GS_NN.by_col(' est_CosAspct')
        se_cos = self.GS_NN.by_col(' se_CosAspct')
        t_cos = self.GS_NN.by_col(' t_CosAspct')
        est_south = self.GS_NN.by_col(' est_AbsSouth')
        se_south = self.GS_NN.by_col(' se_AbsSouth')
        t_south = self.GS_NN.by_col(' t_AbsSouth')
        est_strm = self.GS_NN.by_col(' est_DistStrm')
        se_strm = self.GS_NN.by_col(' se_DistStrm')
        t_strm = self.GS_NN.by_col(' t_DistStrm') 
        yhat = self.GS_NN.by_col(' yhat')
        pdev = self.GS_NN.by_col(' localpdev')
        model = GWR(self.coords, self.y, self.X, bw=64, family=Binomial(), 
                kernel='gaussian', fixed=False)
        rslt = model.fit()
        AICc = get_AICc(rslt)
        AIC = get_AIC(rslt)
        BIC = get_BIC(rslt)
        self.assertAlmostEquals(np.floor(AICc), 276.0)
        self.assertAlmostEquals(np.floor(AIC), 273.0)
        self.assertAlmostEquals(np.floor(BIC), 331.0)
        np.testing.assert_allclose(est_Int, rslt.params[:,0], rtol=1e-00)
        np.testing.assert_allclose(se_Int, rslt.bse[:,0], rtol=1e-00)
        np.testing.assert_allclose(t_Int, rslt.tvalues[:,0], rtol=1e-00)
        np.testing.assert_allclose(est_elev, rslt.params[:,1], rtol=1e-00)
        np.testing.assert_allclose(se_elev, rslt.bse[:,1], rtol=1e-00)
        np.testing.assert_allclose(t_elev, rslt.tvalues[:,1], rtol=1e-00)
        np.testing.assert_allclose(est_slope, rslt.params[:,2], rtol=1e-00)
        np.testing.assert_allclose(se_slope, rslt.bse[:,2], rtol=1e-00)
        np.testing.assert_allclose(t_slope, rslt.tvalues[:,2], rtol=1e-00)
        np.testing.assert_allclose(est_sin, rslt.params[:,3], rtol=1e01)
        np.testing.assert_allclose(se_sin, rslt.bse[:,3], rtol=1e01)
        np.testing.assert_allclose(t_sin, rslt.tvalues[:,3], rtol=1e01)
        np.testing.assert_allclose(est_cos, rslt.params[:,4], rtol=1e01)
        np.testing.assert_allclose(se_cos, rslt.bse[:,4], rtol=1e01)
        np.testing.assert_allclose(t_cos, rslt.tvalues[:,4], rtol=1e01)
        np.testing.assert_allclose(est_south, rslt.params[:,5], rtol=1e01)
        np.testing.assert_allclose(se_south, rslt.bse[:,5], rtol=1e01)
        np.testing.assert_allclose(t_south, rslt.tvalues[:,5], rtol=1e01)
        np.testing.assert_allclose(est_strm, rslt.params[:,6], rtol=1e02)
        np.testing.assert_allclose(se_strm, rslt.bse[:,6], rtol=1e01)
        np.testing.assert_allclose(t_strm, rslt.tvalues[:,6], rtol=1e02)
        np.testing.assert_allclose(yhat, rslt.mu, rtol=1e-00)
        #This test fails - likely due to compound rounding errors
        #Has been tested using statsmodels.family calculations and
        #code from Jing's python version, which both yield the same
        #np.testing.assert_allclose(pdev, rslt.pDev, rtol=1e-05)
 if __name__ == '__main__':
 	unittest.main()
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/tests/test_kernels.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/tests/test_kernels.py
@ -0,0 +1,84 @@
 import unittest
 import numpy as np
 import pysal
 from pysal.contrib.gwr.kernels import *
 PEGP = pysal.examples.get_path
 class TestKernels(unittest.TestCase):
    def setUp(self):
        np.random.seed(1234)
        x = np.arange(1,6)
        y = np.arange(5,0, -1)
        np.random.shuffle(x)
        np.random.shuffle(y)
        self.coords = np.array(zip(x, y))
        self.fix_gauss_kern = np.array([
        [ 1.        ,  0.38889556,  0.48567179,  0.48567179,  0.89483932],
        [ 0.38889556,  1.        ,  0.89483932,  0.64118039,  0.48567179],
        [ 0.48567179,  0.89483932,  1.        ,  0.89483932,  0.48567179],
        [ 0.48567179,  0.64118039,  0.89483932,  1.        ,  0.38889556],
        [ 0.89483932,  0.48567179,  0.48567179,  0.38889556,  1.        ]])
        self.adapt_gauss_kern = np.array([
        [ 1.        ,  0.52004183,  0.60653072,  0.60653072,  0.92596109],
        [ 0.34559083,  1.        ,  0.88249692,  0.60653072,  0.44374738],
        [ 0.03877423,  0.60653072,  1.        ,  0.60653072,  0.03877423],
        [ 0.44374738,  0.60653072,  0.88249692,  1.        ,  0.34559083],
        [ 0.92596109,  0.60653072,  0.60653072,  0.52004183,  1.        ]])
        self.fix_bisquare_kern = np.array([
        [ 1.        ,  0.        ,  0.        ,  0.        ,  0.60493827],
        [ 0.        ,  1.        ,  0.60493827,  0.01234568,  0.        ],
        [ 0.        ,  0.60493827,  1.        ,  0.60493827,  0.        ],
        [ 0.        ,  0.01234568,  0.60493827,  1.        ,  0.        ],
        [ 0.60493827,  0.        ,  0.        ,  0.        ,  1.        ]])
        self.adapt_bisquare_kern = np.array([
        [  1.00000000e+00,   0.00000000e+00,   0.00000000e+00,
           3.99999881e-14,   7.15976383e-01],
        [  0.00000000e+00,   1.00000000e+00,   5.62500075e-01,
           3.99999881e-14,   0.00000000e+00],
        [  0.00000000e+00,   3.99999881e-14,   1.00000000e+00,
           3.99999881e-14,   0.00000000e+00],
        [  0.00000000e+00,   3.99999881e-14,   5.62500075e-01,
           1.00000000e+00,   0.00000000e+00],
        [  7.15976383e-01,   0.00000000e+00,   3.99999881e-14,
           0.00000000e+00,   1.00000000e+00]])
        self.fix_exp_kern = np.array([
        [ 1.        ,  0.2529993 ,  0.30063739,  0.30063739,  0.62412506],
        [ 0.2529993 ,  1.        ,  0.62412506,  0.38953209,  0.30063739],
        [ 0.30063739,  0.62412506,  1.        ,  0.62412506,  0.30063739],
        [ 0.30063739,  0.38953209,  0.62412506,  1.        ,  0.2529993 ],
        [ 0.62412506,  0.30063739,  0.30063739,  0.2529993 ,  1.        ]])
        self.adapt_exp_kern = np.array([
        [ 1.        ,  0.31868771,  0.36787948,  0.36787948,  0.67554721],
        [ 0.23276223,  1.        ,  0.60653069,  0.36787948,  0.27949951],
        [ 0.07811997,  0.36787948,  1.        ,  0.36787948,  0.07811997],
        [ 0.27949951,  0.36787948,  0.60653069,  1.        ,  0.23276223],
        [ 0.67554721,  0.36787948,  0.36787948,  0.31868771,  1.        ]])
    def test_fix_gauss(self):
        kern = fix_gauss(self.coords, 3)
        np.testing.assert_allclose(kern, self.fix_gauss_kern)
    def test_adapt_gauss(self):
        kern = adapt_gauss(self.coords, 3)
        np.testing.assert_allclose(kern, self.adapt_gauss_kern)
    def test_fix_biqsquare(self):
        kern = fix_bisquare(self.coords, 3)
        np.testing.assert_allclose(kern, self.fix_bisquare_kern,
                atol=1e-01)
    def test_adapt_bisqaure(self):
        kern = adapt_bisquare(self.coords, 3)
        np.testing.assert_allclose(kern, self.adapt_bisquare_kern, atol=1e-012)
    def test_fix_exp(self):
        kern = fix_exp(self.coords, 3)
        np.testing.assert_allclose(kern, self.fix_exp_kern)
    def test_adapt_exp(self):
        kern = adapt_exp(self.coords, 3)
        np.testing.assert_allclose(kern, self.adapt_exp_kern)
 if __name__ == '__main__':
    unittest.main()
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/tests/test_sel_bw.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr/base/tests/test_sel_bw.py
@ -0,0 +1,139 @@
 """
 GWR is tested against results from GWR4
 """
 import unittest
 import pickle as pk
 from pysal.contrib.glm.family import Gaussian, Poisson, Binomial
 from pysal.contrib.gwr.sel_bw import Sel_BW
 import numpy as np
 import pysal
 class TestSelBW(unittest.TestCase):
    def setUp(self):
        data = pysal.open(pysal.examples.get_path('GData_utm.csv'))
        self.coords = zip(data.by_col('X'), data.by_col('Y'))
        self.y = np.array(data.by_col('PctBach')).reshape((-1,1))
        rural  = np.array(data.by_col('PctRural')).reshape((-1,1))
        pov = np.array(data.by_col('PctPov')).reshape((-1,1)) 
        black = np.array(data.by_col('PctBlack')).reshape((-1,1))
        self.X = np.hstack([rural, pov, black])
        self.XB = pk.load(open(pysal.examples.get_path('XB.p'), 'r'))
        self.err = pk.load(open(pysal.examples.get_path('err.p'), 'r'))
    def test_golden_fixed_AICc(self):
        bw1 = 211027.34
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
                fixed=True).search(criterion='AICc')
        self.assertAlmostEqual(bw1, bw2)
    def test_golden_adapt_AICc(self):
        bw1 = 93.0
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
                fixed=False).search(criterion='AICc')
        self.assertAlmostEqual(bw1, bw2)
    def test_golden_fixed_AIC(self):
        bw1 = 76169.15
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=True).search(criterion='AIC')
        self.assertAlmostEqual(bw1, bw2)
    def test_golden_adapt_AIC(self):
        bw1 = 50.0
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=False).search(criterion='AIC')
        self.assertAlmostEqual(bw1, bw2)
    def test_golden_fixed_BIC(self):
        bw1 = 279451.43
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=True).search(criterion='BIC')
        self.assertAlmostEqual(bw1, bw2)
    def test_golden_adapt_BIC(self):
        bw1 = 62.0
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=False).search(criterion='BIC')
        self.assertAlmostEqual(bw1, bw2)
    def test_golden_fixed_CV(self):
        bw1 = 130406.67
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=True).search(criterion='CV')
        self.assertAlmostEqual(bw1, bw2)
    def test_golden_adapt_CV(self):
        bw1 = 68.0
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=False).search(criterion='CV')
        self.assertAlmostEqual(bw1, bw2)
    def test_interval_fixed_AICc(self):
        bw1 = 211025.0#211027.00
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
                fixed=True).search(criterion='AICc', search='interval', bw_min=211001.,
                        bw_max=211035.0, interval=2)
        self.assertAlmostEqual(bw1, bw2)
    def test_interval_adapt_AICc(self):
        bw1 = 93.0
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='bisquare',
                fixed=False).search(criterion='AICc', search='interval',
                        bw_min=90.0, bw_max=95.0, interval=1)
        self.assertAlmostEqual(bw1, bw2)
    def test_interval_fixed_AIC(self):
        bw1 = 76175.0#76169.00
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=True).search(criterion='AIC', search='interval',
                        bw_min=76161.0, bw_max=76175.0, interval=1)
        self.assertAlmostEqual(bw1, bw2)
    def test_interval_adapt_AIC(self):
        bw1 = 40.0#50.0
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=False).search(criterion='AIC', search='interval', bw_min=40.0,
                        bw_max=60.0, interval=2)
        self.assertAlmostEqual(bw1, bw2)
    def test_interval_fixed_BIC(self):
        bw1 = 279461.0#279451.00
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=True).search(criterion='BIC', search='interval', bw_min=279441.0,
                        bw_max=279461.0, interval=2)
        self.assertAlmostEqual(bw1, bw2)
    def test_interval_adapt_BIC(self):
        bw1 = 62.0
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=False).search(criterion='BIC', search='interval',
                        bw_min=52.0, bw_max=72.0, interval=2)
        self.assertAlmostEqual(bw1, bw2)
    def test_interval_fixed_CV(self):
        bw1 = 130400.0#130406.00
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=True).search(criterion='CV', search='interval', bw_min=130400.0,
                        bw_max=130410.0, interval=1)
        self.assertAlmostEqual(bw1, bw2)
    def test_interval_adapt_CV(self):
        bw1 = 62.0#68.0
        bw2 = Sel_BW(self.coords, self.y, self.X, kernel='gaussian',
                fixed=False).search(criterion='CV', search='interval', bw_min=60.0,
                        bw_max=76.0 , interval=2)
        self.assertAlmostEqual(bw1, bw2)
    def test_FBGWR_AIC(self):
        bw1 = [157.0, 65.0, 52.0]
        sel = Sel_BW(self.coords, self.y, self.X, fb=True, kernel='bisquare',
                constant=False)
        bw2 = sel.search(tol_fb=1e-03)
        np.testing.assert_allclose(bw1, bw2)
        np.testing.assert_allclose(sel.XB, self.XB, atol=1e-05)
        np.testing.assert_allclose(sel.err, self.err, atol=1e-05)
 if __name__ == '__main__':
 	unittest.main()
--- a/release/python/0.7.0/crankshaft/crankshaft/regression/gwr_cs.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/regression/gwr_cs.py
@ -0,0 +1,202 @@
 """
    Geographically weighted regression
 """
 import numpy as np
 from gwr.base.gwr import GWR as PySAL_GWR
 from gwr.base.sel_bw import Sel_BW
 import json
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 import plpy
 class GWR:
    def __init__(self, data_provider=None):
        if data_provider:
            self.data_provider = data_provider
        else:
            self.data_provider = AnalysisDataProvider()
    def gwr(self, subquery, dep_var, ind_vars,
            bw=None, fixed=False, kernel='bisquare',
            geom_col='the_geom', id_col='cartodb_id'):
        """
            subquery: 'select * from demographics'
            dep_var: 'pctbachelor'
            ind_vars: ['intercept', 'pctpov', 'pctrural', 'pctblack']
            bw: value of bandwidth, if None then select optimal
            fixed: False (kNN) or True ('distance')
            kernel: 'bisquare' (default), or 'exponential', 'gaussian'
        """
        params = {'geom_col': geom_col,
                  'id_col': id_col,
                  'subquery': subquery,
                  'dep_var': dep_var,
                  'ind_vars': ind_vars}
        # get data from data provider
        query_result = self.data_provider.get_gwr(params)
        # exit if data to analyze is empty
        if len(query_result) == 0:
            plpy.error('No data passed to analysis or independent variables '
                       'are all null-valued')
        # unique ids and variable names list
        rowid = np.array(query_result[0]['rowid'], dtype=np.int)
        # x, y are centroids of input geometries
        x = np.array(query_result[0]['x'], dtype=np.float)
        y = np.array(query_result[0]['y'], dtype=np.float)
        coords = zip(x, y)
        # extract dependent variable
        Y = np.array(query_result[0]['dep_var'], dtype=np.float).reshape((-1, 1))
        n = Y.shape[0]
        k = len(ind_vars)
        X = np.zeros((n, k))
        # extract query result
        for attr in range(0, k):
            attr_name = 'attr' + str(attr + 1)
            X[:, attr] = np.array(
              query_result[0][attr_name], dtype=np.float).flatten()
        # add intercept variable name
        ind_vars.insert(0, 'intercept')
        # calculate bandwidth if none is supplied
        if bw is None:
            bw = Sel_BW(coords, Y, X,
                        fixed=fixed, kernel=kernel).search()
        model = PySAL_GWR(coords, Y, X, bw,
                          fixed=fixed, kernel=kernel).fit()
        # containers for outputs
        coeffs = []
        stand_errs = []
        t_vals = []
        filtered_t_vals = []
        # extracted model information
        c_alpha = model.adj_alpha
        filtered_t = model.filter_tvals(c_alpha[1])
        predicted = model.predy.flatten()
        residuals = model.resid_response
        r_squared = model.localR2.flatten()
        bw = np.repeat(float(bw), n)
        # create lists of json objs for model outputs
        for idx in xrange(n):
            coeffs.append(json.dumps({var: model.params[idx, k]
                                      for k, var in enumerate(ind_vars)}))
            stand_errs.append(json.dumps({var: model.bse[idx, k]
                                          for k, var in enumerate(ind_vars)}))
            t_vals.append(json.dumps({var: model.tvalues[idx, k]
                                      for k, var in enumerate(ind_vars)}))
            filtered_t_vals.append(
                    json.dumps({var: filtered_t[idx, k]
                                for k, var in enumerate(ind_vars)}))
        return zip(coeffs, stand_errs, t_vals, filtered_t_vals,
                   predicted, residuals, r_squared, bw, rowid)
    def gwr_predict(self, subquery, dep_var, ind_vars,
                    bw=None, fixed=False, kernel='bisquare',
                    geom_col='the_geom', id_col='cartodb_id'):
        """
        subquery: 'select * from demographics'
        dep_var: 'pctbachelor'
        ind_vars: ['intercept', 'pctpov', 'pctrural', 'pctblack']
        bw: value of bandwidth, if None then select optimal
        fixed: False (kNN) or True ('distance')
        kernel: 'bisquare' (default), or 'exponential', 'gaussian'
        """
        params = {'geom_col': geom_col,
                  'id_col': id_col,
                  'subquery': subquery,
                  'dep_var': dep_var,
                  'ind_vars': ind_vars}
        # get data from data provider
        query_result = self.data_provider.get_gwr_predict(params)
        # exit if data to analyze is empty
        if len(query_result) == 0:
            plpy.error('No data passed to analysis or independent variables '
                       'are all null-valued')
        # unique ids and variable names list
        rowid = np.array(query_result[0]['rowid'], dtype=np.int)
        x = np.array(query_result[0]['x'], dtype=np.float)
        y = np.array(query_result[0]['y'], dtype=np.float)
        coords = np.array(zip(x, y), dtype=np.float)
        # extract dependent variable
        Y = np.array(query_result[0]['dep_var']).reshape((-1, 1))
        n = Y.shape[0]
        k = len(ind_vars)
        X = np.empty((n, k), dtype=np.float)
        for attr in range(0, k):
            attr_name = 'attr' + str(attr + 1)
            X[:, attr] = np.array(
              query_result[0][attr_name], dtype=np.float).flatten()
        # add intercept variable name
        ind_vars.insert(0, 'intercept')
        # split data into "training" and "test" for predictions
        # create index to split based on null y values
        train = np.where(Y != np.array(None))[0]
        test = np.where(Y == np.array(None))[0]
        # report error if there is no data to predict
        if len(test) < 1:
            plpy.error('No rows flagged for prediction: verify that rows '
                       'denoting prediction locations have a dependent '
                       'variable value of `null`')
        # split dependent variable (only need training which is non-Null's)
        Y_train = Y[train].reshape((-1, 1))
        Y_train = Y_train.astype(np.float)
        # split coords
        coords_train = coords[train]
        coords_test = coords[test]
        # split explanatory variables
        X_train = X[train]
        X_test = X[test]
        # calculate bandwidth if none is supplied
        if bw is None:
            bw = Sel_BW(coords_train, Y_train, X_train,
                        fixed=fixed, kernel=kernel).search()
        # estimate model and predict at new locations
        model = PySAL_GWR(coords_train, Y_train, X_train,
                          bw, fixed=fixed,
                          kernel=kernel).predict(coords_test, X_test)
        coeffs = []
        stand_errs = []
        t_vals = []
        r_squared = model.localR2.flatten()
        predicted = model.predy.flatten()
        m = len(model.predy)
        for idx in xrange(m):
            coeffs.append(json.dumps({var: model.params[idx, k]
                                      for k, var in enumerate(ind_vars)}))
            stand_errs.append(json.dumps({var: model.bse[idx, k]
                                          for k, var in enumerate(ind_vars)}))
            t_vals.append(json.dumps({var: model.tvalues[idx, k]
                                      for k, var in enumerate(ind_vars)}))
        return zip(coeffs, stand_errs, t_vals,
                   r_squared, predicted, rowid[test])
--- a/release/python/0.7.0/crankshaft/crankshaft/segmentation/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/segmentation/init.py
@ -0,0 +1 @@
 from segmentation import * 
--- a/release/python/0.7.0/crankshaft/crankshaft/segmentation/segmentation.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/segmentation/segmentation.py
@ -0,0 +1,176 @@
 """
 Segmentation creation and prediction
 """
 import sklearn
 import numpy as np
 import plpy
 from sklearn.ensemble import GradientBoostingRegressor
 from sklearn import metrics
 from sklearn.cross_validation import train_test_split
 # Lower level functions
 #----------------------
 def replace_nan_with_mean(array):
    """
        Input:
            @param array: an array of floats which may have null-valued entries
        Output:
            array with nans filled in with the mean of the dataset
    """
    # returns an array of rows and column indices
    indices = np.where(np.isnan(array))
    # iterate through entries which have nan values
    for row, col in zip(*indices):
            array[row, col] = np.mean(array[~np.isnan(array[:, col]), col])
    return array
 def get_data(variable, feature_columns, query):
    """
        Fetch data from the database, clean, and package into
          numpy arrays
        Input:
            @param variable: name of the target variable
            @param feature_columns: list of column names
            @param query: subquery that data is pulled from for the packaging
        Output:
            prepared data, packaged into NumPy arrays
    """
    columns = ','.join(['array_agg("{col}") As "{col}"'.format(col=col) for col in feature_columns])
    try:
        data = plpy.execute('''SELECT array_agg("{variable}") As target, {columns} FROM ({query}) As a'''.format(
            variable=variable,
            columns=columns,
            query=query))
    except Exception, e:
        plpy.error('Failed to access data to build segmentation model: %s' % e)
    # extract target data from plpy object
    target = np.array(data[0]['target'])
    # put n feature data arrays into an n x m array of arrays
    features = np.column_stack([np.array(data[0][col], dtype=float) for col in feature_columns])
    return replace_nan_with_mean(target), replace_nan_with_mean(features)
 # High level interface
 # --------------------
 def create_and_predict_segment_agg(target, features, target_features, target_ids, model_parameters):
    """
    Version of create_and_predict_segment that works on arrays that come stright form the SQL calling
    the function.
        Input:
            @param target: The 1D array of lenth NSamples containing the target variable we want the model to predict
            @param features: Thw 2D array of size NSamples * NFeatures that form the imput to the model
            @param target_ids: A 1D array of target_ids that will be used to associate the results of the prediction with the rows which they come from
            @param model_parameters: A dictionary containing parameters for the model.
    """
    clean_target = replace_nan_with_mean(target)
    clean_features = replace_nan_with_mean(features)
    target_features = replace_nan_with_mean(target_features)
    model, accuracy = train_model(clean_target, clean_features, model_parameters, 0.2)
    prediction = model.predict(target_features)
    accuracy_array = [accuracy]*prediction.shape[0]
    return zip(target_ids, prediction, np.full(prediction.shape, accuracy_array))
 def create_and_predict_segment(query, variable, target_query, model_params):
    """
    generate a segment with machine learning
    Stuart Lynn
    """
    ## fetch column names
    try:
        columns = plpy.execute('SELECT * FROM ({query}) As a LIMIT 1  '.format(query=query))[0].keys()
    except Exception, e:
        plpy.error('Failed to build segmentation model: %s' % e)
    ## extract column names to be used in building the segmentation model
    feature_columns = set(columns) - set([variable, 'cartodb_id', 'the_geom', 'the_geom_webmercator'])
    ## get data from database
    target, features = get_data(variable, feature_columns, query)
    model, accuracy = train_model(target, features, model_params, 0.2)
    cartodb_ids, result = predict_segment(model, feature_columns, target_query)
    accuracy_array = [accuracy]*result.shape[0]
    return zip(cartodb_ids, result, accuracy_array)
 def train_model(target, features, model_params, test_split):
    """
        Train the Gradient Boosting model on the provided data and calculate the accuracy of the model
        Input:
            @param target: 1D Array of the variable that the model is to be trianed to predict
            @param features: 2D Array NSamples * NFeatures to use in trining the model
            @param model_params: A dictionary of model parameters, the full specification can be found on the
                scikit learn page for [GradientBoostingRegressor](http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html)
            @parma test_split: The fraction of the data to be withheld for testing the model / calculating the accuray
    """
    features_train, features_test, target_train, target_test = train_test_split(features, target, test_size=test_split)
    model = GradientBoostingRegressor(**model_params)
    model.fit(features_train, target_train)
    accuracy = calculate_model_accuracy(model, features, target)
    return model, accuracy
 def calculate_model_accuracy(model, features, target):
    """
        Calculate the mean squared error of the model prediction
        Input:
            @param model: model trained from input features
            @param features: features to make a prediction from
            @param target: target to compare prediction to
        Output:
            mean squared error of the model prection compared to the target
    """
    prediction = model.predict(features)
    return metrics.mean_squared_error(prediction, target)
 def predict_segment(model, features, target_query):
    """
    Use the provided model to predict the values for the new feature set
        Input:
            @param model: The pretrained model
            @features: A list of features to use in the model prediction (list of column names)
            @target_query: The query to run to obtain the data to predict on and the cartdb_ids associated with it.
    """
    batch_size = 1000
    joined_features = ','.join(['"{0}"::numeric'.format(a) for a in features])
    try:
        cursor = plpy.cursor('SELECT Array[{joined_features}] As features FROM ({target_query}) As a'.format(
            joined_features=joined_features,
            target_query=target_query))
    except Exception, e:
        plpy.error('Failed to build segmentation model: %s' % e)
    results = []
    while True:
        rows = cursor.fetch(batch_size)
        if not rows:
            break
        batch = np.row_stack([np.array(row['features'], dtype=float) for row in rows])
        #Need to fix this. Should be global mean. This will cause weird effects
        batch = replace_nan_with_mean(batch)
        prediction = model.predict(batch)
        results.append(prediction)
    try:
        cartodb_ids = plpy.execute('''SELECT array_agg(cartodb_id ORDER BY cartodb_id) As cartodb_ids FROM ({0}) As a'''.format(target_query))[0]['cartodb_ids']
    except Exception, e:
        plpy.error('Failed to build segmentation model: %s' % e)
    return cartodb_ids, np.concatenate(results)
--- a/release/python/0.7.0/crankshaft/crankshaft/space_time_dynamics/init.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/space_time_dynamics/init.py
@ -0,0 +1,2 @@
 """Import all functions from clustering libraries."""
 from markov import *
--- a/release/python/0.7.0/crankshaft/crankshaft/space_time_dynamics/markov.py
+++ b/release/python/0.7.0/crankshaft/crankshaft/space_time_dynamics/markov.py
@ -0,0 +1,194 @@
 """
 Spatial dynamics measurements using Spatial Markov
 """
 # TODO: remove all plpy dependencies
 import numpy as np
 import pysal as ps
 import plpy
 import crankshaft.pysal_utils as pu
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 class Markov(object):
    def __init__(self, data_provider=None):
        if data_provider is None:
            self.data_provider = AnalysisDataProvider()
        else:
            self.data_provider = data_provider
    def spatial_trend(self, subquery, time_cols, num_classes=7,
                      w_type='knn', num_ngbrs=5, permutations=0,
                      geom_col='the_geom', id_col='cartodb_id'):
        """
            Predict the trends of a unit based on:
            1. history of its transitions to different classes (e.g., 1st
               quantile -> 2nd quantile)
            2. average class of its neighbors
            Inputs:
            @param subquery string: e.g., SELECT the_geom, cartodb_id,
              interesting_time_column FROM table_name
            @param time_cols list of strings: list of strings of column names
            @param num_classes (optional): number of classes to break
              distribution of values into. Currently uses quantile bins.
            @param w_type string (optional): weight type ('knn' or 'queen')
            @param num_ngbrs int (optional): number of neighbors (if knn type)
            @param permutations int (optional): number of permutations for test
              stats
            @param geom_col string (optional): name of column which contains
              the geometries
            @param id_col string (optional): name of column which has the ids
              of the table
            Outputs:
            @param trend_up float: probablity that a geom will move to a higher
              class
            @param trend_down float: probablity that a geom will move to a
              lower class
            @param trend float: (trend_up - trend_down) / trend_static
            @param volatility float: a measure of the volatility based on
              probability stddev(prob array)
        """
        if len(time_cols) < 2:
            plpy.error('More than one time column needs to be passed')
        params = {"id_col": id_col,
                  "time_cols": time_cols,
                  "geom_col": geom_col,
                  "subquery": subquery,
                  "num_ngbrs": num_ngbrs}
        result = self.data_provider.get_markov(w_type, params)
        # build weight
        weights = pu.get_weight(result, w_type)
        weights.transform = 'r'
        # prep time data
        t_data = get_time_data(result, time_cols)
        sp_markov_result = ps.Spatial_Markov(t_data,
                                             weights,
                                             k=num_classes,
                                             fixed=False,
                                             permutations=permutations)
        # get lag classes
        lag_classes = ps.Quantiles(
            ps.lag_spatial(weights, t_data[:, -1]),
            k=num_classes).yb
        # look up probablity distribution for each unit according to class and
        #  lag class
        prob_dist = get_prob_dist(sp_markov_result.P,
                                  lag_classes,
                                  sp_markov_result.classes[:, -1])
        # find the ups and down and overall distribution of each cell
        trend_up, trend_down, trend, volatility = get_prob_stats(prob_dist, sp_markov_result.classes[:, -1])
        # output the results
        return zip(trend, trend_up, trend_down, volatility, weights.id_order)
 def get_time_data(markov_data, time_cols):
    """
        Extract the time columns and bin appropriately
    """
    num_attrs = len(time_cols)
    return np.array([[x['attr' + str(i)] for x in markov_data]
                     for i in range(1, num_attrs+1)], dtype=float).transpose()
 # not currently used
 def rebin_data(time_data, num_time_per_bin):
    """
        Convert an n x l matrix into an (n/m) x l matrix where the values are
         reduced (averaged) for the intervening states:
          1 2 3 4    1.5 3.5
          5 6 7 8 -> 5.5 7.5
          9 8 7 6    8.5 6.5
          5 4 3 2    4.5 2.5
          if m = 2, the 4 x 4 matrix is transformed to a 2 x 4 matrix.
        This process effectively resamples the data at a longer time span n
         units longer than the input data.
        For cases when there is a remainder (remainder(5/3) = 2), the remaining
         two columns are binned together as the last time period, while the
         first three are binned together for the first period.
        Input:
          @param time_data n x l  ndarray: measurements of an attribute at
           different time intervals
          @param num_time_per_bin int: number of columns to average into a new
           column
        Output:
          ceil(n / m) x l ndarray of resampled time series
    """
    if time_data.shape[1] % num_time_per_bin == 0:
        # if fit is perfect, then use it
        n_max = time_data.shape[1] / num_time_per_bin
    else:
        # fit remainders into an additional column
        n_max = time_data.shape[1] / num_time_per_bin + 1
    return np.array(
      [time_data[:, num_time_per_bin * i:num_time_per_bin * (i+1)].mean(axis=1)
       for i in range(n_max)]).T
 def get_prob_dist(transition_matrix, lag_indices, unit_indices):
    """
        Given an array of transition matrices, look up the probability
        associated with the arrangements passed
        Input:
        @param transition_matrix ndarray[k,k,k]:
        @param lag_indices ndarray:
        @param unit_indices ndarray:
        Output:
        Array of probability distributions
    """
    return np.array([transition_matrix[(lag_indices[i], unit_indices[i])]
                     for i in range(len(lag_indices))])
 def get_prob_stats(prob_dist, unit_indices):
    """
        get the statistics of the probability distributions
        Outputs:
            @param trend_up ndarray(float): sum of probabilities for upward
               movement (relative to the unit index of that prob)
            @param trend_down ndarray(float): sum of probabilities for downward
               movement (relative to the unit index of that prob)
            @param trend ndarray(float): difference of upward and downward
               movements
    """
    num_elements = len(unit_indices)
    trend_up = np.empty(num_elements, dtype=float)
    trend_down = np.empty(num_elements, dtype=float)
    trend = np.empty(num_elements, dtype=float)
    for i in range(num_elements):
        trend_up[i] = prob_dist[i, (unit_indices[i]+1):].sum()
        trend_down[i] = prob_dist[i, :unit_indices[i]].sum()
        if prob_dist[i, unit_indices[i]] > 0.0:
            trend[i] = (trend_up[i] - trend_down[i]) / (
              prob_dist[i, unit_indices[i]])
        else:
            trend[i] = None
    # calculate volatility of distribution
    volatility = prob_dist.std(axis=1)
    return trend_up, trend_down, trend, volatility
--- a/release/python/0.7.0/crankshaft/requirements.txt
+++ b/release/python/0.7.0/crankshaft/requirements.txt
@ -0,0 +1,5 @@
 joblib==0.8.3
 numpy==1.6.1
 scipy==0.14.0
 pysal==1.14.3
 scikit-learn==0.14.1
--- a/release/python/0.7.0/crankshaft/setup.py
+++ b/release/python/0.7.0/crankshaft/setup.py
@ -0,0 +1,49 @@
 """
 CartoDB Spatial Analysis Python Library
 See:
 https://github.com/CartoDB/crankshaft
 """
 from setuptools import setup, find_packages
 setup(
    name='crankshaft',
    version='0.7.0',
    description='CartoDB Spatial Analysis Python Library',
    url='https://github.com/CartoDB/crankshaft',
    author='Data Services Team - CartoDB',
    author_email='dataservices@cartodb.com',
    license='MIT',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Mapping comunity',
        'Topic :: Maps :: Mapping Tools',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 2.7',
    ],
    keywords='maps mapping tools spatial analysis geostatistics',
    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
    extras_require={
        'dev': ['unittest'],
        'test': ['unittest', 'nose', 'mock'],
    },
    # The choice of component versions is dictated by what's
    # provisioned in the production servers.
    # IMPORTANT NOTE: please don't change this line. Instead issue a ticket to systems for evaluation.
    install_requires=['joblib==0.8.3', 'numpy==1.6.1', 'scipy==0.14.0', 'pysal==1.14.3', 'scikit-learn==0.14.1'],
    requires=['pysal', 'numpy', 'sklearn'],
    test_suite='test'
 )
--- a/release/python/0.7.0/crankshaft/test/fixtures/getis.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/getis.json
@ -0,0 +1 @@
 [[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]]
--- a/release/python/0.7.0/crankshaft/test/fixtures/gwr_packed_data.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/gwr_packed_data.json
--- a/release/python/0.7.0/crankshaft/test/fixtures/gwr_packed_knowns.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/gwr_packed_knowns.json
--- a/release/python/0.7.0/crankshaft/test/fixtures/kmeans.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/kmeans.json
@ -0,0 +1 @@
 [{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
--- a/release/python/0.7.0/crankshaft/test/fixtures/markov.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/markov.json
@ -0,0 +1 @@
 [[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]
--- a/release/python/0.7.0/crankshaft/test/fixtures/moran.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/moran.json
@ -0,0 +1,52 @@
 [[0.9319096128346788, "HH"],
 [-1.135787401862846, "HL"],
 [0.11732030672508517, "LL"],
 [0.6152779669180425, "LL"],
 [-0.14657336660125297, "LH"],
 [0.6967858120189607, "LL"],
 [0.07949310115714454, "HH"],
 [0.4703198759258987, "HH"],
 [0.4421125200498064, "HH"],
 [0.5724288737143592, "LL"],
 [0.8970743435692062, "LL"],
 [0.18327334401918674, "LL"],
 [-0.01466729201304962, "HL"],
 [0.3481559372544409, "LL"],
 [0.06547094736902978, "LL"],
 [0.15482141569329988, "HH"],
 [0.4373841193538136, "HH"],
 [0.15971286468915544, "LL"],
 [1.0543588860308968, "HH"],
 [1.7372866900020818, "HH"],
 [1.091998586053999, "LL"],
 [0.1171572584252222, "HH"],
 [0.08438455015300014, "LL"],
 [0.06547094736902978, "LL"],
 [0.15482141569329985, "HH"],
 [1.1627044812890683, "HH"],
 [0.06547094736902978, "LL"],
 [0.795275137550483, "HH"],
 [0.18562939195219, "LL"],
 [0.3010757406693439, "LL"],
 [2.8205795942839376, "HH"],
 [0.11259190602909264, "LL"],
 [-0.07116352791516614, "HL"],
 [-0.09945240794119009, "LH"],
 [0.18562939195219, "LL"],
 [0.1832733440191868, "LL"],
 [-0.39054253768447705, "HL"],
 [-0.1672071289487642, "HL"],
 [0.3337669247916343, "HH"],
 [0.2584386102554792, "HH"],
 [-0.19733845476322634, "HL"],
 [-0.9379282899805409, "LH"],
 [-0.028770969951095866, "LH"],
 [0.051367269430983485, "LL"],
 [-0.2172548045913472, "LH"],
 [0.05136726943098351, "LL"],
 [0.04191046803899837, "LL"],
 [0.7482357030403517, "HH"],
 [-0.014585767863118111, "LH"],
 [0.5410013139159929, "HH"],
 [1.0223932668429925, "LL"],
 [1.4179402898927476, "LL"]]
--- a/release/python/0.7.0/crankshaft/test/fixtures/neighbors.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/neighbors.json
@ -0,0 +1,54 @@
 [
    {"neighbors": [48, 26, 20, 9, 31], "id": 1, "value": 0.5},
    {"neighbors": [30, 16, 46, 3, 4], "id": 2, "value": 0.7},
    {"neighbors": [46, 30, 2, 12, 16], "id": 3, "value": 0.2},
    {"neighbors": [18, 30, 23, 2, 52], "id": 4, "value": 0.1},
    {"neighbors": [47, 40, 45, 37, 28], "id": 5, "value": 0.3},
    {"neighbors": [10, 21, 41, 14, 37], "id": 6, "value": 0.05},
    {"neighbors": [8, 17, 43, 25, 12], "id": 7, "value": 0.4},
    {"neighbors": [17, 25, 43, 22, 7], "id": 8, "value": 0.7},
    {"neighbors": [39, 34, 1, 26, 48], "id": 9, "value": 0.5},
    {"neighbors": [6, 37, 5, 45, 49], "id": 10, "value": 0.04},
    {"neighbors": [51, 41, 29, 21, 14], "id": 11, "value": 0.08},
    {"neighbors": [44, 46, 43, 50, 3], "id": 12, "value": 0.2},
    {"neighbors": [45, 23, 14, 28, 18], "id": 13, "value": 0.4},
    {"neighbors": [41, 29, 13, 23, 6], "id": 14, "value": 0.2},
    {"neighbors": [36, 27, 32, 33, 24], "id": 15, "value": 0.3},
    {"neighbors": [19, 2, 46, 44, 28], "id": 16, "value": 0.4},
    {"neighbors": [8, 25, 43, 7, 22], "id": 17, "value": 0.6},
    {"neighbors": [23, 4, 29, 14, 13], "id": 18, "value": 0.3},
    {"neighbors": [42, 16, 28, 26, 40], "id": 19, "value": 0.7},
    {"neighbors": [1, 48, 31, 26, 42], "id": 20, "value": 0.8},
    {"neighbors": [41, 6, 11, 14, 10], "id": 21, "value": 0.1},
    {"neighbors": [25, 50, 43, 31, 44], "id": 22, "value": 0.4},
    {"neighbors": [18, 13, 14, 4, 2], "id": 23, "value": 0.1},
    {"neighbors": [33, 49, 34, 47, 27], "id": 24, "value": 0.3},
    {"neighbors": [43, 8, 22, 17, 50], "id": 25, "value": 0.4},
    {"neighbors": [1, 42, 20, 31, 48], "id": 26, "value": 0.6},
    {"neighbors": [32, 15, 36, 33, 24], "id": 27, "value": 0.3},
    {"neighbors": [40, 45, 19, 5, 13], "id": 28, "value": 0.8},
    {"neighbors": [11, 51, 41, 14, 18], "id": 29, "value": 0.3},
    {"neighbors": [2, 3, 4, 46, 18], "id": 30, "value": 0.1},
    {"neighbors": [20, 26, 1, 50, 48], "id": 31, "value": 0.9},
    {"neighbors": [27, 36, 15, 49, 24], "id": 32, "value": 0.3},
    {"neighbors": [24, 27, 49, 34, 32], "id": 33, "value": 0.4},
    {"neighbors": [47, 9, 39, 40, 24], "id": 34, "value": 0.3},
    {"neighbors": [38, 51, 11, 21, 41], "id": 35, "value": 0.3},
    {"neighbors": [15, 32, 27, 49, 33], "id": 36, "value": 0.2},
    {"neighbors": [49, 10, 5, 47, 24], "id": 37, "value": 0.5},
    {"neighbors": [35, 21, 51, 11, 41], "id": 38, "value": 0.4},
    {"neighbors": [9, 34, 48, 1, 47], "id": 39, "value": 0.6},
    {"neighbors": [28, 47, 5, 9, 34], "id": 40, "value": 0.5},
    {"neighbors": [11, 14, 29, 21, 6], "id": 41, "value": 0.4},
    {"neighbors": [26, 19, 1, 9, 31], "id": 42, "value": 0.2},
    {"neighbors": [25, 12, 8, 22, 44], "id": 43, "value": 0.3},
    {"neighbors": [12, 50, 46, 16, 43], "id": 44, "value": 0.2},
    {"neighbors": [28, 13, 5, 40, 19], "id": 45, "value": 0.3},
    {"neighbors": [3, 12, 44, 2, 16], "id": 46, "value": 0.2},
    {"neighbors": [34, 40, 5, 49, 24], "id": 47, "value": 0.3},
    {"neighbors": [1, 20, 26, 9, 39], "id": 48, "value": 0.5},
    {"neighbors": [24, 37, 47, 5, 33], "id": 49, "value": 0.2},
    {"neighbors": [44, 22, 31, 42, 26], "id": 50, "value": 0.6},
    {"neighbors": [11, 29, 41, 14, 21], "id": 51, "value": 0.01},
    {"neighbors": [4, 18, 29, 51, 23], "id": 52, "value": 0.01}
  ]
--- a/release/python/0.7.0/crankshaft/test/fixtures/neighbors_getis.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/neighbors_getis.json
--- a/release/python/0.7.0/crankshaft/test/fixtures/neighbors_markov.json
+++ b/release/python/0.7.0/crankshaft/test/fixtures/neighbors_markov.json
--- a/release/python/0.7.0/crankshaft/test/helper.py
+++ b/release/python/0.7.0/crankshaft/test/helper.py
@ -0,0 +1,13 @@
 import unittest
 from mock_plpy import MockPlPy
 plpy = MockPlPy()
 import sys
 sys.modules['plpy'] = plpy
 import os
 def fixture_file(name):
    dir = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(dir, 'fixtures', name)
--- a/release/python/0.7.0/crankshaft/test/mock_plpy.py
+++ b/release/python/0.7.0/crankshaft/test/mock_plpy.py
@ -0,0 +1,57 @@
 import re
 class MockCursor:
    def __init__(self, data):
        self.cursor_pos = 0
        self.data = data
    def fetch(self, batch_size):
        batch = self.data[self.cursor_pos:self.cursor_pos + batch_size]
        self.cursor_pos += batch_size
        return batch
 class MockPlPy:
    def __init__(self):
        self._reset()
    def _reset(self):
        self.infos = []
        self.notices = []
        self.debugs = []
        self.logs = []
        self.warnings = []
        self.errors = []
        self.fatals = []
        self.executes = []
        self.results = []
        self.prepares = []
        self.results = []
    def _define_result(self, query, result):
        pattern = re.compile(query, re.IGNORECASE | re.MULTILINE)
        self.results.append([pattern, result])
    def notice(self, msg):
        self.notices.append(msg)
    def debug(self, msg):
        self.notices.append(msg)
    def info(self, msg):
        self.infos.append(msg)
    def error(self, msg):
        self.notices.append(msg)
    def cursor(self, query):
        data = self.execute(query)
        return MockCursor(data)
    # TODO: additional arguments
    def execute(self, query):
        for result in self.results:
            if result[0].match(query):
                return result[1]
        return []
--- a/release/python/0.7.0/crankshaft/test/test_clustering_getis.py
+++ b/release/python/0.7.0/crankshaft/test/test_clustering_getis.py
@ -0,0 +1,78 @@
 import unittest
 import numpy as np
 from helper import fixture_file
 from crankshaft.clustering import Getis
 import crankshaft.pysal_utils as pu
 from crankshaft import random_seeds
 import json
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 # Fixture files produced as follows
 #
 # import pysal as ps
 # import numpy as np
 # import random
 #
 # # setup variables
 # f = ps.open(ps.examples.get_path("stl_hom.dbf"))
 # y = np.array(f.by_col['HR8893'])
 # w_queen = ps.queen_from_shapefile(ps.examples.get_path("stl_hom.shp"))
 #
 # out_queen = [{"id": index + 1,
 #               "neighbors": [x+1 for x in w_queen.neighbors[index]],
 #               "value": val} for index, val in enumerate(y)]
 #
 # with open('neighbors_queen_getis.json', 'w') as f:
 #     f.write(str(out_queen))
 #
 # random.seed(1234)
 # np.random.seed(1234)
 # lgstar_queen = ps.esda.getisord.G_Local(y, w_queen, star=True,
 #                                         permutations=999)
 #
 # with open('getis_queen.json', 'w') as f:
 #     f.write(str(zip(lgstar_queen.z_sim,
 #                     lgstar_queen.p_sim, lgstar_queen.p_z_sim)))
 class FakeDataProvider(AnalysisDataProvider):
    def __init__(self, mock_data):
        self.mock_result = mock_data
    def get_getis(self, w_type, param):
        return self.mock_result
 class GetisTest(unittest.TestCase):
    """Testing class for Getis-Ord's G* funtion
       This test replicates the work done in PySAL documentation:
          https://pysal.readthedocs.io/en/v1.11.0/users/tutorials/autocorrelation.html#local-g-and-g
    """
    def setUp(self):
        # load raw data for analysis
        self.neighbors_data = json.loads(
          open(fixture_file('neighbors_getis.json')).read())
        # load pre-computed/known values
        self.getis_data = json.loads(
          open(fixture_file('getis.json')).read())
    def test_getis_ord(self):
        """Test Getis-Ord's G*"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1234)
        getis = Getis(FakeDataProvider(data))
        result = getis.getis_ord('subquery', 'value',
                                 'queen', None, 999, 'the_geom',
                                 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        expected = np.array(self.getis_data)[:, 0:2]
        for ([res_z, res_p], [exp_z, exp_p]) in zip(result, expected):
            self.assertAlmostEqual(res_z, exp_z, delta=1e-2)
--- a/release/python/0.7.0/crankshaft/test/test_clustering_kmeans.py
+++ b/release/python/0.7.0/crankshaft/test/test_clustering_kmeans.py
@ -0,0 +1,87 @@
 import unittest
 import numpy as np
 from helper import fixture_file
 from crankshaft.clustering import Kmeans
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 import crankshaft.clustering as cc
 from crankshaft import random_seeds
 import json
 from collections import OrderedDict
 class FakeDataProvider(AnalysisDataProvider):
    def __init__(self, mocked_result):
        self.mocked_result = mocked_result
    def get_spatial_kmeans(self, query):
        return self.mocked_result
    def get_nonspatial_kmeans(self, query):
        return self.mocked_result
 class KMeansTest(unittest.TestCase):
    """Testing class for k-means spatial"""
    def setUp(self):
        self.cluster_data = json.loads(
          open(fixture_file('kmeans.json')).read())
        self.params = {"subquery": "select * from table",
                       "no_clusters": "10"}
    def test_kmeans(self):
        """
        """
        data = [{'xs': d['xs'],
                 'ys': d['ys'],
                 'ids': d['ids']} for d in self.cluster_data]
        random_seeds.set_random_seeds(1234)
        kmeans = Kmeans(FakeDataProvider(data))
        clusters = kmeans.spatial('subquery', 2)
        labels = [a[1] for a in clusters]
        c1 = [a for a in clusters if a[1] == 0]
        c2 = [a for a in clusters if a[1] == 1]
        self.assertEqual(len(np.unique(labels)), 2)
        self.assertEqual(len(c1), 20)
        self.assertEqual(len(c2), 20)
 class KMeansNonspatialTest(unittest.TestCase):
    """Testing class for k-means non-spatial"""
    def setUp(self):
        self.params = {"subquery": "SELECT * FROM TABLE",
                       "n_clusters": 5}
    def test_kmeans_nonspatial(self):
        """
            test for k-means non-spatial
        """
        # data from:
        # http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn-cluster-kmeans
        data_raw = [OrderedDict([("arr_col1", [1, 1, 1, 4, 4, 4]),
                                 ("arr_col2", [2, 4, 0, 2, 4, 0]),
                                 ("rowid", [1, 2, 3, 4, 5, 6])])]
        random_seeds.set_random_seeds(1234)
        kmeans = Kmeans(FakeDataProvider(data_raw))
        clusters = kmeans.nonspatial('subquery', ['col1', 'col2'], 2)
        cl1 = clusters[0][0]
        cl2 = clusters[3][0]
        for idx, val in enumerate(clusters):
            if idx < 3:
                self.assertEqual(val[0], cl1)
            else:
                self.assertEqual(val[0], cl2)
        # raises exception for no data
        with self.assertRaises(Exception):
            kmeans = Kmeans(FakeDataProvider([]))
            kmeans.nonspatial('subquery', ['col1', 'col2'], 2)
--- a/release/python/0.7.0/crankshaft/test/test_clustering_moran.py
+++ b/release/python/0.7.0/crankshaft/test/test_clustering_moran.py
@ -0,0 +1,112 @@
 import unittest
 import numpy as np
 from helper import fixture_file
 from crankshaft.clustering import Moran
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 import crankshaft.pysal_utils as pu
 from crankshaft import random_seeds
 import json
 from collections import OrderedDict
 class FakeDataProvider(AnalysisDataProvider):
    def __init__(self, mock_data):
        self.mock_result = mock_data
    def get_moran(self, w_type, params):
        return self.mock_result
 class MoranTest(unittest.TestCase):
    """Testing class for Moran's I functions"""
    def setUp(self):
        self.params = {"id_col": "cartodb_id",
                       "attr1": "andy",
                       "attr2": "jay_z",
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.params_markov = {"id_col": "cartodb_id",
                              "time_cols": ["_2013_dec", "_2014_jan",
                                            "_2014_feb"],
                              "subquery": "SELECT * FROM a_list",
                              "geom_col": "the_geom",
                              "num_ngbrs": 321}
        self.neighbors_data = json.loads(
          open(fixture_file('neighbors.json')).read())
        self.moran_data = json.loads(
          open(fixture_file('moran.json')).read())
    def test_map_quads(self):
        """Test map_quads"""
        from crankshaft.clustering import map_quads
        self.assertEqual(map_quads(1), 'HH')
        self.assertEqual(map_quads(2), 'LH')
        self.assertEqual(map_quads(3), 'LL')
        self.assertEqual(map_quads(4), 'HL')
        self.assertEqual(map_quads(33), None)
        self.assertEqual(map_quads('andy'), None)
    def test_quad_position(self):
        """Test lisa_sig_vals"""
        from crankshaft.clustering import quad_position
        quads = np.array([1, 2, 3, 4], np.int)
        ans = np.array(['HH', 'LH', 'LL', 'HL'])
        test_ans = quad_position(quads)
        self.assertTrue((test_ans == ans).all())
    def test_local_stat(self):
        """Test Moran's I local"""
        data = [OrderedDict([('id', d['id']),
                             ('attr1', d['value']),
                             ('neighbors', d['neighbors'])])
                for d in self.neighbors_data]
        moran = Moran(FakeDataProvider(data))
        random_seeds.set_random_seeds(1234)
        result = moran.local_stat('subquery', 'value',
                                  'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
            self.assertEqual(res_quad, exp_quad)
    def test_moran_local_rate(self):
        """Test Moran's I rate"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'attr2': 1,
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1234)
        moran = Moran(FakeDataProvider(data))
        result = moran.local_rate_stat('subquery', 'numerator', 'denominator',
                                       'knn', 5, 99, 'the_geom', 'cartodb_id')
        result = [(row[0], row[1]) for row in result]
        zipped_values = zip(result, self.moran_data)
        for ([res_val, res_quad], [exp_val, exp_quad]) in zipped_values:
            self.assertAlmostEqual(res_val, exp_val)
    def test_moran(self):
        """Test Moran's I global"""
        data = [{'id': d['id'],
                 'attr1': d['value'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        random_seeds.set_random_seeds(1235)
        moran = Moran(FakeDataProvider(data))
        result = moran.global_stat('table', 'value',
                                   'knn', 5, 99, 'the_geom',
                                   'cartodb_id')
        result_moran = result[0][0]
        expected_moran = np.array([row[0] for row in self.moran_data]).mean()
        self.assertAlmostEqual(expected_moran, result_moran, delta=10e-2)
--- a/release/python/0.7.0/crankshaft/test/test_pysal_utils.py
+++ b/release/python/0.7.0/crankshaft/test/test_pysal_utils.py
@ -0,0 +1,83 @@
 import unittest
 import crankshaft.pysal_utils as pu
 from crankshaft import random_seeds
 from collections import OrderedDict
 class PysalUtilsTest(unittest.TestCase):
    """Testing class for utility functions related to PySAL integrations"""
    def setUp(self):
        self.params1 = OrderedDict([("id_col", "cartodb_id"),
                                    ("attr1", "andy"),
                                    ("attr2", "jay_z"),
                                    ("subquery", "SELECT * FROM a_list"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        self.params2 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "price"),
                                    ("denominator", "sq_meters"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        self.params3 = OrderedDict([("id_col", "cartodb_id"),
                                    ("numerator", "sq_meters"),
                                    ("denominator", "price"),
                                    ("subquery", "SELECT * FROM pecan"),
                                    ("geom_col", "the_geom"),
                                    ("num_ngbrs", 321)])
        self.params_array = {"id_col": "cartodb_id",
                             "time_cols": ["_2013_dec", "_2014_jan", "_2014_feb"],
                             "subquery": "SELECT * FROM a_list",
                             "geom_col": "the_geom",
                             "num_ngbrs": 321}
    def test_query_attr_select(self):
        """Test query_attr_select"""
        ans1 = ("i.\"andy\"::numeric As attr1, "
                "i.\"jay_z\"::numeric As attr2, ")
        ans2 = ("i.\"price\"::numeric As attr1, "
                "i.\"sq_meters\"::numeric As attr2, ")
        ans3 = ("i.\"sq_meters\"::numeric As attr1, "
                "i.\"price\"::numeric As attr2, ")
        ans_array = ("i.\"_2013_dec\"::numeric As attr1, "
                     "i.\"_2014_jan\"::numeric As attr2, "
                     "i.\"_2014_feb\"::numeric As attr3, ")
        self.assertEqual(pu.query_attr_select(self.params1), ans1)
        self.assertEqual(pu.query_attr_select(self.params2), ans2)
        self.assertEqual(pu.query_attr_select(self.params3), ans3)
        self.assertEqual(pu.query_attr_select(self.params_array), ans_array)
    def test_query_attr_where(self):
        """Test pu.query_attr_where"""
        ans1 = ("idx_replace.\"andy\" IS NOT NULL AND "
                "idx_replace.\"jay_z\" IS NOT NULL")
        ans_array = ("idx_replace.\"_2013_dec\" IS NOT NULL AND "
                     "idx_replace.\"_2014_jan\" IS NOT NULL AND "
                     "idx_replace.\"_2014_feb\" IS NOT NULL")
        self.assertEqual(pu.query_attr_where(self.params1), ans1)
        self.assertEqual(pu.query_attr_where(self.params_array), ans_array)
    def test_get_attributes(self):
        """Test get_attributes"""
        # need to add tests
        self.assertEqual(True, True)
    def test_get_weight(self):
        """Test get_weight"""
        self.assertEqual(True, True)
--- a/release/python/0.7.0/crankshaft/test/test_regression_gwr.py
+++ b/release/python/0.7.0/crankshaft/test/test_regression_gwr.py
@ -0,0 +1,130 @@
 import unittest
 import json
 import numpy as np
 from crankshaft import random_seeds
 from helper import fixture_file
 from crankshaft.regression import GWR
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 class FakeDataProvider(AnalysisDataProvider):
    def __init__(self, mocked_result):
        self.mocked_result = mocked_result
    def get_gwr(self, params):
        return self.mocked_result
    def get_gwr_predict(self, params):
        return self.mocked_result
 class GWRTest(unittest.TestCase):
    """Testing class for geographically weighted regression (gwr)"""
    def setUp(self):
        """
            fixture packed from canonical GWR georgia dataset using the
            following query:
                SELECT array_agg(x) As x,
                       array_agg(y) As y,
                       array_agg(pctbach) As dep_var,
                       array_agg(pctrural) As attr1,
                       array_agg(pctpov) As attr2,
                       array_agg(pctblack) As attr3,
                       array_agg(areakey) As rowid
                FROM g_utm
                WHERE pctbach is not NULL AND
                      pctrural IS NOT NULL AND
                      pctpov IS NOT NULL AND
                      pctblack IS NOT NULL
        """
        import copy
        # data packed from https://github.com/TaylorOshan/pysal/blob/1d6af33bda46b1d623f70912c56155064463383f/pysal/examples/georgia/GData_utm.csv
        self.data = json.loads(
              open(fixture_file('gwr_packed_data.json')).read())
        # data packed from https://github.com/TaylorOshan/pysal/blob/a44c5541e2e0d10a99ff05edc1b7f81b70f5a82f/pysal/examples/georgia/georgia_BS_NN_listwise.csv
        self.knowns = json.loads(
              open(fixture_file('gwr_packed_knowns.json')).read())
        # data for GWR prediction
        self.data_predict = copy.deepcopy(self.data)
        self.ids_of_unknowns = [13083, 13009, 13281, 13115, 13247, 13169]
        self.idx_ids_of_unknowns = [self.data_predict[0]['rowid'].index(idx)
                                    for idx in self.ids_of_unknowns]
        for idx in self.idx_ids_of_unknowns:
            self.data_predict[0]['dep_var'][idx] = None
        self.predicted_knowns = {13009: 10.879,
                                 13083: 4.5259,
                                 13115: 9.4022,
                                 13169: 6.0793,
                                 13247: 8.1608,
                                 13281: 13.886}
        # params, with ind_vars in same ordering as query above
        self.params = {'subquery': 'select * from table',
                       'dep_var': 'pctbach',
                       'ind_vars': ['pctrural', 'pctpov', 'pctblack'],
                       'bw': 90.000,
                       'fixed': False,
                       'geom_col': 'the_geom',
                       'id_col': 'areakey'}
    def test_gwr(self):
        """
        """
        gwr = GWR(FakeDataProvider(self.data))
        gwr_resp = gwr.gwr(self.params['subquery'],
                           self.params['dep_var'],
                           self.params['ind_vars'],
                           bw=self.params['bw'],
                           fixed=self.params['fixed'])
        # unpack response
        coeffs, stand_errs, t_vals, t_vals_filtered, predicteds, \
            residuals, r_squareds, bws, rowids = zip(*gwr_resp)
        # prepare for comparision
        coeff_known_pctpov = self.knowns['est_pctpov']
        tval_known_pctblack = self.knowns['t_pctrural']
        pctpov_se = self.knowns['se_pctpov']
        ids = self.knowns['area_key']
        resp_idx = None
        # test pctpov coefficient estimates
        for idx, val in enumerate(coeff_known_pctpov):
            resp_idx = rowids.index(ids[idx])
            self.assertAlmostEquals(val,
                                    json.loads(coeffs[resp_idx])['pctpov'],
                                    places=4)
        # test pctrural tvals
        for idx, val in enumerate(tval_known_pctblack):
            resp_idx = rowids.index(ids[idx])
            self.assertAlmostEquals(val,
                                    json.loads(t_vals[resp_idx])['pctrural'],
                                    places=4)
    def test_gwr_predict(self):
        """Testing for GWR_Predict"""
        gwr = GWR(FakeDataProvider(self.data_predict))
        gwr_resp = gwr.gwr_predict(self.params['subquery'],
                                   self.params['dep_var'],
                                   self.params['ind_vars'],
                                   bw=self.params['bw'],
                                   fixed=self.params['fixed'])
        # unpack response
        coeffs, stand_errs, t_vals, \
            r_squareds, predicteds, rowid = zip(*gwr_resp)
        threshold = 0.01
        for i, idx in enumerate(self.idx_ids_of_unknowns):
            known_val = self.predicted_knowns[rowid[i]]
            predicted_val = predicteds[i]
            test_val = abs(known_val - predicted_val) / known_val
            self.assertTrue(test_val < threshold)
--- a/release/python/0.7.0/crankshaft/test/test_segmentation.py
+++ b/release/python/0.7.0/crankshaft/test/test_segmentation.py
@ -0,0 +1,64 @@
 import unittest
 import numpy as np
 from helper import plpy, fixture_file
 import crankshaft.segmentation as segmentation
 import json
 class SegmentationTest(unittest.TestCase):
    """Testing class for Moran's I functions"""
    def setUp(self):
        plpy._reset()
    def generate_random_data(self,n_samples,random_state,  row_type=False):
        x1 = random_state.uniform(size=n_samples)
        x2 = random_state.uniform(size=n_samples)
        x3 = random_state.randint(0, 4, size=n_samples)
        y = x1+x2*x2+x3
        cartodb_id  = range(len(x1))
        if row_type:
            return [ {'features': vals} for vals in zip(x1,x2,x3)], y
        else:
            return  [dict( zip(['x1','x2','x3','target', 'cartodb_id'],[x1,x2,x3,y,cartodb_id]))]
    def test_replace_nan_with_mean(self):
        test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
    def test_create_and_predict_segment(self):
        n_samples = 1000
        random_state_train = np.random.RandomState(13)
        random_state_test = np.random.RandomState(134)
        training_data = self.generate_random_data(n_samples, random_state_train)
        test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True)
        ids =  [{'cartodb_ids': range(len(test_data))}]
        rows =  [{'x1': 0,'x2':0,'x3':0,'y':0,'cartodb_id':0}]
        plpy._define_result('select \* from  \(select \* from training\) a  limit 1',rows)
        plpy._define_result('.*from \(select \* from training\) as a' ,training_data)
        plpy._define_result('select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a',ids)
        plpy._define_result('.*select \* from test.*' ,test_data)
        model_parameters =  {'n_estimators': 1200,
                             'max_depth': 3,
                             'subsample' : 0.5,
                             'learning_rate': 0.01,
                             'min_samples_leaf': 1}
        result = segmentation.create_and_predict_segment(
                'select * from training',
                'target',
                'select * from test',
                model_parameters)
        prediction = [r[1] for r in result]
        accuracy =np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y))))
        self.assertEqual(len(result),len(test_data))
        self.assertTrue( result[0][2] < 0.01)
        self.assertTrue( accuracy < 0.5*np.mean(test_y)  )
--- a/release/python/0.7.0/crankshaft/test/test_space_time_dynamics.py
+++ b/release/python/0.7.0/crankshaft/test/test_space_time_dynamics.py
@ -0,0 +1,349 @@
 import unittest
 import numpy as np
 import unittest
 from helper import fixture_file
 from crankshaft.space_time_dynamics import Markov
 import crankshaft.space_time_dynamics as std
 from crankshaft import random_seeds
 from crankshaft.analysis_data_provider import AnalysisDataProvider
 import json
 class FakeDataProvider(AnalysisDataProvider):
    def __init__(self, data):
        self.mock_result = data
    def get_markov(self, w_type, params):
        return self.mock_result
 class SpaceTimeTests(unittest.TestCase):
    """Testing class for Markov Functions."""
    def setUp(self):
        self.params = {"id_col": "cartodb_id",
                       "time_cols": ['dec_2013', 'jan_2014', 'feb_2014'],
                       "subquery": "SELECT * FROM a_list",
                       "geom_col": "the_geom",
                       "num_ngbrs": 321}
        self.neighbors_data = json.loads(
          open(fixture_file('neighbors_markov.json')).read())
        self.markov_data = json.loads(open(fixture_file('markov.json')).read())
        self.time_data = np.array([i * np.ones(10, dtype=float)
                                   for i in range(10)]).T
        self.transition_matrix = np.array([
                [[0.96341463, 0.0304878, 0.00609756, 0., 0.],
                 [0.06040268, 0.83221477, 0.10738255, 0., 0.],
                 [0., 0.14, 0.74, 0.12, 0.],
                 [0., 0.03571429, 0.32142857, 0.57142857, 0.07142857],
                 [0., 0., 0., 0.16666667, 0.83333333]],
                [[0.79831933, 0.16806723, 0.03361345, 0., 0.],
                 [0.0754717, 0.88207547, 0.04245283, 0., 0.],
                 [0.00537634, 0.06989247, 0.8655914, 0.05913978, 0.],
                 [0., 0., 0.06372549, 0.90196078, 0.03431373],
                 [0., 0., 0., 0.19444444, 0.80555556]],
                [[0.84693878, 0.15306122, 0., 0., 0.],
                 [0.08133971, 0.78947368, 0.1291866, 0., 0.],
                 [0.00518135, 0.0984456, 0.79274611, 0.0984456, 0.00518135],
                 [0., 0., 0.09411765, 0.87058824, 0.03529412],
                 [0., 0., 0., 0.10204082, 0.89795918]],
                [[0.8852459, 0.09836066, 0., 0.01639344, 0.],
                 [0.03875969, 0.81395349, 0.13953488, 0., 0.00775194],
                 [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
                 [0., 0.02339181, 0.12865497, 0.75438596, 0.09356725],
                 [0., 0., 0., 0.09661836, 0.90338164]],
                [[0.33333333, 0.66666667, 0., 0., 0.],
                 [0.0483871, 0.77419355, 0.16129032, 0.01612903, 0.],
                 [0.01149425, 0.16091954, 0.74712644, 0.08045977, 0.],
                 [0., 0.01036269, 0.06217617, 0.89637306, 0.03108808],
                 [0., 0., 0., 0.02352941, 0.97647059]]]
                 )
    def test_spatial_markov(self):
        """Test Spatial Markov."""
        data = [{'id': d['id'],
                 'attr1': d['y1995'],
                 'attr2': d['y1996'],
                 'attr3': d['y1997'],
                 'attr4': d['y1998'],
                 'attr5': d['y1999'],
                 'attr6': d['y2000'],
                 'attr7': d['y2001'],
                 'attr8': d['y2002'],
                 'attr9': d['y2003'],
                 'attr10': d['y2004'],
                 'attr11': d['y2005'],
                 'attr12': d['y2006'],
                 'attr13': d['y2007'],
                 'attr14': d['y2008'],
                 'attr15': d['y2009'],
                 'neighbors': d['neighbors']} for d in self.neighbors_data]
        # print(str(data[0]))
        markov = Markov(FakeDataProvider(data))
        random_seeds.set_random_seeds(1234)
        result = markov.spatial_trend('subquery',
                                      ['y1995', 'y1996', 'y1997', 'y1998',
                                       'y1999', 'y2000', 'y2001', 'y2002',
                                       'y2003', 'y2004', 'y2005', 'y2006',
                                       'y2007', 'y2008', 'y2009'],
                                      5, 'knn', 5, 0, 'the_geom',
                                      'cartodb_id')
        self.assertTrue(result is not None)
        result = [(row[0], row[1], row[2], row[3], row[4]) for row in result]
        print result[0]
        expected = self.markov_data
        for ([res_trend, res_up, res_down, res_vol, res_id],
             [exp_trend, exp_up, exp_down, exp_vol, exp_id]
             ) in zip(result, expected):
            self.assertAlmostEqual(res_trend, exp_trend)
    def test_get_time_data(self):
        """Test get_time_data"""
        data = [{'attr1': d['y1995'],
                 'attr2': d['y1996'],
                 'attr3': d['y1997'],
                 'attr4': d['y1998'],
                 'attr5': d['y1999'],
                 'attr6': d['y2000'],
                 'attr7': d['y2001'],
                 'attr8': d['y2002'],
                 'attr9': d['y2003'],
                 'attr10': d['y2004'],
                 'attr11': d['y2005'],
                 'attr12': d['y2006'],
                 'attr13': d['y2007'],
                 'attr14': d['y2008'],
                 'attr15': d['y2009']} for d in self.neighbors_data]
        result = std.get_time_data(data, ['y1995', 'y1996', 'y1997', 'y1998',
                                          'y1999', 'y2000', 'y2001', 'y2002',
                                          'y2003', 'y2004', 'y2005', 'y2006',
                                          'y2007', 'y2008', 'y2009'])
        # expected was prepared from PySAL example:
        # f = ps.open(ps.examples.get_path("usjoin.csv"))
        # pci = np.array([f.by_col[str(y)]
        #                 for y in range(1995, 2010)]).transpose()
        # rpci = pci / (pci.mean(axis = 0))
        expected = np.array(
          [[0.87654416, 0.863147, 0.85637567, 0.84811668, 0.8446154,
            0.83271652,  0.83786314, 0.85012593, 0.85509656, 0.86416612,
            0.87119375, 0.86302631,  0.86148267, 0.86252252, 0.86746356],
           [0.9188951,  0.91757931, 0.92333258, 0.92517289, 0.92552388,
            0.90746978,  0.89830489, 0.89431991, 0.88924794, 0.89815176,
            0.91832091, 0.91706054,  0.90139505, 0.87897455, 0.86216858],
           [0.82591007, 0.82548596, 0.81989793, 0.81503235, 0.81731522,
            0.78964559,  0.80584442, 0.8084998,  0.82258551, 0.82668196,
            0.82373724, 0.81814804,  0.83675961, 0.83574199, 0.84647177],
           [1.09088176, 1.08537689, 1.08456418, 1.08415404, 1.09898841,
            1.14506948,  1.12151133, 1.11160697, 1.10888621, 1.11399806,
            1.12168029, 1.13164797,  1.12958508, 1.11371818, 1.09936775],
           [1.10731446, 1.11373944, 1.13283638, 1.14472559, 1.15910025,
            1.16898201,  1.17212488, 1.14752303, 1.11843284, 1.11024964,
            1.11943471, 1.11736468,  1.10863242, 1.09642516, 1.07762337],
           [1.42269757, 1.42118434, 1.44273502, 1.43577571, 1.44400684,
            1.44184737,  1.44782832, 1.41978227, 1.39092208, 1.4059372,
            1.40788646, 1.44052766,  1.45241216, 1.43306098, 1.4174431],
           [1.13073885, 1.13110513, 1.11074708, 1.13364636, 1.13088149,
            1.10888138,  1.11856629, 1.13062931, 1.11944984, 1.12446239,
            1.11671008, 1.10880034,  1.08401709, 1.06959206, 1.07875225],
           [1.04706124, 1.04516831, 1.04253372, 1.03239987, 1.02072545,
            0.99854316,  0.9880258,  0.99669587, 0.99327676, 1.01400905,
            1.03176742, 1.040511,  1.01749645, 0.9936394,  0.98279746],
           [0.98996986, 1.00143564, 0.99491,  1.00188408, 1.00455845,
            0.99127006,  0.97925917, 0.9683482,  0.95335147, 0.93694787,
            0.94308213, 0.92232874,  0.91284091, 0.89689833, 0.88928858],
           [0.87418391, 0.86416601, 0.84425695, 0.8404494,  0.83903044,
            0.8578708,  0.86036185, 0.86107306, 0.8500772,  0.86981998,
            0.86837929, 0.87204141,  0.86633032, 0.84946077, 0.83287146],
           [1.14196118, 1.14660262, 1.14892712, 1.14909594, 1.14436624,
            1.14450183,  1.12349752, 1.12596664, 1.12213996, 1.1119989,
            1.10257792, 1.10491258,  1.11059842, 1.10509795, 1.10020097],
           [0.97282463, 0.96700147, 0.96252588, 0.9653878,  0.96057687,
            0.95831051,  0.94480909, 0.94804195, 0.95430286, 0.94103989,
            0.92122519, 0.91010201,  0.89280392, 0.89298243, 0.89165385],
           [0.94325468, 0.96436902, 0.96455242, 0.95243009, 0.94117647,
            0.9480927,  0.93539182, 0.95388718, 0.94597005, 0.96918424,
            0.94781281, 0.93466815,  0.94281559, 0.96520315, 0.96715441],
           [0.97478408, 0.98169225, 0.98712809, 0.98474769, 0.98559897,
            0.98687073,  0.99237486, 0.98209969, 0.9877653,  0.97399471,
            0.96910087, 0.98416665,  0.98423613, 0.99823861, 0.99545704],
           [0.85570269, 0.85575915, 0.85986132, 0.85693406, 0.8538012,
            0.86191535,  0.84981451, 0.85472102, 0.84564835, 0.83998883,
            0.83478547, 0.82803648,  0.8198736,  0.82265395, 0.8399404],
           [0.87022047, 0.85996258, 0.85961813, 0.85689572, 0.83947136,
            0.82785597,  0.86008789, 0.86776298, 0.86720209, 0.8676334,
            0.89179317, 0.94202108,  0.9422231,  0.93902708, 0.94479184],
           [0.90134907, 0.90407738, 0.90403991, 0.90201769, 0.90399238,
            0.90906632,  0.92693339, 0.93695966, 0.94242697, 0.94338265,
            0.91981796, 0.91108804,  0.90543476, 0.91737138, 0.94793657],
           [1.1977611,  1.18222564, 1.18439158, 1.18267865, 1.19286723,
            1.20172869,  1.21328691, 1.22624778, 1.22397075, 1.23857042,
            1.24419893, 1.23929384,  1.23418676, 1.23626739, 1.26754398],
           [1.24919678, 1.25754773, 1.26991161, 1.28020651, 1.30625667,
            1.34790023,  1.34399863, 1.32575181, 1.30795492, 1.30544841,
            1.30303302, 1.32107766,  1.32936244, 1.33001241, 1.33288462],
           [1.06768004, 1.03799276, 1.03637303, 1.02768449, 1.03296093,
            1.05059016,  1.03405057, 1.02747623, 1.03162734, 0.9961416,
            0.97356208, 0.94241549,  0.92754547, 0.92549227, 0.92138102],
           [1.09475614, 1.11526796, 1.11654299, 1.13103948, 1.13143264,
            1.13889622,  1.12442212, 1.13367018, 1.13982256, 1.14029944,
            1.11979401, 1.10905389,  1.10577769, 1.11166825, 1.09985155],
           [0.76530058, 0.76612841, 0.76542451, 0.76722683, 0.76014284,
            0.74480073,  0.76098396, 0.76156903, 0.76651952, 0.76533288,
            0.78205934, 0.76842416,  0.77487118, 0.77768683, 0.78801192],
           [0.98391336, 0.98075816, 0.98295341, 0.97386015, 0.96913803,
            0.97370819,  0.96419154, 0.97209861, 0.97441313, 0.96356162,
            0.94745352, 0.93965462,  0.93069645, 0.94020973, 0.94358232],
           [0.83561828, 0.82298088, 0.81738502, 0.81748588, 0.80904801,
            0.80071489,  0.83358256, 0.83451613, 0.85175032, 0.85954307,
            0.86790024, 0.87170334,  0.87863799, 0.87497981, 0.87888675],
           [0.98845573, 1.02092428, 0.99665283, 0.99141823, 0.99386619,
            0.98733195,  0.99644997, 0.99669587, 1.02559097, 1.01116651,
            0.99988024, 0.97906749,  0.99323123, 1.00204939, 0.99602148],
           [1.14930913, 1.15241949, 1.14300962, 1.14265542, 1.13984683,
            1.08312397,  1.05192626, 1.04230892, 1.05577278, 1.08569751,
            1.12443486, 1.08891079,  1.08603695, 1.05997314, 1.02160943],
           [1.11368269, 1.1057147,  1.11893431, 1.13778669, 1.1432272,
            1.18257029,  1.16226243, 1.16009196, 1.14467789, 1.14820235,
            1.12386598, 1.12680236,  1.12357937, 1.1159258,  1.12570828],
           [1.30379431, 1.30752186, 1.31206366, 1.31532267, 1.30625667,
            1.31210239,  1.29989156, 1.29203193, 1.27183516, 1.26830786,
            1.2617743,  1.28656675,  1.29734097, 1.29390205, 1.29345446],
           [0.83953719, 0.82701448, 0.82006005, 0.81188876, 0.80294864,
            0.78772975,  0.82848011, 0.8259679,  0.82435705, 0.83108634,
            0.84373784, 0.83891093,  0.84349247, 0.85637272, 0.86539395],
           [1.23450087, 1.2426022,  1.23537935, 1.23581293, 1.24522626,
            1.2256767,  1.21126648, 1.19377804, 1.18355337, 1.19674434,
            1.21536573, 1.23653297,  1.27962009, 1.27968392, 1.25907738],
           [0.9769662,  0.97400719, 0.98035944, 0.97581531, 0.95543282,
            0.96480308,  0.94686376, 0.93679073, 0.92540049, 0.92988835,
            0.93442917, 0.92100464,  0.91475304, 0.90249622, 0.9021363],
           [0.84986886, 0.8986851,  0.84295997, 0.87280534, 0.85659368,
            0.88937573,  0.894401, 0.90448993, 0.95495898, 0.92698333,
            0.94745352, 0.92562488,  0.96635366, 1.02520312, 1.0394296],
           [1.01922808, 1.00258203, 1.00974428, 1.00303417, 0.99765073,
            1.00759019,  0.99192968, 0.99747298, 0.99550759, 0.97583768,
            0.9610168,  0.94779638,  0.93759089, 0.93353431, 0.94121705],
           [0.86367411, 0.85558932, 0.85544346, 0.85103025, 0.84336613,
            0.83434854,  0.85813595, 0.84667961, 0.84374558, 0.85951183,
            0.87194227, 0.89455097,  0.88283929, 0.90349491, 0.90600675],
           [1.00947534, 1.00411055, 1.00698819, 0.99513687, 0.99291086,
            1.00581626,  0.98850522, 0.99291168, 0.98983209, 0.97511924,
            0.96134615, 0.96382634,  0.95011401, 0.9434686,  0.94637765],
           [1.05712571, 1.05459419, 1.05753012, 1.04880786, 1.05103857,
            1.04800023,  1.03024941, 1.04200483, 1.0402554,  1.03296979,
            1.02191682, 1.02476275,  1.02347523, 1.02517684, 1.04359571],
           [1.07084189, 1.06669497, 1.07937623, 1.07387988, 1.0794043,
            1.0531801,  1.07452771, 1.09383478, 1.1052447,  1.10322136,
            1.09167939, 1.08772756,  1.08859544, 1.09177338, 1.1096083],
           [0.86719222, 0.86628896, 0.86675156, 0.86425632, 0.86511809,
            0.86287327,  0.85169796, 0.85411285, 0.84886336, 0.84517414,
            0.84843858, 0.84488343,  0.83374329, 0.82812044, 0.82878599],
           [0.88389211, 0.92288667, 0.90282398, 0.91229186, 0.92023286,
            0.92652175,  0.94278865, 0.93682452, 0.98655146, 0.992237,
            0.9798497,  0.93869677,  0.96947771, 1.00362626, 0.98102351],
           [0.97082064, 0.95320233, 0.94534081, 0.94215593, 0.93967,
            0.93092109,  0.92662519, 0.93412152, 0.93501274, 0.92879506,
            0.92110542, 0.91035556,  0.90430364, 0.89994694, 0.90073864],
           [0.95861858, 0.95774543, 0.98254811, 0.98919472, 0.98684824,
            0.98882205,  0.97662234, 0.95601578, 0.94905385, 0.94934888,
            0.97152609, 0.97163004,  0.9700702,  0.97158948, 0.95884908],
           [0.83980439, 0.84726737, 0.85747,  0.85467221, 0.8556751,
            0.84818516,  0.85265681, 0.84502402, 0.82645665, 0.81743586,
            0.83550406, 0.83338919,  0.83511679, 0.82136617, 0.80921874],
           [0.95118156, 0.9466212,  0.94688098, 0.9508583,  0.9512441,
            0.95440787,  0.96364363, 0.96804412, 0.97136214, 0.97583768,
            0.95571724, 0.96895368,  0.97001634, 0.97082733, 0.98782366],
           [1.08910044, 1.08248968, 1.08492895, 1.08656923, 1.09454249,
            1.10558188,  1.1214086,  1.12292577, 1.13021031, 1.13342735,
            1.14686068, 1.14502975,  1.14474747, 1.14084037, 1.16142926],
           [1.06336033, 1.07365823, 1.08691496, 1.09764846, 1.11669863,
            1.11856702,  1.09764283, 1.08815849, 1.08044313, 1.09278827,
            1.07003204, 1.08398066,  1.09831768, 1.09298232, 1.09176125],
           [0.79772065, 0.78829196, 0.78581151, 0.77615922, 0.77035744,
            0.77751194,  0.79902974, 0.81437881, 0.80788828, 0.79603865,
            0.78966436, 0.79949807,  0.80172182, 0.82168155, 0.85587911],
           [1.0052447,  1.00007696, 1.00475899, 1.00613942, 1.00639561,
            1.00162979,  0.99860739, 1.00814981, 1.00574316, 0.99030032,
            0.97682565, 0.97292596,  0.96519561, 0.96173403, 0.95890284],
           [0.95808419, 0.9382568,  0.9654441,  0.95561201, 0.96987289,
            0.96608031,  0.99727185, 1.00781194, 1.03484236, 1.05333619,
            1.0983263,  1.1704974,  1.17025154, 1.18730553, 1.14242645]])
        self.assertTrue(np.allclose(result, expected))
        self.assertTrue(type(result) == type(expected))
        self.assertTrue(result.shape == expected.shape)
    def test_rebin_data(self):
        """Test rebin_data"""
        # sample in double the time (even case since 10 % 2 = 0):
        #   (0+1)/2, (2+3)/2, (4+5)/2, (6+7)/2, (8+9)/2
        # = 0.5,     2.5,     4.5,     6.5,     8.5
        ans_even = np.array([(i + 0.5) * np.ones(10, dtype=float)
                             for i in range(0, 10, 2)]).T
        self.assertTrue(
          np.array_equal(std.rebin_data(self.time_data, 2), ans_even))
        # sample in triple the time (uneven since 10 % 3 = 1):
        #   (0+1+2)/3, (3+4+5)/3, (6+7+8)/3, (9)/1
        # = 1,         4,         7,         9
        ans_odd = np.array([i * np.ones(10, dtype=float)
                            for i in (1, 4, 7, 9)]).T
        self.assertTrue(
          np.array_equal(std.rebin_data(self.time_data, 3), ans_odd))
    def test_get_prob_dist(self):
        """Test get_prob_dist"""
        lag_indices = np.array([1, 2, 3, 4])
        unit_indices = np.array([1, 3, 2, 4])
        answer = np.array([
            [0.0754717, 0.88207547, 0.04245283, 0., 0.],
            [0., 0., 0.09411765, 0.87058824, 0.03529412],
            [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
            [0., 0., 0., 0.02352941, 0.97647059]
        ])
        result = std.get_prob_dist(self.transition_matrix,
                                   lag_indices, unit_indices)
        self.assertTrue(np.array_equal(result, answer))
    def test_get_prob_stats(self):
        """Test get_prob_stats"""
        probs = np.array([
            [0.0754717, 0.88207547, 0.04245283, 0., 0.],
            [0., 0., 0.09411765, 0.87058824, 0.03529412],
            [0.0049505, 0.09405941, 0.77722772, 0.11881188, 0.0049505],
            [0., 0., 0., 0.02352941, 0.97647059]
        ])
        unit_indices = np.array([1, 3, 2, 4])
        answer_up = np.array([0.04245283, 0.03529412, 0.12376238, 0.])
        answer_down = np.array([0.0754717, 0.09411765, 0.0990099, 0.02352941])
        answer_trend = np.array([-0.03301887 / 0.88207547,
                                 -0.05882353 / 0.87058824,
                                 0.02475248 / 0.77722772,
                                 -0.02352941 / 0.97647059])
        answer_volatility = np.array([0.34221495,  0.33705421,
                                      0.29226542,  0.38834223])
        result = std.get_prob_stats(probs, unit_indices)
        result_up = result[0]
        result_down = result[1]
        result_trend = result[2]
        result_volatility = result[3]
        self.assertTrue(np.allclose(result_up, answer_up))
        self.assertTrue(np.allclose(result_down, answer_down))
        self.assertTrue(np.allclose(result_trend, answer_trend))
        self.assertTrue(np.allclose(result_volatility, answer_volatility))
--- a/src/pg/crankshaft.control
+++ b/src/pg/crankshaft.control
@ -1,5 +1,5 @@
 comment = 'CartoDB Spatial Analysis extension'
-default_version = '0.6.1'
+default_version = '0.7.0'
 requires = 'plpythonu, postgis'
 superuser = true
 schema = cdb_crankshaft
		`@ -0,0 +1,2 @@`
							`"""Import all functions for pysal_utils"""`
							`from crankshaft.pysal_utils.pysal_utils import *`
		`@ -0,0 +1,2 @@`
							`"""Import all functions from clustering libraries."""`
							`from markov import *`
		`@ -0,0 +1 @@`
							[[0.004793783909323601, 0.17999999999999999, 0.49808756424021061], [-1.0701189472090842, 0.079000000000000001, 0.14228288580832316], [-0.67867750971877305, 0.42099999999999999, 0.24867110969448558], [-0.67407386707620487, 0.246, 0.25013217644612995], [-0.79495689068870035, 0.33200000000000002, 0.21331928959090596], [-0.49279481022182703, 0.058999999999999997, 0.31107878905057329], [-0.38075627530057132, 0.28399999999999997, 0.35169205342069643], [-0.86710921611314895, 0.23699999999999999, 0.19294108571294855], [-0.78618647240956485, 0.050000000000000003, 0.2158791250244505], [-0.76108527223116984, 0.064000000000000001, 0.22330306830813684], [-0.13340753531942209, 0.247, 0.44693554317763651], [-0.57584545722033043, 0.48999999999999999, 0.28235982246156488], [-0.78882694661192831, 0.433, 0.2151065788731219], [-0.38769767950046219, 0.375, 0.34911988661484239], [-0.56057819488052207, 0.41399999999999998, 0.28754255985169652], [-0.41354017495644935, 0.45500000000000002, 0.339605447117173], [-0.23993577722243081, 0.49099999999999999, 0.40519002230969337], [-0.1389080156677496, 0.40400000000000003, 0.44476141839645233], [-0.25485737510500855, 0.376, 0.39941662953554224], [-0.71218610582902353, 0.17399999999999999, 0.23817476979886087], [-0.54533105995872144, 0.13700000000000001, 0.2927629228714812], [-0.39547917847510977, 0.033000000000000002, 0.34624464252424236], [-0.43052658996257548, 0.35399999999999998, 0.33340631435564982], [-0.37296719193774736, 0.40300000000000002, 0.35458643102865428], [-0.66482612169465694, 0.31900000000000001, 0.25308085650392698], [-0.13772133540823422, 0.34699999999999998, 0.44523032843016275], [-0.6765304487868502, 0.20999999999999999, 0.24935196033890672], [-0.64518763494323472, 0.32200000000000001, 0.25940279912025543], [-0.5078622084312413, 0.41099999999999998, 0.30577498972600159], [-0.12652006733772059, 0.42899999999999999, 0.44966013262301163], [-0.32691133022814595, 0.498, 0.37186747562269029], [0.25533848511500978, 0.42399999999999999, 0.39923083899077472], [2.7045138116476508, 0.0050000000000000001, 0.0034202212972238577], [-0.1551614486076057, 0.44400000000000001, 0.43834701985429037], [1.9524487722567723, 0.012999999999999999, 0.025442473674991528], [-1.2055816465306763, 0.017000000000000001, 0.11398941970467646], [3.478472976017831, 0.002, 0.00025213964072468009], [-1.4621715757903719, 0.002, 0.071847099325659136], [-0.84010307600180256, 0.085000000000000006, 0.20042529779230778], [5.7097646237318243, 0.0030000000000000001, 5.6566262784940591e-09], [1.5082367956567375, 0.065000000000000002, 0.065746966514827365], [-0.58337270103430816, 0.44, 0.27982121546450034], [-0.083271860457022437, 0.45100000000000001, 0.46681768733385554], [-0.46872337815000953, 0.34599999999999997, 0.31963368715684204], [0.18490279849545319, 0.23799999999999999, 0.42665263797981101], [3.470424529947997, 0.012, 0.00025981817437825683], [-0.99942612137154796, 0.032000000000000001, 0.15879415560388499], [-1.3650387953594485, 0.034000000000000002, 0.08612042845912049], [1.8617160516432014, 0.081000000000000003, 0.03132156240215267], [1.1321188945775384, 0.11600000000000001, 0.12879222611766061], [0.064116686050580601, 0.27300000000000002, 0.4744386578180424], [-0.42032194540259099, 0.29999999999999999, 0.33712514016213468], [-0.79581215423980922, 0.123, 0.21307061309098785], [-0.42792753720906046, 0.45600000000000002, 0.33435193892883741], [-1.0629378527428395, 0.051999999999999998, 0.14390506780140866], [-0.54164761752225477, 0.33700000000000002, 0.29403064095211839], [1.0934778886820793, 0.13700000000000001, 0.13709201601893539], [-0.094068785378413719, 0.38200000000000001, 0.46252725802998929], [0.13482026574801856, 0.36799999999999999, 0.44637699118865737], [-0.13976995315653129, 0.34699999999999998, 0.44442087706276601], [-0.051047663924746682, 0.32000000000000001, 0.47964376985626245], [-0.21468297736730158, 0.41699999999999998, 0.41500724761906527], [-0.20873154637330626, 0.38800000000000001, 0.41732890604390893], [-0.32427876152583485, 0.49199999999999999, 0.37286349875557478], [-0.65254842943280977, 0.374, 0.25702372075306734], [-0.48611858196118796, 0.23300000000000001, 0.31344154643990074], [-0.14482354344529477, 0.32600000000000001, 0.44242509660469886], [-0.51052030974200002, 0.439, 0.30484349480873729], [0.56814382285283538, 0.14999999999999999, 0.28496865660103166], [0.58680919931668207, 0.161, 0.27866592887231878], [0.013390357044409013, 0.25800000000000001, 0.49465818005865647], [-0.19050728887961568, 0.41399999999999998, 0.4244558160399462], [-0.60531777422216049, 0.35199999999999998, 0.2724839368239631], [1.0899331115425805, 0.127, 0.13787130480311838], [0.17015055382651084, 0.36899999999999999, 0.43244586845546418], [-0.21738337124409801, 0.40600000000000003, 0.41395479459421991], [1.0329303331079593, 0.079000000000000001, 0.15081825117169467], [1.0218317101096221, 0.104, 0.15343027913308094]]
		`@ -0,0 +1 @@`
							[{"xs": [9.917239463463458, 9.042767302696836, 10.798929825304187, 8.763751051762995, 11.383882954810852, 11.018206993460897, 8.939526075734316, 9.636159342565252, 10.136336896960058, 11.480610059427342, 12.115011910725082, 9.173267848893428, 10.239300931201738, 8.00012512174072, 8.979962292282131, 9.318376124429575, 10.82259513754284, 10.391747171927115, 10.04904588886165, 9.96007160443463, -0.78825626804569, -0.3511819898577426, -1.2796410003764271, -0.3977049391203402, 2.4792311265774667, 1.3670311632092624, 1.2963504112955613, 2.0404844103073025, -1.6439708506073223, 0.39122885445645805, 1.026031821452462, -0.04044477160482201, -0.7442346929085072, -0.34687120826243034, -0.23420359971379054, -0.5919629143336708, -0.202903054395391, -0.1893399644841902, 1.9331834251176807, -0.12321054392851609], "ys": [8.735627063679981, 9.857615954045011, 10.81439096759407, 10.586727233537191, 9.232919976568622, 11.54281262696508, 8.392787912674466, 9.355119689665944, 9.22380703532752, 10.542142541823122, 10.111980619367035, 10.760836265570738, 8.819773453269804, 10.25325722424816, 9.802077905695608, 8.955420161552611, 9.833801181904477, 10.491684241001613, 12.076108669877556, 11.74289693140474, -0.5685725015474191, -0.5715728344759778, -0.20180907868635137, 0.38431336480089595, -0.3402202083684184, -2.4652736827783586, 0.08295159401756182, 0.8503818775816505, 0.6488691600321166, 0.5794762568230527, -0.6770063922144103, -0.6557616416449478, -1.2834289177624947, 0.1096318195532717, -0.38986922166834853, -1.6224497706950238, 0.09429787743230483, 0.4005097316394031, -0.508002811195673, -1.2473463371366507], "ids": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39]}]
		`@ -0,0 +1 @@`
							[[0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 0], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 1], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 2], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 3], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 4], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 5], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 6], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 7], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 8], [0.19047619047619049, 0.16, 0.0, 0.32594478059941379, 9], [-0.23529411764705882, 0.0, 0.19047619047619047, 0.31356338348865387, 10], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 11], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 12], [0.027777777777777783, 0.11111111111111112, 0.088888888888888892, 0.30339641183779581, 13], [0.03125, 0.030303030303030304, 0.0, 0.3850273981640871, 14], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 15], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 16], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 17], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 18], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 19], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 20], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 21], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 22], [-0.16666666666666663, 0.18181818181818182, 0.27272727272727271, 0.20246415864836445, 23], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 24], [0.1875, 0.23999999999999999, 0.12, 0.23731835158706122, 25], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 26], [-0.043478260869565216, 0.0, 0.041666666666666664, 0.37950991789118999, 27], [0.22222222222222221, 0.18181818181818182, 0.0, 0.31701083225750354, 28], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 29], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 30], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 31], [0.030303030303030304, 0.078947368421052627, 0.052631578947368418, 0.33560628561957595, 32], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 33], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 34], [0.0, 0.10000000000000001, 0.10000000000000001, 0.30331501776206204, 35], [-0.054054054054054057, 0.0, 0.05128205128205128, 0.37488547451276033, 36], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 37], [-0.22222222222222224, 0.13333333333333333, 0.26666666666666666, 0.22310934040908681, 38], [-0.0625, 0.095238095238095233, 0.14285714285714285, 0.28634850244519822, 39], [0.034482758620689655, 0.0625, 0.03125, 0.35388469167230169, 40], [0.11111111111111112, 0.10000000000000001, 0.0, 0.35213633723318016, 41], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 42], [0.0, 0.0, 0.0, 0.40000000000000002, 43], [0.0, 0.065217391304347824, 0.065217391304347824, 0.33605067580764519, 44], [0.078947368421052641, 0.073170731707317083, 0.0, 0.36451788667842738, 45], [0.052631578947368425, 0.090909090909090912, 0.045454545454545456, 0.33352611505171165, 46], [-0.20512820512820512, 0.0, 0.1702127659574468, 0.32172013908826891, 47]]