adding more thorough docs

This commit is contained in:
Andy Eschbacher 2018-02-27 16:36:08 -05:00
parent dd32d45422
commit 0d050a2206

View File

@ -1,23 +1,26 @@
"""
Moran's I geostatistics (global clustering & outliers presence)
Functionality relies on PySAL: http://pysal.readthedocs.io/en/latest/
"""
# TODO: Fill in local neighbors which have null/NoneType values with the
# average of their neighborhood
import pysal as ps
from collections import OrderedDict
from crankshaft.analysis_data_provider import AnalysisDataProvider
import pysal as ps
# crankshaft module
import crankshaft.pysal_utils as pu
from crankshaft.analysis_data_provider import AnalysisDataProvider
# High level interface ---------------------------------------
class Moran(object):
"""Class for calculation of Moran's I statistics (global, local, and local
rate"""
rate)
Parameters:
data_provider (:obj:`AnalysisDataProvider`): Class for fetching data. See
the `crankshaft.analysis_data_provider` module for more information.
"""
def __init__(self, data_provider=None):
if data_provider is None:
self.data_provider = AnalysisDataProvider()
@ -30,7 +33,26 @@ class Moran(object):
Moran's I (global)
Implementation building neighbors with a PostGIS database and Moran's I
core clusters with PySAL.
Andy Eschbacher
Args:
subquery (str): Query to give access to the data needed. This query
must give access to ``attr_name``, ``geom_col``, and ``id_col``.
attr_name (str): Column name of data to analyze
w_type (str): Type of spatial weight. Must be one of `knn`
or `queen`. See `PySAL documentation
<http://pysal.readthedocs.io/en/latest/users/tutorials/weights.html>`__
for more information.
num_ngbrs (int): If using `knn` for ``w_type``, this
specifies the number of neighbors to be used to define the spatial
neighborhoods.
permutations (int): Number of permutations for performing
conditional randomization to find the p-value. Higher numbers
take longer to produce results.
geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index.
"""
params = OrderedDict([("id_col", id_col),
("attr1", attr_name),
@ -55,8 +77,26 @@ class Moran(object):
def local_stat(self, subquery, attr,
w_type, num_ngbrs, permutations, geom_col, id_col):
"""
Moran's I implementation for PL/Python
Andy Eschbacher
Moran's I (local)
Args:
subquery (str): Query to give access to the data needed. This query
must give access to ``attr``, ``geom_col``, and ``id_col``.
attr (str): Column name of data to analyze
w_type (str): Type of spatial weight. Must be one of `knn`
or `queen`. See `PySAL documentation
<http://pysal.readthedocs.io/en/latest/users/tutorials/weights.html>`__
for more information.
num_ngbrs (int): If using `knn` for ``w_type``, this
specifies the number of neighbors to be used to define the spatial
neighborhoods.
permutations (int): Number of permutations for performing
conditional randomization to find the p-value. Higher numbers
take longer to produce results.
geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index.
"""
# geometries with attributes that are null are ignored
@ -90,7 +130,26 @@ class Moran(object):
w_type, num_ngbrs, permutations, geom_col, id_col):
"""
Moran's I Rate (global)
Andy Eschbacher
Args:
subquery (str): Query to give access to the data needed. This query
must give access to ``numerator``, ``denominator``, ``geom_col``, and
``id_col``.
numerator (str): Column name of numerator to analyze
denominator (str): Column name of the denominator
w_type (str): Type of spatial weight. Must be one of `knn`
or `queen`. See `PySAL documentation
<http://pysal.readthedocs.io/en/latest/users/tutorials/weights.html>`__
for more information.
num_ngbrs (int): If using `knn` for ``w_type``, this
specifies the number of neighbors to be used to define the spatial
neighborhoods.
permutations (int): Number of permutations for performing
conditional randomization to find the p-value. Higher numbers
take longer to produce results.
geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index.
"""
params = OrderedDict([("id_col", id_col),
("attr1", numerator),
@ -117,7 +176,26 @@ class Moran(object):
w_type, num_ngbrs, permutations, geom_col, id_col):
"""
Moran's I Local Rate
Andy Eschbacher
Args:
subquery (str): Query to give access to the data needed. This query
must give access to ``numerator``, ``denominator``, ``geom_col``, and
``id_col``.
numerator (str): Column name of numerator to analyze
denominator (str): Column name of the denominator
w_type (str): Type of spatial weight. Must be one of `knn`
or `queen`. See `PySAL documentation
<http://pysal.readthedocs.io/en/latest/users/tutorials/weights.html>`__
for more information.
num_ngbrs (int): If using `knn` for ``w_type``, this
specifies the number of neighbors to be used to define the spatial
neighborhoods.
permutations (int): Number of permutations for performing
conditional randomization to find the p-value. Higher numbers
take longer to produce results.
geom_col (str): Name of the geometry column in the dataset for
finding the spatial neighborhoods.
id_col (str): Row index for each value. Usually the database index.
"""
# geometries with values that are null are ignored
# resulting in a collection of not as near neighbors
@ -188,9 +266,9 @@ def map_quads(coord):
"""
Map a quadrant number to Moran's I designation
HH=1, LH=2, LL=3, HL=4
Input:
@param coord (int): quadrant of a specific measurement
Output:
Args:
coord (int): quadrant of a specific measurement
Returns:
classification (one of 'HH', 'LH', 'LL', or 'HL')
"""
if coord == 1:
@ -206,11 +284,12 @@ def map_quads(coord):
def quad_position(quads):
    """
    Translate a sequence of quadrant codes into Moran's I labels.

    Each element is passed through ``map_quads`` and the results are
    collected, in order, into a new list.

    Args:
        quads (:obj:`numpy.ndarray`): an array of quads classified by
          1-4 (PySAL default)

    Returns:
        list: one label per input quad ('HH', 'LH', 'LL', or 'HL'), in the
        same order as ``quads``.
    """
    # Apply the per-quadrant lookup element-wise and materialize as a list.
    return list(map(map_quads, quads))