Merge branch 'adds-nonspatial-kmeans' of https://github.com/CartoDB/crankshaft into adds-nonspatial-kmeans
This commit is contained in:
commit
5404589058
@ -1,5 +1,6 @@
|
|||||||
from sklearn.cluster import KMeans
|
from sklearn.cluster import KMeans
|
||||||
import plpy
|
import plpy
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
def kmeans(query, no_clusters, no_init=20):
|
def kmeans(query, no_clusters, no_init=20):
|
||||||
@ -39,7 +40,6 @@ def kmeans_nonspatial(query, colnames, num_clusters=5,
|
|||||||
num_clusters (int): number of clusters (greater than zero)
|
num_clusters (int): number of clusters (greater than zero)
|
||||||
id_col (string): name of the input id_column
|
id_col (string): name of the input id_column
|
||||||
"""
|
"""
|
||||||
import numpy as np
|
|
||||||
out_id_colname = 'rowids'
|
out_id_colname = 'rowids'
|
||||||
# TODO: need a random seed?
|
# TODO: need a random seed?
|
||||||
|
|
||||||
@ -54,14 +54,13 @@ def kmeans_nonspatial(query, colnames, num_clusters=5,
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
db_resp = plpy.execute(full_query)
|
db_resp = plpy.execute(full_query)
|
||||||
plpy.notice('query: %s' % full_query)
|
|
||||||
except plpy.SPIError, err:
|
except plpy.SPIError, err:
|
||||||
plpy.error('k-means cluster analysis failed: %s' % err)
|
plpy.error('k-means cluster analysis failed: %s' % err)
|
||||||
|
|
||||||
# fill array with values for kmeans clustering
|
# fill array with values for kmeans clustering
|
||||||
if standarize:
|
if standarize:
|
||||||
cluster_columns = _scale_data(
|
cluster_columns = _scale_data(
|
||||||
_extract_columns(db_resp, id_col='cartodb_id'))
|
_extract_columns(db_resp, id_col=out_id_colname))
|
||||||
else:
|
else:
|
||||||
cluster_columns = _extract_columns(db_resp)
|
cluster_columns = _extract_columns(db_resp)
|
||||||
|
|
||||||
@ -70,7 +69,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5,
|
|||||||
kmeans = KMeans(n_clusters=num_clusters,
|
kmeans = KMeans(n_clusters=num_clusters,
|
||||||
random_state=0).fit(cluster_columns)
|
random_state=0).fit(cluster_columns)
|
||||||
|
|
||||||
return zip(kmeans.predict(X),
|
return zip(kmeans.labels_,
|
||||||
map(str, kmeans.cluster_centers_[kmeans.labels_]),
|
map(str, kmeans.cluster_centers_[kmeans.labels_]),
|
||||||
db_resp[0][out_id_colname])
|
db_resp[0][out_id_colname])
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user