mirror of
https://github.com/CartoDB/crankshaft.git
synced 2024-11-01 10:20:48 +08:00
changes cluster centers to json
This commit is contained in:
parent
a188b2e104
commit
64c4b6611c
@ -15,14 +15,14 @@ CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial(
|
|||||||
query TEXT,
|
query TEXT,
|
||||||
colnames TEXT[],
|
colnames TEXT[],
|
||||||
num_clusters INTEGER,
|
num_clusters INTEGER,
|
||||||
id_col TEXT DEFAULT 'cartodb_id',
|
id_colname TEXT DEFAULT 'cartodb_id',
|
||||||
standarize BOOLEAN DEFAULT true
|
standarize BOOLEAN DEFAULT true
|
||||||
)
|
)
|
||||||
RETURNS TABLE(cluster_label text, cluster_center text, rowid bigint) AS $$
|
RETURNS TABLE(cluster_label text, cluster_center json, rowid bigint) AS $$
|
||||||
|
|
||||||
from crankshaft.clustering import kmeans_nonspatial
|
from crankshaft.clustering import kmeans_nonspatial
|
||||||
return kmeans_nonspatial(query, colnames, num_clusters,
|
return kmeans_nonspatial(query, colnames, num_clusters,
|
||||||
id_col, standarize)
|
id_colname, standarize)
|
||||||
$$ LANGUAGE plpythonu;
|
$$ LANGUAGE plpythonu;
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,6 +40,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5,
|
|||||||
num_clusters (int): number of clusters (greater than zero)
|
num_clusters (int): number of clusters (greater than zero)
|
||||||
id_col (string): name of the input id_column
|
id_col (string): name of the input id_column
|
||||||
"""
|
"""
|
||||||
|
import json
|
||||||
out_id_colname = 'rowids'
|
out_id_colname = 'rowids'
|
||||||
# TODO: need a random seed?
|
# TODO: need a random seed?
|
||||||
|
|
||||||
@ -60,7 +61,7 @@ def kmeans_nonspatial(query, colnames, num_clusters=5,
|
|||||||
# fill array with values for k-means clustering
|
# fill array with values for k-means clustering
|
||||||
if standarize:
|
if standarize:
|
||||||
cluster_columns = _scale_data(
|
cluster_columns = _scale_data(
|
||||||
_extract_columns(db_resp, id_col=out_id_colname))
|
_extract_columns(db_resp, out_id_colname))
|
||||||
else:
|
else:
|
||||||
cluster_columns = _extract_columns(db_resp, id_col=out_id_colname)
|
cluster_columns = _extract_columns(db_resp, id_col=out_id_colname)
|
||||||
|
|
||||||
@ -69,8 +70,9 @@ def kmeans_nonspatial(query, colnames, num_clusters=5,
|
|||||||
kmeans = KMeans(n_clusters=num_clusters,
|
kmeans = KMeans(n_clusters=num_clusters,
|
||||||
random_state=0).fit(cluster_columns)
|
random_state=0).fit(cluster_columns)
|
||||||
|
|
||||||
|
centers = [json.dumps(dict(zip(colnames, c))) for c in kmeans.cluster_centers_[kmeans.labels_]]
|
||||||
return zip(kmeans.labels_,
|
return zip(kmeans.labels_,
|
||||||
map(str, kmeans.cluster_centers_[kmeans.labels_]),
|
centers,
|
||||||
db_resp[0][out_id_colname])
|
db_resp[0][out_id_colname])
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user