Adds per-row silhouette scores to the k-means nonspatial output

This commit is contained in:
Andy Eschbacher 2016-11-14 23:29:38 +00:00
parent b6dae5e380
commit af536757fe
2 changed files with 3 additions and 2 deletions

View File

@@ -18,7 +18,7 @@ CREATE OR REPLACE FUNCTION CDB_KMeansNonspatial(
id_colname TEXT DEFAULT 'cartodb_id', id_colname TEXT DEFAULT 'cartodb_id',
standarize BOOLEAN DEFAULT true standarize BOOLEAN DEFAULT true
) )
RETURNS TABLE(cluster_label text, cluster_center json, rowid bigint) AS $$ RETURNS TABLE(cluster_label text, cluster_center json, silhouettes numeric, rowid bigint) AS $$
from crankshaft.clustering import kmeans_nonspatial from crankshaft.clustering import kmeans_nonspatial
return kmeans_nonspatial(query, colnames, num_clusters, return kmeans_nonspatial(query, colnames, num_clusters,

View File

@@ -76,11 +76,12 @@ def kmeans_nonspatial(query, colnames, num_clusters=5,
for c in kmeans.cluster_centers_[kmeans.labels_]] for c in kmeans.cluster_centers_[kmeans.labels_]]
silhouettes = metrics.silhouette_samples(cluster_columns, silhouettes = metrics.silhouette_samples(cluster_columns,
labels, kmeans.labels_,
metric='sqeuclidean') metric='sqeuclidean')
return zip(kmeans.labels_, return zip(kmeans.labels_,
centers, centers,
silhouettes,
db_resp[0][out_id_colname]) db_resp[0][out_id_colname])