Update segmentation callers and tests to use the Segmentation class

This commit is contained in:
mehak-sachdeva 2017-01-31 11:25:27 -05:00
parent 9c2f68fcaf
commit 959747c623
4 changed files with 26 additions and 15 deletions

View File

@ -15,7 +15,8 @@ AS $$
import numpy as np
import plpy
from crankshaft.segmentation import create_and_predict_segment_agg
from crankshaft.segmentation import Segmentation
seg = Segmentation()
model_params = {'n_estimators': n_estimators,
'max_depth': max_depth,
'subsample': subsample,
@ -27,7 +28,7 @@ AS $$
a = np.array(data, dtype=float)
return a.reshape(len(a)/dimension, dimension)
return create_and_predict_segment_agg(np.array(target, dtype=float),
return seg.create_and_predict_segment_agg(np.array(target, dtype=float),
unpack2D(features),
unpack2D(target_features),
target_ids,
@ -65,7 +66,8 @@ CREATE OR REPLACE FUNCTION
min_samples_leaf INTEGER DEFAULT 1)
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
AS $$
from crankshaft.segmentation import create_and_predict_segment
from crankshaft.segmentation import Segmentation
seg = Segmentation()
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
return create_and_predict_segment(query,variable_name,target_table, model_params)
return seg.create_and_predict_segment(query,variable_name,target_table, model_params)
$$ LANGUAGE plpythonu;

View File

@ -1 +1,2 @@
"""Import all functions for segmentation"""
from segmentation import *

View File

@ -68,10 +68,11 @@ class Segmentation(object):
"""
params = {"subquery": target_query,
"id_col": id_col}
"id_col": id_col,
"feature_columns": features}
target, features, target_mean, \
feature_means = self.clean_data(variable, feature_columns, query)
feature_means = self.clean_data(variable, feature_columns, query)
model, accuracy = train_model(target, features, model_params, 0.2)
result = self.predict_segment(model, feature_columns, target_query,
@ -82,7 +83,8 @@ class Segmentation(object):
return zip(rowid, result, accuracy_array)
def predict_segment(self, model, feature_columns, target_query, feature_means):
def predict_segment(self, model, feature_columns, target_query,
feature_means):
"""
Use the provided model to predict the values for the new feature set
Input:
@ -115,7 +117,6 @@ class Segmentation(object):
# NOTE: we removed the cartodb_ids calculation in here
return np.concatenate(results)
def clean_data(self, query, variable, feature_columns):
"""
Add docstring
@ -179,8 +180,8 @@ def train_model(target, features, model_params, test_split):
testing the model / calculating the accuracy
"""
features_train, features_test, \
target_train, target_test = train_test_split(features, target,
test_size=test_split)
target_train, target_test = train_test_split(features, target,
test_size=test_split)
model = GradientBoostingRegressor(**model_params)
model.fit(features_train, target_train)
accuracy = calculate_model_accuracy(model, features_test, target_test)

View File

@ -1,7 +1,7 @@
import unittest
import numpy as np
from helper import plpy, fixture_file
import crankshaft.segmentation as segmentation
from crankshaft.segmentation import Segmentation
import json
class SegmentationTest(unittest.TestCase):
@ -48,16 +48,23 @@ class SegmentationTest(unittest.TestCase):
'subsample' : 0.5,
'learning_rate': 0.01,
'min_samples_leaf': 1}
seg = Segmentation()
'''
self, query, variable, feature_columns,
target_query, model_params,
id_col='cartodb_id'
'''
result = segmentation.create_and_predict_segment(
result = seg.create_and_predict_segment(
'select * from training',
'target',
'feature_columns',
'select * from test',
model_parameters)
model_parameters)
prediction = [r[1] for r in result]
accuracy =np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y))))
accuracy = np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y))))
self.assertEqual(len(result),len(test_data))
self.assertTrue( result[0][2] < 0.01)