debugging
This commit is contained in:
parent
d7bccc1063
commit
8c5449cfd0
@ -48,16 +48,17 @@ CREATE OR REPLACE FUNCTION
|
||||
min_samples_leaf INTEGER DEFAULT 1)
|
||||
RETURNS TABLE (cartodb_id TEXT, prediction NUMERIC, accuracy NUMERIC)
|
||||
AS $$
|
||||
from crankshaft.segmentation import create_and_predict_segment
|
||||
from crankshaft.segmentation import Segmentation
|
||||
seg = Segmentation()
|
||||
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
|
||||
return create_and_predict_segment(query,variable_name,target_table, model_params)
|
||||
return seg.create_and_predict_segment(query,variable_name,target_table, model_params)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
||||
CREATE OR REPLACE FUNCTION
|
||||
CDB_CreateAndPredictSegment (
|
||||
query TEXT,
|
||||
variable_name TEXT,
|
||||
target_table TEXT,
|
||||
target_query TEXT,
|
||||
feature_columns TEXT[],
|
||||
n_estimators INTEGER DEFAULT 1200,
|
||||
max_depth INTEGER DEFAULT 3,
|
||||
@ -69,5 +70,5 @@ AS $$
|
||||
from crankshaft.segmentation import Segmentation
|
||||
seg = Segmentation()
|
||||
model_params = {'n_estimators': n_estimators, 'max_depth':max_depth, 'subsample' : subsample, 'learning_rate': learning_rate, 'min_samples_leaf' : min_samples_leaf}
|
||||
return seg.create_and_predict_segment(query,variable_name,target_table, model_params)
|
||||
return seg.create_and_predict_segment(query, variable_name, feature_columns, target_query, model_params)
|
||||
$$ LANGUAGE plpythonu;
|
||||
|
@ -70,8 +70,7 @@ class Segmentation(object):
|
||||
params = {"subquery": target_query,
|
||||
"id_col": id_col}
|
||||
|
||||
target, features, target_mean, \
|
||||
feature_means = self.clean_data(variable, feature_columns, query)
|
||||
target, features, target_mean, feature_means = self.clean_data(variable, feature_columns, query)
|
||||
|
||||
model, accuracy = train_model(target, features, model_params, 0.2)
|
||||
result = self.predict_segment(model, feature_columns, target_query,
|
||||
@ -142,8 +141,6 @@ class Segmentation(object):
|
||||
]
|
||||
'''
|
||||
|
||||
[{target: [dsdfs]}]
|
||||
|
||||
# extract target data from plpy object
|
||||
target = np.array(data[0]['target'])
|
||||
|
||||
|
@ -7,26 +7,37 @@ import json
|
||||
|
||||
|
||||
class RawDataProvider(AnalysisDataProvider):
|
||||
def __init__(self, raw_data1, raw_data2, raw_data3):
|
||||
self.raw_data1 = raw_data1
|
||||
self.raw_data2 = raw_data2
|
||||
self.raw_data3 = raw_data3
|
||||
def __init__(self, test, train, predict):
|
||||
self.test = test
|
||||
self.train = train
|
||||
self.predict = predict
|
||||
|
||||
def get_segmentation_data(self, params):
|
||||
return self.raw_data1
|
||||
return self.test
|
||||
|
||||
def get_segmentation_predict_data(self, params):
|
||||
return self.raw_data2
|
||||
return self.train
|
||||
|
||||
def get_segmentation_model_data(self, params):
|
||||
return self.raw_data3
|
||||
return self.predict
|
||||
|
||||
|
||||
class SegmentationTest(unittest.TestCase):
|
||||
"""Testing class for Moran's I functions"""
|
||||
"""Testing class for Segmentation functions"""
|
||||
|
||||
def setUp(self):
    """Reset plpy and store the default parameters on ``self.params``.

    ``self.params`` holds the training query, the target variable name,
    the feature column names, the target query, the id column name and
    the model hyperparameters (under ``"model_params"``).
    """
    plpy._reset()

    # Hyperparameters stored under the "model_params" key.
    hyperparameters = {'n_estimators': 1200,
                       'max_depth': 3,
                       'subsample': 0.5,
                       'learning_rate': 0.01,
                       'min_samples_leaf': 1}

    self.params = {
        "query": 'SELECT * FROM seg_test',
        "variable": 'price',
        "feature_columns": ['m1', 'm2', 'm3'],
        "target_query": 'SELECT * FROM seg_test_target',
        "id_col": 'cartodb_id',
        "model_params": hyperparameters,
    }
|
||||
|
||||
def generate_random_data(self, n_samples, random_state, row_type=False):
|
||||
x1 = random_state.uniform(size=n_samples)
|
||||
@ -39,42 +50,69 @@ class SegmentationTest(unittest.TestCase):
|
||||
if row_type:
|
||||
return [{'features': vals} for vals in zip(x1, x2, x3)], y
|
||||
else:
|
||||
return [dict(zip(['x1', 'x2', 'x3', 'target', 'cartodb_id'], [x1, x2, x3, y, cartodb_id]))]
|
||||
return [dict(zip(['x1', 'x2', 'x3', 'target', 'cartodb_id'],
|
||||
[x1, x2, x3, y, cartodb_id]))]
|
||||
|
||||
def test_replace_nan_with_mean(self):
    """Check that ``replace_nan_with_mean`` fills NaN entries with the mean.

    The input holds two finite values (1.2 and 3.2) and three NaNs; the
    expected output has every NaN replaced by 2.2, the mean of the finite
    values.
    """
    from crankshaft.segmentation import replace_nan_with_mean
    from numpy.testing import assert_array_equal

    test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
    result = replace_nan_with_mean(test_array)
    expectation = np.array([1.2, 2.2, 3.2, 2.2, 2.2])

    # assert_array_equal raises AssertionError on mismatch and returns None
    # on success, so it must not be wrapped in assertTrue (assertTrue(None)
    # would fail even when the arrays are equal).
    assert_array_equal(result, expectation)
|
||||
|
||||
def test_create_and_predict_segment(self):
|
||||
from crankshaft.segmentation import Segmentation
|
||||
from numpy.testing import assert_array_equal
|
||||
|
||||
n_samples = 1000
|
||||
|
||||
random_state_train = np.random.RandomState(13)
|
||||
random_state_test = np.random.RandomState(134)
|
||||
training_data = self.generate_random_data(n_samples, random_state_train)
|
||||
test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True)
|
||||
training_data = self.generate_random_data(n_samples,
|
||||
random_state_train)
|
||||
test_data, test_y = self.generate_random_data(n_samples,
|
||||
random_state_test,
|
||||
row_type=True)
|
||||
|
||||
ids = [{'cartodb_ids': range(len(test_data))}]
|
||||
rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]
|
||||
|
||||
plpy._define_result('select \* from \(select \* from training\) a limit 1', rows)
|
||||
plpy._define_result('.*from \(select \* from training\) as a', training_data)
|
||||
plpy._define_result('select array_agg\(cartodb\_id order by cartodb\_id\) as cartodb_ids from \(.*\) a', ids)
|
||||
plpy._define_result('.*select \* from test.*', test_data)
|
||||
'''
|
||||
rowid = [{'ids': [2.9, 4.9, 4, 5, 6]}]
|
||||
'''
|
||||
rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]
|
||||
|
||||
model_parameters = {'n_estimators': 1200,
|
||||
'max_depth': 3,
|
||||
'subsample': 0.5,
|
||||
'learning_rate': 0.01,
|
||||
'min_samples_leaf': 1}
|
||||
data = [{'target': [],
|
||||
data = [{'query':
|
||||
'target': [],
|
||||
'x1': [],
|
||||
'x2': [],
|
||||
'x3': []}]
|
||||
seg = Segmentation(RawDataProvider(test, train, predict))
|
||||
'''
|
||||
cursors = [{'features': [[m1[0],m2[0],m3[0]],[m1[1],m2[1],m3[1]],
|
||||
[m1[2],m2[2],m3[2]]]}]
|
||||
'''
|
||||
data = Segmentation(RawDataProvider(test, train, predict))
|
||||
'''
|
||||
self, query, variable, feature_columns,
|
||||
target_query, model_params,
|
||||
id_col='cartodb_id'
|
||||
'''
|
||||
'''
|
||||
data = [{'target': [2.9, 4.9, 4, 5, 6]},
|
||||
{'feature1': [1,2,3,4]}, {'feature2' : [2,3,4,5]}
|
||||
]
|
||||
'''
|
||||
|
||||
# Before this point, figure out how to set up the data provider.
|
||||
# Afterwards, use the data provider to run the query and test the results.
|
||||
|
||||
seg = Segmentation(data_provider=)
|
||||
|
||||
result = seg.create_and_predict_segment(
|
||||
'select * from training',
|
||||
|
Loading…
Reference in New Issue
Block a user