syntax updates

pull/171/head
Andy Eschbacher 7 years ago
parent e2ed5cefc4
commit dc711d195f

@ -1,26 +1,32 @@
"""Tests for segmentation functionality"""
import unittest
import json
from collections import OrderedDict
import numpy as np
from helper import plpy, fixture_file
from crankshaft.analysis_data_provider import AnalysisDataProvider
from crankshaft.segmentation import Segmentation
from mock_plpy import MockCursor
import json
from collections import OrderedDict
from .fixtures import fixture_file
class RawDataProvider(AnalysisDataProvider):
    """Data Provider to overwrite the default SQL provider.

    Instead of querying anything, each getter hands back the object that
    was passed to the constructor. The ``params`` argument of every getter
    is accepted only to match the provider interface and is never read.
    """

    def __init__(self, data, model, predict):
        """Store the three canned payloads served by the getters.

        Args:
            data: object returned by ``get_segmentation_data``.
            model: object returned by ``get_segmentation_model_data``.
            predict: object returned by ``get_segmentation_predict_data``.
        """
        self.data = data
        self.model = model
        self.predict = predict

    def get_segmentation_data(self, params):  # pylint: disable=unused-argument
        """return data"""
        return self.data

    def get_segmentation_model_data(self, params):  # pylint: disable=unused-argument
        """return model data"""
        return self.model

    def get_segmentation_predict_data(self, params):  # pylint: disable=unused-argument
        """return predict data"""
        return self.predict
@ -28,39 +34,43 @@ class SegmentationTest(unittest.TestCase):
"""Testing class for Segmentation functions"""
def setUp(self):
    """Reset the plpy helper and load request parameters and JSON fixtures.

    Populates ``self.params`` with the segmentation request used by the
    tests, and ``self.model_data``, ``self.data``, ``self.predict_data``,
    ``self.result_seg`` and ``self.true_result`` with the decoded contents
    of the corresponding fixture files.
    """
    def load_fixture(name):
        """Decode the JSON fixture *name*, closing the file afterwards."""
        with open(fixture_file(name)) as handle:
            return json.load(handle)

    # NOTE(review): presumably clears state left on the plpy test double by
    # earlier tests -- confirm against helper.plpy.
    plpy._reset()

    self.params = {
        "query": 'SELECT * FROM segmentation_data',
        "variable": 'price',
        "feature_columns": ['m1', 'm2', 'm3', 'm4', 'm5', 'm6'],
        "target_query": 'SELECT * FROM segmentation_result',
        "id_col": 'cartodb_id',
        "model_params": {
            'n_estimators': 1200,
            'max_depth': 3,
            'subsample': 0.5,
            'learning_rate': 0.01,
            'min_samples_leaf': 1
        }
    }

    self.model_data = load_fixture('model_data.json')
    self.data = load_fixture('data.json')
    self.predict_data = load_fixture('predict_data.json')
    self.result_seg = load_fixture('segmentation_result.json')
    self.true_result = load_fixture('true_result.json')
def test_replace_nan_with_mean(self):
    """test segmentation.test_replace_nan_with_mean

    Feeds an array containing NaNs to ``replace_nan_with_mean`` and checks
    that the first element of its return value equals the same array with
    every NaN replaced by 2.2, the mean of the two non-NaN entries.
    """
    from crankshaft.segmentation import replace_nan_with_mean
    from numpy.testing import assert_array_equal

    test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
    result = replace_nan_with_mean(test_array, means=None)[0]
    expectation = np.array([1.2, 2.2, 3.2, 2.2, 2.2], dtype=float)

    # Element-wise, order-sensitive comparison. The unordered
    # assertItemsEqual check that used to follow was redundant after this
    # one and is unavailable on Python 3 (renamed assertCountEqual).
    assert_array_equal(result, expectation)
def test_create_and_predict_segment(self):
"""test segmentation.test_create_and_predict"""
from crankshaft.segmentation import replace_nan_with_mean
batch_size = 1000
results = []
@ -70,13 +80,13 @@ class SegmentationTest(unittest.TestCase):
for col in feature_columns]).astype(float)
target_mean = replace_nan_with_mean(target[0])[1]
feature_means = replace_nan_with_mean(feat)[1]
'''
data_model = [OrderedDict([('target', target),
('features', feat),
('target_mean', target_mean),
('feature_means', feature_means),
('feature_columns', feature_columns)])]
'''
# data_model is of the form:
# [OrderedDict([('target', target),
# ('features', feat),
# ('target_mean', target_mean),
# ('feature_means', feature_means),
# ('feature_columns', feature_columns)])]
data_model = self.model_data
cursor = self.predict_data
batch = []
@ -91,39 +101,40 @@ class SegmentationTest(unittest.TestCase):
for d in self.predict_data]
data_predict = MockCursor(data_predict)
model_parameters = {'n_estimators': 1200,
'max_depth': 3,
'subsample': 0.5,
'learning_rate': 0.01,
'min_samples_leaf': 1}
model_parameters = {
'n_estimators': 1200,
'max_depth': 3,
'subsample': 0.5,
'learning_rate': 0.01,
'min_samples_leaf': 1
}
data = [OrderedDict([('ids', d['ids'])])
for d in self.data]
seg = Segmentation(RawDataProvider(data, data_model,
data_predict))
result = seg.create_and_predict_segment('select * from \
segmentation_test',
'x_value',
['m1', 'm2'],
'select * from \
segmentation_result',
model_parameters,
id_col='cartodb_id')
result = seg.create_and_predict_segment(
'select * from segmentation_test',
'x_value',
['m1', 'm2'],
'select * from segmentation_result',
model_parameters,
id_col='cartodb_id')
results = [(row[1], row[2]) for row in result]
zipped_values = zip(results, self.result_seg)
pre_res = [r[0] for r in self.true_result]
acc_res = [r[1] for r in self.result_seg]
'''
# test values
for ([res_pre, res_acc], [exp_pre, exp_acc]) in zipped_values:
self.assertAlmostEqual(res_pre, exp_pre)
self.assertEqual(res_acc, exp_acc)
'''
prediction = [r[0] for r in results]
accuracy = np.sqrt(np.mean(np.square(np.array(prediction) -
np.array(pre_res))))
self.assertEqual(len(results), len(self.result_seg))
self.assertTrue(accuracy < 0.3*np.mean(pre_res))
self.assertTrue(accuracy < 0.3 * np.mean(pre_res))
self.assertTrue(results[0][1] < 0.01)

Loading…
Cancel
Save