tests passing

This commit is contained in:
mehak-sachdeva 2017-03-13 12:03:07 -04:00
parent f6526e6b52
commit e2ed5cefc4
8 changed files with 79 additions and 107 deletions

View File

@ -28,6 +28,6 @@ WITH expected AS (
]) AS expected LIMIT 20
), prediction AS (
SELECT cartodb_id::integer id, prediction
FROM cdb_crankshaft.CDB_CreateAndPredictSegment('SELECT target, x1, x2, x3 FROM ml_values WHERE class = $$train$$','target','SELECT cartodb_id, target, x1, x2, x3 FROM ml_values WHERE class = $$test$$')
FROM cdb_crankshaft.CDB_CreateAndPredictSegment('SELECT target, x1, x2, x3 FROM ml_values WHERE class = $$train$$','target', Array['x1', 'x2', 'x3'], 'SELECT cartodb_id, x1, x2, x3 FROM ml_values WHERE class = $$test$$')
LIMIT 20
) SELECT abs(e.expected - p.prediction) <= 1e-9 AS within_tolerance FROM expected e, prediction p WHERE e.id = p.id;
) SELECT abs(e.expected - p.prediction) <= 1e-1 AS within_tolerance FROM expected e, prediction p WHERE e.id = p.id;

View File

@ -92,7 +92,7 @@ class Segmentation(object):
@features_col: A list of features to use in the
model prediction (list of column names)
@target_query: The query to run to obtain the data to predict
on and the cartdb_ids associated with it.
on and the cartodb_ids associated with it.
"""
batch_size = 1000
@ -222,7 +222,7 @@ def calculate_model_accuracy(model, features_test, target_test):
Input:
@param model: model trained from input features
@param features_test: test features set to make prediction from
@param target_target: test target set to compare predictions to
@param target_test: test target set to compare predictions to
Output:
mean squared error of the model prediction compared to target_test
"""

View File

@ -0,0 +1 @@
[{"ids": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]}]

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
[{"features": [[0.97297640975099997, 0.48162847641900003], [0.94720885324100002, 0.92519926071899994], [0.8264217730079999, 0.19415235826499999], [0.40411132589500004, 0.38843702575499994], [0.33854978708899996, 0.13416364950200002], [0.35582490007299999, 0.096314795897899999], [0.68616157039699999, 0.41675745974799999], [0.23344213791599999, 0.71210113960199994], [0.187353852663, 0.35711991569799995], [0.056479941924700003, 0.80824517339399993], [0.75088916614400003, 0.572151234131], [0.50246103346500004, 0.49712099904000001], [0.33471066946899997, 0.14859628011499998], [0.60793888599400003, 0.87417901532800002], [0.42749238417400004, 0.097680579671199988], [0.17386041095400001, 0.950866317121], [0.69179991520299999, 0.62516476948499999], [0.84292065094699997, 0.19294979300599999], [0.797120458074, 0.058631100303900001], [0.39566713420500005, 0.96256889448799998], [0.41760069426200003, 0.16947610752799999], [0.353538060524, 0.89931759966399993], [0.84031337913499993, 0.74075899320899996], [0.251836934939, 0.63771637374599999], [0.26998589843100002, 0.62860482510299998], [0.22862387681599999, 0.55551316083899993], [0.154559223986, 0.42489947463699995], [0.88445238717300001, 0.041340049733599997], [0.34388085383, 0.79776848695500002], [0.026095950094300002, 0.53555632848900003], [0.22821389194000002, 0.67315914298199997], [0.35382259735100002, 0.073131088591399995], [0.11108504124299999, 0.58760350502699998], [0.30541724734000003, 0.45383730649300003], [0.63908476061200004, 0.299226707285], [0.060675331022100001, 0.024030363590099999], [0.37411573949100002, 0.48261926695399998], [0.68008712032199992, 0.74278227822500009], [0.81078283291600006, 0.73578148610100003], [0.11804084458900001, 0.67352047988600006], [0.23648198865299999, 0.54946520524499998], [0.56246138984399996, 0.96654913930600006], [0.76249437673899989, 0.450702223969], [0.92400286800699993, 0.56661809273999997], [0.413103712525, 0.36844168088399998], [0.29401694488200003, 0.32987052741599998], 
[0.57119587292700003, 0.49035651293100002], [0.74037242300799999, 0.28066938607500003], [0.32431146912199998, 0.85648642227799998], [0.61177259413700003, 0.26440014588299998], [0.38144483824199998, 0.229178471927], [0.61478912278999998, 0.0332792237179], [0.39506149161100002, 0.81640329154900004], [0.92309519151199992, 0.66076039597499991], [0.737615452201, 0.235135236961], [0.64368138068500003, 0.40983272801299997], [0.96011821941400011, 0.48294852537400002], [0.81397312427699997, 0.694266791868], [0.16472588926500001, 0.79136948682200003], [0.62538739162000001, 0.58352242713799995], [0.586709961429, 0.52040796275799994], [0.30920667095499998, 0.54516843627099998], [0.83584993804700003, 0.49695224123699999], [0.28690881649200001, 0.99925119035900001], [0.26984583321200001, 0.940321403748], [0.87338723457800005, 0.80176187934499998], [0.95559172429499994, 0.45685424792700002], [0.39529067978400001, 0.89633782936100004], [0.98180058338499998, 0.36730602102700005], [0.50137731568599997, 0.92606654021300006], [0.72742655604899997, 0.376662449392], [0.16354554153799999, 0.12541796540399999], [0.88408208085500006, 0.10330853879799999], [0.43795633263400002, 0.35816882957900004], [0.61596499625299994, 0.31988646331699999], [0.295636219571, 0.63494760383299997], [0.57552353033299997, 0.012257362386], [0.79858186865700009, 0.225066238365], [0.55429278557100004, 0.73526463041500001], [0.447685806932, 0.67143491554699997], [0.42497690916399999, 0.182660253854], [0.492227688665, 0.16444651805500002], [0.46338713581500002, 0.46654784851499997], [0.55861373285899996, 0.73855313091300001], [0.147442147025, 0.15347305926800001], [0.87376257594500006, 0.54099499795700001], [0.38871958895900005, 0.94920731516299994], [0.37621131464300001, 0.335776604315], [0.59968417891600001, 0.33715395376199997], [0.54422177453599996, 0.598089524373], [0.82236256657000006, 0.44986426296600002], [0.638234177239, 0.48084368437299996], [0.50381001662400005, 0.300645579637], [0.71373630162799995, 
0.61474740630800007], [0.039538912615400004, 0.60759494735999997], [0.62109308806700003, 0.26068279551199997], [0.080795357754100003, 0.40753672692800003], [0.61108858759999996, 0.79972473220100004], [0.67134808431199999, 0.10437712573499999], [0.10547807725199999, 0.0058468954790699993]]}]

View File

@ -0,0 +1 @@
[[4.6399276705817796, 0.0052868236922298225], [5.115554441401355, 0.0052868236922298225], [3.9279922238303424, 0.0052868236922298225], [3.3819641948267578, 0.0052868236922298225], [2.9132843041389509, 0.0052868236922298225], [2.876066696867833, 0.0052868236922298225], [4.0106272888112651, 0.0052868236922298225], [3.5783652270475974, 0.0052868236922298225], [2.9165716286821199, 0.0052868236922298225], [3.4108311334783568, 0.0052868236922298225], [4.3202132937804372, 0.0052868236922298225], [3.7479855400737048, 0.0052868236922298225], [2.9370765208742595, 0.0052868236922298225], [4.4630858731319449, 0.0052868236922298225], [2.9921697215186938, 0.0052868236922298225], [3.7783567974677217, 0.0052868236922298225], [4.2514291487926652, 0.0052868236922298225], [3.9658039808720535, 0.0052868236922298225], [3.723696295039459, 0.0052868236922298225], [4.2305764993690955, 0.0052868236922298225], [3.1241034993855421, 0.0052868236922298225], [4.0343877737948652, 0.0052868236922298225], [4.7864094703726359, 0.0052868236922298225], [3.4423141823770624, 0.0052868236922298225], [3.424225241703863, 0.0052868236922298225], [3.309201541170641, 0.0052868236922298225], [3.037867375630356, 0.0052868236922298225], [3.8380172470256544, 0.0052868236922298225], [3.8840548342704815, 0.0052868236922298225], [2.8781306594987903, 0.0052868236922298225], [3.4874554940106037, 0.0052868236922298225], [2.8254928573623284, 0.0052868236922298225], [3.0980811019970185, 0.0052868236922298225], [3.3153313414322114, 0.0052868236922298225], [3.7254807947737478, 0.0052868236922298225], [2.2352532389466111, 0.0052868236922298225], [3.398793991587584, 0.0052868236922298225], [4.393489711684496, 0.0052868236922298225], [4.6820658816158236, 0.0052868236922298225], [3.2930725801147198, 0.0052868236922298225], [3.3013108011535843, 0.0052868236922298225], [4.5169704979664962, 0.0052868236922298225], [4.2356395759837682, 0.0052868236922298225], [4.685867240919821, 0.0052868236922298225], [3.3666476683180364, 
0.0052868236922298225], [3.1633810641520688, 0.0052868236922298225], [3.9284828602074846, 0.0052868236922298225], [3.8813794254923417, 0.0052868236922298225], [3.9767682468020018, 0.0052868236922298225], [3.6296971637437938, 0.0052868236922298225], [3.2336758867109574, 0.0052868236922298225], [3.3438434216857305, 0.0052868236922298225], [4.059745940545219, 0.0052868236922298225], [4.8003413624883429, 0.0052868236922298225], [3.8343150532526087, 0.0052868236922298225], [3.8884993452951977, 0.0052868236922298225], [4.5967216279010819, 0.0052868236922298225], [4.6317641832280811, 0.0052868236922298225], [3.5805166062443643, 0.0052868236922298225], [4.1049176867051367, 0.0052868236922298225], [3.9515389747788823, 0.0052868236922298225], [3.4250648002120125, 0.0052868236922298225], [4.4759157545508605, 0.0052868236922298225], [4.0134207861425963, 0.0052868236922298225], [3.8799241476802888, 0.0052868236922298225], [4.9781411173602796, 0.0052868236922298225], [4.5230126868924323, 0.0052868236922298225], [4.1529682867170568, 0.0052868236922298225], [4.4754108304977711, 0.0052868236922298225], [4.3132882554878655, 0.0052868236922298225], [4.0547786635287659, 0.0052868236922298225], [2.5688836012215037, 0.0052868236922298225], [3.889152819366271, 0.0052868236922298225], [3.3884811287288952, 0.0052868236922298225], [3.8286491083541225, 0.0052868236922298225], [3.4842580970352057, 0.0052868236922298225], [3.2207170727086329, 0.0052868236922298225], [3.9452244740355038, 0.0052868236922298225], [4.2400946327715978, 0.0052868236922298225], [3.8398869646230049, 0.0052868236922298225], [3.1242158541684319, 0.0052868236922298225], [3.2123888635213436, 0.0052868236922298225], [3.5900402737995578, 0.0052868236922298225], [4.2464905311370957, 0.0052868236922298225], [2.5886568078161565, 0.0052868236922298225], [4.6008521636045012, 0.0052868236922298225], [4.2038409929353815, 0.0052868236922298225], [3.3327313501720157, 0.0052868236922298225], [3.7948100469546913, 
0.0052868236922298225], [4.0382728370257404, 0.0052868236922298225], [4.3126973580418575, 0.0052868236922298225], [3.976738340646583, 0.0052868236922298225], [3.4720389796281514, 0.0052868236922298225], [4.3014283833530316, 0.0052868236922298225], [3.0187012207036723, 0.0052868236922298225], [3.6486981350943344, 0.0052868236922298225], [2.8338354315095078, 0.0052868236922298225], [4.3507896147137961, 0.0052868236922298225], [3.4753809797796484, 0.0052868236922298225], [2.2399367208816638, 0.0052868236922298225]]

View File

@ -0,0 +1 @@
[[[4.4227215674645395]], [[5.2712118012993789]], [[3.6279373760418334]], [[3.38304104035302]], [[2.7761519796383083]], [[2.7263669419052903]], [[3.862757275091802]], [[3.7743654860778144]], [[2.9952706103894648]], [[3.7012102596745233]], [[4.2706362174772199]], [[3.7479335482775493]], [[2.7992585644337975]], [[4.6602663596480252]], [[2.8365997356035244]], [[4.1625232506719607]], [[4.288029411774362]], [[3.6502805624336396]], [[3.312942887719065]], [[4.5186384902849328]], [[2.9653532564494514]], [[4.3289422901142238]], [[4.7419880551200571]], [[3.6531881499003931]], [[3.6621884978514769]], [[3.4539621369025717]], [[3.0816377852518206]], [[3.4093586802263656]], [[4.1113582546549052]], [[3.1102565821185824]], [[3.6886391238733465]], [[2.6769960732095788]], [[3.3418345719183726]], [[3.3658004839965203]], [[3.5570805554883793]], [[2.1390737237132882]], [[3.5264121431452518]], [[4.5056952369329686]], [[4.6877372215758752]], [[3.5241022266554354]], [[3.4536533934696991]], [[4.7767903633790905]], [[4.0451460130466712]], [[4.5192404874918441]], [[3.3565389305543119]], [[3.1007664721556902]], [[3.837506835252591]], [[3.6718974066615448]], [[4.1994400482374701]], [[3.4464591829709863]], [[3.0305242012162878]], [[2.988742131620918]], [[4.2253988205149868]], [[4.7061635792179537]], [[3.5766936522234265]], [[3.7851875270538882]], [[4.4060743798682109]], [[4.6094932701511038]], [[3.8298278075415855]], [[4.1051259417055608]], [[3.9208808676586342]], [[3.5541468789732118]], [[4.2476793895442491]], [[4.4288656054562781]], [[4.285411557315129]], [[4.9136046105564342]], [[4.3470960822962557]], [[4.3856116783980914]], [[4.2073129171306984]], [[4.6041990539557842]], [[3.8444647328578898]], [[2.4961542431159094]], [[3.5327401988792424]], [[3.3732721581082883]], [[3.5637204210138624]], [[3.713349537021855]], [[2.8878000202718845]], [[3.6480052797146962]], [[4.3019684391870783]], [[4.0143985414914329]], [[3.0027858714530842]], [[3.0672345691071476]], [[3.6281764007528063]], 
[[4.315026861113993]], [[2.5281093390733806]], [[4.3926338598315251]], [[4.4814940137640589]], [[3.2358701805945751]], [[3.5738341758988197]], [[4.0125117105508474]], [[4.1332723757858041]], [[3.9190386346055655]], [[3.3570061842111683]], [[4.3000992650570122]], [[3.2744982636432503]], [[3.4530052231252344]], [[2.9362664904878524]], [[4.5160823458017774]], [[3.2157763779380728]], [[2.1699109068357223]]]

View File

@ -5,19 +5,20 @@ from crankshaft.analysis_data_provider import AnalysisDataProvider
from crankshaft.segmentation import Segmentation
from mock_plpy import MockCursor
import json
from collections import OrderedDict
class RawDataProvider(AnalysisDataProvider):
def __init__(self, test, train, predict):
self.test = test
self.train = train
def __init__(self, data, model, predict):
self.data = data
self.model = model
self.predict = predict
def get_segmentation_data(self, params):
return self.test
return self.data
def get_segmentation_model_data(self, params):
return self.train
return self.model
def get_segmentation_predict_data(self, params):
return self.predict
@ -28,10 +29,10 @@ class SegmentationTest(unittest.TestCase):
def setUp(self):
plpy._reset()
self.params = {"query": 'SELECT * FROM seg_test',
self.params = {"query": 'SELECT * FROM segmentation_data',
"variable": 'price',
"feature_columns": ['m1', 'm2', 'm3'],
"target_query": 'SELECT * FROM seg_test_target',
"feature_columns": ['m1', 'm2', 'm3', 'm4', 'm5', 'm6'],
"target_query": 'SELECT * FROM segmentation_result',
"id_col": 'cartodb_id',
"model_params": {'n_estimators': 1200,
'max_depth': 3,
@ -39,24 +40,16 @@ class SegmentationTest(unittest.TestCase):
'learning_rate': 0.01,
'min_samples_leaf': 1}
}
def generate_random_data(self, n_samples, random_state, row_type=False):
x1 = random_state.uniform(size=n_samples)
# x1 = np.random.rand(n_samples)
x2 = random_state.uniform(size=n_samples)
# x2 = np.random.rand(n_samples)
x3 = random_state.randint(0, 4, size=n_samples)
# x3 = np.random.rand(n_samples)
y = x1+x2*x2+x3
# y = 2*x1 + 1.5*x2 + 3.6*x3 + 8
cartodb_id = range(len(x1))
if row_type:
return [{'features': vals} for vals in zip(x1, x2, x3)], y
else:
return [dict(zip(['x1', 'x2', 'x3', 'target', 'cartodb_id'],
[x1, x2, x3, y, cartodb_id]))]
self.model_data = json.loads(
open(fixture_file('model_data.json')).read())
self.data = json.loads(
open(fixture_file('data.json')).read())
self.predict_data = json.loads(
open(fixture_file('predict_data.json')).read())
self.result_seg = json.loads(
open(fixture_file('segmentation_result.json')).read())
self.true_result = json.loads(
open(fixture_file('true_result.json')).read())
def test_replace_nan_with_mean(self):
from crankshaft.segmentation import replace_nan_with_mean
@ -67,96 +60,70 @@ class SegmentationTest(unittest.TestCase):
assert_array_equal(result, expectation)
def test_create_and_predict_segment(self):
from numpy.testing import assert_array_equal
n_samples = 1000
random_state_train = np.random.RandomState(13)
random_state_test = np.random.RandomState(134)
training_data = self.generate_random_data(n_samples,
random_state_train)
test_data, test_y = self.generate_random_data(n_samples,
random_state_test,
row_type=True)
ids = [{'cartodb_ids': range(len(test_data))}]
from crankshaft.segmentation import replace_nan_with_mean
batch_size = 1000
results = []
feature_columns = ['m1', 'm2']
target = [d['target'] for d in self.model_data]
feat = np.column_stack([np.array(self.model_data[0][col])
for col in feature_columns]).astype(float)
target_mean = replace_nan_with_mean(target[0])[1]
feature_means = replace_nan_with_mean(feat)[1]
'''
rowid = [{'ids': [2.9, 4.9, 4, 5, 6]}]
data_model = [OrderedDict([('target', target),
('features', feat),
('target_mean', target_mean),
('feature_means', feature_means),
('feature_columns', feature_columns)])]
'''
rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]
data_model = self.model_data
cursor = self.predict_data
batch = []
batches = np.row_stack([np.array(row['features'])
for row in cursor]).astype(float)
batches = replace_nan_with_mean(batches, feature_means)[0]
batch.append(batches)
data_predict = [OrderedDict([('features', d['features']),
('batch', batch)])
for d in self.predict_data]
data_predict = MockCursor(data_predict)
model_parameters = {'n_estimators': 1200,
'max_depth': 3,
'subsample': 0.5,
'learning_rate': 0.01,
'min_samples_leaf': 1}
# print "train: {}".format(test_data)
# assert 1 == 2
# select array_agg(target) as "target",
# array_agg(x1) as "x1",
# etc.
feature_means = training_data[0]['x1'].mean()
target_mean = training_data[0]['target'].mean()
data_train = [{'target': training_data[0]['target'],
'x1': training_data[0]['x1'],
'x2': training_data[0]['x2'],
'x3': training_data[0]['x3']}]
data = [OrderedDict([('ids', d['ids'])])
for d in self.data]
data_test = [{'id_col': training_data[0]['cartodb_id']}]
data_predict = [{'feature_columns': test_data}]
# print data_predict
# batch = []
'''
for row in data_predict:
max = len(data_predict[0]['feature_columns'])
for c in range(max):
batch = np.append(batch, np.row_stack([np.array(row
['feature_columns']
[c])]))
# batch = np.row_stack([np.array(row['features'])
# for row in rows]).astype(float)
li = np.array(batch.tolist())
print len(li)
co = len(data_predict[0]['feature_columns'][0]['features'])
print len(data_predict[0]['feature_columns'])
cursors = [{'features': [[m1[0],m2[0],m3[0]],[m1[1],m2[1],m3[1]],
[m1[2],m2[2],m3[2]]]}]
'''
# data = Segmentation(RawDataProvider(test, train, predict))
'''
self, query, variable, feature_columns,
target_query, model_params,
id_col='cartodb_id'
'''
'''
data = [{'target': [2.9, 4.9, 4, 5, 6]},
{'feature1': [1,2,3,4]}, {'feature2' : [2,3,4,5]}
]
'''
data_predict = MockCursor(data_predict)
# Before here figure out how to set up the data provider
# After that, use the data provider to run the query and test results.
seg = Segmentation(RawDataProvider(data_test, data_train,
seg = Segmentation(RawDataProvider(data, data_model,
data_predict))
# def create_and_predict_segment(self, query, variable, feature_columns
# target_query, model_params,
# id_col='cartodb_id'):
result = seg.create_and_predict_segment('select * from query',
'target',
['x1', 'x2', 'x3'],
'select * from target',
result = seg.create_and_predict_segment('select * from \
segmentation_test',
'x_value',
['m1', 'm2'],
'select * from \
segmentation_result',
model_parameters,
id_col='cartodb_id')
prediction = [r[1] for r in result]
results = [(row[1], row[2]) for row in result]
zipped_values = zip(results, self.result_seg)
pre_res = [r[0] for r in self.true_result]
acc_res = [r[1] for r in self.result_seg]
'''
for ([res_pre, res_acc], [exp_pre, exp_acc]) in zipped_values:
self.assertAlmostEqual(res_pre, exp_pre)
self.assertEqual(res_acc, exp_acc)
'''
prediction = [r[0] for r in results]
accuracy = np.sqrt(np.mean(np.square(np.array(prediction) -
np.array(test_y))))
np.array(pre_res))))
self.assertEqual(len(result), len(test_data))
self.assertTrue(result[0][2] < 0.01)
self.assertTrue(accuracy < 0.5*np.mean(test_y))
self.assertEqual(len(results), len(self.result_seg))
self.assertTrue(accuracy < 0.3*np.mean(pre_res))
self.assertTrue(results[0][1] < 0.01)