From 5a46f8da716880d29e7bcb0f0e3f07a5ea1150fa Mon Sep 17 00:00:00 2001 From: mehak-sachdeva Date: Wed, 22 Feb 2017 15:16:18 -0500 Subject: [PATCH] modifying failing test --- .../crankshaft/segmentation/segmentation.py | 32 +++++++++++++------ src/py/crankshaft/test/test_segmentation.py | 3 -- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/py/crankshaft/crankshaft/segmentation/segmentation.py b/src/py/crankshaft/crankshaft/segmentation/segmentation.py index 2f868c4..319ba21 100644 --- a/src/py/crankshaft/crankshaft/segmentation/segmentation.py +++ b/src/py/crankshaft/crankshaft/segmentation/segmentation.py @@ -7,6 +7,7 @@ from sklearn.ensemble import GradientBoostingRegressor from sklearn import metrics from sklearn.cross_validation import train_test_split from crankshaft.analysis_data_provider import AnalysisDataProvider +from mock_plpy import MockCursor # NOTE: added optional param here @@ -78,12 +79,10 @@ class Segmentation(object): accuracy_array = [accuracy] * result.shape[0] rowid = self.data_provider.get_segmentation_data(params) - ''' rowid = [{'ids': [2.9, 4.9, 4, 5, 6]}] ''' - - return zip(rowid[0]['ids'], result, accuracy_array) + return zip(rowid[0]['id_col'], result, accuracy_array) def predict_segment(self, model, feature_columns, target_query, feature_means): @@ -102,20 +101,33 @@ class Segmentation(object): "feature_columns": feature_columns} results = [] - cursors = self.data_provider.get_segmentation_predict_data(params) + cursor = self.data_provider.get_segmentation_predict_data(params) + cursor = MockCursor(cursor) ''' - cursors = [{'features': [[m1[0],m2[0],m3[0]],[m1[1],m2[1],m3[1]], - [m1[2],m2[2],m3[2]]]}] + cursor = [{'feature_columns': [{'features': (0.81140362630858487, + 0.65758478086896821, + 0)}]}] + ''' while True: - rows = cursors.fetch(batch_size) + batch = [] + rows = cursor.fetch(batch_size) if not rows: break - batch = np.row_stack([np.array(row['features']) - for row in rows]).astype(float) - + for row in rows: + max = len(rows[0]['feature_columns']) + for c in range(max): + batch = np.append(batch, np.row_stack([np.array(row + ['feature_columns'] + [c] + ['features'])]) + .astype(float)) + # batch = np.row_stack([np.array(row['features']) + # for row in rows]).astype(float) + co = len(rows[0]['feature_columns'][0]['features']) + batch = batch.reshape((batch_size, co)) batch = replace_nan_with_mean(batch, feature_means)[0] prediction = model.predict(batch) results.append(prediction) diff --git a/src/py/crankshaft/test/test_segmentation.py b/src/py/crankshaft/test/test_segmentation.py index 11f5ea1..44c8e21 100644 --- a/src/py/crankshaft/test/test_segmentation.py +++ b/src/py/crankshaft/test/test_segmentation.py @@ -63,8 +63,6 @@ class SegmentationTest(unittest.TestCase): test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan]) result = replace_nan_with_mean(test_array, means=None)[0] expectation = np.array([1.2, 2.2, 3.2, 2.2, 2.2], dtype=float) - print result - print type(result) assert_array_equal(result, expectation) def test_create_and_predict_segment(self): @@ -122,7 +120,6 @@ class SegmentationTest(unittest.TestCase): {'feature1': [1,2,3,4]}, {'feature2' : [2,3,4,5]} ] ''' - print data_train # Before here figure out how to set up the data provider # After use data prodiver to run the query and test results. seg = Segmentation(RawDataProvider(data_test, data_train,