more changes
This commit is contained in:
parent
cbd95fa0a2
commit
d7bccc1063
@ -119,7 +119,7 @@ class AnalysisDataProvider(object):
|
|||||||
for a in params['feature_columns']])
|
for a in params['feature_columns']])
|
||||||
query = '''
|
query = '''
|
||||||
SELECT
|
SELECT
|
||||||
Array({joined_features}) As features
|
Array[{joined_features}] As features
|
||||||
FROM ({subquery}) as q
|
FROM ({subquery}) as q
|
||||||
'''.format(subquery=params['subquery'],
|
'''.format(subquery=params['subquery'],
|
||||||
joined_features=joined_features)
|
joined_features=joined_features)
|
||||||
|
@ -68,8 +68,7 @@ class Segmentation(object):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
params = {"subquery": target_query,
|
params = {"subquery": target_query,
|
||||||
"id_col": id_col,
|
"id_col": id_col}
|
||||||
"feature_columns": feature_columns}
|
|
||||||
|
|
||||||
target, features, target_mean, \
|
target, features, target_mean, \
|
||||||
feature_means = self.clean_data(variable, feature_columns, query)
|
feature_means = self.clean_data(variable, feature_columns, query)
|
||||||
@ -81,6 +80,10 @@ class Segmentation(object):
|
|||||||
|
|
||||||
rowid = self.data_provider.get_segmentation_data(params)
|
rowid = self.data_provider.get_segmentation_data(params)
|
||||||
|
|
||||||
|
'''
|
||||||
|
rowid = [{'ids': [2.9, 4.9, 4, 5, 6]}]
|
||||||
|
'''
|
||||||
|
|
||||||
return zip(rowid, result, accuracy_array)
|
return zip(rowid, result, accuracy_array)
|
||||||
|
|
||||||
def predict_segment(self, model, feature_columns, target_query,
|
def predict_segment(self, model, feature_columns, target_query,
|
||||||
@ -101,9 +104,12 @@ class Segmentation(object):
|
|||||||
|
|
||||||
results = []
|
results = []
|
||||||
cursors = self.data_provider.get_segmentation_predict_data(params)
|
cursors = self.data_provider.get_segmentation_predict_data(params)
|
||||||
# cursors = [{'': ,
|
|
||||||
# '': }]
|
'''
|
||||||
#
|
cursors = [{'features': [[m1[0],m2[0],m3[0]],[m1[1],m2[1],m3[1]],
|
||||||
|
[m1[2],m2[2],m3[2]]]}]
|
||||||
|
'''
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
rows = cursors.fetch(batch_size)
|
rows = cursors.fetch(batch_size)
|
||||||
if not rows:
|
if not rows:
|
||||||
@ -131,7 +137,7 @@ class Segmentation(object):
|
|||||||
data = self.data_provider.get_segmentation_model_data(params)
|
data = self.data_provider.get_segmentation_model_data(params)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
data: [{'target': [2.9, 4.9, 4, 5, 6]},
|
data = [{'target': [2.9, 4.9, 4, 5, 6]},
|
||||||
{'feature1': [1,2,3,4]}, {'feature2' : [2,3,4,5]}
|
{'feature1': [1,2,3,4]}, {'feature2' : [2,3,4,5]}
|
||||||
]
|
]
|
||||||
'''
|
'''
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
import unittest
|
import unittest
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from helper import plpy, fixture_file
|
from helper import plpy, fixture_file
|
||||||
|
from crankshaft.analysis_data_provider import AnalysisDataProvider
|
||||||
from crankshaft.segmentation import Segmentation
|
from crankshaft.segmentation import Segmentation
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
class RawDataProvider(AnalysisDataProvider):
|
class RawDataProvider(AnalysisDataProvider):
|
||||||
def __init__(self, raw_data1, raw_data2, raw_data3):
|
def __init__(self, raw_data1, raw_data2, raw_data3):
|
||||||
self.raw_data1 = raw_data1
|
self.raw_data1 = raw_data1
|
||||||
@ -19,24 +21,25 @@ class RawDataProvider(AnalysisDataProvider):
|
|||||||
def get_segmentation_model_data(self, params):
|
def get_segmentation_model_data(self, params):
|
||||||
return self.raw_data3
|
return self.raw_data3
|
||||||
|
|
||||||
|
|
||||||
class SegmentationTest(unittest.TestCase):
|
class SegmentationTest(unittest.TestCase):
|
||||||
"""Testing class for Moran's I functions"""
|
"""Testing class for Moran's I functions"""
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
plpy._reset()
|
plpy._reset()
|
||||||
|
|
||||||
def generate_random_data(self,n_samples,random_state, row_type=False):
|
def generate_random_data(self, n_samples, random_state, row_type=False):
|
||||||
x1 = random_state.uniform(size=n_samples)
|
x1 = random_state.uniform(size=n_samples)
|
||||||
x2 = random_state.uniform(size=n_samples)
|
x2 = random_state.uniform(size=n_samples)
|
||||||
x3 = random_state.randint(0, 4, size=n_samples)
|
x3 = random_state.randint(0, 4, size=n_samples)
|
||||||
|
|
||||||
y = x1+x2*x2+x3
|
y = x1+x2*x2+x3
|
||||||
cartodb_id = range(len(x1))
|
cartodb_id = range(len(x1))
|
||||||
|
|
||||||
if row_type:
|
if row_type:
|
||||||
return [ {'features': vals} for vals in zip(x1,x2,x3)], y
|
return [{'features': vals} for vals in zip(x1, x2, x3)], y
|
||||||
else:
|
else:
|
||||||
return [dict( zip(['x1','x2','x3','target', 'cartodb_id'],[x1,x2,x3,y,cartodb_id]))]
|
return [dict(zip(['x1', 'x2', 'x3', 'target', 'cartodb_id'], [x1, x2, x3, y, cartodb_id]))]
|
||||||
|
|
||||||
def test_replace_nan_with_mean(self):
|
def test_replace_nan_with_mean(self):
|
||||||
test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
|
test_array = np.array([1.2, np.nan, 3.2, np.nan, np.nan])
|
||||||
@ -49,9 +52,8 @@ class SegmentationTest(unittest.TestCase):
|
|||||||
training_data = self.generate_random_data(n_samples, random_state_train)
|
training_data = self.generate_random_data(n_samples, random_state_train)
|
||||||
test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True)
|
test_data, test_y = self.generate_random_data(n_samples, random_state_test, row_type=True)
|
||||||
|
|
||||||
|
ids = [{'cartodb_ids': range(len(test_data))}]
|
||||||
ids = [{'cartodb_ids': range(len(test_data))}]
|
rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]
|
||||||
rows = [{'x1': 0, 'x2': 0, 'x3': 0, 'y': 0, 'cartodb_id': 0}]
|
|
||||||
|
|
||||||
plpy._define_result('select \* from \(select \* from training\) a limit 1', rows)
|
plpy._define_result('select \* from \(select \* from training\) a limit 1', rows)
|
||||||
plpy._define_result('.*from \(select \* from training\) as a', training_data)
|
plpy._define_result('.*from \(select \* from training\) as a', training_data)
|
||||||
@ -60,7 +62,7 @@ class SegmentationTest(unittest.TestCase):
|
|||||||
|
|
||||||
model_parameters = {'n_estimators': 1200,
|
model_parameters = {'n_estimators': 1200,
|
||||||
'max_depth': 3,
|
'max_depth': 3,
|
||||||
'subsample' : 0.5,
|
'subsample': 0.5,
|
||||||
'learning_rate': 0.01,
|
'learning_rate': 0.01,
|
||||||
'min_samples_leaf': 1}
|
'min_samples_leaf': 1}
|
||||||
data = [{'target': [],
|
data = [{'target': [],
|
||||||
@ -79,12 +81,12 @@ class SegmentationTest(unittest.TestCase):
|
|||||||
'target',
|
'target',
|
||||||
'feature_columns',
|
'feature_columns',
|
||||||
'select * from test',
|
'select * from test',
|
||||||
model_parameters)
|
model_parameters)
|
||||||
|
|
||||||
prediction = [r[1] for r in result]
|
prediction = [r[1] for r in result]
|
||||||
|
|
||||||
accuracy = np.sqrt(np.mean( np.square( np.array(prediction) - np.array(test_y))))
|
accuracy = np.sqrt(np.mean(np.square(np.array(prediction) - np.array(test_y))))
|
||||||
|
|
||||||
self.assertEqual(len(result),len(test_data))
|
self.assertEqual(len(result), len(test_data))
|
||||||
self.assertTrue( result[0][2] < 0.01)
|
self.assertTrue(result[0][2] < 0.01)
|
||||||
self.assertTrue( accuracy < 0.5*np.mean(test_y) )
|
self.assertTrue(accuracy < 0.5*np.mean(test_y))
|
||||||
|
Loading…
Reference in New Issue
Block a user