Skip to content

Commit 124d803

Browse files
authored
Merge pull request #252 from acrellin/upload_precomputed_features
Allow uploading of pre-computed featuresets
2 parents bfca4eb + 5a8dc34 commit 124d803

14 files changed

+614
-257
lines changed

cesium_app/app_server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
ProjectHandler,
1616
DatasetHandler,
1717
FeatureHandler,
18+
PrecomputedFeaturesHandler,
1819
ModelHandler,
1920
PredictionHandler,
2021
FeatureListHandler,
@@ -58,6 +59,7 @@ def make_app(cfg, baselayer_handlers, baselayer_settings):
5859
(r'/dataset(/.*)?', DatasetHandler),
5960
(r'/features(/[0-9]+)?', FeatureHandler),
6061
(r'/features/([0-9]+)/(download)', FeatureHandler),
62+
(r'/precomputed_features(/.*)?', PrecomputedFeaturesHandler),
6163
(r'/models(/[0-9]+)?', ModelHandler),
6264
(r'/models/([0-9]+)/(download)', ModelHandler),
6365
(r'/predictions(/[0-9]+)?', PredictionHandler),

cesium_app/handlers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@
1010
from .plot_features import PlotFeaturesHandler
1111
from .prediction import PredictionHandler, PredictRawDataHandler
1212
from .sklearn_models import SklearnModelsHandler
13+
from .feature import PrecomputedFeaturesHandler

cesium_app/handlers/feature.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from os.path import join as pjoin
1414
import uuid
1515
import datetime
16+
from io import StringIO
1617
import pandas as pd
1718

1819

@@ -99,6 +100,7 @@ async def post(self):
99100
fset = Featureset(name=featureset_name,
100101
file_uri=fset_path,
101102
project=dataset.project,
103+
dataset=dataset,
102104
features_list=features_to_use,
103105
custom_features_script=None)
104106
DBSession().add(fset)
@@ -138,3 +140,45 @@ def delete(self, featureset_id):
138140
def put(self, featureset_id):
139141
f = Featureset.get_if_owned_by(featureset_id, self.current_user)
140142
self.error("Functionality for this endpoint is not yet implemented.")
143+
144+
145+
class PrecomputedFeaturesHandler(BaseHandler):
    """Register a featureset from an uploaded CSV of pre-computed features."""

    @auth_or_token
    def post(self):
        """Persist an uploaded feature table and create its Featureset row.

        The JSON request body is read for:

        * ``projectID`` -- project that will own the featureset;
        * ``featuresetName`` -- name stored on the new featureset;
        * ``dataFile`` -- mapping with the CSV text under ``body`` and the
          uploaded file's name under ``name``;
        * ``datasetID`` (optional) -- dataset to associate; ``None``,
          ``'None'`` or an absent key means "no dataset".

        The CSV is parsed with two header rows and the first column as the
        index.  A top-level ``labels`` column, if present, is removed from
        the table and passed separately to ``featurize.save_featureset``.
        """
        from os.path import basename

        data = self.get_json()

        # A missing key is treated the same as an explicit "no dataset".
        dataset_id = data.get('datasetID')
        if dataset_id not in [None, 'None']:
            # Look the dataset up through the ownership-aware accessor, the
            # same way the project is resolved below, so a user cannot
            # attach a featureset to a dataset they do not own.
            # NOTE(review): assumes Dataset exposes the same
            # `get_if_owned_by` accessor this module already uses on
            # Project and Featureset -- confirm.
            dataset = Dataset.get_if_owned_by(dataset_id, self.current_user)
        else:
            dataset = None
        current_project = Project.get_if_owned_by(data['projectID'],
                                                  self.current_user)

        feature_data = StringIO(data['dataFile']['body'])
        fset = pd.read_csv(feature_data, index_col=0, header=[0, 1])
        if 'labels' in fset:
            labels = fset.pop('labels').values.ravel()
            # Object-dtype labels are normalised to plain strings.
            if labels.dtype == 'O':
                labels = [str(label) for label in labels]
        else:
            labels = [None]

        # The file name comes from the client; keep only its final path
        # component so it cannot introduce extra directories into the
        # storage path.  Ordinary file names are left unchanged.
        upload_name = basename(str(data['dataFile']['name']).replace('\\', '/'))
        fset_path = pjoin(
            self.cfg['paths:features_folder'],
            '{}_{}.npz'.format(uuid.uuid4(), upload_name))

        featurize.save_featureset(fset, fset_path, labels=labels)

        # Meta-features will have channel values of an empty string or a string
        # beginning with 'Unnamed:'
        features_list = [el[0] for el in fset.columns.tolist() if
                         (el[1] != '' and not el[1].startswith('Unnamed:'))]

        featureset = Featureset(name=data['featuresetName'],
                                file_uri=fset_path,
                                project=current_project,
                                dataset=dataset,
                                features_list=features_list,
                                finished=datetime.datetime.now(),
                                custom_features_script=None)
        DBSession().add(featureset)
        DBSession().commit()

        self.success(featureset, 'cesium/FETCH_FEATURESETS')

cesium_app/models.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class Dataset(Base):
2626
project_id = sa.Column(sa.ForeignKey('projects.id', ondelete='CASCADE'),
2727
nullable=False, index=True)
2828
project = relationship('Project', back_populates='datasets')
29+
featureset = relationship('Featureset', back_populates='dataset')
2930
files = relationship('DatasetFile', backref='dataset', cascade='all')
3031

3132
def display_info(self):
@@ -66,15 +67,15 @@ class Featureset(Base):
6667
project_id = sa.Column(sa.ForeignKey('projects.id', ondelete='CASCADE'),
6768
nullable=False, index=True)
6869
project = relationship('Project', back_populates='featuresets')
70+
dataset_id = sa.Column(sa.ForeignKey('datasets.id'))
71+
dataset = relationship('Dataset')
6972
name = sa.Column(sa.String(), nullable=False)
7073
features_list = sa.Column(sa.ARRAY(sa.VARCHAR()), nullable=False, index=True)
7174
custom_features_script = sa.Column(sa.String())
7275
file_uri = sa.Column(sa.String(), nullable=True, index=True)
7376
task_id = sa.Column(sa.String())
7477
finished = sa.Column(sa.DateTime)
7578

76-
project = relationship('Project')
77-
7879

7980
class Model(Base):
8081
project_id = sa.Column(sa.ForeignKey('projects.id', ondelete='CASCADE'),
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
feature,amplitude,flux_percentile_ratio_mid20,flux_percentile_ratio_mid35,flux_percentile_ratio_mid50,flux_percentile_ratio_mid65,flux_percentile_ratio_mid80,max_slope,maximum,median,median_absolute_deviation,minimum,percent_amplitude,percent_beyond_1_std,percent_close_to_median,percent_difference_flux_percentile,period_fast,qso_log_chi2_qsonu,qso_log_chi2nuNULL_chi2nu,skew,std,stetson_j,stetson_k,weighted_average,meta1,meta2,meta3,labels
2+
channel,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,
3+
217801,3.4145,0.026237634586002364,0.058162853124671635,0.10309627831781239,0.17599090611586052,0.41575052067669566,24.652777777737825,14.626,12.42,0.9030000000000005,7.797,69.66429004252072,0.27001862197392923,0.3649906890130354,23.06668928889701,375.6510827444723,4.230919840093198,2.671853833598369,-0.9236797091091277,1.4663914712199768,11.369853415776587,0.9189839436611995,12.040512767456608,0.18073430690900003,0.548427238218,0.18795623725299998,Mira
4+
224635,0.5305000000000004,0.20530614882878542,0.37706209346535463,0.5453768817756192,0.7330743740788521,0.9166330384370994,17.90927021704068,8.768,8.2,0.18400000000000105,7.707,0.5747078772898464,0.4177396280400572,0.2832618025751073,0.7218041519921866,13.451012652036399,3.2134838274248385,-0.0525010150070849,-0.3398583440079825,0.23517859967140312,1.4786720429259577,1.0411229015075227,8.14368000185732,0.330610932539,0.77316026008,0.0952391836803,Classical_Cepheid
5+
232798,1.9050000000000002,0.18733902825629659,0.3230574524254608,0.45220729741608706,0.6054170290823713,0.8292614702651346,49.87593052038574,9.179,7.048,0.7784999999999997,5.369,3.6945972728154928,0.3796875,0.2390625,3.3066371717225116,149.38740734028093,4.138293582989955,1.8021678710880233,0.15731166528082127,0.9603469186752173,8.140710283210836,1.0595606571990508,7.093264577381519,0.8972212196189999,0.6016976582729999,0.587206038094,Mira
6+
235913,0.2919999999999998,0.2143858659023139,0.372513966154211,0.5364876814022899,0.7205289635708764,0.8492495259235322,21.57676348499049,10.522,10.243,0.10899999999999999,9.938,0.3243415351946631,0.39611650485436894,0.26990291262135924,0.388091294618729,63.40715775907203,1.533531229032464,-0.04280701383600338,-0.2506256293768128,0.13248029604308875,0.34575932485666866,1.0682632618314676,10.232210386899137,0.325215030244,0.8859140743739999,0.154849728193,Classical_Cepheid
7+
243412,0.6435000000000004,0.13967315443062386,0.3247627822515256,0.5190707271794328,0.6752585427537589,0.879866196783224,23.75478926785315,13.101,12.205,0.1120000000000001,11.814,0.5618730179775074,0.24202626641651032,0.5722326454033771,0.5782757551700131,37.33075381991498,3.2763034282434744,-0.049484189752863755,1.000270086380876,0.25145986677693544,1.1851795344480516,0.881970420711926,12.307386543586608,0.727226591713,2.6890731178099996,0.9426339697600001,W_Ursae_Maj
8+
247327,2.1945,0.2857724132471718,0.48556349263806586,0.6426319146340265,0.7897408686080372,0.9233730119805591,76.71641791148072,12.278,9.3305,1.089500000000001,7.889,2.7722455418473766,0.4024896265560166,0.17427385892116182,3.2994822291143238,348.582432044199,3.453919776991032,3.33954175839345,0.43017754594581625,1.2797726669476512,11.616459809345399,0.9751156767991696,9.49116371282888,0.8813121611779999,2.48443065817,0.7625254024889999,Mira
9+
257141,0.46950000000000003,0.13911916977845062,0.2554956670050432,0.39335583988843975,0.5357113476422322,0.7345991397031828,0.31574688939982387,13.869,13.295,0.08799999999999919,12.93,0.41061374975941844,0.2926315789473684,0.5305263157894737,0.414012881455879,27.448091498509122,1.9335536940956592,0.11275583542993171,0.5536755308508332,0.13923623640807684,0.18631078004976703,0.9589344760255665,13.303436442164505,0.8813121611779999,2.48443065817,0.7625254024889999,W_Ursae_Maj

cesium_app/tests/frontend/test_features.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,31 @@ def test_delete_featureset(driver, project, dataset, featureset):
177177
driver.find_element_by_partial_link_text('Delete').click()
178178
driver.wait_for_xpath("//div[contains(text(),'Feature set deleted')]")
179179
try:
180-
el = driver.wait_for_xpath("//td[contains(text(),'{test_featureset_name}')]")
180+
el = driver.wait_for_xpath(f"//td[contains(text(),'{test_featureset_name}')]")
181181
except TimeoutException:
182182
pass
183183
else:
184184
raise Exception("Featureset still present in table after delete.")
185+
186+
187+
def test_upload_precomputed_features(driver, project, dataset):
    """Upload a pre-computed featureset CSV through the UI and check it is listed."""
    tests_dir = os.path.dirname(os.path.dirname(__file__))
    csv_path = pjoin(tests_dir, 'data', 'downloaded_cesium_featureset.csv')

    driver.get('/')
    driver.refresh()

    # Switch to the project created by the fixture.
    project_dropdown = driver.find_element_by_css_selector('[name=project]')
    Select(project_dropdown).select_by_value(str(project.id))

    # Open the upload form on the features tab.
    features_tab = driver.find_element_by_id('react-tabs-4')
    features_tab.click()
    upload_link = driver.find_element_by_partial_link_text(
        'Upload Pre-Computed Features')
    upload_link.click()

    # Fill in the form: dataset, featureset name, CSV file.
    dataset_dropdown = driver.find_element_by_css_selector('[name=datasetID]')
    Select(dataset_dropdown).select_by_value(str(dataset.id))

    driver.find_element_by_css_selector('[name=featuresetName]').send_keys(
        test_featureset_name)
    driver.find_element_by_css_selector('[name=dataFile]').send_keys(csv_path)

    submit_button = driver.find_element_by_class_name('btn-primary')
    submit_button.click()

    # The new featureset should show up in the table.
    row_xpath = f"//td[contains(text(),'{test_featureset_name}')]"
    driver.wait_for_xpath(row_xpath)

cesium_app/tests/frontend/test_pipeline_sequentially.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pytest
22
from selenium import webdriver
33
from selenium.webdriver.support.ui import Select
4+
from selenium.common.exceptions import NoSuchElementException, TimeoutException
45
import uuid
56
import os
67
from os.path import join as pjoin
@@ -10,6 +11,11 @@
1011
def test_pipeline_sequentially(driver):
1112
driver.get("/")
1213

14+
# Delete existing project if present
15+
try:
16+
driver.wait_for_xpath('//*[contains(text(), "Delete Project")]').click()
17+
except (NoSuchElementException, TimeoutException):
18+
pass
1319
# Add new project
1420
driver.wait_for_xpath('//*[contains(text(), "Or click here to add a new one")]').click()
1521

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import pytest
2+
from selenium import webdriver
3+
from selenium.webdriver.support.ui import Select
4+
import uuid
5+
import os
6+
from os.path import join as pjoin
7+
import time
8+
9+
10+
def test_pipeline_sequentially_precomputed_features(driver):
    """Walk the whole UI pipeline using an uploaded, pre-computed featureset.

    Steps: create a project, upload a dataset, upload pre-computed
    features, train a model on them, then run predictions.
    """
    data_dir = pjoin(os.path.dirname(os.path.dirname(__file__)), 'data')

    def by_css(selector):
        return driver.find_element_by_css_selector(selector)

    def fill(selector, text):
        # Locate the field, then type into it.
        by_css(selector).send_keys(text)

    def pick(selector, visible_text):
        Select(by_css(selector)).select_by_visible_text(visible_text)

    def open_form(tab_id, link_text):
        driver.find_element_by_id(tab_id).click()
        driver.find_element_by_partial_link_text(link_text).click()

    def submit():
        driver.find_element_by_class_name('btn-primary').click()

    driver.get("/")

    # Add new project
    add_project_xpath = (
        '//*[contains(text(), "Or click here to add a new one")]')
    driver.wait_for_xpath(add_project_xpath).click()

    test_proj_name = str(uuid.uuid4())
    fill('[name=projectName]', test_proj_name)
    fill('[name=projectDescription]', "Test Description")
    submit()

    driver.wait_for_xpath("//div[contains(text(),'Added new project')]")
    driver.refresh()

    # Ensure new project is selected
    pick('[name=project]', test_proj_name)

    # Add new dataset
    test_dataset_name = str(uuid.uuid4())
    open_form('react-tabs-2', 'Upload new dataset')

    fill('[name=datasetName]', test_dataset_name)
    fill('[name=headerFile]',
         pjoin(data_dir,
               'larger_asas_training_subset_classes_with_metadata.dat'))
    fill('[name=tarFile]',
         pjoin(data_dir, 'larger_asas_training_subset.tar.gz'))
    submit()

    driver.wait_for_xpath(
        "//div[contains(text(),'Successfully uploaded new dataset')]")
    driver.refresh()

    # Ensure new project is selected
    pick('[name=project]', test_proj_name)

    # Upload the pre-computed feature set
    test_featureset_name = str(uuid.uuid4())
    open_form('react-tabs-4', 'Upload Pre-Computed Features')

    fill('[name=featuresetName]', test_featureset_name)

    # Ensure dataset from previous step is selected
    pick('[name=datasetID]', test_dataset_name)

    fill('[name=dataFile]',
         pjoin(data_dir, 'downloaded_cesium_featureset.csv'))
    submit()

    driver.wait_for_xpath(
        "//div[contains(text(),'Successfully uploaded new feature set')]")
    driver.wait_for_xpath("//td[contains(text(),'Completed')]", 30)

    # Build new model
    open_form('react-tabs-6', 'Create New Model')
    pick('[name=modelType]', 'RandomForestClassifier (fast)')

    test_model_name = str(uuid.uuid4())
    fill('[name=modelName]', test_model_name)

    # Ensure featureset from previous step is selected
    pick('[name=featureset]', test_featureset_name)
    submit()

    driver.wait_for_xpath("//div[contains(text(),'Model training begun')]")
    driver.wait_for_xpath("//td[contains(text(),'Completed')]", 60)

    # Predict using dataset and model from this test
    open_form('react-tabs-8', 'Predict Targets')

    # Ensure model from previous step is selected
    pick('[name=modelID]', test_model_name)

    # Ensure dataset from previous step is selected
    pick('[name=datasetID]', test_dataset_name)
    submit()

    driver.wait_for_xpath("//div[contains(text(),'Model predictions begun')]")
    driver.wait_for_xpath("//td[contains(text(),'Completed')]", 20)

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cesium>=0.9.4
1+
cesium>=0.9.5
22
joblib>=0.11
33
bokeh==0.12.5
44
pytest-randomly

static/js/actions.js

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,53 @@ export function computeFeatures(form) {
438438
}
439439

440440

441+
// Upload a CSV of pre-computed features for `currentProject`.
//
// `form` holds the submitted form values; its `dataFile` entry is a list of
// File objects whose first element is read as text and sent, together with
// the remaining form values and the project id, as JSON to
// `/precomputed_features`.  Returns a thunk that runs the request through
// `promiseAction`.
export function uploadFeatureset(form, currentProject) {
  // Read the first file of `formFields[fileName]` and resolve with
  // `{ body, name }`.  With `binary` set the file is read as a data URL,
  // otherwise as text.  A failed or aborted read rejects instead of
  // resolving with an empty body.
  function fileReaderPromise(formFields, fileName, binary = false) {
    return new Promise((resolve, reject) => {
      const file = formFields[fileName][0];
      const filereader = new FileReader();

      // Attach handlers before starting the read.
      filereader.onload = () => resolve({ body: filereader.result,
                                          name: file.name });
      filereader.onerror = () =>
        reject({ _error: `Could not read file ${file.name}` });
      filereader.onabort = () =>
        reject({ _error: `Reading file ${file.name} was aborted` });

      if (binary) {
        filereader.readAsDataURL(file);
      } else {
        filereader.readAsText(file);
      }
    });
  }

  // Captured eagerly, as before, so a missing project fails right away.
  const projectID = currentProject.id;

  return dispatch =>
    promiseAction(
      dispatch,
      // NOTE(review): this reuses the dataset-upload action type for a
      // featureset upload -- confirm that is intended.
      UPLOAD_DATASET,

      fileReaderPromise(form, 'dataFile')
        .then((data) => {
          // Build the request payload on a copy: the caller's form values
          // keep their original file list, so a retry can read it again.
          const payload = Object.assign({}, form, { projectID,
                                                    dataFile: data });
          return fetch('/precomputed_features', {
            credentials: 'same-origin',
            method: 'POST',
            body: JSON.stringify(payload),
            headers: new Headers({
              'Content-Type': 'application/json'
            })
          });
        })
        .then(response => response.json())
        .then((json) => {
          if (json.status === 'success') {
            dispatch(showNotification('Successfully uploaded new feature set'));
            dispatch(hideExpander('uploadFeatsFormExpander'));
            dispatch(resetForm('uploadFeatures'));
          } else {
            return Promise.reject({ _error: json.message });
          }
          return json;
        })
    );
}
486+
487+
441488
export function deleteDataset(id) {
442489
return dispatch =>
443490
promiseAction(

0 commit comments

Comments
 (0)