Commit ce08b9c

Merge pull request #2715 from jeff1evesque/feature-5
#5: Create backend logic to parse contents of URL
2 parents e0a62f1 + af989c2 commit ce08b9c

27 files changed: +795 -22 lines

brain/session/data/dataset_to_dict.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ def dataset_dictionary(id_entity, model_type, upload):
     dataset = []
     observation_labels = []
     list_error = []
-    json_upload = upload['dataset']['json_string']
+    json_upload = upload['dataset'].get('json_string', None)
     list_model_type = current_app.config.get('MODEL_TYPE')

     if json_upload:
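The substantive fix above is the switch from direct indexing to dict.get: a url-based submission carries no 'json_string' key, so the old lookup raised KeyError before validation could report anything useful. A minimal standalone sketch of the difference, using a hypothetical payload (the 'dataset_url' key below is illustrative, not from the source):

# hypothetical payload for a url-based submission: no 'json_string' key
upload = {'dataset': {'dataset_url': ['https://example.com/svm.json']}}

# old lookup: raises KeyError, since the key is absent
try:
    json_upload = upload['dataset']['json_string']
except KeyError:
    json_upload = None

# new lookup: returns None directly, so a later 'if json_upload:' skips
json_upload = upload['dataset'].get('json_string', None)
assert json_upload is None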

brain/session/data/validate_file_extension.py

Lines changed: 89 additions & 4 deletions
@@ -16,8 +16,15 @@ def reduce_dataset(dataset, session_type):
     # variables
     list_error = []

-    # web-interface: validate, and restructure dataset
-    if dataset['data']['dataset']['file_upload']:
+    # web-interface: validate, and restructure 'file-upload' dataset
+    if (
+        dataset['data'].get('dataset', None) and
+        dataset['data']['dataset'].get('file_upload', None) and
+        dataset['data']['settings'].get(
+            'dataset_type', None) == 'file_upload'
+    ):
+
+        # validate and restructure
         validator = Validate_File_Extension(
             dataset,
             session_type
@@ -29,13 +36,91 @@ def reduce_dataset(dataset, session_type):
             adjusted_dataset['error']
         )

-    # programmatic-interface: validate, do not restructure
-    elif dataset['data']['dataset']['json_string']:
+    # web-interface: validate, and restructure url dataset
+    elif (
+        dataset['data']['settings'].get('dataset[]', None) and
+        dataset['data']['settings'].get(
+            'dataset_type', None) == 'dataset_url'
+    ):
+
+        # define 'file_upload', since it doesn't exist
+        data = dataset['data']
+        data['dataset'] = {}
+        if type(data['settings']['dataset[]']) is list:
+            data['dataset']['file_upload'] = data['settings']['dataset[]']
+        else:
+            data['dataset']['file_upload'] = []
+            data['dataset']['file_upload'].append(
+                data['settings']['dataset[]']
+            )
+
+        # validate and restructure
+        validator = Validate_File_Extension(
+            {
+                'data': {
+                    'dataset': {
+                        'file_upload': data['dataset']['file_upload'],
+                        'type': data['settings']['dataset_type'],
+                    }
+                },
+            },
+            session_type
+        )
+        adjusted_dataset = validator.validate()
+
+        if adjusted_dataset['error']:
+            list_error.append(
+                adjusted_dataset['error']
+            )
+
+    # programmatic-interface: validate, do not restructure file upload
+    elif (
+        dataset['data']['dataset'].get('json_string', None) and
+        dataset['data']['settings'].get(
+            'dataset_type', None) == 'file_upload'
+    ):
+
         adjusted_dataset = dataset['data']

         if dataset['error']:
             list_error.append(adjusted_dataset['error'])

+    # programmatic-interface: validate, and restructure url dataset
+    elif (
+        dataset['data']['dataset'].get('json_string', None) and
+        dataset['data']['settings'].get(
+            'dataset_type', None) == 'dataset_url'
+    ):
+
+        # define 'file_upload', since it doesn't exist
+        data = dataset['data']
+        if type(data['dataset']['json_string']) is list:
+            data['dataset']['file_upload'] = data['dataset']['json_string']
+        else:
+            data['dataset']['file_upload'] = []
+            data['dataset']['file_upload'].append(
+                data['dataset']['json_string']
+            )
+
+        # validate and restructure
+        validator = Validate_File_Extension(
+            {
+                'data': {
+                    'dataset': {
+                        'file_upload': data['dataset']['file_upload'],
+                        'type': data['settings']['dataset_type'],
+                    }
+                },
+            },
+            session_type
+        )
+        adjusted_dataset = validator.validate()
+
+        if adjusted_dataset['error']:
+            list_error.append(
+                adjusted_dataset['error']
+            )
+
     # return
     if list_error:
         return {'dataset': None, 'error': list_error}
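Taken together, the four branches dispatch on the ('dataset_type', interface) pair, and both url branches coerce their input into the 'file_upload' shape before handing it to the validator. A sketch of a programmatic url submission reaching the final branch; the payload shape is inferred from the elif conditions above, and the success-path return shape is assumed to mirror the error path:

# payload shape inferred from the elif conditions above (an assumption)
dataset = {
    'error': None,
    'data': {
        'settings': {'dataset_type': 'dataset_url'},
        'dataset': {
            'json_string': [
                'https://raw.githubusercontent.com/jeff1evesque'
                '/machine-learning/master/interface/static/data'
                '/json/web_interface/svm.json',
            ],
        },
    },
}

# the url list is wrapped as 'file_upload', then validated and fetched
result = reduce_dataset(dataset, 'data_append')

# assumes the success path also returns a dict with an 'error' key
if result['error']:
    print(result['error'])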

brain/validator/validate_file_extension.py

Lines changed: 63 additions & 6 deletions
@@ -9,6 +9,8 @@
 '''

 import os.path
+import urllib
+import cStringIO
 from brain.converter.calculate_md5 import calculate_md5


@@ -45,22 +47,30 @@ def validate(self):

         # local variables
         list_error = []
-
-        dataset = self.premodel_data['data']['dataset']
         acceptable_type = ['csv', 'xml', 'json']

-        unique_hash = set()
+        unique_data = set()
         dataset_keep = []

-        if (dataset.get('file_upload', None)):
+        # validate and restructure: file upload
+        if (
+            self.premodel_data['data'].get('settings', None) and
+            self.premodel_data['data']['settings'].get(
+                'dataset_type', None) == 'file_upload' and
+            self.premodel_data.get('data', None) and
+            self.premodel_data['data'].get('dataset', None) and
+            self.premodel_data['data']['dataset'].get('file_upload', None)
+        ):
+
+            dataset = self.premodel_data['data']['dataset']

             for index, filedata in enumerate(dataset['file_upload']):
                 try:
                     split_path = os.path.splitext(filedata['filename'])
                     filehash = calculate_md5(filedata['file'])
                     # add 'hashed' value of file reference(s) to a list
-                    if filehash not in unique_hash:
-                        unique_hash.add(filehash)
+                    if filehash not in unique_data:
+                        unique_data.add(filehash)
                         file_extension = split_path[1][1:].strip().lower()

                         # validate file_extension
@@ -86,6 +96,53 @@ def validate(self):
             # replace portion of dataset with unique 'file reference(s)'
             dataset['file_upload'][:] = dataset_keep

+        # validate and restructure: url reference
+        elif (
+            self.premodel_data.get('data', None) and
+            self.premodel_data['data'].get('dataset', None) and
+            self.premodel_data['data']['dataset'].get(
+                'type', None) and
+            self.premodel_data['data']['dataset']['type'] == 'dataset_url'
+        ):
+
+            dataset = self.premodel_data['data']['dataset']
+            urls = self.premodel_data['data']['dataset']['file_upload']
+
+            for index, url in enumerate(urls):
+                split_path = os.path.splitext(url)
+                file_extension = split_path[1][1:].strip().lower()
+
+                try:
+                    if url not in unique_data:
+                        unique_data.add(url)
+
+                        # validate file_extension
+                        if (file_extension not in acceptable_type):
+                            msg = '''Problem: url reference, \''''
+                            msg += file_extension
+                            msg += '''\', must be one of the formats:'''
+                            msg += '\n ' + ', '.join(acceptable_type)
+                            list_error.append(msg)
+
+                        # keep non-duplicated url references
+                        else:
+                            filename = os.path.split(url)[1]
+                            dataset_keep.append({
+                                'type': file_extension,
+                                'file': cStringIO.StringIO(
+                                    urllib.urlopen(url).read()
+                                ),
+                                'filename': filename
+                            })
+
+                except:
+                    msg = 'Problem with url reference ' + url
+                    msg += '. Please re-upload the information.'
+                    list_error.append(msg)
+
+            # define unique 'file reference(s)'
+            dataset['file_upload'][:] = dataset_keep
+
         else:
             msg = 'No file(s) were uploaded'
             list_error.append(msg)
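The url branch treats a remote reference as a just-in-time upload: the extension is parsed from the url path, the body is fetched with urllib, and the bytes are wrapped in cStringIO so downstream converters see the same file-like object a browser upload produces. A standalone Python 2 sketch of that technique (the url is illustrative, and error handling is reduced to the essentials):

import os.path
import urllib
import cStringIO

# illustrative url; any small csv/xml/json reference works the same way
url = ('https://raw.githubusercontent.com/jeff1evesque/machine-learning'
       '/master/interface/static/data/json/web_interface/svm.json')

# derive the extension from the url path: '.json' -> 'json'
file_extension = os.path.splitext(url)[1][1:].strip().lower()

if file_extension in ['csv', 'xml', 'json']:
    # buffer the remote body in memory, mimicking an uploaded file object
    remote_file = cStringIO.StringIO(urllib.urlopen(url).read())
    print remote_file.read(80)

One consequence of this design is that each remote dataset is buffered wholly in memory, which suits these small samples but is worth revisiting for large files.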
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+{
+    "properties": {
+        "dataset_type": "dataset_url",
+        "session_type": "data_append",
+        "model_type": "svm",
+        "session_id": "1"
+    },
+    "dataset": [
+        "https://raw.githubusercontent.com/jeff1evesque/machine-learning/master/interface/static/data/json/web_interface/svm.json",
+        "https://raw.githubusercontent.com/jeff1evesque/machine-learning/master/interface/static/data/json/web_interface/svm-1.json"
+    ]
+}
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+{
+    "properties": {
+        "session_name": "sample_svm_title",
+        "dataset_type": "dataset_url",
+        "session_type": "data_new",
+        "model_type": "svm"
+    },
+    "dataset": [
+        "https://raw.githubusercontent.com/jeff1evesque/machine-learning/master/interface/static/data/json/web_interface/svm.json",
+        "https://raw.githubusercontent.com/jeff1evesque/machine-learning/master/interface/static/data/json/web_interface/svm-1.json"
+    ]
+}
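The two new 12-line samples above mirror the existing svm-data-new and svm-data-append configurations, except 'dataset' is now a list of url strings rather than an inline observation payload. A short sketch of loading one and enumerating its url references (the local filename is illustrative):

import json

# illustrative filename for the second sample above
with open('svm-data-new.json') as config:
    payload = json.load(config)

assert payload['properties']['dataset_type'] == 'dataset_url'
for url in payload['dataset']:
    print(url)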
File renamed without changes.
File renamed without changes.

interface/static/data/json/programmatic_interface/svm/svm-data-append.json renamed to interface/static/data/json/programmatic_interface/svm/file_upload/svm-data-append.json

Lines changed: 2 additions & 2 deletions
@@ -1,9 +1,9 @@
 {
     "properties": {
-        "dataset_type": "json_string",
+        "dataset_type": "file_upload",
         "session_type": "data_append",
         "model_type": "svm",
-        "session_id": "1"
+        "session_id": "3"
     },
     "dataset": {
         "dep-variable-1": [

interface/static/data/json/programmatic_interface/svm/svm-data-new.json renamed to interface/static/data/json/programmatic_interface/svm/file_upload/svm-data-new.json

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {
     "properties": {
         "session_name": "sample_svm_title",
-        "dataset_type": "json_string",
+        "dataset_type": "file_upload",
         "session_type": "data_new",
         "model_type": "svm"
     },
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
+{
+    "properties": {
+        "session_type": "model_generate",
+        "session_id": "3",
+        "model_type": "svm",
+        "sv_kernel_type": "rbf"
+    }
+}
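This new sample requests model generation against session_id "3", matching the session_id change in the renamed data-append sample above. A hypothetical client sketch for submitting any of these JSON samples over HTTP; the endpoint route and local filename are assumptions for illustration, not taken from this commit:

import json
import urllib2

# illustrative filename for the sample above
with open('svm-model-generate.json') as config:
    body = json.dumps(json.load(config))

# assumed endpoint; substitute the project's actual api route
request = urllib2.Request(
    'http://localhost:5000/load-data',
    body,
    {'Content-Type': 'application/json'},
)
print urllib2.urlopen(request).read()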
