From b17ecf5b01d060093be3de125256e1622bfc03c4 Mon Sep 17 00:00:00 2001 From: Victor Mireles-Chavez Date: Sat, 7 Nov 2020 16:34:39 +0100 Subject: [PATCH 1/7] disabled CORS --- src/pyDataverse/api.py | 69 +++++++++++++++++++++++++++++------- tools/tests-requirements.txt | 1 + 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 2bebc05..be97763 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -532,6 +532,18 @@ def delete_dataverse(self, identifier, auth=True): print('Dataverse {} deleted.'.format(identifier)) return resp + def get_dataset_metadata(self, identifier, auth=True, is_pid=True, + version="latestVersion"): + r = self.get_dataset(identifier=identifier, + auth=auth, + is_pid=is_pid) + rjson = r.json() + metadata_json = {} + metadata_json["datasetVersion"] = dict() + metadata_json["datasetVersion"]["metadataBlocks"] = rjson["data"][version]["metadataBlocks"] + + return metadata_json + def get_dataset(self, identifier, auth=True, is_pid=True): """Get metadata of a Dataset. @@ -724,21 +736,21 @@ def publish_dataset(self, pid, type='minor', auth=True): """ query_str = '/datasets/:persistentId/actions/:publish' - query_str += '?persistentId={0}&type={1}'.format(identifier, type) + query_str += '?persistentId={0}&type={1}'.format(pid, type) resp = self.post_request(query_str, auth=auth) if resp.status_code == 404: error_msg = resp.json()['message'] raise DatasetNotFoundError( 'ERROR: HTTP 404 - Dataset {0} was not found. MSG: {1}' - ''.format(identifier, error_msg)) + ''.format(pid, error_msg)) elif resp.status_code == 401: error_msg = resp.json()['message'] raise ApiAuthorizationError( 'ERROR: HTTP 401 - User not allowed to publish dataset {0}. 
' - 'MSG: {1}'.format(identifier, error_msg)) + 'MSG: {1}'.format(pid, error_msg)) elif resp.status_code == 200: - print('Dataset {} published'.format(identifier)) + print('Dataset {} published'.format(pid)) return resp def delete_dataset(self, identifier, is_pid=True, auth=True): @@ -912,6 +924,20 @@ def get_datafiles(self, pid, version='1'): resp = self.get_request(query_str) return resp + def get_datafile_metadata(self, identifier, is_pid=True, auth=True): + if is_pid: + query_str = '/files/:persistentId/metadata/draft?persistentId={0}'.format(identifier) + else: + query_str = '/files/{0}/metadata/draft'.format(identifier) + + resp = self.get_request(query_str, auth=auth) + try: + j = resp.json() + return j + except: + print("Failed getting file metadata: " + identifier) + return resp + def get_datafile(self, identifier, is_pid=True): """Download a datafile via the Dataverse Data Access API. @@ -989,7 +1015,11 @@ def get_datafile_bundle(self, identifier): data = self.get_request(query_str) return data - def upload_file(self, identifier, filename, is_pid=True): + def upload_file(self, identifier, filename, + is_pid=True, description="", + tags=None, + fileObject=None, + restricted=True): """Add file to a dataset. Add a file to an existing Dataset. Description and tags are optional: @@ -1026,13 +1056,28 @@ def upload_file(self, identifier, filename, is_pid=True): identifier) else: query_str += '/datasets/{0}/add'.format(identifier) - shell_command = 'curl -H "X-Dataverse-key: {0}"'.format( - self.api_token) - shell_command += ' -X POST {0} -F file=@{1}'.format( - query_str, filename) - # TODO(Shell): is shell=True necessary? 
- result = sp.run(shell_command, shell=True, stdout=sp.PIPE) - resp = json.loads(result.stdout) + + headers = {'X-Dataverse-key': self.api_token} + + pl = {"description": description, + "restricted": str(restricted).lower()} + if tags is not None: + pl["categories"] = tags + payload = {"jsonData": json.dumps(pl)} + + if fileObject is None: + fin = open(filename, "b") + else: + fin = fileObject + fin.seek(0) + files = {'file': (filename, fin.read())} + + r = post(url=query_str, + headers=headers, + data=payload, + files=files) + + resp = r.json() return resp def get_info_version(self): diff --git a/tools/tests-requirements.txt b/tools/tests-requirements.txt index 2f80e42..1010b9f 100644 --- a/tools/tests-requirements.txt +++ b/tools/tests-requirements.txt @@ -14,3 +14,4 @@ six==1.12.0 urllib3==1.25.3 wcwidth==0.1.7 zipp==0.5.1 +python-dotenv From 6ee63ec858086742b0ce0963d37e6ee2421ce658 Mon Sep 17 00:00:00 2001 From: Victor Mireles-Chavez Date: Sun, 3 Jan 2021 15:06:41 +0100 Subject: [PATCH 2/7] allow for file objects to be uploaded --- src/pyDataverse/api.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index de4a828..6c4c7f0 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -1662,7 +1662,7 @@ def get_datafile_metadata( # CHECK: Its not really clear, if the version query can also be done via ID. return self.get_request(url, auth=auth) - def upload_datafile(self, identifier, filename, json_str=None, is_pid=True): + def upload_datafile(self, identifier, file_or_name, json_str=None, is_pid=True): """Add file to a dataset. Add a file to an existing Dataset. Description and tags are optional: @@ -1683,8 +1683,8 @@ def upload_datafile(self, identifier, filename, json_str=None, is_pid=True): ---------- identifier : str Identifier of the dataset. - filename : str - Full filename with path. 
+ file_or_name : str + File object open in binary read mode, or file Full filename with path. json_str : str Metadata as JSON string. is_pid : bool @@ -1702,8 +1702,10 @@ def upload_datafile(self, identifier, filename, json_str=None, is_pid=True): url += "/datasets/:persistentId/add?persistentId={0}".format(identifier) else: url += "/datasets/{0}/add".format(identifier) - - files = {"file": open(filename, "rb")} + if isinstance(file_or_name, str): + files = {"file": open(file_or_name, "rb")} + else: + files = {"file", file_or_name} return self.post_request( url, data={"jsonData": json_str}, files=files, auth=True ) From f5f3075b460406d5e813a9b5414080350276b7c9 Mon Sep 17 00:00:00 2001 From: Victor Mireles-Chavez Date: Sun, 3 Jan 2021 15:20:25 +0100 Subject: [PATCH 3/7] allow for file objects to be uploaded --- src/pyDataverse/api.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 6c4c7f0..b1e1dab 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -1662,7 +1662,8 @@ def get_datafile_metadata( # CHECK: Its not really clear, if the version query can also be done via ID. return self.get_request(url, auth=auth) - def upload_datafile(self, identifier, file_or_name, json_str=None, is_pid=True): + def upload_datafile(self, identifier, file_name, json_str=None, is_pid=True, + file_object=None): """Add file to a dataset. Add a file to an existing Dataset. 
Description and tags are optional: @@ -1702,10 +1703,10 @@ def upload_datafile(self, identifier, file_or_name, json_str=None, is_pid=True): url += "/datasets/:persistentId/add?persistentId={0}".format(identifier) else: url += "/datasets/{0}/add".format(identifier) - if isinstance(file_or_name, str): - files = {"file": open(file_or_name, "rb")} + if file_object is None + files = {"file": (file_name, open(file_name, "rb"))} else: - files = {"file", file_or_name} + files = {"file", (file_name, file_object)} return self.post_request( url, data={"jsonData": json_str}, files=files, auth=True ) From 88cfaf28711279e9e4771c49ac9ac071c4ff72c8 Mon Sep 17 00:00:00 2001 From: Victor Mireles-Chavez Date: Sun, 3 Jan 2021 15:21:20 +0100 Subject: [PATCH 4/7] allow for file objects to be uploaded --- src/pyDataverse/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index b1e1dab..4e9599f 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -1703,7 +1703,7 @@ def upload_datafile(self, identifier, file_name, json_str=None, is_pid=True, url += "/datasets/:persistentId/add?persistentId={0}".format(identifier) else: url += "/datasets/{0}/add".format(identifier) - if file_object is None + if file_object is None: files = {"file": (file_name, open(file_name, "rb"))} else: files = {"file", (file_name, file_object)} From e31872d72ec1c882fd33fd4fa8c425be85418f69 Mon Sep 17 00:00:00 2001 From: Victor Mireles-Chavez Date: Sun, 3 Jan 2021 15:24:59 +0100 Subject: [PATCH 5/7] allow for file objects to be uploaded --- src/pyDataverse/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 4e9599f..d301fe9 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -1706,7 +1706,7 @@ def upload_datafile(self, identifier, file_name, json_str=None, is_pid=True, if file_object is None: files = {"file": (file_name, open(file_name, "rb"))} 
else: - files = {"file", (file_name, file_object)} + files = {"file": (file_name, file_object)} return self.post_request( url, data={"jsonData": json_str}, files=files, auth=True ) From a9a9e7f7780d365b6ed40bcc28a7d7a427652a2c Mon Sep 17 00:00:00 2001 From: Victor Mireles-Chavez Date: Tue, 5 Jan 2021 17:42:58 +0100 Subject: [PATCH 6/7] can now upload file object --- src/pyDataverse/api.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index d301fe9..80cb544 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -1684,13 +1684,16 @@ def upload_datafile(self, identifier, file_name, json_str=None, is_pid=True, ---------- identifier : str Identifier of the dataset. - file_or_name : str - File object open in binary read mode, or file Full filename with path. + file_name : str + File name and path. If file_object is ``None``, a file in this path is opened in + read binary mode for upload json_str : str Metadata as JSON string. is_pid : bool ``True`` to use persistent identifier. ``False``, if not. - + file_object : file + Defaults to ``None``. Otherwise, it is expected to be a file object which will be uploaded. 
+ In this case, the filename is treated as text and passed on to dataverse Returns ------- dict @@ -1704,7 +1707,7 @@ def upload_datafile(self, identifier, file_name, json_str=None, is_pid=True, else: url += "/datasets/{0}/add".format(identifier) if file_object is None: - files = {"file": (file_name, open(file_name, "rb"))} + files = {"file": open(file_name, "rb")} else: files = {"file": (file_name, file_object)} return self.post_request( From 013420116d1692500dfbee977b364cb4550decbb Mon Sep 17 00:00:00 2001 From: syats Date: Tue, 21 Jun 2022 22:53:18 +0200 Subject: [PATCH 7/7] Made requests streamable to be able to get their raw data --- src/pyDataverse/api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pyDataverse/api.py b/src/pyDataverse/api.py index 80cb544..cfa67d4 100644 --- a/src/pyDataverse/api.py +++ b/src/pyDataverse/api.py @@ -124,7 +124,7 @@ def get_request(self, url, params=None, auth=False): ) try: - resp = get(url, params=params) + resp = get(url, params=params, stream=True) if resp.status_code == 401: error_msg = resp.json()["message"] raise ApiAuthorizationError( @@ -180,7 +180,8 @@ def post_request(self, url, data=None, auth=False, params=None, files=None): raise ApiAuthorizationError("ERROR: POST - Api token not available.") try: - resp = post(url, data=data, params=params, files=files) + resp = post(url, data=data, params=params, files=files, + stream=True) if resp.status_code == 401: error_msg = resp.json()["message"] raise ApiAuthorizationError(