From 9d234fabd197b73dd6bc79db75d28bd49d979e92 Mon Sep 17 00:00:00 2001 From: Heather MacDonald Date: Mon, 14 Aug 2023 14:33:01 -0700 Subject: [PATCH 1/3] Rename functions for clarity --- deployment/promotion/promote.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/deployment/promotion/promote.py b/deployment/promotion/promote.py index 6a44a20..07deed5 100755 --- a/deployment/promotion/promote.py +++ b/deployment/promotion/promote.py @@ -229,17 +229,17 @@ def commit_artifact_part(auth, model_version_id, artifact_key, etag): return post(auth, path, commit) -def download_artifact(auth, model_version_id, artifact): +def download_artifact_info(auth, model_version_id, artifact): key = artifact['key'] url = signed_artifact_url(auth, model_version_id, artifact) - print("Downloading artifact '%s'" % key) + print("Downloading artifact info '%s'" % key) curl_cmd = "curl --cacert %s -o %s %s '%s'" % ( os.environ['REQUESTS_CA_BUNDLE'], key, params['VERTA_CURL_OPTS'], url) os.system(curl_cmd) -def download_artifacts(auth, model_version_id, artifacts, model_artifact): - print("Downloading %d artifacts" % len(artifacts)) +def download_artifacts_info(auth, model_version_id, artifacts, model_artifact): + print("Downloading %d artifacts info" % len(artifacts)) downloaded_artifacts = [] for artifact in artifacts: @@ -248,7 +248,7 @@ def download_artifacts(auth, model_version_id, artifacts, model_artifact): 'model_version_id': model_version_id } copy_fields(['artifact_type', 'key'], artifact, artifact_request) - download_artifact(auth, model_version_id, artifact_request) + download_artifact_info(auth, model_version_id, artifact_request) downloaded_artifacts.append( {'key': artifact['key'], 'artifact_type': artifact['artifact_type']}) @@ -257,16 +257,16 @@ def download_artifacts(auth, model_version_id, artifacts, model_artifact): 'model_version_id': model_version_id } copy_fields(['artifact_type', 'key'], model_artifact, model_artifact_request) - download_artifact(auth, model_version_id, model_artifact_request) + download_artifact_info(auth, model_version_id, model_artifact_request) return downloaded_artifacts -def upload_artifact(auth, model_version_id, artifact): +def upload_artifact_info(auth, model_version_id, artifact): key = artifact['key'] - print("Uploading artifact '%s'" % key) + print("Uploading artifact info '%s'" % key) print(artifact) - + artifact_request = { 'method': 'PUT', 'model_version_id': model_version_id, @@ -300,12 +300,12 @@ def upload_artifact(auth, model_version_id, artifact): return put_url -def upload_artifacts(auth, model_version_id, artifacts): - print("Uploading %d artifacts" % len(artifacts)) +def upload_artifacts_info(auth, model_version_id, artifacts): + print("Uploading %d artifacts info" % len(artifacts)) uploaded_artifacts = {} for artifact in artifacts: - uploaded_artifacts[artifact["key"]] = upload_artifact(auth, model_version_id, artifact) + uploaded_artifacts[artifact["key"]] = upload_artifact_info(auth, model_version_id, artifact) return uploaded_artifacts @@ -339,8 +339,8 @@ def get_promotion_data(_config): raise SystemExit(1) model = get_registered_model(source_auth, model_version['registered_model_id']) - artifacts = download_artifacts(source_auth, model_version_id, model_version['artifacts'], - model_version['model']) + artifacts = download_artifacts_info(source_auth, model_version_id, model_version['artifacts'], + model_version['model']) promotion = { 'build': build, @@ -433,7 +433,7 @@ def create_promotion(_config, promotion): artifacts_and_model = promotion['artifacts'] artifacts_and_model.append(model_version['model']) - artifact_paths = upload_artifacts(dest_auth, model_version['id'], artifacts_and_model) + artifact_paths = upload_artifacts_info(dest_auth, model_version['id'], artifacts_and_model) # Standard RMVs will have artifact path 'model' while ER->RMVs will have artifact path 'model.pkl' model_artifact = model_version['model'] From d422ac50593afb74a2f23f64504be68b9513b9e0 Mon Sep 17 00:00:00 2001 From: Heather MacDonald Date: Mon, 14 Aug 2023 14:49:58 -0700 Subject: [PATCH 2/3] Revert "Rename functions for clarity" This reverts commit 9d234fabd197b73dd6bc79db75d28bd49d979e92. --- deployment/promotion/promote.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/deployment/promotion/promote.py b/deployment/promotion/promote.py index 07deed5..6a44a20 100755 --- a/deployment/promotion/promote.py +++ b/deployment/promotion/promote.py @@ -229,17 +229,17 @@ def commit_artifact_part(auth, model_version_id, artifact_key, etag): return post(auth, path, commit) -def download_artifact_info(auth, model_version_id, artifact): +def download_artifact(auth, model_version_id, artifact): key = artifact['key'] url = signed_artifact_url(auth, model_version_id, artifact) - print("Downloading artifact info '%s'" % key) + print("Downloading artifact '%s'" % key) curl_cmd = "curl --cacert %s -o %s %s '%s'" % ( os.environ['REQUESTS_CA_BUNDLE'], key, params['VERTA_CURL_OPTS'], url) os.system(curl_cmd) -def download_artifacts_info(auth, model_version_id, artifacts, model_artifact): - print("Downloading %d artifacts info" % len(artifacts)) +def download_artifacts(auth, model_version_id, artifacts, model_artifact): + print("Downloading %d artifacts" % len(artifacts)) downloaded_artifacts = [] for artifact in artifacts: @@ -248,7 +248,7 @@ def download_artifacts_info(auth, model_version_id, artifacts, model_artifact): 'model_version_id': model_version_id } copy_fields(['artifact_type', 'key'], artifact, artifact_request) - download_artifact_info(auth, model_version_id, artifact_request) + download_artifact(auth, model_version_id, artifact_request) downloaded_artifacts.append( {'key': artifact['key'], 'artifact_type': artifact['artifact_type']}) @@ -257,16 +257,16 @@ def download_artifacts_info(auth, model_version_id, artifacts, model_artifact): 'model_version_id': model_version_id } copy_fields(['artifact_type', 'key'], model_artifact, model_artifact_request) - download_artifact_info(auth, model_version_id, model_artifact_request) + download_artifact(auth, model_version_id, model_artifact_request) return downloaded_artifacts -def upload_artifact_info(auth, model_version_id, artifact): +def upload_artifact(auth, model_version_id, artifact): key = artifact['key'] - print("Uploading artifact info '%s'" % key) + print("Uploading artifact '%s'" % key) print(artifact) - + artifact_request = { 'method': 'PUT', 'model_version_id': model_version_id, @@ -300,12 +300,12 @@ def upload_artifact_info(auth, model_version_id, artifact): return put_url -def upload_artifacts_info(auth, model_version_id, artifacts): - print("Uploading %d artifacts info" % len(artifacts)) +def upload_artifacts(auth, model_version_id, artifacts): + print("Uploading %d artifacts" % len(artifacts)) uploaded_artifacts = {} for artifact in artifacts: - uploaded_artifacts[artifact["key"]] = upload_artifact_info(auth, model_version_id, artifact) + uploaded_artifacts[artifact["key"]] = upload_artifact(auth, model_version_id, artifact) return uploaded_artifacts @@ -339,8 +339,8 @@ def get_promotion_data(_config): raise SystemExit(1) model = get_registered_model(source_auth, model_version['registered_model_id']) - artifacts = download_artifacts_info(source_auth, model_version_id, model_version['artifacts'], - model_version['model']) + artifacts = download_artifacts(source_auth, model_version_id, model_version['artifacts'], + model_version['model']) promotion = { 'build': build, @@ -433,7 +433,7 @@ def create_promotion(_config, promotion): artifacts_and_model = promotion['artifacts'] artifacts_and_model.append(model_version['model']) - artifact_paths = upload_artifacts_info(dest_auth, model_version['id'], artifacts_and_model) + artifact_paths = upload_artifacts(dest_auth, model_version['id'], artifacts_and_model) # Standard RMVs will have artifact path 'model' while ER->RMVs will have artifact path 'model.pkl' model_artifact = model_version['model'] From 2a7315d9a21a0d2a4d5e2adc61c7f647de661706 Mon Sep 17 00:00:00 2001 From: Heather MacDonald Date: Tue, 15 Aug 2023 16:36:28 -0700 Subject: [PATCH 3/3] Trying to copy to s3 --- deployment/promotion/promote.py | 30 ++++++++++++---- deployment/promotion/upload_to_s3.py | 53 ++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 6 deletions(-) create mode 100644 deployment/promotion/upload_to_s3.py diff --git a/deployment/promotion/promote.py b/deployment/promotion/promote.py index 6a44a20..9092e53 100755 --- a/deployment/promotion/promote.py +++ b/deployment/promotion/promote.py @@ -35,6 +35,8 @@ import os import datetime +from upload_to_s3 import upload_to_s3, get_base_path_to_artifact, update_artifact_path + env_vars = ['VERTA_SOURCE_MODEL_VERSION_ID', 'VERTA_SOURCE_HOST', 'VERTA_SOURCE_EMAIL', 'VERTA_SOURCE_DEV_KEY', 'VERTA_SOURCE_WORKSPACE_0', 'VERTA_DEST_HOST', 'VERTA_DEST_EMAIL', @@ -66,6 +68,7 @@ 'grpc-metadata-developer_key': os.environ.get('VERTA_SOURCE_DEV_KEY')} workspaces_source = requests.get(host, headers=headers_dict, proxies=proxies) + source_workspace = None for item in workspaces_source.json()['workspace']: if 'id' in item.keys() and item['id'] == source_workspace_id: if 'org_name' in item.keys(): @@ -81,6 +84,8 @@ os.environ['VERTA_DEST_WORKSPACE'] = item['org_name'] elif 'username' in item.keys() and item['username'] == source_workspace: os.environ['VERTA_DEST_WORKSPACE'] = item['username'] +else: + source_workspace = os.environ.get('VERTA_DEST_WORKSPACE') for param_name in env_vars: param = os.environ.get(param_name) @@ -101,7 +106,7 @@ config = { 'source': { - 'model_version_id': atoi(params['VERTA_SOURCE_MODEL_VERSION_ID'][2:-2]), + 'model_version_id': atoi(params['VERTA_SOURCE_MODEL_VERSION_ID']), # [2:-2]), 'host': params['VERTA_SOURCE_HOST'], 'email': params['VERTA_SOURCE_EMAIL'], 'devkey': params['VERTA_SOURCE_DEV_KEY'], @@ -233,8 +238,11 @@ def download_artifact(auth, model_version_id, artifact): key = artifact['key'] url = signed_artifact_url(auth, model_version_id, artifact) print("Downloading artifact '%s'" % key) - curl_cmd = "curl --cacert %s -o %s %s '%s'" % ( - os.environ['REQUESTS_CA_BUNDLE'], key, params['VERTA_CURL_OPTS'], url) + if 'REQUESTS_CA_BUNDLE' not in os.environ: + curl_cmd = "curl -o %s %s '%s'" % (key, params['VERTA_CURL_OPTS'], url) + else: + curl_cmd = "curl --cacert %s -o %s %s '%s'" % ( + os.environ['REQUESTS_CA_BUNDLE'], key, params['VERTA_CURL_OPTS'], url) os.system(curl_cmd) @@ -250,7 +258,7 @@ def download_artifacts(auth, model_version_id, artifacts, model_artifact): copy_fields(['artifact_type', 'key'], artifact, artifact_request) download_artifact(auth, model_version_id, artifact_request) downloaded_artifacts.append( - {'key': artifact['key'], 'artifact_type': artifact['artifact_type']}) + {'key': artifact['key'], 'artifact_type': artifact['artifact_type'], 'path': artifact['path'], 'filename_extension': artifact['filename_extension']}) model_artifact_request = { 'method': 'GET', @@ -264,6 +272,10 @@ def download_artifacts(auth, model_version_id, artifacts, model_artifact): def upload_artifact(auth, model_version_id, artifact): key = artifact['key'] + print("Uploading artifact '%s' to s3" % key) + + upload_to_s3(artifact, "") + print("Uploading artifact '%s'" % key) print(artifact) @@ -284,8 +296,8 @@ def upload_artifact(auth, model_version_id, artifact): if not put_response.ok: raise Exception("Failed to put artifact (%d %s). Key: %s\tURL: %s\tText: %s" % ( - put_response.status_code, - put_response.reason, key, put_url, put_response.text)) + put_response.status_code, + put_response.reason, key, put_url, put_response.text)) check_url = signed_artifact_url(auth, model_version_id, {'method': 'GET', 'model_version_id': model_version_id, @@ -304,6 +316,12 @@ def upload_artifacts(auth, model_version_id, artifacts): print("Uploading %d artifacts" % len(artifacts)) uploaded_artifacts = {} + # Update paths + base_path = get_base_path_to_artifact(artifacts) + for artifact in artifacts: + update_artifact_path(artifact, base_path) + print(f"Updated artifact path to {artifact['path']}") + for artifact in artifacts: uploaded_artifacts[artifact["key"]] = upload_artifact(auth, model_version_id, artifact) return uploaded_artifacts diff --git a/deployment/promotion/upload_to_s3.py b/deployment/promotion/upload_to_s3.py new file mode 100644 index 0000000..c5a0fbc --- /dev/null +++ b/deployment/promotion/upload_to_s3.py @@ -0,0 +1,53 @@ +import boto3 +import os + +# Set your AWS credentials (replace with your own values) +# aws_access_key = os.environ['AWS_ACCESS_KEY_ID'] +# aws_secret_key = os.environ['AWS_SECRET_ACCESS_KEY'] +# aws_region = 'us-east-1' # Change to your desired region + + +def get_base_path_to_artifact(artifacts): + if len(artifacts) == 0: + # This shouldn't happen often, if ever. + print("No artifacts found; cannot construct path.") + return None + path = artifacts[0]['path'] + split_path = str.split(path, '/') + path = '/'.join(split_path[1:-1]) + path = path.replace("hmpreprod", "hm", 1) + return path + + +def get_key_with_extension(artifact): + """Return artifact key with extension. Add one if not already present.""" + key = artifact['key'] + if '.' in key: + return key + filename_extension = artifact['filename_extension'] + return f"{key}.{filename_extension}" + + +def update_artifact_path(artifact, base_path): + artifact['path'] = f"{base_path}" + + +def upload_to_s3(artifact, s3_url): + # Connect to s3 + session = boto3.Session() + s3 = session.client('s3') + + print(f"about to upload artifact {artifact}") + + og_key = artifact['key'] + key_with_extension = get_key_with_extension(artifact) + path = artifact['path'] + bucket_name = 'vertaai-user-data-dev-us-east-1' + s3_key = f"testing-hm-preprod/{path}/{key_with_extension}" + + # Upload the file to S3 + s3.upload_file(og_key, bucket_name, s3_key) + print(f"File '{og_key}' uploaded to '{s3_key}' in bucket '{bucket_name}'.") + + +