Skip to content

Commit 4bcd9db

Browse files
authored
Merge pull request #168 from henrykironde/upload
Use mapbox Python API for uploading
2 parents 2b358c6 + e288345 commit 4bcd9db

File tree

5 files changed

+144
-56
lines changed

5 files changed

+144
-56
lines changed

Zooniverse/Snakefile

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,18 @@ rule combine_nests:
9494
shell:
9595
"python combine_nests.py {input}"
9696

97-
rule upload_mapbox:
97+
# Convert a web-mercator projected mosaic into an .mbtiles tileset.
# mbtile.py reads only its first command-line argument (the input path), so no
# extra config-driven argument is passed to it.
rule create_mbtile:
    input:
        "/blue/ewhite/everglades/projected_mosaics/webmercator/{year}/{site}/{flight}_projected.tif"
    output:
        "/blue/ewhite/everglades/mapbox/{year}/{site}/{flight}.mbtiles"
    shell:
        "python mbtile.py {input}"
104+
105+
# Upload a finished .mbtiles tileset to Mapbox.
# A rule's output must differ from its input: declaring the same .mbtiles path
# for both makes the rule depend on itself and clash with create_mbtile, which
# already produces that file. A touch() marker records the completed upload
# instead. NOTE(review): the workflow targets must request the ".uploaded"
# marker for this rule to be scheduled - confirm against the target rule.
rule upload_mapbox:
    input:
        "/blue/ewhite/everglades/mapbox/{year}/{site}/{flight}.mbtiles"
    output:
        touch("/blue/ewhite/everglades/mapbox/{year}/{site}/{flight}.uploaded")
    shell:
        "python upload_mapbox.py {input}"

Zooniverse/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,4 @@ dependencies:
3232
- pytest-xdist
3333
- pdoc
3434
- rio-mbtiles
35+
- boto3

Zooniverse/mbtile.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import os
2+
import shlex
3+
import sys
4+
import subprocess
5+
import tools
6+
7+
import rasterio as rio
8+
from rasterio.warp import calculate_default_transform, reproject, Resampling
9+
10+
11+
def create_mbtile(path, year, site, force_upload=False):
    """Render a projected mosaic into an ``.mbtiles`` file using ``rio mbtiles``.

    Args:
        path: Path to the projected raster. Its basename (without extension)
            determines the flight name used for the output file.
        year: Year component of the output directory.
        site: Site component of the output directory.
        force_upload: Not used by this function; accepted so that existing
            callers passing it keep working.

    Returns:
        The path of the created ``.mbtiles`` file, or ``None`` when ``rio``
        exits with a non-zero status or the file is missing afterwards.
    """
    basename = os.path.splitext(os.path.basename(path))[0]
    flight = basename.replace("_projected", "")

    if tools.get_event(basename) == "primary":
        # Primary flights keep only the first four underscore-separated fields.
        flight = "_".join(basename.split("_")[0:4])

    mbtiles_dir = os.path.join("/blue/ewhite/everglades/mapbox/", year, site)
    # exist_ok avoids the check-then-create race when jobs run in parallel.
    os.makedirs(mbtiles_dir, exist_ok=True)

    mbtiles_filename = os.path.join(mbtiles_dir, f"{flight}.mbtiles")

    # Remove any file left by an earlier run so the existence check below
    # only succeeds for output produced by this invocation.
    if os.path.exists(mbtiles_filename):
        os.remove(mbtiles_filename)

    print("Creating mbtiles file")
    # Pass an argument list rather than a formatted string: paths containing
    # spaces or quotes would otherwise be split into several arguments.
    rio_command = [
        "rio", "mbtiles", path,
        "-o", mbtiles_filename,
        "--zoom-levels", "17..24",
        "-j", "4",
        "-f", "PNG",
        "--progress-bar",
    ]
    return_code = subprocess.call(rio_command)

    # Check the return code to see if the command was successful
    if return_code != 0:
        print(f"Rio command failed with return code {return_code}")
        return None

    if not os.path.exists(mbtiles_filename):
        print(f"{mbtiles_filename} was not created")
        return None

    return mbtiles_filename
44+
45+
46+
if __name__ == "__main__":
    # Command-line entry point: python mbtile.py <projected_tif>
    if len(sys.argv) < 2:
        sys.exit("Usage: python mbtile.py <path to projected .tif>")

    path = sys.argv[1]
    split_path = os.path.normpath(path).split(os.path.sep)
    # Year and site are read from fixed positions in the path (components 6
    # and 7 after splitting on the separator), so shorter paths are rejected
    # with a readable message instead of an IndexError.
    if len(split_path) < 8:
        sys.exit(f"Cannot determine year and site from path: {path}")
    year, site = split_path[6], split_path[7]
    force_upload = True

    # Create mbtiles
    file_path = create_mbtile(path, year, site, force_upload=force_upload)

    # create_mbtile signals failure by returning None; exit non-zero so the
    # calling workflow sees the job as failed.
    if file_path is None:
        sys.exit(1)

Zooniverse/upload_mapbox.py

Lines changed: 70 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,82 @@
11
import os
22
import requests
3+
import boto3
4+
from botocore.exceptions import NoCredentialsError
35
import sys
4-
import subprocess
56
import tomli
6-
import tools
77

8-
import rasterio as rio
9-
from rasterio.warp import calculate_default_transform, reproject, Resampling
108

9+
class MapboxUploader:
    """Small client for the Mapbox Uploads API.

    The methods mirror the steps of an upload: request temporary S3
    credentials, copy the file to the S3 location those credentials describe,
    create the upload that points Mapbox at the staged file, and query the
    upload's status.
    """

    def __init__(self, access_token, username, timeout=60):
        """Store the account details and derive the API base URL.

        Args:
            access_token: Mapbox access token sent with every API request.
            username: Mapbox account name; also used as the tileset prefix.
            timeout: Seconds to wait on each HTTP request before giving up.
                Without a timeout ``requests`` can block indefinitely.
        """
        self.access_token = access_token
        self.username = username
        self.timeout = timeout
        self.base_url = f"https://api.mapbox.com/uploads/v1/{self.username}"

    def _auth_params(self):
        """Return the query parameters that authenticate a request."""
        return {"access_token": self.access_token}

    @staticmethod
    def _json_or_raise(response, expected_codes, action):
        """Return the decoded JSON body, or raise if the status is unexpected.

        Args:
            response: Response object exposing ``status_code``, ``text`` and
                ``json()``.
            expected_codes: Collection of status codes treated as success.
            action: Short description used in the error message.

        Raises:
            Exception: If ``response.status_code`` is not in ``expected_codes``.
        """
        if response.status_code in expected_codes:
            return response.json()
        raise Exception(
            f"Failed to {action}. Status code: {response.status_code}. "
            f"Response: {response.text}"
        )

    def request_s3_credentials(self):
        """Request temporary S3 credentials for staging a file.

        Returns:
            The decoded JSON response. Other methods of this class read its
            ``accessKeyId``, ``secretAccessKey``, ``sessionToken``, ``bucket``,
            ``key`` and ``url`` entries.

        Raises:
            Exception: If the API does not answer with status 200.
        """
        response = requests.post(
            f"{self.base_url}/credentials",
            params=self._auth_params(),
            timeout=self.timeout,
        )
        return self._json_or_raise(response, {200}, "retrieve S3 credentials")

    def upload_to_s3(self, file_path, s3_credentials):
        """Copy ``file_path`` to the S3 bucket/key named in ``s3_credentials``.

        Args:
            file_path: Local file to upload.
            s3_credentials: Mapping as returned by ``request_s3_credentials``.

        Raises:
            NoCredentialsError: Re-raised after logging when boto3 reports
                that no credentials are available.
        """
        s3_client = boto3.client(
            's3',
            aws_access_key_id=s3_credentials['accessKeyId'],
            aws_secret_access_key=s3_credentials['secretAccessKey'],
            aws_session_token=s3_credentials['sessionToken'],
            region_name='us-east-1'  # Use the appropriate AWS region
        )
        try:
            s3_client.upload_file(file_path, s3_credentials['bucket'], s3_credentials['key'])
        except NoCredentialsError:
            print("Credentials not available.")
            raise

    def create_upload(self, s3_credentials, tileset_id):
        """Create an upload that turns the staged file into a tileset.

        Args:
            s3_credentials: Mapping as returned by ``request_s3_credentials``;
                its ``url`` entry identifies the staged file.
            tileset_id: Tileset name; it is prefixed with the username.

        Returns:
            The decoded JSON response describing the new upload.

        Raises:
            Exception: If the API does not answer with status 201.
        """
        headers = {'Content-Type': 'application/json', 'Cache-Control': 'no-cache'}
        data = {"url": s3_credentials['url'], "tileset": f"{self.username}.{tileset_id}"}
        response = requests.post(
            self.base_url,
            params=self._auth_params(),
            json=data,
            headers=headers,
            timeout=self.timeout,
        )
        return self._json_or_raise(response, {201}, "create upload")

    def retrieve_upload_status(self, upload_id):
        """Fetch the current status of an upload.

        Args:
            upload_id: Identifier of the upload, as found under ``id`` in the
                response of ``create_upload``.

        Returns:
            The decoded JSON response describing the upload.

        Raises:
            Exception: If the API answers with a status other than 200 or 201.
        """
        response = requests.get(
            f"{self.base_url}/{upload_id}",
            params=self._auth_params(),
            timeout=self.timeout,
        )
        return self._json_or_raise(response, {200, 201}, "retrieve upload status")
55+
56+
57+
def get_credentials(config_path="/blue/ewhite/everglades/mapbox/mapbox.ini"):
    """Read the Mapbox access token from a TOML-formatted config file.

    Args:
        config_path: Path to the config file. It is parsed as TOML and must
            contain a ``[mapbox]`` table with an ``access-token`` entry.
            Defaults to the shared ``mapbox.ini`` location.

    Returns:
        The access token stored under ``mapbox.access-token``.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the ``mapbox`` table or its ``access-token`` entry is
            missing.
    """
    # Prefer the standard-library parser (Python 3.11+); tomli is the
    # API-compatible backport for older interpreters.
    try:
        import tomllib as toml_parser
    except ModuleNotFoundError:
        import tomli as toml_parser

    with open(config_path, "rb") as f:
        toml_dict = toml_parser.load(f)

    try:
        return toml_dict['mapbox']['access-token']
    except KeyError as err:
        raise KeyError(
            f"'access-token' not found in [mapbox] section of {config_path}"
        ) from err
5163

5264

5365
if __name__ == "__main__":
    # Command-line entry point: python upload_mapbox.py <file.mbtiles>
    if len(sys.argv) < 2:
        sys.exit("Usage: python upload_mapbox.py <path to .mbtiles file>")

    file_path = sys.argv[1]
    # Fail early, before any network request, when there is nothing to upload.
    if not os.path.isfile(file_path):
        sys.exit(f"File not found: {file_path}")

    access_token = get_credentials()
    username = 'bweinstein'
    uploader = MapboxUploader(access_token, username)

    # Step 1: Request S3 credentials
    s3_credentials = uploader.request_s3_credentials()

    # Step 2: Upload to Mapbox's S3 staging bucket
    uploader.upload_to_s3(file_path, s3_credentials)

    # The tileset is named after the file, without directory or extension.
    tileset_id = os.path.splitext(os.path.basename(file_path))[0]

    # Step 3: Create an upload to Mapbox
    upload_data = uploader.create_upload(s3_credentials, tileset_id)
    upload_id = upload_data['id']

    # Step 4: Report the status of the upload (queried once, right after
    # creation, so processing may still be in progress).
    upload_status = uploader.retrieve_upload_status(upload_id)
    print(f"Upload status: {upload_status}")

    # Per the Mapbox Uploads API, the upload object carries an "error" field
    # that is empty unless processing failed; exit non-zero in that case so
    # the calling workflow sees the failure.
    if upload_status.get("error"):
        sys.exit(f"Mapbox reported an upload error: {upload_status['error']}")

everglades_workflow.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,23 @@
22
#SBATCH --job-name=everglades_workflow
#SBATCH --mail-user=henrysenyondo@ufl.edu
#SBATCH --mail-type=FAIL
#SBATCH --gpus=a100:4
#SBATCH --cpus-per-task=10
#SBATCH --mem=200gb
#SBATCH --time=18:00:00
#SBATCH --partition=gpu
#SBATCH --output=/blue/ewhite/everglades/EvergladesTools/logs/everglades_workflow.out
#SBATCH --error=/blue/ewhite/everglades/EvergladesTools/logs/everglades_workflow.err

echo "INFO: [$(date "+%Y-%m-%d %H:%M:%S")] Starting everglades workflow on $(hostname) in $(pwd)"

echo "INFO: [$(date "+%Y-%m-%d %H:%M:%S")] Loading required modules"
source /etc/profile.d/modules.sh

ml conda
# Recreate the environment from environment.yml on every run so newly added
# dependencies are installed before the workflow starts.
conda env create --force --file /blue/ewhite/everglades/EvergladesTools/Zooniverse/environment.yml

conda activate EvergladesTools
# Abort if the workflow directory is unavailable rather than running
# snakemake from whatever directory the job started in.
cd /blue/ewhite/everglades/EvergladesTools/Zooniverse || exit 1
# Release any lock left on the working directory by a previous run that was
# killed or timed out, otherwise the next invocation refuses to start.
snakemake --unlock
snakemake --printshellcmds --keep-going --cores 10 --resources gpu=4 --rerun-incomplete

0 commit comments

Comments
 (0)