Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion nmdc_runtime/api/endpoints/find.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import csv
import logging
from io import StringIO
from typing import Annotated

from fastapi import APIRouter, Depends, Path, Query
from fastapi import APIRouter, Depends, HTTPException, Path, Query, status
from fastapi.responses import StreamingResponse
from pymongo.database import Database as MongoDatabase

from nmdc_schema.get_nmdc_view import ViewGetter
Expand All @@ -12,12 +15,14 @@
get_nonempty_nmdc_schema_collection_names,
)
from nmdc_runtime.api.endpoints.nmdcschema import get_linked_instances
from nmdc_runtime.api.endpoints.users import is_admin
from nmdc_runtime.api.endpoints.util import (
find_resources,
strip_oid,
find_resources_spanning,
)
from nmdc_runtime.api.models.metadata import Doc
from nmdc_runtime.api.models.user import User, get_current_active_user
from nmdc_runtime.api.models.util import (
FindResponse,
FindRequest,
Expand Down Expand Up @@ -122,6 +127,59 @@ def find_data_objects(
return find_resources(req, mdb, "data_object_set")


@router.get(
    "/admin/data_object_urls",
    status_code=status.HTTP_200_OK,
    name="Get Data Object URLs",
    description="(Admins only) Download a TSV-formatted report consisting of the URL of each `DataObject` that is an output of any `WorkflowExecution`.",
)
def get_data_object_report(
    user: User = Depends(get_current_active_user),
    mdb: MongoDatabase = Depends(get_mongo_db),
    prefix: str = Query(
        "",
        description="If not empty, the file will include only the URLs that begin with this prefix",
    ),
):
    """Serve a downloadable TSV report of workflow-output `DataObject` URLs.

    The report has a single column and no header row. It contains each
    distinct, non-blank `url` found on the `data_object_set` documents whose
    `id` appears in the `has_output` field of any `workflow_execution_set`
    document, sorted in ascending order.

    Parameters:
        user: The requesting user; must satisfy `is_admin`.
        mdb: The Mongo database to read from.
        prefix: When it contains any non-whitespace character, only URLs
            that start with this exact (unstripped) value are reported.

    Returns:
        A `StreamingResponse` carrying the TSV content as an attachment
        named "data-object-urls.tsv".

    Raises:
        HTTPException: With status 403 when `user` is not a Runtime admin.
    """
    if not is_admin(user):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Only Runtime administrators can access this resource.",
        )

    # A whitespace-only prefix counts as "no prefix". A non-blank prefix is
    # compared verbatim, i.e. it is not stripped before matching.
    filter_by_prefix = bool(prefix.strip())

    # Collect the distinct URLs of the data objects that any workflow
    # execution lists among its outputs.
    output_ids = mdb.workflow_execution_set.distinct("has_output")
    unique_urls = set()
    for doc in mdb.data_object_set.find({"id": {"$in": output_ids}}):
        if "url" not in doc:
            continue
        url = doc["url"]
        if not url.strip():
            continue  # a blank URL is treated the same as a missing one
        if filter_by_prefix and not url.startswith(prefix):
            continue
        unique_urls.add(url)

    # Write one URL per row into an in-memory TSV "file".
    # Reference: https://docs.python.org/3/library/csv.html#csv.writer
    tsv_buffer = StringIO()
    tsv_writer = csv.writer(tsv_buffer, delimiter="\t", lineterminator="\n")
    for url in sorted(unique_urls):
        tsv_writer.writerow([url])

    # Rewind the buffer so that streaming starts at its first character
    # instead of at the end, where the writer left the file pointer.
    tsv_buffer.seek(0)

    # Hand the buffer to the HTTP client as a file download.
    filename = "data-object-urls.tsv"
    return StreamingResponse(
        tsv_buffer,
        media_type="text/tab-separated-values",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )


@router.get(
"/data_objects/study/{study_id}",
response_model_exclude_unset=True,
Expand Down
12 changes: 12 additions & 0 deletions nmdc_runtime/api/endpoints/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,18 @@ def check_can_create_user(requester: User):
)


def is_admin(user: User):
    """Report whether the given user is a Runtime administrator.

    A user counts as a Runtime administrator when their `site_admin` value
    is a list that contains the special site "nmdc-runtime-useradmin";
    administering that site is what sets a Runtime admin apart from a
    regular user. Returns `False` whenever `site_admin` is not a list.
    """
    administered_sites = user.site_admin
    if not isinstance(administered_sites, list):
        return False
    return "nmdc-runtime-useradmin" in administered_sites


@router.post(
"/users",
status_code=status.HTTP_201_CREATED,
Expand Down