
Commit cdc1abb

feat: added celery task to remove offline site if hide download is true (#2681)
* added celery task to remove offline site if hide download is true
* fixed failing test
* addressed feedback
* added removal pipeline for hide_download=true
* removed redundant tests
* Revert "added celery task to remove offline site if hide download is true"; this reverts commit e1092ec
* added cleanup task in offline build gate step on failure
* removed redundant cleanup tasks
* fixed failing tests
* removed redundant code and docstring
* updated and removed redundant test
1 parent 4b0393d commit cdc1abb

2 files changed (+203, -8 lines)

content_sync/pipelines/definitions/concourse/site_pipeline.py

Lines changed: 59 additions & 8 deletions
@@ -860,15 +860,22 @@ def __init__(self, config: SitePipelineDefinitionConfig, **kwargs):
         resource_types.append(KeyvalResourceType())
         resources = SitePipelineResources(config=config)
         online_job = self.get_online_build_job(config=config)
-        offline_build_gate_put_step = TryStep(
-            try_=PutStep(
-                put=self._offline_build_gate_identifier,
-                timeout="1m",
-                attempts=3,
-                get_params={"strict": True},
-                no_get=True,
-            )
+
+        # Create the inner put step with error handlers
+        inner_put_step = PutStep(
+            put=self._offline_build_gate_identifier,
+            timeout="1m",
+            attempts=3,
+            get_params={"strict": True},
+            no_get=True,
         )
+        # Add cleanup step to the inner put step.
+        # This will trigger before TryStep suppresses the error
+        inner_put_step.on_error = self.get_offline_content_cleanup_step()
+
+        # Wrap in TryStep to prevent online job failure
+        offline_build_gate_put_step = TryStep(try_=inner_put_step)
+
         online_job.plan.append(offline_build_gate_put_step)
         offline_job = self.get_offline_build_job(config=config)
         offline_build_gate_get_step = add_error_handling(
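Because the cleanup handler is attached to the inner put step before it is wrapped, Concourse runs the cleanup task when the gate put fails, and only afterwards does the surrounding try suppress the failure so the online job still completes. A minimal sketch (not part of the diff) of the rendered gate step, with field names taken from the assertions in site_pipeline_test.py below and the task config elided; the real output comes from SitePipelineDefinition.json(by_alias=True):

rendered_gate_step = {
    "try": {
        "put": "offline-build-gate",
        "timeout": "1m",
        "attempts": 3,
        "get_params": {"strict": True},
        "no_get": True,
        # Cleanup runs when the put errors, before the enclosing `try`
        # swallows the failure, so the online job still succeeds.
        "on_error": {
            "task": "remove-offline-content-task",
            "timeout": "5m",
            "attempts": 3,
            # TaskConfig with the aws s3 rm script omitted here
        },
    }
}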
@@ -928,6 +935,50 @@ def __init__(self, config: SitePipelineDefinitionConfig, **kwargs):
             **kwargs,
         )
 
+    def get_offline_content_cleanup_step(self) -> TaskStep:
+        """
+        Create a task step to remove offline content from S3 when offline build
+        gate fails
+
+        Returns:
+            TaskStep: A task step that removes content from the offline S3 bucket
+        """
+        # Use same variable naming as unpublished sites for consistency
+        minio_root_user = settings.AWS_ACCESS_KEY_ID
+        minio_root_password = settings.AWS_SECRET_ACCESS_KEY
+        cli_endpoint_url = get_cli_endpoint_url()
+
+        cleanup_task = TaskStep(
+            task="remove-offline-content-task",
+            timeout="5m",
+            attempts=3,
+            config=TaskConfig(
+                platform="linux",
+                image_resource=AWS_CLI_REGISTRY_IMAGE,
+                run=Command(
+                    path="sh",
+                    args=[
+                        "-exc",
+                        f"""
+                        echo "Removing offline content for site: ((site:url_path))"
+                        aws s3{cli_endpoint_url} rm s3://((site:web_bucket))/((site:url_path))/((site:short_id)).zip
+                        aws s3{cli_endpoint_url} rm s3://((site:web_bucket))/((site:url_path))/((site:short_id))-video.zip
+                        aws s3{cli_endpoint_url} rm s3://((site:offline_bucket))/((site:url_path))/ --recursive
+                        echo "Offline content cleanup completed"
+                        """,  # noqa: E501
+                    ],
+                ),
+            ),
+        )
+
+        if is_dev():
+            cleanup_task.params = {
+                "AWS_ACCESS_KEY_ID": minio_root_user,
+                "AWS_SECRET_ACCESS_KEY": minio_root_password,
+            }
+
+        return cleanup_task
+
     def get_online_build_job(self, config: SitePipelineDefinitionConfig):
         ocw_studio_webhook_started_step = OcwStudioWebhookStep(
             pipeline_name=config.vars["pipeline_name"],
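The ((site:...)) placeholders are Concourse instance vars substituted when the pipeline runs, and get_cli_endpoint_url() supplies the MinIO --endpoint-url flag only in dev (the tests below build the same string from DEV_ENDPOINT_URL). A small sketch with purely hypothetical var values shows how the first removal command expands:

# Hypothetical values standing in for the Concourse ((site:...)) vars.
site_vars = {
    "web_bucket": "example-web-bucket",          # hypothetical
    "offline_bucket": "example-offline-bucket",  # hypothetical
    "url_path": "courses/example-course",        # hypothetical
    "short_id": "example-course",                # hypothetical
}
cli_endpoint_url = ""  # in dev this would be f" --endpoint-url {DEV_ENDPOINT_URL}"

print(
    f"aws s3{cli_endpoint_url} rm "
    f"s3://{site_vars['web_bucket']}/{site_vars['url_path']}/{site_vars['short_id']}.zip"
)
# -> aws s3 rm s3://example-web-bucket/courses/example-course/example-course.zip

The other two commands follow the same pattern: a second rm for the -video.zip archive in the web bucket and a recursive rm of the site's prefix in the offline bucket.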

content_sync/pipelines/definitions/concourse/site_pipeline_test.py

Lines changed: 144 additions & 0 deletions
@@ -653,3 +653,147 @@ def assert_base_build_tasks(tasks: list[dict], offline: bool): # noqa: FBT001
     assert f"--baseURL /{prefix.lstrip('/')}" in dummy_vars["hugo_args_online"]
     assert dummy_vars["hugo_args_offline"] == config.hugo_args_offline
     assert dummy_vars["prefix"] == expected_prefix
+
+
+@pytest.mark.parametrize("is_dev", [True, False])
+def test_offline_content_cleanup_step(website, settings, mocker, is_dev):
+    """
+    Test that the offline content cleanup step is correctly configured
+    """
+    # Setup
+    settings.AWS_ACCESS_KEY_ID = "test_access_key_id"
+    settings.AWS_SECRET_ACCESS_KEY = "test_secret_access_key"  # noqa: S105
+    mock_utils_is_dev = mocker.patch("content_sync.utils.is_dev")
+    mock_pipeline_is_dev = mocker.patch(
+        "content_sync.pipelines.definitions.concourse.site_pipeline.is_dev"
+    )
+    mock_main_utils_is_dev = mocker.patch("main.utils.is_dev")
+    mock_utils_is_dev.return_value = is_dev
+    mock_pipeline_is_dev.return_value = is_dev
+    mock_main_utils_is_dev.return_value = is_dev
+    cli_endpoint_url = f" --endpoint-url {DEV_ENDPOINT_URL}" if is_dev else ""
+
+    offline_bucket = "test-offline-bucket"
+    config = SitePipelineDefinitionConfig(
+        site=website,
+        pipeline_name="test",
+        instance_vars="",
+        site_content_branch="main",
+        static_api_url="https://test.example.com/",
+        storage_bucket="test-storage",
+        artifacts_bucket="test-artifacts",
+        web_bucket="test-web",
+        offline_bucket=offline_bucket,
+        resource_base_url="https://test.example.com/",
+        ocw_hugo_themes_branch="main",
+        ocw_hugo_projects_branch="main",
+    )
+
+    pipeline_definition = SitePipelineDefinition(config=config)
+    cleanup_step = pipeline_definition.get_offline_content_cleanup_step()
+
+    # Test the cleanup step configuration
+    assert cleanup_step.task == "remove-offline-content-task"
+    assert cleanup_step.timeout.root == "5m"
+    assert cleanup_step.attempts == 3
+
+    # Test the command configuration
+    assert cleanup_step.config.run.path == "sh"
+
+    # Check that the actual command structure matches expectations
+    actual_command = cleanup_step.config.run.args[1]
+
+    # Check that all three AWS S3 remove commands are present
+    assert (
+        f"aws s3{cli_endpoint_url} rm s3://((site:web_bucket))/((site:url_path))/((site:short_id)).zip"
+        in actual_command
+    )
+    assert (
+        f"aws s3{cli_endpoint_url} rm s3://((site:web_bucket))/((site:url_path))/((site:short_id))-video.zip"
+        in actual_command
+    )
+    assert (
+        f"aws s3{cli_endpoint_url} rm s3://((site:offline_bucket))/((site:url_path))/ --recursive"
+        in actual_command
+    )
+
+    # Check that the core components are present
+    assert (
+        'echo "Removing offline content for site: ((site:url_path))"' in actual_command
+    )
+    assert 'echo "Offline content cleanup completed"' in actual_command
+
+    assert cleanup_step.config.platform == "linux"
+    assert cleanup_step.config.image_resource == AWS_CLI_REGISTRY_IMAGE
+
+    # Test environment variables for dev
+    if is_dev:
+        assert cleanup_step.params["AWS_ACCESS_KEY_ID"] == "test_access_key_id"
+        assert cleanup_step.params["AWS_SECRET_ACCESS_KEY"] == "test_secret_access_key"  # noqa: S105
+    else:
+        assert not hasattr(cleanup_step, "params") or cleanup_step.params is None
+
+
+def test_offline_build_gate_cleanup_task(website, settings, mocker):
+    """
+    Test that the offline build gate put step has proper failure handling attached
+    """
+    # Setup
+    settings.AWS_ACCESS_KEY_ID = "test_access_key_id"
+    settings.AWS_SECRET_ACCESS_KEY = "test_secret_access_key"  # noqa: S105
+    mock_utils_is_dev = mocker.patch("content_sync.utils.is_dev")
+    mock_pipeline_is_dev = mocker.patch(
+        "content_sync.pipelines.definitions.concourse.site_pipeline.is_dev"
+    )
+    mock_main_utils_is_dev = mocker.patch("main.utils.is_dev")
+    mock_utils_is_dev.return_value = False
+    mock_pipeline_is_dev.return_value = False
+    mock_main_utils_is_dev.return_value = False
+
+    config = SitePipelineDefinitionConfig(
+        site=website,
+        pipeline_name="test",
+        instance_vars="",
+        site_content_branch="main",
+        static_api_url="https://test.example.com/",
+        storage_bucket="test-storage",
+        artifacts_bucket="test-artifacts",
+        web_bucket="test-web",
+        offline_bucket="test-offline-bucket",
+        resource_base_url="https://test.example.com/",
+        ocw_hugo_themes_branch="main",
+        ocw_hugo_projects_branch="main",
+    )
+
+    pipeline_definition = SitePipelineDefinition(config=config)
+    rendered_definition = json.loads(pipeline_definition.json(indent=2, by_alias=True))
+
+    # Find the online job
+    online_job = None
+    for job in rendered_definition["jobs"]:
+        if job["name"] == "online-site-job":
+            online_job = job
+            break
+
+    assert online_job is not None, "Online job should exist"
+
+    # Find the offline build gate put step (should be the last step in the online job)
+    gate_put_step = None
+    for step in online_job["plan"]:
+        if (
+            "try" in step
+            and "put" in step["try"]
+            and step["try"]["put"] == "offline-build-gate"
+        ):
+            gate_put_step = step["try"]  # Get the inner put step, not the try step
+            break
+
+    assert gate_put_step is not None, "Offline build gate put step should exist"
+
+    # Verify that failure handling is attached to the inner put step
+    assert "on_error" in gate_put_step
+
+    # Verify the failure handling is the cleanup task
+    assert gate_put_step["on_error"]["task"] == "remove-offline-content-task"
+    assert gate_put_step["on_error"]["timeout"] == "5m"
+    assert gate_put_step["on_error"]["attempts"] == 3
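Both tests patch is_dev at three import sites (content_sync.utils, the site_pipeline module, and main.utils) so every code path sees the same environment. A possible consolidation, sketched here under the assumption that those are the only import sites and not part of this commit, would be a shared fixture:

import pytest


@pytest.fixture
def set_is_dev(mocker):
    """Return a callable that patches is_dev consistently across modules."""
    def _set(value: bool):
        for target in (
            "content_sync.utils.is_dev",
            "content_sync.pipelines.definitions.concourse.site_pipeline.is_dev",
            "main.utils.is_dev",
        ):
            mocker.patch(target, return_value=value)
    return _set

A test would then call set_is_dev(True) or set_is_dev(False) instead of repeating the three mocker.patch calls.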
