From fce7cb6f8ee77d203a670a6ad19ed277c2587f64 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 9 Jul 2025 10:05:11 +0200 Subject: [PATCH 1/2] Refactor pipeline directory structure and filenames --- .github/workflows/test-pipelines.yml | 22 +++++----- .../cache_pipeline.py | 0 .../{caching => cache_pipeline}/caching.md | 0 pipelines/{fanOut => fan_pipeline}/fanOut.md | 0 .../{fanOut => fan_pipeline}/fan_pipeline.py | 0 .../helloWorld.md | 0 .../hello_pipeline.py | 0 .../{stepIO => io_pipeline}/io_pipeline.py | 0 pipelines/{stepIO => io_pipeline}/stepIO.md | 0 .../meta_pipeline.py | 0 .../{metadata => meta_pipeline}/metadata.md | 0 .../param_pipeline.py | 0 .../parameters.md | 0 .../{retries => robust_pipeline}/retries.md | 0 .../robust_pipeline.py | 0 .../tagged_pipeline.py | 6 +-- .../{tagging => tagged_pipeline}/tagging.md | 0 .../visualizations-dashboard.png | Bin .../visualizations.md | 2 +- .../viz_pipeline.py | 0 .../{yamlConfig => yaml_pipeline}/my_run.yaml | 0 .../yamlConfig.md | 0 .../yaml_pipeline.py | 2 +- src/tutorialMetadata.json | 40 +++++++++--------- 24 files changed, 36 insertions(+), 36 deletions(-) rename pipelines/{caching => cache_pipeline}/cache_pipeline.py (100%) rename pipelines/{caching => cache_pipeline}/caching.md (100%) rename pipelines/{fanOut => fan_pipeline}/fanOut.md (100%) rename pipelines/{fanOut => fan_pipeline}/fan_pipeline.py (100%) rename pipelines/{helloWorld => hello_pipeline}/helloWorld.md (100%) rename pipelines/{helloWorld => hello_pipeline}/hello_pipeline.py (100%) rename pipelines/{stepIO => io_pipeline}/io_pipeline.py (100%) rename pipelines/{stepIO => io_pipeline}/stepIO.md (100%) rename pipelines/{metadata => meta_pipeline}/meta_pipeline.py (100%) rename pipelines/{metadata => meta_pipeline}/metadata.md (100%) rename pipelines/{parameters => param_pipeline}/param_pipeline.py (100%) rename pipelines/{parameters => param_pipeline}/parameters.md (100%) rename pipelines/{retries => robust_pipeline}/retries.md (100%) rename pipelines/{retries => robust_pipeline}/robust_pipeline.py (100%) rename pipelines/{tagging => tagged_pipeline}/tagged_pipeline.py (94%) rename pipelines/{tagging => tagged_pipeline}/tagging.md (100%) rename pipelines/{visualizations => viz_pipeline}/visualizations-dashboard.png (100%) rename pipelines/{visualizations => viz_pipeline}/visualizations.md (90%) rename pipelines/{visualizations => viz_pipeline}/viz_pipeline.py (100%) rename pipelines/{yamlConfig => yaml_pipeline}/my_run.yaml (100%) rename pipelines/{yamlConfig => yaml_pipeline}/yamlConfig.md (100%) rename pipelines/{yamlConfig => yaml_pipeline}/yaml_pipeline.py (93%) diff --git a/.github/workflows/test-pipelines.yml b/.github/workflows/test-pipelines.yml index 01f5e4d..6e4bcc5 100644 --- a/.github/workflows/test-pipelines.yml +++ b/.github/workflows/test-pipelines.yml @@ -41,19 +41,19 @@ jobs: run: | failed=() for p in \ - "pipelines/helloWorld/hello_pipeline.py" \ - "pipelines/caching/cache_pipeline.py" \ - "pipelines/fanOut/fan_pipeline.py" \ - "pipelines/metadata/meta_pipeline.py" \ - "pipelines/parameters/param_pipeline.py" \ - "pipelines/retries/robust_pipeline.py" \ - "pipelines/stepIO/io_pipeline.py" \ - "pipelines/tagging/tagged_pipeline.py" \ - "pipelines/visualizations/viz_pipeline.py" \ - "pipelines/yamlConfig/yaml_pipeline.py"; do + "pipelines/hello_pipeline/hello_pipeline.py" \ + "pipelines/cache_pipeline/cache_pipeline.py" \ + "pipelines/fan_pipeline/fan_pipeline.py" \ + "pipelines/meta_pipeline/meta_pipeline.py" \ + "pipelines/param_pipeline/param_pipeline.py" \ + "pipelines/robust_pipeline/robust_pipeline.py" \ + "pipelines/io_pipeline/io_pipeline.py" \ + "pipelines/tagged_pipeline/tagged_pipeline.py" \ + "pipelines/viz_pipeline/viz_pipeline.py" \ + "pipelines/yaml_pipeline/yaml_pipeline.py"; do echo "Running $p…" - if [[ "$p" == *"retries/robust_pipeline.py" ]]; then + if [[ "$p" == *"robust_pipeline/robust_pipeline.py" ]]; then PYTHONPATH=$GITHUB_WORKSPACE:$PYTHONPATH python "$p" || echo "⚠ robust_pipeline demo: failure expected" else PYTHONPATH=$GITHUB_WORKSPACE:$PYTHONPATH python "$p" || failed+=("$p") diff --git a/pipelines/caching/cache_pipeline.py b/pipelines/cache_pipeline/cache_pipeline.py similarity index 100% rename from pipelines/caching/cache_pipeline.py rename to pipelines/cache_pipeline/cache_pipeline.py diff --git a/pipelines/caching/caching.md b/pipelines/cache_pipeline/caching.md similarity index 100% rename from pipelines/caching/caching.md rename to pipelines/cache_pipeline/caching.md diff --git a/pipelines/fanOut/fanOut.md b/pipelines/fan_pipeline/fanOut.md similarity index 100% rename from pipelines/fanOut/fanOut.md rename to pipelines/fan_pipeline/fanOut.md diff --git a/pipelines/fanOut/fan_pipeline.py b/pipelines/fan_pipeline/fan_pipeline.py similarity index 100% rename from pipelines/fanOut/fan_pipeline.py rename to pipelines/fan_pipeline/fan_pipeline.py diff --git a/pipelines/helloWorld/helloWorld.md b/pipelines/hello_pipeline/helloWorld.md similarity index 100% rename from pipelines/helloWorld/helloWorld.md rename to pipelines/hello_pipeline/helloWorld.md diff --git a/pipelines/helloWorld/hello_pipeline.py b/pipelines/hello_pipeline/hello_pipeline.py similarity index 100% rename from pipelines/helloWorld/hello_pipeline.py rename to pipelines/hello_pipeline/hello_pipeline.py diff --git a/pipelines/stepIO/io_pipeline.py b/pipelines/io_pipeline/io_pipeline.py similarity index 100% rename from pipelines/stepIO/io_pipeline.py rename to pipelines/io_pipeline/io_pipeline.py diff --git a/pipelines/stepIO/stepIO.md b/pipelines/io_pipeline/stepIO.md similarity index 100% rename from pipelines/stepIO/stepIO.md rename to pipelines/io_pipeline/stepIO.md diff --git a/pipelines/metadata/meta_pipeline.py b/pipelines/meta_pipeline/meta_pipeline.py similarity index 100% rename from pipelines/metadata/meta_pipeline.py rename to pipelines/meta_pipeline/meta_pipeline.py diff --git a/pipelines/metadata/metadata.md b/pipelines/meta_pipeline/metadata.md similarity index 100% rename from pipelines/metadata/metadata.md rename to pipelines/meta_pipeline/metadata.md diff --git a/pipelines/parameters/param_pipeline.py b/pipelines/param_pipeline/param_pipeline.py similarity index 100% rename from pipelines/parameters/param_pipeline.py rename to pipelines/param_pipeline/param_pipeline.py diff --git a/pipelines/parameters/parameters.md b/pipelines/param_pipeline/parameters.md similarity index 100% rename from pipelines/parameters/parameters.md rename to pipelines/param_pipeline/parameters.md diff --git a/pipelines/retries/retries.md b/pipelines/robust_pipeline/retries.md similarity index 100% rename from pipelines/retries/retries.md rename to pipelines/robust_pipeline/retries.md diff --git a/pipelines/retries/robust_pipeline.py b/pipelines/robust_pipeline/robust_pipeline.py similarity index 100% rename from pipelines/retries/robust_pipeline.py rename to pipelines/robust_pipeline/robust_pipeline.py diff --git a/pipelines/tagging/tagged_pipeline.py b/pipelines/tagged_pipeline/tagged_pipeline.py similarity index 94% rename from pipelines/tagging/tagged_pipeline.py rename to pipelines/tagged_pipeline/tagged_pipeline.py index e353c49..013e4d5 100644 --- a/pipelines/tagging/tagged_pipeline.py +++ b/pipelines/tagged_pipeline/tagged_pipeline.py @@ -50,7 +50,7 @@ def process_data( # Pipeline with cascade tags - these will be applied to all artifacts created during execution @pipeline(tags=["tutorial", Tag(name="experiment", cascade=True)]) -def artifact_tagging_pipeline(): +def tagged_pipeline(): """Pipeline demonstrating various artifact tagging approaches.""" raw_data = create_raw_data() processed_data = process_data(raw_data) @@ -66,8 +66,8 @@ def artifact_tagging_pipeline(): logger.info(" 4. Filtering and querying artifacts by tags") # Run the pipeline - artifact_tagging_pipeline() + tagged_pipeline() # Log dashboard URLs - log_dashboard_urls("artifact_tagging_pipeline") + log_dashboard_urls("tagged_pipeline") logger.info("Run again to see how tags accumulate across multiple runs") diff --git a/pipelines/tagging/tagging.md b/pipelines/tagged_pipeline/tagging.md similarity index 100% rename from pipelines/tagging/tagging.md rename to pipelines/tagged_pipeline/tagging.md diff --git a/pipelines/visualizations/visualizations-dashboard.png b/pipelines/viz_pipeline/visualizations-dashboard.png similarity index 100% rename from pipelines/visualizations/visualizations-dashboard.png rename to pipelines/viz_pipeline/visualizations-dashboard.png diff --git a/pipelines/visualizations/visualizations.md b/pipelines/viz_pipeline/visualizations.md similarity index 90% rename from pipelines/visualizations/visualizations.md rename to pipelines/viz_pipeline/visualizations.md index 58b0adf..f9999e6 100644 --- a/pipelines/visualizations/visualizations.md +++ b/pipelines/viz_pipeline/visualizations.md @@ -26,7 +26,7 @@ def scatter(df: pd.DataFrame) -> Annotated[HTMLString, "scatter_plot"]: - **HTMLString type** lets you embed rich content in the dashboard - **Dashboard integration** makes results easily shareable -![Two visualizations generated by this pipeline](./pipelines/visualizations/visualizations-dashboard.png) +![Two visualizations generated by this pipeline](./pipelines/viz_pipeline/visualizations-dashboard.png) ## Try it yourself diff --git a/pipelines/visualizations/viz_pipeline.py b/pipelines/viz_pipeline/viz_pipeline.py similarity index 100% rename from pipelines/visualizations/viz_pipeline.py rename to pipelines/viz_pipeline/viz_pipeline.py diff --git a/pipelines/yamlConfig/my_run.yaml b/pipelines/yaml_pipeline/my_run.yaml similarity index 100% rename from pipelines/yamlConfig/my_run.yaml rename to pipelines/yaml_pipeline/my_run.yaml diff --git a/pipelines/yamlConfig/yamlConfig.md b/pipelines/yaml_pipeline/yamlConfig.md similarity index 100% rename from pipelines/yamlConfig/yamlConfig.md rename to pipelines/yaml_pipeline/yamlConfig.md diff --git a/pipelines/yamlConfig/yaml_pipeline.py b/pipelines/yaml_pipeline/yaml_pipeline.py similarity index 93% rename from pipelines/yamlConfig/yaml_pipeline.py rename to pipelines/yaml_pipeline/yaml_pipeline.py index a4ea518..6dc10f8 100644 --- a/pipelines/yamlConfig/yaml_pipeline.py +++ b/pipelines/yaml_pipeline/yaml_pipeline.py @@ -24,7 +24,7 @@ def yaml_pipeline(name: str = "world"): logger.info("Starting YAML-configured pipeline") # run exactly with the YAML you wrote run = yaml_pipeline.with_options( - config_path="pipelines/yamlConfig/my_run.yaml" + config_path="pipelines/yaml_pipeline/my_run.yaml" )() # fetch artifact afterwards so users see something in the console diff --git a/src/tutorialMetadata.json b/src/tutorialMetadata.json index 6deee88..3ab5505 100644 --- a/src/tutorialMetadata.json +++ b/src/tutorialMetadata.json @@ -17,8 +17,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/helloWorld/helloWorld.md", - "code": "pipelines/helloWorld/hello_pipeline.py" + "doc": "pipelines/hello_pipeline/helloWorld.md", + "code": "pipelines/hello_pipeline/hello_pipeline.py" } ] }, @@ -28,8 +28,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/stepIO/stepIO.md", - "code": "pipelines/stepIO/io_pipeline.py" + "doc": "pipelines/io_pipeline/stepIO.md", + "code": "pipelines/io_pipeline/io_pipeline.py" } ] }, @@ -39,8 +39,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/parameters/parameters.md", - "code": "pipelines/parameters/param_pipeline.py" + "doc": "pipelines/param_pipeline/parameters.md", + "code": "pipelines/param_pipeline/param_pipeline.py" } ] }, @@ -50,8 +50,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/tagging/tagging.md", - "code": "pipelines/tagging/tagged_pipeline.py" + "doc": "pipelines/tagged_pipeline/tagging.md", + "code": "pipelines/tagged_pipeline/tagged_pipeline.py" } ] }, @@ -61,8 +61,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/metadata/metadata.md", - "code": "pipelines/metadata/meta_pipeline.py" + "doc": "pipelines/meta_pipeline/metadata.md", + "code": "pipelines/meta_pipeline/meta_pipeline.py" } ] }, @@ -72,8 +72,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/caching/caching.md", - "code": "pipelines/caching/cache_pipeline.py" + "doc": "pipelines/cache_pipeline/caching.md", + "code": "pipelines/cache_pipeline/cache_pipeline.py" } ] }, @@ -83,8 +83,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/visualizations/visualizations.md", - "code": "pipelines/visualizations/viz_pipeline.py" + "doc": "pipelines/viz_pipeline/visualizations.md", + "code": "pipelines/viz_pipeline/viz_pipeline.py" } ] }, @@ -94,8 +94,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/fanOut/fanOut.md", - "code": "pipelines/fanOut/fan_pipeline.py" + "doc": "pipelines/fan_pipeline/fanOut.md", + "code": "pipelines/fan_pipeline/fan_pipeline.py" } ] }, @@ -105,8 +105,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/retries/retries.md", - "code": "pipelines/retries/robust_pipeline.py" + "doc": "pipelines/robust_pipeline/retries.md", + "code": "pipelines/robust_pipeline/robust_pipeline.py" } ] }, @@ -116,8 +116,8 @@ "canRunPipeline": true, "steps": [ { - "doc": "pipelines/yamlConfig/yamlConfig.md", - "code": "pipelines/yamlConfig/yaml_pipeline.py" + "doc": "pipelines/yaml_pipeline/yamlConfig.md", + "code": "pipelines/yaml_pipeline/yaml_pipeline.py" } ] }, From 4af9e65b142d3b7dbe5294886c8436907b85f9f2 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Wed, 9 Jul 2025 15:51:08 +0200 Subject: [PATCH 2/2] Add initial pipeline summaries for various pipeline types --- pipelines/cache_pipeline/summary.md | 1 + pipelines/fan_pipeline/summary.md | 1 + pipelines/hello_pipeline/summary.md | 1 + pipelines/io_pipeline/summary.md | 1 + pipelines/meta_pipeline/summary.md | 1 + pipelines/param_pipeline/summary.md | 1 + pipelines/robust_pipeline/summary.md | 1 + pipelines/tagged_pipeline/summary.md | 1 + pipelines/viz_pipeline/summary.md | 1 + pipelines/yaml_pipeline/summary.md | 1 + 10 files changed, 10 insertions(+) create mode 100644 pipelines/cache_pipeline/summary.md create mode 100644 pipelines/fan_pipeline/summary.md create mode 100644 pipelines/hello_pipeline/summary.md create mode 100644 pipelines/io_pipeline/summary.md create mode 100644 pipelines/meta_pipeline/summary.md create mode 100644 pipelines/param_pipeline/summary.md create mode 100644 pipelines/robust_pipeline/summary.md create mode 100644 pipelines/tagged_pipeline/summary.md create mode 100644 pipelines/viz_pipeline/summary.md create mode 100644 pipelines/yaml_pipeline/summary.md diff --git a/pipelines/cache_pipeline/summary.md b/pipelines/cache_pipeline/summary.md new file mode 100644 index 0000000..34f0f43 --- /dev/null +++ b/pipelines/cache_pipeline/summary.md @@ -0,0 +1 @@ +Demonstrates smart caching to save time on re-runs by skipping unchanged steps \ No newline at end of file diff --git a/pipelines/fan_pipeline/summary.md b/pipelines/fan_pipeline/summary.md new file mode 100644 index 0000000..f93ca43 --- /dev/null +++ b/pipelines/fan_pipeline/summary.md @@ -0,0 +1 @@ +Demonstrates parallel processing workflows using fan-out and fan-in patterns \ No newline at end of file diff --git a/pipelines/hello_pipeline/summary.md b/pipelines/hello_pipeline/summary.md new file mode 100644 index 0000000..89aafbf --- /dev/null +++ b/pipelines/hello_pipeline/summary.md @@ -0,0 +1 @@ +A basic ZenML pipeline that demonstrates the fundamentals of steps and pipelines by outputting "Hello World!" \ No newline at end of file diff --git a/pipelines/io_pipeline/summary.md b/pipelines/io_pipeline/summary.md new file mode 100644 index 0000000..b259576 --- /dev/null +++ b/pipelines/io_pipeline/summary.md @@ -0,0 +1 @@ +Demonstrates how to pass data between pipeline steps using typed inputs and outputs \ No newline at end of file diff --git a/pipelines/meta_pipeline/summary.md b/pipelines/meta_pipeline/summary.md new file mode 100644 index 0000000..2b4f073 --- /dev/null +++ b/pipelines/meta_pipeline/summary.md @@ -0,0 +1 @@ +Shows how to record useful facts about pipeline runs using metadata logging \ No newline at end of file diff --git a/pipelines/param_pipeline/summary.md b/pipelines/param_pipeline/summary.md new file mode 100644 index 0000000..64c4358 --- /dev/null +++ b/pipelines/param_pipeline/summary.md @@ -0,0 +1 @@ +Shows how to make pipeline behavior configurable using parameters \ No newline at end of file diff --git a/pipelines/robust_pipeline/summary.md b/pipelines/robust_pipeline/summary.md new file mode 100644 index 0000000..7140be9 --- /dev/null +++ b/pipelines/robust_pipeline/summary.md @@ -0,0 +1 @@ +Shows how to build resilient pipelines that handle failures using retries and hooks \ No newline at end of file diff --git a/pipelines/tagged_pipeline/summary.md b/pipelines/tagged_pipeline/summary.md new file mode 100644 index 0000000..8810118 --- /dev/null +++ b/pipelines/tagged_pipeline/summary.md @@ -0,0 +1 @@ +Demonstrates how to organize and categorize pipeline runs using tags \ No newline at end of file diff --git a/pipelines/viz_pipeline/summary.md b/pipelines/viz_pipeline/summary.md new file mode 100644 index 0000000..471c506 --- /dev/null +++ b/pipelines/viz_pipeline/summary.md @@ -0,0 +1 @@ +Shows how to create automatic and custom visualizations for pipeline data \ No newline at end of file diff --git a/pipelines/yaml_pipeline/summary.md b/pipelines/yaml_pipeline/summary.md new file mode 100644 index 0000000..e97ddaa --- /dev/null +++ b/pipelines/yaml_pipeline/summary.md @@ -0,0 +1 @@ +Demonstrates how to separate configuration from code using YAML configuration files \ No newline at end of file