From 74d80ebd7bef1ac46f155677a9b8234435aa1c8d Mon Sep 17 00:00:00 2001
From: shanjiaz <zsjwpianpian@gmail.com>
Date: Thu, 25 Sep 2025 13:22:02 -0400
Subject: [PATCH 1/5] Add block quantization e2e test

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>
---
 tests/e2e/vLLM/configs/fp8_block.yaml            | 5 +++++
 tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml | 6 ++++++
 2 files changed, 11 insertions(+)
 create mode 100644 tests/e2e/vLLM/configs/fp8_block.yaml
 create mode 100644 tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml

diff --git a/tests/e2e/vLLM/configs/fp8_block.yaml b/tests/e2e/vLLM/configs/fp8_block.yaml
new file mode 100644
index 0000000000..a468977e41
--- /dev/null
+++ b/tests/e2e/vLLM/configs/fp8_block.yaml
@@ -0,0 +1,5 @@
+cadence: "nightly"
+test_type: "regression"
+model: Qwen/Qwen2.5-0.5B
+scheme: FP8_BLOCK
+recipe: tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
diff --git a/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml b/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
new file mode 100644
index 0000000000..ff0ac634cf
--- /dev/null
+++ b/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
@@ -0,0 +1,6 @@
+quant_stage:
+  quant_modifiers:
+    QuantizationModifier:
+      targets: "Linear"
+      scheme: "FP8_BLOCK"
+      ignore: ["lm_head", "re:.*mlp.gate$"]

From e262cc866a655356d2c50b7df64e7061f8d00dff Mon Sep 17 00:00:00 2001
From: shanjiaz <zsjwpianpian@gmail.com>
Date: Fri, 10 Oct 2025 18:45:59 +0000
Subject: [PATCH 2/5] Use TinyLlama instead of Qwen2.5-0.5B in FP8 block e2e config

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>
---
 tests/e2e/vLLM/configs/fp8_block.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/e2e/vLLM/configs/fp8_block.yaml b/tests/e2e/vLLM/configs/fp8_block.yaml
index a468977e41..f1b9b15370 100644
--- a/tests/e2e/vLLM/configs/fp8_block.yaml
+++ b/tests/e2e/vLLM/configs/fp8_block.yaml
@@ -1,5 +1,4 @@
 cadence: "nightly"
-test_type: "regression"
-model: Qwen/Qwen2.5-0.5B
+model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 scheme: FP8_BLOCK
 recipe: tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml

From b054dd76e35213edd7de3c1ac73bde5a922f3d98 Mon Sep 17 00:00:00 2001
From: shanjiaz <zsjwpianpian@gmail.com>
Date: Tue, 14 Oct 2025 13:14:51 +0000
Subject: [PATCH 3/5] Remove FP8 block recipe; use Llama-3.2-1B-Instruct

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>
---
 tests/e2e/vLLM/configs/fp8_block.yaml            | 4 ++--
 tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml | 6 ------
 2 files changed, 2 insertions(+), 8 deletions(-)
 delete mode 100644 tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml

diff --git a/tests/e2e/vLLM/configs/fp8_block.yaml b/tests/e2e/vLLM/configs/fp8_block.yaml
index f1b9b15370..838ac53b13 100644
--- a/tests/e2e/vLLM/configs/fp8_block.yaml
+++ b/tests/e2e/vLLM/configs/fp8_block.yaml
@@ -1,4 +1,4 @@
 cadence: "nightly"
-model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+test_type: "regression"
+model: meta-llama/Llama-3.2-1B-Instruct
 scheme: FP8_BLOCK
-recipe: tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
diff --git a/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml b/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
deleted file mode 100644
index ff0ac634cf..0000000000
--- a/tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-quant_stage:
-  quant_modifiers:
-    QuantizationModifier:
-      targets: "Linear"
-      scheme: "FP8_BLOCK"
-      ignore: ["lm_head", "re:.*mlp.gate$"]

From df63ecae2f6e0252f21460ca6fd05b97bba7007f Mon Sep 17 00:00:00 2001
From: shanjiaz <zsjwpianpian@gmail.com>
Date: Tue, 14 Oct 2025 13:45:53 +0000
Subject: [PATCH 4/5] Switch fp8_block model to TinyLlama; default TEST_DATA_FILE to fp8_block

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>
---
 tests/e2e/vLLM/configs/fp8_block.yaml | 2 +-
 tests/e2e/vLLM/test_vllm.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/e2e/vLLM/configs/fp8_block.yaml b/tests/e2e/vLLM/configs/fp8_block.yaml
index 838ac53b13..3d0c0512e9 100644
--- a/tests/e2e/vLLM/configs/fp8_block.yaml
+++ b/tests/e2e/vLLM/configs/fp8_block.yaml
@@ -1,4 +1,4 @@
 cadence: "nightly"
 test_type: "regression"
-model: meta-llama/Llama-3.2-1B-Instruct
+model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
 scheme: FP8_BLOCK
diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index 9c099a5aea..66ac5c335a 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -18,7 +18,7 @@
 HF_MODEL_HUB_NAME = "nm-testing"
 
 TEST_DATA_FILE = os.environ.get(
-    "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml"
+    "TEST_DATA_FILE", "tests/e2e/vLLM/configs/fp8_block.yaml"
 )
 SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")
 # vllm python environment

From a1e5dca912882d012e03cffe9d22e0d60877e8f8 Mon Sep 17 00:00:00 2001
From: shanjiaz <zsjwpianpian@gmail.com>
Date: Tue, 14 Oct 2025 13:47:52 +0000
Subject: [PATCH 5/5] Restore default TEST_DATA_FILE to int8_dynamic_per_token.yaml

Signed-off-by: shanjiaz <zsjwpianpian@gmail.com>
---
 tests/e2e/vLLM/test_vllm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/vLLM/test_vllm.py b/tests/e2e/vLLM/test_vllm.py
index 66ac5c335a..9c099a5aea 100644
--- a/tests/e2e/vLLM/test_vllm.py
+++ b/tests/e2e/vLLM/test_vllm.py
@@ -18,7 +18,7 @@
 HF_MODEL_HUB_NAME = "nm-testing"
 
 TEST_DATA_FILE = os.environ.get(
-    "TEST_DATA_FILE", "tests/e2e/vLLM/configs/fp8_block.yaml"
+    "TEST_DATA_FILE", "tests/e2e/vLLM/configs/int8_dynamic_per_token.yaml"
 )
 SKIP_HF_UPLOAD = os.environ.get("SKIP_HF_UPLOAD", "")
 # vllm python environment