Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tests/e2e/vLLM/configs/fp8_block.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# e2e vLLM test case: the FP8_BLOCK scheme applied to Qwen/Qwen2.5-0.5B,
# using the recipe file referenced below.
# NOTE(review): `cadence` and `test_type` are presumably read by the test
# harness to decide when and in which suite this case runs; confirm
# against the e2e runner.
cadence: "nightly"
test_type: "regression"
model: "Qwen/Qwen2.5-0.5B"
scheme: "FP8_BLOCK"
recipe: "tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml"
6 changes: 6 additions & 0 deletions tests/e2e/vLLM/recipes/FP8/recipe_fp8_block.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Recipe with a single stage (`quant_stage`) holding one QuantizationModifier
# configured with the FP8_BLOCK scheme.
# Nesting is significant: stage -> modifier group -> modifier -> arguments.
quant_stage:
  quant_modifiers:
    QuantizationModifier:
      # Quantize modules matched by "Linear".
      targets: "Linear"
      scheme: "FP8_BLOCK"
      # Entries excluded from quantization: "lm_head", plus a pattern for
      # names ending in "mlp.gate".
      # NOTE(review): the "re:" prefix presumably marks the entry as a regex,
      # and the pattern presumably targets MoE router gates; confirm against
      # the modifier's documentation and the target model's module names.
      ignore: ["lm_head", "re:.*mlp.gate$"]