
Commit 02d7bea

post rebase
1 parent e99a46c commit 02d7bea

8 files changed, 17 insertions(+), 29 deletions(-)


docs/source/en/model_doc/hubert.md

Lines changed: 2 additions & 2 deletions
@@ -65,7 +65,7 @@ dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", spli
 sampling_rate = dataset.features["audio"].sampling_rate

 processor = AutoProcessor.from_pretrained("facebook/hubert-base-ls960")
-model = AutoModelForCTC.from_pretrained("facebook/hubert-base-ls960", torch_dtype=torch.float16, device_map="auto", attn_implementation="sdpa")
+model = AutoModelForCTC.from_pretrained("facebook/hubert-base-ls960", dtype=torch.float16, device_map="auto", attn_implementation="sdpa")

 inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
 with torch.no_grad():

@@ -100,7 +100,7 @@ dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", spli
 sampling_rate = dataset.features["audio"].sampling_rate

 processor = AutoProcessor.from_pretrained("facebook/hubert-base-ls960")
-model = AutoModelForCTC.from_pretrained("facebook/hubert-base-ls960", quantization_config=bnb_config, torch_dtype=torch.float16, device_map="auto", attn_implementation="sdpa")
+model = AutoModelForCTC.from_pretrained("facebook/hubert-base-ls960", quantization_config=bnb_config, dtype=torch.float16, device_map="auto", attn_implementation="sdpa")

 inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
 with torch.no_grad():
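Assembled into a runnable form, the updated documentation example now reads roughly as below. This is a minimal sketch, not part of the diff: the imports, the dataset split, and the final decoding step are assumptions based on the usual CTC example on the doc page, and the explicit move/cast of the inputs is added here only so the fp16 model and its inputs agree.

import torch
from datasets import load_dataset
from transformers import AutoModelForCTC, AutoProcessor

dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
sampling_rate = dataset.features["audio"].sampling_rate

processor = AutoProcessor.from_pretrained("facebook/hubert-base-ls960")
# `dtype` replaces the deprecated `torch_dtype` keyword
model = AutoModelForCTC.from_pretrained(
    "facebook/hubert-base-ls960", dtype=torch.float16, device_map="auto", attn_implementation="sdpa"
)

inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
inputs = inputs.to(model.device, dtype=torch.float16)  # assumption: match the fp16 model; not in the diff

with torch.no_grad():
    logits = model(**inputs).logits

# greedy CTC decoding (assumed continuation of the doc example)
predicted_ids = torch.argmax(logits, dim=-1)
print(processor.batch_decode(predicted_ids))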

src/transformers/pipelines/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -983,7 +983,7 @@ def pipeline(
         model_kwargs["device_map"] = device_map

     # BC for the `torch_dtype` argument
-    if (torch_dtype := kwargs.get("torch_dtype", None)) is not None:
+    if (torch_dtype := kwargs.get("torch_dtype")) is not None:
         logger.warning_once("`torch_dtype` is deprecated! Use `dtype` instead!")
         # If both are provided, keep `dtype`
         dtype = torch_dtype if dtype == "auto" else dtype
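The change itself is cosmetic: `dict.get` already returns `None` when the key is missing, so the explicit default was redundant and behavior is unchanged. For callers, the surrounding shim means `torch_dtype` still reaches `pipeline()` but only triggers the deprecation warning, and an explicit `dtype` wins whenever both are passed (the old keyword is used only while `dtype` is still at its `"auto"` default). A hypothetical illustration (the task and model id are chosen only for the example and are not part of this commit):

import torch
from transformers import pipeline

# preferred: the renamed `dtype` keyword
asr = pipeline("automatic-speech-recognition", model="facebook/hubert-large-ls960-ft", dtype=torch.float16)

# deprecated spelling: still accepted, but logs
# "`torch_dtype` is deprecated! Use `dtype` instead!" once
asr = pipeline("automatic-speech-recognition", model="facebook/hubert-large-ls960-ft", torch_dtype=torch.float16)

# if both are given, the explicit `dtype` takes precedence
asr = pipeline(
    "automatic-speech-recognition",
    model="facebook/hubert-large-ls960-ft",
    dtype=torch.bfloat16,
    torch_dtype=torch.float16,  # ignored because `dtype` is no longer "auto"
)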

tests/models/fuyu/test_modeling_fuyu.py

Lines changed: 1 addition & 1 deletion
@@ -265,7 +265,7 @@ def default_processor(self):

     @cached_property
     def default_model(self):
-        return FuyuForCausalLM.from_pretrained("adept/fuyu-8b", torch_dtype="float16", device_map=torch_device)
+        return FuyuForCausalLM.from_pretrained("adept/fuyu-8b", dtype="float16", device_map=torch_device)

     def test_greedy_generation(self):
         processor = self.default_processor

tests/models/gemma2/test_modeling_gemma2.py

Lines changed: 1 addition & 1 deletion
@@ -506,7 +506,7 @@ def test_generation_beyond_sliding_window_dynamic(self, attn_implementation: str
         inputs = tokenizer(input_text, padding=True, return_tensors="pt").to(torch_device)

         model = AutoModelForCausalLM.from_pretrained(
-            model_id, attn_implementation=attn_implementation, torch_dtype=torch.float16
+            model_id, attn_implementation=attn_implementation, dtype=torch.float16
         ).to(torch_device)

         # Make sure prefill is larger than sliding window

tests/models/glm4_moe/test_modeling_glm4_moe.py

Lines changed: 1 addition & 3 deletions
@@ -108,9 +108,7 @@ def test_compile_static_cache(self):

         prompts = ["[gMASK]<sop>hello", "[gMASK]<sop>tell me"]
         tokenizer = AutoTokenizer.from_pretrained("zai-org/GLM-4.5")
-        model = Glm4MoeForCausalLM.from_pretrained(
-            "zai-org/GLM-4.5", device_map=torch_device, dtype=torch.bfloat16
-        )
+        model = Glm4MoeForCausalLM.from_pretrained("zai-org/GLM-4.5", device_map=torch_device, dtype=torch.bfloat16)
         inputs = tokenizer(prompts, return_tensors="pt", padding=True).to(model.device)

         # Dynamic Cache

tests/models/glm4v_moe/test_modeling_glm4v_moe.py

Lines changed: 8 additions & 18 deletions
@@ -328,9 +328,7 @@ def tearDown(self):

     @slow
     def test_small_model_integration_test(self):
-        model = Glm4vMoeForConditionalGeneration.from_pretrained(
-            "zai-org/GLM-4.5V", torch_dtype="auto", device_map="auto"
-        )
+        model = Glm4vMoeForConditionalGeneration.from_pretrained("zai-org/GLM-4.5V", dtype="auto", device_map="auto")

         inputs = self.processor.apply_chat_template(
             self.message, tokenize=True, add_generation_prompt=True, return_dict=True, return_tensors="pt"

@@ -364,9 +362,7 @@ def test_small_model_integration_test(self):

     @slow
     def test_small_model_integration_test_batch(self):
-        model = Glm4vMoeForConditionalGeneration.from_pretrained(
-            "zai-org/GLM-4.5V", torch_dtype="auto", device_map="auto"
-        )
+        model = Glm4vMoeForConditionalGeneration.from_pretrained("zai-org/GLM-4.5V", dtype="auto", device_map="auto")
         batch_messages = [self.message] * 2
         inputs = self.processor.apply_chat_template(
             batch_messages, tokenize=True, add_generation_prompt=True, return_dict=True, return_tensors="pt"

@@ -388,7 +384,7 @@ def test_small_model_integration_test_batch(self):
     def test_small_model_integration_test_with_video(self):
         processor = AutoProcessor.from_pretrained("zai-org/GLM-4.5V", max_image_size={"longest_edge": 50176})
         model = Glm4vMoeForConditionalGeneration.from_pretrained(
-            "zai-org/GLM-4.5V", torch_dtype=torch.float16, device_map="auto"
+            "zai-org/GLM-4.5V", dtype=torch.float16, device_map="auto"
         )
         questions = ["Describe this video."] * 2
         video_urls = [

@@ -424,9 +420,7 @@ def test_small_model_integration_test_with_video(self):

     @slow
     def test_small_model_integration_test_expand(self):
-        model = Glm4vMoeForConditionalGeneration.from_pretrained(
-            "zai-org/GLM-4.5V", torch_dtype="auto", device_map="auto"
-        )
+        model = Glm4vMoeForConditionalGeneration.from_pretrained("zai-org/GLM-4.5V", dtype="auto", device_map="auto")
         inputs = self.processor.apply_chat_template(
             self.message, tokenize=True, add_generation_prompt=True, return_dict=True, return_tensors="pt"
         ).to(torch_device)

@@ -444,9 +438,7 @@ def test_small_model_integration_test_expand(self):

     @slow
     def test_small_model_integration_test_batch_wo_image(self):
-        model = Glm4vMoeForConditionalGeneration.from_pretrained(
-            "zai-org/GLM-4.5V", torch_dtype="auto", device_map="auto"
-        )
+        model = Glm4vMoeForConditionalGeneration.from_pretrained("zai-org/GLM-4.5V", dtype="auto", device_map="auto")
         message_wo_image = [
             {"role": "user", "content": [{"type": "text", "text": "Who are you?"}]},
         ]

@@ -474,9 +466,7 @@ def test_small_model_integration_test_batch_wo_image(self):

     @slow
     def test_small_model_integration_test_batch_different_resolutions(self):
-        model = Glm4vMoeForConditionalGeneration.from_pretrained(
-            "zai-org/GLM-4.5V", torch_dtype="auto", device_map="auto"
-        )
+        model = Glm4vMoeForConditionalGeneration.from_pretrained("zai-org/GLM-4.5V", dtype="auto", device_map="auto")
         batched_messages = [self.message, self.message2]
         inputs = self.processor.apply_chat_template(
             batched_messages,

@@ -505,7 +495,7 @@ def test_small_model_integration_test_batch_different_resolutions(self):
     def test_small_model_integration_test_batch_flashatt2(self):
         model = Glm4vMoeForConditionalGeneration.from_pretrained(
             "zai-org/GLM-4.5V",
-            torch_dtype=torch.bfloat16,
+            dtype=torch.bfloat16,
             attn_implementation="flash_attention_2",
             device_map="auto",
         )

@@ -537,7 +527,7 @@ def test_small_model_integration_test_batch_flashatt2(self):
     def test_small_model_integration_test_batch_wo_image_flashatt2(self):
         model = Glm4vMoeForConditionalGeneration.from_pretrained(
             "zai-org/GLM-4.5V",
-            torch_dtype=torch.bfloat16,
+            dtype=torch.bfloat16,
             attn_implementation="flash_attention_2",
             device_map="auto",
         )

tests/models/mistral/test_modeling_mistral.py

Lines changed: 1 addition & 1 deletion
@@ -355,7 +355,7 @@ def test_generation_beyond_sliding_window_dynamic(self, attn_implementation: str
         inputs = tokenizer(input_text, padding=True, return_tensors="pt").to(torch_device)

         model = MistralForCausalLM.from_pretrained(
-            model_id, attn_implementation=attn_implementation, device_map=torch_device, torch_dtype=torch.float16
+            model_id, attn_implementation=attn_implementation, device_map=torch_device, dtype=torch.float16
         )

         # Make sure prefill is larger than sliding window

tests/test_modeling_common.py

Lines changed: 2 additions & 2 deletions
@@ -3496,7 +3496,7 @@ def flash_attn_inference_equivalence(self, attn_implementation: str, padding_sid
             model = model_class(config)
             with tempfile.TemporaryDirectory() as tmpdirname:
                 model.save_pretrained(tmpdirname)
-                model = model_class.from_pretrained(tmpdirname, torch_dtype=torch.bfloat16)
+                model = model_class.from_pretrained(tmpdirname, dtype=torch.bfloat16)
                 model.to(torch_device)

                 dummy_input = inputs_dict[model.main_input_name][:1]

@@ -4330,7 +4330,7 @@ def test_flash_attention_2_continue_generate_with_position_ids(self):
                 model = (
                     model_class.from_pretrained(
                         tmpdirname,
-                        torch_dtype=torch.bfloat16,
+                        dtype=torch.bfloat16,
                         attn_implementation="flash_attention_2",
                     )
                     .to(torch_device)
