    DYNAMO_CONVERTERS as CONVERTERS,
)
from torch_tensorrt.dynamo.conversion._TRTInterpreter import TRTInterpreter
+from torch_tensorrt.dynamo.conversion.impl.normalization.ops import (
+    batch_norm_constant_folding,
+)
from torch_tensorrt.dynamo.conversion.truncate_double import repair_double_inputs
from torch_tensorrt.dynamo.lowering import (
    get_decompositions,
@@ -78,8 +81,9 @@ def construct_refit_mapping(
        compilation_settings=settings,
    )
    interpreter._construct_trt_network_def()
+    weight_refit_map: dict[str, torch.Tensor] = interpreter.ctx.weight_refit_map

-    return interpreter.ctx.weight_refit_map
+    return weight_refit_map


@needs_refit
@@ -90,7 +94,20 @@ def construct_refit_mapping_from_weight_name_map(
) -> dict[Any, Any]:
    engine_weight_map = {}
    for engine_weight_name, (sd_weight_name, np_weight_type) in weight_name_map.items():
-        if sd_weight_name not in state_dict:
+        # Add more constant folding converters here
+        if engine_weight_name.split(" ")[-1] in ["SCALE", "SHIFT"]:
+            # Batch Norm Layer
+            params = {}
+            for w in sd_weight_name:
+                params[w.split(".")[-1]] = state_dict[w].cuda()
+            # Batch norm constant folding
+
+            scale, shift = batch_norm_constant_folding(**params, eps=1e-7)
+            # Assign the folded scale or shift tensor based on the engine weight suffix
+            engine_weight_map[engine_weight_name] = eval(
+                engine_weight_name.split(" ")[-1].lower()
+            )
+        elif sd_weight_name not in state_dict:
            # If weights is not in sd, we can leave it unchanged
            continue
        else:
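For reference, the SCALE and SHIFT engine weights appear to correspond to TensorRT's scale-layer representation of batch norm, so the state-dict tensors have to be folded into a per-channel scale and shift before they can be refit. Below is a minimal sketch of that folding, assuming the state-dict keys follow the usual torch.nn.BatchNorm naming (weight, bias, running_mean, running_var) and that the imported batch_norm_constant_folding helper accepts the same keyword arguments; fold_batch_norm is a hypothetical stand-in for illustration, not part of this patch.

```python
import torch


def fold_batch_norm(
    weight: torch.Tensor,       # gamma
    bias: torch.Tensor,         # beta
    running_mean: torch.Tensor,
    running_var: torch.Tensor,
    eps: float = 1e-7,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Fold batch-norm statistics into a per-channel scale and shift.

    y = gamma * (x - mean) / sqrt(var + eps)  + beta  ==  scale * x + shift
    """
    scale = weight / torch.sqrt(running_var + eps)
    shift = bias - running_mean * scale
    return scale, shift
```

The loop over sd_weight_name builds exactly such a keyword dict by taking the last component of each state-dict key, and the eval(...) then selects the local scale or shift variable depending on whether the engine weight name ends in SCALE or SHIFT.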
@@ -178,10 +195,12 @@ def _refit_single_trt_engine_with_gm(
        for layer_name in weight_list:
            if layer_name not in mapping:
                raise AssertionError(f"{layer_name} is not found in weight mapping")
-            # Use Numpy to create weights
+            # Use Tensor to create weights
            weight = mapping[layer_name]
            trt_dtype = dtype._from(weight.dtype).to(trt.DataType)
-            trt_wt_tensor = trt.Weights(trt_dtype, weight.ctypes.data, weight.size)
+            trt_wt_tensor = trt.Weights(
+                trt_dtype, weight.data_ptr(), torch.numel(weight)
+            )
            refitter.set_named_weights(layer_name, trt_wt_tensor, trt_wt_location)
            refitted.add(layer_name)

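Because the refit weights are now torch tensors rather than NumPy arrays, trt.Weights is built from the tensor's raw pointer and element count, and trt_wt_location has to match where that memory lives. A rough sketch of the pattern, assuming contiguous tensors; to_trt_weights is a hypothetical helper for illustration only.

```python
import tensorrt as trt
import torch
from torch_tensorrt import dtype


def to_trt_weights(weight: torch.Tensor) -> tuple[trt.Weights, trt.TensorLocation]:
    # trt.Weights only borrows the pointer, so the tensor must stay alive
    # (and contiguous) until refitter.refit_cuda_engine() has consumed it.
    assert weight.is_contiguous()
    trt_dtype = dtype._from(weight.dtype).to(trt.DataType)
    location = (
        trt.TensorLocation.DEVICE if weight.is_cuda else trt.TensorLocation.HOST
    )
    return trt.Weights(trt_dtype, weight.data_ptr(), weight.numel()), location
```

The location is then what gets passed as the third argument to refitter.set_named_weights(layer_name, trt_wt_tensor, trt_wt_location), as in the hunk above.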
@@ -300,7 +319,7 @@ def refit_module_weights(

    # Check the number of supported operations in the graph
    num_supported_ops, total_ops = partitioning.get_graph_converter_support(
-        new_gm, settings.debug, settings.torch_executed_ops
+        new_gm, settings.torch_executed_ops
    )

    if num_supported_ops == 0 or (
@@ -363,7 +382,6 @@ def refit_module_weights(

    # Iterate over all components that can be accelerated
    # Generate the corresponding TRT Module for those
-    new_weight_module.module().to(CPU_DEVICE)
    for name, new_submodule in new_partitioned_module.named_children():
        # Refit each submodule
        # Extract engine from the submodule