@@ -321,7 +321,15 @@ def cast_int_or_float_to_bool(
 
 
 def to_trt_weights(
-    value: Any, target_quantized_type: Optional[trt.DataType] = None
+    value: Any,
+    record_weight: bool = False,
+    name: Optional[str] = None,
+    ctx: Optional[ConversionContext] = None,
+    target: Optional[Union[Target, str]] = None,
+    layer_type_name: Optional[str] = None,
+    weight_type_name: Optional[str] = None,
+    source_ir: Optional[SourceIR] = None,
+    target_quantized_type: Optional[trt.DataType] = None,
 ) -> trt.Weights:
     """
     Convert a PyTorch tensor or NumPy array to TensorRT weights.
@@ -336,6 +344,35 @@ def to_trt_weights(
         - Input tensors are made contiguous before conversion
         - Data type is preserved from the original tensor/array
     """
+    if record_weight:
+        assert name is not None, "name must be provided if record_weight is True"
+        assert ctx is not None, "ctx must be provided if record_weight is True"
+        assert target is not None, "target must be provided if record_weight is True"
+        assert (
+            layer_type_name is not None
+        ), "layer_type_name must be provided if record_weight is True"
+        assert (
+            weight_type_name is not None
+        ), "weight_type_name must be provided if record_weight is True"
+
+        supported_layer_types = ["CONVOLUTION", "DECONVOLUTION"]
+        supported_weight_types = ["KERNEL"]
+        assert (
+            layer_type_name in supported_layer_types
+        ), f"Unsupported layer type: {layer_type_name}. Please add the layer type to this function to enable refitting."
+        assert (
+            weight_type_name in supported_weight_types
+        ), f"Unsupported weight type: {weight_type_name}. Please add the weight type to this function to enable refitting."
+        source_ir = source_ir if source_ir is not None else SourceIR.UNKNOWN
+        target_name = (
+            f"{source_ir}_ops.{target}"
+            if isinstance(target, str)
+            else f"{source_ir}_ops.{target.__name__}"
+        )
+
+        name = f"[{layer_type_name}]-[{target_name}]-[{name}] {weight_type_name}"
+        record_weight_in_ctx(ctx, name, value)
+
     if isinstance(value, torch.Tensor):
         # Tensor must be contiguous before conversion
         value = value.contiguous()
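
For reference, here is a minimal sketch (not part of the diff) of the weight key that the new `record_weight` path builds before calling `record_weight_in_ctx`; the layer type, target, source IR, and tensor name below are made-up example values, not values taken from this PR:

```python
# Illustrative reconstruction of the key format added in the hunk above.
from typing import Any, Union


def build_weight_key(
    layer_type_name: str,
    source_ir: str,
    target: Union[str, Any],
    name: str,
    weight_type_name: str,
) -> str:
    # Mirrors the f-strings added above.
    target_name = (
        f"{source_ir}_ops.{target}"
        if isinstance(target, str)
        else f"{source_ir}_ops.{target.__name__}"
    )
    return f"[{layer_type_name}]-[{target_name}]-[{name}] {weight_type_name}"


# e.g. a convolution kernel recorded by an aten-level converter (example values):
print(build_weight_key("CONVOLUTION", "aten", "convolution", "conv1_weight", "KERNEL"))
# -> [CONVOLUTION]-[aten_ops.convolution]-[conv1_weight] KERNEL
```

This key is what ends up in `ctx.weight_refit_map`, presumably so refit logic can map a TensorRT layer weight back to its original tensor.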
@@ -351,6 +388,15 @@ def to_trt_weights(
         )
 
 
+def record_weight_in_ctx(
+    ctx: ConversionContext,
+    name: str,
+    value: torch.Tensor,
+) -> None:
+    ctx.weight_refit_map[name] = value
+    ctx.cpu_weights_reference_holder[name] = value
+
+
 def create_constant(
     ctx: ConversionContext,
     value: Union[int, float, bool, np.ndarray, torch.Tensor],
@@ -415,17 +461,14 @@ def create_constant(
                 weights,
             )
             constant.name = name
-            ctx.cpu_weights_reference_holder[name + " FP4_CONSTANT"] = torch_value
+            record_weight_in_ctx(ctx, name + " FP4_CONSTANT", torch_value)
            return constant.get_output(0)
 
-        # Used for refit
-        ctx.weight_refit_map[name + " CONSTANT"] = torch_value
-
-        # This is a buffer to hold the torch.Tensor so that they are alive during the course of TRT compilation.
-        ctx.cpu_weights_reference_holder[name] = torch_value
+        # Record the weight in ctx for refit and cpu memory reference
+        record_weight_in_ctx(ctx, name + " CONSTANT", torch_value)
 
         # Convert the torch.Tensor to a trt.Weights object
-        trt_weights = to_trt_weights(torch_value)
+        trt_weights = to_trt_weights(torch_value, record_weight=False)
         constant = ctx.net.add_constant(
             shape,
             trt_weights,
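
Below is a self-contained sketch (not from the PR) of what the refactored `create_constant` path records: the constant tensor is stored once under `name + " CONSTANT"` via `record_weight_in_ctx`, and `to_trt_weights` is then called with `record_weight=False`, presumably so the same tensor is not recorded twice. `FakeConversionContext` is a hypothetical stand-in for the real `ConversionContext` with only the two attributes the helper touches:

```python
import torch


class FakeConversionContext:
    """Stand-in for ConversionContext: only the two dicts record_weight_in_ctx uses."""

    def __init__(self) -> None:
        self.weight_refit_map: dict[str, torch.Tensor] = {}
        self.cpu_weights_reference_holder: dict[str, torch.Tensor] = {}


def record_weight_in_ctx(ctx: FakeConversionContext, name: str, value: torch.Tensor) -> None:
    # Same two assignments as the helper introduced in this PR.
    ctx.weight_refit_map[name] = value  # consulted when refitting engine weights
    ctx.cpu_weights_reference_holder[name] = value  # keeps the CPU tensor alive during TRT compilation


ctx = FakeConversionContext()
torch_value = torch.ones(4, 4)

# What the non-FP4 branch of create_constant now does:
record_weight_in_ctx(ctx, "my_const" + " CONSTANT", torch_value)

assert ctx.weight_refit_map["my_const CONSTANT"] is torch_value
assert ctx.cpu_weights_reference_holder["my_const CONSTANT"] is torch_value
```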