diff --git a/cmake/copyfile.py b/cmake/copyfile.py index 277ed2c2458694..4060228ea566e6 100644 --- a/cmake/copyfile.py +++ b/cmake/copyfile.py @@ -26,18 +26,18 @@ def main(): dst = os.path.join(dst, pathList[-1]) if not os.path.exists(dst): shutil.copytree(src, dst) - print("first copy directory: {0} --->>> {1}".format(src, dst)) + print(f"first copy directory: {src} --->>> {dst}") else: shutil.rmtree(dst) shutil.copytree(src, dst) - print("overwritten copy directory: {0} --->>> {1}".format(src, dst)) + print(f"overwritten copy directory: {src} --->>> {dst}") else: # copy file, wildcard if not os.path.exists(dst): os.makedirs(dst) srcFiles = glob.glob(src) for srcFile in srcFiles: shutil.copy(srcFile, dst) - print("copy file: {0} --->>> {1}".format(srcFile, dst)) + print(f"copy file: {srcFile} --->>> {dst}") if __name__ == "__main__": diff --git a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py index 77c7d5875b74ae..98421cf35be70b 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py @@ -579,7 +579,7 @@ def DetermineForwardPositionMap( if len(forward_returns_list) == 1: return_name = "out" else: - return_name = "out_{}".format(i + 1) + return_name = f"out_{i + 1}" else: return_name = forward_return[0] return_type = forward_return[1] diff --git a/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py b/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py index 59f222a2a5eaed..f2dec13f8a3d5d 100644 --- a/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py +++ b/paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py @@ -87,7 +87,7 @@ def download_concat(cache_folder, zip_path): download(data_urls[i], cache_folder, data_md5s[i]) file_name = os.path.join(cache_folder, data_urls[i].split('/')[-1]) file_names.append(file_name) - print("Downloaded part {0}\n".format(file_name)) + print(f"Downloaded part {file_name}\n") with open(zip_path, "wb") as outfile: for fname in file_names: shutil.copyfileobj(open(fname, 'rb'), outfile) @@ -174,13 +174,13 @@ def run_convert(): retry = retry + 1 else: raise RuntimeError( - "Can not convert the dataset to binary file with try limit {0}".format( + "Can not convert the dataset to binary file with try limit {}".format( try_limit ) ) download_concat(cache_folder, zip_path) convert_Imagenet_tar2bin(zip_path, output_file) - print("\nSuccess! The binary file can be found at {0}".format(output_file)) + print(f"\nSuccess! The binary file can be found at {output_file}") def convert_Imagenet_local2bin(args): @@ -231,7 +231,7 @@ def convert_Imagenet_local2bin(args): ) if os.path.getsize(bin_file_path) == target_size: print( - "Success! The user data output binary file can be found at: {0}".format( + "Success! 
The user data output binary file can be found at: {}".format( bin_file_path ) ) diff --git a/paddle/fluid/operators/generator/filters.py b/paddle/fluid/operators/generator/filters.py index 0ef3adc3306ec9..98682a2ae32952 100644 --- a/paddle/fluid/operators/generator/filters.py +++ b/paddle/fluid/operators/generator/filters.py @@ -57,7 +57,7 @@ class {to_pascal_case(op_name)}InferVarType def quote(s): - return '"{}"'.format(s) + return f'"{s}"' # ------------------------------ attr ------------------------------------- @@ -132,16 +132,16 @@ def filter_intermediate(items: Sequence): # -------------- transform argument names from yaml to opmaker ------------ def to_opmaker_name(s): if s.endswith("_grad"): - return 'GradVarName("{}")'.format(s[:-5]) + return f'GradVarName("{s[:-5]}")' else: - return '"{}"'.format(s) + return f'"{s}"' def to_opmaker_name_cstr(s): if s.endswith("_grad"): - return '"{}@GRAD"'.format(s[:-5]) + return f'"{s[:-5]}@GRAD"' else: - return '"{}"'.format(s) + return f'"{s}"' def to_pascal_case(s): diff --git a/paddle/infrt/tests/models/efficientnet-b4/net/utils.py b/paddle/infrt/tests/models/efficientnet-b4/net/utils.py new file mode 100644 index 00000000000000..29c02c05842190 --- /dev/null +++ b/paddle/infrt/tests/models/efficientnet-b4/net/utils.py @@ -0,0 +1,424 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
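Illustrative aside, not part of the upstream diff: a minimal sketch (with made-up values) checking that the str.format-to-f-string rewrites shown above are output-preserving, both for the positional "{0}"/"{1}" style and for the auto-numbered "{}" style.

# hypothetical values, used only for this demo
src, dst = "a.txt", "build/a.txt"
retry_limit = 3
# positional placeholders vs. f-string
assert "copy file: {0} --->>> {1}".format(src, dst) == f"copy file: {src} --->>> {dst}"
# auto-numbered placeholders vs. f-string
assert "Cannot download {} within retry limit {}".format(src, retry_limit) == (
    f"Cannot download {src} within retry limit {retry_limit}"
)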
+ +import collections +import math +import re +from functools import partial + +import paddle +import paddle.nn.functional as F +from paddle import nn + +# Parameters for the entire model (stem, all blocks, and head) +GlobalParams = collections.namedtuple( + 'GlobalParams', + [ + 'batch_norm_momentum', + 'batch_norm_epsilon', + 'dropout_rate', + 'num_classes', + 'width_coefficient', + 'depth_coefficient', + 'depth_divisor', + 'min_depth', + 'drop_connect_rate', + 'image_size', + ], +) + +# Parameters for an individual model block +BlockArgs = collections.namedtuple( + 'BlockArgs', + [ + 'kernel_size', + 'num_repeat', + 'input_filters', + 'output_filters', + 'expand_ratio', + 'id_skip', + 'stride', + 'se_ratio', + ], +) + +# Change namedtuple defaults +GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) + + +def round_filters(filters, global_params): + """Calculate and round number of filters based on depth multiplier.""" + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max( + min_depth, int(filters + divisor / 2) // divisor * divisor + ) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, global_params): + """Round number of filters based on depth multiplier.""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +def drop_connect(inputs, prob, training): + """Drop input connection""" + if not training: + return inputs + keep_prob = 1.0 - prob + inputs_shape = paddle.shape(inputs) + random_tensor = keep_prob + paddle.rand(shape=[inputs_shape[0], 1, 1, 1]) + binary_tensor = paddle.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +def get_same_padding_conv2d(image_size=None): + """Chooses static padding if you have specified an image size, and dynamic padding otherwise. 
+ Static padding is necessary for ONNX exporting of models.""" + if image_size is None: + return Conv2dDynamicSamePadding + else: + return partial(Conv2dStaticSamePadding, image_size=image_size) + + +class Conv2dDynamicSamePadding(nn.Conv2D): + """2D Convolutions like TensorFlow, for a dynamic image size""" + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + dilation=1, + groups=1, + bias_attr=None, + ): + super().__init__( + in_channels, + out_channels, + kernel_size, + stride, + 0, + dilation, + groups, + bias_attr=bias_attr, + ) + self.stride = ( + self._stride if len(self._stride) == 2 else [self._stride[0]] * 2 + ) + + def forward(self, x): + ih, iw = x.shape[-2:] + kh, kw = self.weight.shape[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max( + (oh - 1) * self.stride[0] + (kh - 1) * self._dilation[0] + 1 - ih, 0 + ) + pad_w = max( + (ow - 1) * self.stride[1] + (kw - 1) * self._dilation[1] + 1 - iw, 0 + ) + if pad_h > 0 or pad_w > 0: + x = F.pad( + x, + [ + pad_w // 2, + pad_w - pad_w // 2, + pad_h // 2, + pad_h - pad_h // 2, + ], + ) + return F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self._padding, + self._dilation, + self._groups, + ) + + +class Conv2dStaticSamePadding(nn.Conv2D): + """2D Convolutions like TensorFlow, for a fixed image size""" + + def __init__( + self, in_channels, out_channels, kernel_size, image_size=None, **kwargs + ): + if 'stride' in kwargs and isinstance(kwargs['stride'], list): + kwargs['stride'] = kwargs['stride'][0] + super().__init__(in_channels, out_channels, kernel_size, **kwargs) + self.stride = ( + self._stride if len(self._stride) == 2 else [self._stride[0]] * 2 + ) + + # Calculate padding based on image size and save it + assert image_size is not None + ih, iw = ( + image_size if type(image_size) == list else [image_size, image_size] + ) + kh, kw = self.weight.shape[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max( + (oh - 1) * self.stride[0] + (kh - 1) * self._dilation[0] + 1 - ih, 0 + ) + pad_w = max( + (ow - 1) * self.stride[1] + (kw - 1) * self._dilation[1] + 1 - iw, 0 + ) + if pad_h > 0 or pad_w > 0: + self.static_padding = nn.Pad2D( + [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2] + ) + else: + self.static_padding = Identity() + + def forward(self, x): + x = self.static_padding(x) + x = F.conv2d( + x, + self.weight, + self.bias, + self.stride, + self._padding, + self._dilation, + self._groups, + ) + return x + + +class Identity(nn.Layer): + def __init__( + self, + ): + super().__init__() + + def forward(self, x): + return x + + +def efficientnet_params(model_name): + """Map EfficientNet model name to parameter coefficients.""" + params_dict = { + # Coefficients: width,depth,resolution,dropout + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + 'efficientnet-b8': (2.2, 3.6, 672, 0.5), + 'efficientnet-l2': (4.3, 5.3, 800, 0.5), + } + return params_dict[model_name] + + +class BlockDecoder: + """Block Decoder for readability, straight from the official TensorFlow repository""" + + @staticmethod + def _decode_block_string(block_string): + """Gets a block through a string notation of arguments.""" + assert 
isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + assert ('s' in options and len(options['s']) == 1) or ( + len(options['s']) == 2 and options['s'][0] == options['s'][1] + ) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])], + ) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters, + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list): + """ + Decodes a list of string notations to specify blocks inside the network. + + :param string_list: a list of strings, each string is a notation of block + :return: a list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args): + """ + Encodes a list of BlockArgs to a list of strings. + + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def efficientnet( + width_coefficient=None, + depth_coefficient=None, + dropout_rate=0.2, + drop_connect_rate=0.2, + image_size=None, + num_classes=1000, +): + """Get block arguments according to parameter and coefficients.""" + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', + 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', + 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', + 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + num_classes=num_classes, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None, + image_size=image_size, + ) + + return blocks_args, global_params + + +def get_model_params(model_name, override_params): + """Get the block args and global params for a given model""" + if model_name.startswith('efficientnet'): + w, d, s, p = efficientnet_params(model_name) + blocks_args, global_params = efficientnet( + width_coefficient=w, + depth_coefficient=d, + dropout_rate=p, + image_size=s, + ) + else: + raise NotImplementedError( + 'model name is not pre-defined: %s' % model_name + ) + if override_params: + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +url_map = { + 'efficientnet-b0': '/home/aistudio/data/weights/efficientnet-b0-355c32eb.pdparams', + 'efficientnet-b1': 
'/home/aistudio/data/weights/efficientnet-b1-f1951068.pdparams', + 'efficientnet-b2': '/home/aistudio/data/weights/efficientnet-b2-8bb594d6.pdparams', + 'efficientnet-b3': '/home/aistudio/data/weights/efficientnet-b3-5fb5a3c3.pdparams', + 'efficientnet-b4': '/home/aistudio/data/weights/efficientnet-b4-6ed6700e.pdparams', + 'efficientnet-b5': '/home/aistudio/data/weights/efficientnet-b5-b6417697.pdparams', + 'efficientnet-b6': '/home/aistudio/data/weights/efficientnet-b6-c76e70fd.pdparams', + 'efficientnet-b7': '/home/aistudio/data/weights/efficientnet-b7-dcc49843.pdparams', +} + +url_map_advprop = { + 'efficientnet-b0': '/home/aistudio/data/weights/adv-efficientnet-b0-b64d5a18.pdparams', + 'efficientnet-b1': '/home/aistudio/data/weights/adv-efficientnet-b1-0f3ce85a.pdparams', + 'efficientnet-b2': '/home/aistudio/data/weights/adv-efficientnet-b2-6e9d97e5.pdparams', + 'efficientnet-b3': '/home/aistudio/data/weights/adv-efficientnet-b3-cdd7c0f4.pdparams', + 'efficientnet-b4': '/home/aistudio/data/weights/adv-efficientnet-b4-44fb3a87.pdparams', + 'efficientnet-b5': '/home/aistudio/data/weights/adv-efficientnet-b5-86493f6b.pdparams', + 'efficientnet-b6': '/home/aistudio/data/weights/adv-efficientnet-b6-ac80338e.pdparams', + 'efficientnet-b7': '/home/aistudio/data/weights/adv-efficientnet-b7-4652b6dd.pdparams', + 'efficientnet-b8': '/home/aistudio/data/weights/adv-efficientnet-b8-22a8fe65.pdparams', +} + + +def load_pretrained_weights( + model, model_name, weights_path=None, load_fc=True, advprop=False +): + """Loads pretrained weights from weights path or download using url. + Args: + model (Module): The whole model of efficientnet. + model_name (str): Model name of efficientnet. + weights_path (None or str): + str: path to pretrained weights file on the local disk. + None: use pretrained weights downloaded from the Internet. + load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model. + advprop (bool): Whether to load pretrained weights + trained with advprop (valid when weights_path is None). 
+ """ + + # AutoAugment or Advprop (different preprocessing) + url_map_ = url_map_advprop if advprop else url_map + state_dict = paddle.load(url_map_[model_name]) + + if load_fc: + model.set_state_dict(state_dict) + else: + state_dict.pop('_fc.weight') + state_dict.pop('_fc.bias') + model.set_state_dict(state_dict) + + print(f'Loaded pretrained weights for {model_name}') diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py index bda0111c310bfa..26617ec9005347 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py @@ -448,34 +448,34 @@ def write_main_header(forward_impl, backward_impl): main_header_content = ''' #pragma once -#ifdef %s +#ifdef {} -#include "%s" -#include "%s" +#include "{}" +#include "{}" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/backends/gpu/gpu_context.h" -namespace phi { +namespace phi {{ template -struct CutlassTrait { +struct CutlassTrait {{ using Type = T; -}; +}}; template <> -struct CutlassTrait { +struct CutlassTrait {{ using Type = cutlass::half_t; -}; +}}; template <> -struct CutlassTrait { +struct CutlassTrait {{ using Type = cutlass::bfloat16_t; -}; +}}; template -struct ToPhiDTypeTrait { +struct ToPhiDTypeTrait {{ private: using NonConstT = typename std::remove_const::type; static constexpr bool kIsFP16 = std::is_same::value; @@ -484,51 +484,51 @@ def write_main_header(forward_impl, backward_impl): public: using Type = typename std::conditional::type>::type; -}; +}}; template -T *SafeGetTensorPtr(const DenseTensor &t) { +T *SafeGetTensorPtr(const DenseTensor &t) {{ using PDT = typename ToPhiDTypeTrait::Type; return reinterpret_cast(reinterpret_cast(t.template data())); -} +}} template -T *SafeGetTensorPtr(const DenseTensor *t) { +T *SafeGetTensorPtr(const DenseTensor *t) {{ return t ? SafeGetTensorPtr(*t) : nullptr; -} +}} template -T *SafeGetTensorPtr(const paddle::optional &t) { +T *SafeGetTensorPtr(const paddle::optional &t) {{ return t ? 
SafeGetTensorPtr(t.get()) : nullptr; -} +}} template -T *SafeAllocTensor(const Context &ctx, DenseTensor *t) { +T *SafeAllocTensor(const Context &ctx, DenseTensor *t) {{ using PDT = typename ToPhiDTypeTrait::Type; void *ptr = ctx.template Alloc(t); return reinterpret_cast(reinterpret_cast(ptr)); -} +}} -inline int64_t DimStride(const phi::DDim &dims, int n) { +inline int64_t DimStride(const phi::DDim &dims, int n) {{ int rank = dims.size(); - if (n < 0) { + if (n < 0) {{ n += rank; - } + }} int64_t stride = 1; - for (int i = n+1; i < rank; ++i) { + for (int i = n+1; i < rank; ++i) {{ stride *= dims[i]; - } + }} return stride; -} +}} -} // namespace phi +}} // namespace phi #include "./cutlass_forward.h" #include "./cutlass_backward.h" #endif -''' % ( +'''.format( ENABLE_MACRO, forward_impl, backward_impl, diff --git a/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_operation.py b/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_operation.py index b14036c55ddc25..32e244dc140618 100644 --- a/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_operation.py +++ b/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_operation.py @@ -318,7 +318,7 @@ def __init__( } def layout_name(self): - return "%s%s" % ( + return "{}{}".format( self.ShortLayoutTypeNames[self.A.layout], self.ShortLayoutTypeNames[self.B.layout], ) diff --git a/pyproject.toml b/pyproject.toml index 526b4e9e486cc2..33f37e5978419a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,39 +36,7 @@ select = [ "C4", # Pyupgrade - "UP001", - "UP003", - "UP004", - "UP005", - "UP006", - "UP007", - "UP008", - "UP009", - "UP010", - "UP011", - "UP012", - "UP013", - "UP014", - "UP017", - "UP018", - "UP019", - "UP020", - "UP021", - "UP022", - "UP023", - "UP024", - "UP025", - "UP026", - "UP027", - "UP028", - "UP029", - # "UP030", - # "UP031", - # "UP032", - "UP033", - "UP034", - "UP035", - "UP036", + "UP", # NumPy-specific rules "NPY001", @@ -152,12 +120,16 @@ select = [ unfixable = [ "NPY001" ] +ignore = [ + # It not met the "Explicit is better than implicit" rule + "UP015", + # It will cause the performance regression on python3.10 + "UP038", +] [tool.ruff.per-file-ignores] # Ignore unused imports in __init__.py "__init__.py" = ["F401"] -# Temporarily ignore test_slice.py to avoid PR-CI-CINN failure, please fix! 
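Illustrative aside, not part of the upstream diff: the generate_kernels.py hunk above switches a C++ header template from %-interpolation to str.format, which is why every literal brace in the template body is doubled ("{{" / "}}") while the "%s" placeholders become "{}". A minimal sketch of that escaping rule, using a placeholder macro name:

macro = "ENABLE_MACRO_PLACEHOLDER"  # stand-in value; the real script passes its ENABLE_MACRO constant
percent_style = "#ifdef %s\nnamespace phi {\n}  // namespace phi\n" % macro
format_style = "#ifdef {}\nnamespace phi {{\n}}  // namespace phi\n".format(macro)
assert percent_style == format_style  # doubled braces render as single literal braces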
-"python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py" = ["UP034"] # Ignore version check in setup.py "setup.py" = ["UP036"] # Ignore unnecessary comprehension in dy2st unittest test_loop diff --git a/python/paddle/autograd/backward_mode.py b/python/paddle/autograd/backward_mode.py index 81a2dadf53debb..767a59f70cab64 100644 --- a/python/paddle/autograd/backward_mode.py +++ b/python/paddle/autograd/backward_mode.py @@ -76,19 +76,19 @@ def backward(tensors, grad_tensors=None, retain_graph=False): """ def check_tensors(in_out_list, name): - assert in_out_list is not None, "{} should not be None".format(name) + assert in_out_list is not None, f"{name} should not be None" if isinstance(in_out_list, (list, tuple)): - assert len(in_out_list) > 0, "{} connot be empty".format(name) + assert len(in_out_list) > 0, f"{name} connot be empty" for each_var in in_out_list: assert isinstance( each_var, (paddle.Tensor, core.eager.Tensor) - ), "Elements of {} must be paddle.Tensor".format(name) + ), f"Elements of {name} must be paddle.Tensor" return in_out_list else: assert isinstance( in_out_list, (paddle.Tensor, core.eager.Tensor) - ), "{} must be Tensor or list of Tensor".format(name) + ), f"{name} must be Tensor or list of Tensor" return [in_out_list] tensors = check_tensors(tensors, "tensors") diff --git a/python/paddle/cost_model/cost_model.py b/python/paddle/cost_model/cost_model.py index 5d27ed4f6a201a..ad8be331cb2296 100644 --- a/python/paddle/cost_model/cost_model.py +++ b/python/paddle/cost_model/cost_model.py @@ -41,7 +41,7 @@ def build_program(self): loss = paddle.mean(hidden) paddle.optimizer.SGD(learning_rate=0.01).minimize(loss) - print("main program is: {}".format(main_program)) + print(f"main program is: {main_program}") return startup_program, main_program diff --git a/python/paddle/dataset/common.py b/python/paddle/dataset/common.py index fb8c4ba9691647..bab6c6f9e74e90 100644 --- a/python/paddle/dataset/common.py +++ b/python/paddle/dataset/common.py @@ -86,17 +86,17 @@ def download(url, module_name, md5sum, save_name=None): retry_limit = 3 while not (os.path.exists(filename) and md5file(filename) == md5sum): if os.path.exists(filename): - sys.stderr.write("file %s md5 %s\n" % (md5file(filename), md5sum)) + sys.stderr.write(f"file {md5file(filename)} md5 {md5sum}\n") if retry < retry_limit: retry += 1 else: raise RuntimeError( - "Cannot download {0} within retry limit {1}".format( + "Cannot download {} within retry limit {}".format( url, retry_limit ) ) sys.stderr.write( - "Cache file %s not found, downloading %s \n" % (filename, url) + f"Cache file {filename} not found, downloading {url} \n" ) sys.stderr.write("Begin to download\n") try: @@ -220,6 +220,4 @@ def _check_exists_and_download(path, url, md5, module_name, download=True): if download: return paddle.dataset.common.download(url, module_name, md5) else: - raise ValueError( - '{} not exists and auto download disabled'.format(path) - ) + raise ValueError(f'{path} not exists and auto download disabled') diff --git a/python/paddle/dataset/image.py b/python/paddle/dataset/image.py index 5d389d5c5caa13..261e12ba69d37e 100755 --- a/python/paddle/dataset/image.py +++ b/python/paddle/dataset/image.py @@ -76,8 +76,8 @@ def batch_images_from_tar( :rtype: string """ batch_dir = data_file + "_batch" - out_path = "%s/%s_%s" % (batch_dir, dataset_name, os.getpid()) - meta_file = "%s/%s_%s.txt" % (batch_dir, dataset_name, os.getpid()) + out_path = f"{batch_dir}/{dataset_name}_{os.getpid()}" + meta_file = 
f"{batch_dir}/{dataset_name}_{os.getpid()}.txt" if os.path.exists(out_path): return meta_file @@ -111,7 +111,7 @@ def batch_images_from_tar( with open(meta_file, mode='a') as meta: for file in os.listdir(out_path): - meta.write(os.path.abspath("%s/%s" % (out_path, file)) + "\n") + meta.write(os.path.abspath(f"{out_path}/{file}") + "\n") return meta_file diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index 1b1aacd7972e8f..b99bdb9a906bf8 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -61,7 +61,7 @@ def __build_dict(tar_file, dict_size, save_path, lang): word_dict[w] += 1 with open(save_path, "wb") as fout: - fout.write(("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK)).encode()) + fout.write((f"{START_MARK}\n{END_MARK}\n{UNK_MARK}\n").encode()) for idx, word in enumerate( sorted(word_dict.items(), key=lambda x: x[1], reverse=True) ): diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index b05c5be8d4568f..0de0e11089ec78 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -224,9 +224,9 @@ def get_cudnn_version(): def _convert_to_place(device): lower_device = device.lower() if device in core.get_all_custom_device_type(): - selected_devices = os.getenv( - "FLAGS_selected_{}s".format(device), "0" - ).split(",") + selected_devices = os.getenv(f"FLAGS_selected_{device}s", "0").split( + "," + ) device_id = int(selected_devices[0]) place = core.CustomPlace(device, device_id) elif lower_device == 'cpu': @@ -343,7 +343,7 @@ def _convert_to_place(device): raise ValueError( "The device must be a string which is like 'cpu', {}".format( ', '.join( - "'{}', '{}:x'".format(x, x) + f"'{x}', '{x}:x'" for x in ['gpu', 'xpu', 'npu', 'mlu'] + core.get_all_custom_device_type() ) @@ -409,7 +409,7 @@ def get_device(): device = 'npu:' + str(device_id) elif isinstance(place, core.IPUPlace): num_devices = core.get_ipu_device_count() - device = "ipus:{{0-{}}}".format(num_devices - 1) + device = f"ipus:{{0-{num_devices - 1}}}" elif isinstance(place, core.MLUPlace): device_id = place.get_device_id() device = 'mlu:' + str(device_id) @@ -418,7 +418,7 @@ def get_device(): device_type = place.get_device_type() device = device_type + ':' + str(device_id) else: - raise ValueError("The device specification {} is invalid".format(place)) + raise ValueError(f"The device specification {place} is invalid") return device @@ -847,7 +847,7 @@ def __hash__(self): return hash((self.stream_base, self.device)) def __repr__(self): - return ''.format( + return ''.format( self.device, self._as_parameter_.value ) diff --git a/python/paddle/device/cuda/graphs.py b/python/paddle/device/cuda/graphs.py index d2d8180386ecca..2983897381a858 100644 --- a/python/paddle/device/cuda/graphs.py +++ b/python/paddle/device/cuda/graphs.py @@ -72,7 +72,7 @@ def print_to_dot_files(self, dirname, flags=None): os.makedirs(name=dirname, exist_ok=True) assert os.path.isdir( dirname - ), "The dirname {} should be a directory".format(dirname) + ), f"The dirname {dirname} should be a directory" if flags is None: flags = 2047 # only all information. 
It can be any integer inside [1, 2048) self._graph.print_to_dot_files(dirname, flags) diff --git a/python/paddle/distributed/auto_parallel/auto_align_tool.py b/python/paddle/distributed/auto_parallel/auto_align_tool.py index d34272385a36b0..76a8db09fdcd94 100644 --- a/python/paddle/distributed/auto_parallel/auto_align_tool.py +++ b/python/paddle/distributed/auto_parallel/auto_align_tool.py @@ -267,13 +267,13 @@ def save(self, save_dir, vars, fetch_list, dist_context=None): dist_attr_path = os.path.join(save_dir, "dist_attr.pkl") else: vars_path = os.path.join( - save_dir, "vars_rank{}.pkl".format(dist.get_rank()) + save_dir, f"vars_rank{dist.get_rank()}.pkl" ) program_path = os.path.join( - save_dir, "program_rank{}.pdmodel".format(dist.get_rank()) + save_dir, f"program_rank{dist.get_rank()}.pdmodel" ) dist_attr_path = os.path.join( - save_dir, "dist_attr_rank{}.pkl".format(dist.get_rank()) + save_dir, f"dist_attr_rank{dist.get_rank()}.pkl" ) if vars is not None: vars_dict = {} @@ -438,7 +438,7 @@ def diff_informations(right_dir, wrong_dir): print( "first different op:\n", op, - "\ndifferent varname is:{}".format(varname), + f"\ndifferent varname is:{varname}", ) if op not in diff_ops_varname_dict: diff_ops_varname_dict[op] = [varname] @@ -514,7 +514,7 @@ def diff_informations_from_dirs(right_dirs, wrong_dirs): print( "first different op:\n", op, - "\ndifferent varname is:{}".format(varname), + f"\ndifferent varname is:{varname}", ) if op not in diff_ops_varname_dict: diff_ops_varname_dict[op] = [varname] diff --git a/python/paddle/distributed/auto_parallel/callbacks.py b/python/paddle/distributed/auto_parallel/callbacks.py index abf6423bfe8ea9..db7f460b0f0521 100644 --- a/python/paddle/distributed/auto_parallel/callbacks.py +++ b/python/paddle/distributed/auto_parallel/callbacks.py @@ -233,12 +233,12 @@ def _is_save(self): def on_epoch_end(self, epoch, logs=None): if self._is_save() and (self.epoch + 1) % self.save_freq == 0: - path = '{}/epoch{}'.format(self.save_dir, epoch) - print('save checkpoint at {}'.format(os.path.abspath(path))) + path = f'{self.save_dir}/epoch{epoch}' + print(f'save checkpoint at {os.path.abspath(path)}') self.model.save(path) def on_train_end(self, logs=None): if self._is_save(): - path = '{}/final'.format(self.save_dir) - print('save checkpoint at {}'.format(os.path.abspath(path))) + path = f'{self.save_dir}/final' + print(f'save checkpoint at {os.path.abspath(path)}') self.model.save(path) diff --git a/python/paddle/distributed/auto_parallel/cluster.py b/python/paddle/distributed/auto_parallel/cluster.py index 9cb9cde457eeb7..f037c9538fde3a 100644 --- a/python/paddle/distributed/auto_parallel/cluster.py +++ b/python/paddle/distributed/auto_parallel/cluster.py @@ -293,9 +293,9 @@ def get_link(self, source_global_id, target_global_id): def __str__(self): str = "" for device in self.devices.values(): - str += ", device: {}".format(device) + str += f", device: {device}" for link in self.links.values(): - str += ", link: {}".format(link) + str += f", link: {link}" return str def __repr__(self): @@ -823,7 +823,7 @@ def get_num_devices_per_machine(self): def __str__(self): str = "" for machine in self.machines.values(): - str += "machine: {}\n".format(machine) + str += f"machine: {machine}\n" return str def __repr__(self): diff --git a/python/paddle/distributed/auto_parallel/completion.py b/python/paddle/distributed/auto_parallel/completion.py index 0925008e58d175..91ffc556c78bdb 100644 --- a/python/paddle/distributed/auto_parallel/completion.py +++ 
b/python/paddle/distributed/auto_parallel/completion.py @@ -1227,7 +1227,7 @@ def _get_op_by_id(ops, id): ) assert ( ref_dims_mapping is not None - ), "[{}] 's dims mapping is NONE".format(input_name) + ), f"[{input_name}] 's dims mapping is NONE" grad_op_dist_attr.set_input_dims_mapping( input_name, ref_dims_mapping ) @@ -1329,9 +1329,7 @@ def _get_op_by_id(ops, id): continue else: - raise ValueError( - "got unexpect op [{}]".format(str(grad_op.type)) - ) + raise ValueError(f"got unexpect op [{str(grad_op.type)}]") self._dist_context.set_op_dist_attr_for_program( grad_op, grad_op_dist_attr @@ -1353,7 +1351,7 @@ def _is_grad_var_name(name): def _get_forward_varname_from_grad_varname(grad_var_name): assert _is_grad_var_name( grad_var_name - ), "[{}] is not a grad varnme.".format(grad_var_name) + ), f"[{grad_var_name}] is not a grad varnme." return grad_var_name[: grad_var_name.find("@GRAD")] def _get_op_by_id(ops, id): @@ -1532,7 +1530,7 @@ def _get_op_by_id(ops, id): ) assert ( ref_dims_mapping is not None - ), "[{}] 's dims mapping is NONE".format(input_name) + ), f"[{input_name}] 's dims mapping is NONE" grad_op_dist_attr.set_input_dims_mapping( input_name, ref_dims_mapping ) @@ -1633,9 +1631,7 @@ def _get_op_by_id(ops, id): ) else: - raise ValueError( - "got unexpect op [{}]".format(str(grad_op.type)) - ) + raise ValueError(f"got unexpect op [{str(grad_op.type)}]") self._dist_context.set_op_dist_attr_for_program( grad_op, grad_op_dist_attr diff --git a/python/paddle/distributed/auto_parallel/converter.py b/python/paddle/distributed/auto_parallel/converter.py index 611759ca02c75e..65df19ad69c174 100644 --- a/python/paddle/distributed/auto_parallel/converter.py +++ b/python/paddle/distributed/auto_parallel/converter.py @@ -150,8 +150,7 @@ def convert(self, strict=True): ) except ValueError as err: raise ValueError( - "Fail to convert tensor '{}'. ".format(str(tensor_name)) - + str(err) + f"Fail to convert tensor '{str(tensor_name)}'. 
" + str(err) ) for tensor_name in self._pre_strategy: diff --git a/python/paddle/distributed/auto_parallel/cost/base_cost.py b/python/paddle/distributed/auto_parallel/cost/base_cost.py index 4046b8cc4dba58..e29e4cc487a23d 100644 --- a/python/paddle/distributed/auto_parallel/cost/base_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/base_cost.py @@ -204,7 +204,7 @@ def _parse_dtype(dtype): elif dtype == paddle.unit8: dtype_str = "unit8" else: - raise TypeError("Unsupported dtype {}".format(dtype)) + raise TypeError(f"Unsupported dtype {dtype}") return dtype_str assert isinstance(desc, dict) @@ -806,7 +806,7 @@ def comm_count(self): elif dtype == paddle.bool: factor = 8 else: - raise ValueError("Unsupported comm dtype {}".format(dtype)) + raise ValueError(f"Unsupported comm dtype {dtype}") comm_count = reduce(lambda x, y: x * y, shape) * factor self._comm_count = comm_count diff --git a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py index b948241da369cd..6c081f94a2aadd 100644 --- a/python/paddle/distributed/auto_parallel/cost/estimate_cost.py +++ b/python/paddle/distributed/auto_parallel/cost/estimate_cost.py @@ -103,7 +103,7 @@ def local_bubble_time(self, rank=None): def _check_mode(self, mode): if mode not in ["modeling", "profiling"]: raise ValueError( - "Just support modeling and profiling, but got {}".format(mode) + f"Just support modeling and profiling, but got {mode}" ) def _is_special_var_name(self, var_name): diff --git a/python/paddle/distributed/auto_parallel/cost_model.py b/python/paddle/distributed/auto_parallel/cost_model.py index 2ad7ec3a032312..ef99ff11383288 100644 --- a/python/paddle/distributed/auto_parallel/cost_model.py +++ b/python/paddle/distributed/auto_parallel/cost_model.py @@ -321,7 +321,7 @@ def _parse_sub_program(self, program, nodes, graph, cost_data, sub_idx): graph[node_id][PRED].remove(pred_id) write_op_cnt += 1 - new_var_id = node_id + '_write_{}'.format(write_op_cnt) + new_var_id = node_id + f'_write_{write_op_cnt}' new_var = TensorCostNode( node.node, CostNodeType.VARIABLE, @@ -376,7 +376,7 @@ def _find_succ_op(self, node_id, sub_idx=0): succ_ops_id = succ_ops_id + self._find_succ_op(succ_id, sub_idx) else: raise NotImplementedError( - 'This type of node not supported yet:{}'.format(succ.type) + f'This type of node not supported yet:{succ.type}' ) return succ_ops_id diff --git a/python/paddle/distributed/auto_parallel/dist_op.py b/python/paddle/distributed/auto_parallel/dist_op.py index 908fad25aaf987..7960adafbdfc4f 100644 --- a/python/paddle/distributed/auto_parallel/dist_op.py +++ b/python/paddle/distributed/auto_parallel/dist_op.py @@ -37,7 +37,7 @@ def __init__(self, serial_op, dist_attr=None): # TODO: Do we really need to write back to serial op? 
self._serial_op.dist_attr = dist_attr else: - assert dist_attr is None, "{}".format(dist_attr) + assert dist_attr is None, f"{dist_attr}" # Use the dist attr of serial_op to do the initialization self._dist_attr = self._serial_op.dist_attr self._serial_inputs = {} diff --git a/python/paddle/distributed/auto_parallel/dist_saver.py b/python/paddle/distributed/auto_parallel/dist_saver.py index a6db512b5b16c6..87a0319204fd35 100644 --- a/python/paddle/distributed/auto_parallel/dist_saver.py +++ b/python/paddle/distributed/auto_parallel/dist_saver.py @@ -103,9 +103,7 @@ def load(self, path, load_optimizer=True): def _load_file(filename, dirname, suffix="pdparams"): file_list = [] for file in os.listdir(dirname): - if check_filename( - '{}(.*)_dist(.*).{}'.format(filename, suffix), file - ): + if check_filename(f'{filename}(.*)_dist(.*).{suffix}', file): file_list.append(os.path.join(dirname, file)) file_list.sort() return file_list @@ -121,7 +119,7 @@ def _load_state(filename, dirname, suffix="pdparams"): state_dict[name].append(np.array(value)) else: state_dict[name] = [np.array(value)] - self._logger.info("Load param file: {}".format(file_list)) + self._logger.info(f"Load param file: {file_list}") return state_dict filename = os.path.basename(path) @@ -141,7 +139,7 @@ def _load_state(filename, dirname, suffix="pdparams"): # load path.pdattr dist_attr_file_list = _load_file(filename, dirname, "pdattr") self._logger.info( - "Load distributed attribute file: {}".format(dist_attr_file_list) + f"Load distributed attribute file: {dist_attr_file_list}" ) dist_attr = {} for dist_attr_file in dist_attr_file_list: diff --git a/python/paddle/distributed/auto_parallel/dist_tensor.py b/python/paddle/distributed/auto_parallel/dist_tensor.py index 2c2b04aaa7aad0..d44fa513f1a33c 100644 --- a/python/paddle/distributed/auto_parallel/dist_tensor.py +++ b/python/paddle/distributed/auto_parallel/dist_tensor.py @@ -74,7 +74,7 @@ def _validate_sizes_and_dist_attr( ) ) if rank is not None and not (isinstance(rank, int) and rank >= 0): - raise ValueError("The rank must >= 0, but got {}".format(rank)) + raise ValueError(f"The rank must >= 0, but got {rank}") # # NOTE: Only support even sharding now # if shard_sizes is not None: @@ -177,7 +177,7 @@ def __init__(self, serial_tensor, dist_attr=None, dist_context=None): # TODO: Do we really need to write dist_attr back to serial_tensor? self._serial_tensor.dist_attr = dist_attr else: - assert dist_attr is None, "{}".format(dist_attr) + assert dist_attr is None, f"{dist_attr}" # Use the dist attr of serial_tensor to do the initialization self._dist_attr = self._serial_tensor.dist_attr @@ -342,11 +342,9 @@ def _copy_kwargs(serial_tensor): return kwargs if rank is not None and not (isinstance(rank, int) and rank >= 0): - raise ValueError("The rank must >= 0, but got {}".format(rank)) + raise ValueError(f"The rank must >= 0, but got {rank}") if block is not None and not isinstance(block, Block): - raise TypeError( - "The block must be Block, but got {}.".format(type(block)) - ) + raise TypeError(f"The block must be Block, but got {type(block)}.") rank = paddle.distributed.get_rank() if rank is None else rank if block is None: @@ -373,7 +371,7 @@ def local_tensor(self, rank=None): rank = paddle.distributed.get_rank() if rank is None else rank assert ( rank in self._local_tensor_map - ), "The rank {} local tensor has not been created.".format(rank) + ), f"The rank {rank} local tensor has not been created." 
return self._local_tensor_map[rank] def __deepcopy__(self, memo): @@ -405,7 +403,7 @@ def __str__(self): annotated_str, self.dist_attr.process_mesh ) - str += ", is_parameter: {}".format(self.serial_tensor.is_parameter) + str += f", is_parameter: {self.serial_tensor.is_parameter}" if self.dist_attr.is_annotated("dims_mapping"): annotated_str = "annotated" diff --git a/python/paddle/distributed/auto_parallel/engine.py b/python/paddle/distributed/auto_parallel/engine.py index 7bd9dea94a8351..4a6181758d114d 100644 --- a/python/paddle/distributed/auto_parallel/engine.py +++ b/python/paddle/distributed/auto_parallel/engine.py @@ -309,7 +309,7 @@ def _prepare_data_tensor(self, inputs_spec, labels_spec, inputs, labels): ) assert isinstance( inputs, list - ), "inputs should be list, but received {}".format(type(inputs)) + ), f"inputs should be list, but received {type(inputs)}" assert len(inputs_spec) == len( inputs ), "the number of `inputs_spec` should be equal to `inputs`'s." @@ -324,7 +324,7 @@ def _prepare_data_tensor(self, inputs_spec, labels_spec, inputs, labels): ) assert isinstance( labels, list - ), "labels should be list, but received {}".format(type(labels)) + ), f"labels should be list, but received {type(labels)}" assert len(labels_spec) == len( labels ), "the number of `labels_spec` should be equal to `labels`'s." @@ -387,12 +387,12 @@ def _prepare_feed(self, data, user_feeds, mode): for name, value in data[0].items(): feeds[name] = value else: - raise ValueError("Unsupported data {}".format(data)) + raise ValueError(f"Unsupported data {data}") elif isinstance(data, dict): for name, value in data.items(): feeds[name] = value else: - raise ValueError("Unsupported data {}".format(data)) + raise ValueError(f"Unsupported data {data}") if user_feeds is not None: assert isinstance( user_feeds, dict @@ -1538,7 +1538,7 @@ def _metrics_name(self): def _switch_mode(self, mode): assert ( mode in self._dist_main_progs - ), "{} model is not ready, please call `prepare()` first.".format(mode) + ), f"{mode} model is not ready, please call `prepare()` first." 
self.to_mode(mode) self._optimizer = self._dist_contexts[mode]._serial_optimizer @@ -1547,7 +1547,7 @@ def to_mode(self, mode): "train", "eval", "predict", - ], "mode {} should be one of ['train', 'eval', 'predict']".format(mode) + ], f"mode {mode} should be one of ['train', 'eval', 'predict']" self._mode = mode def _set_state_dict(self, mode, strict, state_dict, dist_attr): @@ -1637,7 +1637,7 @@ def save(self, path, training=True): self._logger.info("export quantized model.") self._logger.info( - "convert config {}".format(self._strategy.qat.to_dict()) + f"convert config {self._strategy.qat.to_dict()}" ) test_graph = IrGraph( core.Graph(dist_main_prog.desc), for_test=True diff --git a/python/paddle/distributed/auto_parallel/graph.py b/python/paddle/distributed/auto_parallel/graph.py index 0ccb93412abcac..d4cace82585b3f 100644 --- a/python/paddle/distributed/auto_parallel/graph.py +++ b/python/paddle/distributed/auto_parallel/graph.py @@ -44,7 +44,7 @@ def __contains__(self, attr_name): return False def __str__(self): - str = "(id: {}, attrs: {})".format(self.id, self.attrs) + str = f"(id: {self.id}, attrs: {self.attrs})" return str @@ -177,12 +177,12 @@ def __str__(self): str = "" str += "**************Nodes**************\n" for node_id in self.nodes: - str += "{}\n".format(self.nodes[node_id]) + str += f"{self.nodes[node_id]}\n" str += "**************Edges**************\n" for src_id in self.adjs: - str += "--------------{}--------------\n".format(src_id) + str += f"--------------{src_id}--------------\n" for idx, tgt_id in enumerate(self.adjs[src_id]): - str += "{}\n".format(self.adjs[src_id][tgt_id]) + str += f"{self.adjs[src_id][tgt_id]}\n" return str diff --git a/python/paddle/distributed/auto_parallel/interface.py b/python/paddle/distributed/auto_parallel/interface.py index 2072c2923cd9fb..9fda85ecef010a 100644 --- a/python/paddle/distributed/auto_parallel/interface.py +++ b/python/paddle/distributed/auto_parallel/interface.py @@ -78,7 +78,7 @@ def shard_tensor(x, process_mesh=None, shard_spec=None): ), "Specify the process mesh argument or use ProcessMesh context manager first." 
assert isinstance( shard_spec, list - ), "Argument shard_spec {} is not an instance of list".format(shard_spec) + ), f"Argument shard_spec {shard_spec} is not an instance of list" dist_tensor = DistributedTensor(x) serial_tensor = dist_tensor.serial_tensor dist_tensor.dist_attr.process_mesh = process_mesh diff --git a/python/paddle/distributed/auto_parallel/mapper.py b/python/paddle/distributed/auto_parallel/mapper.py index 8b0d17e8fc6bd1..fd4319490794bd 100644 --- a/python/paddle/distributed/auto_parallel/mapper.py +++ b/python/paddle/distributed/auto_parallel/mapper.py @@ -72,7 +72,7 @@ def get_dtype_bytes(dtype): elif dtype == paddle.uint8: num_bytes = 1 else: - raise ValueError("Unrecognized dtype {}.".format(dtype)) + raise ValueError(f"Unrecognized dtype {dtype}.") return num_bytes diff --git a/python/paddle/distributed/auto_parallel/operators/common.py b/python/paddle/distributed/auto_parallel/operators/common.py index 63b58df02c89a0..209319f861a77b 100644 --- a/python/paddle/distributed/auto_parallel/operators/common.py +++ b/python/paddle/distributed/auto_parallel/operators/common.py @@ -284,7 +284,7 @@ def is_parameter_related(varname, block, dist_context=None): varname = varname[: varname.index(".quantized")] assert block._find_var_recursive( varname - ), "cannot find var {} in cur block".format(varname) + ), f"cannot find var {varname} in cur block" var = block._var_recursive(varname) # NOTE(hack method): to find the param which is resharded if dist_context and "@RESHARD" in varname: diff --git a/python/paddle/distributed/auto_parallel/operators/dist_default.py b/python/paddle/distributed/auto_parallel/operators/dist_default.py index 11537dde064284..2e7ad3d12e0b6d 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_default.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_default.py @@ -51,7 +51,7 @@ def prim_operator_data_parallel_functor(ctx, src_op): if var_name in ctx.grads_params: assert ( var_name not in ctx.synced_gradient - ), "in primtive mode, grad is already {} synced".format(var_name) + ), f"in primtive mode, grad is already {var_name} synced" ctx.synced_gradient.add(var_name) sync_group = new_process_group(ctx.data_parallel_group) @@ -460,7 +460,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -588,7 +588,7 @@ def backward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( backward_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in backward_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name diff --git a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py index 51e7f154f9deba..92fc5f31a81eb9 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_embedding.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_embedding.py @@ -362,7 +362,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute 
!".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" # check validation of inputs / outputs assert 'Ids' in kwargs, "input [{}] is not given".format('Ids') diff --git a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py index ee3c680aa5681e..f3e1c74771b730 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_matmul.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_matmul.py @@ -315,7 +315,7 @@ def _right_operand_parameter_matmul_backward(ctx, *args, **kwargs): dist_attr = ctx.get_op_dist_attr_for_program(backward_op) assert ( dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(backward_op)) + ), f"backward op [{str(backward_op)}] don't have dist attribute !" # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism if rank_id not in dist_attr.process_mesh.process_ids: @@ -765,7 +765,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism if rank_id not in op_dist_attr.process_mesh.process_ids: @@ -780,7 +780,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -1150,7 +1150,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism if rank_id not in op_dist_attr.process_mesh.process_ids: @@ -1165,7 +1165,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -1667,7 +1667,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" 
# FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism if rank_id not in op_dist_attr.process_mesh.process_ids: @@ -1682,7 +1682,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -2050,7 +2050,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism if rank_id not in op_dist_attr.process_mesh.process_ids: @@ -2065,7 +2065,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -2558,7 +2558,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" # FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism if rank_id not in op_dist_attr.process_mesh.process_ids: @@ -2573,7 +2573,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -2949,7 +2949,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" 
# FIXME (JZ-LIANG) Remove this hack to support any op mesh group for Pipeline Parallelism if rank_id not in op_dist_attr.process_mesh.process_ids: @@ -2964,7 +2964,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name diff --git a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py index 10b6d457f6fb16..22977b7459cfdc 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_pnorm.py @@ -172,7 +172,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -286,7 +286,7 @@ def backward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( backward_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in backward_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py b/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py index 6bd0284477d113..50a4d3466b0fc7 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reduce_sum_p.py @@ -92,7 +92,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name diff --git a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py index f238c47b2fd2b0..d1e590c379ec3f 100644 --- a/python/paddle/distributed/auto_parallel/operators/dist_reshape.py +++ b/python/paddle/distributed/auto_parallel/operators/dist_reshape.py @@ -240,7 +240,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" 
# check validation of inputs / outputs for input_name in src_op.desc.input_names(): @@ -249,7 +249,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -500,7 +500,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" # check validation of inputs / outputs for input_name in src_op.desc.input_names(): @@ -509,7 +509,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name @@ -753,7 +753,7 @@ def forward(ctx, *args, **kwargs): op_dist_attr = ctx.get_op_dist_attr_for_program(src_op) assert ( op_dist_attr is not None - ), "backward op [{}] don't have dist attribute !".format(str(src_op)) + ), f"backward op [{str(src_op)}] don't have dist attribute !" # check validation of inputs / outputs for input_name in src_op.desc.input_names(): @@ -762,7 +762,7 @@ def forward(ctx, *args, **kwargs): ) assert len(kwargs[input_name]) == len( src_op.desc.input(input_name) - ), "number of tensor for input [{}] is not match".format(input_name) + ), f"number of tensor for input [{input_name}] is not match" for output_name in src_op.desc.output_names(): assert output_name in kwargs, "input [{}] is not given".format( output_name diff --git a/python/paddle/distributed/auto_parallel/partitioner.py b/python/paddle/distributed/auto_parallel/partitioner.py index 2dcd73163eecd7..f542b49fdecbde 100644 --- a/python/paddle/distributed/auto_parallel/partitioner.py +++ b/python/paddle/distributed/auto_parallel/partitioner.py @@ -284,7 +284,7 @@ def partition_block(self, ref_block, target_block): for varname_not_in_block in __varname_not_in_block__: assert ( varname_not_in_block in serial_input_varname - ), "{} is not found".format(serial_input_varname) + ), f"{serial_input_varname} is not found" self._serial2dist_varname_mapping[ serial_input_varname @@ -334,7 +334,7 @@ def partition_block(self, ref_block, target_block): self._dist_context, **kinputs, **koutputs, - **{"grad_var_to_var": grad_var_to_var} + **{"grad_var_to_var": grad_var_to_var}, ) elif is_optimize_op(op): # NOTE: BACKWARD_ONLY_DIST_OPS's op_role must 2 because of 1F1B PASS @@ -346,7 +346,7 @@ def partition_block(self, ref_block, target_block): self._dist_context, **kinputs, **koutputs, - **{"grad_var_to_var": {}} + **{"grad_var_to_var": {}}, ) else: raise NotImplementedError( @@ -443,7 +443,7 @@ def _partition_parameter( stop_gradient=src_var.stop_gradient, is_data=src_var.is_data, belong_to_optimizer=src_var.belong_to_optimizer, - **copied_kwargs + **copied_kwargs, ) return param diff --git a/python/paddle/distributed/auto_parallel/planner.py b/python/paddle/distributed/auto_parallel/planner.py index 1f48374f8256c7..6a511187909766 100755 --- a/python/paddle/distributed/auto_parallel/planner.py +++ 
b/python/paddle/distributed/auto_parallel/planner.py @@ -420,7 +420,7 @@ def enum_valid_dist_attr_for_program( assert ( op_valid_dist_attrs is not None - ), "Enumerate {} valid distributed attribute failed.".format(op) + ), f"Enumerate {op} valid distributed attribute failed." valid_dist_attr_dict[op.desc.id()] = [ op_valid_dist_attrs, pipeline_stage, diff --git a/python/paddle/distributed/auto_parallel/process_group.py b/python/paddle/distributed/auto_parallel/process_group.py index 3ab96bb2a9be99..83e1642ba21bb1 100644 --- a/python/paddle/distributed/auto_parallel/process_group.py +++ b/python/paddle/distributed/auto_parallel/process_group.py @@ -116,7 +116,7 @@ def local_rank(self, global_rank): return self.ranks.index(global_rank) else: raise AssertionError( - "Rank {} doesn't belong to this group".format(global_rank) + f"Rank {global_rank} doesn't belong to this group" ) def is_instantiate(self): diff --git a/python/paddle/distributed/auto_parallel/process_mesh.py b/python/paddle/distributed/auto_parallel/process_mesh.py index 34fecda5169e1e..531de9b545e937 100644 --- a/python/paddle/distributed/auto_parallel/process_mesh.py +++ b/python/paddle/distributed/auto_parallel/process_mesh.py @@ -106,7 +106,7 @@ def __init__(self, mesh=None, dim_names=None, shape=None, process_ids=None): unique_dim_names = set(self._dim_names) assert len(unique_dim_names) == len( self._dim_names - ), 'All dim_names {} must be unique.'.format(dim_names) + ), f'All dim_names {dim_names} must be unique.' # Follow the requirement for using pybind11 core.ProcessMesh.__init__( diff --git a/python/paddle/distributed/auto_parallel/reshard.py b/python/paddle/distributed/auto_parallel/reshard.py index ce651596196ca0..7461e85c672483 100644 --- a/python/paddle/distributed/auto_parallel/reshard.py +++ b/python/paddle/distributed/auto_parallel/reshard.py @@ -55,7 +55,7 @@ def get_var_with_recursion(var_name, block, program): # parent_block = program.blocks[block.parent_idx] # if var_name in parent_block.vars: # var = parent_block.vars[var_name] - assert var is not None, "{} is not found".format(var.name) + assert var is not None, f"{var.name} is not found" return var diff --git a/python/paddle/distributed/auto_parallel/strategy.py b/python/paddle/distributed/auto_parallel/strategy.py index 41ddad975d5e5d..58a08586ff5cbd 100644 --- a/python/paddle/distributed/auto_parallel/strategy.py +++ b/python/paddle/distributed/auto_parallel/strategy.py @@ -62,7 +62,7 @@ def __repr__(self): result_dict = self.to_dict() string = "{" for k, v in result_dict.items(): - string += "\"%s\":\"%s\"," % (k, v) + string += f"\"{k}\":\"{v}\"," return string + "}" def __deepcopy__(self, memo): @@ -166,7 +166,7 @@ def __init__(self, config=None): # self._config_dict = yaml.load(yaml_file, Loader=yaml.Loader) else: raise ValueError( - "Expected a dictionary. But received: {}".format(config) + f"Expected a dictionary. But received: {config}" ) else: self._config_dict = {} diff --git a/python/paddle/distributed/auto_parallel/tuner/algorithms.py b/python/paddle/distributed/auto_parallel/tuner/algorithms.py index 26255e8062794c..c79f3b3760607c 100644 --- a/python/paddle/distributed/auto_parallel/tuner/algorithms.py +++ b/python/paddle/distributed/auto_parallel/tuner/algorithms.py @@ -99,7 +99,7 @@ def impl(cls): def new_algorithm(name, config): algor_class = AlgorithmBase._REGISTERED_ALGORITHMS.get(name) - assert algor_class is not None, "Algorithm {} is not defined.".format(name) + assert algor_class is not None, f"Algorithm {name} is not defined." 
algor_obj = algor_class(config) return algor_obj @@ -140,7 +140,7 @@ def next_trial(self): sharding = new_strategy.sharding sharding.stage = stage - name = "trial-sharding-stage{}".format(stage) + name = f"trial-sharding-stage{stage}" trial = Trial(new_strategy, name, self.changed_configs) return trial diff --git a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py index 952c32b3add1fc..b6dea66f7bfcb7 100644 --- a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py @@ -89,7 +89,7 @@ def parse_process_groups(): def get_metric(results): assert isinstance( results, dict - ), "results should be type of dictionary, but got {}.".format(type(results)) + ), f"results should be type of dictionary, but got {type(results)}." if 'Throughtput' in results and isinstance(results['Throughtput'], float): return float(results['Throughtput']) else: @@ -529,7 +529,7 @@ def _profile_trial(self, trial): def _evaluate_trial(self, trial): - self._logger.info("Trial {} evaluation start.".format(trial.name)) + self._logger.info(f"Trial {trial.name} evaluation start.") self._apply_optimization(trial) if self._config.mode == "PROFILE": @@ -541,7 +541,7 @@ def _evaluate_trial(self, trial): ) else: raise NotImplementedError( - "invalid evaluation mode: {}".format(self._config.mode) + f"invalid evaluation mode: {self._config.mode}" ) self._logger.info( diff --git a/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py b/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py index d8eb232ab303ae..4a3f85d6b21daf 100644 --- a/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/parallel_tuner.py @@ -577,7 +577,7 @@ def _populate_space(self): return {"status": TrialStatus.RUNNING, "values": values} def _create_trial(self): - trial_id = "{{:0{}d}}".format(len(str(self._max_trials))) + trial_id = f"{{:0{len(str(self._max_trials))}d}}" trial_id = trial_id.format(self._num_trials) if self._max_trials and self._num_trials >= self._max_trials: @@ -955,7 +955,7 @@ def _estimate_trial(self): max_memory = self._estimator._estimate_max_memory_by_dist_op( self._dist_context ) - print("\tmax_memory", "{:,}".format(max_memory), flush=True) + print("\tmax_memory", f"{max_memory:,}", flush=True) # The max memory must be less than 80% 32GB (hard code) if max_memory > 32 * 0.8 * 1024 * 1024 * 1024: return math.inf diff --git a/python/paddle/distributed/auto_parallel/tuner/profiler.py b/python/paddle/distributed/auto_parallel/tuner/profiler.py index 138a8bddd41ab2..cca53773ebbef5 100644 --- a/python/paddle/distributed/auto_parallel/tuner/profiler.py +++ b/python/paddle/distributed/auto_parallel/tuner/profiler.py @@ -219,7 +219,7 @@ def profiler(args): # load ctx if not os.path.isfile(args.ctx_filename): raise ValueError( - "There is no profile context named {}.".format(args.ctx_filename) + f"There is no profile context named {args.ctx_filename}." ) with open(args.ctx_filename, 'rb') as f: profile_ctx = pickle.load(f, encoding='latin1') @@ -270,7 +270,7 @@ def profiler(args): with open(result_path, 'w') as fp: json.dump(result_dict, fp) - print("profile done! avg speed : {} step / s.".format(avg_tput)) + print(f"profile done! 
avg speed : {avg_tput} step / s.") except paddle.framework.core.EOFException: data_loader._inner_dataloader.reset() @@ -286,7 +286,7 @@ def profiler(args): with open(result_path, 'w') as fp: json.dump(result_dict, fp) - print("profile failed with error: [{}]".format(error_type)) + print(f"profile failed with error: [{error_type}]") print(e) print(traceback.format_exc()) diff --git a/python/paddle/distributed/auto_parallel/tuner/recorder.py b/python/paddle/distributed/auto_parallel/tuner/recorder.py index bef49631a35ffe..6faaac8977910b 100644 --- a/python/paddle/distributed/auto_parallel/tuner/recorder.py +++ b/python/paddle/distributed/auto_parallel/tuner/recorder.py @@ -59,7 +59,7 @@ def __eq__(self, other): return other.value == self.value and other.step == self.step def __repr__(self): - return "MetricRecord(value={}, step={})".format(self.value, self.step) + return f"MetricRecord(value={self.value}, step={self.step})" class MetricRecords: @@ -166,7 +166,7 @@ def register_metrics(self, metrics=None): def register(self, name, direction=None): if self.exists(name): - raise ValueError("Metric {} have been registered.".format(name)) + raise ValueError(f"Metric {name} have been registered.") if direction is None: direction = "min" self._records[name] = MetricRecords(direction) diff --git a/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py b/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py index c52d18eb2e9132..b2b5489796e82e 100644 --- a/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/rule_based_tuner.py @@ -1336,7 +1336,7 @@ def _gen_fwd_sub_program_by_clone(self, ops): stop_gradient=src_var.stop_gradient, is_data=src_var.is_data, belong_to_optimizer=src_var.belong_to_optimizer, - **copied_kwargs + **copied_kwargs, ) else: target_block._clone_variable(vars[var_name]) @@ -1538,7 +1538,7 @@ def _is_grad_var_name(name): ) assert ( ref_dims_mapping is not None - ), "[{}] 's dims mapping is NONE".format(input_name) + ), f"[{input_name}] 's dims mapping is NONE" grad_op_dist_attr.set_input_dims_mapping( input_name, ref_dims_mapping ) @@ -1937,7 +1937,7 @@ def prepare(self): self.gen_fwd_sub_programs_by_clone() end = time.time() self._logger.info( - "Generate programs of every layer in {}s.".format(end - begin) + f"Generate programs of every layer in {end - begin}s." ) # step3: partition devices to device meshes @@ -1948,7 +1948,7 @@ def prepare(self): ) device_meshes_list = ClusterPartitionUtil.partition_cluster(n, m) end = time.time() - self._logger.info("Partition cluster in {}s.".format(end - begin)) + self._logger.info(f"Partition cluster in {end - begin}s.") # step4: transform device mesh to process meshes dm_idx = 0 @@ -1987,7 +1987,7 @@ def prepare(self): begin = time.time() self.gen_full_program() end = time.time() - self._logger.info("Generate full program in {}s.".format(end - begin)) + self._logger.info(f"Generate full program in {end - begin}s.") # step6: complete forward sub programs begin = time.time() @@ -1995,7 +1995,7 @@ def prepare(self): self.complete_sub_fwd_programs(process_mesh) end = time.time() self._logger.info( - "Complete all sub forward programs in {}s.".format(end - begin) + f"Complete all sub forward programs in {end - begin}s." 
) if self.mode == "train": @@ -2004,7 +2004,7 @@ def prepare(self): self.complete_sub_bwd_programs() end = time.time() self._logger.info( - "Complete all sub backward programs in {}s.".format(end - begin) + f"Complete all sub backward programs in {end - begin}s." ) # step8: complete update sub programs @@ -2012,7 +2012,7 @@ def prepare(self): self.complete_sub_update_programs() end = time.time() self._logger.info( - "Complete all sub update programs in {}s.".format(end - begin) + f"Complete all sub update programs in {end - begin}s." ) def tune_o1(self): @@ -2137,7 +2137,7 @@ def save_strategy(self, best_dist_context, path): dist_attrs["cluster"] = self._cluster with open(path, 'wb') as f: pickle.dump(dist_attrs, f) - self._logger.info("The strategy has been saved at {}".format(path)) + self._logger.info(f"The strategy has been saved at {path}") def run_or_quit(self): # Quit if just tune @@ -2151,7 +2151,7 @@ def tune(self): begin = time.time() self.match_program(self._dist_context.serial_main_program) end = time.time() - self._logger.info("Pattern match in {}s.".format(end - begin)) + self._logger.info(f"Pattern match in {end - begin}s.") if self._use_dp: completer = Completer(self._dist_context) @@ -2213,7 +2213,7 @@ def tune(self): self._dist_context._process_meshes = best_dist_context._process_meshes end = time.time() - self._logger.info("Rule-based tuner end in {}s.".format(end - begin)) + self._logger.info(f"Rule-based tuner end in {end - begin}s.") self._logger.info("The best strategy found is as follows: ") print_program_with_dist_attr(self.full_main_program, best_dist_context) diff --git a/python/paddle/distributed/auto_parallel/tuner/trial.py b/python/paddle/distributed/auto_parallel/tuner/trial.py index d0662e4c8eed1e..b2c6a68b3d0def 100644 --- a/python/paddle/distributed/auto_parallel/tuner/trial.py +++ b/python/paddle/distributed/auto_parallel/tuner/trial.py @@ -85,7 +85,7 @@ def summary(self): print(tv + ":", value) if self.score is not None: - print("Score: {}".format(self.score)) + print(f"Score: {self.score}") def get_state(self): return { @@ -137,7 +137,7 @@ def summary(self): length = max_k + max_v + spacing - h1_format = " " + "|{{:^{}s}}|\n".format(length) + h1_format = " " + f"|{{:^{length}s}}|\n" h2_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format( max_k, " " * spacing, max_v ) @@ -152,7 +152,7 @@ def summary(self): for name in self._changed_configs: draws += border + "\n" - draws += h1_format.format("{} auto=True <-> {}".format(name, name)) + draws += h1_format.format(f"{name} auto=True <-> {name}") draws += line + "\n" my_configs = getattr(self.space, name) keys = my_configs.to_dict().keys() diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py index 32017c855a4385..84f1e8924b60a1 100644 --- a/python/paddle/distributed/auto_parallel/tuner/tunable_space.py +++ b/python/paddle/distributed/auto_parallel/tuner/tunable_space.py @@ -49,13 +49,13 @@ def get_value(self, name): if name in self.values: return self.values[name] else: - raise KeyError("{} does not exist.".format(name)) + raise KeyError(f"{name} does not exist.") def set_value(self, name, value): if name in self.values: self.values[name] = value else: - raise KeyError("{} does not exist.".format(name)) + raise KeyError(f"{name} does not exist.") def _exists(self, name): if name in self._variables: @@ -151,7 +151,7 @@ def _deserialize_tunable_variable(state): cls_name = state["class_name"] cls = cls_name_to_cls[cls_name] if 
cls is None: - raise ValueError("Unknown class name {}".format(cls_name)) + raise ValueError(f"Unknown class name {cls_name}") cls_state = state["state"] deserialized_object = cls.from_state(cls_state) diff --git a/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py b/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py index df3efe004991d1..6f46ccb90132a9 100644 --- a/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py +++ b/python/paddle/distributed/auto_parallel/tuner/tunable_variable.py @@ -59,7 +59,7 @@ def random(self, seed=None): return self._default def __repr__(self): - return "Fixed(name: {}, value: {})".format(self.name, self.default) + return f"Fixed(name: {self.name}, value: {self.default})" class Boolean(TunableVariable): @@ -71,7 +71,7 @@ def __init__(self, name, default=False): super().__init__(name=name, default=default) if default not in {True, False}: raise ValueError( - "default must be a Python boolean, but got {}".format(default) + f"default must be a Python boolean, but got {default}" ) def random(self, seed=None): @@ -195,9 +195,7 @@ def get_state(self): def _check_int(self, val): int_val = int(val) if int_val != val: - raise ValueError( - "Expects val is an int, but found: {}.".format(str(val)) - ) + raise ValueError(f"Expects val is an int, but found: {str(val)}.") return int_val def __repr__(self): diff --git a/python/paddle/distributed/auto_parallel/utils.py b/python/paddle/distributed/auto_parallel/utils.py index d34b75198f4538..c248b9957a41b9 100644 --- a/python/paddle/distributed/auto_parallel/utils.py +++ b/python/paddle/distributed/auto_parallel/utils.py @@ -521,9 +521,7 @@ def _check_valid_path(file_path): "but got '{}'.".format(str(type(file))) ) if not os.path.exists(file): - raise ValueError( - "The file path '{}' does not exist.".format(file) - ) + raise ValueError(f"The file path '{file}' does not exist.") return file_path else: raise TypeError( @@ -742,16 +740,14 @@ def _save_distributed_attribute(program, dist_attr_path, dist_context): # TODO: just save a complete distributed attribute file rank_id = paddle.distributed.get_rank() dist_attr_name = os.path.join( - dist_attr_path, "dist_attr_rank{}.pdattr".format(rank_id) + dist_attr_path, f"dist_attr_rank{rank_id}.pdattr" ) dist_attr_dict = { "model": get_dist_attr(program, dist_context), "world_size": paddle.distributed.get_world_size(), } paddle.save(dist_attr_dict, dist_attr_name) - logging.info( - "Already saved distributed attribute to '{}'.".format(dist_attr_path) - ) + logging.info(f"Already saved distributed attribute to '{dist_attr_path}'.") def _load_distributed_attribute(dist_attr_path): @@ -774,7 +770,7 @@ def _save_distributed_state_dict(program, addition_info, checkpoint_path): """Save parameters' state_dict""" rank = paddle.distributed.get_rank() ckpt_file_name = os.path.join( - checkpoint_path, "model_state_rank{}.pdmodel".format(rank) + checkpoint_path, f"model_state_rank{rank}.pdmodel" ) state_dict = { "model": program.state_dict(), @@ -782,7 +778,7 @@ def _save_distributed_state_dict(program, addition_info, checkpoint_path): "addition_info": addition_info, } paddle.save(state_dict, ckpt_file_name) - logging.info("Already saved model to '{}'.".format(checkpoint_path)) + logging.info(f"Already saved model to '{checkpoint_path}'.") def _load_distributed_state_dict(checkpoint_path): @@ -2339,7 +2335,7 @@ def insert_dependencies_for_vars( ) if op_namescope is not None: - depend_op._set_attr('op_namescope', "/{}".format(op_namescope)) + 
depend_op._set_attr('op_namescope', f"/{op_namescope}") if sync: block._sync_with_cpp() diff --git a/python/paddle/distributed/cloud_utils.py b/python/paddle/distributed/cloud_utils.py index 078ba9c56cae4a..3fd8ce5d16a3a8 100644 --- a/python/paddle/distributed/cloud_utils.py +++ b/python/paddle/distributed/cloud_utils.py @@ -77,9 +77,7 @@ def get_cloud_cluster(args_node_ips, args_node_ip, args_port, selected_devices): paddle_ports_num >= len(selected_devices) and paddle_port != args_port ): - logger.warning( - "Use Cloud specified port:{}.".format(paddle_port) - ) + logger.warning(f"Use Cloud specified port:{paddle_port}.") started_port = paddle_port except Exception as e: @@ -139,9 +137,9 @@ def get_cluster_and_pod(args): args.started_port, selected_devices, ) - logger.info("get cluster from cloud:{}".format(cluster)) + logger.info(f"get cluster from cloud:{cluster}") else: cluster, pod = get_cluster_from_args(args, selected_devices) - logger.info("get cluster from args:{}".format(cluster)) + logger.info(f"get cluster from args:{cluster}") return cluster, pod diff --git a/python/paddle/distributed/communication/group.py b/python/paddle/distributed/communication/group.py index b5c32fe00539ef..5fff4440877df9 100644 --- a/python/paddle/distributed/communication/group.py +++ b/python/paddle/distributed/communication/group.py @@ -100,9 +100,7 @@ def _get_global_group(): def _add_new_group(group): if group.id in _GroupManager.group_map_by_id: - raise RuntimeError( - "The group with id {} already exist.".format(group.id) - ) + raise RuntimeError(f"The group with id {group.id} already exist.") _GroupManager.group_map_by_id[group.id] = group @@ -195,7 +193,7 @@ def destroy_process_group(group=None): group = _get_global_group() if group is None else group assert ( group.id in _GroupManager.group_map_by_id - ), "Destroy group with id {} is invalid.".format(group.id) + ), f"Destroy group with id {group.id} is invalid." 
if _is_global_group(group): _GroupManager.group_map_by_id.clear() else: @@ -228,7 +226,7 @@ def get_group(id=0): if id in _GroupManager.group_map_by_id: return _GroupManager.group_map_by_id[id] - warnings.warn("Group {} is not initialized.".format(id)) + warnings.warn(f"Group {id} is not initialized.") return None diff --git a/python/paddle/distributed/communication/reduce.py b/python/paddle/distributed/communication/reduce.py index 35a58327df1362..4ee2142856fa91 100644 --- a/python/paddle/distributed/communication/reduce.py +++ b/python/paddle/distributed/communication/reduce.py @@ -67,17 +67,17 @@ def _get_reduce_op(reduce_op, func_name): return framework.core.ReduceOp.PRODUCT else: if reduce_op == ReduceOp.SUM: - return 'c_{}_sum'.format(func_name) + return f'c_{func_name}_sum' elif reduce_op == ReduceOp.MAX: - return 'c_{}_max'.format(func_name) + return f'c_{func_name}_max' elif reduce_op == ReduceOp.MIN: - return 'c_{}_min'.format(func_name) + return f'c_{func_name}_min' elif reduce_op == ReduceOp.PROD: - return 'c_{}_prod'.format(func_name) + return f'c_{func_name}_prod' else: - return 'c_{}'.format(func_name) + return f'c_{func_name}' - raise ValueError("Unknown reduce_op type for {}.".format(func_name)) + raise ValueError(f"Unknown reduce_op type for {func_name}.") def reduce(tensor, dst, op=ReduceOp.SUM, group=None, sync_op=True): @@ -182,4 +182,4 @@ def reduce(tensor, dst, op=ReduceOp.SUM, group=None, sync_op=True): gdst, ) else: - raise ValueError("Unknown parameter: {}.".format(op)) + raise ValueError(f"Unknown parameter: {op}.") diff --git a/python/paddle/distributed/elastic.py b/python/paddle/distributed/elastic.py index 082fdd3c07bea5..2d8b659453151e 100644 --- a/python/paddle/distributed/elastic.py +++ b/python/paddle/distributed/elastic.py @@ -28,7 +28,7 @@ def __init__(self, server, name): self.np_path = self.prefix + '/np' def set_np(self, np): - self.etcd.put(self.np_path, '{}'.format(np).encode('latin-1')) + self.etcd.put(self.np_path, f'{np}'.encode('latin-1')) def scale_np(self, np): if self.etcd.get(self.np_path)[0] is not None: @@ -72,6 +72,6 @@ def close(self): if args.action == "clean": cmd.clean() - print("action {} done".format(args.action)) + print(f"action {args.action} done") cmd.close() diff --git a/python/paddle/distributed/fleet/ascend_utils.py b/python/paddle/distributed/fleet/ascend_utils.py index ac41d5744afc9d..132ee3afac67c2 100644 --- a/python/paddle/distributed/fleet/ascend_utils.py +++ b/python/paddle/distributed/fleet/ascend_utils.py @@ -84,10 +84,8 @@ def _get_ascend_rankfile(rank_table_file_path): nodes = os.getenv("DLS_TASK_NUMBER", None) assert nodes is not None, "DLS_TASK_NUMBER didn't set!" for node in range(int(nodes)): - node_ip = os.getenv("VC_CUSTOM{}_HOSTS".format(node), None) - assert ( - node_ip is not None - ), "VC_CUSTOM{}_HOSTS didn't set!".format(node) + node_ip = os.getenv(f"VC_CUSTOM{node}_HOSTS", None) + assert node_ip is not None, f"VC_CUSTOM{node}_HOSTS didn't set!" 
node_ips.append(node_ip) return node_ips, device_count node_ips.append(server['server_id']) @@ -118,7 +116,7 @@ def get_cloud_cluster( assert ( node_ip in node_ips - ), "Can't find your local ip {%s} in node_ips: {%s}" % ( + ), "Can't find your local ip {{{}}} in node_ips: {{{}}}".format( node_ip, node_ips, ) diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 950fddaf9dba73..194e4bd6675555 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -73,7 +73,7 @@ def assign_configs_value(msg, config): def check_configs_key(msg, config, field_name): key_list = msg.DESCRIPTOR.fields_by_name.keys() for key in config: - assert key in key_list, "key:{} not in {}".format(key, field_name) + assert key in key_list, f"key:{key} not in {field_name}" class DistributedJobInfo: @@ -158,7 +158,7 @@ def __init__(self): def __setattr__(self, key, value): if self.__lock_attr and not hasattr(self, key): raise TypeError( - "%s is not a attribute of %s" % (key, self.__class__.__name__) + f"{key} is not a attribute of {self.__class__.__name__}" ) object.__setattr__(self, key, value) @@ -2477,7 +2477,7 @@ def __repr__(self): length = max_k + max_v + spacing - h1_format = " " + "|{{:^{}s}}|\n".format(length) + h1_format = " " + f"|{{:^{length}s}}|\n" h2_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format( max_k, " " * spacing, max_v ) @@ -2505,7 +2505,7 @@ def __repr__(self): if getattr(self.strategy, f.name): draws += border + "\n" draws += h1_format.format( - "{}=True <-> {}_configs".format(f.name, f.name) + f"{f.name}=True <-> {f.name}_configs" ) draws += line + "\n" my_configs = getattr( diff --git a/python/paddle/distributed/fleet/base/orthogonal_strategy.py b/python/paddle/distributed/fleet/base/orthogonal_strategy.py index 9da61af0734734..e226b0de2d256d 100644 --- a/python/paddle/distributed/fleet/base/orthogonal_strategy.py +++ b/python/paddle/distributed/fleet/base/orthogonal_strategy.py @@ -94,7 +94,7 @@ def strategy_group(self, name): """ assert ( name in self._list_of_strategy_name - ), "Strategy group {} is not created.".format(name) + ), f"Strategy group {name} is not created." return self._name_to_group_dict[name] def fused_strategy_group(self, name): @@ -109,7 +109,7 @@ def fused_strategy_group(self, name): """ assert ( name in self._name_to_fused_group_dict - ), "Fused strategy group {} is not created.".format(name) + ), f"Fused strategy group {name} is not created." return self._name_to_fused_group_dict[name] def rank_in_strategy(self, name): @@ -124,7 +124,7 @@ def rank_in_strategy(self, name): """ assert ( name in self._list_of_strategy_name - ), "Strategy group {} is not created.".format(name) + ), f"Strategy group {name} is not created." 
return self._name_to_group_dict[name].group.rank def _check_valid_strategy(self): diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index c2f093e209b017..cdfa58330f66f6 100755 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -185,7 +185,7 @@ def init(rank, nodes, role): def _init_http(self, ip, port, prefix, start_http_server, http_server_d): def __start_kv_server(http_server_d, size_d): - print("start http_server: {}, {}".format(port, size_d)) + print(f"start http_server: {port}, {size_d}") from paddle.distributed.fleet.utils.http_server import KVServer http_server = KVServer(port, size_d) @@ -203,7 +203,7 @@ def init_kv_server(http_server_d): size_d = { worker_key: self._worker_num, } - print("worker_key:{}, size: {}".format(worker_key, size_d)) + print(f"worker_key:{worker_key}, size: {size_d}") http_server_d["running"] = True # child process for http server diff --git a/python/paddle/distributed/fleet/base/strategy_group.py b/python/paddle/distributed/fleet/base/strategy_group.py index 9e4ae744d3352b..96c56d80879262 100644 --- a/python/paddle/distributed/fleet/base/strategy_group.py +++ b/python/paddle/distributed/fleet/base/strategy_group.py @@ -103,7 +103,7 @@ def __init__(self, list_of_ranks): super().__init__(list_of_ranks) assert not isinstance( self.group, list - ), "Rank {} belongs to multi dp groups".format(self._rank) + ), f"Rank {self._rank} belongs to multi dp groups" class MPGroup(StrategyGroupBase): @@ -122,7 +122,7 @@ def __init__(self, list_of_ranks): super().__init__(list_of_ranks) assert not isinstance( self.group, list - ), "Rank {} belongs to multi mp groups".format(self._rank) + ), f"Rank {self._rank} belongs to multi mp groups" class ShardingGroup(StrategyGroupBase): @@ -141,7 +141,7 @@ def __init__(self, list_of_ranks): super().__init__(list_of_ranks) assert not isinstance( self.group, list - ), "Rank {} belongs to multi sharding groups".format(self._rank) + ), f"Rank {self._rank} belongs to multi sharding groups" class PPGroup(StrategyGroupBase): @@ -160,7 +160,7 @@ def __init__(self, list_of_ranks): super().__init__(list_of_ranks) assert not isinstance( self.group, list - ), "Rank {} belongs to multi pp groups".format(self._rank) + ), f"Rank {self._rank} belongs to multi pp groups" self._send_next_group = None self._send_prev_group = None diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index 13bda135804c26..47776655f3615a 100755 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -383,7 +383,7 @@ def _proto_check(self, config): ] pruned_vars = OrderedDict(pruned_vars) pruned_vars_name = list(pruned_vars) - print("persistable vars in pruned program: {}".format(pruned_vars_name)) + print(f"persistable vars in pruned program: {pruned_vars_name}") # feed and fetch op is added in pruned program when pruning, not need to be found in train program feed_fetch_type_list = [ @@ -688,5 +688,5 @@ def check_not_expected_ops(prog, not_expected_op_types): ) for i, v in enumerate(fetch_list): print("fetch_targets name: %s" % v.name) - print("fetch_targets: {}".format(results[i])) + print(f"fetch_targets: {results[i]}") return results diff --git a/python/paddle/distributed/fleet/cloud_utils.py b/python/paddle/distributed/fleet/cloud_utils.py index 832261c9cdf530..75df0fae32d1b0 100644 --- 
a/python/paddle/distributed/fleet/cloud_utils.py +++ b/python/paddle/distributed/fleet/cloud_utils.py @@ -65,9 +65,7 @@ def get_cloud_cluster( paddle_ports_num >= len(devices_per_proc) and paddle_port != args_port ): - logger.warning( - "Use Cloud specified port:{}.".format(paddle_port) - ) + logger.warning(f"Use Cloud specified port:{paddle_port}.") started_port = paddle_port except Exception as e: diff --git a/python/paddle/distributed/fleet/elastic/collective.py b/python/paddle/distributed/fleet/elastic/collective.py index 499da820672bc1..171ab773091c3c 100644 --- a/python/paddle/distributed/fleet/elastic/collective.py +++ b/python/paddle/distributed/fleet/elastic/collective.py @@ -49,7 +49,7 @@ def launch(self): ) for idx, proc in enumerate(self.procs): - logger.info("launch proc_id:{} idx:{}".format(proc.proc.pid, idx)) + logger.info(f"launch proc_id:{proc.proc.pid} idx:{idx}") def stop(self): logger.info("collective lauchner stop ...") diff --git a/python/paddle/distributed/fleet/elastic/manager.py b/python/paddle/distributed/fleet/elastic/manager.py index e16e13aa89edb6..33c25332f36d30 100644 --- a/python/paddle/distributed/fleet/elastic/manager.py +++ b/python/paddle/distributed/fleet/elastic/manager.py @@ -64,9 +64,7 @@ def _terminate_procs(self): os.killpg(os.getpgid(p.proc.pid), signal.SIGTERM) if p.log_fn: p.log_fn.close() - logger.info( - "terminate process group gid:{}".format(p.proc.pid) - ) + logger.info(f"terminate process group gid:{p.proc.pid}") time.sleep(1) for p in self.procs: @@ -74,7 +72,7 @@ def _terminate_procs(self): p.proc.terminate() if p.log_fn: p.log_fn.close() - logger.info("terminate process id:{}".format(p.proc.pid)) + logger.info(f"terminate process id:{p.proc.pid}") for step in range(0, 50): alive = False @@ -198,7 +196,7 @@ def __init__(self, args, etcd_client): os.getenv('PADDLE_ELASTIC_ETCD_SERVICE_PORT'), ) - logger.debug('init with server {} host {}'.format(server, host)) + logger.debug(f'init with server {server} host {host}') self.hosts = [] self.stopped = False @@ -300,9 +298,7 @@ def lease_heartbeat(): # endpoints handle DISTRIBUTED_TRAINER_ENDPOINTS and PADDLE_TRAINERS self.etcd.put( self.endpoints_path, - '{}|{}'.format(self.dist_endpoints, self.trainers).encode( - 'latin-1' - ), + f'{self.dist_endpoints}|{self.trainers}'.encode('latin-1'), ) def endpoints_call_back(event): @@ -316,7 +312,7 @@ def endpoints_call_back(event): self.dist_endpoints ) ) - logger.info("set PADDLE_TRAINERS {} ".format(self.trainers)) + logger.info(f"set PADDLE_TRAINERS {self.trainers} ") endpoints_watch = self.etcd.add_watch_callback( self.endpoints_path, endpoints_call_back @@ -345,7 +341,7 @@ def _host_to_endpoints( return dist_endpoints def exit(self, completed=False): - logger.info('manager exist completed {}'.format(completed)) + logger.info(f'manager exist completed {completed}') if self.launcher: self.launcher.stop() @@ -463,7 +459,7 @@ def _match(self, host_list: list = None): def _update_endpoint(self, endpoints, hosts): self.etcd.put( self.endpoints_path, - '{}|{}'.format(endpoints, hosts).encode('latin-1'), + f'{endpoints}|{hosts}'.encode('latin-1'), ) def _update_fault_tolrance(self): @@ -479,7 +475,7 @@ def _update_fault_tolrance(self): self.dist_endpoints ) ) - logger.info("update env PADDLE_TRAINERS {} ".format(self.trainers)) + logger.info(f"update env PADDLE_TRAINERS {self.trainers} ") return # fault tolerance @@ -490,7 +486,7 @@ def _update_fault_tolrance(self): self.hosts[idx] = self.hosts[rank] self.hosts[rank] = self.curr_host else: - 
os.environ['PADDLE_TRAINER_ID'] = '{}'.format(idx) + os.environ['PADDLE_TRAINER_ID'] = f'{idx}' hosts = ','.join([host_port.split(":")[0] for host_port in self.hosts]) self.args.ips = hosts os.environ['PADDLE_TRAINERS'] = hosts @@ -591,12 +587,10 @@ def wait(self): idx = 1 while not self.stopped: if self._match(): - logger.info('ready with hosts {}'.format(self.hosts)) + logger.info(f'ready with hosts {self.hosts}') self._update_hosts() return - logger.info( - 'not ready for np {} with hosts {}'.format(self.np, self.hosts) - ) + logger.info(f'not ready for np {self.np} with hosts {self.hosts}') idx += 1 time.sleep(2) return @@ -618,7 +612,7 @@ def watch(self): logger.debug(f"launcher.watch():{ret}") if ret is not None: # self terminated - logger.info('job exit with code {}'.format(ret)) + logger.info(f'job exit with code {ret}') if ret == ELASTIC_AUTO_PARALLEL_EXIT_CODE: logger.info('job re-launch for auto parallel') self.launcher.stop() diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 32a36783a71b9c..cb8f19c81d6ae2 100755 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -89,7 +89,7 @@ def _print_arguments(args): print("----------- Configuration Arguments -----------") for arg, value in sorted(vars(args).items()): - print("%s: %s" % (arg, value)) + print(f"{arg}: {value}") print("------------------------------------------------") @@ -290,7 +290,7 @@ def get_cluster_from_args(args, device_mode, devices_per_proc): assert ( node_ip in node_ips - ), "Can't find your local ip {%s} in node_ips: {%s}" % (node_ip, node_ips) + ), f"Can't find your local ip {{{node_ip}}} in node_ips: {{{node_ips}}}" node_rank = node_ips.index(node_ip) logger.debug( @@ -308,7 +308,7 @@ def get_cluster_from_args(args, device_mode, devices_per_proc): free_ports = find_free_ports(len(devices_per_proc)) if free_ports is not None: free_ports = list(free_ports) - logger.info("find free ports:{}".format(free_ports)) + logger.info(f"find free ports:{free_ports}") else: start_port = 6070 if os.environ.get('FLAGS_START_PORT') is not None: @@ -416,7 +416,7 @@ def get_cluster_info(args): cluster, pod = cloud_utils.get_cloud_cluster( args.ips, device_mode, devices_per_proc, start_port ) - logger.debug("get cluster from cloud:{}".format(cluster)) + logger.debug(f"get cluster from cloud:{cluster}") elif device_mode == DeviceMode.ASCEND_NPU: # for ascend cluster, pod = ascend_utils.get_cloud_cluster( @@ -429,7 +429,7 @@ def get_cluster_info(args): cluster, pod = get_cluster_from_args( args, device_mode, devices_per_proc ) - logger.debug("get cluster from args:{}".format(cluster)) + logger.debug(f"get cluster from args:{cluster}") return cluster, pod @@ -458,7 +458,7 @@ def launch_collective(args): ) for idx, proc in enumerate(procs): - print("launch proc_id:{} idx:{}".format(proc.proc.pid, idx)) + print(f"launch proc_id:{proc.proc.pid} idx:{idx}") while True: try: @@ -466,7 +466,7 @@ def launch_collective(args): if not alive: logger.info("Local processes completed.") - logger.debug("POD info:{}".format(pod)) + logger.debug(f"POD info:{pod}") break time.sleep(3) diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index ef30b7af9bc863..ba066651921bbd 100755 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -118,10 +118,10 @@ def world_device_ids(self): def pods_endpoints(self): r = [] for pod in self.pods: - ep = 
"{}:{}".format(pod.addr, pod.port) + ep = f"{pod.addr}:{pod.port}" assert ( pod.port is not None and pod.addr is not None - ), "{} not a valid endpoint".format(ep) + ), f"{ep} not a valid endpoint" r.append(ep) return r @@ -138,7 +138,7 @@ def __init__(self): self.endpoint = None def __str__(self): - return "{}".format(self.endpoint) + return f"{self.endpoint}" def __eq__(self, j): return self.endpint == j.endpoint @@ -215,42 +215,34 @@ def __eq__(self, pod): or self.addr != pod.addr or self.port != pod.port ): - logger.debug("pod {} != {}".format(self, pod)) + logger.debug(f"pod {self} != {pod}") return False if len(self.trainers) != len(pod.trainers): - logger.debug( - "trainers {} != {}".format(self.trainers, pod.trainers) - ) + logger.debug(f"trainers {self.trainers} != {pod.trainers}") return False for i in range(len(self.trainers)): if self.trainers[i] != pod.trainers[i]: - logger.debug( - "trainer {} != {}".format(self.trainers[i], pod.trainers[i]) - ) + logger.debug(f"trainer {self.trainers[i]} != {pod.trainers[i]}") return False if len(self.servers) != len(pod.servers): - logger.debug("servers {} != {}".format(self.servers, pod.servers)) + logger.debug(f"servers {self.servers} != {pod.servers}") return False for i in range(len(self.servers)): if self.servers[i] != pod.servers[i]: - logger.debug( - "servers {} != {}".format(self.servers[i], pod.servers[i]) - ) + logger.debug(f"servers {self.servers[i]} != {pod.servers[i]}") return False if len(self.workers) != len(pod.workers): - logger.debug("workers {} != {}".format(self.workers, pod.workers)) + logger.debug(f"workers {self.workers} != {pod.workers}") return False for i in range(len(self.workers)): if self.workers[i] != pod.workers[i]: - logger.debug( - "workers {} != {}".format(self.workers[i], pod.workers[i]) - ) + logger.debug(f"workers {self.workers[i]} != {pod.workers[i]}") return False return True @@ -267,9 +259,9 @@ def rank(self): def get_visible_accelerators(self): r = "" for g in self.accelerators: - r += "{},".format(g) + r += f"{g}," - assert r != "", "this pod {} can't see any accelerators".format(self) + assert r != "", f"this pod {self} can't see any accelerators" r = r[:-1] return r @@ -343,7 +335,7 @@ def terminate_local_procs(procs): os.killpg(os.getpgid(p.proc.pid), signal.SIGTERM) if p.log_fn: p.log_fn.close() - logger.info("terminate process group gid:{}".format(p.proc.pid)) + logger.info(f"terminate process group gid:{p.proc.pid}") time.sleep(1) @@ -352,7 +344,7 @@ def terminate_local_procs(procs): p.proc.terminate() if p.log_fn: p.log_fn.close() - logger.debug("terminate process id:{}".format(p.proc.pid)) + logger.debug(f"terminate process id:{p.proc.pid}") # wait all process terminiated time.sleep(3) @@ -396,7 +388,7 @@ def add_arguments(argname, type, default, help, argparser, **kwargs): default=default, type=type, help=help + ' Default: %(default)s.', - **kwargs + **kwargs, ) @@ -453,7 +445,7 @@ def pretty_print_envs(envs, header=None): h_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format( max_k, " " * spacing, max_v ) - l_format = " " + "|{{:>{}s}}{{}}{{:^{}s}}|\n".format(max_k, max_v) + l_format = " " + f"|{{:>{max_k}s}}{{}}{{:^{max_v}s}}|\n" length = max_k + max_v + spacing border = " +" + "".join(["="] * length) + "+" @@ -479,7 +471,7 @@ def pretty_print_envs(envs, header=None): draws += border - _str = "\n{}\n".format(draws) + _str = f"\n{draws}\n" return _str @@ -498,7 +490,7 @@ def __init__(self): def run_with_coverage(*args): global _run_with_coverage - assert len(args) <= 1, "len(args) {} should 
<= 1".format(len(args)) + assert len(args) <= 1, f"len(args) {len(args)} should <= 1" if len(args) == 1: assert isinstance(args[0], bool) _run_with_coverage = args[0] @@ -592,7 +584,7 @@ def start_local_trainers( + training_script_args ) - logger.debug("start trainer proc{} env:{}".format(cmd, current_env)) + logger.debug(f"start trainer proc{cmd} env:{current_env}") if idx == 0: logger.info( @@ -610,9 +602,9 @@ def start_local_trainers( fn = None pre_fn = None if os.name == 'nt' else os.setsid if log_dir is not None: - os.system("mkdir -p {}".format(log_dir)) + os.system(f"mkdir -p {log_dir}") if os.path.exists("%s/endpoints.log" % log_dir): - os.system("rm -f {}/endpoints.log".format(log_dir)) + os.system(f"rm -f {log_dir}/endpoints.log") with open("%s/endpoints.log" % log_dir, "w") as f: f.write("PADDLE_TRAINER_ENDPOINTS: \n") f.write("\n".join(cluster.trainers_endpoints())) @@ -752,11 +744,12 @@ def get_xpus(xpus): # therefore xpus=0,1,2,3 xpu_visible_devices_list = xpu_visible_devices.split(',') for x in xpus.split(','): - assert ( - x in xpu_visible_devices_list - ), "Can't find " "your xpus %s in XPU_VISIBLE_DEVICES[%s]." % ( - x, - xpu_visible_devices, + assert x in xpu_visible_devices_list, ( + "Can't find " + "your xpus {} in XPU_VISIBLE_DEVICES[{}].".format( + x, + xpu_visible_devices, + ) ) res_xpus = [ xpu_visible_devices_list.index(x.strip()) @@ -821,11 +814,12 @@ def get_mlus(mlus): # therefore mlus=0,1,2,3 mlu_visible_devices_list = mlu_visible_devices.split(',') for x in mlus.split(','): - assert ( - x in mlu_visible_devices_list - ), "Can't find " "your mlus %s in MLU_VISIBLE_DEVICES[%s]." % ( - x, - mlu_visible_devices, + assert x in mlu_visible_devices_list, ( + "Can't find " + "your mlus {} in MLU_VISIBLE_DEVICES[{}].".format( + x, + mlu_visible_devices, + ) ) res_mlus = [ mlu_visible_devices_list.index(x.strip()) @@ -1086,7 +1080,7 @@ def get_mapped_cluster_from_args_without_rank_mapping(args, device_mode): assert ( node_ip in node_ips - ), "Can't find your local ip {%s} in node_ips: {%s}" % (node_ip, node_ips) + ), f"Can't find your local ip {{{node_ip}}} in node_ips: {{{node_ips}}}" node_rank = node_ips.index(node_ip) assert len(node_ranks) == len( @@ -1220,7 +1214,7 @@ def get_mapped_cluster_from_args_with_rank_mapping(args, device_mode): assert ( node_ip in node_ips - ), "Can't find your local ip {%s} in node_ips: {%s}" % (node_ip, node_ips) + ), f"Can't find your local ip {{{node_ip}}} in node_ips: {{{node_ips}}}" node_rank = node_ips.index(node_ip) assert ( @@ -1645,7 +1639,7 @@ def start_ps(self): for i in range(len(self.server_endpoints_ips)): if ip == self.server_endpoints_ips[i]: server = Trainer() - server.endpoint = "%s:%s" % ( + server.endpoint = "{}:{}".format( ip, self.server_endpoints_port[i], ) @@ -1655,7 +1649,7 @@ def start_ps(self): for j in range(len(self.worker_endpoints_ips)): if ip == self.worker_endpoints_ips[j]: worker = Trainer() - worker.endpoint = "%s:%s" % ( + worker.endpoint = "{}:{}".format( ip, self.worker_endpoints_port[j], ) @@ -1666,7 +1660,7 @@ def start_ps(self): for m in range(len(self.coordinator_endpoints_ips)): if ip == self.coordinator_endpoints_ips[m]: coordinator = Trainer() - coordinator.endpoint = "%s:%s" % ( + coordinator.endpoint = "{}:{}".format( ip, self.coordinator_endpoints_port[m], ) @@ -1678,7 +1672,7 @@ def start_ps(self): for k in range(len(self.heter_worker_endpoints_ips)): if ip == self.heter_worker_endpoints_ips[k]: heter_worker = Trainer() - heter_worker.endpoint = "%s:%s" % ( + heter_worker.endpoint = 
"{}:{}".format( ip, self.heter_worker_endpoints_port[k], ) @@ -1827,7 +1821,7 @@ def start_pod_server(self, args, pod): ) if args.log_dir is not None: - os.system("mkdir -p {}".format(args.log_dir)) + os.system(f"mkdir -p {args.log_dir}") fn = open("%s/serverlog.%d" % (args.log_dir, idx), "w") self.log_fns["server"].append(fn) proc = subprocess.Popen( @@ -1935,7 +1929,7 @@ def start_pod_worker(self, args, pod): ) if args.log_dir is not None: - os.system("mkdir -p {}".format(args.log_dir)) + os.system(f"mkdir -p {args.log_dir}") fn = open("%s/workerlog.%d" % (args.log_dir, idx), "w") self.log_fns["worker"].append(fn) proc = subprocess.Popen( @@ -2003,7 +1997,7 @@ def start_pod_coordinator(self, args, pod): ) if args.log_dir is not None: - os.system("mkdir -p {}".format(args.log_dir)) + os.system(f"mkdir -p {args.log_dir}") fn = open("%s/coordinator.%d" % (args.log_dir, idx), "w") self.log_fns["coordinator"].append(fn) proc = subprocess.Popen( @@ -2094,7 +2088,7 @@ def start_pod_heter_worker(self, args, pod): ) if args.log_dir is not None: - os.system("mkdir -p {}".format(args.log_dir)) + os.system(f"mkdir -p {args.log_dir}") fn = open("%s/heterlog.%d" % (args.log_dir, idx), "w") self.log_fns["heter_worker"].append(fn) proc = subprocess.Popen( diff --git a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py index fade4aa61ce84c..c71650600de05c 100644 --- a/python/paddle/distributed/fleet/layers/mpu/mp_ops.py +++ b/python/paddle/distributed/fleet/layers/mpu/mp_ops.py @@ -221,7 +221,7 @@ def _mp_allreduce( if in_dygraph_mode(): group = collective._get_default_group() if group is None else group - assert op == ReduceOp.SUM, "Unknown parameter: {}.".format(op) + assert op == ReduceOp.SUM, f"Unknown parameter: {op}." 
from paddle.autograd import PyLayer @@ -350,7 +350,7 @@ def forward(self, input): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'in_features={}, out_features={}, dtype={}{}'.format( self.weight.shape[0], self.weight.shape[1], self._dtype, name_str ) diff --git a/python/paddle/distributed/fleet/layers/mpu/random.py b/python/paddle/distributed/fleet/layers/mpu/random.py index c07dd654edae6c..b5b2010ba97a59 100644 --- a/python/paddle/distributed/fleet/layers/mpu/random.py +++ b/python/paddle/distributed/fleet/layers/mpu/random.py @@ -48,10 +48,10 @@ def reset(self): def add(self, name, seed): if seed in self.seeds_: - raise ValueError('seed {} already exists'.format(seed)) + raise ValueError(f'seed {seed} already exists') self.seeds_.add(seed) if name in self.states_: - raise ValueError('state {} already exists'.format(name)) + raise ValueError(f'state {name} already exists') orig_rng_state = paddle.get_cuda_rng_state() paddle.seed(seed) self.states_[name] = paddle.get_cuda_rng_state() @@ -69,7 +69,7 @@ def set_states_tracker(self, states): @contextlib.contextmanager def rng_state(self, name=MODEL_PARALLEL_RNG): if name not in self.states_: - raise ValueError('state {} does not exist'.format(name)) + raise ValueError(f'state {name} does not exist') orig_cuda_rng_state = paddle.get_cuda_rng_state() paddle.set_cuda_rng_state(self.states_[name]) try: diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py index b4f8019067590f..e5a1e1b26014ac 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py @@ -64,7 +64,7 @@ def _endpoint_to_world_rank_id(self, endpoint): world_endpoints = fleet.worker_endpoints() assert ( endpoint in world_endpoints - ), "endpoint (%s) not in worker_endpoints (%s) " % ( + ), "endpoint ({}) not in worker_endpoints ({}) ".format( endpoint, fleet.world_device_ids(), ) diff --git a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py index b67ea084a27ef9..d973541c8660fa 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py +++ b/python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py @@ -194,7 +194,7 @@ def apply(self, op): self.op = op assert ( self.op.type == self.parser_name - ), "op [%s] != parser_name[%s]" % (self.op.type, self.parser_name) + ), f"op [{self.op.type}] != parser_name[{self.parser_name}]" # print("begin to parse op %s" % (self.parser_name)) geop_list, index_list = self._apply() self.update_output(geop_list, index_list) @@ -1719,7 +1719,7 @@ def _apply(self): dtype = self.op.attr("dtype") assert max_v > min_v, ( "assert max_v > min_v, but received " - + "as max_v={}, min_v={} ".format(max_v, min_v) + + f"as max_v={max_v}, min_v={min_v} " ) tensor1 = self._create_ge_tensor([len(shape)], 2, shape) diff --git a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py index e7072344455572..2c9f22986d95a1 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/raw_program_optimizer.py @@ -383,7 +383,7 @@ def 
_allreduce_fusion_program(self): # insert coalesce tensor fused_var = block.create_var( name=unique_name.generate( - 'FusedOutput_{}'.format(grad_segment[0].name) + f'FusedOutput_{grad_segment[0].name}' ), dtype=grad_segment[0].dtype, persistable=False, diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py index e27d14b57b3025..174727de043321 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/utils.py @@ -465,9 +465,7 @@ def insert_coalesce_tensor( continue fused_var = block.create_var( - name=unique_name.generate( - 'Fused{}_{}'.format(prefix, group[0].name) - ), + name=unique_name.generate(f'Fused{prefix}_{group[0].name}'), dtype=group[0].dtype, persistable=False, stop_gradient=True, @@ -830,7 +828,7 @@ def get_grad_device(grad_name, shard): assert ( base_name in shard.global_param2device - ), "[{}] should be a param variable.".format(base_name) + ), f"[{base_name}] should be a param variable." return shard.global_param2device[base_name] @@ -947,14 +945,14 @@ def comm_analyse(main_program): gap = 1 for k, v in broadcast_vars.items(): - print("broadcast: {}: {} KB".format(k, v)) + print(f"broadcast: {k}: {v} KB") if int(v / gap) in varsize_count: varsize_count[int(v / gap)] += 1 else: varsize_count[int(v / gap)] = 1 for k, v in reduce_vars.items(): - print("allreduce: {}: {} KB".format(k, v)) + print(f"allreduce: {k}: {v} KB") if int(v / gap) in varsize_count: varsize_count[int(v / gap)] += 1 else: @@ -963,10 +961,8 @@ def comm_analyse(main_program): with open("nccl_size.txt", 'w') as f: sorted_varsize = sorted(varsize_count.items(), key=lambda x: x[0]) for varsize, count in sorted_varsize: - print("NCCL size {}~{} KB: {}".format(varsize, varsize + 1, count)) - f.write( - "NCCL size {}~{} KB: {}\n".format(varsize, varsize + 1, count) - ) + print(f"NCCL size {varsize}~{varsize + 1} KB: {count}") + f.write(f"NCCL size {varsize}~{varsize + 1} KB: {count}\n") def add_sync_comm(program, sharding_ring_id): diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py index 264c48870e84f3..e6a559208657a9 100755 --- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py @@ -456,7 +456,7 @@ def _insert_allreduce_for_pp(self, params_grads): if self.pp_allreduce_in_optimize: logger.info( - "Pipeline Persistable grad is {}".format(accumulated_grad_names) + f"Pipeline Persistable grad is {accumulated_grad_names}" ) # FIXME(wangxi): accumulated_grad get from pipeline is not # include sharding's param@BroadCast grad when @@ -472,7 +472,7 @@ def _insert_allreduce_for_pp(self, params_grads): rank=self.sharding_rank, ) - logger.info("PP-Sharding grad is {}".format(accumulated_grad_names)) + logger.info(f"PP-Sharding grad is {accumulated_grad_names}") first_optimize_op_index += len(main_block.ops) - len_of_ops len_of_ops = len(main_block.ops) @@ -488,9 +488,7 @@ def _insert_allreduce_for_pp(self, params_grads): rank=self.dp_rank, strategy=strategy, ) - logger.info( - "Optimizer grad in this rank {}".format(accumulated_grad_names) - ) + logger.info(f"Optimizer grad in this rank {accumulated_grad_names}") first_optimize_op_index += len(main_block.ops) - len_of_ops len_of_ops = len(main_block.ops) @@ -507,9 +505,7 @@ def 
_insert_allreduce_for_pp(self, params_grads): rank=self.dp_rank, strategy=None if optimize_cast else strategy, ) - logger.info( - "Optimizer param in this rank {}".format(optimizer_param) - ) + logger.info(f"Optimizer param in this rank {optimizer_param}") if not strategy.fuse_grad_merge and not optimize_cast: assert len(accumulated_grad_names) == len(optimizer_param) elif self.hybrid_dp and self.hybrid_dp_mode == "pp_hybrid_dp": @@ -756,7 +752,7 @@ def _init_npu_pipeline_comm(self, startup_block): pair_key = pair[0] * 1000 + pair[1] ring_id = self.pp_ring_map[pair_key] max_ring_id = max(max_ring_id, ring_id) - logger.info("pp pair:{}, ring_id: {}".format(pair, ring_id)) + logger.info(f"pp pair:{pair}, ring_id: {ring_id}") if self.pp_rank in pair: my_pair.append(pair) @@ -786,18 +782,14 @@ def _init_npu_pipeline_comm(self, startup_block): ring_id = self.pp_ring_map[pair[0] * 1000 + pair[1]] self._init_pair_comm(pair, ring_id) my_pair.remove(pair) - logger.info( - "pair0(even->odd): pp pair:{}, ring_id: {}".format(pair, ring_id) - ) + logger.info(f"pair0(even->odd): pp pair:{pair}, ring_id: {ring_id}") # 2. even recv from next, odd send to prev, 1->0, 3->2 pair = recv_from_next_pair if even else send_to_prev_pair ring_id = self.pp_ring_map[pair[0] * 1000 + pair[1]] self._init_pair_comm(pair, ring_id) my_pair.remove(pair) - logger.info( - "pair1(even<-odd): pp pair:{}, ring_id: {}".format(pair, ring_id) - ) + logger.info(f"pair1(even<-odd): pp pair:{pair}, ring_id: {ring_id}") # if pp_degree is 2, only need pair(0->1, 1->0) if self.pp_degree > 2: @@ -855,7 +847,7 @@ def _init_pipeline_comm(self, startup_block): for pair in self.pipeline_pair: pair_key = pair[0] * 1000 + pair[1] ring_id = self.pp_ring_map[pair_key] - logger.info("pp pair:{}, ring_id: {}".format(pair, ring_id)) + logger.info(f"pp pair:{pair}, ring_id: {ring_id}") if self.pp_rank in pair: self._init_pair_comm(pair, ring_id) @@ -1051,10 +1043,10 @@ def _split_program(self, block): if self._sharding_segment_strategy == "segment_anchors": assert ( len(self._forward_remain_anchors) == 0 - ), "remain anchors {}".format(self._forward_remain_anchors) + ), f"remain anchors {self._forward_remain_anchors}" assert ( len(self._backward_remain_anchors) == 0 - ), "remain anchors {}".format(self._backward_remain_anchors) + ), f"remain anchors {self._backward_remain_anchors}" if self._verbose: for varname in sorted( @@ -1066,7 +1058,7 @@ def _split_program(self, block): ) ) for idx_ in range(len(self._segments)): - logger.info("segment [{}] :".format(idx_)) + logger.info(f"segment [{idx_}] :") logger.info( "start op: [{}] [{}]".format( block.ops[self._segments[idx_]._start_idx].desc.type(), @@ -1732,41 +1724,39 @@ def _build_groups(self): # NOTE (JZ-LIANG) when use global ring for calc global norm and dp_degree > 1, the allreduce result should be devided by dp_degree self.global_ring_id = 3 - logger.info("global word size: {}".format(self.global_word_size)) - logger.info("global rank: {}".format(self.global_rank)) - logger.info("global endpoints: {}".format(self.global_endpoints)) - logger.info("global ring id: {}".format(self.global_ring_id)) + logger.info(f"global word size: {self.global_word_size}") + logger.info(f"global rank: {self.global_rank}") + logger.info(f"global endpoints: {self.global_endpoints}") + logger.info(f"global ring id: {self.global_ring_id}") logger.info("#####" * 6) - logger.info("mp group size: {}".format(self.mp_degree)) - logger.info("mp rank: {}".format(self.mp_rank)) - logger.info("mp group id: 
{}".format(self.mp_group_id)) - logger.info("mp group endpoints: {}".format(self.mp_group_endpoints)) - logger.info("mp ring id: {}".format(self.mp_ring_id)) + logger.info(f"mp group size: {self.mp_degree}") + logger.info(f"mp rank: {self.mp_rank}") + logger.info(f"mp group id: {self.mp_group_id}") + logger.info(f"mp group endpoints: {self.mp_group_endpoints}") + logger.info(f"mp ring id: {self.mp_ring_id}") logger.info("#####" * 6) - logger.info("sharding group size: {}".format(self.sharding_degree)) - logger.info("sharding rank: {}".format(self.sharding_rank)) - logger.info("sharding group id: {}".format(self.sharding_group_id)) + logger.info(f"sharding group size: {self.sharding_degree}") + logger.info(f"sharding rank: {self.sharding_rank}") + logger.info(f"sharding group id: {self.sharding_group_id}") logger.info( - "sharding group endpoints: {}".format(self.sharding_group_endpoints) + f"sharding group endpoints: {self.sharding_group_endpoints}" ) - logger.info("sharding ring id: {}".format(self.sharding_ring_id)) + logger.info(f"sharding ring id: {self.sharding_ring_id}") logger.info("#####" * 6) - logger.info("pp group size: {}".format(self.pp_degree)) - logger.info("pp rank: {}".format(self.pp_rank)) - logger.info("pp group id: {}".format(self.pp_group_id)) - logger.info("pp group endpoints: {}".format(self.pp_group_endpoints)) - logger.info("pp ring id: {}".format(self.pp_ring_id)) + logger.info(f"pp group size: {self.pp_degree}") + logger.info(f"pp rank: {self.pp_rank}") + logger.info(f"pp group id: {self.pp_group_id}") + logger.info(f"pp group endpoints: {self.pp_group_endpoints}") + logger.info(f"pp ring id: {self.pp_ring_id}") logger.info("#####" * 6) - logger.info("pure dp group size: {}".format(self.dp_degree)) - logger.info("pure dp rank: {}".format(self.dp_rank)) - logger.info( - "pure dp group endpoints: {}".format(self.dp_group_endpoints) - ) - logger.info("pure dp ring id: {}".format(self.dp_ring_id)) + logger.info(f"pure dp group size: {self.dp_degree}") + logger.info(f"pure dp rank: {self.dp_rank}") + logger.info(f"pure dp group endpoints: {self.dp_group_endpoints}") + logger.info(f"pure dp ring id: {self.dp_ring_id}") logger.info("#####" * 6) return diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py index 927da810c591ec..30485d8e633359 100755 --- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py +++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py @@ -81,7 +81,7 @@ def __init__( forward_func=None, shared_weight_attr='weight', *inputs, - **kwargs + **kwargs, ): super().__init__(layer_func, *inputs, **kwargs) self.layer_name = key @@ -581,26 +581,26 @@ def _print_segmentation_for_debug(self): ) for index, layer in enumerate(self._layers_desc[start:end]): - logger.info("{}: {}".format(index + start, str(layer))) + logger.info(f"{index + start}: {str(layer)}") if self._num_virtual_pipeline_stages > 1: for stage in range(self._num_stages): stage_to_virtual_stage_info = ( - "stage {} contains virtual stages: ".format(stage) + f"stage {stage} contains virtual stages: " ) for i in range( stage, self._total_stages_with_virtual_stages, self._num_stages, ): - stage_to_virtual_stage_info += " {},".format(i) + stage_to_virtual_stage_info += f" {i}," logger.info(stage_to_virtual_stage_info) if self._loss_fn: try: - logger.info("loss: {}".format(self._loss_fn.__name__)) + logger.info(f"loss: 
{self._loss_fn.__name__}") except AttributeError: - logger.info("loss: {}".format(self._loss_fn.__class__.__name__)) + logger.info(f"loss: {self._loss_fn.__class__.__name__}") def _build_layer_with_interleave(self): for i in range(len(self._start_poss)): @@ -715,7 +715,7 @@ def forward(self, input, chunk_id=None): input = recompute_hybrid( self.recompute_ctx, self.forward_function(start_idx, end_idx), - *input + *input, ) else: input = self.forward_function(start_idx, end_idx)(*input) @@ -746,17 +746,15 @@ def _offset_dirname(ckpt_dir, local_layer_idx, local_chunk_id=None): pos_offset = self._start_poss[local_chunk_id] idx = local_layer_idx + pos_offset model_rank = self._topo.get_coord(self.global_rank).model - rank_message = "-tensor_" + "{:0>2d}".format(model_rank) + rank_message = "-tensor_" + f"{model_rank:0>2d}" virtual_pipeline_stage_message = "" if self._num_virtual_pipeline_stages > 1: # add virtual pipeline info to the save path assert local_chunk_id is not None virtual_pipeline_stage_message = ( - "-virtual_pp_stage_{:0>2d}".format(local_chunk_id) + f"-virtual_pp_stage_{local_chunk_id:0>2d}" ) - layer_save_path = os.path.join( - ckpt_dir, 'layer_{:0>2d}'.format(idx) - ) + layer_save_path = os.path.join(ckpt_dir, f'layer_{idx:0>2d}') layer_save_path = ( layer_save_path + virtual_pipeline_stage_message @@ -784,9 +782,7 @@ def _save_model(run_functions, local_chunk_id=None): logger.info("save model state successfully...") def set_state_dir(self, path): - assert os.path.exists( - path - ), "{} not found, please check the path".format(path) + assert os.path.exists(path), f"{path} not found, please check the path" def _load_model(run_functions, local_chunk_id=None): for idx, layer in enumerate(run_functions): @@ -799,15 +795,13 @@ def _load_model(run_functions, local_chunk_id=None): assert local_chunk_id < len(self._start_poss) pos_offset = self._start_poss[local_chunk_id] layer_idx = idx + pos_offset - layer_save_path = os.path.join( - path, 'layer_{0:0>2d}'.format(layer_idx) - ) + layer_save_path = os.path.join(path, f'layer_{layer_idx:0>2d}') if self._num_virtual_pipeline_stages > 1: # add virtual pipeline info to the path assert local_chunk_id is not None layer_save_path = ( layer_save_path - + "-virtual_pp_stage_{:0>2d}".format(local_chunk_id) + + f"-virtual_pp_stage_{local_chunk_id:0>2d}" ) model_files = glob.glob( layer_save_path + "*model_states.pdparams" diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py index 7fc474da942a2c..f6b447db9193fe 100644 --- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py @@ -79,7 +79,7 @@ def get_tensor_bytes(tensor): elif tensor.dtype == paddle.int8: elem_size = 1 else: - raise ValueError("unknown data type: {}".format(tensor.dtype)) + raise ValueError(f"unknown data type: {tensor.dtype}") return tensor.numel() * elem_size diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py index 994ba97a57a0e5..d658fc1e82e369 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_stage3.py @@ -231,7 +231,7 @@ def _clear_gradients(self): for param in trainable_params: assert hasattr( param, "fw_storage" - ), "Find {} don't have fw_storage 
attribute.".format(param.name) + ), f"Find {param.name} don't have fw_storage attribute." param.fw_storage.clear_gradient(False) param.bw_storage._clear() @@ -582,7 +582,7 @@ def _update_params(self): for param in trainable_params: assert hasattr( param, "fw_storage" - ), "Find {} don't have fw_storage attribute".format(param.name) + ), f"Find {param.name} don't have fw_storage attribute" param.fw_storage = _VarBaseWrapper(param) assert param.fw_storage.grad is None diff --git a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py index 1a09bb3fa92a23..2f13be31fa4935 100644 --- a/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py +++ b/python/paddle/distributed/fleet/meta_parallel/sharding/group_sharded_utils.py @@ -201,7 +201,7 @@ def device_guard(dev_id=0, device="cpu"): if device == "cpu": paddle.set_device(device) elif device in ["gpu", "xpu", "npu"]: - paddle.set_device("{}:{}".format(device, dev_id)) + paddle.set_device(f"{device}:{dev_id}") try: yield finally: diff --git a/python/paddle/distributed/fleet/recompute/recompute.py b/python/paddle/distributed/fleet/recompute/recompute.py index d46a7f7996edf4..b0b9885c33efdb 100755 --- a/python/paddle/distributed/fleet/recompute/recompute.py +++ b/python/paddle/distributed/fleet/recompute/recompute.py @@ -109,18 +109,14 @@ def forward(ctx, run_function, preserve_rng_state, *args, **kwargs): elif tracer._amp_level in (core.AmpLevel.O1, core.AmpLevel.O0): ctx.amp_level = 'O1' else: - raise ValueError( - "unsupported amp level: {}".format(tracer._amp_level) - ) + raise ValueError(f"unsupported amp level: {tracer._amp_level}") if tracer._amp_dtype == 'float16': ctx.amp_dtype = 'float16' elif tracer._amp_dtype in ('bfloat16', 'float32'): ctx.amp_dtype = 'bfloat16' else: - raise ValueError( - "unsupported amp dtype: {}".format(tracer._amp_dtype) - ) + raise ValueError(f"unsupported amp dtype: {tracer._amp_dtype}") ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list() @@ -499,6 +495,6 @@ def do_run(input): _run_func(begin, end, functions), *args, preserve_rng_state=preserve_rng_state, - **kwargs + **kwargs, ) return _run_func(end + 1, len(functions) - 1, functions)(args) diff --git a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py index 0b156486c7e5ad..c6acae878745bf 100644 --- a/python/paddle/distributed/fleet/recompute/recompute_hybrid.py +++ b/python/paddle/distributed/fleet/recompute/recompute_hybrid.py @@ -85,7 +85,7 @@ def forward( offload, partition, *args, - **kwargs + **kwargs, ): # store for recomputing @@ -125,9 +125,7 @@ def forward( elif tracer._amp_level in (core.AmpLevel.O1, core.AmpLevel.O0): ctx.amp_level = 'O1' else: - raise ValueError( - "unsupported amp level: {}".format(tracer._amp_level) - ) + raise ValueError(f"unsupported amp level: {tracer._amp_level}") ctx.amp_dtype = tracer._amp_dtype ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list() diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index c311eee14c0c8d..24df3203183f5c 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -572,9 +572,7 @@ def _save_sparse_params(self, executor, dirname, context, main_program): slice_varnames = [] 
remote_varnames = [] for i in range(len(var_ctx.split_varnames())): - slice_varnames.append( - "{}.block{}".format(reshaped_varname, i) - ) + slice_varnames.append(f"{reshaped_varname}.block{i}") remote_varnames.append(reshaped_varname) block.append_op( diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py index 0563daa9d01138..b61bbaf3860fb9 100644 --- a/python/paddle/distributed/fleet/runtime/the_one_ps.py +++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py @@ -163,9 +163,9 @@ def __init__(self): def to_string(self, indent): accessor_str = "{}accessor {{{}\n{}}}" attrs = "" - attrs += "accessor_class: \"{}\" ".format(self.accessor_class) - attrs += "fea_dim: {} ".format(self.feature_dim) - attrs += "embedx_dim: {} ".format(self.embedding_dim) + attrs += f"accessor_class: \"{self.accessor_class}\" " + attrs += f"fea_dim: {self.feature_dim} " + attrs += f"embedx_dim: {self.embedding_dim} " attrs += "\n" if self.optimizer is not None: attrs += self.optimizer.to_string(indent) @@ -322,7 +322,7 @@ def parse_by_optimizer( break if oop is None: - raise ValueError("can not find optimizer for {}".format(grad_name)) + raise ValueError(f"can not find optimizer for {grad_name}") params = [] dims = [] @@ -436,28 +436,28 @@ def parse_by_optimizer( def to_string(self, indent): accessor_str = "{}common {{{}\n{}}}" attrs = "" - attrs += "name: \"{}\" ".format(self.accessor_class) + attrs += f"name: \"{self.accessor_class}\" " if self.table_name: - attrs += "table_name: \"{}\" ".format(self.table_name) + attrs += f"table_name: \"{self.table_name}\" " if self.entry: - attrs += "entry: \"{}\" ".format(self.entry) - attrs += "trainer_num: {} ".format(self.trainer_num) - attrs += "sync: {} ".format(self.sync) + attrs += f"entry: \"{self.entry}\" " + attrs += f"trainer_num: {self.trainer_num} " + attrs += f"sync: {self.sync} " if self.table_num: - attrs += "table_num: {} ".format(self.table_num) + attrs += f"table_num: {self.table_num} " if self.table_dim: - attrs += "table_dim: {} ".format(self.table_dim) + attrs += f"table_dim: {self.table_dim} " for param in self.params: - attrs += "params: \"{}\" ".format(param) + attrs += f"params: \"{param}\" " for dim in self.dims: - attrs += "dims: {} ".format(dim) + attrs += f"dims: {dim} " for initializer in self.initializers: - attrs += "initializers: \"{}\" ".format(initializer) + attrs += f"initializers: \"{initializer}\" " attrs += "\n" return accessor_str.format( @@ -476,10 +476,10 @@ def __init__(self): def to_string(self, indent): program_str = "{}tensor {{{}\n{}}}" attrs = "" - attrs += "feed_var_name: \"{}\" ".format(str(self.feed_var_name)) - attrs += "fetch_var_name: \"{}\" ".format(str(self.fetch_var_name)) - attrs += "startup_program_id: {} ".format(str(self.startup_program_id)) - attrs += "main_program_id: {} ".format(str(self.main_program_id)) + attrs += f"feed_var_name: \"{str(self.feed_var_name)}\" " + attrs += f"fetch_var_name: \"{str(self.fetch_var_name)}\" " + attrs += f"startup_program_id: {str(self.startup_program_id)} " + attrs += f"main_program_id: {str(self.main_program_id)} " attrs += "tensor_table_class: \"{}\" ".format( str(self.tensor_table_class) ) @@ -509,10 +509,10 @@ def to_string(self, indent): table_str = "{}downpour_table_param {{{}\n{}}}" attrs = "" - attrs += "table_id: {} ".format(self.id) - attrs += "table_class: \"{}\" ".format(self.table_class) - attrs += "shard_num: {} ".format(self.shard_num) - attrs += "type: {}".format(self.type) + attrs += 
f"table_id: {self.id} " + attrs += f"table_class: \"{self.table_class}\" " + attrs += f"shard_num: {self.shard_num} " + attrs += f"type: {self.type}" attrs += "\n" indent += 2 @@ -549,11 +549,11 @@ def to_string(self, indent): service_str = "{}service_param {{{}\n{}}}" attrs = "" - attrs += "server_class: \"{}\" ".format(self.server_class) - attrs += "client_class: \"{}\" ".format(self.client_class) - attrs += "service_class: \"{}\" ".format(self.service_class) - attrs += "start_server_port: {} ".format(self.start_server_port) - attrs += "server_thread_num: {} ".format(self.server_thread_num) + attrs += f"server_class: \"{self.server_class}\" " + attrs += f"client_class: \"{self.client_class}\" " + attrs += f"service_class: \"{self.service_class}\" " + attrs += f"start_server_port: {self.start_server_port} " + attrs += f"server_thread_num: {self.server_thread_num} " return service_str.format( conv_indent(indent), attrs, conv_indent(indent) @@ -765,7 +765,7 @@ def sync_strategy_envs(): debug = bool(int(os.getenv("PSERVER_DEBUG", "0"))) if debug: - print("worker: \n{}".format(proto_txt)) + print(f"worker: \n{proto_txt}") endpoints = self.compiled_strategy.get_ps_endpoints() @@ -787,12 +787,12 @@ def sync_strategy_envs(): debug = bool(int(os.getenv("PSERVER_DEBUG", "0"))) if debug: - print("worker: \n{}".format(proto_txt)) + print(f"worker: \n{proto_txt}") print("communicator send_ctx:") for key in send_ctx: - print("{}: {}".format(key, send_ctx[key])) + print(f"{key}: {send_ctx[key]}") for key in dense_map: - print("{}: {}".format(key, dense_map[key])) + print(f"{key}: {dense_map[key]}") kwargs = {} kwargs['need_global_step'] = "0" @@ -1177,7 +1177,7 @@ def _init_server(self, dirname=None, var_names=None, **kwargs): debug = bool(int(os.getenv("PSERVER_DEBUG", "0"))) if debug: - print("server: \n{}".format(proto_txt)) + print(f"server: \n{proto_txt}") string_hosts = [] for idx, ep in enumerate(endpoints): diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index b61abbbaa56867..994432e14e1cbc 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -174,7 +174,7 @@ def mkdirs(self, fs_path): assert not os.path.isfile(fs_path), "{} is already a file".format( fs_path ) - os.system("mkdir -p {}".format(fs_path)) + os.system(f"mkdir -p {fs_path}") def rename(self, fs_src_path, fs_dst_path): """ @@ -320,7 +320,7 @@ def touch(self, fs_path, exist_ok=True): return raise FSFileExistsError - os.system("touch {}".format(fs_path)) + os.system(f"touch {fs_path}") def mv(self, src_path, dst_path, overwrite=False, test_exists=False): """ @@ -461,7 +461,7 @@ def __init__( if configs: for k, v in configs.items(): - config_command = '-D%s=%s' % (k, v) + config_command = f'-D{k}={v}' self.pre_commands.append(config_command) self._time_out = time_out @@ -472,7 +472,7 @@ def __init__( ) def _run_cmd(self, cmd, redirect_stderr=False, retry_times=5): - exe_cmd = "{} -{}".format(self._base_cmd, cmd) + exe_cmd = f"{self._base_cmd} -{cmd}" ret = 0 output = None retry_sleep_second = 3 @@ -552,7 +552,7 @@ def ls_dir(self, fs_path): return self._ls_dir(fs_path) def _ls_dir(self, fs_path): - cmd = "ls {}".format(fs_path) + cmd = f"ls {fs_path}" ret, lines = self._run_cmd(cmd) if ret != 0: @@ -613,7 +613,7 @@ def is_dir(self, fs_path): return self._is_dir(fs_path) def _is_dir(self, fs_path): - cmd = "test -d {}".format(fs_path) + cmd = f"test -d {fs_path}" ret, lines = self._run_cmd(cmd, redirect_stderr=True, 
retry_times=1) if ret: # other error @@ -683,7 +683,7 @@ def is_exist(self, fs_path): client = HDFSClient(hadoop_home, configs) ret = client.is_exist("hdfs:/test_hdfs_client") """ - cmd = "test -e {} ".format(fs_path) + cmd = f"test -e {fs_path} " ret, out = self._run_cmd(cmd, redirect_stderr=True, retry_times=1) if ret != 0: return False @@ -763,7 +763,7 @@ def get_local_files(path): local = LocalFS() if not local.is_exist(local_path): - raise FSFileNotExistsError("{} not exists".format(local_path)) + raise FSFileNotExistsError(f"{local_path} not exists") all_files = get_local_files(local_path) if not all_files: @@ -789,7 +789,7 @@ def get_local_files(path): @_handle_errors() def _try_upload(self, local_path, fs_path): - cmd = "put {} {}".format(local_path, fs_path) + cmd = f"put {local_path} {fs_path}" ret = 0 try: ret, _ = self._run_cmd(cmd) @@ -837,7 +837,7 @@ def __subprocess_download(local_path, datas): self._try_download(data, local_path) if not self.is_exist(fs_path): - raise FSFileNotExistsError("{} not exits".format(fs_path)) + raise FSFileNotExistsError(f"{fs_path} not exits") # download file if self.is_file(fs_path): return self._try_download(fs_path, local_path) @@ -860,7 +860,7 @@ def __subprocess_download(local_path, datas): @_handle_errors() def _try_download(self, fs_path, local_path): - cmd = "get {} {}".format(fs_path, local_path) + cmd = f"get {fs_path} {local_path}" ret = 0 try: ret, _ = self._run_cmd(cmd) @@ -899,7 +899,7 @@ def mkdirs(self, fs_path): out_hdfs = False - cmd = "mkdir {} ".format(fs_path) + cmd = f"mkdir {fs_path} " ret, out = self._run_cmd(cmd, redirect_stderr=True) if ret != 0: for l in out: @@ -910,7 +910,7 @@ def mkdirs(self, fs_path): raise ExecuteError(cmd) if out_hdfs and not self.is_exist(fs_path): - cmd = "mkdir -p {}".format(fs_path) + cmd = f"mkdir -p {fs_path}" ret, _ = self._run_cmd(cmd) if ret != 0: raise ExecuteError(cmd) @@ -945,18 +945,16 @@ def mv(self, fs_src_path, fs_dst_path, overwrite=False, test_exists=True): if test_exists: if not self.is_exist(fs_src_path): - raise FSFileNotExistsError( - "{} is not exists".format(fs_src_path) - ) + raise FSFileNotExistsError(f"{fs_src_path} is not exists") if self.is_exist(fs_dst_path): - raise FSFileExistsError("{} exists already".format(fs_dst_path)) + raise FSFileExistsError(f"{fs_dst_path} exists already") return self._try_mv(fs_src_path, fs_dst_path) @_handle_errors() def _try_mv(self, fs_src_path, fs_dst_path): - cmd = "mv {} {}".format(fs_src_path, fs_dst_path) + cmd = f"mv {fs_src_path} {fs_dst_path}" ret = 0 try: ret, _ = self._run_cmd(cmd, retry_times=1) @@ -968,13 +966,13 @@ def _try_mv(self, fs_src_path, fs_dst_path): raise e def _rmr(self, fs_path): - cmd = "rmr {}".format(fs_path) + cmd = f"rmr {fs_path}" ret, _ = self._run_cmd(cmd) if ret != 0: raise ExecuteError(cmd) def _rm(self, fs_path): - cmd = "rm {}".format(fs_path) + cmd = f"rm {fs_path}" ret, _ = self._run_cmd(cmd) if ret != 0: raise ExecuteError(cmd) @@ -1044,7 +1042,7 @@ def touch(self, fs_path, exist_ok=True): @_handle_errors() def _touchz(self, fs_path): - cmd = "touchz {}".format(fs_path) + cmd = f"touchz {fs_path}" ret, _ = self._run_cmd(cmd) if ret != 0: raise ExecuteError(cmd) @@ -1085,7 +1083,7 @@ def cat(self, fs_path=None): @_handle_errors() def _try_cat(self, fs_path): - cmd = "cat {}".format(fs_path) + cmd = f"cat {fs_path}" ret, output = self._run_cmd(cmd, retry_times=1) if ret != 0: raise ExecuteError(cmd) @@ -1353,7 +1351,7 @@ def upload(self, local_path, fs_path, multi_processes=1, overwrite=False): 
local = LocalFS() if not local.is_exist(local_path): - raise FSFileNotExistsError("{} not exists".format(local_path)) + raise FSFileNotExistsError(f"{local_path} not exists") self._fs.upload(local_path, fs_path) @@ -1389,7 +1387,7 @@ def __subprocess_download(local_path, datas): self._fs.download(local_path, data) if not self.is_exist(fs_path): - raise FSFileNotExistsError("{} not exits".format(fs_path)) + raise FSFileNotExistsError(f"{fs_path} not exits") # download file if self.is_file(fs_path): return self._fs.download(local_path, fs_path) @@ -1455,12 +1453,10 @@ def mv(self, fs_src_path, fs_dst_path, overwrite=False, test_exists=True): if test_exists: if not self.is_exist(fs_src_path): - raise FSFileNotExistsError( - "{} is not exists".format(fs_src_path) - ) + raise FSFileNotExistsError(f"{fs_src_path} is not exists") if self.is_exist(fs_dst_path): - raise FSFileExistsError("{} exists already".format(fs_dst_path)) + raise FSFileExistsError(f"{fs_dst_path} exists already") self._fs.mv(fs_src_path, fs_dst_path) diff --git a/python/paddle/distributed/fleet/utils/log_util.py b/python/paddle/distributed/fleet/utils/log_util.py index 07bcbea4b41090..9f0a6d1f77f691 100644 --- a/python/paddle/distributed/fleet/utils/log_util.py +++ b/python/paddle/distributed/fleet/utils/log_util.py @@ -68,7 +68,7 @@ def layer_to_str(base, *args, **kwargs): name += ", " if kwargs: name += ", ".join( - "{}={}".format(key, str(value)) for key, value in kwargs.items() + f"{key}={str(value)}" for key, value in kwargs.items() ) name += ")" return name diff --git a/python/paddle/distributed/fleet/utils/tensor_parallel_utils.py b/python/paddle/distributed/fleet/utils/tensor_parallel_utils.py index ef71dca8b31b6e..a7e12ef7534bf7 100644 --- a/python/paddle/distributed/fleet/utils/tensor_parallel_utils.py +++ b/python/paddle/distributed/fleet/utils/tensor_parallel_utils.py @@ -111,7 +111,7 @@ def copy_parameters(block_, params): ) assert ( param.is_distributed is False - ), "Try to sync Distribted Parameter: {}".format(param) + ), f"Try to sync Distribted Parameter: {param}" new_p.is_distributed = False block_.vars[new_p.name] = new_p @@ -156,7 +156,7 @@ def insert_sync_op( ) else: raise NotImplementedError( - 'Sync mode of [{}] is NOT supported.'.format(sync_mode) + f'Sync mode of [{sync_mode}] is NOT supported.' 
) diff --git a/python/paddle/distributed/io.py b/python/paddle/distributed/io.py index 7255e96c405b25..bc125cb242a29b 100644 --- a/python/paddle/distributed/io.py +++ b/python/paddle/distributed/io.py @@ -86,7 +86,7 @@ def __load_persistable_vars(executor, dirname, need_load_vars): ) else: origin = load_block.create_var( - name="{}".format(origin_var.name), + name=f"{origin_var.name}", type=origin_var.type, shape=origin_var.shape, dtype=origin_var.dtype, @@ -247,7 +247,7 @@ def __save_remote_params(executor, dirname, remote_params_map): index = block_id if is_slice else idx slices[index] = slice - slice_varnames[index] = "{}.slice.{}".format(slice.name, idx) + slice_varnames[index] = f"{slice.name}.slice.{idx}" remote_varnames[index] = slice.name endpoints[index] = endpoint diff --git a/python/paddle/distributed/launch/context/__init__.py b/python/paddle/distributed/launch/context/__init__.py index 0d3410e368e709..89ad489f0f43ab 100644 --- a/python/paddle/distributed/launch/context/__init__.py +++ b/python/paddle/distributed/launch/context/__init__.py @@ -42,7 +42,7 @@ def __init__(self, enable_plugin=True): def print(self): self.logger.info("----------- Configuration ----------------------") for arg, value in sorted(vars(self.args).items()): - self.logger.info("%s: %s" % (arg, value)) + self.logger.info(f"{arg}: {value}") self.logger.info("--------------------------------------------------") def is_legacy_mode(self): @@ -54,7 +54,7 @@ def is_legacy_mode(self): if len(self.unknown_args) > 0: self.logger.warning( - "Compatible mode enable with args {}".format(self.unknown_args) + f"Compatible mode enable with args {self.unknown_args}" ) return True diff --git a/python/paddle/distributed/launch/controllers/collective.py b/python/paddle/distributed/launch/controllers/collective.py index de4db754b72cea..0c12240b2612e9 100644 --- a/python/paddle/distributed/launch/controllers/collective.py +++ b/python/paddle/distributed/launch/controllers/collective.py @@ -23,7 +23,7 @@ class CollectiveController(Controller): def enable(cls, ctx): # collective is the default mode if ctx: - ctx.logger.debug("{} enabled".format(cls.__name__)) + ctx.logger.debug(f"{cls.__name__} enabled") ctx.args.run_mode = ControleMode.COLLECTIVE return True else: @@ -49,7 +49,7 @@ def _build_pod_with_args(self): f"{h}:{p+start_port}" for h in ips for p in range(self.pod.replicas) ] - self.ctx.logger.debug("job endpoints: {}".format(job_endpoints)) + self.ctx.logger.debug(f"job endpoints: {job_endpoints}") rank_offset = ( ips.index(self.ctx.node.ip) * self.pod.replicas @@ -66,16 +66,16 @@ def _build_pod_with_args(self): for i in range(self.pod.replicas): e = { - "PADDLE_GLOBAL_SIZE": "{}".format(len(job_endpoints)), - "PADDLE_LOCAL_SIZE": "{}".format(self.pod.replicas), - "PADDLE_GLOBAL_RANK": "{}".format(i + rank_offset), - "PADDLE_LOCAL_RANK": "{}".format(i), - "PADDLE_NNODES": "{}".format(len(ips)), + "PADDLE_GLOBAL_SIZE": f"{len(job_endpoints)}", + "PADDLE_LOCAL_SIZE": f"{self.pod.replicas}", + "PADDLE_GLOBAL_RANK": f"{i + rank_offset}", + "PADDLE_LOCAL_RANK": f"{i}", + "PADDLE_NNODES": f"{len(ips)}", # compatible env "PADDLE_TRAINER_ENDPOINTS": ",".join(job_endpoints), "PADDLE_CURRENT_ENDPOINT": job_endpoints[i + rank_offset], - "PADDLE_TRAINER_ID": "{}".format(i + rank_offset), - "PADDLE_TRAINERS_NUM": "{}".format(len(job_endpoints)), + "PADDLE_TRAINER_ID": f"{i + rank_offset}", + "PADDLE_TRAINERS_NUM": f"{len(job_endpoints)}", "PADDLE_RANK_IN_NODE": str(i), } if len(selected_dev_list) > 0: @@ -103,7 +103,7 @@ def 
_build_pod_with_master(self): # compatible endpoints = [ - "{}:{}".format(self.ctx.node.ip, p) + f"{self.ctx.node.ip}:{p}" for p in self.ctx.node.get_free_ports(self.pod.replicas) ] @@ -113,13 +113,13 @@ def _build_pod_with_master(self): 'rank': self.pod.rank, 'replicas': self.pod.replicas, 'dtype': self.ctx.node.device.dtype, - 'candidate': '{}:{}'.format(self.ctx.node.ip, port), + 'candidate': f'{self.ctx.node.ip}:{port}', 'endpoints': ",".join(endpoints), } ) peer_list, rank = self.master.sync_peers( - '/{}/info'.format(self.job.id), + f'/{self.job.id}/info', self.pod.name, data, self.job.replicas, @@ -132,7 +132,7 @@ def _build_pod_with_master(self): peer_list = [json.loads(i) for i in peer_list] - self.ctx.logger.debug("sync peers done {}".format(peer_list)) + self.ctx.logger.debug(f"sync peers done {peer_list}") self.save_pod_log(peer_list) global_size = sum([i['replicas'] for i in peer_list]) @@ -152,16 +152,16 @@ def _build_pod_with_master(self): for i in range(self.pod.replicas): e = { "PADDLE_MASTER": collective_master, - "PADDLE_GLOBAL_SIZE": "{}".format(global_size), - "PADDLE_LOCAL_SIZE": "{}".format(self.pod.replicas), - "PADDLE_GLOBAL_RANK": "{}".format(i + rank_offset), - "PADDLE_LOCAL_RANK": "{}".format(i), - "PADDLE_NNODES": "{}".format(self.job.replicas), + "PADDLE_GLOBAL_SIZE": f"{global_size}", + "PADDLE_LOCAL_SIZE": f"{self.pod.replicas}", + "PADDLE_GLOBAL_RANK": f"{i + rank_offset}", + "PADDLE_LOCAL_RANK": f"{i}", + "PADDLE_NNODES": f"{self.job.replicas}", # compatible env "PADDLE_TRAINER_ENDPOINTS": ",".join(job_endpoints), "PADDLE_CURRENT_ENDPOINT": endpoints[i], - "PADDLE_TRAINER_ID": "{}".format(i + rank_offset), - "PADDLE_TRAINERS_NUM": "{}".format(global_size), + "PADDLE_TRAINER_ID": f"{i + rank_offset}", + "PADDLE_TRAINERS_NUM": f"{global_size}", "PADDLE_RANK_IN_NODE": str(i), } if len(selected_dev_list) > 0: @@ -185,7 +185,7 @@ class CollectiveElasticController(CollectiveController): @classmethod def enable(cls, ctx): if ctx.args.master and ctx.args.master.startswith("etcd://"): - ctx.logger.debug("{} enabled".format(cls.__name__)) + ctx.logger.debug(f"{cls.__name__} enabled") ctx.args.run_mode = ControleMode.COLLECTIVE return True else: @@ -217,10 +217,10 @@ def run(self): if ok: self.job.replicas = replicas else: - self.ctx.logger.warning("peer not ready {}".format(self.job)) + self.ctx.logger.warning(f"peer not ready {self.job}") break - self.ctx.logger.debug("Run {}".format(self.job)) + self.ctx.logger.debug(f"Run {self.job}") if not self.build_pod(): continue @@ -232,4 +232,4 @@ def run(self): if self.watch(): break - self.ctx.logger.debug("Job done {}".format(self.job)) + self.ctx.logger.debug(f"Job done {self.job}") diff --git a/python/paddle/distributed/launch/controllers/controller.py b/python/paddle/distributed/launch/controllers/controller.py index b6cc7440c48974..22c3b77c95dee3 100644 --- a/python/paddle/distributed/launch/controllers/controller.py +++ b/python/paddle/distributed/launch/controllers/controller.py @@ -57,7 +57,7 @@ def deploy_pod(self): assert len(self.pod.containers) > 0, "No container in the pod" - self.ctx.logger.info("Run {}".format(self.pod)) + self.ctx.logger.info(f"Run {self.pod}") self.ctx.logger.debug(self.pod.containers[0]) self.ctx.status.run() @@ -77,7 +77,7 @@ def watch(self) -> bool: ''' # TODO(kuizhiqing) unify ctx.status and master status - self.ctx.logger.info("Watching {}".format(self.pod)) + self.ctx.logger.info(f"Watching {self.pod}") while not self.ctx.status.is_done(): status = self.pod.watch(timeout=2) @@ 
-95,7 +95,7 @@ def watch(self) -> bool: while self.pod.logs(): pass - self.ctx.logger.info("Pod {}".format(status)) + self.ctx.logger.info(f"Pod {status}") return True # self failure @@ -106,8 +106,8 @@ def watch(self) -> bool: self.master.restart_peer() fc = self.pod.failed_container() - self.ctx.logger.info("Pod {}".format(status)) - self.ctx.logger.error("Container failed !!!\n{}".format(fc[0])) + self.ctx.logger.info(f"Pod {status}") + self.ctx.logger.error(f"Container failed !!!\n{fc[0]}") self.ctx.logger.info( "------------------------- ERROR LOG DETAIL -------------------------" ) @@ -140,7 +140,7 @@ def finalize(self): self.pod.join() self.master.stop() - self.ctx.logger.info("Exit code {}".format(self.pod.exit_code)) + self.ctx.logger.info(f"Exit code {self.pod.exit_code}") sys.exit(self.pod.exit_code) def signal_handler(self, sigint, frame): @@ -149,12 +149,12 @@ def signal_handler(self, sigint, frame): self.pod.stop(timeout=10) sys.exit(sigint) - self.ctx.logger.info("Terminating with signal {}".format(sigint)) + self.ctx.logger.info(f"Terminating with signal {sigint}") self.sigint = sigint self.ctx.status.done() self.stop(sigint=sigint) - self.ctx.logger.info("Exit with signal {}".format(sigint)) + self.ctx.logger.info(f"Exit with signal {sigint}") sys.exit(sigint) @@ -244,11 +244,11 @@ def save_pod_log(self, info): f = os.path.join( self.ctx.args.log_dir, - '{}.{}.log'.format(self.job.id, self.pod.name), + f'{self.job.id}.{self.pod.name}.log', ) try: os.makedirs(os.path.dirname(f), exist_ok=True) with open(f, 'a+') as fd: fd.write(str(info)) except Exception as e: - self.ctx.logger.error("save log failed because {}".format(e)) + self.ctx.logger.error(f"save log failed because {e}") diff --git a/python/paddle/distributed/launch/controllers/ipu_controller.py b/python/paddle/distributed/launch/controllers/ipu_controller.py index 7535177e8929e6..bf2c5f34b3bdf9 100644 --- a/python/paddle/distributed/launch/controllers/ipu_controller.py +++ b/python/paddle/distributed/launch/controllers/ipu_controller.py @@ -25,7 +25,7 @@ class IPUController(CollectiveController): @classmethod def enable(cls, ctx): if ctx.args.training_script == "ipu": - ctx.logger.debug("{} enabled".format(cls.__name__)) + ctx.logger.debug(f"{cls.__name__} enabled") ctx.args.run_mode = ControleMode.IPU return True else: @@ -75,16 +75,12 @@ def replace_training_script(self): num_ipus, poprun_args.ipus_per_replica ) num_replicas = num_ipus // poprun_args.ipus_per_replica - self.ctx.logger.info( - "The number of total replicas is {}.".format(num_replicas) - ) + self.ctx.logger.info(f"The number of total replicas is {num_replicas}.") # The number of processes num_nodes = len(poprun_args.hosts.split(',')) num_procs = num_nodes * poprun_args.nproc_per_host - self.ctx.logger.info( - "The number of total processes is {}.".format(num_procs) - ) + self.ctx.logger.info(f"The number of total processes is {num_procs}.") assert ( num_replicas % num_procs ) == 0, "The number of replicas:{} mod the number of processes:{} must == 0".format( @@ -98,18 +94,14 @@ def replace_training_script(self): # args for poprun poprun_command = [] - poprun_command.append('--num-instances={}'.format(num_procs)) - poprun_command.append('--num-replicas={}'.format(num_replicas)) + poprun_command.append(f'--num-instances={num_procs}') + poprun_command.append(f'--num-replicas={num_replicas}') poprun_command.append( - '--ipus-per-replica={}'.format(poprun_args.ipus_per_replica) + f'--ipus-per-replica={poprun_args.ipus_per_replica}' ) 
poprun_command.append('--host={}'.format(','.join(hosts))) - poprun_command.append( - '--vipu-partition={}'.format(poprun_args.ipu_partition) - ) - poprun_command.append( - '--vipu-server-host={}'.format(poprun_args.vipu_server) - ) + poprun_command.append(f'--vipu-partition={poprun_args.ipu_partition}') + poprun_command.append(f'--vipu-server-host={poprun_args.vipu_server}') poprun_command.extend( [ @@ -124,7 +116,7 @@ def replace_training_script(self): global_envs = '--mpi-local-args=\'' log_level = os.getenv('POPART_LOG_LEVEL', None) if log_level: - global_envs += '-x POPART_LOG_LEVEL={} '.format(log_level) + global_envs += f'-x POPART_LOG_LEVEL={log_level} ' global_envs += ( '-x PADDLE_TRAINERS_NUM={} -x PADDLE_TRAINER_ENDPOINTS={}'.format( num_procs, ','.join(endpoints) diff --git a/python/paddle/distributed/launch/controllers/master.py b/python/paddle/distributed/launch/controllers/master.py index d47d6d6586f9ce..fb6016d9e40e0a 100644 --- a/python/paddle/distributed/launch/controllers/master.py +++ b/python/paddle/distributed/launch/controllers/master.py @@ -80,14 +80,12 @@ def lazy_init(self): self.role = Master.MAIN break except Exception as e: - self.ctx.logger.warning( - "start master failed {}".format(e) - ) + self.ctx.logger.warning(f"start master failed {e}") time.sleep(0.1) continue else: port = self.ctx.node.get_free_port() - self.endpoint = "{}:{}".format(self.ctx.node.ip, port) + self.endpoint = f"{self.ctx.node.ip}:{port}" self.server = KVServer(port) self.role = Master.MAIN @@ -119,7 +117,7 @@ def lazy_init(self): def _start_server(self): if self.server and not self.server.started: self.server.start() - self.ctx.logger.debug("KV server start at {}".format(self.endpoint)) + self.ctx.logger.debug(f"KV server start at {self.endpoint}") def _stop_server(self): if self.server and not self.server.stopped: @@ -147,7 +145,7 @@ def sync_peers(self, prefix, key, value, size, rank=-1) -> (list, int): # 'aaaaaa' make sure main pod (master server) as rank 0 ky = 'aaaaaa' if rank < 0 and self.role == Master.MAIN else key - k = "{}/{}/{}".format(prefix, ky, rank) + k = f"{prefix}/{ky}/{rank}" while not self.ctx.status.is_done(): if not self.client.put(k, value): @@ -156,7 +154,7 @@ def sync_peers(self, prefix, key, value, size, rank=-1) -> (list, int): continue rjson = self.client.get_prefix(prefix) - self.ctx.logger.debug("sync peers {}".format(rjson)) + self.ctx.logger.debug(f"sync peers {rjson}") if rjson and len(rjson) == size: if rank < 0: keys = list(rjson.keys()) @@ -198,18 +196,18 @@ def sync_peers(self, prefix, key, value, size, rank=-1) -> (list, int): self.ctx.logger.info("Waiting peer start...") - path = "{}/{}/{}".format(prefix, key, rank) + path = f"{prefix}/{key}/{rank}" self.client.delete_prefix(prefix) - self.ctx.logger.debug("sync path {} value {}".format(path, value)) + self.ctx.logger.debug(f"sync path {path} value {value}") while not self.ctx.status.is_done(): self.client.put(path, value.encode('latin-1')) result = list(self.client.get_prefix(prefix)) result = copy.deepcopy(result) - self.ctx.logger.debug("sync peers {}".format(result)) + self.ctx.logger.debug(f"sync peers {result}") if len(result) == size: if rank < 0: @@ -225,9 +223,7 @@ def sync_peers(self, prefix, key, value, size, rank=-1) -> (list, int): for v, k in result: ii = int(k.key.decode().split('/')[-1]) if ii < 0: - self.ctx.logger.error( - "rank {} error in sync".format(ii) - ) + self.ctx.logger.error(f"rank {ii} error in sync") ret[ii] = v.decode() return ret, rank else: @@ -238,14 +234,14 @@ def 
register_heartbeat(self, job_id, pod_id, ttl=10): self.ctx.logger.warning("Heartbeat already done") return - self.job_prefix = '/paddle/{}'.format(job_id) - self.heartbeat_prefix = '{}/heartbeat'.format(self.job_prefix) + self.job_prefix = f'/paddle/{job_id}' + self.heartbeat_prefix = f'{self.job_prefix}/heartbeat' lease = self.client.lease(ttl) # self.client.delete_prefix(self.job_prefix) - beat_path = "{}/{}".format(self.heartbeat_prefix, pod_id) + beat_path = f"{self.heartbeat_prefix}/{pod_id}" self.client.put(beat_path, pod_id.encode('latin-1'), lease=lease) def _beat_watch(event): @@ -265,7 +261,7 @@ def _heartbeat(): ) self.ctx.logger.debug("Heartbeat register again") except Exception as e: - self.ctx.logger.error("Heartbeat error {}".format(e)) + self.ctx.logger.error(f"Heartbeat error {e}") time.sleep(ttl / 2) self.ctx.logger.debug("Heartbeat done") self.client.cancel_watch(beat_watch) @@ -279,7 +275,7 @@ def fetch_peer_alive(self): peer_alive = [ i[0].decode() for i in self.client.get_prefix(self.heartbeat_prefix) ] - self.ctx.logger.debug("peer alive {}".format(peer_alive)) + self.ctx.logger.debug(f"peer alive {peer_alive}") return peer_alive def wait_peer_ready(self, replicas_min, replicas_max, timeout): @@ -314,7 +310,7 @@ def set_status(self, status): self.job_prefix, status.encode('latin-1'), lease=self.client.lease(600), - ), "set status failed {}".format(status) + ), f"set status failed {status}" def get_status(self): value = self.client.get(self.job_prefix)[0] diff --git a/python/paddle/distributed/launch/controllers/ps.py b/python/paddle/distributed/launch/controllers/ps.py index a4c3ab85177a6b..664d5a86f06bb0 100644 --- a/python/paddle/distributed/launch/controllers/ps.py +++ b/python/paddle/distributed/launch/controllers/ps.py @@ -29,7 +29,7 @@ def enable(cls, ctx): or ctx.args.trainer_num or len(ctx.args.trainers) > 0 ): - ctx.logger.debug("{} enabled".format(cls.__name__)) + ctx.logger.debug(f"{cls.__name__} enabled") ctx.args.run_mode = ControleMode.PS return True else: @@ -85,11 +85,11 @@ def _build_pod_with_args(self): "PADDLE_PORT": servers[i].split(":")[1], "PADDLE_ROLE": "PSERVER", "TRAINING_ROLE": "PSERVER", - "PADDLE_TRAINERS_NUM": "{}".format(len(trainer_endpoints)), + "PADDLE_TRAINERS_NUM": f"{len(trainer_endpoints)}", "POD_IP": self.ctx.node.ip, } e.update(_gloo_envs) - log_file = "serverlog.{}".format(i) + log_file = f"serverlog.{i}" self.add_container(envs=e, log_file=log_file) trainer_rank_offset = 0 @@ -106,12 +106,12 @@ def _build_pod_with_args(self): "PADDLE_PORT": trainers[i].split(":")[1], "PADDLE_ROLE": "TRAINER", "TRAINING_ROLE": "TRAINER", - "PADDLE_TRAINER_ID": "{}".format(i + trainer_rank_offset), - "PADDLE_TRAINERS_NUM": "{}".format(len(trainer_endpoints)), + "PADDLE_TRAINER_ID": f"{i + trainer_rank_offset}", + "PADDLE_TRAINERS_NUM": f"{len(trainer_endpoints)}", "POD_IP": self.ctx.node.ip, } e.update(_gloo_envs) - log_file = "workerlog.{}".format(i) + log_file = f"workerlog.{i}" self.add_container(envs=e, log_file=log_file) def _build_pod_with_master(self): @@ -120,12 +120,12 @@ def _build_pod_with_master(self): server_num = self.ctx.args.server_num or 1 servers = [ - "{}:{}".format(self.ctx.node.ip, p) + f"{self.ctx.node.ip}:{p}" for p in self.ctx.node.get_free_ports(server_num) ] trainer_num = self.ctx.args.trainer_num or 1 trainers = [ - "{}:{}".format(self.ctx.node.ip, p) + f"{self.ctx.node.ip}:{p}" for p in self.ctx.node.get_free_ports(trainer_num) ] @@ -141,14 +141,14 @@ def _build_pod_with_master(self): ) peer_list, rank = 
self.master.sync_peers( - '/{}/info'.format(self.job.id), + f'/{self.job.id}/info', self.pod.name, data, self.job.replicas, self.pod.rank, ) - self.ctx.logger.debug("sync peers done {}".format(peer_list)) + self.ctx.logger.debug(f"sync peers done {peer_list}") peer_list = [json.loads(i) for i in peer_list] @@ -185,7 +185,7 @@ def _build_pod_with_master(self): for i in range(server_num): e = { - "PADDLE_NNODES": "{}".format(self.job.replicas), + "PADDLE_NNODES": f"{self.job.replicas}", "PADDLE_PSERVERS_IP_PORT_LIST": ",".join(server_endpoints), "PADDLE_TRAINER_ENDPOINTS": ",".join(trainer_endpoints), "PADDLE_PORT": server_endpoints[i + server_rank_offset].split( @@ -193,16 +193,16 @@ def _build_pod_with_master(self): )[1], "PADDLE_ROLE": "PSERVER", "TRAINING_ROLE": "PSERVER", - "PADDLE_TRAINERS_NUM": "{}".format(len(trainer_endpoints)), + "PADDLE_TRAINERS_NUM": f"{len(trainer_endpoints)}", "POD_IP": self.ctx.node.ip, } e.update(_gloo_envs) - log_file = "serverlog.{}".format(i) + log_file = f"serverlog.{i}" self.add_container(envs=e, log_file=log_file) for i in range(trainer_num): e = { - "PADDLE_NNODES": "{}".format(self.job.replicas), + "PADDLE_NNODES": f"{self.job.replicas}", "PADDLE_PSERVERS_IP_PORT_LIST": ",".join(server_endpoints), "PADDLE_TRAINER_ENDPOINTS": ",".join(trainer_endpoints), "PADDLE_PORT": trainer_endpoints[i + trainer_rank_offset].split( @@ -210,12 +210,12 @@ def _build_pod_with_master(self): )[1], "PADDLE_ROLE": "TRAINER", "TRAINING_ROLE": "TRAINER", - "PADDLE_TRAINER_ID": "{}".format(i + trainer_rank_offset), - "PADDLE_TRAINERS_NUM": "{}".format(len(trainer_endpoints)), + "PADDLE_TRAINER_ID": f"{i + trainer_rank_offset}", + "PADDLE_TRAINERS_NUM": f"{len(trainer_endpoints)}", "POD_IP": self.ctx.node.ip, } e.update(_gloo_envs) - log_file = "workerlog.{}".format(i) + log_file = f"workerlog.{i}" self.add_container(envs=e, log_file=log_file) ''' NEW VERSION for i in range(server_num): diff --git a/python/paddle/distributed/launch/controllers/rpc.py b/python/paddle/distributed/launch/controllers/rpc.py index 096aeea0c8d7ea..4825a6c2a8376a 100644 --- a/python/paddle/distributed/launch/controllers/rpc.py +++ b/python/paddle/distributed/launch/controllers/rpc.py @@ -21,7 +21,7 @@ class RpcController(Controller): @classmethod def enable(cls, ctx): if ctx.args.run_mode == ControleMode.RPC: - ctx.logger.debug("{} enabled".format(cls.__name__)) + ctx.logger.debug(f"{cls.__name__} enabled") return True else: return False @@ -43,7 +43,7 @@ def _build_pod_with_master(self): # compatible endpoints = [ - "{}:{}".format(self.ctx.node.ip, p) + f"{self.ctx.node.ip}:{p}" for p in self.ctx.node.get_free_ports(self.pod.replicas) ] @@ -53,12 +53,12 @@ def _build_pod_with_master(self): "rank": self.pod.rank, "replicas": self.pod.replicas, "dtype": self.ctx.node.device.dtype, - "candidate": "{}:{}".format(self.ctx.node.ip, port), + "candidate": f"{self.ctx.node.ip}:{port}", "endpoints": ",".join(endpoints), } ) peer_list, rank = self.master.sync_peers( - "/{}/info".format(self.job.id), + f"/{self.job.id}/info", self.pod.name, data, self.job.replicas, @@ -71,7 +71,7 @@ def _build_pod_with_master(self): peer_list = [json.loads(i) for i in peer_list] - self.ctx.logger.debug("sync peers done {}".format(peer_list)) + self.ctx.logger.debug(f"sync peers done {peer_list}") self.save_pod_log(peer_list) global_size = sum([i["replicas"] for i in peer_list]) @@ -83,8 +83,8 @@ def _build_pod_with_master(self): e = { "PADDLE_MASTER_ENDPOINT": rpc_master, "PADDLE_WORKER_ENDPOINT": endpoints[i], - 
"PADDLE_TRAINER_ID": "{}".format(i + rank_offset), - "PADDLE_TRAINERS_NUM": "{}".format(global_size), + "PADDLE_TRAINER_ID": f"{i + rank_offset}", + "PADDLE_TRAINERS_NUM": f"{global_size}", } log_file = f"workerlog.{i + rank_offset}" self.add_container(envs=e, log_file=log_file) diff --git a/python/paddle/distributed/launch/controllers/watcher.py b/python/paddle/distributed/launch/controllers/watcher.py index 1768450f382a6e..ad7bc5a84b8d81 100644 --- a/python/paddle/distributed/launch/controllers/watcher.py +++ b/python/paddle/distributed/launch/controllers/watcher.py @@ -31,7 +31,7 @@ def __init__(self, ctx): self.gpus = self.ctx.args.devices or self.ctx.node.device.labels if len(self.gpus) > 0: fn = os.path.join( - self.ctx.args.log_dir, "{}.gpu.log".format(self.ctx.args.job_id) + self.ctx.args.log_dir, f"{self.ctx.args.job_id}.gpu.log" ) os.makedirs(os.path.dirname(fn), exist_ok=True) self.gpu_fd = open(fn, 'w') diff --git a/python/paddle/distributed/launch/job/container.py b/python/paddle/distributed/launch/job/container.py index c21487e1ea8b3a..d5331cc3c619b0 100644 --- a/python/paddle/distributed/launch/job/container.py +++ b/python/paddle/distributed/launch/job/container.py @@ -90,7 +90,7 @@ def _valide_env(self): for k, v in self._env.items(): assert isinstance(k, str) and isinstance( v, str - ), 'env {}:{} must be str'.format(k, v) + ), f'env {k}:{v} must be str' def _get_fd(self, pth): if not pth: diff --git a/python/paddle/distributed/launch/plugins/__init__.py b/python/paddle/distributed/launch/plugins/__init__.py index b3b89e0f116df3..23e58b0e65f796 100644 --- a/python/paddle/distributed/launch/plugins/__init__.py +++ b/python/paddle/distributed/launch/plugins/__init__.py @@ -21,7 +21,7 @@ def log(ctx): ctx.logger.info("----------- Configuration ----------------------") for arg, value in sorted(vars(ctx.args).items()): - ctx.logger.info("%s: %s" % (arg, value)) + ctx.logger.info(f"{arg}: {value}") ctx.logger.info("--------------------------------------------------") @@ -46,11 +46,9 @@ def collective_compatible(ctx): if 'PADDLE_TRAINER_ENDPOINTS' in ctx.envs: eps = ctx.envs['PADDLE_TRAINER_ENDPOINTS'].split(',') hosts = {h.split(':')[0] for h in eps} - ctx.args.master = eps[0] if ':' in eps[0] else '{}:6768'.format(eps[0]) + ctx.args.master = eps[0] if ':' in eps[0] else f'{eps[0]}:6768' ctx.args.nnodes = len(hosts) - ctx.logger.info( - 'args reset by env PADDLE_TRAINER_ENDPOINTS\n{}'.format(eps) - ) + ctx.logger.info(f'args reset by env PADDLE_TRAINER_ENDPOINTS\n{eps}') if 'DISTRIBUTED_TRAINER_ENDPOINTS' in ctx.envs: eps = ctx.envs['DISTRIBUTED_TRAINER_ENDPOINTS'].split(',') @@ -58,13 +56,13 @@ def collective_compatible(ctx): ctx.args.master = eps[0] ctx.args.nnodes = len(hosts) ctx.logger.info( - 'args reset by env DISTRIBUTED_TRAINER_ENDPOINTS\n{}'.format(eps) + f'args reset by env DISTRIBUTED_TRAINER_ENDPOINTS\n{eps}' ) def rewrite_host_ip(ctx): if ctx.args.host is not None and "." 
in ctx.args.host: - ctx.logger.warning('Host ip reset to {}'.format(ctx.args.host)) + ctx.logger.warning(f'Host ip reset to {ctx.args.host}') ctx.node.ip = ctx.args.host diff --git a/python/paddle/distributed/launch/utils/kv_client.py b/python/paddle/distributed/launch/utils/kv_client.py index b3839ef65ac660..3da25401c92947 100644 --- a/python/paddle/distributed/launch/utils/kv_client.py +++ b/python/paddle/distributed/launch/utils/kv_client.py @@ -20,14 +20,12 @@ class KVClient: def __init__(self, endpoint='localhost:2379'): self.endpoint = ( - endpoint - if endpoint.startswith("http://") - else "http://{}".format(endpoint) + endpoint if endpoint.startswith("http://") else f"http://{endpoint}" ) def put(self, key, value): - key = key if key.startswith('/') else "/{}".format(key) - u = "{}{}".format(self.endpoint, key) + key = key if key.startswith('/') else f"/{key}" + u = f"{self.endpoint}{key}" try: r = requests.post(u, data=value, timeout=3) if r.status_code == 200: @@ -38,8 +36,8 @@ def put(self, key, value): return False def get(self, key): - key = key if key.startswith('/') else "/{}".format(key) - u = "{}{}".format(self.endpoint, key) + key = key if key.startswith('/') else f"/{key}" + u = f"{self.endpoint}{key}" try: r = requests.get(u, timeout=3) if r.status_code == 200: @@ -51,8 +49,8 @@ def get(self, key): return "" def get_prefix(self, key): - key = key if key.startswith('/') else "/{}".format(key) - u = "{}{}".format(self.endpoint, key) + key = key if key.startswith('/') else f"/{key}" + u = f"{self.endpoint}{key}" try: r = requests.get(u, timeout=3) if r.status_code == 200: @@ -61,8 +59,8 @@ def get_prefix(self, key): return "" def delete(self, key): - key = key if key.startswith('/') else "/{}".format(key) - u = "{}{}".format(self.endpoint, key) + key = key if key.startswith('/') else f"/{key}" + u = f"{self.endpoint}{key}" try: r = requests.delete(u, timeout=3) if r.status_code == 200: diff --git a/python/paddle/distributed/passes/auto_parallel_amp.py b/python/paddle/distributed/passes/auto_parallel_amp.py index 0add755419cb52..6f44c44bf3f96a 100644 --- a/python/paddle/distributed/passes/auto_parallel_amp.py +++ b/python/paddle/distributed/passes/auto_parallel_amp.py @@ -762,9 +762,9 @@ def _update_backward_cast_ops(self): post_ops = find_true_post_op(main_block.ops, op, g.name) if post_ops: raise ValueError( - "The cast op {0}'s output should not be" + f"The cast op {op}'s output should not be" "used by a non-optimize op, however, it" - "is used by {1}".format(op, post_ops[0]) + f"is used by {post_ops[0]}" ) if op == main_block.ops[-1]: @@ -806,7 +806,7 @@ def _update_backward_cast_ops(self): op_idx = find_op_index(main_block.desc, op.desc) if op_idx == -1: - raise ValueError("The op {0} is not in program".format(op)) + raise ValueError(f"The op {op} is not in program") main_block._remove_op(op_idx, sync=False) main_block._sync_with_cpp() diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py index 9ee6cc9e3b1547..f943e58748d070 100644 --- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py +++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py @@ -158,7 +158,7 @@ def _analyze_program(self): continue assert op.has_attr( "ring_id" - ), "Unexpected: comm op [{}] has NOT ring id.".format(str(op)) + ), f"Unexpected: comm op [{str(op)}] has NOT ring id." 
group = ring_id_to_process_group(op.attr("ring_id")) assert ( @@ -483,9 +483,7 @@ def _update_program(self, grad_groups): # create coalesce tensor group.coalesce_var = block.create_var( - name=unique_name.generate( - self.coalesce_prefix + '_{}'.format(i) - ), + name=unique_name.generate(self.coalesce_prefix + f'_{i}'), dtype=group.dtype, persistable=False, stop_gradient=True, @@ -503,7 +501,7 @@ def _update_program(self, grad_groups): scale_op = block.ops[group.scale_op_idx] assert ( scale_op.type == 'scale' - ), "should found scale op but found {}".format(str(scale_op)) + ), f"should found scale op but found {str(scale_op)}" scale_op._rename_input( scale_op.input_arg_names[0], group.coalesce_var.name ) @@ -549,7 +547,7 @@ def _update_program(self, grad_groups): for idx in sorted(remove_op_indices, reverse=True): assert ( block.ops[idx].type in remove_op_types - ), "Unexpected: try to remove op {}".format(str(block.ops[idx])) + ), f"Unexpected: try to remove op {str(block.ops[idx])}" block._remove_op(idx, False) # insert coalesce op @@ -719,9 +717,7 @@ def summary(self, grad_groups=[]): len(individual_grads) ) ) - self._logger.debug( - "individual gradient {}".format(individual_grads) - ) + self._logger.debug(f"individual gradient {individual_grads}") class GradientsGroup: diff --git a/python/paddle/distributed/passes/auto_parallel_fp16.py b/python/paddle/distributed/passes/auto_parallel_fp16.py index b3c1a5f8912e60..7cf10cfbc7fbda 100644 --- a/python/paddle/distributed/passes/auto_parallel_fp16.py +++ b/python/paddle/distributed/passes/auto_parallel_fp16.py @@ -228,7 +228,7 @@ def _mark_op(self, op): if op.desc.original_id() in self.grad_op_to_op_map: fwd_op_id = self.grad_op_to_op_map[op.desc.original_id()] - assert fwd_op_id in self._op_fp16_dict, "{}".format(str(op)) + assert fwd_op_id in self._op_fp16_dict, f"{str(op)}" self._op_fp16_dict[op.desc.original_id()] = self._op_fp16_dict[ fwd_op_id ] @@ -516,11 +516,11 @@ def _insert_backward_cast_ops( continue assert ( len(op.output(grad_slot_name)) == 1 - ), "[{}], Current Op: {}".format(grad_slot_name, str(op)) + ), f"[{grad_slot_name}], Current Op: {str(op)}" grad_name = op.output(grad_slot_name)[0] grad = block.var(grad_name) grad_dist_attr = grad_op_attr.get_output_dist_attr(grad_name) - assert grad_dist_attr is not None, "{}".format(grad_name) + assert grad_dist_attr is not None, f"{grad_name}" ref_mesh = grad_dist_attr.process_mesh ref_mapping = grad_dist_attr.dims_mapping @@ -685,7 +685,7 @@ def _insert_memcopy(block, idx, src_var, dist_context, direction="D2H"): dst_place_type = 0 else: raise NotImplementedError( - "direction [{}] is not supported yet.".format(direction) + f"direction [{direction}] is not supported yet." 
) attrs = {'dst_place_type': dst_place_type} diff --git a/python/paddle/distributed/passes/auto_parallel_recompute.py b/python/paddle/distributed/passes/auto_parallel_recompute.py index bd756ca6fde5e8..b6a13540caf098 100644 --- a/python/paddle/distributed/passes/auto_parallel_recompute.py +++ b/python/paddle/distributed/passes/auto_parallel_recompute.py @@ -293,9 +293,7 @@ def _apply_single_impl(self, main_program, startup_program, context): return for i, (idx1, idx2) in enumerate(segments): - logging.info( - "recompute segment[{}/{}]".format(i + 1, len(segments)) - ) + logging.info(f"recompute segment[{i + 1}/{len(segments)}]") logging.info( "segment start op: [{}]: [{}] [{}]".format( rc_state.ops[idx1].type, diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index f7166c8215d90c..d9c82746ae9fbb 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -757,7 +757,7 @@ def _fuse_overlap_parameter_comm_stage_two(self, sharding_info): group = new_process_group(ranks, force_new_group=True) # NOTE here stream is just a presentation with different name, # it is up to executor to create the exact streams given the name. - stream = "sharding_param_comm_stream{}".format(i) + stream = f"sharding_param_comm_stream{i}" self.param_comm_group_stream_pairs.append( { "comm_group": group, @@ -1182,7 +1182,7 @@ def _overlap_grad_comm( group = new_process_group(ranks, force_new_group=True) # NOTE here stream is just a presentation with different name, # it is up to executor to create the exact streams given the name. - stream = "sharding_grad_comm_stream{}".format(i) + stream = f"sharding_grad_comm_stream{i}" self.grad_comm_group_stream_pairs.append( { "comm_group": group, @@ -1304,12 +1304,8 @@ def _overlap_grad_comm( _logger.info( "Sharding Gradient Hierarchical Communication Optimization." 
) - _logger.info( - "current global rank idx: {}.".format(self.global_rank) - ) - _logger.info( - "local inter node ranks idx: {}.".format(inter_node_ranks) - ) + _logger.info(f"current global rank idx: {self.global_rank}.") + _logger.info(f"local inter node ranks idx: {inter_node_ranks}.") assert ( len(inter_node_ranks) == self.sharding_world_size // nranks_per_node @@ -1320,9 +1316,7 @@ def _overlap_grad_comm( if rank // nranks_per_node == node_idx ] assert len(intra_node_ranks) == nranks_per_node - _logger.info( - "local intra node ranks idx: {}.".format(intra_node_ranks) - ) + _logger.info(f"local intra node ranks idx: {intra_node_ranks}.") inter_node_groups = [] intra_node_groups = [] for _ in range(self.grad_comm_stream_num): @@ -1462,7 +1456,7 @@ def _insert_reduce_op( ): assert ( root_id >= 0 - ), "root id should be a positive int, but now root id is {}".format(root_id) + ), f"root id should be a positive int, but now root id is {root_id}" new_op = block._insert_op_without_sync( insert_idx, type='c_reduce_sum', @@ -1692,7 +1686,7 @@ def partition_parameters(params, group_size, algor="greedy_even"): k, sum([get_var_size(var) for var in v]) ) ) - _logger.info("Params in this rank: {}.".format([var.name for var in v])) + _logger.info(f"Params in this rank: {[var.name for var in v]}.") return rank_to_params @@ -1747,9 +1741,7 @@ def re_order_program(block, param_grads, dist_context): assert len(block.ops) == num_ops # TODO reorder gradient clip order - _logger.info( - "Sharding the Order of param being used: {}.".format(use_order) - ) + _logger.info(f"Sharding the Order of param being used: {use_order}.") return [pname_to_pg_pairs[p] for p in use_order] @@ -1861,11 +1853,9 @@ def get_broadcast_vars_and_param_usage(self, block): def get_param_grad(self, param_name): if not self.is_in_local_shard(param_name): - raise ValueError( - "param[{}] not in current rank.".format(param_name) - ) + raise ValueError(f"param[{param_name}] not in current rank.") if param_name not in self.params_grads: - raise ValueError('param[{}] not in params_grads'.format(param_name)) + raise ValueError(f'param[{param_name}] not in params_grads') return self.params_grads.get(param_name, None) diff --git a/python/paddle/distributed/passes/fuse_all_reduce.py b/python/paddle/distributed/passes/fuse_all_reduce.py index 5bc936b23c10ce..8389599df8fa2d 100755 --- a/python/paddle/distributed/passes/fuse_all_reduce.py +++ b/python/paddle/distributed/passes/fuse_all_reduce.py @@ -50,7 +50,7 @@ def insert_fuse_all_reduce_ops( block, reversed_op_indices, input_var_names, output_var_names, dtype, attrs ): fused_var = block.create_var( - name=unique_name.generate("FusedOutput_{}".format(input_var_names[0])), + name=unique_name.generate(f"FusedOutput_{input_var_names[0]}"), dtype=dtype, ) diff --git a/python/paddle/distributed/passes/pass_base.py b/python/paddle/distributed/passes/pass_base.py index beed64da7b11eb..282bbe41c20545 100755 --- a/python/paddle/distributed/passes/pass_base.py +++ b/python/paddle/distributed/passes/pass_base.py @@ -132,7 +132,7 @@ def impl(cls): def new_pass(name, pass_attrs={}): pass_class = PassBase._REGISTERED_PASSES.get(name) - assert pass_class is not None, "Pass {} is not registered".format(name) + assert pass_class is not None, f"Pass {name} is not registered" pass_obj = pass_class() for k, v in pass_attrs.items(): pass_obj.set_attr(k, v) @@ -230,7 +230,7 @@ def rule(pass_before, pass_after): def _get_list_index(in_pass): assert ( in_pass.name in PassBase._PASS_PROCESS_ORDER_LIST - ), "Pass {} 
is not in _PASS_PROCESS_ORDER_LIST".format(in_pass.name) + ), f"Pass {in_pass.name} is not in _PASS_PROCESS_ORDER_LIST" return PassBase._PASS_PROCESS_ORDER_LIST.index(in_pass.name) diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py index a2db1219a9d6c1..779d17668948b4 100755 --- a/python/paddle/distributed/passes/ps_trainer_pass.py +++ b/python/paddle/distributed/passes/ps_trainer_pass.py @@ -1370,7 +1370,7 @@ def _split_fl_program(self): return party_program_map def _insert_partA_communicate_op(self, block, idx): - comm_info = "forward_joint_{}_{}@fl_ps".format(1, 2) + comm_info = f"forward_joint_{1}_{2}@fl_ps" block._insert_op( idx, type='send_and_recv', @@ -1395,7 +1395,7 @@ def _insert_partA_communicate_op(self, block, idx): return def _insert_partB_communicate_op(self, block, idx): - comm_info = "backward_joint_{}_{}@fl_ps".format(2, 1) + comm_info = f"backward_joint_{2}_{1}@fl_ps" block._insert_op( idx, type='send_and_recv', @@ -1523,7 +1523,7 @@ def _get_partA_program(self, block): bp_op_list + push_sparse_op_list, self.partA_program, 1 ) # 2.1. insert partA recv op - block_input_flag = "backward_joint_{}_{}@fl_ps".format(2, 1) + block_input_flag = f"backward_joint_{2}_{1}@fl_ps" grad_to_block_id = block_input_flag + ":" + str(second_block.idx) attrs = { "message_to_block_id": [grad_to_block_id], @@ -1584,7 +1584,7 @@ def _get_partB_program(self, block): add_send_op(self.ori_main_program, second_block, dense_grad_vars) # 3. insert partB recv op - block_input_flag = "forward_joint_{}_{}@fl_ps".format(1, 2) + block_input_flag = f"forward_joint_{1}_{2}@fl_ps" grad_to_block_id = block_input_flag + ":" + str(second_block.idx) attrs = { "message_to_block_id": [grad_to_block_id], diff --git a/python/paddle/distributed/ps/coordinator.py b/python/paddle/distributed/ps/coordinator.py index d60649ad486f07..205151202ab96f 100755 --- a/python/paddle/distributed/ps/coordinator.py +++ b/python/paddle/distributed/ps/coordinator.py @@ -118,9 +118,7 @@ def set_basic_config(self, role_maker, config, metrics): self.startup_program = paddle.static.default_startup_program() self._client_ptr = fleet.get_fl_client() self._coordinators = self.role_maker._get_coordinator_endpoints() - logger.info( - "fl-ps > coordinator enpoints: {}".format(self._coordinators) - ) + logger.info(f"fl-ps > coordinator enpoints: {self._coordinators}") self.strategy_handlers = {} self.exe = None self.use_cuda = int(self.config.get("runner.use_gpu")) @@ -179,9 +177,7 @@ def set_dump_fields(self): str(param).split(":")[0].strip().split()[-1] for param in persist_vars_list ] - logger.info( - "fl-ps > persist_vars_list: {}".format(persist_vars_name) - ) + logger.info(f"fl-ps > persist_vars_list: {persist_vars_name}") if dump_fields_path is not None: self.main_program._fleet_opt[ @@ -229,7 +225,7 @@ def run(self): def train_loop(self): while self.epoch_idx < self.total_train_epoch: - logger.info("fl-ps > curr epoch idx: {}".format(self.epoch_idx)) + logger.info(f"fl-ps > curr epoch idx: {self.epoch_idx}") self.strategy_handlers['train']() self.strategy_handlers['save_model']() self.barrier() @@ -240,7 +236,7 @@ def train_loop(self): } self.push_fl_client_info_sync(state_info) strategy_dict = self.pull_fl_strategy() - logger.info("fl-ps > recved fl strategy: {}".format(strategy_dict)) + logger.info(f"fl-ps > recved fl strategy: {strategy_dict}") # ......... to implement ...... 
# if strategy_dict['next_state'] == "JOIN": self.strategy_handlers['infer']() @@ -297,7 +293,7 @@ def callback_train(self): epoch_start_time = time.time() self.set_dump_fields() fetch_info = [ - "Epoch {} Var {}".format(self.epoch_idx, var_name) + f"Epoch {self.epoch_idx} Var {var_name}" for var_name in self.metrics ] self.exe.train_from_dataset( @@ -316,7 +312,7 @@ def callback_train(self): def callback_infer(self): fetch_info = [ - "Epoch {} Var {}".format(self.epoch_idx, var_name) + f"Epoch {self.epoch_idx} Var {var_name}" for var_name in self.metrics ] self.exe.infer_from_dataset( @@ -329,7 +325,7 @@ def callback_infer(self): ) def callback_save_model(self): - model_dir = "{}/{}".format(self.save_model_path, self.epoch_idx) + model_dir = f"{self.save_model_path}/{self.epoch_idx}" if fleet.is_first_worker() and self.save_model_path: if is_distributed_env(): fleet.save_persistables(self.exe, model_dir) # save all params @@ -341,11 +337,11 @@ def callback_finish(self): def print_program(self): with open( - "./{}_worker_main_program.prototxt".format(self.worker_index), 'w+' + f"./{self.worker_index}_worker_main_program.prototxt", 'w+' ) as f: f.write(str(self.main_program)) with open( - "./{}_worker_startup_program.prototxt".format(self.worker_index), + f"./{self.worker_index}_worker_startup_program.prototxt", 'w+', ) as f: f.write(str(self.startup_program)) diff --git a/python/paddle/distributed/ps/the_one_ps.py b/python/paddle/distributed/ps/the_one_ps.py index 6e39f8737dd802..af6bb78f541637 100755 --- a/python/paddle/distributed/ps/the_one_ps.py +++ b/python/paddle/distributed/ps/the_one_ps.py @@ -430,7 +430,7 @@ def parse_by_optimizer(self, ctx, context): break if oop is None: - raise ValueError("can not find optimizer for {}".format(grad_name)) + raise ValueError(f"can not find optimizer for {grad_name}") params = [] dims = [] @@ -727,7 +727,7 @@ def _set(self, table_proto): self.common._set(table_proto.common) - print('new table_name: {}'.format(self.common.table_name)) + print(f'new table_name: {self.common.table_name}') all_table_proto = self.context[ "user_defined_strategy" ].sparse_table_configs @@ -1096,7 +1096,7 @@ def _set_basic_info(self, context): self.with_coordinator = self.role_maker._with_coordinator self.coordinator_hosts = [] if self.with_coordinator: - print("fl-ps > all ps addrs: {}".format(self.string_hosts)) + print(f"fl-ps > all ps addrs: {self.string_hosts}") coordinator_endpoints = self.role_maker._get_coordinator_endpoints() for idx, ep in enumerate(coordinator_endpoints): ip, port = ep.split(":") @@ -1192,12 +1192,12 @@ def sync_strategy_envs(): trainer_config = self.context['trainer'] if self.debug: - print("worker_desc: \n{}".format(worker_desc)) + print(f"worker_desc: \n{worker_desc}") print("communicator send_ctx:") for key in send_ctx: - print("{}: {}".format(key, send_ctx[key])) + print(f"{key}: {send_ctx[key]}") for key in dense_map: - print("{}: {}".format(key, dense_map[key])) + print(f"{key}: {dense_map[key]}") kwargs = {} kwargs['need_global_step'] = "0" @@ -1215,9 +1215,9 @@ def sync_strategy_envs(): self._worker.init_worker(worker_desc, self.string_hosts, self.role_id) if not self.is_heter_ps_mode: self.trainer_endpoint = get_trainer_endpoint(self.role_maker) - print("fl-ps > trainer_endpoint: {}".format(self.trainer_endpoint)) - print("fl-ps > with_coordinator? 
{}".format(self.with_coordinator)) - print("fl-ps > coordinator addr: {}".format(self.coordinator_hosts)) + print(f"fl-ps > trainer_endpoint: {self.trainer_endpoint}") + print(f"fl-ps > with_coordinator? {self.with_coordinator}") + print(f"fl-ps > coordinator addr: {self.coordinator_hosts}") if self.with_coordinator: self._worker.init_fl_worker( self.coordinator_hosts, self.role_id, self.trainer_endpoint @@ -1325,8 +1325,8 @@ def _init_coordinator(self, scopes=None): if self._coordinator is None: self._coordinator = Coordinator(self.string_hosts) - print(">>> curr node ip: {}".format(self.coordinator_hosts[0])) - print(">>> all trainer endpoints: {}".format(self.trainer_endpoints)) + print(f">>> curr node ip: {self.coordinator_hosts[0]}") + print(f">>> all trainer endpoints: {self.trainer_endpoints}") self._coordinator.start_coordinator( self.coordinator_hosts[0], self.trainer_endpoints ) @@ -1344,7 +1344,7 @@ def _init_server(self, dirname=None, var_names=None, **kwargs): trainers += len(self.role_maker._get_heter_worker_endpoints()) if self.debug: - print("server_desc: \n{}".format(server_desc)) + print(f"server_desc: \n{server_desc}") self._server = core.DistFleetWrapper() self._server.init_server( diff --git a/python/paddle/distributed/ps/utils/collective_transpiler.py b/python/paddle/distributed/ps/utils/collective_transpiler.py index ea6f23de48d973..90910829fb507d 100644 --- a/python/paddle/distributed/ps/utils/collective_transpiler.py +++ b/python/paddle/distributed/ps/utils/collective_transpiler.py @@ -781,7 +781,7 @@ def _insert_fuse_allreduce_ops(self): # insert coalesce tensor tmp_var = block.create_var( name=unique_name.generate( - 'FusedOutput_{}'.format(segment[0].name) + f'FusedOutput_{segment[0].name}' ), dtype=segment[0].dtype, persistable=False, diff --git a/python/paddle/distributed/ps/utils/public.py b/python/paddle/distributed/ps/utils/public.py index 2ce9e12f3193ce..1fb5174e9c630b 100755 --- a/python/paddle/distributed/ps/utils/public.py +++ b/python/paddle/distributed/ps/utils/public.py @@ -571,7 +571,7 @@ def get_the_one_send_context(attrs, split_dense_table=False, ep_list=None): send_ctx = {} trainer_id = get_role_id(attrs['role_maker']) origin_programs = attrs['origin_main_programs'] - print("is_heter_ps_mode? {}".format(split_dense_table)) + print(f"is_heter_ps_mode? 
{split_dense_table}") idx = 0 distibuted_varnames = get_sparse_tablenames(origin_programs, True) @@ -589,7 +589,7 @@ def get_the_one_send_context(attrs, split_dense_table=False, ep_list=None): splited_varname = [] for i in range(len(ep_list)): - splited_varname.append("{}.block{}".format(param_name, i)) + splited_varname.append(f"{param_name}.block{i}") is_distributed = ( True if param_name in distibuted_varnames else False @@ -1169,7 +1169,7 @@ def get_communicate_var_info( shape = var.shape recv_var_dim = -1 * reduce(lambda x, y: x * y, shape) input_var_reshape_dim.append(recv_var_dim) - input_var_reshape_name.append("{}.input_reshape@Heter".format(name)) + input_var_reshape_name.append(f"{name}.input_reshape@Heter") info = { "input_var_reshape_dim": input_var_reshape_dim, diff --git a/python/paddle/distributed/rpc/rpc.py b/python/paddle/distributed/rpc/rpc.py index 01298356f7f2b7..e1e2de7c7b04ab 100644 --- a/python/paddle/distributed/rpc/rpc.py +++ b/python/paddle/distributed/rpc/rpc.py @@ -67,7 +67,7 @@ def _gen_endpoint(): node = Node() ip = node.get_host_ip() free_port = node.get_free_port() - return "{}:{}".format(ip, free_port) + return f"{ip}:{free_port}" def init_rpc(name, rank=None, world_size=None, master_endpoint=None): @@ -103,7 +103,7 @@ def init_rpc(name, rank=None, world_size=None, master_endpoint=None): worker_endpoint = os.getenv("PADDLE_WORKER_ENDPOINT", None) if worker_endpoint is None: worker_endpoint = _gen_endpoint() - logger.info("Trainer {}: worker endpoint: {}".format(rank, worker_endpoint)) + logger.info(f"Trainer {rank}: worker endpoint: {worker_endpoint}") master_endpoint = ( master_endpoint if master_endpoint is not None @@ -135,7 +135,7 @@ def init_rpc(name, rank=None, world_size=None, master_endpoint=None): # ensure that all the workers are started _barrier_never_timeout(rank, world_size) core.rpc_start_client() - logger.info("Trainer {}: Init RPC done!".format(rank)) + logger.info(f"Trainer {rank}: Init RPC done!") def rpc_sync(to, fn, args=None, kwargs=None, timeout=_DEFAULT_RPC_TIMEOUT): @@ -293,7 +293,7 @@ def shutdown(): _barrier_never_timeout(rank, world_size) core.rpc_stop_worker() _del_barrier_store() - logger.info("Trainer {}: rpc shutdown!".format(rank)) + logger.info(f"Trainer {rank}: rpc shutdown!") def get_worker_info(name): diff --git a/python/paddle/distributed/sharding/group_sharded.py b/python/paddle/distributed/sharding/group_sharded.py index 0cea2d851ad50a..2bbc93259eaa87 100644 --- a/python/paddle/distributed/sharding/group_sharded.py +++ b/python/paddle/distributed/sharding/group_sharded.py @@ -226,7 +226,7 @@ def save_group_sharded_model(model, output, optimizer=None): ) assert not os.path.isfile( output - ), "Saving directory ({}) should be a directory, not a file".format(output) + ), f"Saving directory ({output}) should be a directory, not a file" os.makedirs(output, exist_ok=True) output_model = os.path.join(output, "model.pdmodel") if isinstance(model, GroupShardedStage2): diff --git a/python/paddle/distributed/transpiler/collective.py b/python/paddle/distributed/transpiler/collective.py index b60ae1266e3c9b..06c2b679d4da32 100644 --- a/python/paddle/distributed/transpiler/collective.py +++ b/python/paddle/distributed/transpiler/collective.py @@ -953,7 +953,7 @@ def _insert_fuse_allreduce_ops(self): # insert coalesce tensor tmp_var = block.create_var( name=unique_name.generate( - 'FusedOutput_{}'.format(segment[0].name) + f'FusedOutput_{segment[0].name}' ), dtype=segment[0].dtype, persistable=False, diff --git 
a/python/paddle/distributed/transpiler/distribute_transpiler.py b/python/paddle/distributed/transpiler/distribute_transpiler.py index 359ffc520db174..35d851815f4b85 100644 --- a/python/paddle/distributed/transpiler/distribute_transpiler.py +++ b/python/paddle/distributed/transpiler/distribute_transpiler.py @@ -392,7 +392,7 @@ def _transpile_nccl2( for i in range(1, self.config.nccl_comm_num): startup_program.global_block().create_var( - name="NCCLID_{}".format(i), + name=f"NCCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) @@ -400,12 +400,12 @@ def _transpile_nccl2( if self.config.use_hierarchical_allreduce: for i in range(0, self.config.nccl_comm_num): startup_program.global_block().create_var( - name="Hierarchical_inter_NCCLID_{}".format(i), + name=f"Hierarchical_inter_NCCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) startup_program.global_block().create_var( - name="Hierarchical_exter_NCCLID_{}".format(i), + name=f"Hierarchical_exter_NCCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) @@ -805,7 +805,7 @@ def transpile( if self.config.completely_not_async and self.trainer_num > 1: send_varnames = [ - "{}.trainer_{}".format(var.name, self.trainer_id) + f"{var.name}.trainer_{self.trainer_id}" for var in splited_vars ] else: diff --git a/python/paddle/distributed/utils/launch_utils.py b/python/paddle/distributed/utils/launch_utils.py index 9ea9475ce710ef..9e2d39469aa16f 100644 --- a/python/paddle/distributed/utils/launch_utils.py +++ b/python/paddle/distributed/utils/launch_utils.py @@ -179,10 +179,10 @@ def trainers_endpoints(self): def pods_endpoints(self): r = [] for pod in self.pods: - ep = "{}:{}".format(pod.addr, pod.port) + ep = f"{pod.addr}:{pod.port}" assert ( pod.port is not None and pod.addr is not None - ), "{} not a valid endpoint".format(ep) + ), f"{ep} not a valid endpoint" r.append(ep) return r @@ -200,7 +200,7 @@ def __init__(self): self.endpoint = None def __str__(self): - return "{}".format(self.endpoint) + return f"{self.endpoint}" def __eq__(self, j): return self.endpint == j.endpoint @@ -268,20 +268,16 @@ def __eq__(self, pod): or self.addr != pod.addr or self.port != pod.port ): - logger.debug("pod {} != {}".format(self, pod)) + logger.debug(f"pod {self} != {pod}") return False if len(self.trainers) != len(pod.trainers): - logger.debug( - "trainers {} != {}".format(self.trainers, pod.trainers) - ) + logger.debug(f"trainers {self.trainers} != {pod.trainers}") return False for i in range(len(self.trainers)): if self.trainers[i] != pod.trainers[i]: - logger.debug( - "trainer {} != {}".format(self.trainers[i], pod.trainers[i]) - ) + logger.debug(f"trainer {self.trainers[i]} != {pod.trainers[i]}") return False return True @@ -295,9 +291,9 @@ def parse_response(self, res_pods): def get_visible_gpus(self): r = "" for g in self.gpus: - r += "{},".format(g) + r += f"{g}," - assert r != "", "this pod {} can't see any gpus".format(self) + assert r != "", f"this pod {self} can't see any gpus" r = r[:-1] return r @@ -336,7 +332,7 @@ def terminate_local_procs(procs): p.proc.terminate() if p.log_fn: p.log_fn.close() - logger.debug("terminate process id:{}".format(p.proc.pid)) + logger.debug(f"terminate process id:{p.proc.pid}") # wait all process terminiated time.sleep(3) @@ -380,7 +376,7 @@ def add_arguments(argname, type, default, help, argparser, **kwargs): default=default, type=type, help=help + ' Default: %(default)s.', - **kwargs + **kwargs, ) @@ -481,15 +477,15 @@ def start_local_trainers( proc_env = _prepare_trainer_env(cluster, t) 
current_env.update(proc_env) - logger.debug("trainer proc env:{}".format(current_env)) + logger.debug(f"trainer proc env:{current_env}") cmd = [sys.executable, "-u", training_script] + training_script_args - logger.info("start trainer proc:{} env:{}".format(cmd, proc_env)) + logger.info(f"start trainer proc:{cmd} env:{proc_env}") fn = None if log_dir is not None: - os.system("mkdir -p {}".format(log_dir)) + os.system(f"mkdir -p {log_dir}") fn = open("%s/workerlog.%d" % (log_dir, idx), "a") proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn) else: @@ -572,5 +568,5 @@ def watch_local_trainers(procs, nranks): def _print_arguments(args): print("----------- Configuration Arguments -----------") for arg, value in sorted(vars(args).items()): - print("%s: %s" % (arg, value)) + print(f"{arg}: {value}") print("------------------------------------------------") diff --git a/python/paddle/fft.py b/python/paddle/fft.py index ac6bd0b68e54ba..1ce18f120c19e4 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -63,12 +63,10 @@ def _check_normalization(norm): def _check_fft_n(n): if not isinstance(n, int): raise ValueError( - "Invalid FFT argument n({}), it shoule be an integer.".format(n) + f"Invalid FFT argument n({n}), it shoule be an integer." ) if n <= 0: - raise ValueError( - "Invalid FFT argument n({}), it should be positive.".format(n) - ) + raise ValueError(f"Invalid FFT argument n({n}), it should be positive.") def _check_fft_shape(x, s): @@ -85,17 +83,13 @@ def _check_fft_shape(x, s): ) for size in s: if not isinstance(size, int) or size <= 0: - raise ValueError( - "FFT sizes {} contains invalid value ({})".format(s, size) - ) + raise ValueError(f"FFT sizes {s} contains invalid value ({size})") def _check_fft_axis(x, axis): ndim = x.ndim if not isinstance(axis, int): - raise ValueError( - "Invalid FFT axis ({}), it shoule be an integer.".format(axis) - ) + raise ValueError(f"Invalid FFT axis ({axis}), it shoule be an integer.") if axis < -ndim or axis >= ndim: raise ValueError( "Invalid FFT axis ({}), it should be in range [-{}, {})".format( @@ -166,9 +160,7 @@ def _normalize_axes(x, axes): def _check_at_least_ndim(x, rank): if x.ndim < rank: - raise ValueError( - "The rank of the input ({}) should >= {}".format(x.ndim, rank) - ) + raise ValueError(f"The rank of the input ({x.ndim}) should >= {rank}") # public APIs 1d diff --git a/python/paddle/fluid/tests/cpp_extension/test_cpp_extension_jit.py b/python/paddle/fluid/tests/cpp_extension/test_cpp_extension_jit.py index 1dd5c96954682e..9ed330a2b4ac7f 100644 --- a/python/paddle/fluid/tests/cpp_extension/test_cpp_extension_jit.py +++ b/python/paddle/fluid/tests/cpp_extension/test_cpp_extension_jit.py @@ -114,7 +114,7 @@ def _test_nullable_tensor(self): np.testing.assert_array_equal( x, x_np, - err_msg='extension out: {},\n numpy out: {}'.format(x, x_np), + err_msg=f'extension out: {x},\n numpy out: {x_np}', ) def _test_optional_tensor(self): @@ -127,7 +127,7 @@ def _test_optional_tensor(self): np.testing.assert_array_equal( x, x_np, - err_msg='extension out: {},\n numpy out: {}'.format(x, x_np), + err_msg=f'extension out: {x},\n numpy out: {x_np}', ) diff --git a/python/paddle/fluid/tests/cpp_extension/test_cpp_extension_setup.py b/python/paddle/fluid/tests/cpp_extension/test_cpp_extension_setup.py index 29280008dfc3a0..5c8c91ed303566 100644 --- a/python/paddle/fluid/tests/cpp_extension/test_cpp_extension_setup.py +++ b/python/paddle/fluid/tests/cpp_extension/test_cpp_extension_setup.py @@ -225,7 +225,7 @@ def 
_test_nullable_tensor(self): np.testing.assert_array_equal( x, x_np, - err_msg='extension out: {},\n numpy out: {}'.format(x, x_np), + err_msg=f'extension out: {x},\n numpy out: {x_np}', ) def _test_optional_tensor(self): @@ -240,7 +240,7 @@ def _test_optional_tensor(self): np.testing.assert_array_equal( x, x_np, - err_msg='extension out: {},\n numpy out: {}'.format(x, x_np), + err_msg=f'extension out: {x},\n numpy out: {x_np}', ) def _test_static(self): diff --git a/python/paddle/fluid/tests/unittests/ascend_multi_process_collective.py b/python/paddle/fluid/tests/unittests/ascend_multi_process_collective.py index 113c4286f352ec..572e6caa1d7f4a 100644 --- a/python/paddle/fluid/tests/unittests/ascend_multi_process_collective.py +++ b/python/paddle/fluid/tests/unittests/ascend_multi_process_collective.py @@ -39,9 +39,7 @@ def train(prefix): ) print(details) - with open( - "multi_process_{}.check_{}.log".format(prefix, trainer_id), "w" - ) as f: + with open(f"multi_process_{prefix}.check_{trainer_id}.log", "w") as f: f.write(details) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py index 6585daa784ef80..cd11f2fabf7707 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/auto_parallel_relaunch_with_gpt_planner.py @@ -151,7 +151,7 @@ def train(): }, fetch_list=[loss], ) - print("step: %s, loss: %f" % (step, loss_print[0])) + print(f"step: {step}, loss: {loss_print[0]:f}") else: exe.run( distributed_main_program, @@ -163,7 +163,7 @@ def train(): "loss_mask": loss_mask, }, ) - print("step: %s, loss: %s" % (step, "None")) + print("step: {}, loss: {}".format(step, "None")) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/quantization_pass_unittest.py b/python/paddle/fluid/tests/unittests/auto_parallel/quantization_pass_unittest.py index 953c29206b47e9..c744f583904dbf 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/quantization_pass_unittest.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel/quantization_pass_unittest.py @@ -139,7 +139,7 @@ def check_export(self, exe): path_prefix = os.path.join( self.temp_dir.name, - 'inf_dist{}'.format(paddle.distributed.get_rank()), + f'inf_dist{paddle.distributed.get_rank()}', ) [ inference_program, diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py index 9e5f4198d58a71..1ef9634f8db2a3 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py @@ -162,7 +162,7 @@ def test_mlp_dp(self): label = np.random.random(size=(80, 1)).astype('float32') for step in range(20): if step == 10: - path = "./output_dp{}".format(paddle.distributed.get_rank()) + path = f"./output_dp{paddle.distributed.get_rank()}" os.makedirs(path, exist_ok=True) save_distributed_checkpoint(dist_main_prog, path, path) @@ -196,7 +196,7 @@ def test_mlp_dp(self): ) self.assertEqual(last_res, res[0]) - shutil.rmtree("./output_dp{}".format(paddle.distributed.get_rank())) + shutil.rmtree(f"./output_dp{paddle.distributed.get_rank()}") def test_mlp_mp(self): global _global_parallel_strategy @@ -214,7 +214,7 @@ def test_mlp_mp(self): label = np.random.random(size=(80, 1)).astype('float32') for step in range(20): if 
step == 10: - path = "./output_mp{}".format(paddle.distributed.get_rank()) + path = f"./output_mp{paddle.distributed.get_rank()}" os.makedirs(path, exist_ok=True) save_distributed_checkpoint(dist_main_prog, path, path) @@ -248,7 +248,7 @@ def test_mlp_mp(self): ) self.assertEqual(last_res, res[0]) - shutil.rmtree("./output_mp{}".format(paddle.distributed.get_rank())) + shutil.rmtree(f"./output_mp{paddle.distributed.get_rank()}") def test_mlp_pp(self): global _global_parallel_strategy @@ -270,7 +270,7 @@ def test_mlp_pp(self): label = np.random.random(size=(80, 1)).astype('float32') for step in range(20): if step == 10: - path = "./output_pp{}".format(paddle.distributed.get_rank()) + path = f"./output_pp{paddle.distributed.get_rank()}" os.makedirs(path, exist_ok=True) save_distributed_checkpoint(dist_main_prog, path, path) @@ -325,7 +325,7 @@ def test_mlp_pp(self): if paddle.distributed.get_rank() in [1]: self.assertEqual(last_res, res[0]) - shutil.rmtree("./output_pp{}".format(paddle.distributed.get_rank())) + shutil.rmtree(f"./output_pp{paddle.distributed.get_rank()}") if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/benchmark.py b/python/paddle/fluid/tests/unittests/benchmark.py index e782969a32b32e..e8dcffe3ca564c 100644 --- a/python/paddle/fluid/tests/unittests/benchmark.py +++ b/python/paddle/fluid/tests/unittests/benchmark.py @@ -87,8 +87,8 @@ def timeit_output(self, iters=100): elapses.append(self.timeit_output_with_place(place, iters)) for place, elapse in zip(places, elapses): print( - "One pass of ({2}_op) at {0} cost {1}".format( - str(place), elapse, self.op_type + "One pass of ({}_op) at {} cost {}".format( + self.op_type, str(place), elapse ) ) @@ -111,7 +111,7 @@ def timeit_grad(self, iters=100): elapses.append(self.timeit_grad_with_place(place, iters)) for place, elapse in zip(places, elapses): print( - "One pass of ({2}_grad_op) at {0} cost {1}".format( - str(place), elapse, self.op_type + "One pass of ({}_grad_op) at {} cost {}".format( + self.op_type, str(place), elapse ) ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_auto_checkpoint_dist_basic.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_auto_checkpoint_dist_basic.py index 0abec955868d20..d21a9d3625ebd1 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_auto_checkpoint_dist_basic.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_auto_checkpoint_dist_basic.py @@ -88,7 +88,7 @@ def test_distributed_basic(self): for i in acp.train_epoch_range(3, 0): o = acp._get_train_epoch_range() name = o.name - logger.info("_run_save_0 name:{} epoch_no:{}".format(o.name, i)) + logger.info(f"_run_save_0 name:{o.name} epoch_no:{i}") for data in data_loader(): fetch = exe.run( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py index e68cc2006a46b9..1f917f937c0eb4 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_communicator_half_async.py @@ -139,7 +139,7 @@ def runTest(self): os.environ["TRAINING_ROLE"] = "PSERVER" _python = sys.executable - ps_cmd = "{} {}".format(_python, server_file) + ps_cmd = f"{_python} {server_file}" ps_proc = subprocess.Popen( ps_cmd.strip().split(" "), stdout=subprocess.PIPE, diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_hdfs1.py 
b/python/paddle/fluid/tests/unittests/collective/fleet/test_hdfs1.py index 10b500893da483..3de5e04a249d70 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_hdfs1.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_hdfs1.py @@ -34,18 +34,16 @@ def test_timeout(self): fs.mkdirs(dst) fs.mkdirs(dst + "/" + src) output = "" - cmd = "{} -mv {} {}".format(fs._base_cmd, src, dst) + cmd = f"{fs._base_cmd} -mv {src} {dst}" try: fs.mv(src, dst, test_exists=False) - self.assertFalse( - 1, "can't execute cmd:{} output:{}".format(cmd, output) - ) + self.assertFalse(1, f"can't execute cmd:{cmd} output:{output}") except FSTimeOut as e: - print("execute mv {} to {} timeout".format(src, dst)) + print(f"execute mv {src} to {dst} timeout") ret, output = fluid.core.shell_execute_cmd(cmd, 6 * 1000, 2 * 1000) self.assertNotEqual(ret, 0) - print("second mv ret:{} output:{}".format(ret, output)) + print(f"second mv ret:{ret} output:{output}") def test_is_dir(self): fs = HDFSClient( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_qat.py b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_qat.py index 538eb50aafa414..5457e97b27df95 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_qat.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/test_parallel_dygraph_qat.py @@ -80,14 +80,14 @@ def start_local_trainers( current_env.update(proc_env) - print("trainer proc env:{}".format(current_env)) + print(f"trainer proc env:{current_env}") if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': cmd = "python -m coverage run --branch -p " + training_script else: cmd = "python -u " + training_script - print("start trainer proc:{} env:{}".format(cmd, proc_env)) + print(f"start trainer proc:{cmd} env:{proc_env}") fn = None @@ -130,7 +130,7 @@ def run_2gpu(self, target_file_name, eager_mode=True): alive = watch_local_trainers(procs, cluster.trainers_endpoints()) if not alive: - print("Local procs complete, POD info:{}".format(pod)) + print(f"Local procs complete, POD info:{pod}") break time.sleep(3) diff --git a/python/paddle/fluid/tests/unittests/collective/test_communication_api_base.py b/python/paddle/fluid/tests/unittests/collective/test_communication_api_base.py index 250c97493c2d8e..fdb5841128932b 100644 --- a/python/paddle/fluid/tests/unittests/collective/test_communication_api_base.py +++ b/python/paddle/fluid/tests/unittests/collective/test_communication_api_base.py @@ -64,11 +64,11 @@ def tearDown(self): temp_log_dir_name = os.path.basename(self._log_dir.name) dir_name = os.path.join(self._save_log_dir, temp_log_dir_name) if not os.path.isdir(dir_name): - print("The running logs will copy to {}".format(dir_name)) + print(f"The running logs will copy to {dir_name}") shutil.copytree(self._log_dir.name, dir_name) else: raise RuntimeError( - "Directory {} exists, failed to save log.".format(dir_name) + f"Directory {dir_name} exists, failed to save log." 
) diff --git a/python/paddle/fluid/tests/unittests/collective/test_gen_nccl_id_op.py b/python/paddle/fluid/tests/unittests/collective/test_gen_nccl_id_op.py index de761ea93597b5..36f00327cc1653 100644 --- a/python/paddle/fluid/tests/unittests/collective/test_gen_nccl_id_op.py +++ b/python/paddle/fluid/tests/unittests/collective/test_gen_nccl_id_op.py @@ -40,7 +40,7 @@ def run_gen_ncc_id(attr): for i in range(1, nccl_comm_num): startup_program.global_block().create_var( - name="NCCLID_{}".format(i), + name=f"NCCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) @@ -48,12 +48,12 @@ def run_gen_ncc_id(attr): if use_hallreduce: for i in range(0, nccl_comm_num): startup_program.global_block().create_var( - name="Hierarchical_inter_NCCLID_{}".format(i), + name=f"Hierarchical_inter_NCCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) startup_program.global_block().create_var( - name="Hierarchical_exter_NCCLID_{}".format(i), + name=f"Hierarchical_exter_NCCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) @@ -89,7 +89,7 @@ def gen_nccl_id(self, nranks=2): port = self._dist_ut_port_0 trainers = [] for i in range(nranks): - trainers.append('127.0.0.1:{}'.format(port + i)) + trainers.append(f'127.0.0.1:{port + i}') attr = { "trainers": trainers, diff --git a/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py b/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py index 7120fd512ac8a5..4fdc4430242f57 100644 --- a/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py +++ b/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py @@ -183,10 +183,10 @@ def prepare_fake_data(file_nums=4, file_lines=500): Create fake data with same type as avazu_ctr_data """ file_dir = tempfile.mkdtemp() - warnings.warn("Fake data write in {}".format(file_dir)) + warnings.warn(f"Fake data write in {file_dir}") for file_index in range(file_nums): with open( - os.path.join(file_dir, "ctr_train_data_part_{}".format(file_index)), + os.path.join(file_dir, f"ctr_train_data_part_{file_index}"), 'w+', ) as fin: file_str = "" @@ -194,9 +194,7 @@ def prepare_fake_data(file_nums=4, file_lines=500): for line_index in range(file_lines - 1): file_str += gen_fake_line() fin.write(file_str) - warnings.warn( - "Write done ctr_train_data_part_{}".format(file_index) - ) + warnings.warn(f"Write done ctr_train_data_part_{file_index}") file_list = [os.path.join(file_dir, x) for x in os.listdir(file_dir)] assert len(file_list) == file_nums diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 19c99f1c0fe8e7..c290dbff3a5d85 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -208,7 +208,7 @@ def do_distributed_testing(self, fleet): self.test_reader.reset() pass_time = time.time() - pass_start - message = "Distributed Test Succeed, Using Time {}\n".format(pass_time) + message = f"Distributed Test Succeed, Using Time {pass_time}\n" fleet.util.print_on_rank(message, 0) def do_pyreader_training(self, fleet): diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py index 178084cba6501f..5dd23f1352559e 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py @@ -162,7 +162,7 @@ def do_dataset_training(self, fleet): batch_size = 128 filelist = 
fleet.util.get_file_shard(train_file_list) - print("filelist: {}".format(filelist)) + print(f"filelist: {filelist}") # config dataset dataset = fluid.DatasetFactory().create_dataset() @@ -186,7 +186,7 @@ def do_dataset_training(self, fleet): debug=int(os.getenv("Debug", "0")), ) pass_time = time.time() - pass_start - print("do_dataset_training done. using time {}".format(pass_time)) + print(f"do_dataset_training done. using time {pass_time}") exe.close() def do_dataset_heter_training(self, fleet): @@ -212,7 +212,7 @@ def do_dataset_heter_training(self, fleet): ) exe.close() pass_time = time.time() - pass_start - print("do_dataset_heter_training done. using time {}".format(pass_time)) + print(f"do_dataset_heter_training done. using time {pass_time}") # for epoch_id in range(1): # pass_start = time.time() diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py index a8f068a8c1586f..a36a5b7b7328fb 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py @@ -92,7 +92,7 @@ def net(self, args, batch_size=4, lr=0.01): elif initializer == 2: init = paddle.nn.initializer.Normal() else: - raise ValueError("error initializer code: {}".format(initializer)) + raise ValueError(f"error initializer code: {initializer}") entry = paddle.distributed.ShowClickEntry("show", "click") dnn_layer_dims = [128, 64, 32] diff --git a/python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py b/python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py index a1a9bb05f917f3..ee61f8cf04bf30 100644 --- a/python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_fused_lamb_test_base.py @@ -220,7 +220,7 @@ def pd_dtype_to_np_dtype(pd_dtype): elif pd_dtype == paddle.float16: return np.float16 else: - raise ValueError("supported dtype {}".format(pd_dtype)) + raise ValueError(f"supported dtype {pd_dtype}") def gen_random_grad_tensor(grad): np_dtype = pd_dtype_to_np_dtype(grad.dtype) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py index f30ce4827888b1..8743e944a33fab 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/auto_parallel_pass_test_base.py @@ -106,9 +106,7 @@ def _run_gpu_main(self, model, apply_pass, dump_file, **kwargs): fetch_values = exe.run(main_prog, fetch_list=outputs) if paddle.distributed.get_rank() == 0: output_dict = OrderedDict(zip(outputs, fetch_values)) - print( - 'batch {}, outputs {}'.format(batch_id, output_dict) - ) + print(f'batch {batch_id}, outputs {output_dict}') all_fetch_values.append(fetch_values) batch_id += 1 except paddle.fluid.core.EOFException: diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py b/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py index bb9cb4a8860cdb..70b5356071d58e 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/dist_pass_test_base.py @@ -158,7 +158,7 @@ def _run_gpu_main(self, model, apply_pass, dump_file, **kwargs): fetch_values = exe.run(main_prog, feed=feed, 
fetch_list=outputs) if paddle.distributed.get_rank() == 0: output_dict = OrderedDict(zip(outputs, fetch_values)) - print('batch {}, outputs {}'.format(batch_id, output_dict)) + print(f'batch {batch_id}, outputs {output_dict}') all_fetch_values.append(fetch_values) with open(dump_file, "wb") as f: pickle.dump(all_fetch_values, f) @@ -186,9 +186,9 @@ def _distributed_launch(self, model, apply_pass, gpus=None, **kwargs): pid = os.getpid() if apply_pass: - output_dir = "test_with_pass_{}".format(pid) + output_dir = f"test_with_pass_{pid}" else: - output_dir = "test_without_pass_{}".format(pid) + output_dir = f"test_without_pass_{pid}" remove_path_if_exists(output_dir) os.makedirs(output_dir, mode=0o777) @@ -252,7 +252,7 @@ def _distributed_launch(self, model, apply_pass, gpus=None, **kwargs): results = [] for i in range(num_gpus): - dump_file = '{0}/{1}.bin'.format(output_dir, i) + dump_file = f'{output_dir}/{i}.bin' self.assertTrue( os.path.exists(dump_file), "Pass test failed with apply_pass = {}, please view log in {}".format( diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/pass_run_main.py b/python/paddle/fluid/tests/unittests/distributed_passes/pass_run_main.py index 54b21815dcce6a..eefd437116e5f1 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/pass_run_main.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/pass_run_main.py @@ -72,7 +72,7 @@ def run_main(args): with open(args.input_file, "rb") as f: kwargs = pickle.load(f) - output_file = "{}/{}.bin".format(args.output_dir, rank) + output_file = f"{args.output_dir}/{rank}.bin" if args.model_file: with open(args.model_file, "rb") as f: model = pickle.load(f) diff --git a/python/paddle/fluid/tests/unittests/distribution/parameterize.py b/python/paddle/fluid/tests/unittests/distribution/parameterize.py index 324a5e4c6a0cda..a2ee301cef55f3 100644 --- a/python/paddle/fluid/tests/unittests/distribution/parameterize.py +++ b/python/paddle/fluid/tests/unittests/distribution/parameterize.py @@ -134,7 +134,7 @@ def delete_patches_if_need(func): def default_name_func(func, num, p): base_name = func.__name__ - name_suffix = "_%s" % (num,) + name_suffix = f"_{num}" if len(p.args) > 0 and isinstance(p.args[0], str): name_suffix += "_" + to_safe_name(p.args[0]) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/decos.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/decos.py index dcea6e82fe339b..d8bf86e931bd46 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/decos.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/decos.py @@ -32,7 +32,7 @@ def deco2(x=0): def inner_deco(func): @wraps(func) def inner(*args, **kwargs): - print('in decos.deco2, added {}'.format(x)) + print(f'in decos.deco2, added {x}') _t = paddle.to_tensor(x) _tt = func(*args, **kwargs) return paddle.add(_t, _tt) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index 81bc871a33123b..925da4140a239c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -583,9 +583,7 @@ def reader(): (video_feat, gt_iou_map, gt_start, gt_end, video_idx) ) else: - raise NotImplementedError( - 'mode {} not implemented'.format(mode) - ) + raise NotImplementedError(f'mode {mode} not implemented') if len(batch_out) == args.batch_size: yield batch_out batch_out = [] @@ -627,7 +625,7 @@ def val_bmn(model, args): 
] print( - '[VALID] iter {} '.format(batch_id) + f'[VALID] iter {batch_id} ' + '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.format( '%f' % float(avg_loss), '%f' % float(tem_loss), @@ -726,7 +724,7 @@ def train_bmn(self, args, place, to_static): batch_id % args.log_interval == 0 ): print( - '[TRAIN] Epoch {}, iter {} '.format(epoch, batch_id) + f'[TRAIN] Epoch {epoch}, iter {batch_id} ' + '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.format( '%f' % float(avg_loss), '%f' % float(tem_loss), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py index 13fb22421d3658..39f4504375467c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_build_strategy.py @@ -45,7 +45,7 @@ def verify_predict(self): dy_pre, st_pre, rtol=1e-05, - err_msg='dy_pre:\n {}\n, st_pre: \n{}.'.format(dy_pre, st_pre), + err_msg=f'dy_pre:\n {dy_pre}\n, st_pre: \n{st_pre}.', ) np.testing.assert_allclose( dy_jit_pre, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py index f18b4093ebfeca..34da0ebc2c71f4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_container.py @@ -99,7 +99,7 @@ def _run(self, to_static): load_out, out, rtol=1e-05, - err_msg='load_out is {}\\st_out is {}'.format(load_out, out), + err_msg=f'load_out is {load_out}\\st_out is {out}', ) return out diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index a84932fb9b9de5..3568ab104d8798 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -708,7 +708,7 @@ def test_train(self): self.assertTrue( assert_func(dy_out, st_out), - msg="dy_out:\n {}\n st_out:\n{}".format(dy_out, st_out), + msg=f"dy_out:\n {dy_out}\n st_out:\n{st_out}", ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_decorator_transform.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_decorator_transform.py index 946718da74c32f..6add36fd9e09ed 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_decorator_transform.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_decorator_transform.py @@ -54,7 +54,7 @@ def deco3(x=3): def inner_deco(func): @wraps(func) def inner(*args, **kwargs): - print('in deco3, added {}'.format(x)) + print(f'in deco3, added {x}') _t = paddle.to_tensor(x) _tt = func(*args, **kwargs) return paddle.add(_t, _tt) @@ -68,7 +68,7 @@ def deco4(func=None, x=0): def decorated(pyfunc): @wraps(pyfunc) def inner_deco(*args, **kwargs): - print('in deco4, added {}'.format(x)) + print(f'in deco4, added {x}') _t = paddle.to_tensor(x) _tt = pyfunc(*args, **kwargs) return paddle.add(_t, _tt) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py index e16966d875b8f9..ace50c9fc6b180 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py @@ -259,7 
+259,7 @@ def set_message(self): self.filepath ), 'inner_func()', - 'File "{}", line 28, in inner_func'.format(self.filepath), + f'File "{self.filepath}", line 28, in inner_func', 'def inner_func():', 'paddle.tensor.fill_constant(shape=[1, 2], value=9, dtype="int")', '<--- HERE', @@ -312,7 +312,7 @@ def set_exception_type(self): def set_message(self): self.expected_message = [ - 'File "{}", line 91, in forward'.format(self.filepath), + f'File "{self.filepath}", line 91, in forward', '@paddle.jit.to_static', 'def forward(self):', 'self.test_func()', @@ -376,7 +376,7 @@ def set_exception_type(self): def set_message(self): self.expected_message = [ - 'File "{}", line 80, in forward'.format(self.filepath), + f'File "{self.filepath}", line 80, in forward', 'def forward(self, x):', 'y = self._linear(x)', 'z = paddle.tensor.fill_constant(shape=[1, 2], value=9, dtype="int")', diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py index e53284e0e909e2..709b9aaca29273 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_isinstance.py @@ -107,7 +107,7 @@ def _test_model(self, model): dy_out, st_out, rtol=1e-05, - err_msg='dy_out:\n {}\n st_out:\n{}'.format(dy_out, st_out), + err_msg=f'dy_out:\n {dy_out}\n st_out:\n{st_out}', ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py index 250f81b99535fd..55f4681a990055 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_layer_hook.py @@ -99,7 +99,7 @@ def test_hook(self): st_out, load_out, rtol=1e-05, - err_msg='load_out is {}\nstatic_res is {}'.format(load_out, st_out), + err_msg=f'load_out is {load_out}\nstatic_res is {st_out}', ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index 73e5825186629a..48041926604e71 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -670,7 +670,7 @@ def assert_same_loss(self, model_name): dy_out, st_out, rtol=1e-05, - err_msg='dy_out: {}, st_out: {}'.format(dy_out, st_out), + err_msg=f'dy_out: {dy_out}, st_out: {st_out}', ) def assert_same_predict(self, model_name): @@ -693,7 +693,7 @@ def assert_same_predict(self, model_name): dy_pre, st_pre, rtol=1e-05, - err_msg='dy_pre:\n {}\n, st_pre: \n{}.'.format(dy_pre, st_pre), + err_msg=f'dy_pre:\n {dy_pre}\n, st_pre: \n{st_pre}.', ) np.testing.assert_allclose( dy_jit_pre, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py index 86a4fcb7a112a8..1f8ab755f8939f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_print.py @@ -39,7 +39,7 @@ def dyfunc_print_ndarray(x): @to_static def dyfunc_print_with_format(x): x_t = paddle.to_tensor(x) - print("PrintTensor: {}".format(x_t)) + print(f"PrintTensor: {x_t}") # 4. 
print Tensor with format 2 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 761e580b1147f8..7738bcee08e857 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -394,7 +394,7 @@ def verify_predict(self): dy_pre, st_pre, rtol=1e-05, - err_msg='dy_pre:\n {}\n, st_pre: \n{}.'.format(dy_pre, st_pre), + err_msg=f'dy_pre:\n {dy_pre}\n, st_pre: \n{st_pre}.', ) np.testing.assert_allclose( dy_jit_pre, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py index 5c79f8882619a6..bf332809ff8f0e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py @@ -393,7 +393,7 @@ def verify_predict(self): dy_pre, st_pre, rtol=1e-05, - err_msg='dy_pre:\n {}\n, st_pre: \n{}.'.format(dy_pre, st_pre), + err_msg=f'dy_pre:\n {dy_pre}\n, st_pre: \n{st_pre}.', ) np.testing.assert_allclose( dy_jit_pre, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index e718a6a8a72695..b6c90418d29103 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -537,7 +537,7 @@ def verify_predict(self): dy_pre, st_pre, rtol=1e-05, - err_msg='dy_pre:\n {}\n, st_pre: \n{}.'.format(dy_pre, st_pre), + err_msg=f'dy_pre:\n {dy_pre}\n, st_pre: \n{st_pre}.', ) np.testing.assert_allclose( dy_jit_pre, @@ -573,25 +573,25 @@ def test_check_result(self): pred_1, pred_2, rtol=1e-05, - err_msg='static pred: {} \ndygraph pred: {}'.format(pred_1, pred_2), + err_msg=f'static pred: {pred_1} \ndygraph pred: {pred_2}', ) np.testing.assert_allclose( loss_1, loss_2, rtol=1e-05, - err_msg='static loss: {} \ndygraph loss: {}'.format(loss_1, loss_2), + err_msg=f'static loss: {loss_1} \ndygraph loss: {loss_2}', ) np.testing.assert_allclose( acc1_1, acc1_2, rtol=1e-05, - err_msg='static acc1: {} \ndygraph acc1: {}'.format(acc1_1, acc1_2), + err_msg=f'static acc1: {acc1_1} \ndygraph acc1: {acc1_2}', ) np.testing.assert_allclose( acc5_1, acc5_2, rtol=1e-05, - err_msg='static acc5: {} \ndygraph acc5: {}'.format(acc5_1, acc5_2), + err_msg=f'static acc5: {acc5_1} \ndygraph acc5: {acc5_2}', ) self.verify_predict() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py index b730c39625ca52..5de0d47387931a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_sentiment.py @@ -382,7 +382,7 @@ def train_model(self, model_type='cnn_net'): dy_out, st_out, rtol=1e-05, - err_msg='dy_out:\n {}\n st_out:\n {}'.format(dy_out, st_out), + err_msg=f'dy_out:\n {dy_out}\n st_out:\n {st_out}', ) def test_train(self): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py index d0837245460030..b4fd5f25d7ca80 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_slice.py @@ 
-120,7 +120,7 @@ def setUp(self): paddle.disable_static() def init_input(self): - self.input = np.random.random((3)).astype('int32') + self.input = np.random.random(3).astype('int32') def init_dygraph_func(self): self.dygraph_func = test_slice_without_control_flow diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py index 1332e0cb86bf6e..1b74bf64cfc89e 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py @@ -64,7 +64,7 @@ def create_attr_dict(yaml_config): def merge_configs(cfg, sec, args_dict): - assert sec in CONFIG_SECS, "invalid config section {}".format(sec) + assert sec in CONFIG_SECS, f"invalid config section {sec}" sec_dict = getattr(cfg, sec.upper()) for k, v in args_dict.items(): if v is None: @@ -78,11 +78,9 @@ def merge_configs(cfg, sec, args_dict): def print_configs(cfg, mode): - logger.info( - "---------------- {:>5} Arguments ----------------".format(mode) - ) + logger.info(f"---------------- {mode:>5} Arguments ----------------") for sec, sec_items in cfg.items(): - logger.info("{}:".format(sec)) + logger.info(f"{sec}:") for k, v in sec_items.items(): - logger.info(" {}:{}".format(k, v)) + logger.info(f" {k}:{v}") logger.info("-------------------------------------------------") diff --git a/python/paddle/fluid/tests/unittests/eager_op_test.py b/python/paddle/fluid/tests/unittests/eager_op_test.py index f8f6c8023da81e..983531f83be1ac 100644 --- a/python/paddle/fluid/tests/unittests/eager_op_test.py +++ b/python/paddle/fluid/tests/unittests/eager_op_test.py @@ -828,7 +828,7 @@ def create_var( if (name not in np_list) and var_proto.dispensable: continue if name not in np_list: - assert var_proto.intermediate, "{} not found".format(name) + assert var_proto.intermediate, f"{name} not found" v = block.create_var( dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR ) @@ -839,7 +839,7 @@ def create_var( if var_proto.duplicable: assert isinstance( np_list[name], list - ), "Duplicable {} should be set as list".format(name) + ), f"Duplicable {name} should be set as list" var_list = [] slot_name = name for (name, np_value) in np_list[slot_name]: @@ -1545,7 +1545,7 @@ def find_imperative_actual(target_name, dygraph_outs, place): return dygraph_outs[name][i] self.assertTrue( False, - "Found failed {} {}".format(dygraph_outs.keys(), target_name), + f"Found failed {dygraph_outs.keys()} {target_name}", ) def find_imperative_expect(target_name, dygraph_outs, place): @@ -1558,7 +1558,7 @@ def find_imperative_expect(target_name, dygraph_outs, place): return dygraph_outs[name][i] self.assertTrue( False, - "Found failed {} {}".format(dygraph_outs.keys(), target_name), + f"Found failed {dygraph_outs.keys()} {target_name}", ) def find_actual(target_name, fetch_list): @@ -1568,7 +1568,7 @@ def find_actual(target_name, fetch_list): if var_name == target_name ] self.assertTrue( - len(found) == 1, "Found {} {}".format(len(found), target_name) + len(found) == 1, f"Found {len(found)} {target_name}" ) return found[0] @@ -1579,7 +1579,7 @@ def find_expect(target_name, fetch_list): if var_name == target_name ] self.assertTrue( - len(found) == 1, "Found {} {}".format(len(found), target_name) + len(found) == 1, f"Found {len(found)} {target_name}" ) return found[0] @@ -1995,7 +1995,7 @@ def find_fetch_index(target_name, fetch_list): else: self.assertTrue( len(found) == 1, - "Found {} 
{}".format(len(found), target_name), + f"Found {len(found)} {target_name}", ) return found[0] diff --git a/python/paddle/fluid/tests/unittests/find_ports.py b/python/paddle/fluid/tests/unittests/find_ports.py index 868efe0ee8e9d0..66a3c465c786df 100644 --- a/python/paddle/fluid/tests/unittests/find_ports.py +++ b/python/paddle/fluid/tests/unittests/find_ports.py @@ -24,14 +24,14 @@ def train(): worker_endpoints = worker_endpoints_env trainers_num = len(worker_endpoints.split(',')) - name = "worker_endpoints:{}".format(worker_endpoints) + name = f"worker_endpoints:{worker_endpoints}" print(name) file_name = os.getenv("PADDLE_LAUNCH_LOG") if file_name is None or file_name == "": print("can't find PADDLE_LAUNCH_LOG") sys.exit(1) - with open("{}_{}.log".format(file_name, trainer_id), "w") as f: + with open(f"{file_name}_{trainer_id}.log", "w") as f: f.write(name) diff --git a/python/paddle/fluid/tests/unittests/hccl_tools.py b/python/paddle/fluid/tests/unittests/hccl_tools.py index 1ae52f2845e80d..f839da2a786ebb 100644 --- a/python/paddle/fluid/tests/unittests/hccl_tools.py +++ b/python/paddle/fluid/tests/unittests/hccl_tools.py @@ -96,7 +96,7 @@ def main(): # visible_devices visible_devices = args.visible_devices.split(',') - print('visible_devices:{}'.format(visible_devices)) + print(f'visible_devices:{visible_devices}') # server_id ip = get_host_ip() @@ -106,14 +106,14 @@ def main(): server_id = ip else: raise ValueError("please input server ip!") - print('server_id:{}'.format(server_id)) + print(f'server_id:{server_id}') # device_num first_num = int(args.device_num[1]) last_num = int(args.device_num[3]) if first_num < 0 or last_num > 8: raise ValueError( - "device num {} must be in range [0,8] !".format(args.device_num) + f"device num {args.device_num} must be in range [0,8] !" 
) if first_num > last_num: raise ValueError( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py b/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py index 49ff0e5685bd49..6cb997a14c2a9c 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/auto_scan_test.py @@ -318,7 +318,7 @@ def run_test(self, quant=False, *args, **kwargs): except Exception as e: self.fail_log( self.inference_config_str(pred_config) - + '\033[1;31m \nERROR INFO: {}\033[0m'.format(str(e)) + + f'\033[1;31m \nERROR INFO: {str(e)}\033[0m' ) if not ignore_flag: status = False @@ -351,7 +351,7 @@ def check_op_version(self): if pass_name not in self.available_passes_in_framework: continue if not PassVersionChecker.IsCompatible(pass_name): - self.fail_log('{} version check failed.'.format(pass_name)) + self.fail_log(f'{pass_name} version check failed.') status = False return status @@ -429,7 +429,7 @@ def run_test(prog_config): loop_func = given(generator())(run_test) if reproduce is not None: loop_func = reproduce(loop_func) - logging.info("Start to running test of {}".format(type(self))) + logging.info(f"Start to running test of {type(self)}") loop_func() logging.info( "===================Statistical Information===================" @@ -439,11 +439,9 @@ def run_test(prog_config): self.num_ran_programs + self.num_invalid_programs ) ) - logging.info( - "Number of Invalid Programs: {}".format(self.num_invalid_programs) - ) - logging.info("Number of Ran Programs: {}".format(self.num_ran_programs)) - logging.info("Number of Ignore Tests: {}".format(self.num_ignore_tests)) + logging.info(f"Number of Invalid Programs: {self.num_invalid_programs}") + logging.info(f"Number of Ran Programs: {self.num_ran_programs}") + logging.info(f"Number of Ignore Tests: {self.num_ignore_tests}") successful_ran_programs = int( self.num_ran_programs - self.num_ignore_tests / max(self.num_predictor_kinds, 1) @@ -554,7 +552,7 @@ def run_test(self, quant=False, prog_configs=None): except Exception as e: self.fail_log( self.inference_config_str(pred_config) - + '\033[1;31m \nERROR INFO: {}\033[0m'.format(str(e)) + + f'\033[1;31m \nERROR INFO: {str(e)}\033[0m' ) if not ignore_flag: status = False @@ -868,7 +866,7 @@ def random_to_skip(): except Exception as e: self.fail_log( self.inference_config_str(pred_config) - + '\033[1;31m \nERROR INFO: {}\033[0m'.format(str(e)) + + f'\033[1;31m \nERROR INFO: {str(e)}\033[0m' ) all_passes = False @@ -957,7 +955,7 @@ def run_test(self, quant=False, *args, **kwargs): except Exception as e: self.fail_log( self.inference_config_str(pred_config) - + '\033[1;31m \nERROR INFO: {}\033[0m'.format(str(e)) + + f'\033[1;31m \nERROR INFO: {str(e)}\033[0m' ) if not ignore_flag: status = False diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py index 9923d2cb2da2c7..886f46dbfd76e6 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool3d_op.py @@ -93,9 +93,7 @@ def check_output(self): elif self.precision == AnalysisConfig.Precision.Half: atol, rtol = (1e-3, 1e-3) else: - raise ValueError( - "Unsupported precision {}".format(self.precision) - ) + raise ValueError(f"Unsupported precision {self.precision}") self.check_output_with_option(use_gpu, atol=atol, rtol=rtol) self.assertTrue( 
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py index 9abbac9ec76d9f..0885ff6acc319a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_pool_op.py @@ -95,9 +95,7 @@ def check_output(self): elif self.precision == AnalysisConfig.Precision.Half: atol, rtol = (1e-3, 1e-3) else: - raise ValueError( - "Unsupported precision {}".format(self.precision) - ) + raise ValueError(f"Unsupported precision {self.precision}") self.check_output_with_option(use_gpu, atol=atol, rtol=rtol) self.assertTrue( PassVersionChecker.IsCompatible('tensorrt_subgraph_pass') diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_xpu_embedding_with_eltwise_add_xpu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_xpu_embedding_with_eltwise_add_xpu_fuse_pass.py index e4d545934136a6..0882af986a7653 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_xpu_embedding_with_eltwise_add_xpu_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_xpu_embedding_with_eltwise_add_xpu_fuse_pass.py @@ -128,7 +128,7 @@ def generate_input(*args, **kwargs): def gen_lookup_table_inputs_data(*args, **kwargs): inputs = {} for i in range(lookup_table_num): - input_name = "lookup_table_ids_{}".format(i) + input_name = f"lookup_table_ids_{i}" inputs[input_name] = TensorConfig( data_gen=partial(generate_input) ) @@ -140,7 +140,7 @@ def gen_lookup_table_inputs_data(*args, **kwargs): def gen_lookup_table_weights_data(): weights = {} for i in range(lookup_table_num): - w_name = "lookup_table_w_{}".format(i) + w_name = f"lookup_table_w_{i}" weights[w_name] = TensorConfig(shape=w_shape) return weights diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_v2_mkldnn_op.py index 88a415a903c0a5..f0e0c95d0f9c5a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_bilinear_interp_v2_mkldnn_op.py @@ -222,7 +222,7 @@ class TestBf16Case(parent): def init_data_type(self): self.dtype = np.uint16 - TestBf16Case.__name__ = "{0}_{1}".format(parent.__name__, "BF16") + TestBf16Case.__name__ = "{}_{}".format(parent.__name__, "BF16") globals()[TestBf16Case.__name__] = TestBf16Case diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_clip_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_clip_mkldnn_op.py index 3b9a979a7ff6d4..b26523b159b5de 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_clip_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_clip_mkldnn_op.py @@ -131,7 +131,7 @@ def test_check_grad(self): check_dygraph=False, ) - cls_name = "{0}_{1}".format(parent.__name__, "BF16") + cls_name = "{}_{}".format(parent.__name__, "BF16") TestClipBF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestClipBF16OneDNNOp diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py index 239348a7ceb7bd..1978764d0043fb 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_concat_int8_mkldnn_op.py @@ -109,9 +109,9 @@ def 
init_shape(self): self.x1_shape = [2, 3, 5, 6] self.x2_shape = [2, 3, 5, 7] - cls_name_1 = "{0}_axis_{1}".format(parent.__name__, "1") - cls_name_2 = "{0}_axis_{1}".format(parent.__name__, "2") - cls_name_3 = "{0}_axis_{1}".format(parent.__name__, "3") + cls_name_1 = "{}_axis_{}".format(parent.__name__, "1") + cls_name_2 = "{}_axis_{}".format(parent.__name__, "2") + cls_name_3 = "{}_axis_{}".format(parent.__name__, "3") TestAxis1Case.__name__ = cls_name_1 TestAxis2Case.__name__ = cls_name_2 TestAxis3Case.__name__ = cls_name_3 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py index cff36d2c341fb8..bed1724308b7b6 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py @@ -385,15 +385,15 @@ class TestU8S8ResCase(parent): def init_data_type(self): init_data_type_with_fusion(self, np.uint8, "", True) - cls_name_s8u8 = "{0}_relu_{1}_residual_0".format(parent.__name__, "1") - cls_name_s8s8 = "{0}_relu_{1}_residual_0".format(parent.__name__, "0") - cls_name_u8s8 = "{0}_relu_{1}_residual_0".format(parent.__name__, "0") - cls_name_u8u8 = "{0}_relu_{1}_residual_0".format(parent.__name__, "1") + cls_name_s8u8 = "{}_relu_{}_residual_0".format(parent.__name__, "1") + cls_name_s8s8 = "{}_relu_{}_residual_0".format(parent.__name__, "0") + cls_name_u8s8 = "{}_relu_{}_residual_0".format(parent.__name__, "0") + cls_name_u8u8 = "{}_relu_{}_residual_0".format(parent.__name__, "1") - cls_name_s8s8_re_1 = "{0}_relu_{1}_residual_{2}".format( + cls_name_s8s8_re_1 = "{}_relu_{}_residual_{}".format( parent.__name__, "0", "1" ) - cls_name_u8s8_re_1 = "{0}_relu_{1}_residual_{2}".format( + cls_name_u8s8_re_1 = "{}_relu_{}_residual_{}".format( parent.__name__, "0", "1" ) TestS8U8Case.__name__ = cls_name_s8u8 @@ -416,7 +416,7 @@ class TestS8U8ResCase(parent): def init_data_type(self): init_data_type_with_fusion(self, np.int8, "relu", True) - cls_name_s8u8_re_1 = "{0}_relu_{1}_residual_{2}".format( + cls_name_s8u8_re_1 = "{}_relu_{}_residual_{}".format( parent.__name__, "1", "1" ) TestS8U8ResCase.__name__ = cls_name_s8u8_re_1 diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py index 2d500a2e4fb244..225bebaec0c7ec 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py @@ -145,7 +145,7 @@ def test_check_grad(self): user_defined_grad_outputs=[self.dout], ) - cls_name = "{0}_{1}".format(parent.__name__, "Expand_v2_BF16") + cls_name = "{}_{}".format(parent.__name__, "Expand_v2_BF16") TestExpandV2BF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestExpandV2BF16OneDNNOp diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_flatten_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_flatten_mkldnn_op.py index 8eceaec1db3f03..7acc73f66f8cf7 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_flatten_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_flatten_mkldnn_op.py @@ -110,7 +110,7 @@ def test_check_grad(self): user_defined_grad_outputs=[self.dout], ) - cls_name = "{0}_{1}".format(parent.__name__, "Flatten2_BF16") + cls_name = "{}_{}".format(parent.__name__, "Flatten2_BF16") TestFlatten2BF16OneDNNOp.__name__ = cls_name globals()[cls_name] = 
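Reviewer note (not part of the patch): most of the MKL-DNN hunks touch the same idiom: a create_test_class-style helper subclasses a parent test, renames the subclass, and registers it in globals() so unittest discovery finds it, and the patch only shortens the "{0}_{1}" placeholders to "{}_{}". A condensed, hypothetical version of that idiom, runnable on its own:

import unittest


def create_bf16_class(parent):
    # Derive a variant of `parent`, give it a distinct name, and publish it in
    # the module namespace so the test runner collects it automatically.
    class TestBf16Case(parent):
        def init_data_type(self):
            self.dtype = "bfloat16"  # the real tests assign np.uint16 here

    cls_name = "{}_{}".format(parent.__name__, "BF16")  # post-patch spelling
    TestBf16Case.__name__ = cls_name
    globals()[cls_name] = TestBf16Case


class TestInterpBase(unittest.TestCase):
    def init_data_type(self):
        self.dtype = "float32"

    def test_dtype(self):
        self.init_data_type()
        self.assertIn(self.dtype, ("float32", "bfloat16"))


create_bf16_class(TestInterpBase)  # registers TestInterpBase_BF16 alongside the base class

if __name__ == "__main__":
    unittest.main(verbosity=2)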
TestFlatten2BF16OneDNNOp @@ -145,7 +145,7 @@ def test_check_grad(self): user_defined_grad_outputs=[convert_float_to_uint16(self.dout)], ) - cls_name = "{0}_{1}".format(parent.__name__, "Flatten_BF16") + cls_name = "{}_{}".format(parent.__name__, "Flatten_BF16") TestFlattenBF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestFlattenBF16OneDNNOp diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py index f824576a28ab55..1e6a276f2fe4e3 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_matmul_v2_mkldnn_op.py @@ -446,7 +446,7 @@ def calculate_grads(self): self.dout = dout - cls_name = "{0}_{1}".format(parent.__name__, "BF16") + cls_name = "{}_{}".format(parent.__name__, "BF16") TestMatMulV2Bf16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestMatMulV2Bf16OneDNNOp diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_mkldnn_op.py index 0a3ba9fa518314..3a0a62d8195b34 100755 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_mkldnn_op.py @@ -187,8 +187,8 @@ class TestUint8Case(parent): def init_data_type(self): self.dtype = np.uint8 - TestInt8Case.__name__ = "{0}_{1}".format(parent.__name__, "INT8") - TestUint8Case.__name__ = "{0}_{1}".format(parent.__name__, "UINT8") + TestInt8Case.__name__ = "{}_{}".format(parent.__name__, "INT8") + TestUint8Case.__name__ = "{}_{}".format(parent.__name__, "UINT8") globals()[TestInt8Case.__name__] = TestInt8Case globals()[TestUint8Case.__name__] = TestUint8Case diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_v2_mkldnn_op.py index 7a5d8048a58f25..ccdaef6df58882 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_nearest_interp_v2_mkldnn_op.py @@ -213,9 +213,9 @@ class TestUint8Case(parent): def init_data_type(self): self.dtype = np.uint8 - TestBf16Case.__name__ = "{0}_{1}".format(parent.__name__, "BF16") - TestInt8Case.__name__ = "{0}_{1}".format(parent.__name__, "INT8") - TestUint8Case.__name__ = "{0}_{1}".format(parent.__name__, "UINT8") + TestBf16Case.__name__ = "{}_{}".format(parent.__name__, "BF16") + TestInt8Case.__name__ = "{}_{}".format(parent.__name__, "INT8") + TestUint8Case.__name__ = "{}_{}".format(parent.__name__, "UINT8") globals()[TestBf16Case.__name__] = TestBf16Case globals()[TestInt8Case.__name__] = TestInt8Case globals()[TestUint8Case.__name__] = TestUint8Case diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_onnx_format_quantization_mobilenetv1.py b/python/paddle/fluid/tests/unittests/mkldnn/test_onnx_format_quantization_mobilenetv1.py index 9999c61d053277..bf5c55ae93869d 100755 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_onnx_format_quantization_mobilenetv1.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_onnx_format_quantization_mobilenetv1.py @@ -195,7 +195,7 @@ def download_data(self, data_urls, data_md5s, folder_name, is_model=True): file_name = data_urls[0].split('/')[-1] zip_path = os.path.join(self.cache_folder, file_name) - print('Data is downloaded at {0}'.format(zip_path)) + print(f'Data is downloaded at 
{zip_path}') self.cache_unzipping(data_cache_folder, zip_path) return data_cache_folder @@ -250,7 +250,7 @@ def run_program( cnt += len(data) if (batch_id + 1) % 100 == 0: - print("{0} images,".format(batch_id + 1)) + print(f"{batch_id + 1} images,") sys.stdout.flush() if (batch_id + 1) == iterations: break @@ -311,7 +311,7 @@ def run_test( model_cache_folder = self.download_data(data_urls, data_md5s, model) print( - "Start INT8 post training quantization for {0} on {1} images ...".format( + "Start INT8 post training quantization for {} on {} images ...".format( model, sample_iterations * batch_size ) ) @@ -327,7 +327,7 @@ def run_test( ) print( - "Start FP32 inference for {0} on {1} images ...".format( + "Start FP32 inference for {} on {} images ...".format( model, infer_iterations * batch_size ) ) @@ -338,7 +338,7 @@ def run_test( ) print( - "Start INT8 inference for {0} on {1} images ...".format( + "Start INT8 inference for {} on {} images ...".format( model, infer_iterations * batch_size ) ) @@ -349,14 +349,14 @@ def run_test( is_quantized_model=True, ) - print("---Post training quantization of {} method---".format(algo)) + print(f"---Post training quantization of {algo} method---") print( - "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.".format( + "FP32 {}: batch_size {}, throughput {} images/second, latency {} second, accuracy {}.".format( model, batch_size, fp32_throughput, fp32_latency, fp32_acc1 ) ) print( - "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n".format( + "INT8 {}: batch_size {}, throughput {} images/second, latency {} second, accuracy {}.\n".format( model, batch_size, int8_throughput, int8_latency, int8_acc1 ) ) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py index a17db8c18add32..a9de4b76605f85 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_int8_mkldnn_op.py @@ -119,8 +119,8 @@ class TestU8Case(parent): def init_data_type(self): self.dtype = np.uint8 - cls_name_s8 = "{0}_{1}".format(parent.__name__, "mkldnn_s8") - cls_name_u8 = "{0}_{1}".format(parent.__name__, "mkldnn_u8") + cls_name_s8 = "{}_{}".format(parent.__name__, "mkldnn_s8") + cls_name_u8 = "{}_{}".format(parent.__name__, "mkldnn_u8") TestS8Case.__name__ = cls_name_s8 TestU8Case.__name__ = cls_name_u8 globals()[cls_name_s8] = TestS8Case diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py index 8313767ae9957a..fb571108eae68b 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_pool2d_mkldnn_op.py @@ -38,7 +38,7 @@ def init_ceil_mode(self): def init_data_type(self): self.dtype = np.float32 - cls_name = "{0}_{1}".format(parent.__name__, "MKLDNNCeilModeCast") + cls_name = "{}_{}".format(parent.__name__, "MKLDNNCeilModeCast") TestMKLDNNPool2DUseCeilCase.__name__ = cls_name globals()[cls_name] = TestMKLDNNPool2DUseCeilCase @@ -56,7 +56,7 @@ def init_kernel_type(self): def init_data_type(self): self.dtype = np.float32 - cls_name = "{0}_{1}".format(parent.__name__, "MKLDNNOp") + cls_name = "{}_{}".format(parent.__name__, "MKLDNNOp") TestMKLDNNCase.__name__ = cls_name globals()[cls_name] = TestMKLDNNCase diff --git 
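Reviewer note (not part of the patch): the FP32/INT8 summary prints above consume fp32_throughput, fp32_latency and fp32_acc1 values whose computation lies outside these hunks. The sketch below shows one plausible way such figures are derived (the model name, timings and accuracy are invented), only to give the reformatted message concrete inputs:

import time

batch_size, infer_iterations = 25, 8   # invented values
t0 = time.time()
for _ in range(infer_iterations):
    time.sleep(0.01)                   # stands in for one predictor run over a batch
elapsed = time.time() - t0

fp32_throughput = batch_size * infer_iterations / elapsed  # images per second
fp32_latency = elapsed / infer_iterations                  # seconds per batch
fp32_acc1 = 0.7105                                         # placeholder top-1 accuracy

print(
    "FP32 {}: batch_size {}, throughput {} images/second, latency {} second, accuracy {}.".format(
        "MobileNetV1", batch_size, fp32_throughput, fp32_latency, fp32_acc1
    )
)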
a/python/paddle/fluid/tests/unittests/mkldnn/test_prelu_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_prelu_mkldnn_op.py index 13f1d0a7be76a0..39e0cd788e576a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_prelu_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_prelu_mkldnn_op.py @@ -171,7 +171,7 @@ def test_check_grad(self): check_dygraph=False, ) - cls_name = "{0}_{1}".format(parent.__name__, "BF16") + cls_name = "{}_{}".format(parent.__name__, "BF16") TestPReluBF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestPReluBF16OneDNNOp diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_mkldnn_op.py index 8e607d5a4480f6..701d4ad28a077a 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_reshape_mkldnn_op.py @@ -216,7 +216,7 @@ def test_check_grad(self): check_dygraph=False, ) - cls_name = "{0}_{1}".format(parent.__name__, "Reshape2_BF16") + cls_name = "{}_{}".format(parent.__name__, "Reshape2_BF16") TestReshape2BF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestReshape2BF16OneDNNOp @@ -242,7 +242,7 @@ def test_check_grad(self): check_dygraph=False, ) - cls_name = "{0}_{1}".format(parent.__name__, "Reshape_BF16") + cls_name = "{}_{}".format(parent.__name__, "Reshape_BF16") TestReshapeBF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestReshapeBF16OneDNNOp diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_slice_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_slice_mkldnn_op.py index b33ac0e34a0dd2..593d993f8470d8 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_slice_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_slice_mkldnn_op.py @@ -233,7 +233,7 @@ def test_check_grad(self): user_defined_grad_outputs=[convert_float_to_uint16(self.dout)], ) - cls_name = "{0}_{1}".format(parent.__name__, "BF16") + cls_name = "{}_{}".format(parent.__name__, "BF16") TestSliceBF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestSliceBF16OneDNNOp diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_split_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_split_mkldnn_op.py index c1105b2096b671..1cdfcc549e5a68 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_split_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_split_mkldnn_op.py @@ -151,8 +151,8 @@ def init_data_type(self): def test_check_grad(self): pass - TestInt8Case.__name__ = "{0}_{1}".format(parent.__name__, "INT8") - TestUint8Case.__name__ = "{0}_{1}".format(parent.__name__, "UINT8") + TestInt8Case.__name__ = "{}_{}".format(parent.__name__, "INT8") + TestUint8Case.__name__ = "{}_{}".format(parent.__name__, "UINT8") globals()[TestInt8Case.__name__] = TestUint8Case globals()[TestUint8Case.__name__] = TestInt8Case diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_squeeze2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_squeeze2_mkldnn_op.py index 8a4bddc7f512a4..ea914f62438d83 100755 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_squeeze2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_squeeze2_mkldnn_op.py @@ -150,7 +150,7 @@ def test_check_grad(self): user_defined_grad_outputs=[self.dout], ) - cls_name = "{0}_{1}".format(parent.__name__, "Squeeze2_BF16") + cls_name = "{}_{}".format(parent.__name__, "Squeeze2_BF16") TestSqueeze2BF16OneDNNOp.__name__ = cls_name 
globals()[cls_name] = TestSqueeze2BF16OneDNNOp @@ -165,7 +165,7 @@ def set_outputs(self): def test_check_output(self): self.check_output_with_place(core.CPUPlace()) - cls_name = "{0}_{1}".format(parent.__name__, "Squeeze_BF16") + cls_name = "{}_{}".format(parent.__name__, "Squeeze_BF16") TestSqueezeBF16OneDNNOp.__name__ = cls_name globals()[cls_name] = TestSqueezeBF16OneDNNOp diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_stack_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_stack_mkldnn_op.py index 8fd51798f9d440..9d2996cd7cbb29 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_stack_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_stack_mkldnn_op.py @@ -35,7 +35,7 @@ def initParameters(self): def getInputNames(self): input_names = [] for i in range(self.num_inputs): - input_names.append('x{}'.format(i)) + input_names.append(f'x{i}') return input_names def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/multi_process.py b/python/paddle/fluid/tests/unittests/multi_process.py index 48ad6546024670..0a010a6cbd3e79 100644 --- a/python/paddle/fluid/tests/unittests/multi_process.py +++ b/python/paddle/fluid/tests/unittests/multi_process.py @@ -34,9 +34,7 @@ def train(prefix): ) print(name) - with open( - "multi_process_{}.check_{}.log".format(prefix, trainer_id), "w" - ) as f: + with open(f"multi_process_{prefix}.check_{trainer_id}.log", "w") as f: f.write(name) @@ -62,7 +60,7 @@ def train_abort(prefix): ) print(name) with open( - "multi_process_{}.check_{}.log".format(prefix, trainer_id), "w" + f"multi_process_{prefix}.check_{trainer_id}.log", "w" ) as f: f.write(name) raise @@ -78,9 +76,7 @@ def train_abort(prefix): ) print(name) - with open( - "multi_process_{}.check_{}.log".format(prefix, trainer_id), "w" - ) as f: + with open(f"multi_process_{prefix}.check_{trainer_id}.log", "w") as f: f.write(name) diff --git a/python/paddle/fluid/tests/unittests/nproc_process.py b/python/paddle/fluid/tests/unittests/nproc_process.py index 85b9c294c5f392..0c98c4a45dfd61 100644 --- a/python/paddle/fluid/tests/unittests/nproc_process.py +++ b/python/paddle/fluid/tests/unittests/nproc_process.py @@ -38,7 +38,7 @@ def train(prefix): ) print(name) - with open("{}.check_{}.log".format(prefix, trainer_id), "w") as f: + with open(f"{prefix}.check_{trainer_id}.log", "w") as f: f.write(name) diff --git a/python/paddle/fluid/tests/unittests/ps/fl_ps_trainer.py b/python/paddle/fluid/tests/unittests/ps/fl_ps_trainer.py index 2ce0b6caf255c9..7eacd5a0e33cab 100755 --- a/python/paddle/fluid/tests/unittests/ps/fl_ps_trainer.py +++ b/python/paddle/fluid/tests/unittests/ps/fl_ps_trainer.py @@ -28,16 +28,16 @@ def get_dataset(inputs, config, pipe_cmd, role="worker"): reader_thread_num = int(config.get('runner.reader_thread_num')) dataset.set_thread(reader_thread_num) train_files_path = config.get('runner.train_files_path') - print('train_data_files:{}'.format(train_files_path)) + print(f'train_data_files:{train_files_path}') file_list = [ os.path.join(train_files_path, x) for x in os.listdir(train_files_path) ] if role == "worker": file_list = fleet.util.get_file_shard(file_list) - print("worker file list: {}".format(file_list)) + print(f"worker file list: {file_list}") elif role == "heter_worker": file_list = fleet.util.get_heter_file_shard(file_list) - print("heter worker file list: {}".format(file_list)) + print(f"heter worker file list: {file_list}") return dataset, file_list diff --git a/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py 
b/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py index 2b28aa1c687ecb..fc3dd1072a4b33 100755 --- a/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py +++ b/python/paddle/fluid/tests/unittests/ps/ps_dnn_trainer.py @@ -41,7 +41,7 @@ def is_distributed_env(): node_role = os.getenv("TRAINING_ROLE") - print("-- Role: {} --".format(node_role)) + print(f"-- Role: {node_role} --") if node_role is None: return False else: @@ -80,7 +80,7 @@ def parse_yaml(self, config): _config = yaml.load(rb.read()) return _config else: - raise ValueError("config {} can not be supported".format(config)) + raise ValueError(f"config {config} can not be supported") def get_all_inters_from_yaml(self, file, filters): _envs = self.parse_yaml(file) @@ -122,7 +122,7 @@ def pretty_print_envs(self, envs, header=None): h_format = " " + "|{{:>{}s}}{}{{:^{}s}}|\n".format( max_k, " " * spacing, max_v ) - l_format = " " + "|{{:>{}s}}{{}}{{:^{}s}}|\n".format(max_k, max_v) + l_format = " " + f"|{{:>{max_k}s}}{{}}{{:^{max_v}s}}|\n" length = max_k + max_v + spacing border = " +" + "".join(["="] * length) + "+" @@ -148,7 +148,7 @@ def pretty_print_envs(self, envs, header=None): draws += border - _str = "\n{}\n".format(draws) + _str = f"\n{draws}\n" return _str @@ -188,7 +188,7 @@ def get_user_defined_strategy(config): "accumulate_steps": config.get('runner.micro_num') } elif sync_mode == "gpubox": - print("sync_mode = {}".format(sync_mode)) + print(f"sync_mode = {sync_mode}") strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True strategy.a_sync_configs = {"use_ps_gpu": 1} @@ -340,9 +340,9 @@ def init_fleet_with_gloo(self, use_gloo=False): fleet.init() if fleet.is_server(): - print("server: {} started".format(fleet.server_index())) + print(f"server: {fleet.server_index()} started") else: - print("worker: {} started".format(fleet.worker_index())) + print(f"worker: {fleet.worker_index()} started") def run_minimize(self): self.init_fleet_with_gloo() diff --git a/python/paddle/fluid/tests/unittests/ps/static_gpubox_trainer.py b/python/paddle/fluid/tests/unittests/ps/static_gpubox_trainer.py index 78d1415025176b..5940e3942cac15 100755 --- a/python/paddle/fluid/tests/unittests/ps/static_gpubox_trainer.py +++ b/python/paddle/fluid/tests/unittests/ps/static_gpubox_trainer.py @@ -160,7 +160,7 @@ def run_worker(self): fleet.barrier_worker() self.reader.release_memory() - logger.info("finish {} epoch training....".format(epoch)) + logger.info(f"finish {epoch} epoch training....") self.PSGPU.finalize() def init_reader(self): @@ -180,14 +180,11 @@ def dataset_train_loop(self, epoch): begin_pass_time = time.time() self.PSGPU.begin_pass() - print( - "begin_pass cost:{} seconds".format(time.time() - begin_pass_time) - ) + print(f"begin_pass cost:{time.time() - begin_pass_time} seconds") - logger.info("Epoch: {}, Running Dataset Begin.".format(epoch)) + logger.info(f"Epoch: {epoch}, Running Dataset Begin.") fetch_info = [ - "Epoch {} Var {}".format(epoch, var_name) - for var_name in self.metrics + f"Epoch {epoch} Var {var_name}" for var_name in self.metrics ] fetch_vars = [var for _, var in self.metrics.items()] print_step = int(self.config.get("runner.print_interval")) diff --git a/python/paddle/fluid/tests/unittests/rnn/convert.py b/python/paddle/fluid/tests/unittests/rnn/convert.py index c41f80c7eabbf0..bb0a31058a3ab7 100644 --- a/python/paddle/fluid/tests/unittests/rnn/convert.py +++ b/python/paddle/fluid/tests/unittests/rnn/convert.py @@ -56,12 +56,12 @@ def convert_params_for_net_static(np_net, 
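Reviewer note (not part of the patch): the one conversion above that deserves a second look is l_format in ps_dnn_trainer.py, because the string being built is itself a format template, so the literal braces stay doubled and only the column widths are interpolated. A quick standalone check with example widths:

max_k, max_v = 20, 40  # example widths; the real values come from the env table

old = " " + "|{{:>{}s}}{{}}{{:^{}s}}|\n".format(max_k, max_v)   # pre-patch spelling
new = " " + f"|{{:>{max_k}s}}{{}}{{:^{max_v}s}}|\n"             # post-patch spelling

# Both produce the same second-stage template ...
assert old == new == " |{:>20s}{}{:^40s}|\n"
# ... which is later filled with an actual key/value row, roughly like this:
print(new.format("runner.sync_mode", " " * 2, "async"), end="")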
paddle_net, place): def get_params_for_cell(np_cell, num_layers, idx): state = np_cell.parameters weight_list = [ - ('{}.weight_{}'.format(num_layers, idx), state['weight_ih']), - ('{}.weight_{}'.format(num_layers, idx + 1), state['weight_hh']), + (f'{num_layers}.weight_{idx}', state['weight_ih']), + (f'{num_layers}.weight_{idx + 1}', state['weight_hh']), ] bias_list = [ - ('{}.bias_{}'.format(num_layers, idx), state['bias_ih']), - ('{}.bias_{}'.format(num_layers, idx + 1), state['bias_hh']), + (f'{num_layers}.bias_{idx}', state['bias_ih']), + (f'{num_layers}.bias_{idx + 1}', state['bias_hh']), ] return weight_list, bias_list diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_topk_avg_pooling.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_topk_avg_pooling.py index 7d0e16a41003c6..4fcb20f1500fd0 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_topk_avg_pooling.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_topk_avg_pooling.py @@ -95,7 +95,7 @@ def compute(self): x_len = x_lod[0][idx] self.assertTrue( x_len == channel_num * row_lod[0][idx] * col_lod[0][idx], - "x_len: %s can't mod channel_num: %s" % (x_len, channel_num), + f"x_len: {x_len} can't mod channel_num: {channel_num}", ) out_tmp = np.zeros((0,), dtype=x_data.dtype) pos_tmp = np.zeros((0,), dtype='int32') diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index dfe0efabcc4a07..ebecd2e9601c4e 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -3850,7 +3850,7 @@ class TestActCudnn(parent): def init_kernel_type(self): self.attrs = {"use_cudnn": True} - cls_name = "{0}_{1}".format(parent.__name__, "cudnn") + cls_name = "{}_{}".format(parent.__name__, "cudnn") TestActCudnn.__name__ = cls_name globals()[cls_name] = TestActCudnn @@ -3905,7 +3905,7 @@ def test_check_grad(self): max_relative_error=grad_atol, ) - cls_name = "{0}_{1}".format(parent.__name__, "fp16") + cls_name = "{}_{}".format(parent.__name__, "fp16") TestActFp16.__name__ = cls_name globals()[cls_name] = TestActFp16 @@ -3984,7 +3984,7 @@ def test_check_grad(self): place, ['X'], 'Out', max_relative_error=grad_atol ) - cls_name = "{0}_{1}".format(parent.__name__, "bf16") + cls_name = "{}_{}".format(parent.__name__, "bf16") TestActBF16.__name__ = cls_name globals()[cls_name] = TestActBF16 diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index 9d5f775958ca7d..f8861b8445ca73 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -926,7 +926,7 @@ def _test( exe = paddle.static.Executor(place) exe.run(startup_prog) - print("Start run on {}".format(place)) + print(f"Start run on {place}") for epoch in range(10): pred_res, loss_res = exe.run( main_prog, diff --git a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py index ac94efe60ad289..b5a81df5c27ca2 100644 --- a/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py +++ b/python/paddle/fluid/tests/unittests/test_apply_pass_to_program.py @@ -208,7 +208,7 @@ def test_main(self): loss_value2 = self.executor.run( main2, feed=feed, fetch_list=[loss2] )[0] - self.assertEqual(loss_value1, loss_value2, "batch {}".format(idx)) + self.assertEqual(loss_value1, 
loss_value2, f"batch {idx}") if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py index 424a3e0b199d68..ab68f5fc3a5842 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py @@ -290,7 +290,7 @@ def test_case(self): self.run_static(place) self.run_dygraph(place) - cls_name = "ArgMaxMinTestCase_{}".format(op_type) + cls_name = f"ArgMaxMinTestCase_{op_type}" ArgMaxMinTestCase.__name__ = cls_name globals()[cls_name] = ArgMaxMinTestCase diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py index c2a116abc05804..d1104c2ce5931d 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py @@ -438,7 +438,7 @@ def test_cluster(self): self.assertAlmostEqual(link0_machine1.bandwidth, 1) self.assertAlmostEqual(link0_machine1.latency, 0) - str = "cluster: {}".format(cluster) + str = f"cluster: {cluster}" if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py index ac75e0a9570b4f..b8628f671c022a 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_graph.py @@ -71,7 +71,7 @@ def test_graph(self): self.assertEqual(graph[4][5]["weight"], 0.4) - str = "{}".format(graph) + str = f"{graph}" self.assertIsNotNone(str) self.assertRaises(TypeError, 6 in graph) diff --git a/python/paddle/fluid/tests/unittests/test_channel_shuffle.py b/python/paddle/fluid/tests/unittests/test_channel_shuffle.py index 7029e6508b1075..f4a772105a55f1 100644 --- a/python/paddle/fluid/tests/unittests/test_channel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_channel_shuffle.py @@ -193,9 +193,9 @@ def run_dygraph(self, groups, data_format): result_functional.numpy(), npresult, rtol=1e-05 ) - channel_shuffle_str = 'groups={}'.format(groups) + channel_shuffle_str = f'groups={groups}' if data_format != 'NCHW': - channel_shuffle_str += ', data_format={}'.format(data_format) + channel_shuffle_str += f', data_format={data_format}' self.assertEqual(channel_shuffle.extra_repr(), channel_shuffle_str) def test_dygraph1(self): diff --git a/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py b/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py index cd2abeb7016280..b6ee6e28c7b06d 100644 --- a/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py +++ b/python/paddle/fluid/tests/unittests/test_checkpoint_saver.py @@ -26,15 +26,15 @@ def test(self): s = CheckpointSaver(fs) - fs.mkdirs("{}/exe.exe".format(dir_path)) - fs.mkdirs("{}/exe.1".format(dir_path)) - fs.mkdirs("{}/exe".format(dir_path)) + fs.mkdirs(f"{dir_path}/exe.exe") + fs.mkdirs(f"{dir_path}/exe.1") + fs.mkdirs(f"{dir_path}/exe") a = s.get_checkpoint_no(dir_path) self.assertEqual(len(a), 0) - fs.mkdirs("{}/__paddle_checkpoint__.0".format(dir_path)) - fs.mkdirs("{}/__paddle_checkpoint__.exe".format(dir_path)) + fs.mkdirs(f"{dir_path}/__paddle_checkpoint__.0") + fs.mkdirs(f"{dir_path}/__paddle_checkpoint__.exe") a = s.get_checkpoint_no(dir_path) self.assertEqual(len(a), 1) diff --git a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py 
b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py index a4447f031d5511..61c29519d117f3 100644 --- a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py +++ b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py @@ -25,7 +25,7 @@ def __init__(self, chunk_type, start_idx, end_idx): self.end_idx = end_idx def __str__(self): - return '(Segment: %s, %s, %s)' % ( + return '(Segment: {}, {}, {})'.format( self.chunk_type, self.start_idx, self.end_idx, diff --git a/python/paddle/fluid/tests/unittests/test_collective_api_base.py b/python/paddle/fluid/tests/unittests/test_collective_api_base.py index 43987131a53452..550b0e24e487cd 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_api_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_api_base.py @@ -174,7 +174,7 @@ class TestDistBase(unittest.TestCase): def setUp(self): self._port_set = set() self._trainers = 2 - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) diff --git a/python/paddle/fluid/tests/unittests/test_collective_base.py b/python/paddle/fluid/tests/unittests/test_collective_base.py index 9e398909f6b282..1462bc5f99f816 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_base.py +++ b/python/paddle/fluid/tests/unittests/test_collective_base.py @@ -145,7 +145,7 @@ class TestDistBase(unittest.TestCase): def setUp(self): self._port_set = set() self._trainers = 2 - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 6c1125506155b1..031dc2481da004 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -165,11 +165,11 @@ def runTest(self): os.environ["TRAINING_ROLE"] = "PSERVER" os.environ["PADDLE_PORT"] = str(port) - os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:{}".format(port) + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = f"127.0.0.1:{port}" _python = sys.executable - ps_cmd = "{} {}".format(_python, server_file) + ps_cmd = f"{_python} {server_file}" ps_proc = subprocess.Popen( ps_cmd.strip().split(" "), diff --git a/python/paddle/fluid/tests/unittests/test_compare_op.py b/python/paddle/fluid/tests/unittests/test_compare_op.py index d7dc124c5a33b8..9765ebda405b80 100755 --- a/python/paddle/fluid/tests/unittests/test_compare_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_op.py @@ -47,7 +47,7 @@ def test_errors(self): self.assertRaises(TypeError, op, x=x, y=a) self.assertRaises(TypeError, op, x=a, y=y) - cls_name = "{0}_{1}".format(op_type, typename) + cls_name = f"{op_type}_{typename}" Cls.__name__ = cls_name globals()[cls_name] = Cls @@ -429,7 +429,7 @@ def test_attr_name(self): out = op(x=x, y=y, name="name_%s" % (self.op_type)) self.assertEqual("name_%s" % (self.op_type) in out.name, True) - cls_name = "TestCase_{}".format(op_type) + cls_name = f"TestCase_{op_type}" PaddleCls.__name__ = cls_name globals()[cls_name] = PaddleCls @@ -462,7 +462,7 @@ def setUp(self): def test_check_output(self): self.check_output() - cls_name = "BF16TestCase_{}".format(op_type) + cls_name = f"BF16TestCase_{op_type}" TestCompareOpBF16Op.__name__ = cls_name globals()[cls_name] = TestCompareOpBF16Op diff --git 
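Reviewer note (not part of the patch): several setUp() methods above build the pserver endpoint string from two calls to self._find_free_port(), a helper that is not shown in these hunks. A typical implementation of such a helper, sketched here only so the endpoint construction can be run in isolation, asks the OS for an ephemeral port:

import socket
from contextlib import closing


def find_free_port():
    # Bind to port 0 so the OS picks an unused ephemeral port, then release it
    # (subject to the usual reuse race before the trainer actually binds it).
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]


# Endpoint string assembled the same way as in the converted setUp() methods.
ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format(find_free_port(), find_free_port())
print(ps_endpoints)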
a/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py b/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py index 40da5af0b921c5..2f982d3d8aa0b0 100644 --- a/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_compare_reduce_op.py @@ -34,7 +34,7 @@ def setUp(self): def test_output(self): self.check_output() - cls_name = "{0}_{1}_{2}".format(op_type, typename, 'not_equal_all') + cls_name = "{}_{}_{}".format(op_type, typename, 'not_equal_all') Cls.__name__ = cls_name globals()[cls_name] = Cls @@ -53,7 +53,7 @@ def setUp(self): def test_output(self): self.check_output() - cls_name = "{0}_{1}_{2}".format(op_type, typename, 'not_shape_equal_all') + cls_name = "{}_{}_{}".format(op_type, typename, 'not_shape_equal_all') Cls.__name__ = cls_name globals()[cls_name] = Cls @@ -71,7 +71,7 @@ def setUp(self): def test_output(self): self.check_output() - cls_name = "{0}_{1}_{2}".format(op_type, typename, 'equal_all') + cls_name = "{}_{}_{}".format(op_type, typename, 'equal_all') Cls.__name__ = cls_name globals()[cls_name] = Cls @@ -91,7 +91,7 @@ def setUp(self): def test_output(self): self.check_output() - cls_name = "{0}_{1}_{2}".format(op_type, typename, 'equal_all') + cls_name = "{}_{}_{}".format(op_type, typename, 'equal_all') Cls.__name__ = cls_name globals()[cls_name] = Cls diff --git a/python/paddle/fluid/tests/unittests/test_complex_variable.py b/python/paddle/fluid/tests/unittests/test_complex_variable.py index 48144b1df31d07..98dbaa2ba500cd 100644 --- a/python/paddle/fluid/tests/unittests/test_complex_variable.py +++ b/python/paddle/fluid/tests/unittests/test_complex_variable.py @@ -34,7 +34,7 @@ def compare(self): x = dg.to_variable(a, "x") y = dg.to_variable(b) out = paddle.add(x, y) - self.assertIsNotNone("{}".format(out)) + self.assertIsNotNone(f"{out}") np.testing.assert_allclose(out.numpy(), a + b, rtol=1e-05) self.assertEqual(out.dtype, convert_np_dtype_to_dtype_(self._dtype)) diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index 6cdb41fa720263..4b34f596f7916f 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -249,7 +249,7 @@ def setUp(self): ) } - cls_name = "{0}_{1}".format(parent.__name__, "AxisTensor") + cls_name = "{}_{}".format(parent.__name__, "AxisTensor") TestConcatAxisTensor.__name__ = cls_name globals()[cls_name] = TestConcatAxisTensor @@ -269,7 +269,7 @@ class TestConcatFp16(parent): def get_dtype(self): return np.float16 - cls_name = "{0}_{1}".format(parent.__name__, "Fp16") + cls_name = "{}_{}".format(parent.__name__, "Fp16") TestConcatFp16.__name__ = cls_name globals()[cls_name] = TestConcatFp16 @@ -291,7 +291,7 @@ class TestConcatBf16(parent): def get_dtype(self): return np.uint16 - cls_name = "{0}_{1}".format(parent.__name__, "Bf16") + cls_name = "{}_{}".format(parent.__name__, "Bf16") TestConcatBf16.__name__ = cls_name globals()[cls_name] = TestConcatBf16 diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py index d74e0c8022e1d5..2aee55d2c04396 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py @@ -27,7 +27,7 @@ def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp") + cls_name = 
"{}_{}".format(parent.__name__, "PaddingSAMEOp") TestPaddingSAMECase.__name__ = cls_name globals()[cls_name] = TestPaddingSAMECase @@ -38,7 +38,7 @@ def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingVALIDOp") TestPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestPaddingVALIDCase @@ -64,7 +64,7 @@ def test_check_output(self): place, atol=1e-5, check_dygraph=False ) - cls_name = "{0}_{1}".format(parent.__name__, "CudnnChannelLast") + cls_name = "{}_{}".format(parent.__name__, "CudnnChannelLast") TestCudnnChannelLastCase.__name__ = cls_name globals()[cls_name] = TestCudnnChannelLastCase diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_op.py index f391018a1be5aa..1742e42acf8bc4 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_op.py @@ -159,7 +159,7 @@ def init_kernel_type(self): np.float32 if core.is_compiled_with_rocm() else np.float64 ) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNN") + cls_name = "{}_{}".format(parent.__name__, "CUDNN") TestCUDNNCase.__name__ = cls_name globals()[cls_name] = TestCUDNNCase @@ -193,7 +193,7 @@ def test_check_grad_no_input(self): place, ['Filter'], 'Output', no_grad_set={'Input'} ) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFp16") + cls_name = "{}_{}".format(parent.__name__, "CUDNNFp16") TestConv2DCUDNNFp16.__name__ = cls_name globals()[cls_name] = TestConv2DCUDNNFp16 @@ -246,7 +246,7 @@ def test_check_grad_no_input(self): user_defined_grads=[numeric_grads], ) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16") + cls_name = "{}_{}".format(parent.__name__, "CUDNNBF16") TestConv2DCUDNNBF16.__name__ = cls_name globals()[cls_name] = TestConv2DCUDNNBF16 @@ -260,7 +260,7 @@ def init_test_case_2(self): N, C, H, W = self.input_size self.input_size = [N, H, W, C] - cls_name = "{0}_{1}".format(parent.__name__, "ChannelLast") + cls_name = "{}_{}".format(parent.__name__, "ChannelLast") TestChannelLastCase.__name__ = cls_name globals()[cls_name] = TestChannelLastCase @@ -283,7 +283,7 @@ def init_test_case_2(self): N, C, H, W = self.input_size self.input_size = [N, H, W, C] - cls_name = "{0}_{1}".format(parent.__name__, "CudnnChannelLast") + cls_name = "{}_{}".format(parent.__name__, "CudnnChannelLast") TestCudnnChannelLastCase.__name__ = cls_name globals()[cls_name] = TestCudnnChannelLastCase @@ -324,7 +324,7 @@ def init_test_case_2(self): N, C, H, W = self.input_size self.input_size = [N, H, W, C] - cls_name = "{0}_{1}".format(parent.__name__, "CudnnChannelLastFp16") + cls_name = "{}_{}".format(parent.__name__, "CudnnChannelLastFp16") TestCudnnChannelLastFp16.__name__ = cls_name globals()[cls_name] = TestCudnnChannelLastFp16 @@ -335,7 +335,7 @@ def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingSAMEOp") TestPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestPaddingSMAECase @@ -346,7 +346,7 @@ def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingVALIDOp") TestPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestPaddingVALIDCase @@ -366,7 +366,7 @@ def 
init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "CudnnPaddingSAMEOp") TestCUDNNPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestCUDNNPaddingSMAECase @@ -386,7 +386,7 @@ def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "CudnnPaddingVALIDOp") TestCUDNNPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestCUDNNPaddingVALIDCase diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py b/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py index 47553d9befcd14..aa90f72a563d62 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_op_depthwise_conv.py @@ -426,7 +426,7 @@ def test_check_grad_no_input(self): place, ['Filter'], 'Output', no_grad_set={'Input'} ) - cls_name = "{0}_{1}".format(parent.__name__, "FP16OP") + cls_name = "{}_{}".format(parent.__name__, "FP16OP") TestDepthwiseConvFP16.__name__ = cls_name globals()[cls_name] = TestDepthwiseConvFP16 @@ -479,7 +479,7 @@ def test_check_grad_no_input(self): user_defined_grads=[numeric_grads], ) - cls_name = "{0}_{1}".format(parent.__name__, "BF16OP") + cls_name = "{}_{}".format(parent.__name__, "BF16OP") TestDepthwiseConvBF16.__name__ = cls_name globals()[cls_name] = TestDepthwiseConvBF16 @@ -520,7 +520,7 @@ def init_test_case_2(self): N, C, H, W = self.input_size self.input_size = [N, H, W, C] - cls_name = "{0}_{1}".format(parent.__name__, "ChannelLastFP16") + cls_name = "{}_{}".format(parent.__name__, "ChannelLastFP16") TestChannelLastFP16.__name__ = cls_name globals()[cls_name] = TestChannelLastFP16 diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index 2c859a22d14c6b..0b843663827c13 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -174,7 +174,7 @@ def init_kernel_type(self): np.float32 if core.is_compiled_with_rocm() else np.float64 ) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNN") + cls_name = "{}_{}".format(parent.__name__, "CUDNN") TestCUDNNCase.__name__ = cls_name globals()[cls_name] = TestCUDNNCase @@ -185,7 +185,7 @@ def init_paddings(self): self.pad = [0, 0, 0] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingSAMEOp") TestPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestPaddingSMAECase @@ -196,7 +196,7 @@ def init_paddings(self): self.pad = [1, 1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingVALIDOp") TestPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestPaddingVALIDCase @@ -216,7 +216,7 @@ def init_paddings(self): self.pad = [1, 1, 1] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "CudnnPaddingSAMEOp") TestCUDNNPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestCUDNNPaddingSMAECase @@ -236,7 +236,7 @@ def init_paddings(self): self.pad = [1, 1, 1] self.padding_algorithm = "VALID" - cls_name = 
"{0}_{1}".format(parent.__name__, "CudnnPaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "CudnnPaddingVALIDOp") TestCUDNNPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestCUDNNPaddingVALIDCase @@ -250,7 +250,7 @@ def init_test_case_2(self): N, C, D, H, W = self.input_size self.input_size = [N, D, H, W, C] - cls_name = "{0}_{1}".format(parent.__name__, "ChannelLast") + cls_name = "{}_{}".format(parent.__name__, "ChannelLast") TestChannelLastCase.__name__ = cls_name globals()[cls_name] = TestChannelLastCase @@ -273,7 +273,7 @@ def init_test_case_2(self): N, C, D, H, W = self.input_size self.input_size = [N, D, H, W, C] - cls_name = "{0}_{1}".format(parent.__name__, "CudnnChannelLast") + cls_name = "{}_{}".format(parent.__name__, "CudnnChannelLast") TestCudnnChannelLastCase.__name__ = cls_name globals()[cls_name] = TestCudnnChannelLastCase diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py index 3a486c43cd2d4d..8f1a8f8c815a4d 100644 --- a/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py +++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_op.py @@ -387,7 +387,7 @@ def test_check_grad(self): place, ['X'], 'Y', max_relative_error=0.9 ) - cls_name = "{0}".format(cls_name) + cls_name = f"{cls_name}" TestCrossEntropyFP16Op.__name__ = cls_name globals()[cls_name] = TestCrossEntropyFP16Op diff --git a/python/paddle/fluid/tests/unittests/test_cuda_graph.py b/python/paddle/fluid/tests/unittests/test_cuda_graph.py index edfa7665882ca6..58728ec476a2db 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_graph.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_graph.py @@ -98,7 +98,7 @@ def test_concat_and_split(self): for i, z in enumerate(zs): np.testing.assert_array_equal(z.numpy(), xs_np[i]) - output_dir = 'cuda_graph_dot_{}'.format(os.getpid()) + output_dir = f'cuda_graph_dot_{os.getpid()}' try: graph.print_to_dot_files(pathlib.Path(output_dir)) graph.reset() diff --git a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py index f179db37af34fb..e14f4c2fd1657c 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py @@ -44,8 +44,8 @@ def test_gen_dropout_dygraph(self): x = paddle.uniform([2, 10], dtype="float32", min=0.0, max=1.0) x_again = paddle.uniform([2, 10], dtype="float32", min=0.0, max=1.0) x_third = paddle.uniform([2, 10], dtype="float32", min=0.0, max=1.0) - print("x: {}".format(x.numpy())) - print("x_again: {}".format(x_again.numpy())) + print(f"x: {x.numpy()}") + print(f"x_again: {x_again.numpy()}") x = x + x_again + x_third y = paddle.nn.functional.dropout(x, 0.5) diff --git a/python/paddle/fluid/tests/unittests/test_cumsum_op.py b/python/paddle/fluid/tests/unittests/test_cumsum_op.py index 541387159cf038..98bcc81aeb3a25 100644 --- a/python/paddle/fluid/tests/unittests/test_cumsum_op.py +++ b/python/paddle/fluid/tests/unittests/test_cumsum_op.py @@ -401,7 +401,7 @@ def test_check_grad(self): check_prim=True, ) - cls_name = "{0}_{1}".format(parent.__name__, "Fp16") + cls_name = "{}_{}".format(parent.__name__, "Fp16") TestCumsumFP16Op.__name__ = cls_name globals()[cls_name] = TestCumsumFP16Op @@ -442,7 +442,7 @@ def test_check_grad(self): place = paddle.CUDAPlace(0) self.check_grad_with_place(place, ["X"], "Out", check_prim=True) - cls_name = "{0}_{1}".format(parent.__name__, "BF16") + cls_name = 
"{}_{}".format(parent.__name__, "BF16") TestCumsumBF16Op.__name__ = cls_name globals()[cls_name] = TestCumsumBF16Op diff --git a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py index e23b6392795c47..b17d2089c6d2c4 100644 --- a/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_dataset_dataloader.py @@ -127,9 +127,7 @@ def check_batch_number(self, place, randomize_batch_num=False): random_delta_batch_size = np.zeros(shape=[file_num]) for i in range(file_num): - filename = os.path.join( - self.temp_dir.name, 'dataset_test_{}.txt'.format(i) - ) + filename = os.path.join(self.temp_dir.name, f'dataset_test_{i}.txt') filelist.append(filename) write_reader_data_to_file( filename, diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index f5b2872576788d..be94a4322a78a3 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -125,10 +125,10 @@ def operator_equal(a, b): v1 = sorted(b.__dict__[k].items(), key=lambda x: x[0]) if v0 != v1: - raise ValueError("In operator_equal not equal:{0}\n".format(k)) + raise ValueError(f"In operator_equal not equal:{k}\n") elif v != b.__dict__[k]: - raise ValueError("In operator_equal not equal:{0}\n".format(k)) + raise ValueError(f"In operator_equal not equal:{k}\n") return True @@ -143,15 +143,15 @@ def block_equal(a, b): assert len(a.ops) == len(b.ops) for i in range(0, len(a.ops)): if not operator_equal(a.ops[i], b.ops[i]): - raise ValueError("In block_equal not equal:{0}\n".format(k)) + raise ValueError(f"In block_equal not equal:{k}\n") elif isinstance(v, collections.OrderedDict): for key, value in v.items(): if str(value) != str(b.__dict__[k][key]): - raise ValueError("In block_equal not equal:{0}\n".format(k)) + raise ValueError(f"In block_equal not equal:{k}\n") elif v != b.__dict__[k]: - raise ValueError("In block_equal not equal:{0}\n".format(k)) + raise ValueError(f"In block_equal not equal:{k}\n") return True @@ -164,15 +164,13 @@ def program_equal(a, b): elif k == 'blocks': for i in range(0, len(a.blocks)): if not block_equal(a.blocks[i], b.blocks[i]): - raise ValueError( - "In operator_equal not equal:{0}\n".format(k) - ) + raise ValueError(f"In operator_equal not equal:{k}\n") return False assert len(a.blocks) == len(b.blocks) elif k == '_auto_checkpoint_name': continue elif v != b.__dict__[k]: - raise ValueError("In program_equal not equal:{0}\n".format(k)) + raise ValueError(f"In program_equal not equal:{k}\n") return True diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py index 05227078e8ff10..3230a0ecc666ff 100644 --- a/python/paddle/fluid/tests/unittests/test_directory_migration.py +++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py @@ -29,10 +29,10 @@ def tearDown(self): def get_import_command(self, module): paths = module.split('.') if len(paths) == 1: - return 'import {}'.format(module) + return f'import {module}' package = '.'.join(paths[:-1]) func = paths[-1] - cmd = 'from {} import {}'.format(package, func) + cmd = f'from {package} import {func}' return cmd def test_new_directory(self): @@ -106,11 +106,11 @@ def test_new_directory(self): with open(import_file, "w") as wb: for module in new_directory: run_cmd = self.get_import_command(module) - 
wb.write("{}\n".format(run_cmd)) + wb.write(f"{run_cmd}\n") _python = sys.executable - ps_cmd = "{} {}".format(_python, import_file) + ps_cmd = f"{_python} {import_file}" ps_proc = subprocess.Popen( ps_cmd.strip().split(" "), stdout=subprocess.PIPE, @@ -120,7 +120,7 @@ def test_new_directory(self): self.assertFalse( "Error" in str(stderr), - "ErrorMessage:\n{}".format(bytes.decode(stderr)), + f"ErrorMessage:\n{bytes.decode(stderr)}", ) def test_old_directory(self): @@ -224,7 +224,7 @@ def test_old_directory(self): _python = sys.executable - ps_cmd = "{} {}".format(_python, import_file) + ps_cmd = f"{_python} {import_file}" ps_proc = subprocess.Popen( ps_cmd.strip().split(" "), stdout=subprocess.PIPE, diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index def841d16fb3b2..a9c42b931aab6d 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -287,7 +287,7 @@ def get_data(): out_losses.append(loss[0]) print_to_err(type(self).__name__, "run step %d finished" % i) print_to_err(type(self).__name__, "trainer run finished") - print_to_err(type(self).__name__, "dist losses: {}".format(out_losses)) + print_to_err(type(self).__name__, f"dist losses: {out_losses}") sys.stdout.buffer.write(pickle.dumps(out_losses)) @@ -442,7 +442,7 @@ def run_trainer(self, args): build_stra.memory_optimize = False if args.fuse_all_reduce is not None: - sys.stderr.write('fuse_all_reduce={}'.format(args.fuse_all_reduce)) + sys.stderr.write(f'fuse_all_reduce={args.fuse_all_reduce}') build_stra.fuse_all_reduce_ops = args.fuse_all_reduce if args.hogwild: @@ -1029,12 +1029,12 @@ def setUp(self): DIST_UT_PORT = int(os.getenv("PADDLE_DIST_UT_PORT")) if DIST_UT_PORT == 0: - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) else: - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( DIST_UT_PORT, DIST_UT_PORT + 1, ) @@ -1137,7 +1137,7 @@ def _run_local( envs['COVERAGE_FILE'] = os.getenv('COVERAGE_FILE', '') cmd += " -m coverage run --branch -p" - cmd += " %s --role trainer --update_method local --lr %f" % ( + cmd += " {} --role trainer --update_method local --lr {:f}".format( model, self._lr, ) @@ -1184,7 +1184,7 @@ def _run_local( cmd += " --find_unused_parameters" env_local.update(envs) - print("local_cmd: {}, env: {}".format(cmd, env_local)) + print(f"local_cmd: {cmd}, env: {env_local}") if check_error_log: path = os.path.join(self.temp_dir.name, log_name + "_local.log") @@ -1290,8 +1290,8 @@ def _run_cluster(self, model, envs, check_error_log, log_name): env0.update(envs) env1.update(envs) - print("tr0_cmd: {}, env: {}".format(tr0_cmd, env0)) - print("tr1_cmd: {}, env: {}".format(tr1_cmd, env1)) + print(f"tr0_cmd: {tr0_cmd}, env: {env0}") + print(f"tr1_cmd: {tr1_cmd}, env: {env1}") path0 = os.path.join(self.temp_dir.name, log_name + "_tr0_err.log") path1 = os.path.join(self.temp_dir.name, log_name + "_tr1_err.log") @@ -1377,8 +1377,8 @@ def _get_gloo_trainer_cmd( tr_cmd += " --use_cpu" env.update( { - "PADDLE_TRAINERS_NUM": "{}".format(trainer_num), - "PADDLE_TRAINER_ID": "{}".format(trainer_id), + "PADDLE_TRAINERS_NUM": f"{trainer_num}", + "PADDLE_TRAINER_ID": f"{trainer_id}", "PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints, "PADDLE_CURRENT_ENDPOINT": ep, "PADDLE_CURRENT_ENDPOINT": ep, @@ -1401,7 +1401,7 @@ def 
_get_gloo_trainer_cmd( tr_cmd += " --enable_backward_deps" if self._fuse_all_reduce is not None: - tr_cmd += " --fuse_all_reduce {}".format(self._fuse_all_reduce) + tr_cmd += f" --fuse_all_reduce {self._fuse_all_reduce}" assert not self._use_fleet_api, "gloo not support use fleet api" assert not self._use_fleet_api_20, "gloo not support use fleet api" @@ -1438,10 +1438,10 @@ def _get_nccl2_trainer_cmd( tr_cmd += " --use_cuda" env.update( { - "FLAGS_selected_gpus": "{}".format(0), - "CUDA_VISIBLE_DEVICES": "{}".format(trainer_id), - "PADDLE_TRAINERS_NUM": "{}".format(trainer_num), - "PADDLE_TRAINER_ID": "{}".format(trainer_id), + "FLAGS_selected_gpus": f"{0}", + "CUDA_VISIBLE_DEVICES": f"{trainer_id}", + "PADDLE_TRAINERS_NUM": f"{trainer_num}", + "PADDLE_TRAINER_ID": f"{trainer_id}", "PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints, "PADDLE_CURRENT_ENDPOINT": ep, } @@ -1452,10 +1452,10 @@ def _get_nccl2_trainer_cmd( tr_cmd += " --use_xpu" env.update( { - "FLAGS_selected_xpus": "{}".format(trainer_id), + "FLAGS_selected_xpus": f"{trainer_id}", # "XPU_VISIBLE_DEVICES": "{}".format(trainer_id + 1), - "PADDLE_TRAINERS_NUM": "{}".format(trainer_num), - "PADDLE_TRAINER_ID": "{}".format(trainer_id), + "PADDLE_TRAINERS_NUM": f"{trainer_num}", + "PADDLE_TRAINER_ID": f"{trainer_id}", "PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints, "PADDLE_CURRENT_ENDPOINT": ep, "GLOG_v": "2", @@ -1465,9 +1465,9 @@ def _get_nccl2_trainer_cmd( tr_cmd += " --use_npu" env.update( { - "FLAGS_selected_npus": "{}".format(trainer_id), - "PADDLE_TRAINERS_NUM": "{}".format(trainer_num), - "PADDLE_TRAINER_ID": "{}".format(trainer_id), + "FLAGS_selected_npus": f"{trainer_id}", + "PADDLE_TRAINERS_NUM": f"{trainer_num}", + "PADDLE_TRAINER_ID": f"{trainer_id}", "PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints, "PADDLE_CURRENT_ENDPOINT": ep, "GLOG_v": "2", @@ -1477,9 +1477,9 @@ def _get_nccl2_trainer_cmd( tr_cmd += " --use_mlu" env.update( { - "FLAGS_selected_mlus": "{}".format(trainer_id), - "PADDLE_TRAINERS_NUM": "{}".format(trainer_num), - "PADDLE_TRAINER_ID": "{}".format(trainer_id), + "FLAGS_selected_mlus": f"{trainer_id}", + "PADDLE_TRAINERS_NUM": f"{trainer_num}", + "PADDLE_TRAINER_ID": f"{trainer_id}", "PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints, "PADDLE_CURRENT_ENDPOINT": ep, "GLOG_v": "4", @@ -1500,10 +1500,10 @@ def _get_nccl2_trainer_cmd( if self._pipeline_mode: tr_cmd += " --use_pipeline" if self._mp_mode: - env = {"FLAGS_selected_gpus": "{}".format(trainer_id)} + env = {"FLAGS_selected_gpus": f"{trainer_id}"} if self._nccl_comm_num > 1: - tr_cmd += " --nccl_comm_num {}".format(self._nccl_comm_num) + tr_cmd += f" --nccl_comm_num {self._nccl_comm_num}" if self._use_hallreduce: tr_cmd += " --use_hallreduce --hallreduce_inter_nranks 2" @@ -1512,7 +1512,7 @@ def _get_nccl2_trainer_cmd( tr_cmd += " --enable_backward_deps" if self._fuse_all_reduce is not None: - tr_cmd += " --fuse_all_reduce {}".format(self._fuse_all_reduce) + tr_cmd += f" --fuse_all_reduce {self._fuse_all_reduce}" if self._use_fleet_api: tr_cmd += ( @@ -1563,13 +1563,13 @@ def _run_cluster_gloo( ) path = os.path.join( - self.temp_dir.name, log_name + "_tr{}_err.log".format(i) + self.temp_dir.name, log_name + f"_tr{i}_err.log" ) tr_pipe = open(path, "wb") print_to_err( type(self).__name__, - "going to start process {} with nccl2".format(i), + f"going to start process {i} with nccl2", ) tr_proc = subprocess.Popen( tr_cmd.strip().split(" "), @@ -1586,7 +1586,7 @@ def _run_cluster_gloo( tr_out, tr_err = procs[i].communicate() outs.append(tr_out) pipes[i].close() 
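Reviewer note (not part of the patch): in _get_nccl2_trainer_cmd the patch rewrites every env value as an f-string; the values stay strings, presumably because they end up in the child process environment. A tiny sketch with example endpoints, mirroring the post-patch dict literals:

def nccl2_trainer_env(trainer_id, trainer_num, ep, endpoints):
    # f"{0}" keeps the literal "0" that "{}".format(0) produced before the patch.
    return {
        "FLAGS_selected_gpus": f"{0}",
        "CUDA_VISIBLE_DEVICES": f"{trainer_id}",
        "PADDLE_TRAINERS_NUM": f"{trainer_num}",
        "PADDLE_TRAINER_ID": f"{trainer_id}",
        "PADDLE_TRAINER_ENDPOINTS": endpoints,
        "PADDLE_CURRENT_ENDPOINT": ep,
    }


env = nccl2_trainer_env(1, 2, "127.0.0.1:6171", "127.0.0.1:6170,127.0.0.1:6171")
assert all(isinstance(v, str) for v in env.values())
print(env)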
- sys.stderr.write('trainer {} stderr: {}\n'.format(i, tr_err)) + sys.stderr.write(f'trainer {i} stderr: {tr_err}\n') if trainer_num == 1: if check_error_log: @@ -1637,13 +1637,13 @@ def _run_cluster_nccl2( ) path = os.path.join( - self.temp_dir.name, log_name + "_tr{}_err.log".format(i) + self.temp_dir.name, log_name + f"_tr{i}_err.log" ) tr_pipe = open(path, "wb") print_to_err( type(self).__name__, - "going to start process {} with nccl2".format(i), + f"going to start process {i} with nccl2", ) tr_proc = subprocess.Popen( tr_cmd.strip().split(" "), @@ -1660,7 +1660,7 @@ def _run_cluster_nccl2( tr_out, tr_err = procs[i].communicate() outs.append(tr_out) pipes[i].close() - sys.stderr.write('trainer {} stderr: {}\n'.format(i, tr_err)) + sys.stderr.write(f'trainer {i} stderr: {tr_err}\n') if check_error_log: print("outs[0]:", outs[0]) @@ -1686,14 +1686,14 @@ def _run_pipeline(self, model, envs, check_error_log, log_name): tr_env['NCCL_SHM_DISABLE'] = '1' tr_env['FLAGS_selected_gpus'] = str(i) tr_env['FLAGS_cudnn_deterministic'] = '0' - print("tr_cmd:{}, env: {}".format(tr_cmd, tr_env)) + print(f"tr_cmd:{tr_cmd}, env: {tr_env}") - path = os.path.join(self.temp_dir.name + "tr{}_err.log".format(i)) + path = os.path.join(self.temp_dir.name + f"tr{i}_err.log") tr_pipe = open(path, "wb") print_to_err( type(self).__name__, - "going to start process {} with nccl2".format(i), + f"going to start process {i} with nccl2", ) tr_proc = subprocess.Popen( tr_cmd.strip().split(" "), @@ -1710,7 +1710,7 @@ def _run_pipeline(self, model, envs, check_error_log, log_name): tr_out, tr_err = procs[i].communicate() outs.append(tr_out) pipes[i].close() - sys.stderr.write('trainer {} stderr: {}\n'.format(i, tr_err)) + sys.stderr.write(f'trainer {i} stderr: {tr_err}\n') if check_error_log: print("outs[0]:", outs[0]) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py index a8d9bdb0c9e35a..bd368fc7b58c26 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py @@ -194,7 +194,7 @@ def _setup_config(self): def tearDown(self): t = time.time() - self.startTime - print('%s: %.3f' % (self.__class__.__name__, t)) + print(f'{self.__class__.__name__}: {t:.3f}') def setUp(self): self.startTime = time.time() @@ -213,21 +213,21 @@ def setUp(self): if DIST_UT_PORT: print("set begin_port:", DIST_UT_PORT) - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( DIST_UT_PORT, DIST_UT_PORT + 1, ) - self._tr_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._tr_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( DIST_UT_PORT + 2, DIST_UT_PORT + 3, ) DIST_UT_PORT += 4 else: - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) - self._tr_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._tr_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) @@ -339,7 +339,7 @@ def _run_cluster(self, model, envs): python_path += " -m coverage run --branch -p" env.update(envs) - tr_cmd = "{0} {1} --role trainer --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --test {9}".format( + tr_cmd = "{} {} --role trainer --endpoints {} --trainer_endpoints {} --current_id {{}} --trainers {} --mode {} 
--geo_sgd_need_push_nums {} --reader {} --gloo_path {} --test {}".format( python_path, model, self._ps_endpoints, @@ -352,7 +352,7 @@ def _run_cluster(self, model, envs): self._need_test, ) - ps_cmd = "{0} {1} --role pserver --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --test {9}".format( + ps_cmd = "{} {} --role pserver --endpoints {} --trainer_endpoints {} --current_id {{}} --trainers {} --mode {} --geo_sgd_need_push_nums {} --reader {} --gloo_path {} --test {}".format( python_path, model, self._ps_endpoints, @@ -366,8 +366,8 @@ def _run_cluster(self, model, envs): ) if self._model_dir: - tr_cmd += " --model_dir {}".format(self._model_dir) - ps_cmd += " --model_dir {}".format(self._model_dir) + tr_cmd += f" --model_dir {self._model_dir}" + ps_cmd += f" --model_dir {self._model_dir}" # Run dist train to compare with local results ps0, ps1 = self._start_pserver(ps_cmd, env) @@ -428,7 +428,7 @@ def catlog(logx): basename ) ) - os.system("cat {}".format(logx)) + os.system(f"cat {logx}") print( "================== Error {} end =====================\n".format( basename diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py index c14c32d1fe7ce2..127c3cf4b0188d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_gloo.py @@ -102,9 +102,9 @@ def _run_cluster(self, model, envs): python_path += " -m coverage run --branch -p" env.update(envs) - tr_cmd = "{0} {1}".format(python_path, model) + tr_cmd = f"{python_path} {model}" - ps_cmd = "{0} {1}".format(python_path, model) + ps_cmd = f"{python_path} {model}" # Run dist train to compare with local results env["TRAINING_ROLE"] = "PSERVER" diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py index 25fe5ba54c086c..48463115605090 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py @@ -191,7 +191,7 @@ def _setup_config(self): def tearDown(self): t = time.time() - self.startTime - print('%s: %.3f' % (self.__class__.__name__, t)) + print(f'{self.__class__.__name__}: {t:.3f}') def setUp(self): self.startTime = time.time() @@ -210,37 +210,37 @@ def setUp(self): if DIST_UT_PORT: print("set begin_port:", DIST_UT_PORT) - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( DIST_UT_PORT, DIST_UT_PORT + 1, ) - self._tr_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._tr_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( DIST_UT_PORT + 2, DIST_UT_PORT + 3, ) - self._heter_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._heter_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( DIST_UT_PORT + 4, DIST_UT_PORT + 5, ) - self._heter_endpoints_2 = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._heter_endpoints_2 = "127.0.0.1:{},127.0.0.1:{}".format( DIST_UT_PORT + 6, DIST_UT_PORT + 7, ) DIST_UT_PORT += 8 else: - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) - self._tr_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._tr_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) - self._heter_endpoints = 
"127.0.0.1:%s,127.0.0.1:%s" % ( + self._heter_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) - self._heter_endpoints_2 = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._heter_endpoints_2 = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) @@ -377,7 +377,7 @@ def _run_cluster(self, model, envs): (self._heter_endpoints, self._heter_endpoints_2) ) - tr_cmd = "{0} {1} --role trainer --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format( + tr_cmd = "{} {} --role trainer --endpoints {} --trainer_endpoints {} --current_id {{}} --trainers {} --mode {} --geo_sgd_need_push_nums {} --reader {} --gloo_path {} --heter_trainer_endpoints {} --heter_trainer_device {}".format( python_path, model, self._ps_endpoints, @@ -391,7 +391,7 @@ def _run_cluster(self, model, envs): self._heter_device, ) - ps_cmd = "{0} {1} --role pserver --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format( + ps_cmd = "{} {} --role pserver --endpoints {} --trainer_endpoints {} --current_id {{}} --trainers {} --mode {} --geo_sgd_need_push_nums {} --reader {} --gloo_path {} --heter_trainer_endpoints {} --heter_trainer_device {}".format( python_path, model, self._ps_endpoints, @@ -405,7 +405,7 @@ def _run_cluster(self, model, envs): self._heter_device, ) - heter_cmd = "{0} {1} --role heter_trainer --endpoints {2} --trainer_endpoints {3} --current_id {{}} --stage_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format( + heter_cmd = "{} {} --role heter_trainer --endpoints {} --trainer_endpoints {} --current_id {{}} --stage_id {{}} --trainers {} --mode {} --geo_sgd_need_push_nums {} --reader {} --gloo_path {} --heter_trainer_endpoints {} --heter_trainer_device {}".format( python_path, model, self._ps_endpoints, diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 09d188f9f5f5b8..968594397c7d55 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -103,7 +103,7 @@ def init_client(self, place, port): inputs={}, outputs={"Out": []}, attrs={ - "endpoints": ["127.0.0.1:{0}".format(port)], + "endpoints": [f"127.0.0.1:{port}"], RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE, }, ) diff --git a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py index 431f9632441571..671e11e7702fe1 100644 --- a/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py +++ b/python/paddle/fluid/tests/unittests/test_distributed_fused_lamb_op_with_clip.py @@ -48,7 +48,7 @@ def run_test( if os.name == 'nt': return args = locals() - log_dir = os.path.join(temp_dir.name, 'log_{}'.format(os.getpid())) + log_dir = os.path.join(temp_dir.name, f'log_{os.getpid()}') cmd = [ sys.executable, '-u', @@ -69,13 +69,13 @@ def run_test( touch_file_env = 'SUCCESS_TOUCH_FILE' touch_file_name = os.path.join( temp_dir.name, - 'distributed_fused_lamb_touch_file_{}'.format(os.getpid()), + 
f'distributed_fused_lamb_touch_file_{os.getpid()}', ) os.environ[touch_file_env] = touch_file_name try: assert os.system(cmd) == 0 and os.path.exists( touch_file_name - ), 'Test failed when {}'.format(args) + ), f'Test failed when {args}' finally: temp_dir.cleanup() diff --git a/python/paddle/fluid/tests/unittests/test_downpoursgd.py b/python/paddle/fluid/tests/unittests/test_downpoursgd.py index 63ca6f490bde4b..e5294926e9e6b2 100644 --- a/python/paddle/fluid/tests/unittests/test_downpoursgd.py +++ b/python/paddle/fluid/tests/unittests/test_downpoursgd.py @@ -64,7 +64,7 @@ def test_device_work_use_cvm(self): avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() - with open("{}/fleet_desc.prototxt".format(cache_path)) as f: + with open(f"{cache_path}/fleet_desc.prototxt") as f: text_format.Merge(f.read(), ps_param) fleet_desc = ps_param exe = fluid.Executor(fluid.CPUPlace()) @@ -128,7 +128,7 @@ def test_device_work(self): avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() - with open("{}/fleet_desc.prototxt".format(cache_path)) as f: + with open(f"{cache_path}/fleet_desc.prototxt") as f: text_format.Merge(f.read(), ps_param) fleet_desc = ps_param exe = fluid.Executor(fluid.CPUPlace()) @@ -190,7 +190,7 @@ def test_downpour_opt_work(self): avg_cost = paddle.mean(cost) ps_param = pslib.PSParameter() - with open("{}/fleet_desc.prototxt".format(cache_path)) as f: + with open(f"{cache_path}/fleet_desc.prototxt") as f: text_format.Merge(f.read(), ps_param) fleet_desc = ps_param exe = fluid.Executor(fluid.CPUPlace()) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py index 4f5a8e612782f1..a727fba1192deb 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py @@ -135,7 +135,7 @@ def executor_main(self): persistables, non_persistables = get_persistables_and_non_persistables( fluid.default_main_program(), [loss.name] ) - print('Non-persistable var number {}'.format(len(non_persistables))) + print(f'Non-persistable var number {len(non_persistables)}') print(non_persistables) self.assert_gc_vars( diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py index 1fc84c87ed126a..134bf926608893 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_dynamic_rnn_base.py @@ -79,9 +79,7 @@ def test_network(self): return for use_cuda in [True, False]: - print( - 'network: {}, use_cuda: {}'.format(self.net.__name__, use_cuda) - ) + print(f'network: {self.net.__name__}, use_cuda: {use_cuda}') with fluid.program_guard(fluid.Program(), fluid.Program()): with fluid.scope_guard(core.Scope()): train(self.net, use_cuda) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py index af06373ea0a2de..15880ec1fb2ed0 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py @@ -460,7 +460,7 @@ def test_check_gradient(self): check_args.insert(0, self.place) self.check_grad_with_place(*check_args, **check_kwargs) - cls_name = "{0}_{1}".format(parent.__name__, "Fp16") + cls_name = "{}_{}".format(parent.__name__, "Fp16") 
     TestElementwiseDivFP16Op.__name__ = cls_name
     globals()[cls_name] = TestElementwiseDivFP16Op
 
diff --git a/python/paddle/fluid/tests/unittests/test_empty_like_op.py b/python/paddle/fluid/tests/unittests/test_empty_like_op.py
index dd0d634d6b3991..8ccaabd7c2cf06 100644
--- a/python/paddle/fluid/tests/unittests/test_empty_like_op.py
+++ b/python/paddle/fluid/tests/unittests/test_empty_like_op.py
@@ -28,14 +28,14 @@ def __check_out__(self, out):
         self.assertEqual(
             data_type,
             self.dst_dtype,
-            'dtype should be %s, but get %s' % (self.dst_dtype, data_type),
+            f'dtype should be {self.dst_dtype}, but get {data_type}',
         )
 
         shape = out.shape
         self.assertTupleEqual(
             shape,
             self.dst_shape,
-            'shape should be %s, but get %s' % (self.dst_shape, shape),
+            f'shape should be {self.dst_shape}, but get {shape}',
         )
 
         if data_type in ['float32', 'float64', 'int32', 'int64']:
diff --git a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py b/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py
index 5a3e3bf02a76e0..6972505bf3cbb6 100755
--- a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py
+++ b/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py
@@ -131,9 +131,9 @@ def __init__(self, model_dir):
         model_file = os.path.join(model_dir, "inference.pdmodel")
         params_file = os.path.join(model_dir, "inference.pdiparams")
         if not os.path.exists(model_file):
-            raise ValueError("not find model file path {}".format(model_file))
+            raise ValueError(f"not find model file path {model_file}")
         if not os.path.exists(params_file):
-            raise ValueError("not find params file path {}".format(params_file))
+            raise ValueError(f"not find params file path {params_file}")
         config = paddle.inference.Config(model_file, params_file)
 
         # fast_tokenizer op only support cpu.
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_auto.py b/python/paddle/fluid/tests/unittests/test_fleet_auto.py index 9716e671393bea..2c9c6ccccc18d5 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_auto.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_auto.py @@ -50,7 +50,7 @@ def test_distributed_strategy_auto(self): optimizer.minimize(avg_cost) applied_meta_list = fleet._get_applied_meta_list() - print("applied_meta_list: {}".format(applied_meta_list)) + print(f"applied_meta_list: {applied_meta_list}") if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base_single.py b/python/paddle/fluid/tests/unittests/test_fleet_base_single.py index 0c6d94c9916b67..55cb0486e4ca6a 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base_single.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base_single.py @@ -105,7 +105,7 @@ def test_single_run_collective_minimize(self): for i in range(10): cost_val = exe.run(feed=self.gen_data(), fetch_list=[avg_cost.name]) - print("cost of step[{}] = {}".format(i, cost_val)) + print(f"cost of step[{i}] = {cost_val}") class TestFleetBaseSingleRunPS(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_frame_op.py b/python/paddle/fluid/tests/unittests/test_frame_op.py index ba37ad45562613..f012ac06f38b85 100644 --- a/python/paddle/fluid/tests/unittests/test_frame_op.py +++ b/python/paddle/fluid/tests/unittests/test_frame_op.py @@ -39,7 +39,7 @@ def frame_from_librosa(x, frame_length, hop_length, axis=-1): strides = [hop_length * x.itemsize] + list(strides) else: - raise ValueError("Frame axis={} must be either 0 or -1".format(axis)) + raise ValueError(f"Frame axis={axis} must be either 0 or -1") return as_strided(x, shape=shape, strides=strides) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py index 41cafb8c24fe87..3d1b910fea971e 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_gemm_epilogue_pass.py @@ -155,7 +155,7 @@ def _test_output(self): ) self.assertTrue( compare(self.reference, result, self.atol, self.rtol), - "[{}] outputs are miss-matched.".format(type(self).__name__), + f"[{type(self).__name__}] outputs are miss-matched.", ) self.assertTrue( verify_node_count(program._graph, "fused_gemm_epilogue", 3), @@ -312,12 +312,12 @@ def setUp(self): self.fetch = [ self.loss.name, - '{}.w_0@GRAD'.format(multi_layer.linear1.full_name()), - '{}.b_0@GRAD'.format(multi_layer.linear1.full_name()), - '{}.w_0@GRAD'.format(multi_layer.linear2.full_name()), - '{}.b_0@GRAD'.format(multi_layer.linear2.full_name()), - '{}.w_0@GRAD'.format(multi_layer.linear3.full_name()), - '{}.b_0@GRAD'.format(multi_layer.linear3.full_name()), + f'{multi_layer.linear1.full_name()}.w_0@GRAD', + f'{multi_layer.linear1.full_name()}.b_0@GRAD', + f'{multi_layer.linear2.full_name()}.w_0@GRAD', + f'{multi_layer.linear2.full_name()}.b_0@GRAD', + f'{multi_layer.linear3.full_name()}.w_0@GRAD', + f'{multi_layer.linear3.full_name()}.b_0@GRAD', ] self.outs_ref = self.exe.run( self.main_prog, feed=self.feed, fetch_list=self.fetch @@ -338,7 +338,7 @@ def _test_output(self): for ref, res in zip(self.outs_ref, outs_res): self.assertTrue( compare(ref, res, self.atol, self.rtol), - "[{}] output is miss-matched.".format(type(self).__name__), + f"[{type(self).__name__}] output is miss-matched.", ) self.assertTrue( diff --git 
a/python/paddle/fluid/tests/unittests/test_fused_dropout_add_op.py b/python/paddle/fluid/tests/unittests/test_fused_dropout_add_op.py index 792f0b2e877d08..efd1833a713cb2 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_dropout_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_dropout_add_op.py @@ -101,7 +101,7 @@ def setUp(self): self.mode = mode self.seed = seed - cls_name = "{0}_{1}_{2}_{3}_{4}_{5}".format( + cls_name = "{}_{}_{}_{}_{}_{}".format( parent.__name__, dtype, mode, str(training), str(p), str(seed) ) TestFusedDropoutAddCase.__name__ = cls_name diff --git a/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py b/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py index 9830fe455b84ed..70face3ce5b674 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_gate_attention_op.py @@ -311,7 +311,7 @@ def _convert(value): if check_equal: self.assertTrue( np.equal(_convert(ref), _convert(out)).all(), - "Checking < {} > failed!".format(name), + f"Checking < {name} > failed!", ) else: np.testing.assert_allclose( @@ -319,7 +319,7 @@ def _convert(value): _convert(out), atol=atol, rtol=rtol, - err_msg="Checking < {} > failed!".format(name), + err_msg=f"Checking < {name} > failed!", ) def check_output_and_grad(self, atol, rtol): diff --git a/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py b/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py index b96d0b1d179dd5..03cb6bd3b822c2 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_repeated_fc_relu_op.py @@ -39,11 +39,11 @@ def setUp(self): inp = np.reshape(matrix.input, [self.bs, ics[i]]) weights.append( ( - 'W_{0}'.format(i), + f'W_{i}', np.reshape(matrix.weights, [ics[i], self.oc[i]]), ) ) - biases.append(('B_{0}'.format(i), matrix.bias)) + biases.append((f'B_{i}', matrix.bias)) outs.append( np.reshape( np.maximum(fc_refer(matrix, True), 0), [self.bs, self.oc[i]] @@ -56,18 +56,18 @@ def setUp(self): out = fc_refer(matrix, True) weights.append( ( - 'W_{0}'.format(i + 1), + f'W_{i + 1}', np.reshape(matrix.weights, [ics[i + 1], self.oc[i + 1]]), ) ) - biases.append(('B_{0}'.format(i + 1), matrix.bias)) + biases.append((f'B_{i + 1}', matrix.bias)) outs.append( np.reshape(np.maximum(out, 0), [self.bs, self.oc[i + 1]]) ) relu_outs = [] for i in range(sz - 1): - relu_outs.append(('ReluOut_{0}'.format(i), outs[i])) + relu_outs.append((f'ReluOut_{i}', outs[i])) self.inputs = { 'X': inp, diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py index 7fee91b8148d9d..56ce20a76c0d44 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_concat_op.py @@ -58,7 +58,7 @@ def setUp(self): compute_seqpool_sqrt(x, offset, out) else: raise Exception("Unsupported pool type!") - inputs.append(('x_{0}'.format(i), (x, lod))) + inputs.append((f'x_{i}', (x, lod))) outs.append(out) i = i + 1 @@ -111,8 +111,8 @@ class TestSeqPoolSqrtCase(parent): def set_pooltype(self): self.pooltype = "SQRT" - cls_name_avg = "{0}_{1}".format(parent.__name__, "avg") - cls_name_sqrt = "{0}_{1}".format(parent.__name__, "sqrt") + cls_name_avg = "{}_{}".format(parent.__name__, "avg") + cls_name_sqrt = "{}_{}".format(parent.__name__, "sqrt") 
TestSeqPoolAvgCase.__name__ = cls_name_avg TestSeqPoolSqrtCase.__name__ = cls_name_sqrt globals()[cls_name_avg] = TestSeqPoolAvgCase diff --git a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py index 3bf59c9df60a6a..a5d2eb5c832786 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_seqpool_cvm_concat_op.py @@ -65,7 +65,7 @@ def setUp(self): out = cvm_compute(out, self.w, self.use_cvm) else: raise Exception("Unsupported pool type!") - inputs.append(('x_{0}'.format(i), (x, lod))) + inputs.append((f'x_{i}', (x, lod))) outs.append(out) i = i + 1 @@ -118,8 +118,8 @@ class TestSeqPoolSqrtCase(parent): def set_pooltype(self): self.pooltype = "SQRT" - cls_name_avg = "{0}_{1}".format(parent.__name__, "avg") - cls_name_sqrt = "{0}_{1}".format(parent.__name__, "sqrt") + cls_name_avg = "{}_{}".format(parent.__name__, "avg") + cls_name_sqrt = "{}_{}".format(parent.__name__, "sqrt") TestSeqPoolAvgCase.__name__ = cls_name_avg TestSeqPoolSqrtCase.__name__ = cls_name_sqrt globals()[cls_name_avg] = TestSeqPoolAvgCase diff --git a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py index 6cf6eec52cdb37..59db731117766f 100644 --- a/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py +++ b/python/paddle/fluid/tests/unittests/test_gpu_package_without_gpu_device.py @@ -48,7 +48,7 @@ def test_import_paddle(self): _python = sys.executable - ps_cmd = '{} {}'.format(_python, test_file) + ps_cmd = f'{_python} {test_file}' ps_proc = subprocess.Popen( ps_cmd.strip().split(" "), stdout=subprocess.PIPE, diff --git a/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py b/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py index 98234e1613f597..2aa68dfb16ba64 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_rnn_op.py @@ -68,10 +68,10 @@ def get_weight_names(self): weight_names = [] for i in range(self.num_layers): for j in range(0, 2 * self.direction_num): - weight_names.append("{}.weight_{}".format(i, j)) + weight_names.append(f"{i}.weight_{j}") for i in range(self.num_layers): for j in range(0, 2 * self.direction_num): - weight_names.append("{}.bias_{}".format(i, j)) + weight_names.append(f"{i}.bias_{j}") return weight_names def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index ccfb83d1b721f7..7b7fd5dadf4da0 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -333,7 +333,7 @@ def test_deefcf(self): adam.minimize(loss) deepcf.clear_gradients() dy_loss = loss.numpy() - sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss)) + sys.stderr.write(f'dynamic loss: {slice} {dy_loss}\n') with fluid.dygraph.guard(): paddle.seed(seed) @@ -367,9 +367,7 @@ def test_deefcf(self): adam2.minimize(loss2) deepcf2.clear_gradients() dy_loss2 = loss2.numpy() - sys.stderr.write( - 'dynamic loss: %s %s\n' % (slice, dy_loss2) - ) + sys.stderr.write(f'dynamic loss: {slice} {dy_loss2}\n') with fluid.dygraph.guard(): paddle.seed(seed) @@ -405,9 +403,7 @@ def test_deefcf(self): adam.minimize(loss) deepcf.clear_gradients() eager_loss = loss.numpy() - sys.stderr.write( 
- 'eager loss: %s %s\n' % (slice, eager_loss) - ) + sys.stderr.write(f'eager loss: {slice} {eager_loss}\n') self.assertEqual(static_loss, dy_loss) self.assertEqual(static_loss, dy_loss2) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py index e717eeadd340c7..93c2aee3a9d5ce 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_gnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_gnn.py @@ -176,7 +176,7 @@ def test_gnn_float32(self): np.testing.assert_allclose( static_weight, model2_gc_weight_value, rtol=1e-05 ) - sys.stderr.write('%s %s\n' % (static_loss, loss_value)) + sys.stderr.write(f'{static_loss} {loss_value}\n') if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index f90902631c532b..e6fd87c497c668 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -352,10 +352,10 @@ def test_uniform_initializer_dygraph(self): min_value, max_value = get_uniform_min_and_max(linear.weight.numpy()) self.assertTrue( - min_value >= -0.5, 'min value {} should >= -0.5'.format(min_value) + min_value >= -0.5, f'min value {min_value} should >= -0.5' ) self.assertTrue( - max_value <= 0.5, 'max value {} should <= 0.5'.format(max_value) + max_value <= 0.5, f'max value {max_value} should <= 0.5' ) diff --git a/python/paddle/fluid/tests/unittests/test_ir_graph.py b/python/paddle/fluid/tests/unittests/test_ir_graph.py index 0aabb05be7d449..b563024e5bf2a2 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_graph.py +++ b/python/paddle/fluid/tests/unittests/test_ir_graph.py @@ -71,7 +71,7 @@ def test_create_op_node(self): def test_create_control_dep_var(self): graph = build_graph() - name = "__control_var@{}".format(len(graph.nodes())) + name = f"__control_var@{len(graph.nodes())}" node = graph.create_control_dep_var() self.assertTrue(node.name() == name) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 8af504eaaf9aaf..b68ee02d2074fe 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -1738,9 +1738,7 @@ def make_nce(self): words = [] for i in range(window_size): words.append( - self._get_data( - name='word_{0}'.format(i), shape=[1], dtype='int64' - ) + self._get_data(name=f'word_{i}', shape=[1], dtype='int64') ) dict_size = 10000 diff --git a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py index b432e64620d134..e92f44f5e1b414 100644 --- a/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py +++ b/python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py @@ -252,7 +252,7 @@ def test_NoamDecay(self): self.assertAlmostEqual( right_result, fluid_result[0], - msg='Failed lr scheduler in step {0}, Python result is {1}, Fluid result is {2}'.format( + msg='Failed lr scheduler in step {}, Python result is {}, Fluid result is {}'.format( step, right_result, fluid_result[0] ), ) @@ -309,7 +309,7 @@ def test_MultiStepDecay(self): self.assertAlmostEqual( right_result, fluid_result, - msg='Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}'.format( + msg='Failed lr scheduler in epoch {}, Python result is {}, Fluid result is {}'.format( epoch, 
right_result, fluid_result ), ) @@ -347,7 +347,7 @@ def test_StepDecay(self): self.assertAlmostEqual( right_result, fluid_result, - msg='Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}'.format( + msg='Failed lr scheduler in epoch {}, Python result is {}, Fluid result is {}'.format( epoch, right_result, fluid_result ), ) @@ -376,7 +376,7 @@ def test_LambdaDecay(self): self.assertAlmostEqual( right_result, fluid_result, - msg='Failed lr scheduler in epoch {0}, Python result is {1}, Fluid result is {2}'.format( + msg='Failed lr scheduler in epoch {}, Python result is {}, Fluid result is {}'.format( epoch, right_result, fluid_result ), ) @@ -420,7 +420,7 @@ def check_decay_with_place( self.assertAlmostEqual( python_decayed_lr, lr_val[0], - msg='Failed lr scheduler is {0}, step {1}, Python result is {2}, Fluid result is {3}'.format( + msg='Failed lr scheduler is {}, step {}, Python result is {}, Fluid result is {}'.format( python_decay_fn.__name__, str(step), str(python_decayed_lr), @@ -527,7 +527,7 @@ def check_decay_with_place( self.assertAlmostEqual( python_decayed_lr, lr_val[0], - msg='Test {0} Failed, step {1}, Python result is {2}, Fluid result is {3}'.format( + msg='Test {} Failed, step {}, Python result is {}, Fluid result is {}'.format( python_decay_fn.__name__, str(step), str(python_decayed_lr), @@ -562,7 +562,7 @@ def run_scalar_lr(self, place, lr, start_lr, end_lr): self.assertAlmostEqual( expected_lr, lr_val[0], - msg='Test failed, step {0}, expected {1}, but got {2}'.format( + msg='Test failed, step {}, expected {}, but got {}'.format( step, expected_lr, lr_val[0] ), ) diff --git a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py index f81eb531424deb..2e171913915aa5 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_cudnn_op.py @@ -404,9 +404,9 @@ class TestCUDNNLstmOp(OpTest): def get_weight_names(self): weight_names = [] for i in range(2 * self.num_layers): - weight_names.append('weight{}'.format(i)) + weight_names.append(f'weight{i}') for i in range(2 * self.num_layers): - weight_names.append('bias{}'.format(i)) + weight_names.append(f'bias{i}') return weight_names def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py index 4f4a607905c84d..a0c41b63b05f2e 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -355,7 +355,7 @@ def test_check_grad(self): max_relative_error=max_relative_error, ) - cls_name = "{0}_{1}".format(parent.__name__, "Fp16") + cls_name = "{}_{}".format(parent.__name__, "Fp16") TestMatMulOpFp16Case.__name__ = cls_name globals()[cls_name] = TestMatMulOpFp16Case @@ -432,7 +432,7 @@ def test_check_grad_y(self): def test_check_grad(self): pass - cls_name = "{0}_{1}".format(parent.__name__, "Bf16") + cls_name = "{}_{}".format(parent.__name__, "Bf16") TestMatMulOpBf16Case.__name__ = cls_name globals()[cls_name] = TestMatMulOpBf16Case diff --git a/python/paddle/fluid/tests/unittests/test_nan_inf.py b/python/paddle/fluid/tests/unittests/test_nan_inf.py index 139bd8e9d8ebda..0aebff97e250e6 100644 --- a/python/paddle/fluid/tests/unittests/test_nan_inf.py +++ b/python/paddle/fluid/tests/unittests/test_nan_inf.py @@ -91,7 +91,7 @@ def get_reference_num_nan_inf(self, x): out = np.log(x) num_nan = np.sum(np.isnan(out)) num_inf = 
np.sum(np.isinf(out)) - print("[reference] num_nan={}, num_inf={}".format(num_nan, num_inf)) + print(f"[reference] num_nan={num_nan}, num_inf={num_inf}") return num_nan, num_inf def get_num_nan_inf(self, x_np, use_cuda=True, add_assert=False): @@ -121,7 +121,7 @@ def get_num_nan_inf(self, x_np, use_cuda=True, add_assert=False): num_nan = int(err_str.split("=")[1]) elif "num_inf" in err_str: num_inf = int(err_str.split("=")[1]) - print("[paddle] num_nan={}, num_inf={}".format(num_nan, num_inf)) + print(f"[paddle] num_nan={num_nan}, num_inf={num_inf}") return num_nan, num_inf def test_num_nan_inf(self): diff --git a/python/paddle/fluid/tests/unittests/test_nan_inf_dir.py b/python/paddle/fluid/tests/unittests/test_nan_inf_dir.py index 49882d192f9f64..6d2fa6a84add86 100644 --- a/python/paddle/fluid/tests/unittests/test_nan_inf_dir.py +++ b/python/paddle/fluid/tests/unittests/test_nan_inf_dir.py @@ -35,7 +35,7 @@ def get_reference_num_nan_inf(self, x): out = np.log(x) num_nan = np.sum(np.isnan(out)) num_inf = np.sum(np.isinf(out)) - print("[reference] num_nan={}, num_inf={}".format(num_nan, num_inf)) + print(f"[reference] num_nan={num_nan}, num_inf={num_inf}") return num_nan, num_inf def get_num_nan_inf( @@ -73,9 +73,7 @@ def get_num_nan_inf( num_nan = int(err_str.split("=")[1]) elif "num_inf" in err_str: num_inf = int(err_str.split("=")[1]) - print( - "[paddle] num_nan={}, num_inf={}".format(num_nan, num_inf) - ) + print(f"[paddle] num_nan={num_nan}, num_inf={num_inf}") return num_nan, num_inf def test_num_nan_inf(self): diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py index 7f2009b1a884ac..19256e7cf6b5b9 100755 --- a/python/paddle/fluid/tests/unittests/test_newprofiler.py +++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py @@ -404,7 +404,7 @@ def train(step_num_samples=None): p.step(num_samples=step_num_samples) if i % 10 == 0: step_info = p.step_info() - print("Iter {}: {}".format(i, step_info)) + print(f"Iter {i}: {step_info}") p.stop() return step_info diff --git a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py index f867b75617b41d..f125c1cbe878fb 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py @@ -177,7 +177,7 @@ def test_case(self): self.run_dynamic_functional_api(place) self.run_dynamic_broadcast_api(place) - cls_name = "TestMarginRankLossCase_{}_{}".format(margin, reduction) + cls_name = f"TestMarginRankLossCase_{margin}_{reduction}" MarginRankingLossCls.__name__ = cls_name globals()[cls_name] = MarginRankingLossCls diff --git a/python/paddle/fluid/tests/unittests/test_norm_all.py b/python/paddle/fluid/tests/unittests/test_norm_all.py index b18b9b49625b4f..75441aa5dfab4b 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_all.py +++ b/python/paddle/fluid/tests/unittests/test_norm_all.py @@ -310,7 +310,7 @@ def test_check_grad(self): max_relative_error=max_relative_error, ) - cls_name = "{0}_{1}".format(parent.__name__, "Fp16") + cls_name = "{}_{}".format(parent.__name__, "Fp16") TestPnormFP16Op.__name__ = cls_name globals()[cls_name] = TestPnormFP16Op diff --git a/python/paddle/fluid/tests/unittests/test_ops_nms.py b/python/paddle/fluid/tests/unittests/test_ops_nms.py index 3b44b8ee444ca0..8b95329eb56d39 100644 --- a/python/paddle/fluid/tests/unittests/test_ops_nms.py +++ 
b/python/paddle/fluid/tests/unittests/test_ops_nms.py @@ -110,7 +110,7 @@ def test_nms(self): np.testing.assert_array_equal( out.numpy(), out_py, - err_msg='paddle out: {}\n py out: {}\n'.format(out, out_py), + err_msg=f'paddle out: {out}\n py out: {out_py}\n', ) def test_multiclass_nms_dynamic(self): @@ -135,7 +135,7 @@ def test_multiclass_nms_dynamic(self): np.testing.assert_array_equal( out.numpy(), out_py, - err_msg='paddle out: {}\n py out: {}\n'.format(out, out_py), + err_msg=f'paddle out: {out}\n py out: {out_py}\n', ) def test_multiclass_nms_static(self): @@ -186,7 +186,7 @@ def test_multiclass_nms_static(self): np.testing.assert_array_equal( out, out_py, - err_msg='paddle out: {}\n py out: {}\n'.format(out, out_py), + err_msg=f'paddle out: {out}\n py out: {out_py}\n', ) def test_multiclass_nms_dynamic_to_static(self): diff --git a/python/paddle/fluid/tests/unittests/test_pad3d_op.py b/python/paddle/fluid/tests/unittests/test_pad3d_op.py index 30c90fc6b29d90..7e6cefcc3132d0 100644 --- a/python/paddle/fluid/tests/unittests/test_pad3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad3d_op.py @@ -219,7 +219,7 @@ def test_check_output(self): def test_check_grad_normal(self): self.check_grad(['X'], 'Out', max_relative_error=1.5e-3) - cls_name = "{0}_{1}".format(parent.__name__, "FP16OP") + cls_name = "{}_{}".format(parent.__name__, "FP16OP") TestPad3dFp16.__name__ = cls_name globals()[cls_name] = TestPad3dFp16 @@ -259,7 +259,7 @@ def test_check_grad_normal(self): place, ['X'], 'Out', max_relative_error=1e-2 ) - cls_name = "{0}_{1}".format(parent.__name__, "BF16OP") + cls_name = "{}_{}".format(parent.__name__, "BF16OP") TestPad3dBf16.__name__ = cls_name globals()[cls_name] = TestPad3dBf16 diff --git a/python/paddle/fluid/tests/unittests/test_pad_op.py b/python/paddle/fluid/tests/unittests/test_pad_op.py index b3f926d9e08090..bb569686783767 100644 --- a/python/paddle/fluid/tests/unittests/test_pad_op.py +++ b/python/paddle/fluid/tests/unittests/test_pad_op.py @@ -98,7 +98,7 @@ def get_dtype(self): def test_check_grad_normal(self): self.check_grad(['X'], 'Out', max_relative_error=0.3) - cls_name = "{0}_{1}".format(parent.__name__, "Fp16") + cls_name = "{}_{}".format(parent.__name__, "Fp16") TestPadFp16.__name__ = cls_name globals()[cls_name] = TestPadFp16 diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py index 9add456a14c403..8f6c9cb7baabad 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py @@ -73,14 +73,14 @@ def start_local_trainers_cpu( current_env.update(proc_env) - print("trainer proc env:{}".format(current_env)) + print(f"trainer proc env:{current_env}") assert ( os.getenv('WITH_COVERAGE', 'OFF') == 'OFF' ), "Gloo don't support WITH_COVERAGE." 
cmd = "python -u " + training_script - print("start trainer proc:{} env:{}".format(cmd, proc_env)) + print(f"start trainer proc:{cmd} env:{proc_env}") fn = None @@ -129,14 +129,14 @@ def start_local_trainers( current_env.update(proc_env) - print("trainer proc env:{}".format(current_env)) + print(f"trainer proc env:{current_env}") if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': cmd = "python -m coverage run --branch -p " + training_script else: cmd = "python -u " + training_script - print("start trainer proc:{} env:{}".format(cmd, proc_env)) + print(f"start trainer proc:{cmd} env:{proc_env}") fn = None @@ -183,7 +183,7 @@ def run_mnist_2gpu( alive = watch_local_trainers(procs, cluster.trainers_endpoints()) if not alive: - print("Local procs complete, POD info:{}".format(pod)) + print(f"Local procs complete, POD info:{pod}") break time.sleep(3) @@ -205,7 +205,7 @@ def run_mnist_2cpu(self, target_file_name): alive = watch_local_trainers(procs, cluster.trainers_nranks()) if not alive: - print("Local procs complete, POD info:{}".format(pod)) + print(f"Local procs complete, POD info:{pod}") break time.sleep(3) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py index c3f3caa1383c92..b2f78314891001 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py @@ -77,14 +77,14 @@ def start_local_trainers( current_env.update(proc_env) - print("trainer proc env:{}".format(current_env)) + print(f"trainer proc env:{current_env}") if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': cmd = "python -m coverage run --branch -p " + training_script else: cmd = "python -u " + training_script - print("start trainer proc:{} env:{}".format(cmd, proc_env)) + print(f"start trainer proc:{cmd} env:{proc_env}") fn = None @@ -123,7 +123,7 @@ def run_mnist_2gpu(self, target_file_name): alive = watch_local_trainers(procs, cluster.trainers_nranks()) if not alive: - print("Local procs complete, POD info:{}".format(pod)) + print(f"Local procs complete, POD info:{pod}") break time.sleep(3) diff --git a/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py index 9ed49a3196e55a..5d1f9907ecb9e5 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_unshuffle.py @@ -251,9 +251,9 @@ def run_dygraph(self, down_factor, data_format): result_functional.numpy(), npresult, rtol=1e-05 ) - pixel_unshuffle_str = 'downscale_factor={}'.format(down_factor) + pixel_unshuffle_str = f'downscale_factor={down_factor}' if data_format != 'NCHW': - pixel_unshuffle_str += ', data_format={}'.format(data_format) + pixel_unshuffle_str += f', data_format={data_format}' self.assertEqual(pixel_unshuffle.extra_repr(), pixel_unshuffle_str) def test_dygraph1(self): diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_op.py b/python/paddle/fluid/tests/unittests/test_pool2d_op.py index 7a835f0fb440ea..5ab2bad28e3c3f 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_op.py @@ -546,7 +546,7 @@ class TestCUDNNCase(parent): def init_kernel_type(self): self.use_cudnn = True - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNOp") + cls_name = "{}_{}".format(parent.__name__, "CUDNNOp") TestCUDNNCase.__name__ = 
cls_name globals()[cls_name] = TestCUDNNCase @@ -597,7 +597,7 @@ def test_check_grad(self): check_dygraph=(not self.use_mkldnn), ) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFp16Op") + cls_name = "{}_{}".format(parent.__name__, "CUDNNFp16Op") TestCUDNNFp16Case.__name__ = cls_name globals()[cls_name] = TestCUDNNFp16Case @@ -638,7 +638,7 @@ def test_check_grad(self): check_dygraph=(not self.use_mkldnn), ) - cls_name = "{0}_{1}".format(parent.__name__, "Fp16Op") + cls_name = "{}_{}".format(parent.__name__, "Fp16Op") TestFp16Case.__name__ = cls_name globals()[cls_name] = TestFp16Case @@ -671,7 +671,7 @@ def init_kernel_type(self): def init_ceil_mode(self): self.ceil_mode = True - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNOpCeilMode") + cls_name = "{}_{}".format(parent.__name__, "CUDNNOpCeilMode") TestPool2DUseCeilCase.__name__ = cls_name globals()[cls_name] = TestPool2DUseCeilCase @@ -685,7 +685,7 @@ class TestPool2DUseCeilCase(parent): def init_ceil_mode(self): self.ceil_mode = True - cls_name = "{0}_{1}".format(parent.__name__, "CeilModeCast") + cls_name = "{}_{}".format(parent.__name__, "CeilModeCast") TestPool2DUseCeilCase.__name__ = cls_name globals()[cls_name] = TestPool2DUseCeilCase @@ -1075,7 +1075,7 @@ def init_paddings(self): self.paddings = [0, 0] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingSAMEOp") TestPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestPaddingSMAECase @@ -1107,7 +1107,7 @@ def init_paddings(self): self.paddings = [1, 1] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "CudnnPaddingSAMEOp") TestCUDNNPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestCUDNNPaddingSMAECase @@ -1133,7 +1133,7 @@ def init_paddings(self): self.paddings = [1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingVALIDOp") TestPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestPaddingVALIDCase @@ -1165,7 +1165,7 @@ def init_paddings(self): self.paddings = [1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "CudnnPaddingVALIDOp") TestCUDNNPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestCUDNNPaddingVALIDCase diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_op.py b/python/paddle/fluid/tests/unittests/test_pool3d_op.py index 6f104a7f557dc9..e1f4d048a4cb49 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_op.py @@ -510,7 +510,7 @@ class TestCUDNNCase(parent): def init_kernel_type(self): self.use_cudnn = True - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNOp") + cls_name = "{}_{}".format(parent.__name__, "CUDNNOp") TestCUDNNCase.__name__ = cls_name globals()[cls_name] = TestCUDNNCase @@ -541,7 +541,7 @@ def test_check_output(self): else: self.check_output_with_place(place, atol=1e-3) - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNFp16Op") + cls_name = "{}_{}".format(parent.__name__, "CUDNNFp16Op") TestCUDNNFp16Case.__name__ = cls_name globals()[cls_name] = TestCUDNNFp16Case @@ -561,7 +561,7 @@ def test_check_output(self): if core.is_float16_supported(place): self.check_output_with_place(place, atol=1e-2) - cls_name = 
"{0}_{1}".format(parent.__name__, "Fp16Op") + cls_name = "{}_{}".format(parent.__name__, "Fp16Op") TestFp16Case.__name__ = cls_name globals()[cls_name] = TestFp16Case @@ -593,7 +593,7 @@ def init_kernel_type(self): def init_ceil_mode(self): self.ceil_mode = True - cls_name = "{0}_{1}".format(parent.__name__, "CUDNNOpCeilMode") + cls_name = "{}_{}".format(parent.__name__, "CUDNNOpCeilMode") TestPool3DUseCeilCase.__name__ = cls_name globals()[cls_name] = TestPool3DUseCeilCase @@ -607,7 +607,7 @@ class TestPool3DUseCeilCase(parent): def init_ceil_mode(self): self.ceil_mode = True - cls_name = "{0}_{1}".format(parent.__name__, "CeilModeCast") + cls_name = "{}_{}".format(parent.__name__, "CeilModeCast") TestPool3DUseCeilCase.__name__ = cls_name globals()[cls_name] = TestPool3DUseCeilCase @@ -983,7 +983,7 @@ def init_paddings(self): self.paddings = [0, 0, 0] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingSAMEOp") TestPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestPaddingSMAECase @@ -1015,7 +1015,7 @@ def init_paddings(self): self.paddings = [1, 1, 1] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "CudnnPaddingSAMEOp") TestCUDNNPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestCUDNNPaddingSMAECase @@ -1041,7 +1041,7 @@ def init_paddings(self): self.paddings = [1, 1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingVALIDOp") TestPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestPaddingVALIDCase @@ -1073,7 +1073,7 @@ def init_paddings(self): self.paddings = [1, 1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "CudnnPaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "CudnnPaddingVALIDOp") TestCUDNNPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestCUDNNPaddingVALIDCase diff --git a/python/paddle/fluid/tests/unittests/test_prelu_op.py b/python/paddle/fluid/tests/unittests/test_prelu_op.py index 4667be67449ad2..a9d00c42219afd 100644 --- a/python/paddle/fluid/tests/unittests/test_prelu_op.py +++ b/python/paddle/fluid/tests/unittests/test_prelu_op.py @@ -400,7 +400,7 @@ def test_check_grad(self): max_relative_error=max_relative_error, ) - cls_name = "{0}_{1}".format(parent.__name__, "Fp16Op") + cls_name = "{}_{}".format(parent.__name__, "Fp16Op") TestPReluFp16Case.__name__ = cls_name globals()[cls_name] = TestPReluFp16Case diff --git a/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py b/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py index 6308307c870af1..875822e42ba817 100644 --- a/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py +++ b/python/paddle/fluid/tests/unittests/test_ps_dispatcher.py @@ -41,7 +41,7 @@ def test_base(self): def test_hash(self): class Var: def __init__(self, index): - self._name = "var_{}".format(index) + self._name = f"var_{index}" def name(self): return self._name @@ -59,7 +59,7 @@ def name(self): def test_round_rodin(self): class Var: def __init__(self, index): - self._name = "var_{}".format(index) + self._name = f"var_{index}" def name(self): return self._name diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index 173726c243f100..64b0b044ef6bf5 100644 --- 
a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -89,7 +89,7 @@ def simple_fc_net(img, label, use_py_func_op): fluid.default_main_program() .current_block() .create_var( - name='hidden_{}'.format(idx), + name=f'hidden_{idx}', dtype='float32', shape=hidden.shape, ) diff --git a/python/paddle/fluid/tests/unittests/test_retain_graph.py b/python/paddle/fluid/tests/unittests/test_retain_graph.py index df806739e06c50..03d45ec5b84578 100644 --- a/python/paddle/fluid/tests/unittests/test_retain_graph.py +++ b/python/paddle/fluid/tests/unittests/test_retain_graph.py @@ -74,7 +74,7 @@ def cal_gradient_penalty( alpha = paddle.reshape(alpha, real_data.shape) interpolatesv = alpha * real_data + ((1 - alpha) * fake_data) else: - raise NotImplementedError('{} not implemented'.format(type)) + raise NotImplementedError(f'{type} not implemented') interpolatesv.stop_gradient = False real_data.stop_gradient = True fake_AB = paddle.concat((real_data.detach(), interpolatesv), 1) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_op.py b/python/paddle/fluid/tests/unittests/test_rnn_op.py index 73ef33816534d9..e8c25f4b178ba3 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_op.py @@ -68,10 +68,10 @@ def get_weight_names(self): weight_names = [] for i in range(self.num_layers): for j in range(0, 2 * self.direction_num): - weight_names.append("{}.weight_{}".format(i, j)) + weight_names.append(f"{i}.weight_{j}") for i in range(self.num_layers): for j in range(0, 2 * self.direction_num): - weight_names.append("{}.bias_{}".format(i, j)) + weight_names.append(f"{i}.bias_{j}") return weight_names def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_run.py b/python/paddle/fluid/tests/unittests/test_run.py index da95f190c5fd64..f692e2598fdfd2 100644 --- a/python/paddle/fluid/tests/unittests/test_run.py +++ b/python/paddle/fluid/tests/unittests/test_run.py @@ -77,7 +77,7 @@ def pdrun(self, args, env=None): def test_collective_1(self): log_dir = tempfile.TemporaryDirectory() - args = "--job_id test1 --log_dir {}".format(log_dir.name) + args = f"--job_id test1 --log_dir {log_dir.name}" p = self.pdrun(args) p.wait() self.assertTrue(p.poll() == 0) @@ -134,7 +134,7 @@ def pdrun(self, args, env=None): def test_ps_1(self): log_dir = tempfile.TemporaryDirectory() - args = "--run_mode ps --log_dir {}".format(log_dir.name) + args = f"--run_mode ps --log_dir {log_dir.name}" p = self.pdrun(args) p.wait() self.assertTrue(p.poll() == 0) diff --git a/python/paddle/fluid/tests/unittests/test_sample_logits_op.py b/python/paddle/fluid/tests/unittests/test_sample_logits_op.py index bec958353a4b35..809a6579fce465 100644 --- a/python/paddle/fluid/tests/unittests/test_sample_logits_op.py +++ b/python/paddle/fluid/tests/unittests/test_sample_logits_op.py @@ -73,7 +73,7 @@ def test_check_output(self): assert ( Samples.dtype == np.int64 - ), "Samples dtype is {}, not int64".format(Samples.dtype) + ), f"Samples dtype is {Samples.dtype}, not int64" assert ( Probabilities.dtype == np.float64 ), "Probabilities dtype is {}, not float64".format( diff --git a/python/paddle/fluid/tests/unittests/test_set_value_op.py b/python/paddle/fluid/tests/unittests/test_set_value_op.py index b33f562d018906..c03b249a03d816 100644 --- a/python/paddle/fluid/tests/unittests/test_set_value_op.py +++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py @@ -498,7 +498,7 @@ def set_value(self): def 
set_dtype(self): self.dtype = "int32" - cls_name = "{0}_{1}".format(parent.__name__, "ValueInt32") + cls_name = "{}_{}".format(parent.__name__, "ValueInt32") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -518,7 +518,7 @@ def set_value(self): def set_dtype(self): self.dtype = "int64" - cls_name = "{0}_{1}".format(parent.__name__, "ValueInt64") + cls_name = "{}_{}".format(parent.__name__, "ValueInt64") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -538,7 +538,7 @@ def set_value(self): def set_dtype(self): self.dtype = "float16" - cls_name = "{0}_{1}".format(parent.__name__, "Valuefp16") + cls_name = "{}_{}".format(parent.__name__, "Valuefp16") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -558,7 +558,7 @@ def set_value(self): def set_dtype(self): self.dtype = "float32" - cls_name = "{0}_{1}".format(parent.__name__, "ValueFp32") + cls_name = "{}_{}".format(parent.__name__, "ValueFp32") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -578,7 +578,7 @@ def set_value(self): def set_dtype(self): self.dtype = "float64" - cls_name = "{0}_{1}".format(parent.__name__, "ValueFp64") + cls_name = "{}_{}".format(parent.__name__, "ValueFp64") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -598,7 +598,7 @@ def set_value(self): def set_dtype(self): self.dtype = "bool" - cls_name = "{0}_{1}".format(parent.__name__, "ValueBool") + cls_name = "{}_{}".format(parent.__name__, "ValueBool") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -619,7 +619,7 @@ def set_value(self): def set_dtype(self): self.dtype = "int32" - cls_name = "{0}_{1}".format(parent.__name__, "ValueNumpyInt32") + cls_name = "{}_{}".format(parent.__name__, "ValueNumpyInt32") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -639,7 +639,7 @@ def set_value(self): def set_dtype(self): self.dtype = "int64" - cls_name = "{0}_{1}".format(parent.__name__, "ValueNumpyInt64") + cls_name = "{}_{}".format(parent.__name__, "ValueNumpyInt64") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -659,7 +659,7 @@ def set_value(self): def set_dtype(self): self.dtype = "float32" - cls_name = "{0}_{1}".format(parent.__name__, "ValueNumpyFp32") + cls_name = "{}_{}".format(parent.__name__, "ValueNumpyFp32") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -679,7 +679,7 @@ def set_value(self): def set_dtype(self): self.dtype = "float64" - cls_name = "{0}_{1}".format(parent.__name__, "ValueNumpyFp64") + cls_name = "{}_{}".format(parent.__name__, "ValueNumpyFp64") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -699,7 +699,7 @@ def set_value(self): def set_dtype(self): self.dtype = "bool" - cls_name = "{0}_{1}".format(parent.__name__, "ValueNumpyBool") + cls_name = "{}_{}".format(parent.__name__, "ValueNumpyBool") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -724,7 +724,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = 3 - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorInt32") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorInt32") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -748,7 +748,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = 3 - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorInt64") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorInt64") TestValueInt.__name__ = cls_name globals()[cls_name] = 
TestValueInt @@ -772,7 +772,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = 3 - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorFp32") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorFp32") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -796,7 +796,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = 3 - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorFp64") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorFp64") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt @@ -820,7 +820,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = False - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorBool") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorBool") TestValueInt.__name__ = cls_name globals()[cls_name] = TestValueInt diff --git a/python/paddle/fluid/tests/unittests/test_signal.py b/python/paddle/fluid/tests/unittests/test_signal.py index 19a0dd433ce2ce..749924dba5dca3 100644 --- a/python/paddle/fluid/tests/unittests/test_signal.py +++ b/python/paddle/fluid/tests/unittests/test_signal.py @@ -78,7 +78,7 @@ def normalize(S, norm=np.inf, axis=0, threshold=None, fill=None): ) if fill not in [None, False, True]: - raise Exception("fill={} must be None or boolean".format(fill)) + raise Exception(f"fill={fill} must be None or boolean") if not np.all(np.isfinite(S)): raise Exception("Input must be finite") @@ -113,7 +113,7 @@ def normalize(S, norm=np.inf, axis=0, threshold=None, fill=None): return S else: - raise Exception("Unsupported norm: {}".format(repr(norm))) + raise Exception(f"Unsupported norm: {repr(norm)}") # indices where norm is below the threshold small_idx = length < threshold @@ -224,7 +224,7 @@ def frame(x, frame_length, hop_length, axis=-1): ) if hop_length < 1: - raise Exception("Invalid hop_length: {:d}".format(hop_length)) + raise Exception(f"Invalid hop_length: {hop_length:d}") if axis == -1 and not x.flags["F_CONTIGUOUS"]: print( @@ -257,7 +257,7 @@ def frame(x, frame_length, hop_length, axis=-1): strides = [hop_length * new_stride] + list(strides) else: - raise Exception("Frame axis={} must be either 0 or -1".format(axis)) + raise Exception(f"Frame axis={axis} must be either 0 or -1") return as_strided(x, shape=shape, strides=strides) @@ -299,7 +299,7 @@ def get_window(window, Nx, fftbins=True): "Window size mismatch: " "{:d} != {:d}".format(len(window), Nx) ) else: - raise Exception("Invalid window specification: {}".format(window)) + raise Exception(f"Invalid window specification: {window}") def __overlap_add(y, ytmp, hop_length): @@ -512,7 +512,7 @@ def frame_for_api_test(x, frame_length, hop_length, axis=-1): strides = [hop_length * x.itemsize] + list(strides) else: - raise ValueError("Frame axis={} must be either 0 or -1".format(axis)) + raise ValueError(f"Frame axis={axis} must be either 0 or -1") return as_strided(x, shape=shape, strides=strides) diff --git a/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py b/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py index b0901005204500..758c67036b002c 100644 --- a/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_simple_rnn_op.py @@ -68,10 +68,10 @@ def get_weight_names(self): weight_names = [] for i in range(self.num_layers): for j in range(0, 2 * self.direction_num): - weight_names.append("{}.weight_{}".format(i, j)) + weight_names.append(f"{i}.weight_{j}") for i in range(self.num_layers): for j in 
range(0, 2 * self.direction_num): - weight_names.append("{}.bias_{}".format(i, j)) + weight_names.append(f"{i}.bias_{j}") return weight_names def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py index f0929e5758d8fb..18cfbe59fe83d5 100644 --- a/python/paddle/fluid/tests/unittests/test_split_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_op.py @@ -248,7 +248,7 @@ def get_dtype(self): def test_check_grad(self): pass - cls_name = "{0}_{1}".format(parent.__name__, "Fp16") + cls_name = "{}_{}".format(parent.__name__, "Fp16") TestSplitFp16.__name__ = cls_name globals()[cls_name] = TestSplitFp16 @@ -273,7 +273,7 @@ def test_check_output(self): def test_check_grad(self): pass - cls_name = "{0}_{1}".format(parent.__name__, "Bf16") + cls_name = "{}_{}".format(parent.__name__, "Bf16") TestSplitBf16.__name__ = cls_name globals()[cls_name] = TestSplitBf16 diff --git a/python/paddle/fluid/tests/unittests/test_split_program.py b/python/paddle/fluid/tests/unittests/test_split_program.py index 3d09c6e8c77011..cf14c0dd2ad2da 100644 --- a/python/paddle/fluid/tests/unittests/test_split_program.py +++ b/python/paddle/fluid/tests/unittests/test_split_program.py @@ -69,7 +69,7 @@ def test_split_program(self): np.testing.assert_array_equal( actual, expected, - err_msg='{}\n{}\n'.format(actual, expected), + err_msg=f'{actual}\n{expected}\n', ) def get_places(self): diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index f2ec5041450a76..d2411dda4b95a4 100644 --- a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -37,7 +37,7 @@ def initParameters(self): def get_x_names(self): x_names = [] for i in range(self.num_inputs): - x_names.append('x{}'.format(i)) + x_names.append(f'x{i}') return x_names def setUp(self): @@ -118,7 +118,7 @@ def initParameters(self): def get_x_names(self): x_names = [] for i in range(self.num_inputs): - x_names.append('x{}'.format(i)) + x_names.append(f'x{i}') return x_names def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_stft_op.py b/python/paddle/fluid/tests/unittests/test_stft_op.py index 6addfc1d61abe3..44c01689d3a2f7 100644 --- a/python/paddle/fluid/tests/unittests/test_stft_op.py +++ b/python/paddle/fluid/tests/unittests/test_stft_op.py @@ -39,7 +39,7 @@ def frame_from_librosa(x, frame_length, hop_length, axis=-1): strides = [hop_length * x.itemsize] + list(strides) else: - raise ValueError("Frame axis={} must be either 0 or -1".format(axis)) + raise ValueError(f"Frame axis={axis} must be either 0 or -1") return as_strided(x, shape=shape, strides=strides) diff --git a/python/paddle/fluid/tests/unittests/test_strided_slice_op.py b/python/paddle/fluid/tests/unittests/test_strided_slice_op.py index de5a1bcb19a0f9..117321dbd3ed5b 100644 --- a/python/paddle/fluid/tests/unittests/test_strided_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_strided_slice_op.py @@ -721,14 +721,14 @@ def is_grads_equal(self, g1, g2): self.assertTrue( self.grad_equal(g, g2[i]), - msg="gradient_1:\n{} \ngradient_2:\n{}".format(g, g2), + msg=f"gradient_1:\n{g} \ngradient_2:\n{g2}", ) def is_grads_equal_zeros(self, grads): for g in grads: self.assertTrue( self.grad_equal(np.zeros_like(g), g), - msg="The gradient should be zeros, but received \n{}".format(g), + msg=f"The gradient should be zeros, but received \n{g}", ) def create_case(self, net): diff 
--git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 7845434133db76..5cdf85fd38bab2 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -321,7 +321,7 @@ def test_w_is_selected_rows(self): for inplace in [True, False]: self.check_with_place(place, inplace) - cls_name = "{0}_{1}".format(parent.__name__, "SumFp16Test") + cls_name = "{}_{}".format(parent.__name__, "SumFp16Test") TestSumFp16Case.__name__ = cls_name globals()[cls_name] = TestSumFp16Case diff --git a/python/paddle/fluid/tests/unittests/test_switch_case.py b/python/paddle/fluid/tests/unittests/test_switch_case.py index e3d93b149b37a0..d16c48540549a2 100644 --- a/python/paddle/fluid/tests/unittests/test_switch_case.py +++ b/python/paddle/fluid/tests/unittests/test_switch_case.py @@ -99,31 +99,31 @@ def fn_3(): res[0], 1, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[0], 1), + err_msg=f'result is {res[0]} but answer is {1}', ) np.testing.assert_allclose( res[1], 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[1], 2), + err_msg=f'result is {res[1]} but answer is {2}', ) np.testing.assert_allclose( res[2], 3, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[2], 3), + err_msg=f'result is {res[2]} but answer is {3}', ) np.testing.assert_allclose( res[3], 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[3], 2), + err_msg=f'result is {res[3]} but answer is {2}', ) np.testing.assert_allclose( res[4], 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[4], 2), + err_msg=f'result is {res[4]} but answer is {2}', ) def test_0d_tensor(self): @@ -186,35 +186,35 @@ def fn_3(): res[0], 1, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[0], 1), + err_msg=f'result is {res[0]} but answer is {1}', ) self.assertEqual(res[0].shape, ()) np.testing.assert_allclose( res[1], 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[1], 2), + err_msg=f'result is {res[1]} but answer is {2}', ) self.assertEqual(res[1].shape, ()) np.testing.assert_allclose( res[2], 3, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[2], 3), + err_msg=f'result is {res[2]} but answer is {3}', ) self.assertEqual(res[2].shape, ()) np.testing.assert_allclose( res[3], 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[3], 2), + err_msg=f'result is {res[3]} but answer is {2}', ) self.assertEqual(res[3].shape, ()) np.testing.assert_allclose( res[4], 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[4], 2), + err_msg=f'result is {res[4]} but answer is {2}', ) self.assertEqual(res[4].shape, ()) @@ -297,35 +297,35 @@ def fn_3(): out_0, 1, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(out_0, 1), + err_msg=f'result is {out_0} but answer is {1}', ) self.assertEqual(out_0.shape, []) np.testing.assert_allclose( out_1, 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(out_1, 2), + err_msg=f'result is {out_1} but answer is {2}', ) self.assertEqual(out_1.shape, []) np.testing.assert_allclose( out_2, 3, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(out_2, 3), + err_msg=f'result is {out_2} but answer is {3}', ) self.assertEqual(out_2.shape, []) np.testing.assert_allclose( out_3, 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(out_3, 2), + err_msg=f'result is {out_3} but answer is {2}', ) self.assertEqual(out_3.shape, []) 
np.testing.assert_allclose( out_4, 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(out_4, 2), + err_msg=f'result is {out_4} but answer is {2}', ) self.assertEqual(out_4.shape, []) @@ -479,19 +479,19 @@ def fn_3(): res[0], 1, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[0], 1), + err_msg=f'result is {res[0]} but answer is {1}', ) np.testing.assert_allclose( res[1], 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[1], 2), + err_msg=f'result is {res[1]} but answer is {2}', ) np.testing.assert_allclose( res[2], 3, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[2], 3), + err_msg=f'result is {res[2]} but answer is {3}', ) def test_nested_switch_0d_tensor(self): @@ -576,21 +576,21 @@ def fn_3(): res[0], 1, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[0], 1), + err_msg=f'result is {res[0]} but answer is {1}', ) self.assertEqual(res[0].shape, ()) np.testing.assert_allclose( res[1], 2, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[1], 2), + err_msg=f'result is {res[1]} but answer is {2}', ) self.assertEqual(res[1].shape, ()) np.testing.assert_allclose( res[2], 3, rtol=1e-05, - err_msg='result is {} but answer is {}'.format(res[2], 3), + err_msg=f'result is {res[2]} but answer is {3}', ) self.assertEqual(res[2].shape, ()) diff --git a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py index 3da2c7b7399415..aef72df282600d 100644 --- a/python/paddle/fluid/tests/unittests/test_tril_triu_op.py +++ b/python/paddle/fluid/tests/unittests/test_tril_triu_op.py @@ -61,7 +61,7 @@ def case_generator(op_type, Xshape, diagonal, expected): If arg`expercted` is 'success', it will register an Optest case and expect to pass. Otherwise, it will register an API case and check the expect failure. 
""" - cls_name = "{0}_{1}_shape_{2}_diag_{3}".format( + cls_name = "{}_{}_shape_{}_diag_{}".format( expected, op_type, Xshape, diagonal ) errmsg = { diff --git a/python/paddle/fluid/tests/unittests/test_unstack_op.py b/python/paddle/fluid/tests/unittests/test_unstack_op.py index 9942b3193b1599..34c6950d7f1d8c 100755 --- a/python/paddle/fluid/tests/unittests/test_unstack_op.py +++ b/python/paddle/fluid/tests/unittests/test_unstack_op.py @@ -32,7 +32,7 @@ def initParameters(self): def get_y_names(self): y_names = [] for i in range(self.input_dim[self.axis]): - y_names.append('y{}'.format(i)) + y_names.append(f'y{i}') return y_names def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_variable.py b/python/paddle/fluid/tests/unittests/test_variable.py index c26053a734297d..b709510371edf5 100644 --- a/python/paddle/fluid/tests/unittests/test_variable.py +++ b/python/paddle/fluid/tests/unittests/test_variable.py @@ -753,7 +753,7 @@ def test_static_graph_list_index_muti_dim(self): np.testing.assert_array_equal( y2, getitem_pp[0], - err_msg='\n numpy:{},\n paddle:{}'.format(y2, getitem_pp[0]), + err_msg=f'\n numpy:{y2},\n paddle:{getitem_pp[0]}', ) def test_dygraph_list_index_muti_dim(self): @@ -1216,7 +1216,7 @@ def test_dygraph_array_index_muti_dim(self): np.testing.assert_array_equal( y_t1.numpy(), y_np1, - err_msg='\n numpy:{},\n paddle:{}'.format(y_np1, y_t1.numpy()), + err_msg=f'\n numpy:{y_np1},\n paddle:{y_t1.numpy()}', ) # 1 dim getitem array2 = array.copy() @@ -1227,7 +1227,7 @@ def test_dygraph_array_index_muti_dim(self): np.testing.assert_array_equal( y_t2.numpy(), y_np2, - err_msg='\n numpy:{},\n paddle:{}'.format(y_np2, y_t2.numpy()), + err_msg=f'\n numpy:{y_np2},\n paddle:{y_t2.numpy()}', ) # 2 dim setitem diff --git a/python/paddle/fluid/tests/unittests/testsuite.py b/python/paddle/fluid/tests/unittests/testsuite.py index 7db80c08eacffe..5ae73e386bf938 100644 --- a/python/paddle/fluid/tests/unittests/testsuite.py +++ b/python/paddle/fluid/tests/unittests/testsuite.py @@ -105,7 +105,7 @@ def create_var(block, name, np_list, var_proto, is_calc_ref=False): shape = None lod_level = None if name not in np_list: - assert var_proto.intermediate, "{} not found".format(name) + assert var_proto.intermediate, f"{name} not found" else: # inferece the dtype from numpy value. 
np_value = np_list[name] @@ -134,11 +134,11 @@ def create_var(block, name, np_list, var_proto, is_calc_ref=False): if is_input: assert (var_name in np_list) or ( var_proto.dispensable - ), "Missing {} as input".format(var_name) + ), f"Missing {var_name} as input" if var_proto.duplicable: assert isinstance( np_list[var_name], list - ), "Duplicable {} should be set as list".format(var_name) + ), f"Duplicable {var_name} should be set as list" var_list = [] for (name, np_value) in np_list[var_name]: var_list.append( diff --git a/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py b/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py index 2280292670316b..5b779b93a19dfc 100644 --- a/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py +++ b/python/paddle/fluid/tests/unittests/tokenizer/tokenizer_utils.py @@ -487,9 +487,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs): print("Already cached %s" % path) resolved_vocab_files[file_id] = path else: - print( - "Downloading %s and saved to %s" % (file_path, default_root) - ) + print(f"Downloading {file_path} and saved to {default_root}") try: resolved_vocab_files[file_id] = get_path_from_url( file_path, default_root diff --git a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py b/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py index 7562a340d001dd..4ef72587a1e20f 100644 --- a/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py +++ b/python/paddle/fluid/tests/unittests/xpu/get_test_cover_info.py @@ -281,7 +281,7 @@ def create_test_class( if test_class[0] == '__class__': continue class_obj = test_class[1] - cls_name = "{0}_{1}".format(test_class[0], str(test_type)) + cls_name = f"{test_class[0]}_{str(test_type)}" func_globals[cls_name] = type( cls_name, (class_obj,), @@ -298,7 +298,7 @@ def create_test_class( ): base_class, dynamic_classes = test_class_obj.dynamic_create_class() for dy_class in dynamic_classes: - cls_name = "{0}_{1}".format(dy_class[0], str(test_type)) + cls_name = f"{dy_class[0]}_{str(test_type)}" attr_dict = dy_class[1] attr_dict['in_type'] = type_dict_str_to_numpy[test_type] attr_dict['in_type_str'] = test_type @@ -329,10 +329,10 @@ def get_test_cover_info(): diff_list = list(set(xpu_op_list).difference(set(xpu_op_covered))) total_len = len(set(xpu_op_list)) covered_len = len(set(xpu_op_covered)) - print('{} test: {}/{}'.format(version_str, covered_len, total_len)) + print(f'{version_str} test: {covered_len}/{total_len}') if len(diff_list) != 0: print( - "These ops need to be tested on {0}! ops:{1}".format( + "These ops need to be tested on {}! 
ops:{}".format( version_str, ','.join(diff_list) ) ) diff --git a/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py b/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py index b5a0655ad9ca1d..9c9b88862feab1 100644 --- a/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py +++ b/python/paddle/fluid/tests/unittests/xpu/process_group_bkcl.py @@ -48,9 +48,9 @@ def test_create_process_group_bkcl(self): pg = init_process_group() sys.stdout.write( - "rank {}: size {} name {}\n".format(pg.rank(), pg.size(), pg.name()) + f"rank {pg.rank()}: size {pg.size()} name {pg.name()}\n" ) - sys.stdout.write("rank {}: test new group api ok\n".format(pg.rank())) + sys.stdout.write(f"rank {pg.rank()}: test new group api ok\n") # TODO(zhangxiaoci) allreduce unittest raise error # test allreduce sum @@ -92,7 +92,7 @@ def test_create_process_group_bkcl(self): paddle.device.xpu.synchronize() assert np.array_equal(broadcast_result, tensor_y) - sys.stdout.write("rank {}: test broadcast api ok\n".format(pg.rank())) + sys.stdout.write(f"rank {pg.rank()}: test broadcast api ok\n") # test barrier # rank 0 @@ -103,7 +103,7 @@ def test_create_process_group_bkcl(self): task = pg.barrier(device_id) task.wait() - sys.stdout.write("rank {}: test barrier api ok\n".format(pg.rank())) + sys.stdout.write(f"rank {pg.rank()}: test barrier api ok\n") # test allgather # rank 0 @@ -134,7 +134,7 @@ def test_create_process_group_bkcl(self): ) assert np.array_equal(tensor_x, out_1) assert np.array_equal(tensor_y, out_2) - sys.stdout.write("rank {}: test allgather api ok\n".format(pg.rank())) + sys.stdout.write(f"rank {pg.rank()}: test allgather api ok\n") if pg.rank() == 0: task = pg.all_gather(tensor_x, tensor_out) @@ -152,7 +152,7 @@ def test_create_process_group_bkcl(self): ) assert np.array_equal(tensor_x, out_1) assert np.array_equal(tensor_y, out_2) - sys.stdout.write("rank {}: test allgather api2 ok\n".format(pg.rank())) + sys.stdout.write(f"rank {pg.rank()}: test allgather api2 ok\n") # test Reduce # rank 0 @@ -173,7 +173,7 @@ def test_create_process_group_bkcl(self): if pg.rank() == 0: assert np.array_equal(tensor_x, sum_result) assert np.array_equal(tensor_y, old_tensor_y) - sys.stdout.write("rank {}: test reduce sum api ok\n".format(pg.rank())) + sys.stdout.write(f"rank {pg.rank()}: test reduce sum api ok\n") # test reduce_scatter in_shape = list(self.shape) @@ -199,9 +199,7 @@ def test_create_process_group_bkcl(self): assert np.array_equal(need_result0, tensor_out) else: assert np.array_equal(need_result1, tensor_out) - sys.stdout.write( - "rank {}: test reduce_scatter sum api ok\n".format(pg.rank()) - ) + sys.stdout.write(f"rank {pg.rank()}: test reduce_scatter sum api ok\n") # test send async api # rank 0 @@ -247,7 +245,7 @@ def test_create_process_group_bkcl(self): task = dist.recv(tensor_y, 0, sync_op=True) assert np.array_equal(tensor_y, tensor_x) and tensor_y.shape == [] - sys.stdout.write("rank {}: test send api ok\n".format(pg.rank())) + sys.stdout.write(f"rank {pg.rank()}: test send api ok\n") class TestProcessGroupFp16(TestProcessGroupFp32): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py index 160d7f61551c54..022ef400164d18 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_collective_base_xpu.py @@ -170,7 +170,7 @@ class TestDistBase(unittest.TestCase): def setUp(self): self._port_set = set() 
self._trainers = 2 - self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._ps_endpoints = "127.0.0.1:{},127.0.0.1:{}".format( self._find_free_port(), self._find_free_port(), ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py index 0f240abc90d88c..a3eb2a1f3a77e0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv2d_op_xpu.py @@ -159,7 +159,7 @@ def init_test_case_2(self): N, C, H, W = self.input_size self.input_size = [N, H, W, C] - cls_name = "{0}_{1}".format(parent.__name__, "ChannelLast") + cls_name = "{}_{}".format(parent.__name__, "ChannelLast") TestChannelLastCase.__name__ = cls_name globals()[cls_name] = TestChannelLastCase @@ -170,7 +170,7 @@ def init_paddings(self): self.pad = [0, 0] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingSAMEOp") TestPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestPaddingSMAECase @@ -181,7 +181,7 @@ def init_paddings(self): self.pad = [1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingVALIDOp") TestPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestPaddingVALIDCase diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py index cb97a27d7bd864..f6578371b97adf 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py @@ -172,7 +172,7 @@ def init_paddings(self): self.pad = [0, 0, 0] self.padding_algorithm = "SAME" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingSAMEOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingSAMEOp") TestPaddingSMAECase.__name__ = cls_name globals()[cls_name] = TestPaddingSMAECase @@ -183,7 +183,7 @@ def init_paddings(self): self.pad = [1, 1, 1] self.padding_algorithm = "VALID" - cls_name = "{0}_{1}".format(parent.__name__, "PaddingVALIDOp") + cls_name = "{}_{}".format(parent.__name__, "PaddingVALIDOp") TestPaddingVALIDCase.__name__ = cls_name globals()[cls_name] = TestPaddingVALIDCase @@ -197,7 +197,7 @@ def init_test_case_2(self): N, C, D, H, W = self.input_size self.input_size = [N, D, H, W, C] - cls_name = "{0}_{1}".format(parent.__name__, "ChannelLast") + cls_name = "{}_{}".format(parent.__name__, "ChannelLast") TestChannelLastCase.__name__ = cls_name globals()[cls_name] = TestChannelLastCase diff --git a/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py b/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py index 64938259018038..e13efff36e4845 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_gen_bkcl_id_op.py @@ -43,7 +43,7 @@ def run_gen_bkc_id(attr): for i in range(1, bkcl_comm_num): startup_program.global_block().create_var( - name="BKCLID_{}".format(i), + name=f"BKCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) @@ -51,12 +51,12 @@ def run_gen_bkc_id(attr): if use_hallreduce: for i in range(0, bkcl_comm_num): startup_program.global_block().create_var( - name="Hierarchical_inter_BKCLID_{}".format(i), + name=f"Hierarchical_inter_BKCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) startup_program.global_block().create_var( - 
name="Hierarchical_exter_BKCLID_{}".format(i), + name=f"Hierarchical_exter_BKCLID_{i}", persistable=True, type=core.VarDesc.VarType.RAW, ) @@ -92,7 +92,7 @@ def gen_bkcl_id(self, nranks=2): port = self._dist_ut_port_0 trainers = [] for i in range(nranks): - trainers.append('127.0.0.1:{}'.format(port + i)) + trainers.append(f'127.0.0.1:{port + i}') attr = { "trainers": trainers, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py b/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py index c347e708ec3ee8..11fa8405317dc0 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_parallel_dygraph_dataparallel.py @@ -82,14 +82,14 @@ def start_local_trainers( current_env.update(proc_env) - print("trainer proc env:{}".format(current_env)) + print(f"trainer proc env:{current_env}") if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': cmd = "python -m coverage run --branch -p " + training_script else: cmd = "python -u " + training_script - print("start trainer proc:{} env:{}".format(cmd, proc_env)) + print(f"start trainer proc:{cmd} env:{proc_env}") fn = None @@ -133,7 +133,7 @@ def run_mnist_2xpu(self, target_file_name, eager_mode=True): alive = watch_local_trainers(procs, cluster.trainers_endpoints()) if not alive: - print("Local procs complete, POD info:{}".format(pod)) + print(f"Local procs complete, POD info:{pod}") break time.sleep(3) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py index b5284dfb320828..e28a7ff9c10d59 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_rnn_op_xpu.py @@ -164,10 +164,10 @@ def get_weight_names(self): weight_names = [] for i in range(self.num_layers): for j in range(0, 2 * self.direction_num): - weight_names.append("{}.weight_{}".format(i, j)) + weight_names.append(f"{i}.weight_{j}") for i in range(self.num_layers): for j in range(0, 2 * self.direction_num): - weight_names.append("{}.bias_{}".format(i, j)) + weight_names.append(f"{i}.bias_{j}") return weight_names def set_attrs(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py index 7f18a297dd49b3..90277c7f484d6a 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py @@ -687,7 +687,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = 3 - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorInt32") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorInt32") XPUTestValueInt.__name__ = cls_name globals()[cls_name] = XPUTestValueInt @@ -709,7 +709,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = 3 - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorInt64") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorInt64") XPUTestValueInt.__name__ = cls_name globals()[cls_name] = XPUTestValueInt @@ -731,7 +731,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = 3 - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorFp32") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorFp32") XPUTestValueInt.__name__ = cls_name globals()[cls_name] = XPUTestValueInt @@ -755,7 +755,7 @@ def _call_setitem(self, x): def _get_answer(self): self.data[0, 1] = 
False - cls_name = "{0}_{1}".format(parent.__name__, "ValueTensorBool") + cls_name = "{}_{}".format(parent.__name__, "ValueTensorBool") XPUTestValueInt.__name__ = cls_name globals()[cls_name] = XPUTestValueInt diff --git a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py index 8702b11e2321a1..b13e1b9b300aad 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_stack_op_xpu.py @@ -73,7 +73,7 @@ def initParameters(self): def get_x_names(self): x_names = [] for i in range(self.num_inputs): - x_names.append('x{}'.format(i)) + x_names.append(f'x{i}') return x_names def test_check_output(self): diff --git a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py index 8b84bde6ea4144..77d934e478cb52 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_sum_op_xpu.py @@ -96,7 +96,7 @@ def test_w_is_selected_rows(self): for inplace in [True, False]: self.check_with_place(place, inplace) - cls_name = "{0}_{1}".format(parent.__name__, "SumFp16Test") + cls_name = "{}_{}".format(parent.__name__, "SumFp16Test") TestSumFp16Case.__name__ = cls_name globals()[cls_name] = TestSumFp16Case diff --git a/python/paddle/fluid/tests/unittests/xpu/test_unstack_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_unstack_op_xpu.py index bb9e9bff21d533..6195ec55abd411 100755 --- a/python/paddle/fluid/tests/unittests/xpu/test_unstack_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_unstack_op_xpu.py @@ -47,7 +47,7 @@ def initParameters(self): def get_y_names(self): y_names = [] for i in range(self.input_dim[self.axis]): - y_names.append('y{}'.format(i)) + y_names.append(f'y{i}') return y_names def setUp(self): diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py index 468ac08017d3a2..ef440ac253b59f 100644 --- a/python/paddle/framework/io.py +++ b/python/paddle/framework/io.py @@ -179,9 +179,9 @@ def _build_load_path_and_config(path, config): directory_format_exist = os.path.isdir(path) if prefix_format_exist and directory_format_exist: raise ValueError( - "The %s.pdmodel and %s directory exist at the same time, " + "The {}.pdmodel and {} directory exist at the same time, " "don't know which one to load, please make sure that the specified target " - "of ``path`` is unique." % (path, path) + "of ``path`` is unique.".format(path, path) ) elif not prefix_format_exist and not directory_format_exist: error_msg = "The ``path`` (%s) to load model not exists." 
@@ -286,7 +286,7 @@ def _pickle_save(obj, f, protocol): if protocol < 2 or protocol > 4: raise ValueError( - "Expected 1<'protocol'<5, but received protocol={}".format(protocol) + f"Expected 1<'protocol'<5, but received protocol={protocol}" ) def reduce_varbase(self): @@ -848,7 +848,7 @@ def _legacy_save(obj, path, protocol=2): if protocol < 2 or protocol > 4: raise ValueError( - "Expected 1<'protocol'<5, but received protocol={}".format(protocol) + f"Expected 1<'protocol'<5, but received protocol={protocol}" ) if _is_file_path(path): diff --git a/python/paddle/framework/io_utils.py b/python/paddle/framework/io_utils.py index 05e542193fc279..74267e3e17635c 100644 --- a/python/paddle/framework/io_utils.py +++ b/python/paddle/framework/io_utils.py @@ -73,9 +73,7 @@ def _open_file_buffer(path_or_buffer, mode): elif 'r' in mode: return _buffer_reader(path_or_buffer) else: - raise ValueError( - "Expected 'r' or 'w' in mode but got {}".format(mode) - ) + raise ValueError(f"Expected 'r' or 'w' in mode but got {mode}") def _is_memory_buffer(buffer): diff --git a/python/paddle/hapi/callbacks.py b/python/paddle/hapi/callbacks.py index b6e3b7a541edf6..9619c65aa189ca 100644 --- a/python/paddle/hapi/callbacks.py +++ b/python/paddle/hapi/callbacks.py @@ -103,12 +103,12 @@ def _check_mode(self, mode): def on_begin(self, mode, logs=None): self._check_mode(mode) - name = 'on_{}_begin'.format(mode) + name = f'on_{mode}_begin' self._call(name, logs) def on_end(self, mode, logs=None): self._check_mode(mode) - name = 'on_{}_end'.format(mode) + name = f'on_{mode}_end' self._call(name, logs) def on_epoch_begin(self, epoch=None, logs=None): @@ -119,12 +119,12 @@ def on_epoch_end(self, epoch=None, logs=None): def on_batch_begin(self, mode, step=None, logs=None): self._check_mode(mode) - name = 'on_{}_batch_begin'.format(mode) + name = f'on_{mode}_batch_begin' self._call(name, step, logs) def on_batch_end(self, mode, step=None, logs=None): self._check_mode(mode) - name = 'on_{}_batch_end'.format(mode) + name = f'on_{mode}_batch_end' self._call(name, step, logs) @@ -605,14 +605,14 @@ def _is_save(self): def on_epoch_end(self, epoch, logs=None): if self._is_save() and self.epoch % self.save_freq == 0: - path = '{}/{}'.format(self.save_dir, epoch) - print('save checkpoint at {}'.format(os.path.abspath(path))) + path = f'{self.save_dir}/{epoch}' + print(f'save checkpoint at {os.path.abspath(path)}') self.model.save(path) def on_train_end(self, logs=None): if self._is_save(): - path = '{}/final'.format(self.save_dir) - print('save checkpoint at {}'.format(os.path.abspath(path))) + path = f'{self.save_dir}/final' + print(f'save checkpoint at {os.path.abspath(path)}') self.model.save(path) @@ -1055,7 +1055,7 @@ def __init__( dir=None, mode=None, job_type=None, - **kwargs + **kwargs, ): self.wandb = try_import( "wandb", diff --git a/python/paddle/hapi/dynamic_flops.py b/python/paddle/hapi/dynamic_flops.py index 89f5928c6a528d..e5be25ac2686f0 100644 --- a/python/paddle/hapi/dynamic_flops.py +++ b/python/paddle/hapi/dynamic_flops.py @@ -229,13 +229,11 @@ def add_hooks(m): if m_type in custom_ops: flops_fn = custom_ops[m_type] if m_type not in types_collection: - print( - "Customize Function has been applied to {}".format(m_type) - ) + print(f"Customize Function has been applied to {m_type}") elif m_type in register_hooks: flops_fn = register_hooks[m_type] if m_type not in types_collection: - print("{}'s flops has been counted".format(m_type)) + print(f"{m_type}'s flops has been counted") else: if m_type not in 
types_collection: print( diff --git a/python/paddle/hapi/hub.py b/python/paddle/hapi/hub.py index 2788922482ab79..7885cbdef8c40e 100644 --- a/python/paddle/hapi/hub.py +++ b/python/paddle/hapi/hub.py @@ -105,7 +105,7 @@ def _get_cache_or_reload(repo, force_reload, verbose=True, source='github'): if use_cache: if verbose: - sys.stderr.write('Using cache found in {}\n'.format(repo_dir)) + sys.stderr.write(f'Using cache found in {repo_dir}\n') else: cached_file = os.path.join(hub_dir, normalized_br + '.zip') _remove_if_exists(cached_file) @@ -146,7 +146,7 @@ def _load_entry_from_hubconf(m, name): func = getattr(m, name, None) if func is None or not callable(func): - raise RuntimeError('Cannot find callable {} in hubconf'.format(name)) + raise RuntimeError(f'Cannot find callable {name} in hubconf') return func diff --git a/python/paddle/hapi/logger.py b/python/paddle/hapi/logger.py index 25a6bbcbf10089..21f984bd526f77 100644 --- a/python/paddle/hapi/logger.py +++ b/python/paddle/hapi/logger.py @@ -54,7 +54,7 @@ def setup_logger(output=None, name="hapi", log_level=logging.INFO): filename = os.path.join(output, "log.txt") if local_rank > 0: - filename = filename + ".rank{}".format(local_rank) + filename = filename + f".rank{local_rank}" if not os.path.exists(os.path.dirname(filename)): os.makedirs(os.path.dirname(filename)) diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index e34274ceaee0e1..434b56dcbc72bd 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -494,7 +494,7 @@ def _load_optimizer(self, state, executor): assert ( var.name in converted_state - ), "variable [{}] is not in optimizer state file".format(var.name) + ), f"variable [{var.name}] is not in optimizer state file" self._set_var(var, converted_state[var.name]) def _set_var(self, var, ndarray): @@ -695,7 +695,7 @@ def _make_program(self, mode): amp_lists=amp_lists, use_pure_fp16=self._amp_level == "O2", use_fp16_guard=self._use_fp16_guard, - **self._amp_configs + **self._amp_configs, ) self.model._optimizer.minimize(self._loss_endpoint) @@ -827,7 +827,7 @@ def train_batch(self, inputs, labels=None, update=True): with paddle.amp.auto_cast( enable=self._amp_level != 'O0', **self._amp_custom_lists, - level=self._amp_level + level=self._amp_level, ): if self._nranks > 1: outputs = self.ddp_model(*[to_variable(x) for x in inputs]) @@ -1482,9 +1482,7 @@ def _load_state_from_path(path): def _check_match(key, param): state = param_state.get(key, None) if state is None: - raise ValueError( - "{} is not found in the providing file.".format(key) - ) + raise ValueError(f"{key} is not found in the providing file.") if list(state.shape) != list(param.shape): raise ValueError( "{} receives a shape {}, but the expected shape is {}.".format( @@ -1500,7 +1498,7 @@ def _strip_postfix(path): '.pdparams', '.pdopt', '.pdmodel', - ], "Unknown postfix {} from weights".format(ext) + ], f"Unknown postfix {ext} from weights" return path path = _strip_postfix(path) @@ -1513,9 +1511,7 @@ def _strip_postfix(path): match_res = _check_match(key, param) except ValueError as err: if skip_mismatch: - warnings.warn( - "Skip loading for {}. ".format(key) + str(err) - ) + warnings.warn(f"Skip loading for {key}. 
" + str(err)) # reset optimizer when mismatch happens reset_optimizer = True else: @@ -1731,7 +1727,7 @@ def prepare( for metric in to_list(metrics): assert isinstance( metric, Metric - ), "{} is not sub class of Metric".format(metric.__class__.__name__) + ), f"{metric.__class__.__name__} is not sub class of Metric" self._metrics = to_list(metrics) self._prepare_amp(amp_configs) diff --git a/python/paddle/hapi/model_summary.py b/python/paddle/hapi/model_summary.py index 0f6dece35a2d82..b28f64845ab0fd 100644 --- a/python/paddle/hapi/model_summary.py +++ b/python/paddle/hapi/model_summary.py @@ -439,7 +439,7 @@ def _get_str_length(summary): table_width['input_shape_width'], str(summary[layer]["output_shape"]), table_width['output_shape_width'], - "{0:,}".format(summary[layer]["nb_params"]), + "{:,}".format(summary[layer]["nb_params"]), table_width['params_width'], ) total_params += summary[layer]["nb_params"] @@ -473,11 +473,10 @@ def _get_input_size(input_size, size): total_size = total_params_size + total_output_size + total_input_size summary_str += "=" * table_width['table_width'] + "\n" - summary_str += "Total params: {0:,}".format(total_params) + "\n" - summary_str += "Trainable params: {0:,}".format(trainable_params) + "\n" + summary_str += f"Total params: {total_params:,}" + "\n" + summary_str += f"Trainable params: {trainable_params:,}" + "\n" summary_str += ( - "Non-trainable params: {0:,}".format(total_params - trainable_params) - + "\n" + f"Non-trainable params: {total_params - trainable_params:,}" + "\n" ) summary_str += "-" * table_width['table_width'] + "\n" summary_str += "Input size (MB): %0.2f" % total_input_size + "\n" diff --git a/python/paddle/hapi/progressbar.py b/python/paddle/hapi/progressbar.py index f9db60d04aa6df..e63bb913334eab 100644 --- a/python/paddle/hapi/progressbar.py +++ b/python/paddle/hapi/progressbar.py @@ -91,11 +91,11 @@ def convert_uint16_to_float(in_list): time_per_unit = 0 if time_per_unit >= 1 or time_per_unit == 0: - fps = ' - %.0fs/%s' % (time_per_unit, self.name) + fps = f' - {time_per_unit:.0f}s/{self.name}' elif time_per_unit >= 1e-3: - fps = ' - %.0fms/%s' % (time_per_unit * 1e3, self.name) + fps = ' - {:.0f}ms/{}'.format(time_per_unit * 1e3, self.name) else: - fps = ' - %.0fus/%s' % (time_per_unit * 1e6, self.name) + fps = ' - {:.0f}us/{}'.format(time_per_unit * 1e6, self.name) info = '' if self._verbose == 1: diff --git a/python/paddle/incubate/asp/supported_layer_list.py b/python/paddle/incubate/asp/supported_layer_list.py index db0ba97c62fdb7..01935d4badbdfa 100644 --- a/python/paddle/incubate/asp/supported_layer_list.py +++ b/python/paddle/incubate/asp/supported_layer_list.py @@ -72,7 +72,7 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name): weight_pruned_nparray = np.multiply(weight_nparray, weight_sparse_mask) assert asp.check_sparsity( weight_pruned_nparray.T, n=n, m=m, func_name=checked_func_name - ), 'Pruning {} weight matrix failure!!!'.format(param_name) + ), f'Pruning {param_name} weight matrix failure!!!' 
return weight_pruned_nparray, weight_sparse_mask diff --git a/python/paddle/incubate/asp/utils.py b/python/paddle/incubate/asp/utils.py index 684b4a933c6368..6502f9c8ab473f 100644 --- a/python/paddle/incubate/asp/utils.py +++ b/python/paddle/incubate/asp/utils.py @@ -398,7 +398,7 @@ def _compute_valid_2d_patterns(n, m): global _valid_2d_patterns_lock global _valid_2d_patterns - valid_key = '{}_{}'.format(m, n) + valid_key = f'{m}_{n}' if valid_key in _valid_2d_patterns: return _valid_2d_patterns[valid_key] else: diff --git a/python/paddle/incubate/autograd/primx.py b/python/paddle/incubate/autograd/primx.py index 352101a7fd1f6c..6810f1dde63290 100644 --- a/python/paddle/incubate/autograd/primx.py +++ b/python/paddle/incubate/autograd/primx.py @@ -543,7 +543,7 @@ def expand_nested_list(xs): for var_name in sorted(vars_to_remove): assert ( var_name in to_bind_rev - ), 'var_name "{}" is not in to_bind_rev.'.format(var_name) + ), f'var_name "{var_name}" is not in to_bind_rev.' if var_name != to_bind_rev[var_name]: block.desc._remove_var(var_name.encode()) del block.vars[var_name] @@ -673,7 +673,7 @@ def expand_nested_list(xs): for var_name in sorted(vars_to_remove): assert ( var_name in to_bind_rev - ), 'var_name "{}" is not in to_bind_rev.'.format(var_name) + ), f'var_name "{var_name}" is not in to_bind_rev.' if var_name != to_bind_rev[var_name]: block.desc._remove_var(var_name.encode()) del block.vars[var_name] diff --git a/python/paddle/incubate/autotune.py b/python/paddle/incubate/autotune.py index f7b1d9091eee9c..742c5bded064a8 100644 --- a/python/paddle/incubate/autotune.py +++ b/python/paddle/incubate/autotune.py @@ -95,7 +95,7 @@ def set_config(config=None): with open(config, 'r') as filehandle: config_dict = json.load(filehandle) except Exception as e: - print('Load config error: {}'.format(e)) + print(f'Load config error: {e}') warnings.warn("Use default configuration for auto-tuning.") if "kernel" in config_dict: diff --git a/python/paddle/incubate/distributed/fleet/collective.py b/python/paddle/incubate/distributed/fleet/collective.py index aa893520dcdcee..6716d189153b95 100644 --- a/python/paddle/incubate/distributed/fleet/collective.py +++ b/python/paddle/incubate/distributed/fleet/collective.py @@ -301,9 +301,7 @@ def apply_gradients(self, params_grads): def _check_condition(self, name, **kwargs): for k, v in kwargs.items(): if v is True: - raise AssertionError( - "you can't use %s and %s together" % (name, k) - ) + raise AssertionError(f"you can't use {name} and {k} together") def _check_collective_mode(self, main_program, optimizer, strategy): """ @@ -494,8 +492,8 @@ def _try_to_compile(self, startup_program, main_program): def raiseOptimizeError(self, strategy_name, optimize_name): raise ValueError( - "can not use {0} when you set DistStrategy.{1} " - "as True".format(optimize_name, strategy_name) + f"can not use {optimize_name} when you set DistStrategy.{strategy_name} " + "as True" ) def minimize( diff --git a/python/paddle/incubate/distributed/fleet/fleet_util.py b/python/paddle/incubate/distributed/fleet/fleet_util.py index ab06adb5c71aa6..e4d1979bc2f0e2 100644 --- a/python/paddle/incubate/distributed/fleet/fleet_util.py +++ b/python/paddle/incubate/distributed/fleet/fleet_util.py @@ -422,7 +422,7 @@ def write_model_donefile( xbox_base_key = int(xbox_base_key) if pass_id != "-1": - suffix_name = "/%s/%s/" % (day, pass_id) + suffix_name = f"/{day}/{pass_id}/" model_path = output_path.rstrip("/") + suffix_name else: suffix_name = "/%s/0/" % day @@ -461,19 +461,19 @@ def 
write_model_donefile( client.delete(donefile_path) client.upload(donefile_name, output_path) self.rank0_error( - "write %s/%s %s succeed" % (day, pass_id, donefile_name) + f"write {day}/{pass_id} {donefile_name} succeed" ) else: self.rank0_error( - "not write %s because %s/%s already " - "exists" % (donefile_name, day, pass_id) + "not write {} because {}/{} already " + "exists".format(donefile_name, day, pass_id) ) else: with open(donefile_name, "w") as f: f.write(content + "\n") client.upload(donefile_name, output_path) self.rank0_error( - "write %s/%s %s succeed" % (day, pass_id, donefile_name) + f"write {day}/{pass_id} {donefile_name} succeed" ) fleet._role_maker._barrier_worker() @@ -530,7 +530,7 @@ def write_xbox_donefile( if pass_id != "-1": mode = "patch" - suffix_name = "/%s/delta-%s/" % (day, pass_id) + suffix_name = f"/{day}/delta-{pass_id}/" model_path = output_path.rstrip("/") + suffix_name if donefile_name is None: donefile_name = "xbox_patch_done.txt" @@ -580,19 +580,19 @@ def write_xbox_donefile( client.delete(donefile_path) client.upload(donefile_name, output_path) self.rank0_error( - "write %s/%s %s succeed" % (day, pass_id, donefile_name) + f"write {day}/{pass_id} {donefile_name} succeed" ) else: self.rank0_error( - "not write %s because %s/%s already " - "exists" % (donefile_name, day, pass_id) + "not write {} because {}/{} already " + "exists".format(donefile_name, day, pass_id) ) else: with open(donefile_name, "w") as f: f.write(xbox_str + "\n") client.upload(donefile_name, output_path) self.rank0_error( - "write %s/%s %s succeed" % (day, pass_id, donefile_name) + f"write {day}/{pass_id} {donefile_name} succeed" ) fleet._role_maker._barrier_worker() @@ -606,7 +606,7 @@ def write_cache_donefile( hadoop_fs_ugi, hadoop_home="$HADOOP_HOME", donefile_name="sparse_cache.meta", - **kwargs + **kwargs, ): """ write cache donefile @@ -693,7 +693,7 @@ def load_model(self, output_path, day, pass_id): """ day = str(day) pass_id = str(pass_id) - suffix_name = "/%s/%s/" % (day, pass_id) + suffix_name = f"/{day}/{pass_id}/" load_path = output_path + suffix_name self.rank0_error("going to load_model %s" % load_path) self.load_fleet_model(load_path) @@ -718,7 +718,7 @@ def save_model(self, output_path, day, pass_id): """ day = str(day) pass_id = str(pass_id) - suffix_name = "/%s/%s/" % (day, pass_id) + suffix_name = f"/{day}/{pass_id}/" model_path = output_path + suffix_name self.rank0_print("going to save_model %s" % model_path) self.save_fleet_model(model_path) @@ -766,7 +766,7 @@ def save_delta_model(self, output_path, day, pass_id): """ day = str(day) pass_id = str(pass_id) - suffix_name = "/%s/delta-%s/" % (day, pass_id) + suffix_name = f"/{day}/delta-{pass_id}/" model_path = output_path + suffix_name self.rank0_print("going to save_delta_model %s" % model_path) fleet.save_persistables(None, model_path, mode=1) @@ -822,7 +822,7 @@ def save_cache_model(self, output_path, day, pass_id, mode=1, **kwargs): pass_id = str(pass_id) mode = int(mode) table_id = kwargs.get("table_id", 0) - suffix_name = "/%s/delta-%s" % (day, pass_id) + suffix_name = f"/{day}/delta-{pass_id}" model_path = output_path.rstrip("/") + suffix_name self.rank0_print("going to save_cache_model %s" % model_path) key_num = fleet.save_cache_model( @@ -996,9 +996,9 @@ def save_paddle_inference_model( client = HDFSClient(hadoop_home, configs) if pass_id == "-1": - dest = "%s/%s/base/dnn_plugin/" % (output_path, day) + dest = f"{output_path}/{day}/base/dnn_plugin/" else: - dest = "%s/%s/delta-%s/dnn_plugin/" % ( + dest = 
"{}/{}/delta-{}/dnn_plugin/".format( output_path, day, pass_id, @@ -1103,9 +1103,9 @@ def save_paddle_params( client = HDFSClient(hadoop_home, configs) if pass_id == "-1": - dest = "%s/%s/base/dnn_plugin/" % (output_path, day) + dest = f"{output_path}/{day}/base/dnn_plugin/" else: - dest = "%s/%s/delta-%s/dnn_plugin/" % ( + dest = "{}/{}/delta-{}/dnn_plugin/".format( output_path, day, pass_id, @@ -1962,7 +1962,7 @@ def write_model_donefile( xbox_base_key = int(xbox_base_key) if pass_id != "-1": - suffix_name = "/%s/%s/" % (day, pass_id) + suffix_name = f"/{day}/{pass_id}/" model_path = output_path.rstrip("/") + suffix_name else: suffix_name = "/%s/0/" % day @@ -2000,19 +2000,19 @@ def write_model_donefile( self._afs.delete(donefile_path) self._afs.upload(donefile_name, donefile_path) self.rank0_error( - "write %s/%s %s succeed" % (day, pass_id, donefile_name) + f"write {day}/{pass_id} {donefile_name} succeed" ) else: self.rank0_error( - "not write %s because %s/%s already " - "exists" % (donefile_name, day, pass_id) + "not write {} because {}/{} already " + "exists".format(donefile_name, day, pass_id) ) else: with open(donefile_name, "w") as f: f.write(content + "\n") self._afs.upload(donefile_name, donefile_path) self.rank0_error( - "write %s/%s %s succeed" % (day, pass_id, donefile_name) + f"write {day}/{pass_id} {donefile_name} succeed" ) def write_xbox_donefile( @@ -2065,7 +2065,7 @@ def write_xbox_donefile( mode = None if pass_id != "-1": mode = "patch" - suffix_name = "/%s/delta-%s/" % (day, pass_id) + suffix_name = f"/{day}/delta-{pass_id}/" model_path = output_path.rstrip("/") + suffix_name if donefile_name is None: donefile_name = "xbox_patch_done.txt" @@ -2117,19 +2117,19 @@ def write_xbox_donefile( self._afs.delete(donefile_path) self._afs.upload(donefile_name, donefile_path) self.rank0_info( - "write %s/%s %s succeed" % (day, pass_id, donefile_name) + f"write {day}/{pass_id} {donefile_name} succeed" ) else: self.rank0_info( - "not write %s because %s/%s already " - "exists" % (donefile_name, day, pass_id) + "not write {} because {}/{} already " + "exists".format(donefile_name, day, pass_id) ) else: with open(donefile_name, "w") as f: f.write(xbox_str + "\n") self._afs.upload(donefile_name, donefile_path) self.rank0_error( - "write %s/%s %s succeed" % (day, pass_id, donefile_name) + f"write {day}/{pass_id} {donefile_name} succeed" ) def write_cache_donefile( @@ -2139,7 +2139,7 @@ def write_cache_donefile( pass_id, key_num, donefile_name="sparse_cache.meta", - **kwargs + **kwargs, ): """ write cache donefile diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py b/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py index d3c34f5694cc19..f2e113c6de2b16 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py @@ -634,9 +634,7 @@ def _save_sparse_params(self, executor, dirname, context, main_program): slice_varnames = [] remote_varnames = [] for i in range(len(var_ctx.split_varnames())): - slice_varnames.append( - "{}.block{}".format(reshaped_varname, i) - ) + slice_varnames.append(f"{reshaped_varname}.block{i}") remote_varnames.append(reshaped_varname) block.append_op( diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/distributed_strategy.py 
b/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/distributed_strategy.py index 5ad91c7b98d01b..87cb5fa1d3b119 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/distributed_strategy.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/distributed_strategy.py @@ -148,7 +148,7 @@ def display(self, configs): draws += border - _str = "\n{}\n".format(draws) + _str = f"\n{draws}\n" return _str def __repr__(self): @@ -239,7 +239,7 @@ def set_trainer_runtime_config(self, config): self._trainer_runtime_config.runtime_configs[key] = Value else: raise ValueError( - "TrainerRuntimeConfig doesn't have key: {}".format(key) + f"TrainerRuntimeConfig doesn't have key: {key}" ) else: raise TypeError( @@ -270,7 +270,7 @@ def set_server_runtime_config(self, config): setattr(self._server_runtime_config, key, config[key]) else: raise ValueError( - "ServerRuntimeConfig doesn't have key: {}".format(key) + f"ServerRuntimeConfig doesn't have key: {key}" ) else: raise TypeError( @@ -295,7 +295,7 @@ def set_execute_strategy(self, config): setattr(self._execute_strategy, key, config[key]) else: raise ValueError( - "ExecutionStrategy doesn't have key: {}".format(key) + f"ExecutionStrategy doesn't have key: {key}" ) else: raise TypeError( @@ -319,9 +319,7 @@ def set_build_strategy(self, config): if hasattr(self._build_strategy, key): setattr(self._build_strategy, key, config[key]) else: - raise ValueError( - "BuildStrategy doesn't have key: {}".format(key) - ) + raise ValueError(f"BuildStrategy doesn't have key: {key}") else: raise TypeError( "build_strategy only accept input type: dict or BuildStrategy" diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/ir/pserver_pass.py b/python/paddle/incubate/distributed/fleet/parameter_server/ir/pserver_pass.py index dc2ac790956c84..5f742e89844210 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/ir/pserver_pass.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/ir/pserver_pass.py @@ -500,7 +500,7 @@ def _is_opt_op_on_pserver(endpoint, op): for i in range(len(merged_ordernames)): if param == merged_ordernames[i]: merged_p = merged_varnames[i] - merged_g = "{}@GRAD".format(merged_varnames[i]) + merged_g = f"{merged_varnames[i]}@GRAD" op._set_attr(OP_ROLE_VAR_ATTR_NAME, [merged_p, merged_g]) return True return False @@ -784,7 +784,7 @@ def add_large_scale_op( opt_idx, ): ids = global_block.create_var( - name="kSparseIDs@{}".format(table_name), + name=f"kSparseIDs@{table_name}", persistable=False, dtype="int64", shape=[1, 1], @@ -904,7 +904,7 @@ def add_large_scale_op( mode = "0" names_str = ",".join(value_names) dims_str = ",".join([str(dim) for dim in value_dims]) - ids_name = "kSparseIDs@{}".format(param) + ids_name = f"kSparseIDs@{param}" cached_str = ",".join(acture_names + [ids_name]) init_attr_str = get_initializer_attrs(acture_names) @@ -920,7 +920,7 @@ def add_large_scale_op( entry_attr, ] ) - print("large_scale_metas: {}".format(meta_str)) + print(f"large_scale_metas: {meta_str}") large_scale_kv_metas.append(meta_str) program.global_block().append_op( diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/ir/public.py b/python/paddle/incubate/distributed/fleet/parameter_server/ir/public.py index 16a93ab6b6fe37..2587505116263c 100755 --- a/python/paddle/incubate/distributed/fleet/parameter_server/ir/public.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/ir/public.py @@ 
-376,15 +376,13 @@ def get_grad_var_ep(slices): for slice in slices: if self.is_geo_mode(): if is_send: - names.append("{}.delta".format(slice.name)) + names.append(f"{slice.name}.delta") else: names.append(slice.name) elif ( is_grad and self.is_sync_mode() and self.get_trainers() > 1 ): - names.append( - "{}.trainer_{}".format(slice.name, self.get_role_id()) - ) + names.append(f"{slice.name}.trainer_{self.get_role_id()}") else: names.append(slice.name) @@ -793,7 +791,7 @@ def get_the_one_send_context( splited_varname = [] for i in range(len(ep_list)): - splited_varname.append("{}.block{}".format(param_name, i)) + splited_varname.append(f"{param_name}.block{i}") is_distributed = ( True if param_name in distibuted_varnames else False @@ -1327,7 +1325,7 @@ def remove_var_pair_by_grad(self, var_name): del self.merged_sparse_pairs[index] return - print("Not find {} in self.merge_pairs".format(var_name)) + print(f"Not find {var_name} in self.merge_pairs") def _is_opt_role_op(op): diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/incubate/distributed/fleet/parameter_server/ir/trainer_pass.py index 563d6556f021df..f32a9b2e11d162 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/ir/trainer_pass.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/ir/trainer_pass.py @@ -1486,7 +1486,7 @@ def get_communicate_var_info( # format(name, shape)) recv_var_dim = -1 * reduce(lambda x, y: x * y, shape) input_var_reshape_dim.append(recv_var_dim) - input_var_reshape_name.append("{}.input_reshape@Heter".format(name)) + input_var_reshape_name.append(f"{name}.input_reshape@Heter") # output # var -> reshape -> var@Heter_SERVER_BLOCK@INPUT_RESHAPE_VAR -> concat -> Heter_SERVER_BLOCK_index@JOINT_VAR @@ -1847,7 +1847,7 @@ def insert_reshape_op( new_var_shape = out.shape x_shape = block.create_var( - name="{}.xshape@Heter".format(var_name), dtype=input_var.dtype + name=f"{var_name}.xshape@Heter", dtype=input_var.dtype ) block._insert_op( index=index, diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py index 0e2e7c5e25ba66..4c0cb3e3fad6f2 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py @@ -112,9 +112,9 @@ def init_worker(self): # prepare for client to client communication if self._role_maker.is_worker(): info = self._fleet_ptr.get_clients_info() - print("Client Info: {}".format(info)) + print(f"Client Info: {info}") all_info = self._role_maker._worker_gather(info[0]) - print("All Client Info: {}".format(all_info)) + print(f"All Client Info: {all_info}") self._fleet_ptr.gather_clients(all_info) self._fleet_ptr.set_client2client_config( self._client2client_request_timeout_ms, @@ -916,9 +916,7 @@ def _prepare_params( if d_size.get(name) is None: d_size[name] = size elif d_size[name] != size: - raise ValueError( - "embedding size error: %s vs %s" % (size, d_size[name]) - ) + raise ValueError(f"embedding size error: {size} vs {d_size[name]}") # check embedding accessor accessor = FLEET_GLOBAL_DICT["cur_accessor"] @@ -926,7 +924,7 @@ def _prepare_params( d_accessor[name] = accessor elif d_accessor[name] != accessor: raise ValueError( - "embedding size error: %s vs %s" % (d_accessor[name], accessor) + f"embedding size error: {d_accessor[name]} vs {accessor}" ) # check embedding table id diff --git 
a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/optimizer_factory.py b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/optimizer_factory.py index 58234164f50fe4..99879df0c19b1d 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/optimizer_factory.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/optimizer_factory.py @@ -286,7 +286,7 @@ def _check_params_grads(self, params, grads): pname = params[i].name gname = grads[i].name if pname != gname[:-5]: - raise ValueError(" params != grads , %s vs %s" % (pname, gname)) + raise ValueError(f" params != grads , {pname} vs {gname}") pname2grad[pname] = grads[i] return pname2grad @@ -623,8 +623,10 @@ def _minimize( emb_to_size = FLEET_GLOBAL_DICT["emb_to_size"] if len(sparse_table_to_index) != len(emb_to_table): raise ValueError( - "sparse tables from program != sparse tables from op: %s " - "vs %s" % (len(sparse_table_to_index), len(emb_to_table)) + "sparse tables from program != sparse tables from op: {} " + "vs {}".format( + len(sparse_table_to_index), len(emb_to_table) + ) ) for key in sparse_table_to_index: if ( diff --git a/python/paddle/incubate/distributed/fleet/utils.py b/python/paddle/incubate/distributed/fleet/utils.py index 92b2672863c1d8..59047f1e424323 100644 --- a/python/paddle/incubate/distributed/fleet/utils.py +++ b/python/paddle/incubate/distributed/fleet/utils.py @@ -97,9 +97,7 @@ def check_pruned_program_vars(train_prog, pruned_prog): ] pruned_vars = OrderedDict(pruned_vars) pruned_vars_name = list(pruned_vars) - logger.info( - "persistable vars in pruned program: {}".format(pruned_vars_name) - ) + logger.info(f"persistable vars in pruned program: {pruned_vars_name}") for var_name in pruned_vars: var = pruned_vars[var_name] @@ -423,7 +421,7 @@ def try_load_model_vars( ) for i, v in enumerate(fetch_list): logger.info("fetch_targets name: %s" % v.name) - logger.info("fetch_targets: {}".format(results[i])) + logger.info(f"fetch_targets: {results[i]}") return results diff --git a/python/paddle/incubate/nn/layer/fused_dropout_add.py b/python/paddle/incubate/nn/layer/fused_dropout_add.py index 373103442922e1..77874d2944764b 100644 --- a/python/paddle/incubate/nn/layer/fused_dropout_add.py +++ b/python/paddle/incubate/nn/layer/fused_dropout_add.py @@ -74,5 +74,5 @@ def forward(self, x, y): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' - return 'p={}, mode={}{}'.format(self.p, self.mode, name_str) + name_str = f', name={self.name}' if self.name else '' + return f'p={self.p}, mode={self.mode}{name_str}' diff --git a/python/paddle/incubate/nn/layer/fused_transformer.py b/python/paddle/incubate/nn/layer/fused_transformer.py index cbfe95df2062a4..31ea1e8d663b18 100644 --- a/python/paddle/incubate/nn/layer/fused_transformer.py +++ b/python/paddle/incubate/nn/layer/fused_transformer.py @@ -178,7 +178,7 @@ def forward(self, x, residual): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'embed_dim={}, seq_len={}, dropout_rate={}, epsilon={}, dtype={}{}'.format( self.embed_dim, self.seq_len, @@ -459,7 +459,7 @@ def forward(self, query, key=None, value=None, attn_mask=None, cache=None): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'embed_dim={}, num_heads={}, dropout_rate={}, 
attn_dropout_rate={}, epsilon={}, kdim={}, vdim={}, normalize_before={}, need_weights={}, dtype={}{}'.format( self.embed_dim, self.num_heads, @@ -689,7 +689,7 @@ def forward(self, src, cache=None): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'd_model={}, dim_feedforward={}, dropout_rate={}, epsilon={}, activation={}, act_dropout_rate={}, normalize_before={}, dtype={}{}'.format( self._d_model, self._dim_feedforward, diff --git a/python/paddle/incubate/optimizer/functional/utils.py b/python/paddle/incubate/optimizer/functional/utils.py index 79b6085a74783f..674c56c0530fbb 100644 --- a/python/paddle/incubate/optimizer/functional/utils.py +++ b/python/paddle/incubate/optimizer/functional/utils.py @@ -21,7 +21,7 @@ def check_input_type(input, name, op_name): r"""Check whether the input is tensor or variable.""" if paddle.in_dynamic_mode(): if not isinstance(input, paddle.Tensor): - raise ValueError("The input: {} must be tensor.".format(input)) + raise ValueError(f"The input: {input} must be tensor.") else: check_type(input, name, Variable, op_name) diff --git a/python/paddle/jit/api.py b/python/paddle/jit/api.py index e5f8731fc9ead5..2048064a7d6834 100644 --- a/python/paddle/jit/api.py +++ b/python/paddle/jit/api.py @@ -601,9 +601,9 @@ def _build_load_path_and_config(path, config): directory_format_exist = os.path.isdir(path) if prefix_format_exist and directory_format_exist: raise ValueError( - "The %s.pdmodel and %s directory exist at the same time, " + "The {}.pdmodel and {} directory exist at the same time, " "don't know which one to load, please make sure that the specified target " - "of ``path`` is unique." % (path, path) + "of ``path`` is unique.".format(path, path) ) elif not prefix_format_exist and not directory_format_exist: raise ValueError( @@ -1827,7 +1827,7 @@ def get_feed_fetch(all_vars, partial_vars): target_vars = [] for name in target_var_names: target_var = self._program.global_block().vars.get(name, None) - assert target_var is not None, "{} cannot be found".format(name) + assert target_var is not None, f"{name} cannot be found" target_vars.append(target_var) model_filename = file_prefix + INFER_MODEL_SUFFIX diff --git a/python/paddle/jit/dy2static/ast_transformer.py b/python/paddle/jit/dy2static/ast_transformer.py index 0e935c5842433f..3626709bc37145 100644 --- a/python/paddle/jit/dy2static/ast_transformer.py +++ b/python/paddle/jit/dy2static/ast_transformer.py @@ -86,7 +86,7 @@ def _apply(self, transformer, node_wrapper, log_level): def transfer_from_node_type(self, node_wrapper): self.translator_logger.log( - 1, "Source code: \n{}".format(ast_to_source_code(self.root)) + 1, f"Source code: \n{ast_to_source_code(self.root)}" ) # Generic transformation self.visit(node_wrapper.node) diff --git a/python/paddle/jit/dy2static/base_transformer.py b/python/paddle/jit/dy2static/base_transformer.py index 7b0fd4a03a5fdf..c019f87dee0828 100644 --- a/python/paddle/jit/dy2static/base_transformer.py +++ b/python/paddle/jit/dy2static/base_transformer.py @@ -33,7 +33,7 @@ class BaseTransformer(gast.NodeTransformer): def visit(self, node): if not isinstance(node, gast.AST): - msg = ('Expected "gast.AST", but got "{}".').format(type(node)) + msg = f'Expected "gast.AST", but got "{type(node)}".' 
raise ValueError(msg) origin_info = getattr(node, ORIGI_INFO, None) diff --git a/python/paddle/jit/dy2static/basic_api_transformer.py b/python/paddle/jit/dy2static/basic_api_transformer.py index ea3e7c2a3da28f..a2c9823a3537f2 100644 --- a/python/paddle/jit/dy2static/basic_api_transformer.py +++ b/python/paddle/jit/dy2static/basic_api_transformer.py @@ -152,9 +152,7 @@ def transform(self): def _surround_with_ld(self, node): node = ( - gast.parse( - "_jst.Ld({})".format(utils.ast_to_source_code(node).strip()) - ) + gast.parse(f"_jst.Ld({utils.ast_to_source_code(node).strip()})") .body[0] .value ) diff --git a/python/paddle/jit/dy2static/call_transformer.py b/python/paddle/jit/dy2static/call_transformer.py index b4efe6af55e83a..e4418a62206b66 100644 --- a/python/paddle/jit/dy2static/call_transformer.py +++ b/python/paddle/jit/dy2static/call_transformer.py @@ -56,7 +56,7 @@ def _no_need_convert_call(self, node): 'enumerate', 'print', } - is_builtin = eval("is_builtin({})".format(func_str)) # noqa: F811 + is_builtin = eval(f"is_builtin({func_str})") # noqa: F811 need_convert = func_str in need_convert_builtin_func_list return is_builtin and not need_convert except Exception: @@ -78,7 +78,7 @@ def visit_Call(self, node): if PDB_SET in func_str: return node - new_func_str = "_jst.Call({})".format(func_str) + new_func_str = f"_jst.Call({func_str})" new_func_ast = gast.parse(new_func_str).body[0].value node.func = new_func_ast diff --git a/python/paddle/jit/dy2static/cast_transformer.py b/python/paddle/jit/dy2static/cast_transformer.py index 8d04411e3161a4..da169786cd3aa2 100644 --- a/python/paddle/jit/dy2static/cast_transformer.py +++ b/python/paddle/jit/dy2static/cast_transformer.py @@ -41,7 +41,7 @@ def visit_Call(self, node): func_str = ast_to_source_code(node.func).strip() if func_str in self._castable_type and len(node.args) > 0: args_str = ast_to_source_code(node.args[0]).strip() - new_func_str = "_jst.AsDtype({}, '{}')".format(args_str, func_str) + new_func_str = f"_jst.AsDtype({args_str}, '{func_str}')" new_node = gast.parse(new_func_str).body[0].value return new_node diff --git a/python/paddle/jit/dy2static/convert_call_func.py b/python/paddle/jit/dy2static/convert_call_func.py index 077f208fb2cfab..2f0494551c88a0 100644 --- a/python/paddle/jit/dy2static/convert_call_func.py +++ b/python/paddle/jit/dy2static/convert_call_func.py @@ -184,9 +184,7 @@ def dyfunc(x): # [1. 1. 1.]] """ - translator_logger.log( - 1, "Convert callable object: convert {}.".format(func) - ) + translator_logger.log(1, f"Convert callable object: convert {func}.") func_self = None converted_call = None @@ -329,7 +327,7 @@ def dyfunc(x): func_self = None if func_self else func_self else: raise NotImplementedError( - "Callable {} can not be transformed at present.".format(func) + f"Callable {func} can not be transformed at present." 
) if converted_call is None: diff --git a/python/paddle/jit/dy2static/convert_operators.py b/python/paddle/jit/dy2static/convert_operators.py index a492cb6c51af09..ad9abcc9849efb 100644 --- a/python/paddle/jit/dy2static/convert_operators.py +++ b/python/paddle/jit/dy2static/convert_operators.py @@ -725,7 +725,7 @@ def convert_var_dtype(var, dtype): } return paddle.cast(var, dtype=cast_map[dtype]) else: - return eval('{}(var)'.format(dtype)) + return eval(f'{dtype}(var)') def convert_assert(cond, message=""): diff --git a/python/paddle/jit/dy2static/decorator_transformer.py b/python/paddle/jit/dy2static/decorator_transformer.py index c567b279cccb17..f7391f301dbac9 100644 --- a/python/paddle/jit/dy2static/decorator_transformer.py +++ b/python/paddle/jit/dy2static/decorator_transformer.py @@ -156,7 +156,7 @@ def visit_FunctionDef(self, node): args = [arg.id for arg in node.args.args] arg_str = ','.join(args) - callfun_str = 'return {}({})'.format(decoed_func, arg_str) + callfun_str = f'return {decoed_func}({arg_str})' callfun_node = gast.parse(callfun_str).body[0] node.body = [orig_func_node] + decofun_nodes + [callfun_node] diff --git a/python/paddle/jit/dy2static/function_spec.py b/python/paddle/jit/dy2static/function_spec.py index e600e88099ac8b..3fc3a32b210fc4 100644 --- a/python/paddle/jit/dy2static/function_spec.py +++ b/python/paddle/jit/dy2static/function_spec.py @@ -433,7 +433,7 @@ def _replace_spec_name(name, input_spec): elif isinstance(input_spec, (list, tuple)): processed_specs = [] for i, spec in enumerate(input_spec): - new_name = "{}_{}".format(name, i) + new_name = f"{name}_{i}" processed_specs.append(_replace_spec_name(new_name, spec)) return processed_specs elif isinstance(input_spec, dict): diff --git a/python/paddle/jit/dy2static/ifelse_transformer.py b/python/paddle/jit/dy2static/ifelse_transformer.py index baa11b53458bf2..ba64b246b35ed6 100644 --- a/python/paddle/jit/dy2static/ifelse_transformer.py +++ b/python/paddle/jit/dy2static/ifelse_transformer.py @@ -424,8 +424,8 @@ def create_convert_ifelse_node( to replace original `python if/else` statement. 
""" if is_if_expr: - true_func_source = "lambda : {}".format(ast_to_source_code(true_func)) - false_func_source = "lambda : {}".format(ast_to_source_code(false_func)) + true_func_source = f"lambda : {ast_to_source_code(true_func)}" + false_func_source = f"lambda : {ast_to_source_code(false_func)}" else: true_func_source = true_func.name false_func_source = false_func.name diff --git a/python/paddle/jit/dy2static/logging_utils.py b/python/paddle/jit/dy2static/logging_utils.py index 6e087fbedda8d3..b8a6e5f4b63885 100644 --- a/python/paddle/jit/dy2static/logging_utils.py +++ b/python/paddle/jit/dy2static/logging_utils.py @@ -120,7 +120,7 @@ def check_level(self, level): if isinstance(level, (int, type(None))): rv = level else: - raise TypeError("Level is not an integer: {}".format(level)) + raise TypeError(f"Level is not an integer: {level}") return rv def has_code_level(self, level): @@ -151,7 +151,7 @@ def warn(self, msg, *args, **kwargs): def log(self, level, msg, *args, **kwargs): if self.has_verbosity(level): - msg_with_level = '(Level {}) {}'.format(level, msg) + msg_with_level = f'(Level {level}) {msg}' self.logger.info(msg_with_level, *args, **kwargs) if self.need_to_echo_log_to_stdout: self._output_to_stdout('INFO: ' + msg_with_level, *args) diff --git a/python/paddle/jit/dy2static/logical_transformer.py b/python/paddle/jit/dy2static/logical_transformer.py index 2ed79bc55622ec..a31ddcd44e90c8 100644 --- a/python/paddle/jit/dy2static/logical_transformer.py +++ b/python/paddle/jit/dy2static/logical_transformer.py @@ -59,7 +59,7 @@ def visit_UnaryOp(self, node): self.generic_visit(node) if isinstance(node.op, gast.Not): arg = ast_to_source_code(node.operand) - new_node_str = "_jst.Not({})".format(arg) + new_node_str = f"_jst.Not({arg})" # NOTE: gast.parse returns Module(body=[expr(value=...)]) new_node = gast.parse(new_node_str).body[0].value return new_node diff --git a/python/paddle/jit/dy2static/origin_info.py b/python/paddle/jit/dy2static/origin_info.py index ba917d666373d2..b0e7179f49ba91 100644 --- a/python/paddle/jit/dy2static/origin_info.py +++ b/python/paddle/jit/dy2static/origin_info.py @@ -315,7 +315,7 @@ def get_new_op_callstack(callstack): callstack[i] = ' File "{}", line {}, in {}'.format( filepath, lineno, funcname ) - callstack[i + 1] = ' {}'.format(code) + callstack[i + 1] = f' {code}' return callstack diff --git a/python/paddle/jit/dy2static/tensor_shape_transformer.py b/python/paddle/jit/dy2static/tensor_shape_transformer.py index 81e36028055232..6efff3b056185b 100644 --- a/python/paddle/jit/dy2static/tensor_shape_transformer.py +++ b/python/paddle/jit/dy2static/tensor_shape_transformer.py @@ -44,7 +44,7 @@ def visit_Attribute(self, node): # NOTE(dev): we can deal with paddle.shape in this case, but it's # not pretty to modify into 'convert_shape(paddle)(x)[0]'. 
if args != 'paddle': - convert_shape_func = "_jst.Shape({})".format(args) + convert_shape_func = f"_jst.Shape({args})" shape_node = gast.parse(convert_shape_func).body[0].value return shape_node return node diff --git a/python/paddle/jit/dy2static/utils.py b/python/paddle/jit/dy2static/utils.py index 845e556a22a126..cccf1dcca045ce 100644 --- a/python/paddle/jit/dy2static/utils.py +++ b/python/paddle/jit/dy2static/utils.py @@ -392,7 +392,7 @@ def update_args_of_func(node, dygraph_node, method_name): class_src = astor.to_source(gast.gast_to_ast(dygraph_node.func)) if method_name == "__init__" or eval( - "issubclass({}, paddle.nn.Layer)".format(class_src) + f"issubclass({class_src}, paddle.nn.Layer)" ): full_args = eval(f"inspect.getfullargspec({class_src}.{method_name})") full_args_name = [ @@ -437,7 +437,7 @@ def create_api_shape_node(tensor_shape_node): def get_constant_variable_node(name, value, shape=[1], dtype='int64'): return gast.parse( - '%s = paddle.full(%s, "%s", %s)' % (name, str(shape), str(value), dtype) + f'{name} = paddle.full({str(shape)}, "{str(value)}", {dtype})' ) @@ -516,7 +516,7 @@ def get_temp_dir(): """ Return @to_static temp directory. """ - dir_name = "paddle/to_static_tmp/{pid}".format(pid=os.getpid()) + dir_name = f"paddle/to_static_tmp/{os.getpid()}" temp_dir = os.path.join(os.path.expanduser('~/.cache'), dir_name) is_windows = sys.platform.startswith('win') if is_windows: diff --git a/python/paddle/jit/dy2static/utils_helper.py b/python/paddle/jit/dy2static/utils_helper.py index 67dc7c498858f2..1311c37ebf2e34 100644 --- a/python/paddle/jit/dy2static/utils_helper.py +++ b/python/paddle/jit/dy2static/utils_helper.py @@ -69,9 +69,7 @@ def is_api_in_module(node, module_prefix): import paddle.jit.dy2static as _jst # noqa: F401 from paddle import to_tensor # noqa: F401 - return eval( - "_is_api_in_module_helper({}, '{}')".format(func_str, module_prefix) - ) + return eval(f"_is_api_in_module_helper({func_str}, '{module_prefix}')") except Exception: return False diff --git a/python/paddle/jit/dy2static/variable_trans_func.py b/python/paddle/jit/dy2static/variable_trans_func.py index b8439e2c79b68c..20f0fb6317e3b6 100644 --- a/python/paddle/jit/dy2static/variable_trans_func.py +++ b/python/paddle/jit/dy2static/variable_trans_func.py @@ -22,12 +22,12 @@ def create_undefined_var(name): - func_code = "{} = _jst.UndefinedVar('{}')".format(name, name) + func_code = f"{name} = _jst.UndefinedVar('{name}')" return gast.parse(func_code).body[0] def create_fill_constant_node(name, value=0): - func_code = "{} = paddle.full(shape=[1], ".format(name) + func_code = f"{name} = paddle.full(shape=[1], " if isinstance(value, bool): func_code += "dtype='bool', fill_value={}, name='{}')".format( value, name @@ -81,5 +81,5 @@ def create_bool_node(name, value): Create a assign stmt for name = value . 
''' assert isinstance(value, bool) - node = "{} = {}".format(name, value) + node = f"{name} = {value}" return gast.parse(node).body[0] diff --git a/python/paddle/jit/translated_layer.py b/python/paddle/jit/translated_layer.py index 599b6df93a05b6..edc0a0cc158e92 100644 --- a/python/paddle/jit/translated_layer.py +++ b/python/paddle/jit/translated_layer.py @@ -519,9 +519,7 @@ def _append_scale_to_output(self, program): with framework.program_guard(program): for i, out in enumerate(self._output_descs): var = program.global_block().var(out.name()) - var = paddle.scale( - var, 1.0, name="translated_layer/scale_{}".format(i) - ) + var = paddle.scale(var, 1.0, name=f"translated_layer/scale_{i}") scale_output_vars.append(var) # 2. update output names & descs for i, var in enumerate(scale_output_vars): @@ -1000,7 +998,7 @@ def _run_dygraph(instance, input, program_holder): tmp_scope_vec, _valid_vars(double_grad_vars), None, - *attrs + *attrs, ) # NOTE: [ why need set param's gradient type here ] diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index 16ef97a3f044b3..13a916abec592c 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -324,7 +324,7 @@ def accumulate(self): def _init_name(self, name): name = name or 'acc' if self.maxk != 1: - self._name = ['{}_top{}'.format(name, k) for k in self.topk] + self._name = [f'{name}_top{k}' for k in self.topk] else: self._name = [name] diff --git a/python/paddle/nn/clip.py b/python/paddle/nn/clip.py index 4c22a8a4814dca..10daab1fc1b4d8 100644 --- a/python/paddle/nn/clip.py +++ b/python/paddle/nn/clip.py @@ -290,7 +290,7 @@ def __init__(self, max, min=None): self.min = min def __str__(self): - return "ByValue, min=%f, max=%f" % (self.min, self.max) + return f"ByValue, min={self.min:f}, max={self.max:f}" def _append_clip_op(self, block, grad_name): clip_op_desc = block.desc.append_op() @@ -403,7 +403,7 @@ def __init__(self, max, min=None): self.min = float(min) def __str__(self): - return "Clip Gradient By Value, min = %f, max=%f" % (self.min, self.max) + return f"Clip Gradient By Value, min = {self.min:f}, max={self.max:f}" @imperative_base.no_grad() def _dygraph_clip(self, params_grads): diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index 4dc3e3a62ec5d9..e79c2403ba0993 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -924,12 +924,12 @@ def selu( """ if scale <= 1.0: raise ValueError( - "The scale must be greater than 1.0. Received: {}.".format(scale) + f"The scale must be greater than 1.0. Received: {scale}." ) if alpha < 0: raise ValueError( - "The alpha must be no less than zero. Received: {}.".format(alpha) + f"The alpha must be no less than zero. Received: {alpha}." 
) if in_dygraph_mode(): diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 2e247c9f943272..86cc69b92f9ee9 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -437,7 +437,7 @@ def interpolate( return paddle.nn.functional.adaptive_avg_pool2d(x, size) elif len(x.shape) == 5: return paddle.nn.functional.adaptive_avg_pool3d(x, size) - helper = LayerHelper('{}_interp_v2'.format(resample_type), **locals()) + helper = LayerHelper(f'{resample_type}_interp_v2', **locals()) if len(x.shape) == 3 and data_format not in ['NCW', 'NWC']: raise ValueError( "Got wrong value for param `data_format`: " @@ -700,7 +700,7 @@ def _is_list_or_turple_(data): out = helper.create_variable_for_type_inference(dtype) helper.append_op( - type='{}_interp_v2'.format(resample_type), + type=f'{resample_type}_interp_v2', inputs=inputs, outputs={"Out": out}, attrs=attrs, @@ -1622,7 +1622,7 @@ def pad(x, pad, mode='constant', value=0.0, data_format="NCHW", name=None): 3, 4, 5, - ], "input tesor dimension must be in [3, 4, 5] but got {}".format(x_dim) + ], f"input tesor dimension must be in [3, 4, 5] but got {x_dim}" supported_format_map = { 3: ["NCL", "NLC"], diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 816fd3266f184f..1a85d97b822830 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -98,7 +98,7 @@ def _update_padding_nd(padding, channel_last, num_dims): padding_algorithm = "EXPLICIT" padding = convert_to_list(padding, num_dims, 'padding') else: - raise ValueError("In valid padding: {}".format(padding)) + raise ValueError(f"In valid padding: {padding}") # for integer padding else: padding_algorithm = "EXPLICIT" diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index c4cbfce45e4cb4..874faafc92338c 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -981,9 +981,7 @@ def hsigmoid_loss( # [1.92374969]] """ if num_classes < 2: - raise ValueError( - 'Expected num_classes >= 2 (got {})'.format(num_classes) - ) + raise ValueError(f'Expected num_classes >= 2 (got {num_classes})') if in_dygraph_mode(): out, _, _ = _C_ops.hsigmoid_loss( @@ -1416,9 +1414,7 @@ def nll_loss( ) if input_dims < 2: - raise ValueError( - 'Expected 2 or more dimensions (got {})'.format(input_dims) - ) + raise ValueError(f'Expected 2 or more dimensions (got {input_dims})') if input_shape[1] < 1: raise ValueError( diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 99d72ddb48086f..f58ad0f0238a69 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -154,7 +154,7 @@ def _update_padding_nd(padding, num_dims, channel_last=False, ceil_mode=False): padding_algorithm = "EXPLICIT" padding = convert_to_list(padding, num_dims, 'padding') else: - raise ValueError("Invalid padding: {}".format(padding)) + raise ValueError(f"Invalid padding: {padding}") # for integer padding else: padding_algorithm = "EXPLICIT" diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py index 6c85ae646b71c8..3a28e63c2035e6 100644 --- a/python/paddle/nn/layer/activation.py +++ b/python/paddle/nn/layer/activation.py @@ -61,8 +61,8 @@ def forward(self, x): return F.celu(x, self._alpha, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 
'alpha={}{}'.format(self._alpha, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'alpha={self._alpha}{name_str}' class ELU(Layer): @@ -109,8 +109,8 @@ def forward(self, x): return F.elu(x, self._alpha, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'alpha={}{}'.format(self._alpha, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'alpha={self._alpha}{name_str}' class GELU(Layer): @@ -161,8 +161,8 @@ def forward(self, x): return F.gelu(x, self._approximate, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'approximate={}{}'.format(self._approximate, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'approximate={self._approximate}{name_str}' class Hardshrink(Layer): @@ -209,8 +209,8 @@ def forward(self, x): return F.hardshrink(x, self._threshold, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'threshold={}{}'.format(self._threshold, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'threshold={self._threshold}{name_str}' class Hardswish(Layer): @@ -259,7 +259,7 @@ def forward(self, x): return F.hardswish(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -300,7 +300,7 @@ def forward(self, x): return F.tanh(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -350,8 +350,8 @@ def forward(self, x): return F.hardtanh(x, self._min, self._max, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'min={}, max={}{}'.format(self._min, self._max, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'min={self._min}, max={self._max}{name_str}' class PReLU(Layer): @@ -434,7 +434,7 @@ def forward(self, x): return F.prelu(x, self._weight, data_format=self._data_format) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'num_parameters={}, data_format={}, init={}, dtype={}{}'.format( self._num_parameters, self._data_format, @@ -527,7 +527,7 @@ def forward(self, x): ) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'lower={}, upper={}, training={}, dtype={}{}'.format( self._lower, self._upper, self.training, self._dtype, name_str ) @@ -571,7 +571,7 @@ def forward(self, x): return F.relu(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -613,7 +613,7 @@ def forward(self, x): return F.relu6(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -668,7 +668,7 @@ def forward(self, x): return F.selu(x, self._scale, self._alpha, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'scale={:.16f}, alpha={:.16f}{}'.format( self._scale, 
self._alpha, name_str ) @@ -719,8 +719,8 @@ def forward(self, x): return F.leaky_relu(x, self._negative_slope, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'negative_slope={}{}'.format(self._negative_slope, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'negative_slope={self._negative_slope}{name_str}' class Sigmoid(Layer): @@ -759,7 +759,7 @@ def forward(self, x): return F.sigmoid(x, self.name) def extra_repr(self): - name_str = 'name={}'.format(self.name) if self.name else '' + name_str = f'name={self.name}' if self.name else '' return name_str @@ -810,7 +810,7 @@ def forward(self, x): return F.hardsigmoid(x, name=self.name) def extra_repr(self): - name_str = 'name={}'.format(self.name) if self.name else '' + name_str = f'name={self.name}' if self.name else '' return name_str @@ -853,7 +853,7 @@ def forward(self, x): return F.softplus(x, self._beta, self._threshold, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'beta={}, threshold={}{}'.format( self._beta, self._threshold, name_str ) @@ -906,8 +906,8 @@ def forward(self, x): return F.softshrink(x, self._threshold, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'threshold={}{}'.format(self._threshold, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'threshold={self._threshold}{name_str}' class Softsign(Layer): @@ -947,7 +947,7 @@ def forward(self, x): return F.softsign(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -988,7 +988,7 @@ def forward(self, x): return F.swish(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -1033,7 +1033,7 @@ def forward(self, x): return F.mish(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -1074,7 +1074,7 @@ def forward(self, x): return F.tanhshrink(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -1124,8 +1124,8 @@ def forward(self, x): return F.thresholded_relu(x, self._threshold, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'threshold={}{}'.format(self._threshold, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'threshold={self._threshold}{name_str}' class Silu(Layer): @@ -1163,7 +1163,7 @@ def forward(self, x): return F.silu(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -1202,7 +1202,7 @@ def forward(self, x): return F.log_sigmoid(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -1327,8 +1327,8 @@ def forward(self, x): return F.softmax(x, self._axis, name=self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 
'axis={}{}'.format(self._axis, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'axis={self._axis}{name_str}' class LogSoftmax(Layer): @@ -1385,8 +1385,8 @@ def forward(self, x): return F.log_softmax(x, self._axis) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'axis={}{}'.format(self._axis, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'axis={self._axis}{name_str}' class Maxout(Layer): @@ -1452,8 +1452,8 @@ def forward(self, x): return F.maxout(x, self._groups, self._axis, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' - return 'groups={}, axis={}{}'.format(self._groups, self._axis, name_str) + name_str = f', name={self._name}' if self._name else '' + return f'groups={self._groups}, axis={self._axis}{name_str}' class Softmax2D(Layer): @@ -1509,5 +1509,5 @@ def forward(self, x): return F.softmax(x, axis=-3, dtype=self._dtype, name=self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index dba6b357e5cd8b..1e7d9d7c46da9a 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -177,7 +177,7 @@ def forward(self, input): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'in_features={}, out_features={}, dtype={}{}'.format( self.weight.shape[0], self.weight.shape[1], self._dtype, name_str ) @@ -409,10 +409,10 @@ def forward(self, x): def extra_repr(self): if self.scale_factor is not None: - main_str = 'scale_factor={}'.format(self.scale_factor) + main_str = f'scale_factor={self.scale_factor}' else: - main_str = 'size={}'.format(self.size) - name_str = ', name={}'.format(self.name) if self.name else '' + main_str = f'size={self.size}' + name_str = f', name={self.name}' if self.name else '' return '{}, mode={}, align_corners={}, align_mode={}, data_format={}{}'.format( main_str, self.mode, @@ -499,10 +499,10 @@ def forward(self, x): def extra_repr(self): if self.scale_factor is not None: - main_str = 'scale_factor={}'.format(self.scale_factor) + main_str = f'scale_factor={self.scale_factor}' else: - main_str = 'size={}'.format(self.size) - name_str = ', name={}'.format(self.name) if self.name else '' + main_str = f'size={self.size}' + name_str = f', name={self.name}' if self.name else '' return '{}, data_format={}{}'.format( main_str, self.data_format, name_str ) @@ -585,10 +585,10 @@ def forward(self, x): def extra_repr(self): if self.scale_factor is not None: - main_str = 'scale_factor={}'.format(self.scale_factor) + main_str = f'scale_factor={self.scale_factor}' else: - main_str = 'size={}'.format(self.size) - name_str = ', name={}'.format(self.name) if self.name else '' + main_str = f'size={self.size}' + name_str = f', name={self.name}' if self.name else '' return '{}, data_format={}{}'.format( main_str, self.data_format, name_str ) @@ -687,7 +687,7 @@ def forward(self, x1, x2): return F.bilinear(x1, x2, self.weight, self.bias, self._name) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'in1_features={}, in2_features={}, out_features={}, dtype={}{}'.format( self._in1_features, self._in2_features, 
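# --- Editorial note (illustrative sketch, not part of the patch) ---------------
# Every hunk in this patch performs the same mechanical rewrite: a printf-style
# or str.format() call is replaced by an equivalent f-string (plus, in a few
# hunks, a trailing comma after unpacked arguments). The snippet below shows
# that the old and new spellings produce identical text, so the rewrite is
# behavior-preserving. The variable names here (name, i, min_val, max_val) are
# hypothetical and chosen only for this demonstration.
name, i = "fc_0.w_0", 3
assert "{}.block{}".format(name, i) == f"{name}.block{i}"
assert ", name={}".format(name) == f", name={name}"
min_val, max_val = -1.0, 1.0
assert "min=%f, max=%f" % (min_val, max_val) == f"min={min_val:f}, max={max_val:f}"
# -------------------------------------------------------------------------------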
@@ -772,7 +772,7 @@ def forward(self, input): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'p={}, axis={}, mode={}{}'.format( self.p, self.axis, self.mode, name_str ) @@ -853,7 +853,7 @@ def forward(self, input): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'p={}, data_format={}{}'.format( self.p, self.data_format, name_str ) @@ -946,7 +946,7 @@ def forward(self, input): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'p={}, data_format={}{}'.format( self.p, self.data_format, name_str ) @@ -1005,8 +1005,8 @@ def forward(self, input): return out def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' - return 'p={}{}'.format(self.p, name_str) + name_str = f', name={self.name}' if self.name else '' + return f'p={self.p}{name_str}' class Pad1D(Layer): @@ -1072,7 +1072,7 @@ def forward(self, x): ) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'padding={}, mode={}, value={}, data_format={}{}'.format( self._pad, self._mode, self._value, self._data_format, name_str ) @@ -1145,7 +1145,7 @@ def forward(self, x): ) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'padding={}, mode={}, value={}, data_format={}{}'.format( self._pad, self._mode, self._value, self._data_format, name_str ) @@ -1213,7 +1213,7 @@ def forward(self, x): ) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'padding={}, data_format={}{}'.format( self._pad, self._data_format, name_str ) @@ -1291,7 +1291,7 @@ def forward(self, x): ) def extra_repr(self): - name_str = ', name={}'.format(self._name) if self._name else '' + name_str = f', name={self._name}' if self._name else '' return 'padding={}, mode={}, value={}, data_format={}{}'.format( self._pad, self._mode, self._value, self._data_format, name_str ) @@ -1598,7 +1598,7 @@ def forward(self, input): ) def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'kernel_size={}, dilation={}, padding={}, stride={}{}'.format( self.kernel_sizes, self.dilations, @@ -1695,7 +1695,7 @@ def forward(self, input): ) def extra_repr(self): - name_str = ', name={}'.format(self.name) if self.name else '' + name_str = f', name={self.name}' if self.name else '' return 'kernel_size={}, dilation={}, padding={}, stride={}{}'.format( self.kernel_sizes, self.dilations, diff --git a/python/paddle/nn/layer/container.py b/python/paddle/nn/layer/container.py index 7aeb973ab94179..c583e035f2e8db 100644 --- a/python/paddle/nn/layer/container.py +++ b/python/paddle/nn/layer/container.py @@ -582,11 +582,11 @@ def __getitem__(self, name): return self._sub_layers[name] else: if name >= len(self._sub_layers): - raise IndexError('index {} is out of range'.format(name)) + raise IndexError(f'index {name} is out of range') elif name < 0 and name >= -len(self._sub_layers): name += len(self._sub_layers) elif name < -len(self._sub_layers): - raise 
IndexError('index {} is out of range'.format(name)) + raise IndexError(f'index {name} is out of range') return list(self._sub_layers.values())[name] def __setitem__(self, name, layer): diff --git a/python/paddle/nn/layer/layers.py b/python/paddle/nn/layer/layers.py index a8adddcff11d1c..8a3659e87b037f 100644 --- a/python/paddle/nn/layer/layers.py +++ b/python/paddle/nn/layer/layers.py @@ -170,7 +170,7 @@ def _multiple_input(self, inputs_in): def _input(self, inputs_in): inputs = self._multiple_input(inputs_in) if len(inputs) != 1: - raise "{0} layer only takes one input in".format(self.layer_type) + raise f"{self.layer_type} layer only takes one input in" return inputs[0] def _multiple_param_attr(self, length, param_attr_in=None): @@ -179,9 +179,7 @@ def _multiple_param_attr(self, length, param_attr_in=None): param_attr = [param_attr] if len(param_attr) != 1 and len(param_attr) != length: - raise ValueError( - "parameter number mismatch in {}".format(self.name) - ) + raise ValueError(f"parameter number mismatch in {self.name}") elif len(param_attr) == 1 and length != 1: tmp = [None] * length for i in range(length): @@ -200,9 +198,7 @@ def iter_inputs_and_params(self, inputs_in, param_attr_in=None): """ param_attr_in = ParamAttr._to_attr(param_attr_in) if isinstance(param_attr_in, bool): - raise ValueError( - 'Param_attr should not be False in {}'.format(self.name) - ) + raise ValueError(f'Param_attr should not be False in {self.name}') inputs = inputs_in if (inputs_in is not None) else [] inputs = self._multiple_input(inputs) param_attrs = self._multiple_param_attr(len(inputs), param_attr_in) @@ -239,9 +235,7 @@ def get_parameter(self, name): """ param = self.main_program.global_block().var(name) if not isinstance(param, Parameter): - raise ValueError( - "no Parameter name %s found in %s" % (name, self.name) - ) + raise ValueError(f"no Parameter name {name} found in {self.name}") return param # TODO: this should not be called anymore after all activation func move to Layers @@ -1068,7 +1062,7 @@ def register_buffer(self, name, tensor, persistable=True): elif name == '': raise KeyError("The name of buffer can not be empty.") elif hasattr(self, name) and name not in self._buffers: - raise KeyError("attribute '{}' already exists.".format(name)) + raise KeyError(f"attribute '{name}' already exists.") elif tensor is not None and not (type(tensor) == core.eager.Tensor): raise TypeError( "The registered buffer should be a Paddle.Tensor, but received {}.".format( @@ -1359,7 +1353,7 @@ def forward(self, input): elif name == '': raise KeyError("The name of parameter can not be empty.") elif hasattr(self, name) and name not in self._parameters: - raise KeyError("The parameter '{}' already exists.".format(name)) + raise KeyError(f"The parameter '{name}' already exists.") elif parameter is not None and not isinstance( parameter, framework.Parameter ): @@ -1855,9 +1849,7 @@ def _check_match(key, param): state = state_dict.get(key, None) if state is None: missing_keys.append(key) - raise ValueError( - "{} is not found in the provided dict.".format(key) - ) + raise ValueError(f"{key} is not found in the provided dict.") if isinstance(state, (dict, list)): if len(state) != len(param): missing_keys.append(key) @@ -1894,7 +1886,7 @@ def _check_match(key, param): match_res = _check_match(key_name, param) matched_param_state.append(match_res) except ValueError as err: - warnings.warn("Skip loading for {}. ".format(key) + str(err)) + warnings.warn(f"Skip loading for {key}. 
" + str(err)) for key in state_dict.keys(): if key not in match_keys: unexpected_keys.append(key) diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 550b7c5587444a..3421fe4f9b00c9 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -281,7 +281,7 @@ def __init__( def _check_input_dim(self, input): if len(input.shape) != 4: raise ValueError( - 'expected 4D input (got {}D input)'.format(len(input.shape)) + f'expected 4D input (got {len(input.shape)}D input)' ) @@ -368,7 +368,7 @@ def __init__( def _check_input_dim(self, input): if len(input.shape) != 5: raise ValueError( - 'expected 5D input (got {}D input)'.format(len(input.shape)) + f'expected 5D input (got {len(input.shape)}D input)' ) @@ -790,9 +790,9 @@ def extra_repr(self): self._num_features, self._momentum, self._epsilon ) if self._data_format != 'NCHW': - main_str += ', data_format={}'.format(self._data_format) + main_str += f', data_format={self._data_format}' if self._name is not None: - main_str += ', name={}'.format(self._name) + main_str += f', name={self._name}' return main_str @@ -1264,7 +1264,7 @@ def _check_data_format(self, input): def _check_input_dim(self, input): if len(input.shape) != 4: raise ValueError( - 'expected 4D input (got {}D input)'.format(len(input.shape)) + f'expected 4D input (got {len(input.shape)}D input)' ) @@ -1374,7 +1374,7 @@ def _check_data_format(self, input): def _check_input_dim(self, input): if len(input.shape) != 5: raise ValueError( - 'expected 5D input (got {}D input)'.format(len(input.shape)) + f'expected 5D input (got {len(input.shape)}D input)' ) @@ -1556,7 +1556,7 @@ def forward(self, x): self._variance, mean_out, variance_out, - *attrs + *attrs, ) return sync_batch_norm_out @@ -1743,9 +1743,9 @@ def extra_repr(self): self.size, self.alpha, self.beta, self.k ) if self.data_format != 'NCHW': - main_str += ', data_format={}'.format(self.data_format) + main_str += f', data_format={self.data_format}' if self.name is not None: - main_str += ', name={}'.format(self.name) + main_str += f', name={self.name}' return main_str diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index ea716f6521a1d4..fbaabafcb5bb0b 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -680,7 +680,7 @@ def forward(self, input): return F.adaptive_avg_pool1d(input, self.output_size, self.name) def extra_repr(self): - return 'output_size={}'.format(self.output_size) + return f'output_size={self.output_size}' class AdaptiveAvgPool2D(Layer): @@ -765,7 +765,7 @@ def forward(self, x): ) def extra_repr(self): - return 'output_size={}'.format(self._output_size) + return f'output_size={self._output_size}' class AdaptiveAvgPool3D(Layer): @@ -857,7 +857,7 @@ def forward(self, x): ) def extra_repr(self): - return 'output_size={}'.format(self._output_size) + return f'output_size={self._output_size}' class AdaptiveMaxPool1D(Layer): @@ -1198,7 +1198,7 @@ def forward(self, x, indices): ) def extra_repr(self): - return 'output_size={}'.format(self.output_size) + return f'output_size={self.output_size}' class MaxUnPool2D(Layer): @@ -1286,7 +1286,7 @@ def forward(self, x, indices): ) def extra_repr(self): - return 'output_size={}'.format(self.output_size) + return f'output_size={self.output_size}' class MaxUnPool3D(Layer): @@ -1377,4 +1377,4 @@ def forward(self, x, indices): ) def extra_repr(self): - return 'output_size={}'.format(self.output_size) + return f'output_size={self.output_size}' diff --git 
a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index c9945506fd9b1e..51ee38a65f2782 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -47,7 +47,7 @@ def rnn( sequence_length=None, time_major=False, is_reverse=False, - **kwargs + **kwargs, ): r""" rnn creates a recurrent neural network specified by RNNCell `cell`, @@ -109,7 +109,7 @@ def rnn( sequence_length, time_major, is_reverse, - **kwargs + **kwargs, ) else: return _rnn_static_graph( @@ -119,7 +119,7 @@ def rnn( sequence_length, time_major, is_reverse, - **kwargs + **kwargs, ) @@ -155,7 +155,7 @@ def _rnn_dynamic_graph( sequence_length=None, time_major=False, is_reverse=False, - **kwargs + **kwargs, ): time_step_index = 0 if time_major else 1 flat_inputs = paddle.utils.flatten(inputs) @@ -223,7 +223,7 @@ def _rnn_static_graph( sequence_length=None, time_major=False, is_reverse=False, - **kwargs + **kwargs, ): check_type(inputs, 'inputs', (Variable, list, tuple), 'rnn') if isinstance(inputs, (list, tuple)): @@ -359,7 +359,7 @@ def birnn( initial_states=None, sequence_length=None, time_major=False, - **kwargs + **kwargs, ): r""" birnn creates a bidirectional recurrent neural network specified by @@ -432,7 +432,7 @@ def birnn( states_fw, sequence_length, time_major=time_major, - **kwargs + **kwargs, ) outputs_bw, states_bw = rnn( @@ -442,7 +442,7 @@ def birnn( sequence_length, time_major=time_major, is_reverse=True, - **kwargs + **kwargs, ) outputs = paddle.utils.map_structure( @@ -1209,7 +1209,7 @@ def forward( sequence_length=sequence_length, time_major=self.time_major, is_reverse=self.is_reverse, - **kwargs + **kwargs, ) return final_outputs, final_states @@ -1296,7 +1296,7 @@ def forward( initial_states, sequence_length, self.time_major, - **kwargs + **kwargs, ) return outputs, final_states @@ -1718,7 +1718,7 @@ def __init__( elif activation == "relu": mode = "RNN_RELU" else: - raise ValueError("Unknown activation '{}'".format(activation)) + raise ValueError(f"Unknown activation '{activation}'") self.activation = activation super().__init__( mode, diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py index af98743d132e3e..a48be90e74c000 100644 --- a/python/paddle/nn/layer/vision.py +++ b/python/paddle/nn/layer/vision.py @@ -79,11 +79,11 @@ def forward(self, x): ) def extra_repr(self): - main_str = 'upscale_factor={}'.format(self._upscale_factor) + main_str = f'upscale_factor={self._upscale_factor}' if self._data_format != 'NCHW': - main_str += ', data_format={}'.format(self._data_format) + main_str += f', data_format={self._data_format}' if self._name is not None: - main_str += ', name={}'.format(self._name) + main_str += f', name={self._name}' return main_str @@ -145,11 +145,11 @@ def forward(self, x): ) def extra_repr(self): - main_str = 'downscale_factor={}'.format(self._downscale_factor) + main_str = f'downscale_factor={self._downscale_factor}' if self._data_format != 'NCHW': - main_str += ', data_format={}'.format(self._data_format) + main_str += f', data_format={self._data_format}' if self._name is not None: - main_str += ', name={}'.format(self._name) + main_str += f', name={self._name}' return main_str @@ -220,9 +220,9 @@ def forward(self, x): ) def extra_repr(self): - main_str = 'groups={}'.format(self._groups) + main_str = f'groups={self._groups}' if self._data_format != 'NCHW': - main_str += ', data_format={}'.format(self._data_format) + main_str += f', data_format={self._data_format}' if self._name is not None: - main_str += ', 
name={}'.format(self._name) + main_str += f', name={self._name}' return main_str diff --git a/python/paddle/nn/quant/lsq.py b/python/paddle/nn/quant/lsq.py index ae73ec6f7024ea..8f225e92659804 100644 --- a/python/paddle/nn/quant/lsq.py +++ b/python/paddle/nn/quant/lsq.py @@ -172,9 +172,7 @@ def __init__( self.Qn = -(2 ** (self.bits - 1)) self.Qp = 2 ** (self.bits - 1) - 1 - scale_prefix = ( - "{}.scale".format(name) if name else 'quant_dequant.scale' - ) + scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale' self._scale_name = unique_name.generate(scale_prefix) s_attr = ParamAttr( @@ -184,9 +182,7 @@ def __init__( self.s.stop_gradient = False if not self.symmetric: - beta_prefix = ( - "{}.beta".format(name) if name else 'quant_dequant.beta' - ) + beta_prefix = f"{name}.beta" if name else 'quant_dequant.beta' self._beta_name = unique_name.generate(beta_prefix) beta_attr = ParamAttr( @@ -292,9 +288,7 @@ def __init__( self.Qp = 2 ** (self.bits - 1) - 1 self.init_state = 0 - scale_prefix = ( - "{}.scale".format(name) if name else 'quant_dequant.scale' - ) + scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale' self._scale_name = unique_name.generate(scale_prefix) s_attr = ParamAttr( name=self._scale_name, initializer=Constant(1.0), trainable=True diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index 49c9e0a3f4b683..c928a5c5f3140f 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -69,9 +69,7 @@ def __init__( self._quant_bits = quant_bits self._name = name self._reduce_type = reduce_type - scale_prefix = ( - "{}.scale".format(name) if name else 'quant_dequant.scale' - ) + scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale' self._scale_name = unique_name.generate(scale_prefix) if quant_on_weight: scale_attr = ParamAttr( @@ -91,7 +89,7 @@ def forward(self, input): attrs = ('bit_length', self._quant_bits) quant_out = _varbase_creator( type=input.type, - name="{}.quantized.dequantized".format(input.name), + name=f"{input.name}.quantized.dequantized", shape=input.shape, dtype=input.dtype, persistable=False, @@ -120,7 +118,7 @@ def forward(self, input): attrs = {'bit_length': self._quant_bits} inputs = {"X": [input]} quant_out = self._helper.create_variable( - name="{}.quantized.dequantized".format(input.name), + name=f"{input.name}.quantized.dequantized", dtype=input.dtype, type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, @@ -169,9 +167,7 @@ def __init__( self._moving_rate = moving_rate self._quant_bits = quant_bits self._reduce_type = reduce_type - scale_prefix = ( - "{}.scale".format(name) if name else 'quant_dequant.scale' - ) + scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale' scale_attr = ParamAttr( name=unique_name.generate(scale_prefix), initializer=Constant(0.001), @@ -182,9 +178,7 @@ def __init__( ) self._scale.stop_gradient = True - state_prefix = ( - "{}.state".format(name) if name else 'quant_dequant.state' - ) + state_prefix = f"{name}.state" if name else 'quant_dequant.state' state_attr = ParamAttr( name=unique_name.generate(state_prefix), initializer=Constant(1), @@ -195,9 +189,7 @@ def __init__( ) self._state.stop_gradient = True - accum_prefix = ( - "{}.accum".format(name) if name else 'quant_dequant.accum' - ) + accum_prefix = f"{name}.accum" if name else 'quant_dequant.accum' accum_attr = ParamAttr( name=unique_name.generate(accum_prefix), initializer=Constant(1), @@ -220,7 +212,7 @@ def forward(self, input): ) quant_out = 
_varbase_creator( type=input.type, - name="{}.quantized.dequantized".format(input.name), + name=f"{input.name}.quantized.dequantized", shape=input.shape, dtype=input.dtype, persistable=False, @@ -247,7 +239,7 @@ def forward(self, input): self._scale, state, accum, - *attrs + *attrs, ) return out @@ -262,7 +254,7 @@ def forward(self, input): } inputs = {"X": [input], "InScale": [self._scale]} quant_out = self._helper.create_variable( - name="{}.quantized.dequantized".format(input.name), + name=f"{input.name}.quantized.dequantized", dtype=input.dtype, type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, @@ -307,9 +299,7 @@ def __init__( self._name = name self._channel_num = channel_num self._reduce_type = reduce_type - scale_prefix = ( - "{}.scale".format(name) if name else 'quant_dequant.scale' - ) + scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale' self._scale_name = unique_name.generate(scale_prefix) if quant_on_weight: scale_attr = ParamAttr( @@ -334,7 +324,7 @@ def forward(self, input): ) quant_out = _varbase_creator( type=input.type, - name="{}.quantized.dequantized".format(input.name), + name=f"{input.name}.quantized.dequantized", shape=input.shape, dtype=input.dtype, persistable=False, @@ -369,7 +359,7 @@ def forward(self, input): attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis} inputs = {"X": [input]} quant_out = self._helper.create_variable( - name="{}.quantized.dequantized".format(input.name), + name=f"{input.name}.quantized.dequantized", dtype=input.dtype, type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, @@ -410,7 +400,7 @@ def __init__( super().__init__() self._moving_rate = moving_rate self._reduce_type = reduce_type - scale_prefix = '{}.scale'.format(name) if name else 'outscale.scale' + scale_prefix = f'{name}.scale' if name else 'outscale.scale' scale_name = unique_name.generate(scale_prefix) scale_attr = ParamAttr( name=scale_name, initializer=Constant(0), trainable=False @@ -420,7 +410,7 @@ def __init__( ) self._scale.stop_gradient = True - state_prefix = "{}.state".format(name) if name else 'outscale.state' + state_prefix = f"{name}.state" if name else 'outscale.state' state_attr = ParamAttr( name=unique_name.generate(state_prefix), initializer=Constant(0), @@ -431,7 +421,7 @@ def __init__( ) self._state.stop_gradient = True - accum_prefix = "{}.accum".format(name) if name else 'outscale.accum' + accum_prefix = f"{name}.accum" if name else 'outscale.accum' accum_attr = ParamAttr( name=unique_name.generate(accum_prefix), initializer=Constant(0), @@ -453,7 +443,7 @@ def forward(self, input): quant_out = _varbase_creator( type=input.type, - name="{}.tmp".format(input.name), + name=f"{input.name}.tmp", shape=input.shape, dtype=input.dtype, persistable=False, @@ -474,7 +464,7 @@ def forward(self, input): self._scale, state, accum, - *attrs + *attrs, ) return out @@ -485,7 +475,7 @@ def forward(self, input): attrs = {'moving_rate': self._moving_rate, 'is_test': not self.training} inputs = {"X": [input]} quant_out = self._helper.create_variable( - name="{}.tmp".format(input.name), + name=f"{input.name}.tmp", dtype=input.dtype, type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, @@ -1107,7 +1097,7 @@ def __init__( name=None, reduce_type=None, *args, - **kwargs + **kwargs, ): super().__init__() diff --git a/python/paddle/nn/utils/weight_norm_hook.py b/python/paddle/nn/utils/weight_norm_hook.py index e3fa28723875be..36e62128c088fb 100644 --- a/python/paddle/nn/utils/weight_norm_hook.py +++ 
b/python/paddle/nn/utils/weight_norm_hook.py @@ -242,4 +242,4 @@ def remove_weight_norm(layer, name='weight'): del layer._forward_pre_hooks[k] return layer - raise ValueError("weight_norm of '{}' not found in {}".format(name, layer)) + raise ValueError(f"weight_norm of '{name}' not found in {layer}") diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 4a5dd476fa14fb..ad639a06c7a0f9 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -807,7 +807,7 @@ def __init__( self.end_lr = end_lr assert ( end_lr > start_lr - ), "end_lr {} must be greater than start_lr {}".format(end_lr, start_lr) + ), f"end_lr {end_lr} must be greater than start_lr {start_lr}" super().__init__(start_lr, last_epoch, verbose) def state_dict(self): diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index 328d97109ff87f..1a1cfd25eeabb3 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -374,7 +374,7 @@ def set_state_dict(self, state_dict): for para_name, var_tmp in v.items(): assert ( var_tmp.name in state_dict - ), "optimizer Tensor {} not found".format(var_tmp.name) + ), f"optimizer Tensor {var_tmp.name} not found" var = var_tmp.value() tensor = var.get_tensor() model_np = np.array(tensor) diff --git a/python/paddle/profiler/profiler.py b/python/paddle/profiler/profiler.py index e274f007b83b0e..c450b8f0a2b5a1 100644 --- a/python/paddle/profiler/profiler.py +++ b/python/paddle/profiler/profiler.py @@ -594,7 +594,7 @@ def start(self): self.profiler.prepare() self.profiler.start() self.record_event = RecordEvent( - name="ProfileStep#{}".format(self.step_num), + name=f"ProfileStep#{self.step_num}", event_type=TracerEventType.ProfileStep, ) self.record_event.begin() @@ -684,7 +684,7 @@ def step(self, num_samples: Optional[int] = None): self.current_state = self.scheduler(self.step_num) self._trigger_action() self.record_event = RecordEvent( - name="ProfileStep#{}".format(self.step_num), + name=f"ProfileStep#{self.step_num}", event_type=TracerEventType.ProfileStep, ) self.record_event.begin() @@ -907,7 +907,7 @@ def get_profiler(config_path): with open(config_path, 'r') as filehandle: config_dict = json.load(filehandle) except Exception as e: - print('Load config file for profiler error: {}'.format(e)) + print(f'Load config file for profiler error: {e}') print('Use default parameters instead.') return Profiler() translated_config_dict = {} diff --git a/python/paddle/profiler/profiler_statistic.py b/python/paddle/profiler/profiler_statistic.py index c928309b66eac8..69fd3c15130e34 100755 --- a/python/paddle/profiler/profiler_statistic.py +++ b/python/paddle/profiler/profiler_statistic.py @@ -228,24 +228,24 @@ def build_layer(node, depth=0): def _format_large_number(n, precision=2): if n // 1e12 > 0: - return "{} T".format(round(n / 1e12, precision)) + return f"{round(n / 1e12, precision)} T" if n // 1e9 > 0: - return "{} G".format(round(n / 1e9, precision)) + return f"{round(n / 1e9, precision)} G" if n // 1e6 > 0: - return "{} M".format(round(n / 1e6, precision)) + return f"{round(n / 1e6, precision)} M" if n // 1e3 > 0: - return "{} K".format(round(n / 1e3, precision)) - return "{}".format(round(n, precision)) + return f"{round(n / 1e3, precision)} K" + return f"{round(n, precision)}" def _format_time(n, precision=2): if n // 1e9 > 0: - return "{} s".format(round(n / 1e9, precision)) + return f"{round(n / 1e9, precision)} s" if n // 1e6 > 0: - return "{} ms".format(round(n / 1e6, 
precision)) + return f"{round(n / 1e6, precision)} ms" if n // 1e3 > 0: - return "{} us".format(round(n / 1e3, precision)) - return "{} ns".format(round(n, precision)) + return f"{round(n / 1e3, precision)} us" + return f"{round(n, precision)} ns" def _gen_layer_flops(node, repeat=1): @@ -977,7 +977,7 @@ def format_ratio(ratio, indent=0): ) ) utilization = gpu_time / total_time - row_values = ['GPU{}'.format(gpu_name), format_ratio(utilization)] + row_values = [f'GPU{gpu_name}', format_ratio(utilization)] append(row_format.format(*row_values)) append(header_sep) @@ -1010,7 +1010,7 @@ def format_ratio(ratio, indent=0): # construct table string append(add_title(line_length, "Overview Summary")) - append('Time unit: {}'.format(time_unit)) + append(f'Time unit: {time_unit}') append(header_sep) append(row_format.format(*headers)) append(header_sep) @@ -1155,13 +1155,9 @@ def format_ratio(ratio, indent=0): gpu_ratio = 0 else: gpu_ratio = float(item.gpu_time) / gpu_total_time - name = ( - '{}'.format(name) - if 'ProfileStep' in name - else ' {}'.format(name) - ) + name = f'{name}' if 'ProfileStep' in name else f' {name}' row_values = [ - '{}'.format(name), + f'{name}', item.call, '{} / {} / {} / {} / {}'.format( format_time(item.cpu_time, unit=time_unit), @@ -1237,7 +1233,7 @@ def format_ratio(ratio, indent=0): # construct table string append(add_title(line_length, "Model Summary")) - append('Time unit: {}'.format(time_unit)) + append(f'Time unit: {time_unit}') append(header_sep) append(row_format.format(*headers)) append(header_sep) @@ -1275,7 +1271,7 @@ def format_ratio(ratio, indent=0): # construct table string append(add_title(line_length, "Distribution Summary")) - append('Time unit: {}'.format(time_unit)) + append(f'Time unit: {time_unit}') append(header_sep) append(row_format.format(*headers)) append(header_sep) @@ -1345,7 +1341,7 @@ def format_ratio(ratio, indent=0): 'All threads merged': statistic_data.event_summary.items } for thread_id, items in thread_items.items(): - all_row_values.append("Thread: {}".format(thread_id)) + all_row_values.append(f"Thread: {thread_id}") if sorted_by == SortedKeys.CPUTotal: sorted_items = sorted( items.items(), key=lambda x: x[1].cpu_time, reverse=True @@ -1456,7 +1452,7 @@ def format_ratio(ratio, indent=0): ] innerop_name += "..." row_values = [ - ' {}'.format(innerop_name), + f' {innerop_name}', innerop_node.call, '{} / {} / {} / {} / {}'.format( format_time( @@ -1518,7 +1514,7 @@ def format_ratio(ratio, indent=0): ] device_node_name += "..." row_values = [ - ' {}'.format(device_node_name), + f' {device_node_name}', device_node.call, '- / - / - / - / -', '{} / {} / {} / {} / {}'.format( @@ -1559,7 +1555,7 @@ def format_ratio(ratio, indent=0): ] device_node_name += "..." 
row_values = [ - ' {}'.format(device_node_name), + f' {device_node_name}', device_node.call, '- / - / - / - / -', '{} / {} / {} / {} / {}'.format( @@ -1619,7 +1615,7 @@ def format_ratio(ratio, indent=0): # construct table string append(add_title(line_length, "Operator Summary")) - append('Time unit: {}'.format(time_unit)) + append(f'Time unit: {time_unit}') append(header_sep) append(row_format.format(*headers)) append(header_sep) @@ -1713,7 +1709,7 @@ def format_ratio(ratio, indent=0): # construct table string append(add_title(line_length, "Kernel Summary")) - append('Time unit: {}'.format(time_unit)) + append(f'Time unit: {time_unit}') append(header_sep) append(row_format.format(*headers)) append(header_sep) @@ -1809,7 +1805,7 @@ def format_ratio(ratio, indent=0): # construct table string append(add_title(line_length, "Memory Manipulation Summary")) - append('Time unit: {}'.format(time_unit)) + append(f'Time unit: {time_unit}') append(header_sep) append(row_format.format(*headers)) append(header_sep) @@ -1838,7 +1834,7 @@ def format_ratio(ratio, indent=0): 'All threads merged': statistic_data.event_summary.userdefined_items } for thread_id, items in userdefined_thread_items.items(): - all_row_values.append("Thread: {}".format(thread_id)) + all_row_values.append(f"Thread: {thread_id}") if sorted_by == SortedKeys.CPUTotal: sorted_items = sorted( items.items(), key=lambda x: x[1].cpu_time, reverse=True @@ -1956,7 +1952,7 @@ def format_ratio(ratio, indent=0): # construct table string append(add_title(line_length, "UserDefined Summary")) - append('Time unit: {}'.format(time_unit)) + append(f'Time unit: {time_unit}') append(header_sep) append(row_format.format(*headers)) append(header_sep) @@ -2046,9 +2042,7 @@ def format_ratio(ratio, indent=0): # construct table string append( - add_title( - line_length, "Memory Summary - {}".format(device_type) - ) + add_title(line_length, f"Memory Summary - {device_type}") ) append( 'Peak Allocated Memory: {}'.format( diff --git a/python/paddle/profiler/timer.py b/python/paddle/profiler/timer.py index 6675cfa97991ce..3fd4eeafde36f7 100644 --- a/python/paddle/profiler/timer.py +++ b/python/paddle/profiler/timer.py @@ -385,10 +385,10 @@ def step_info(self, unit): self.current_event.speed_unit = 'steps/s' else: self.current_event.speed_unit = unit + '/s' - message += ' %s: %.5f s' % ('batch_cost', batch_average) + message += ' {}: {:.5f} s'.format('batch_cost', batch_average) speed_average = self.current_event.speed_average() if speed_average: - message += ' ips: %.3f %s' % ( + message += ' ips: {:.3f} {}'.format( speed_average, self.current_event.speed_unit, ) diff --git a/python/paddle/quantization/imperative/ptq.py b/python/paddle/quantization/imperative/ptq.py index a53ef21bbd7d84..8d7e1bac6eca3d 100644 --- a/python/paddle/quantization/imperative/ptq.py +++ b/python/paddle/quantization/imperative/ptq.py @@ -250,9 +250,7 @@ def _cal_thresholds(self, model): if self._is_quant_layer(sub_layer): cur_num += 1 if cur_num % 5 == 0: - _logger.info( - "Process the %s / %s layer" % (cur_num, total_num) - ) + _logger.info(f"Process the {cur_num} / {total_num} layer") quant_config = sub_layer._quant_config diff --git a/python/paddle/quantization/quanters/abs_max.py b/python/paddle/quantization/quanters/abs_max.py index abb4cb84002702..58a47bb9896bda 100644 --- a/python/paddle/quantization/quanters/abs_max.py +++ b/python/paddle/quantization/quanters/abs_max.py @@ -105,9 +105,7 @@ def __init__( super().__init__() self._moving_rate = moving_rate self._bit_length = 
bit_length - scale_prefix = ( - "{}.scale".format(name) if name else 'quant_dequant.scale' - ) + scale_prefix = f"{name}.scale" if name else 'quant_dequant.scale' scale_attr = ParamAttr( name=unique_name.generate(scale_prefix), initializer=Constant(0.001), @@ -118,9 +116,7 @@ def __init__( ) self._scale.stop_gradient = True - state_prefix = ( - "{}.state".format(name) if name else 'quant_dequant.state' - ) + state_prefix = f"{name}.state" if name else 'quant_dequant.state' state_attr = ParamAttr( name=unique_name.generate(state_prefix), initializer=Constant(1), @@ -131,9 +127,7 @@ def __init__( ) self._state.stop_gradient = True - accum_prefix = ( - "{}.accum".format(name) if name else 'quant_dequant.accum' - ) + accum_prefix = f"{name}.accum" if name else 'quant_dequant.accum' accum_attr = ParamAttr( name=unique_name.generate(accum_prefix), initializer=Constant(1), @@ -155,7 +149,7 @@ def dynamic_forward(self, input): ) quant_out = _varbase_creator( type=input.type, - name="{}.quantized.dequantized".format(input.name), + name=f"{input.name}.quantized.dequantized", shape=input.shape, dtype=input.dtype, persistable=False, @@ -178,7 +172,7 @@ def dynamic_forward(self, input): self._scale, state, accum, - *attrs + *attrs, ) return out @@ -194,7 +188,7 @@ def static_forward(self, input): } inputs = {"X": [input], "InScale": [self._scale]} quant_out = self._helper.create_variable( - name="{}.quantized.dequantized".format(input.name), + name=f"{input.name}.quantized.dequantized", dtype=input.dtype, type=core.VarDesc.VarType.LOD_TENSOR, persistable=False, diff --git a/python/paddle/sparse/nn/layer/activation.py b/python/paddle/sparse/nn/layer/activation.py index 84037a8dacd175..c7ece016722d7f 100644 --- a/python/paddle/sparse/nn/layer/activation.py +++ b/python/paddle/sparse/nn/layer/activation.py @@ -57,7 +57,7 @@ def forward(self, x): return F.relu(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -127,7 +127,7 @@ def forward(self, x): return F.softmax(x, self._axis, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -168,7 +168,7 @@ def forward(self, x): return F.relu6(x, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str @@ -218,5 +218,5 @@ def forward(self, x): return F.leaky_relu(x, self._negative_slope, self._name) def extra_repr(self): - name_str = 'name={}'.format(self._name) if self._name else '' + name_str = f'name={self._name}' if self._name else '' return name_str diff --git a/python/paddle/static/amp/bf16/amp_utils.py b/python/paddle/static/amp/bf16/amp_utils.py index f9a813aa44d41c..84681e9378b679 100644 --- a/python/paddle/static/amp/bf16/amp_utils.py +++ b/python/paddle/static/amp/bf16/amp_utils.py @@ -499,7 +499,7 @@ def cast_parameters_to_bf16(place, program, scope=None, to_bf16_var_names=None): var_scope = scope if scope else global_scope() for param in all_parameters: if param.name in bf16_var_names: - _logger.debug("---- cast {} to bf16 dtype ----".format(param.name)) + _logger.debug(f"---- cast {param.name} to bf16 dtype ----") param_t = var_scope.find_var(param.name).get_tensor() data = np.array(param_t) param_t.set(convert_float_to_uint16(data), place) diff --git a/python/paddle/static/amp/fp16_utils.py 
b/python/paddle/static/amp/fp16_utils.py index 24a2a53d35a236..ced21f9bb758ec 100644 --- a/python/paddle/static/amp/fp16_utils.py +++ b/python/paddle/static/amp/fp16_utils.py @@ -299,7 +299,7 @@ def find_true_prev_op(ops, cur_op, var_name): if not len(prev_op) == 1: raise ValueError( "There must be only one previous op " - "that outputs {0} variable".format(var_name) + f"that outputs {var_name} variable" ) else: return prev_op[0] @@ -611,7 +611,7 @@ def cast_parameters_to_fp16(place, program, scope=None, to_fp16_var_names=None): var_scope = scope if scope else global_scope() for param in all_parameters: if param.name in fp16_var_names: - _logger.debug("---- cast {} to fp16 dtype ----".format(param.name)) + _logger.debug(f"---- cast {param.name} to fp16 dtype ----") param_t = var_scope.find_var(param.name).get_tensor() data = np.array(param_t) param_t.set(np.float16(data), place) @@ -751,8 +751,8 @@ def update_role_var_grad(main_prog, params_grads): op._remove_attr("op_role_var") else: raise ValueError( - "The cast op {0} must be in BACKWARD role " - "and have op_role_var attr.".format(op) + f"The cast op {op} must be in BACKWARD role " + "and have op_role_var attr." ) fp16_grad_name = op.input(op.input_names[0])[0] @@ -774,9 +774,9 @@ def update_role_var_grad(main_prog, params_grads): post_ops = find_true_post_op(block.ops, op, g.name) if post_ops: raise ValueError( - "The cast op {0}'s output should not be" + f"The cast op {op}'s output should not be" "used by a non-optimize op, however, it" - "is used by {1}".format(op, post_ops[0]) + f"is used by {post_ops[0]}" ) # add new op in the python and cpp at the same time new_op_desc = block.desc.append_op() @@ -792,6 +792,6 @@ def update_role_var_grad(main_prog, params_grads): block.ops.append(new_op) op_idx = find_op_index(block.desc, op.desc) if op_idx == -1: - raise ValueError("The op {0} is not in program".format(op)) + raise ValueError(f"The op {op} is not in program") block._remove_op(op_idx, sync=False) block._sync_with_cpp() diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index 28969085c84301..cac8f821c5d724 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -77,7 +77,7 @@ def _check_vars(name, var_list): var_list = [var_list] if not var_list or not all([isinstance(var, Variable) for var in var_list]): raise ValueError( - "'{}' should be a Variable or a list of Variable.".format(name) + f"'{name}' should be a Variable or a list of Variable." 
) @@ -212,9 +212,7 @@ def normalize_program(program, feed_vars, fetch_vars): uniq_fetch_vars = [] for i, var in enumerate(fetch_vars): if var.dtype != paddle.bool: - var = paddle.scale( - var, 1.0, name="save_infer_model/scale_{}".format(i) - ) + var = paddle.scale(var, 1.0, name=f"save_infer_model/scale_{i}") uniq_fetch_vars.append(var) fetch_vars = uniq_fetch_vars @@ -507,9 +505,9 @@ def save_inference_model( model_path = path_prefix + ".pdmodel" params_path = path_prefix + ".pdiparams" if os.path.isdir(model_path): - raise ValueError("'{}' is an existing directory.".format(model_path)) + raise ValueError(f"'{model_path}' is an existing directory.") if os.path.isdir(params_path): - raise ValueError("'{}' is an existing directory.".format(params_path)) + raise ValueError(f"'{params_path}' is an existing directory.") # verify feed_vars _check_vars('feed_vars', feed_vars) @@ -1336,7 +1334,7 @@ def save(program, model_path, protocol=4, **configs): if protocol < 2 or protocol > 4: raise ValueError( - "Expected 1<'protocol'<5, but received protocol={}".format(protocol) + f"Expected 1<'protocol'<5, but received protocol={protocol}" ) dir_name = os.path.dirname(model_path) @@ -1583,7 +1581,7 @@ def set_var(var, ndarray): opt_file_name = model_prefix + ".pdopt" assert os.path.exists( opt_file_name - ), "Optimizer file [{}] not exits".format(opt_file_name) + ), f"Optimizer file [{opt_file_name}] not exits" if executor: paddle.fluid.core._create_loaded_parameter( @@ -1890,7 +1888,7 @@ def _load_vars_with_try_catch( assert os.path.exists( parameter_file_name - ), "Parameter file [{}] not exits".format(parameter_file_name) + ), f"Parameter file [{parameter_file_name}] not exits" with open(parameter_file_name, 'rb') as f: # When value of dict is lager than 4GB ,there is a Bug on 'MAC python3' diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 05e76601feaea8..28bc648f7c5a6f 100644 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -909,8 +909,8 @@ def conv2d( num_channels = input.shape[3] if channel_last else input.shape[1] if num_channels < 0: raise ValueError( - "The channel dimmention of the input(%s) should be defined. " - "Received: %s." % (str(input.shape), str(num_channels)) + "The channel dimmention of the input({}) should be defined. " + "Received: {}.".format(str(input.shape), str(num_channels)) ) assert param_attr is not False, "param_attr should not be False here." @@ -1231,8 +1231,8 @@ def conv3d( num_channels = input.shape[4] if channel_last else input.shape[1] if num_channels < 0: raise ValueError( - "The channel dimmention of the input(%s) should be defined. " - "Received: %s." % (str(input.shape), str(num_channels)) + "The channel dimmention of the input({}) should be defined. 
" + "Received: {}.".format(str(input.shape), str(num_channels)) ) if groups is None: diff --git a/python/paddle/static/nn/control_flow.py b/python/paddle/static/nn/control_flow.py index d8fa89f24468cd..c5d52654775418 100644 --- a/python/paddle/static/nn/control_flow.py +++ b/python/paddle/static/nn/control_flow.py @@ -300,7 +300,7 @@ def __init__(self, cond, is_test=False, name=None): check_variable_and_dtype(cond, 'cond', ['bool'], 'static.nn.While') if reduce(lambda a, b: a * b, cond.shape, 1) != 1: raise TypeError( - "condition expected shape as [1], but given shape as {0}.".format( + "condition expected shape as [1], but given shape as {}.".format( list(cond.shape) ) ) @@ -462,7 +462,7 @@ def body(i, ten): if reduce(lambda a, b: a * b, pre_cond.shape, 1) != 1: raise TypeError( "the shape of the variable returned by cond should be [1]," - "but given shape as {0}.".format(list(pre_cond.shape)) + f"but given shape as {list(pre_cond.shape)}." ) if _non_static_mode(): @@ -500,7 +500,7 @@ def body(i, ten): except ValueError as e: raise ValueError( "body in while_loop should return the same arity " - "(length and structure) as loop_vars: {0}".format(e) + f"(length and structure) as loop_vars: {e}" ) now_cond = cond(*output_vars) map_structure(assign_skip_lod_tensor_array, output_vars, loop_vars) @@ -839,7 +839,7 @@ def _check_args(branch_index, branch_fns, default): if not callable(fn): raise TypeError( _error_message( - "The type of function for key {}".format(key), + f"The type of function for key {key}", "branch_fns", "switch_case", "callable", diff --git a/python/paddle/static/quantization/adaround.py b/python/paddle/static/quantization/adaround.py index e9fa4418cd2c6c..4fde71ffe11a17 100644 --- a/python/paddle/static/quantization/adaround.py +++ b/python/paddle/static/quantization/adaround.py @@ -241,7 +241,7 @@ def run_adaround( fetch_op_name = fetch_list[0].name final_weight_tensor_quant_dict = {} for weight_var_name, quant_op_out_name in quantized_op_pairs.items(): - _logger.info('Start adaround op: {}'.format(weight_var_name)) + _logger.info(f'Start adaround op: {weight_var_name}') weight_op_type = weight_op_pairs[weight_var_name] # get scale and weight tensor weight_var_tensor = load_variable_data(scope, weight_var_name) diff --git a/python/paddle/static/quantization/post_training_quantization.py b/python/paddle/static/quantization/post_training_quantization.py index 6382d6d5c7d6f0..673743302611e4 100644 --- a/python/paddle/static/quantization/post_training_quantization.py +++ b/python/paddle/static/quantization/post_training_quantization.py @@ -108,7 +108,7 @@ def _apply_pass( ir_pass.set(attr, value) ir_pass.apply(cpp_graph) if debug: - graph.draw('.', 'qat_fp32_{}'.format(pass_name), graph.all_op_nodes()) + graph.draw('.', f'qat_fp32_{pass_name}', graph.all_op_nodes()) _remove_unused_var_nodes(graph) return graph @@ -1131,7 +1131,7 @@ def _calculate_kl_hist_threshold(self): ''' Calculate the KL or hist threshold of quantized variables. ''' - _logger.info("Calculate {} threshold ...".format(self._algo)) + _logger.info(f"Calculate {self._algo} threshold ...") assert self._algo in ["KL", "hist"], "The algo should be KL or hist." 
# Abs_max threshold for weights diff --git a/python/paddle/static/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/static/quantization/quant2_int8_mkldnn_pass.py index 3e6737bf8c3231..990f11220927b9 100644 --- a/python/paddle/static/quantization/quant2_int8_mkldnn_pass.py +++ b/python/paddle/static/quantization/quant2_int8_mkldnn_pass.py @@ -510,7 +510,7 @@ def _apply_pass(self, graph, pass_name, attrs=None, attr_values=None): if self._debug: graph.draw( '.', - '{}_{}_{}'.format(self._pass_group, self._pass_idx, pass_name), + f'{self._pass_group}_{self._pass_idx}_{pass_name}', graph.all_op_nodes(), ) self._remove_unused_var_nodes(graph) diff --git a/python/paddle/static/quantization/quantization_pass.py b/python/paddle/static/quantization/quantization_pass.py index b7ebba0fbdbb8b..606e88320cb5e0 100644 --- a/python/paddle/static/quantization/quantization_pass.py +++ b/python/paddle/static/quantization/quantization_pass.py @@ -497,7 +497,7 @@ def _insert_quant_abs_max_op(self, graph, var_node, name, quant_bits): """ Insert fake_quantize_abs_max op in the graph. """ - assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + assert var_node.is_var(), f'{var_node.name()} is not a var' quant_var_node = graph.create_var_node( name=self._quantized_var_name(name), @@ -544,7 +544,7 @@ def _insert_quant_range_abs_max_op(self, graph, var_node, name, quant_bits): """ Insert fake_quantize_range_abs_max on the graph. """ - assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + assert var_node.is_var(), f'{var_node.name()} is not a var' quant_var_node = graph.create_var_node( name=self._quantized_var_name(name), @@ -735,7 +735,7 @@ def _insert_channel_quant_op( """ Insert fake_channel_wise_quantize_abs_max op in the graph. """ - assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + assert var_node.is_var(), f'{var_node.name()} is not a var' quant_var_node = graph.create_var_node( name=self._quantized_var_name(name), @@ -785,7 +785,7 @@ def _insert_dequant_op(self, graph, var_node, scale_var_node, quant_bits): """ Insert fake_dequantize_op in the graph. """ - assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + assert var_node.is_var(), f'{var_node.name()} is not a var' dequant_var_node = graph.create_var_node( name=self._dequantized_var_name(var_node.name()), @@ -814,7 +814,7 @@ def _insert_channel_dequant_op( """ Insert fake_channel_wise_dequantize_max_abs in the graph. 
""" - assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + assert var_node.is_var(), f'{var_node.name()} is not a var' dequant_var_node = graph.create_var_node( name=self._dequantized_var_name(var_node.name()), @@ -1982,12 +1982,12 @@ def _inser_quant_dequant_moving_average_abs_max_op( ): """Insert fake_quantize_dequantize_moving_average_abs_max op.""" quant_var_node = graph.create_var_node( - name="{}.quant_dequant".format(var_node.name()), + name=f"{var_node.name()}.quant_dequant", var_type=var_node.type(), shape=var_node.shape(), var_dtype=var_node.dtype(), ) - scale_name = "{}.quant_dequant@scale".format(var_node.name()) + scale_name = f"{var_node.name()}.quant_dequant@scale" if var_node.dtype() == core.VarDesc.VarType.FP64: data_type = 'float64' elif var_node.dtype() == core.VarDesc.VarType.FP32: @@ -2011,7 +2011,7 @@ def _inser_quant_dequant_moving_average_abs_max_op( scale_value = np.array([_SCALE_DEFAULT_VALUE], dtype=data_type) scale_in_node = graph.create_persistable_node( - name="{}.quant_dequant@scale".format(var_node.name()), + name=f"{var_node.name()}.quant_dequant@scale", var_type=core.VarDesc.VarType.LOD_TENSOR, shape=[1], var_dtype=var_node.dtype(), @@ -2133,7 +2133,7 @@ def __init__( def insert_quant_op( self, graph, var_node, var_name=None, scale_var_node=None ): - assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + assert var_node.is_var(), f'{var_node.name()} is not a var' var_name = var_node.name() if not var_name else var_name quant_var_node = graph.create_var_node( name=self._quantized_var_name(var_name), @@ -2272,7 +2272,7 @@ def insert_quant_op( return quant_var_node, scale_var_node def insert_dequant_op(self, graph, var_node, scale_var_node): - assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + assert var_node.is_var(), f'{var_node.name()} is not a var' dequant_var_node = graph.create_var_node( name=self._dequantized_var_name(var_node.name()), @@ -3329,11 +3329,11 @@ def _scale_name(self, var_name): return "%s@scale" % (var_name) def _insert_quant_dequant_op(self, graph, var_node): - assert var_node.is_var(), '{} is not a var'.format(var_node.name()) + assert var_node.is_var(), f'{var_node.name()} is not a var' var_name = var_node.name() quant_axis = -1 quant_var_node = graph.create_var_node( - name="{}.quantized".format(var_name), + name=f"{var_name}.quantized", var_type=var_node.type(), shape=var_node.shape(), var_dtype=var_node.dtype(), @@ -3376,11 +3376,11 @@ def _insert_quant_dequant_op(self, graph, var_node): try: zero_point_node = graph._find_node_by_name( graph.all_persistable_nodes(), - "{}@zero_point".format(quant_var_node.name()), + f"{quant_var_node.name()}@zero_point", ) except: zero_point_node = graph.create_persistable_node( - name="{}@zero_point".format(quant_var_node.name()), + name=f"{quant_var_node.name()}@zero_point", var_type=core.VarDesc.VarType.LOD_TENSOR, shape=scale_var_node.shape(), var_dtype=core.VarDesc.VarType.INT32, @@ -3419,7 +3419,7 @@ def _insert_quant_dequant_op(self, graph, var_node): # add dequant_linear node dequant_var_node = graph.create_var_node( - name="{}.dequantized".format(quant_var_node.name()), + name=f"{quant_var_node.name()}.dequantized", var_type=quant_var_node.type(), shape=quant_var_node.shape(), var_dtype=quant_var_node.dtype(), diff --git a/python/paddle/static/quantization/tests/convert_model2dot.py b/python/paddle/static/quantization/tests/convert_model2dot.py index 4eb5463b511e10..4ae3e838d31413 100644 --- 
a/python/paddle/static/quantization/tests/convert_model2dot.py +++ b/python/paddle/static/quantization/tests/convert_model2dot.py @@ -76,7 +76,7 @@ def generate_dot_for_model(model_path, save_graph_dir, save_graph_name): save_graph_name = model_name graph.draw(save_graph_dir, save_graph_name, graph.all_op_nodes()) print( - "Success! Generated dot and pdf files for {0} model, that can be found at {1} named {2}.\n".format( + "Success! Generated dot and pdf files for {} model, that can be found at {} named {}.\n".format( model_name, save_graph_dir, save_graph_name ) ) diff --git a/python/paddle/static/quantization/tests/quant2_int8_image_classification_comparison.py b/python/paddle/static/quantization/tests/quant2_int8_image_classification_comparison.py index d10bd4641a886c..d3bd3a48dda255 100644 --- a/python/paddle/static/quantization/tests/quant2_int8_image_classification_comparison.py +++ b/python/paddle/static/quantization/tests/quant2_int8_image_classification_comparison.py @@ -290,15 +290,8 @@ def _predict( iters += 1 appx = ' (warm-up)' if iters <= skip_batch_num else '' _logger.info( - 'batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, ' - 'latency: {3:.4f} ms, fps: {4:.2f}'.format( - iters, - batch_acc1, - batch_acc5, - batch_time / batch_size, - fps, - appx, - ) + f'batch {iters}{appx}, acc1: {batch_acc1:.4f}, acc5: {batch_acc5:.4f}, ' + f'latency: {batch_time / batch_size:.4f} ms, fps: {fps:.2f}' ) # Postprocess benchmark data @@ -310,22 +303,20 @@ def _predict( infer_total_time = time.time() - infer_start_time acc1_avg = np.mean(infer_accs1) acc5_avg = np.mean(infer_accs5) - _logger.info( - 'Total inference run time: {:.2f} s'.format(infer_total_time) - ) + _logger.info(f'Total inference run time: {infer_total_time:.2f} s') return outputs, acc1_avg, acc5_avg, fps_avg, latency_avg def _print_performance(self, title, fps, lat): _logger.info( - '{0}: avg fps: {1:.2f}, avg latency: {2:.4f} ms'.format( + '{}: avg fps: {:.2f}, avg latency: {:.4f} ms'.format( title, fps, lat ) ) def _print_accuracy(self, title, acc1, acc5): _logger.info( - '{0}: avg top1 accuracy: {1:.4f}, avg top5 accuracy: {2:.4f}'.format( + '{}: avg top1 accuracy: {:.4f}, avg top5 accuracy: {:.4f}'.format( title, acc1, acc5 ) ) @@ -347,7 +338,7 @@ def _summarize_accuracy( def _compare_accuracy(self, threshold, quant_acc1, int8_acc1): _logger.info( - 'Accepted top1 accuracy drop threshold: {0}. (condition: (Quant_top1_acc - IN8_top1_acc) <= threshold && Quant_top1_acc > 0.5 && INT8_top1_acc > 0.5)'.format( + 'Accepted top1 accuracy drop threshold: {}. (condition: (Quant_top1_acc - IN8_top1_acc) <= threshold && Quant_top1_acc > 0.5 && INT8_top1_acc > 0.5)'.format( threshold ) ) @@ -399,13 +390,13 @@ def test_graph_transformation(self): ), 'The --targets option, if used, must contain at least one of the targets: "quant", "int8", "fp32".' 
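
The benchmark-log rewrites in the comparison scripts above move both the format specs and the arithmetic into the placeholders. A small self-contained sketch with made-up numbers:

iters, appx = 3, " (warm-up)"
batch_acc1, batch_acc5 = 0.71234, 0.90321
batch_time, batch_size, fps = 12.8, 32, 2500.0

# Positional indices disappear, the :.4f / :.2f specs follow the expression,
# and batch_time / batch_size is evaluated inside the placeholder.
line = (
    f"batch {iters}{appx}, acc1: {batch_acc1:.4f}, acc5: {batch_acc5:.4f}, "
    f"latency: {batch_time / batch_size:.4f} ms, fps: {fps:.2f}"
)
print(line)
# batch 3 (warm-up), acc1: 0.7123, acc5: 0.9032, latency: 0.4000 ms, fps: 2500.00
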
_logger.info('Quant & INT8 prediction run.') - _logger.info('Quant model: {}'.format(quant_model_path)) + _logger.info(f'Quant model: {quant_model_path}') if fp32_model_path: - _logger.info('FP32 model: {}'.format(fp32_model_path)) - _logger.info('Dataset: {}'.format(data_path)) - _logger.info('Batch size: {}'.format(batch_size)) - _logger.info('Batch number: {}'.format(batch_num)) - _logger.info('Accuracy drop threshold: {}.'.format(acc_diff_threshold)) + _logger.info(f'FP32 model: {fp32_model_path}') + _logger.info(f'Dataset: {data_path}') + _logger.info(f'Batch size: {batch_size}') + _logger.info(f'Batch number: {batch_num}') + _logger.info(f'Accuracy drop threshold: {acc_diff_threshold}.') _logger.info( 'Quantized ops: {}.'.format( ','.join(self._quantized_ops) diff --git a/python/paddle/static/quantization/tests/quant2_int8_lstm_model.py b/python/paddle/static/quantization/tests/quant2_int8_lstm_model.py index 01a205c3a99024..ffb2c755433d5e 100644 --- a/python/paddle/static/quantization/tests/quant2_int8_lstm_model.py +++ b/python/paddle/static/quantization/tests/quant2_int8_lstm_model.py @@ -253,19 +253,19 @@ def test_lstm_model(self): ) print( - "FP32: fps {0}, hx_acc {1}, ctc_acc {2}".format( + "FP32: fps {}, hx_acc {}, ctc_acc {}".format( fp32_fps, fp32_hx_acc, fp32_ctc_acc ) ) print( - "PTQ_INT8: fps {0}, hx_acc {1}, ctc_acc {2}".format( + "PTQ_INT8: fps {}, hx_acc {}, ctc_acc {}".format( int8_fps, int8_hx_acc, int8_ctc_acc ) ) print( - "QAT: fps {0}, hx_acc {1}, ctc_acc {2}".format( + "QAT: fps {}, hx_acc {}, ctc_acc {}".format( quant_fps, quant_hx_acc, quant_ctc_acc ) ) diff --git a/python/paddle/static/quantization/tests/quant2_int8_nlp_comparison.py b/python/paddle/static/quantization/tests/quant2_int8_nlp_comparison.py index 7aac1d4e761e0f..459046d16675b0 100644 --- a/python/paddle/static/quantization/tests/quant2_int8_nlp_comparison.py +++ b/python/paddle/static/quantization/tests/quant2_int8_nlp_comparison.py @@ -240,8 +240,8 @@ def _predict( iters += 1 appx = ' (warm-up)' if iters <= skip_batch_num else '' _logger.info( - 'batch {0}{4}, acc: {1:.4f}, latency: {2:.4f} ms, predictions per sec: {3:.2f}'.format( - iters, batch_acc, latency, pps, appx + 'batch {}{}, acc: {:.4f}, latency: {:.4f} ms, predictions per sec: {:.2f}'.format( + iters, appx, batch_acc, latency, pps ) ) @@ -253,21 +253,19 @@ def _predict( ppses = ppses[skip_batch_num:] pps_avg = np.average(ppses) acc_avg = float(np.sum(total_correct)) / float(total_samples) - _logger.info( - 'Total inference run time: {:.2f} s'.format(infer_total_time) - ) + _logger.info(f'Total inference run time: {infer_total_time:.2f} s') return acc_avg, pps_avg, latency_avg def _print_performance(self, title, pps, lat): _logger.info( - '{0}: avg predictions per sec: {1:.2f}, avg latency: {2:.4f} ms'.format( + '{}: avg predictions per sec: {:.2f}, avg latency: {:.4f} ms'.format( title, pps, lat ) ) def _print_accuracy(self, title, acc): - _logger.info('{0}: avg accuracy: {1:.6f}'.format(title, acc)) + _logger.info(f'{title}: avg accuracy: {acc:.6f}') def _summarize_performance(self, int8_pps, int8_lat, fp32_pps, fp32_lat): _logger.info('--- Performance summary ---') @@ -284,7 +282,7 @@ def _summarize_accuracy(self, quant_acc, int8_acc, fp32_acc): def _compare_accuracy(self, threshold, quant_acc, int8_acc): _logger.info( - 'Accepted accuracy drop threshold: {0}. (condition: (Quant_acc - INT8_acc) <= threshold)'.format( + 'Accepted accuracy drop threshold: {}. 
(condition: (Quant_acc - INT8_acc) <= threshold)'.format( threshold ) ) @@ -337,14 +335,14 @@ def test_graph_transformation(self): ), 'The --targets option, if used, must contain at least one of the targets: "quant", "int8", "fp32".' _logger.info('Quant & INT8 prediction run.') - _logger.info('Quant model: {}'.format(quant_model_path)) + _logger.info(f'Quant model: {quant_model_path}') if fp32_model_path: - _logger.info('FP32 model: {}'.format(fp32_model_path)) - _logger.info('Dataset: {}'.format(data_path)) - _logger.info('Labels: {}'.format(labels_path)) - _logger.info('Batch size: {}'.format(batch_size)) - _logger.info('Batch number: {}'.format(batch_num)) - _logger.info('Accuracy drop threshold: {}.'.format(acc_diff_threshold)) + _logger.info(f'FP32 model: {fp32_model_path}') + _logger.info(f'Dataset: {data_path}') + _logger.info(f'Labels: {labels_path}') + _logger.info(f'Batch size: {batch_size}') + _logger.info(f'Batch number: {batch_num}') + _logger.info(f'Accuracy drop threshold: {acc_diff_threshold}.') _logger.info( 'Quantized ops: {}.'.format( ','.join(self._quantized_ops) diff --git a/python/paddle/static/quantization/tests/quant_int8_image_classification_comparison.py b/python/paddle/static/quantization/tests/quant_int8_image_classification_comparison.py index 3f12bc3c63dc05..a1ca602406ac04 100644 --- a/python/paddle/static/quantization/tests/quant_int8_image_classification_comparison.py +++ b/python/paddle/static/quantization/tests/quant_int8_image_classification_comparison.py @@ -239,15 +239,8 @@ def _predict( iters += 1 appx = ' (warm-up)' if iters <= skip_batch_num else '' _logger.info( - 'batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, ' - 'latency: {3:.4f} ms, fps: {4:.2f}'.format( - iters, - batch_acc1, - batch_acc5, - batch_time / batch_size, - fps, - appx, - ) + f'batch {iters}{appx}, acc1: {batch_acc1:.4f}, acc5: {batch_acc5:.4f}, ' + f'latency: {batch_time / batch_size:.4f} ms, fps: {fps:.2f}' ) # Postprocess benchmark data @@ -259,21 +252,19 @@ def _predict( infer_total_time = time.time() - infer_start_time acc1_avg = np.mean(infer_accs1) acc5_avg = np.mean(infer_accs5) - _logger.info( - 'Total inference run time: {:.2f} s'.format(infer_total_time) - ) + _logger.info(f'Total inference run time: {infer_total_time:.2f} s') return outputs, acc1_avg, acc5_avg, fps_avg, latency_avg def _summarize_performance(self, fp32_fps, fp32_lat, int8_fps, int8_lat): _logger.info('--- Performance summary ---') _logger.info( - 'FP32: avg fps: {0:.2f}, avg latency: {1:.4f} ms'.format( + 'FP32: avg fps: {:.2f}, avg latency: {:.4f} ms'.format( fp32_fps, fp32_lat ) ) _logger.info( - 'INT8: avg fps: {0:.2f}, avg latency: {1:.4f} ms'.format( + 'INT8: avg fps: {:.2f}, avg latency: {:.4f} ms'.format( int8_fps, int8_lat ) ) @@ -283,17 +274,17 @@ def _compare_accuracy( ): _logger.info('--- Accuracy summary ---') _logger.info( - 'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - IN8_top1_acc) <= threshold)'.format( + 'Accepted top1 accuracy drop threshold: {}. 
(condition: (FP32_top1_acc - IN8_top1_acc) <= threshold)'.format( threshold ) ) _logger.info( - 'FP32: avg top1 accuracy: {0:.4f}, avg top5 accuracy: {1:.4f}'.format( + 'FP32: avg top1 accuracy: {:.4f}, avg top5 accuracy: {:.4f}'.format( fp32_acc1, fp32_acc5 ) ) _logger.info( - 'INT8: avg top1 accuracy: {0:.4f}, avg top5 accuracy: {1:.4f}'.format( + 'INT8: avg top1 accuracy: {:.4f}, avg top5 accuracy: {:.4f}'.format( int8_acc1, int8_acc5 ) ) @@ -320,11 +311,11 @@ def test_graph_transformation(self): self._debug = test_case_args.debug _logger.info('Quant FP32 & INT8 prediction run.') - _logger.info('Quant model: {0}'.format(quant_model_path)) - _logger.info('Dataset: {0}'.format(data_path)) - _logger.info('Batch size: {0}'.format(batch_size)) - _logger.info('Batch number: {0}'.format(batch_num)) - _logger.info('Accuracy drop threshold: {0}.'.format(acc_diff_threshold)) + _logger.info(f'Quant model: {quant_model_path}') + _logger.info(f'Dataset: {data_path}') + _logger.info(f'Batch size: {batch_size}') + _logger.info(f'Batch number: {batch_num}') + _logger.info(f'Accuracy drop threshold: {acc_diff_threshold}.') _logger.info('--- Quant FP32 prediction start ---') val_reader = paddle.batch( diff --git a/python/paddle/static/quantization/tests/test_imperative_ptq.py b/python/paddle/static/quantization/tests/test_imperative_ptq.py index f96a17382a18f3..959ca8c36d67d9 100644 --- a/python/paddle/static/quantization/tests/test_imperative_ptq.py +++ b/python/paddle/static/quantization/tests/test_imperative_ptq.py @@ -100,7 +100,7 @@ def download_model(self, data_url, data_md5, folder_name): download(data_url, self.download_path, data_md5) file_name = data_url.split('/')[-1] zip_path = os.path.join(self.cache_folder, file_name) - print('Data is downloaded at {0}'.format(zip_path)) + print(f'Data is downloaded at {zip_path}') data_cache_folder = os.path.join(self.cache_folder, folder_name) self.cache_unzipping(data_cache_folder, zip_path) diff --git a/python/paddle/static/quantization/tests/test_imperative_qat_amp.py b/python/paddle/static/quantization/tests/test_imperative_qat_amp.py index e27642d5bf7402..c1b1e005cde510 100644 --- a/python/paddle/static/quantization/tests/test_imperative_qat_amp.py +++ b/python/paddle/static/quantization/tests/test_imperative_qat_amp.py @@ -77,7 +77,7 @@ def download_model(self, data_url, data_md5, folder_name): download(data_url, self.download_path, data_md5) file_name = data_url.split('/')[-1] zip_path = os.path.join(self.cache_folder, file_name) - print('Data is downloaded at {0}'.format(zip_path)) + print(f'Data is downloaded at {zip_path}') data_cache_folder = os.path.join(self.cache_folder, folder_name) self.cache_unzipping(data_cache_folder, zip_path) diff --git a/python/paddle/static/quantization/tests/test_post_training_quantization_lstm_model.py b/python/paddle/static/quantization/tests/test_post_training_quantization_lstm_model.py index 906e4e6c073073..fefeb230c6aa91 100644 --- a/python/paddle/static/quantization/tests/test_post_training_quantization_lstm_model.py +++ b/python/paddle/static/quantization/tests/test_post_training_quantization_lstm_model.py @@ -66,7 +66,7 @@ def download_model(self, data_url, data_md5, folder_name): download(data_url, self.download_path, data_md5) file_name = data_url.split('/')[-1] zip_path = os.path.join(self.cache_folder, file_name) - print('Data is downloaded at {0}'.format(zip_path)) + print(f'Data is downloaded at {zip_path}') data_cache_folder = os.path.join(self.cache_folder, folder_name) 
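
The quant2_int8_nlp_comparison.py hunk earlier in this series is the one place where dropping explicit indices ({0}{4} becomes {}{}) also forces the .format() arguments to be reordered, because auto-numbered fields consume arguments strictly left to right. A sketch with illustrative values showing the two calls render the same message:

iters, batch_acc, latency, pps, appx = 7, 0.8512, 3.125, 320.5, ""

indexed = (
    'batch {0}{4}, acc: {1:.4f}, latency: {2:.4f} ms, '
    'predictions per sec: {3:.2f}'.format(iters, batch_acc, latency, pps, appx)
)
auto = (
    'batch {}{}, acc: {:.4f}, latency: {:.4f} ms, '
    'predictions per sec: {:.2f}'.format(iters, appx, batch_acc, latency, pps)
)
assert indexed == auto  # reordering keeps the rendered message identical
print(auto)  # batch 7, acc: 0.8512, latency: 3.1250 ms, predictions per sec: 320.50
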
self.cache_unzipping(data_cache_folder, zip_path) @@ -254,7 +254,7 @@ def run_test( data_path = os.path.join(data_path, data_name) print( - "Start FP32 inference for {0} on {1} samples ...".format( + "Start FP32 inference for {} on {} samples ...".format( model_name, infer_iterations ) ) @@ -267,7 +267,7 @@ def run_test( ) print( - "Start post training quantization for {0} on {1} samples ...".format( + "Start post training quantization for {} on {} samples ...".format( model_name, quant_iterations ) ) @@ -288,7 +288,7 @@ def run_test( ) print( - "Start INT8 inference for {0} on {1} samples ...".format( + "Start INT8 inference for {} on {} samples ...".format( model_name, infer_iterations ) ) @@ -300,14 +300,14 @@ def run_test( infer_iterations, ) - print("---Post training quantization of {} method---".format(algo)) + print(f"---Post training quantization of {algo} method---") print( - "FP32 {0}: batch_size {1}, latency {2} s, acc {3}.".format( + "FP32 {}: batch_size {}, latency {} s, acc {}.".format( model_name, 1, fp32_latency, fp32_acc ) ) print( - "INT8 {0}: batch_size {1}, latency {2} s, acc1 {3}.\n".format( + "INT8 {}: batch_size {}, latency {} s, acc1 {}.\n".format( model_name, 1, int8_latency, int8_acc ) ) diff --git a/python/paddle/static/quantization/tests/test_post_training_quantization_mnist.py b/python/paddle/static/quantization/tests/test_post_training_quantization_mnist.py index d22997eca6397d..1cdec57bf0baa7 100644 --- a/python/paddle/static/quantization/tests/test_post_training_quantization_mnist.py +++ b/python/paddle/static/quantization/tests/test_post_training_quantization_mnist.py @@ -77,19 +77,17 @@ def download(self, url, dirname, md5sum, save_name=None): retry_limit = 3 while not (os.path.exists(filename) and md5file(filename) == md5sum): if os.path.exists(filename): - sys.stderr.write( - "file %s md5 %s\n" % (md5file(filename), md5sum) - ) + sys.stderr.write(f"file {md5file(filename)} md5 {md5sum}\n") if retry < retry_limit: retry += 1 else: raise RuntimeError( - "Cannot download {0} within retry limit {1}".format( + "Cannot download {} within retry limit {}".format( url, retry_limit ) ) sys.stderr.write( - "Cache file %s not found, downloading %s \n" % (filename, url) + f"Cache file {filename} not found, downloading {url} \n" ) sys.stderr.write("Begin to download\n") try: @@ -131,7 +129,7 @@ def download_model(self, data_url, data_md5, folder_name): file_name = data_url.split('/')[-1] zip_path = os.path.join(self.cache_folder, file_name) print( - 'Data is downloaded at {0}. File exists: {1}'.format( + 'Data is downloaded at {}. 
File exists: {}'.format( zip_path, os.path.exists(zip_path) ) ) @@ -269,7 +267,7 @@ def run_test( origin_model_path = os.path.join(origin_model_path, model_name) print( - "Start FP32 inference for {0} on {1} images ...".format( + "Start FP32 inference for {} on {} images ...".format( model_name, infer_iterations * batch_size ) ) @@ -283,7 +281,7 @@ def run_test( ) print( - "Start INT8 post training quantization for {0} on {1} images ...".format( + "Start INT8 post training quantization for {} on {} images ...".format( model_name, quant_iterations * batch_size ) ) @@ -305,7 +303,7 @@ def run_test( ) print( - "Start INT8 inference for {0} on {1} images ...".format( + "Start INT8 inference for {} on {} images ...".format( model_name, infer_iterations * batch_size ) ) @@ -317,14 +315,14 @@ def run_test( infer_iterations, ) - print("---Post training quantization of {} method---".format(algo)) + print(f"---Post training quantization of {algo} method---") print( - "FP32 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.".format( + "FP32 {}: batch_size {}, throughput {} img/s, latency {} s, acc1 {}.".format( model_name, batch_size, fp32_throughput, fp32_latency, fp32_acc1 ) ) print( - "INT8 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.\n".format( + "INT8 {}: batch_size {}, throughput {} img/s, latency {} s, acc1 {}.\n".format( model_name, batch_size, int8_throughput, int8_latency, int8_acc1 ) ) diff --git a/python/paddle/static/quantization/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/static/quantization/tests/test_post_training_quantization_mobilenetv1.py index cfef61d51a40a8..a7acfa1cfab8da 100644 --- a/python/paddle/static/quantization/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/static/quantization/tests/test_post_training_quantization_mobilenetv1.py @@ -193,7 +193,7 @@ def download_data(self, data_urls, data_md5s, folder_name, is_model=True): file_name = data_urls[0].split('/')[-1] zip_path = os.path.join(self.cache_folder, file_name) - print('Data is downloaded at {0}'.format(zip_path)) + print(f'Data is downloaded at {zip_path}') self.cache_unzipping(data_cache_folder, zip_path) return data_cache_folder @@ -253,7 +253,7 @@ def run_program( cnt += len(data) if (batch_id + 1) % 100 == 0: - print("{0} images,".format(batch_id + 1)) + print(f"{batch_id + 1} images,") sys.stdout.flush() if (batch_id + 1) == iterations: break @@ -282,9 +282,7 @@ def generate_quantized_model( try: os.system("mkdir " + self.int8_model) except Exception as e: - print( - "Failed to create {} due to {}".format(self.int8_model, str(e)) - ) + print(f"Failed to create {self.int8_model} due to {str(e)}") sys.exit(-1) place = paddle.CPUPlace() @@ -339,7 +337,7 @@ def run_test( model_cache_folder = self.download_data(data_urls, data_md5s, model) print( - "Start FP32 inference for {0} on {1} images ...".format( + "Start FP32 inference for {} on {} images ...".format( model, infer_iterations * batch_size ) ) @@ -368,7 +366,7 @@ def run_test( ) print( - "Start INT8 inference for {0} on {1} images ...".format( + "Start INT8 inference for {} on {} images ...".format( model, infer_iterations * batch_size ) ) @@ -380,14 +378,14 @@ def run_test( infer_iterations, ) - print("---Post training quantization of {} method---".format(algo)) + print(f"---Post training quantization of {algo} method---") print( - "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.".format( + "FP32 {}: batch_size {}, throughput {} 
images/second, latency {} second, accuracy {}.".format( model, batch_size, fp32_throughput, fp32_latency, fp32_acc1 ) ) print( - "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n".format( + "INT8 {}: batch_size {}, throughput {} images/second, latency {} second, accuracy {}.\n".format( model, batch_size, int8_throughput, int8_latency, int8_acc1 ) ) diff --git a/python/paddle/static/quantization/tests/test_post_training_quantization_program_resnet50.py b/python/paddle/static/quantization/tests/test_post_training_quantization_program_resnet50.py index 26d52cf2011704..fecb2e76099481 100644 --- a/python/paddle/static/quantization/tests/test_post_training_quantization_program_resnet50.py +++ b/python/paddle/static/quantization/tests/test_post_training_quantization_program_resnet50.py @@ -160,7 +160,7 @@ def run_program( cnt += len(data) if (batch_id + 1) % 100 == 0: - print("{0} images,".format(batch_id + 1)) + print(f"{batch_id + 1} images,") sys.stdout.flush() if (batch_id + 1) == iterations: break @@ -203,9 +203,7 @@ def generate_quantized_model( try: os.system("mkdir " + self.int8_model) except Exception as e: - print( - "Failed to create {} due to {}".format(self.int8_model, str(e)) - ) + print(f"Failed to create {self.int8_model} due to {str(e)}") sys.exit(-1) place = paddle.CPUPlace() @@ -264,7 +262,7 @@ def run_test( model_cache_folder = self.download_data(data_urls, data_md5s, model) print( - "Start FP32 inference for {0} on {1} images ...".format( + "Start FP32 inference for {} on {} images ...".format( model, infer_iterations * batch_size ) ) @@ -297,7 +295,7 @@ def run_test( ) print( - "Start INT8 inference for {0} on {1} images ...".format( + "Start INT8 inference for {} on {} images ...".format( model, infer_iterations * batch_size ) ) @@ -309,14 +307,14 @@ def run_test( infer_iterations, ) - print("---Post training quantization of {} method---".format(algo)) + print(f"---Post training quantization of {algo} method---") print( - "FP32 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.".format( + "FP32 {}: batch_size {}, throughput {} images/second, latency {} second, accuracy {}.".format( model, batch_size, fp32_throughput, fp32_latency, fp32_acc1 ) ) print( - "INT8 {0}: batch_size {1}, throughput {2} images/second, latency {3} second, accuracy {4}.\n".format( + "INT8 {}: batch_size {}, throughput {} images/second, latency {} second, accuracy {}.\n".format( model, batch_size, int8_throughput, int8_latency, int8_acc1 ) ) diff --git a/python/paddle/static/quantization/tests/test_post_training_quantization_while.py b/python/paddle/static/quantization/tests/test_post_training_quantization_while.py index 389465df89932a..0e1863dcf7fc96 100644 --- a/python/paddle/static/quantization/tests/test_post_training_quantization_while.py +++ b/python/paddle/static/quantization/tests/test_post_training_quantization_while.py @@ -51,7 +51,7 @@ def setUp(self): def tearDown(self): try: - os.system("rm -rf {}".format(self.int8_model_path)) + os.system(f"rm -rf {self.int8_model_path}") except Exception as e: print( "Failed to delete {} due to {}".format( @@ -60,14 +60,14 @@ def tearDown(self): ) def cache_unzipping(self, target_folder, zip_path): - cmd = 'tar xf {0} -C {1}'.format(zip_path, target_folder) + cmd = f'tar xf {zip_path} -C {target_folder}' os.system(cmd) def download_model(self, data_url, data_md5, folder_name): download(data_url, self.download_path, data_md5) file_name = data_url.split('/')[-1] zip_path = 
os.path.join(self.cache_folder, file_name) - print('Data is downloaded at {0}'.format(zip_path)) + print(f'Data is downloaded at {zip_path}') data_cache_folder = os.path.join(self.cache_folder, folder_name) self.cache_unzipping(self.cache_folder, zip_path) @@ -190,7 +190,7 @@ def run_test( origin_model_path = self.download_model(data_url, data_md5, model_name) print( - "Start FP32 inference for {0} on {1} images ...".format( + "Start FP32 inference for {} on {} images ...".format( model_name, infer_iterations * batch_size ) ) @@ -199,7 +199,7 @@ def run_test( ) print( - "Start INT8 post training quantization for {0} on {1} images ...".format( + "Start INT8 post training quantization for {} on {} images ...".format( model_name, quant_iterations * batch_size ) ) @@ -216,7 +216,7 @@ def run_test( ) print( - "Start INT8 inference for {0} on {1} images ...".format( + "Start INT8 inference for {} on {} images ...".format( model_name, infer_iterations * batch_size ) ) @@ -224,14 +224,14 @@ def run_test( self.int8_model_path, batch_size, infer_iterations ) - print("---Post training quantization of {} method---".format(algo)) + print(f"---Post training quantization of {algo} method---") print( - "FP32 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.".format( + "FP32 {}: batch_size {}, throughput {} img/s, latency {} s, acc1 {}.".format( model_name, batch_size, fp32_throughput, fp32_latency, fp32_acc1 ) ) print( - "INT8 {0}: batch_size {1}, throughput {2} img/s, latency {3} s, acc1 {4}.\n".format( + "INT8 {}: batch_size {}, throughput {} img/s, latency {} s, acc1 {}.\n".format( model_name, batch_size, int8_throughput, int8_latency, int8_acc1 ) ) diff --git a/python/paddle/static/quantization/tests/test_weight_quantization_mobilenetv1.py b/python/paddle/static/quantization/tests/test_weight_quantization_mobilenetv1.py index e85064f5847a0e..e84b870d1691ba 100644 --- a/python/paddle/static/quantization/tests/test_weight_quantization_mobilenetv1.py +++ b/python/paddle/static/quantization/tests/test_weight_quantization_mobilenetv1.py @@ -113,11 +113,9 @@ def quantize_to_int( print("finish weight quantization for " + model_name + "\n") try: - os.system("rm -rf {}".format(save_model_dir)) + os.system(f"rm -rf {save_model_dir}") except Exception as e: - print( - "Failed to delete {} due to {}".format(save_model_dir, str(e)) - ) + print(f"Failed to delete {save_model_dir} due to {str(e)}") def convert_to_fp16( self, @@ -167,11 +165,9 @@ def convert_to_fp16( ) try: - os.system("rm -rf {}".format(save_model_dir)) + os.system(f"rm -rf {save_model_dir}") except Exception as e: - print( - "Failed to delete {} due to {}".format(save_model_dir, str(e)) - ) + print(f"Failed to delete {save_model_dir} due to {str(e)}") def run_models( self, diff --git a/python/paddle/static/quantization/utils.py b/python/paddle/static/quantization/utils.py index 39c4db33aa11e8..7397ff3fe01d25 100644 --- a/python/paddle/static/quantization/utils.py +++ b/python/paddle/static/quantization/utils.py @@ -279,9 +279,7 @@ def update(self, n=1): a = "=" * round((self.n / self.total) * self.ncols) b = " " * (self.ncols - len(a)) prefix = self.bar_format.split('|')[0] - sys.stderr.write( - "\r{}|{}=>{}| {}/{}".format(prefix, a, b, self.n, self.total) - ) + sys.stderr.write(f"\r{prefix}|{a}=>{b}| {self.n}/{self.total}") sys.stderr.flush() def __enter__(self): diff --git a/python/paddle/tensor/array.py b/python/paddle/tensor/array.py index f3a9930c7068eb..eefdf27c1a15f7 100644 --- a/python/paddle/tensor/array.py +++ 
b/python/paddle/tensor/array.py @@ -207,7 +207,7 @@ def array_write(x, i, array=None): ) if array is None: array = helper.create_variable( - name="{0}.out".format(helper.name), + name=f"{helper.name}.out", type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=x.dtype, ) @@ -272,7 +272,7 @@ def create_array(dtype, initialized_list=None): else: helper = LayerHelper("array", **locals()) tensor_array = helper.create_variable( - name="{0}.out".format(helper.name), + name=f"{helper.name}.out", type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, dtype=dtype, ) diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index 679bf3d811847a..7a859d64d0c517 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -162,9 +162,7 @@ def is_complex(x): # False """ if not isinstance(x, (paddle.Tensor, paddle.static.Variable)): - raise TypeError( - "Expected Tensor, but received type of x: {}".format(type(x)) - ) + raise TypeError(f"Expected Tensor, but received type of x: {type(x)}") dtype = x.dtype is_complex_dtype = ( dtype == core.VarDesc.VarType.COMPLEX64 @@ -196,9 +194,7 @@ def is_floating_point(x): # False """ if not isinstance(x, (paddle.Tensor, paddle.static.Variable)): - raise TypeError( - "Expected Tensor, but received type of x: {}".format(type(x)) - ) + raise TypeError(f"Expected Tensor, but received type of x: {type(x)}") dtype = x.dtype is_fp_dtype = ( dtype == core.VarDesc.VarType.FP32 @@ -236,9 +232,7 @@ def is_integer(x): # True """ if not isinstance(x, (paddle.Tensor, paddle.static.Variable)): - raise TypeError( - "Expected Tensor, but received type of x: {}".format(type(x)) - ) + raise TypeError(f"Expected Tensor, but received type of x: {type(x)}") dtype = x.dtype is_int_dtype = ( dtype == core.VarDesc.VarType.UINT8 diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 433d29c2ddafa7..3e2cbd2d0d1a92 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -1130,7 +1130,7 @@ def _check_attr(attr, message): if isinstance(attr, ((Variable, core.eager.Tensor))): assert len(attr.shape) == 1 and attr.shape[0] in [1, -1] elif not isinstance(attr, int) or attr < 0: - raise TypeError("{} should be a non-negative int.".format(message)) + raise TypeError(f"{message} should be a non-negative int.") _check_attr(num_rows, "num_rows") @@ -1345,7 +1345,7 @@ def _tril_triu_op(helper): op_type = helper.layer_type x = helper.kwargs.get('x', None) - assert x is not None, 'x cannot be None in {}'.format(op_type) + assert x is not None, f'x cannot be None in {op_type}' check_variable_and_dtype( x, 'x', @@ -1353,10 +1353,10 @@ def _tril_triu_op(helper): op_type, ) if len(x.shape) < 2: - raise ValueError("x shape in {} must be at least 2-D".format(op_type)) + raise ValueError(f"x shape in {op_type} must be at least 2-D") diagonal = helper.kwargs.get('diagonal', 0) if not isinstance(diagonal, (int,)): - raise TypeError("diagonal in {} must be a python Int".format(op_type)) + raise TypeError(f"diagonal in {op_type} must be a python Int") name = helper.kwargs.get('name', None) if name is None: diff --git a/python/paddle/tensor/layer_function_generator.py b/python/paddle/tensor/layer_function_generator.py index fd1cf73d37f42d..8b753e1d2b6301 100644 --- a/python/paddle/tensor/layer_function_generator.py +++ b/python/paddle/tensor/layer_function_generator.py @@ -86,7 +86,7 @@ def _generate_doc_string_( buf.write(escape_math(op_proto.comment)) buf.write('\nArgs:\n') for each_input in op_proto.inputs: - 
line_begin = ' {0}'.format(_convert_(each_input.name)) + line_begin = f' {_convert_(each_input.name)}' buf.write(line_begin) buf.write(" (Tensor): ") buf.write(escape_math(each_input.comment)) @@ -158,7 +158,7 @@ def generate_layer_fn(op_type): if len(not_intermediate_outputs) != 1: raise ValueError( "Only one non intermediate output operator can be", - "automatically generated. {0}".format(op_type), + f"automatically generated. {op_type}", ) if not_intermediate_outputs[0].duplicable: @@ -195,15 +195,13 @@ def infer_and_check_dtype(op_proto, *args, **kwargs): for each in val: if not isinstance(each, Variable): - raise ValueError( - "input of {0} must be variable".format(op_type) - ) + raise ValueError(f"input of {op_type} must be variable") if dtype is None: dtype = each.dtype elif dtype != each.dtype: raise ValueError( - "operator {0} must input same dtype. {1} vs {2}".format( + "operator {} must input same dtype. {} vs {}".format( op_type, dtype, each.dtype ) ) @@ -348,8 +346,8 @@ def func(x, name=None): func.__name__ = inplace_op_type func.__doc__ = """ -Inplace version of ``{0}`` API, the output Tensor will be inplaced with input ``x``. -Please refer to :ref:`api_fluid_layers_{1}`. +Inplace version of ``{}`` API, the output Tensor will be inplaced with input ``x``. +Please refer to :ref:`api_fluid_layers_{}`. """.format( origin_op_type, origin_op_type ) @@ -395,23 +393,17 @@ def __impl__(func): args = {"comment": trim_ending_dot(comment)} for each_input in op_proto.inputs: input_name = _convert_(each_input.name) - args["{0}_comment".format(input_name)] = trim_ending_dot( - each_input.comment - ) - args["{0}_type".format(input_name)] = "Variable" + args[f"{input_name}_comment"] = trim_ending_dot(each_input.comment) + args[f"{input_name}_type"] = "Variable" for each_attr in op_proto.attrs: input_name = _convert_(each_attr.name) - args["{0}_comment".format(input_name)] = trim_ending_dot( - each_attr.comment - ) - args["{0}_type".format(input_name)] = _type_to_str_(each_attr.type) + args[f"{input_name}_comment"] = trim_ending_dot(each_attr.comment) + args[f"{input_name}_type"] = _type_to_str_(each_attr.type) for each_opt in op_proto.outputs: output_name = _convert_(each_opt.name) - args["{0}_comment".format(output_name)] = trim_ending_dot( - each_opt.comment - ) - args["{0}_type".format(output_name)] = "Variable" + args[f"{output_name}_comment"] = trim_ending_dot(each_opt.comment) + args[f"{output_name}_type"] = "Variable" func.__doc__ = tmpl.substitute(args) return func diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 004ed485f7629e..5a575ae92d58ff 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -112,8 +112,10 @@ def transpose(x, perm, name=None): raise ValueError( "Input(perm) is the permutation of dimensions of Input(x), " "its length should be equal to dimensions of Input(x), " - "but received dimension of Input(x) is %s, " - "the length of Input(perm) is %s." 
% (len(x.shape), len(perm)) + "but received dimension of Input(x) is {}, " + "the length of Input(perm) is {}.".format( + len(x.shape), len(perm) + ) ) for idx, dim in enumerate(perm): if dim >= len(x.shape): @@ -536,7 +538,7 @@ def p_matrix_norm(input, porder=1.0, axis=axis, keepdim=False, name=None): return frobenius_norm(x, dim=axis, keepdim=keepdim, name=name) else: raise ValueError( - "only valid string values are 'fro', found {}".format(p) + f"only valid string values are 'fro', found {p}" ) elif isinstance(p, (int, float)): return vector_norm( @@ -549,7 +551,7 @@ def p_matrix_norm(input, porder=1.0, axis=axis, keepdim=False, name=None): ) else: raise ValueError( - "only valid p type is string or float, found {}".format(type(p)) + f"only valid p type is string or float, found {type(p)}" ) if isinstance(axis, tuple): @@ -572,7 +574,7 @@ def p_matrix_norm(input, porder=1.0, axis=axis, keepdim=False, name=None): else: raise ValueError( - "only valid string values are 'fro', found {}".format(p) + f"only valid string values are 'fro', found {p}" ) elif isinstance(p, (int, float)): return vector_norm( @@ -1027,7 +1029,7 @@ def empty_tensor(input, shape): if not len(x_shape) >= 2: raise ValueError( "input should be a matrix or batches of matrices, " - + "but the dimention of received input is {}".format(len(x_shape)) + + f"but the dimention of received input is {len(x_shape)}" ) if p is None: p = 2 @@ -1051,7 +1053,7 @@ def empty_tensor(input, shape): ) else: raise ValueError( - "only support p is {} when input is a ".format(p) + f"only support p is {p} when input is a " + "square matrix or batches of square matrices" ) elif p in (2, -2): @@ -1060,7 +1062,7 @@ def empty_tensor(input, shape): return svd_norm(x, porder=p) else: raise ValueError( - "unsupported {} for p, only supporting ('fro', 'nuc', ".format(p) + f"unsupported {p} for p, only supporting ('fro', 'nuc', " + "1, -1, 2, -2, inf, -inf) or none" ) @@ -1105,8 +1107,8 @@ def dot(x, y, name=None): else: op_type = 'dot' - assert x is not None, 'x cannot be None in {}'.format(op_type) - assert y is not None, 'y cannot be None in {}'.format(op_type) + assert x is not None, f'x cannot be None in {op_type}' + assert y is not None, f'y cannot be None in {op_type}' check_variable_and_dtype( x, 'x', ['float32', 'float64', 'int32', 'int64'], op_type @@ -1807,7 +1809,7 @@ def det(x, name=None): assert ( input_shape[-1] == input_shape[-2] - ), "Expect squared input," "but received %s by %s matrix.\n" % ( + ), "Expect squared input," "but received {} by {} matrix.\n".format( input_shape[-2], input_shape[-1], ) @@ -1866,7 +1868,7 @@ def slogdet(x, name=None): assert ( input_shape[-1] == input_shape[-2] - ), "Expect squared input," "but received %s by %s matrix.\n" % ( + ), "Expect squared input," "but received {} by {} matrix.\n".format( input_shape[-2], input_shape[-1], ) @@ -2555,7 +2557,7 @@ def __check_input(x, UPLO): ) if UPLO != 'L' and UPLO != 'U': raise ValueError( - "UPLO must be L or U. But received UPLO is: {}".format(UPLO) + f"UPLO must be L or U. But received UPLO is: {UPLO}" ) __check_input(x, UPLO) @@ -3081,7 +3083,7 @@ def __check_input(x, UPLO): ) if UPLO != 'L' and UPLO != 'U': raise ValueError( - "UPLO must be L or U. But received UPLO is: {}".format(UPLO) + f"UPLO must be L or U. 
But received UPLO is: {UPLO}" ) __check_input(x, UPLO) diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index b6fda0fd189e50..be1f88d43256ae 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -491,8 +491,10 @@ def transpose(x, perm, name=None): raise ValueError( "Input(perm) is the permutation of dimensions of Input(x), " "its length should be equal to dimensions of Input(x), " - "but received dimension of Input(x) is %s, " - "the length of Input(perm) is %s." % (len(x.shape), len(perm)) + "but received dimension of Input(x) is {}, " + "the length of Input(perm) is {}.".format( + len(x.shape), len(perm) + ) ) for idx, dim in enumerate(perm): if dim >= len(x.shape): @@ -961,7 +963,7 @@ def _fill_diagonal_tensor_impl(x, y, offset=0, dim1=0, dim2=1, inplace=False): predshape.append(diaglen) assert tuple(predshape) == tuple( y.shape - ), "the y shape should be {}".format(predshape) + ), f"the y shape should be {predshape}" if len(y.shape) == 1: y = y.reshape([1, -1]) @@ -1445,13 +1447,9 @@ def rot90(x, k=1, axes=[0, 1], name=None): ) if not (axes[0] < input_total_dims and axes[0] >= -input_total_dims): - raise ValueError( - "Rotation axis0 out of range, axis0 = {}".format(axes[0]) - ) + raise ValueError(f"Rotation axis0 out of range, axis0 = {axes[0]}") if not (axes[1] < input_total_dims and axes[1] >= -input_total_dims): - raise ValueError( - "Rotation axis1 out of range, axis1 = {}".format(axes[1]) - ) + raise ValueError(f"Rotation axis1 out of range, axis1 = {axes[1]}") k %= 4 if k == 0: diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 8d889c964c419d..0494338eb453e7 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -492,8 +492,8 @@ def _elementwise_op(helper): out = helper.kwargs.get('out', None) - assert x is not None, 'x cannot be None in {}'.format(original_op_type) - assert y is not None, 'y cannot be None in {}'.format(original_op_type) + assert x is not None, f'x cannot be None in {original_op_type}' + assert y is not None, f'y cannot be None in {original_op_type}' bf16_and_complex_supported_ops = [ "elementwise_add", "elementwise_sub", @@ -5192,7 +5192,7 @@ def _trapezoid(y, x=None, dx=None, axis=-1, mode='sum'): dx = 1.0 dx = paddle.to_tensor(dx) if dx.dim() > 1: - raise ValueError('Expected dx to be a scalar, got dx={}'.format(dx)) + raise ValueError(f'Expected dx to be a scalar, got dx={dx}') else: if x.dtype not in [paddle.float16, paddle.float32, paddle.float64]: raise TypeError( diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 4339a25f565efb..96297964940557 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -785,7 +785,7 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): if high is None: if low <= 0: raise ValueError( - "If high is None, low must be greater than 0, but received low = {0}.".format( + "If high is None, low must be greater than 0, but received low = {}.".format( low ) ) @@ -805,8 +805,8 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): check_dtype(dtype, 'dtype', ['int32', 'int64'], 'randint') if low >= high: raise ValueError( - "randint's low must less then high, but received low = {0}, " - "high = {1}".format(low, high) + f"randint's low must less then high, but received low = {low}, " + f"high = {high}" ) inputs = {} @@ -951,7 +951,7 @@ def randint_like(x, low=0, high=None, dtype=None, name=None): if high is None: if 
low <= 0: raise ValueError( - "If high is None, low must be greater than 0, but received low = {0}.".format( + "If high is None, low must be greater than 0, but received low = {}.".format( low ) ) @@ -965,8 +965,8 @@ def randint_like(x, low=0, high=None, dtype=None, name=None): if low >= high: raise ValueError( - "randint_like's low must less then high, but received low = {0}, " - "high = {1}".format(low, high) + f"randint_like's low must less then high, but received low = {low}, " + f"high = {high}" ) if in_dygraph_mode(): diff --git a/python/paddle/tensor/to_string.py b/python/paddle/tensor/to_string.py index ac5fa5a90aa0b3..897735333ace81 100644 --- a/python/paddle/tensor/to_string.py +++ b/python/paddle/tensor/to_string.py @@ -133,13 +133,13 @@ def _format_item(np_var, max_width=0, signed=False): DEFAULT_PRINT_OPTIONS.precision ).format(np_var) elif np.ceil(np_var) == np_var: - item_str = '{:.0f}.'.format(np_var) + item_str = f'{np_var:.0f}.' else: item_str = '{{:.{}f}}'.format( DEFAULT_PRINT_OPTIONS.precision ).format(np_var) else: - item_str = '{}'.format(np_var) + item_str = f'{np_var}' if max_width > len(item_str): if signed: # handle sign character for tenosr with negative item diff --git a/python/paddle/text/datasets/imdb.py b/python/paddle/text/datasets/imdb.py index abf4424e3f37b8..d34d2e8ecf1d1a 100644 --- a/python/paddle/text/datasets/imdb.py +++ b/python/paddle/text/datasets/imdb.py @@ -75,7 +75,7 @@ def __init__(self, data_file=None, mode='train', cutoff=150, download=True): assert mode.lower() in [ 'train', 'test', - ], "mode should be 'train', 'test', but got {}".format(mode) + ], f"mode should be 'train', 'test', but got {mode}" self.mode = mode.lower() self.data_file = data_file @@ -129,8 +129,8 @@ def _tokenize(self, pattern): return data def _load_anno(self): - pos_pattern = re.compile(r"aclImdb/{}/pos/.*\.txt$".format(self.mode)) - neg_pattern = re.compile(r"aclImdb/{}/neg/.*\.txt$".format(self.mode)) + pos_pattern = re.compile(fr"aclImdb/{self.mode}/pos/.*\.txt$") + neg_pattern = re.compile(fr"aclImdb/{self.mode}/neg/.*\.txt$") UNK = self.word_idx[''] diff --git a/python/paddle/text/datasets/imikolov.py b/python/paddle/text/datasets/imikolov.py index b84d6255bf844f..5aead1c2d9cf53 100644 --- a/python/paddle/text/datasets/imikolov.py +++ b/python/paddle/text/datasets/imikolov.py @@ -83,13 +83,13 @@ def __init__( assert data_type.upper() in [ 'NGRAM', 'SEQ', - ], "data type should be 'NGRAM', 'SEQ', but got {}".format(data_type) + ], f"data type should be 'NGRAM', 'SEQ', but got {data_type}" self.data_type = data_type.upper() assert mode.lower() in [ 'train', 'test', - ], "mode should be 'train', 'test', but got {}".format(mode) + ], f"mode should be 'train', 'test', but got {mode}" self.mode = mode.lower() self.window_size = window_size @@ -147,7 +147,7 @@ def _build_work_dict(self, cutoff): def _load_anno(self): self.data = [] with tarfile.open(self.data_file) as tf: - filename = './simple-examples/data/ptb.{}.txt'.format(self.mode) + filename = f'./simple-examples/data/ptb.{self.mode}.txt' f = tf.extractfile(filename) UNK = self.word_idx[''] diff --git a/python/paddle/text/datasets/movielens.py b/python/paddle/text/datasets/movielens.py index 6793cd9e8c05a2..00eee8e7784f33 100644 --- a/python/paddle/text/datasets/movielens.py +++ b/python/paddle/text/datasets/movielens.py @@ -149,7 +149,7 @@ def __init__( assert mode.lower() in [ 'train', 'test', - ], "mode should be 'train', 'test', but got {}".format(mode) + ], f"mode should be 'train', 'test', but got {mode}" 
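# A minimal, self-contained sketch of the mode-validation idiom shared by the dataset
# classes in this patch; check_mode is a hypothetical helper, not a Paddle API:
def check_mode(mode, allowed=('train', 'test')):
    # the f-string message is evaluated only if the assertion fails, so the switch
    # away from str.format() changes nothing on the success path
    assert mode.lower() in allowed, f"mode should be one of {allowed}, but got {mode}"
    return mode.lower()
# check_mode('Train') returns 'train'; check_mode('dev') raises AssertionError.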
self.mode = mode.lower() self.data_file = data_file diff --git a/python/paddle/text/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py index 381ba1b36524a4..dee0e3c98bca63 100644 --- a/python/paddle/text/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -87,7 +87,7 @@ def __init__(self, data_file=None, mode='train', download=True): assert mode.lower() in [ 'train', 'test', - ], "mode should be 'train' or 'test', but got {}".format(mode) + ], f"mode should be 'train' or 'test', but got {mode}" self.mode = mode.lower() self.data_file = data_file diff --git a/python/paddle/text/datasets/wmt14.py b/python/paddle/text/datasets/wmt14.py index 8609e3dd02dc41..3b109ba3736b0f 100644 --- a/python/paddle/text/datasets/wmt14.py +++ b/python/paddle/text/datasets/wmt14.py @@ -92,7 +92,7 @@ def __init__( 'train', 'test', 'gen', - ], "mode should be 'train', 'test' or 'gen', but got {}".format(mode) + ], f"mode should be 'train', 'test' or 'gen', but got {mode}" self.mode = mode.lower() self.data_file = data_file @@ -138,7 +138,7 @@ def __to_dict(fd, size): assert len(names) == 1 self.trg_dict = __to_dict(f.extractfile(names[0]), self.dict_size) - file_name = "{}/{}".format(self.mode, self.mode) + file_name = f"{self.mode}/{self.mode}" names = [ each_item.name for each_item in f diff --git a/python/paddle/text/datasets/wmt16.py b/python/paddle/text/datasets/wmt16.py index 3183b67f7e6516..13e610bfd62cba 100644 --- a/python/paddle/text/datasets/wmt16.py +++ b/python/paddle/text/datasets/wmt16.py @@ -116,7 +116,7 @@ def __init__( 'train', 'test', 'val', - ], "mode should be 'train', 'test' or 'val', but got {}".format(mode) + ], f"mode should be 'train', 'test' or 'val', but got {mode}" self.mode = mode.lower() self.data_file = data_file @@ -181,9 +181,7 @@ def _build_dict(self, dict_path, dict_size, lang): word_dict[w] += 1 with open(dict_path, "wb") as fout: - fout.write( - ("%s\n%s\n%s\n" % (START_MARK, END_MARK, UNK_MARK)).encode() - ) + fout.write((f"{START_MARK}\n{END_MARK}\n{UNK_MARK}\n").encode()) for idx, word in enumerate( sorted(word_dict.items(), key=lambda x: x[1], reverse=True) ): @@ -207,7 +205,7 @@ def _load_data(self): self.trg_ids = [] self.trg_ids_next = [] with tarfile.open(self.data_file, mode="r") as f: - for line in f.extractfile("wmt16/{}".format(self.mode)): + for line in f.extractfile(f"wmt16/{self.mode}"): line = line.decode() line_split = line.strip().split("\t") if len(line_split) != 2: diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py index 48ca3836f651c6..60f3a65a7633a7 100644 --- a/python/paddle/utils/cpp_extension/cpp_extension.py +++ b/python/paddle/utils/cpp_extension/cpp_extension.py @@ -899,9 +899,9 @@ def load( # ensure to use abs path build_directory = os.path.abspath(build_directory) - log_v("build_directory: {}".format(build_directory), verbose) + log_v(f"build_directory: {build_directory}", verbose) - file_path = os.path.join(build_directory, "{}_setup.py".format(name)) + file_path = os.path.join(build_directory, f"{name}_setup.py") sources = [os.path.abspath(source) for source in sources] if extra_cxx_cflags is None: diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index acba715c68ebeb..a0e64dac1ad799 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -467,7 +467,7 @@ def _get_lib_core_path(): Return real path 
of libcore_(no)avx.dylib on MacOS. """ raw_core_name = _get_core_name() - lib_core_name = "lib{}.dylib".format(raw_core_name[:-3]) + lib_core_name = f"lib{raw_core_name[:-3]}.dylib" return os.path.join(_get_fluid_path(), lib_core_name) @@ -493,7 +493,7 @@ def _reset_so_rpath(so_path): assert os.path.exists(so_path) if OS_NAME.startswith("darwin"): origin_runtime_path = "@loader_path/../libs/" - rpath = "@rpath/{}".format(_get_core_name()) + rpath = f"@rpath/{_get_core_name()}" cmd = 'install_name_tool -change {} {} {}'.format( origin_runtime_path, rpath, so_path ) @@ -567,7 +567,7 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): extra_link_args = kwargs.get('extra_link_args', []) extra_link_args.extend(MSVC_LINK_FLAGS) lib_core_name = create_sym_link_if_not_exist() - extra_link_args.append('{}'.format(lib_core_name)) + extra_link_args.append(f'{lib_core_name}') if use_cuda: extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib']) kwargs['extra_link_args'] = extra_link_args @@ -578,15 +578,15 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): # On Linux, GCC support '-l:xxx.so' to specify the library name # without `lib` prefix. if OS_NAME.startswith('linux'): - extra_link_args.append('-l:{}'.format(_get_core_name())) + extra_link_args.append(f'-l:{_get_core_name()}') # ----------------------- MacOS Platform ----------------------- # else: # See _reset_so_rpath for details. - extra_link_args.append('-Wl,-rpath,{}'.format(_get_fluid_path())) + extra_link_args.append(f'-Wl,-rpath,{_get_fluid_path()}') # On MacOS, ld don't support `-l:xx`, so we create a # liblibpaddle.dylib symbol link. lib_core_name = create_sym_link_if_not_exist() - extra_link_args.append('-l{}'.format(lib_core_name)) + extra_link_args.append(f'-l{lib_core_name}') # ----------------------- -- END -- ----------------------- # add_compile_flag(extra_compile_args, ['-w']) # disable warning @@ -637,7 +637,7 @@ def create_sym_link_if_not_exist(): raw_core_name, ) ) - run_cmd('mklink /H {} {}'.format(new_dll_core_path, core_path)) + run_cmd(f'mklink /H {new_dll_core_path} {core_path}') # libpaddle with lib suffix assert os.path.exists(new_dll_core_path) return raw_core_name[:-4] + ".lib" @@ -970,12 +970,10 @@ def _import_module_from_library(module_name, build_directory, verbose=False): dynamic_suffix = '.so' ext_path = os.path.join(build_directory, module_name + dynamic_suffix) if not os.path.exists(ext_path): - raise FileNotFoundError( - "Extension path: {} does not exist.".format(ext_path) - ) + raise FileNotFoundError(f"Extension path: {ext_path} does not exist.") # load custom op_info and kernels from .so shared library - log_v('loading shared library from: {}'.format(ext_path), verbose) + log_v(f'loading shared library from: {ext_path}', verbose) op_names = load_op_meta_info_and_register_op(ext_path) if os.name == 'nt' or sys.platform.startswith('darwin'): @@ -1023,7 +1021,7 @@ def remove_if_exit(filepath): api_file = os.path.join( build_directory, module_name + '_' + thread_id + '.py' ) - log_v("generate api file: {}".format(api_file), verbose) + log_v(f"generate api file: {api_file}", verbose) # delete the temp file before exit python process atexit.register(lambda: remove_if_exit(api_file)) @@ -1139,12 +1137,10 @@ def _load_module_from_file(api_file_path, module_name, verbose=False): Load module from python file. 
""" if not os.path.exists(api_file_path): - raise FileNotFoundError( - "File : {} does not exist.".format(api_file_path) - ) + raise FileNotFoundError(f"File : {api_file_path} does not exist.") # Unique readable module name to place custom api. - log_v('import module from file: {}'.format(api_file_path), verbose) + log_v(f'import module from file: {api_file_path}', verbose) ext_name = "_paddle_cpp_extension_" + module_name # load module with RWLock @@ -1182,7 +1178,7 @@ def _get_api_inputs_str(op_name): ] ) # e.g: ['Out', 'Index'] - outs_list = "[%s]" % ','.join(["'{}'".format(name) for name in out_names]) + outs_list = "[%s]" % ','.join([f"'{name}'" for name in out_names]) inplace_reverse_idx = core.eager._get_custom_operator_inplace_map(op_name) @@ -1238,7 +1234,7 @@ def _write_setup_file( with_cuda = False if any([is_cuda_file(source) for source in sources]): with_cuda = True - log_v("with_cuda: {}".format(with_cuda), verbose) + log_v(f"with_cuda: {with_cuda}", verbose) content = template.format( name=name, @@ -1252,7 +1248,7 @@ def _write_setup_file( build_dir=build_dir, ) - log_v('write setup.py into {}'.format(file_path), verbose) + log_v(f'write setup.py into {file_path}', verbose) with open(file_path, 'w') as f: f.write(content) @@ -1264,7 +1260,7 @@ def list2str(args): if args is None: return '[]' assert isinstance(args, (list, tuple)) - args = ["{}".format(arg) for arg in args] + args = [f"{arg}" for arg in args] return repr(args) @@ -1335,7 +1331,7 @@ def run_cmd(command, verbose=False): Execute command with subprocess. """ # logging - log_v("execute command: {}".format(command), verbose) + log_v(f"execute command: {command}", verbose) # execute command try: @@ -1347,9 +1343,7 @@ def run_cmd(command, verbose=False): return subprocess.check_call(command, shell=True, stdout=DEVNULL) except Exception: _, error, _ = sys.exc_info() - raise RuntimeError( - "Failed to run command: {}, errors: {}".format(compile, error) - ) + raise RuntimeError(f"Failed to run command: {compile}, errors: {error}") def check_abi_compatibility(compiler, verbose=False): diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index cd9e6947b77b35..c482484dce6abc 100755 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -62,10 +62,10 @@ def decorator(func): _update_to = update_to.strip() _reason = reason.strip() - msg = 'API "{}.{}" is deprecated'.format(func.__module__, func.__name__) + msg = f'API "{func.__module__}.{func.__name__}" is deprecated' if len(_since) > 0: - msg += " since {}".format(_since) + msg += f" since {_since}" msg += ", and will be removed in future versions." if len(_update_to) > 0: assert _update_to.startswith( @@ -73,9 +73,9 @@ def decorator(func): ), 'Argument update_to must start with "paddle.", your value is "{}"'.format( update_to ) - msg += ' Please use "{}" instead.'.format(_update_to) + msg += f' Please use "{_update_to}" instead.' 
if len(_reason) > 0: - msg += "\nreason: {}".format(_reason) + msg += f"\nreason: {_reason}" if func.__doc__: func.__doc__ = ('\n\nWarning: ' + msg + '\n') + func.__doc__ diff --git a/python/paddle/utils/download.py b/python/paddle/utils/download.py index 9c82531565f24a..d03a7fe5330dc8 100644 --- a/python/paddle/utils/download.py +++ b/python/paddle/utils/download.py @@ -36,10 +36,10 @@ def __init__(self, total=None): def update(self, n): self.n += n if self.total is None: - sys.stderr.write("\r{0:.1f} bytes".format(self.n)) + sys.stderr.write(f"\r{self.n:.1f} bytes") else: sys.stderr.write( - "\r{0:.1f}%".format(100 * self.n / float(self.total)) + "\r{:.1f}%".format(100 * self.n / float(self.total)) ) sys.stderr.flush() @@ -112,7 +112,7 @@ def _get_unique_endpoints(trainer_endpoints): continue ips.add(ip) unique_endpoints.add(endpoint) - logger.info("unique_endpoints {}".format(unique_endpoints)) + logger.info(f"unique_endpoints {unique_endpoints}") return unique_endpoints @@ -138,7 +138,7 @@ def get_path_from_url( from paddle.distributed import ParallelEnv - assert is_url(url), "downloading from {} not a url".format(url) + assert is_url(url), f"downloading from {url} not a url" # parse path after download to decompress under root_dir fullpath = _map_path(url, root_dir) # Mainly used to solve the problem of downloading data from different @@ -146,7 +146,7 @@ def get_path_from_url( # data, and the same ip will only download data once. unique_endpoints = _get_unique_endpoints(ParallelEnv().trainer_endpoints[:]) if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum): - logger.info("Found {}".format(fullpath)) + logger.info(f"Found {fullpath}") else: if ParallelEnv().current_endpoint in unique_endpoints: fullpath = _download(url, root_dir, md5sum, method=method) @@ -253,7 +253,7 @@ def _download(url, path, md5sum=None, method='get'): fullname = osp.join(path, fname) retry_cnt = 0 - logger.info("Downloading {} from {}".format(fname, url)) + logger.info(f"Downloading {fname} from {url}") while not (osp.exists(fullname) and _md5check(fullname, md5sum)): if retry_cnt < DOWNLOAD_RETRY_LIMIT: retry_cnt += 1 @@ -273,7 +273,7 @@ def _md5check(fullname, md5sum=None): if md5sum is None: return True - logger.info("File {} md5 checking...".format(fullname)) + logger.info(f"File {fullname} md5 checking...") md5 = hashlib.md5() with open(fullname, 'rb') as f: for chunk in iter(lambda: f.read(4096), b""): @@ -293,7 +293,7 @@ def _decompress(fname): """ Decompress for zip and tar file """ - logger.info("Decompressing {}...".format(fname)) + logger.info(f"Decompressing {fname}...") # For protecting decompressing interupted, # decompress to fpath_tmp directory firstly, if decompress @@ -305,7 +305,7 @@ def _decompress(fname): elif zipfile.is_zipfile(fname): uncompressed_path = _uncompress_file_zip(fname) else: - raise TypeError("Unsupport compress file type {}".format(fname)) + raise TypeError(f"Unsupport compress file type {fname}") return uncompressed_path diff --git a/python/paddle/utils/inplace_utils.py b/python/paddle/utils/inplace_utils.py index 65cac04350ca44..b56f766f11509b 100644 --- a/python/paddle/utils/inplace_utils.py +++ b/python/paddle/utils/inplace_utils.py @@ -31,7 +31,7 @@ def __impl__(*args, **kwargs): func.__name__, origin_api_name ) ) - origin_func = "{}.{}".format(func.__module__, origin_api_name) + origin_func = f"{func.__module__}.{origin_api_name}" return eval(origin_func)(*args, **kwargs) return func(*args, **kwargs) diff --git a/python/paddle/utils/install_check.py 
b/python/paddle/utils/install_check.py index 064d70ed4f8929..f89f2a59204101 100644 --- a/python/paddle/utils/install_check.py +++ b/python/paddle/utils/install_check.py @@ -277,7 +277,7 @@ def run_check(): _run_static_single(use_cuda, use_xpu, use_npu) _run_dygraph_single(use_cuda, use_xpu, use_npu) - print("PaddlePaddle works well on 1 {}.".format(device_str)) + print(f"PaddlePaddle works well on 1 {device_str}.") try: if len(device_list) > 1: @@ -301,7 +301,7 @@ def run_check(): ) ) - logging.warning("\n Original Error is: {}".format(e)) + logging.warning(f"\n Original Error is: {e}") print( "PaddlePaddle is installed successfully ONLY for single {}! " "Let's start deep learning with PaddlePaddle now.".format( diff --git a/python/paddle/utils/layers_utils.py b/python/paddle/utils/layers_utils.py index 4d09ff12796467..d04c2bfeac62cd 100644 --- a/python/paddle/utils/layers_utils.py +++ b/python/paddle/utils/layers_utils.py @@ -290,7 +290,7 @@ def _recursive_assert_same_structure(nest1, nest2, check_types): if is_sequence_nest1 != is_sequence(nest2): raise ValueError( "The two structures don't have the same nested structure.\n\n" - "First structure: %s\n\nSecond structure: %s." % (nest1, nest2) + "First structure: {}\n\nSecond structure: {}.".format(nest1, nest2) ) if not is_sequence_nest1: return # finished checking diff --git a/python/paddle/vision/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py index 80547614e28218..41a96fef16d277 100644 --- a/python/paddle/vision/datasets/cifar.py +++ b/python/paddle/vision/datasets/cifar.py @@ -114,7 +114,7 @@ def __init__( assert mode.lower() in [ 'train', 'test', - ], "mode.lower() should be 'train' or 'test', but got {}".format(mode) + ], f"mode.lower() should be 'train' or 'test', but got {mode}" self.mode = mode.lower() if backend is None: diff --git a/python/paddle/vision/datasets/flowers.py b/python/paddle/vision/datasets/flowers.py index 206e4d9703ed32..8b9c1d3a926df9 100644 --- a/python/paddle/vision/datasets/flowers.py +++ b/python/paddle/vision/datasets/flowers.py @@ -121,7 +121,7 @@ def __init__( 'train', 'valid', 'test', - ], "mode should be 'train', 'valid' or 'test', but got {}".format(mode) + ], f"mode should be 'train', 'valid' or 'test', but got {mode}" if backend is None: backend = paddle.vision.get_image_backend() diff --git a/python/paddle/vision/datasets/mnist.py b/python/paddle/vision/datasets/mnist.py index 9ba86036fbe141..de5826c5aa28fd 100644 --- a/python/paddle/vision/datasets/mnist.py +++ b/python/paddle/vision/datasets/mnist.py @@ -113,7 +113,7 @@ def __init__( assert mode.lower() in [ 'train', 'test', - ], "mode should be 'train' or 'test', but got {}".format(mode) + ], f"mode should be 'train' or 'test', but got {mode}" if backend is None: backend = paddle.vision.get_image_backend() diff --git a/python/paddle/vision/datasets/voc2012.py b/python/paddle/vision/datasets/voc2012.py index b80b3efb7db15c..405d9bc6a22904 100644 --- a/python/paddle/vision/datasets/voc2012.py +++ b/python/paddle/vision/datasets/voc2012.py @@ -115,7 +115,7 @@ def __init__( 'train', 'valid', 'test', - ], "mode should be 'train', 'valid' or 'test', but got {}".format(mode) + ], f"mode should be 'train', 'valid' or 'test', but got {mode}" if backend is None: backend = paddle.vision.get_image_backend() diff --git a/python/paddle/vision/models/densenet.py b/python/paddle/vision/models/densenet.py index 8a0f8bb9e2f73b..e5ab689d8465aa 100644 --- a/python/paddle/vision/models/densenet.py +++ b/python/paddle/vision/models/densenet.py @@ -131,7 
+131,7 @@ def __init__( for layer in range(num_layers): self.dense_layer_func.append( self.add_sublayer( - "{}_{}".format(name, layer + 1), + f"{name}_{layer + 1}", DenseLayer( num_channels=pre_channel, growth_rate=growth_rate, @@ -274,7 +274,7 @@ def __init__( for i, num_layers in enumerate(block_config): self.dense_block_func_list.append( self.add_sublayer( - "db_conv_{}".format(i + 2), + f"db_conv_{i + 2}", DenseBlock( num_channels=pre_num_channels, num_layers=num_layers, @@ -292,7 +292,7 @@ def __init__( if i != len(block_config) - 1: self.transition_func_list.append( self.add_sublayer( - "tr_conv{}_blk".format(i + 2), + f"tr_conv{i + 2}_blk", TransitionLayer( num_channels=pre_num_channels, num_output_features=num_features // 2, diff --git a/python/paddle/vision/models/mobilenetv3.py b/python/paddle/vision/models/mobilenetv3.py index 1012502d876e81..98236bec695fcf 100644 --- a/python/paddle/vision/models/mobilenetv3.py +++ b/python/paddle/vision/models/mobilenetv3.py @@ -407,7 +407,7 @@ def _mobilenet_v3(arch, pretrained=False, scale=1.0, **kwargs): else: model = MobileNetV3Small(scale=scale, **kwargs) if pretrained: - arch = "{}_x{}".format(arch, scale) + arch = f"{arch}_x{scale}" assert ( arch in model_urls ), "{} model do not have a pretrained model now, you should set pretrained=False".format( diff --git a/python/paddle/vision/models/shufflenetv2.py b/python/paddle/vision/models/shufflenetv2.py index 9efd451a3bc051..c146bb88ddcfdf 100644 --- a/python/paddle/vision/models/shufflenetv2.py +++ b/python/paddle/vision/models/shufflenetv2.py @@ -61,9 +61,7 @@ def create_activation_layer(act): elif act is None: return None else: - raise RuntimeError( - "The activation function is not supported: {}".format(act) - ) + raise RuntimeError(f"The activation function is not supported: {act}") def channel_shuffle(x, groups): @@ -577,5 +575,5 @@ def shufflenet_v2_swish(pretrained=False, **kwargs): scale=1.0, act="swish", pretrained=pretrained, - **kwargs + **kwargs, ) diff --git a/python/paddle/vision/transforms/functional_cv2.py b/python/paddle/vision/transforms/functional_cv2.py index b44c20ff1930d6..1f76b35d201fe6 100644 --- a/python/paddle/vision/transforms/functional_cv2.py +++ b/python/paddle/vision/transforms/functional_cv2.py @@ -40,9 +40,7 @@ def to_tensor(pic, data_format='CHW'): """ if data_format not in ['CHW', 'HWC']: - raise ValueError( - 'data_format should be CHW or HWC. Got {}'.format(data_format) - ) + raise ValueError(f'data_format should be CHW or HWC. 
Got {data_format}') if pic.ndim == 2: pic = pic[:, :, None] @@ -89,7 +87,7 @@ def resize(img, size, interpolation='bilinear'): if not ( isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2) ): - raise TypeError('Got inappropriate size arg: {}'.format(size)) + raise TypeError(f'Got inappropriate size arg: {size}') h, w = img.shape[:2] @@ -176,7 +174,7 @@ def pad(img, padding, fill=0, padding_mode='constant'): if isinstance(padding, Sequence) and len(padding) not in [2, 4]: raise ValueError( "Padding must be an int or a 2, or 4 element tuple, not a " - + "{} element tuple".format(len(padding)) + + f"{len(padding)} element tuple" ) assert padding_mode in [ @@ -401,9 +399,7 @@ def adjust_hue(img, hue_factor): cv2 = try_import('cv2') if not (-0.5 <= hue_factor <= 0.5): - raise ValueError( - 'hue_factor:{} is not in [-0.5, 0.5].'.format(hue_factor) - ) + raise ValueError(f'hue_factor:{hue_factor} is not in [-0.5, 0.5].') dtype = img.dtype img = img.astype(np.uint8) diff --git a/python/paddle/vision/transforms/functional_pil.py b/python/paddle/vision/transforms/functional_pil.py index bca2df08e622c3..3d816c25742f9f 100644 --- a/python/paddle/vision/transforms/functional_pil.py +++ b/python/paddle/vision/transforms/functional_pil.py @@ -59,9 +59,7 @@ def to_tensor(pic, data_format='CHW'): """ if data_format not in ['CHW', 'HWC']: - raise ValueError( - 'data_format should be CHW or HWC. Got {}'.format(data_format) - ) + raise ValueError(f'data_format should be CHW or HWC. Got {data_format}') # PIL Image if pic.mode == 'I': @@ -119,7 +117,7 @@ def resize(img, size, interpolation='bilinear'): if not ( isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2) ): - raise TypeError('Got inappropriate size arg: {}'.format(size)) + raise TypeError(f'Got inappropriate size arg: {size}') if isinstance(size, int): w, h = img.size @@ -182,7 +180,7 @@ def pad(img, padding, fill=0, padding_mode='constant'): if isinstance(padding, Sequence) and len(padding) not in [2, 4]: raise ValueError( "Padding must be an int or a 2, or 4 element tuple, not a " - + "{} element tuple".format(len(padding)) + + f"{len(padding)} element tuple" ) assert padding_mode in [ @@ -395,9 +393,7 @@ def adjust_hue(img, hue_factor): """ if not (-0.5 <= hue_factor <= 0.5): - raise ValueError( - 'hue_factor:{} is not in [-0.5, 0.5].'.format(hue_factor) - ) + raise ValueError(f'hue_factor:{hue_factor} is not in [-0.5, 0.5].') input_mode = img.mode if input_mode in {'L', '1', 'I', 'F'}: diff --git a/python/paddle/vision/transforms/functional_tensor.py b/python/paddle/vision/transforms/functional_tensor.py index 835599f44cf723..e2b7f3cc734d1e 100644 --- a/python/paddle/vision/transforms/functional_tensor.py +++ b/python/paddle/vision/transforms/functional_tensor.py @@ -703,7 +703,7 @@ def pad(img, padding, fill=0, padding_mode='constant', data_format='CHW'): if isinstance(padding, (list, tuple)) and len(padding) not in [2, 4]: raise ValueError( "Padding must be an int or a 2, or 4 element tuple, not a " - + "{} element tuple".format(len(padding)) + + f"{len(padding)} element tuple" ) assert padding_mode in [ @@ -772,7 +772,7 @@ def resize(img, size, interpolation='bilinear', data_format='CHW'): isinstance(size, int) or (isinstance(size, (tuple, list)) and len(size) == 2) ): - raise TypeError('Got inappropriate size arg: {}'.format(size)) + raise TypeError(f'Got inappropriate size arg: {size}') if isinstance(size, int): w, h = _get_image_size(img, data_format) diff --git 
a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 5c7b8124a76285..b1a83ed0c57330 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -44,7 +44,7 @@ def _get_image_size(img): ) ) else: - raise TypeError("Unexpected type {}".format(type(img))) + raise TypeError(f"Unexpected type {type(img)}") def _check_input( @@ -62,9 +62,7 @@ def _check_input( value[0] = max(value[0], 0) elif isinstance(value, (tuple, list)) and len(value) == 2: if not bound[0] <= value[0] <= value[1] <= bound[1]: - raise ValueError( - "{} values should be between {}".format(name, bound) - ) + raise ValueError(f"{name} values should be between {bound}") else: raise TypeError( "{} should be a single number or a list/tuple with lenght 2.".format( @@ -125,7 +123,7 @@ def __repr__(self): format_string = self.__class__.__name__ + '(' for t in self.transforms: format_string += '\n' - format_string += ' {0}'.format(t) + format_string += f' {t}' format_string += '\n)' return format_string @@ -241,14 +239,10 @@ def __init__(self, keys=None): if keys is None: keys = ("image",) elif not isinstance(keys, Sequence): - raise ValueError( - "keys should be a sequence, but got keys={}".format(keys) - ) + raise ValueError(f"keys should be a sequence, but got keys={keys}") for k in keys: if self._get_apply(k) is None: - raise NotImplementedError( - "{} is unsupported data structure".format(k) - ) + raise NotImplementedError(f"{k} is unsupported data structure") self.keys = keys # storage some params get from function get_params() @@ -281,7 +275,7 @@ def __call__(self, inputs): return outputs def _get_apply(self, key): - return getattr(self, "_apply_{}".format(key), None) + return getattr(self, f"_apply_{key}", None) def _apply_image(self, image): raise NotImplementedError @@ -1345,7 +1339,7 @@ def __init__(self, padding, fill=0, padding_mode='constant', keys=None): if isinstance(padding, Sequence) and len(padding) not in [2, 4]: raise ValueError( "Padding must be an int or a 2, or 4 element tuple, not a " - + "{} element tuple".format(len(padding)) + + f"{len(padding)} element tuple" ) super().__init__(keys) diff --git a/setup.py b/setup.py index 87c223f42b4c62..86587d8a0561dc 100644 --- a/setup.py +++ b/setup.py @@ -231,9 +231,9 @@ def run(self): distributed_proto_source_path = ( paddle_source_dir + '/python/paddle/distributed/fleet/proto/' ) - os.system("rm -rf {}".format(fluid_proto_source_path)) + os.system(f"rm -rf {fluid_proto_source_path}") shutil.copytree(fluid_proto_binary_path, fluid_proto_source_path) - os.system("rm -rf {}".format(distributed_proto_source_path)) + os.system(f"rm -rf {distributed_proto_source_path}") shutil.copytree( distributed_proto_binary_path, distributed_proto_source_path ) @@ -255,7 +255,7 @@ def run(self): ) ) write_cuda_env_config_py( - filename='{}/python/paddle/cuda_env.py'.format(paddle_source_dir) + filename=f'{paddle_source_dir}/python/paddle/cuda_env.py' ) write_parameter_server_version_py( filename='{}/python/paddle/incubate/distributed/fleet/parameter_server/version.py'.format( @@ -667,7 +667,7 @@ def find_files(pattern, root, recursive=False): @contextmanager def cd(path): if not os.path.isabs(path): - raise RuntimeError('Can only cd to absolute path, got: {}'.format(path)) + raise RuntimeError(f'Can only cd to absolute path, got: {path}') orig_path = os.getcwd() os.chdir(path) try: @@ -679,7 +679,7 @@ def cd(path): def options_process(args, build_options): for key, value in 
sorted(build_options.items()): if value is not None: - args.append("-D{}={}".format(key, value)) + args.append(f"-D{key}={value}") def get_cmake_generator(): @@ -784,7 +784,7 @@ def run_cmake_build(build_path): build_args += ["--"] if IS_WINDOWS: - build_args += ["/p:CL_MPCount={}".format(max_jobs)] + build_args += [f"/p:CL_MPCount={max_jobs}"] else: build_args += ["-j", max_jobs] else: @@ -1503,7 +1503,7 @@ def main(): ) ) write_cuda_env_config_py( - filename='{}/python/paddle/cuda_env.py'.format(paddle_binary_dir) + filename=f'{paddle_binary_dir}/python/paddle/cuda_env.py' ) write_parameter_server_version_py( filename='{}/python/paddle/incubate/distributed/fleet/parameter_server/version.py'.format( diff --git a/test/book/notest_understand_sentiment.py b/test/book/notest_understand_sentiment.py index 91a7765f3c3975..0cf498a50be7b5 100644 --- a/test/book/notest_understand_sentiment.py +++ b/test/book/notest_understand_sentiment.py @@ -112,9 +112,7 @@ def train_loop(main_program): return if math.isnan(float(cost_val)): sys.exit("got NaN loss, training failed.") - raise AssertionError( - "Cost is too large for {0}".format(net_method.__name__) - ) + raise AssertionError(f"Cost is too large for {net_method.__name__}") if is_local: train_loop(fluid.default_main_program()) diff --git a/test/book/test_fit_a_line.py b/test/book/test_fit_a_line.py index 7570def0da1c11..ae1a549f0d538e 100644 --- a/test/book/test_fit_a_line.py +++ b/test/book/test_fit_a_line.py @@ -128,7 +128,7 @@ def train_loop(main_program): if math.isnan(float(avg_loss_value)): sys.exit("got NaN loss, training failed.") raise AssertionError( - "Fit a line cost is too large, {0:2.2}".format(avg_loss_value[0]) + f"Fit a line cost is too large, {avg_loss_value[0]:2.2}" ) if is_local: diff --git a/test/book/test_image_classification.py b/test/book/test_image_classification.py index 60462412c31911..443d66654b5850 100644 --- a/test/book/test_image_classification.py +++ b/test/book/test_image_classification.py @@ -175,7 +175,7 @@ def train_loop(main_program): avg_loss_value = numpy.array(avg_loss_list).mean() print( - 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.format( + 'PassID {:1}, BatchID {:04}, Test Loss {:2.2}, Acc {:2.2}'.format( pass_id, batch_id + 1, float(avg_loss_value), diff --git a/test/book/test_recognize_digits.py b/test/book/test_recognize_digits.py index 4a5c2f8516605b..62efcc815d8395 100644 --- a/test/book/test_recognize_digits.py +++ b/test/book/test_recognize_digits.py @@ -154,7 +154,7 @@ def train_loop(main_program): return else: print( - 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.format( + 'PassID {:1}, BatchID {:04}, Test Loss {:2.2}, Acc {:2.2}'.format( pass_id, batch_id + 1, float(avg_loss_val), @@ -274,7 +274,7 @@ def __impl__(self): with fluid.program_guard(prog, startup_prog): main(use_cuda, parallel, nn_type, combine) - fn = 'test_{0}_{1}_{2}_{3}'.format( + fn = 'test_{}_{}_{}_{}'.format( nn_type, 'cuda' if use_cuda else 'cpu', 'parallel' if parallel else 'normal', diff --git a/test/book/test_word2vec_book.py b/test/book/test_word2vec_book.py index d410c45573c6e7..cdebfc58cfa9cd 100644 --- a/test/book/test_word2vec_book.py +++ b/test/book/test_word2vec_book.py @@ -35,7 +35,7 @@ def get_place(target): return fluid.CPUPlace() else: raise ValueError( - "Target `{0}` is not on the support list: `cuda`, `xpu` and `cpu`.".format( + "Target `{}` is not on the support list: `cuda`, `xpu` and `cpu`.".format( target ) ) @@ -169,7 +169,7 @@ def train_loop(main_program): if 
math.isnan(float(avg_cost_np[0])): sys.exit("got NaN loss, training failed.") - raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0])) + raise AssertionError(f"Cost is too large {avg_cost_np[0]:2.2}") if is_local: train_loop(fluid.default_main_program()) @@ -286,7 +286,7 @@ def to_infer_tensor(lod_tensor): np_data = np.array(results[0]) infer_out = infer_outputs[0].data.float_data() for a, b in zip(np_data[0], infer_out): - assert np.isclose(a, b, rtol=5e-5), "a: {}, b: {}".format(a, b) + assert np.isclose(a, b, rtol=5e-5), f"a: {a}, b: {b}" def main(target, is_sparse, is_parallel, use_bf16, pure_bf16): @@ -339,7 +339,7 @@ class W2VTest(unittest.TestCase): def inject_test_method( target, is_sparse, is_parallel, use_bf16=False, pure_bf16=False ): - fn_name = "test_{0}_{1}_{2}{3}".format( + fn_name = "test_{}_{}_{}{}".format( target, "sparse" if is_sparse else "dense", "parallel" if is_parallel else "normal", diff --git a/test/contrib/test_image_classification_fp16.py b/test/contrib/test_image_classification_fp16.py index 69969e44452cde..48bb126431d55e 100644 --- a/test/contrib/test_image_classification_fp16.py +++ b/test/contrib/test_image_classification_fp16.py @@ -178,7 +178,7 @@ def train_loop(main_program): fetch_list=[scaled_loss, avg_cost], ) print( - 'PassID {0:1}, BatchID {1:04}, train loss {2:2.4}, scaled train closs {3:2.4}'.format( + 'PassID {:1}, BatchID {:04}, train loss {:2.4}, scaled train closs {:2.4}'.format( pass_id, batch_id + 1, float(loss), @@ -204,7 +204,7 @@ def train_loop(main_program): avg_loss_value = numpy.array(avg_loss_list).mean() print( - 'PassID {0:1}, BatchID {1:04}, test loss {2:2.2}, acc {3:2.2}'.format( + 'PassID {:1}, BatchID {:04}, test loss {:2.2}, acc {:2.2}'.format( pass_id, batch_id + 1, float(avg_loss_value), diff --git a/test/contrib/test_multi_precision_fp16_train.py b/test/contrib/test_multi_precision_fp16_train.py index 81693fc36cd9cd..a364d2161ebe4e 100644 --- a/test/contrib/test_multi_precision_fp16_train.py +++ b/test/contrib/test_multi_precision_fp16_train.py @@ -180,7 +180,7 @@ def train_loop(): ) loss_v = loss[0] if isinstance(loss, np.ndarray) else loss print( - 'PassID {0:1}, Train Batch ID {1:04}, train loss {2:2.4}'.format( + 'PassID {:1}, Train Batch ID {:04}, train loss {:2.4}'.format( pass_id, batch_id + 1, float(loss_v) ) ) @@ -194,7 +194,7 @@ def train_loop(): ) test_loss_list.append(float(loss_t)) print( - 'PassID {0:1}, Test Batch ID {1:04}, test loss {2:2.4}'.format( + 'PassID {:1}, Test Batch ID {:04}, test loss {:2.4}'.format( pass_id, tid + 1, float(loss_t) ) ) diff --git a/test/custom_kernel/test_custom_kernel_load.py b/test/custom_kernel/test_custom_kernel_load.py index 929f42a35169a9..9428a6d0d454a7 100644 --- a/test/custom_kernel/test_custom_kernel_load.py +++ b/test/custom_kernel/test_custom_kernel_load.py @@ -81,7 +81,7 @@ def test_custom_kernel_dot_load(self): ) def tearDown(self): - cmd = 'rm -rf {}'.format(self.default_path) + cmd = f'rm -rf {self.default_path}' os.system(cmd) diff --git a/test/custom_op/custom_op_multidevice_model_train.py b/test/custom_op/custom_op_multidevice_model_train.py index 048198539570f6..f9aa7ee9b892d7 100644 --- a/test/custom_op/custom_op_multidevice_model_train.py +++ b/test/custom_op/custom_op_multidevice_model_train.py @@ -99,9 +99,7 @@ def train(args): relu_out2_list = np.array(relu_out2_list) rank = paddle.distributed.get_rank() np.savez( - os.path.join( - args.output_dir, 'train_{}_{}.npz'.format(rank, args.use_custom_op) - ), + os.path.join(args.output_dir, 
f'train_{rank}_{args.use_custom_op}.npz'), losses=losses, relu_out1_list=relu_out1_list, relu_out2_list=relu_out2_list, @@ -136,9 +134,7 @@ def eval(args): rank = paddle.distributed.get_rank() np.savez( - os.path.join( - args.output_dir, 'eval_{}_{}.npz'.format(rank, args.use_custom_op) - ), + os.path.join(args.output_dir, f'eval_{rank}_{args.use_custom_op}.npz'), losses=losses, relu_out1_list=relu_out1_list, relu_out2_list=relu_out2_list, diff --git a/test/custom_op/test_context_pool.py b/test/custom_op/test_context_pool.py index 9e9085e91b4df4..b8ccebc1106b47 100644 --- a/test/custom_op/test_context_pool.py +++ b/test/custom_op/test_context_pool.py @@ -28,7 +28,7 @@ get_build_directory() ) if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) # Compile and load custom op Just-In-Time. diff --git a/test/custom_op/test_custom_attrs_jit.py b/test/custom_op/test_custom_attrs_jit.py index c8f86ed004b112..676e81c49be276 100644 --- a/test/custom_op/test_custom_attrs_jit.py +++ b/test/custom_op/test_custom_attrs_jit.py @@ -28,7 +28,7 @@ get_build_directory() ) if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) # Compile and load custom op Just-In-Time. diff --git a/test/custom_op/test_custom_concat.py b/test/custom_op/test_custom_concat.py index fa7b8e1c612932..a1d93de4f2d3fb 100644 --- a/test/custom_op/test_custom_concat.py +++ b/test/custom_op/test_custom_concat.py @@ -25,9 +25,9 @@ # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_concat\\custom_concat.pyd'.format(get_build_directory()) +file = f'{get_build_directory()}\\custom_concat\\custom_concat.pyd' if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) if os.name == 'nt': diff --git a/test/custom_op/test_custom_conj.py b/test/custom_op/test_custom_conj.py index 28ebcf802b8d25..c30463bc348690 100644 --- a/test/custom_op/test_custom_conj.py +++ b/test/custom_op/test_custom_conj.py @@ -25,9 +25,9 @@ # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_conj\\custom_conj.pyd'.format(get_build_directory()) +file = f'{get_build_directory()}\\custom_conj\\custom_conj.pyd' if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) custom_ops = load( diff --git a/test/custom_op/test_custom_inplace.py b/test/custom_op/test_custom_inplace.py index 4fc807ca29d8a8..bdfe018c40f672 100644 --- a/test/custom_op/test_custom_inplace.py +++ b/test/custom_op/test_custom_inplace.py @@ -25,9 +25,9 @@ # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_inplace\\custom_inplace.pyd'.format(get_build_directory()) +file = f'{get_build_directory()}\\custom_inplace\\custom_inplace.pyd' if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) # Compile and load custom op Just-In-Time. 
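The Windows clean-up blocks in the custom-op tests above build cache paths such as
f'{get_build_directory()}\\custom_concat\\custom_concat.pyd'. Backslashes are legal in
the literal part of an f-string; the restriction (before Python 3.12) only concerns
backslashes inside the {...} expression. A hedged, self-contained sketch of the same
pattern with an assumed build directory and a hypothetical module name:

import os

build_dir = r'C:\build\cache'   # assumed location, for illustration only
mod = 'custom_demo'             # hypothetical extension name
cached_lib = f'{build_dir}\\{mod}\\{mod}.pyd'
if os.name == 'nt' and os.path.isfile(cached_lib):
    os.remove(cached_lib)       # same effect as the `del` shell command used in the tests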
diff --git a/test/custom_op/test_custom_linear.py b/test/custom_op/test_custom_linear.py index dd170ad00c0fe8..5d2a55456d7d23 100644 --- a/test/custom_op/test_custom_linear.py +++ b/test/custom_op/test_custom_linear.py @@ -26,9 +26,9 @@ # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_linear\\custom_linear.pyd'.format(get_build_directory()) +file = f'{get_build_directory()}\\custom_linear\\custom_linear.pyd' if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) custom_ops = load( diff --git a/test/custom_op/test_custom_op_relu_model_static_multidevice.py b/test/custom_op/test_custom_op_relu_model_static_multidevice.py index a7a26565c0c34c..a0455585ba0a29 100644 --- a/test/custom_op/test_custom_op_relu_model_static_multidevice.py +++ b/test/custom_op/test_custom_op_relu_model_static_multidevice.py @@ -90,14 +90,12 @@ def test_train_and_eval(self): for id in range(count): loss_custom = np.load( - os.path.join( - self.output_log_dir.name, 'train_{}_{}.npz'.format(id, True) - ) + os.path.join(self.output_log_dir.name, f'train_{id}_{True}.npz') ) loss_origin = np.load( os.path.join( self.output_log_dir.name, - 'train_{}_{}.npz'.format(id, False), + f'train_{id}_{False}.npz', ) ) np.testing.assert_array_equal( @@ -114,14 +112,10 @@ def test_train_and_eval(self): self.eval(use_custom_op=False) for id in range(count): loss_custom = np.load( - os.path.join( - self.output_log_dir.name, 'eval_{}_{}.npz'.format(id, True) - ) + os.path.join(self.output_log_dir.name, f'eval_{id}_{True}.npz') ) loss_origin = np.load( - os.path.join( - self.output_log_dir.name, 'eval_{}_{}.npz'.format(id, False) - ) + os.path.join(self.output_log_dir.name, f'eval_{id}_{False}.npz') ) np.testing.assert_array_equal( loss_custom['losses'], loss_origin['losses'] diff --git a/test/custom_op/test_custom_optional.py b/test/custom_op/test_custom_optional.py index 6ef8c1a28d35c2..8619c6c82c56a0 100644 --- a/test/custom_op/test_custom_optional.py +++ b/test/custom_op/test_custom_optional.py @@ -25,9 +25,9 @@ # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_optional\\custom_optional.pyd'.format(get_build_directory()) +file = f'{get_build_directory()}\\custom_optional\\custom_optional.pyd' if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) # Compile and load custom op Just-In-Time. diff --git a/test/custom_op/test_custom_relu_model.py b/test/custom_op/test_custom_relu_model.py index e09514a3e94057..386c343f55e5c7 100644 --- a/test/custom_op/test_custom_relu_model.py +++ b/test/custom_op/test_custom_relu_model.py @@ -30,7 +30,7 @@ get_build_directory() ) if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) # Compile and load custom op Just-In-Time. diff --git a/test/custom_op/test_custom_relu_op_jit.py b/test/custom_op/test_custom_relu_op_jit.py index 04ec4dcde86eb3..ecf9c6dfbc87f3 100644 --- a/test/custom_op/test_custom_relu_op_jit.py +++ b/test/custom_op/test_custom_relu_op_jit.py @@ -35,7 +35,7 @@ get_build_directory() ) if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) # Compile and load custom op Just-In-Time. 
diff --git a/test/custom_op/test_custom_simple_slice.py b/test/custom_op/test_custom_simple_slice.py index 166f15ca4c249a..d69322103520c2 100644 --- a/test/custom_op/test_custom_simple_slice.py +++ b/test/custom_op/test_custom_simple_slice.py @@ -28,7 +28,7 @@ get_build_directory() ) if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) custom_ops = load( diff --git a/test/custom_op/test_custom_tanh_double_grad.py b/test/custom_op/test_custom_tanh_double_grad.py index ad139e84a90c4d..08c57dac91fe17 100644 --- a/test/custom_op/test_custom_tanh_double_grad.py +++ b/test/custom_op/test_custom_tanh_double_grad.py @@ -24,9 +24,9 @@ # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\custom_tanh\\custom_tanh.pyd'.format(get_build_directory()) +file = f'{get_build_directory()}\\custom_tanh\\custom_tanh.pyd' if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) custom_ops = load( diff --git a/test/custom_op/test_custom_tensor_operator.py b/test/custom_op/test_custom_tensor_operator.py index dd60c0f29ec424..4e524b2f5b16bc 100644 --- a/test/custom_op/test_custom_tensor_operator.py +++ b/test/custom_op/test_custom_tensor_operator.py @@ -29,7 +29,7 @@ get_build_directory() ) if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) diff --git a/test/custom_op/test_dispatch_jit.py b/test/custom_op/test_dispatch_jit.py index db144675a58bea..ac073e06fbac0c 100644 --- a/test/custom_op/test_dispatch_jit.py +++ b/test/custom_op/test_dispatch_jit.py @@ -24,9 +24,9 @@ # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\dispatch_op\\dispatch_op.pyd'.format(get_build_directory()) +file = f'{get_build_directory()}\\dispatch_op\\dispatch_op.pyd' if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) dispatch_op = load( @@ -52,7 +52,7 @@ def run_dispatch_test(self, func, dtype): np.testing.assert_array_equal( np_x, np_out, - err_msg='custom op x: {},\n custom op out: {}'.format(np_x, np_out), + err_msg=f'custom op x: {np_x},\n custom op out: {np_out}', ) def test_dispatch_integer(self): diff --git a/test/custom_op/test_multi_out_jit.py b/test/custom_op/test_multi_out_jit.py index 7e252e048b64c9..8582650a98612b 100644 --- a/test/custom_op/test_multi_out_jit.py +++ b/test/custom_op/test_multi_out_jit.py @@ -24,9 +24,9 @@ # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. -file = '{}\\multi_out_jit\\multi_out_jit.pyd'.format(get_build_directory()) +file = f'{get_build_directory()}\\multi_out_jit\\multi_out_jit.pyd' if os.name == 'nt' and os.path.isfile(file): - cmd = 'del {}'.format(file) + cmd = f'del {file}' run_cmd(cmd, True) # Compile and load custom op Just-In-Time. 
diff --git a/test/custom_runtime/custom_device_multi_process_collective.py b/test/custom_runtime/custom_device_multi_process_collective.py index b658b885176f37..d229c44d01cd82 100644 --- a/test/custom_runtime/custom_device_multi_process_collective.py +++ b/test/custom_runtime/custom_device_multi_process_collective.py @@ -39,9 +39,7 @@ def train(prefix): ) print(details) - with open( - "multi_process_{}.check_{}.log".format(prefix, trainer_id), "w" - ) as f: + with open(f"multi_process_{prefix}.check_{trainer_id}.log", "w") as f: f.write(details) diff --git a/test/custom_runtime/test_collective_process_group_xccl.py b/test/custom_runtime/test_collective_process_group_xccl.py index 8a1f90359f0c14..d96072cbc654d7 100644 --- a/test/custom_runtime/test_collective_process_group_xccl.py +++ b/test/custom_runtime/test_collective_process_group_xccl.py @@ -59,14 +59,14 @@ def start_local_trainers( current_env.update(proc_env) - print("trainer proc env:{}".format(current_env)) + print(f"trainer proc env:{current_env}") if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': cmd = "python -m coverage run --branch -p " + training_script else: cmd = "python -u " + training_script - print("start trainer proc:{} env:{}".format(cmd, proc_env)) + print(f"start trainer proc:{cmd} env:{proc_env}") fn = open("workerlog.%d" % idx, "a") proc = subprocess.Popen( @@ -138,7 +138,7 @@ def run_mnist_2custom_cpu(self, target_file_name, eager_mode=True): alive = watch_local_trainers(procs, cluster.trainers_endpoints()) if not alive: - print("Local procs complete, POD info:{}".format(pod)) + print(f"Local procs complete, POD info:{pod}") break time.sleep(3) diff --git a/test/ipu/distributed/test_dist_pod128_sample.py b/test/ipu/distributed/test_dist_pod128_sample.py index bbca3516b05a8d..40a081a356ce05 100644 --- a/test/ipu/distributed/test_dist_pod128_sample.py +++ b/test/ipu/distributed/test_dist_pod128_sample.py @@ -109,7 +109,7 @@ def TestDistTraining(): res = exe.run( program, feed={"x": input_data}, fetch_list=fetch_list ) - print("index: {}, result: {}".format(i, res)) + print(f"index: {i}, result: {res}") if __name__ == "__main__": diff --git a/test/legacy_test/test_dist_hapi_model.py b/test/legacy_test/test_dist_hapi_model.py index cc3e2508606ce0..314a7621f07fc8 100644 --- a/test/legacy_test/test_dist_hapi_model.py +++ b/test/legacy_test/test_dist_hapi_model.py @@ -79,14 +79,14 @@ def start_local_trainers( current_env.update(proc_env) - print("trainer proc env:{}".format(current_env)) + print(f"trainer proc env:{current_env}") if os.getenv('WITH_COVERAGE', 'OFF') == 'ON': cmd = "python -m coverage run --branch -p " + training_script else: cmd = "python -u " + training_script - print("start trainer proc:{} env:{}".format(cmd, proc_env)) + print(f"start trainer proc:{cmd} env:{proc_env}") fn = None @@ -125,7 +125,7 @@ def run_mnist_2gpu(self, target_file_name): alive = watch_local_trainers(procs, cluster.trainers_nranks()) if not alive: - print("Local procs complete, POD info:{}".format(pod)) + print(f"Local procs complete, POD info:{pod}") break time.sleep(3) diff --git a/test/legacy_test/test_metrics.py b/test/legacy_test/test_metrics.py index f720fd9ddb2593..d61e496709bbc4 100644 --- a/test/legacy_test/test_metrics.py +++ b/test/legacy_test/test_metrics.py @@ -174,7 +174,7 @@ def test_main(self): np.array(res_f, dtype='float64'), rtol=1e-3, ) - ), "Accuracy precision error: {} != {}".format(res_m, res_f) + ), f"Accuracy precision error: {res_m} != {res_f}" acc.reset() assert np.sum(acc.total) == 0 assert 
np.sum(acc.count) == 0 @@ -230,7 +230,7 @@ def test_main(self): res_f = accuracy(pred, label, self.topk) assert np.all( np.isclose(np.array(res_m), np.array(res_f), rtol=1e-3) - ), "Accuracy precision error: {} != {}".format(res_m, res_f) + ), f"Accuracy precision error: {res_m} != {res_f}" acc.reset() assert np.sum(acc.total) == 0 assert np.sum(acc.count) == 0 diff --git a/test/rpc/rpc_launch_sync_add.py b/test/rpc/rpc_launch_sync_add.py index 234c57f69d28af..391ecb30dcd61e 100644 --- a/test/rpc/rpc_launch_sync_add.py +++ b/test/rpc/rpc_launch_sync_add.py @@ -33,7 +33,7 @@ def rpc_add(to, args): def worker_name(rank): - return "worker{}".format(rank) + return f"worker{rank}" def main(): diff --git a/test/rpc/test_rpc.py b/test/rpc/test_rpc.py index ce9e8b3a00008f..80a3ea8c1aaf7a 100644 --- a/test/rpc/test_rpc.py +++ b/test/rpc/test_rpc.py @@ -25,7 +25,7 @@ def worker_name(rank): - return "worker{}".format(rank) + return f"worker{rank}" def paddle_add(a, b): @@ -80,7 +80,7 @@ def test_two_server_async_paddle_add(self): class TestSingleProcessRpc(RpcTestBase): def setUp(self): self._port_set = set() - master_endpoint = "127.0.0.1:{}".format(self._find_free_port()) + master_endpoint = f"127.0.0.1:{self._find_free_port()}" dist.rpc.init_rpc(worker_name(0), 0, 1, master_endpoint) print("Single Process RPC setUp...") diff --git a/test/rpc/test_rpc_base.py b/test/rpc/test_rpc_base.py index 06d9b11db66089..1cde3b51d734be 100644 --- a/test/rpc/test_rpc_base.py +++ b/test/rpc/test_rpc_base.py @@ -25,7 +25,7 @@ def worker_name(rank): - return "worker{}".format(rank) + return f"worker{rank}" def run_rpc_sync( @@ -143,7 +143,7 @@ def __free_port(): def run_rpc(self, sync, world_size, fn, fn_args=None, fn_kwargs=None): self.processes = [] queues = [] - master_endpoint = "127.0.0.1:{}".format(self._find_free_port()) + master_endpoint = f"127.0.0.1:{self._find_free_port()}" for rank in range(world_size): q = Queue() queues.append(q) @@ -229,7 +229,7 @@ def remove_data(self): os.remove("rpc_launch_data2.npy") def launch_rpc(self, nnodes, nproc_per_node, model_file): - master_endpoint = "127.0.0.1:{}".format(self._find_free_port()) + master_endpoint = f"127.0.0.1:{self._find_free_port()}" log_dir = "log" tr_cmd = "python -m paddle.distributed.launch --master {} --rank {} --nnodes {} --nproc_per_node {} --run_mode rpc {} --log_dir {}" cmds = [ diff --git a/tools/CheckPRTemplate.py b/tools/CheckPRTemplate.py index 8812cb597bf9e1..d8580e79405855 100644 --- a/tools/CheckPRTemplate.py +++ b/tools/CheckPRTemplate.py @@ -54,14 +54,14 @@ def parameter_accuracy(body): value = PR_dic[key].strip().split(',') single_mess = '' if len(value) == 1 and value[0] == '': - message += '%s should be in %s. but now is None.' % (key, test_list) + message += f'{key} should be in {test_list}. but now is None.' else: for i in value: i = i.strip().lower() if i not in test_list_lower: single_mess += '%s.' % i if len(single_mess) != 0: - message += '%s should be in %s. but now is [%s].' % ( + message += '{} should be in {}. 
but now is [{}].'.format( key, test_list, single_mess, @@ -120,7 +120,7 @@ def pull_request_event_template(event, repo, *args, **kwargs): check_pr_template, check_pr_template_message = checkPRTemplate( repo, BODY, CHECK_TEMPLATE ) - print("check_pr_template: %s pr: %s" % (check_pr_template, pr_num)) + print(f"check_pr_template: {check_pr_template} pr: {pr_num}") if check_pr_template is False: print("ERROR MESSAGE:", check_pr_template_message) sys.exit(7) diff --git a/tools/CrossStackProfiler/CspFileReader.py b/tools/CrossStackProfiler/CspFileReader.py index db43cc536186a6..fbf7d7f9c6fb11 100755 --- a/tools/CrossStackProfiler/CspFileReader.py +++ b/tools/CrossStackProfiler/CspFileReader.py @@ -116,7 +116,7 @@ def _checkArgsKey(self, key, type): % (key, type) ) - exec("self._%s = self._args[\"%s\"]" % (key, key)) + exec(f"self._{key} = self._args[\"{key}\"]") def _align_ts(self, ts): return ts - self._minTimeStamp diff --git a/tools/analysisPyXml.py b/tools/analysisPyXml.py index 99c9959aa25026..f6c86619998aa9 100644 --- a/tools/analysisPyXml.py +++ b/tools/analysisPyXml.py @@ -21,13 +21,13 @@ def analysisPyXml(rootPath, ut): - xml_path = '%s/build/pytest/%s/python-coverage.xml' % (rootPath, ut) - related_ut_map_file = '%s/build/ut_map/%s/related_%s.txt' % ( + xml_path = f'{rootPath}/build/pytest/{ut}/python-coverage.xml' + related_ut_map_file = '{}/build/ut_map/{}/related_{}.txt'.format( rootPath, ut, ut, ) - notrelated_ut_map_file = '%s/build/ut_map/%s/notrelated_%s.txt' % ( + notrelated_ut_map_file = '{}/build/ut_map/{}/notrelated_{}.txt'.format( rootPath, ut, ut, @@ -44,7 +44,7 @@ def analysisPyXml(rootPath, ut): line_hits = int(line.attrib.get('hits')) if line_hits != 0: line_number = int(line.attrib.get('number')) - command = 'sed -n %sp %s' % (line_number, clazz_filename) + command = f'sed -n {line_number}p {clazz_filename}' _code, output = commands.getstatusoutput(command) if _code == 0: if not output.strip().startswith( @@ -83,11 +83,9 @@ def analysisPyXml(rootPath, ut): else: coverageMessage = 'NOT_RELATED' if coverageMessage in ['NOT_RELATED', 'ERROR', 'FILTER']: - os.system( - 'echo %s >> %s' % (clazz_filename, notrelated_ut_map_file) - ) + os.system(f'echo {clazz_filename} >> {notrelated_ut_map_file}') elif coverageMessage == 'RELATED': - os.system('echo %s >> %s' % (clazz_filename, related_ut_map_file)) + os.system(f'echo {clazz_filename} >> {related_ut_map_file}') print("============len(pyCov_file)") print(len(pyCov_file)) diff --git a/tools/check_op_benchmark_result.py b/tools/check_op_benchmark_result.py index 07fab0ff6d354c..4396734a854913 100644 --- a/tools/check_op_benchmark_result.py +++ b/tools/check_op_benchmark_result.py @@ -29,7 +29,7 @@ def parse_case_name(log_file_name): case_id, case_info = log_file_name.split("-") direction = case_info.split(".")[0].split("_")[-1] - return "%s (%s)" % (case_id, direction) + return f"{case_id} ({direction})" def parse_log_file(log_file): @@ -144,7 +144,7 @@ def update_api_info_file(fail_case_list, api_info_file): line_list = line.split(',') case = line_list[0].split(':')[0] if case in fail_case_dict: - line_list[0] = "%s:%s" % (case, fail_case_dict[case]) + line_list[0] = f"{case}:{fail_case_dict[case]}" api_info_list.append(','.join(line_list)) # update api info file diff --git a/tools/check_op_desc.py b/tools/check_op_desc.py index 7c952460d55458..ce234589564a2c 100644 --- a/tools/check_op_desc.py +++ b/tools/check_op_desc.py @@ -321,15 +321,15 @@ def print_desc_error_message(error_message): "Op desc error for the changes of 
Inputs/Outputs/Attrs of OPs:\n" ) for op_name in error_message: - print("For OP '{}':".format(op_name)) + print(f"For OP '{op_name}':") # 1. print inputs error message Inputs_error = error_message.get(op_name, {}).get(INPUTS, {}) for name in Inputs_error.get(ADD_DISPENSABLE, {}): - print(" * The added Input '{}' is not dispensable.".format(name)) + print(f" * The added Input '{name}' is not dispensable.") for name in Inputs_error.get(DELETE, {}): - print(" * The Input '{}' is deleted.".format(name)) + print(f" * The Input '{name}' is deleted.") for name in Inputs_error.get(CHANGE, {}): changed_args = Inputs_error.get(CHANGE, {}).get(name, {}) @@ -358,10 +358,10 @@ def print_desc_error_message(error_message): # 2. print outputs error message Outputs_error = error_message.get(op_name, {}).get(OUTPUTS, {}) for name in Outputs_error.get(ADD_DISPENSABLE, {}): - print(" * The added Output '{}' is not dispensable.".format(name)) + print(f" * The added Output '{name}' is not dispensable.") for name in Outputs_error.get(DELETE, {}): - print(" * The Output '{}' is deleted.".format(name)) + print(f" * The Output '{name}' is deleted.") for name in Outputs_error.get(CHANGE, {}): changed_args = Outputs_error.get(CHANGE, {}).get(name, {}) @@ -390,12 +390,10 @@ def print_desc_error_message(error_message): # 3. print attrs error message attrs_error = error_message.get(op_name, {}).get(ATTRS, {}) for name in attrs_error.get(ADD_WITH_DEFAULT, {}): - print( - " * The added attr '{}' doesn't set default value.".format(name) - ) + print(f" * The added attr '{name}' doesn't set default value.") for name in attrs_error.get(DELETE, {}): - print(" * The attr '{}' is deleted.".format(name)) + print(f" * The attr '{name}' is deleted.") for name in attrs_error.get(CHANGE, {}): changed_args = attrs_error.get(CHANGE, {}).get(name, {}) @@ -430,7 +428,7 @@ def print_version_error_message(error_message): "Operator registration error for the changes of Inputs/Outputs/Attrs of OPs:\n" ) for op_name in error_message: - print("For OP '{}':".format(op_name)) + print(f"For OP '{op_name}':") # 1. print inputs error message inputs_error = error_message.get(op_name, {}).get(INPUTS, {}) diff --git a/tools/check_ut.py b/tools/check_ut.py index b91f762c33dd12..48d887f246e53c 100644 --- a/tools/check_ut.py +++ b/tools/check_ut.py @@ -48,7 +48,7 @@ def check(self, filename, msg): with open(filename) as f: for l in f: if l.rstrip('\r\n') == user: - print('{} {}'.format(user, msg)) + print(f'{user} {msg}') if __name__ == '__main__': diff --git a/tools/continuous_integration/bisect.py b/tools/continuous_integration/bisect.py index bcfe3e90150dcb..cc1e6cd1b80406 100644 --- a/tools/continuous_integration/bisect.py +++ b/tools/continuous_integration/bisect.py @@ -66,13 +66,13 @@ args = parser.parse_args() if not args.log_file: - args.log_file = '/tmp/%s...%s.log' % (args.good_commit, args.bad_commit) + args.log_file = f'/tmp/{args.good_commit}...{args.bad_commit}.log' def print_arguments(): print('----------- Configuration Arguments -----------') for arg, value in sorted(vars(args).iteritems()): - print('%s: %s' % (arg, value)) + print(f'{arg}: {value}') print('------------------------------------------------') @@ -128,12 +128,12 @@ def print_arguments(): try: subprocess.check_output([cmd], shell=True) except subprocess.CalledProcessError as e: - sys.stderr.write('failed to build commit: %s\n%s\n' % (pick, e)) + sys.stderr.write(f'failed to build commit: {pick}\n{e}\n') sys.exit() # test the selected branch. 
passed = True try: - cmd = 'ctest --repeat-until-fail %s -R %s >> %s' % ( + cmd = 'ctest --repeat-until-fail {} -R {} >> {}'.format( args.test_times, args.test_target, args.log_file, @@ -143,7 +143,7 @@ def print_arguments(): except subprocess.CalledProcessError as e: passed = False last_culprit = pick - sys.stdout.write('eval %s passed: %s\n' % (pick, passed)) + sys.stdout.write(f'eval {pick} passed: {passed}\n') if passed: if pick_idx == 0: break diff --git a/tools/count_api_without_core_ops.py b/tools/count_api_without_core_ops.py index 58095d517bb639..0a5da2a8fa5d49 100644 --- a/tools/count_api_without_core_ops.py +++ b/tools/count_api_without_core_ops.py @@ -111,7 +111,7 @@ def visit_member(parent_name, member, func): return else: raise RuntimeError( - "Unsupported generate signature of member, type {0}".format( + "Unsupported generate signature of member, type {}".format( str(type(member)) ) ) diff --git a/tools/coverage/coverage_diff.py b/tools/coverage/coverage_diff.py index 4ba3a6758e5eb4..ebb9049c5d761a 100644 --- a/tools/coverage/coverage_diff.py +++ b/tools/coverage/coverage_diff.py @@ -103,12 +103,12 @@ def get_info_file_lines(info_file, diff_file): continue elif line.startswith('LF:'): - print('LF:{}'.format(current_lf)) + print(f'LF:{current_lf}') continue elif line.startswith('LH:'): - print('LH:{}'.format(current_lh)) + print(f'LH:{current_lh}') continue diff --git a/tools/coverage/coverage_lines.py b/tools/coverage/coverage_lines.py index 60df3d0c10addf..d1afc7b645d110 100644 --- a/tools/coverage/coverage_lines.py +++ b/tools/coverage/coverage_lines.py @@ -60,7 +60,7 @@ def get_lines(info_file): expected = float(sys.argv[2]) if not os.path.isfile(info_file): - print('info file {} is not exists, ignored'.format(info_file)) + print(f'info file {info_file} is not exists, ignored') sys.exit() actual = get_lines(info_file) diff --git a/tools/coverage/gcda_clean.py b/tools/coverage/gcda_clean.py index bf3ef6b7333645..96f5b54a854eea 100644 --- a/tools/coverage/gcda_clean.py +++ b/tools/coverage/gcda_clean.py @@ -80,7 +80,7 @@ def clean(pull_id): changed = [] for file in get_files(pull_id): - changed.append('/paddle/build/{}.gcda'.format(file)) + changed.append(f'/paddle/build/{file}.gcda') for parent, dirs, files in os.walk('/paddle/build/'): for gcda in files: diff --git a/tools/coverage/pull_request.py b/tools/coverage/pull_request.py index 508c2fb407b463..dc776999552ba1 100644 --- a/tools/coverage/pull_request.py +++ b/tools/coverage/pull_request.py @@ -53,7 +53,7 @@ def get_files(args): pull = get_pull(args.pull_id) for file in pull.get_files(): - print('/paddle/{}'.format(file.filename)) + print(f'/paddle/{file.filename}') def diff(args): @@ -68,7 +68,7 @@ def diff(args): pull = get_pull(args.pull_id) for file in pull.get_files(): - print('+++ {}'.format(file.filename)) + print(f'+++ {file.filename}') print(file.patch) diff --git a/tools/coverage/python_coverage.py b/tools/coverage/python_coverage.py index 906f04e9ba457c..7132b119b4c0e4 100644 --- a/tools/coverage/python_coverage.py +++ b/tools/coverage/python_coverage.py @@ -40,7 +40,7 @@ continue print('TN:') - print('SF:{}'.format(clazz_filename)) + print(f'SF:{clazz_filename}') branch_index = 0 @@ -77,6 +77,6 @@ ) branch_index += 1 - print('DA:{},{}'.format(line_number, line_hits)) + print(f'DA:{line_number},{line_hits}') print('end_of_record') diff --git a/tools/externalError/spider.py b/tools/externalError/spider.py index 50e4a3815b8ff4..7f57f81b584e53 100644 --- a/tools/externalError/spider.py +++ 
b/tools/externalError/spider.py @@ -80,7 +80,7 @@ def parsing(externalErrorDesc): _Messages.code = int(m_type[1], 16) else: raise ValueError - _Messages.message = "'%s'. %s" % (m_type[0], m_message) + _Messages.message = f"'{m_type[0]}'. {m_message}" print("End crawling errorMessage for nvidia CUDA API!\n") # ***********************************************************************************************# @@ -111,7 +111,7 @@ def parsing(externalErrorDesc): _Messages.code = int(m_type[1], 16) else: raise ValueError - _Messages.message = "'%s'. %s" % (m_type[0], m_message) + _Messages.message = f"'{m_type[0]}'. {m_message}" print("End crawling errorMessage for nvidia CURAND API!\n") # **************************************************************************************************# @@ -189,7 +189,7 @@ def parsing(externalErrorDesc): _Messages = allMessageDesc.messages.add() _Messages.code = int(cudnnStatus_t[error[0]]) - _Messages.message = "'%s'. %s" % (error[0], m_message) + _Messages.message = f"'{error[0]}'. {m_message}" print("End crawling errorMessage for nvidia CUDNN API!\n") # *************************************************************************************************# @@ -243,7 +243,7 @@ def parsing(externalErrorDesc): _Messages = allMessageDesc.messages.add() _Messages.code = int(cublasStatus_t[error[0]]) - _Messages.message = "'%s'. %s" % (error[0], m_message) + _Messages.message = f"'{error[0]}'. {m_message}" print("End crawling errorMessage for nvidia CUBLAS API!\n") # *************************************************************************************************# @@ -320,7 +320,7 @@ def parsing(externalErrorDesc): _Messages = allMessageDesc.messages.add() _Messages.code = int(cusolverStatus_t[error[0]]) - _Messages.message = "'%s'. %s" % (error[0], m_message) + _Messages.message = f"'{error[0]}'. {m_message}" print("End crawling errorMessage for nvidia CUSOLVER API!\n") # **********************************************************************************************# @@ -339,7 +339,7 @@ def parsing(externalErrorDesc): m_message = re.sub(r'\n', '', error[2]) _Messages = allMessageDesc.messages.add() _Messages.code = int(error[1]) - _Messages.message = "'%s'. %s" % (error[0], m_message) + _Messages.message = f"'{error[0]}'. {m_message}" print("End crawling errorMessage for nvidia NCCL API!\n") # *************************************************************************************************# @@ -361,7 +361,7 @@ def handle_data(self, data): status, code, desc = re.split('=|//', line.strip()) _Messages = allMessageDesc.messages.add() _Messages.code = int(code.strip(' ,')) - _Messages.message = "'%s'. %s" % ( + _Messages.message = "'{}'. 
{}".format( status.strip(), desc.strip(), ) diff --git a/tools/get_pr_ut.py b/tools/get_pr_ut.py index 91b3cb4d927dcd..380dcb14d9041a 100644 --- a/tools/get_pr_ut.py +++ b/tools/get_pr_ut.py @@ -64,7 +64,7 @@ def init(self): try: commits = self.pr.get_commits().get_page(ix) if len(commits) == 0: - raise ValueError("no commit found in {} page".format(ix)) + raise ValueError(f"no commit found in {ix} page") last_commit = commits[-1].commit except Exception as e: break @@ -87,7 +87,7 @@ def __wget_with_retry(self, url): else: proxy = '--no-proxy' code = subprocess.call( - 'wget -q {} --no-check-certificate {}'.format(proxy, url), + f'wget -q {proxy} --no-check-certificate {url}', shell=True, ) if code == 0: @@ -265,12 +265,12 @@ def is_only_comment(self, f): for l in diff_lines: if l not in comment_lines: return False - print('PREC {} is only comment'.format(f)) + print(f'PREC {f} is only comment') return True def get_all_count(self): p = subprocess.Popen( - "cd {}build && ctest -N".format(PADDLE_ROOT), + f"cd {PADDLE_ROOT}build && ctest -N", shell=True, stdout=subprocess.PIPE, ) diff --git a/tools/get_single_test_cov.py b/tools/get_single_test_cov.py index ba95a59fc1a81d..e69283aea63a71 100644 --- a/tools/get_single_test_cov.py +++ b/tools/get_single_test_cov.py @@ -23,7 +23,7 @@ def getFNDAFile(rootPath, test): # load base fnda fnda_base_dict = {} - find_file_cmd = os.popen("find %s -name %s.cc" % (rootPath, test)) + find_file_cmd = os.popen(f"find {rootPath} -name {test}.cc") if find_file_cmd.read() != "": print("%s is a c++ unittest" % test) with open( @@ -32,8 +32,8 @@ def getFNDAFile(rootPath, test): ) as load_f: fnda_base_dict = json.load(load_f) # analyse fnda - filename = '%s/build/ut_map/%s/coverage.info.tmp' % (rootPath, test) - fn_filename = '%s/build/ut_map/%s/fnda.tmp' % (rootPath, test) + filename = f'{rootPath}/build/ut_map/{test}/coverage.info.tmp' + fn_filename = f'{rootPath}/build/ut_map/{test}/fnda.tmp' os.system('touch %s' % fn_filename) try: f = open(filename) @@ -45,7 +45,7 @@ def getFNDAFile(rootPath, test): del all_data[0] for gcov_data in all_data: message_list = gcov_data.split('\n') - os.system('echo %s >> %s' % (message_list[1], fn_filename)) + os.system(f'echo {message_list[1]} >> {fn_filename}') if 'FNH:0' not in gcov_data: for message in message_list: if message.startswith('FNDA:') and ( @@ -56,23 +56,23 @@ def getFNDAFile(rootPath, test): symbol = tmp_data[1] if symbol in fnda_base_dict: if (hit - fnda_base_dict[symbol]) > 0: - fnda_str = 'FNDA:%s,%s' % ( + fnda_str = 'FNDA:{},{}'.format( str(hit - fnda_base_dict[symbol]), symbol, ) - os.system('echo %s >> %s' % (fnda_str, fn_filename)) + os.system(f'echo {fnda_str} >> {fn_filename}') else: - os.system('echo %s >> %s' % (message, fn_filename)) + os.system(f'echo {message} >> {fn_filename}') f.close() def analysisFNDAFile(rootPath, test): - related_ut_map_file = '%s/build/ut_map/%s/related_%s.txt' % ( + related_ut_map_file = '{}/build/ut_map/{}/related_{}.txt'.format( rootPath, test, test, ) - notrelated_ut_map_file = '%s/build/ut_map/%s/notrelated_%s.txt' % ( + notrelated_ut_map_file = '{}/build/ut_map/{}/notrelated_{}.txt'.format( rootPath, test, test, @@ -88,12 +88,10 @@ def analysisFNDAFile(rootPath, test): % (related_ut_map_file, related_ut_map_file) ) else: - print( - "make %s and %s failed" % (related_ut_map_file, related_ut_map_file) - ) + print(f"make {related_ut_map_file} and {related_ut_map_file} failed") return - fn_filename = '%s/build/ut_map/%s/fnda.tmp' % (rootPath, test) + fn_filename = 
f'{rootPath}/build/ut_map/{test}/fnda.tmp' try: f = open(fn_filename) print("oepn %s successfully" % fn_filename) @@ -125,13 +123,9 @@ def analysisFNDAFile(rootPath, test): break if not OP_REGIST: related_file_list.append(clazz_filename) - os.system( - 'echo %s >> %s' % (clazz_filename, related_ut_map_file) - ) + os.system(f'echo {clazz_filename} >> {related_ut_map_file}') else: - os.system( - 'echo %s >> %s' % (clazz_filename, notrelated_ut_map_file) - ) + os.system(f'echo {clazz_filename} >> {notrelated_ut_map_file}') else: if clazz_filename != '': if ( @@ -145,7 +139,7 @@ def analysisFNDAFile(rootPath, test): def getBaseFnda(rootPath, test): - filename = '%s/build/ut_map/%s/coverage.info.tmp' % (rootPath, test) + filename = f'{rootPath}/build/ut_map/{test}/coverage.info.tmp' try: f = open(filename) print("oepn %s successfully" % filename) @@ -166,12 +160,12 @@ def getBaseFnda(rootPath, test): symbol_fnda[tmp_data[1]] = int(tmp_data[0]) f.close() - with open("%s/build/ut_map/%s/base_fnda.json" % (rootPath, test), "w") as f: + with open(f"{rootPath}/build/ut_map/{test}/base_fnda.json", "w") as f: json.dump(symbol_fnda, f, indent=4) def getCovinfo(rootPath, test): - ut_map_path = '%s/build/ut_map/%s' % (rootPath, test) + ut_map_path = f'{rootPath}/build/ut_map/{test}' print("start get fluid ===>") cmd_fluid = ( 'cd %s && lcov --capture -d paddle/fluid/ -o paddle/fluid/coverage_fluid.info --rc lcov_branch_coverage=0' diff --git a/tools/get_ut_file_map.py b/tools/get_ut_file_map.py index 1541efaa584ee4..e91f2a263fc7ab 100644 --- a/tools/get_ut_file_map.py +++ b/tools/get_ut_file_map.py @@ -72,17 +72,17 @@ def handle_ut_file_map(rootPath): all_ut_list.append(ut.strip()) f.close() for ut in all_ut_list: - filedir = '%s/build/ut_map/%s' % (rootPath, ut) + filedir = f'{rootPath}/build/ut_map/{ut}' if not os.path.exists(filedir): not_success_file.write('%s\n' % ut) utNotSuccess_list.append(ut) # if fnda.tmp not exists,write the test into prec_delta for ut in files: count = count + 1 - print("ut %s: %s" % (count, ut)) - coverage_info = '%s/%s/fnda.tmp' % (ut_map_path, ut) + print(f"ut {count}: {ut}") + coverage_info = f'{ut_map_path}/{ut}/fnda.tmp' if os.path.exists(coverage_info): - filename = '%s/%s/related_%s.txt' % (ut_map_path, ut, ut) + filename = f'{ut_map_path}/{ut}/related_{ut}.txt' try: f = open(filename) print("oepn %s succesfully" % filename) @@ -118,7 +118,7 @@ def handle_ut_file_map(rootPath): for ut in files: if ut not in utNotSuccess_list: - filename = '%s/%s/notrelated_%s.txt' % (ut_map_path, ut, ut) + filename = f'{ut_map_path}/{ut}/notrelated_{ut}.txt' try: f = open(filename) print("oepn %s succesfully" % filename) @@ -150,7 +150,7 @@ def notsuccessfuc(rootPath): for ut in files: if ut == 'simple_precise_test': continue - coverage_info = '%s/%s/fnda.tmp' % (ut_map_path, ut) + coverage_info = f'{ut_map_path}/{ut}/fnda.tmp' if os.path.exists(coverage_info): pass else: diff --git a/tools/get_ut_mem_map.py b/tools/get_ut_mem_map.py index 12bdc1d49153f6..7d0b7ce9dfad79 100644 --- a/tools/get_ut_mem_map.py +++ b/tools/get_ut_mem_map.py @@ -25,7 +25,7 @@ def get_ut_mem(rootPath): continue ut = f.replace('^', '').replace('$.log', '') case_dic[ut] = {} - filename = '%s/%s' % (parent, f) + filename = f'{parent}/{f}' fi = open(filename, mode='rb') lines = fi.readlines() mem_reserved1 = -1 diff --git a/tools/handle_h_cu_file.py b/tools/handle_h_cu_file.py index 377ca0fa123503..a11c3c3aa81ffc 100644 --- a/tools/handle_h_cu_file.py +++ b/tools/handle_h_cu_file.py @@ -48,7 +48,7 @@ def 
get_h_file_md5(rootPath): lines = f.readlines() for line in lines: line = line.strip() - os.system('md5sum %s >> %s/tools/h_cu_md5.log' % (line, rootPath)) + os.system(f'md5sum {line} >> {rootPath}/tools/h_cu_md5.log') def insert_pile_to_h_file(rootPath): @@ -58,8 +58,8 @@ def insert_pile_to_h_file(rootPath): for line in lines: line = line.strip() func = line.replace('/', '_').replace('.', '_') - os.system('echo "\n#ifndef _PRECISE%s_\n" >> %s' % (func.upper(), line)) - os.system('echo "#define _PRECISE%s_" >> %s' % (func.upper(), line)) + os.system(f'echo "\n#ifndef _PRECISE{func.upper()}_\n" >> {line}') + os.system(f'echo "#define _PRECISE{func.upper()}_" >> {line}') os.system('echo "\n#include \n" >> %s' % line) os.system( 'echo "__attribute__((constructor)) static void calledFirst%s()\n{" >> %s' @@ -115,7 +115,7 @@ def get_h_cu_file(file_path): dir_path = file_path[1] filename = file_path[2] ut = filename.replace('^', '').replace('$', '').replace('.log', '') - ut_path = "%s/build/ut_map/%s" % (rootPath, ut) + ut_path = f"{rootPath}/build/ut_map/{ut}" if os.path.exists(ut_path): os.system( "cat %s/%s | grep 'precise test map fileeee:'| uniq >> %s/build/ut_map/%s/related_%s.txt" diff --git a/tools/parse_kernel_info.py b/tools/parse_kernel_info.py index ef85a0bb4bbd6f..ad5a696bc626c7 100644 --- a/tools/parse_kernel_info.py +++ b/tools/parse_kernel_info.py @@ -74,7 +74,7 @@ def __str__(self): res = "{ " num_floats = int(self.num_ops_for_dtypes["float32"]) for dtype, num in self.num_ops_for_dtypes.items(): - res += "{}: {:4d}".format(dtype, num) + res += f"{dtype}: {num:4d}" if dtype in ["float16", "bfloat16"]: if num_floats != 0: percent = float(self.num_ops_for_dtypes[dtype]) / float( @@ -82,7 +82,7 @@ def __str__(self): ) res += "({:.2f}%)".format(percent * 100) else: - res += "({:.2f}%)".format(0) + res += f"({0:.2f}%)" res += " " res += "}" return res @@ -159,9 +159,9 @@ def main(lib): print( "================================== phi kernels summary ==================================" ) - print("phi function kernels : {}".format(phi_function_kernels_stats)) - print("phi structure kernels : {}".format(phi_structure_kernels_stats)) - print("phi all kernels : {}".format(phi_all_kernels_stats)) + print(f"phi function kernels : {phi_function_kernels_stats}") + print(f"phi structure kernels : {phi_structure_kernels_stats}") + print(f"phi all kernels : {phi_all_kernels_stats}") print("") else: fluid_ops_stats = parse_paddle_kernels(lib, "fluid", print_detail=False) @@ -172,9 +172,9 @@ def main(lib): print( "================================== fluid operators summary ==================================" ) - print("fluid operators : {}".format(fluid_ops_stats)) - print("phi operators : {}".format(phi_ops_stats)) - print("all operators : {}".format(all_ops_stats)) + print(f"fluid operators : {fluid_ops_stats}") + print(f"phi operators : {phi_ops_stats}") + print(f"all operators : {all_ops_stats}") print("") diff --git a/tools/print_signatures.py b/tools/print_signatures.py index b816f5380f6d56..6f4673c2a6ed90 100644 --- a/tools/print_signatures.py +++ b/tools/print_signatures.py @@ -388,12 +388,12 @@ def parse_args(): continue api_info = api_info_dict[all_api_names_to_k[api_name]] print( - "{0} ({2}, ('document', '{1}'))".format( + "{} ({}, ('document', '{}'))".format( api_name, - md5(api_info['docstring']), api_info['signature'] if 'signature' in api_info else 'ArgSpec()', + md5(api_info['docstring']), ) ) @@ -402,7 +402,7 @@ def parse_args(): else: for erroritem in ErrorSet: print( - "Error, new 
function {} is unreachable".format(erroritem), + f"Error, new function {erroritem} is unreachable", file=sys.stderr, ) sys.exit(1) diff --git a/tools/pyCov_multithreading.py b/tools/pyCov_multithreading.py index 073d5d34b61143..b3b4fb328dd945 100644 --- a/tools/pyCov_multithreading.py +++ b/tools/pyCov_multithreading.py @@ -48,13 +48,11 @@ def getPyCovResult(params): ut = params[1] print("ut: %s" % ut) startTime = int(time.time()) - path = '%s/build/pytest/%s' % (rootPath, ut) + path = f'{rootPath}/build/pytest/{ut}' os.system('cd %s && coverage combine `ls python-coverage.data.*`' % path) os.system('cd %s && pwd && coverage xml -i -o python-coverage.xml' % path) xml_path = '%s/python-coverage.xml' % path - os.system( - "python2.7 %s/tools/analysisPyXml.py %s %s" % (rootPath, rootPath, ut) - ) + os.system(f"python2.7 {rootPath}/tools/analysisPyXml.py {rootPath} {ut}") endTime = int(time.time()) print('pyCov Time: %s' % (endTime - startTime)) diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py index 747f2020f64089..21697b8a5d7b31 100644 --- a/tools/sampcd_processor.py +++ b/tools/sampcd_processor.py @@ -381,7 +381,7 @@ def sampcd_extract_to_file(srccom, name, htype="def", hname=""): SAMPLECODE_TEMPDIR, '{}_example{}'.format( name, - '.py' if len(codeblocks) == 1 else '_{}.py'.format(y + 1), + '.py' if len(codeblocks) == 1 else f'_{y + 1}.py', ), ) with open(tfname, 'w') as tempf: @@ -780,7 +780,7 @@ def parse_args(): ) if args.debug: for k, v in timeovered_test.items(): - logger.info('{} - {}s'.format(k, v)) + logger.info(f'{k} - {v}s') if len(SUMMARY_INFO['success']): logger.info( "%d sample codes ran success", len(SUMMARY_INFO['success']) diff --git a/tools/summary_env.py b/tools/summary_env.py index 2cd4ade4b06c94..42019c280d666e 100644 --- a/tools/summary_env.py +++ b/tools/summary_env.py @@ -61,7 +61,7 @@ def get_os_info(): else: plat = 'N/A' ver = 'N/A' - envs['os_info'] = "{0} {1}".format(plat, ver) + envs['os_info'] = f"{plat} {ver}" def get_gcc_version(): @@ -164,7 +164,7 @@ def _get_cudnn_ver(cmd): ) if major != 'N/A': - envs['cudnn_version'] = "{0}.{1}.{2}".format(major, minor, patch_level) + envs['cudnn_version'] = f"{major}.{minor}.{patch_level}" else: envs['cudnn_version'] = 'N/A' diff --git a/tools/windows/get_prec_ut_list.py b/tools/windows/get_prec_ut_list.py index b5bb6a23673ac5..1ed65fd0d5f2fa 100644 --- a/tools/windows/get_prec_ut_list.py +++ b/tools/windows/get_prec_ut_list.py @@ -31,7 +31,7 @@ def get_prec_ut_list(all_test_cases, prec_test_cases): if case in prec_test_cases_list_new: case_to_run.append(case) else: - print("{} will not run in PRECISION_TEST mode.".format(case)) + print(f"{case} will not run in PRECISION_TEST mode.") with open(file_path, 'w') as f: f.write('\n'.join(case_to_run))
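
The edits above all apply one mechanical pattern: printf-style % interpolation and str.format() calls with purely positional arguments are rewritten as f-strings, while format() calls whose arguments already span several lines only drop their explicit {0}/{1} indices. A minimal sketch of the equivalence, using hypothetical names (worker_id, log_dir) rather than code taken from the patch:

    worker_id = 3
    log_dir = "/tmp/logs"

    # printf-style and positional str.format(), as in the removed lines
    old_a = "worker%s writes to %s" % (worker_id, log_dir)
    old_b = "worker{0} writes to {1}".format(worker_id, log_dir)

    # equivalent f-string, as in the added lines
    new = f"worker{worker_id} writes to {log_dir}"

    # all three render the identical string
    assert old_a == old_b == new

Because the rendered strings are identical, the rewrite affects readability only, not runtime behavior.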