Fix transition artifacts

mike9251 · mike9251 · commit e3ac35e9bac9 · 2022-07-13T11:44:47.000+03:00
* Clip final mask to 0...1
* Added parameter smooth_mask_value
* Added parameter validation
diff --git a/app.py b/app.py
@@ -22,11 +22,6 @@ def __init__(self, config: DictConfig):
         att_video_path = Path(config.data.att_video)
         output_dir = Path(config.data.output_dir)
 
-        device = config.pipeline.device
-
-        crop_size = config.pipeline.crop_size
-        use_mask = True
-
         assert id_image_path.exists(), f"Can't find {id_image_path} file!"
 
         self.id_image: Optional[np.ndarray] = imread_rgb(id_image_path)
@@ -49,10 +44,7 @@ def __init__(self, config: DictConfig):
 
         self.model = SimSwap(config=config.pipeline,
                              id_image=self.id_image,
-                             specific_image=self.specific_id_image,
-                             use_mask=use_mask,
-                             crop_size=crop_size,
-                             device=device)
+                             specific_image=self.specific_id_image)
 
     def run(self):
         for _ in tqdm(range(len(self.data_manager))):
diff --git a/configs/run_image.yaml b/configs/run_image.yaml
@@ -13,9 +13,10 @@ pipeline:
   device: "cuda"
   crop_size: 224
   # it seems that the official 224 checkpoint works better with 'none' face alignment type
-  checkpoint_type: "official_224" #"none
+  checkpoint_type: "official_224" #"none"
   face_alignment_type: "none" #"ffhq"
-  erosion_kernel_size: 4
+  erode_mask_value: 40
+  smooth_mask_value: 41
   face_detector_threshold: 0.6
   specific_latent_match_threshold: 0.05
 
diff --git a/configs/run_image_specific.yaml b/configs/run_image_specific.yaml
@@ -13,9 +13,10 @@ pipeline:
   device: "cuda"
   crop_size: 224
   # it seems that the official 224 checkpoint works better with 'none' face alignment type
-  checkpoint_type: "official_224" #"none
+  checkpoint_type: "official_224" #"none"
   face_alignment_type: "none" #"ffhq"
-  erosion_kernel_size: 4
+  erode_mask_value: 40
+  smooth_mask_value: 41
   face_detector_threshold: 0.6
   specific_latent_match_threshold: 0.05
 
diff --git a/configs/run_video.yaml b/configs/run_video.yaml
@@ -13,9 +13,10 @@ pipeline:
   device: "cuda"
   crop_size: 224
   # it seems that the official 224 checkpoint works better with 'none' face alignment type
-  checkpoint_type: "official_224" #"none
+  checkpoint_type: "official_224" #"none"
   face_alignment_type: "none" #"ffhq"
-  erosion_kernel_size: 4
+  erode_mask_value: 40
+  smooth_mask_value: 41
   face_detector_threshold: 0.6
   specific_latent_match_threshold: 0.05
 
diff --git a/configs/run_video_specific.yaml b/configs/run_video_specific.yaml
@@ -13,9 +13,10 @@ pipeline:
   device: "cuda"
   crop_size: 224
   # it seems that the official 224 checkpoint works better with 'none' face alignment type
-  checkpoint_type: "official_224" #"none
+  checkpoint_type: "official_224" #"none"
   face_alignment_type: "none" #"ffhq"
-  erosion_kernel_size: 4
+  erode_mask_value: 40
+  smooth_mask_value: 41
   face_detector_threshold: 0.6
   specific_latent_match_threshold: 0.05
 
diff --git a/src/Generator/fs_networks_fix.py b/src/Generator/fs_networks_fix.py
@@ -94,7 +94,7 @@ def __init__(self, input_nc: int,
                  latent_size: int,
                  n_blocks: int=6,
                  deep: bool = False,
-                 checkpoint_type: str = 'none',
+                 use_last_act: bool = True,
                  norm_layer: torch.nn.Module = nn.BatchNorm2d,
                  padding_type: str = 'reflect'):
         assert (n_blocks >= 0)
@@ -103,7 +103,7 @@ def __init__(self, input_nc: int,
         activation = nn.ReLU(True)
 
         self.deep = deep
-        self.checkpoint_type = checkpoint_type
+        self.use_last_act = use_last_act
 
         self.to_tensor_normalize = transforms.Compose([
             transforms.ToTensor(),
@@ -159,7 +159,7 @@ def __init__(self, input_nc: int,
             nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
             nn.BatchNorm2d(64), activation
         )
-        if self.checkpoint_type == "official_224":
+        if self.use_last_act:
             self.last_layer = nn.Sequential(nn.ReflectionPad2d(3), nn.Conv2d(64, output_nc, kernel_size=7, padding=0),
                                             torch.nn.Tanh())
         else:
@@ -173,7 +173,7 @@ def to(self, device):
         return self
 
     def forward(self, x: Iterable[np.ndarray], dlatents: torch.Tensor):
-        if self.checkpoint_type == "official_224":
+        if self.use_last_act:
             x = [self.to_tensor(_) for _ in x]
         else:
             x = [self.to_tensor_normalize(_) for _ in x]
@@ -202,7 +202,7 @@ def forward(self, x: Iterable[np.ndarray], dlatents: torch.Tensor):
         x = self.up1(x)
         x = self.last_layer(x)
 
-        if self.checkpoint_type == "official_224":
+        if self.use_last_act:
             x = (x + 1) / 2
         else:
             x = x * self.imagenet_std + self.imagenet_mean
diff --git a/src/Misc/types.py b/src/Misc/types.py
@@ -0,0 +1,11 @@
+from enum import Enum
+
+
+class CheckpointType(Enum):
+    OFFICIAL_224 = "official_224"
+    UNOFFICIAL = "none"
+
+
+class FaceAlignmentType(Enum):
+    FFHQ = "ffhq"
+    DEFAULT = "none"
diff --git a/src/Misc/utils.py b/src/Misc/utils.py
@@ -0,0 +1,19 @@
+import torch
+import numpy as np
+
+
+def tensor2img_denorm(tensor):
+    std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
+    mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
+    tensor = std * tensor.detach().cpu() + mean
+    img = tensor.numpy()
+    img = img.transpose(0, 2, 3, 1)[0]
+    img = np.clip(img * 255, 0.0, 255.0).astype(np.uint8)
+    return img
+
+
+def tensor2img(tensor):
+    tensor = tensor.detach().cpu().numpy()
+    img = tensor.transpose(0, 2, 3, 1)[0]
+    img = np.clip(img * 255, 0.0, 255.0).astype(np.uint8)
+    return img
diff --git a/src/simswap.py b/src/simswap.py
@@ -2,6 +2,7 @@
 import numpy as np
 import torch
 import torch.nn.functional as F
+from enum import Enum
 from typing import Optional, Iterable, Tuple, Union
 from pathlib import Path
 from torchvision import transforms
@@ -16,47 +17,62 @@
 from src.PostProcess.utils import SoftErosion
 from src.Generator.fs_networks_fix import Generator_Adain_Upsample as Generator_Adain_Upsample_224
 from src.Generator.fs_networks_512 import Generator_Adain_Upsample as Generator_Adain_Upsample_512
-
-
-def tensor2img_denorm(tensor):
-    std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
-    mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
-    tensor = std * tensor.detach().cpu() + mean
-    img = tensor.numpy()
-    img = img.transpose(0, 2, 3, 1)[0]
-    img = np.clip(img * 255, 0.0, 255.0).astype(np.uint8)
-    return img
-
-
-def tensor2img(tensor):
-    tensor = tensor.detach().cpu().numpy()
-    img = tensor.transpose(0, 2, 3, 1)[0]
-    img = np.clip(img * 255, 0.0, 255.0).astype(np.uint8)
-    return img
+from src.Misc.types import CheckpointType, FaceAlignmentType
+from src.Misc.utils import tensor2img, tensor2img_denorm
 
 
 class SimSwap:
     def __init__(self,
                  config: DictConfig,
                  id_image: np.ndarray,
-                 specific_image: Optional[np.ndarray] = None,
-                 use_mask: bool = True,
-                 crop_size: int = 224,
-                 device: str = 'cpu'):
+                 specific_image: Optional[np.ndarray] = None):
 
         self.id_image: np.ndarray = id_image
         self.id_latent = None
         self.specific_id_image: Optional[np.ndarray] = specific_image
         self.specific_latent = None
 
-        self.use_mask: bool = use_mask
-        self.crop_size: int = crop_size
-        self.checkpoint_type: str = config.checkpoint_type
-        self.face_alignment_type: str = config.face_alignment_type
-        self.erosion_kernel_size: int = config.erosion_kernel_size
+        self.use_mask: bool = True
+        self.crop_size: int = config.crop_size
+        self.checkpoint_type: CheckpointType = CheckpointType(config.checkpoint_type)
+        self.face_alignment_type: FaceAlignmentType = FaceAlignmentType(config.face_alignment_type)
+        self.erode_mask_value: int = config.erode_mask_value
+        self.smooth_mask_value: int = config.smooth_mask_value
         self.face_detector_threshold: float = config.face_detector_threshold
         self.specific_latent_match_th: float = config.specific_latent_match_threshold
-        self.device = torch.device(device)
+        self.device = torch.device(config.device)
+
+        if self.crop_size < 0:
+            raise f'Invalid crop_size! Must be a positive value.'
+
+        if self.checkpoint_type not in (CheckpointType.OFFICIAL_224, CheckpointType.UNOFFICIAL):
+            raise f'Invalid checkpoint_type! Must be one of the predefined values.'
+
+        if self.face_alignment_type not in (FaceAlignmentType.FFHQ, FaceAlignmentType.DEFAULT):
+            raise f'Invalid face_alignment_type! Must be one of the predefined values.'
+
+        self.use_erosion = True
+        if self.erode_mask_value == 0:
+            self.use_erosion = False
+
+        if self.erode_mask_value < 0:
+            raise f'Invalid erode_mask_value! Must be a positive value.'
+
+        self.use_blur = True
+        if self.smooth_mask_value == 0:
+            self.use_erosion = False
+        elif self.smooth_mask_value > 0:
+            # Make sure it's odd
+            self.smooth_mask_value += 1 if self.smooth_mask_value % 2 == 0 else 0
+
+        if self.smooth_mask_value < 0:
+            raise f"Invalid smooth_mask_value! Must be a positive value."
+
+        if self.face_detector_threshold < 0.0 or self.face_detector_threshold > 1.0:
+            raise f"Invalid face_detector_threshold! Must be a positive value in range [0.0...1.0]."
+
+        if self.specific_latent_match_th < 0.0:
+            raise f"Invalid specific_latent_match_th! Must be a positive value."
 
         # For BiSeNet and for official_224 SimSwap
         self.to_tensor_normalize = transforms.Compose([
@@ -69,7 +85,7 @@ def __init__(self,
 
         self.face_detector = FaceDetector(
             Path(config.face_detector_weights),
-            det_thresh=self.face_detector_threshold, det_size=(640, 640), mode="ffhq", device=device)
+            det_thresh=self.face_detector_threshold, det_size=(640, 640), mode="ffhq", device=self.device.__str__())
 
         self.face_id_net = FaceId(Path(config.face_id_weights)).to(self.device)
 
@@ -81,7 +97,7 @@ def __init__(self,
 
         self.simswap_net = Generator_Adain_Upsample_224(input_nc=3, output_nc=3, latent_size=512, n_blocks=9,
                                                         deep=True if self.crop_size == 512 else False,
-                                                        checkpoint_type=self.checkpoint_type)
+                                                        use_last_act=True if self.checkpoint_type == CheckpointType.OFFICIAL_224 else False)
 
         # if crop_size == 224:
         #     self.simswap_net = Generator_Adain_Upsample_224(input_nc=3, output_nc=3, latent_size=512, n_blocks=9,
@@ -207,21 +223,20 @@ def __call__(self, att_image: np.ndarray) -> np.ndarray:
         # Get np.ndarray with range [0...255]
         img_mask = tensor2img(img_mask / 255.0)
 
-        kernel = np.ones((self.erosion_kernel_size, self.erosion_kernel_size), dtype=np.uint8)
-        img_mask = cv2.erode(img_mask, kernel, iterations=1)
-
-        delta = 1 if self.erosion_kernel_size % 2 == 0 else 0
-        kernel_size = (self.erosion_kernel_size + delta, self.erosion_kernel_size + delta)
+        if self.use_erosion:
+            kernel = np.ones((self.erode_mask_value, self.erode_mask_value), dtype=np.uint8)
+            img_mask = cv2.erode(img_mask, kernel, iterations=1)
 
-        img_mask = cv2.GaussianBlur(img_mask, kernel_size, 0)
+        if self.use_blur:
+            img_mask = cv2.GaussianBlur(img_mask, (self.smooth_mask_value, self.smooth_mask_value), 0)
 
         # Collect all swapped crops
         target_image = torch.sum(target_image, dim=0, keepdim=True)
         target_image = tensor2img(target_image)
 
-        img_mask = img_mask // 255
+        img_mask = np.clip(img_mask / 255, 0.0, 1.0)
 
-        result = img_mask * target_image + (1 - img_mask) * att_image
+        result = (img_mask * target_image + (1 - img_mask) * att_image).astype(np.uint8)
 
         # # torch postprocessing
         # # faster but Erosion with 40x40 kernel requires too much memory and causes OOM.