
Commit 9a6e265

Author: khadijeh.alibabaei
Commit message: update the Augmentation option for training and README.md
1 parent 05bc7b4 commit 9a6e265

File tree: 10 files changed (+173, -60 lines)


README.md

Lines changed: 3 additions & 0 deletions
@@ -231,6 +231,9 @@ deepaas-run --listen-ip 0.0.0.0
 Then, open the Swagger interface, change the hyperparameters in the train section, and click on train.
 
 ><span style="color:Blue">**Note:**</span> Please note that the model training process may take some time depending on the size of your dataset and the complexity of your custom backbone. Once the model is trained, you can use the API to perform inference on new images.
+
+><span style="color:Blue">**Note:**</span> Augmentation Settings:
+among the training arguments, there are options related to augmentation, such as flipping, scaling, etc. The default values are set to automatically activate some of these options during training. If you want to disable augmentation entirely or partially, please review the default values and adjust them accordingly to deactivate the desired augmentations.
 
 # Inference Methods
 
 You can utilize the Swagger interface to upload your images or videos and obtain the following outputs:
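
As the note added above explains, several augmentations are enabled by default. A minimal sketch of turning them off at training time, using the Ultralytics Python API directly (the option names mirror the augmentation fields in api/schemas.py; the weights file, dataset path, and epoch count are placeholders):

```python
from ultralytics import YOLO

# Sketch only: argument names mirror the augmentation fields in api/schemas.py;
# paths and epochs are illustrative placeholders.
model = YOLO("yolov8n.pt")
model.train(
    data="data.yaml",                 # dataset config (placeholder)
    epochs=50,
    hsv_h=0.0, hsv_s=0.0, hsv_v=0.0,  # color augmentations off
    degrees=0.0, translate=0.0, scale=0.0, shear=0.0,  # geometric augmentations off
    flipud=0.0, fliplr=0.0,           # flips off
    mosaic=0.0, mixup=0.0,            # multi-image augmentations off
)
```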

api/__init__.py

Lines changed: 4 additions & 3 deletions
@@ -168,6 +168,7 @@ def train(**args):
             "mlflow": False,
             "datasets_dir": config.DATA_PATH,
             "model_dir": config.MODELS_PATH,
+            "wandb": str(args["disable_wandb"]),
         }
     )
     # Modify the model name based on task type
@@ -210,6 +211,8 @@ def train(**args):
 
     else:
         model = YOLO(args["model"])
+    if "auto_augment" not in args:
+        args["auto_augment"] = None
 
     device = args.get("device", "cpu")
     if device != "cpu" and not torch.cuda.is_available():
@@ -315,15 +318,13 @@ def main():
     args = cmd_parser.parse_args()
 
     main()
-
 
 """
 python3 api/__init__.py train --model yolov8n.yaml\
     --task_type det\
     --data /srv/football-players-detection-7/data.yaml\
     --Enable_MLFLOW --epochs 50
-
-python3 api/__init__.py predict --files \
+python3 api/__init__.py predict --files \
     /srv/yolov8_api/tests/data/det/test/cat1.jpg\
     --task_type det --accept application/json
 """

api/schemas.py

Lines changed: 141 additions & 23 deletions
@@ -95,9 +95,9 @@ class Meta:
         validate=validate.Length(max=2),
         metadata={
             "description": "image size as scalar or (h, w) list,"
-            " i.e. (640, 480)"
+            " i.e. (704, 512). Note: must be multiple of max stride 32"
         },
-        load_default=[640,480]
+        load_default=[704, 512],
     )
 
     conf = fields.Float(
@@ -130,12 +130,12 @@ class Meta:
 
     augment = fields.Boolean(
         metadata={
-            "description": "Apply image augmentation to prediction sources"
+            "description": "Apply image augmentation to prediction sources. "
             "augment for segmentation has not supported yet.",
         },
         load_default=False,
     )
-    classes = fields.List(
+    classes = fields.List(
         fields.Int(),
         metadata={
             "description": "Filter results by class, i.e. class=0, "
@@ -431,61 +431,179 @@ class Meta:
     )
     hsv_h = fields.Float(
         metadata={
-            "description": "Image HSV-Hue augmentation (fraction)"
+            "description": "Augmentation option: adjusts the hue "
+            "of the image by a fraction of"
+            " the color wheel, introducing color "
+            "variability. Helps the model generalize "
+            "across different lighting"
+            " conditions. Range: 0.0 - 1.0"
         },
         load_default=0.015,
     )
     hsv_s = fields.Float(
         metadata={
-            "description": "Image HSV-Saturation augmentation (fraction)"
+            "description": "Augmentation option: Alters the "
+            "saturation of the image by a fraction,"
+            " affecting the intensity of colors. Useful for "
+            "simulating different environmental conditions. "
+            "Range: 0.0 - 1.0"
         },
         load_default=0.7,
     )
     hsv_v = fields.Float(
         metadata={
-            "description": "Image HSV-Value augmentation (fraction)"
+            "description": "Augmentation option: Modifies the value "
+            "(brightness) of the "
+            "image by a fraction, helping the model "
+            "to perform well under various lighting"
+            " conditions. Range: 0.0 - 1.0"
         },
         load_default=0.4,
     )
     degrees = fields.Float(
-        metadata={"description": "Image rotation (+/- deg)"},
-        load_default=0.001,
+        metadata={
+            "description": "Augmentation option: Rotates the"
+            " image randomly within "
+            "the specified degree range, improving"
+            " the model's ability to recognize objects"
+            " at various orientations. Range: -180 - +180"
+        },
+        load_default=0.0,
     )
     translate = fields.Float(
-        metadata={"description": "Image translation (+/- fraction)"},
-        load_default=0.5,
+        metadata={
+            "description": "Augmentation option: Translates the "
+            "image horizontally and"
+            " vertically by a fraction of the image size,"
+            " aiding in learning to detect partially"
+            " visible objects. Range: 0.0 - 1.0"
+        },
+        load_default=0.1,
     )
     scale = fields.Float(
-        metadata={"description": "Image scale (+/- gain)"},
+        metadata={
+            "description": "Augmentation option: Scales the"
+            " image by a gain factor,"
+            " simulating objects at different "
+            "distances from the camera. "
+            "Range: >=0.0"
+        },
         load_default=0.5,
     )
     shear = fields.Float(
-        metadata={"description": "Image shear (+/- deg)"},
-        load_default=0.01,
+        metadata={
+            "description": "Augmentation option: Shears the"
+            " image by a specified "
+            "degree, mimicking the effect of "
+            "objects being viewed from different"
+            " angles. Range: -180 - +180"
+        },
+        load_default=0.0,
     )
     perspective = fields.Float(
         metadata={
-            "description": "Image perspective (+/- fraction), range 0-0.001"
+            "description": "Augmentation option: Applies a"
+            " random perspective transformation"
+            " to the image, enhancing the model's ability"
+            " to understand objects in 3D space. "
+            "Range 0-0.001"
         },
-        load_default=0.01,
+        load_default=0.0,
     )
     flipud = fields.Float(
-        metadata={"description": "Image flip up-down (probability)"},
-        load_default=0.01,
+        metadata={
+            "description": "Augmentation option: Flips the"
+            " image upside down "
+            "with the specified probability,"
+            " increasing the data variability "
+            "without affecting the object's"
+            " characteristics. Range 0.0-1.0"
+        },
+        load_default=0.0,
     )
     fliplr = fields.Float(
         metadata={
-            "description": "Image flip left-right (probability)"
+            "description": "Augmentation option: Flips the"
+            " image left to right "
+            "with the specified probability, "
+            "useful for learning symmetrical "
+            "objects and increasing dataset "
+            "diversity. Range 0.0-1.0"
         },
         load_default=0.5,
     )
     mosaic = fields.Float(
-        metadata={"description": "Image mosaic (probability)"},
-        load_default=1.0,
+        metadata={
+            "description": "Augmentation option:Combines four"
+            " training images "
+            "into one, simulating different "
+            "scene compositions and object "
+            "interactions. Highly effective "
+            "for complex scene understanding."
+            "Range 0.0- 1.0"
+        },
+        load_default=0.1,
     )
     mixup = fields.Float(
-        metadata={"description": "Image mixup (probability)"},
-        load_default=0.01,
+        metadata={
+            "description": "Augmentation option: Blends two "
+            "images and their labels, "
+            "creating a composite image. Enhances "
+            "the model's ability to generalize by "
+            "introducing label noise and visual "
+            "variability. Range 0.0- 1.0"
+        },
+        load_default=0.0,
+    )
+
+    copy_paste = fields.Float(
+        metadata={
+            "description": "Augmentation option: Copies objects"
+            " from one image "
+            "and pastes them onto another, "
+            "useful for increasing object "
+            "instances and learning object "
+            "occlusion. Range 0.0- 1.0"
+        },
+        load_default=0.0,
+    )
+
+    erasing = fields.Float(
+        metadata={
+            "description": "Augmentation option: Randomly erases"
+            " a portion of the"
+            " image during classification "
+            "training, encouraging the model "
+            "to focus on less obvious features "
+            "for recognition."
+            " Range 0.0- 0.9"
+        },
+        load_default=0.4,
+    )
+
+    crop_fraction = fields.Float(
+        metadata={
+            "description": "Augmentation option: Crops the "
+            "classification image to a "
+            "fraction of its size to emphasize "
+            "central features and adapt to object "
+            "scales, reducing background distractions."
+            " Range 0.1- 1.0"
+        },
+        load_default=1.0,
+    )
+    auto_augment = fields.String(
+        metadata={
+            "description": "Automatically applies a predefined "
+            "augmentation policy (randaugment, "
+            "autoaugment, augmix), optimizing for"
+            " classification tasks by diversifying "
+            "the visual features.",
+        },
+        required=False,
+        validate=validate.OneOf(
+            ["randaugment", "autoaugment", "augmix"]
+        ),
     )
 
     disable_wandb = fields.Bool(
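
For context on the new auto_augment field: fields with a load_default fall back to that value when the argument is omitted, while auto_augment, which defines no load_default, is simply absent from the loaded arguments; this is why the train() change in api/__init__.py above fills in None when the key is missing. A minimal standalone sketch (a hypothetical mini-schema mirroring the fields above, not the project's own class):

```python
from marshmallow import Schema, fields, validate

# Hypothetical mini-schema mirroring the augmentation fields above.
class AugSketch(Schema):
    mosaic = fields.Float(load_default=0.1)
    fliplr = fields.Float(load_default=0.5)
    auto_augment = fields.String(
        required=False,
        validate=validate.OneOf(["randaugment", "autoaugment", "augmix"]),
    )

args = AugSketch().load({"mosaic": 0.0})
print(args)                    # e.g. {'mosaic': 0.0, 'fliplr': 0.5}
print("auto_augment" in args)  # False, hence the explicit None in train()
```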

docs/conf.py

Lines changed: 13 additions & 18 deletions
@@ -3,28 +3,24 @@
 # ai4os-yolov8-torch documentation build configuration file, created by
 # sphinx-quickstart.
 #
-# This file is execfile()d with the current directory set to its containing dir.
-#
+# This file is execfile()d with the current directory set
+# to its containing dir.
 # Note that not all possible configuration values are present in this
 # autogenerated file.
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import os
-import sys
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 # sys.path.insert(0, os.path.abspath('.'))
 
-# -- General configuration -----------------------------------------------------
-
+# -- General configuration ---------------
 # If your documentation needs a minimal Sphinx version, state it here.
 # needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be extensions
+# Add any Sphinx extension module names here, as strings.
+# They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = []
 
@@ -66,9 +62,8 @@
 # directories to ignore when looking for source files.
 exclude_patterns = ["_build"]
 
-# The reST default role (used for this markup: `text`) to use for all documents.
-# default_role = None
-
+# The reST default role (used for this markup: `text`)
+# to use for all documents. default_role = None
 # If true, '()' will be appended to :func: etc. cross-reference text.
 # add_function_parentheses = True
 
@@ -87,8 +82,7 @@
 # modindex_common_prefix = []
 
 
-# -- Options for HTML output ---------------------------------------------------
-
+# -- Options for HTML output ------------
 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
 html_theme = "default"
@@ -167,7 +161,7 @@
 htmlhelp_basename = "ai4os-yolov8-torchdoc"
 
 
-# -- Options for LaTeX output --------------------------------------------------
+# -- Options for LaTeX output -------------
 
 latex_elements = {
     # The paper size ('letterpaper' or 'a4paper').
@@ -179,7 +173,8 @@
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass [howto/manual]).
+# (source start file, target name, title, author,
+# documentclass [howto/manual]).
 latex_documents = [
     (
         "index",
@@ -211,7 +206,7 @@
 # latex_domain_indices = True
 
 
-# -- Options for manual page output --------------------------------------------
+# -- Options for manual page output ---------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
@@ -229,7 +224,7 @@
 # man_show_urls = False
 
 
-# -- Options for Texinfo output ------------------------------------------------
+# -- Options for Texinfo output -----
 
 # Grouping the document tree into Texinfo files. List of tuples
 # (source start file, target name, title, author,

metadata.json

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@
 " solution across diverse tasks such as object detection, oriented bounding boxes detection, tracking, instance segmentation, and",
 " image classification. Its refined architecture and innovations make it an ideal choice for",
 " cutting-edge applications in the field of computer vision.\n",
+"**NOTE**: Among the training arguments, there are options related to augmentation, such as flipping, scaling, etc. The default values are set to automatically activate some of these options during training. If you want to disable augmentation entirely or partially, please review the default values and adjust them accordingly to deactivate the desired augmentations.\n",
 "**References**\n",
 "[1] Jocher, G., Chaurasia, A., & Qiu, J. (2023). YOLO by Ultralytics (Version 8.0.0) [Computer software]. https://github.com/ultralytics/ultralytics\n",
 "[2] https://docs.ultralytics.com/\n",

requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ webargs~=5.5.3
 flaat~=1.1.18
 ## external requirements
 fPDF2~=2.7.8
-ultralytics~=8.1.47
+ultralytics~=8.2
 PyPDF3~=1.0.6
 defusedxml~=0.5.0
 ## Optional tools

yolov8_api/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -75,13 +75,13 @@ def predict(
     args = {
         "files": ["/home/se1131/cat1.jpg"],
         "model": "yolov8n.pt",
-        "imgsz": [680, 512],
+        "imgsz": [704, 512],
         "conf": 0.25,
         "iou": 0.5,
         "show_labels": True,
         "show_conf": True,
-        "augment": False,
+        "augment": True,
         "classes": None,
-        "boxes": True,
+        "show_boxes": True,
     }
     predict(**args)
