
Commit 9a6e265

Author: khadijeh.alibabaei
Commit message: update the Augmentation option for training and README.md
1 parent 05bc7b4 commit 9a6e265

File tree: 10 files changed (+173, -60 lines)


README.md

Lines changed: 3 additions & 0 deletions
@@ -231,6 +231,9 @@ deepaas-run --listen-ip 0.0.0.0
 Then, open the Swagger interface, change the hyperparameters in the train section, and click on train.
 
 ><span style="color:Blue">**Note:**</span> Please note that the model training process may take some time depending on the size of your dataset and the complexity of your custom backbone. Once the model is trained, you can use the API to perform inference on new images.
+
+><span style="color:Blue">**Note:**</span> Augmentation Settings:
+among the training arguments, there are options related to augmentation, such as flipping, scaling, etc. The default values are set to automatically activate some of these options during training. If you want to disable augmentation entirely or partially, please review the default values and adjust them accordingly to deactivate the desired augmentations.
 
 # Inference Methods
 
 You can utilize the Swagger interface to upload your images or videos and obtain the following outputs:
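
As the note added above explains, several augmentations are enabled by default. A minimal sketch of turning them off at training time, using the Ultralytics Python API directly (the option names mirror the augmentation fields in api/schemas.py; the weights file, dataset path, and epoch count are placeholders):

```python
from ultralytics import YOLO

# Sketch only: argument names mirror the augmentation fields in api/schemas.py;
# paths and epochs are illustrative placeholders.
model = YOLO("yolov8n.pt")
model.train(
    data="data.yaml",                 # dataset config (placeholder)
    epochs=50,
    hsv_h=0.0, hsv_s=0.0, hsv_v=0.0,  # color augmentations off
    degrees=0.0, translate=0.0, scale=0.0, shear=0.0,  # geometric augmentations off
    flipud=0.0, fliplr=0.0,           # flips off
    mosaic=0.0, mixup=0.0,            # multi-image augmentations off
)
```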

api/__init__.py

Lines changed: 4 additions & 3 deletions
@@ -168,6 +168,7 @@ def train(**args):
             "mlflow": False,
             "datasets_dir": config.DATA_PATH,
             "model_dir": config.MODELS_PATH,
+            "wandb": str(args["disable_wandb"]),
         }
     )
     # Modify the model name based on task type
@@ -210,6 +211,8 @@ def train(**args):
 
     else:
         model = YOLO(args["model"])
+    if "auto_augment" not in args:
+        args["auto_augment"] = None
 
     device = args.get("device", "cpu")
     if device != "cpu" and not torch.cuda.is_available():
@@ -315,15 +318,13 @@ def main():
     args = cmd_parser.parse_args()
 
     main()
-
 
 """
 python3 api/__init__.py train --model yolov8n.yaml\
     --task_type det\
     --data /srv/football-players-detection-7/data.yaml\
     --Enable_MLFLOW --epochs 50
-
-python3 api/__init__.py predict --files \
+python3 api/__init__.py predict --files \
     /srv/yolov8_api/tests/data/det/test/cat1.jpg\
     --task_type det --accept application/json
 """

api/schemas.py

Lines changed: 141 additions & 23 deletions
@@ -95,9 +95,9 @@ class Meta:
         validate=validate.Length(max=2),
         metadata={
             "description": "image size as scalar or (h, w) list,"
-            " i.e. (640, 480)"
+            " i.e. (704, 512). Note: must be multiple of max stride 32"
         },
-        load_default=[640,480]
+        load_default=[704, 512],
     )
 
     conf = fields.Float(
@@ -130,12 +130,12 @@ class Meta:
 
     augment = fields.Boolean(
         metadata={
-            "description": "Apply image augmentation to prediction sources"
+            "description": "Apply image augmentation to prediction sources. "
             "augment for segmentation has not supported yet.",
         },
         load_default=False,
     )
-    classes = fields.List(
+    classes = fields.List(
         fields.Int(),
         metadata={
             "description": "Filter results by class, i.e. class=0, "
@@ -431,61 +431,179 @@ class Meta:
     )
     hsv_h = fields.Float(
         metadata={
-            "description": "Image HSV-Hue augmentation (fraction)"
+            "description": "Augmentation option: adjusts the hue "
+            "of the image by a fraction of"
+            " the color wheel, introducing color "
+            "variability. Helps the model generalize "
+            "across different lighting"
+            " conditions. Range: 0.0 - 1.0"
         },
         load_default=0.015,
     )
     hsv_s = fields.Float(
         metadata={
-            "description": "Image HSV-Saturation augmentation (fraction)"
+            "description": "Augmentation option: Alters the "
+            "saturation of the image by a fraction,"
+            " affecting the intensity of colors. Useful for "
+            "simulating different environmental conditions. "
+            "Range: 0.0 - 1.0"
         },
         load_default=0.7,
     )
     hsv_v = fields.Float(
         metadata={
-            "description": "Image HSV-Value augmentation (fraction)"
+            "description": "Augmentation option: Modifies the value "
+            "(brightness) of the "
+            "image by a fraction, helping the model "
+            "to perform well under various lighting"
+            " conditions. Range: 0.0 - 1.0"
         },
         load_default=0.4,
     )
     degrees = fields.Float(
-        metadata={"description": "Image rotation (+/- deg)"},
-        load_default=0.001,
+        metadata={
+            "description": "Augmentation option: Rotates the"
+            " image randomly within "
+            "the specified degree range, improving"
+            " the model's ability to recognize objects"
+            " at various orientations. Range: -180 - +180"
+        },
+        load_default=0.0,
     )
     translate = fields.Float(
-        metadata={"description": "Image translation (+/- fraction)"},
-        load_default=0.5,
+        metadata={
+            "description": "Augmentation option: Translates the "
+            "image horizontally and"
+            " vertically by a fraction of the image size,"
+            " aiding in learning to detect partially"
+            " visible objects. Range: 0.0 - 1.0"
+        },
+        load_default=0.1,
     )
     scale = fields.Float(
-        metadata={"description": "Image scale (+/- gain)"},
+        metadata={
+            "description": "Augmentation option: Scales the"
+            " image by a gain factor,"
+            " simulating objects at different "
+            "distances from the camera. "
+            "Range: >=0.0"
+        },
         load_default=0.5,
     )
     shear = fields.Float(
-        metadata={"description": "Image shear (+/- deg)"},
-        load_default=0.01,
+        metadata={
+            "description": "Augmentation option: Shears the"
+            " image by a specified "
+            "degree, mimicking the effect of "
+            "objects being viewed from different"
+            " angles. Range: -180 - +180"
+        },
+        load_default=0.0,
     )
     perspective = fields.Float(
         metadata={
-            "description": "Image perspective (+/- fraction), range 0-0.001"
+            "description": "Augmentation option: Applies a"
+            " random perspective transformation"
+            " to the image, enhancing the model's ability"
+            " to understand objects in 3D space. "
+            "Range 0-0.001"
         },
-        load_default=0.01,
+        load_default=0.0,
     )
     flipud = fields.Float(
-        metadata={"description": "Image flip up-down (probability)"},
-        load_default=0.01,
+        metadata={
+            "description": "Augmentation option: Flips the"
+            " image upside down "
+            "with the specified probability,"
+            " increasing the data variability "
+            "without affecting the object's"
+            " characteristics. Range 0.0-1.0"
+        },
+        load_default=0.0,
     )
     fliplr = fields.Float(
         metadata={
-            "description": "Image flip left-right (probability)"
+            "description": "Augmentation option: Flips the"
+            " image left to right "
+            "with the specified probability, "
+            "useful for learning symmetrical "
+            "objects and increasing dataset "
+            "diversity. Range 0.0-1.0"
         },
         load_default=0.5,
     )
     mosaic = fields.Float(
-        metadata={"description": "Image mosaic (probability)"},
-        load_default=1.0,
+        metadata={
+            "description": "Augmentation option:Combines four"
+            " training images "
+            "into one, simulating different "
+            "scene compositions and object "
+            "interactions. Highly effective "
+            "for complex scene understanding."
+            "Range 0.0- 1.0"
+        },
+        load_default=0.1,
     )
     mixup = fields.Float(
-        metadata={"description": "Image mixup (probability)"},
-        load_default=0.01,
+        metadata={
+            "description": "Augmentation option: Blends two "
+            "images and their labels, "
+            "creating a composite image. Enhances "
+            "the model's ability to generalize by "
+            "introducing label noise and visual "
+            "variability. Range 0.0- 1.0"
+        },
+        load_default=0.0,
+    )
+
+    copy_paste = fields.Float(
+        metadata={
+            "description": "Augmentation option: Copies objects"
+            " from one image "
+            "and pastes them onto another, "
+            "useful for increasing object "
+            "instances and learning object "
+            "occlusion. Range 0.0- 1.0"
+        },
+        load_default=0.0,
+    )
+
+    erasing = fields.Float(
+        metadata={
+            "description": "Augmentation option: Randomly erases"
+            " a portion of the"
+            " image during classification "
+            "training, encouraging the model "
+            "to focus on less obvious features "
+            "for recognition."
+            " Range 0.0- 0.9"
+        },
+        load_default=0.4,
+    )
+
+    crop_fraction = fields.Float(
+        metadata={
+            "description": "Augmentation option: Crops the "
+            "classification image to a "
+            "fraction of its size to emphasize "
+            "central features and adapt to object "
+            "scales, reducing background distractions."
+            " Range 0.1- 1.0"
+        },
+        load_default=1.0,
+    )
+    auto_augment = fields.String(
+        metadata={
+            "description": "Automatically applies a predefined "
+            "augmentation policy (randaugment, "
+            "autoaugment, augmix), optimizing for"
+            " classification tasks by diversifying "
+            "the visual features.",
+        },
+        required=False,
+        validate=validate.OneOf(
+            ["randaugment", "autoaugment", "augmix"]
+        ),
     )
 
     disable_wandb = fields.Bool(
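
For context on the new auto_augment field: fields with a load_default fall back to that value when the argument is omitted, while auto_augment, which defines no load_default, is simply absent from the loaded arguments; this is why the train() change in api/__init__.py above fills in None when the key is missing. A minimal standalone sketch (a hypothetical mini-schema mirroring the fields above, not the project's own class):

```python
from marshmallow import Schema, fields, validate

# Hypothetical mini-schema mirroring the augmentation fields above.
class AugSketch(Schema):
    mosaic = fields.Float(load_default=0.1)
    fliplr = fields.Float(load_default=0.5)
    auto_augment = fields.String(
        required=False,
        validate=validate.OneOf(["randaugment", "autoaugment", "augmix"]),
    )

args = AugSketch().load({"mosaic": 0.0})
print(args)                    # e.g. {'mosaic': 0.0, 'fliplr': 0.5}
print("auto_augment" in args)  # False, hence the explicit None in train()
```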

docs/conf.py

Lines changed: 13 additions & 18 deletions
@@ -3,28 +3,24 @@
 # ai4os-yolov8-torch documentation build configuration file, created by
 # sphinx-quickstart.
 #
-# This file is execfile()d with the current directory set to its containing dir.
-#
+# This file is execfile()d with the current directory set
+# to its containing dir.
 # Note that not all possible configuration values are present in this
 # autogenerated file.
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import os
-import sys
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 # sys.path.insert(0, os.path.abspath('.'))
 
-# -- General configuration -----------------------------------------------------
-
+# -- General configuration ---------------
 # If your documentation needs a minimal Sphinx version, state it here.
 # needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be extensions
+# Add any Sphinx extension module names here, as strings.
+# They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = []
 
@@ -66,9 +62,8 @@
 # directories to ignore when looking for source files.
 exclude_patterns = ["_build"]
 
-# The reST default role (used for this markup: `text`) to use for all documents.
-# default_role = None
-
+# The reST default role (used for this markup: `text`)
+# to use for all documents. default_role = None
 # If true, '()' will be appended to :func: etc. cross-reference text.
 # add_function_parentheses = True
 
@@ -87,8 +82,7 @@
 # modindex_common_prefix = []
 
 
-# -- Options for HTML output ---------------------------------------------------
-
+# -- Options for HTML output ------------
 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
 html_theme = "default"
@@ -167,7 +161,7 @@
 htmlhelp_basename = "ai4os-yolov8-torchdoc"
 
 
-# -- Options for LaTeX output --------------------------------------------------
+# -- Options for LaTeX output -------------
 
 latex_elements = {
     # The paper size ('letterpaper' or 'a4paper').
@@ -179,7 +173,8 @@
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass [howto/manual]).
+# (source start file, target name, title, author,
+# documentclass [howto/manual]).
 latex_documents = [
     (
         "index",
@@ -211,7 +206,7 @@
 # latex_domain_indices = True
 
 
-# -- Options for manual page output --------------------------------------------
+# -- Options for manual page output ---------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
@@ -229,7 +224,7 @@
 # man_show_urls = False
 
 
-# -- Options for Texinfo output ------------------------------------------------
+# -- Options for Texinfo output -----
 
 # Grouping the document tree into Texinfo files. List of tuples
 # (source start file, target name, title, author,

metadata.json

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@
 " solution across diverse tasks such as object detection, oriented bounding boxes detection, tracking, instance segmentation, and",
 " image classification. Its refined architecture and innovations make it an ideal choice for",
 " cutting-edge applications in the field of computer vision.\n",
+"**NOTE**: Among the training arguments, there are options related to augmentation, such as flipping, scaling, etc. The default values are set to automatically activate some of these options during training. If you want to disable augmentation entirely or partially, please review the default values and adjust them accordingly to deactivate the desired augmentations.\n",
 "**References**\n",
 "[1] Jocher, G., Chaurasia, A., & Qiu, J. (2023). YOLO by Ultralytics (Version 8.0.0) [Computer software]. https://github.com/ultralytics/ultralytics\n",
 "[2] https://docs.ultralytics.com/\n",

requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ webargs~=5.5.3
 flaat~=1.1.18
 ## external requirements
 fPDF2~=2.7.8
-ultralytics~=8.1.47
+ultralytics~=8.2
 PyPDF3~=1.0.6
 defusedxml~=0.5.0
 ## Optional tools

yolov8_api/__init__.py

Lines changed: 3 additions & 3 deletions
@@ -75,13 +75,13 @@ def predict(
     args = {
         "files": ["/home/se1131/cat1.jpg"],
         "model": "yolov8n.pt",
-        "imgsz": [680, 512],
+        "imgsz": [704, 512],
         "conf": 0.25,
         "iou": 0.5,
         "show_labels": True,
         "show_conf": True,
-        "augment": False,
+        "augment": True,
         "classes": None,
-        "boxes": True,
+        "show_boxes": True,
     }
     predict(**args)
