Updating to Python 3.11 and using the latest packages
ignaciohrdz committed Jun 2, 2024
1 parent 81b5520 commit 6059d52
Showing 6 changed files with 140 additions and 143 deletions.
Readme.md (6 changes: 4 additions & 2 deletions)
@@ -2,12 +2,14 @@

## Introduction

-In this project I use the most recent implementation of YOLO by Ultralytics, [YOLOv8](https://github.com/ultralytics/ultralytics). The goal is to train an algorithm that is able to detect separate face parts without having to use landmark detectors that don't do well when part of the face is occluded or missing. My goal is to also combine frontal, semi-frontal and profile face datasets so that the YOLO model works well on all of them.
+In this project I use Ultralytics' implementation of [YOLOv8](https://github.com/ultralytics/ultralytics). The goal is to train an algorithm that is able to detect separate face parts without having to use landmark detectors that don't do well when part of the face is occluded or missing. My goal is to also combine frontal, semi-frontal and profile face datasets so that the YOLO model works well on all of them.

-It is also a great opportunity to try out the `supervision` library by [Roboflow](https://github.com/roboflow/supervision). Despite it's still in beta, it looks really helpful for some common YOLO-related tasks such as drawing the detections.
+It is also a great opportunity to try out the `supervision` library by [Roboflow](https://github.com/roboflow/supervision). It looks really helpful for some common YOLO-related tasks such as drawing the detections.

![A live demo of YOLOv8 nano](images/live_demo.gif)

+This project uses Python 3.11. To install the required packages, use `pip install -r requirements.txt` in a Python 3.11 environment.
+
## Motivation

All I want these models for is data exploration and check what face parts can be seen in an image. I'm talking about detecting _face parts_, which is not the same as detecting _faces_.
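For context, the detection-plus-drawing flow this commit standardises on (`Detections.from_ultralytics` and the split box/label annotators, both visible in the diffs below) comes down to a few calls. A minimal sketch, assuming placeholder files: `yolov8n.pt` and `face.jpg` are not part of this repo.

```python
# Sketch of the YOLOv8 + supervision flow described above (placeholder paths).
import cv2
import supervision as spv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")                              # any YOLOv8 weights
img = cv2.imread("face.jpg")
result = model(img, verbose=False)[0]                   # run inference on one image
detections = spv.Detections.from_ultralytics(result)    # convert to supervision's format
annotated = spv.BoundingBoxAnnotator(thickness=2).annotate(scene=img, detections=detections)
cv2.imwrite("face_annotated.jpg", annotated)
```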
live_demo.py (83 changes: 41 additions & 42 deletions)
@@ -1,3 +1,9 @@
"""
This script loads a YOLO model and runs it on live camera feed
Author: Ignacio Hernández Montilla, 2023
"""

from pathlib import Path
import argparse
import time
@@ -7,79 +13,72 @@
import numpy as np
import imageio
import supervision as spv
+from utils import annotate_frame


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", '--path_model', type=str, help="Path to the model")
-    parser.add_argument('--save_gif', action="store_true", help="Save the video to a GIF file")
+    parser.add_argument("-i", '--camera_id', type=int, help="Camera ID")
+    parser.add_argument('--save_gif', type=str,
+                        help="Save the video to a GIF file in given location")
    args = parser.parse_args()

    # Debugging
    # args.path_model = "runs/detect/train_n"
    # args.save_gif = True

    # Loading the model
    try:
        print("Loading the model")
        path_model = Path(args.path_model)
-        model = YOLO(path_model / "weights" / "best.pt")
+        model = YOLO(path_model)
    except FileNotFoundError:
        print("ERROR: Could not load the YOLO model")
        exit()

    # This will draw the detections
    class_colors = spv.ColorPalette.from_hex(['#ffff66', '#66ffcc', '#ff99ff', '#ffcc99'])
-    box_annotator = spv.BoxAnnotator(
-        thickness=2,
-        text_thickness=1,
-        text_scale=0.5,
-        color=class_colors
-    )
+    class_names_dict = model.model.names
+    bbox_annotator = spv.BoundingBoxAnnotator(thickness=2, color=class_colors)
+    label_annotator = spv.LabelAnnotator(color=class_colors, text_color=spv.Color.from_hex("#000000"))

    # Reading frames from the webcam
-    cap = cv2.VideoCapture(0)
-
-    # Optional: exporting to GIF
-    if args.save_gif:
-        frames = []
-        times = []
-        path_gif = path_model / "live_demo.gif"
+    cap = cv2.VideoCapture(args.camera_id)
+
+    # Exporting to GIF
+    frames = []
+    times = []
+    make_gif = args.save_gif is not None
+    if make_gif:
+        if Path(args.save_gif).is_file():
+            path_gif = Path(args.save_gif)
+        else:
+            path_gif = Path(args.save_gif) / "live_demo.gif"

    # Read from camera and run the YOLO model on each frame
    while True:
-        ret, frame = cap.read()
-
-        start_time = time.time()
-        result = model(frame, agnostic_nms=True, verbose=False)[0]
-        detections = spv.Detections.from_yolov8(result)
+        frame_ok, frame = cap.read()

-        labels = [
-            f"{model.model.names[class_id]} {confidence:0.2f}"
-            for _, confidence, class_id, _
-            in detections
-        ]
-        frame = box_annotator.annotate(
-            scene=frame,
-            detections=detections,
-            labels=labels
-        )
+        if frame_ok:
+            start_time = time.time()
+            result = model(frame, agnostic_nms=True, verbose=False)[0]
+            detections = spv.Detections.from_ultralytics(result)

-        cv2.imshow("Face parts", frame)
-        k = cv2.waitKey(1)
+            frame = annotate_frame(frame, detections, bbox_annotator, label_annotator, class_names_dict)
+            cv2.imshow("Face parts", frame)
+            k = cv2.waitKey(1)

-        if args.save_gif:
-            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-            times.append(time.time() - start_time)
+            if make_gif:
+                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+                times.append(time.time() - start_time)

-        if k == ord("q"):
-            break
+            if k == ord("q"):
+                break

    cv2.destroyAllWindows()
    cap.release()

    # Exporting to GIF
    # Source: https://pysource.com/2021/03/25/create-an-animated-gif-in-real-time-with-opencv-and-python/
-    if args.save_gif:
+    if make_gif:
        print("\nSaving the stream to ", path_gif)
        avg_time = np.array(times).mean()
        fps = round(1 / avg_time)
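The hunk is truncated before the GIF is actually written. A minimal sketch of one way the collected `frames` and `times` could be flushed to disk, assuming imageio's Pillow-based GIF writer, where `duration` is given in milliseconds per frame and the old `fps` keyword is no longer accepted; this is not the committed code.

```python
# Sketch only: write the captured RGB frames as a looping GIF.
import numpy as np
import imageio

def save_gif(frames, times, path_gif):
    avg_time = np.array(times).mean()   # mean processing time per frame (seconds)
    fps = round(1 / avg_time)           # effective capture rate of the loop
    imageio.mimsave(path_gif, frames, duration=1000 / fps, loop=0)
```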
requirements.txt (123 changes: 48 additions & 75 deletions)
@@ -1,78 +1,51 @@
-absl-py==1.4.0
-antlr4-python3-runtime==4.9.3
-asttokens==2.2.1
-backcall==0.2.0
-cachetools==5.2.1
-certifi==2022.12.7
-charset-normalizer==3.0.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
colorama==0.4.6
-contourpy==1.0.7
-cycler==0.11.0
-decorator==5.1.1
-executing==1.2.0
-fonttools==4.38.0
-gitdb==4.0.10
-GitPython==3.1.30
-google-auth==2.16.0
-google-auth-oauthlib==0.4.6
-grpcio==1.51.1
-hydra-core==1.3.1
-idna==3.4
-imageio~=2.26.0
-importlib-metadata==6.0.0
-ipython==8.8.0
-jedi==0.18.2
-kiwisolver==1.4.4
-Markdown==3.4.1
-MarkupSafe==2.1.2
-matplotlib==3.6.3
-matplotlib-inline==0.1.6
-numpy==1.24.1
-oauthlib==3.2.2
-omegaconf==2.3.0
-opencv-python==4.7.0.68
-packaging==23.0
-pandas==1.5.2
-parso==0.8.3
-pickleshare==0.7.5
-Pillow==9.4.0
-pip==22.3.1
-prompt-toolkit==3.0.36
-protobuf==3.20.3
-psutil==5.9.4
-pure-eval==0.2.2
-pyasn1==0.4.8
-pyasn1-modules==0.2.8
-Pygments==2.14.0
-pyparsing==3.0.9
-python-dateutil==2.8.2
-pytz==2022.7.1
-PyYAML==6.0
-requests==2.28.2
-requests-oauthlib==1.3.1
-rsa==4.9
-scipy==1.10.0
-seaborn==0.12.2
-sentry-sdk==1.13.0
-setuptools==65.6.3
+contourpy==1.2.1
+cycler==0.12.1
+defusedxml==0.7.1
+filelock==3.14.0
+fonttools==4.53.0
+fsspec==2024.5.0
+idna==3.7
+imageio==2.34.1
+intel-openmp==2021.4.0
+Jinja2==3.1.4
+kiwisolver==1.4.5
+MarkupSafe==2.1.5
+matplotlib==3.9.0
+mkl==2021.4.0
+mpmath==1.3.0
+networkx==3.3
+numpy==1.26.4
+opencv-contrib-python==4.9.0.80
+opencv-python==4.9.0.80
+opencv-python-headless==4.9.0.80
+packaging==24.0
+pandas==2.2.2
+pillow==10.3.0
+pip==24.0
+psutil==5.9.8
+py-cpuinfo==9.0.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+pytz==2024.1
+PyYAML==6.0.1
+requests==2.32.3
+scipy==1.13.1
+seaborn==0.13.2
+setuptools==69.5.1
six==1.16.0
-smmap==5.0.0
-stack-data==0.6.2
-supervision~=0.3.0
-tensorboard==2.11.2
-tensorboard-data-server==0.6.1
-tensorboard-plugin-wit==1.8.1
+supervision==0.20.0
+sympy==1.12.1
+tbb==2021.12.0
thop==0.1.1.post2209072238
-torch==1.13.1+cu116
-torchvision==0.14.1+cu116
-tqdm==4.64.1
-traitlets==5.8.1
-typing_extensions==4.4.0
-unidecode~=1.3.6
-ultralytics~=8.0.53
-urllib3==1.26.14
-wcwidth==0.2.6
-Werkzeug==2.2.2
-wheel==0.37.1
-wincertstore==0.2
-zipp==3.11.0
+torch==2.3.0
+torchvision==0.18.0
+tqdm==4.66.4
+typing_extensions==4.12.1
+tzdata==2024.1
+ultralytics==8.2.27
+Unidecode==1.3.8
+urllib3==2.2.1
+wheel==0.43.0
run.py (32 changes: 10 additions & 22 deletions)
@@ -27,7 +27,7 @@
    # Loading the model
    try:
        path_model = Path(args.path_model)
-        model = YOLO(path_model / "weights" / "best.pt")
+        model = YOLO(path_model)
    except FileNotFoundError:
        print("ERROR: Could not load the YOLO model")
        exit()
@@ -39,36 +39,24 @@
    path_report = path_output / "report.csv"
    report = pd.DataFrame(columns=['image_name', 'detection', 'x1', 'y1', 'x2', 'y2'])

-    if args.show:
-        class_colors = spv.ColorPalette.from_hex(['#ffff66', '#66ffcc', '#ff99ff', '#ffcc99'])
-        box_annotator = spv.BoxAnnotator(
-            thickness=2,
-            text_thickness=1,
-            text_scale=0.5,
-            color=class_colors
-        )
+    class_colors = spv.ColorPalette.from_hex(['#ffff66', '#66ffcc', '#ff99ff', '#ffcc99'])
+    class_names_dict = model.model.names
+    bbox_annotator = spv.BoundingBoxAnnotator(thickness=2, color=class_colors)
+    label_annotator = spv.LabelAnnotator(color=class_colors, text_color=spv.Color.from_hex("#000000"))

    for f in os.listdir(args.path_data):
        img = cv2.imread(os.path.join(args.path_data, f))
        img, _ = smart_resize(img, new_size=640)
        result = model(img, agnostic_nms=True, verbose=False)[0]
-        detections = spv.Detections.from_yolov8(result)
+        detections = spv.Detections.from_ultralytics(result)

        for i, bbox in enumerate(detections.xyxy):
            x1, y1, x2, y2 = bbox.astype(int)
-            label = model.model.names[detections.class_id[i]]
+            label = class_names_dict[detections.class_id[i]]
            report.loc[len(report), :] = [f, label, x1, y1, x2, y2]
-            if args.show:
-                labels = [
-                    f"{model.model.names[class_id]} {confidence:0.2f}"
-                    for _, confidence, class_id, _
-                    in detections
-                ]
-                frame = box_annotator.annotate(
-                    scene=img,
-                    detections=detections,
-                    labels=labels
-                )

        if args.show:
+            img = annotate_frame(img, detections, bbox_annotator, label_annotator, class_names_dict)
            cv2.imshow("Face parts", img)
            k = cv2.waitKey(args.frame_time)

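The point of `run.py` is the `report.csv` it fills, one row per detection with the columns defined above. Assuming the truncated tail of the script writes `report` to `path_report`, the data exploration the Readme mentions can be as simple as this sketch; the output path and the "mouth" class name are placeholders.

```python
# Sketch: summarise report.csv (columns come from run.py above).
import pandas as pd

report = pd.read_csv("output/report.csv")       # placeholder path
print(report["detection"].value_counts())       # how often each face part was detected

all_images = set(report["image_name"])
with_part = set(report.loc[report["detection"] == "mouth", "image_name"])
print(sorted(all_images - with_part))           # images where that part was never found
```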
train.py (3 changes: 2 additions & 1 deletion)
@@ -16,6 +16,7 @@
    parser.add_argument("-a", '--arch', type=str, default='n', help="Architecture (n, s, m, l, x)")
    parser.add_argument("-n", '--name', type=str, default="train", help="Run name")
    parser.add_argument("-d", '--path_data', type=str, help="Path to the datasets folder")
+    parser.add_argument("-i", '--image_size', type=int, default=640, help="Image size")
    parser.add_argument("-b", '--batch_size', type=int, default=8, help="Batch size")
    parser.add_argument("-e", '--epochs', type=int, default=10, help="Number of epochs")
    parser.add_argument("--device", type=str, default=['0'], nargs='+', help="Device list (also accepts 'cpu')")
@@ -32,6 +33,6 @@
    # Training
    model = YOLO("weights/yolov8{}.pt".format(args.arch))
    results = model.train(data=str(path_yaml), task="detect", name="{}_{}".format(args.name, args.arch),
-                          epochs=args.epochs, batch=args.batch_size,
+                          epochs=args.epochs, imgsz=args.image_size, batch=args.batch_size,
                          device=",".join(args.device),
                          scale=0.25, degrees=25.0, mosaic=0.8)
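With `imgsz` now exposed as a flag, the resulting weights can be checked at the same resolution. A sketch of validating a finished run through the Ultralytics API; the run directory and dataset YAML below are hypothetical, not paths from this commit.

```python
# Sketch: evaluate weights produced by train.py (placeholder paths).
from ultralytics import YOLO

model = YOLO("runs/detect/train_n/weights/best.pt")
metrics = model.val(data="face_parts.yaml", imgsz=640)  # same imgsz as training
print(metrics.box.map50)                                # mAP@0.5 on the validation split
```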
utils.py (36 changes: 35 additions & 1 deletion)
@@ -3,12 +3,26 @@


def smart_resize(img, new_size=512):
+    """
+    A very basic resizing function
+    :param img: input image
+    :param new_size: output max size
+    :return: resized image (largest side = new_size), size ratio
+    """
    ratio = new_size/max(img.shape[:2])
    return cv2.resize(img, None, fx=ratio, fy=ratio), ratio


def points_to_yolo(labels_df, points, part_id, img_h, img_w):
-    # Create a contour from the list of X,Y points and get the bounding box
+    """
+    Create a contour from the list of X,Y points and get the bounding box
+    :param labels_df: dataframe that will contain the boxes
+    :param points: list of points (X,Y) of a facial landmark
+    :param part_id: facial part ID
+    :param img_h: image height
+    :param img_w: image width
+    :return: bounding box coordinates (not normalized)
+    """
    contour = np.array(points, dtype=np.int32)
    x, y, w, h = cv2.boundingRect(contour)
    x_n, w_n = x / img_w, w / img_w
@@ -19,3 +33,23 @@ def points_to_yolo(labels_df, points, part_id, img_h, img_w):
    # Populating the dataframe
    labels_df.loc[len(labels_df), :] = [part_id, x_c, y_c, w_n, h_n]
    return x, y, w, h  # these are not the normalised coordinates, these are for plotting the box


+def annotate_frame(image, detections, box_annotator, label_annotator, class_names_dict):
+    """
+    Annotate the bounding box with class name and confidence
+    :param image: input image
+    :param detections: YOLO detections object
+    :param box_annotator: supervision bounding box annotator
+    :param label_annotator: supervision label annotator
+    :param class_names_dict: dictionary with model's class names {class_id: class_name, ...}
+    :return: annotated image
+    """
+    labels = [
+        "{} {:0.2f}".format(class_names_dict[class_id], confidence)
+        for _, _, confidence, class_id, _, _
+        in detections
+    ]
+    image = box_annotator.annotate(scene=image, detections=detections)
+    image = label_annotator.annotate(scene=image, detections=detections, labels=labels)
+    return image
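Taken together, the three helpers support the same flow in both `run.py` and `live_demo.py`. A sketch of how they fit together on a single image, using the annotator setup from the diffs above; the weights and image paths are placeholders.

```python
# Sketch: smart_resize + YOLOv8 + annotate_frame on one image (placeholder paths).
import cv2
import supervision as spv
from ultralytics import YOLO
from utils import smart_resize, annotate_frame

model = YOLO("weights/best.pt")
img = cv2.imread("face.jpg")
img, ratio = smart_resize(img, new_size=640)   # largest side becomes 640 px

result = model(img, agnostic_nms=True, verbose=False)[0]
detections = spv.Detections.from_ultralytics(result)

class_colors = spv.ColorPalette.from_hex(['#ffff66', '#66ffcc', '#ff99ff', '#ffcc99'])
bbox_annotator = spv.BoundingBoxAnnotator(thickness=2, color=class_colors)
label_annotator = spv.LabelAnnotator(color=class_colors, text_color=spv.Color.from_hex("#000000"))

img = annotate_frame(img, detections, bbox_annotator, label_annotator, model.model.names)
cv2.imshow("Face parts", img)
cv2.waitKey(0)
```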
