Updating to Python 3.11 and using the latest packages
ignaciohrdz committed Jun 2, 2024
1 parent 81b5520 commit 6059d52
Showing 6 changed files with 140 additions and 143 deletions.
Readme.md (6 changes: 4 additions & 2 deletions)
@@ -2,12 +2,14 @@

## Introduction

-In this project I use the most recent implementation of YOLO by Ultralytics, [YOLOv8](https://github.com/ultralytics/ultralytics). The goal is to train an algorithm that is able to detect separate face parts without having to use landmark detectors that don't do well when part of the face is occluded or missing. My goal is to also combine frontal, semi-frontal and profile face datasets so that the YOLO model works well on all of them.
+In this project I use Ultralytics' implementation of [YOLOv8](https://github.com/ultralytics/ultralytics). The goal is to train an algorithm that is able to detect separate face parts without having to use landmark detectors that don't do well when part of the face is occluded or missing. My goal is to also combine frontal, semi-frontal and profile face datasets so that the YOLO model works well on all of them.

-It is also a great opportunity to try out the `supervision` library by [Roboflow](https://github.com/roboflow/supervision). Despite it's still in beta, it looks really helpful for some common YOLO-related tasks such as drawing the detections.
+It is also a great opportunity to try out the `supervision` library by [Roboflow](https://github.com/roboflow/supervision). It looks really helpful for some common YOLO-related tasks such as drawing the detections.

![A live demo of YOLOv8 nano](images/live_demo.gif)

+This project uses Python 3.11. To install the required packages, use `pip install -r requirements.txt` in a Python 3.11 environment.
+
## Motivation

All I want these models for is data exploration and check what face parts can be seen in an image. I'm talking about detecting _face parts_, which is not the same as detecting _faces_.
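For context, the detection-plus-drawing flow this commit standardises on (`Detections.from_ultralytics` and the split box/label annotators, both visible in the diffs below) comes down to a few calls. A minimal sketch, assuming placeholder files: `yolov8n.pt` and `face.jpg` are not part of this repo.

```python
# Sketch of the YOLOv8 + supervision flow described above (placeholder paths).
import cv2
import supervision as spv
from ultralytics import YOLO

model = YOLO("yolov8n.pt")                              # any YOLOv8 weights
img = cv2.imread("face.jpg")
result = model(img, verbose=False)[0]                   # run inference on one image
detections = spv.Detections.from_ultralytics(result)    # convert to supervision's format
annotated = spv.BoundingBoxAnnotator(thickness=2).annotate(scene=img, detections=detections)
cv2.imwrite("face_annotated.jpg", annotated)
```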
live_demo.py (83 changes: 41 additions & 42 deletions)
@@ -1,3 +1,9 @@
"""
This script loads a YOLO model and runs it on live camera feed
Author: Ignacio Hernández Montilla, 2023
"""

from pathlib import Path
import argparse
import time
@@ -7,79 +13,72 @@
import numpy as np
import imageio
import supervision as spv
+from utils import annotate_frame


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", '--path_model', type=str, help="Path to the model")
-    parser.add_argument('--save_gif', action="store_true", help="Save the video to a GIF file")
+    parser.add_argument("-i", '--camera_id', type=int, help="Camera ID")
+    parser.add_argument('--save_gif', type=str,
+                        help="Save the video to a GIF file in given location")
    args = parser.parse_args()

    # Debugging
    # args.path_model = "runs/detect/train_n"
    # args.save_gif = True

    # Loading the model
    try:
        print("Loading the model")
        path_model = Path(args.path_model)
-        model = YOLO(path_model / "weights" / "best.pt")
+        model = YOLO(path_model)
    except FileNotFoundError:
        print("ERROR: Could not load the YOLO model")
        exit()

    # This will draw the detections
    class_colors = spv.ColorPalette.from_hex(['#ffff66', '#66ffcc', '#ff99ff', '#ffcc99'])
-    box_annotator = spv.BoxAnnotator(
-        thickness=2,
-        text_thickness=1,
-        text_scale=0.5,
-        color=class_colors
-    )
+    class_names_dict = model.model.names
+    bbox_annotator = spv.BoundingBoxAnnotator(thickness=2, color=class_colors)
+    label_annotator = spv.LabelAnnotator(color=class_colors, text_color=spv.Color.from_hex("#000000"))

    # Reading frames from the webcam
-    cap = cv2.VideoCapture(0)
-
-    # Optional: exporting to GIF
-    if args.save_gif:
-        frames = []
-        times = []
-        path_gif = path_model / "live_demo.gif"
+    cap = cv2.VideoCapture(args.camera_id)
+
+    # Exporting to GIF
+    frames = []
+    times = []
+    make_gif = args.save_gif is not None
+    if make_gif:
+        if Path(args.save_gif).is_file():
+            path_gif = Path(args.save_gif)
+        else:
+            path_gif = Path(args.save_gif) / "live_demo.gif"

    # Read from camera and run the YOLO model on each frame
    while True:
-        ret, frame = cap.read()
-
-        start_time = time.time()
-        result = model(frame, agnostic_nms=True, verbose=False)[0]
-        detections = spv.Detections.from_yolov8(result)
+        frame_ok, frame = cap.read()

-        labels = [
-            f"{model.model.names[class_id]} {confidence:0.2f}"
-            for _, confidence, class_id, _
-            in detections
-        ]
-        frame = box_annotator.annotate(
-            scene=frame,
-            detections=detections,
-            labels=labels
-        )
+        if frame_ok:
+            start_time = time.time()
+            result = model(frame, agnostic_nms=True, verbose=False)[0]
+            detections = spv.Detections.from_ultralytics(result)

-        cv2.imshow("Face parts", frame)
-        k = cv2.waitKey(1)
+            frame = annotate_frame(frame, detections, bbox_annotator, label_annotator, class_names_dict)
+            cv2.imshow("Face parts", frame)
+            k = cv2.waitKey(1)

-        if args.save_gif:
-            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-            times.append(time.time() - start_time)
+            if make_gif:
+                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+                times.append(time.time() - start_time)

-        if k == ord("q"):
-            break
+            if k == ord("q"):
+                break

    cv2.destroyAllWindows()
    cap.release()

    # Exporting to GIF
    # Source: https://pysource.com/2021/03/25/create-an-animated-gif-in-real-time-with-opencv-and-python/
-    if args.save_gif:
+    if make_gif:
        print("\nSaving the stream to ", path_gif)
        avg_time = np.array(times).mean()
        fps = round(1 / avg_time)
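The hunk is truncated before the GIF is actually written. A minimal sketch of one way the collected `frames` and `times` could be flushed to disk, assuming imageio's Pillow-based GIF writer, where `duration` is given in milliseconds per frame and the old `fps` keyword is no longer accepted; this is not the committed code.

```python
# Sketch only: write the captured RGB frames as a looping GIF.
import numpy as np
import imageio

def save_gif(frames, times, path_gif):
    avg_time = np.array(times).mean()   # mean processing time per frame (seconds)
    fps = round(1 / avg_time)           # effective capture rate of the loop
    imageio.mimsave(path_gif, frames, duration=1000 / fps, loop=0)
```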
requirements.txt (123 changes: 48 additions & 75 deletions)
@@ -1,78 +1,51 @@
-absl-py==1.4.0
-antlr4-python3-runtime==4.9.3
-asttokens==2.2.1
-backcall==0.2.0
-cachetools==5.2.1
-certifi==2022.12.7
-charset-normalizer==3.0.1
+certifi==2024.2.2
+charset-normalizer==3.3.2
colorama==0.4.6
-contourpy==1.0.7
-cycler==0.11.0
-decorator==5.1.1
-executing==1.2.0
-fonttools==4.38.0
-gitdb==4.0.10
-GitPython==3.1.30
-google-auth==2.16.0
-google-auth-oauthlib==0.4.6
-grpcio==1.51.1
-hydra-core==1.3.1
-idna==3.4
-imageio~=2.26.0
-importlib-metadata==6.0.0
-ipython==8.8.0
-jedi==0.18.2
-kiwisolver==1.4.4
-Markdown==3.4.1
-MarkupSafe==2.1.2
-matplotlib==3.6.3
-matplotlib-inline==0.1.6
-numpy==1.24.1
-oauthlib==3.2.2
-omegaconf==2.3.0
-opencv-python==4.7.0.68
-packaging==23.0
-pandas==1.5.2
-parso==0.8.3
-pickleshare==0.7.5
-Pillow==9.4.0
-pip==22.3.1
-prompt-toolkit==3.0.36
-protobuf==3.20.3
-psutil==5.9.4
-pure-eval==0.2.2
-pyasn1==0.4.8
-pyasn1-modules==0.2.8
-Pygments==2.14.0
-pyparsing==3.0.9
-python-dateutil==2.8.2
-pytz==2022.7.1
-PyYAML==6.0
-requests==2.28.2
-requests-oauthlib==1.3.1
-rsa==4.9
-scipy==1.10.0
-seaborn==0.12.2
-sentry-sdk==1.13.0
-setuptools==65.6.3
+contourpy==1.2.1
+cycler==0.12.1
+defusedxml==0.7.1
+filelock==3.14.0
+fonttools==4.53.0
+fsspec==2024.5.0
+idna==3.7
+imageio==2.34.1
+intel-openmp==2021.4.0
+Jinja2==3.1.4
+kiwisolver==1.4.5
+MarkupSafe==2.1.5
+matplotlib==3.9.0
+mkl==2021.4.0
+mpmath==1.3.0
+networkx==3.3
+numpy==1.26.4
+opencv-contrib-python==4.9.0.80
+opencv-python==4.9.0.80
+opencv-python-headless==4.9.0.80
+packaging==24.0
+pandas==2.2.2
+pillow==10.3.0
+pip==24.0
+psutil==5.9.8
+py-cpuinfo==9.0.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+pytz==2024.1
+PyYAML==6.0.1
+requests==2.32.3
+scipy==1.13.1
+seaborn==0.13.2
+setuptools==69.5.1
six==1.16.0
-smmap==5.0.0
-stack-data==0.6.2
-supervision~=0.3.0
-tensorboard==2.11.2
-tensorboard-data-server==0.6.1
-tensorboard-plugin-wit==1.8.1
+supervision==0.20.0
+sympy==1.12.1
+tbb==2021.12.0
thop==0.1.1.post2209072238
-torch==1.13.1+cu116
-torchvision==0.14.1+cu116
-tqdm==4.64.1
-traitlets==5.8.1
-typing_extensions==4.4.0
-unidecode~=1.3.6
-ultralytics~=8.0.53
-urllib3==1.26.14
-wcwidth==0.2.6
-Werkzeug==2.2.2
-wheel==0.37.1
-wincertstore==0.2
-zipp==3.11.0
+torch==2.3.0
+torchvision==0.18.0
+tqdm==4.66.4
+typing_extensions==4.12.1
+tzdata==2024.1
+ultralytics==8.2.27
+Unidecode==1.3.8
+urllib3==2.2.1
+wheel==0.43.0
run.py (32 changes: 10 additions & 22 deletions)
@@ -27,7 +27,7 @@
    # Loading the model
    try:
        path_model = Path(args.path_model)
-        model = YOLO(path_model / "weights" / "best.pt")
+        model = YOLO(path_model)
    except FileNotFoundError:
        print("ERROR: Could not load the YOLO model")
        exit()
@@ -39,36 +39,24 @@
    path_report = path_output / "report.csv"
    report = pd.DataFrame(columns=['image_name', 'detection', 'x1', 'y1', 'x2', 'y2'])

-    if args.show:
-        class_colors = spv.ColorPalette.from_hex(['#ffff66', '#66ffcc', '#ff99ff', '#ffcc99'])
-        box_annotator = spv.BoxAnnotator(
-            thickness=2,
-            text_thickness=1,
-            text_scale=0.5,
-            color=class_colors
-        )
+    class_colors = spv.ColorPalette.from_hex(['#ffff66', '#66ffcc', '#ff99ff', '#ffcc99'])
+    class_names_dict = model.model.names
+    bbox_annotator = spv.BoundingBoxAnnotator(thickness=2, color=class_colors)
+    label_annotator = spv.LabelAnnotator(color=class_colors, text_color=spv.Color.from_hex("#000000"))

    for f in os.listdir(args.path_data):
        img = cv2.imread(os.path.join(args.path_data, f))
        img, _ = smart_resize(img, new_size=640)
        result = model(img, agnostic_nms=True, verbose=False)[0]
-        detections = spv.Detections.from_yolov8(result)
+        detections = spv.Detections.from_ultralytics(result)

        for i, bbox in enumerate(detections.xyxy):
            x1, y1, x2, y2 = bbox.astype(int)
-            label = model.model.names[detections.class_id[i]]
+            label = class_names_dict[detections.class_id[i]]
            report.loc[len(report), :] = [f, label, x1, y1, x2, y2]
-            if args.show:
-                labels = [
-                    f"{model.model.names[class_id]} {confidence:0.2f}"
-                    for _, confidence, class_id, _
-                    in detections
-                ]
-                frame = box_annotator.annotate(
-                    scene=img,
-                    detections=detections,
-                    labels=labels
-                )

        if args.show:
+            img = annotate_frame(img, detections, bbox_annotator, label_annotator, class_names_dict)
            cv2.imshow("Face parts", img)
            k = cv2.waitKey(args.frame_time)

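The point of `run.py` is the `report.csv` it fills, one row per detection with the columns defined above. Assuming the truncated tail of the script writes `report` to `path_report`, the data exploration the Readme mentions can be as simple as this sketch; the output path and the "mouth" class name are placeholders.

```python
# Sketch: summarise report.csv (columns come from run.py above).
import pandas as pd

report = pd.read_csv("output/report.csv")       # placeholder path
print(report["detection"].value_counts())       # how often each face part was detected

all_images = set(report["image_name"])
with_part = set(report.loc[report["detection"] == "mouth", "image_name"])
print(sorted(all_images - with_part))           # images where that part was never found
```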
train.py (3 changes: 2 additions & 1 deletion)
@@ -16,6 +16,7 @@
    parser.add_argument("-a", '--arch', type=str, default='n', help="Architecture (n, s, m, l, x)")
    parser.add_argument("-n", '--name', type=str, default="train", help="Run name")
    parser.add_argument("-d", '--path_data', type=str, help="Path to the datasets folder")
+    parser.add_argument("-i", '--image_size', type=int, default=640, help="Image size")
    parser.add_argument("-b", '--batch_size', type=int, default=8, help="Batch size")
    parser.add_argument("-e", '--epochs', type=int, default=10, help="Number of epochs")
    parser.add_argument("--device", type=str, default=['0'], nargs='+', help="Device list (also accepts 'cpu')")
@@ -32,6 +33,6 @@
    # Training
    model = YOLO("weights/yolov8{}.pt".format(args.arch))
    results = model.train(data=str(path_yaml), task="detect", name="{}_{}".format(args.name, args.arch),
-                          epochs=args.epochs, batch=args.batch_size,
+                          epochs=args.epochs, imgsz=args.image_size, batch=args.batch_size,
                          device=",".join(args.device),
                          scale=0.25, degrees=25.0, mosaic=0.8)
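With `imgsz` now exposed as a flag, the resulting weights can be checked at the same resolution. A sketch of validating a finished run through the Ultralytics API; the run directory and dataset YAML below are hypothetical, not paths from this commit.

```python
# Sketch: evaluate weights produced by train.py (placeholder paths).
from ultralytics import YOLO

model = YOLO("runs/detect/train_n/weights/best.pt")
metrics = model.val(data="face_parts.yaml", imgsz=640)  # same imgsz as training
print(metrics.box.map50)                                # mAP@0.5 on the validation split
```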
utils.py (36 changes: 35 additions & 1 deletion)
@@ -3,12 +3,26 @@


def smart_resize(img, new_size=512):
+    """
+    A very basic resizing function
+    :param img: input image
+    :param new_size: output max size
+    :return: resized image (largest side = new_size), size ratio
+    """
    ratio = new_size/max(img.shape[:2])
    return cv2.resize(img, None, fx=ratio, fy=ratio), ratio


def points_to_yolo(labels_df, points, part_id, img_h, img_w):
-    # Create a contour from the list of X,Y points and get the bounding box
+    """
+    Create a contour from the list of X,Y points and get the bounding box
+    :param labels_df: dataframe that will contain the boxes
+    :param points: list of points (X,Y) of a facial landmark
+    :param part_id: facial part ID
+    :param img_h: image height
+    :param img_w: image width
+    :return: bounding box coordinates (not normalized)
+    """
    contour = np.array(points, dtype=np.int32)
    x, y, w, h = cv2.boundingRect(contour)
    x_n, w_n = x / img_w, w / img_w
@@ -19,3 +33,23 @@ def points_to_yolo(labels_df, points, part_id, img_h, img_w):
    # Populating the dataframe
    labels_df.loc[len(labels_df), :] = [part_id, x_c, y_c, w_n, h_n]
    return x, y, w, h  # these are not the normalised coordinates, these are for plotting the box


+def annotate_frame(image, detections, box_annotator, label_annotator, class_names_dict):
+    """
+    Annotate the bounding box with class name and confidence
+    :param image: input image
+    :param detections: YOLO detections object
+    :param box_annotator: supervision bounding box annotator
+    :param label_annotator: supervision label annotator
+    :param class_names_dict: dictionary with model's class names {class_id: class_name, ...}
+    :return: annotated image
+    """
+    labels = [
+        "{} {:0.2f}".format(class_names_dict[class_id], confidence)
+        for _, _, confidence, class_id, _, _
+        in detections
+    ]
+    image = box_annotator.annotate(scene=image, detections=detections)
+    image = label_annotator.annotate(scene=image, detections=detections, labels=labels)
+    return image
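Taken together, the three helpers support the same flow in both `run.py` and `live_demo.py`. A sketch of how they fit together on a single image, using the annotator setup from the diffs above; the weights and image paths are placeholders.

```python
# Sketch: smart_resize + YOLOv8 + annotate_frame on one image (placeholder paths).
import cv2
import supervision as spv
from ultralytics import YOLO
from utils import smart_resize, annotate_frame

model = YOLO("weights/best.pt")
img = cv2.imread("face.jpg")
img, ratio = smart_resize(img, new_size=640)   # largest side becomes 640 px

result = model(img, agnostic_nms=True, verbose=False)[0]
detections = spv.Detections.from_ultralytics(result)

class_colors = spv.ColorPalette.from_hex(['#ffff66', '#66ffcc', '#ff99ff', '#ffcc99'])
bbox_annotator = spv.BoundingBoxAnnotator(thickness=2, color=class_colors)
label_annotator = spv.LabelAnnotator(color=class_colors, text_color=spv.Color.from_hex("#000000"))

img = annotate_frame(img, detections, bbox_annotator, label_annotator, model.model.names)
cv2.imshow("Face parts", img)
cv2.waitKey(0)
```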
