Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test face detection model using face landmark datasets #47

Merged
merged 37 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
6bdb315
Test face detection model using face landmark datasets
Inokinoki Jul 26, 2024
264fd1b
Wrap FFHQ dataset for face detection model
Inokinoki Jul 26, 2024
c9e1dd0
Merge branch 'main' of github.com:Giskard-AI/giskard-vision into face…
Inokinoki Jul 26, 2024
949144b
Merge with `face-detection-scan` to make scan work
Inokinoki Jul 26, 2024
e13cff5
Fix after merge with `face-detection-scan` to make scan work
Inokinoki Jul 26, 2024
504f4ea
Update notebook after merge with `face-detection-scan`
Inokinoki Jul 26, 2024
1bb421d
Add landmark based labels for ffhq face detection dataloader
Inokinoki Jul 29, 2024
09c2022
Merge branch 'object-detection' into face-detection-exp-inoki
Inokinoki Jul 29, 2024
34ad4b4
Merge branch 'object-detection' into face-detection-exp-inoki
rabah-khalek Aug 5, 2024
a420ecc
Fix import
Inokinoki Aug 5, 2024
befe774
Merge branch 'object-detection' into face-detection-exp-inoki
rabah-khalek Aug 5, 2024
d9f0983
Add transformers as a dep
Inokinoki Aug 5, 2024
555a72c
Regenerating pdm.lock
Aug 5, 2024
88598d8
Merge branch 'object-detection' of github.com:Giskard-AI/giskard-visi…
Inokinoki Aug 7, 2024
8c0752d
Merge branch 'main' into face-detection-exp-inoki
rabah-khalek Aug 7, 2024
807e9f5
Regenerating pdm.lock
Aug 7, 2024
5fd39fd
Merge branch 'main' into face-detection-exp-inoki
rabah-khalek Aug 7, 2024
2983aed
Rebase `DataLoaderFFHQFaceDetection` on `DataLoaderFFHQFaceDetectionL…
Inokinoki Aug 7, 2024
5ee3d94
Merge branch 'face-detection-exp-inoki' of github.com:Giskard-AI/gisk…
Inokinoki Aug 7, 2024
144475d
Merge branch 'main' into face-detection-exp-inoki
Inokinoki Aug 7, 2024
c063456
Simplify meta in `DataLoaderFFHQFaceDetection` by popping face rectan…
Inokinoki Aug 7, 2024
c8d0ac4
Merge branch 'main' into face-detection-exp-inoki
rabah-khalek Aug 7, 2024
d879d1a
Wrap metadata to fix failed notebooks
Inokinoki Aug 7, 2024
13e49f9
Merge branch 'face-detection-exp-inoki' of github.com:Giskard-AI/gisk…
Inokinoki Aug 7, 2024
b69443a
Cleanup `device="mps"` to fix warning in CI
Inokinoki Aug 7, 2024
f4ca865
Wrap None metadata when nothing is returned
Inokinoki Aug 7, 2024
51755f6
Attempt to fix notebooks
Inokinoki Aug 7, 2024
a9b5b29
Revert "Attempt to fix notebooks"
Inokinoki Aug 8, 2024
b9a2473
Fix batch in dataloader
Inokinoki Aug 8, 2024
92220b6
Fix batch with empty dict in dataloader
Inokinoki Aug 8, 2024
912a0d6
Rename to load datasets to avoid import conflicts
Inokinoki Aug 8, 2024
ba1055f
Remove unused and broken CachedDataLoader import
Inokinoki Aug 8, 2024
496a3c6
Keep `MetaData` class in `get_single_element`
Inokinoki Aug 8, 2024
7f0121f
Format code
Inokinoki Aug 8, 2024
de57092
Fix key error in SixDRepNet_benchmark.ipynb
Inokinoki Aug 8, 2024
70c16a5
Fix misunderstood meta in DataLoaderFFHQFaceDetectionLandmark
Inokinoki Aug 8, 2024
485d5e1
Fix misunderstood meta in ffhq
Inokinoki Aug 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
270 changes: 270 additions & 0 deletions examples/object_detection/test_face_detection.ipynb

Large diffs are not rendered by default.

255 changes: 254 additions & 1 deletion giskard_vision/object_detection/dataloaders/loaders.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
from pathlib import Path
from typing import Any, Dict, Optional, Union
from typing import Any, Dict, List, Optional, Union

import cv2
import numpy as np
Expand All @@ -10,6 +11,13 @@
from giskard_vision.core.dataloaders.base import DataIteratorBase, PerformanceIssueMeta
from giskard_vision.core.dataloaders.hf import HFDataLoader
from giskard_vision.core.dataloaders.meta import MetaData
from giskard_vision.core.dataloaders.utils import flatten_dict
from giskard_vision.landmark_detection.dataloaders.loaders import (
DataLoader300W,
DataLoaderFFHQ,
EthicalIssueMeta,
PerformanceIssueMeta,
)

from ..types import Types

Expand Down Expand Up @@ -187,6 +195,251 @@ def get_image(self, idx: int) -> np.ndarray:
return self.load_image_from_file(self.get_image_path(idx))


class DataLoader300WFaceDetection(DataLoader300W):
    """Data loader exposing the 300W landmark dataset as a face detection dataset.

    The ground-truth box is the axis-aligned bounding box of the landmarks returned
    by the parent loader. Ref: https://ibug.doc.ic.ac.uk/resources/300-W/
    """

    def get_labels(self, idx: int) -> Optional[Dict[str, Any]]:
        """
        Gets the face bounding box derived from the landmarks of a specific index.

        Args:
            idx (int): Index of the data.

        Returns:
            Optional[Dict[str, Any]]: ``None`` when the parent loader returns no landmarks
                for ``idx``; otherwise a dict with ``"boxes"`` (array of the per-column
                minima followed by the per-column maxima of the landmarks) and
                ``"labels"`` (the string ``"face"``).
        """
        landmarks = super().get_labels(idx)
        if landmarks is None:
            return None

        # Extrema over all landmark points.
        # NOTE(review): assumes landmarks are shaped (n_points, 2) in (x, y) order, so the
        # box reads [x_min, y_min, x_max, y_max] - confirm against DataLoader300W.
        lower = np.min(landmarks, axis=0)
        upper = np.max(landmarks, axis=0)

        return {
            "boxes": np.array([lower[0], lower[1], upper[0], upper[1]]),
            "labels": "face",
        }


class DataLoaderFFHQFaceDetection(DataLoaderFFHQ):
    """Data loader for the FFHQ (Flickr-Faces-HQ) dataset for face detection.

    Ground-truth boxes come from the ``faceRectangle`` entry of each per-image JSON
    file, rescaled with the pixel sizes stored in ``ffhq-dataset-meta.json``.
    """

    def __init__(
        self,
        dir_path: Union[str, Path],
        batch_size: Optional[int] = 1,
        shuffle: Optional[bool] = False,
        rng_seed: Optional[int] = None,
    ) -> None:
        """
        Initializes the parent FFHQ loader and loads the dataset-level bbox metadata.

        Args:
            dir_path (Union[str, Path]): Directory containing the dataset and
                ``ffhq-dataset-meta.json``.
            batch_size (Optional[int]): Forwarded to the parent loader.
            shuffle (Optional[bool]): Forwarded to the parent loader.
            rng_seed (Optional[int]): Forwarded to the parent loader.
        """
        super().__init__(dir_path, batch_size, shuffle, rng_seed)

        # Load face bbox data. Each entry is one flat list: the in-the-wild face_rect,
        # then the in-the-wild, thumbnail and image pixel sizes, in that order.
        # NOTE(review): get_labels reads indices 6-9, which assumes face_rect has
        # 4 values and every pixel_size has 2 - confirm against the metadata file.
        with (Path(dir_path) / "ffhq-dataset-meta.json").open(encoding="utf-8") as fp:
            self.bboxes: Dict[int, List[float]] = {
                int(k): list(v["in_the_wild"]["face_rect"])
                + v["in_the_wild"]["pixel_size"]
                + v["thumbnail"]["pixel_size"]
                + v["image"]["pixel_size"]
                for k, v in json.load(fp).items()
            }

    def get_labels(self, idx: int) -> Optional[Union[Dict[str, Any], np.ndarray]]:
        """
        Gets the face bounding box for a specific index.

        Args:
            idx (int): Index of the data.

        Returns:
            Optional[Union[Dict[str, Any], np.ndarray]]: ``None`` when ``idx`` has no
                entry in ``self.bboxes``. Otherwise a dict with ``"boxes"``
                (``[left, top, left + width, top + height]`` of the first
                ``faceRectangle``, rescaled) and ``"labels"`` (``"face"``). When the
                per-image JSON file is missing, the raw ``self.bboxes`` entry is
                returned as an array instead.
        """
        original_bbox = self.bboxes.get(idx)
        if original_bbox is None:
            # Without the dataset-level entry there are no pixel sizes to rescale with
            # and nothing to fall back to; subscripting None would raise TypeError.
            return None

        # Only the file access can raise FileNotFoundError, so keep the try minimal.
        try:
            with Path(self.images_dir_path / f"{idx:05d}.json").open(encoding="utf-8") as fp:
                meta = json.load(fp)
        except FileNotFoundError:
            # NOTE(review): this fallback returns the flat metadata entry rather than a
            # {"boxes", "labels"} dict. Kept unchanged for backward compatibility -
            # confirm whether callers can consume this shape.
            return np.array(original_bbox)

        face_rect = meta[0]["faceRectangle"]
        thumbnail_w, thumbnail_h = original_bbox[6], original_bbox[7]
        w, h = original_bbox[8], original_bbox[9]

        # Rescale the rectangle by (image pixel size / thumbnail pixel size).
        return {
            "boxes": np.array(
                [
                    face_rect["left"] * w / thumbnail_w,
                    face_rect["top"] * h / thumbnail_h,
                    (face_rect["left"] + face_rect["width"]) * w / thumbnail_w,
                    (face_rect["top"] + face_rect["height"]) * h / thumbnail_h,
                ]
            ),
            "labels": "face",
        }

    def get_meta(self, idx: int) -> Optional[MetaData]:
        """
        Gets metadata for a specific index and flattens it.

        Args:
            idx (int): Index of the image.

        Returns:
            Optional[MetaData]: Flattened metadata for the given index, with the
                ``faceRectangle_*`` keys excluded, the ``faceAttributes_`` prefix
                removed and the ``confidence`` key dropped; ``None`` when a
                ``FileNotFoundError`` is raised while building it.
        """
        try:
            with Path(self.images_dir_path / f"{idx:05d}.json").open(encoding="utf-8") as fp:
                meta = json.load(fp)

            # The face rectangle is served by get_labels, so keep it out of the metadata.
            flat_meta = self.process_hair_color_data(
                flatten_dict(
                    meta[0],
                    excludes=[
                        "faceRectangle_top",
                        "faceRectangle_left",
                        "faceRectangle_width",
                        "faceRectangle_height",
                    ],
                )
            )
            flat_meta = self.process_emotions_data(flat_meta)
            flat_meta_without_prefix = {key.replace("faceAttributes_", ""): value for key, value in flat_meta.items()}
            flat_meta_without_prefix.pop("confidence")
            return MetaData(
                data=flat_meta_without_prefix,
                categories=[
                    "gender",
                    "glasses",
                    "exposure_exposureLevel",
                    "noise_noiseLevel",
                    "makeup_eyeMakeup",
                    "makeup_lipMakeup",
                    "occlusion_foreheadOccluded",
                    "occlusion_eyeOccluded",
                    "occlusion_mouthOccluded",
                    "hair_invisible",
                    "hairColor",
                    "emotion",
                ],
                issue_groups={
                    "smile": PerformanceIssueMeta,
                    "headPose_pitch": PerformanceIssueMeta,
                    "headPose_roll": PerformanceIssueMeta,
                    "headPose_yaw": PerformanceIssueMeta,
                    "gender": EthicalIssueMeta,
                    "age": EthicalIssueMeta,
                    "facialHair_moustache": EthicalIssueMeta,
                    "facialHair_beard": EthicalIssueMeta,
                    "facialHair_sideburns": EthicalIssueMeta,
                    "glasses": EthicalIssueMeta,
                    "emotion": PerformanceIssueMeta,
                    "blur_blurLevel": PerformanceIssueMeta,
                    "blur_value": PerformanceIssueMeta,
                    "exposure_exposureLevel": PerformanceIssueMeta,
                    "exposure_value": PerformanceIssueMeta,
                    "noise_noiseLevel": PerformanceIssueMeta,
                    "noise_value": PerformanceIssueMeta,
                    "makeup_eyeMakeup": EthicalIssueMeta,
                    "makeup_lipMakeup": EthicalIssueMeta,
                    "occlusion_foreheadOccluded": PerformanceIssueMeta,
                    "occlusion_eyeOccluded": PerformanceIssueMeta,
                    "occlusion_mouthOccluded": PerformanceIssueMeta,
                    "hair_bald": EthicalIssueMeta,
                    "hair_invisible": PerformanceIssueMeta,
                    "hairColor": EthicalIssueMeta,
                },
            )
        except FileNotFoundError:
            return None


class DataLoaderFFHQFaceDetectionLandmark(DataLoaderFFHQ):
    """Data loader for the FFHQ (Flickr-Faces-HQ) dataset for face detection.

    The ground-truth box is the axis-aligned bounding box of the landmarks returned
    by the parent loader, rather than a stored face rectangle.
    """

    # Keys excluded when flattening: the face rectangle is not exposed as metadata.
    _FACE_RECTANGLE_KEYS = (
        "faceRectangle_top",
        "faceRectangle_left",
        "faceRectangle_width",
        "faceRectangle_height",
    )

    # Metadata keys passed to MetaData as `categories`.
    _META_CATEGORIES = (
        "gender",
        "glasses",
        "exposure_exposureLevel",
        "noise_noiseLevel",
        "makeup_eyeMakeup",
        "makeup_lipMakeup",
        "occlusion_foreheadOccluded",
        "occlusion_eyeOccluded",
        "occlusion_mouthOccluded",
        "hair_invisible",
        "hairColor",
        "emotion",
    )

    # Metadata key -> issue group passed to MetaData as `issue_groups`.
    _META_ISSUE_GROUPS = {
        "smile": PerformanceIssueMeta,
        "headPose_pitch": PerformanceIssueMeta,
        "headPose_roll": PerformanceIssueMeta,
        "headPose_yaw": PerformanceIssueMeta,
        "gender": EthicalIssueMeta,
        "age": EthicalIssueMeta,
        "facialHair_moustache": EthicalIssueMeta,
        "facialHair_beard": EthicalIssueMeta,
        "facialHair_sideburns": EthicalIssueMeta,
        "glasses": EthicalIssueMeta,
        "emotion": PerformanceIssueMeta,
        "blur_blurLevel": PerformanceIssueMeta,
        "blur_value": PerformanceIssueMeta,
        "exposure_exposureLevel": PerformanceIssueMeta,
        "exposure_value": PerformanceIssueMeta,
        "noise_noiseLevel": PerformanceIssueMeta,
        "noise_value": PerformanceIssueMeta,
        "makeup_eyeMakeup": EthicalIssueMeta,
        "makeup_lipMakeup": EthicalIssueMeta,
        "occlusion_foreheadOccluded": PerformanceIssueMeta,
        "occlusion_eyeOccluded": PerformanceIssueMeta,
        "occlusion_mouthOccluded": PerformanceIssueMeta,
        "hair_bald": EthicalIssueMeta,
        "hair_invisible": PerformanceIssueMeta,
        "hairColor": EthicalIssueMeta,
    }

    def get_labels(self, idx: int) -> Optional[Dict[str, Any]]:
        """
        Gets the face bounding box derived from the landmarks of a specific index.

        Args:
            idx (int): Index of the data.

        Returns:
            Optional[Dict[str, Any]]: ``None`` when the parent loader returns no landmarks
                for ``idx``; otherwise a dict with ``"boxes"`` (array of the per-column
                minima followed by the per-column maxima of the landmarks) and
                ``"labels"`` (the string ``"face"``).
        """
        landmarks = super().get_labels(idx)
        if landmarks is None:
            return None

        # NOTE(review): assumes landmarks are shaped (n_points, 2) in (x, y) order, so the
        # box reads [x_min, y_min, x_max, y_max] - confirm against DataLoaderFFHQ.
        lower = np.min(landmarks, axis=0)
        upper = np.max(landmarks, axis=0)

        return {
            "boxes": np.array([lower[0], lower[1], upper[0], upper[1]]),
            "labels": "face",
        }

    def get_meta(self, idx: int) -> Optional[MetaData]:
        """
        Gets metadata for a specific index and flattens it.

        Args:
            idx (int): Index of the image.

        Returns:
            Optional[MetaData]: Flattened metadata for the given index, with the
                ``faceRectangle_*`` keys excluded, the ``faceAttributes_`` prefix
                removed and the ``confidence`` key dropped; ``None`` when a
                ``FileNotFoundError`` is raised while building it.
        """
        try:
            with Path(self.images_dir_path / f"{idx:05d}.json").open(encoding="utf-8") as fp:
                meta = json.load(fp)

            flattened = flatten_dict(meta[0], excludes=list(self._FACE_RECTANGLE_KEYS))
            flat_meta = self.process_emotions_data(self.process_hair_color_data(flattened))
            flat_meta_without_prefix = {key.replace("faceAttributes_", ""): value for key, value in flat_meta.items()}
            flat_meta_without_prefix.pop("confidence")

            # Hand MetaData fresh copies so the shared class-level tables are never mutated.
            return MetaData(
                data=flat_meta_without_prefix,
                categories=list(self._META_CATEGORIES),
                issue_groups=dict(self._META_ISSUE_GROUPS),
            )
        except FileNotFoundError:
            return None


class DataLoaderFurnitureHuggingFaceDataset(HFDataLoader):
"""
A data loader for the `Nfiniteai/living-room-passes` dataset on HF, extending the HFDataLoader class.
Expand Down
41 changes: 41 additions & 0 deletions giskard_vision/object_detection/models/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import Any, Optional

import numpy as np
from PIL import Image

from giskard_vision.core.models.hf_pipeline import HFPipelineModelBase, HFPipelineTask
from giskard_vision.object_detection.types import Types


class ObjectDetectionHFModel(HFPipelineModelBase):
    """Hugging Face pipeline wrapper class that serves as a template for object detection predictions

    Args:
        model_id (str): Hugging Face model ID
        name (Optional[str]): name of the model
        device (str): device to run the model on
    """

    model_type = "object_detection"
    prediction_result_cls = Types.prediction_result

    def __init__(self, model_id: str, name: Optional[str] = None, device: str = "cpu"):
        """Set up the wrapper around a Hugging Face object detection pipeline

        Args:
            model_id (str): Hugging Face model ID
            name (Optional[str]): name of the model
            device (str): device to run the model on
        """
        # Everything is forwarded to the parent; only the pipeline task is fixed here.
        parent_kwargs = {
            "model_id": model_id,
            "pipeline_task": HFPipelineTask.OBJECT_DETECTION,
            "name": name,
            "device": device,
        }
        super().__init__(**parent_kwargs)

    def predict_raw(self, image: np.ndarray) -> Any:
        """Run the pipeline on a single image and return its raw output

        Args:
            image (np.ndarray): input image, converted to an RGB PIL image before inference
        """
        pil_image = Image.fromarray(image, "RGB")
        return self.pipeline(pil_image)
36 changes: 35 additions & 1 deletion giskard_vision/object_detection/models/wrappers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import os
from typing import Optional
from typing import Any, Optional

import cv2
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from giskard_vision.core.models.base import ModelBase
from giskard_vision.object_detection.models.base import ObjectDetectionHFModel
from giskard_vision.utils.errors import GiskardImportError


Expand Down Expand Up @@ -242,3 +243,36 @@ def train(self, data_path):
reduce_lr = ReduceLROnPlateau(monitor="IoU", factor=0.2, patience=PATIENCE, min_lr=1e-7, verbose=1, mode="max")

self.model.fit(batch_images, gt, epochs=100, callbacks=[stop, reduce_lr], verbose=2)


class DetrFinetunedFaceDetectionHuggingFaceModel(ObjectDetectionHFModel):
    """Wrapper class for goshiv's detr finetuned face detection model on Hugging Face.

    Args:
        name (Optional[str]): The name of the model.
        device (str): The device to run the model on.
    """

    def __init__(self, name: Optional[str] = None, device: str = "cpu"):
        """Initializes the wrapper with the fixed ``goshiv/detr-finetuned-face`` model id.

        Args:
            name (Optional[str]): The name of the model.
            device (str): The device to run the model on.
        """
        super().__init__(
            model_id="goshiv/detr-finetuned-face",
            name=name,
            device=device,
        )

    def predict_image(self, image: np.ndarray) -> Any:
        """Predicts a single face box for one image.

        Args:
            image (np.ndarray): input image

        Returns:
            Any: dict with ``"boxes"`` (array ``[xmin, ymin, xmax, ymax]`` of the
                highest-scoring raw prediction) and ``"labels"`` (``"face"``).

        Raises:
            ValueError: if the pipeline returns no predictions for the image.
        """
        raw_predictions = super().predict_raw(image)

        # Keep only the prediction with the highest score
        best_prediction = max(raw_predictions, key=lambda x: x["score"], default=None)
        if best_prediction is None:
            # Same exception type as the bare max() call raised, with an actionable message.
            raise ValueError("No face detected: the detection pipeline returned no predictions for this image.")

        box = best_prediction["box"]
        return {
            "boxes": np.array([box["xmin"], box["ymin"], box["xmax"], box["ymax"]]),
            "labels": "face",
        }
Loading
Loading