diff --git a/README.md b/README.md
index 8c39c073b..71b5ae5b8 100644
--- a/README.md
+++ b/README.md
@@ -235,6 +235,7 @@ The collection of pre-trained, state-of-the-art AI models.
| [
](hand_recognition/minimal-hand/) |[minimal-hand](/hand_recognition/minimal-hand/) | [Minimal Hand](https://github.com/CalciferZh/minimal-hand) | TensorFlow | 1.2.8 and later |
| [
](hand_recognition/v2v-posenet/) |[v2v-posenet](/hand_recognition/v2v-posenet/) | [V2V-PoseNet](https://github.com/mks0601/V2V-PoseNet_RELEASE) | Pytorch | 1.2.6 and later |
| [
](hands_recognition/hands_segmentation_pytorch/) |[hands_segmentation_pytorch](/hand_recognition/hands_segmentation_pytorch/) | [hands-segmentation-pytorch](https://github.com/guglielmocamporese/hands-segmentation-pytorch) | Pytorch | 1.2.10 and later |
+| [
](hand_recognition/ego2hands/) |[ego2hands](/hand_recognition/ego2hands/) | [Ego2Hands](https://github.com/AlextheEngineer/Ego2Hands) | Pytorch | 1.2.16 and later |
## Image captioning
diff --git a/hand_recognition/ego2hands/README.md b/hand_recognition/ego2hands/README.md
new file mode 100644
index 000000000..877dc4e95
--- /dev/null
+++ b/hand_recognition/ego2hands/README.md
@@ -0,0 +1,75 @@
+# Ego2Hands - Egocentric Two-hand Segmentation and Detection
+## Input
+
+* **Image or Video**
+
+
+
+
+## Output
+
+* **Predicted mask**
+
+
+
+The estimated mask of the hands (without the ```--overlay``` option),
+or the mask overlaid on the original image (with the ```--overlay``` option).
+
+The result is saved to ```./output.png``` by default; a different path can be specified with the ```-s``` option.
+
+* **Energy prediction**
+
+
+The segmentation mask covers the whole arm regions, while the "energy" prediction focuses on the position of the hands; it can also be used to derive a bounding box for each hand, as sketched below.
+
+The energy result is saved alongside the mask, with "_energy" inserted before the file extension (```./output_energy.png``` by default).
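+As a rough illustration of how a bounding box can be derived from the energy map (the demo script uses the equivalent helper ```get_bounding_box_from_energy``` in ```ego2hands.py```; ```bbox_from_energy``` below is just an illustrative name), the map is thresholded and the extent of the positive region is taken:
+
+```python
+import numpy as np
+
+def bbox_from_energy(energy, threshold=0.5):
+    """Minimal sketch: energy is a 2D float map; returns (row_min, row_max, col_min, col_max)."""
+    rows, cols = np.where(energy > threshold)
+    if rows.size == 0:
+        return 0, 0, 0, 0
+    return rows.min(), rows.max(), cols.min(), cols.max()
+```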
+
+## Usage
+An Internet connection is required when running the script for the first time, as the model files will be downloaded automatically.
+
+Running the script generates the predicted hand mask for the input image or video.
+
+#### Example 1: Inference on prepared demo image.
+```bash
+$ python3 ego2hands.py
+```
+
+#### Example 2: Specify input path, save path.
+```bash
+$ python3 ego2hands.py -i input.png -s output.png
+```
+The ```-i``` and ```-s``` options can be used to specify the input path and the save path, respectively.
+In this example, the segmentation result will be saved to output.png, and the energy result will be saved to output_energy.png.
+
+#### Example 3: Specify the image size, and visualize the overlaid mask.
+```bash
+$ python3 ego2hands.py --width 256 --height 512 --overlay
+```
+Use the ```--width``` and ```--height``` options to specify the size at which the model runs inference.
+The result is always resized back to the original size of the image.
+You can visualize the result as a mask overlaid on the original image with the ```--overlay``` option.
+
+#### Example 4: Inference on Video.
+```bash
+$ python3 ego2hands.py -v 0
+```
+The argument to the ```-v``` option can be either the device ID of a webcam
+or the path to an input video file.
+
+## Reference
+
+* [Ego2Hands](https://github.com/AlextheEngineer/Ego2Hands)
+
+## Framework
+
+Pytorch
+
+
+## Model Format
+
+ONNX opset=11
+
+## Netron
+
+- [ego2hands.onnx.prototxt](https://netron.app/?url=https://storage.googleapis.com/ailia-models/ego2hands/ego2hands.onnx.prototxt)
\ No newline at end of file
diff --git a/hand_recognition/ego2hands/ego2hands.py b/hand_recognition/ego2hands/ego2hands.py
new file mode 100644
index 000000000..296f4b478
--- /dev/null
+++ b/hand_recognition/ego2hands/ego2hands.py
@@ -0,0 +1,244 @@
+import sys
+import time
+
+import ailia
+import cv2
+
+import numpy as np
+
+# import original modules
+sys.path.append('../../util')
+
+# logger
+from logging import getLogger # noqa: E402
+
+import webcamera_utils # noqa: E402
+from image_utils import imread # noqa: E402
+from model_utils import check_and_download_models # noqa: E402
+from arg_utils import get_base_parser, get_savepath, update_parser # noqa: E402
+
+import matplotlib.pyplot as plt
+
+logger = getLogger(__name__)
+
+
+# ======================
+# Parameters
+# ======================
+
+MODEL_NAME = "ego2hands"
+WEIGHT_PATH = MODEL_NAME + ".onnx"
+MODEL_PATH = WEIGHT_PATH + '.prototxt'
+
+REMOTE_PATH = "https://storage.googleapis.com/ailia-models/" + MODEL_NAME + "/"
+
+DEFAULT_INPUT_PATH = 'sample_image.png'
+DEFAULT_SAVE_PATH = 'output.png'
+
+# ======================
+# Argument Parser Config
+# ======================
+parser = get_base_parser(
+ 'Ego2Hands: Egocentric Two-hand Segmentation and Detection',
+ DEFAULT_INPUT_PATH, DEFAULT_SAVE_PATH
+)
+
+parser.add_argument(
+ '--height', type=int, default=None,
+ help='height of the image to run inference on '
+)
+
+parser.add_argument(
+ '--width', type=int, default=None,
+ help='width of the image to run inference on'
+)
+
+parser.add_argument(
+ '--overlay', action='store_true',
+    help='Visualize the mask overlaid on the image'
+)
+
+args = update_parser(parser)
+
+# ======================
+# Helper functions
+# ======================
+
+def preprocess(image, h=None, w=None):
+
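+    # optionally resize to the requested inference size, then build the
+    # two-channel network input (grayscale + Canny edge map), normalized to
+    # roughly [-0.5, 0.5] and transposed to channel-first (CHW) order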
+ if h is not None and w is not None:
+ image = cv2.resize(image, (w, h))
+
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+
+ img_edge = cv2.Canny(image, 25, 100).astype(np.float32)
+ img_real_test = np.stack((image, img_edge), -1)
+ img_real_test = (img_real_test - 128.0) / 256.0
+ return img_real_test.transpose(2, 0, 1)
+
+
+def postprocess(org_image, seg_output, energy_output):
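+    # resize the raw network outputs back to the original image resolution;
+    # argmax over the segmentation channels gives a per-pixel class map
+    # (0: background, 1/2: hands), and channels 1 and 2 of the energy output
+    # are the left/right hand energy maps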
+ seg_output_final = cv2.resize(seg_output[0].transpose(1,2,0), dsize=(org_image.shape[1], org_image.shape[0]), interpolation=cv2.INTER_LINEAR_EXACT, )
+ seg_output_final = np.argmax(seg_output_final, axis=-1)
+ energy_l_final = cv2.resize(energy_output[0,1][None].transpose(1,2,0), dsize=(org_image.shape[1], org_image.shape[0]), interpolation=cv2.INTER_LINEAR_EXACT, )
+ energy_r_final = cv2.resize(energy_output[0,2][None].transpose(1,2,0), dsize=(org_image.shape[1], org_image.shape[0]), interpolation=cv2.INTER_LINEAR_EXACT, )
+ return seg_output_final, energy_l_final, energy_r_final
+
+def get_bounding_box_from_energy(energy, close_kernel_size = 15, close_op = True):
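+    # threshold the energy map at 0.5, optionally run an erode/dilate pass to
+    # suppress small spurious blobs, and return the extent of the positive
+    # region as [row_min, row_max, col_min, col_max]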
+ energy_positives = (energy > 0.5).astype(np.uint8)
+ if close_op:
+ energy_positives = cv2.erode(energy_positives, np.ones((close_kernel_size, close_kernel_size)))
+ energy_positives = cv2.dilate(energy_positives, np.ones((close_kernel_size, close_kernel_size)))
+ coords = np.where(energy_positives.astype(bool))
+ if coords[0].size != 0:
+ row_min, row_max, col_min, col_max = np.min(coords[0]), np.max(coords[0]), np.min(coords[1]), np.max(coords[1])
+ else:
+ row_min, row_max, col_min, col_max = 0, 0, 0, 0
+ return np.array([row_min, row_max, col_min, col_max])
+
+def create_visualization(image, seg, energy_l, energy_r, overlay=False, savepath=None):
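+    # build two visualizations: the segmentation mask (blended over the input
+    # image when overlay=True) and the thresholded energy maps with a green
+    # bounding box drawn around each hand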
+ # visualize segmentation mask
+ if overlay:
+ mask = np.where((seg == 1)[:,:,None], image/2 + np.array([0,0,128])[None,None], image)
+ mask = np.where((seg == 2)[:,:,None], image/2 + np.array([128,0,0])[None,None], mask)
+ else:
+ mask = seg * 100
+ mask = np.tile(mask[:,:,None], (1,1,3))
+ mask = (mask).astype(np.uint8)
+
+ # vizualize energy map and bounding box
+ if overlay:
+ energy_vis = np.where((energy_l > 0.5)[:,:,None], image/2 + np.array([0,0,128])[None,None], image)
+ energy_vis = np.where((energy_r > 0.5)[:,:,None], energy_vis/2 + np.array([128,0,0])[None,None], energy_vis)
+ else:
+ energy_vis = np.tile(((energy_l > 0.5) * 100 + (energy_r > 0.5) * 200)[:,:,None], (1,1,3)).astype('uint8')
+
+ bbox_l = get_bounding_box_from_energy(energy_l)
+ bbox_r = get_bounding_box_from_energy(energy_r)
+ energy_vis = cv2.rectangle(energy_vis, (bbox_l[2], bbox_l[0]), (bbox_l[3], bbox_l[1]), (0, 255, 0), 2)
+ energy_vis = cv2.rectangle(energy_vis, (bbox_r[2], bbox_r[0]), (bbox_r[3], bbox_r[1]), (0, 255, 0), 2)
+ energy_vis = (energy_vis).astype(np.uint8)
+ return mask, energy_vis
+
+def visualize_and_save(image, seg, energy_l, energy_r, overlay=False, savepath=None):
+ mask, energy_vis = create_visualization(image, seg, energy_l, energy_r, overlay=overlay)
+
+ plt.imshow(mask)
+ plt.show()
+
+ plt.imshow(energy_vis)
+ plt.show()
+
+ if savepath is not None:
+ logger.info(f'saving result to {savepath}')
+ mask = cv2.cvtColor(mask, cv2.COLOR_RGB2BGR)
+ cv2.imwrite(savepath, mask)
+
+ energy_vis = cv2.cvtColor(energy_vis, cv2.COLOR_RGB2BGR)
+ energy_savepath = savepath.split('.')
+ energy_savepath[-2] += '_energy'
+ energy_savepath = '.'.join(energy_savepath)
+ cv2.imwrite(energy_savepath, energy_vis)
+
+def update_frame(image, mask, energy, frame):
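+    # show the mask and energy visualizations side by side in a single
+    # matplotlib window, reusing the image artist between frames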
+ vis = np.concatenate([mask, energy], axis=1).astype('uint8')
+ if frame is None:
+ frame = plt.imshow(vis)
+ else:
+ frame.set_data(vis)
+ plt.pause(0.1)
+ return frame
+
+# ======================
+# Main functions
+# ======================
+
+def recognize_from_image(model):
+ logger.info('Start inference...')
+
+ image_path = args.input[0]
+
+ # prepare input data
+ org_img = cv2.cvtColor(imread(image_path), cv2.COLOR_BGR2RGB).astype(np.uint8)
+    if args.height is not None and args.width is not None:
+        h = args.height
+        w = args.width
+    else:
+        h = org_img.shape[0]
+        w = org_img.shape[1]
+
+    image = preprocess(org_img, h=h, w=w)[None]
+
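+    # the model returns four outputs; only the last two (the final
+    # segmentation map and the energy map) are used here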
+    if args.benchmark and args.video is None:
+ logger.info('BENCHMARK mode')
+ for i in range(5):
+ start = int(round(time.time() * 1000))
+ _, _, seg_output_final, energy_output_final = model.predict([image])
+ end = int(round(time.time() * 1000))
+ logger.info(f'\tailia processing time {end - start} ms')
+ else:
+ _, _, seg_output_final, energy_output_final = model.predict([image])
+
+ seg, energy_l, energy_r = postprocess(org_img, seg_output_final, energy_output_final)
+
+ # visualize
+ visualize_and_save(org_img, seg, energy_l, energy_r, args.overlay, args.savepath)
+
+ logger.info('Script finished successfully.')
+
+def recognize_from_video(model):
+    # prepare video capture
+
+ capture = webcamera_utils.get_capture(args.video)
+
+    # read one frame to determine the fallback inference size
+    _, t = capture.read()
+    if args.height is not None and args.width is not None:
+        h = args.height
+        w = args.width
+    else:
+        h = t.shape[0]
+        w = t.shape[1]
+
+ frame_shown = None
+
+    while True:
+ ret, frame = capture.read()
+ if (cv2.waitKey(1) & 0xFF == ord("q")) or not ret:
+ break
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).astype(np.uint8)
+
+
+ # inference
+ image = preprocess(frame, h = h, w = w)[None]
+
+ _, _, seg_output_final, energy_output_final = model.predict([image])
+
+
+ seg, energy_l, energy_r = postprocess(frame, seg_output_final, energy_output_final)
+ mask_vis, energy_vis = create_visualization(frame, seg, energy_l, energy_r, overlay=args.overlay)
+
+ # visualize
+ frame_shown = update_frame(frame, mask_vis, energy_vis, frame_shown)
+ if not plt.get_fignums():
+ break
+
+ capture.release()
+ logger.info('Script finished successfully.')
+
+def main():
+ # model files check and download
+ check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)
+
+ # net initialize
+ model = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id = args.env_id)
+
+ if args.video is not None:
+ # video mode
+ recognize_from_video(model)
+ else:
+ # image mode
+ recognize_from_image(model)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/hand_recognition/ego2hands/output.png b/hand_recognition/ego2hands/output.png
new file mode 100644
index 000000000..978b39f85
Binary files /dev/null and b/hand_recognition/ego2hands/output.png differ
diff --git a/hand_recognition/ego2hands/output_energy.png b/hand_recognition/ego2hands/output_energy.png
new file mode 100644
index 000000000..a210a957e
Binary files /dev/null and b/hand_recognition/ego2hands/output_energy.png differ
diff --git a/hand_recognition/ego2hands/sample_image.png b/hand_recognition/ego2hands/sample_image.png
new file mode 100644
index 000000000..5b3c8c613
Binary files /dev/null and b/hand_recognition/ego2hands/sample_image.png differ
diff --git a/scripts/download_all_models.sh b/scripts/download_all_models.sh
index 76821420e..0e5a273c5 100755
--- a/scripts/download_all_models.sh
+++ b/scripts/download_all_models.sh
@@ -131,6 +131,7 @@ cd ../../hand_recognition/hand3d; python3 hand3d.py ${OPTION}
cd ../../hand_recognition/minimal-hand; python3 minimal-hand.py ${OPTION}
cd ../../hand_recognition/v2v-posenet; python3 v2v-posenet.py ${OPTION}
cd ../../hand_recognition/hands_segmentation_pytorch; python3 hands_segmentation_pytorch.py ${OPTION}
+cd ../../hand_recognition/ego2hands; python3 ego2hands.py ${OPTION}
cd ../../image_captioning/illustration2vec; python3 illustration2vec.py ${OPTION}
cd ../../image_captioning/image_captioning_pytorch; python3 image_captioning_pytorch.py ${OPTION}
cd ../../image_captioning/blip2; python3 blip2.py ${OPTION}