-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathunzip_data.py
114 lines (101 loc) · 3.88 KB
/
unzip_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import argparse
import os
import os.path as osp
import tempfile
import zipfile
import mmcv
from mmengine.utils import mkdir_or_exist
from tqdm import tqdm
def parse_args(argv=None):
    """Parse the command-line options of the MICCAI conversion script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        argparse.Namespace with the attributes ``dataset_path``,
        ``tmp_dir``, ``out_dir`` and ``split_rate``.
    """
    parser = argparse.ArgumentParser(
        description="Convert MICCAI dataset to mmsegmentation format"
    )
    parser.add_argument("dataset_path", help="path of MICCAI train.zip")
    parser.add_argument("--tmp_dir", help="path of the temporary directory")
    parser.add_argument("-o", "--out_dir", default=None, help="output path")
    parser.add_argument(
        "-s",
        "--split_rate",
        type=float,
        default=0.3,
        help="split rate for train val (fraction of images used for training)",
    )
    return parser.parse_args(argv)
def _convert_split(src_dir, names, dst_dir, binarize=False):
    """Re-encode the listed files of ``src_dir`` as PNG files in ``dst_dir``.

    Args:
        src_dir: Directory that holds the source files.
        names: File names (relative to ``src_dir``) to convert, in order.
        dst_dir: Existing directory that receives one ``<stem>.png`` per
            input file.
        binarize: When true, treat the files as annotation masks: keep only
            the first channel and integer-divide it by 128.
    """
    for name in tqdm(names):
        img = mmcv.imread(osp.join(src_dir, name))
        if binarize:
            # The annotation img should be divided by 128, because some of
            # the annotation imgs are not standard. For 8-bit pixel values
            # the integer division is equivalent to
            # '1 if value >= 128 else 0'.
            img = img[:, :, 0] // 128
        mmcv.imwrite(img, osp.join(dst_dir, osp.splitext(name)[0] + ".png"))


def main():
    """Unpack the MICCAI ``train.zip`` and write a train/val PNG layout.

    The archive is extracted into a temporary directory. The sorted file
    lists of ``train/image`` and ``train/mask`` are each cut at
    ``int(image_count * split_rate)``: the leading part is written to the
    ``training`` sub-directories and the remainder to ``validation``, under
    ``<out_dir>/images`` and ``<out_dir>/annotations`` respectively.
    ``out_dir`` defaults to ``data/miccai`` when not given on the command
    line.
    """
    args = parse_args()
    dataset_path = args.dataset_path
    out_dir = osp.join("data", "miccai") if args.out_dir is None else args.out_dir

    print("Making directories...")
    mkdir_or_exist(out_dir)
    for kind in ("images", "annotations"):
        mkdir_or_exist(osp.join(out_dir, kind))
        for split in ("training", "validation"):
            mkdir_or_exist(osp.join(out_dir, kind, split))

    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
        print("Extracting train.zip...")
        # Context manager guarantees the archive handle is closed even if
        # extraction fails part-way.
        with zipfile.ZipFile(dataset_path) as zip_file:
            zip_file.extractall(tmp_dir)

        # split
        train_img_path = osp.join(tmp_dir, "train/image")
        train_anno_path = osp.join(tmp_dir, "train/mask")
        # List and sort each directory once; the same ordering is reused
        # for both the training and the validation slice.
        img_names = sorted(os.listdir(train_img_path))
        anno_names = sorted(os.listdir(train_anno_path))
        # The cut index is derived from the image count and applied to both
        # listings, so images and masks are paired by sorted position.
        training_len = int(len(img_names) * args.split_rate)

        print("Processing train data...")
        _convert_split(
            train_img_path,
            img_names[:training_len],
            osp.join(out_dir, "images", "training"),
        )

        print("Processing train anno data...")
        _convert_split(
            train_anno_path,
            anno_names[:training_len],
            osp.join(out_dir, "annotations", "training"),
            binarize=True,
        )

        print("Processing val data...")
        _convert_split(
            train_img_path,
            img_names[training_len:],
            osp.join(out_dir, "images", "validation"),
        )

        print("Processing val anno data...")
        _convert_split(
            train_anno_path,
            anno_names[training_len:],
            osp.join(out_dir, "annotations", "validation"),
            binarize=True,
        )

        # Leaving the ``with`` block deletes the extracted files.
        print("Removing the temporary files...")

    print("Done!")
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()