-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdump_coco.py
74 lines (63 loc) · 2.59 KB
/
dump_coco.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import sys
from typing import List, Optional
import click
from IPython.core import ultratb
from yamlu.coco import CocoDatasetExport
import pybpmn
from pybpmn import syntax
from pybpmn.constants import VALID_SPLITS
from pybpmn.dataset import HdBpmnDataset
# Any uncaught exception is routed through IPython's verbose traceback
# formatter, which is configured here to open pdb afterwards (call_pdb=1).
sys.excepthook = ultratb.FormattedTB(
    call_pdb=1,
    color_scheme="Linux",
    mode="Verbose",
)

# Module-level logger named after this module.
_logger = logging.getLogger(__name__)
@click.command()
@click.argument("hdbpmn_root", type=click.Path(file_okay=False, exists=True))
@click.argument("coco_dataset_root", type=click.Path(file_okay=False))
@click.option("--sample", default=None, type=int)
@click.option("--n_jobs", default=None, type=int)
@click.option("--write_img", default=True, type=bool)
@click.option("--write_ann_img", default=False, type=bool)
@click.option("--splits", "-s", multiple=True, default=list(VALID_SPLITS))
@click.option("--quiet", "log_level", flag_value=logging.WARNING)
@click.option("-v", "--verbose", "log_level", flag_value=logging.INFO, default=True)
@click.option("-vv", "--very-verbose", "log_level", flag_value=logging.DEBUG)
@click.version_option(pybpmn.__version__)
def main(
    hdbpmn_root: str,
    coco_dataset_root: str,
    sample: Optional[int],
    n_jobs: Optional[int],
    write_img: bool,
    write_ann_img: bool,
    splits: List[str],
    log_level: int,
):
    # CLI entry point: wraps the dataset under `hdbpmn_root` in an HdBpmnDataset
    # and dumps every requested split through a CocoDatasetExport.
    #
    # Arguments and options, as wired up by the decorators above:
    #   hdbpmn_root       -- existing directory, passed on as `bpmn_dataset_root`
    #   coco_dataset_root -- directory path, passed on as `coco_dataset_root`
    #   sample, n_jobs    -- optional ints forwarded unchanged to the exporter
    #   write_img,
    #   write_ann_img     -- booleans forwarded unchanged to the exporter
    #   splits            -- split names to dump (defaults to all VALID_SPLITS)
    #   log_level         -- logging level chosen via --quiet / -v / -vv
    #
    # NOTE: kept as a comment rather than a docstring on purpose -- click shows
    # a command's docstring in `--help`, which would change the CLI output.
    logging.basicConfig(level=log_level, format="%(asctime)s %(levelname)s - %(message)s")
    # logging.getLogger("yamlu.img").setLevel(logging.ERROR)

    # Labels inside plain activities (e.g. a task) do not need to be detected as
    # separate objects: during inference, the label of a task is defined by all
    # text located within that task. Only activities that can contain child
    # shapes keep their label objects.
    label_free_categories = []
    for category in syntax.ACTIVITY_CATEGORIES:
        if category not in syntax.ACTIVITIES_WITH_CHILD_SHAPES:
            label_free_categories.append(category)

    # Categories that are too infrequent are translated to another category.
    category_translation = {syntax.TERMINATE_EVENT: syntax.END_EVENT}

    # BpmnParser argument: association arrows are not consistently annotated,
    # so these categories are excluded.
    ignored_categories = {syntax.ASSOCIATION, syntax.TEXT_ANNOTATION}

    dataset = HdBpmnDataset(
        bpmn_dataset_root=hdbpmn_root,
        coco_dataset_root=coco_dataset_root,
        category_translate_dict=category_translation,
        excluded_categories=ignored_categories,
        excluded_label_categories=label_free_categories,
    )

    coco_export = CocoDatasetExport(
        ds=dataset,
        write_img=write_img,
        write_ann_img=write_ann_img,
        sample=sample,
        n_jobs=n_jobs,
    )

    # Dump the splits one by one, in the order they were given.
    for split_name in splits:
        coco_export.dump_split(split_name)
# Invoke the click command only when this file is executed as a script;
# importing the module does not call it.
if __name__ == "__main__":
    main()