From 443e979ed303fa9f965f780c9f518da74734c909 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Thu, 30 Mar 2023 13:30:36 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96xfund=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=9B=86=E7=9A=84config=5Fgenerator=E5=91=BD=E5=90=8D=EF=BC=8C?= =?UTF-8?q?=E4=BD=BFconfig=5Fgenerator=E7=9B=AE=E5=BD=95=E7=BB=93=E6=9E=84?= =?UTF-8?q?=E6=9B=B4=E6=B8=85=E6=99=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- configs/re/_base_/datasets/xfund_zh.py | 4 +- configs/ser/_base_/datasets/xfund_zh.py | 4 +- dataset_zoo/xfund/de/re.py | 2 +- dataset_zoo/xfund/de/ser.py | 2 +- dataset_zoo/xfund/es/re.py | 2 +- dataset_zoo/xfund/es/ser.py | 2 +- dataset_zoo/xfund/fr/re.py | 2 +- dataset_zoo/xfund/fr/ser.py | 2 +- dataset_zoo/xfund/it/re.py | 2 +- dataset_zoo/xfund/it/ser.py | 2 +- dataset_zoo/xfund/ja/re.py | 2 +- dataset_zoo/xfund/ja/ser.py | 2 +- dataset_zoo/xfund/pt/re.py | 2 +- dataset_zoo/xfund/pt/ser.py | 2 +- dataset_zoo/xfund/zh/re.py | 2 +- dataset_zoo/xfund/zh/ser.py | 2 +- .../preparers/config_generators/__init__.py | 6 +- .../config_generators/re_config_generator.py | 96 --------- .../config_generators/ser_config_generator.py | 96 --------- .../xfund_config_generator.py | 187 ++++++++++++++++++ 20 files changed, 208 insertions(+), 213 deletions(-) delete mode 100644 mmocr/datasets/preparers/config_generators/re_config_generator.py delete mode 100644 mmocr/datasets/preparers/config_generators/ser_config_generator.py create mode 100644 mmocr/datasets/preparers/config_generators/xfund_config_generator.py diff --git a/configs/re/_base_/datasets/xfund_zh.py b/configs/re/_base_/datasets/xfund_zh.py index 5ea9c9d33..4a44301dd 100644 --- a/configs/re/_base_/datasets/xfund_zh.py +++ b/configs/re/_base_/datasets/xfund_zh.py @@ -1,13 +1,13 @@ xfund_zh_re_data_root = 'data/xfund/zh' xfund_zh_re_train = dict( - type='REDataset', + type='XFUNDREDataset', data_root=xfund_zh_re_data_root, ann_file='re_train.json', pipeline=None) xfund_zh_re_test = dict( - type='REDataset', + type='XFUNDREDataset', data_root=xfund_zh_re_data_root, ann_file='re_test.json', test_mode=True, diff --git a/configs/ser/_base_/datasets/xfund_zh.py b/configs/ser/_base_/datasets/xfund_zh.py index 4ee522efd..40bbce4de 100644 --- a/configs/ser/_base_/datasets/xfund_zh.py +++ b/configs/ser/_base_/datasets/xfund_zh.py @@ -1,13 +1,13 @@ xfund_zh_ser_data_root = 'data/xfund/zh' xfund_zh_ser_train = dict( - type='SERDataset', + type='XFUNDSERDataset', data_root=xfund_zh_ser_data_root, ann_file='ser_train.json', pipeline=None) xfund_zh_ser_test = dict( - type='SERDataset', + type='XFUNDSERDataset', data_root=xfund_zh_ser_data_root, ann_file='ser_test.json', test_mode=True, diff --git a/dataset_zoo/xfund/de/re.py b/dataset_zoo/xfund/de/re.py index b3e0666de..e0419d026 100644 --- a/dataset_zoo/xfund/de/re.py +++ b/dataset_zoo/xfund/de/re.py @@ -3,4 +3,4 @@ _base_.train_preparer.packer.type = 'REPacker' _base_.test_preparer.packer.type = 'REPacker' -config_generator = dict(type='REConfigGenerator') +config_generator = dict(type='XFUNDREConfigGenerator') diff --git a/dataset_zoo/xfund/de/ser.py b/dataset_zoo/xfund/de/ser.py index 60c6963c0..5e9769eb0 100644 --- a/dataset_zoo/xfund/de/ser.py +++ b/dataset_zoo/xfund/de/ser.py @@ -57,4 +57,4 @@ ) delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']] -config_generator = dict(type='SERConfigGenerator') +config_generator = dict(type='XFUNDSERConfigGenerator') diff --git a/dataset_zoo/xfund/es/re.py b/dataset_zoo/xfund/es/re.py index b3e0666de..e0419d026 100644 --- a/dataset_zoo/xfund/es/re.py +++ b/dataset_zoo/xfund/es/re.py @@ -3,4 +3,4 @@ _base_.train_preparer.packer.type = 'REPacker' _base_.test_preparer.packer.type = 'REPacker' -config_generator = dict(type='REConfigGenerator') +config_generator = dict(type='XFUNDREConfigGenerator') diff --git a/dataset_zoo/xfund/es/ser.py b/dataset_zoo/xfund/es/ser.py index 2cc4dbcc6..da8900980 100644 --- a/dataset_zoo/xfund/es/ser.py +++ b/dataset_zoo/xfund/es/ser.py @@ -57,4 +57,4 @@ ) delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']] -config_generator = dict(type='SERConfigGenerator') +config_generator = dict(type='XFUNDSERConfigGenerator') diff --git a/dataset_zoo/xfund/fr/re.py b/dataset_zoo/xfund/fr/re.py index b3e0666de..e0419d026 100644 --- a/dataset_zoo/xfund/fr/re.py +++ b/dataset_zoo/xfund/fr/re.py @@ -3,4 +3,4 @@ _base_.train_preparer.packer.type = 'REPacker' _base_.test_preparer.packer.type = 'REPacker' -config_generator = dict(type='REConfigGenerator') +config_generator = dict(type='XFUNDREConfigGenerator') diff --git a/dataset_zoo/xfund/fr/ser.py b/dataset_zoo/xfund/fr/ser.py index ff9f5ea1f..aad6b7cf3 100644 --- a/dataset_zoo/xfund/fr/ser.py +++ b/dataset_zoo/xfund/fr/ser.py @@ -57,4 +57,4 @@ ) delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']] -config_generator = dict(type='SERConfigGenerator') +config_generator = dict(type='XFUNDSERConfigGenerator') diff --git a/dataset_zoo/xfund/it/re.py b/dataset_zoo/xfund/it/re.py index b3e0666de..e0419d026 100644 --- a/dataset_zoo/xfund/it/re.py +++ b/dataset_zoo/xfund/it/re.py @@ -3,4 +3,4 @@ _base_.train_preparer.packer.type = 'REPacker' _base_.test_preparer.packer.type = 'REPacker' -config_generator = dict(type='REConfigGenerator') +config_generator = dict(type='XFUNDREConfigGenerator') diff --git a/dataset_zoo/xfund/it/ser.py b/dataset_zoo/xfund/it/ser.py index 92c298ff1..fc9fc8b70 100644 --- a/dataset_zoo/xfund/it/ser.py +++ b/dataset_zoo/xfund/it/ser.py @@ -57,4 +57,4 @@ ) delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']] -config_generator = dict(type='SERConfigGenerator') +config_generator = dict(type='XFUNDSERConfigGenerator') diff --git a/dataset_zoo/xfund/ja/re.py b/dataset_zoo/xfund/ja/re.py index b3e0666de..e0419d026 100644 --- a/dataset_zoo/xfund/ja/re.py +++ b/dataset_zoo/xfund/ja/re.py @@ -3,4 +3,4 @@ _base_.train_preparer.packer.type = 'REPacker' _base_.test_preparer.packer.type = 'REPacker' -config_generator = dict(type='REConfigGenerator') +config_generator = dict(type='XFUNDREConfigGenerator') diff --git a/dataset_zoo/xfund/ja/ser.py b/dataset_zoo/xfund/ja/ser.py index e536151ea..856b4f96d 100644 --- a/dataset_zoo/xfund/ja/ser.py +++ b/dataset_zoo/xfund/ja/ser.py @@ -57,4 +57,4 @@ ) delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']] -config_generator = dict(type='SERConfigGenerator') +config_generator = dict(type='XFUNDSERConfigGenerator') diff --git a/dataset_zoo/xfund/pt/re.py b/dataset_zoo/xfund/pt/re.py index b3e0666de..e0419d026 100644 --- a/dataset_zoo/xfund/pt/re.py +++ b/dataset_zoo/xfund/pt/re.py @@ -3,4 +3,4 @@ _base_.train_preparer.packer.type = 'REPacker' _base_.test_preparer.packer.type = 'REPacker' -config_generator = dict(type='REConfigGenerator') +config_generator = dict(type='XFUNDREConfigGenerator') diff --git a/dataset_zoo/xfund/pt/ser.py b/dataset_zoo/xfund/pt/ser.py index 079b39448..ff147ba4c 100644 --- a/dataset_zoo/xfund/pt/ser.py +++ b/dataset_zoo/xfund/pt/ser.py @@ -57,4 +57,4 @@ ) delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']] -config_generator = dict(type='SERConfigGenerator') +config_generator = dict(type='XFUNDSERConfigGenerator') diff --git a/dataset_zoo/xfund/zh/re.py b/dataset_zoo/xfund/zh/re.py index b3e0666de..e0419d026 100644 --- a/dataset_zoo/xfund/zh/re.py +++ b/dataset_zoo/xfund/zh/re.py @@ -3,4 +3,4 @@ _base_.train_preparer.packer.type = 'REPacker' _base_.test_preparer.packer.type = 'REPacker' -config_generator = dict(type='REConfigGenerator') +config_generator = dict(type='XFUNDREConfigGenerator') diff --git a/dataset_zoo/xfund/zh/ser.py b/dataset_zoo/xfund/zh/ser.py index ec8efb1a3..20a3d1150 100644 --- a/dataset_zoo/xfund/zh/ser.py +++ b/dataset_zoo/xfund/zh/ser.py @@ -57,4 +57,4 @@ ) delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']] -config_generator = dict(type='SERConfigGenerator') +config_generator = dict(type='XFUNDSERConfigGenerator') diff --git a/mmocr/datasets/preparers/config_generators/__init__.py b/mmocr/datasets/preparers/config_generators/__init__.py index 1a6221256..69e3b5157 100644 --- a/mmocr/datasets/preparers/config_generators/__init__.py +++ b/mmocr/datasets/preparers/config_generators/__init__.py @@ -1,13 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. from .base import BaseDatasetConfigGenerator -from .re_config_generator import REConfigGenerator -from .ser_config_generator import SERConfigGenerator from .textdet_config_generator import TextDetConfigGenerator from .textrecog_config_generator import TextRecogConfigGenerator from .textspotting_config_generator import TextSpottingConfigGenerator +from .xfund_config_generator import (XFUNDREConfigGenerator, + XFUNDSERConfigGenerator) __all__ = [ 'BaseDatasetConfigGenerator', 'TextDetConfigGenerator', 'TextRecogConfigGenerator', 'TextSpottingConfigGenerator', - 'SERConfigGenerator', 'REConfigGenerator' + 'XFUNDSERConfigGenerator', 'XFUNDREConfigGenerator' ] diff --git a/mmocr/datasets/preparers/config_generators/re_config_generator.py b/mmocr/datasets/preparers/config_generators/re_config_generator.py deleted file mode 100644 index 3d5d4c5e2..000000000 --- a/mmocr/datasets/preparers/config_generators/re_config_generator.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict, List, Optional - -from mmocr.registry import CFG_GENERATORS -from .base import BaseDatasetConfigGenerator - - -@CFG_GENERATORS.register_module() -class REConfigGenerator(BaseDatasetConfigGenerator): - """Text detection config generator. - - Args: - data_root (str): The root path of the dataset. - dataset_name (str): The name of the dataset. - overwrite_cfg (bool): Whether to overwrite the dataset config file if - it already exists. If False, config generator will not generate new - config for datasets whose configs are already in base. - train_anns (List[Dict], optional): A list of train annotation files - to appear in the base configs. Defaults to - ``[dict(file='re_train.json', dataset_postfix='')]``. - Each element is typically a dict with the following fields: - - ann_file (str): The path to the annotation file relative to - data_root. - - dataset_postfix (str, optional): Affects the postfix of the - resulting variable in the generated config. If specified, the - dataset variable will be named in the form of - ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to - None. - val_anns (List[Dict], optional): A list of val annotation files - to appear in the base configs, similar to ``train_anns``. Defaults - to []. - test_anns (List[Dict], optional): A list of test annotation files - to appear in the base configs, similar to ``train_anns``. Defaults - to ``[dict(file='re_test.json')]``. - config_path (str): Path to the configs. Defaults to 'configs/'. - """ - - def __init__( - self, - data_root: str, - dataset_name: str, - overwrite_cfg: bool = False, - train_anns: Optional[List[Dict]] = [ - dict(ann_file='re_train.json', dataset_postfix='') - ], - val_anns: Optional[List[Dict]] = [], - test_anns: Optional[List[Dict]] = [ - dict(ann_file='re_test.json', dataset_postfix='') - ], - config_path: str = 'configs/', - ) -> None: - if '/' in dataset_name: - dataset_name = '_'.join(dataset_name.split('/')) - super().__init__( - data_root=data_root, - task='re', - overwrite_cfg=overwrite_cfg, - dataset_name=dataset_name, - train_anns=train_anns, - val_anns=val_anns, - test_anns=test_anns, - config_path=config_path, - ) - - def _gen_dataset_config(self) -> str: - """Generate a full dataset config based on the annotation file - dictionary. - - Args: - ann_dict (dict[str, dict(str, str)]): A nested dictionary that maps - a config variable name (such as icdar2015_textrecog_train) to - its corresponding annotation information dict. Each dict - contains following keys: - - ann_file (str): The path to the annotation file relative to - data_root. - - dataset_postfix (str, optional): Affects the postfix of the - resulting variable in the generated config. If specified, the - dataset variable will be named in the form of - ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults - to None. - - split (str): The split the annotation belongs to. Usually - it can be 'train', 'val' and 'test'. - - Returns: - str: The generated dataset config. - """ - cfg = '' - for key_name, ann_dict in self.anns.items(): - cfg += f'\n{key_name} = dict(\n' - cfg += ' type=\'REDataset\',\n' - cfg += ' data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n' # noqa: E501 - cfg += f' ann_file=\'{ann_dict["ann_file"]}\',\n' - if ann_dict['split'] in ['test', 'val']: - cfg += ' test_mode=True,\n' - cfg += ' pipeline=None)\n' - return cfg diff --git a/mmocr/datasets/preparers/config_generators/ser_config_generator.py b/mmocr/datasets/preparers/config_generators/ser_config_generator.py deleted file mode 100644 index c3cb7f53f..000000000 --- a/mmocr/datasets/preparers/config_generators/ser_config_generator.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict, List, Optional - -from mmocr.registry import CFG_GENERATORS -from .base import BaseDatasetConfigGenerator - - -@CFG_GENERATORS.register_module() -class SERConfigGenerator(BaseDatasetConfigGenerator): - """Text detection config generator. - - Args: - data_root (str): The root path of the dataset. - dataset_name (str): The name of the dataset. - overwrite_cfg (bool): Whether to overwrite the dataset config file if - it already exists. If False, config generator will not generate new - config for datasets whose configs are already in base. - train_anns (List[Dict], optional): A list of train annotation files - to appear in the base configs. Defaults to - ``[dict(file='ser_train.json', dataset_postfix='')]``. - Each element is typically a dict with the following fields: - - ann_file (str): The path to the annotation file relative to - data_root. - - dataset_postfix (str, optional): Affects the postfix of the - resulting variable in the generated config. If specified, the - dataset variable will be named in the form of - ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to - None. - val_anns (List[Dict], optional): A list of val annotation files - to appear in the base configs, similar to ``train_anns``. Defaults - to []. - test_anns (List[Dict], optional): A list of test annotation files - to appear in the base configs, similar to ``train_anns``. Defaults - to ``[dict(file='ser_test.json')]``. - config_path (str): Path to the configs. Defaults to 'configs/'. - """ - - def __init__( - self, - data_root: str, - dataset_name: str, - overwrite_cfg: bool = False, - train_anns: Optional[List[Dict]] = [ - dict(ann_file='ser_train.json', dataset_postfix='') - ], - val_anns: Optional[List[Dict]] = [], - test_anns: Optional[List[Dict]] = [ - dict(ann_file='ser_test.json', dataset_postfix='') - ], - config_path: str = 'configs/', - ) -> None: - if '/' in dataset_name: - dataset_name = '_'.join(dataset_name.split('/')) - super().__init__( - data_root=data_root, - task='ser', - overwrite_cfg=overwrite_cfg, - dataset_name=dataset_name, - train_anns=train_anns, - val_anns=val_anns, - test_anns=test_anns, - config_path=config_path, - ) - - def _gen_dataset_config(self) -> str: - """Generate a full dataset config based on the annotation file - dictionary. - - Args: - ann_dict (dict[str, dict(str, str)]): A nested dictionary that maps - a config variable name (such as icdar2015_textrecog_train) to - its corresponding annotation information dict. Each dict - contains following keys: - - ann_file (str): The path to the annotation file relative to - data_root. - - dataset_postfix (str, optional): Affects the postfix of the - resulting variable in the generated config. If specified, the - dataset variable will be named in the form of - ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults - to None. - - split (str): The split the annotation belongs to. Usually - it can be 'train', 'val' and 'test'. - - Returns: - str: The generated dataset config. - """ - cfg = '' - for key_name, ann_dict in self.anns.items(): - cfg += f'\n{key_name} = dict(\n' - cfg += ' type=\'SERDataset\',\n' - cfg += ' data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n' # noqa: E501 - cfg += f' ann_file=\'{ann_dict["ann_file"]}\',\n' - if ann_dict['split'] in ['test', 'val']: - cfg += ' test_mode=True,\n' - cfg += ' pipeline=None)\n' - return cfg diff --git a/mmocr/datasets/preparers/config_generators/xfund_config_generator.py b/mmocr/datasets/preparers/config_generators/xfund_config_generator.py new file mode 100644 index 000000000..ca80375bc --- /dev/null +++ b/mmocr/datasets/preparers/config_generators/xfund_config_generator.py @@ -0,0 +1,187 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Dict, List, Optional + +from mmocr.registry import CFG_GENERATORS +from .base import BaseDatasetConfigGenerator + + +@CFG_GENERATORS.register_module() +class XFUNDSERConfigGenerator(BaseDatasetConfigGenerator): + """XFUND dataset Semantic Entity Recognition task config generator. + + Args: + data_root (str): The root path of the dataset. + dataset_name (str): The name of the dataset. + overwrite_cfg (bool): Whether to overwrite the dataset config file if + it already exists. If False, config generator will not generate new + config for datasets whose configs are already in base. + train_anns (List[Dict], optional): A list of train annotation files + to appear in the base configs. Defaults to + ``[dict(file='ser_train.json', dataset_postfix='')]``. + Each element is typically a dict with the following fields: + - ann_file (str): The path to the annotation file relative to + data_root. + - dataset_postfix (str, optional): Affects the postfix of the + resulting variable in the generated config. If specified, the + dataset variable will be named in the form of + ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to + None. + val_anns (List[Dict], optional): A list of val annotation files + to appear in the base configs, similar to ``train_anns``. Defaults + to []. + test_anns (List[Dict], optional): A list of test annotation files + to appear in the base configs, similar to ``train_anns``. Defaults + to ``[dict(file='ser_test.json')]``. + config_path (str): Path to the configs. Defaults to 'configs/'. + """ + + def __init__( + self, + data_root: str, + dataset_name: str, + overwrite_cfg: bool = False, + train_anns: Optional[List[Dict]] = [ + dict(ann_file='ser_train.json', dataset_postfix='') + ], + val_anns: Optional[List[Dict]] = [], + test_anns: Optional[List[Dict]] = [ + dict(ann_file='ser_test.json', dataset_postfix='') + ], + config_path: str = 'configs/', + ) -> None: + if '/' in dataset_name: + dataset_name = '_'.join(dataset_name.split('/')) + super().__init__( + data_root=data_root, + task='ser', + overwrite_cfg=overwrite_cfg, + dataset_name=dataset_name, + train_anns=train_anns, + val_anns=val_anns, + test_anns=test_anns, + config_path=config_path, + ) + + def _gen_dataset_config(self) -> str: + """Generate a full dataset config based on the annotation file + dictionary. + + Args: + ann_dict (dict[str, dict(str, str)]): A nested dictionary that maps + a config variable name (such as icdar2015_textrecog_train) to + its corresponding annotation information dict. Each dict + contains following keys: + - ann_file (str): The path to the annotation file relative to + data_root. + - dataset_postfix (str, optional): Affects the postfix of the + resulting variable in the generated config. If specified, the + dataset variable will be named in the form of + ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults + to None. + - split (str): The split the annotation belongs to. Usually + it can be 'train', 'val' and 'test'. + + Returns: + str: The generated dataset config. + """ + cfg = '' + for key_name, ann_dict in self.anns.items(): + cfg += f'\n{key_name} = dict(\n' + cfg += ' type=\'XFUNDSERDataset\',\n' + cfg += ' data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n' # noqa: E501 + cfg += f' ann_file=\'{ann_dict["ann_file"]}\',\n' + if ann_dict['split'] in ['test', 'val']: + cfg += ' test_mode=True,\n' + cfg += ' pipeline=None)\n' + return cfg + + +@CFG_GENERATORS.register_module() +class XFUNDREConfigGenerator(BaseDatasetConfigGenerator): + """XFUND dataset Relation Extraction task config generator. + + Args: + data_root (str): The root path of the dataset. + dataset_name (str): The name of the dataset. + overwrite_cfg (bool): Whether to overwrite the dataset config file if + it already exists. If False, config generator will not generate new + config for datasets whose configs are already in base. + train_anns (List[Dict], optional): A list of train annotation files + to appear in the base configs. Defaults to + ``[dict(file='re_train.json', dataset_postfix='')]``. + Each element is typically a dict with the following fields: + - ann_file (str): The path to the annotation file relative to + data_root. + - dataset_postfix (str, optional): Affects the postfix of the + resulting variable in the generated config. If specified, the + dataset variable will be named in the form of + ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to + None. + val_anns (List[Dict], optional): A list of val annotation files + to appear in the base configs, similar to ``train_anns``. Defaults + to []. + test_anns (List[Dict], optional): A list of test annotation files + to appear in the base configs, similar to ``train_anns``. Defaults + to ``[dict(file='re_test.json')]``. + config_path (str): Path to the configs. Defaults to 'configs/'. + """ + + def __init__( + self, + data_root: str, + dataset_name: str, + overwrite_cfg: bool = False, + train_anns: Optional[List[Dict]] = [ + dict(ann_file='re_train.json', dataset_postfix='') + ], + val_anns: Optional[List[Dict]] = [], + test_anns: Optional[List[Dict]] = [ + dict(ann_file='re_test.json', dataset_postfix='') + ], + config_path: str = 'configs/', + ) -> None: + if '/' in dataset_name: + dataset_name = '_'.join(dataset_name.split('/')) + super().__init__( + data_root=data_root, + task='re', + overwrite_cfg=overwrite_cfg, + dataset_name=dataset_name, + train_anns=train_anns, + val_anns=val_anns, + test_anns=test_anns, + config_path=config_path, + ) + + def _gen_dataset_config(self) -> str: + """Generate a full dataset config based on the annotation file + dictionary. + + Args: + ann_dict (dict[str, dict(str, str)]): A nested dictionary that maps + a config variable name (such as icdar2015_textrecog_train) to + its corresponding annotation information dict. Each dict + contains following keys: + - ann_file (str): The path to the annotation file relative to + data_root. + - dataset_postfix (str, optional): Affects the postfix of the + resulting variable in the generated config. If specified, the + dataset variable will be named in the form of + ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults + to None. + - split (str): The split the annotation belongs to. Usually + it can be 'train', 'val' and 'test'. + + Returns: + str: The generated dataset config. + """ + cfg = '' + for key_name, ann_dict in self.anns.items(): + cfg += f'\n{key_name} = dict(\n' + cfg += ' type=\'XFUNDREDataset\',\n' + cfg += ' data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n' # noqa: E501 + cfg += f' ann_file=\'{ann_dict["ann_file"]}\',\n' + if ann_dict['split'] in ['test', 'val']: + cfg += ' test_mode=True,\n' + cfg += ' pipeline=None)\n' + return cfg