From 443e979ed303fa9f965f780c9f518da74734c909 Mon Sep 17 00:00:00 2001
From: Kevin Wang <wangnu_043@126.com>
Date: Thu, 30 Mar 2023 13:30:36 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96xfund=E6=95=B0=E6=8D=AE?=
 =?UTF-8?q?=E9=9B=86=E7=9A=84config=5Fgenerator=E5=91=BD=E5=90=8D=EF=BC=8C?=
 =?UTF-8?q?=E4=BD=BFconfig=5Fgenerator=E7=9B=AE=E5=BD=95=E7=BB=93=E6=9E=84?=
 =?UTF-8?q?=E6=9B=B4=E6=B8=85=E6=99=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 configs/re/_base_/datasets/xfund_zh.py        |   4 +-
 configs/ser/_base_/datasets/xfund_zh.py       |   4 +-
 dataset_zoo/xfund/de/re.py                    |   2 +-
 dataset_zoo/xfund/de/ser.py                   |   2 +-
 dataset_zoo/xfund/es/re.py                    |   2 +-
 dataset_zoo/xfund/es/ser.py                   |   2 +-
 dataset_zoo/xfund/fr/re.py                    |   2 +-
 dataset_zoo/xfund/fr/ser.py                   |   2 +-
 dataset_zoo/xfund/it/re.py                    |   2 +-
 dataset_zoo/xfund/it/ser.py                   |   2 +-
 dataset_zoo/xfund/ja/re.py                    |   2 +-
 dataset_zoo/xfund/ja/ser.py                   |   2 +-
 dataset_zoo/xfund/pt/re.py                    |   2 +-
 dataset_zoo/xfund/pt/ser.py                   |   2 +-
 dataset_zoo/xfund/zh/re.py                    |   2 +-
 dataset_zoo/xfund/zh/ser.py                   |   2 +-
 .../preparers/config_generators/__init__.py   |   6 +-
 .../config_generators/re_config_generator.py  |  96 ---------
 .../config_generators/ser_config_generator.py |  96 ---------
 .../xfund_config_generator.py                 | 187 ++++++++++++++++++
 20 files changed, 208 insertions(+), 213 deletions(-)
 delete mode 100644 mmocr/datasets/preparers/config_generators/re_config_generator.py
 delete mode 100644 mmocr/datasets/preparers/config_generators/ser_config_generator.py
 create mode 100644 mmocr/datasets/preparers/config_generators/xfund_config_generator.py

diff --git a/configs/re/_base_/datasets/xfund_zh.py b/configs/re/_base_/datasets/xfund_zh.py
index 5ea9c9d33..4a44301dd 100644
--- a/configs/re/_base_/datasets/xfund_zh.py
+++ b/configs/re/_base_/datasets/xfund_zh.py
@@ -1,13 +1,13 @@
 xfund_zh_re_data_root = 'data/xfund/zh'
 
 xfund_zh_re_train = dict(
-    type='REDataset',
+    type='XFUNDREDataset',
     data_root=xfund_zh_re_data_root,
     ann_file='re_train.json',
     pipeline=None)
 
 xfund_zh_re_test = dict(
-    type='REDataset',
+    type='XFUNDREDataset',
     data_root=xfund_zh_re_data_root,
     ann_file='re_test.json',
     test_mode=True,
diff --git a/configs/ser/_base_/datasets/xfund_zh.py b/configs/ser/_base_/datasets/xfund_zh.py
index 4ee522efd..40bbce4de 100644
--- a/configs/ser/_base_/datasets/xfund_zh.py
+++ b/configs/ser/_base_/datasets/xfund_zh.py
@@ -1,13 +1,13 @@
 xfund_zh_ser_data_root = 'data/xfund/zh'
 
 xfund_zh_ser_train = dict(
-    type='SERDataset',
+    type='XFUNDSERDataset',
     data_root=xfund_zh_ser_data_root,
     ann_file='ser_train.json',
     pipeline=None)
 
 xfund_zh_ser_test = dict(
-    type='SERDataset',
+    type='XFUNDSERDataset',
     data_root=xfund_zh_ser_data_root,
     ann_file='ser_test.json',
     test_mode=True,
diff --git a/dataset_zoo/xfund/de/re.py b/dataset_zoo/xfund/de/re.py
index b3e0666de..e0419d026 100644
--- a/dataset_zoo/xfund/de/re.py
+++ b/dataset_zoo/xfund/de/re.py
@@ -3,4 +3,4 @@
 _base_.train_preparer.packer.type = 'REPacker'
 _base_.test_preparer.packer.type = 'REPacker'
 
-config_generator = dict(type='REConfigGenerator')
+config_generator = dict(type='XFUNDREConfigGenerator')
diff --git a/dataset_zoo/xfund/de/ser.py b/dataset_zoo/xfund/de/ser.py
index 60c6963c0..5e9769eb0 100644
--- a/dataset_zoo/xfund/de/ser.py
+++ b/dataset_zoo/xfund/de/ser.py
@@ -57,4 +57,4 @@
 )
 
 delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']]
-config_generator = dict(type='SERConfigGenerator')
+config_generator = dict(type='XFUNDSERConfigGenerator')
diff --git a/dataset_zoo/xfund/es/re.py b/dataset_zoo/xfund/es/re.py
index b3e0666de..e0419d026 100644
--- a/dataset_zoo/xfund/es/re.py
+++ b/dataset_zoo/xfund/es/re.py
@@ -3,4 +3,4 @@
 _base_.train_preparer.packer.type = 'REPacker'
 _base_.test_preparer.packer.type = 'REPacker'
 
-config_generator = dict(type='REConfigGenerator')
+config_generator = dict(type='XFUNDREConfigGenerator')
diff --git a/dataset_zoo/xfund/es/ser.py b/dataset_zoo/xfund/es/ser.py
index 2cc4dbcc6..da8900980 100644
--- a/dataset_zoo/xfund/es/ser.py
+++ b/dataset_zoo/xfund/es/ser.py
@@ -57,4 +57,4 @@
 )
 
 delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']]
-config_generator = dict(type='SERConfigGenerator')
+config_generator = dict(type='XFUNDSERConfigGenerator')
diff --git a/dataset_zoo/xfund/fr/re.py b/dataset_zoo/xfund/fr/re.py
index b3e0666de..e0419d026 100644
--- a/dataset_zoo/xfund/fr/re.py
+++ b/dataset_zoo/xfund/fr/re.py
@@ -3,4 +3,4 @@
 _base_.train_preparer.packer.type = 'REPacker'
 _base_.test_preparer.packer.type = 'REPacker'
 
-config_generator = dict(type='REConfigGenerator')
+config_generator = dict(type='XFUNDREConfigGenerator')
diff --git a/dataset_zoo/xfund/fr/ser.py b/dataset_zoo/xfund/fr/ser.py
index ff9f5ea1f..aad6b7cf3 100644
--- a/dataset_zoo/xfund/fr/ser.py
+++ b/dataset_zoo/xfund/fr/ser.py
@@ -57,4 +57,4 @@
 )
 
 delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']]
-config_generator = dict(type='SERConfigGenerator')
+config_generator = dict(type='XFUNDSERConfigGenerator')
diff --git a/dataset_zoo/xfund/it/re.py b/dataset_zoo/xfund/it/re.py
index b3e0666de..e0419d026 100644
--- a/dataset_zoo/xfund/it/re.py
+++ b/dataset_zoo/xfund/it/re.py
@@ -3,4 +3,4 @@
 _base_.train_preparer.packer.type = 'REPacker'
 _base_.test_preparer.packer.type = 'REPacker'
 
-config_generator = dict(type='REConfigGenerator')
+config_generator = dict(type='XFUNDREConfigGenerator')
diff --git a/dataset_zoo/xfund/it/ser.py b/dataset_zoo/xfund/it/ser.py
index 92c298ff1..fc9fc8b70 100644
--- a/dataset_zoo/xfund/it/ser.py
+++ b/dataset_zoo/xfund/it/ser.py
@@ -57,4 +57,4 @@
 )
 
 delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']]
-config_generator = dict(type='SERConfigGenerator')
+config_generator = dict(type='XFUNDSERConfigGenerator')
diff --git a/dataset_zoo/xfund/ja/re.py b/dataset_zoo/xfund/ja/re.py
index b3e0666de..e0419d026 100644
--- a/dataset_zoo/xfund/ja/re.py
+++ b/dataset_zoo/xfund/ja/re.py
@@ -3,4 +3,4 @@
 _base_.train_preparer.packer.type = 'REPacker'
 _base_.test_preparer.packer.type = 'REPacker'
 
-config_generator = dict(type='REConfigGenerator')
+config_generator = dict(type='XFUNDREConfigGenerator')
diff --git a/dataset_zoo/xfund/ja/ser.py b/dataset_zoo/xfund/ja/ser.py
index e536151ea..856b4f96d 100644
--- a/dataset_zoo/xfund/ja/ser.py
+++ b/dataset_zoo/xfund/ja/ser.py
@@ -57,4 +57,4 @@
 )
 
 delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']]
-config_generator = dict(type='SERConfigGenerator')
+config_generator = dict(type='XFUNDSERConfigGenerator')
diff --git a/dataset_zoo/xfund/pt/re.py b/dataset_zoo/xfund/pt/re.py
index b3e0666de..e0419d026 100644
--- a/dataset_zoo/xfund/pt/re.py
+++ b/dataset_zoo/xfund/pt/re.py
@@ -3,4 +3,4 @@
 _base_.train_preparer.packer.type = 'REPacker'
 _base_.test_preparer.packer.type = 'REPacker'
 
-config_generator = dict(type='REConfigGenerator')
+config_generator = dict(type='XFUNDREConfigGenerator')
diff --git a/dataset_zoo/xfund/pt/ser.py b/dataset_zoo/xfund/pt/ser.py
index 079b39448..ff147ba4c 100644
--- a/dataset_zoo/xfund/pt/ser.py
+++ b/dataset_zoo/xfund/pt/ser.py
@@ -57,4 +57,4 @@
 )
 
 delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']]
-config_generator = dict(type='SERConfigGenerator')
+config_generator = dict(type='XFUNDSERConfigGenerator')
diff --git a/dataset_zoo/xfund/zh/re.py b/dataset_zoo/xfund/zh/re.py
index b3e0666de..e0419d026 100644
--- a/dataset_zoo/xfund/zh/re.py
+++ b/dataset_zoo/xfund/zh/re.py
@@ -3,4 +3,4 @@
 _base_.train_preparer.packer.type = 'REPacker'
 _base_.test_preparer.packer.type = 'REPacker'
 
-config_generator = dict(type='REConfigGenerator')
+config_generator = dict(type='XFUNDREConfigGenerator')
diff --git a/dataset_zoo/xfund/zh/ser.py b/dataset_zoo/xfund/zh/ser.py
index ec8efb1a3..20a3d1150 100644
--- a/dataset_zoo/xfund/zh/ser.py
+++ b/dataset_zoo/xfund/zh/ser.py
@@ -57,4 +57,4 @@
 )
 
 delete = ['annotations'] + [f'{lang}_{split}' for split in ['train', 'val']]
-config_generator = dict(type='SERConfigGenerator')
+config_generator = dict(type='XFUNDSERConfigGenerator')
diff --git a/mmocr/datasets/preparers/config_generators/__init__.py b/mmocr/datasets/preparers/config_generators/__init__.py
index 1a6221256..69e3b5157 100644
--- a/mmocr/datasets/preparers/config_generators/__init__.py
+++ b/mmocr/datasets/preparers/config_generators/__init__.py
@@ -1,13 +1,13 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .base import BaseDatasetConfigGenerator
-from .re_config_generator import REConfigGenerator
-from .ser_config_generator import SERConfigGenerator
 from .textdet_config_generator import TextDetConfigGenerator
 from .textrecog_config_generator import TextRecogConfigGenerator
 from .textspotting_config_generator import TextSpottingConfigGenerator
+from .xfund_config_generator import (XFUNDREConfigGenerator,
+                                     XFUNDSERConfigGenerator)
 
 __all__ = [
     'BaseDatasetConfigGenerator', 'TextDetConfigGenerator',
     'TextRecogConfigGenerator', 'TextSpottingConfigGenerator',
-    'SERConfigGenerator', 'REConfigGenerator'
+    'XFUNDSERConfigGenerator', 'XFUNDREConfigGenerator'
 ]
diff --git a/mmocr/datasets/preparers/config_generators/re_config_generator.py b/mmocr/datasets/preparers/config_generators/re_config_generator.py
deleted file mode 100644
index 3d5d4c5e2..000000000
--- a/mmocr/datasets/preparers/config_generators/re_config_generator.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Optional
-
-from mmocr.registry import CFG_GENERATORS
-from .base import BaseDatasetConfigGenerator
-
-
-@CFG_GENERATORS.register_module()
-class REConfigGenerator(BaseDatasetConfigGenerator):
-    """Text detection config generator.
-
-    Args:
-        data_root (str): The root path of the dataset.
-        dataset_name (str): The name of the dataset.
-        overwrite_cfg (bool): Whether to overwrite the dataset config file if
-            it already exists. If False, config generator will not generate new
-            config for datasets whose configs are already in base.
-        train_anns (List[Dict], optional): A list of train annotation files
-            to appear in the base configs. Defaults to
-            ``[dict(file='re_train.json', dataset_postfix='')]``.
-            Each element is typically a dict with the following fields:
-            - ann_file (str): The path to the annotation file relative to
-              data_root.
-            - dataset_postfix (str, optional): Affects the postfix of the
-              resulting variable in the generated config. If specified, the
-              dataset variable will be named in the form of
-              ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
-              None.
-        val_anns (List[Dict], optional): A list of val annotation files
-            to appear in the base configs, similar to ``train_anns``. Defaults
-            to [].
-        test_anns (List[Dict], optional): A list of test annotation files
-            to appear in the base configs, similar to ``train_anns``. Defaults
-            to ``[dict(file='re_test.json')]``.
-        config_path (str): Path to the configs. Defaults to 'configs/'.
-    """
-
-    def __init__(
-        self,
-        data_root: str,
-        dataset_name: str,
-        overwrite_cfg: bool = False,
-        train_anns: Optional[List[Dict]] = [
-            dict(ann_file='re_train.json', dataset_postfix='')
-        ],
-        val_anns: Optional[List[Dict]] = [],
-        test_anns: Optional[List[Dict]] = [
-            dict(ann_file='re_test.json', dataset_postfix='')
-        ],
-        config_path: str = 'configs/',
-    ) -> None:
-        if '/' in dataset_name:
-            dataset_name = '_'.join(dataset_name.split('/'))
-        super().__init__(
-            data_root=data_root,
-            task='re',
-            overwrite_cfg=overwrite_cfg,
-            dataset_name=dataset_name,
-            train_anns=train_anns,
-            val_anns=val_anns,
-            test_anns=test_anns,
-            config_path=config_path,
-        )
-
-    def _gen_dataset_config(self) -> str:
-        """Generate a full dataset config based on the annotation file
-        dictionary.
-
-        Args:
-            ann_dict (dict[str, dict(str, str)]): A nested dictionary that maps
-                a config variable name (such as icdar2015_textrecog_train) to
-                its corresponding annotation information dict. Each dict
-                contains following keys:
-                - ann_file (str): The path to the annotation file relative to
-                  data_root.
-                - dataset_postfix (str, optional): Affects the postfix of the
-                  resulting variable in the generated config. If specified, the
-                  dataset variable will be named in the form of
-                  ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults
-                  to None.
-                - split (str): The split the annotation belongs to. Usually
-                  it can be 'train', 'val' and 'test'.
-
-        Returns:
-            str: The generated dataset config.
-        """
-        cfg = ''
-        for key_name, ann_dict in self.anns.items():
-            cfg += f'\n{key_name} = dict(\n'
-            cfg += '    type=\'REDataset\',\n'
-            cfg += '    data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n'  # noqa: E501
-            cfg += f'    ann_file=\'{ann_dict["ann_file"]}\',\n'
-            if ann_dict['split'] in ['test', 'val']:
-                cfg += '    test_mode=True,\n'
-            cfg += '    pipeline=None)\n'
-        return cfg
diff --git a/mmocr/datasets/preparers/config_generators/ser_config_generator.py b/mmocr/datasets/preparers/config_generators/ser_config_generator.py
deleted file mode 100644
index c3cb7f53f..000000000
--- a/mmocr/datasets/preparers/config_generators/ser_config_generator.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Optional
-
-from mmocr.registry import CFG_GENERATORS
-from .base import BaseDatasetConfigGenerator
-
-
-@CFG_GENERATORS.register_module()
-class SERConfigGenerator(BaseDatasetConfigGenerator):
-    """Text detection config generator.
-
-    Args:
-        data_root (str): The root path of the dataset.
-        dataset_name (str): The name of the dataset.
-        overwrite_cfg (bool): Whether to overwrite the dataset config file if
-            it already exists. If False, config generator will not generate new
-            config for datasets whose configs are already in base.
-        train_anns (List[Dict], optional): A list of train annotation files
-            to appear in the base configs. Defaults to
-            ``[dict(file='ser_train.json', dataset_postfix='')]``.
-            Each element is typically a dict with the following fields:
-            - ann_file (str): The path to the annotation file relative to
-              data_root.
-            - dataset_postfix (str, optional): Affects the postfix of the
-              resulting variable in the generated config. If specified, the
-              dataset variable will be named in the form of
-              ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
-              None.
-        val_anns (List[Dict], optional): A list of val annotation files
-            to appear in the base configs, similar to ``train_anns``. Defaults
-            to [].
-        test_anns (List[Dict], optional): A list of test annotation files
-            to appear in the base configs, similar to ``train_anns``. Defaults
-            to ``[dict(file='ser_test.json')]``.
-        config_path (str): Path to the configs. Defaults to 'configs/'.
-    """
-
-    def __init__(
-        self,
-        data_root: str,
-        dataset_name: str,
-        overwrite_cfg: bool = False,
-        train_anns: Optional[List[Dict]] = [
-            dict(ann_file='ser_train.json', dataset_postfix='')
-        ],
-        val_anns: Optional[List[Dict]] = [],
-        test_anns: Optional[List[Dict]] = [
-            dict(ann_file='ser_test.json', dataset_postfix='')
-        ],
-        config_path: str = 'configs/',
-    ) -> None:
-        if '/' in dataset_name:
-            dataset_name = '_'.join(dataset_name.split('/'))
-        super().__init__(
-            data_root=data_root,
-            task='ser',
-            overwrite_cfg=overwrite_cfg,
-            dataset_name=dataset_name,
-            train_anns=train_anns,
-            val_anns=val_anns,
-            test_anns=test_anns,
-            config_path=config_path,
-        )
-
-    def _gen_dataset_config(self) -> str:
-        """Generate a full dataset config based on the annotation file
-        dictionary.
-
-        Args:
-            ann_dict (dict[str, dict(str, str)]): A nested dictionary that maps
-                a config variable name (such as icdar2015_textrecog_train) to
-                its corresponding annotation information dict. Each dict
-                contains following keys:
-                - ann_file (str): The path to the annotation file relative to
-                  data_root.
-                - dataset_postfix (str, optional): Affects the postfix of the
-                  resulting variable in the generated config. If specified, the
-                  dataset variable will be named in the form of
-                  ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults
-                  to None.
-                - split (str): The split the annotation belongs to. Usually
-                  it can be 'train', 'val' and 'test'.
-
-        Returns:
-            str: The generated dataset config.
-        """
-        cfg = ''
-        for key_name, ann_dict in self.anns.items():
-            cfg += f'\n{key_name} = dict(\n'
-            cfg += '    type=\'SERDataset\',\n'
-            cfg += '    data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n'  # noqa: E501
-            cfg += f'    ann_file=\'{ann_dict["ann_file"]}\',\n'
-            if ann_dict['split'] in ['test', 'val']:
-                cfg += '    test_mode=True,\n'
-            cfg += '    pipeline=None)\n'
-        return cfg
diff --git a/mmocr/datasets/preparers/config_generators/xfund_config_generator.py b/mmocr/datasets/preparers/config_generators/xfund_config_generator.py
new file mode 100644
index 000000000..ca80375bc
--- /dev/null
+++ b/mmocr/datasets/preparers/config_generators/xfund_config_generator.py
@@ -0,0 +1,187 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Dict, List, Optional
+
+from mmocr.registry import CFG_GENERATORS
+from .base import BaseDatasetConfigGenerator
+
+
+@CFG_GENERATORS.register_module()
+class XFUNDSERConfigGenerator(BaseDatasetConfigGenerator):
+    """XFUND dataset Semantic Entity Recognition task config generator.
+
+    Args:
+        data_root (str): The root path of the dataset.
+        dataset_name (str): The name of the dataset.
+        overwrite_cfg (bool): Whether to overwrite the dataset config file if
+            it already exists. If False, config generator will not generate new
+            config for datasets whose configs are already in base.
+        train_anns (List[Dict], optional): A list of train annotation files
+            to appear in the base configs. Defaults to
+            ``[dict(ann_file='ser_train.json', dataset_postfix='')]``.
+            Each element is typically a dict with the following fields:
+            - ann_file (str): The path to the annotation file relative to
+              data_root.
+            - dataset_postfix (str, optional): Affects the postfix of the
+              resulting variable in the generated config. If specified, the
+              dataset variable will be named in the form of
+              ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
+              None.
+        val_anns (List[Dict], optional): A list of val annotation files
+            to appear in the base configs, similar to ``train_anns``. Defaults
+            to [].
+        test_anns (List[Dict], optional): A list of test annotation files
+            to appear in the base configs, similar to ``train_anns``. Defaults
+            to ``[dict(ann_file='ser_test.json', dataset_postfix='')]``.
+        config_path (str): Path to the configs. Defaults to 'configs/'.
+    """
+
+    def __init__(
+        self,
+        data_root: str,
+        dataset_name: str,
+        overwrite_cfg: bool = False,
+        train_anns: Optional[List[Dict]] = [
+            dict(ann_file='ser_train.json', dataset_postfix='')
+        ],
+        val_anns: Optional[List[Dict]] = [],
+        test_anns: Optional[List[Dict]] = [
+            dict(ann_file='ser_test.json', dataset_postfix='')
+        ],
+        config_path: str = 'configs/',
+    ) -> None:
+        if '/' in dataset_name:
+            dataset_name = '_'.join(dataset_name.split('/'))
+        super().__init__(
+            data_root=data_root,
+            task='ser',
+            overwrite_cfg=overwrite_cfg,
+            dataset_name=dataset_name,
+            train_anns=train_anns,
+            val_anns=val_anns,
+            test_anns=test_anns,
+            config_path=config_path,
+        )
+
+    def _gen_dataset_config(self) -> str:
+        """Generate a full dataset config based on the annotation file
+        dictionary.
+
+        Note:
+            ``self.anns`` (dict[str, dict(str, str)]): A nested dictionary that
+                maps a config variable name (such as xfund_zh_ser_train) to
+                its corresponding annotation information dict. Each dict
+                contains following keys:
+                - ann_file (str): The path to the annotation file relative to
+                  data_root.
+                - dataset_postfix (str, optional): Affects the postfix of the
+                  resulting variable in the generated config. If specified, the
+                  dataset variable will be named in the form of
+                  ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults
+                  to None.
+                - split (str): The split the annotation belongs to. Usually
+                  it can be 'train', 'val' and 'test'.
+
+        Returns:
+            str: The generated dataset config.
+        """
+        cfg = ''
+        for key_name, ann_dict in self.anns.items():
+            cfg += f'\n{key_name} = dict(\n'
+            cfg += '    type=\'XFUNDSERDataset\',\n'
+            cfg += '    data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n'  # noqa: E501
+            cfg += f'    ann_file=\'{ann_dict["ann_file"]}\',\n'
+            if ann_dict['split'] in ['test', 'val']:
+                cfg += '    test_mode=True,\n'
+            cfg += '    pipeline=None)\n'
+        return cfg
+
+
+@CFG_GENERATORS.register_module()
+class XFUNDREConfigGenerator(BaseDatasetConfigGenerator):
+    """XFUND dataset Relation Extraction task config generator.
+
+    Args:
+        data_root (str): The root path of the dataset.
+        dataset_name (str): The name of the dataset.
+        overwrite_cfg (bool): Whether to overwrite the dataset config file if
+            it already exists. If False, config generator will not generate new
+            config for datasets whose configs are already in base.
+        train_anns (List[Dict], optional): A list of train annotation files
+            to appear in the base configs. Defaults to
+            ``[dict(ann_file='re_train.json', dataset_postfix='')]``.
+            Each element is typically a dict with the following fields:
+            - ann_file (str): The path to the annotation file relative to
+              data_root.
+            - dataset_postfix (str, optional): Affects the postfix of the
+              resulting variable in the generated config. If specified, the
+              dataset variable will be named in the form of
+              ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults to
+              None.
+        val_anns (List[Dict], optional): A list of val annotation files
+            to appear in the base configs, similar to ``train_anns``. Defaults
+            to [].
+        test_anns (List[Dict], optional): A list of test annotation files
+            to appear in the base configs, similar to ``train_anns``. Defaults
+            to ``[dict(ann_file='re_test.json', dataset_postfix='')]``.
+        config_path (str): Path to the configs. Defaults to 'configs/'.
+    """
+
+    def __init__(
+        self,
+        data_root: str,
+        dataset_name: str,
+        overwrite_cfg: bool = False,
+        train_anns: Optional[List[Dict]] = [
+            dict(ann_file='re_train.json', dataset_postfix='')
+        ],
+        val_anns: Optional[List[Dict]] = [],
+        test_anns: Optional[List[Dict]] = [
+            dict(ann_file='re_test.json', dataset_postfix='')
+        ],
+        config_path: str = 'configs/',
+    ) -> None:
+        if '/' in dataset_name:
+            dataset_name = '_'.join(dataset_name.split('/'))
+        super().__init__(
+            data_root=data_root,
+            task='re',
+            overwrite_cfg=overwrite_cfg,
+            dataset_name=dataset_name,
+            train_anns=train_anns,
+            val_anns=val_anns,
+            test_anns=test_anns,
+            config_path=config_path,
+        )
+
+    def _gen_dataset_config(self) -> str:
+        """Generate a full dataset config based on the annotation file
+        dictionary.
+
+        Note:
+            ``self.anns`` (dict[str, dict(str, str)]): A nested dictionary that
+                maps a config variable name (such as xfund_zh_re_train) to
+                its corresponding annotation information dict. Each dict
+                contains following keys:
+                - ann_file (str): The path to the annotation file relative to
+                  data_root.
+                - dataset_postfix (str, optional): Affects the postfix of the
+                  resulting variable in the generated config. If specified, the
+                  dataset variable will be named in the form of
+                  ``{dataset_name}_{dataset_postfix}_{task}_{split}``. Defaults
+                  to None.
+                - split (str): The split the annotation belongs to. Usually
+                  it can be 'train', 'val' and 'test'.
+
+        Returns:
+            str: The generated dataset config.
+        """
+        cfg = ''
+        for key_name, ann_dict in self.anns.items():
+            cfg += f'\n{key_name} = dict(\n'
+            cfg += '    type=\'XFUNDREDataset\',\n'
+            cfg += '    data_root=' + f'{self.dataset_name}_{self.task}_data_root,\n'  # noqa: E501
+            cfg += f'    ann_file=\'{ann_dict["ann_file"]}\',\n'
+            if ann_dict['split'] in ['test', 'val']:
+                cfg += '    test_mode=True,\n'
+            cfg += '    pipeline=None)\n'
+        return cfg