diff --git a/config/ja_ginza.meta.json b/config/ja_ginza.meta.json index 7c43873..982bdfe 100644 --- a/config/ja_ginza.meta.json +++ b/config/ja_ginza.meta.json @@ -1,7 +1,7 @@ { "lang":"ja", "name":"ginza", - "version":"5.0.0", + "version":"5.1.0", "description":"Japanese multi-task CNN trained on UD-Japanese BCCWJ r2.8 + GSK2014-A(2019). Assigns word2vec token vectors. Components: tok2vec, parser, ner, morphologizer, atteribute_ruler, compound_splitter, bunsetu_recognizer.", "author":"Megagon Labs Tokyo.", "email":"ginza@megagon.ai", @@ -57,8 +57,8 @@ "attribute_ruler" ], "requirements":[ - "sudachipy>=0.5.2,<0.6.0", - "sudachidict_core>=20210608", - "ginza>=5.0.0,<5.1.0" + "sudachipy>=0.5.4,<0.7.0", + "sudachidict_core>=20210802", + "ginza>=5.1.0,<5.2.0" ] } diff --git a/config/ja_ginza_electra.analysis.cfg b/config/ja_ginza_electra.analysis.cfg index 42fa75d..17666e9 100644 --- a/config/ja_ginza_electra.analysis.cfg +++ b/config/ja_ginza_electra.analysis.cfg @@ -25,6 +25,7 @@ split_mode = "C" [components.attribute_ruler] factory = "attribute_ruler" +scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"} validate = false [components.bunsetu_recognizer] @@ -37,6 +38,9 @@ split_mode = null [components.morphologizer] factory = "morphologizer" +extend = true +overwrite = true +scorer = {"@scorers":"spacy.morphologizer_scorer.v1"} [components.morphologizer.model] @architectures = "spacy.Tagger.v1" @@ -52,6 +56,7 @@ upstream = "*" factory = "ner" incorrect_spans_key = null moves = null +scorer = {"@scorers":"spacy.ner_scorer.v1"} update_with_oracle_cut_size = 100 [components.ner.model] @@ -74,6 +79,7 @@ factory = "parser" learn_tokens = false min_action_freq = 30 moves = null +scorer = {"@scorers":"spacy.parser_scorer.v1"} update_with_oracle_cut_size = 100 [components.parser.model] @@ -97,14 +103,17 @@ max_batch_items = 4096 set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"} [components.transformer.model] -@architectures = "ginza-transformers.TransformerModel.v1" -name = "megagonlabs/transformers-ud-japanese-electra-base-ginza" +@architectures = "spacy-transformers.TransformerModel.v3" +name = "megagonlabs/transformers-ud-japanese-electra-base-ginza-510" +mixed_precision = false [components.transformer.model.get_spans] @span_getters = "spacy-transformers.strided_spans.v1" window = 128 stride = 96 +[components.transformer.model.grad_scaler_config] + [components.transformer.model.tokenizer_config] use_fast = false tokenizer_class = "sudachitra.tokenization_electra_sudachipy.ElectraSudachipyTokenizer" @@ -119,6 +128,8 @@ word_form_type = "dictionary_and_surface" split_mode = "A" dict_type = "core" +[components.transformer.model.transformer_config] + [corpora] [corpora.dev] @@ -149,8 +160,8 @@ max_epochs = 0 max_steps = 50000 eval_frequency = 200 frozen_components = [] -before_to_disk = null annotating_components = [] +before_to_disk = null [training.batcher] @batchers = "spacy.batch_by_padded.v1" diff --git a/config/ja_ginza_electra.meta.json b/config/ja_ginza_electra.meta.json index 5469971..9a2ae79 100644 --- a/config/ja_ginza_electra.meta.json +++ b/config/ja_ginza_electra.meta.json @@ -1,7 +1,7 @@ { "lang":"ja", "name":"ginza_electra", - "version":"5.0.0", + "version":"5.1.0", "description":"Japanese multi-task CNN trained on UD-Japanese BCCWJ r2.8 + GSK2014-A(2019) + transformers-ud-japanese-electra--base. Components: transformer, parser, atteribute_ruler, ner, morphologizer, compound_splitter, bunsetu_recognizer.", "author":"Megagon Labs Tokyo.", "email":"ginza@megagon.ai", @@ -64,10 +64,10 @@ "attribute_ruler" ], "requirements":[ - "sudachipy>=0.5.2,<0.6.0", - "sudachidict_core>=20210608", + "sudachipy>=0.5.4,<0.7.0", + "sudachidict_core>=20210802", "sudachitra>=0.1.5,<0.2.0", - "ginza-transformers>=0.3.1,<1.0.0", - "ginza>=5.0.0,<5.1.0" + "ginza-transformers>=0.4.0,<0.5.0", + "ginza>=5.1.0,<5.2.0" ] } diff --git a/setup.py b/setup.py index b8fc0ad..fb3e4ae 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ install_requires=[ "spacy>=3.2.0,<3.3.0", "plac>=1.3.3", - "SudachiPy>=0.6.0,<0.7.0", + "SudachiPy>=0.5.4,<0.7.0", "SudachiDict-core>=20210802", ], setup_requires=["pytest-runner"],