Skip to content

Commit

Permalink
Merge pull request #222 from megagonlabs/feature/update_config
Browse files Browse the repository at this point in the history
update model config files
  • Loading branch information
hiroshi-matsuda-rit authored Dec 5, 2021
2 parents a78376f + 1a95324 commit 82ec7c2
Showing 4 changed files with 24 additions and 13 deletions.
8 changes: 4 additions & 4 deletions config/ja_ginza.meta.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"lang":"ja",
"name":"ginza",
-"version":"5.0.0",
+"version":"5.1.0",
"description":"Japanese multi-task CNN trained on UD-Japanese BCCWJ r2.8 + GSK2014-A(2019). Assigns word2vec token vectors. Components: tok2vec, parser, ner, morphologizer, attribute_ruler, compound_splitter, bunsetu_recognizer.",
"author":"Megagon Labs Tokyo.",
"email":"ginza@megagon.ai",
@@ -57,8 +57,8 @@
"attribute_ruler"
],
"requirements":[
-"sudachipy>=0.5.2,<0.6.0",
-"sudachidict_core>=20210608",
-"ginza>=5.0.0,<5.1.0"
+"sudachipy>=0.5.4,<0.7.0",
+"sudachidict_core>=20210802",
+"ginza>=5.1.0,<5.2.0"
]
}
17 changes: 14 additions & 3 deletions config/ja_ginza_electra.analysis.cfg
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@ split_mode = "C"

[components.attribute_ruler]
factory = "attribute_ruler"
+scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
validate = false

[components.bunsetu_recognizer]
@@ -37,6 +38,9 @@ split_mode = null

[components.morphologizer]
factory = "morphologizer"
+extend = true
+overwrite = true
+scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}

[components.morphologizer.model]
@architectures = "spacy.Tagger.v1"
@@ -52,6 +56,7 @@ upstream = "*"
factory = "ner"
incorrect_spans_key = null
moves = null
+scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100

[components.ner.model]
@@ -74,6 +79,7 @@ factory = "parser"
learn_tokens = false
min_action_freq = 30
moves = null
+scorer = {"@scorers":"spacy.parser_scorer.v1"}
update_with_oracle_cut_size = 100

[components.parser.model]
@@ -97,14 +103,17 @@ max_batch_items = 4096
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}

[components.transformer.model]
-@architectures = "ginza-transformers.TransformerModel.v1"
-name = "megagonlabs/transformers-ud-japanese-electra-base-ginza"
+@architectures = "spacy-transformers.TransformerModel.v3"
+name = "megagonlabs/transformers-ud-japanese-electra-base-ginza-510"
+mixed_precision = false

[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96

+[components.transformer.model.grad_scaler_config]

[components.transformer.model.tokenizer_config]
use_fast = false
tokenizer_class = "sudachitra.tokenization_electra_sudachipy.ElectraSudachipyTokenizer"
@@ -119,6 +128,8 @@ word_form_type = "dictionary_and_surface"
split_mode = "A"
dict_type = "core"

+[components.transformer.model.transformer_config]

[corpora]

[corpora.dev]
@@ -149,8 +160,8 @@ max_epochs = 0
max_steps = 50000
eval_frequency = 200
frozen_components = []
-before_to_disk = null
annotating_components = []
+before_to_disk = null

[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
10 changes: 5 additions & 5 deletions config/ja_ginza_electra.meta.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"lang":"ja",
"name":"ginza_electra",
-"version":"5.0.0",
+"version":"5.1.0",
"description":"Japanese multi-task CNN trained on UD-Japanese BCCWJ r2.8 + GSK2014-A(2019) + transformers-ud-japanese-electra-base. Components: transformer, parser, attribute_ruler, ner, morphologizer, compound_splitter, bunsetu_recognizer.",
"author":"Megagon Labs Tokyo.",
"email":"ginza@megagon.ai",
@@ -64,10 +64,10 @@
"attribute_ruler"
],
"requirements":[
-"sudachipy>=0.5.2,<0.6.0",
-"sudachidict_core>=20210608",
+"sudachipy>=0.5.4,<0.7.0",
+"sudachidict_core>=20210802",
"sudachitra>=0.1.5,<0.2.0",
-"ginza-transformers>=0.3.1,<1.0.0",
-"ginza>=5.0.0,<5.1.0"
+"ginza-transformers>=0.4.0,<0.5.0",
+"ginza>=5.1.0,<5.2.0"
]
}
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@
install_requires=[
"spacy>=3.2.0,<3.3.0",
"plac>=1.3.3",
-"SudachiPy>=0.6.0,<0.7.0",
+"SudachiPy>=0.5.4,<0.7.0",
"SudachiDict-core>=20210802",
],
setup_requires=["pytest-runner"],

0 comments on commit 82ec7c2

Please sign in to comment.