diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f814f08b3..362562095 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -107,6 +107,27 @@ jobs: run: | make docker-test-run DOCKER_TAG=$DOCKER_TAG ARGS='test-pretrained' + configs: + name: Training Configs + # Don't run for forks, and only run for master pushes and on schedule. + if: github.repository == 'allenai/allennlp-models' && github.event_name != 'pull_request' + runs-on: [self-hosted] + + steps: + - uses: actions/checkout@v2 + + - name: Set Docker tag + run: | + echo "::set-env name=DOCKER_TAG::$GITHUB_SHA"; + + - name: Build test image + run: | + make docker-test-image DOCKER_TAG=$DOCKER_TAG + + - name: Validate training configs + run: | + make docker-test-run DOCKER_TAG=$DOCKER_TAG ARGS='test-configs' + gpu_checks: name: GPU Checks # Don't run for forks. diff --git a/CHANGELOG.md b/CHANGELOG.md index b4cf8e542..1a290bb50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Added regression tests for training configs. 
+ ## [v1.1.0rc3](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0rc3) - 2020-08-12 ### Fixed diff --git a/Makefile b/Makefile index 17ff5fa48..1d9b5b353 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ typecheck : .PHONY : test test : - pytest --color=yes -rf --durations=40 -m "not pretrained_model_test" + pytest --color=yes -rf --durations=40 -m "not pretrained_model_test and not pretrained_config_test" .PHONY : gpu-test gpu-test : @@ -66,14 +66,19 @@ gpu-test : .PHONY : test-with-cov test-with-cov : - pytest --color=yes -rf --durations=40 -m "not pretrained_model_test" \ + pytest --color=yes -rf --durations=40 \ + -m "not pretrained_model_test and not pretrained_config_test" \ --cov-config=.coveragerc \ --cov=allennlp_models/ \ --cov-report=xml .PHONY : test-pretrained test-pretrained : - pytest -v --color=yes -m "pretrained_model_test" + pytest -v --color=yes --durations=10 -m "pretrained_model_test" + +.PHONY : test-configs +test-configs : + pytest -v --color=yes --durations=10 -m "pretrained_config_test" .PHONY : build-all-api-docs build-all-api-docs : scripts/py2md.py diff --git a/allennlp_models/rc/dataset_readers/quac.py b/allennlp_models/rc/dataset_readers/quac.py index c96cc6628..a31b5cd89 100644 --- a/allennlp_models/rc/dataset_readers/quac.py +++ b/allennlp_models/rc/dataset_readers/quac.py @@ -44,10 +44,10 @@ def __init__( self, tokenizer: Tokenizer = None, token_indexers: Dict[str, TokenIndexer] = None, - lazy: bool = False, num_context_answers: int = 0, + **kwargs, ) -> None: - super().__init__(lazy) + super().__init__(**kwargs) self._tokenizer = tokenizer or SpacyTokenizer() self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()} self._num_context_answers = num_context_answers diff --git a/pytest.ini b/pytest.ini index dbf818992..97794b2a8 100644 --- a/pytest.ini +++ b/pytest.ini @@ -5,6 +5,7 @@ log_format = %(asctime)s - %(levelname)s - %(name)s - %(message)s log_level = DEBUG markers = 
pretrained_model_test + pretrained_config_test java gpu: marks tests that need at least one GPU filterwarnings = diff --git a/tests/pretrained_test.py b/tests/pretrained_test.py index 7101dc6e5..463cebbfd 100644 --- a/tests/pretrained_test.py +++ b/tests/pretrained_test.py @@ -1,5 +1,3 @@ -import os - import pytest import spacy @@ -9,7 +7,7 @@ # But default we don't run these tests @pytest.mark.pretrained_model_test -class TestAllenNlpPretrained(AllenNlpTestCase): +class TestAllenNlpPretrainedModels(AllenNlpTestCase): def test_machine_comprehension(self): predictor = load_predictor("rc-bidaf") diff --git a/tests/training_config_test.py b/tests/training_config_test.py new file mode 100644 index 000000000..5a7d00872 --- /dev/null +++ b/tests/training_config_test.py @@ -0,0 +1,139 @@ +from pathlib import Path +from glob import glob +import os +from typing import Dict, Tuple + +import pytest + +from tests import FIXTURES_ROOT +from allennlp.commands.train import TrainModel +from allennlp.common.testing import AllenNlpTestCase +from allennlp.common.params import Params +from allennlp.common.plugins import import_plugins + + +CONFIGS_TO_IGNORE = { + # TODO (epwalsh): once the new data loading API is merged, try to get this model working. + "bidirectional_language_model.jsonnet", + # Requires some bi-directional LM archive path. 
+ "constituency_parser_transformer_elmo.jsonnet", +} + + +def find_configs(): + for item in os.walk("training_config/"): + for pattern in ("*.json", "*.jsonnet"): + for path in glob(os.path.join(item[0], pattern)): + if os.path.basename(path) == "common.jsonnet": + continue + yield pytest.param( + path, + marks=pytest.mark.skipif( + any(x in path for x in CONFIGS_TO_IGNORE), reason="ignoring" + ), + ) + + +GLOVE_PATCHES = { + FIXTURES_ROOT + / "glove.6B.100d.sample.txt.gz": ( + "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.100d.txt.gz", + ), + FIXTURES_ROOT + / "glove.6B.300d.sample.txt.gz": ( + "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.300d.txt.gz", + "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.txt.gz", + "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip", + ), +} + + +def patch_glove(params): + for key, value in params.items(): + if isinstance(value, str): + for patch, patch_targets in GLOVE_PATCHES.items(): + if value in patch_targets: + params[key] = str(patch) + elif isinstance(value, Params): + patch_glove(value) + + +DATASET_PATCHES: Dict[Path, Tuple[str, ...]] = { + FIXTURES_ROOT + / "structured_prediction" + / "srl" + / "conll_2012": ("SRL_TRAIN_DATA_PATH", "SRL_VALIDATION_DATA_PATH"), + FIXTURES_ROOT + / "structured_prediction" + / "example_ptb.trees": ("PTB_TRAIN_PATH", "PTB_DEV_PATH", "PTB_TEST_PATH"), + FIXTURES_ROOT + / "structured_prediction" + / "dependencies.conllu": ("PTB_DEPENDENCIES_TRAIN", "PTB_DEPENDENCIES_VAL"), + FIXTURES_ROOT + / "structured_prediction" + / "semantic_dependencies" + / "dm.sdp": ("SEMEVAL_TRAIN", "SEMEVAL_DEV", "SEMEVAL_TEST"), + FIXTURES_ROOT / "tagging" / "conll2003.txt": ("NER_TRAIN_DATA_PATH", "NER_TEST_DATA_PATH"), + FIXTURES_ROOT / "mc" / "swag.csv": ("SWAG_TRAIN", "SWAG_DEV", "SWAG_TEST"), + FIXTURES_ROOT / "rc" / "drop.json": ("DROP_TRAIN", "DROP_DEV"), + FIXTURES_ROOT / "lm" / "language_model" / "sentences.txt": 
("BIDIRECTIONAL_LM_TRAIN_PATH",), + FIXTURES_ROOT / "rc" / "squad.json": ("SQUAD_TRAIN", "SQUAD_DEV"), + FIXTURES_ROOT + / "coref" + / "coref.gold_conll": ("COREF_TRAIN_DATA_PATH", "COREF_DEV_DATA_PATH", "COREF_TEST_DATA_PATH",), + FIXTURES_ROOT + / "structured_prediction" + / "srl" + / "conll_2012" + / "subdomain": ("CONLL_TRAIN_DATA_PATH", "CONLL_DEV_DATA_PATH"), + FIXTURES_ROOT + / "tagging" + / "conll2003.txt": ( + "NER_TRAIN_DATA_PATH", + "NER_TEST_DATA_PATH", + "NER_TEST_A_PATH", + "NER_TEST_B_PATH", + ), + FIXTURES_ROOT + / "generation" + / "bart" + / "data" + / "url_lists" + / "all_train.txt": ("CNNDM_TRAIN", "CNNDM_DEV",), +} + + +@pytest.mark.pretrained_config_test +class TestAllenNlpPretrainedModelConfigs(AllenNlpTestCase): + @classmethod + def setup_class(cls): + # Make sure all the classes we need are registered. + import_plugins() + + # Patch dataset paths. + for dataset_patch, patch_targets in DATASET_PATCHES.items(): + for patch_target in patch_targets: + os.environ[patch_target] = str(dataset_patch) + + @pytest.mark.parametrize("path", find_configs()) + def test_pretrained_configs(self, path): + params = Params.from_file( + path, + params_overrides="{" + "'trainer.cuda_device': -1, " + "'trainer.num_epochs': 2, " + "'dataset_reader.max_instances': 4, " + "'dataset_reader.lazy': false, " + "}", + ) + + # Patch any pretrained glove files with smaller fixtures. + patch_glove(params) + + # Remove unnecessary keys. + for key in ("random_seed", "numpy_seed", "pytorch_seed", "distributed"): + if key in params: + del params[key] + + # Just make sure the train loop can be instantiated. 
+ TrainModel.from_params(params=params, serialization_dir=self.TEST_DIR, local_rank=0) diff --git a/training_config/generation/bart_cnn_dm.jsonnet b/training_config/generation/bart_cnn_dm.jsonnet index 62ea73624..2acd9e1aa 100755 --- a/training_config/generation/bart_cnn_dm.jsonnet +++ b/training_config/generation/bart_cnn_dm.jsonnet @@ -1,9 +1,15 @@ local model_name = "facebook/bart-large"; -local data_base_url = "https://storage.googleapis.com/allennlp-public-data/cnndm-combined-data-2020.07.13.tar.gz"; + +local train_data = std.extVar("CNNDM_TRAIN"); +local dev_data = std.extVar("CNNDM_DEV"); +// Use these lines below to get the actual dataset. +// local data_base_url = "https://storage.googleapis.com/allennlp-public-data/cnndm-combined-data-2020.07.13.tar.gz"; +// local train_data = data_base_url + "!cnndm-combined-data-2020.07.13/url_lists/all_train.txt"; +// local dev_data = data_base_url + "!cnndm-combined-data-2020.07.13/url_lists/all_val.txt"; { - "train_data_path": data_base_url + "!cnndm-combined-data-2020.07.13/url_lists/all_train.txt", - "validation_data_path": data_base_url + "!cnndm-combined-data-2020.07.13/url_lists/all_val.txt", + "train_data_path": train_data, + "validation_data_path": dev_data, "dataset_reader": { "type": "cnn_dm", "source_tokenizer": { diff --git a/training_config/mc/swag.jsonnet b/training_config/mc/swag.jsonnet index 5ed729314..b3f5191aa 100644 --- a/training_config/mc/swag.jsonnet +++ b/training_config/mc/swag.jsonnet @@ -12,8 +12,12 @@ local gradient_accumulation_steps = batch_size / gpu_batch_size; "transformer_model_name": transformer_model, //"max_instances": 200 // debug setting }, - "train_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/train.csv", - "validation_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/val.csv", + "train_data_path": std.extVar("SWAG_TRAIN"), + "validation_data_path": std.extVar("SWAG_DEV"), + //"test_data_path": std.extVar("SWAG_TEST") + // Replace the above few lines with these if you 
are Dirk: + //"train_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/train.csv" + //"validation_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/val.csv" //"test_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/test.csv" "model": { "type": "transformer_mc", diff --git a/training_config/rc/bidaf.jsonnet b/training_config/rc/bidaf.jsonnet index 9321b12ed..c6f8252a5 100644 --- a/training_config/rc/bidaf.jsonnet +++ b/training_config/rc/bidaf.jsonnet @@ -20,8 +20,11 @@ } } }, - "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", - "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", + "train_data_path": std.extVar("SQUAD_TRAIN"), + "validation_data_path": std.extVar("SQUAD_DEV"), + // You can replace the above two lines with these to get the actual squad datasets. + // "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", + // "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", "model": { "type": "bidaf", "text_field_embedder": { diff --git a/training_config/rc/bidaf_elmo.jsonnet b/training_config/rc/bidaf_elmo.jsonnet index 4a7e40596..27bea1ff7 100644 --- a/training_config/rc/bidaf_elmo.jsonnet +++ b/training_config/rc/bidaf_elmo.jsonnet @@ -23,8 +23,11 @@ } } }, - "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", - "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", + "train_data_path": std.extVar("SQUAD_TRAIN"), + "validation_data_path": std.extVar("SQUAD_DEV"), + // You can replace the above two lines with these to get the actual squad datasets. 
+ // "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", + // "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", "model": { "type": "bidaf", "text_field_embedder": { diff --git a/training_config/rc/dialog_qa.jsonnet b/training_config/rc/dialog_qa.jsonnet index f7366785f..a708b9c7e 100644 --- a/training_config/rc/dialog_qa.jsonnet +++ b/training_config/rc/dialog_qa.jsonnet @@ -62,27 +62,29 @@ "num_layers": 1 }, "text_field_embedder": { - "elmo": { - "type": "elmo_token_embedder", - "do_layer_norm": false, - "dropout": 0.2, - "options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", - "weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5" - }, - "token_characters": { - "type": "character_encoding", - "dropout": 0.2, - "embedding": { - "embedding_dim": 20, - "num_embeddings": 262 + "token_embedders": { + "elmo": { + "type": "elmo_token_embedder", + "do_layer_norm": false, + "dropout": 0.2, + "options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5" }, - "encoder": { - "type": "cnn", - "embedding_dim": 20, - "ngram_filter_sizes": [ - 5 - ], - "num_filters": 100 + "token_characters": { + "type": "character_encoding", + "dropout": 0.2, + "embedding": { + "embedding_dim": 20, + "num_embeddings": 262 + }, + "encoder": { + "type": "cnn", + "embedding_dim": 20, + "ngram_filter_sizes": [ + 5 + ], + "num_filters": 100 + } } } } diff --git a/training_config/rc/naqanet.jsonnet b/training_config/rc/naqanet.jsonnet index 595c130b0..242eb9fa1 100644 --- a/training_config/rc/naqanet.jsonnet +++ 
b/training_config/rc/naqanet.jsonnet @@ -42,8 +42,8 @@ }, "only_include_pretrained_words": true }, - "train_data_path": "drop_dataset_train.json", - "validation_data_path": "drop_dataset_dev.json", + "train_data_path": std.extVar("DROP_TRAIN"), + "validation_data_path": std.extVar("DROP_DEV"), "model": { "type": "naqanet", "text_field_embedder": { @@ -106,14 +106,14 @@ "attention_dropout_prob": 0 }, "dropout_prob": 0.1, - "regularizer": [ + "regularizer": { "regexes": [ [".*", { "type": "l2", "alpha": 1e-07 }] ] - ], + }, "answering_abilities": [ "passage_span_extraction", "question_span_extraction", diff --git a/training_config/rc/qanet.jsonnet b/training_config/rc/qanet.jsonnet index 8e5736885..837e1020e 100644 --- a/training_config/rc/qanet.jsonnet +++ b/training_config/rc/qanet.jsonnet @@ -47,8 +47,11 @@ }, "only_include_pretrained_words": true }, - "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", - "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", + "train_data_path": std.extVar("SQUAD_TRAIN"), + "validation_data_path": std.extVar("SQUAD_DEV"), + // You can replace the above two lines with these to get the actual squad datasets. 
+ // "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", + // "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", "model": { "type": "qanet", "text_field_embedder": { diff --git a/training_config/rc/transformer_qa.jsonnet b/training_config/rc/transformer_qa.jsonnet index f3a6fc2d5..48042bc76 100644 --- a/training_config/rc/transformer_qa.jsonnet +++ b/training_config/rc/transformer_qa.jsonnet @@ -13,8 +13,11 @@ local batch_size = 8; "validation_dataset_reader": self.dataset_reader + { "skip_invalid_examples": false, }, - "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", - "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", + "train_data_path": std.extVar("SQUAD_TRAIN"), + "validation_data_path": std.extVar("SQUAD_DEV"), + // You can replace the above two lines with these to get the actual squad datasets. + // "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", + // "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", "model": { "type": "transformer_qa", "transformer_model_name": transformer_model, diff --git a/training_config/rc/transformer_qa_distributed.jsonnet b/training_config/rc/transformer_qa_distributed.jsonnet deleted file mode 100644 index ad987b15d..000000000 --- a/training_config/rc/transformer_qa_distributed.jsonnet +++ /dev/null @@ -1,47 +0,0 @@ -local transformer_model = 'bert-base-cased'; - -local epochs = 3; - -{ - "dataset_reader": { - "type": "transformer_squad", - "transformer_model_name": transformer_model, - "skip_invalid_examples": true, - //"max_instances": 200 // debug setting - }, - "validation_dataset_reader": self.dataset_reader + { - "skip_invalid_examples": false, - }, - "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json", - 
"validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json", - "model": { - "type": "transformer_qa", - "transformer_model_name": transformer_model, - }, - "data_loader": { - "batch_sampler": { - "type": "bucket", - "batch_size": 4 - } - }, - "trainer": { - "optimizer": { - "type": "huggingface_adamw", - "weight_decay": 0.0, - "parameter_groups": [[["bias", "LayerNorm\\.weight", "layer_norm\\.weight"], {"weight_decay": 0}]], - "lr": 2e-5, - "eps": 1e-8 - }, - "learning_rate_scheduler": { - "type": "slanted_triangular", - "num_epochs": epochs, - "cut_frac": 0.1, - }, - "grad_clipping": 1.0, - "num_epochs": epochs, - }, - "distributed": {"cuda_devices": [0, 1]}, - "random_seed": 42, - "numpy_seed": 42, - "pytorch_seed": 42, -} diff --git a/training_config/structured_prediction/semantic_dependencies.json b/training_config/structured_prediction/semantic_dependencies.jsonnet similarity index 78% rename from training_config/structured_prediction/semantic_dependencies.json rename to training_config/structured_prediction/semantic_dependencies.jsonnet index 870a6e732..0fa16184b 100644 --- a/training_config/structured_prediction/semantic_dependencies.json +++ b/training_config/structured_prediction/semantic_dependencies.jsonnet @@ -6,9 +6,9 @@ "type":"semantic_dependencies", "skip_when_no_arcs": false }, - "train_data_path": "/home/markn/data/semantic_dependency_parsing/semeval2015_data/dm/data/english/english_dm_augmented_train.sdp", - "validation_data_path": "/home/markn/data/semantic_dependency_parsing/semeval2015_data/dm/data/english/english_dm_augmented_dev.sdp", - "test_data_path": "/home/markn/data/semantic_dependency_parsing/semeval2015_data/dm/data/english/english_id_dm_augmented_test.sdp", + "train_data_path": std.extVar("SEMEVAL_TRAIN"), + "validation_data_path": std.extVar("SEMEVAL_DEV"), + "test_data_path": std.extVar("SEMEVAL_TEST"), "model": { "type": "graph_parser", "text_field_embedder": { @@ -16,7 +16,7 @@ "tokens": { 
"type": "embedding", "embedding_dim": 100, - "pretrained_file": "/home/markn/data/glove/glove.6B/glove.6B.100d.txt", + "pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.100d.txt.gz", "trainable": true, "sparse": true } diff --git a/training_config/structured_prediction/srl_elmo.jsonnet b/training_config/structured_prediction/srl_elmo.jsonnet index 8ba6ce077..cc171fe0b 100644 --- a/training_config/structured_prediction/srl_elmo.jsonnet +++ b/training_config/structured_prediction/srl_elmo.jsonnet @@ -37,15 +37,17 @@ "use_input_projection_bias": false }, "binary_feature_dim": 100, - "regularizer": [ - [ - ".*scalar_parameters.*", - { - "type": "l2", - "alpha": 0.001 - } + "regularizer": { + "regexes": [ + [ + ".*scalar_parameters.*", + { + "type": "l2", + "alpha": 0.001 + } + ] ] - ] + } }, "data_loader": { "batch_sampler": { diff --git a/training_config/structured_prediction/srl_elmo_5.5B.jsonnet b/training_config/structured_prediction/srl_elmo_5.5B.jsonnet index 4eb7c6c95..473b6c16c 100644 --- a/training_config/structured_prediction/srl_elmo_5.5B.jsonnet +++ b/training_config/structured_prediction/srl_elmo_5.5B.jsonnet @@ -37,15 +37,17 @@ "use_input_projection_bias": false }, "binary_feature_dim": 100, - "regularizer": [ - [ - ".*scalar_parameters.*", - { - "type": "l2", - "alpha": 0.001 - } + "regularizer": { + "regexes": [ + [ + ".*scalar_parameters.*", + { + "type": "l2", + "alpha": 0.001 + } + ] ] - ] + } }, "data_loader": { "batch_sampler": { diff --git a/training_config/tagging/fgner_transformer.jsonnet b/training_config/tagging/fgner_transformer.jsonnet index 39280c3af..e21b5fba2 100644 --- a/training_config/tagging/fgner_transformer.jsonnet +++ b/training_config/tagging/fgner_transformer.jsonnet @@ -1,6 +1,9 @@ -local data_dir = std.extVar("CONLL_DATA_PATH"); -// local data_dir = "/net/nfs.corp/allennlp/dirkg/data/conll-formatted-ontonotes-5.0/data"; -// local data_dir = 
"/Users/dirkg/Documents/data/conll-formatted-ontonotes-5.0/data"; +local train_data = std.extVar("CONLL_TRAIN_DATA_PATH"); +local dev_data = std.extVar("CONLL_DEV_DATA_PATH"); +// local train_data = "/net/nfs.corp/allennlp/dirkg/data/conll-formatted-ontonotes-5.0/data/train"; +// local dev_data = "/net/nfs.corp/allennlp/dirkg/data/conll-formatted-ontonotes-5.0/data/development"; +// local train_data = "/Users/dirkg/Documents/data/conll-formatted-ontonotes-5.0/data/train"; +// local dev_data = "/Users/dirkg/Documents/data/conll-formatted-ontonotes-5.0/data/development"; local transformer_model = "roberta-base"; local transformer_hidden_dim = 768; @@ -20,8 +23,8 @@ local max_length = 512; }, }, }, - "train_data_path": data_dir + "/train", - "validation_data_path": data_dir + "/development", + "train_data_path": train_data, + "validation_data_path": dev_data, "data_loader": { "batch_sampler": { "type": "bucket", diff --git a/training_config/tagging/fine-grained-ner.jsonnet b/training_config/tagging/fine-grained-ner.jsonnet index 7de657f70..8d20ab3bf 100644 --- a/training_config/tagging/fine-grained-ner.jsonnet +++ b/training_config/tagging/fine-grained-ner.jsonnet @@ -1,6 +1,9 @@ -local data_dir = std.extVar("CONLL_DATA_PATH"); -// local data_dir = "/net/nfs.corp/allennlp/dirkg/data/conll-formatted-ontonotes-5.0/data"; -// local data_dir = "/Users/dirkg/Documents/data/conll-formatted-ontonotes-5.0/data"; +local train_data = std.extVar("CONLL_TRAIN_DATA_PATH"); +local dev_data = std.extVar("CONLL_DEV_DATA_PATH"); +// local train_data = "/net/nfs.corp/allennlp/dirkg/data/conll-formatted-ontonotes-5.0/data/train"; +// local dev_data = "/net/nfs.corp/allennlp/dirkg/data/conll-formatted-ontonotes-5.0/data/development"; +// local train_data = "/Users/dirkg/Documents/data/conll-formatted-ontonotes-5.0/data/train"; +// local dev_data = "/Users/dirkg/Documents/data/conll-formatted-ontonotes-5.0/data/development"; { "dataset_reader": { @@ -19,8 +22,8 @@ local data_dir = 
std.extVar("CONLL_DATA_PATH"); } } }, - "train_data_path": data_dir + "/train", - "validation_data_path": data_dir + "/development", + "train_data_path": train_data, + "validation_data_path": dev_data, "data_loader": { "batch_sampler": { "type": "bucket", diff --git a/training_config/tagging/ner_elmo.jsonnet b/training_config/tagging/ner_elmo.jsonnet index 28af8ea10..8f8a98358 100644 --- a/training_config/tagging/ner_elmo.jsonnet +++ b/training_config/tagging/ner_elmo.jsonnet @@ -24,11 +24,11 @@ }, "elmo": { "type": "elmo_characters" - } + } } }, "train_data_path": std.extVar("NER_TRAIN_DATA_PATH"), - "validation_data_path": std.extVar("NER_TEST_A_PATH"), + "validation_data_path": std.extVar("NER_TEST_DATA_PATH"), "model": { "type": "crf_tagger", "label_encoding": "BIOUL", @@ -72,15 +72,17 @@ "dropout": 0.5, "bidirectional": true }, - "regularizer": [ - [ - "scalar_parameters", - { - "type": "l2", - "alpha": 0.1 - } + "regularizer": { + "regexes": [ + [ + "scalar_parameters", + { + "type": "l2", + "alpha": 0.1 + } + ] ] - ] + } }, "data_loader": { "batch_size": 64