validate pretrained configs in CI (#112)
* add pretrained config tests

* update CHANGELOG

* fixes

* fixes

* patch some datasets

* fix ner_elmo

* fix srl elmo

* fix semeval

* fix commonsenseqa

* fix swag

* remove tqa distributed

* fix naqanet

* fix dialog_qa

* track duration of tests

* test configs separately

* move to separate workflow, patch glove

* update

* fix

* add some more comments
epwalsh authored Aug 17, 2020
1 parent 4fa5fc1 commit 87570ec
Showing 22 changed files with 290 additions and 132 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/main.yml
@@ -107,6 +107,27 @@ jobs:
       run: |
         make docker-test-run DOCKER_TAG=$DOCKER_TAG ARGS='test-pretrained'

+  configs:
+    name: Training Configs
+    # Don't run for forks, and only run for master pushes and on schedule.
+    if: github.repository == 'allenai/allennlp-models' && github.event_name != 'pull_request'
+    runs-on: [self-hosted]
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Set Docker tag
+      run: |
+        echo "::set-env name=DOCKER_TAG::$GITHUB_SHA";
+
+    - name: Build test image
+      run: |
+        make docker-test-image DOCKER_TAG=$DOCKER_TAG
+
+    - name: Validate training configs
+      run: |
+        make docker-test-run DOCKER_TAG=$DOCKER_TAG ARGS='test-configs'
+
   gpu_checks:
     name: GPU Checks
     # Don't run for forks.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## Unreleased

+### Added
+
+- Added regression tests for training configs.
+
 ## [v1.1.0rc3](https://github.com/allenai/allennlp-models/releases/tag/v1.1.0rc3) - 2020-08-12

 ### Fixed
12 changes: 9 additions & 3 deletions Makefile
@@ -58,22 +58,28 @@ typecheck :

 .PHONY : test
 test :
-	pytest --color=yes -rf --durations=40 -m "not pretrained_model_test"
+	pytest --color=yes -rf --durations=40 -m "not pretrained_model_test" -m "not pretrained_config_test"

 .PHONY : gpu-test
 gpu-test :
 	pytest --color=yes -v -rf -m gpu

 .PHONY : test-with-cov
 test-with-cov :
-	pytest --color=yes -rf --durations=40 -m "not pretrained_model_test" \
+	pytest --color=yes -rf --durations=40 \
+		-m "not pretrained_model_test" \
+		-m "not pretrained_config_test" \
 		--cov-config=.coveragerc \
 		--cov=allennlp_models/ \
 		--cov-report=xml

 .PHONY : test-pretrained
 test-pretrained :
-	pytest -v --color=yes -m "pretrained_model_test"
+	pytest -v --color=yes --durations=10 -m "pretrained_model_test"

+.PHONY : test-configs
+test-configs :
+	pytest -v --color=yes --durations=10 -m "pretrained_config_test"
+
 .PHONY : build-all-api-docs
 build-all-api-docs : scripts/py2md.py
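The new `test-configs` target is plain pytest marker selection. For reference, a minimal sketch of the equivalent invocation driven from Python rather than make (assuming only that pytest is installed and that it runs from the repository root):

    # Same selection as `make test-configs`: run only tests marked
    # pretrained_config_test. pytest.main takes the usual CLI arguments
    # as a list and returns an exit code.
    import sys

    import pytest

    exit_code = pytest.main(
        ["-v", "--color=yes", "--durations=10", "-m", "pretrained_config_test"]
    )
    sys.exit(int(exit_code))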
4 changes: 2 additions & 2 deletions allennlp_models/rc/dataset_readers/quac.py
@@ -44,10 +44,10 @@ def __init__(
         self,
         tokenizer: Tokenizer = None,
         token_indexers: Dict[str, TokenIndexer] = None,
-        lazy: bool = False,
         num_context_answers: int = 0,
+        **kwargs,
     ) -> None:
-        super().__init__(lazy)
+        super().__init__(**kwargs)
         self._tokenizer = tokenizer or SpacyTokenizer()
         self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
         self._num_context_answers = num_context_answers
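The quac.py change swaps the explicit `lazy` argument for `**kwargs` forwarded to the base class, so base-reader options no longer have to be re-declared in every subclass. A minimal sketch of the pattern (the class names here are illustrative stand-ins, not AllenNLP's real hierarchy):

    # BaseReader stands in for allennlp's DatasetReader; the point is only
    # that options like `lazy` pass through **kwargs to the base class.
    class BaseReader:
        def __init__(self, lazy: bool = False) -> None:
            self.lazy = lazy

    class QuacLikeReader(BaseReader):
        def __init__(self, num_context_answers: int = 0, **kwargs) -> None:
            super().__init__(**kwargs)  # forwards `lazy` and any future base option
            self.num_context_answers = num_context_answers

    reader = QuacLikeReader(num_context_answers=2, lazy=True)
    assert reader.lazy and reader.num_context_answers == 2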
1 change: 1 addition & 0 deletions pytest.ini
@@ -5,6 +5,7 @@ log_format = %(asctime)s - %(levelname)s - %(name)s - %(message)s
 log_level = DEBUG
 markers =
     pretrained_model_test
+    pretrained_config_test
     java
     gpu: marks tests that need at least one GPU
 filterwarnings =
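Registering `pretrained_config_test` in pytest.ini lets pytest filter on the marker without "unknown mark" warnings. A small sketch of how a test opts in, with an illustrative test body:

    # Selected by `-m pretrained_config_test` (the Makefile's test-configs
    # target) and excluded by the `-m "not pretrained_config_test"` filters.
    import pytest

    @pytest.mark.pretrained_config_test
    def test_some_config_loads():
        assert True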
4 changes: 1 addition & 3 deletions tests/pretrained_test.py
@@ -1,5 +1,3 @@
-import os
-
 import pytest
 import spacy

@@ -9,7 +7,7 @@

 # But default we don't run these tests
 @pytest.mark.pretrained_model_test
-class TestAllenNlpPretrained(AllenNlpTestCase):
+class TestAllenNlpPretrainedModels(AllenNlpTestCase):
     def test_machine_comprehension(self):
         predictor = load_predictor("rc-bidaf")
139 changes: 139 additions & 0 deletions tests/training_config_test.py
@@ -0,0 +1,139 @@
+from pathlib import Path
+from glob import glob
+import os
+from typing import Dict, Tuple
+
+import pytest
+
+from tests import FIXTURES_ROOT
+from allennlp.commands.train import TrainModel
+from allennlp.common.testing import AllenNlpTestCase
+from allennlp.common.params import Params
+from allennlp.common.plugins import import_plugins
+
+
+CONFIGS_TO_IGNORE = {
+    # TODO (epwalsh): once the new data loading API is merged, try to get this model working.
+    "bidirectional_language_model.jsonnet",
+    # Requires some bi-directional LM archive path.
+    "constituency_parser_transformer_elmo.jsonnet",
+}
+
+
+def find_configs():
+    for item in os.walk("training_config/"):
+        for pattern in ("*.json", "*.jsonnet"):
+            for path in glob(os.path.join(item[0], pattern)):
+                if os.path.basename(path) == "common.jsonnet":
+                    continue
+                yield pytest.param(
+                    path,
+                    marks=pytest.mark.skipif(
+                        any(x in path for x in CONFIGS_TO_IGNORE), reason="ignoring"
+                    ),
+                )
+
+
+GLOVE_PATCHES = {
+    FIXTURES_ROOT
+    / "glove.6B.100d.sample.txt.gz": (
+        "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.100d.txt.gz",
+    ),
+    FIXTURES_ROOT
+    / "glove.6B.300d.sample.txt.gz": (
+        "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.300d.txt.gz",
+        "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.txt.gz",
+        "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip",
+    ),
+}
+
+
+def patch_glove(params):
+    for key, value in params.items():
+        if isinstance(value, str):
+            for patch, patch_targets in GLOVE_PATCHES.items():
+                if value in patch_targets:
+                    params[key] = str(patch)
+        elif isinstance(value, Params):
+            patch_glove(value)
+
+
+DATASET_PATCHES: Dict[Path, Tuple[str, ...]] = {
+    FIXTURES_ROOT
+    / "structured_prediction"
+    / "srl"
+    / "conll_2012": ("SRL_TRAIN_DATA_PATH", "SRL_VALIDATION_DATA_PATH"),
+    FIXTURES_ROOT
+    / "structured_prediction"
+    / "example_ptb.trees": ("PTB_TRAIN_PATH", "PTB_DEV_PATH", "PTB_TEST_PATH"),
+    FIXTURES_ROOT
+    / "structured_prediction"
+    / "dependencies.conllu": ("PTB_DEPENDENCIES_TRAIN", "PTB_DEPENDENCIES_VAL"),
+    FIXTURES_ROOT
+    / "structured_prediction"
+    / "semantic_dependencies"
+    / "dm.sdp": ("SEMEVAL_TRAIN", "SEMEVAL_DEV", "SEMEVAL_TEST"),
+    FIXTURES_ROOT / "tagging" / "conll2003.txt": ("NER_TRAIN_DATA_PATH", "NER_TEST_DATA_PATH"),
+    FIXTURES_ROOT / "mc" / "swag.csv": ("SWAG_TRAIN", "SWAG_DEV", "SWAG_TEST"),
+    FIXTURES_ROOT / "rc" / "drop.json": ("DROP_TRAIN", "DROP_DEV"),
+    FIXTURES_ROOT / "lm" / "language_model" / "sentences.txt": ("BIDIRECTIONAL_LM_TRAIN_PATH",),
+    FIXTURES_ROOT / "rc" / "squad.json": ("SQUAD_TRAIN", "SQUAD_DEV"),
+    FIXTURES_ROOT
+    / "coref"
+    / "coref.gold_conll": ("COREF_TRAIN_DATA_PATH", "COREF_DEV_DATA_PATH", "COREF_TEST_DATA_PATH",),
+    FIXTURES_ROOT
+    / "structured_prediction"
+    / "srl"
+    / "conll_2012"
+    / "subdomain": ("CONLL_TRAIN_DATA_PATH", "CONLL_DEV_DATA_PATH"),
+    FIXTURES_ROOT
+    / "tagging"
+    / "conll2003.txt": (
+        "NER_TRAIN_DATA_PATH",
+        "NER_TEST_DATA_PATH",
+        "NER_TEST_A_PATH",
+        "NER_TEST_B_PATH",
+    ),
+    FIXTURES_ROOT
+    / "generation"
+    / "bart"
+    / "data"
+    / "url_lists"
+    / "all_train.txt": ("CNNDM_TRAIN", "CNNDM_DEV",),
+}
+
+
+@pytest.mark.pretrained_config_test
+class TestAllenNlpPretrainedModelConfigs(AllenNlpTestCase):
+    @classmethod
+    def setup_class(cls):
+        # Make sure all the classes we need are registered.
+        import_plugins()
+
+        # Patch dataset paths.
+        for dataset_patch, patch_targets in DATASET_PATCHES.items():
+            for patch_target in patch_targets:
+                os.environ[patch_target] = str(dataset_patch)
+
+    @pytest.mark.parametrize("path", find_configs())
+    def test_pretrained_configs(self, path):
+        params = Params.from_file(
+            path,
+            params_overrides="{"
+            "'trainer.cuda_device': -1, "
+            "'trainer.num_epochs': 2, "
+            "'dataset_reader.max_instances': 4, "
+            "'dataset_reader.lazy': false, "
+            "}",
+        )
+
+        # Patch any pretrained glove files with smaller fixtures.
+        patch_glove(params)
+
+        # Remove unnecessary keys.
+        for key in ("random_seed", "numpy_seed", "pytorch_seed", "distributed"):
+            if key in params:
+                del params[key]
+
+        # Just make sure the train loop can be instantiated.
+        TrainModel.from_params(params=params, serialization_dir=self.TEST_DIR, local_rank=0)
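The environment-variable patching works because AllenNLP evaluates Jsonnet configs with `std.extVar(...)` bound to the process environment, so the `DATASET_PATCHES` loop in `setup_class` is enough to point every config at a tiny fixture. A sketch of that round trip against the patched bidaf config shown further below (the fixture path is an assumption; run from the repository root):

    import os

    from allennlp.common.params import Params

    os.environ["SQUAD_TRAIN"] = "test_fixtures/rc/squad.json"  # assumed fixture path
    os.environ["SQUAD_DEV"] = "test_fixtures/rc/squad.json"

    params = Params.from_file(
        "training_config/rc/bidaf.jsonnet",
        # Dotted override keys, same style as in test_pretrained_configs above.
        params_overrides="{'trainer.cuda_device': -1, 'trainer.num_epochs': 2}",
    )
    assert params["train_data_path"] == "test_fixtures/rc/squad.json"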
12 changes: 9 additions & 3 deletions training_config/generation/bart_cnn_dm.jsonnet
@@ -1,9 +1,15 @@
 local model_name = "facebook/bart-large";
-local data_base_url = "https://storage.googleapis.com/allennlp-public-data/cnndm-combined-data-2020.07.13.tar.gz";

+local train_data = std.extVar("CNNDM_TRAIN");
+local dev_data = std.extVar("CNNDM_DEV");
+
+// Use these lines below to get the actual dataset.
+// local data_base_url = "https://storage.googleapis.com/allennlp-public-data/cnndm-combined-data-2020.07.13.tar.gz";
+// local train_data = data_base_url + "!cnndm-combined-data-2020.07.13/url_lists/all_train.txt";
+// local dev_data = data_base_url + "!cnndm-combined-data-2020.07.13/url_lists/all_val.txt";

 {
-  "train_data_path": data_base_url + "!cnndm-combined-data-2020.07.13/url_lists/all_train.txt",
-  "validation_data_path": data_base_url + "!cnndm-combined-data-2020.07.13/url_lists/all_val.txt",
+  "train_data_path": train_data,
+  "validation_data_path": dev_data,
   "dataset_reader": {
     "type": "cnn_dm",
     "source_tokenizer": {
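The `std.extVar` swap is what makes the CI patching possible: the data path is no longer baked into the config but read from the environment when the Jsonnet is evaluated. The mechanism in isolation, sketched with the `jsonnet` Python binding (the snippet and values are illustrative):

    import json

    import _jsonnet  # pip install jsonnet

    snippet = 'local train_data = std.extVar("CNNDM_TRAIN"); { train_data_path: train_data }'
    config = json.loads(
        _jsonnet.evaluate_snippet(
            "example.jsonnet", snippet, ext_vars={"CNNDM_TRAIN": "/tmp/all_train.txt"}
        )
    )
    assert config["train_data_path"] == "/tmp/all_train.txt"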
8 changes: 6 additions & 2 deletions training_config/mc/swag.jsonnet
@@ -12,8 +12,12 @@ local gradient_accumulation_steps = batch_size / gpu_batch_size;
     "transformer_model_name": transformer_model,
     //"max_instances": 200 // debug setting
   },
-  "train_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/train.csv",
-  "validation_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/val.csv",
+  "train_data_path": std.extVar("SWAG_TRAIN"),
+  "validation_data_path": std.extVar("SWAG_DEV"),
+  //"test_data_path": std.extVar("SWAG_TEST")
+  // Replace the above few lines with these if you are Dirk:
+  //"train_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/train.csv"
+  //"validation_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/val.csv"
+  //"test_data_path": "/net/nfs.corp/allennlp/dirkg/data/swag/test.csv"
   "model": {
     "type": "transformer_mc",
7 changes: 5 additions & 2 deletions training_config/rc/bidaf.jsonnet
@@ -20,8 +20,11 @@
       }
     }
   },
-  "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
-  "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
+  "train_data_path": std.extVar("SQUAD_TRAIN"),
+  "validation_data_path": std.extVar("SQUAD_DEV"),
+  // You can replace the above two lines with these to get the actual squad datasets.
+  // "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
+  // "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
   "model": {
     "type": "bidaf",
     "text_field_embedder": {
7 changes: 5 additions & 2 deletions training_config/rc/bidaf_elmo.jsonnet
@@ -23,8 +23,11 @@
       }
     }
   },
-  "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
-  "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
+  "train_data_path": std.extVar("SQUAD_TRAIN"),
+  "validation_data_path": std.extVar("SQUAD_DEV"),
+  // You can replace the above two lines with these to get the actual squad datasets.
+  // "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
+  // "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
   "model": {
     "type": "bidaf",
     "text_field_embedder": {
42 changes: 22 additions & 20 deletions training_config/rc/dialog_qa.jsonnet
@@ -62,27 +62,29 @@
       "num_layers": 1
     },
     "text_field_embedder": {
-      "elmo": {
-        "type": "elmo_token_embedder",
-        "do_layer_norm": false,
-        "dropout": 0.2,
-        "options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
-        "weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
-      },
-      "token_characters": {
-        "type": "character_encoding",
-        "dropout": 0.2,
-        "embedding": {
-          "embedding_dim": 20,
-          "num_embeddings": 262
-        },
-        "encoder": {
-          "type": "cnn",
-          "embedding_dim": 20,
-          "ngram_filter_sizes": [
-            5
-          ],
-          "num_filters": 100
-        }
+      "token_embedders": {
+        "elmo": {
+          "type": "elmo_token_embedder",
+          "do_layer_norm": false,
+          "dropout": 0.2,
+          "options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
+          "weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
+        },
+        "token_characters": {
+          "type": "character_encoding",
+          "dropout": 0.2,
+          "embedding": {
+            "embedding_dim": 20,
+            "num_embeddings": 262
+          },
+          "encoder": {
+            "type": "cnn",
+            "embedding_dim": 20,
+            "ngram_filter_sizes": [
+              5
+            ],
+            "num_filters": 100
+          }
+        }
       }
     }
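The dialog_qa change is pure nesting: the embedders move under a "token_embedders" key, matching the config format AllenNLP 1.x expects for a text field embedder. Roughly the constructor shape the new block maps to, sketched with a trivial embedder in place of ELMo (an illustration, not code from this commit):

    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
    from allennlp.modules.token_embedders import Embedding

    # The "token_embedders" block in the config mirrors this constructor argument.
    embedder = BasicTextFieldEmbedder(
        token_embedders={"tokens": Embedding(embedding_dim=20, num_embeddings=262)}
    )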
8 changes: 4 additions & 4 deletions training_config/rc/naqanet.jsonnet
@@ -42,8 +42,8 @@
     },
     "only_include_pretrained_words": true
   },
-  "train_data_path": "drop_dataset_train.json",
-  "validation_data_path": "drop_dataset_dev.json",
+  "train_data_path": std.extVar("DROP_TRAIN"),
+  "validation_data_path": std.extVar("DROP_DEV"),
   "model": {
     "type": "naqanet",
     "text_field_embedder": {
@@ -106,14 +106,14 @@
       "attention_dropout_prob": 0
     },
     "dropout_prob": 0.1,
-    "regularizer": [
+    "regularizer": {
+      "regexes": [
         [".*", {
           "type": "l2",
           "alpha": 1e-07
         }]
-    ],
+      ],
+    },
     "answering_abilities": [
       "passage_span_extraction",
       "question_span_extraction",
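The regularizer rewrite tracks the same config-format shift: the pattern/regularizer pairs now sit under a "regexes" key, so the bare list becomes an object. Roughly the constructor call the new block deserializes to, sketched with names from allennlp.nn (not code from this commit):

    from allennlp.nn.regularizers import L2Regularizer, RegularizerApplicator

    # {"regularizer": {"regexes": [[".*", {"type": "l2", "alpha": 1e-07}]]}}
    # corresponds to:
    regularizer = RegularizerApplicator(regexes=[(".*", L2Regularizer(alpha=1e-07))])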
(The remaining 9 changed files are not shown.)