forked from GEM-benchmark/NL-Augmenter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinitialize.py
36 lines (29 loc) · 1.01 KB
/
initialize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import spacy
from spacy.tokenizer import Tokenizer
from spacy.util import (
compile_infix_regex,
compile_prefix_regex,
compile_suffix_regex,
)
# Use this file to initialize all the heavy common packages shared by multiple
# transformations and filters.
#
# Shared spaCy pipeline. It starts out as None and is populated by
# initialize_models().
spacy_nlp = None
def initialize_models():
    """Load the shared spaCy pipeline into the module-level ``spacy_nlp``.

    Every call loads ``en_core_web_sm`` again and rebinds the global,
    replacing whatever pipeline object was stored there before.
    """
    global spacy_nlp
    # Load the small English pipeline, then publish it as the shared instance.
    pipeline = spacy.load("en_core_web_sm")
    spacy_nlp = pipeline
def reinitialize_spacy():
    """Reset the shared spaCy pipeline's tokenizer to the language defaults.

    A transformation may customise ``spacy_nlp.tokenizer``; calling this
    afterwards gives the next transformation a default tokenizer to work
    with. A new ``Tokenizer`` is built on the pipeline's existing vocab from
    the tokenizer exceptions and the prefix/suffix/infix patterns found on
    ``spacy_nlp.Defaults``, and assigned to ``spacy_nlp.tokenizer``. No other
    ``Tokenizer`` arguments are passed.

    If the shared pipeline has not been loaded yet, it is loaded first via
    ``initialize_models()`` instead of failing with an ``AttributeError`` on
    ``None``.
    """
    global spacy_nlp
    if spacy_nlp is None:
        # Nothing to reset yet: load the pipeline so the defaults are available.
        initialize_models()

    defaults = spacy_nlp.Defaults
    rules = defaults.tokenizer_exceptions
    infix_re = compile_infix_regex(defaults.infixes)
    prefix_re = compile_prefix_regex(defaults.prefixes)
    suffix_re = compile_suffix_regex(defaults.suffixes)

    # Replace the tokenizer in place so every holder of ``spacy_nlp`` sees the
    # reset; the vocab is reused rather than rebuilt.
    spacy_nlp.tokenizer = Tokenizer(
        spacy_nlp.vocab,
        rules=rules,
        prefix_search=prefix_re.search,
        suffix_search=suffix_re.search,
        infix_finditer=infix_re.finditer,
    )