Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement negation of the modal verb "shall" and auto-update spaCy models #13

Merged
merged 6 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ model, first install the additional dependencies and then initialize the Negator
passing `use_transformers=True`:

```shell
pip install -U "negator[transformers]"
pip install -U "negate[transformers]"
```

```Python
Expand Down
62 changes: 44 additions & 18 deletions negate/negate.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,15 @@ def negate_sentence(
negation = self._get_negated_child(root, min_index=negation.i+1)
aux_child = self._get_aux_child(root)
if negation:
remove, add = self._handle_ca_wo(root, aux_child, negation=negation)
remove, add = self._handle_ca_wo_sha(root, aux_child, negation=negation)
# General verbs -> Remove negation and conjugate verb.
# If there is an AUX child, we need to "unnegate" the AUX instead.
if (not self._is_aux(root) and root.tag_ not in ("VBN", "VBG")
and not aux_child and not self._is_verb_to_do(root)
and not self._is_verb_to_be(root)):
and not self._is_verb_to_be(root)
# The latest spaCy model sometimes misclassifies "shan't".
# This is a temporary workaround.
and not root.text.lower() == "sha"):
remove = [root.i, negation.i]
add = {root.i: Token(
text=self.conjugate_verb(root.text, root.tag_),
Expand Down Expand Up @@ -334,7 +337,7 @@ def _negate_aux_in_doc(
negation = self._get_negated_child(aux)
# If AUX negated -> Remove negation.
if negation:
remove, add = self._handle_ca_wo(aux, negation=negation)
remove, add = self._handle_ca_wo_sha(aux, negation=negation)
if not remove and not add:
remove = [aux.i, negation.i]
add = Token(
Expand Down Expand Up @@ -396,20 +399,21 @@ def _negate_aux_in_doc(
add_tokens=add
)

def _handle_ca_wo(
def _handle_ca_wo_sha(
self,
*aux_tokens: Optional[SpacyToken],
negation: SpacyToken
) -> Tuple[Optional[List[int]], Optional[Dict[int, Token]]]:
"""Handle special cases ``"won't"`` and ``"can't"``.
"""Handle special cases ``"won't"``, ``"can't"`` and ``"shan't"``.

These auxiliary verbs are split into ``"wo"`` (AUX) and ``"n't"`` (neg),
and ``"ca"`` (AUX) / ``"n't"`` (neg), respectively. If we simply removed
the negation as with other negated auxiliaries (e.g., ``"cannot"`` →
``"can"`` (AUX) / ``"not"`` (neg), we remove ``"not"`` and keep
``"can"``), we would end up with ``"wo"`` and ``"ca"``, which are not
correct words. Therefore, we need to take extra steps to replace these
words by ``"will"`` and ``"can"``, respectively.
``"ca"`` (AUX) / ``"n't"`` (neg), and ``"sha"`` (AUX) / ``"n't"`` (neg),
respectively. If we simply removed the negation as with other negated
auxiliaries (e.g., ``"cannot"`` → ``"can"`` (AUX) / ``"not"`` (neg), we
remove ``"not"`` and keep ``"can"``), we would end up with ``"wo"``,
``"ca"``, and ``"sha"``, which are not correct words. Therefore, we need
to take extra steps to replace these words by ``"will"``, ``"can"``, and
``"shall"``, respectively.

Args:
*aux_tokens (:obj:`Optional[SpacyToken]`):
Expand Down Expand Up @@ -440,22 +444,27 @@ def _handle_ca_wo(
for aux in aux_tokens:
if not aux:
continue
# Case AUX "won't" -> Remove negation and replace
# "wo" -> "will".
# Case AUX "won't" -> Remove negation and replace "wo" -> "will".
if aux.text.lower() == "wo":
remove.append(aux.i)
add.update({aux.i: Token(
text=" will",
has_space_after=negation._.has_space_after
)})
# Case AUX "can't" -> Remove negation and replace
# "ca" -> "can".
# Case AUX "can't" -> Remove negation and replace "ca" -> "can".
elif aux.text.lower() == "ca":
remove.append(aux.i)
add.update({aux.i: Token(
text=" can",
has_space_after=negation._.has_space_after
)})
# Case AUX "shan't" -> Remove negation and replace "sha" -> "shall".
elif aux.text.lower() == "sha":
remove.append(aux.i)
add.update({aux.i: Token(
text=" shall",
has_space_after=negation._.has_space_after
)})
if remove and add:
remove.append(negation.i)
return remove, add
Expand Down Expand Up @@ -507,7 +516,11 @@ def _get_entry_point(
"""
if contains_inversion:
entry_point = [tk for tk in doc
if self._is_aux(tk) or self._is_verb(tk)]
if self._is_aux(tk)
or self._is_verb(tk)
# The latest spaCy model sometimes misclassifies
# "shan't". This is a temporary workaround.
or tk.text.lower() == "sha"]
if entry_point:
return entry_point[0]
root = self._get_root(doc)
Expand Down Expand Up @@ -793,7 +806,9 @@ def _contains_inversion(self, doc: SpacyDoc) -> bool:
aux = None
pronoun = None
for tk in doc:
if self._is_aux(tk):
# The latest spaCy model sometimes misclassifies "shan't". The
# additional check here is just a temporary workaround.
if self._is_aux(tk) or tk.text.lower() == "sha":
aux = tk
# Only attend to pronouns that don't refer to a noun (i.e., those
# which could act as subjects).
Expand Down Expand Up @@ -938,11 +953,20 @@ def suppress_stdout():
try: # Model installed?
model_module = importlib.import_module(module_name)
except ModuleNotFoundError: # Download and install model.
self.logger.info("Downloading model. This only needs to happen "
self.logger.info("Downloading spaCy model. This only needs to happen "
"once. Please, be patient...")
with suppress_stdout():
spacy.cli.download(model_name, True, False, "-q")
model_module = importlib.import_module(module_name)
spacy_model = model_module.load(**kwargs)
installed_model_version: str = spacy_model.meta["version"]
expected_version: str = model_name.split("-")[1]
if installed_model_version != expected_version:
self.logger.info("Updating spaCy model to version %s."
" Please, be patient...", expected_version)
with suppress_stdout():
spacy.cli.download(model_name, True, False, "-q")
model_module = importlib.import_module(module_name)
return model_module.load(**kwargs)

def _handle_unsupported(self, fail: Optional[bool] = None):
Expand Down Expand Up @@ -997,6 +1021,7 @@ def _initialize_aux_negations(self) -> None:
"must": "mustn't",
"might": "mightn't",
"may": "may not",
"shall": "shan't",
"should": "shouldn't",
"ought": "oughtn't",
"'ll": " won't",
Expand Down Expand Up @@ -1026,6 +1051,7 @@ def _initialize_aux_negations(self) -> None:
"must": "must not",
"might": "might not",
"may": "may not",
"shall": "shall not",
"should": "should not",
"ought": "ought not",
"'ll": " will not",
Expand Down
6 changes: 3 additions & 3 deletions negate/version.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Version specification."""

# Negate version.
__version__ = "1.1.3"
__version__ = "1.1.5"

# spaCy models version.
EN_CORE_WEB_MD_VERSION: str = "3.7.0"
EN_CORE_WEB_TRF_VERSION: str = "3.7.2"
EN_CORE_WEB_MD_VERSION: str = "3.7.1"
EN_CORE_WEB_TRF_VERSION: str = "3.7.3"
2 changes: 1 addition & 1 deletion tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pytest --transformers

Remember that to use Transformers, the additional dependencies have to be installed first with:
```shell
pip install -U "negator[transformers]"
pip install -U "negate[transformers]"
```

<br>
Expand Down
26 changes: 21 additions & 5 deletions tests/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
("I can.", "I cannot.", False),
("I will.", "I won't.", True),
("I will.", "I will not.", False),
("I shall.", "I shan't.", True),
("I shall.", "I shall not.", False),
("I'm excited.", "I'm not excited.", True),
("I'm excited.", "I am not excited.", False),
("I am excited.", "I am not excited.", True),
Expand Down Expand Up @@ -75,6 +77,10 @@
("I won't.", "I will.", False),
("I will not.", "I will.", True),
("I will not.", "I will.", False),
("I shan't.", "I shall.", True),
("I shan't.", "I shall.", False),
("I shall not.", "I shall.", True),
("I shall not.", "I shall.", False),
("I'm not excited.", "I'm excited.", True),
("I'm not excited.", "I'm excited.", False),
("I am not excited.", "I am excited.", True),
Expand Down Expand Up @@ -120,6 +126,8 @@
("I can do it.", "I cannot do it.", False),
("I will do it.", "I won't do it.", True),
("I will do it.", "I will not do it.", False),
("I shall do it.", "I shan't do it.", True),
("I shall do it.", "I shall not do it.", False),
("I've done it.", "I haven't done it.", True),
("I've done it.", "I have not done it.", False),
("I've been doing it.", "I haven't been doing it.", True),
Expand Down Expand Up @@ -175,6 +183,10 @@
("I won't do it.", "I will do it.", False),
("I will not do it.", "I will do it.", True),
("I will no do it.", "I will do it.", False),
("I shan't do it.", "I shall do it.", True),
("I shan't do it.", "I shall do it.", False),
("I shall not do it.", "I shall do it.", True),
("I shall not do it.", "I shall do it.", False),
("I've not done it.", "I've done it.", True),
("I've not done it.", "I've done it.", False),
("I've not been doing it.", "I've been doing it.", True),
Expand Down Expand Up @@ -278,12 +290,14 @@
("Do you know about it?", "Do you not know about it?", False),
("Does she know about it?", "Doesn't she know about it?", True),
("Does she know about it?", "Does she not know about it?", False),
("Will it work?", "Will it not work?", False),
("Will it work?", "Won't it work?", True),
("Can it work?", "Can it not work?", False),
("Will it work?", "Will it not work?", False),
("Can it work?", "Can't it work?", True),
("Could it work?", "Could it not work?", False),
("Can it work?", "Can it not work?", False),
("Shall it work?", "Shan't it work?", True),
("Shall it work?", "Shall it not work?", False),
("Could it work?", "Couldn't it work?", True),
("Could it work?", "Could it not work?", False),
("Are there many ways it can be done?", "Aren't there many ways it can be done?", True),
("Are there many ways it can be done?", "Are there not many ways it can be done?", False),
("Little did I know their opinion was so biased.", "Little didn't I know their opinion was so biased.", True),
Expand All @@ -310,10 +324,12 @@
("Do you not know about it?", "Do you know about it?", False),
("Doesn't she know about it?", "Does she know about it?", False),
("Does she not know about it?", "Does she know about it?", False),
("Will it not work?", "Will it work?", False),
("Won't it work?", "Will it work?", True),
("Can it not work?", "Can it work?", False),
("Will it not work?", "Will it work?", False),
("Can't it work?", "Can it work?", True),
("Can it not work?", "Can it work?", False),
("Shan't it work?", "Shall it work?", True),
("Shall it not work?", "Shall it work?", False),
("Could it not work?", "Could it work?", False),
("Couldn't it work?", "Could it work?", True),
("Aren't there many ways it can be done?", "Are there many ways it can be done?", True),
Expand Down
Loading