diff --git a/README.md b/README.md index dc4d671..1763548 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ model, first install the additional dependencies and then initialize the Negator passing `use_transformers=True`: ```shell -pip install -U "negator[transformers]" +pip install -U "negate[transformers]" ``` ```Python diff --git a/negate/negate.py b/negate/negate.py index e80e69c..616ef06 100644 --- a/negate/negate.py +++ b/negate/negate.py @@ -132,12 +132,15 @@ def negate_sentence( negation = self._get_negated_child(root, min_index=negation.i+1) aux_child = self._get_aux_child(root) if negation: - remove, add = self._handle_ca_wo(root, aux_child, negation=negation) + remove, add = self._handle_ca_wo_sha(root, aux_child, negation=negation) # General verbs -> Remove negation and conjugate verb. # If there is an AUX child, we need to "unnegate" the AUX instead. if (not self._is_aux(root) and root.tag_ not in ("VBN", "VBG") and not aux_child and not self._is_verb_to_do(root) - and not self._is_verb_to_be(root)): + and not self._is_verb_to_be(root) + # The latest spaCy model sometimes misclassifies "shan't". + # This is a temporary workaround. + and not root.text.lower() == "sha"): remove = [root.i, negation.i] add = {root.i: Token( text=self.conjugate_verb(root.text, root.tag_), @@ -334,7 +337,7 @@ def _negate_aux_in_doc( negation = self._get_negated_child(aux) # If AUX negated -> Remove negation. if negation: - remove, add = self._handle_ca_wo(aux, negation=negation) + remove, add = self._handle_ca_wo_sha(aux, negation=negation) if not remove and not add: remove = [aux.i, negation.i] add = Token( @@ -396,20 +399,21 @@ def _negate_aux_in_doc( add_tokens=add ) - def _handle_ca_wo( + def _handle_ca_wo_sha( self, *aux_tokens: Optional[SpacyToken], negation: SpacyToken ) -> Tuple[Optional[List[int]], Optional[Dict[int, Token]]]: - """Handle special cases ``"won't"`` and ``"can't"``. + """Handle special cases ``"won't"``, ``"can't"`` and ``"shan't"``. 
These auxiliary verbs are split into ``"wo"`` (AUX) and ``"n't"`` (neg), - and ``"ca"`` (AUX) / ``"n't"`` (neg), respectively. If we simply removed - the negation as with other negated auxiliaries (e.g., ``"cannot"`` → - ``"can"`` (AUX) / ``"not"`` (neg), we remove ``"not"`` and keep - ``"can"``), we would end up with ``"wo"`` and ``"ca"``, which are not - correct words. Therefore, we need to take extra steps to replace these - words by ``"will"`` and ``"can"``, respectively. + ``"ca"`` (AUX) / ``"n't"`` (neg), and ``"sha"`` (AUX) / ``"n't"`` (neg), + respectively. If we simply removed the negation as with other negated + auxiliaries (e.g., ``"cannot"`` → ``"can"`` (AUX) / ``"not"`` (neg), we + remove ``"not"`` and keep ``"can"``), we would end up with ``"wo"``, + ``"ca"``, and ``"sha"``, which are not correct words. Therefore, we need + to take extra steps to replace these words by ``"will"``, ``"can"``, and + ``"shall"``, respectively. Args: *aux_tokens (:obj:`Optional[SpacyToken]`): @@ -440,22 +444,27 @@ def _handle_ca_wo( for aux in aux_tokens: if not aux: continue - # Case AUX "won't" -> Remove negation and replace - # "wo" -> "will". + # Case AUX "won't" -> Remove negation and replace "wo" -> "will". if aux.text.lower() == "wo": remove.append(aux.i) add.update({aux.i: Token( text=" will", has_space_after=negation._.has_space_after )}) - # Case AUX "can't" -> Remove negation and replace - # "ca" -> "can". + # Case AUX "can't" -> Remove negation and replace "ca" -> "can". elif aux.text.lower() == "ca": remove.append(aux.i) add.update({aux.i: Token( text=" can", has_space_after=negation._.has_space_after )}) + # Case AUX "shan't" -> Remove negation and replace "sha" -> "shall". 
+ elif aux.text.lower() == "sha": + remove.append(aux.i) + add.update({aux.i: Token( + text=" shall", + has_space_after=negation._.has_space_after + )}) if remove and add: remove.append(negation.i) return remove, add @@ -507,7 +516,11 @@ def _get_entry_point( """ if contains_inversion: entry_point = [tk for tk in doc - if self._is_aux(tk) or self._is_verb(tk)] + if self._is_aux(tk) + or self._is_verb(tk) + # The latest spaCy model sometimes misclassifies + # "shan't". This is a temporary workaround. + or tk.text.lower() == "sha"] if entry_point: return entry_point[0] root = self._get_root(doc) @@ -793,7 +806,9 @@ def _contains_inversion(self, doc: SpacyDoc) -> bool: aux = None pronoun = None for tk in doc: - if self._is_aux(tk): + # The latest spaCy model sometimes misclassifies "shan't". The + # additional check here is just a temporary workaround. + if self._is_aux(tk) or tk.text.lower() == "sha": aux = tk # Only attend to pronouns that don't refer to a noun (i.e., those # which could act as subjects). @@ -938,11 +953,20 @@ def suppress_stdout(): try: # Model installed? model_module = importlib.import_module(module_name) except ModuleNotFoundError: # Download and install model. - self.logger.info("Downloading model. This only needs to happen " + self.logger.info("Downloading spaCy model. This only needs to happen " "once. Please, be patient...") with suppress_stdout(): spacy.cli.download(model_name, True, False, "-q") model_module = importlib.import_module(module_name) + spacy_model = model_module.load(**kwargs) + installed_model_version: str = spacy_model.meta["version"] + expected_version: str = model_name.split("-")[1] + if installed_model_version != expected_version: + self.logger.info("Updating spaCy model to version %s." 
+ " Please, be patient...", expected_version) + with suppress_stdout(): + spacy.cli.download(model_name, True, False, "-q") + model_module = importlib.import_module(module_name) return model_module.load(**kwargs) def _handle_unsupported(self, fail: Optional[bool] = None): @@ -997,6 +1021,7 @@ def _initialize_aux_negations(self) -> None: "must": "mustn't", "might": "mightn't", "may": "may not", + "shall": "shan't", "should": "shouldn't", "ought": "oughtn't", "'ll": " won't", @@ -1026,6 +1051,7 @@ def _initialize_aux_negations(self) -> None: "must": "must not", "might": "might not", "may": "may not", + "shall": "shall not", "should": "should not", "ought": "ought not", "'ll": " will not", diff --git a/negate/version.py b/negate/version.py index 2db60e1..9e208da 100644 --- a/negate/version.py +++ b/negate/version.py @@ -1,8 +1,8 @@ """Version specification.""" # Negate version. -__version__ = "1.1.3" +__version__ = "1.1.5" # spaCy models version. -EN_CORE_WEB_MD_VERSION: str = "3.7.0" -EN_CORE_WEB_TRF_VERSION: str = "3.7.2" +EN_CORE_WEB_MD_VERSION: str = "3.7.1" +EN_CORE_WEB_TRF_VERSION: str = "3.7.3" diff --git a/tests/README.md b/tests/README.md index d31fb54..e5fcade 100644 --- a/tests/README.md +++ b/tests/README.md @@ -25,7 +25,7 @@ pytest --transformers Remember that to use Transformers, the additional dependencies have to be installed first with: ```shell -pip install -U "negator[transformers]" +pip install -U "negate[transformers]" ```
diff --git a/tests/data.py b/tests/data.py index e0f0472..e72d989 100644 --- a/tests/data.py +++ b/tests/data.py @@ -13,6 +13,8 @@ ("I can.", "I cannot.", False), ("I will.", "I won't.", True), ("I will.", "I will not.", False), + ("I shall.", "I shan't.", True), + ("I shall.", "I shall not.", False), ("I'm excited.", "I'm not excited.", True), ("I'm excited.", "I am not excited.", False), ("I am excited.", "I am not excited.", True), @@ -75,6 +77,10 @@ ("I won't.", "I will.", False), ("I will not.", "I will.", True), ("I will not.", "I will.", False), + ("I shan't.", "I shall.", True), + ("I shan't.", "I shall.", False), + ("I shall not.", "I shall.", True), + ("I shall not.", "I shall.", False), ("I'm not excited.", "I'm excited.", True), ("I'm not excited.", "I'm excited.", False), ("I am not excited.", "I am excited.", True), @@ -120,6 +126,8 @@ ("I can do it.", "I cannot do it.", False), ("I will do it.", "I won't do it.", True), ("I will do it.", "I will not do it.", False), + ("I shall do it.", "I shan't do it.", True), + ("I shall do it.", "I shall not do it.", False), ("I've done it.", "I haven't done it.", True), ("I've done it.", "I have not done it.", False), ("I've been doing it.", "I haven't been doing it.", True), @@ -175,6 +183,10 @@ ("I won't do it.", "I will do it.", False), ("I will not do it.", "I will do it.", True), ("I will no do it.", "I will do it.", False), + ("I shan't do it.", "I shall do it.", True), + ("I shan't do it.", "I shall do it.", False), + ("I shall not do it.", "I shall do it.", True), + ("I shall not do it.", "I shall do it.", False), ("I've not done it.", "I've done it.", True), ("I've not done it.", "I've done it.", False), ("I've not been doing it.", "I've been doing it.", True), @@ -278,12 +290,14 @@ ("Do you know about it?", "Do you not know about it?", False), ("Does she know about it?", "Doesn't she know about it?", True), ("Does she know about it?", "Does she not know about it?", False), - ("Will it work?", "Will it 
not work?", False), ("Will it work?", "Won't it work?", True), - ("Can it work?", "Can it not work?", False), + ("Will it work?", "Will it not work?", False), ("Can it work?", "Can't it work?", True), - ("Could it work?", "Could it not work?", False), + ("Can it work?", "Can it not work?", False), + ("Shall it work?", "Shan't it work?", True), + ("Shall it work?", "Shall it not work?", False), ("Could it work?", "Couldn't it work?", True), + ("Could it work?", "Could it not work?", False), ("Are there many ways it can be done?", "Aren't there many ways it can be done?", True), ("Are there many ways it can be done?", "Are there not many ways it can be done?", False), ("Little did I know their opinion was so biased.", "Little didn't I know their opinion was so biased.", True), @@ -310,10 +324,12 @@ ("Do you not know about it?", "Do you know about it?", False), ("Doesn't she know about it?", "Does she know about it?", False), ("Does she not know about it?", "Does she know about it?", False), - ("Will it not work?", "Will it work?", False), ("Won't it work?", "Will it work?", True), - ("Can it not work?", "Can it work?", False), + ("Will it not work?", "Will it work?", False), ("Can't it work?", "Can it work?", True), + ("Can it not work?", "Can it work?", False), + ("Shan't it work?", "Shall it work?", True), + ("Shall it not work?", "Shall it work?", False), ("Could it not work?", "Could it work?", False), ("Couldn't it work?", "Could it work?", True), ("Aren't there many ways it can be done?", "Are there many ways it can be done?", True),