diff --git a/src/edu/stanford/nlp/trees/CoordinationTransformer.java b/src/edu/stanford/nlp/trees/CoordinationTransformer.java index debf10c151..4432581920 100644 --- a/src/edu/stanford/nlp/trees/CoordinationTransformer.java +++ b/src/edu/stanford/nlp/trees/CoordinationTransformer.java @@ -728,6 +728,10 @@ private static Tree findCCparent(Tree t, Tree root) { private static final TregexPattern BUT_ALSO_PATTERN = TregexPattern.compile("CONJP=conjp < (CC=cc < but) < (RB=rb < also) ?$+ (__=nextNode < (__ < __))"); private static final TsurgeonPattern BUT_ALSO_OPERATION = Tsurgeon.parseOperation("[move cc $- conjp] [move rb $- cc] [if exists nextNode move rb >1 nextNode] [createSubtree ADVP rb] [delete conjp]"); + /* "not only" is not a MWE, so break up the CONJP similar to "but also". The second adverb is matched with a grouped alternation, (?i:only|just|merely|even), so that the ^ and $ anchors apply to every alternative rather than only to the first and last one. NOTE(review): the pattern does not require "not" and the second adverb to be the only children of the matched node, while the operation ends with [delete conjp] -- confirm no other children can be dropped. */ + private static final TregexPattern NOT_ONLY_PATTERN = TregexPattern.compile("CONJP|ADVP=conjp < (RB=not < /^(?i)not$/) < (RB=only < /^(?i:only|just|merely|even)$/) ?$+ (__=nextNode < (__ < __))"); + private static final TsurgeonPattern NOT_ONLY_OPERATION = Tsurgeon.parseOperation("[move not $- conjp] [move only $- not] [if exists nextNode move only >1 nextNode] [if exists nextNode move not >1 nextNode] [createSubtree ADVP not] [createSubtree ADVP only] [delete conjp]"); + /* at least / at most / at best / at worst / ... should be treated as if "at" was a preposition and the RBS was a noun. Assumes that the MWE "at least" has already been extracted. 
*/ @@ -749,6 +753,7 @@ public static Tree MWETransform(Tree t) { Tsurgeon.processPattern(ACCORDING_TO_PATTERN, ACCORDING_TO_OPERATION, t); Tsurgeon.processPattern(BUT_ALSO_PATTERN, BUT_ALSO_OPERATION, t); + Tsurgeon.processPattern(NOT_ONLY_PATTERN, NOT_ONLY_OPERATION, t); Tsurgeon.processPattern(AT_RBS_PATTERN, AT_RBS_OPERATION, t); Tsurgeon.processPattern(AT_ALL_PATTERN, AT_ALL_OPERATION, t); diff --git a/src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java b/src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java index 2579127dee..22c8b96ade 100644 --- a/src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java +++ b/src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java @@ -1312,7 +1312,6 @@ private UniversalEnglishGrammaticalRelations() {} MODIFIER, "S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR|NP(?:-TMP|-ADV)?", tregexCompiler, "NP|NP-TMP|NP-ADV|NX|NML < (PDT|CC|DT=target < /^(?i:either|neither|both)$/ $++ CC)", - "NP|NP-TMP|NP-ADV|NX|NML < (CONJP=target < (RB < /^(?i:not)$/) < (RB|JJ < /^(?i:only|merely|just)$/) $++ CC|CONJP)", // This matches weird/wrong NP-internal preconjuncts where you get (NP PDT (NP NP CC NP)) or similar "NP|NP-TMP|NP-ADV|NX|NML < (PDT|CC|DT=target < /^(?i:either|neither|both)$/ ) < (NP < CC)", "/^S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR$/ < (PDT|DT|CC=target < /^(?i:either|neither|both)$/ $++ CC)", diff --git a/src/edu/stanford/nlp/trees/UniversalPOSMapper.java b/src/edu/stanford/nlp/trees/UniversalPOSMapper.java index dfc394dfd1..1840e583d8 100644 --- a/src/edu/stanford/nlp/trees/UniversalPOSMapper.java +++ b/src/edu/stanford/nlp/trees/UniversalPOSMapper.java @@ -134,6 +134,9 @@ public static void load() { // RB -> PART when it is verbal negation (not or its reductions) { "@VP|SINV|SQ|FRAG|ADVP < (RB=target < /^(?i:not|n't|nt|t|n)$/)", "PART" }, + // "not" as part of a phrase such as "not only", "not just", etc is tagged as PART in UD + { "@ADVP|CONJP <1 (RB=target < 
/^(?i:not|n't|nt|t|n)$/) <2 (__ < only|just|merely|even) !<3 __", "PART" }, + // Otherwise RB -> ADV { "RB=target <... {/.*/}", "ADV" }, diff --git a/src/edu/stanford/nlp/trees/treebank/EnglishPTBTreebankCorrector.java b/src/edu/stanford/nlp/trees/treebank/EnglishPTBTreebankCorrector.java index 21d6c5aa64..da14689740 100644 --- a/src/edu/stanford/nlp/trees/treebank/EnglishPTBTreebankCorrector.java +++ b/src/edu/stanford/nlp/trees/treebank/EnglishPTBTreebankCorrector.java @@ -168,6 +168,16 @@ private static BufferedReader getBufferedReader(String source) { "adjoin (NP NN@) newnp\n" + '\n') + + // Fix not_RB only_JJ, which should generally be not_RB only_RB + // and put it under a CONJP instead of an ADVP + ("ADVP|CONJP <1 (__ < /^(?i:not)$/) <2 (JJ=bad < only|just|merely|even) !<3 __\n" + + "relabel bad RB\n" + + '\n') + + + ("ADVP=bad <1 (__ < /^(?i:not)$/) <2 (RB < only|just|merely|even) !<3 __\n" + + "relabel bad CONJP\n" + + '\n') + + // Fix some cases of 'as well as' not made into a CONJP unit // There are a few other weird cases that should also be reviewed with the tregex // well|Well|WELL , as|AS|As . as|AS|As !>(__ > @CONJP)