Skip to content

Commit

Permalink
V3.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
amir-zeldes committed Aug 12, 2021
1 parent baaa95b commit d417064
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 52 deletions.
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,20 @@ Batch mode options:
.depedit)
```

For more information see https://corpling.uis.georgetown.edu/depedit/ and read the included User Guide PDF in doc/.
For more information see https://corpling.uis.georgetown.edu/depedit/ and read the included User Guide PDF in doc/.

## Citing

If you are using DepEdit in a scholarly paper, please cite the following reference:

```
@InProceedings{PengZeldes2018,
author = {Siyao Peng and Amir Zeldes},
title = {All Roads Lead to {UD}: Converting {S}tanford and {P}enn Parses to {E}nglish {U}niversal {D}ependencies with Multilayer Annotations},
booktitle = {Proceedings of the Joint Workshop on Linguistic Annotation, Multiword Expressions and Constructions ({LAW}-{MWE}-{C}x{G}-2018)},
year = {2018},
pages = {167--177},
address = {Santa Fe, NM},
url = {https://www.aclweb.org/anthology/W18-4918}
}
```
2 changes: 1 addition & 1 deletion depedit/depedit.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import io
from six import iteritems, iterkeys

__version__ = "3.1.0.0"
__version__ = "3.2.0.0"

ALIASES = {"form":"text","upostag":"pos","xpostag":"cpos","feats":"morph","deprel":"func","deps":"head2","misc":"func2",
"xpos": "cpos","upos":"pos"}
Expand Down
Binary file modified docs/DepEdit_user_guide.pdf
Binary file not shown.
95 changes: 62 additions & 33 deletions examples/eng_enhance.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,35 @@
# set a variable for transitive verbal lemmas that trigger subject control for `xcomp`
{$subject_control}=/promise|threaten|offer|propose/

# erase any existing enhanced dependencies in the input
head2=/.*/ none #1:head2=;#1:edep=
# erase any existing enhanced dependencies in the input except for ellipsis nodes
num!=/.*\..*/&head2=/.*/ none #1:head2=;#1:edep=

# store desired lemma form for `conj`, `case` and `mark` augmentation
# use lowercase of lemma by default
lemma=/(.*)/ none #1:head2=$1L
lemma=/(.*)/ none #1:storage2=$1L
# for participles used as mark/case, use the lowercased word form, not the verbal lemma
text=/(.*)/&xpos=/VB[GN]/ none #1:head2=$1L
text=/(.*)/&xpos=/VB[GN]/ none #1:storage2=$1L
# for non-alphabetic words set to empty to avoid validation errors
text=/.*[^A-Za-z]/ none #1:head2=
text=/.*[^A-Za-z]/ none #1:storage2=
# try known mappings for specific non-alphabetic cases
text=/[–-]+/ none #1:head2=to
text=/\u002F+/ none #1:head2=slash
text=/\+/ none #1:head2=plus
text=/-/ none #1:head2=minus
text=/@/ none #1:head2=at
text=/vs/ none #1:head2=versus
text=/:/ none #1:head2=colon
text=/±/ none #1:head2=plus_minus
text=/à/ none #1:head2=a
text=/ca?\./ none #1:head2=circa
text=/[–-]+/ none #1:storage2=to
text=/\u002F+/ none #1:storage2=slash
text=/\+/ none #1:storage2=plus
text=/-/ none #1:storage2=minus
text=/@/ none #1:storage2=at
text=/[Vv][Ss]\.?/ none #1:storage2=versus
text=/:/ none #1:storage2=colon
text=/±/ none #1:storage2=plus_minus
text=/à/ none #1:storage2=a
text=/ca?\./ none #1:storage2=circa
text=/&/ none #1:storage2=and
text=/n'/ none #1:storage2=and
text=/cuz/ none #1:storage2=because
text=/x/&func=/case/ none #1:storage2=by
# default edep - duplicate the regular dependency
text=/.*/;func=/(.*)/ #1>#2 #1~#2;#2:edep=$1
func=/root/ none #1:ehead=0;#1:edep=root
# annotate tokens internally to mark whether they have certain dependents
xpos=/[VN].*|JJ.?|RB.?/;func=/aux.*/ #1>#2 #1:storage+=hasaux
Expand All @@ -38,29 +43,35 @@ xpos=/VBN/;func=/aux:pass/ #1>#2 #1:storage+=haspass
xpos=/[NP].*/;func=/case/ #1>#2 #1:storage+=hascase
# handle augmented `case`
# two and three word `fixed` expressions
text=/.*/;func=/^(obl|nmod)$/;head2=/(.*)/&func=/(case)/;func=/fixed/&head2=/(.*)/;func=/fixed/&head2=/(.*)/ #1>#2>#3>#4;#3>#5;#4.#5 #1~#2;#2:edep=$1:$2_$4_$5;#3:storage=hasdblfixed
text=/.*/;func=/^(obl|nmod)$/;head2=/(.*)/&func=/(case)/&storage!=/hasdblfixed/;func=/fixed/&head2=/(.*)/ #1>#2>#3>#4 #1~#2;#2:edep=$1:$2_$4;#3:storage=hasfixed
# combo case + mark
func=/advcl/;func=/case/&storage2=/(.*)/;func=/mark/&xpos=/W.*/&storage2=/(.*)/;text=/.*/ #1>#2;#1>#3;#2.#3;#4>#1 #4~#1;#1:edep=advcl:$1_$2;#2:storage=hasdblfixed
# combo mark-fixed + mark
func=/advcl/;func=/mark/&storage2=/(.*)/;func=/fixed/&storage2=/(.*)/;func=/mark/&storage2=/(.*)/;text=/.*/ #1>#2>#3;#1>#4;#2.#3.#4;#5>#1 #5~#1;#1:edep=advcl:$1_$2_$3;#2:storage=hasdblfixed
# two and three word `fixed` and `goeswith` expressions
text=/.*/;func=/^(obl|nmod)$/;storage2=/(.*)/&func=/(case)/;func=/fixed/&storage2=/(.*)/;func=/fixed/&storage2=/(.*)/ #1>#2>#3>#4;#3>#5;#4.#5 #1~#2;#2:edep=$1:$2_$4_$5;#3:storage=hasdblfixed
text=/.*/;func=/^(obl|nmod)$/;storage2=/(.*)/&func=/(case)/&storage!=/hasdblfixed/;func=/fixed/&storage2=/(.*)/ #1>#2>#3>#4 #1~#2;#2:edep=$1:$2_$4;#3:storage=hasfixed
text=/.*/;func=/^(obl|nmod)$/;storage2=/(.*)/&func=/(case)/&storage!=/hasdblfixed/;func=/goeswith/&storage2=/(.*)/ #1>#2>#3>#4 #1~#2;#2:edep=$1:$2$4;#3:storage=hasgoeswith
# all other regular cases - and ruling out genitive 's as the augmentation (xpos=POS)
text=/.*/;func=/^(obl|nmod)$/;head2=/(.*)/&func=/(case)/&xpos!=/POS/&storage!=/hasfixed/ #1>#2>#3 #1~#2;#2:edep=$1:$2;#3:storage=
text=/.*/;func=/^(obl|nmod)$/;head2=/(.*)/&func=/(case)/&xpos!=/POS/&storage!=/hasfixed/;func=/conj/&storage!=/.*hascase.*/ #1>#2>#3;#2>#4 #1~#4;#4:edep=$1:$2
text=/.*/;func=/^(obl|nmod)$/;storage2=/(.*)/&func=/(case)/&xpos!=/POS/&storage!=/hasfixed/ #1>#2>#3 #1~#2;#2:edep=$1:$2;#3:storage=
text=/.*/;func=/^(obl|nmod)$/;storage2=/(.*)/&func=/(case)/&xpos!=/POS/&storage!=/hasfixed/;func=/conj/&storage!=/.*hascase.*/ #1>#2>#3;#2>#4 #1~#4;#4:edep=$1:$2
# handle double case, e.g. obl:out_of for "out of X", rather than obl:out
text=/.*/;func=/^(obl|nmod)$/;func=/case/&xpos!=/POS/&head2=/(.*)/;func=/case/&xpos!=/POS/&head2=/(.*)/ #3.*#4;#1>#2;#2>#3;#2>#4 #1~#2;#2:edep=$1:$2_$3;#3:storage=;#4:storage=
text=/.*/;func=/^(obl|nmod)$/;func=/case/&xpos!=/POS/&storage2=/(.*)/;func=/case/&xpos!=/POS/&storage2=/(.*)/ #3.*#4;#1>#2;#2>#3;#2>#4 #1~#2;#2:edep=$1:$2_$3;#3:storage=;#4:storage=
# handle augmented `mark`
# two and three word `fixed` expressions
text=/.*/;func=/^(advcl|acl)$/;head2=/(.*)/&func=/(mark)/;func=/fixed/&head2=/(.*)/;func=/fixed/&head2=/(.*)/ #1>#2>#3>#4;#3>#5;#4.#5 #1~#2;#2:edep=$1:$2_$4_$5;#3:storage=hasdblfixed
text=/.*/;func=/^(advcl|acl)$/;head2=/(.*)/&func=/(mark)/&storage!=/hasdblfixed/;func=/fixed/&head2=/(.*)/ #1>#2>#3>#4 #1~#2;#2:edep=$1:$2_$4;#3:storage=hasfixed
text=/.*/;func=/^(advcl|acl)$/;storage2=/(.*)/&func=/(mark)/;func=/fixed/&storage2=/(.*)/;func=/fixed/&storage2=/(.*)/ #1>#2>#3>#4;#3>#5;#4.#5 #1~#2;#2:edep=$1:$2_$4_$5;#3:storage=hasdblfixed
text=/.*/;func=/^(advcl|acl)$/;storage2=/(.*)/&func=/(mark)/&storage!=/hasdblfixed/;func=/fixed/&storage2=/(.*)/ #1>#2>#3>#4 #1~#2;#2:edep=$1:$2_$4;#3:storage=hasfixed
text=/.*/;func=/^(advcl|acl)$/;storage2=/(.*)/&func=/(mark)/&storage!=/hasdblfixed/;func=/goeswith/&storage2=/(.*)/ #1>#2>#3>#4 #1~#2;#2:edep=$1:$2$4;#3:storage=hasgoeswith
# all other regular cases - and ruling out genitive 's as the augmentation (xpos=POS)
text=/.*/;func=/^(advcl|acl)$/;head2=/(.*)/&func=/(mark)/&xpos!=/POS/&storage!=/hasfixed/ #1>#2>#3 #1~#2;#2:edep=$1:$2;#3:storage=
text=/.*/;func=/^(advcl|acl)$/;head2=/(.*)/&func=/(mark)/&xpos!=/POS/&storage!=/hasfixed/;func=/conj/&storage!=/.*hascase.*/ #1>#2>#3;#2>#4 #1~#4;#4:edep=$1:$2
text=/.*/;func=/^(advcl|acl)$/;storage2=/(.*)/&func=/(mark)/&xpos!=/POS/&storage!=/hasfixed/ #1>#2>#3 #1~#2;#2:edep=$1:$2;#3:storage=
text=/.*/;func=/^(advcl|acl)$/;storage2=/(.*)/&func=/(mark)/&xpos!=/POS/&storage!=/hasfixed/;func=/conj/&storage!=/.*hascase.*/ #1>#2>#3;#2>#4 #1~#4;#4:edep=$1:$2
# handle double mark, e.g. acl:for_to in "for X to Y", rather than acl:for
text=/.*/;func=/^(advcl|acl)$/;func=/mark/&xpos!=/POS/&head2=/(.*)/;func=/mark/&xpos!=/POS/&head2=/(.*)/ #3.*#4;#1>#2;#2>#3;#2>#4 #1~#2;#2:edep=$1:$2_$3;#3:storage=;#4:storage=
text=/.*/;func=/^(advcl|acl)$/;func=/mark/&xpos!=/POS/&storage2=/(.*)/;func=/mark/&xpos!=/POS/&storage2=/(.*)/ #3.*#4;#1>#2;#2>#3;#2>#4 #1~#2;#2:edep=$1:$2_$3;#3:storage=;#4:storage=
# augment `conj` with `cc` lemma
# two and three word `fixed` expressions
text=/.*/;func=/^(conj)$/;head2=/(.*)/&func=/(cc)/;func=/fixed/&head2=/(.*)/;func=/fixed/&head2=/(.*)/ #1>#2>#3>#4;#3>#5;#4.#5 #1~#2;#2:edep=$1:$2_$4_$5;#3:storage=hasdblfixed
text=/.*/;func=/^(conj)$/;head2=/(.*)/&func=/(cc)/&storage!=/hasdblfixed/;func=/fixed/&head2=/(.*)/ #1>#2>#3>#4 #1~#2;#2:edep=$1:$2_$4;#3:storage=hasfixed
text=/.*/;func=/^(conj)$/;storage2=/(.*)/&func=/(cc)/;func=/fixed/&storage2=/(.*)/;func=/fixed/&storage2=/(.*)/ #1>#2>#3>#4;#3>#5;#4.#5 #1~#2;#2:edep=$1:$2_$4_$5;#3:storage=hasdblfixed
text=/.*/;func=/^(conj)$/;storage2=/(.*)/&func=/(cc)/&storage!=/hasdblfixed/;func=/fixed/&storage2=/(.*)/ #1>#2>#3>#4 #1~#2;#2:edep=$1:$2_$4;#3:storage=hasfixed
# all other regular cases
text=/.*/;func=/^(conj)$/;head2=/(.*)/&func=/(cc)/&storage!=/hasfixed/ #1>#2>#3 #1~#2;#2:edep=$1:$2;#3:storage=
text=/.*/;func=/^(conj)$/;storage2=/(.*)/&func=/(cc)/&storage!=/hasfixed/ #1>#2>#3 #1~#2;#2:edep=$1:$2;#3:storage=
# multiple conj
text=/.*/;func=/conj/&edep=/.*conj:([^|]+).*/;func=/conj/&edep!=/.*conj:.*/ #1>#2;#1>#3;#3.*#2 #1~#3;#3:edep=conj:$1

Expand Down Expand Up @@ -91,7 +102,7 @@ func=/(.subj).*/;text=/.*/;func=/conj/&storage!=/.*hassubj.*/;func=/xcomp/&stora

# coord general - duplicate all resulting deps and edeps on `conj`; note that `parataxis` is not carried over
text=/.*/;func=/(.*)/&func!=/parataxis/;func=/conj/ #1>#2>#3 #1~#3;#3:edep=$1
text=/.*/;edep=/(.*)/&edep!=/conj.*/&func!=/parataxis/;func=/conj/ #1~#2;#2>#3 #1~#3;#3:edep=$1
text=/.*/;edep=/(.*)/&edep!=/conj.*/&func!=/parataxis/;func=/conj|root/ #1~#2;#2>#3 #1~#3;#3:edep=$1

# coord subjects
text=/.*/;func=/(.subj.*)/;func=/conj/ #1>#2>#3 #1~#3;#3:edep=$1
Expand Down Expand Up @@ -120,14 +131,32 @@ text=/.*/;func=/acl:relcl/;func=/(.*)/&xpos=/^W(DT|P.?)$/ #1>#2>#3 #1~#3;#3:edep
# relative in embedded PP ("to which")
text=/.*/;func=/acl:relcl/;func=/(nmod|obl)/&xpos=/^W(DT|P.?)$/;func=/case/&lemma=/(.*)/ #1>#2>#3>#4 #2~#1;#1:edep=$1:$3
# coordinate matrix NP with embedded PP ([X and Y] to which Z)
text=/.*/;func=/acl:relcl/;func=/(nmod|obl)/&xpos=/^W(DT|P.?)$/;func=/case/&lemma=/(.*)/;func=/conj/&storage!=/.*hascase.*/ #1>#2>#3>#4;#1>#5 #2~#5;#5:edep=$1:$3
text=/.*/;func=/acl:relcl/;func=/(nmod|obl)/&xpos=/^W(DT|P.?)$/;func=/case/&lemma=/(.*)/;func=/conj/&storage!=/.*hascase.*/ #1>#2>#3>#4;#1>#5;#5.*#3 #2~#5;#5:edep=$1:$3
# relative pronoun in PP embedded in NP ("... most of whom")
text=/.*/;func=/acl:relcl/;func=/.*/;func=/(nmod)/&xpos=/^W(DT|P.?)$/;func=/case/&lemma=/(.*)/ #1>#2>#3>#4>#5 #4:edep=
text=/.*/;func=/acl:relcl/;func=/.*/;func=/(nmod)/&xpos=/^W(DT|P.?)$/;func=/case/&lemma=/(.*)/ #1>#2>#3>#4>#5 #1~#4;#4:edep=ref;#3~#1;#1:edep=$1:$3
# exception to previous: prevent cycle when acl:relcl head has a coordinate predicate
func=/(.*)/;func=/acl:relcl/;func=/.*/;func=/(nmod)/&xpos=/^W(DT|P.?)$/;func=/case/&lemma=/(.*)/;func=/conj/;text=/.*/ #1>#2>#3>#4>#5;#2>#6;#7>#1 #1:edep=;#7~#1;#1:edep=$1

# coord general - duplicate all resulting deps and edeps on `conj`; note that `parataxis` and `root` are not carried over
text=/.*/;func=/(.*)/&func!=/parataxis/;func=/conj/ #1>#2>#3 #1~#3;#3:edep=$1
text=/.*/;edep=/(.*)/&edep!=/conj.*/&func!=/parataxis/;func=/conj/ #1~#2;#2>#3 #1~#3;#3:edep=$1
text=/.*/;func=/(.*)/&func!=/parataxis|root/;func=/conj/ #1>#2>#3 #1~#3;#3:edep=$1
#text=/.*/;edep=/(.*)/&edep!=/conj.*/&func!=/parataxis|root/;func=/conj/ #1~#2;#2>#3 #1~#3;#3:edep=$1
edom=/.*?([0-9.]+\|\|(nmod|obl|conj):[a-z]+).*/&edep!=/conj.*/;func=/conj/ #1>#2 #2:edom=$1

# coord - unlike coordination: swap the relation propagated to a conjunct whose category differs from the first conjunct
# compound + amod
text=/.*/;func=/compound/;func=/conj/&xpos=/J.*|V.N/&edom=/(.*?[0-9.]+\|\|)compound(.*)/ #1>#2>#3;#1~#3 #3:edom=$1amod$2
# amod + compound
text=/.*/;func=/amod/;func=/conj/&xpos=/NN.*/&edom=/(.*?[0-9.]+\|\|)amod(.*)/ #1>#2>#3;#1~#3 #3:edom=$1compound$2
# nsubj + csubj
text=/.*/;func=/nsubj/;func=/conj/&xpos=/V.G/&edom=/(.*?[0-9.]+\|\|)nsubj(.*)/ #1>#2>#3;#1~#3 #3:edom=$1csubj$2

# supertokens (=multiword tokens, MWTs)
# uncomment the following lines to introduce MWTs for words like "don't" to data which lacks them
#text=/^(?i)gon|wan/;text=/^(?i)na/ #1.#2 #1><#2
#text=/^(?i)dun/;text=/^(?i)no/ #1.#2 #1><#2
#text=/^(?i)out|got/;text=/^(?i)ta/ #1.#2 #1><#2
#text=/^(?i)c'?m/&misc=/.*SpaceAfter.No.*/;text=/^(?i)on/ #1.#2 #1><#2
#misc=/.*SpaceAfter.No.*/;text=/^(?i)[^A-Za-z]?(ll|d|m|ve|s)/&xpos=/VBP|MD|VHP|VBZ|VHZ/ #1.#2 #1><#2
#misc=/.*SpaceAfter.No.*/;xpos=/POS/ #1.#2 #1><#2
#misc=/.*SpaceAfter.No.*/;lemma=/n[o']?t/ #1.#2 #1><#2
Loading

0 comments on commit d417064

Please sign in to comment.