Skip to content

Commit

Permalink
Merge pull request #43 from amir-zeldes/develop
Browse files (browse the repository at this point in the history)
V3.4.0.0
  • Loading branch information
amir-zeldes authored May 1, 2024
2 parents 061545a + d566dc9 commit e78cb84
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 4 deletions.
29 changes: 27 additions & 2 deletions depedit/depedit.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import io
from six import iteritems, iterkeys

__version__ = "3.3.1.0"
__version__ = "3.4.0.0"

ALIASES = {"form":"text","upostag":"pos","xpostag":"cpos","feats":"morph","deprel":"func","deps":"head2","misc":"func2",
"xpos": "cpos","upos":"pos"}
Expand Down Expand Up @@ -172,6 +172,7 @@ def handle_aliases(orig_action):
orig_action = orig_action.replace(":" + source + "=", ":" + target + "=")
orig_action = orig_action.replace(":" + source + "+=", ":" + target + "+=")
orig_action = orig_action.replace(":" + source + "-=", ":" + target + "-=")
orig_action = orig_action.replace(":" + source + ",=", ":" + target + ",=")
return orig_action

@staticmethod
Expand Down Expand Up @@ -203,6 +204,7 @@ def validate(self):
node = escape(definition.def_text, "&", "/")
criteria = (_crit.replace("%%%%%", "&") for _crit in node.split("&"))
for criterion in criteria:
criterion = escape(criterion, "=", "/")
if re.match(r"(text|pos|cpos|lemma|morph|storage[23]?|edom|func|head|func2|head2|num|form|upos|upostag|xpos|xpostag|feats|deprel|deps|misc|edep|ehead)!?=/[^/=]*/", criterion) is None:
if re.match(r"position!?=/(first|last|mid)/", criterion) is None:
if re.match(r"#S:[A-Za-z_]+!?=/[^/\t]+/",criterion) is None:
Expand All @@ -224,7 +226,7 @@ def validate(self):
for action in self.actions:
commands = action.split(";")
for command in commands: # Node action
if re.match(r"(#[0-9]+([>~]|><)#[0-9]+|#[0-9]+:(func|lemma|text|pos|cpos|morph|storage[23]?|edom|head|head2|func2|num|form|upos|upostag|xpos|xpostag|feats|deprel|deps|misc|edep|ehead|split)[\+-]?=[^;]*)$", command) is None:
if re.match(r"(#[0-9]+([>~]|><)#[0-9]+|#[0-9]+:(func|lemma|text|pos|cpos|morph|storage[23]?|edom|head|head2|func2|num|form|upos|upostag|xpos|xpostag|feats|deprel|deps|misc|edep|ehead|split)[\+,-]?=[^;]*)$", command) is None:
if re.match(r"#S:[A-Za-z_]+=[A-Za-z_]+$|last$|once$", command) is None: # Sentence annotation action or quit
report += "Column 3 invalid action definition: " + command + " and the action was " + action
if "#" not in action:
Expand Down Expand Up @@ -920,12 +922,16 @@ def execute_action(self, result_sets, action_list, transformation):
value = action[action.find("=") + 1:].strip()
add_val = False
subtract_val = False
concat_val = False
if prop.endswith("+"): # Add annotation, e.g. feats+=...
add_val = True
prop = prop[:-1]
elif prop.endswith("-"): # Remove annotation, e.g. feats-=...
subtract_val = True
prop = prop[:-1]
elif prop.endswith(","): # Add to existing values, separated by , and alphabetized, e.g. Cxn=X,Y,Z
concat_val = True
prop = prop[:-1]
group_num_matches = re.findall(r"(\$[0-9]+[LU]?)", value)
if group_num_matches is not None:
for g in group_num_matches:
Expand Down Expand Up @@ -981,6 +987,25 @@ def execute_action(self, result_sets, action_list, transformation):
value = "_"
else:
value = "_"
elif concat_val:
old_val = getattr(result[node_position],prop)
new_vals = sorted(value.split("|"))
new_vals_keys = defaultdict(set)
for pair in new_vals:
this_key, this_val = pair.split("=")
new_vals_keys[this_key].add(this_val)
if old_val != "_" and isinstance(old_val,str): # Some values already exist
kv = []
for ov in sorted(old_val.split("|")):
this_key, this_val = ov.split("=")
if this_key not in new_vals_keys: # Else this needs to be overwritten
kv.append(ov)
else:
kv.append(this_key + "=" + ",".join(sorted(list(new_vals_keys[this_key].union(set(this_val.split(",")))))))
value = "|".join(sorted(kv,key=lambda x:x.lower()))
else:
value = "|".join(new_vals)

if prop == "edep":
if value == "": # Set empty edep
result[node_position].edep = []
Expand Down
Binary file modified docs/DepEdit_user_guide.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
setup(
name = 'depedit',
packages = ['depedit'],
version = '3.3.1.0',
version = '3.4.0.0',
description = 'A simple configurable tool for manipulating dependency trees',
author = 'Amir Zeldes',
author_email = '[email protected]',
url = 'https://github.com/amir-zeldes/depedit',
install_requires=["six"],
license='Apache License, Version 2.0',
download_url = 'https://github.com/amir-zeldes/depedit/releases/tag/3.3.1.0',
download_url = 'https://github.com/amir-zeldes/depedit/releases/tag/3.4.0.0',
keywords = ['NLP', 'parsing', 'syntax', 'dependencies', 'dependency', 'tree', 'treebank', 'conll', 'conllu', 'ud', 'enhanced'],
classifiers = ['Programming Language :: Python',
'Programming Language :: Python :: 2',
Expand Down

0 comments on commit e78cb84

Please sign in to comment.