-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathexpand_to_subword.py
51 lines (43 loc) · 1.59 KB
/
expand_to_subword.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import re
try:
import expand_to_regex_set
except:
from . import expand_to_regex_set
def expand_to_subword(string, start, end):
    """Expand the selection ``start``..``end`` to the surrounding subword.

    The character class used for the expansion depends on the context: when
    ``_is_inside_upper`` reports an upper case context the search uses
    ``[A-Z]``, otherwise ``[a-z]``.  The actual expansion is delegated to
    ``expand_to_regex_set._expand_to_regex_rule`` with the type ``"subword"``.

    Returns the (possibly adjusted) result of that helper, which is used here
    as a mapping with ``"start"`` and ``"end"`` entries, or ``None`` when the
    helper found nothing or the match is not part of a larger word.
    """
    # Pick the character class matching the casing around the selection.
    char_class = r"[A-Z]" if _is_inside_upper(string, start, end) else r"[a-z]"
    result = expand_to_regex_set._expand_to_regex_rule(
        string, start, end, re.compile(char_class), "subword")
    if result is None:
        return None

    # Pull in a directly preceding capital letter so that
    # camelC|ase| becomes camel|Case|.
    begin = result["start"]
    if re.match(r"[A-Z]", string[begin - 1:begin]):
        result["start"] = begin - 1

    # Only report a match that is embedded in a word, i.e. a real subword.
    return result if _is_true_subword(string, result) else None
def _is_true_subword(string, result):
start = result["start"]
end = result["end"]
char_before = string[start-1:start]
char_after = string[end:end+1]
is_word_before = re.match(r"[a-z0-9_]", char_before, re.IGNORECASE)
is_word_after = re.match(r"[a-z0-9_]", char_after, re.IGNORECASE)
return bool(is_word_before or is_word_after)
def _is_inside_upper(string, start, end):
if start != end:
return string[start:end].isupper()
start = max(0, start-2)
end = min(end + 2, len(string))
sub_str = string[start:end]
contains_upper = re.search(r"[A-Z]{2}", sub_str)
sub_str = sub_str[1:3]
contains_lower = re.search(r"[a-z]", sub_str)
return bool(contains_upper) and not bool(contains_lower)