-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmisc.py
59 lines (48 loc) · 1.29 KB
/
misc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#coding: utf8
import re
def to_utf8(s):
    """Return ``s`` re-encoded as UTF-8 by delegating to ``to_en``."""
    target = 'utf8'
    return to_en(s, target)
def to_gb2312(s):
    """Return ``s`` re-encoded as GB2312 by delegating to ``to_en``."""
    target = 'gb2312'
    return to_en(s, target)
def to_en(s, en):
    """Re-encode the byte string ``s`` into the encoding ``en``.

    The source encoding is guessed: each candidate in a fixed list is tried
    in order, and the first one for which both decoding ``s`` and encoding
    the decoded text as ``en`` succeed determines the result.

    Args:
        s: Value to convert. It is expected to provide a ``decode`` method;
            anything without one is returned untouched.
        en: Name of the target encoding, e.g. ``'utf8'``.

    Returns:
        ``s`` re-encoded as ``en``, or ``s`` itself when no candidate
        encoding works (including when ``en`` is not a known codec).
    """
    # Best effort: values that cannot be decoded at all are passed through.
    if not hasattr(s, 'decode'):
        return s

    encodings = ('utf8', 'gb2312', 'gbk', 'big5', 'gb18030', 'cp950')
    for candidate in encodings:
        try:
            return s.decode(candidate).encode(en)
        except (UnicodeError, LookupError):
            # UnicodeError: wrong guess, or text not representable in ``en``.
            # LookupError: unknown codec name. Anything else (notably
            # KeyboardInterrupt / SystemExit) must propagate.
            continue
    return s
def normalize_name(name):
    """Strip noise from ``name`` so a search on it is more likely to hit.

    Three clean-ups are applied in order:

    1. Whitespace-separated tokens containing ``http``, ``www.``, ``.com``
       or ``.cn`` are dropped; the remaining tokens are re-joined with
       single spaces.
    2. Text enclosed in ``(...)`` or ``[...]`` is removed together with the
       brackets.
    3. The separator characters listed in ``special_chars`` are removed.

    Whitespace left inside the string by steps 2 and 3 is not collapsed;
    only leading and trailing whitespace is stripped from the result.

    Args:
        name: The raw name to clean.

    Returns:
        The cleaned name.
    """
    link_markers = ('http', 'www.', '.com', '.cn')
    kept = [
        token for token in name.split()
        if not any(marker in token for marker in link_markers)
    ]
    cleaned = ' '.join(kept)

    # Raw strings: '\(' and '\[' are invalid escapes in a normal literal.
    # Non-greedy so each bracketed group is removed on its own.
    remove_patterns = (r'\(.*?\)', r'\[.*?\]')
    for pattern in remove_patterns:
        cleaned = re.sub(pattern, '', cleaned)

    # These are literal characters, not patterns, so escape them before
    # handing them to the regex engine.
    special_chars = ('・', '/')
    for char in special_chars:
        cleaned = re.sub(re.escape(char), '', cleaned)

    return cleaned.strip()
def escape_path(file):
    """Return ``file`` wrapped in single quotes.

    Every single quote already present in ``file`` is prefixed with a
    backslash before the surrounding quotes are added.

    NOTE(review): if the result is fed to a POSIX shell, a backslash does
    not escape a quote inside single quotes -- confirm the consumer.
    """
    escaped = file.replace("'", "\\'")
    return "'%s'" % escaped
def get_string(input):
    """Strip ``input``, convert it with ``to_utf8`` and unwrap double quotes.

    At most one leading and one trailing double-quote character are
    removed, each independently of the other.

    Args:
        input: The raw string to read; must provide ``strip``.

    Returns:
        The stripped, converted string without the surrounding quotes.
        Empty or whitespace-only input yields an empty result.
    """
    s = to_utf8(input.strip())
    # Compare slices, not indexes: s[0] / s[-1] raise IndexError when the
    # string is empty, or becomes empty once a lone quote is removed.
    if s[:1] == '"':
        s = s[1:]
    if s[-1:] == '"':
        s = s[:-1]
    return s