Skip to content

Commit

Permalink
lint
Browse files Browse the repository at this point in the history
  • Loading branch information
svenkreiss committed Jun 4, 2020
1 parent e9a6ddc commit 77b514b
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 62 deletions.
10 changes: 10 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[BASIC]

variable-rgx=[a-z0-9_]{1,30}$
good-names=ap,ar,ax,d,f,g,gt,h,i,im,lr,p,r,s,t,t1,t2,th,v,vs,w,wh,x,x1,x2,xs,y,ys,xy



[TYPECHECK]

disable=duplicate-code,missing-docstring,invalid-name,redefined-outer-name
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ matrix:
install:
- "pip3 install --editable .[dev]"
script:
- pylint unicodeit --disable=fixme
- pylint unicodeit/convert.py --disable=fixme
- pylint tests/*.py --disable=fixme
- pytest -vv
156 changes: 95 additions & 61 deletions unicodeit/convert.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@

#
# test case:
# python unicodeit.py \\Sigma def\\Sigma_{01234}abc\\alpha_{567}ggg\\beta_{1234}lll "\\Sigma e_0 e^3" def^{01234}abc\\alpha^{567abc}ggg\\beta^{1234=\(5\)}lll "\\:) \\:G"
# python unicodeit.py a_{\\beta\\gamma\\phi\\rho\\chi} b_{aeox} c_{hklmnpst} d_j
# python unicodeit.py "m^{ABDEGHIJKLMNOPRTUWabcdefghiklmnoprstuvwxyz\beta\gamma\delta\phi\chi<>}"
#

import re
from xml.etree import ElementTree as ET

Expand All @@ -16,46 +8,50 @@
def addSymbols(replacements):
#isoent
for n in nodes:
if(n.tag == 'char'):
if n.tag == 'char':
unicode = ''
latex = ''

unicodeElement = n.find('unicode')
if unicodeElement is not None:
if 'value' in unicodeElement.attrib:
unicode = unicodeElement.attrib['value']
latexElement = n.find('latex/seq')

latexElement = n.find('latex/seq')
if latexElement is not None:
latex = latexElement.text
latexElement = n.find('latex/mathseq')

latexElement = n.find('latex/mathseq')
if latexElement is not None:
latex = latexElement.text

# remove elements of the form "\^{A}" and "\_{A}"
if len(latex) == 5 and latex[2] == "{" and latex[4] == "}" and (latex[1] in ['^','_']):
if len(latex) == 5 and latex[2] == "{" and latex[4] == "}" and (latex[1] in ['^', '_']):
continue

# and latex.find('{') == -1 and latex.find('}') == -1
if unicode and latex and latex[0] == '\\' and latex.find('\\',1) == -1 and latex != "\\backslash":
if unicode and latex \
and latex[0] == '\\' \
and latex.find('\\', 1) == -1 \
and latex != "\\backslash":
replacements[latex] = unicode

# alias
replacements["\\to"] = "2192"

# other symbols
# other symbols
replacements["\\degree"] = "00B0"
replacements["\\star"] = "002A"
replacements["\\sqrt"] = "221A"
replacements["\\sqrt[3]"] = "221B"
replacements["\\sqrt[4]"] = "221C"
replacements["\\neq"] = "2260"
replacements["\\ne"] = "2260"


#def addSubSuper(replacements):
# # sub- and superscript replacements from http://en.wikipedia.org/wiki/Unicode_subscripts_and_superscripts
# # sub- and superscript replacements from
# # http://en.wikipedia.org/wiki/Unicode_subscripts_and_superscripts
# # ascii part from http://www.w3schools.com/tags/ref_entities.asp
#
# # subscripts
Expand Down Expand Up @@ -84,13 +80,17 @@ def addSymbols(replacements):
# replacements["\\^n"] = "207F"
# replacements["\\^i"] = "2071"
# replacements["\\^*"] = "002A"



# pylint: disable=too-many-statements
def addSubSuperNoSlash(replacements):
# sub- and superscript replacements from http://en.wikipedia.org/wiki/Unicode_subscripts_and_superscripts
# sub- and superscript replacements from
# http://en.wikipedia.org/wiki/Unicode_subscripts_and_superscripts
# ascii part from http://www.w3schools.com/tags/ref_entities.asp

# subscripts
for i in range(10): replacements["_"+str(i)] = "208"+str(i)
for i in range(10):
replacements["_"+str(i)] = "208"+str(i)
replacements["_+"] = "208A"
replacements["_-"] = "208B"
replacements["_="] = "208C"
Expand Down Expand Up @@ -120,7 +120,7 @@ def addSubSuperNoSlash(replacements):
replacements["_\\u03C1"] = "1D68" #rho
replacements["_\\u03C6"] = "1D69" #phi
replacements["_\\u03C7"] = "1D6A" #chi

# superscripts
replacements["^0"] = "2070"
replacements["^1"] = "00B9"#"20B9"
Expand All @@ -140,7 +140,7 @@ def addSubSuperNoSlash(replacements):
replacements["^*"] = "002A"
replacements["^<"] = "02C2"
replacements["^>"] = "02C3"

# from http://en.wikipedia.org/wiki/Phonetic_symbols_in_Unicode
# and there are a couple more signs that have not been added yet
replacements["^A"] = "1D2C"
Expand Down Expand Up @@ -189,7 +189,7 @@ def addSubSuperNoSlash(replacements):
replacements["^x"] = "02E3"
replacements["^y"] = "02B8"
replacements["^z"] = "1DBB"

replacements["^\\u03B2"] = "1D5D" #beta
replacements["^\\u03B3"] = "1D5E" #gamma
replacements["^\\u03B4"] = "1D5F" #delta
Expand All @@ -198,15 +198,15 @@ def addSubSuperNoSlash(replacements):
replacements["^\\u222B"] = "1DB4" #int



def addEmoticons(replacements):
    r"""Register the emoticon commands in *replacements*.

    Every named command (``\smile``, ``\sad``, ``\happy``) is followed by
    its ASCII-art alias (``\:)``, ``\:(``, ``\:G``); both map to the same
    hexadecimal code point string. The mapping is updated in place and
    nothing is returned.
    """
    emoticons = (
        ("\\smile", "263A"),
        ("\\:)", "263A"),
        ("\\sad", "2639"),
        ("\\:(", "2639"),
        ("\\happy", "32E1"),
        ("\\:G", "32E1"),
    )
    for command, codepoint in emoticons:
        replacements[command] = codepoint

def addMathLetterlike(replacements):
replacements["\\h"] = "210E"
replacements["\\i"] = "2139"
Expand Down Expand Up @@ -254,7 +254,7 @@ def addCombiningMarks(replacements):
replacements["\\doubleunderline"] = "0333"
replacements["\\strikethrough"] = "0335"
replacements["\\slash"] = "0338"


addSymbols(replacements)
#addSubSuperNoSlash(replacements) # handled separately, do not add here
Expand All @@ -271,14 +271,30 @@ def sortRule(i):
replacements = sorted(replacements, key=sortRule)

def escapeAll(i):
    r"""Return *i* with regex metacharacters and ``'`` backslash-escaped.

    The characters ``\ . ^ $ * + ? { } [ ] | ( )`` and the single quote are
    each prefixed with a backslash. Afterwards any ``\\uXXXX`` sequence
    (an escaped backslash followed by ``u`` and four hex digits) is turned
    back into ``\uXXXX`` so that unicode escapes survive the escaping.

    Parameters:
        i: the string to escape.

    Returns:
        The escaped string.
    """
    # A single translate() pass maps every special character to its escaped
    # form at once, so no ordering constraint ("backslash must be first")
    # is needed to avoid re-escaping the backslashes that were just added.
    special = "\\.^$*+?{}[]|()'"
    table = str.maketrans({ch: "\\" + ch for ch in special})
    escaped = i.translate(table)

    # undo escaped unicode \u
    return re.sub(r'\\\\u([0-9A-Fa-f]{4})', r'\\u\1', escaped)

# write python converter script
Expand All @@ -287,23 +303,23 @@ def escapeAll(i):
for t in template:
if t[:12] == "replacements":
out.write("replacements = [\n")
for l,u in replacements:
for l, u in replacements:
l = escapeAll(l)
out.write(" (r'"+l+"', '\\u"+u+"'),\n")
out.write("]\n")
elif t[:14] == "combiningmarks":
combiningmarks = {}
addCombiningMarks(combiningmarks)
out.write("combiningmarks = [\n")
for l,u in list(combiningmarks.items()):
for l, u in list(combiningmarks.items()):
l = escapeAll(l)
out.write(" (r'"+l+"', '\\u"+u+"'),\n")
out.write("]\n")
elif t[:15] == "subsuperscripts":
subsuperscripts = {}
addSubSuperNoSlash(subsuperscripts)
out.write("subsuperscripts = [\n")
for l,u in list(subsuperscripts.items()):
for l, u in list(subsuperscripts.items()):
l = escapeAll(l)
out.write(" (r'"+l+"', '\\u"+u+"'),\n")
out.write("]\n")
Expand All @@ -316,23 +332,23 @@ def escapeAll(i):
for t in template:
if t[:16] == "var replacements":
out.write("var replacements = [\n")
for l,u in replacements:
for l, u in replacements:
l = escapeAll(l)
out.write(" ['"+l+"', '\\u"+u+"'],\n")
out.write("];\n")
elif t[:18] == "var combiningmarks":
combiningmarks = {}
addCombiningMarks(combiningmarks)
out.write("var combiningmarks = [\n")
for l,u in list(combiningmarks.items()):
for l, u in list(combiningmarks.items()):
l = escapeAll(l)
out.write(" ['"+l+"', '\\u"+u+"'],\n")
out.write("];\n")
elif t[:19] == "var subsuperscripts":
subsuperscripts = {}
addSubSuperNoSlash(subsuperscripts)
out.write("var subsuperscripts = [\n")
for l,u in list(subsuperscripts.items()):
for l, u in list(subsuperscripts.items()):
l = escapeAll(l)
out.write(" ['"+l+"', '\\u"+u+"'],\n")
out.write("];\n")
Expand All @@ -346,37 +362,50 @@ def escapeAll(i):
# list all replacements
def nicePrint(i):
    """Sort key: return the first element of the pair *i*."""
    return i[0]
def printFromFunction(f):
    r"""Print the replacements registered by *f* as an aligned table.

    *f* is called with an empty dict and is expected to fill it with
    ``command -> hex code point string`` entries. The entries are printed
    sorted by command, one per line, with the command right-aligned in a
    20-character column followed by the character it stands for.

    Subscript keys that are stored in their ``_\uXXXX`` form (beta, gamma,
    rho, phi, chi) are shown with their LaTeX spelling, e.g. ``_{\beta}``.

    Parameters:
        f: a callable taking one dict argument and populating it in place.
    """
    greek_subscripts = {
        "_\\u03B2": "_{\\beta}",
        "_\\u03B3": "_{\\gamma}",
        "_\\u03C1": "_{\\rho}",
        "_\\u03C6": "_{\\phi}",
        "_\\u03C7": "_{\\chi}",
    }

    table = {}
    f(table)

    # Sort on the command only, exactly like a (command, code) key of i[0].
    for key, code in sorted(table.items(), key=lambda pair: pair[0]):
        label = greek_subscripts.get(key, key)
        # chr() already yields text. Encoding it to UTF-8 before printing
        # makes Python 3 show the bytes repr (b'\xe2...') instead of the
        # character, so the character itself is printed.
        print("%20s \t %s" % (label, chr(int(code, 16))))
def jsonFromFunctions(functions):
    r"""Print the collected commands as a run of quoted, comma-terminated items.

    Every callable in *functions* is invoked on one shared dict, so later
    functions overwrite earlier entries with the same key. Each resulting
    key is then printed as ``"<escaped key>",`` followed by a space, all on
    one line; the code point values are not used.

    Keys stored in ``_\uXXXX`` form (beta, gamma, rho, phi, chi) are shown
    with their LaTeX spelling. Keys are skipped when they contain a single
    or double quote, or when they are a backslash followed by at most one
    character.

    Parameters:
        functions: iterable of callables that each populate a dict in place.
    """
    greek_subscripts = {
        "_\\u03B2": "_{\\beta}",
        "_\\u03B3": "_{\\gamma}",
        "_\\u03C1": "_{\\rho}",
        "_\\u03C6": "_{\\phi}",
        "_\\u03C7": "_{\\chi}",
    }

    merged = {}
    for register in functions:
        register(merged)

    for key in merged:
        label = greek_subscripts.get(key, key)

        # Quotes would break the quoted output item.
        if '"' in label or "'" in label:
            continue
        # Drop one-character commands such as "\h".
        if label[0] == "\\" and len(label) <= 2:
            continue
        print('"%s",' % (escapeAll(label)), end=' ')

print("Sub- and Superscripts")
print("==============================")
printFromFunction(addSubSuperNoSlash)
Expand All @@ -401,5 +430,10 @@ def jsonFromFunctions(functions):
print("")
print("JSON")
print("==============================")
jsonFromFunctions([addSymbols,addSubSuperNoSlash,addMathLetterlike,addEmoticons,addCombiningMarks])

jsonFromFunctions([
addSymbols,
addSubSuperNoSlash,
addMathLetterlike,
addEmoticons,
addCombiningMarks,
])

0 comments on commit 77b514b

Please sign in to comment.