forked from zeldaret/oot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmsgenc.py
165 lines (138 loc) · 4.75 KB
/
msgenc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env python3
#
# message_data_static text encoder
#
import argparse, ast, re, sys
from typing import Dict, Optional
def read_charmap(path : str, wchar : bool) -> Dict[str,str]:
    """
    Load a charmap file and convert it into a table of C byte-list fragments.

    The file at `path` must contain a single Python dict literal. Every key is
    a string and every value is a sequence indexed by `wchar` (index 0 when
    `wchar` is False, index 1 when it is True) giving the code for that key.
    A code of None is treated as 0.

    Returns a dict whose keys are the charmap keys spelled the way `repr`
    escapes them (e.g. a newline character becomes the two characters
    backslash + "n") and whose values are "0xNN," when `wchar` is False, or
    "0xHH, 0xLL," (high byte first) when `wchar` is True.

    Raises AssertionError if a key is not a str or a code is out of range for
    the selected width.
    """
    # Decode explicitly as UTF-8 rather than with the locale's default encoding,
    # so that non-ASCII keys are read identically on every platform.
    with open(path, encoding="utf-8") as infile:
        charmap = ast.literal_eval(infile.read())

    max_code = 0xFFFF if wchar else 0xFF

    out_charmap = {}
    for k,v in charmap.items():
        # bool indexes as 0/1: selects the single-byte or the two-byte code
        v = v[wchar]
        if v is None:
            v = 0

        assert isinstance(k, str), f"charmap key {k!r} is not a string"
        assert v in range(max_code + 1), f"charmap code {v!r} for key {k!r} is out of range"

        # Re-escape the key and drop the surrounding quotes so it can be matched
        # against the raw (still escaped) contents of a source string literal
        k = repr(k)[1:-1]

        if wchar:
            hi = (v >> 8) & 0xFF
            lo = (v >> 0) & 0xFF
            out_charmap[k] = f"0x{hi:02X}, 0x{lo:02X},"
        else:
            out_charmap[k] = f"0x{v:02X},"

    return out_charmap
# From https://stackoverflow.com/questions/241327/remove-c-and-c-comments-using-python
def remove_comments(text : str) -> str:
    """
    Return `text` with every C/C++ comment replaced by a single space.

    String and character literals are matched by the same pattern so that
    comment markers appearing inside them are left alone; the literals
    themselves are returned unchanged.
    """
    # Alternatives, in order: line comment, block comment, char literal, string literal
    comment_or_literal = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE
    )

    def blank_if_comment(m : re.Match) -> str:
        token : str = m.group(0)
        # Only the two comment alternatives can start with "/"; literals start
        # with a quote. A space (not an empty string) keeps neighbouring tokens apart.
        return " " if token[0] == "/" else token

    return comment_or_literal.sub(blank_if_comment, text)
def convert_text(text : str, encoding : str, charmap : Dict[str, str]) -> str:
    """
    Replace every double-quoted string literal in `text` with a list of bytes.

    Backslash-newline line continuations are removed first. Then, inside each
    single-line string literal (quotes dropped), the contents are scanned left
    to right:

      * if a `charmap` key starts at the current position, the mapped fragment
        is emitted as-is and the key is skipped (the first matching key in
        dict order wins);
      * a backslash followed by "x" is emitted as "0x" plus the next two
        characters;
      * any other backslash escape (including an escaped backslash or quote)
        is decoded and must encode to exactly one byte in `encoding`;
      * everything else is accumulated and encoded with `encoding`.

    Each emitted byte is written as "0xNN" followed by a comma. Text outside
    string literals is returned unchanged.

    Raises AssertionError if a decoded escape does not encode to one byte.
    """
    def cvt_escape(s : str) -> str:
        # Convert escape sequences such as "\\\"" to "\""
        return s.encode("ascii").decode("unicode-escape")

    def cvt_str(match : re.Match) -> str:
        # strip quotes
        string : str = match.group(0)[1:-1]
        end = len(string)

        out = []        # output fragments, each already comma-terminated
        run_start = 0   # start of the pending run of text awaiting encoding
        i = 0           # current scan position in string

        def flush():
            # encode and emit the pending text run string[run_start:i], if any
            pending = string[run_start:i]
            if pending:
                out.append(",".join(f"0x{b:02X}" for b in pending.encode(encoding)) + ",")

        def emit(fragment : str, advance : int):
            nonlocal i, run_start
            flush()
            # emit + advance source pos
            out.append(fragment)
            i += advance
            # start new run
            run_start = i

        # "<" rather than "!=": a truncated \x escape at the end of the string
        # advances past the end and must terminate the scan
        while i < end:
            # check charmap
            for k, mapped in charmap.items():
                if string.startswith(k, i):
                    # is in charmap, emit the mapped sequence
                    emit(mapped, len(k))
                    break
            else:
                if string[i] != "\\":
                    # increment pos, accumulating text that requires encoding
                    i += 1
                elif string[i + 1 : i + 2] == "x":
                    # \x** : already a byte value, emit verbatim
                    emit("0" + string[i + 1 : i + 4] + ",", 4)
                else:
                    # \* : consume the backslash together with the character it
                    # escapes. This includes an escaped backslash, so its second
                    # half is never mistaken for the start of another escape.
                    e = cvt_escape(string[i : i + 2]).encode(encoding)
                    assert len(e) == 1, f"escape sequence {string[i : i + 2]!r} does not encode to a single byte"
                    emit(f"0x{e[0]:02X},", 2)

        # emit remaining accumulated text
        flush()
        return "".join(out)

    # Naive string matcher, assumes single line strings and no comments, handles escaped quotations
    string_regex = re.compile(r'"((?:[^\\"\n]|\\.)*)"')

    # Collapse escaped newlines
    text = text.replace("\\\n", "")
    # Encode according to charmap
    return string_regex.sub(cvt_str, text)
def main():
    """
    Command-line entry point.

    Reads the input (a file path, or "-" for stdin), removes comments, encodes
    all string literals for the selected encoding using the given charmap, and
    writes the result to the output (a file path, or "-" for stdout).

    Input and output files are read and written as UTF-8, matching the UTF-8
    bytes written when the output is stdout. Stdin is decoded with whatever
    encoding the interpreter has configured for it.
    """
    parser = argparse.ArgumentParser(
        description="Encode message_data_static text headers"
    )
    parser.add_argument(
        "input",
        help="path to file to be encoded, or - for stdin",
    )
    parser.add_argument(
        "output",
        help="path to write encoded file, or - for stdout",
    )
    parser.add_argument(
        "--encoding",
        help="encoding (jpn or nes)",
        required=True,
        type=str,
        choices=("jpn", "nes"),
    )
    parser.add_argument(
        "--charmap",
        help="path to charmap file specifying custom encoding elements",
        required=True,
    )
    args = parser.parse_args()

    # (use two-byte charmap codes?, Python codec used for text not in the charmap)
    wchar,encoding = {
        "jpn" : (True, "SHIFT-JIS"),
        "nes" : (False, "raw-unicode-escape"),
    }[args.encoding]

    charmap = read_charmap(args.charmap, wchar)

    if args.input == "-":
        text = sys.stdin.read()
    else:
        with open(args.input, "r", encoding="utf-8") as infile:
            text = infile.read()

    text = remove_comments(text)
    text = convert_text(text, encoding, charmap)

    if args.output == "-":
        sys.stdout.buffer.write(text.encode("utf-8"))
    else:
        # same encoding as the stdout branch, independent of the locale
        with open(args.output, "w", encoding="utf-8") as outfile:
            outfile.write(text)
# Run the encoder only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()