forked from mtorpey/smallgrp-to-csv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfix.py
43 lines (36 loc) · 1.24 KB
/
fix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import json
import re
import os
# Upper bound applied to the first element of each row by the filtering step
# later in this script (rows with row[0] above it are dropped).
ORDER_UP_TO = 511

print("Turning invalid lines into JSON ")
# Matches a line made of at least ten comma-separated fields: the first nine
# must not contain commas, the tenth captures the rest of the line.
REGEX = r'^([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),([^,]+),(.*)$'
# Replacement template: emit the fields as a JSON array followed by a comma,
# with field 7 replaced by null, fields 8 and 9 grouped into a nested array
# and field 10 wrapped in double quotes.
_FIXED_ROW_TEMPLATE = r'[\1,\2,\3,\4,\5,\6,null,[\8,\9],"\10"],'
# Compiled once so the per-line loop does not repeat the pattern lookup.
_INVALID_ROW = re.compile(REGEX)

# Pre-set so that an empty all.json (the loop never binds lineno) no longer
# raises NameError when replace_line is computed below.
lineno = -1
with open("all.json") as infile, open("all.json.fixed", "w") as outfile:
    for lineno, line in enumerate(infile):
        # Only lines containing ",fail," are rewritten; every other line is
        # copied to the output unchanged.
        if ",fail," in line:
            line = _INVALID_ROW.sub(_FIXED_ROW_TEMPLATE, line)
        outfile.write(line)

# 0-based index of the second-to-last line read from all.json; the next step
# of the script removes the trailing comma from exactly that line.
# NOTE(review): presumably the very last line is the closing "]" of the JSON
# array, which is why the comma is stripped one line earlier - confirm.
replace_line = lineno - 1
# Second pass: copy all.json.fixed back over all.json, rewriting only the line
# whose 0-based index equals replace_line.
print("Removing trailing comma")
# Group 1 is everything before the last comma that is followed only by
# whitespace; group 2 holds the final whitespace character, if any.
REGEX_2 = r'^(.*),(\s)*$'
# all.json is opened for writing first, then the fixed copy for reading.
with open("all.json", "w") as outfile, open("all.json.fixed") as infile:
    for index, text in enumerate(infile):
        # On the target line keep group 1 and group 2, dropping the comma.
        outfile.write(
            re.sub(REGEX_2, r'\1\2', text) if index == replace_line else text
        )
# Final pass: parsing the file with json.load doubles as the validity check,
# since it raises if all.json is still not well-formed JSON.
print("Removing unuused field and filtering")
with open("all.json") as infile:
    data = json.load(infile)

# Keep only rows whose first element is at most ORDER_UP_TO, and from each of
# those keep elements 0-5 and 7-8 (element 6 and anything past 8 is dropped).
data = [
    entry[:6] + entry[7:9]
    for entry in data
    if entry[0] <= ORDER_UP_TO
]

# Every projected row should have exactly 8 elements; report each one that
# does not. This only prints - the result is still written below.
wrong_length = [entry for entry in data if len(entry) != 8]
for _ in wrong_length:
    print("FAILURE")

print("Writing final result of {} items".format(len(data)))
with open("all.json", "w") as outfile:
    json.dump(data, outfile)