forked from bzzzzzu/nnhack_rzd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdocument_reader.py
99 lines (92 loc) · 3.53 KB
/
document_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os.path
def get_text_from_document(path):
    """Read a UTF-8 text file and return its lines.

    Args:
        path: Location of the file to open for reading.

    Returns:
        A list with one string per line of the file, each string exactly
        as produced by text-mode ``readlines()`` (line terminators are
        kept). An empty file yields an empty list.
    """
    # The lenient variant (errors='ignore' without an explicit encoding) is
    # deliberately disabled: the file is decoded strictly as UTF-8.
    with open(path, mode='r', encoding='utf-8') as source:
        lines = source.readlines()
    return lines
# ---------------------------------------------------------------------------
# Script: split data-processing/test2.csv into one CSV file per table.
#
# Lines are ignored until the start marker has been seen. From then on the
# lines are collected into tables; a line containing the header marker closes
# the table collected so far and opens the next one. Every table is written
# twice:
#   * data-processing/tables/table_<n>.csv        - the source lines verbatim
#   * data-processing/tables_fixed/table_<n>.csv  - the first three fields of
#     each row, with empty cells replaced by the last non-empty value seen in
#     that column, preceded by the most recent non-numeric first field.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost. Please confirm against the repository,
# in particular (a) that the per-table reset runs for every header line and
# not only when rows were actually written, and (b) that every line after the
# start marker - not only rows with more than three fields - is added to the
# verbatim table.
# ---------------------------------------------------------------------------

_RAW_DIR = 'data-processing/tables/'
_FIXED_DIR = 'data-processing/tables_fixed/'
_START_MARKER = 'Приложение N 40 к настоящему Перечню'
_TABLE_HEADER_MARKER = 'и устранению неисправностей'


def _write_table(table_index, raw_rows, fixed_rows):
    """Write one collected table to the verbatim and the filled-down file.

    Args:
        table_index: Number used in the ``table_<n>.csv`` file names.
        raw_rows: Source lines of the table, written exactly as given
            (each line still carries its own line terminator, if any).
        fixed_rows: Filled-down rows without line terminators; a newline
            is appended to each one on output.

    Nothing is written when ``raw_rows`` is empty.
    """
    if not raw_rows:
        return
    with open(f'{_RAW_DIR}table_{table_index}.csv', 'w', encoding='utf-8') as f:
        f.writelines(raw_rows)
    with open(f'{_FIXED_DIR}table_{table_index}.csv', 'w', encoding='utf-8') as f:
        f.writelines(row + '\n' for row in fixed_rows)


# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(_RAW_DIR, exist_ok=True)
os.makedirs(_FIXED_DIR, exist_ok=True)

csv_list = get_text_from_document('data-processing/test2.csv')

num_list = 0            # index of the table currently being collected
start_parse = False     # becomes True once the start marker has been seen
curr_list = []          # verbatim lines of the current table
curr_fixed_list = []    # filled-down rows of the current table
# Last non-empty value seen per output column (used to fill empty cells).
curr_col0 = ''          # most recent non-numeric first field
curr_col1 = ''          # most recent numeric first field
curr_col2 = ''          # most recent non-empty second field
curr_col3 = ''          # most recent non-empty third field

for c in csv_list:
    if start_parse:
        if _TABLE_HEADER_MARKER in c:
            # A header line closes the table collected so far.
            print(c)
            print(len(curr_list))
            _write_table(num_list, curr_list, curr_fixed_list)
            curr_col0 = curr_col1 = curr_col2 = curr_col3 = ''
            print('-------------------')
            curr_list = []
            curr_fixed_list = []
            num_list += 1

        # The header line itself falls through and becomes the first line
        # of the table it opens.
        csv_str = c.replace('\n', '').split(';')
        if len(csv_str) > 3:
            first = csv_str[0]
            # A non-empty, non-numeric first field is only remembered as
            # column 0 for the rows that follow; that row is not emitted.
            starts_group = first != '' and not first.isnumeric()
            if starts_group:
                curr_col0 = first
            elif first != '':
                curr_col1 = first
            # Columns 2 and 3 are carried forward even from a row that is
            # not emitted.
            if csv_str[1] != '':
                curr_col2 = csv_str[1]
            if csv_str[2] != '':
                curr_col3 = csv_str[2]
            if not starts_group:
                fixed_str = f'{curr_col0};{curr_col1};{curr_col2};{curr_col3};'
                # Skip a row identical to the one emitted just before it.
                if not curr_fixed_list or curr_fixed_list[-1] != fixed_str:
                    curr_fixed_list.append(fixed_str)
        curr_list.append(c)

    # Checked after the processing above, so the marker line itself is never
    # part of a table.
    if _START_MARKER in c:
        start_parse = True

# Flush the table that was still open when the input ended.
_write_table(num_list, curr_list, curr_fixed_list)
print(len(csv_list))