forked from chinese-soup/zradlo
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathperfcanteen-moneta.py
executable file
·104 lines (88 loc) · 2.95 KB
/
perfcanteen-moneta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env python3
# coding=utf-8
# VRTULE
import requests, os, re, tempfile, time, locale
from bs4 import BeautifulSoup
from subprocess import check_output
# Adopt the locale configured in the environment so that
# time.strftime("%A") in return_menu() yields the weekday name in that
# locale's language (presumably Czech, to match the menu headings --
# verify against the deployment environment).
locale.setlocale(locale.LC_ALL,'')
def get_url():
    """Return the URL the menu PDF is downloaded from."""
    return "http://menu.perfectcanteen.cz/pdf/24/cz/price/a3"
def get_file():
    """Download the menu PDF and return its contents as plain text.

    The PDF is streamed from get_url() into a temporary file, which is then
    converted by the external ``pdftotext -layout`` command.  The temporary
    file is always removed, even when the download or the conversion fails.

    Returns:
        The text printed by ``pdftotext``, decoded as UTF-8.

    Raises:
        requests.RequestException: the download failed or the server
            answered with an HTTP error status.
        subprocess.CalledProcessError: ``pdftotext`` exited with a non-zero
            status.
        OSError: ``pdftotext`` could not be started or the temporary file
            could not be written.
    """
    print("Stahuji menu")
    response = requests.get(get_url(), stream=True, timeout=6)
    try:
        # Fail early on 4xx/5xx instead of handing an error page to pdftotext.
        response.raise_for_status()
        tmp_fd, tmp_path = tempfile.mkstemp()
        try:
            # Wrap the descriptor returned by mkstemp() rather than opening
            # the path a second time; the with-block closes it for us.
            with os.fdopen(tmp_fd, "wb") as pdf_file:
                for chunk in response.iter_content(chunk_size=1024):
                    pdf_file.write(chunk)
            print("menu stazeno, prevadim na text")
            text = check_output(
                ["pdftotext", "-layout", tmp_path, "-"]
            ).decode("utf8")
        finally:
            os.remove(tmp_path)
    finally:
        # A streamed response holds its connection until closed.
        response.close()
    print("prevedeno na text")
    return text
def get_name():
    """Return the display name of this canteen."""
    return "Perfect Canteen Moneta"
# One menu row: dish name, whitespace, an integer price, whitespace, then the
# currency marker "K" with an optional "č".  Raw strings keep the backslashes
# for the regex engine instead of relying on invalid string escapes.
_MENU_ITEM_RE = re.compile(
    r"\s*([A-Za-z0-9ěščřžýáíéůúťňóöďŤĚŠČŘŽŇÝÁÍÉÚŮÓÖĎ \t,\-–“\(\)´\/]+)"
    r"[\s\n]+([0-9]+)\s+Kč?\s*"
)
_WHITESPACE_RUN_RE = re.compile(r"\s+")


def return_menu(antiword):
    """Extract today's dishes from the plain-text rendering of the menu.

    The text is scanned line by line.  Everything before the first line that
    starts with today's weekday name (``time.strftime("%A")``, so it depends
    on the active locale) is ignored.  After that heading every line that
    looks like "<dish name> <price> Kč" is collected.  A single line that is
    not a menu row (blank or other text) is tolerated between rows; a second
    such line in a row ends the scan.

    Args:
        antiword: The menu text, e.g. as returned by get_file().

    Returns:
        A tuple ``(date, items)``.  ``date`` is the weekday heading that was
        matched, or ``"???"`` when no heading for today was found.  ``items``
        is a list of ``[name, price]`` pairs where ``name`` has its runs of
        whitespace collapsed to single spaces and ``price`` is the number
        followed by ``" Kč"``.
    """
    today = time.strftime("%A")
    # Escape the weekday so it is always matched literally.
    heading_re = re.compile("(" + re.escape(today) + ").*$")

    items = []
    date = "???"
    published = False   # True once today's heading has been seen
    prev_match = False  # True while the previous line was a row or heading

    for line in antiword.splitlines():
        row = _MENU_ITEM_RE.match(line)
        if published and row:
            nazev = _WHITESPACE_RUN_RE.sub(" ", row.group(1).strip())
            cena = row.group(2).strip()
            # The name group can consist of whitespace only; skip such rows
            # but still treat the line as part of the menu.
            if nazev and cena:
                items.append([nazev, cena + " Kč"])
            prev_match = True
            continue

        heading = heading_re.match(line.strip())
        if heading:
            date = heading.group(1)
            published = True
            prev_match = True
            continue

        if not published:
            # Still looking for today's heading.
            continue
        if not prev_match:
            # Second consecutive non-row line: today's section is over.
            break
        # First non-row line after a row or heading: remember it and go on.
        prev_match = False

    return (date, items)
def debug_print(date, menu):
    """Print the matched date heading and the parsed menu, one per line.

    Args:
        date: The heading value returned by return_menu().
        menu: The list of items returned by return_menu().
    """
    print(date)
    print(menu)
def result():
    """Fetch and parse today's menu, returning a placeholder on failure.

    Returns:
        A tuple ``(name, url, date, menu_list, location)``.  When the menu
        cannot be downloaded, converted or parsed, ``date`` is
        ``"Menu nenalezeno"`` and ``menu_list`` is empty.
    """
    lokalita = "brumlovka"
    nazev = get_name()
    url = get_url()
    try:
        date, menu_list = return_menu(get_file())
    except Exception:
        # Best effort: any failure is reported as "menu not found".
        # KeyboardInterrupt and SystemExit are deliberately left to propagate.
        return (nazev, url, "Menu nenalezeno", [], lokalita)
    return (nazev, url, date, menu_list, lokalita)
if __name__ == "__main__":
    # Manual check: download the menu, parse it and print what was found.
    # Errors are not caught here, so a failure shows its full traceback.
    page = get_file()
    date, menu_list = return_menu(page)
    debug_print(date, menu_list)
    # To see the tuple produced by result() instead, use: print(result())