This repository has been archived by the owner on Jul 11, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfrenchFrequency.py
76 lines (63 loc) · 2.85 KB
/
frenchFrequency.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/python
# -*- coding: utf-8 -*-
# missing French words, based on the frequency list from http://corpus.leeds.ac.uk/frqc/internet-fr.num
import codecs
import pywikibot
import datetime
import re
import config
import urllib.request, urllib.error, urllib.parse
from klasa import *
def _has_french_section(haslo):
    """Return True if any section in ``haslo.listLangs`` has lang 'francuski'.

    An entry without a ``listLangs`` attribute is treated as having no
    French section.
    """
    sections = getattr(haslo, 'listLangs', ())
    return any(section.lang == 'francuski' for section in sections)


def main():
    """Refresh the wiki list of frequent French words that lack an entry.

    Reads the local frequency list, skips its first 310 entries, and looks
    up each remaining word with ``Haslo``.  Words that are not excluded and
    turn out to be missing (or lack a 'francuski' section) are collected as
    wiki list items.  As soon as 100 such words have been counted, the
    collected list is compared with the list currently on
    'Portal:Francuski/potrzebne' and the page is saved only if it differs.

    Returns:
        0 once 100 missing words were collected (whether or not a save was
        needed); None if the word list runs out first, in which case the
        page is not touched.
    """
    site = pywikibot.Site()
    output_page = pywikibot.Page(site, 'Portal:Francuski/potrzebne')
    excluded_page = pywikibot.Page(site, 'Portal:Francuski/potrzebne/wykluczone')
    # A set gives O(1) membership tests in the loop below.
    excluded = set(excluded_page.get().split('\n'))
    date = datetime.datetime.now().strftime("%Y-%m-%d")

    list_path = '%sinne/lista_franc.txt' % config.path['scripts']
    word_list = []
    # The context manager guarantees the file is closed after reading.
    with codecs.open(list_path, encoding='utf-8') as inp:
        for line in inp:
            fields = line.split()
            # The word is the third whitespace-separated field; anything
            # after a '|' in that field is dropped.  Blank or short lines
            # are skipped instead of raising IndexError.
            if len(fields) > 2:
                word_list.append(fields[2].split('|')[0])

    # Captures the bullet list that precedes the category link on the page.
    re_stary = re.compile(r'(\*.*?\n)\[\[Kat', re.DOTALL)

    # NOTE(review): the original comment said words below 300 already exist,
    # while the value used is 310 -- confirm which is intended.
    start_point = 310
    target_count = 100

    lines = []
    counter = 0

    for word in word_list[start_point:]:
        if word in excluded or len(word) <= 1:
            continue

        try:
            haslo = Haslo(word)
        except (urllib.error.HTTPError, sectionsNotFound):
            # Deliberate best-effort: a word that cannot be fetched or
            # parsed is skipped without being listed.
            continue
        except WrongHeader:
            lines.append('*[[' + word + ']] - problem z nagłówkiem' + '\n')
            continue

        if haslo.type == 0:
            # Listed with a redirect note but not counted towards the target.
            lines.append('*[[' + word + ']] - przekierowanie' + '\n')
        elif haslo.type == 1:
            lines.append('*[[' + word + ']]' + '\n')
            counter += 1
        elif haslo.type == 2:
            print('*--------* nieznany błąd (haslo.type=2) *----------------*')
        elif not _has_french_section(haslo):
            lines.append('*[[' + word + ']]' + '\n')
            counter += 1

        if counter == target_count:
            text = ''.join(lines)
            old_list = re_stary.search(output_page.get())
            # If the page holds no recognisable list, treat it as changed
            # rather than failing on ``None.group``.
            if old_list is None or old_list.group(1) != text:
                final = (
                    '{{język linków|francuski}}\n'
                    'Oto lista około stu najczęściej występujących haseł w języku francuskim, '
                    'których nie ma jeszcze na Wikisłowniku. Jeśli możesz - dodaj je. '
                    'Lista ta jest wyborem słów z zestawienia stworzonego przez '
                    '[http://corpus.leeds.ac.uk/list.html korpus University of Leeds].\n'
                    'Ostatnia aktualizacja: %s\n'
                    '%s[[Kategoria:Listy frekwencyjne|francuski]]'
                ) % (date, text)
                output_page.put(final, comment='Aktualizacja listy')
            return 0
if __name__ == '__main__':
    # Script entry point: run the update, and call pywikibot.stopme()
    # afterwards whether main() returned normally or raised.
    try:
        main()
    finally:
        pywikibot.stopme()