-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwikiscraper.py
38 lines (31 loc) · 1.07 KB
/
wikiscraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import wikipedia
import datetime
class wikiScraper(object):
    """
    Thin convenience wrapper around the ``wikipedia`` package, a simple
    MediaWiki API client: https://pypi.python.org/pypi/wikipedia/
    """

    def __init__(self, lang):
        """
        Prepare the ``wikipedia`` module for scraping in the language `lang`.

        Both settings are applied through module-level calls on ``wikipedia``;
        nothing is stored on the instance itself.
        """
        # enable rate limiting with a minimum wait of 0.5 s (500000 microseconds)
        pause = datetime.timedelta(microseconds=500000)
        wikipedia.set_rate_limiting(True, min_wait=pause)
        # select the language to query
        wikipedia.set_lang(lang)

    def getPage(self, title):
        """
        Return the page object for `title` (the title must exist in the wiki
        for the configured language).
        """
        page = wikipedia.page(title)
        return page

    def getPageTitle(self, title):
        """
        Return the ``title`` attribute of the page object looked up by `title`
        (the title must exist in the wiki for the configured language).
        """
        page = self.getPage(title)
        return page.title

    def getSummary(self, title):
        """
        Return the ``summary`` attribute of the page object looked up by `title`.
        """
        page = self.getPage(title)
        return page.summary

    def searchKw(self, kw):
        """
        Search the wiki for the keyword `kw` and return whatever
        ``wikipedia.search`` returns.
        """
        hits = wikipedia.search(kw)
        return hits