Skip to content

Commit

Permalink
add basic logging to url scrappers and fix broken elements
Browse files: browse the repository at this point in the history
nickkatsios committed Jan 30, 2024
1 parent c88d967 commit 9f6b36c
Showing 5 changed files with 39 additions and 33 deletions.
23 changes: 12 additions & 11 deletions scrappers/urls/betshop_url_scrapper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import time
from selenium.webdriver.support.relative_locator import locate_with
from selenium.webdriver.common.by import By
import traceback

"""
-------Url Strategy------
@@ -25,7 +26,7 @@ def close_popups_and_cookies(self):
accept_cookies_btn.click()
self.driver.implicitly_wait(1)
except:
print("error closing popup")
self.logger.info(__class__.__name__ + " : " + "Error closing popup")
pass

# *----- BETSHOP LEAGUE URL SCRAPPING -----*
@@ -35,10 +36,8 @@ def get_all_league_urls(self):
all_a_tags = self.driver.find_elements(By.TAG_NAME , "a")
urls = []
for a_tag in all_a_tags:
class_attr = a_tag.get_attribute("class")
url = a_tag.get_attribute("href")
if(class_attr == "block align-bottom text-light-500 hover:text-light-600"):
league_name = self.driver.find_element(locate_with(By.TAG_NAME, "label").to_left_of(a_tag)).text
if url is not None and url.startswith("https://www.betshop.gr/sports/game/stoixima-podosfairo/"):
urls.append(url)
return urls

@@ -53,7 +52,7 @@ def run_league_url_extractor(self):
# *----- BETSHOP EVENT URL SCRAPPING -----*

def get_event_elements(self):
panel = self.driver.find_element(By.CLASS_NAME , "groupedByDate")
panel = self.driver.find_elements(By.CLASS_NAME , "min-h-screen")[1]
event_elements = panel.find_elements(By.CSS_SELECTOR , "div.truncate")
time.sleep(1.5)
return event_elements
@@ -80,15 +79,16 @@ def run_event_url_extractor(self, urls):
self.driver.back()
time.sleep(wait_sec)
except:
print("error finding element, reloading...")
self.driver.get(url)
self.logger.info(__class__.__name__ + " : " + "Error getting event urls")
self.logger.info(__class__.__name__ + " : " + traceback.format_exc())
continue
return event_urls

# save to db for each league
self.write_urls_to_db(event_urls)


def run_url_extractor(self):
league_urls = self.run_league_url_extractor()
event_urls = self.run_event_url_extractor(league_urls)
self.write_urls_to_db(event_urls)
self.run_event_url_extractor(league_urls)

def write_urls_to_db(self, urls):
"""Writes the urls to the database
@@ -100,4 +100,5 @@ def write_urls_to_db(self, urls):
sql = "INSERT INTO Urls (bookmaker_id, url, timestamp) VALUES (%s, %s, NOW())"
values = (self.bookmaker_id, url)
self.db.execute_insert(sql, values)
self.logger.info(__class__.__name__ + " : " + "Inserted: " + str(len(urls)) + " urls to db")

3 changes: 2 additions & 1 deletion scrappers/urls/betsson_url_scrapper.py
Original file line number Diff line number Diff line change
@@ -71,4 +71,5 @@ def write_urls_to_db(self, urls):
for url in urls:
sql = "INSERT INTO Urls (bookmaker_id, url, timestamp) VALUES (%s, %s, NOW())"
values = (self.bookmaker_id, url)
self.db.execute_insert(sql, values)
self.db.execute_insert(sql, values)
self.logger.info(__class__.__name__ + " : " + "Inserted: " + str(len(urls)) + " urls to db")
16 changes: 9 additions & 7 deletions scrappers/urls/novibet_url_scrapper.py
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@ def close_popups_and_cookies(self):
accept_cookes_btn.click()
self.driver.implicitly_wait(1)
except:
print("error closing popup")
self.logger.info(__class__.__name__ + " : " + "Error closing popup")
pass

def get_league_elements(self):
@@ -58,7 +58,7 @@ def run_league_url_extractor(self):
leagues = self.get_league_elements()
league_urls = []
wait_sec = 1
for i in range(2):
for i in range(10):
try:
leagues = self.get_league_elements()
league = leagues[i]
@@ -69,7 +69,7 @@ def run_league_url_extractor(self):
self.driver.back()
time.sleep(wait_sec)
except:
print("error finding element")
self.logger.info(__class__.__name__ + " : " + "Error getting league urls")
continue
return league_urls

@@ -86,12 +86,13 @@ def run_event_url_extractor(self, league_urls):
event_urls.append(event.get_attribute("href"))
# filter out live events
event_urls = list(filter(lambda x: "live" not in x, event_urls))
return event_urls
# save to db for each league
self.write_urls_to_db(event_urls)


def run_url_extractor(self):
league_urls = self.run_league_url_extractor()
event_urls = self.run_event_url_extractor(league_urls)
self.write_urls_to_db(event_urls)
self.run_event_url_extractor(league_urls)

def write_urls_to_db(self, urls):
"""Writes the urls to the database
@@ -102,4 +103,5 @@ def write_urls_to_db(self, urls):
for url in urls:
sql = "INSERT INTO Urls (bookmaker_id, url, timestamp) VALUES (%s, %s, NOW())"
values = (self.bookmaker_id, url)
self.db.execute_insert(sql, values)
self.db.execute_insert(sql, values)
self.logger.info(__class__.__name__ + " : " + "Inserted: " + str(len(urls)) + " urls to db")
12 changes: 7 additions & 5 deletions scrappers/urls/stoiximan_url_scrapper.py
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ def close_popups_and_cookies(self):
accept_cookes_btn.click()
self.driver.implicitly_wait(1)
except:
print("error closing popup")
self.logger.info(__class__.__name__ + " : " + "Error closing popup")
pass

# *----- LEAGUE URL SCRAPPING -----*
@@ -62,12 +62,13 @@ def run_event_url_extractor(self, league_urls):
event_urls.append(event.get_attribute("href"))
# filter out live events
event_urls = list(filter(lambda x: "live" not in x, event_urls))
return event_urls
# save to db for each league
self.write_urls_to_db(event_urls)


def run_url_extractor(self):
league_urls = self.run_league_url_extractor()
event_urls = self.run_event_url_extractor(league_urls)
self.write_urls_to_db(event_urls)
self.run_event_url_extractor(league_urls)

def write_urls_to_db(self, urls):
"""Writes the urls to the database
@@ -78,4 +79,5 @@ def write_urls_to_db(self, urls):
for url in urls:
sql = "INSERT INTO Urls (bookmaker_id, url, timestamp) VALUES (%s, %s, NOW())"
values = (self.bookmaker_id, url)
self.db.execute_insert(sql, values)
self.db.execute_insert(sql, values)
self.logger.info(__class__.__name__ + " : " + "Inserted: " + str(len(urls)) + " urls to db")
18 changes: 9 additions & 9 deletions scrappers/urls/url_scrapper_manager.py
Original file line number Diff line number Diff line change
@@ -133,21 +133,21 @@ def run_url_scrappers(self):

# Init scrappers ordered by bookmaker id

novibet_url_scrapper = Novibet_url_scrapper(None, db, self.create_logger(Bookmaker.NOVIBET.value), notifier, bookmaker_ids['novibet'], base_urls[Bookmaker.NOVIBET.value][Sport.FOOTBALL.value])
root_logger.info(f"--------Novibet Url scrapper created--------")
scrappers.append(novibet_url_scrapper)
# novibet_url_scrapper = Novibet_url_scrapper(None, db, self.create_logger(Bookmaker.NOVIBET.value), notifier, bookmaker_ids['novibet'], base_urls[Bookmaker.NOVIBET.value][Sport.FOOTBALL.value])
# root_logger.info(f"--------Novibet Url scrapper created--------")
# scrappers.append(novibet_url_scrapper)

stoiximan_url_scrapper = Stoiximan_url_scrapper(None, db, self.create_logger(Bookmaker.STOIXIMAN.value), notifier, bookmaker_ids['stoiximan'], base_urls[Bookmaker.STOIXIMAN.value][Sport.FOOTBALL.value])
root_logger.info(f"--------Stoiximan Url scrapper created--------")
scrappers.append(stoiximan_url_scrapper)
# stoiximan_url_scrapper = Stoiximan_url_scrapper(None, db, self.create_logger(Bookmaker.STOIXIMAN.value), notifier, bookmaker_ids['stoiximan'], base_urls[Bookmaker.STOIXIMAN.value][Sport.FOOTBALL.value])
# root_logger.info(f"--------Stoiximan Url scrapper created--------")
# scrappers.append(stoiximan_url_scrapper)

betshop_url_scrapper = Betshop_url_scrapper(None, db, self.create_logger(Bookmaker.BETSHOP.value), notifier, bookmaker_ids['betshop'], base_urls[Bookmaker.BETSHOP.value][Sport.FOOTBALL.value])
root_logger.info(f"--------Betshop Url scrapper created--------")
scrappers.append(betshop_url_scrapper)

betsson_url_scrapper = Betsson_url_scrapper(None, db, self.create_logger(Bookmaker.BETSSON.value), notifier, bookmaker_ids['betsson'], base_urls[Bookmaker.BETSSON.value][Sport.FOOTBALL.value])
root_logger.info(f"--------Betsson Url scrapper created--------")
scrappers.append(betsson_url_scrapper)
# betsson_url_scrapper = Betsson_url_scrapper(None, db, self.create_logger(Bookmaker.BETSSON.value), notifier, bookmaker_ids['betsson'], base_urls[Bookmaker.BETSSON.value][Sport.FOOTBALL.value])
# root_logger.info(f"--------Betsson Url scrapper created--------")
# scrappers.append(betsson_url_scrapper)

# Parallel scrapping
if self.is_parallel:

0 comments on commit 9f6b36c

Please sign in to comment.