Commit

feat: scrape traits per champion
Flexicon committed Dec 29, 2024
1 parent 1ebfbe1 commit 6f419cd
Showing 5 changed files with 51 additions and 22 deletions.
1 change: 1 addition & 0 deletions common/models/champion.py
@@ -5,3 +5,4 @@ class Champion(BaseModel):
     name: str
     image: str
     cost: int
+    traits: list[str] = []
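
A quick sketch of how the extended model behaves, assuming Champion is a pydantic BaseModel (the .dict() call in scrape_champions.py below suggests pydantic v1); the champion values are illustrative:

    from pydantic import BaseModel


    class Champion(BaseModel):
        name: str
        image: str
        cost: int
        traits: list[str] = []


    # Omitted traits fall back to the [] default, so pre-existing
    # call sites and stored documents keep validating unchanged.
    champ = Champion(name="Ahri", image="ahri.png", cost=4)
    print(champ.dict())
    # {'name': 'Ahri', 'image': 'ahri.png', 'cost': 4, 'traits': []}
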
56 changes: 42 additions & 14 deletions scraper/helpers.py
@@ -2,25 +2,29 @@
 import re
 
 import requests
+from bs4 import BeautifulSoup
 from bs4.element import Tag
 from selenium import webdriver
+from selenium.common.exceptions import TimeoutException
 
 from common.models import Champion
 
 
 class ScraperWebDriver:
     def __init__(self) -> None:
         chrome_options = webdriver.ChromeOptions()
-        chrome_options.add_argument('--no-sandbox')
-        chrome_options.add_argument('--headless')
+        chrome_options.add_argument("--no-sandbox")
+        chrome_options.add_argument("--headless")
         self.driver = webdriver.Chrome(options=chrome_options)
         self.driver.set_page_load_timeout(10)
         self.driver.implicitly_wait(5)
 
-    def fetch_content_html(self, url: str, *, selector: str = '.main') -> str:
-        print('Fetching html to scrape, please wait...')
+    def fetch_content_html(self, url: str, *, selector: str = ".main") -> str:
+        print(f"Fetching html from: {url}")
         self.driver.get(url)
-        return self.driver.find_element_by_css_selector(selector).get_attribute('innerHTML')
+        return self.driver.find_element_by_css_selector(selector).get_attribute(
+            "innerHTML"
+        )
 
     def __enter__(self):
         return self
@@ -29,26 +33,50 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         self.driver.close()
 
 
-def _build_champion_from_character(character: Tag) -> Champion:
-    img_tag = character.find('img')
-    name = img_tag['alt']
-    icon = img_tag['src']
-    cost = _price_from_character_class(' '.join(character['class']))
-    return Champion(name=name, image=icon, cost=cost)
+def _build_champion_from_character(
+    driver: ScraperWebDriver, character: Tag
+) -> Champion:
+    traits = _scrape_traits_for_character(driver, character)
+    img_tag = character.find("img")
+    name = img_tag["alt"]
+    icon = img_tag["src"]
+    cost = _price_from_character_class(" ".join(character["class"]))
+    return Champion(name=name, image=icon, cost=cost, traits=traits)
+
+
+def _scrape_traits_for_character(driver: ScraperWebDriver, character: Tag) -> list[str]:
+    try:
+        href = character["href"]
+        url = f"https://tftactics.gg{href}" if href.startswith("/") else href
+        html = driver.fetch_content_html(url)
+        return _extract_traits_from_character_html(html)
+    except TimeoutException as err:
+        print(f"Failed to scrape traits for champion: {href}\n{err}")
+        return []
+
+
+def _extract_traits_from_character_html(html: str) -> list[str]:
+    selector = ".ability-description-name"
+    ability_tags = BeautifulSoup(html, "html.parser").select(selector)
+    return [
+        tag.find("h2").get_text()
+        for tag in ability_tags
+        if tag.find("h4").get_text().lower() not in ["active", "passive"]
+    ]
 
 
 def _price_from_character_class(classes: str) -> int:
-    pattern = re.compile(r'\bc(\d+)\b')
+    pattern = re.compile(r"\bc(\d+)\b")
    matches = pattern.findall(classes)
    return int(matches[0]) if matches else 0
6 changes: 3 additions & 3 deletions scraper/scrape_champions.py
@@ -14,16 +14,16 @@ def scrape_champions() -> List[Champion]:
     with ScraperWebDriver() as driver:
         html = driver.fetch_content_html(TFTChampionsURL)
         characters = BeautifulSoup(html, 'html.parser').select('.characters-list > .characters-item')
-        champions = list(map(_build_champion_from_character, characters))
-    return champions
+        champions = [_build_champion_from_character(driver, c) for c in characters]
+        return champions
 
 
 def scrape_and_persist(collection: Collection):
     result = scrape_champions()
     print('Found {count} champions\n{separator}\n'.format(count=len(result), separator="-" * 15))
 
     for champion in result:
-        print(f'Name: {champion.name}\nImage: {champion.image}\nCost: {champion.cost}\n')
+        print(f'Name: {champion.name}\nImage: {champion.image}\nCost: {champion.cost}\nTraits: {champion.traits}\n')
 
     collection.drop()
     collection.insert_many([comp.dict() for comp in result])
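
Taken together, scrape_and_persist drops the target collection and re-inserts the freshly scraped champions, traits included, so each run replaces the previous snapshot. A minimal wiring sketch, assuming pymongo (the diff only shows that collection is a Collection) with illustrative database and collection names:

    from pymongo import MongoClient

    from scraper.scrape_champions import scrape_and_persist

    # Hypothetical wiring; database and collection names are illustrative.
    client = MongoClient("mongodb://localhost:27017")
    scrape_and_persist(client["tft"]["champions"])
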
6 changes: 3 additions & 3 deletions scraper/scrape_comps.py
@@ -18,16 +18,16 @@ def scrape_comps() -> List[Comp]:
             "div", class_="team-portrait"
         )
         comps = list(map(_build_comp_from_team, teams))
-    return comps
+        return comps
 
 
-def _build_comp_from_team(team: Tag) -> Comp:
+def _build_comp_from_team(driver: ScraperWebDriver, team: Tag) -> Comp:
     playstyle = team.find_next(class_="team-playstyle").get_text()
     name = team.find_next(class_="team-name-elipsis").get_text().replace(playstyle, "")
 
     tier = team.find_next(class_="team-rank").get_text()
     characters = team.select(".team-characters > .characters-item")
-    champions = list(map(_build_champion_from_character, characters))
+    champions = [_build_champion_from_character(driver, c) for c in characters]
     items = list(map(_build_item_recommendation, characters, champions))
 
     return Comp(
4 changes: 2 additions & 2 deletions scraper/scrape_items.py
@@ -39,8 +39,8 @@
 def scrape_items() -> List[CompositeItem]:
     with ScraperWebDriver() as driver:
         html = driver.fetch_content_html(ScrapeURL, selector="#content")
-    items = BeautifulSoup(html, "html.parser").select(Selector)
-    return list(map(_build_composite_item, items))
+        items = BeautifulSoup(html, "html.parser").select(Selector)
+        return list(map(_build_composite_item, items))
 
 
 def _build_composite_item(div: Tag) -> CompositeItem:
