diff --git a/scraper/api_scraper.py b/scraper/api_scraper.py index 0521797..c100425 100644 --- a/scraper/api_scraper.py +++ b/scraper/api_scraper.py @@ -103,6 +103,7 @@ def parse_article(self, item: dict) -> ScraperOutput: article_id, title, category=self.category, + content_type=self.content_type, content=content, author=author, date=date, diff --git a/scraper/html_scraper.py b/scraper/html_scraper.py index 3b30535..7e51a01 100644 --- a/scraper/html_scraper.py +++ b/scraper/html_scraper.py @@ -120,6 +120,7 @@ def parse_article(self, tag: "ResultSet[Tag]") -> ScraperOutput: article_id, title, category=self.category, + content_type=self.content_type, content=content, author=author, date=date, diff --git a/scraper/rss_scraper.py b/scraper/rss_scraper.py index 8b846e7..8480341 100644 --- a/scraper/rss_scraper.py +++ b/scraper/rss_scraper.py @@ -85,6 +85,7 @@ def parse_article(self, item: Dict[str, Any]) -> ScraperOutput: article_id, title, category=self.category, + content_type=self.content_type, content=content, author=author, date=date, diff --git a/scraper/scraper.py b/scraper/scraper.py index 1748f33..dea7095 100644 --- a/scraper/scraper.py +++ b/scraper/scraper.py @@ -135,11 +135,6 @@ async def get_articles(self) -> List[ScraperOutput]: # filter out None values articles = [article for article in articles if article is not None] - # add category and content type - for article in articles: - article.category = self.category - article.content_type = self.content_type - return articles async def fetch_article(self, item: Any) -> Any: