lat.py
import json
import feedparser
from urllib.parse import urljoin, urlparse

# Load the list of previously collected article URLs.
with open("lat_urls.json", "r") as lat_file:
    lat_json = json.load(lat_file)

# L.A. Times section RSS feeds to poll.
feeds = [
    "https://www.latimes.com/business/rss2.0.xml",
    "https://www.latimes.com/california/rss2.0.xml",
    "https://www.latimes.com/environment/rss2.0.xml",
    "https://www.latimes.com/entertainment-arts/rss2.0.xml",
    "https://www.latimes.com/food/rss2.0.xml",
    "https://www.latimes.com/lifestyle/rss2.0.xml",
    "https://www.latimes.com/politics/rss2.0.xml",
    "https://www.latimes.com/science/rss2.0.xml",
    "https://www.latimes.com/sports/rss2.0.xml",
    "https://www.latimes.com/travel/rss2.0.xml",
    "https://www.latimes.com/world-nation/rss2.0.xml",
]

for url in feeds:
    feed = feedparser.parse(url)
    for article in feed.entries:
        # Drop query strings and fragments, keeping only scheme, host, and path.
        clean_url = urljoin(article['link'], urlparse(article['link']).path)
        if clean_url not in lat_json:
            lat_json.append(clean_url)

# Write the updated URL list back to disk.
with open("lat_urls.json", "w") as f:
    f.write(json.dumps(lat_json))