-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patha21_startup failure news.py
75 lines (60 loc) · 1.82 KB
/
a21_startup failure news.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#%%
from import_stocks2 import *
# API key for newsapi.org. `newsapi_org` is not defined in this file;
# presumably it is exported by the star-import from import_stocks2 — TODO confirm.
api_key=newsapi_org
# Search phrase passed to the news helper below.
query='startup failure news'
# `fetch_news_for_query` is also not defined here (presumably from
# import_stocks2); the shape of its return value is not visible in this file.
data=fetch_news_for_query(api_key, query, language='en', page_size=5)
# Bare expression: displays the result when the cell is run interactively.
data
# %%
import requests

# Query newsapi.org's "everything" endpoint for stories about startup failure
# and print each hit's title and link. `api_key` is set in the first cell;
# to run this cell on its own, assign your newsapi.org key to `api_key` first.
url = 'https://newsapi.org/v2/everything'
# Define the search parameters
parameters = {
    'q': 'startup failure',  # Query for news stories about startup failure
    'apiKey': api_key,
    'language': 'en',
    'sortBy': 'relevancy',  # You can sort by 'publishedAt', 'relevancy', or 'popularity'
}
# Bind `articles` up front so later cells can reference it even when the
# request below does not succeed.
articles = []
# Make the request. Without a timeout, requests waits indefinitely on a
# stalled connection.
response = requests.get(url, params=parameters, timeout=10)
# Check if the request was successful
if response.status_code == 200:
    # Parse the response JSON to get the articles; fall back to an empty list
    # when the key is missing or null so the loop below never sees None.
    articles = response.json().get('articles') or []
    for article in articles:
        print(article['title'], article['url'])
else:
    print("Failed to fetch news: ", response.status_code)
# %%
# Take the first article from the search results. This rebinds `data`,
# which the first cell used for the helper's result.
data=articles[0]
# Bare expression: displays the article when the cell is run interactively.
data
# %%
# Pull the link out of the article's 'url' field.
# NOTE(review): the next cell overwrites `url` with a hard-coded address, so
# this value is not the one that actually gets downloaded.
url=data['url']
# %%
import requests

# Download the raw HTML of one article and print it.
# NOTE(review): this hard-coded address replaces whatever `url` was taken
# from the search results in the previous cell.
url = "https://www.businessinsider.com/failure-museum-vc-norwest-venture-partners-startups-sean-jacobsohn"
payload = ""
# Sent as the User-Agent header of the request.
headers = {"User-Agent": "insomnia/8.5.0"}
# Without a timeout, requests waits indefinitely on a stalled connection.
response = requests.request("GET", url, data=payload, headers=headers, timeout=10)
print(response.text)
# %%
import re
def remove_angle_bracket_strings(text: str) -> str:
    """Return *text* with every ``<...>`` span removed.

    A span starts at a ``<`` and runs through the next ``>``; it may contain
    newlines. Text outside such spans is returned unchanged, and a ``<`` with
    no later ``>`` is left in place.
    """
    # Regular expression pattern to match strings within angle brackets
    pattern = r'<[^>]*>'
    # Replacing the matched strings with an empty string
    cleaned_text = re.sub(pattern, '', text)
    return cleaned_text
# Example usage: strip the <...> spans from the body of `response`
# (the HTTP response object bound by an earlier cell).
original_text = remove_angle_bracket_strings(response.text)
# Bare expression: displays the stripped text when the cell is run interactively.
original_text
# %%
# Collapse the de-tagged page onto a single line.
text2=original_text.replace('\n','')
# %%
# Match one innermost {...} group: an opening brace, any run of non-brace
# characters, and the closing brace. Excluding braces (not '>') inside the
# group keeps a match from running from the first '{' to the last '}' and
# swallowing the prose between separate groups.
pattern = r'\{[^{}]*\}'
# Replace the matched groups with an empty string, repeating until nothing is
# left to remove so nested groups peel away from the inside out.
text3, removed = re.subn(pattern, '', text2)
while removed:
    text3, removed = re.subn(pattern, '', text3)
# %%
# Bare expression: displays the cleaned text when the cell is run interactively.
text3
# %%