-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
163 lines (125 loc) · 4.82 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
from post import Post
from plot import Plot
from server import Server
import threading
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json, time, os, argparse, getpass
from sys import exit
from lxml import html
# Landing page of the mobile Facebook site; the sign-in form is loaded from here.
MFACEBOOK_URL = "https://m.facebook.com/"
# Timeline that gets crawled (served by the lightweight "mbasic" front-end).
profile_url = "https://mbasic.facebook.com/manai.elyes"

# Location of the chromedriver binary, resolved against the working directory.
DRIVER_NAME = "chromedriver.exe"
DRIVER_DIR = os.path.join(os.getcwd(), DRIVER_NAME)
print("driver", DRIVER_DIR)

# Seconds to wait for a page element to show up before giving up.
TIMEOUT = 5
def setup_driver(dir_driver):
    """Create and return a configured Chrome WebDriver.

    Args:
        dir_driver: Path to the chromedriver executable. It is accepted for
            backward compatibility but not used: ``webdriver.Chrome`` is
            called without an explicit driver path.

    Returns:
        A new ``webdriver.Chrome`` instance with extensions and GPU disabled,
        a 1920x1080 initial window and reduced browser logging.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_flags = (
        "--disable-extensions",
        "--disable-gpu",
        # Chrome expects "width,height"; the previous "1920x1080" form is
        # not a valid value for this switch.
        "--window-size=1920,1080",
        "--log-level=3",
    )
    for flag in chrome_flags:
        chrome_options.add_argument(flag)
    # NOTE: add "--headless" to chrome_flags to run without a visible window.
    return webdriver.Chrome(options=chrome_options)
def signin(driver):
    """Prompt for Facebook credentials and log in through the mobile site.

    The login and password are read interactively (the password without
    echo), typed into the login form at ``MFACEBOOK_URL`` and submitted.

    Args:
        driver: Selenium WebDriver used to load and drive the login page.

    Returns:
        True when, five seconds after submitting, the page title does not
        contain "Log" (taken as a sign that the login went through).

    Raises:
        SystemExit: With status 1 when the login appears to have failed.
            The browser session is shut down first.
        selenium.common.exceptions.TimeoutException: When one of the form
            elements does not appear within ``TIMEOUT`` seconds.
    """
    fb_login = input('enter your fb login: ')
    fb_pass = getpass.getpass(prompt='enter your fb password: ')
    driver.get(MFACEBOOK_URL)

    # `until` returns the located element, so the form fields are waited for
    # and fetched in one step. This also avoids the `find_element_by_*`
    # helpers, which were removed in Selenium 4.3.
    wait = WebDriverWait(driver, TIMEOUT)
    email_field = wait.until(
        EC.presence_of_element_located((By.ID, 'm_login_email')))
    password_field = wait.until(
        EC.presence_of_element_located((By.ID, 'm_login_password')))
    submit_button = wait.until(
        EC.presence_of_element_located((By.NAME, 'login')))

    email_field.send_keys(fb_login)
    password_field.send_keys(fb_pass)
    submit_button.click()
    print("Logging in automatically...")
    # Give the post-login redirect time to settle before reading the title.
    time.sleep(5)

    if "Log" in driver.title:
        print("Login failed pls check your credentials and retry")
        # quit() ends the whole session (browser and driver process), whereas
        # close() would only close the current window.
        driver.quit()
        exit(1)
    return True
def get_urls_and_save(driver, profile_url, nbPages):
    """Walk a profile timeline page by page and collect its post links.

    Every visited page is handed to ``append_urls()``, which stores the
    links in the module-level ``posts`` list. Paging stops when ``nbPages``
    pages have been harvested, when no "See More Stories" link is left, or
    when Selenium reports an error.

    Args:
        driver: Selenium WebDriver used for navigation.
        profile_url: URL of the timeline to start from.
        nbPages: Maximum number of pages to harvest; 0 means no limit.
    """
    # Imported locally so the block does not depend on a file-level import.
    from selenium.common.exceptions import WebDriverException

    driver.get(profile_url)
    pageCount = 0
    while nbPages == 0 or pageCount < nbPages:
        try:
            # Harvest BEFORE looking for the next-page link: the last page
            # (or a one-page timeline) has no such link, and its posts used
            # to be dropped because the lookup failed first.
            append_urls()
            pageCount += 1
            next_page = driver.find_element(
                By.XPATH, "//*[contains(text(), 'See More Stories')]")
            next_page.click()
        except WebDriverException:
            # No further page, or the browser refused the action: keep what
            # has been collected so far and stop paging.
            break
def append_urls():
    """Add the "Full Story" links of the current page to ``posts``.

    Operates on module-level state: it reads the page currently loaded in
    the global ``driver`` and appends to the global ``posts`` list.
    """
    # find_elements(By.XPATH, ...) works on both old and new Selenium
    # releases; find_elements_by_xpath was removed in Selenium 4.3.
    story_links = driver.find_elements(
        By.XPATH, "//*[contains(text(), 'Full Story')]")
    for link in story_links:
        url = link.get_attribute('href')
        # The XPath matches any element carrying the text, not only anchors;
        # elements without an href would otherwise add None to the list.
        if url:
            posts.append(url)
# Shared containers filled while crawling.
posts = []     # post URLs collected from the timeline pages
users = {}     # user name -> list of post dates
dates = set()  # distinct post dates (a set, turned into a list before dumping)
# `data` bundles the same two objects under the keys used in the JSON output.
data = {"dates": dates, "users": users}
if "__main__" == __name__:
    # Script entry point: optionally crawl the profile and refresh data.json,
    # then serve the chart page and open it in the browser.
    #
    # NOTE(review): the indentation of this script was reconstructed. Two
    # nesting choices should be confirmed against the intended behaviour:
    # the reactors loop sits under the `reaction_count > 0` check, and the
    # JSON dump sits under `--update` (so a run without it keeps the
    # existing data.json).
    #
    # These statements deliberately stay at module level rather than inside
    # a main() function: append_urls() reads `driver` as a module global.
    parser = argparse.ArgumentParser()
    parser.add_argument("--update", action="store_true",
                        help="Crawl users interactions ")
    # const=0 makes a bare `--pages` mean "no limit" instead of yielding
    # None, which would raise TypeError in the comparison below.
    parser.add_argument('--pages', nargs='?', type=int, default=0, const=0)
    args = parser.parse_args()

    # 0 is valid and means "all pages"; only negative values are rejected.
    nbPages = args.pages
    if nbPages < 0:
        print("number of pages should be >= 0")
        exit(1)

    driver = setup_driver(DRIVER_DIR)

    if args.update:
        signin(driver)
        get_urls_and_save(driver, profile_url, nbPages)

        for url in posts:
            try:
                post = Post(url, driver)
                post_date = str(post.date)

                # Record the post date for every commentator.
                for commentator in post.commentators:
                    users.setdefault(
                        commentator["user_name"], []).append(post_date)

                if post.reaction_count > 0:
                    dates.add(post_date)
                    # Record the post date for every reactor.
                    for reactor in post.reactors:
                        users.setdefault(
                            reactor["user_name"], []).append(post_date)
            except Exception as err:
                # Best effort: one unreadable post must not abort the crawl,
                # but the failure is reported instead of silently dropped.
                print(f"skipping {url}: {err}")

        # Sets are not JSON-serializable.
        data["dates"] = list(data["dates"])
        with open("data.json", "w", encoding='utf-8') as outfile:
            json.dump(data, outfile, ensure_ascii=False)

    # Serve the chart from a background thread and display it.
    server = Server(9000)
    threading.Thread(target=server.start).start()
    driver.get("http://localhost:9000/chart.html")
    driver.maximize_window()