-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_images_from_profile.py
48 lines (37 loc) · 1.24 KB
/
get_images_from_profile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from bs4 import BeautifulSoup
import os
import selenium.webdriver as webdriver
import sqlite3
# Module-wide SQLite connection, opened as soon as this file is executed.
# The path is relative, so it is resolved against the current working directory.
db = sqlite3.connect('web/data/crawler')
def check_existed_user(username, connection=None):
    """Return True if ``username`` already has a row in the ``links`` table.

    The lookup compares ``username`` against the ``nickname`` column.

    Args:
        username: Nickname to look for.
        connection: Optional sqlite3 connection to query. When omitted,
            the module-level ``db`` connection is used.

    Returns:
        True when at least one matching row exists, False otherwise.
    """
    conn = db if connection is None else connection
    cursor = conn.cursor()
    try:
        # Bind the value through a placeholder instead of splicing it into
        # the SQL text: quotes in the name can no longer alter the query,
        # and a name that happens to equal a column name is no longer
        # interpreted by SQLite as a double-quoted identifier.
        cursor.execute(
            'SELECT COUNT(*) FROM links WHERE nickname = ?',
            (username,),
        )
        return cursor.fetchone()[0] != 0
    finally:
        cursor.close()
def get_img_url(url):
    """Collect the ``src`` of every ``<img>`` tag on an Instagram profile page.

    Args:
        url: Profile name; it is appended to ``http://instagram.com/`` to
            form the address that gets loaded.

    Returns:
        A list with one entry per ``<img>`` element found in the page
        source reported by the browser. An image without a ``src``
        attribute contributes an empty string.
    """
    profile_url = 'http://instagram.com/' + url
    driver = webdriver.Chrome()
    try:
        driver.set_window_size(300, 300)  # set window size
        driver.get(profile_url)
        # Parse the source as the browser reports it after loading the page.
        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Always end the browser session, even if loading or parsing fails;
        # quit() shuts down the driver rather than only closing the window.
        driver.quit()
    direccions = [foto.get('src', '') for foto in soup.find_all('img')]
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(direccions)
    return direccions
# For every file in the current directory whose name contains "users_",
# derive the user name from the file name and, if that user is not yet in
# the database, save the image URLs of the profile to "links_<name>.txt".
for filename in os.listdir('.'):
    if "users_" not in filename:
        continue
    user = filename.split("users_")[1]
    userbo = user.split(".txt")[0]
    if check_existed_user(userbo):
        continue
    # Fetch before opening the output file so that a failed fetch does not
    # leave an empty links file (or an unclosed handle) behind.
    links = get_img_url(userbo)
    with open("links_" + userbo + ".txt", "w") as file_links:
        file_links.writelines(item + "\n" for item in links)