-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_scraper1.py
57 lines (48 loc) · 1.73 KB
/
image_scraper1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# When running on Google Colab, remove the surrounding triple quotes below so the setup commands run
""" %%capture
import sys
!pip install selenium
#!apt-get update # to update ubuntu to correctly run apt install
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver') """
import os
import re
import ssl
import sys
import time
import urllib.request, urllib.parse, urllib.error

import lxml
import requests
from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys
# Image scraper: run a web search, open the image results, scroll until the
# page stops growing, then save a screenshot of every thumbnail found.

# --- Configuration: edit these to change what is scraped and where it goes ---
CHROMEDRIVER_PATH = '/home/samuel/Downloads/Compressed/chromedriver'
SEARCH_TERM = "cranes"  # change to the object you wish to search for
OUTPUT_DIR = '/home/samuel/images/crane'  # change the path to save the images
FILE_PREFIX = 'crane_'
SCROLL_PAUSE_SECONDS = 2  # time given to the page to load after each action
MAX_THUMBNAILS = 1000  # grid positions 1 .. MAX_THUMBNAILS - 1 are tried

# Selenium's element screenshot() is documented to return False on a write
# error instead of raising, so a missing directory would otherwise silently
# produce no images. Create it up front, before a browser is started.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# NOTE(review): this script uses the Selenium 3 style API (positional driver
# path, find_element_by_xpath) — confirm the installed Selenium version.
driver = webdriver.Chrome(CHROMEDRIVER_PATH)
try:
    driver.get("https://www.google.com/")

    # NOTE(review): the ids "sb_form_q" and "b-scopeListItem-images" look like
    # Bing's search box / image tab, while the page loaded above and the
    # "islmp"/"islrg" selectors below look like Google Images — confirm which
    # site is intended and align the URL and selectors.
    box = driver.find_element_by_xpath('//*[@id="sb_form_q"]')
    box.send_keys(SEARCH_TERM)
    box.send_keys(Keys.ENTER)
    driver.find_element_by_xpath('//*[@id="b-scopeListItem-images"]/a').click()

    # Keep scrolling to the bottom until the page height stops changing.
    last_height = driver.execute_script('return document.body.scrollHeight')
    while True:
        driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        time.sleep(SCROLL_PAUSE_SECONDS)
        new_height = driver.execute_script('return document.body.scrollHeight')
        try:
            # Best effort: press the extra-results button when it is present
            # and clickable.
            driver.find_element_by_xpath(
                '//*[@id="islmp"]/div/div/div/div/div[5]/input').click()
            time.sleep(SCROLL_PAUSE_SECONDS)
        except WebDriverException:
            # Button missing or not interactable; only Selenium errors are
            # ignored so Ctrl-C and programming errors still surface.
            pass
        if new_height == last_height:
            break
        last_height = new_height

    # Screenshot each thumbnail by its position in the results grid. Positions
    # that do not resolve to an image are skipped rather than ending the loop.
    for i in range(1, MAX_THUMBNAILS):
        target = os.path.join(OUTPUT_DIR, FILE_PREFIX + str(i) + '.png')
        try:
            driver.find_element_by_xpath(
                '//*[@id="islrg"]/div[1]/div[' + str(i) + ']/a[1]/div[1]/img'
            ).screenshot(target)
        except WebDriverException:
            continue
finally:
    # Always close the browser and stop the chromedriver process, even when
    # the scrape fails part-way through.
    driver.quit()