-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScrape EgyptPrices Laptops.py
78 lines (70 loc) · 2.46 KB
/
Scrape EgyptPrices Laptops.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""
Importing Libraries
"""
from bs4 import BeautifulSoup as bs
import json
import requests as rq
import pathlib
import csv
# Fetch the first page of the laptops category and read its pagination bar to
# learn which page numbers exist. Results: `pages` (list of int page numbers,
# never empty) and `w` (the page counter, starting at 1).
url = "http://www.egprices.com/en/category/computers/laptops"
# A timeout keeps the script from blocking forever on a stalled connection.
s = rq.get(url, timeout=30).content  ### Get The Page Content
soup = bs(s, "html.parser")  ### Analyzing Page Tags And Classes
pages = []
### Finding The Last Page Num
pagination = soup.find('ul', {'class': 'pagination'})
# find() yields None when the page has no pagination bar; iterating None
# would raise TypeError, so only walk the bar when it is present.
if pagination is not None:
    for entry in pagination:
        try:
            pages.append(int(entry.text))
        except (AttributeError, ValueError):
            # Ignore entries that are not page numbers, e.g. the '...'
            # placeholder in 1 - 2 - ... - 61 or nodes without text.
            pass
# With no numbered entries at all, treat the category as a single page so
# that max(pages) further down still has a value to work with.
if not pages:
    pages.append(1)
w = 1
"""
Creating Lists To Collect Information
"""
# Parallel result lists: entry k of each list is later zipped into one row.
images = []     # absolute image URLs
item_name = []  # item titles
prices = []     # price text
stores = []     # store names, read from the alt text of store logo images
items = 0       # running count of collected item names

site_root = "http://www.egprices.com"
category_url = site_root + "/en/category/computers/laptops"

### Scraping The Site Page By Page
# `pages` holds the page numbers read from the pagination bar. Fall back to a
# single page when it is empty instead of failing on max() of an empty list.
last_page = max(pages, default=1)
for w in range(1, last_page + 1):
    url = category_url + "/?&page=" + str(w)
    # A timeout keeps one stalled request from hanging the whole scrape.
    s = rq.get(url, timeout=30).content
    soup = bs(s, "html.parser")
    for tag in soup.find_all('div', {'class': 'row hide-for-small-only'}):
        ### Getting Images
        for link in tag.find_all('a', {'class': 'divItem'}):
            for img in link.find_all('img', src=True):
                images.append(site_root + img['src'])
        ### Getting Item's Name
        for cell in tag.find_all('div', {'class': 'medium-6 columns'}):
            anchor = cell.find('a')
            if anchor is None:
                # No link in this cell; iterating None would raise TypeError.
                continue
            for child in anchor:
                # Keep a plain str copy rather than the bs4 node object, so
                # the CSV/JSON writers only ever see ordinary strings.
                item_name.append(str(child))
                items += 1
        ### Getting Item's Price
        for cell in tag.find_all('div', {'class': 'medium-2 text-center columns'}):
            for holder in cell.find_all('div', {'class': 'child'}):
                inner = holder.find('div')
                if inner is None:
                    # Same guard as for names: skip holders without a div.
                    continue
                for child in inner:
                    prices.append(str(child))
        ### Getting Store's Name
        for cell in tag.find_all('div', {'class': 'medium-2 text-center columns'}):
            for img in cell.find_all('img', src=True, alt=True):
                # Only images whose path mentions 'store' are store logos.
                if 'store' in img['src']:
                    stores.append(img['alt'])
    ### Print The Page Num It's Working On To Tell Me That It's Working
    print('scraping page: ' + str(w))
"""
Exporting To CSV File
"""
# Write one CSV row per scraped item into ./results/Laptops.csv, creating the
# results directory first if it does not exist yet.
pathlib.Path('./results').mkdir(parents=True, exist_ok=True)
# zip() stops at the shortest list, so rows are only produced while every
# column still has a value.
result = zip(item_name, prices, stores, images)
# The with-block closes (and therefore flushes) the file even if writing
# fails. An explicit UTF-8 encoding keeps non-ASCII item names from depending
# on the platform's default locale encoding.
with open("./results/Laptops.csv", 'w', newline='', encoding='utf-8') as csv_file:
    f = csv.writer(csv_file)
    f.writerow(['item', 'price', 'store', 'image'])
    f.writerows(result)
"""
Exporting To JSON File
"""
# Build one dict per scraped item, save the list as pretty-printed JSON in
# ./results/Laptops.json and echo the same text to stdout.
json_result = [
    {'name': name, 'price': price, 'store': store, 'image': image}
    for name, price, store, image in zip(item_name, prices, stores, images)
]
# Serialize once and reuse the text for both the file and the console.
json_text = json.dumps(json_result, indent=4, separators=(',', ': '))
# The with-block guarantees the file is flushed and closed.
with open("./results/Laptops.json", 'w', newline='') as f:
    f.write(json_text)
print(json_text)