-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.js
54 lines (50 loc) · 1.78 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
const puppeteer = require("puppeteer");
// Handle of the most recently launched browser; read by closePuppeteer.
let browser;

/**
 * Launches a headless browser and opens a fresh page in it.
 *
 * The launched browser is stored in the module-level `browser` variable so
 * that it can be closed later.
 *
 * @returns {Promise<object>} The page returned by `browser.newPage()`.
 * @throws Propagates any error raised by `puppeteer.launch` or `newPage`.
 */
const initializePuppeteer = async () => {
  const launched = await puppeteer.launch({ headless: "new" });
  browser = launched;
  try {
    return await launched.newPage();
  } catch (err) {
    // The caller never receives a page, so it has no reason to close the
    // browser; release it here instead of leaking the process. A failure
    // while closing is ignored on purpose so the original error surfaces.
    await launched.close().catch(() => {});
    throw err;
  }
};
/**
 * Closes the browser held in the module-level `browser` variable.
 *
 * Does nothing when no browser has been launched yet, so it is safe to call
 * from cleanup paths.
 *
 * @returns {Promise<*>} Whatever `browser.close()` resolves to, or
 *   `undefined` when there is no browser to close.
 */
const closePuppeteer = async () => {
  if (!browser) {
    return undefined;
  }
  return browser.close();
};
/**
 * Converts the text of the split price elements into a number.
 *
 * @param {string} wholeText - Text of the whole-units element, e.g. "1,299.".
 * @param {?string} fractionText - Text of the cents element, e.g. "05";
 *   may be null or empty when the element is absent.
 * @returns {number} The parsed price, or NaN when `wholeText` does not start
 *   with an integer.
 */
const parsePrice = (wholeText, fractionText) => {
  // Drop thousands separators first: parseInt("1,299") stops at the comma
  // and would yield 1.
  const whole = Number.parseInt(wholeText.replace(/[,\s]/g, ""), 10);
  // Keep the cents as a digit string so a leading zero ("05") is not lost.
  const fraction = /^\s*(\d+)/.exec(fractionText ?? "")?.[1];
  if (Number.isNaN(whole) || fraction === undefined) {
    return whole;
  }
  return Number.parseFloat(`${whole}.${fraction}`);
};

/**
 * Searches amazon.com for a query and scrapes the listed products.
 *
 * Only result entries that expose a title, price, image, rating and review
 * count are returned. Entries are returned in reverse DOM order.
 *
 * @param {string} QUERY_TO_SEARCH - Text typed into the search box.
 * @returns {Promise<Array<{title: string, price: number, image: ?string,
 *   ratings: string, reviews: string}>>} The scraped products.
 * @throws Propagates any error raised while driving the page; the browser is
 *   closed before the error reaches the caller.
 */
const scrapeProducts = async (QUERY_TO_SEARCH) => {
  // Acquire the page before the try block so that closePuppeteer only runs
  // once initialization has succeeded.
  const page = await initializePuppeteer();
  try {
    await page.goto("https://www.amazon.com/");
    await page.type("#twotabsearchtextbox", QUERY_TO_SEARCH);
    // Register the navigation wait before clicking; otherwise the navigation
    // can finish before the wait starts and the wait would time out.
    await Promise.all([
      page.waitForNavigation(),
      page.click("#nav-search-submit-button"),
    ]);
    await page.waitForTimeout(5000);

    // This callback is executed inside the page, so it cannot reference
    // helpers from this module; it returns raw text and parsing happens below.
    const rawProducts = await page.evaluate(() => {
      const results = [];
      const items = document.querySelectorAll(".s-result-item .sg-col-inner");
      // Walk backwards to keep the historical (reverse DOM) result order.
      for (let i = items.length - 1; i >= 0; i -= 1) {
        const item = items[i];
        const title = item.querySelector("h2 > a > span");
        const price = item.querySelector(".a-price-whole");
        const cents = item.querySelector(".a-price-fraction");
        const image = item.querySelector("img");
        const ratings = item.querySelector(".a-icon-alt");
        const reviews = item.querySelector("span[aria-label] > a > span");
        if (!title || !price || !image || !ratings || !reviews) continue;
        results.push({
          title: title.innerText,
          priceWhole: price.innerText,
          priceFraction: cents?.innerText ?? null,
          image: image.getAttribute("src"),
          ratings: ratings.innerText,
          reviews: reviews.innerText,
        });
      }
      return results;
    });

    return rawProducts.map((raw) => ({
      title: raw.title,
      price: parsePrice(raw.priceWhole, raw.priceFraction),
      image: raw.image,
      ratings: raw.ratings,
      reviews: raw.reviews,
    }));
  } finally {
    // Always release the browser, including when a step above throws.
    await closePuppeteer();
  }
};
// Public API of this module: scrapeProducts is exposed to consumers under
// the name `scrapeAmazon`.
module.exports = {
scrapeAmazon: scrapeProducts
};