# package_craw.py
# Forked from jdanceze/cg.
import re
import sys
import requests
from google_play_scraper import app
from bs4 import BeautifulSoup
import json
import time
import wget
from selenium import webdriver
import subprocess
import webbrowser
#target = sys.argv[1]
#page_num = 48
# Crawl settings: the androidrank category to scan and the base name of the
# package list files under package/ and package/new/.
category = "WEATHER"
target = "weather"

# Captures the last path segment of links shaped like
# /application/<slug>/<package>" ; compiled once and reused for every page.
PACKAGE_LINK_RE = re.compile(r'/application/[^/]+/([^"]+)')


def build_page_url(page_num, category_name):
    """Return the androidrank listing URL for one crawl step.

    The ``start`` query value is ``page_num`` followed by the digit ``1``,
    so page 2 requests ``start=21`` and page 48 requests ``start=481``.
    """
    # NOTE(review): consecutive steps advance ``start`` by 10; confirm this
    # matches the number of apps the site returns per page.
    return (
        "https://www.androidrank.org/android-most-popular-google-play-apps"
        f"?start={page_num}1&sort=0&price=all&category={category_name}"
    )


def extract_new_packages(html_content, known_packages):
    """Return the set of package names linked in ``html_content``.

    Names already present in ``known_packages`` are left out. Any container
    supporting ``in`` works; a set keeps each lookup constant-time.
    """
    return {
        match
        for match in PACKAGE_LINK_RE.findall(html_content)
        if match not in known_packages
    }


def _load_known_packages(path):
    """Read one package name per line from ``path`` into a set.

    Raises:
        FileNotFoundError: if the baseline list does not exist.
    """
    with open(path, "r", encoding="utf-8") as file:
        return {line.strip() for line in file}


def _fetch_page(url, timeout=30):
    """Return the body of ``url``, or ``None`` if the request fails.

    A timeout is always set so one stalled connection cannot hang the whole
    crawl, and HTTP error responses are rejected instead of being scanned.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as error:
        # Skip just this page so the names gathered so far are still saved.
        print(f"Skipping {url}: {error}")
        return None
    return response.text


def _write_packages(path, names):
    """Write ``names`` to ``path``, one per line, in sorted order."""
    import os

    # Create the output directory up front so a missing package/new/ does
    # not throw away the results of the whole crawl.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w", encoding="utf-8") as file:
        # Sorting makes the output reproducible between runs.
        file.writelines(f"{name}\n" for name in sorted(names))


def main():
    """Collect package names not yet in package/<target>.txt.

    Scans listing pages 2 through 48 of the configured category, reports
    what was found, and writes the new names to package/new/<target>.txt.
    """
    old_package_names = _load_known_packages(f"package/{target}.txt")
    package_names = set()

    for page_num in range(2, 49):
        print(f"Page {page_num}")
        html_content = _fetch_page(build_page_url(page_num, category))
        if html_content is None:
            continue
        package_names |= extract_new_packages(html_content, old_package_names)

    if package_names:
        print('Package names found:', package_names)
    else:
        print('Package names not found.')
    print("Total: ", len(package_names))

    _write_packages(f"package/new/{target}.txt", package_names)


if __name__ == "__main__":
    main()
# #read package names from txt file
# package_names = []
# with open(f"package/{target}.txt", "r") as file:
# for line in file:
# package_names.append(line.strip())
# #print(package_names)
# for package_name in package_names:
# print('Fetching app details... ', package_name)
# try:
# result = app(package_name)
# latest_version = result['version']
# #print(f"App name: {result['title']}")
# print(f"Latest version: {latest_version}\n")
# download_url = f"https://d.apkpure.com/b/APK/{package_name}?version=latest"
# webbrowser.open(download_url)
# time.sleep(5)
# # r = requests.get(download_url, allow_redirects=False)
# # headers = r.headers
# # print(headers)
# # open(f"{package_name}.apk", 'wb').write(r.content)
# #append package name, app name, latest version, download url to csv file
# with open(f"{target}.txt", "a") as file:
# file.write(f"{package_name},{latest_version},{download_url}\n")
# except:
# print("Error: ", package_name)