-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·178 lines (158 loc) · 6.41 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/bin/python3
# -*- coding: utf-8 -*
import sys
from datetime import datetime
import requests #請求
from bs4 import BeautifulSoup as soup #解析html
from fake_useragent import UserAgent #偽裝請求
import modules #sys.path.append("modules")
import json #解析 json
import signal #signal 判斷ctrl+c
import time #Sleep
import download #引入下載功能
#############################################
class _info():
    """Static metadata about this program (name, author, version, update date)."""

    def __init__(self):
        # All four fields are fixed strings; other parts of the script read
        # them as attributes of the shared instance created below.
        (
            self.name,
            self.author,
            self.version,
            self.update,
        ) = ("Instag", "Alpaca0x0", "1.1", "2023/06/31")


# Rebind the class name to its only instance, so `_info.name` etc. are plain
# attribute reads and the class itself is no longer reachable by name.
_info = _info()
#############################################
# Runtime switches; both default to off and are turned on by the command-line
# flags handled below.
sysDebug = False  # debug mode (set here; not read anywhere in the visible script)
sysAuto = False  # auto download: do not pause or ask between result pages

if "--help" in sys.argv or "-h" in sys.argv:
    print("Arguments:")
    print(" --auto, -a\n No need to wait for seconds between pages. (頁數之間不須等待秒數,立刻開始下載下一頁)\n")
    print(" --debug, -d\n Debug Mode (除錯模式,能夠顯示更多詳細資訊)\n")
    # sys.exit() instead of the bare exit(): the bare name is an interactive
    # helper installed by the `site` module, and this file later rebinds
    # `exit` to a two-argument signal handler.
    sys.exit()
if "--auto" in sys.argv or "-a" in sys.argv:
    sysAuto = True
    print("Auto Mode On")
if "--debug" in sys.argv or "-d" in sys.argv:
    sysDebug = True
    print("Debug Mode On")
#############################################
def exit(signum, frame):
    """Signal handler: print a stop notice, then end the process.

    NOTE: defining this rebinds the module-level name ``exit``; any bare
    ``exit()`` call placed after this definition would reach this handler
    and fail for lack of arguments.

    Args:
        signum: Signal number supplied by the ``signal`` module (unused).
        frame: Interrupted stack frame supplied by the ``signal`` module
            (unused).

    Raises:
        SystemExit: Always, through ``sys.exit()``.
    """
    print("\n\nStoped {}! \n".format(_info.name))
    sys.exit()


# Install the handler for both interrupt (Ctrl+C) and termination signals.
for _stop_signal in (signal.SIGINT, signal.SIGTERM):
    signal.signal(_stop_signal, exit)
class InputTimeoutError(Exception):
    """Raised by ``interrupted`` to abort a prompt that was not answered in time."""


def interrupted(signum, frame):
    """Signal handler that cancels a pending prompt.

    The script installs this for ``SIGALRM`` right before asking whether to
    continue to the next page; raising from the handler makes the blocked
    ``input()`` call fail so the caller can fall back to its default answer.

    Args:
        signum: Signal number supplied by the ``signal`` module (unused).
        frame: Interrupted stack frame supplied by the ``signal`` module
            (unused).

    Raises:
        InputTimeoutError: Always.
    """
    raise InputTimeoutError
def set_header_user_agent():
    """Return a User-Agent string for the next outgoing request.

    A fresh ``UserAgent`` object is created on every call and the value of
    its ``random`` attribute is returned.
    """
    return UserAgent().random
#############################################
class Req:
    """One request target: the URL parts, the assembled URL, headers and response.

    Args:
        protocol: URL scheme prefix, e.g. ``"https://"``.
        domain: Host name.
        path: Path and query prefix that the keyword is appended to.
        keyword: Search keyword appended directly after ``path``.
        hasNext: Optional extra query suffix appended after the keyword
            (the script passes ``"&cursor=..."`` for follow-up pages).
    """

    def __init__(self, protocol, domain, path, keyword, hasNext=""):
        # Keep the individual parts so a follow-up request can be built from them.
        self.protocol, self.domain, self.path = protocol, domain, path
        self.keyword, self.hasNext = keyword, hasNext
        # The full URL is the plain concatenation of the parts, in this order.
        self.url = "".join(
            (self.protocol, self.domain, self.path, self.keyword, self.hasNext)
        )
        # A session object is created per request target and stored on `p`.
        self.p = requests.Session()
        # Each target gets its own User-Agent header value.
        self.useragent = set_header_user_agent()
        self.headers = {"User-Agent": self.useragent}
        # Filled in by the caller with the response (and later its text).
        self.data = ""
#############################################
# ASCII-art startup banner, one raw string per output row.  Raw strings keep
# the backslashes of the drawing literal; written as ordinary escapes
# ("\/", "\_", "\|") they are invalid escape sequences that Python warns about.
print("\n".join((
    r" ___ _____ ",
    r"|_ _|_ __ __|_ _|_ _ __ _ ",
    r" | || '_ \/ __|| |/ _` |/ _` |",
    r" | || | | \__ \| | (_| | (_| |",
    r"|___|_| |_|___/|_|\__,_|\__, |",
    r" |___/ ",
)))
#############################################
# Identify the program (name, version, author) and point at the help switch.
print("《Version》{} {}".format(_info.name, _info.version))
print("《Author》{}".format(_info.author))
print('\nHelp? Run with "--help".')
# 請求&回應
# Main interactive loop: ask for a keyword, fetch the first result page,
# download every item on it, then follow the pagination cursor page by page
# until there are no more pages or the user declines to continue.
while 1:
    print("-" * 32)
    jump = False  # set when the user gives up on a failed first request
    keyword = input("關鍵字: #").strip().replace(" ", "")
    while keyword == "":
        keyword = input("請輸入關鍵字: #").strip().replace(" ", "")

    # --- Request the first page; retry while the status is not OK. ---
    # NOTE(review): in auto mode a failing request is retried immediately
    # and indefinitely, with no delay between attempts.
    while 1:
        req = Req("https://", "imginn.com", "/api/tags/?id=", keyword)  # target
        sys.stdout.write("Host --???--> " + req.domain + "\n")
        sys.stdout.flush()
        req.data = requests.get(req.url, headers=req.headers, timeout=15)
        # Cursor up one line + erase line, so the status code replaces the
        # "???" placeholder printed above.
        sys.stdout.write("\u001b[1A\u001b[2K" + "Host --" + str(req.data.status_code) + "--> " + req.domain + "\n")
        sys.stdout.flush()
        if req.data.status_code == requests.codes.ok:
            break
        print("Bad request -「" + req.url + "」\n")
        # Auto mode always retries; otherwise only an explicit "y" retries.
        if sysAuto or input("請求失敗,繼續? (Y/N) ").lower().strip() == "y":
            print("-" * 32)
            print("關鍵字: #" + req.keyword)
            continue
        jump = True  # leave the retry loop and go back to the keyword prompt
        break
    if jump:
        continue

    # --- Parse and download, one page per iteration. ---
    download_part = 1
    jump = "n"
    while 1:
        # Decode the response body as UTF-8 and parse it as JSON.
        req.data.encoding = "utf-8"
        req.data = req.data.text
        datas = json.loads(req.data)

        # Download every item of this page, each with a fresh User-Agent.
        for item in datas["items"]:
            req.useragent = set_header_user_agent()
            req.headers = {"User-Agent": req.useragent}
            download.download(
                req_headers=req.headers,
                file_name=item["id"] + "_" + item["code"],
                file_url=item["src"],
                save_path="./save/" + req.keyword + "/",
            )

        # No further page: this keyword is finished.
        if not datas["hasNext"]:
            print("爬蟲結束")
            break

        if not sysAuto:
            # Ask whether to continue, but only wait 5 seconds: the SIGALRM
            # handler raises inside input() when the alarm fires.
            # NOTE: signal.SIGALRM / signal.alarm are documented as Unix-only.
            signal.signal(signal.SIGALRM, interrupted)
            signal.alarm(5)
            try:
                jump = input("本頁資料下載完畢,繼續到下一頁下載嗎 (5秒後 自動下載)? (Y/N) ").lower().strip()
            except Exception:
                # Timed out (or input failed): default to continuing.
                # Deliberately not a bare `except:` -- the SIGINT/SIGTERM
                # handler exits via SystemExit, which must not be swallowed
                # here or Ctrl+C at this prompt would resume downloading.
                print("繼續下載...")
                jump = "y"
            finally:
                signal.alarm(0)  # cancel the pending alarm in every case
            if jump == "n":
                print("取消繼續")
                break
        else:
            print("\n本頁資料下載完畢,自動前往下一頁")

        # --- Request the next page via the cursor; retry while not OK. ---
        while 1:
            jump = False
            req = Req(req.protocol, req.domain, req.path, keyword=req.keyword, hasNext="&cursor=" + datas["cursor"])  # target
            sys.stdout.write("Host --???--> " + req.domain + "\n")
            sys.stdout.flush()
            req.data = requests.get(req.url, headers=req.headers, timeout=15)
            sys.stdout.write("\u001b[1A\u001b[2K" + "Host --" + str(req.data.status_code) + "--> " + req.domain + "\n")
            sys.stdout.flush()
            if req.data.status_code == requests.codes.ok:
                download_part = download_part + 1
                break
            print("Bad request -「" + req.url + "」\n")
            # Here only an explicit "n" stops; auto mode never asks.
            if not sysAuto and input("請求失敗,繼續? (Y/N) ").lower().strip() == "n":
                jump = True  # abandon this keyword
                break
            print("關鍵字:#" + req.keyword + " (part." + str(download_part) + ")")
        if jump:
            break