-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_images.py
executable file
·161 lines (147 loc) · 5.57 KB
/
get_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Filename: get_images.py
Programmer: Johnson
Email: [email protected]
Github: https://github.com/yu-chun-kit/
Date : 2019-02-09
Description: useless program
"""
import html
import os
# import re
import random
import time

import cv2
import requests
from requests_html import HTMLSession

from getsizes import getsizes
# Module-level requests_html session; get_list() fetches the gallery page through it.
session = HTMLSession()
def _post_image_url(post):
    """Return the direct i.imgur.com JPEG URL for one post element, or None."""
    anchor = post.find('a.image-list-link', first=True)
    if anchor is None or 'href' not in anchor.attrs:
        return None
    # The last path segment of the post link is used as the image id.
    return "https://i.imgur.com/" + anchor.attrs['href'].split('/')[-1] + ".jpg"


def _post_description(post):
    """Return the hover caption text of one post element, or '' if absent."""
    hover = post.find('div.hover ', first=True)
    paragraph = hover.find('p', first=True) if hover is not None else None
    return paragraph.text if paragraph is not None else ""


def get_list(src_url, folder):
    """Scrape a gallery page and append an HTML thumbnail strip for it.

    The page at ``src_url`` is fetched through the module-level ``session``.
    For each ``div.post`` inside ``div.first-child`` the direct image URL is
    built from the post link, printed together with the post caption, and
    passed to ``save_image``. Every image that ``save_image`` does not flag
    is written as an ``<img>`` tag, all wrapped in one ``img-container`` div
    that is appended to ``./image/<folder>/<last segment of src_url>.html``.

    Compared with the earlier version this no longer opens ``prefix.txt`` /
    ``postfix.txt`` (they were never read), skips posts that lack a link
    instead of raising ``AttributeError``, and HTML-escapes the URL before
    writing it into the ``src`` attribute.

    Args:
        src_url: Gallery URL; its last ``/``-separated segment names the
            output file, so it should not end with a slash.
        folder: Sub-directory of ``./image`` that receives the HTML file.
            The directory is not created here.

    Returns:
        None.
    """
    time.sleep(3.1415926)  # pause before the page request
    response = session.get(src_url)
    content = response.html.find('div.first-child', first=True)
    if content is None:
        # find(..., first=True) yields None when nothing matches.
        print("no posts found at " + src_url)
        return

    usable_urls = []
    for post in content.find('div.post'):
        url = _post_image_url(post)
        if url is None:
            continue
        print(url)
        print(_post_description(post))
        time.sleep(random.random() * 2 + 1)  # 1-3 s random pause per post
        # save_image returns [destroyed, height, width]; only the flag matters.
        if not save_image(url, folder)[0]:
            usable_urls.append(url)

    # filename = "./image/japan/" + src_url.split("/")[-1] + ".json"
    filename = "./image/" + folder + "/" + src_url.split("/")[-1] + ".html"
    print('filenmae = ' + filename)
    # Append mode: repeated runs add another container to the same file.
    with open(filename, "a+") as f_obj:
        f_obj.write(' <div class="img-container">\n')
        f_obj.writelines(
            ' <img src="' + html.escape(url, quote=True) +
            '" alt="" width="300" height="220">\n'
            for url in usable_urls
        )
        f_obj.write(' </div>\n')
# Screens a remote image by its reported size; the actual download is disabled.
def save_image(img_url, folder, *, get_size=None):
    """Decide whether a remote image is suitable and compute its display size.

    Despite the name, nothing is downloaded or written: the function only
    asks ``get_size`` for the image's size information and applies a set of
    rejection rules, printing a diagnostic line for each rule that fires.

    An image is rejected ("destroyed") when any of these holds:

    * height is 81 and width is 161 (reported as "may no longer exist"),
    * height or width is zero or negative (size could not be obtained),
    * height or width exceeds 2500,
    * the aspect ratio exceeds 2.5 in either direction,
    * ``size`` exceeds 1737905 (presumably bytes -- confirm with getsizes),
    * ``get_size`` raises ``AttributeError``.

    For accepted images wider than 320, both dimensions are divided by
    ``width // 320``.

    A zero dimension used to fall through to the ratio test and raise
    ``ZeroDivisionError``; it is now rejected before any division happens.

    Args:
        img_url: URL of the image; its last path segment is used in messages.
        folder: Not used by the current code; kept for call compatibility.
        get_size: Optional callable ``url -> (size, width, height)``.
            Defaults to the module's imported ``getsizes``.

    Returns:
        ``[True, 0, 0]`` for a rejected image, otherwise
        ``[False, height, width]`` with the (possibly reduced) display size.
    """
    if get_size is None:
        get_size = getsizes

    abbr = img_url.split("/")[-1]
    destroyed = False
    height = width = 0

    try:
        # Only the probe itself is expected to raise AttributeError.
        size, width, height = get_size(img_url)
    except AttributeError:
        print("Has error (may be gif)!!!!")
        destroyed = True
    else:
        print(height, width, end=", dispaly size = ")
        if height == 81 and width == 161:
            print("image --> " + abbr + " may no longer exist")
            destroyed = True
        if height > 2500 or width > 2500:
            print("image --> " + abbr + " height or width too large")
            destroyed = True

        if height <= 0 or width <= 0:
            # Must be checked before the ratio test to avoid dividing by zero.
            print("image --> " + abbr + " can not get the size")
            destroyed = True
        elif height / width > 2.5 or width / height > 2.5:
            print("image --> " + abbr + " has unusual ratio")
            destroyed = True
        elif size > 1737905:
            print("image --> " + abbr + " size is too large")
            destroyed = True
        elif width > 320:
            # Integer shrink factor so the width lands near 320.
            div_num = width // 320
            height //= div_num
            width //= div_num
        print(height, width)

    if destroyed:
        print("picture not suitable")
        print("-------------------")
        return [destroyed, 0, 0]
    print("-------------------")
    return [destroyed, height, width]
def main():
    """Make sure ./image/<folder> exists under the working directory, then scrape one gallery."""
    # Alternative gallery URLs left in the script:
    #   "https://imgur.com/r/ChinaPics"
    #   "https://imgur.com/r/japanpics"
    folder = "koera2"
    site = "https://imgur.com/r/SouthKoreaPics"
    # Drop a single trailing slash so the last URL segment is never empty.
    site = site[:-1] if site.endswith('/') else site
    directory = os.getcwd() + "/image/" + folder

    if os.path.exists(directory):
        print("folder exists")
    else:
        print("create folder at " + directory)
        os.makedirs(directory)

    print("The site is ", site)
    get_list(site, folder)
    print("fin")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()