-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy path__init__.py
308 lines (267 loc) · 11.5 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2017, Wu Yuan <[email protected]>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.metadata.sources.base import Source, Option
from calibre.ebooks.metadata.book.base import Metadata
from calibre import as_unicode
import re
import json
from urllib import urlencode
def to_metadata(log,gmetadata,ExHentai_Status): # {{{
title = gmetadata['title']
title_jpn = gmetadata['title_jpn']
tags = gmetadata['tags']
rating = gmetadata['rating']
category = gmetadata['category']
gid = gmetadata['gid']
token = gmetadata['token']
thumb = gmetadata['thumb']
# title
if title_jpn:
raw_title = title_jpn
else:
raw_title = title
pat1 = re.compile(r'(?P<comments>.*?\[(?P<author>(?:(?!汉化|漢化)[^\[\]])*)\](?:\s*(?:\[[^\(\)]+\]|\([^\[\]\(\)]+\))\s*)*(?P<title>[^\[\]\(\)]+).*)')
if re.findall(pat1,raw_title):
m = re.search(pat1, raw_title)
title_ = m.group('title').strip()
author = m.group('author').strip()
else:
title_ = raw_title.strip()
author = 'Unknown'
log.exception('Title match failed. Title is %s' % raw_title)
authors = [(author)]
mi = Metadata(title_, authors)
mi.identifiers = {'ehentai':'%s_%s_%d' % (str(gid),str(token),int(ExHentai_Status))}
# publisher
pat2 = re.compile(r'^\(([^\[\]\(\)]*)\)')
if re.findall(pat2, raw_title):
publisher = re.search(pat2, raw_title).group(1).strip()
mi.publisher = publisher
else:
mi.publisher = 'Unknown'
log.exception('Not Found publisher.')
# Tags
tags_ = []
for tag in tags:
if re.match('language',tag):
tag_ = re.sub('language:','',tag)
if tag_ != 'translated':
mi.language = tag_
else:
tags_.append(tag_)
# elif re.match('parody|group|character|artist', tag):
# log('drop tag %s' % tag)
# continue
elif not ':' in tag:
log('drop tag %s' % tag)
continue
else:
tags_.append(tag)
tags_.append(category)
mi.tags = tags_
# rating
mi.rating = float(rating)
# cover
mi.has_ehentai_cover = None
if thumb:
mi.has_ehentai_cover = thumb
return mi
# }}}
class Ehentai(Source):
name = 'E-hentai Galleries'
author = 'Wu yuan'
version = (1,1,2)
minimum_calibre_version = (2, 80, 0)
description = _('Download metadata and cover from e-hentai.org.'
'Useful only for doujinshi.')
capabilities = frozenset(['identify', 'cover'])
touched_fields = frozenset(['title','authors','tags','rating','publisher','identifier:ehentai'])
supports_gzip_transfer_encoding = True
cached_cover_url_is_reliable = True
EHentai_url = 'https://e-hentai.org/g/%s/%s/'
ExHentai_url = 'https://exhentai.org/g/%s/%s/'
options = (
Option('Use_Exhentai','bool',False,_('Use Exhentai'),
_('If Use Exhentai is True, the plugin will search metadata on exhentai.')),
Option('ipb_member_id','string',None,_('ipb_member_id'),
_('If Use Exhentai is True, please input your cookies.')),
Option('ipb_pass_hash','string',None,_('ipb_pass_hash'),
_('If Use Exhentai is True, please input your cookies.'))
)
config_help_message = ('<p>'+_('To Download Metadata from exhentai.org you must sign up'
' a free account and get the cookies of .exhentai.org.'
' If you don\'t have an account, you can <a href="%s">sign up</a>.')) % 'https://forums.e-hentai.org/index.php'
def __init__(self, *args, **kwargs): # {{{
Source.__init__(self, *args, **kwargs)
self.config_exhentai()
# }}}
def config_exhentai(self): # {{{
ExHentai_Status = self.prefs['Use_Exhentai']
ExHentai_Cookies = [{'name':'ipb_member_id', 'value':self.prefs['ipb_member_id'], 'domain':'.exhentai.org', 'path':'/'},
{'name':'ipb_pass_hash', 'value':self.prefs['ipb_pass_hash'], 'domain':'.exhentai.org', 'path':'/'}]
if ExHentai_Status is True:
for cookie in ExHentai_Cookies:
if cookie['value'] is None:
ExHentai_Status = False
break
self.ExHentai_Status = ExHentai_Status
self.ExHentai_Cookies = ExHentai_Cookies
return
# }}}
def create_query(self,log,title=None, authors=None,identifiers={},is_exhentai=False): # {{{
EHentai_SEARCH_URL = 'https://e-hentai.org/?'
ExHentai_SEARCH_URL = 'https://exhentai.org/?'
q = ''
if title or authors:
def build_term(type,parts):
return ' '.join(x for x in parts)
title_token = list(self.get_title_tokens(title))
if title_token:
q = q + build_term('title',title_token)
author_token = list(self.get_author_tokens(authors, only_first_author=True))
if author_token:
q = q + (' ' if q != '' else '') + build_term('author', author_token)
q = q.strip()
if isinstance(q, unicode):
q = q.encode('utf-8')
if not q:
return None
q_dict = {'f_doujinshi':1, 'f_manga':1, 'f_artistcg':1, 'f_gamecg':1, 'f_western':1, 'f_non-h':1,
'f_imageset':1, 'f_cosplay':1, 'f_asianporn':1, 'f_misc':1, 'f_search':q, 'f_apply':'Apply+Filter',
'advsearch':1, 'f_sname':'on', 'f_sh':'on', 'f_srdd':2}
if is_exhentai is False:
url = EHentai_SEARCH_URL + urlencode(q_dict)
else:
url = ExHentai_SEARCH_URL + urlencode(q_dict)
return url
# }}}
def get_gallery_info(self,log,raw): # {{{
pattern = re.compile(r'https:\/\/(?:e-hentai\.org|exhentai\.org)\/g\/(?P<gallery_id>\d+)/(?P<gallery_token>\w+)/')
results = re.findall(pattern,raw)
if not results:
log.exception('Failed to get gallery_id and gallery_token!')
return None
gidlist = []
for r in results:
gidlist.append(list(r))
return gidlist
# }}}
def get_all_details(self,gidlist,log,abort,result_queue,timeout): # {{{
EHentai_API_url = 'https://api.e-hentai.org/api.php'
br = self.browser
data = {"method": "gdata","gidlist": gidlist,"namespace": 1}
data = json.dumps(data)
try:
raw = br.open_novisit(EHentai_API_url,data=data,timeout=timeout).read()
except Exception as e:
log.exception('Failed to make api request.',e)
return
gmetadatas = json.loads(raw)['gmetadata']
for relevance, gmetadata in enumerate(gmetadatas):
try:
ans = to_metadata(log, gmetadata,self.ExHentai_Status)
if isinstance(ans, Metadata):
ans.source_relevance = relevance
db = ans.identifiers['ehentai']
if ans.has_ehentai_cover:
self.cache_identifier_to_cover_url(db,ans.has_ehentai_cover)
self.clean_downloaded_metadata(ans)
result_queue.put(ans)
except:
log.exception('Failed to get metadata for identify entry:',gmetadata)
if abort.is_set():
break
# }}}
def get_book_url(self, identifiers): # {{{
db = identifiers.get('ehentai',None)
d = {'0':False,'1':True}
if db is not None:
gid,token,s = re.split('_', db)
ExHentai_Status = d[str(s)]
if ExHentai_Status:
url = self.ExHentai_url % (gid,token)
else:
url = self.EHentai_url % (gid,token)
return ('ehentai', db, url)
# }}}
def download_cover(self, log, result_queue, abort,title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False): # {{{
cached_url = self.get_cached_cover_url(identifiers)
if cached_url is None:
return
if abort.is_set():
return
br = self.browser
log('Downloading cover from:', cached_url)
try:
cdata = br.open_novisit(cached_url, timeout=timeout).read()
if cdata:
result_queue.put((self, cdata))
except:
log.exception('Failed to download cover from:', cached_url)
# }}}
def get_cached_cover_url(self, identifiers): # {{{
url = None
db = identifiers.get('ehentai',None)
if db is None:
pass
if db is not None:
url = self.cached_identifier_to_cover_url(db)
return url
# }}}
def identify(self, log, result_queue, abort, title=None, authors=None,identifiers={}, timeout=30): # {{{
is_exhentai = self.ExHentai_Status
query = self.create_query(log,title=title, authors=authors,identifiers=identifiers,is_exhentai=is_exhentai)
if not query:
log.error('Insufficient metadata to construct query')
return
br = self.browser
if is_exhentai is True:
for cookie in self.ExHentai_Cookies:
br.set_cookie(name=cookie['name'], value=cookie['value'], domain=cookie['domain'], path=cookie['path'])
try:
_raw = br.open_novisit(query,timeout=timeout)
raw = _raw.read()
except Exception as e:
log.exception('Failed to make identify query: %r'%query)
return as_unicode(e)
if not raw and identifiers and title and authors and not abort.is_set():
return self.identify(log, result_queue, abort, title=title,authors=authors, timeout=timeout)
if is_exhentai is True:
try:
'https://exhentai.org/' in raw
except Exception as e:
log.error('The cookies for ExHentai is invalid.')
log.error('Exhentai cookies:')
log.error(self.ExHentai_Cookies)
return
gidlist = self.get_gallery_info(log,raw)
if not gidlist:
log.error('No result found.\n','query: %s' % query)
return
self.get_all_details(gidlist=gidlist, log=log, abort=abort, result_queue=result_queue, timeout=timeout)
# }}}
if __name__ == '__main__': # tests {{{
# To run these test use: calibre-customize -b ehentai_metadata && calibre-debug -e ehentai_metadata/__init__.py
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
title_test, authors_test)
test_identify_plugin(Ehentai.name,
[
(
{'title': 'キリト君の白くべたつくなにか', 'authors':['しらたま肉球']},
[title_test('キリト君の白くべたつくなにか', exact=False)]
),
(
{'title':'拘束する部活動 (僕は友達が少ない)','authors':['すもも堂 (すももEX) ','有条色狼']},
[title_test('拘束する部活動', exact=False)]
),
(
{'title':'桜の蜜','authors':['劇毒少女 (ke-ta)']},
[title_test('桜の蜜', exact=False)]
)
])
# }}}