From 74d855c693ed59abdf60eed8629120b1e9d5de9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 26 Nov 2024 21:58:15 +0100 Subject: [PATCH] [kemonoparty] update to new site layout / API endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (#6415, #6503, #6528, #6530, #6536) … at least for the most part. Favorites are still broken, but the rest should be functional again. --- gallery_dl/extractor/kemonoparty.py | 300 +++++++++++++++------------- test/results/kemonoparty.py | 65 +++--- 2 files changed, 200 insertions(+), 165 deletions(-) diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 6f2d5f30f32..d8c44fafaef 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -10,7 +10,7 @@ from .common import Extractor, Message from .. import text, util, exception -from ..cache import cache, memcache +from ..cache import cache import itertools import json import re @@ -38,6 +38,7 @@ def __init__(self, match): Extractor.__init__(self, match) def _init(self): + self.api = KemonoAPI(self) self.revisions = self.config("revisions") if self.revisions: self.revisions_unique = (self.revisions == "unique") @@ -53,48 +54,53 @@ def _init(self): sort_keys=True, separators=(",", ":")).encode def items(self): + service = self.groups[2] + creator_id = self.groups[3] + find_hash = re.compile(HASH_PATTERN).match generators = self._build_file_generators(self.config("files")) - duplicates = self.config("duplicates") - comments = self.config("comments") - username = dms = announcements = None + announcements = True if self.config("announcements") else None + comments = True if self.config("comments") else False + duplicates = True if self.config("duplicates") else False + dms = True if self.config("dms") else None + profile = username = None # prevent files from being sent with gzip compression headers = {"Accept-Encoding": "identity"} if self.config("metadata"): - username = text.unescape(text.extract( - self.request(self.user_url).text, - '"): - extr = text.extract_from(comment) - cid = extr('id="', '"') - comments.append({ - "id" : cid, - "user": extr('href="#' + cid + '"', '"), - "body": extr( - '
', '
').strip(), - "date": extr('datetime="', '"'), - }) - return comments - - def _extract_cards(self, post, type): - url = "{}/{}/user/{}/{}".format( - self.root, post["service"], post["user"], type) - page = self.request(url).text - - cards = [] - for card in text.extract_iter(page, ""): - footer = text.extr(card, "") - cards.append({ - "body": text.unescape(text.extr( - card, "
", "
19: date_string = date_string[:19] return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S") - @memcache(keyarg=1) - def _discord_channels(self, server): - url = "{}/api/v1/discord/channel/lookup/{}".format( - self.root, server) - return self.request(url).json() + def _revisions(self, posts): + return itertools.chain.from_iterable( + self._revisions_post(post) for post in posts) - def _revisions_post(self, post, url): + def _revisions_post(self, post): post["revision_id"] = 0 try: - revs = self.request(url + "/revisions").json() + revs = self.api.creator_post_revisions( + post["service"], post["user"], post["id"]) except exception.HttpError: post["revision_hash"] = self._revision_hash(post) post["revision_index"] = 1 @@ -268,8 +239,8 @@ def _revisions_post(self, post, url): return revs - def _revisions_all(self, url): - revs = self.request(url + "/revisions").json() + def _revisions_all(self, service, creator_id, post_id): + revs = self.api.creator_post_revisions(service, creator_id, post_id) cnt = idx = len(revs) for rev in revs: @@ -305,50 +276,30 @@ def _validate(response): class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.su user listing""" subcategory = "user" - pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])" + pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)" example = "https://kemono.su/SERVICE/user/12345" def __init__(self, match): - _, _, service, user_id, self.query = match.groups() - self.subcategory = service + self.subcategory = match.group(3) KemonopartyExtractor.__init__(self, match) - self.api_url = "{}/api/v1/{}/user/{}".format( - self.root, service, user_id) - self.user_url = "{}/{}/user/{}".format(self.root, service, user_id) def posts(self): - url = self.api_url - params = text.parse_query(self.query) - params["o"] = text.parse_int(params.get("o")) - - while True: - posts = self.request(url, params=params).json() - - if self.revisions: - for post in posts: - post_url = "{}/api/v1/{}/user/{}/post/{}".format( - self.root, post["service"], post["user"], post["id"]) - yield from self._revisions_post(post, post_url) - else: - yield from posts - - if len(posts) < 50: - break - params["o"] += 50 + _, _, service, creator_id, query = self.groups + params = text.parse_query(query) + return self.api.creator_posts( + service, creator_id, params.get("o"), params.get("q")) class KemonopartyPostsExtractor(KemonopartyExtractor): """Extractor for kemono.su post listings""" subcategory = "posts" - pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?" + pattern = BASE_PATTERN + r"/posts()()(?:/?\?([^#]+))?" example = "https://kemono.su/posts" - def __init__(self, match): - KemonopartyExtractor.__init__(self, match) - self.query = match.group(3) - self.api_url = self.root + "/api/v1/posts" - - posts = KemonopartyUserExtractor.posts + def posts(self): + params = text.parse_query(self.groups[4]) + return self.api.posts( + params.get("o"), params.get("q"), params.get("tag")) class KemonopartyPostExtractor(KemonopartyExtractor): @@ -358,27 +309,23 @@ class KemonopartyPostExtractor(KemonopartyExtractor): example = "https://kemono.su/SERVICE/user/12345/post/12345" def __init__(self, match): - _, _, service, user_id, post_id, self.revision, self.revision_id = \ - match.groups() - self.subcategory = service + self.subcategory = match.group(3) KemonopartyExtractor.__init__(self, match) - self.api_url = "{}/api/v1/{}/user/{}/post/{}".format( - self.root, service, user_id, post_id) - self.user_url = "{}/{}/user/{}".format(self.root, service, user_id) def posts(self): - if not self.revision: - post = self.request(self.api_url).json() - if self.revisions: - return self._revisions_post(post, self.api_url) - return (post,) + _, _, service, creator_id, post_id, revision, revision_id = self.groups + post = self.api.creator_post(service, creator_id, post_id) + if not revision: + return (post["post"],) - revs = self._revisions_all(self.api_url) - if not self.revision_id: + self.revisions = False + + revs = self._revisions_all(service, creator_id, post_id) + if not revision_id: return revs for rev in revs: - if str(rev["revision_id"]) == self.revision_id: + if str(rev["revision_id"]) == revision_id: return (rev,) raise exception.NotFoundError("revision") @@ -394,37 +341,35 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)" example = "https://kemono.su/discord/server/12345#CHANNEL" - def __init__(self, match): - KemonopartyExtractor.__init__(self, match) - _, _, self.server, self.channel_id, self.channel = match.groups() - self.channel_name = "" - def items(self): self._prepare_ddosguard_cookies() - if self.channel_id: - self.channel_name = self.channel + _, _, server_id, channel_id, channel = self.groups + channel_name = "" + + if channel_id: + channel_name = channel else: - if self.channel.isdecimal() and len(self.channel) >= 16: + if channel.isdecimal() and len(channel) >= 16: key = "id" else: key = "name" - for channel in self._discord_channels(self.server): - if channel[key] == self.channel: + for ch in self.api.discord_server(server_id): + if ch[key] == channel: break else: raise exception.NotFoundError("channel") - self.channel_id = channel["id"] - self.channel_name = channel["name"] + channel_id = ch["id"] + channel_name = ch["name"] find_inline = re.compile( r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)" r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall find_hash = re.compile(HASH_PATTERN).match - posts = self.posts() + posts = self.api.discord_channel(channel_id) max_posts = self.config("max-posts") if max_posts: posts = itertools.islice(posts, max_posts) @@ -441,7 +386,7 @@ def items(self): append({"path": "https://cdn.discordapp.com" + path, "name": path, "type": "inline", "hash": ""}) - post["channel_name"] = self.channel_name + post["channel_name"] = channel_name post["date"] = self._parse_datetime(post["published"]) post["count"] = len(files) yield Message.Directory, post @@ -461,33 +406,17 @@ def items(self): url = self.root + "/data" + url[20:] yield Message.Url, url, post - def posts(self): - url = "{}/api/v1/discord/channel/{}".format( - self.root, self.channel_id) - params = {"o": 0} - - while True: - posts = self.request(url, params=params).json() - yield from posts - - if len(posts) < 150: - break - params["o"] += 150 - class KemonopartyDiscordServerExtractor(KemonopartyExtractor): subcategory = "discord-server" pattern = BASE_PATTERN + r"/discord/server/(\d+)$" example = "https://kemono.su/discord/server/12345" - def __init__(self, match): - KemonopartyExtractor.__init__(self, match) - self.server = match.group(3) - def items(self): - for channel in self._discord_channels(self.server): + server_id = self.groups[2] + for channel in self.api.discord_server(server_id): url = "{}/discord/server/{}/channel/{}#{}".format( - self.root, self.server, channel["id"], channel["name"]) + self.root, server_id, channel["id"], channel["name"]) channel["_extractor"] = KemonopartyDiscordExtractor yield Message.Queue, url, channel @@ -541,3 +470,100 @@ def items(self): url = "{}/{}/user/{}/post/{}".format( self.root, post["service"], post["user"], post["id"]) yield Message.Queue, url, post + + +class KemonoAPI(): + """Interface for the Kemono API v1.1.0 + + https://kemono.su/documentation/api + """ + + def __init__(self, extractor): + self.extractor = extractor + self.root = extractor.root + "/api/v1" + + def posts(self, offset=0, query=None, tags=None): + endpoint = "/posts" + params = {"q": query, "o": offset, "tags": tags} + return self._pagination(endpoint, params, 50, "posts") + + def creator_posts(self, service, creator_id, offset=0, query=None): + endpoint = "/{}/user/{}".format(service, creator_id) + params = {"q": query, "o": offset} + return self._pagination(endpoint, params, 50) + + def creator_announcements(self, service, creator_id): + endpoint = "/{}/user/{}/announcements".format(service, creator_id) + return self._call(endpoint) + + def creator_dms(self, service, creator_id): + endpoint = "/{}/user/{}/dms".format(service, creator_id) + return self._call(endpoint) + + def creator_fancards(self, service, creator_id): + endpoint = "/{}/user/{}/fancards".format(service, creator_id) + return self._call(endpoint) + + def creator_post(self, service, creator_id, post_id): + endpoint = "/{}/user/{}/post/{}".format(service, creator_id, post_id) + return self._call(endpoint) + + def creator_post_comments(self, service, creator_id, post_id): + endpoint = "/{}/user/{}/post/{}/comments".format( + service, creator_id, post_id) + return self._call(endpoint) + + def creator_post_revisions(self, service, creator_id, post_id): + endpoint = "/{}/user/{}/post/{}/revisions".format( + service, creator_id, post_id) + return self._call(endpoint) + + def creator_profile(self, service, creator_id): + endpoint = "/{}/user/{}/profile".format(service, creator_id) + return self._call(endpoint) + + def creator_links(self, service, creator_id): + endpoint = "/{}/user/{}/links".format(service, creator_id) + return self._call(endpoint) + + def creator_tags(self, service, creator_id): + endpoint = "/{}/user/{}/tags".format(service, creator_id) + return self._call(endpoint) + + def discord_channel(self, channel_id): + endpoint = "/discord/channel/{}".format(channel_id) + return self._pagination(endpoint, {}, 150) + + def discord_server(self, server_id): + endpoint = "/discord/channel/lookup/{}".format(server_id) + return self._call(endpoint) + + def account_favorites(self, type): + endpoint = "/account/favorites" + params = {"type": type} + return self._call(endpoint, params) + + def authentication_login(self, username, password): + endpoint = "/authentication/login" + params = {"username": username, "password": password} + return self._call(endpoint, params) + + def _call(self, endpoint, params=None): + url = self.root + endpoint + response = self.extractor.request(url, params=params) + return response.json() + + def _pagination(self, endpoint, params, batch=50, key=False): + params["o"] = text.parse_int(params.get("o")) % 50 + + while True: + data = self._call(endpoint, params) + + if key: + yield from data[key] + else: + yield from data + + if len(data) < batch: + return + params["o"] += batch diff --git a/test/results/kemonoparty.py b/test/results/kemonoparty.py index 4c370089792..a9b66a88714 100644 --- a/test/results/kemonoparty.py +++ b/test/results/kemonoparty.py @@ -23,7 +23,7 @@ "#category": ("", "kemonoparty", "patreon"), "#class" : kemonoparty.KemonopartyUserExtractor, "#options" : {"max-posts": 100}, - "#count" : range(200, 300), + "#count" : range(200, 400), }, { @@ -92,7 +92,7 @@ "#url" : "https://kemono.su/gumroad/user/3101696181060/post/tOWyf", "#category": ("", "kemonoparty", "gumroad"), "#class" : kemonoparty.KemonopartyPostExtractor, - "#urls" : "https://kemono.su/data/6f/13/6f1394b19516396ea520254350662c254bbea30c1e111fd4b0f042c61c426d07.zip", + "#count" : 12, }, { @@ -129,10 +129,19 @@ "#class" : kemonoparty.KemonopartyPostExtractor, "#options" : {"dms": True}, - "dms": [{ - "body": r"re:Hi! Thank you very much for supporting the work I did in May. Here's your reward pack! I hope you find something you enjoy in it. :\)\n\nhttps://www.mediafire.com/file/\w+/Set13_tier_2.zip/file", - "date": "2021-06", - }], + "dms": [ + { + "added" : "2021-07-31T02:47:51.327865", + "artist" : None, + "content" : "Hi! Thank you very much for supporting the work I did in May. Here's your reward pack! I hope you find something you enjoy in it. :)\n\nhttps://www.mediafire.com/file/n9ppjpip0r3f01v/Set13_tier_2.zip/file", + "embed" : {}, + "file" : {}, + "hash" : "f8d4962fb7908614c9b7c8c0de1b5f8985f01b62a9b06d74d640c5b2bcedf758", + "published": "2021-06-09T03:28:51.431000", + "service" : "patreon", + "user" : "34134344", + }, + ], }, { @@ -142,10 +151,16 @@ "#class" : kemonoparty.KemonopartyPostExtractor, "#options" : {"announcements": True}, - "announcements": [{ - "body": "
Thank you so much for the support!
This Patreon is more of a tip jar for supporting what I make. I have to clarify that there are no exclusive Patreon animations because all are released for the public. You will get earlier access to WIPs. Direct downloads to my works are also available for $5 and $10 Tiers.
", - "date": "2023-02", - }], + "announcements": [ + { + "added" : "2023-02-01T22:44:34.670719", + "content" : "
Thank you so much for the support!
This Patreon is more of a tip jar for supporting what I make. I have to clarify that there are no exclusive Patreon animations because all are released for the public. You will get earlier access to WIPs. Direct downloads to my works are also available for $5 and $10 Tiers.
", + "hash" : "815648d41c60d1d546437e475a0888fd4a77fd098b1ec61a3648ea6da30c1034", + "published": None, + "service" : "patreon", + "user_id" : "3161935", + }, + ], }, { @@ -207,7 +222,7 @@ "hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86", "revision_id" : 142470, "revision_index": 2, - "revision_count": 9, + "revision_count": 10, "revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40", }, @@ -218,13 +233,15 @@ "#class" : kemonoparty.KemonopartyPostExtractor, "#options" : {"revisions": "unique"}, "#urls" : "https://kemono.su/data/88/52/88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86.jpg", + "#archive" : False, "filename" : "wip update", "hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86", - "revision_id" : 0, - "revision_index": 1, - "revision_count": 1, - "revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40", + "revision_id" : {9277608, 0}, + "revision_index": {1, 2}, + "revision_count": 2, + "revision_hash" : {"e0e93281495e151b11636c156e52bfe9234c2a40", + "79d5967719583a6fa52b2fc143e6a80fcdf75fb8"}, }, { @@ -233,12 +250,12 @@ "#category": ("", "kemonoparty", "patreon"), "#class" : kemonoparty.KemonopartyPostExtractor, "#pattern" : r"https://kemono\.su/data/88/52/88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86\.jpg", - "#count" : 9, + "#count" : 10, "#archive" : False, - "revision_id": range(134996, 3052965), - "revision_index": range(1, 9), - "revision_count": 9, + "revision_id": range(134996, 9277608), + "revision_index": range(1, 10), + "revision_count": 10, "revision_hash": "e0e93281495e151b11636c156e52bfe9234c2a40", }, @@ -341,15 +358,7 @@ "#category": ("", "kemonoparty", "discord-server"), "#class" : kemonoparty.KemonopartyDiscordServerExtractor, "#pattern" : kemonoparty.KemonopartyDiscordExtractor.pattern, - "#count" : 13, -}, - -{ - "#url" : "https://kemono.su/discord/server/488668827274444803", - "#category": ("", "kemonoparty", "discord-server"), - "#class" : kemonoparty.KemonopartyDiscordServerExtractor, - "#pattern" : kemonoparty.KemonopartyDiscordExtractor.pattern, - "#count" : 13, + "#count" : 15, }, {