From 00fe308587ad30d30f9681b8d5e5f37527b8b823 Mon Sep 17 00:00:00 2001 From: jsouthgb Date: Wed, 25 Oct 2023 20:01:14 -0400 Subject: [PATCH 1/4] add support for erooups.com --- docs/supportedsites.md | 6 ++++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/erooups.py | 52 ++++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + 4 files changed, 60 insertions(+) create mode 100644 gallery_dl/extractor/erooups.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a15566df98..4ab897af9c 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -181,6 +181,12 @@ Consider all sites to be NSFW unless otherwise known. Albums, Search Results, User Profiles + + Erooups + http://erooups.com/ + Albums, Search Results, User Profiles + + ExHentai https://exhentai.org/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 22e4fe3412..b357fdcbc7 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -38,6 +38,7 @@ "dynastyscans", "e621", "erome", + "erooups", "exhentai", "fallenangels", "fanbox", diff --git a/gallery_dl/extractor/erooups.py b/gallery_dl/extractor/erooups.py new file mode 100644 index 0000000000..e9a0ff1beb --- /dev/null +++ b/gallery_dl/extractor/erooups.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for http://erooups.com/""" + +from .common import Extractor, Message +from .. import text + + +class ErooupsExtractor(Extractor): + category = 'erooups' + directory_fmt = ('{category}', '{title}') + filename_fmt = '{filename}.{extension}' + archive_fmt = '{date}_{filename}' + subcategory = 'gallery' + pattern = r'(?:http?://)?(?:www\.)?erooups\.com' + root = 'http://erooups.com' + + def items(self): + page = self.request( + text.ensure_http_scheme(self.url, scheme="http://")).text + + data = self.metadata(page) + images = text.extract_iter(page, '', '') + data['tag'] = text.extr( + page, '">', '') + data['imagecount'] = text.extr( + page, '
', '
') + + data = {k: text.unescape(data[k]) for k in data if data[k] != ""} + + return data diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 470b629d6d..4a5ac30194 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -43,6 +43,7 @@ "e926" : "e926", "e6ai" : "e6AI", "erome" : "EroMe", + "erooups" : "erooups", "e-hentai" : "E-Hentai", "exhentai" : "ExHentai", "fallenangels" : "Fallen Angels Scans", From 16ef8958f2a89194b3183ec7d77e2dfdeebadd3a Mon Sep 17 00:00:00 2001 From: jsouthgb Date: Thu, 26 Oct 2023 11:13:40 -0400 Subject: [PATCH 2/4] Corrected extractor name for tests --- gallery_dl/extractor/erooups.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/erooups.py b/gallery_dl/extractor/erooups.py index e9a0ff1beb..e599b92fa0 100644 --- a/gallery_dl/extractor/erooups.py +++ b/gallery_dl/extractor/erooups.py @@ -10,7 +10,7 @@ from .. import text -class ErooupsExtractor(Extractor): +class ErooupsGalleryExtractor(Extractor): category = 'erooups' directory_fmt = ('{category}', '{title}') filename_fmt = '{filename}.{extension}' @@ -18,6 +18,7 @@ class ErooupsExtractor(Extractor): subcategory = 'gallery' pattern = r'(?:http?://)?(?:www\.)?erooups\.com' root = 'http://erooups.com' + example = 'http://erooups.com/2023/10/25/page-title-11-pics.html' def items(self): page = self.request( From f7671e59790556a8f799b6f0e0a409a4bd1e748e Mon Sep 17 00:00:00 2001 From: jsouthgb Date: Wed, 29 Nov 2023 21:34:52 -0500 Subject: [PATCH 3/4] [erooups] rewrite for GalleryExtractor inheritance --- gallery_dl/extractor/erooups.py | 78 ++++++++++++++++----------------- 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/gallery_dl/extractor/erooups.py b/gallery_dl/extractor/erooups.py index e599b92fa0..919bdd149a 100644 --- a/gallery_dl/extractor/erooups.py +++ b/gallery_dl/extractor/erooups.py @@ -6,48 +6,46 @@ """Extractors for http://erooups.com/""" -from .common import Extractor, Message +from .common import GalleryExtractor from .. import text -class ErooupsGalleryExtractor(Extractor): - category = 'erooups' - directory_fmt = ('{category}', '{title}') - filename_fmt = '{filename}.{extension}' - archive_fmt = '{date}_{filename}' - subcategory = 'gallery' - pattern = r'(?:http?://)?(?:www\.)?erooups\.com' - root = 'http://erooups.com' - example = 'http://erooups.com/2023/10/25/page-title-11-pics.html' - - def items(self): - page = self.request( - text.ensure_http_scheme(self.url, scheme="http://")).text - - data = self.metadata(page) - images = text.extract_iter(page, '") + return [ + (fmt(i), None) for i in text.extract_iter( + extr, 'src="http://content.erooups.com', '"') + ] def metadata(self, page): - data = {} - data['pageurl'] = self.url - data['date'] = '-'.join(self.url.split('/')[3:6]) - data['title'] = text.extr( - page, '

', '

') - data['tag'] = text.extr( - page, '">', '') - data['imagecount'] = text.extr( - page, '
', '
') - - data = {k: text.unescape(data[k]) for k in data if data[k] != ""} - - return data + return { + "pageurl": self.url, + "date": text.parse_datetime( + "{}-{}-{}".format(self.year, self.month, self.day)), + "title": text.extr( + page, '

', "

"), + "tag": text.extr( + page, '">', ""), + "count": text.parse_int(text.extr( + page, '
', "
")), + } From ae8ba88925c06cd81fe3405975d29724178a6f0b Mon Sep 17 00:00:00 2001 From: jsouthgb Date: Thu, 30 Nov 2023 22:21:19 -0500 Subject: [PATCH 4/4] [erooups] accommodate older albums using different "img src" --- gallery_dl/extractor/erooups.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/gallery_dl/extractor/erooups.py b/gallery_dl/extractor/erooups.py index 919bdd149a..7a4c47de59 100644 --- a/gallery_dl/extractor/erooups.py +++ b/gallery_dl/extractor/erooups.py @@ -14,7 +14,6 @@ class ErooupsGalleryExtractor(GalleryExtractor): category = "erooups" directory_fmt = ("{category}", "{title}") archive_fmt = "{date}_{filename}" - subcategory = "gallery" pattern = (r"(?:http?://)?(?:www\.)?erooups\.com" r"/(\d+)/(\d+)/(\d+)/([^/?#]+)") root = "http://erooups.com" @@ -30,11 +29,10 @@ def __init__(self, match): GalleryExtractor.__init__(self, match, url) def images(self, page): - fmt = "http://content.erooups.com/{}".format - extr = text.extr(page, 'class="imgs"', "") + extr = text.extr(page, 'class="imgs">', "") return [ - (fmt(i), None) for i in text.extract_iter( - extr, 'src="http://content.erooups.com', '"') + (self.root + i if "erooups" not in i else i, None) for i in + text.extract_iter(extr, 'img src="', '"') ] def metadata(self, page):