From bb53edda04e122ccd1ae0e77bd93880b2b7d70f8 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 22:51:51 +0000 Subject: [PATCH 01/23] Add TikTok photo support #3061 #4177 --- docs/supportedsites.md | 7 +++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/tiktok.py | 98 ++++++++++++++++++++++++++++++++ scripts/supportedsites.py | 3 +- test/results/tiktok.py | 70 +++++++++++++++++++++++ 5 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 gallery_dl/extractor/tiktok.py create mode 100644 test/results/tiktok.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index db73b37a1e..002ef5c1c1 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1,6 +1,7 @@ # Supported Sites + Consider all listed sites to potentially be NSFW.
Galleries | + | |||
TikTok | +https://www.tiktok.com/ | +Photos | +Cookies | +|
TMOHentai | https://tmohentai.com/ | diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index fc8d7b20cd..0abcce61fd 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -168,6 +168,7 @@ "tapas", "tcbscans", "telegraph", + "tiktok", "tmohentai", "toyhouse", "tsumino", diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py new file mode 100644 index 0000000000..9e4d684793 --- /dev/null +++ b/gallery_dl/extractor/tiktok.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.tiktok.com/""" + +from .common import Extractor, Message +from .. import exception, text, util +import re + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" +USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?/*" +POST_PATTERN = BASE_PATTERN + r"/+@(?:[\w.]{0,23}\w)(?:/\S*)?/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" +VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" +INSENSITIVE_PHOTO = re.compile(re.escape("/photo/"), re.IGNORECASE) + + +class TikTokExtractor(Extractor): + """Base class for TikTok extractors""" + + category = "tiktok" + directory_fmt = ("{category}", "{user}") + filename_fmt = "{id}_{index}_{img_id}.{extension}" + archive_fmt = "{id}_{img_id}" + root = "https://www.tiktok.com/" + cookies_domain = ".tiktok.com" + + def urls(self): + return [self.url] + + def items(self): + for tiktok_url in self.urls(): + # If we can recognise that this is a /photo/ link, preemptively + # replace it with /video/ to prevent a needless second request. + # See below. + tiktok_url = INSENSITIVE_PHOTO.sub("/video/", tiktok_url) + video_detail = util.json_loads(text.extr( + self.request(tiktok_url).text, + '' + ))["__DEFAULT_SCOPE__"] + if "webapp.video-detail" not in video_detail: + # Only /video/ links result in the video-detail dict we need. + # Try again using that form of link. + tiktok_url = video_detail["seo.abtest"]["canonical"] \ + .replace("/photo/", "/video/") + video_detail = util.json_loads(text.extr( + self.request(tiktok_url).text, + '' + ))["__DEFAULT_SCOPE__"] + video_detail = video_detail["webapp.video-detail"] + if "statusMsg" in video_detail and \ + video_detail["statusMsg"] == "author_secret": + raise exception.AuthorizationError("Login required to access " + "this post") + post_info = video_detail["itemInfo"]["itemStruct"] + user = post_info["author"]["uniqueId"] + if "imagePost" in post_info: + yield Message.Directory, { "user": user } + img_list = post_info["imagePost"]["images"] + for i, img in enumerate(img_list): + url = img["imageURL"]["urlList"][0] + name_and_ext = text.nameext_from_url(url) + yield Message.Url, url, { + "id": post_info["id"], + "index": i, + "img_id": name_and_ext["filename"].split("~")[0], + "extension": name_and_ext["extension"], + "width": img["imageWidth"], + "height": img["imageHeight"] + } + else: + # TODO: Not a slide show. Should pass this to yt-dlp. + pass + + +class TikTokPostExtractor(TikTokExtractor): + """Extract a single video or photo TikTok link""" + + subcategory = "post" + pattern = POST_PATTERN + example = "https://www.tiktok.com/@chillezy/photo/7240568259186019630" + + +class TikTokVMPostExtractor(TikTokExtractor): + """Extract a single video or photo TikTok link""" + + subcategory = "post" + pattern = VM_POST_PATTERN + example = "https://vm.tiktok.com/ZGdh4WUhr/" + + +# TODO: Write profile extractor. diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 5a6303e700..8765164dbe 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -141,10 +141,11 @@ "tbib" : "The Big ImageBoard", "tcbscans" : "TCB Scans", "tco" : "Twitter t.co", - "tmohentai" : "TMOHentai", "thatpervert" : "ThatPervert", "thebarchive" : "The /b/ Archive", "thecollection" : "The /co/llection", + "tiktok" : "TikTok", + "tmohentai" : "TMOHentai", "tumblrgallery" : "TumblrGallery", "vanillarock" : "もえぴりあ", "vidyart2" : "/v/idyart2", diff --git a/test/results/tiktok.py b/test/results/tiktok.py new file mode 100644 index 0000000000..ebb8ceaa35 --- /dev/null +++ b/test/results/tiktok.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import tiktok + +PATTERN = r"https://p1[69]-.*\.tiktokcdn.*\.com/.*/[0-9a-fA-F]+~.*\.jpeg" + + +__tests__ = ( +# Test many photos. +{ + "#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokVMPostExtractor, + "#pattern" : PATTERN +}, +# Test one photo. +{ + "#url" : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://vm.tiktok.com/ZGdhVtER2/", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokVMPostExtractor, + "#pattern" : PATTERN +}, +# Test a few photos. +{ + "#url" : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://vm.tiktok.com/ZGdhVW3cu/", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokVMPostExtractor, + "#pattern" : PATTERN +} +) From 7ebda8486429b8c9a2b9d1a1fab63372f0f91eed Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:20:54 +0000 Subject: [PATCH 02/23] Address linting errors --- gallery_dl/extractor/tiktok.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 9e4d684793..c1ce211b65 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -11,8 +11,9 @@ import re BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" -USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?/*" -POST_PATTERN = BASE_PATTERN + r"/+@(?:[\w.]{0,23}\w)(?:/\S*)?/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" +USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?" +POST_PATTERN = USER_PATTERN + \ + r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" INSENSITIVE_PHOTO = re.compile(re.escape("/photo/"), re.IGNORECASE) @@ -39,7 +40,7 @@ def items(self): video_detail = util.json_loads(text.extr( self.request(tiktok_url).text, '' ))["__DEFAULT_SCOPE__"] if "webapp.video-detail" not in video_detail: @@ -50,29 +51,29 @@ def items(self): video_detail = util.json_loads(text.extr( self.request(tiktok_url).text, '' ))["__DEFAULT_SCOPE__"] video_detail = video_detail["webapp.video-detail"] - if "statusMsg" in video_detail and \ - video_detail["statusMsg"] == "author_secret": + has_status = "statusMsg" in video_detail + if has_status and video_detail["statusMsg"] == "author_secret": raise exception.AuthorizationError("Login required to access " "this post") post_info = video_detail["itemInfo"]["itemStruct"] user = post_info["author"]["uniqueId"] if "imagePost" in post_info: - yield Message.Directory, { "user": user } + yield Message.Directory, {"user": user} img_list = post_info["imagePost"]["images"] for i, img in enumerate(img_list): url = img["imageURL"]["urlList"][0] name_and_ext = text.nameext_from_url(url) yield Message.Url, url, { - "id": post_info["id"], - "index": i, - "img_id": name_and_ext["filename"].split("~")[0], - "extension": name_and_ext["extension"], - "width": img["imageWidth"], - "height": img["imageHeight"] + "id" : post_info["id"], + "index" : i, + "img_id" : name_and_ext["filename"].split("~")[0], + "extension" : name_and_ext["extension"], + "width" : img["imageWidth"], + "height" : img["imageHeight"] } else: # TODO: Not a slide show. Should pass this to yt-dlp. From 491beacb9c8f6ecbe8072034a2b93d67b40e50d0 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:33:58 +0000 Subject: [PATCH 03/23] Fix more test failures --- gallery_dl/extractor/tiktok.py | 14 +++++++------- test/results/tiktok.py | 18 +++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index c1ce211b65..823e20b703 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -8,17 +8,17 @@ from .common import Extractor, Message from .. import exception, text, util -import re +from re import compile, escape, IGNORECASE BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?" POST_PATTERN = USER_PATTERN + \ r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" -INSENSITIVE_PHOTO = re.compile(re.escape("/photo/"), re.IGNORECASE) +INSENSITIVE_PHOTO = compile(escape("/photo/"), IGNORECASE) -class TikTokExtractor(Extractor): +class TiktokExtractor(Extractor): """Base class for TikTok extractors""" category = "tiktok" @@ -80,7 +80,7 @@ def items(self): pass -class TikTokPostExtractor(TikTokExtractor): +class TiktokPostExtractor(TiktokExtractor): """Extract a single video or photo TikTok link""" subcategory = "post" @@ -88,10 +88,10 @@ class TikTokPostExtractor(TikTokExtractor): example = "https://www.tiktok.com/@chillezy/photo/7240568259186019630" -class TikTokVMPostExtractor(TikTokExtractor): - """Extract a single video or photo TikTok link""" +class TiktokVmpostExtractor(TiktokExtractor): + """Extract a single video or photo TikTok VM link""" - subcategory = "post" + subcategory = "vmpost" pattern = VM_POST_PATTERN example = "https://vm.tiktok.com/ZGdh4WUhr/" diff --git a/test/results/tiktok.py b/test/results/tiktok.py index ebb8ceaa35..223f1877c8 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -14,57 +14,57 @@ { "#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokVMPostExtractor, + "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN }, # Test one photo. { "#url" : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://vm.tiktok.com/ZGdhVtER2/", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokVMPostExtractor, + "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN }, # Test a few photos. { "#url" : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://vm.tiktok.com/ZGdhVW3cu/", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokVMPostExtractor, + "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN } ) From 863dfc0798689b223ed1fd57d65a7011e0643658 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:36:08 +0000 Subject: [PATCH 04/23] Forgot to update category names in tests --- test/results/tiktok.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 223f1877c8..43221764d6 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -25,7 +25,7 @@ }, { "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", - "#category" : ("", "tiktok", "post"), + "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN }, @@ -44,7 +44,7 @@ }, { "#url" : "https://vm.tiktok.com/ZGdhVtER2/", - "#category" : ("", "tiktok", "post"), + "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN }, @@ -63,7 +63,7 @@ }, { "#url" : "https://vm.tiktok.com/ZGdhVW3cu/", - "#category" : ("", "tiktok", "post"), + "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN } From 5db1ca8e519f51b566a182f4b9818a7e031e04f6 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:42:16 +0000 Subject: [PATCH 05/23] Looking into re issue --- gallery_dl/extractor/tiktok.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 823e20b703..a714ecf98d 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -15,7 +15,6 @@ POST_PATTERN = USER_PATTERN + \ r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" -INSENSITIVE_PHOTO = compile(escape("/photo/"), IGNORECASE) class TiktokExtractor(Extractor): @@ -36,7 +35,10 @@ def items(self): # If we can recognise that this is a /photo/ link, preemptively # replace it with /video/ to prevent a needless second request. # See below. - tiktok_url = INSENSITIVE_PHOTO.sub("/video/", tiktok_url) + tiktok_url = compile( + escape("/photo/"), + IGNORECASE + ).sub("/video/", tiktok_url) video_detail = util.json_loads(text.extr( self.request(tiktok_url).text, '' @@ -48,20 +49,40 @@ def items(self): if "webapp.video-detail" not in video_detail: # Only /video/ links result in the video-detail dict we need. # Try again using that form of link. - tiktok_url = video_detail["seo.abtest"]["canonical"] \ + tiktok_url_to_use = video_detail["seo.abtest"]["canonical"] \ .replace("/photo/", "/video/") video_detail = util.json_loads(text.extr( - self.request(tiktok_url).text, + self.request(tiktok_url_to_use).text, '' ))["__DEFAULT_SCOPE__"] video_detail = video_detail["webapp.video-detail"] - has_status = "statusMsg" in video_detail - if has_status and video_detail["statusMsg"] == "author_secret": - raise exception.AuthorizationError("Login required to access " - "this post") + if "statusCode" in video_detail: + if video_detail["statusCode"] == 10222: + raise exception.AuthorizationError( + tiktok_url + ": Login required to access this post" + ) + elif video_detail["statusCode"] == 10204: + raise exception.NotFoundError(tiktok_url) + elif video_detail["statusCode"] == 10231: + raise exception.ExtractionError( + tiktok_url + " is region locked, try downloading with " + "a VPN/proxy connection" + ) + elif video_detail["statusCode"] != 0: + raise exception.ExtractionError( + tiktok_url + ": Received unknown error code " + + str(video_detail['statusCode']) + " with message " + + (video_detail['statusMsg'] if + "statusMsg" in video_detail else "") + ) post_info = video_detail["itemInfo"]["itemStruct"] + id = post_info["id"] + original_title = title = post_info["desc"] + if len(original_title) == 0: + title = "TikTok photo #{}".format(id) + title = title[:150] user = post_info["author"]["uniqueId"] if "imagePost" in post_info: yield Message.Directory, {"user": user} @@ -69,22 +90,36 @@ def items(self): for i, img in enumerate(img_list): url = img["imageURL"]["urlList"][0] name_and_ext = text.nameext_from_url(url) - id = post_info["id"] - title = post_info["desc"] - if len(title) == 0: - title = "TikTok photo #{}".format(id) yield Message.Url, url, { - "title" : text.sanitize_for_filename(title)[:170], + "title" : title, "id" : id, - "index" : i, + "index" : i + 1, "img_id" : name_and_ext["filename"].split("~")[0], "extension" : name_and_ext["extension"], "width" : img["imageWidth"], "height" : img["imageHeight"] } + elif videos: + # It's probably obvious but I thought it was worth noting + # because I got stuck on this for a while: make sure to emit + # a Directory message before attempting to download anything + # with yt-dlp! Otherwise you'll run into NoneType, set_filename + # errors since the download job doesn't get initialized. + yield Message.Directory, {"user": user} + if len(original_title) == 0: + title = "TikTok video #{}".format(id) + title = title[:150] else: - # TODO: Not a slide show. Should pass this to yt-dlp. - pass + self.log.info("Skipping video post %s", tiktok_url) + if videos: + yield Message.Url, "ytdl:" + tiktok_url_to_use, { + "filename" : "", + "extension" : "", + "title" : title, + "id" : id, + "index" : "", + "img_id" : "" + } class TiktokPostExtractor(TiktokExtractor): diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 246efee320..5fd5a40715 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -51,21 +51,6 @@ def slugify(value): return re.sub(r"[-\s]+", "-", value).strip("-_") -def sanitize_for_filename(string): - """Removes characters from a string that would be illegal to have in - a filename - - This function is similar to slugify(), except it retains more - characters (notably characters such as # and @). - - Note that the length of the string is not capped! - - Inspiration: - https://stackoverflow.com/a/71199182 - """ - return re.sub(r"[/\\?%*:|\"<>\x7F\x00-\x1F]", " ", str(string)) - - def ensure_http_scheme(url, scheme="https://"): """Prepend 'scheme' to 'url' if it doesn't have one""" if url and not url.startswith(("https://", "http://")): diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 43221764d6..4bdbd9bdab 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -5,66 +5,147 @@ # published by the Free Software Foundation. from gallery_dl.extractor import tiktok +from gallery_dl import exception PATTERN = r"https://p1[69]-.*\.tiktokcdn.*\.com/.*/[0-9a-fA-F]+~.*\.jpeg" +PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r")|(?:ytdl\:)" __tests__ = ( -# Test many photos. { "#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630", + "#comment" : "/photo/ link: many photos", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630", + "#comment" : "/video/ link: many photos", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", + "#comment" : "vm.tiktok.com link: many photos", "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, -# Test one photo. { "#url" : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974", + "#comment" : "/photo/ link: single photo", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974", + "#comment" : "/video/ link: single photo", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://vm.tiktok.com/ZGdhVtER2/", + "#comment" : "vm.tiktok.com link: single photo", "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, -# Test a few photos. { "#url" : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785", + "#comment" : "/photo/ link: few photos", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785", + "#comment" : "/video/ link: few photos", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://vm.tiktok.com/ZGdhVW3cu/", + "#comment" : "vm.tiktok.com link: few photos", "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, - "#pattern" : PATTERN -} + "#pattern" : PATTERN, + "#options" : {"videos": False} +}, +{ + "#url" : "https://www.tiktok.com/@ughuwhguweghw/video/1", + "#comment" : "deleted post", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#exception" : exception.NotFoundError, + "#options" : {"videos": False} +}, +{ + "#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208", + "#comment" : "Video post", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", + "#options" : {"videos": True} +}, +{ + "#url" : "https://www.tiktok.com/@memezar/photo/7449708266168274208", + "#comment" : "Video post as a /photo/ link", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", + "#options" : {"videos": True} +}, +{ + "#url" : "https://vm.tiktok.com/ZGdht7cjp/", + "#comment" : "Video post as a VM link", + "#category" : ("", "tiktok", "vmpost"), + "#class" : tiktok.TiktokVmpostExtractor, + "#urls" : "ytdl:https://vm.tiktok.com/ZGdht7cjp/", + "#options" : {"videos": True} +}, +{ + "#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208", + "#comment" : "Skipping video post", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#urls" : [], + "#options" : {"videos": False} +}, +{ + "#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630", + "#comment" : "/photo/ link: many photos with audio", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#pattern" : PATTERN_WITH_AUDIO, + "#options" : {"videos": True} +}, +{ + "#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630", + "#comment" : "/video/ link: many photos with audio", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#pattern" : PATTERN_WITH_AUDIO, + "#options" : {"videos": True} +}, +{ + "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", + "#comment" : "vm.tiktok.com link: many photos with audio", + "#category" : ("", "tiktok", "vmpost"), + "#class" : tiktok.TiktokVmpostExtractor, + "#pattern" : PATTERN_WITH_AUDIO, + "#options" : {"videos": True} +}, ) diff --git a/test/test_text.py b/test/test_text.py index 5b97db7f91..1b19c4742a 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -92,30 +92,6 @@ def test_slugify(self, f=text.slugify): self.assertEqual(f(1), "1") self.assertEqual(f(2.3), "23") - def test_sanitize_for_filename(self, f=text.sanitize_for_filename): - self.assertEqual(f("Hello World"), "Hello World") - self.assertEqual(f("-HeLLo---World-"), "-HeLLo---World-") - self.assertEqual( - f("_-H#e:l#l:o+\t+W?o!rl=d-_"), - "_-H#e l#l o+ +W o!rl=d-_" - ) - self.assertEqual(f("_Hello_World_"), "_Hello_World_") - self.assertEqual( - f("/\\?%*:|\"<>\x7F\x00\x0B\x1F"), - " " - ) - - self.assertEqual(f(""), "") - self.assertEqual(f("-"), "-") - self.assertEqual(f("--"), "--") - - self.assertEqual(f(()), "()") - self.assertEqual(f([]), "[]") - self.assertEqual(f({}), "{}") - self.assertEqual(f(None), "None") - self.assertEqual(f(1), "1") - self.assertEqual(f(2.3), "2.3") - def test_ensure_http_scheme(self, f=text.ensure_http_scheme): result = "https://example.org/filename.ext" From 6e91e5969d605a0037205658a24af8ee45920587 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:56:16 +0000 Subject: [PATCH 09/23] Forgot to update supportedsites.md --- docs/supportedsites.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 002ef5c1c1..cdc5ebf731 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -929,7 +929,7 @@ Consider all listed sites to potentially be NSFW.|||
TikTok | https://www.tiktok.com/ | -Photos | +Photos, Videos, Audio | Cookies |
TikTok | https://www.tiktok.com/ | -Photos, Videos, Audio | +Photos, Videos, Audio, User Profiles **Videos, Audio, and User Profiles require yt-dlp or youtube-dl.** Pass `-o videos` to download photos only, and `-o tiktok-range="a-yt-dlp-range"` to tell yt-dlp to only extract XYZ links when scraping a user profile. |
Cookies |
TikTok | https://www.tiktok.com/ | -Photos, Videos, Audio, User Profiles **Videos, Audio, and User Profiles require yt-dlp or youtube-dl.** Pass `-o videos` to download photos only, and `-o tiktok-range="a-yt-dlp-range"` to tell yt-dlp to only extract XYZ links when scraping a user profile. |
+ Photos, Videos, Audio, User Profiles, Profile Avatars **Videos, Audio, and User Profiles require yt-dlp or youtube-dl.** Pass `-o videos` to download photos only, and `-o tiktok-range="a-yt-dlp-range"` to tell yt-dlp to only extract XYZ links when scraping a user profile. |
Cookies |
TikTok | https://www.tiktok.com/ | -Photos, Videos, Audio, User Profiles, Profile Avatars **Videos, Audio, and User Profiles require yt-dlp or youtube-dl.** Pass `-o videos` to download photos only, and `-o tiktok-range="a-yt-dlp-range"` to tell yt-dlp to only extract XYZ links when scraping a user profile. |
+ Photos, Videos, Audio, User Profiles, Profile Avatars Videos, Audio, and User Profiles require yt-dlp or youtube-dl. Pass -o videos to download photos only, and -o tiktok-range="a-yt-dlp-range" to tell yt-dlp to only extract XYZ links when scraping a user profile. |
Cookies |
TikTok | https://www.tiktok.com/ | -Photos, Videos, Audio, User Profiles, Profile Avatars Videos, Audio, and User Profiles require yt-dlp or youtube-dl. Pass -o videos to download photos only, and -o tiktok-range="a-yt-dlp-range" to tell yt-dlp to only extract XYZ links when scraping a user profile. |
+ Photos, Videos, Audio, User Profiles, Profile Avatars Videos, Audio, and User Profiles require yt-dlp or youtube-dl. Pass -o videos to download photos only, and -o tiktok-range="a-yt-dlp-range" to tell yt-dlp to only extract XYZ links when scraping a user profile. Also note that profile avatars will only be downloaded when downloading a User Profile and if the user has at least one post. |
Cookies |