From bb53edda04e122ccd1ae0e77bd93880b2b7d70f8 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 22:51:51 +0000 Subject: [PATCH 01/23] Add TikTok photo support #3061 #4177 --- docs/supportedsites.md | 7 +++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/tiktok.py | 98 ++++++++++++++++++++++++++++++++ scripts/supportedsites.py | 3 +- test/results/tiktok.py | 70 +++++++++++++++++++++++ 5 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 gallery_dl/extractor/tiktok.py create mode 100644 test/results/tiktok.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index db73b37a1e..002ef5c1c1 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1,6 +1,7 @@ # Supported Sites + Consider all listed sites to potentially be NSFW. @@ -925,6 +926,12 @@ Consider all listed sites to potentially be NSFW. + + + + + + diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index fc8d7b20cd..0abcce61fd 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -168,6 +168,7 @@ "tapas", "tcbscans", "telegraph", + "tiktok", "tmohentai", "toyhouse", "tsumino", diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py new file mode 100644 index 0000000000..9e4d684793 --- /dev/null +++ b/gallery_dl/extractor/tiktok.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.tiktok.com/""" + +from .common import Extractor, Message +from .. 
import exception, text, util +import re + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" +USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?/*" +POST_PATTERN = BASE_PATTERN + r"/+@(?:[\w.]{0,23}\w)(?:/\S*)?/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" +VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" +INSENSITIVE_PHOTO = re.compile(re.escape("/photo/"), re.IGNORECASE) + + +class TikTokExtractor(Extractor): + """Base class for TikTok extractors""" + + category = "tiktok" + directory_fmt = ("{category}", "{user}") + filename_fmt = "{id}_{index}_{img_id}.{extension}" + archive_fmt = "{id}_{img_id}" + root = "https://www.tiktok.com/" + cookies_domain = ".tiktok.com" + + def urls(self): + return [self.url] + + def items(self): + for tiktok_url in self.urls(): + # If we can recognise that this is a /photo/ link, preemptively + # replace it with /video/ to prevent a needless second request. + # See below. + tiktok_url = INSENSITIVE_PHOTO.sub("/video/", tiktok_url) + video_detail = util.json_loads(text.extr( + self.request(tiktok_url).text, + '' + ))["__DEFAULT_SCOPE__"] + if "webapp.video-detail" not in video_detail: + # Only /video/ links result in the video-detail dict we need. + # Try again using that form of link. 
+ tiktok_url = video_detail["seo.abtest"]["canonical"] \ + .replace("/photo/", "/video/") + video_detail = util.json_loads(text.extr( + self.request(tiktok_url).text, + '' + ))["__DEFAULT_SCOPE__"] + video_detail = video_detail["webapp.video-detail"] + if "statusMsg" in video_detail and \ + video_detail["statusMsg"] == "author_secret": + raise exception.AuthorizationError("Login required to access " + "this post") + post_info = video_detail["itemInfo"]["itemStruct"] + user = post_info["author"]["uniqueId"] + if "imagePost" in post_info: + yield Message.Directory, { "user": user } + img_list = post_info["imagePost"]["images"] + for i, img in enumerate(img_list): + url = img["imageURL"]["urlList"][0] + name_and_ext = text.nameext_from_url(url) + yield Message.Url, url, { + "id": post_info["id"], + "index": i, + "img_id": name_and_ext["filename"].split("~")[0], + "extension": name_and_ext["extension"], + "width": img["imageWidth"], + "height": img["imageHeight"] + } + else: + # TODO: Not a slide show. Should pass this to yt-dlp. + pass + + +class TikTokPostExtractor(TikTokExtractor): + """Extract a single video or photo TikTok link""" + + subcategory = "post" + pattern = POST_PATTERN + example = "https://www.tiktok.com/@chillezy/photo/7240568259186019630" + + +class TikTokVMPostExtractor(TikTokExtractor): + """Extract a single video or photo TikTok link""" + + subcategory = "post" + pattern = VM_POST_PATTERN + example = "https://vm.tiktok.com/ZGdh4WUhr/" + + +# TODO: Write profile extractor. 
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 5a6303e700..8765164dbe 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -141,10 +141,11 @@ "tbib" : "The Big ImageBoard", "tcbscans" : "TCB Scans", "tco" : "Twitter t.co", - "tmohentai" : "TMOHentai", "thatpervert" : "ThatPervert", "thebarchive" : "The /b/ Archive", "thecollection" : "The /co/llection", + "tiktok" : "TikTok", + "tmohentai" : "TMOHentai", "tumblrgallery" : "TumblrGallery", "vanillarock" : "もえぴりあ", "vidyart2" : "/v/idyart2", diff --git a/test/results/tiktok.py b/test/results/tiktok.py new file mode 100644 index 0000000000..ebb8ceaa35 --- /dev/null +++ b/test/results/tiktok.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import tiktok + +PATTERN = r"https://p1[69]-.*\.tiktokcdn.*\.com/.*/[0-9a-fA-F]+~.*\.jpeg" + + +__tests__ = ( +# Test many photos. +{ + "#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokVMPostExtractor, + "#pattern" : PATTERN +}, +# Test one photo. 
+{ + "#url" : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://vm.tiktok.com/ZGdhVtER2/", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokVMPostExtractor, + "#pattern" : PATTERN +}, +# Test a few photos. +{ + "#url" : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokPostExtractor, + "#pattern" : PATTERN +}, +{ + "#url" : "https://vm.tiktok.com/ZGdhVW3cu/", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TikTokVMPostExtractor, + "#pattern" : PATTERN +} +) From 7ebda8486429b8c9a2b9d1a1fab63372f0f91eed Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:20:54 +0000 Subject: [PATCH 02/23] Address linting errors --- gallery_dl/extractor/tiktok.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 9e4d684793..c1ce211b65 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -11,8 +11,9 @@ import re BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" -USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?/*" -POST_PATTERN = BASE_PATTERN + r"/+@(?:[\w.]{0,23}\w)(?:/\S*)?/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" +USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?" 
+POST_PATTERN = USER_PATTERN + \ + r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" INSENSITIVE_PHOTO = re.compile(re.escape("/photo/"), re.IGNORECASE) @@ -39,7 +40,7 @@ def items(self): video_detail = util.json_loads(text.extr( self.request(tiktok_url).text, '' ))["__DEFAULT_SCOPE__"] if "webapp.video-detail" not in video_detail: @@ -50,29 +51,29 @@ def items(self): video_detail = util.json_loads(text.extr( self.request(tiktok_url).text, '' ))["__DEFAULT_SCOPE__"] video_detail = video_detail["webapp.video-detail"] - if "statusMsg" in video_detail and \ - video_detail["statusMsg"] == "author_secret": + has_status = "statusMsg" in video_detail + if has_status and video_detail["statusMsg"] == "author_secret": raise exception.AuthorizationError("Login required to access " "this post") post_info = video_detail["itemInfo"]["itemStruct"] user = post_info["author"]["uniqueId"] if "imagePost" in post_info: - yield Message.Directory, { "user": user } + yield Message.Directory, {"user": user} img_list = post_info["imagePost"]["images"] for i, img in enumerate(img_list): url = img["imageURL"]["urlList"][0] name_and_ext = text.nameext_from_url(url) yield Message.Url, url, { - "id": post_info["id"], - "index": i, - "img_id": name_and_ext["filename"].split("~")[0], - "extension": name_and_ext["extension"], - "width": img["imageWidth"], - "height": img["imageHeight"] + "id" : post_info["id"], + "index" : i, + "img_id" : name_and_ext["filename"].split("~")[0], + "extension" : name_and_ext["extension"], + "width" : img["imageWidth"], + "height" : img["imageHeight"] } else: # TODO: Not a slide show. Should pass this to yt-dlp. 
From 491beacb9c8f6ecbe8072034a2b93d67b40e50d0 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:33:58 +0000 Subject: [PATCH 03/23] Fix more test failures --- gallery_dl/extractor/tiktok.py | 14 +++++++------- test/results/tiktok.py | 18 +++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index c1ce211b65..823e20b703 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -8,17 +8,17 @@ from .common import Extractor, Message from .. import exception, text, util -import re +from re import compile, escape, IGNORECASE BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?" POST_PATTERN = USER_PATTERN + \ r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" -INSENSITIVE_PHOTO = re.compile(re.escape("/photo/"), re.IGNORECASE) +INSENSITIVE_PHOTO = compile(escape("/photo/"), IGNORECASE) -class TikTokExtractor(Extractor): +class TiktokExtractor(Extractor): """Base class for TikTok extractors""" category = "tiktok" @@ -80,7 +80,7 @@ def items(self): pass -class TikTokPostExtractor(TikTokExtractor): +class TiktokPostExtractor(TiktokExtractor): """Extract a single video or photo TikTok link""" subcategory = "post" @@ -88,10 +88,10 @@ class TikTokPostExtractor(TikTokExtractor): example = "https://www.tiktok.com/@chillezy/photo/7240568259186019630" -class TikTokVMPostExtractor(TikTokExtractor): - """Extract a single video or photo TikTok link""" +class TiktokVmpostExtractor(TiktokExtractor): + """Extract a single video or photo TikTok VM link""" - subcategory = "post" + subcategory = "vmpost" pattern = VM_POST_PATTERN example = "https://vm.tiktok.com/ZGdh4WUhr/" diff --git a/test/results/tiktok.py b/test/results/tiktok.py index ebb8ceaa35..223f1877c8 100644 --- 
a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -14,57 +14,57 @@ { "#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokVMPostExtractor, + "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN }, # Test one photo. { "#url" : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://vm.tiktok.com/ZGdhVtER2/", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokVMPostExtractor, + "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN }, # Test a few photos. 
{ "#url" : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokPostExtractor, + "#class" : tiktok.TiktokPostExtractor, "#pattern" : PATTERN }, { "#url" : "https://vm.tiktok.com/ZGdhVW3cu/", "#category" : ("", "tiktok", "post"), - "#class" : tiktok.TikTokVMPostExtractor, + "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN } ) From 863dfc0798689b223ed1fd57d65a7011e0643658 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:36:08 +0000 Subject: [PATCH 04/23] Forgot to update category names in tests --- test/results/tiktok.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 223f1877c8..43221764d6 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -25,7 +25,7 @@ }, { "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", - "#category" : ("", "tiktok", "post"), + "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN }, @@ -44,7 +44,7 @@ }, { "#url" : "https://vm.tiktok.com/ZGdhVtER2/", - "#category" : ("", "tiktok", "post"), + "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN }, @@ -63,7 +63,7 @@ }, { "#url" : "https://vm.tiktok.com/ZGdhVW3cu/", - "#category" : ("", "tiktok", "post"), + "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, "#pattern" : PATTERN } From 5db1ca8e519f51b566a182f4b9818a7e031e04f6 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Sun, 22 Dec 2024 23:42:16 +0000 Subject: [PATCH 05/23] Looking into re issue --- 
gallery_dl/extractor/tiktok.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 823e20b703..a714ecf98d 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -15,7 +15,6 @@ POST_PATTERN = USER_PATTERN + \ r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" -INSENSITIVE_PHOTO = compile(escape("/photo/"), IGNORECASE) class TiktokExtractor(Extractor): @@ -36,7 +35,10 @@ def items(self): # If we can recognise that this is a /photo/ link, preemptively # replace it with /video/ to prevent a needless second request. # See below. - tiktok_url = INSENSITIVE_PHOTO.sub("/video/", tiktok_url) + tiktok_url = compile( + escape("/photo/"), + IGNORECASE + ).sub("/video/", tiktok_url) video_detail = util.json_loads(text.extr( self.request(tiktok_url).text, '' @@ -48,20 +49,40 @@ def items(self): if "webapp.video-detail" not in video_detail: # Only /video/ links result in the video-detail dict we need. # Try again using that form of link. 
- tiktok_url = video_detail["seo.abtest"]["canonical"] \ + tiktok_url_to_use = video_detail["seo.abtest"]["canonical"] \ .replace("/photo/", "/video/") video_detail = util.json_loads(text.extr( - self.request(tiktok_url).text, + self.request(tiktok_url_to_use).text, '' ))["__DEFAULT_SCOPE__"] video_detail = video_detail["webapp.video-detail"] - has_status = "statusMsg" in video_detail - if has_status and video_detail["statusMsg"] == "author_secret": - raise exception.AuthorizationError("Login required to access " - "this post") + if "statusCode" in video_detail: + if video_detail["statusCode"] == 10222: + raise exception.AuthorizationError( + tiktok_url + ": Login required to access this post" + ) + elif video_detail["statusCode"] == 10204: + raise exception.NotFoundError(tiktok_url) + elif video_detail["statusCode"] == 10231: + raise exception.ExtractionError( + tiktok_url + " is region locked, try downloading with " + "a VPN/proxy connection" + ) + elif video_detail["statusCode"] != 0: + raise exception.ExtractionError( + tiktok_url + ": Received unknown error code " + + str(video_detail['statusCode']) + " with message " + + (video_detail['statusMsg'] if + "statusMsg" in video_detail else "") + ) post_info = video_detail["itemInfo"]["itemStruct"] + id = post_info["id"] + original_title = title = post_info["desc"] + if len(original_title) == 0: + title = "TikTok photo #{}".format(id) + title = title[:150] user = post_info["author"]["uniqueId"] if "imagePost" in post_info: yield Message.Directory, {"user": user} @@ -69,22 +90,36 @@ def items(self): for i, img in enumerate(img_list): url = img["imageURL"]["urlList"][0] name_and_ext = text.nameext_from_url(url) - id = post_info["id"] - title = post_info["desc"] - if len(title) == 0: - title = "TikTok photo #{}".format(id) yield Message.Url, url, { - "title" : text.sanitize_for_filename(title)[:170], + "title" : title, "id" : id, - "index" : i, + "index" : i + 1, "img_id" : name_and_ext["filename"].split("~")[0], 
"extension" : name_and_ext["extension"], "width" : img["imageWidth"], "height" : img["imageHeight"] } + elif videos: + # It's probably obvious but I thought it was worth noting + # because I got stuck on this for a while: make sure to emit + # a Directory message before attempting to download anything + # with yt-dlp! Otherwise you'll run into NoneType, set_filename + # errors since the download job doesn't get initialized. + yield Message.Directory, {"user": user} + if len(original_title) == 0: + title = "TikTok video #{}".format(id) + title = title[:150] else: - # TODO: Not a slide show. Should pass this to yt-dlp. - pass + self.log.info("Skipping video post %s", tiktok_url) + if videos: + yield Message.Url, "ytdl:" + tiktok_url_to_use, { + "filename" : "", + "extension" : "", + "title" : title, + "id" : id, + "index" : "", + "img_id" : "" + } class TiktokPostExtractor(TiktokExtractor): diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 246efee320..5fd5a40715 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -51,21 +51,6 @@ def slugify(value): return re.sub(r"[-\s]+", "-", value).strip("-_") -def sanitize_for_filename(string): - """Removes characters from a string that would be illegal to have in - a filename - - This function is similar to slugify(), except it retains more - characters (notably characters such as # and @). - - Note that the length of the string is not capped! - - Inspiration: - https://stackoverflow.com/a/71199182 - """ - return re.sub(r"[/\\?%*:|\"<>\x7F\x00-\x1F]", " ", str(string)) - - def ensure_http_scheme(url, scheme="https://"): """Prepend 'scheme' to 'url' if it doesn't have one""" if url and not url.startswith(("https://", "http://")): diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 43221764d6..4bdbd9bdab 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -5,66 +5,147 @@ # published by the Free Software Foundation. 
from gallery_dl.extractor import tiktok +from gallery_dl import exception PATTERN = r"https://p1[69]-.*\.tiktokcdn.*\.com/.*/[0-9a-fA-F]+~.*\.jpeg" +PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r")|(?:ytdl\:)" __tests__ = ( -# Test many photos. { "#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630", + "#comment" : "/photo/ link: many photos", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630", + "#comment" : "/video/ link: many photos", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", + "#comment" : "vm.tiktok.com link: many photos", "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, -# Test one photo. { "#url" : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974", + "#comment" : "/photo/ link: single photo", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974", + "#comment" : "/video/ link: single photo", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://vm.tiktok.com/ZGdhVtER2/", + "#comment" : "vm.tiktok.com link: single photo", "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, -# Test a few photos. 
{ "#url" : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785", + "#comment" : "/photo/ link: few photos", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785", + "#comment" : "/video/ link: few photos", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN + "#pattern" : PATTERN, + "#options" : {"videos": False} }, { "#url" : "https://vm.tiktok.com/ZGdhVW3cu/", + "#comment" : "vm.tiktok.com link: few photos", "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, - "#pattern" : PATTERN -} + "#pattern" : PATTERN, + "#options" : {"videos": False} +}, +{ + "#url" : "https://www.tiktok.com/@ughuwhguweghw/video/1", + "#comment" : "deleted post", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#exception" : exception.NotFoundError, + "#options" : {"videos": False} +}, +{ + "#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208", + "#comment" : "Video post", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", + "#options" : {"videos": True} +}, +{ + "#url" : "https://www.tiktok.com/@memezar/photo/7449708266168274208", + "#comment" : "Video post as a /photo/ link", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", + "#options" : {"videos": True} +}, +{ + "#url" : "https://vm.tiktok.com/ZGdht7cjp/", + "#comment" : "Video post as a VM link", + "#category" : ("", "tiktok", "vmpost"), + "#class" : tiktok.TiktokVmpostExtractor, + "#urls" : "ytdl:https://vm.tiktok.com/ZGdht7cjp/", + "#options" : {"videos": True} +}, +{ + "#url" : 
"https://www.tiktok.com/@memezar/video/7449708266168274208", + "#comment" : "Skipping video post", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#urls" : [], + "#options" : {"videos": False} +}, +{ + "#url" : "https://www.tiktok.com/@chillezy/photo/7240568259186019630", + "#comment" : "/photo/ link: many photos with audio", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#pattern" : PATTERN_WITH_AUDIO, + "#options" : {"videos": True} +}, +{ + "#url" : "https://www.tiktok.com/@chillezy/video/7240568259186019630", + "#comment" : "/video/ link: many photos with audio", + "#category" : ("", "tiktok", "post"), + "#class" : tiktok.TiktokPostExtractor, + "#pattern" : PATTERN_WITH_AUDIO, + "#options" : {"videos": True} +}, +{ + "#url" : "https://vm.tiktok.com/ZGdh4WUhr/", + "#comment" : "vm.tiktok.com link: many photos with audio", + "#category" : ("", "tiktok", "vmpost"), + "#class" : tiktok.TiktokVmpostExtractor, + "#pattern" : PATTERN_WITH_AUDIO, + "#options" : {"videos": True} +}, ) diff --git a/test/test_text.py b/test/test_text.py index 5b97db7f91..1b19c4742a 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -92,30 +92,6 @@ def test_slugify(self, f=text.slugify): self.assertEqual(f(1), "1") self.assertEqual(f(2.3), "23") - def test_sanitize_for_filename(self, f=text.sanitize_for_filename): - self.assertEqual(f("Hello World"), "Hello World") - self.assertEqual(f("-HeLLo---World-"), "-HeLLo---World-") - self.assertEqual( - f("_-H#e:l#l:o+\t+W?o!rl=d-_"), - "_-H#e l#l o+ +W o!rl=d-_" - ) - self.assertEqual(f("_Hello_World_"), "_Hello_World_") - self.assertEqual( - f("/\\?%*:|\"<>\x7F\x00\x0B\x1F"), - " " - ) - - self.assertEqual(f(""), "") - self.assertEqual(f("-"), "-") - self.assertEqual(f("--"), "--") - - self.assertEqual(f(()), "()") - self.assertEqual(f([]), "[]") - self.assertEqual(f({}), "{}") - self.assertEqual(f(None), "None") - self.assertEqual(f(1), "1") - 
self.assertEqual(f(2.3), "2.3") - def test_ensure_http_scheme(self, f=text.ensure_http_scheme): result = "https://example.org/filename.ext" From 6e91e5969d605a0037205658a24af8ee45920587 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:56:16 +0000 Subject: [PATCH 09/23] Forgot to update supportedsites.md --- docs/supportedsites.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 002ef5c1c1..cdc5ebf731 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -929,7 +929,7 @@ Consider all listed sites to potentially be NSFW. - + From 7bac7cea11ca1f994770b9979601526738a16817 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:00:08 +0000 Subject: [PATCH 10/23] Support user profiles --- docs/supportedsites.md | 2 +- gallery_dl/extractor/tiktok.py | 33 +++++++++++++++++++++++++++++++-- test/results/tiktok.py | 16 ++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index cdc5ebf731..c7d78f7532 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -929,7 +929,7 @@ Consider all listed sites to potentially be NSFW. - + diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 787ee65f3e..765b942b26 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -7,7 +7,7 @@ """Extractors for https://www.tiktok.com/""" from .common import Extractor, Message -from .. import exception, text, util +from .. import exception, text, util, ytdl from re import compile, escape, IGNORECASE BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" @@ -138,4 +138,33 @@ class TiktokVmpostExtractor(TiktokExtractor): example = "https://vm.tiktok.com/ZGdh4WUhr/" -# TODO: Write profile extractor. 
+class TiktokUserExtractor(TiktokExtractor): + """Extract a TikTok user's profile""" + + subcategory = "user" + pattern = USER_PATTERN + example = "https://www.tiktok.com/@chillezy" + + def urls(self): + """Attempt to use yt-dlp/youtube-dl to extract links from a + user's page""" + + try: + module = ytdl.import_module(self.config("module")) + except (ImportError, SyntaxError) as exc: + self.log.error("Cannot import module '%s'", + getattr(exc, "name", "")) + self.log.debug("", exc_info=exc) + raise exception.ExtractionError("yt-dlp or youtube-dl is required " + "for this feature!") + with ytdl.construct_YoutubeDL( + module=module, + obj=self, + user_opts={ + "cookiefile": self.cookies_file, + "playlist_items": str(self.config("tiktok-range", "")) + } + ) as ydl: + info = ydl.extract_info(self.url, download=False) + # This should include video and photo posts in /video/ URL form. + return [video["webpage_url"] for video in info["entries"]] diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 4bdbd9bdab..4ea48772e5 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -148,4 +148,20 @@ "#pattern" : PATTERN_WITH_AUDIO, "#options" : {"videos": True} }, +{ + "#url" : "https://www.tiktok.com/@chillezy", + "#comment" : "User profile", + "#category" : ("", "tiktok", "user"), + "#class" : tiktok.TiktokUserExtractor, + "#pattern" : PATTERN_WITH_AUDIO, + "#options" : {"videos": True, "tiktok-range": "1-10"} +}, +{ + "#url" : "https://www.tiktok.com/@chillezy", + "#comment" : "User profile without audio or videos", + "#category" : ("", "tiktok", "user"), + "#class" : tiktok.TiktokUserExtractor, + "#pattern" : PATTERN, + "#options" : {"videos": False, "tiktok-range": "1-10"} +}, ) From b8690abb6dd84b3a7b6c879f1fc8828ae50568e7 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:01:34 +0000 Subject: [PATCH 11/23] Fix indentation --- gallery_dl/extractor/tiktok.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 765b942b26..8c066e02ee 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -153,7 +153,7 @@ def urls(self): module = ytdl.import_module(self.config("module")) except (ImportError, SyntaxError) as exc: self.log.error("Cannot import module '%s'", - getattr(exc, "name", "")) + getattr(exc, "name", "")) self.log.debug("", exc_info=exc) raise exception.ExtractionError("yt-dlp or youtube-dl is required " "for this feature!") From 9acb3234e8a9fc3d20bc8e2b7687be22f8a4f4e7 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:03:39 +0000 Subject: [PATCH 12/23] Prevent matching with more than one TikTok extractor --- gallery_dl/extractor/tiktok.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 8c066e02ee..c4cc449af4 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -142,7 +142,7 @@ class TiktokUserExtractor(TiktokExtractor): """Extract a TikTok user's profile""" subcategory = "user" - pattern = USER_PATTERN + pattern = USER_PATTERN + r"$" example = "https://www.tiktok.com/@chillezy" def urls(self): From 42d1b48c37275d1270ed98a6e71c803f9133cc8b Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:12:45 +0000 Subject: [PATCH 13/23] Fix TikTok regex --- gallery_dl/extractor/tiktok.py | 4 ++-- test/results/tiktok.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index c4cc449af4..fa01cc90d6 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -11,7 +11,7 @@ from re import compile, escape, IGNORECASE BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" -USER_PATTERN = BASE_PATTERN + 
r"/+@([\w.]{0,23}\w)(?:/\S*)?" +USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)?" POST_PATTERN = USER_PATTERN + \ r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" @@ -142,7 +142,7 @@ class TiktokUserExtractor(TiktokExtractor): """Extract a TikTok user's profile""" subcategory = "user" - pattern = USER_PATTERN + r"$" + pattern = USER_PATTERN + r"/*$" example = "https://www.tiktok.com/@chillezy" def urls(self): diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 4ea48772e5..68a2291b16 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -157,7 +157,7 @@ "#options" : {"videos": True, "tiktok-range": "1-10"} }, { - "#url" : "https://www.tiktok.com/@chillezy", + "#url" : "https://www.tiktok.com/@chillezy/", "#comment" : "User profile without audio or videos", "#category" : ("", "tiktok", "user"), "#class" : tiktok.TiktokUserExtractor, From 9b4b0102df1e499983fb2d4716c79f1df6ac15eb Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:25:02 +0000 Subject: [PATCH 14/23] Support TikTok profile avatars --- docs/supportedsites.md | 2 +- gallery_dl/extractor/tiktok.py | 26 +++++++++++++++++++------- test/results/tiktok.py | 8 ++++---- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index c7d78f7532..ac8318cecd 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -929,7 +929,7 @@ Consider all listed sites to potentially be NSFW. - + diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index fa01cc90d6..b7d4e81102 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -32,6 +32,9 @@ def urls(self): def items(self): videos = self.config("videos", True) + # We assume that all of the URLs served by urls() come from the same + # author. 
+ downloaded_avatar = False for tiktok_url in self.urls(): # If we can recognise that this is a /photo/ link, preemptively # replace it with /video/ to prevent a needless second request. @@ -84,8 +87,23 @@ def items(self): title = "TikTok photo #{}".format(id) title = title[:150] user = post_info["author"]["uniqueId"] + # It's probably obvious but I thought it was worth noting + # because I got stuck on this for a while: make sure to emit + # a Directory message before attempting to download anything + # with yt-dlp! Otherwise you'll run into NoneType, set_filename + # errors since the download job doesn't get initialized. + yield Message.Directory, {"user": user} + if not downloaded_avatar: + avatar = post_info["author"]["avatarLarger"] + yield Message.Url, avatar, { + "title" : "@" + user, + "id" : post_info["author"]["id"], + "index" : "", + "img_id" : "", + "extension" : text.ext_from_url(avatar) + } + downloaded_avatar = True if "imagePost" in post_info: - yield Message.Directory, {"user": user} img_list = post_info["imagePost"]["images"] for i, img in enumerate(img_list): url = img["imageURL"]["urlList"][0] @@ -100,12 +118,6 @@ def items(self): "height" : img["imageHeight"] } elif videos: - # It's probably obvious but I thought it was worth noting - # because I got stuck on this for a while: make sure to emit - # a Directory message before attempting to download anything - # with yt-dlp! Otherwise you'll run into NoneType, set_filename - # errors since the download job doesn't get initialized. 
- yield Message.Directory, {"user": user} if len(original_title) == 0: title = "TikTok video #{}".format(id) title = title[:150] diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 68a2291b16..e1989e241e 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -97,7 +97,7 @@ "#comment" : "Video post", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", + "#pattern" : PATTERN_WITH_AUDIO, "#options" : {"videos": True} }, { @@ -105,7 +105,7 @@ "#comment" : "Video post as a /photo/ link", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", + "#pattern" : PATTERN_WITH_AUDIO, "#options" : {"videos": True} }, { @@ -113,7 +113,7 @@ "#comment" : "Video post as a VM link", "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, - "#urls" : "ytdl:https://vm.tiktok.com/ZGdht7cjp/", + "#pattern" : PATTERN_WITH_AUDIO, "#options" : {"videos": True} }, { @@ -121,7 +121,7 @@ "#comment" : "Skipping video post", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#urls" : [], + "#pattern" : PATTERN, "#options" : {"videos": False} }, { From 1dc358dc7631320ea518bb5f5d8c5f5d94a671c6 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:26:42 +0000 Subject: [PATCH 15/23] Fix supportedsites.md --- docs/supportedsites.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index ac8318cecd..865da73fbb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -929,7 +929,7 @@ Consider all listed sites to potentially be NSFW. 
- + From bde2d8310c2f8465c04f4c3e6f20891e8a1c12cc Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 19:32:14 +0000 Subject: [PATCH 16/23] TikTok: Ignore no formats error In my limited experience, this doesn't mean that gallery-dl can't download the photo post (but this could mean that you can't download the audio) --- gallery_dl/extractor/tiktok.py | 1 + 1 file changed, 1 insertion(+) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index b7d4e81102..6304afd79c 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -173,6 +173,7 @@ def urls(self): module=module, obj=self, user_opts={ + "ignore_no_formats_error": True, "cookiefile": self.cookies_file, "playlist_items": str(self.config("tiktok-range", "")) } From 4abfad09dfb7e58c8fdda057d4ed09fae343cd4a Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Mon, 23 Dec 2024 21:15:58 +0000 Subject: [PATCH 17/23] Fix error reporting message --- gallery_dl/extractor/tiktok.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 6304afd79c..29259f2936 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -76,9 +76,10 @@ def items(self): elif video_detail["statusCode"] != 0: raise exception.ExtractionError( tiktok_url + ": Received unknown error code " + - str(video_detail['statusCode']) + " with message " + - (video_detail['statusMsg'] if - "statusMsg" in video_detail else "") + str(video_detail['statusCode']) + ( + " with message " + video_detail['statusMsg'] if + "statusMsg" in video_detail else "" + ) ) post_info = video_detail["itemInfo"]["itemStruct"] id = post_info["id"] From cec8f3b75f9695695dcb76f412789f9ac0fcab09 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Tue, 24 Dec 2024 10:36:17 +0000 
Subject: [PATCH 18/23] TikTok: Support more URL formats vt.tiktok.com www.tiktok.com/t/ --- gallery_dl/extractor/tiktok.py | 3 ++- test/results/tiktok.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 29259f2936..8ad5352bae 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -14,7 +14,8 @@ USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)?" POST_PATTERN = USER_PATTERN + \ r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" -VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*" +VM_POST_PATTERN = r"(?:(?:https?://)?(?:(?:vm|vt)\.)?tiktok\.com/+.*/*)|" + \ + r"(?:(?:https?://)?(?:www\.)?tiktok\.com/+t/+.*/*)" class TiktokExtractor(Extractor): diff --git a/test/results/tiktok.py b/test/results/tiktok.py index e1989e241e..06f67e335f 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -164,4 +164,20 @@ "#pattern" : PATTERN, "#options" : {"videos": False, "tiktok-range": "1-10"} }, +{ + "#url" : "https://vt.tiktok.com/ZGdhVtER2", + "#comment" : "vt.tiktok.com link: single photo", + "#category" : ("", "tiktok", "vmpost"), + "#class" : tiktok.TiktokVmpostExtractor, + "#pattern" : PATTERN, + "#options" : {"videos": False} +}, +{ + "#url" : "https://www.tiktok.com/t/ZGdhVtER2//", + "#comment" : "www.tiktok.com/t/ link: single photo", + "#category" : ("", "tiktok", "vmpost"), + "#class" : tiktok.TiktokVmpostExtractor, + "#pattern" : PATTERN, + "#options" : {"videos": False} +}, ) From 8c59d74323b10c0f5a071bfb6e7ec837d7301f46 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Tue, 24 Dec 2024 13:42:16 +0000 Subject: [PATCH 19/23] TikTok: Only download avatar when extracting user profile --- gallery_dl/extractor/tiktok.py | 13 ++++++++++--- test/results/tiktok.py | 8 ++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git 
a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 8ad5352bae..8388412914 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -31,11 +31,14 @@ class TiktokExtractor(Extractor): def urls(self): return [self.url] + def avatar(self): + return False + def items(self): videos = self.config("videos", True) # We assume that all of the URLs served by urls() come from the same # author. - downloaded_avatar = False + downloaded_avatar = not self.avatar() for tiktok_url in self.urls(): # If we can recognise that this is a /photo/ link, preemptively # replace it with /video/ to prevent a needless second request. @@ -97,12 +100,13 @@ def items(self): yield Message.Directory, {"user": user} if not downloaded_avatar: avatar = post_info["author"]["avatarLarger"] + name_and_ext = text.nameext_from_url(avatar) yield Message.Url, avatar, { "title" : "@" + user, "id" : post_info["author"]["id"], "index" : "", - "img_id" : "", - "extension" : text.ext_from_url(avatar) + "img_id" : name_and_ext["filename"].split("~")[0], + "extension" : name_and_ext["extension"] } downloaded_avatar = True if "imagePost" in post_info: @@ -183,3 +187,6 @@ def urls(self): info = ydl.extract_info(self.url, download=False) # This should include video and photo posts in /video/ URL form. 
return [video["webpage_url"] for video in info["entries"]] + + def avatar(self): + return True diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 06f67e335f..5c23384a1b 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -97,7 +97,7 @@ "#comment" : "Video post", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN_WITH_AUDIO, + "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", "#options" : {"videos": True} }, { @@ -105,7 +105,7 @@ "#comment" : "Video post as a /photo/ link", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN_WITH_AUDIO, + "#urls" : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208", "#options" : {"videos": True} }, { @@ -113,7 +113,7 @@ "#comment" : "Video post as a VM link", "#category" : ("", "tiktok", "vmpost"), "#class" : tiktok.TiktokVmpostExtractor, - "#pattern" : PATTERN_WITH_AUDIO, + "#urls" : "ytdl:https://vm.tiktok.com/ZGdht7cjp/", "#options" : {"videos": True} }, { @@ -121,7 +121,7 @@ "#comment" : "Skipping video post", "#category" : ("", "tiktok", "post"), "#class" : tiktok.TiktokPostExtractor, - "#pattern" : PATTERN, + "#urls" : [], "#options" : {"videos": False} }, { From 7560bde71b19bb198b3c2908928fc19f4f616e98 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Tue, 24 Dec 2024 13:45:56 +0000 Subject: [PATCH 20/23] TikTok: Document profile avatar limitation --- docs/supportedsites.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 865da73fbb..e66f20234e 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -929,7 +929,7 @@ Consider all listed sites to potentially be NSFW. 
- + From 1b9852c8fdf1ca6405d531f995fddc7fe7403bc2 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Thu, 9 Jan 2025 17:15:58 +0000 Subject: [PATCH 21/23] TikTok: Add support for www.tiktokv.com/share links --- gallery_dl/extractor/tiktok.py | 11 +++++++++- test/results/tiktok.py | 40 ++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 8388412914..eb2024496d 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -10,12 +10,13 @@ from .. import exception, text, util, ytdl from re import compile, escape, IGNORECASE -BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok(?:v?)\.com" USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)?" POST_PATTERN = USER_PATTERN + \ r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*" VM_POST_PATTERN = r"(?:(?:https?://)?(?:(?:vm|vt)\.)?tiktok\.com/+.*/*)|" + \ r"(?:(?:https?://)?(?:www\.)?tiktok\.com/+t/+.*/*)" +SHARE_PATTERN = BASE_PATTERN + r"/+share/+video/+(?:[0-9]+)/*" class TiktokExtractor(Extractor): @@ -156,6 +157,14 @@ class TiktokVmpostExtractor(TiktokExtractor): example = "https://vm.tiktok.com/ZGdh4WUhr/" +class TiktokShareExtractor(TiktokExtractor): + """Extract a single video or photo TikTok share link""" + + subcategory = "sharepost" + pattern = SHARE_PATTERN + example = "https://www.tiktokv.com/share/video/7240568259186019630" + + class TiktokUserExtractor(TiktokExtractor): """Extract a TikTok user's profile""" diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 5c23384a1b..3af8655715 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -36,6 +36,14 @@ "#pattern" : PATTERN, "#options" : {"videos": False} }, +{ + "#url" : "https://www.tiktokv.com/share/video/7240568259186019630", + "#comment" : "www.tiktokv.com link: many photos", + "#category" : 
("", "tiktok", "sharepost"), + "#class" : tiktok.TiktokShareExtractor, + "#pattern" : PATTERN, + "#options" : {"videos": False} +}, { "#url" : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974", "#comment" : "/photo/ link: single photo", @@ -60,6 +68,14 @@ "#pattern" : PATTERN, "#options" : {"videos": False} }, +{ + "#url" : "https://www.tiktokv.com/share/video/7449575367024626974", + "#comment" : "www.tiktokv.com link: single photo", + "#category" : ("", "tiktok", "sharepost"), + "#class" : tiktok.TiktokShareExtractor, + "#pattern" : PATTERN, + "#options" : {"videos": False} +}, { "#url" : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785", "#comment" : "/photo/ link: few photos", @@ -84,6 +100,14 @@ "#pattern" : PATTERN, "#options" : {"videos": False} }, +{ + "#url" : "https://www.tiktokv.com/share/video/7449701420934122785", + "#comment" : "www.tiktokv.com link: few photos", + "#category" : ("", "tiktok", "sharepost"), + "#class" : tiktok.TiktokShareExtractor, + "#pattern" : PATTERN, + "#options" : {"videos": False} +}, { "#url" : "https://www.tiktok.com/@ughuwhguweghw/video/1", "#comment" : "deleted post", @@ -116,6 +140,14 @@ "#urls" : "ytdl:https://vm.tiktok.com/ZGdht7cjp/", "#options" : {"videos": True} }, +{ + "#url" : "https://www.tiktokv.com/share/video/7449708266168274208", + "#comment" : "Video post as a share link", + "#category" : ("", "tiktok", "sharepost"), + "#class" : tiktok.TiktokShareExtractor, + "#urls" : "ytdl:https://www.tiktokv.com/share/video/7449708266168274208", + "#options" : {"videos": True} +}, { "#url" : "https://www.tiktok.com/@memezar/video/7449708266168274208", "#comment" : "Skipping video post", @@ -148,6 +180,14 @@ "#pattern" : PATTERN_WITH_AUDIO, "#options" : {"videos": True} }, +{ + "#url" : "https://www.tiktokv.com/share/video/7240568259186019630", + "#comment" : "www.tiktokv.com link: many photos with audio", + "#category" : ("", "tiktok", "sharepost"), + "#class" : tiktok.TiktokShareExtractor, + 
"#pattern" : PATTERN_WITH_AUDIO, + "#options" : {"videos": True} +}, { "#url" : "https://www.tiktok.com/@chillezy", "#comment" : "User profile", From c9f409ad2ab910ff8c7d15b0bfa3dcb2dab19e15 Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Thu, 9 Jan 2025 17:19:09 +0000 Subject: [PATCH 22/23] Address Share -> Sharepost issue --- gallery_dl/extractor/tiktok.py | 2 +- test/results/tiktok.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index eb2024496d..986a60b618 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -157,7 +157,7 @@ class TiktokVmpostExtractor(TiktokExtractor): example = "https://vm.tiktok.com/ZGdh4WUhr/" -class TiktokShareExtractor(TiktokExtractor): +class TiktokSharepostExtractor(TiktokExtractor): """Extract a single video or photo TikTok share link""" subcategory = "sharepost" diff --git a/test/results/tiktok.py b/test/results/tiktok.py index 3af8655715..62779b9945 100644 --- a/test/results/tiktok.py +++ b/test/results/tiktok.py @@ -40,7 +40,7 @@ "#url" : "https://www.tiktokv.com/share/video/7240568259186019630", "#comment" : "www.tiktokv.com link: many photos", "#category" : ("", "tiktok", "sharepost"), - "#class" : tiktok.TiktokShareExtractor, + "#class" : tiktok.TiktokSharepostExtractor, "#pattern" : PATTERN, "#options" : {"videos": False} }, @@ -72,7 +72,7 @@ "#url" : "https://www.tiktokv.com/share/video/7449575367024626974", "#comment" : "www.tiktokv.com link: single photo", "#category" : ("", "tiktok", "sharepost"), - "#class" : tiktok.TiktokShareExtractor, + "#class" : tiktok.TiktokSharepostExtractor, "#pattern" : PATTERN, "#options" : {"videos": False} }, @@ -104,7 +104,7 @@ "#url" : "https://www.tiktokv.com/share/video/7449701420934122785", "#comment" : "www.tiktokv.com link: few photos", "#category" : ("", "tiktok", "sharepost"), - "#class" : 
tiktok.TiktokShareExtractor, + "#class" : tiktok.TiktokSharepostExtractor, "#pattern" : PATTERN, "#options" : {"videos": False} }, @@ -144,7 +144,7 @@ "#url" : "https://www.tiktokv.com/share/video/7449708266168274208", "#comment" : "Video post as a share link", "#category" : ("", "tiktok", "sharepost"), - "#class" : tiktok.TiktokShareExtractor, + "#class" : tiktok.TiktokSharepostExtractor, "#urls" : "ytdl:https://www.tiktokv.com/share/video/7449708266168274208", "#options" : {"videos": True} }, @@ -184,7 +184,7 @@ "#url" : "https://www.tiktokv.com/share/video/7240568259186019630", "#comment" : "www.tiktokv.com link: many photos with audio", "#category" : ("", "tiktok", "sharepost"), - "#class" : tiktok.TiktokShareExtractor, + "#class" : tiktok.TiktokSharepostExtractor, "#pattern" : PATTERN_WITH_AUDIO, "#options" : {"videos": True} }, From d835e9cb332ef92625330bdb798f844679df1b1d Mon Sep 17 00:00:00 2001 From: CasualYT31 <21147925+CasualYT31@users.noreply.github.com> Date: Tue, 21 Jan 2025 18:26:18 +0000 Subject: [PATCH 23/23] TikTok: Export post's creation date in JSON (ISO 8601) --- gallery_dl/extractor/tiktok.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 986a60b618..ff9481b5b9 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -9,6 +9,7 @@ from .common import Extractor, Message from .. import exception, text, util, ytdl from re import compile, escape, IGNORECASE +from datetime import datetime BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok(?:v?)\.com" USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)?" 
@@ -92,6 +93,9 @@ def items(self): if len(original_title) == 0: title = "TikTok photo #{}".format(id) title = title[:150] + date = datetime.fromtimestamp( + int(post_info["createTime"]) + ).isoformat() user = post_info["author"]["uniqueId"] # It's probably obvious but I thought it was worth noting # because I got stuck on this for a while: make sure to emit @@ -117,6 +121,7 @@ def items(self): name_and_ext = text.nameext_from_url(url) yield Message.Url, url, { "title" : title, + "date" : date, "id" : id, "index" : i + 1, "img_id" : name_and_ext["filename"].split("~")[0], @@ -135,6 +140,7 @@ def items(self): "filename" : "", "extension" : "", "title" : title, + "date" : date, "id" : id, "index" : "", "img_id" : ""
Galleries
TikTok https://www.tiktok.com/ Photos Cookies
TMOHentai https://tmohentai.com/
TikTok https://www.tiktok.com/ Photos Photos, Videos, Audio Cookies
TikTok https://www.tiktok.com/ Photos, Videos, Audio Photos, Videos, Audio, User Profiles
**Videos, Audio, and User Profiles require yt-dlp or youtube-dl.** Pass `-o videos` to download photos only, and `-o tiktok-range="a-yt-dlp-range"` to tell yt-dlp to only extract XYZ links when scraping a user profile.
Cookies
TikTok https://www.tiktok.com/ Photos, Videos, Audio, User Profiles
**Videos, Audio, and User Profiles require yt-dlp or youtube-dl.** Pass `-o videos` to download photos only, and `-o tiktok-range="a-yt-dlp-range"` to tell yt-dlp to only extract XYZ links when scraping a user profile.
Photos, Videos, Audio, User Profiles, Profile Avatars
**Videos, Audio, and User Profiles require yt-dlp or youtube-dl.** Pass `-o videos` to download photos only, and `-o tiktok-range="a-yt-dlp-range"` to tell yt-dlp to only extract XYZ links when scraping a user profile.
Cookies
TikTok https://www.tiktok.com/ Photos, Videos, Audio, User Profiles, Profile Avatars
**Videos, Audio, and User Profiles require yt-dlp or youtube-dl.** Pass `-o videos` to download photos only, and `-o tiktok-range="a-yt-dlp-range"` to tell yt-dlp to only extract XYZ links when scraping a user profile.
Photos, Videos, Audio, User Profiles, Profile Avatars

Videos, Audio, and User Profiles require yt-dlp or youtube-dl. Pass -o videos to download photos only, and -o tiktok-range="a-yt-dlp-range" to tell yt-dlp to only extract XYZ links when scraping a user profile.
Cookies
TikTok https://www.tiktok.com/ Photos, Videos, Audio, User Profiles, Profile Avatars

Videos, Audio, and User Profiles require yt-dlp or youtube-dl. Pass -o videos to download photos only, and -o tiktok-range="a-yt-dlp-range" to tell yt-dlp to only extract XYZ links when scraping a user profile.
Photos, Videos, Audio, User Profiles, Profile Avatars

Videos, Audio, and User Profiles require yt-dlp or youtube-dl. Pass -o videos to download photos only, and -o tiktok-range="a-yt-dlp-range" to tell yt-dlp to only extract the links within that range when scraping a user profile. Also note that profile avatars are only downloaded when downloading a User Profile, and only if the user has at least one post.
Cookies