diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index db73b37a1e..e66f20234e 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1,6 +1,7 @@
 # Supported Sites
 
 
+
 Consider all listed sites to potentially be NSFW.
 
 
@@ -925,6 +926,12 @@ Consider all listed sites to potentially be NSFW.
 
 
 
+
+
+
+
+
+
 
 
 
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index fc8d7b20cd..0abcce61fd 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -168,6 +168,7 @@
     "tapas",
     "tcbscans",
     "telegraph",
+    "tiktok",
     "tmohentai",
     "toyhouse",
     "tsumino",
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
new file mode 100644
index 0000000000..ff9481b5b9
--- /dev/null
+++ b/gallery_dl/extractor/tiktok.py
@@ -0,0 +1,223 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.tiktok.com/"""
+
+from .common import Extractor, Message
+from .. import exception, text, util, ytdl
+import re
+from datetime import datetime, timezone
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok(?:v?)\.com"
+USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)?"
+POST_PATTERN = USER_PATTERN + \
+    r"/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*"
+# Short links. The "(?![/@])" guard keeps this pattern from claiming
+# bare "tiktok.com/@user" URLs meant for the post and user extractors.
+VM_POST_PATTERN = \
+    r"(?:(?:https?://)?(?:(?:vm|vt)\.)?tiktok\.com/+(?![/@]).*/*)|" + \
+    r"(?:(?:https?://)?(?:www\.)?tiktok\.com/+t/+.*/*)"
+SHARE_PATTERN = BASE_PATTERN + r"/+share/+video/+(?:[0-9]+)/*"
+
+# Finds the "/photo/" path segment of a post URL in any letter case.
+PHOTO_PATH = re.compile(r"/photo/", re.IGNORECASE)
+# Markers enclosing the JSON data embedded in a TikTok HTML page.
+DATA_BEGIN = ('<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
+              'type="application/json">')
+DATA_END = "</script>"
+
+
+class TiktokExtractor(Extractor):
+    """Base class for TikTok extractors
+
+    For every URL returned by urls(), items() yields a Directory message
+    followed by Url messages for the author's avatar (once, and only if
+    avatar() is true), for each photo of an image post and, unless the
+    "videos" option is false, a "ytdl:" URL for the post itself.
+    """
+
+    category = "tiktok"
+    directory_fmt = ("{category}", "{user}")
+    filename_fmt = "{title} [{id}{index:?_//}{img_id:?_//}].{extension}"
+    archive_fmt = "{id}_{index}_{img_id}"
+    root = "https://www.tiktok.com/"
+    cookies_domain = ".tiktok.com"
+
+    def urls(self):
+        """Return the post URLs to process"""
+        return [self.url]
+
+    def avatar(self):
+        """Return whether the author's avatar should be downloaded"""
+        return False
+
+    def items(self):
+        videos = self.config("videos", True)
+        # All URLs served by urls() are assumed to come from the same
+        # author, so the avatar is emitted at most once.
+        avatar_pending = self.avatar()
+        for tiktok_url in self.urls():
+            # Only /video/ pages carry the detail dict we need, so
+            # rewrite /photo/ links up front to save a second request.
+            url = PHOTO_PATH.sub("/video/", tiktok_url)
+            data = self._rehydration_data(url)
+            if "webapp.video-detail" not in data:
+                # Retry with the canonical URL in its /video/ form.
+                url = data["seo.abtest"]["canonical"] \
+                    .replace("/photo/", "/video/")
+                data = self._rehydration_data(url)
+            detail = data["webapp.video-detail"]
+            self._check_status(detail, tiktok_url)
+
+            post = detail["itemInfo"]["itemStruct"]
+            author = post["author"]
+            user = author["uniqueId"]
+            post_id = post["id"]
+            is_photo = "imagePost" in post
+            title = post["desc"] or "TikTok {} #{}".format(
+                "photo" if is_photo else "video", post_id)
+            title = title[:150]
+            # UTC keeps the date independent of the local timezone.
+            date = datetime.fromtimestamp(
+                int(post["createTime"]), timezone.utc
+            ).replace(tzinfo=None).isoformat()
+
+            # A Directory message has to precede any Url message;
+            # otherwise the download job is not initialized yet and
+            # "ytdl:" downloads fail with NoneType/set_filename errors.
+            yield Message.Directory, {"user": user}
+
+            if avatar_pending:
+                avatar_pending = False
+                avatar = author["avatarLarger"]
+                name = text.nameext_from_url(avatar)
+                yield Message.Url, avatar, {
+                    "title"    : "@" + user,
+                    "id"       : author["id"],
+                    "index"    : "",
+                    "img_id"   : name["filename"].split("~")[0],
+                    "extension": name["extension"],
+                }
+
+            if is_photo:
+                images = post["imagePost"]["images"]
+                for index, img in enumerate(images, 1):
+                    img_url = img["imageURL"]["urlList"][0]
+                    name = text.nameext_from_url(img_url)
+                    yield Message.Url, img_url, {
+                        "title"    : title,
+                        "date"     : date,
+                        "id"       : post_id,
+                        "index"    : index,
+                        "img_id"   : name["filename"].split("~")[0],
+                        "extension": name["extension"],
+                        "width"    : img["imageWidth"],
+                        "height"   : img["imageHeight"],
+                    }
+            elif not videos:
+                self.log.info("Skipping video post %s", tiktok_url)
+
+            if videos:
+                # Videos, as well as the audio of photo posts, are
+                # handed over to yt-dlp/youtube-dl.
+                yield Message.Url, "ytdl:" + url, {
+                    "filename" : "",
+                    "extension": "",
+                    "title"    : title,
+                    "date"     : date,
+                    "id"       : post_id,
+                    "index"    : "",
+                    "img_id"   : "",
+                }
+
+    def _rehydration_data(self, url):
+        """Fetch 'url' and return the "__DEFAULT_SCOPE__" part of the
+        JSON data embedded in its HTML"""
+        page = self.request(url).text
+        data = text.extr(page, DATA_BEGIN, DATA_END)
+        if not data:
+            raise exception.ExtractionError(
+                url + ": Unable to find embedded post data")
+        return util.json_loads(data)["__DEFAULT_SCOPE__"]
+
+    def _check_status(self, detail, url):
+        """Raise an exception if 'detail' carries a non-zero statusCode"""
+        code = detail.get("statusCode", 0)
+        if code == 0:
+            return
+        if code == 10222:
+            raise exception.AuthorizationError(
+                url + ": Login required to access this post")
+        if code == 10204:
+            raise exception.NotFoundError(url)
+        if code == 10231:
+            raise exception.ExtractionError(
+                url + " is region locked, try downloading with "
+                "a VPN/proxy connection")
+        message = url + ": Received unknown error code " + str(code)
+        if "statusMsg" in detail:
+            message += " with message " + detail["statusMsg"]
+        raise exception.ExtractionError(message)
+
+
+class TiktokPostExtractor(TiktokExtractor):
+    """Extract a single video or photo TikTok link"""
+
+    subcategory = "post"
+    pattern = POST_PATTERN
+    example = "https://www.tiktok.com/@chillezy/photo/7240568259186019630"
+
+
+class TiktokVmpostExtractor(TiktokExtractor):
+    """Extract a single video or photo TikTok VM link"""
+
+    subcategory = "vmpost"
+    pattern = VM_POST_PATTERN
+    example = "https://vm.tiktok.com/ZGdh4WUhr/"
+
+
+class TiktokSharepostExtractor(TiktokExtractor):
+    """Extract a single video or photo TikTok share link"""
+
+    subcategory = "sharepost"
+    pattern = SHARE_PATTERN
+    example = "https://www.tiktokv.com/share/video/7240568259186019630"
+
+
+class TiktokUserExtractor(TiktokExtractor):
+    """Extract a TikTok user's profile"""
+
+    subcategory = "user"
+    pattern = USER_PATTERN + r"/*$"
+    example = "https://www.tiktok.com/@chillezy"
+
+    def urls(self):
+        """Attempt to use yt-dlp/youtube-dl to extract links from a
+        user's page"""
+
+        try:
+            module = ytdl.import_module(self.config("module"))
+        except (ImportError, SyntaxError) as exc:
+            self.log.error("Cannot import module '%s'",
+                           getattr(exc, "name", ""))
+            self.log.debug("", exc_info=exc)
+            raise exception.ExtractionError("yt-dlp or youtube-dl is required "
+                                            "for this feature!")
+        with ytdl.construct_YoutubeDL(
+            module=module,
+            obj=self,
+            user_opts={
+                "ignore_no_formats_error": True,
+                "cookiefile": self.cookies_file,
+                "playlist_items": str(self.config("tiktok-range", ""))
+            }
+        ) as ydl:
+            info = ydl.extract_info(self.url, download=False)
+            # This should include video and photo posts in /video/ URL form.
+            return [video["webpage_url"] for video in info["entries"]]
+
+    def avatar(self):
+        return True
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 5a6303e700..8765164dbe 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -141,10 +141,11 @@
     "tbib"           : "The Big ImageBoard",
     "tcbscans"       : "TCB Scans",
     "tco"            : "Twitter t.co",
-    "tmohentai"      : "TMOHentai",
     "thatpervert"    : "ThatPervert",
     "thebarchive"    : "The /b/ Archive",
     "thecollection"  : "The /co/llection",
+    "tiktok"         : "TikTok",
+    "tmohentai"      : "TMOHentai",
     "tumblrgallery"  : "TumblrGallery",
     "vanillarock"    : "もえぴりあ",
     "vidyart2"       : "/v/idyart2",
diff --git a/test/results/tiktok.py b/test/results/tiktok.py
new file mode 100644
index 0000000000..62779b9945
--- /dev/null
+++ b/test/results/tiktok.py
@@ -0,0 +1,223 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import tiktok
+from gallery_dl import exception
+
+PATTERN = r"https://p1[69]-.*\.tiktokcdn.*\.com/.*/[0-9a-fA-F]+~.*\.jpeg"
+PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r")|(?:ytdl\:)"  # or a ytdl: URL
+
+
+__tests__ = (  # one dict per tested URL
+{
+    "#url"      : "https://www.tiktok.com/@chillezy/photo/7240568259186019630",
+    "#comment"  : "/photo/ link: many photos",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/@chillezy/video/7240568259186019630",
+    "#comment"  : "/video/ link: many photos",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://vm.tiktok.com/ZGdh4WUhr/",
+    "#comment"  : "vm.tiktok.com link: many photos",
+    "#category" : ("", "tiktok", "vmpost"),
+    "#class"    : tiktok.TiktokVmpostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktokv.com/share/video/7240568259186019630",
+    "#comment"  : "www.tiktokv.com link: many photos",
+    "#category" : ("", "tiktok", "sharepost"),
+    "#class"    : tiktok.TiktokSharepostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974",
+    "#comment"  : "/photo/ link: single photo",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974",
+    "#comment"  : "/video/ link: single photo",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://vm.tiktok.com/ZGdhVtER2/",
+    "#comment"  : "vm.tiktok.com link: single photo",
+    "#category" : ("", "tiktok", "vmpost"),
+    "#class"    : tiktok.TiktokVmpostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktokv.com/share/video/7449575367024626974",
+    "#comment"  : "www.tiktokv.com link: single photo",
+    "#category" : ("", "tiktok", "sharepost"),
+    "#class"    : tiktok.TiktokSharepostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785",
+    "#comment"  : "/photo/ link: few photos",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785",
+    "#comment"  : "/video/ link: few photos",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://vm.tiktok.com/ZGdhVW3cu/",
+    "#comment"  : "vm.tiktok.com link: few photos",
+    "#category" : ("", "tiktok", "vmpost"),
+    "#class"    : tiktok.TiktokVmpostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktokv.com/share/video/7449701420934122785",
+    "#comment"  : "www.tiktokv.com link: few photos",
+    "#category" : ("", "tiktok", "sharepost"),
+    "#class"    : tiktok.TiktokSharepostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/@ughuwhguweghw/video/1",
+    "#comment"  : "deleted post",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#exception" : exception.NotFoundError,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/@memezar/video/7449708266168274208",
+    "#comment"  : "Video post",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#urls"     : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
+    "#options"  : {"videos": True}
+},
+{
+    "#url"      : "https://www.tiktok.com/@memezar/photo/7449708266168274208",
+    "#comment"  : "Video post as a /photo/ link",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#urls"     : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
+    "#options"  : {"videos": True}
+},
+{
+    "#url"      : "https://vm.tiktok.com/ZGdht7cjp/",
+    "#comment"  : "Video post as a VM link",
+    "#category" : ("", "tiktok", "vmpost"),
+    "#class"    : tiktok.TiktokVmpostExtractor,
+    "#urls"     : "ytdl:https://vm.tiktok.com/ZGdht7cjp/",
+    "#options"  : {"videos": True}
+},
+{
+    "#url"      : "https://www.tiktokv.com/share/video/7449708266168274208",
+    "#comment"  : "Video post as a share link",
+    "#category" : ("", "tiktok", "sharepost"),
+    "#class"    : tiktok.TiktokSharepostExtractor,
+    "#urls"     : "ytdl:https://www.tiktokv.com/share/video/7449708266168274208",
+    "#options"  : {"videos": True}
+},
+{
+    "#url"      : "https://www.tiktok.com/@memezar/video/7449708266168274208",
+    "#comment"  : "Skipping video post",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#urls"     : [],
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/@chillezy/photo/7240568259186019630",
+    "#comment"  : "/photo/ link: many photos with audio",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#pattern"  : PATTERN_WITH_AUDIO,
+    "#options"  : {"videos": True}
+},
+{
+    "#url"      : "https://www.tiktok.com/@chillezy/video/7240568259186019630",
+    "#comment"  : "/video/ link: many photos with audio",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TiktokPostExtractor,
+    "#pattern"  : PATTERN_WITH_AUDIO,
+    "#options"  : {"videos": True}
+},
+{
+    "#url"      : "https://vm.tiktok.com/ZGdh4WUhr/",
+    "#comment"  : "vm.tiktok.com link: many photos with audio",
+    "#category" : ("", "tiktok", "vmpost"),
+    "#class"    : tiktok.TiktokVmpostExtractor,
+    "#pattern"  : PATTERN_WITH_AUDIO,
+    "#options"  : {"videos": True}
+},
+{
+    "#url"      : "https://www.tiktokv.com/share/video/7240568259186019630",
+    "#comment"  : "www.tiktokv.com link: many photos with audio",
+    "#category" : ("", "tiktok", "sharepost"),
+    "#class"    : tiktok.TiktokSharepostExtractor,
+    "#pattern"  : PATTERN_WITH_AUDIO,
+    "#options"  : {"videos": True}
+},
+{
+    "#url"      : "https://www.tiktok.com/@chillezy",
+    "#comment"  : "User profile",
+    "#category" : ("", "tiktok", "user"),
+    "#class"    : tiktok.TiktokUserExtractor,
+    "#pattern"  : PATTERN_WITH_AUDIO,
+    "#options"  : {"videos": True, "tiktok-range": "1-10"}
+},
+{
+    "#url"      : "https://www.tiktok.com/@chillezy/",
+    "#comment"  : "User profile without audio or videos",
+    "#category" : ("", "tiktok", "user"),
+    "#class"    : tiktok.TiktokUserExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False, "tiktok-range": "1-10"}
+},
+{
+    "#url"      : "https://vt.tiktok.com/ZGdhVtER2",
+    "#comment"  : "vt.tiktok.com link: single photo",
+    "#category" : ("", "tiktok", "vmpost"),
+    "#class"    : tiktok.TiktokVmpostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+{
+    "#url"      : "https://www.tiktok.com/t/ZGdhVtER2//",
+    "#comment"  : "www.tiktok.com/t/ link: single photo",
+    "#category" : ("", "tiktok", "vmpost"),
+    "#class"    : tiktok.TiktokVmpostExtractor,
+    "#pattern"  : PATTERN,
+    "#options"  : {"videos": False}
+},
+)
Galleries
TikTok https://www.tiktok.com/ Photos, Videos, Audio, User Profiles, Profile Avatars

Videos, Audio, and User Profiles require yt-dlp or youtube-dl. Pass -o videos=false to download photos only, and -o tiktok-range="a-yt-dlp-range" (for example "1-10") to make yt-dlp extract only that range of posts when scraping a user profile. Also note that profile avatars are only downloaded when downloading a User Profile, and only if the user has at least one post.
Cookies
TMOHentai https://tmohentai.com/