mikf · CasualYT31 · Dec 22, 2024 · Dec 22, 2024 · Dec 22, 2024 · Dec 22, 2024
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
@@ -1,6 +1,7 @@
 # Supported Sites
 
 <!-- auto-generated by scripts/supportedsites.py -->
+
 Consider all listed sites to potentially be NSFW.
 
 <table>
@@ -925,6 +926,12 @@ Consider all listed sites to potentially be NSFW.
     <td>Galleries</td>
     <td></td>
 </tr>
+<tr>
+    <td>TikTok</td>
+    <td>https://www.tiktok.com/</td>
+    <td>Photos</td>
+    <td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
+</tr>
 <tr>
     <td>TMOHentai</td>
     <td>https://tmohentai.com/</td>

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
@@ -168,6 +168,7 @@
     "tapas",
     "tcbscans",
     "telegraph",
+    "tiktok",
     "tmohentai",
     "toyhouse",
     "tsumino",

diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.tiktok.com/"""
+
+from .common import Extractor, Message
+from .. import exception, text, util
+import re
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktok\.com"  # scheme/www optional
+USER_PATTERN = BASE_PATTERN + r"/+@([\w.]{0,23}\w)(?:/\S*)?/*"  # group 1: name
+POST_PATTERN = BASE_PATTERN + r"/+@(?:[\w.]{0,23}\w)(?:/\S*)?/+(?:[pP][hH][oO][tT][oO]|[vV][iI][dD][eE][oO])/+(?:[0-9]+)/*"
+VM_POST_PATTERN = r"(?:https?://)?(?:vm\.)?tiktok\.com/+.*/*"  # any path
+INSENSITIVE_PHOTO = re.compile(re.escape("/photo/"), re.IGNORECASE)  # any case
+
+
+class TikTokExtractor(Extractor):
+    """Base class for TikTok extractors"""
+
+    category = "tiktok"
+    directory_fmt = ("{category}", "{user}")
+    filename_fmt = "{id}_{index}_{img_id}.{extension}"
+    archive_fmt = "{id}_{img_id}"
+    root = "https://www.tiktok.com/"
+    cookies_domain = ".tiktok.com"
+
+    def urls(self):
+        """Return the list of post URLs to process"""
+        return [self.url]
+
+    def _default_scope(self, url):
+        """Fetch 'url'; return its embedded '__DEFAULT_SCOPE__' JSON dict"""
+        data = text.extr(
+            self.request(url).text,
+            '<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
+            'type="application/json">', '</script>')
+        return util.json_loads(data)["__DEFAULT_SCOPE__"]
+
+    def items(self):
+        """Yield a Directory message and one Url message per image
+
+        Raises exception.AuthorizationError on statusMsg "author_secret".
+        """
+        for tiktok_url in self.urls():
+            # Only /video/ links result in the "webapp.video-detail" dict
+            # we need, so rewrite recognisable /photo/ links up front to
+            # prevent a needless second request.
+            tiktok_url = INSENSITIVE_PHOTO.sub("/video/", tiktok_url)
+            scope = self._default_scope(tiktok_url)
+            if "webapp.video-detail" not in scope:
+                # Try again using the /video/ form of the canonical link.
+                tiktok_url = INSENSITIVE_PHOTO.sub(
+                    "/video/", scope["seo.abtest"]["canonical"])
+                scope = self._default_scope(tiktok_url)
+            video_detail = scope["webapp.video-detail"]
+            if video_detail.get("statusMsg") == "author_secret":
+                raise exception.AuthorizationError(
+                    "Login required to access this post")
+            post_info = video_detail["itemInfo"]["itemStruct"]
+            user = post_info["author"]["uniqueId"]
+            if "imagePost" not in post_info:
+                # TODO: Not a slide show. Should pass this to yt-dlp.
+                continue
+            yield Message.Directory, {"user": user}
+            for i, img in enumerate(post_info["imagePost"]["images"]):
+                # Download from the first URL listed for each image.
+                url = img["imageURL"]["urlList"][0]
+                name_and_ext = text.nameext_from_url(url)
+                yield Message.Url, url, {
+                    "id":        post_info["id"],
+                    "index":     i,
+                    "img_id":    name_and_ext["filename"].split("~")[0],
+                    "extension": name_and_ext["extension"],
+                    "width":     img["imageWidth"],
+                    "height":    img["imageHeight"],
+                }
+
+
+class TikTokPostExtractor(TikTokExtractor):
+    """Extract a single TikTok post from its /photo/ or /video/ link"""
+
+    subcategory = "post"
+    pattern = POST_PATTERN
+    example = "https://www.tiktok.com/@chillezy/photo/7240568259186019630"
+
+
+class TikTokVMPostExtractor(TikTokExtractor):
+    """Extract a single TikTok post from a (vm.)tiktok.com link"""
+
+    subcategory = "post"
+    pattern = VM_POST_PATTERN
+    example = "https://vm.tiktok.com/ZGdh4WUhr/"
+
+
+# TODO: Write profile extractor.
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
@@ -141,10 +141,11 @@
     "tbib"           : "The Big ImageBoard",
     "tcbscans"       : "TCB Scans",
     "tco"            : "Twitter t.co",
-    "tmohentai"      : "TMOHentai",
     "thatpervert"    : "ThatPervert",
     "thebarchive"    : "The /b/ Archive",
     "thecollection"  : "The /co/llection",
+    "tiktok"         : "TikTok",
+    "tmohentai"      : "TMOHentai",
     "tumblrgallery"  : "TumblrGallery",
     "vanillarock"    : "もえぴりあ",
     "vidyart2"       : "/v/idyart2",

diff --git a/test/results/tiktok.py b/test/results/tiktok.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import tiktok
+
+PATTERN = r"https://p1[69]-.*\.tiktokcdn.*\.com/.*/[0-9a-fA-F]+~.*\.jpeg"
+
+
+__tests__ = (
+# Test posts with many photos (/photo/, /video/, and vm.tiktok.com URLs).
+{
+    "#url"      : "https://www.tiktok.com/@chillezy/photo/7240568259186019630",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokPostExtractor,
+    "#pattern"  : PATTERN
+},
+{
+    "#url"      : "https://www.tiktok.com/@chillezy/video/7240568259186019630",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokPostExtractor,
+    "#pattern"  : PATTERN
+},
+{
+    "#url"      : "https://vm.tiktok.com/ZGdh4WUhr/",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokVMPostExtractor,
+    "#pattern"  : PATTERN
+},
+# Test posts with one photo (/photo/, /video/, and vm.tiktok.com URLs).
+{
+    "#url"      : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokPostExtractor,
+    "#pattern"  : PATTERN
+},
+{
+    "#url"      : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokPostExtractor,
+    "#pattern"  : PATTERN
+},
+{
+    "#url"      : "https://vm.tiktok.com/ZGdhVtER2/",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokVMPostExtractor,
+    "#pattern"  : PATTERN
+},
+# Test posts with a few photos (/photo/, /video/, and vm.tiktok.com URLs).
+{
+    "#url"      : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokPostExtractor,
+    "#pattern"  : PATTERN
+},
+{
+    "#url"      : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokPostExtractor,
+    "#pattern"  : PATTERN
+},
+{
+    "#url"      : "https://vm.tiktok.com/ZGdhVW3cu/",
+    "#category" : ("", "tiktok", "post"),
+    "#class"    : tiktok.TikTokVMPostExtractor,
+    "#pattern"  : PATTERN
+}
+)