Skip to content

Commit

Permalink
Support downloading videos and audio
Browse files Browse the repository at this point in the history
Respond to comments
Improve archiving and file naming
  • Loading branch information
CasualYT31 committed Dec 23, 2024
1 parent 451de93 commit 33954fd
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 70 deletions.
71 changes: 53 additions & 18 deletions gallery_dl/extractor/tiktok.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,69 +22,104 @@ class TiktokExtractor(Extractor):

# Extractor-level settings (class attributes of TiktokExtractor).
category = "tiktok"
directory_fmt = ("{category}", "{user}")
# "index" and "img_id" are empty strings for "ytdl:" entries (see items()).
# NOTE(review): the "?_//" format spec presumably emits the "_" prefix only
# when the field is non-empty -- confirm against gallery-dl's formatting docs.
filename_fmt = "{title} [{id}{index:?_//}{img_id:?_//}].{extension}"
# Archive key combines the post ID with the per-image index and image ID.
archive_fmt = "{id}_{index}_{img_id}"
root = "https://www.tiktok.com/"
cookies_domain = ".tiktok.com"

def urls(self):
    """Return the list of URLs that items() iterates over

    This implementation returns a single-element list holding self.url.
    """
    own_url = self.url
    return [own_url]

def items(self):
    """Yield download messages for every URL returned by urls()

    For an image post, a Directory message and one Url message per image
    are yielded. When the "videos" option is enabled (default: True), a
    "ytdl:" Url message is yielded as well, for plain video posts and
    for image posts alike. With "videos" disabled, a post without
    images is logged and skipped.

    Raises:
        exception.AuthorizationError: status code 10222
        exception.NotFoundError: status code 10204
        exception.ExtractionError: status code 10231 (region lock) or
            any other non-zero status code
    """
    videos = self.config("videos", True)
    # The pattern does not depend on the URL, so build it only once.
    photo_segment = compile(escape("/photo/"), IGNORECASE)

    for tiktok_url in self.urls():
        # If we can recognise that this is a /photo/ link, preemptively
        # replace it with /video/ to prevent a needless second request.
        # See below.
        tiktok_url_to_use = photo_segment.sub("/video/", tiktok_url)
        video_detail = self._rehydration_data(tiktok_url_to_use)
        if "webapp.video-detail" not in video_detail:
            # Only /video/ links result in the video-detail dict we need.
            # Try again using that form of link.
            tiktok_url_to_use = video_detail["seo.abtest"]["canonical"] \
                .replace("/photo/", "/video/")
            video_detail = self._rehydration_data(tiktok_url_to_use)
        video_detail = video_detail["webapp.video-detail"]
        self._raise_for_status(video_detail, tiktok_url)

        post_info = video_detail["itemInfo"]["itemStruct"]
        post_id = post_info["id"]
        user = post_info["author"]["uniqueId"]
        is_image_post = "imagePost" in post_info

        # Posts without a description get a generic title that names
        # the kind of post; titles are capped at 150 characters.
        title = post_info["desc"] or "TikTok {} #{}".format(
            "photo" if is_image_post else "video", post_id)
        title = title[:150]

        if is_image_post:
            yield Message.Directory, {"user": user}
            images = post_info["imagePost"]["images"]
            for index, img in enumerate(images, 1):
                url = img["imageURL"]["urlList"][0]
                name_and_ext = text.nameext_from_url(url)
                yield Message.Url, url, {
                    "title"    : title,
                    "id"       : post_id,
                    "index"    : index,
                    "img_id"   : name_and_ext["filename"].split("~")[0],
                    "extension": name_and_ext["extension"],
                    "width"    : img["imageWidth"],
                    "height"   : img["imageHeight"],
                }
        elif videos:
            # A Directory message must be emitted before anything is
            # handed to yt-dlp. Otherwise the download job is never
            # initialized and fails with NoneType / set_filename errors.
            yield Message.Directory, {"user": user}
        else:
            self.log.info("Skipping video post %s", tiktok_url)

        if videos:
            # Emitted for image posts as well as for plain video posts.
            yield Message.Url, "ytdl:" + tiktok_url_to_use, {
                "filename" : "",
                "extension": "",
                "title"    : title,
                "id"       : post_id,
                "index"    : "",
                "img_id"   : "",
            }

def _rehydration_data(self, url):
    """Request 'url' and return its embedded "__DEFAULT_SCOPE__" dict"""
    return util.json_loads(text.extr(
        self.request(url).text,
        '<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
        'type="application/json">',
        '</script>'
    ))["__DEFAULT_SCOPE__"]

def _raise_for_status(self, video_detail, tiktok_url):
    """Raise the matching exception for a non-zero "statusCode"

    A missing "statusCode" key is treated like status code 0 (no error).
    """
    status = video_detail.get("statusCode", 0)
    if status == 0:
        return
    if status == 10222:
        raise exception.AuthorizationError(
            tiktok_url + ": Login required to access this post"
        )
    if status == 10204:
        raise exception.NotFoundError(tiktok_url)
    if status == 10231:
        raise exception.ExtractionError(
            tiktok_url + " is region locked, try downloading with "
            "a VPN/proxy connection"
        )
    raise exception.ExtractionError(
        "{}: Received unknown error code {} with message {}".format(
            tiktok_url, status, video_detail.get("statusMsg", ""))
    )


class TiktokPostExtractor(TiktokExtractor):
Expand Down
15 changes: 0 additions & 15 deletions gallery_dl/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,6 @@ def slugify(value):
return re.sub(r"[-\s]+", "-", value).strip("-_")


def sanitize_for_filename(string, replacement=" "):
    """Replace characters that would be illegal to have in a filename

    'string' is converted with str() first. Each illegal character
    (path separators, wildcard and quoting characters, and the ASCII
    control characters 0x00-0x1F and 0x7F) is then replaced by
    'replacement', which defaults to a single space and is always
    inserted literally.

    This function is similar to slugify(), except it retains more
    characters (notably characters such as # and @).
    Note that the length of the string is not capped!

    Inspiration:
    https://stackoverflow.com/a/71199182
    """
    # A callable keeps 'replacement' literal; a plain string would have
    # backslashes and group references interpreted by re.sub().
    return re.sub(
        r"[/\\?%*:|\"<>\x7F\x00-\x1F]",
        lambda _match: replacement,
        str(string),
    )


def ensure_http_scheme(url, scheme="https://"):
"""Prepend 'scheme' to 'url' if it doesn't have one"""
if url and not url.startswith(("https://", "http://")):
Expand Down
107 changes: 94 additions & 13 deletions test/results/tiktok.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,66 +5,147 @@
# published by the Free Software Foundation.

from gallery_dl.extractor import tiktok
from gallery_dl import exception

# Image URLs served by TikTok's CDN for photo posts.
PATTERN = r"https://p1[69]-.*\.tiktokcdn.*\.com/.*/[0-9a-fA-F]+~.*\.jpeg"
# With "videos" enabled, photo posts additionally yield a "ytdl:" URL.
PATTERN_WITH_AUDIO = r"(?:" + PATTERN + r")|(?:ytdl\:)"


__tests__ = (
    # Test many photos.
    {
        "#url"     : "https://www.tiktok.com/@chillezy/photo/7240568259186019630",
        "#comment" : "/photo/ link: many photos",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    {
        "#url"     : "https://www.tiktok.com/@chillezy/video/7240568259186019630",
        "#comment" : "/video/ link: many photos",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    {
        "#url"     : "https://vm.tiktok.com/ZGdh4WUhr/",
        "#comment" : "vm.tiktok.com link: many photos",
        "#category": ("", "tiktok", "vmpost"),
        "#class"   : tiktok.TiktokVmpostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    # Test one photo.
    {
        "#url"     : "https://www.tiktok.com/@d4vinefem/photo/7449575367024626974",
        "#comment" : "/photo/ link: single photo",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    {
        "#url"     : "https://www.tiktok.com/@d4vinefem/video/7449575367024626974",
        "#comment" : "/video/ link: single photo",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    {
        "#url"     : "https://vm.tiktok.com/ZGdhVtER2/",
        "#comment" : "vm.tiktok.com link: single photo",
        "#category": ("", "tiktok", "vmpost"),
        "#class"   : tiktok.TiktokVmpostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    # Test a few photos.
    {
        "#url"     : "https://www.tiktok.com/@.mcfc.central/photo/7449701420934122785",
        "#comment" : "/photo/ link: few photos",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    {
        "#url"     : "https://www.tiktok.com/@.mcfc.central/video/7449701420934122785",
        "#comment" : "/video/ link: few photos",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    {
        "#url"     : "https://vm.tiktok.com/ZGdhVW3cu/",
        "#comment" : "vm.tiktok.com link: few photos",
        "#category": ("", "tiktok", "vmpost"),
        "#class"   : tiktok.TiktokVmpostExtractor,
        "#pattern" : PATTERN,
        "#options" : {"videos": False},
    },
    # Test error handling.
    {
        "#url"      : "https://www.tiktok.com/@ughuwhguweghw/video/1",
        "#comment"  : "deleted post",
        "#category" : ("", "tiktok", "post"),
        "#class"    : tiktok.TiktokPostExtractor,
        "#exception": exception.NotFoundError,
        "#options"  : {"videos": False},
    },
    # Test video posts.
    {
        "#url"     : "https://www.tiktok.com/@memezar/video/7449708266168274208",
        "#comment" : "Video post",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#urls"    : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
        "#options" : {"videos": True},
    },
    {
        "#url"     : "https://www.tiktok.com/@memezar/photo/7449708266168274208",
        "#comment" : "Video post as a /photo/ link",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#urls"    : "ytdl:https://www.tiktok.com/@memezar/video/7449708266168274208",
        "#options" : {"videos": True},
    },
    {
        "#url"     : "https://vm.tiktok.com/ZGdht7cjp/",
        "#comment" : "Video post as a VM link",
        "#category": ("", "tiktok", "vmpost"),
        "#class"   : tiktok.TiktokVmpostExtractor,
        "#urls"    : "ytdl:https://vm.tiktok.com/ZGdht7cjp/",
        "#options" : {"videos": True},
    },
    {
        "#url"     : "https://www.tiktok.com/@memezar/video/7449708266168274208",
        "#comment" : "Skipping video post",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#urls"    : [],
        "#options" : {"videos": False},
    },
    # Test photo posts with their audio.
    {
        "#url"     : "https://www.tiktok.com/@chillezy/photo/7240568259186019630",
        "#comment" : "/photo/ link: many photos with audio",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#pattern" : PATTERN_WITH_AUDIO,
        "#options" : {"videos": True},
    },
    {
        "#url"     : "https://www.tiktok.com/@chillezy/video/7240568259186019630",
        "#comment" : "/video/ link: many photos with audio",
        "#category": ("", "tiktok", "post"),
        "#class"   : tiktok.TiktokPostExtractor,
        "#pattern" : PATTERN_WITH_AUDIO,
        "#options" : {"videos": True},
    },
    {
        "#url"     : "https://vm.tiktok.com/ZGdh4WUhr/",
        "#comment" : "vm.tiktok.com link: many photos with audio",
        "#category": ("", "tiktok", "vmpost"),
        "#class"   : tiktok.TiktokVmpostExtractor,
        "#pattern" : PATTERN_WITH_AUDIO,
        "#options" : {"videos": True},
    },
)
24 changes: 0 additions & 24 deletions test/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,30 +92,6 @@ def test_slugify(self, f=text.slugify):
self.assertEqual(f(1), "1")
self.assertEqual(f(2.3), "23")

def test_sanitize_for_filename(self, f=text.sanitize_for_filename):
    # Strings without illegal characters pass through unchanged.
    self.assertEqual(f("Hello World"), "Hello World")
    self.assertEqual(f("-HeLLo---World-"), "-HeLLo---World-")
    self.assertEqual(f("_Hello_World_"), "_Hello_World_")

    # Each illegal character is replaced by exactly one space.
    self.assertEqual(
        f("_-H#e:l#l:o+\t+W?o!rl=d-_"),
        "_-H#e l#l o+ +W o!rl=d-_"
    )
    # 14 illegal characters in, 14 spaces out.
    self.assertEqual(
        f("/\\?%*:|\"<>\x7F\x00\x0B\x1F"),
        " " * 14
    )

    self.assertEqual(f(""), "")
    self.assertEqual(f("-"), "-")
    self.assertEqual(f("--"), "--")

    # Non-string input is converted with str() before sanitizing.
    self.assertEqual(f(()), "()")
    self.assertEqual(f([]), "[]")
    self.assertEqual(f({}), "{}")
    self.assertEqual(f(None), "None")
    self.assertEqual(f(1), "1")
    self.assertEqual(f(2.3), "2.3")

def test_ensure_http_scheme(self, f=text.ensure_http_scheme):
result = "https://example.org/filename.ext"

Expand Down

0 comments on commit 33954fd

Please sign in to comment.