diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 3a704cf454..53c8833509 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -13,6 +13,12 @@ Consider all listed sites to potentially be NSFW.
+
+ 2ch |
+ https://2ch.hk/ |
+ Boards, Threads |
+ |
+
2chen |
https://sturdychan.help/ |
diff --git a/gallery_dl/extractor/2ch.py b/gallery_dl/extractor/2ch.py
new file mode 100644
index 0000000000..dbbf21b635
--- /dev/null
+++ b/gallery_dl/extractor/2ch.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://2ch.hk/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+class _2chThreadExtractor(Extractor):
+ """Extractor for 2ch threads"""
+ category = "2ch"
+ subcategory = "thread"
+ root = "https://2ch.hk"
+ directory_fmt = ("{category}", "{board}", "{thread} {title}")
+ filename_fmt = "{tim}{filename:? //}.{extension}"
+ archive_fmt = "{board}_{thread}_{tim}"
+ pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/res/(\d+)"
+ example = "https://2ch.hk/a/res/12345.html"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+
+ def items(self):
+ url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
+ posts = self.request(url).json()["threads"][0]["posts"]
+
+ op = posts[0]
+ title = op.get("subject") or text.remove_html(op["comment"])
+
+ thread = {
+ "board" : self.board,
+ "thread": self.thread,
+ "title" : text.unescape(title)[:50],
+ }
+
+ yield Message.Directory, thread
+ for post in posts:
+ files = post.get("files")
+ if files:
+ post["post_name"] = post["name"]
+ post["date"] = text.parse_timestamp(post["timestamp"])
+ del post["files"]
+ del post["name"]
+
+ for file in files:
+ file.update(thread)
+ file.update(post)
+
+ file["filename"] = file["fullname"].rpartition(".")[0]
+ file["tim"], _, file["extension"] = \
+ file["name"].rpartition(".")
+
+ yield Message.Url, self.root + file["path"], file
+
+
+class _2chBoardExtractor(Extractor):
+ """Extractor for 2ch boards"""
+ category = "2ch"
+ subcategory = "board"
+ root = "https://2ch.hk"
+ pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/?$"
+ example = "https://2ch.hk/a/"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board = match.group(1)
+
+ def items(self):
+ # index page
+ url = "{}/{}/index.json".format(self.root, self.board)
+ index = self.request(url).json()
+ index["_extractor"] = _2chThreadExtractor
+ for thread in index["threads"]:
+ url = "{}/{}/res/{}.html".format(
+ self.root, self.board, thread["thread_num"])
+ yield Message.Queue, url, index
+
+ # pages 1..n
+ for n in util.advance(index["pages"], 1):
+ url = "{}/{}/{}.json".format(self.root, self.board, n)
+ page = self.request(url).json()
+ page["_extractor"] = _2chThreadExtractor
+ for thread in page["threads"]:
+ url = "{}/{}/res/{}.html".format(
+ self.root, self.board, thread["thread_num"])
+ yield Message.Queue, url, page
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 13d7b38b65..8e7129618a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -10,6 +10,7 @@
import re
modules = [
+ "2ch",
"2chan",
"2chen",
"35photo",
diff --git a/test/results/2ch.py b/test/results/2ch.py
new file mode 100644
index 0000000000..5400292cf4
--- /dev/null
+++ b/test/results/2ch.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+# The extractor module is named "2ch", which is not a valid Python
+# identifier and therefore cannot be named in a regular import statement.
+# __import__() with a dotted name returns the top-level package, so the
+# submodule is then fetched from 'gallery_dl.extractor' with getattr().
+gallery_dl = __import__("gallery_dl.extractor.2ch")
+_2ch = getattr(gallery_dl.extractor, "2ch")
+
+
+# Expected results for the two extractor classes of the "2ch" module.
+# NOTE(review): keys starting with "#" look like directives for the test
+# runner (URL to test, expected class, URL pattern, result count) and the
+# remaining keys like expected metadata values - confirm against the
+# test harness, which is not part of this file.
+__tests__ = (
+# thread extractor
+{
+ "#url" : "https://2ch.hk/a/res/6202876.html",
+ "#category": ("", "2ch", "thread"),
+ "#class" : _2ch._2chThreadExtractor,
+ "#pattern" : r"https://2ch\.hk/a/src/6202876/\d+\.\w+",
+ "#count" : range(450, 1000),
+
+ "banned" : 0,
+ "board" : "a",
+ "closed" : 0,
+ "comment" : str,
+ "date" : "type:datetime",
+ "displayname": str,
+ "email" : "",
+ "endless" : 1,
+ "extension": str,
+ "filename" : str,
+ "fullname" : str,
+ "height" : int,
+ "lasthit" : 1705273977,
+ "md5" : r"re:[0-9a-f]{32}",
+ "name" : r"re:\d+\.\w+",
+ "num" : int,
+ "number" : range(1, 1000),
+ "op" : 0,
+ "parent" : int,
+ "path" : r"re:/a/src/6202876/\d+\.\w+",
+ "post_name": "Аноним",
+ "size" : int,
+ "sticky" : 0,
+ "subject" : str,
+ "thread" : "6202876",
+ "thumbnail": str,
+ "tim" : r"re:\d+",
+ "timestamp": int,
+ "title" : "MP4/WEBM",
+ "tn_height": int,
+ "tn_width" : int,
+ "trip" : "",
+ "type" : int,
+ "views" : int,
+ "width" : int,
+},
+
+# board extractor: its expected URL pattern is the thread extractor's
+# own 'pattern' attribute
+{
+ "#url" : "https://2ch.hk/a/",
+ "#category": ("", "2ch", "board"),
+ "#class" : _2ch._2chBoardExtractor,
+ "#pattern" : _2ch._2chThreadExtractor.pattern,
+ "#count" : range(200, 300),
+},
+
+)