From 2856485bfab5b422b3ba335106a7449b161835bb Mon Sep 17 00:00:00 2001 From: Hacker 17082006 Date: Mon, 21 Aug 2023 17:15:47 +0700 Subject: [PATCH 1/5] Add fancaps --- gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/fancaps.py | 274 +++++++++++++++++++++++++++++++ 2 files changed, 275 insertions(+) create mode 100644 gallery_dl/extractor/fancaps.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index fa56bfb45d..443bec37d3 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -39,6 +39,7 @@ "exhentai", "fallenangels", "fanbox", + "fancaps", "fanleaks", "fantia", "fapello", diff --git a/gallery_dl/extractor/fancaps.py b/gallery_dl/extractor/fancaps.py new file mode 100644 index 0000000000..592b2b6950 --- /dev/null +++ b/gallery_dl/extractor/fancaps.py @@ -0,0 +1,274 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019-2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://fancaps.net/""" + +from .common import GalleryExtractor, Extractor, Message +from .. import text, util +import re + +SKIP_FIRST_N_IMAGES = 4 +ANIME_BASE_PATTERN = r"(?:https?://)?(?:www\.)?fancaps\.net/anime/" +MOVIE_BASE_PATTERN = r"(?:https?://)?(?:www\.)?fancaps\.net/movies/" +TV_BASE_PATTERN = r"(?:https?://)?(?:www\.)?fancaps\.net/tv/" +ID_PATTERN = r"(\d{1,5})(-[a-zA-Z0-9_/]*)*" + +def extract_episode(page): + title_content = text.extr(page, '

', '

').strip() + return { + "episode": f'Episode {re.search(r"Episode (.+)", title_content).group(1)}', + "episode_alt": text.extr(page, 'Other Title: ', '').replace(" ", ' ') + } + +def id_groups_to_str(groups): + return groups[0] + (groups[1] if len(groups) > 1 else '') + + +class FancapsAnimeEpisodeExtractor(Extractor): + """Extractor for an anime episode on fancaps.net""" + category = "fancaps" + subcategory = "anime-episode" + directory_fmt = ("{category}", "{series}", "{episode_id} {episode}") + filename_fmt = "{image_id}.{extension}" + archive_fmt = "{episode_id}" + root = "https://fancaps.net/anime/episodeimages.php?" + pattern = ANIME_BASE_PATTERN + fr"episodeimages\.php\?{ID_PATTERN}" + test = ( + ("https://fancaps.net/anime/episodeimages.php?19879-Mushoku_Tensei__Jobless_Reincarnation/Episode_1"), + ("https://fancaps.net/anime/episodeimages.php?36394-"), + ("https://fancaps.net/anime/episodeimages.php?33225-Bocchi_the_Rock/Episode_1"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + groups = match.groups() + self.episode_url = self.root + id_groups_to_str(groups) + self.episode_id = groups[0] + self._base_original = "https://cdni.fancaps.net/file/fancaps-animeimages/" + self._base_fallback = ("https://ancdn.fancaps.net/", "https://animethumbs.fancaps.net/") + + def metadata(self, page): + series_match = re.search(fr'', page) + return { + "episode_id": self.episode_id, + **extract_episode(page), + "series": text.split_html(text.extr(page, series_match[0], ''))[0], + "series_url": f"https://fancaps.net/anime/episodeimages.php?{id_groups_to_str(series_match.groups())}" + } + + def _image_fallback(self, filename): + for base_url in self._base_fallback: + yield base_url + filename + + def items(self): + true_episode_url = self.request(text.ensure_http_scheme(self.episode_url)).url + page_idx = 1 + metadata = None + while True: + page = self.request(f"{true_episode_url}&page={page_idx}").text + if metadata is None: + metadata = self.metadata(page) + + image_ids_gen = text.extract_iter(page, 'Next', page) is not None: + break + + page_idx += 1 + +class FancapsTVEpisodeExtractor(Extractor): + """Extractor for a TV episode on fancaps.net""" + category = "fancaps" + subcategory = "tv-episode" + directory_fmt = ("{category}", "{series}", "{episode_id} {episode}") + filename_fmt = "{image_id}.{extension}" + archive_fmt = "{episode_id}" + root = "https://fancaps.net/tv/episodeimages.php?" + pattern = TV_BASE_PATTERN + fr"episodeimages\.php\?{ID_PATTERN}" + test = ( + ("https://fancaps.net/tv/episodeimages.php?22491-Rick_and_Morty_Season_2/Episode_1"), + ("https://fancaps.net/tv/episodeimages.php?22491-"), + ("https://fancaps.net/tv/episodeimages.php?22491"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + groups = match.groups() + self.episode_url = self.root + id_groups_to_str(groups) + self.episode_id = groups[0] + self._base_original = "https://cdni.fancaps.net/file/fancaps-tvimages/" + self._base_fallback = ("https://tvcdn.fancaps.net/", "https://tvthumbs.fancaps.net/") + + def metadata(self, page): + series_match = re.search(fr'', page) + return { + "episode_id": self.episode_id, + **extract_episode(page), + "series": text.split_html(text.extr(page, series_match[0], ''))[0], + "series_url": f"https://fancaps.net/tv/episodeimages.php?{id_groups_to_str(series_match.groups())}" + } + + def _image_fallback(self, filename): + for base_url in self._base_fallback: + yield base_url + filename + + def items(self): + true_episode_url = self.request(text.ensure_http_scheme(self.episode_url)).url + page_idx = 1 + metadata = None + while True: + page = self.request(f"{true_episode_url}&page={page_idx}").text + if metadata is None: + metadata = self.metadata(page) + + image_ids_gen = text.extract_iter(page, '', ''): + episode_id = id_groups_to_str(re.search(fr"/tv/episodeimages\.php\?{ID_PATTERN}", a_html).groups()) + yield Message.Queue, f"https://fancaps.net/tv/episodeimages.php?{episode_id}", {"_extractor": FancapsTVEpisodeExtractor} + + if re.search('Next', page) is not None: + break + + page_idx += 1 + +class FancapsMovieExtractor(Extractor): + """Extractor for a movie on fancaps.net""" + category = "fancaps" + subcategory = "movie" + directory_fmt = ("{category}", "{movie_id} {movie}") + filename_fmt = "{image_id}.{extension}" + archive_fmt = "{movie_id}" + root = "https://fancaps.net/movies/MovieImages.php?movieid=" + pattern = MOVIE_BASE_PATTERN + r"MovieImages\.php\?(?:.*&)?movieid=(\d+)" + test = ( + ("https://fancaps.net/movies/MovieImages.php?movieid=4156&page=1"), + ("https://fancaps.net/movies/MovieImages.php?name=Elemental_2023&movieid=4156&page=2") + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.movie_id = match.group(1) + self.movie_url = self.root + self.movie_id + self._base_original = "https://cdni.fancaps.net/file/fancaps-movieimages/" + self._base_fallback = ("https://mvcdn.fancaps.net/", "https://moviethumbs.fancaps.net/") + + def metadata(self, page): + return { + "movie_id": self.movie_id, + "movie": text.extr(page, '

', "

").strip().replace("Images from ", ''), + } + + def _image_fallback(self, filename): + for base_url in self._base_fallback: + yield base_url + filename + + def items(self): + page_idx = 1 + metadata = None + while True: + page = self.request(f"{self.movie_url}&page={page_idx}").text + if metadata is None: + metadata = self.metadata(page) + + image_ids_gen = text.extract_iter(page, '
Date: Mon, 21 Aug 2023 17:44:47 +0700 Subject: [PATCH 2/5] Here is a nicer code. Mr. flake8 --- .vscode/settings.json | 4 + gallery_dl/extractor/fancaps.py | 232 +++++++++++++++++++++++--------- 2 files changed, 170 insertions(+), 66 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000000..33fe63f7c4 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "python.linting.flake8Enabled": true, + "python.linting.enabled": true +} \ No newline at end of file diff --git a/gallery_dl/extractor/fancaps.py b/gallery_dl/extractor/fancaps.py index 592b2b6950..58bf00a164 100644 --- a/gallery_dl/extractor/fancaps.py +++ b/gallery_dl/extractor/fancaps.py @@ -8,8 +8,8 @@ """Extractors for https://fancaps.net/""" -from .common import GalleryExtractor, Extractor, Message -from .. import text, util +from .common import Extractor, Message +from .. import text import re SKIP_FIRST_N_IMAGES = 4 @@ -18,30 +18,47 @@ TV_BASE_PATTERN = r"(?:https?://)?(?:www\.)?fancaps\.net/tv/" ID_PATTERN = r"(\d{1,5})(-[a-zA-Z0-9_/]*)*" + def extract_episode(page): - title_content = text.extr(page, '

', '

').strip() + title_content = text.extr( + page, + '

', + "

" + ).strip() return { - "episode": f'Episode {re.search(r"Episode (.+)", title_content).group(1)}', - "episode_alt": text.extr(page, 'Other Title: ', '').replace(" ", ' ') + "episode": ( + f'Episode {re.search(r"Episode (.+)", title_content).group(1)}' + ), + "episode_alt": text.extr( + page, 'Other Title: ', "" + ).replace(" ", " "), } - + + def id_groups_to_str(groups): - return groups[0] + (groups[1] if len(groups) > 1 else '') + return groups[0] + (groups[1] if len(groups) > 1 else "") class FancapsAnimeEpisodeExtractor(Extractor): """Extractor for an anime episode on fancaps.net""" + category = "fancaps" subcategory = "anime-episode" directory_fmt = ("{category}", "{series}", "{episode_id} {episode}") filename_fmt = "{image_id}.{extension}" archive_fmt = "{episode_id}" root = "https://fancaps.net/anime/episodeimages.php?" - pattern = ANIME_BASE_PATTERN + fr"episodeimages\.php\?{ID_PATTERN}" + pattern = ANIME_BASE_PATTERN + rf"episodeimages\.php\?{ID_PATTERN}" test = ( - ("https://fancaps.net/anime/episodeimages.php?19879-Mushoku_Tensei__Jobless_Reincarnation/Episode_1"), + ( + "https://fancaps.net/anime/episodeimages.php?19879-Mushoku_Tensei" + "__Jobless_Reincarnation/Episode_1" + ), ("https://fancaps.net/anime/episodeimages.php?36394-"), - ("https://fancaps.net/anime/episodeimages.php?33225-Bocchi_the_Rock/Episode_1"), + ( + "https://fancaps.net/anime/episodeimages.php?33225" + "-Bocchi_the_Rock/Episode_1" + ), ) def __init__(self, match): @@ -49,24 +66,39 @@ def __init__(self, match): groups = match.groups() self.episode_url = self.root + id_groups_to_str(groups) self.episode_id = groups[0] - self._base_original = "https://cdni.fancaps.net/file/fancaps-animeimages/" - self._base_fallback = ("https://ancdn.fancaps.net/", "https://animethumbs.fancaps.net/") - + self._base_original = "https://cdni.fancaps.net" \ + "/file/fancaps-animeimages/" + self._base_fallback = ( + "https://ancdn.fancaps.net/", + "https://animethumbs.fancaps.net/", + ) + def metadata(self, page): - series_match = re.search(fr'
', page) + series_match = re.search( + r"", + page, + ) return { "episode_id": self.episode_id, **extract_episode(page), - "series": text.split_html(text.extr(page, series_match[0], ''))[0], - "series_url": f"https://fancaps.net/anime/episodeimages.php?{id_groups_to_str(series_match.groups())}" + "series": text.split_html( + text.extr(page, series_match[0], "") + )[0], + "series_url": ( + "https://fancaps.net/anime/episodeimages.php?" + "id_groups_to_str(series_match.groups())" + ) } - + def _image_fallback(self, filename): for base_url in self._base_fallback: yield base_url + filename def items(self): - true_episode_url = self.request(text.ensure_http_scheme(self.episode_url)).url + true_episode_url = self.request( + text.ensure_http_scheme(self.episode_url) + ).url page_idx = 1 metadata = None while True: @@ -74,9 +106,11 @@ def items(self): if metadata is None: metadata = self.metadata(page) - image_ids_gen = text.extract_iter(page, 'Next', page) is not None: break page_idx += 1 + class FancapsTVEpisodeExtractor(Extractor): """Extractor for a TV episode on fancaps.net""" + category = "fancaps" subcategory = "tv-episode" directory_fmt = ("{category}", "{series}", "{episode_id} {episode}") filename_fmt = "{image_id}.{extension}" archive_fmt = "{episode_id}" root = "https://fancaps.net/tv/episodeimages.php?" - pattern = TV_BASE_PATTERN + fr"episodeimages\.php\?{ID_PATTERN}" + pattern = TV_BASE_PATTERN + rf"episodeimages\.php\?{ID_PATTERN}" test = ( - ("https://fancaps.net/tv/episodeimages.php?22491-Rick_and_Morty_Season_2/Episode_1"), + ( + "https://fancaps.net/tv/episodeimages.php" + "?22491-Rick_and_Morty_Season_2/Episode_1" + ), ("https://fancaps.net/tv/episodeimages.php?22491-"), ("https://fancaps.net/tv/episodeimages.php?22491"), ) @@ -144,23 +195,35 @@ def __init__(self, match): self.episode_url = self.root + id_groups_to_str(groups) self.episode_id = groups[0] self._base_original = "https://cdni.fancaps.net/file/fancaps-tvimages/" - self._base_fallback = ("https://tvcdn.fancaps.net/", "https://tvthumbs.fancaps.net/") - + self._base_fallback = ( + "https://tvcdn.fancaps.net/", + "https://tvthumbs.fancaps.net/", + ) + def metadata(self, page): - series_match = re.search(fr'', page) + series_match = re.search( + rf"", page + ) return { "episode_id": self.episode_id, **extract_episode(page), - "series": text.split_html(text.extr(page, series_match[0], ''))[0], - "series_url": f"https://fancaps.net/tv/episodeimages.php?{id_groups_to_str(series_match.groups())}" + "series": text.split_html( + text.extr(page, series_match[0], "") + )[0], + "series_url": ( + "https://fancaps.net/tv/episodeimages.php", + f"?{id_groups_to_str(series_match.groups())}" + ) } - + def _image_fallback(self, filename): for base_url in self._base_fallback: yield base_url + filename def items(self): - true_episode_url = self.request(text.ensure_http_scheme(self.episode_url)).url + true_episode_url = self.request( + text.ensure_http_scheme(self.episode_url) + ).url page_idx = 1 metadata = None while True: @@ -168,9 +231,13 @@ def items(self): if metadata is None: metadata = self.metadata(page) - image_ids_gen = text.extract_iter(page, '', ''): - episode_id = id_groups_to_str(re.search(fr"/tv/episodeimages\.php\?{ID_PATTERN}", a_html).groups()) - yield Message.Queue, f"https://fancaps.net/tv/episodeimages.php?{episode_id}", {"_extractor": FancapsTVEpisodeExtractor} + for a_html in text.extract_iter( + page, '

', "

" + ): + episode_id = id_groups_to_str( + re.search( + rf"/tv/episodeimages\.php\?{ID_PATTERN}", + a_html + ).groups() + ) + yield Message.Queue, ( + "https://fancaps.net/tv/episodeimages.php" + f"?{episode_id}" + ), { + "_extractor": FancapsTVEpisodeExtractor + } if re.search('
Next', page) is not None: break page_idx += 1 + class FancapsMovieExtractor(Extractor): """Extractor for a movie on fancaps.net""" + category = "fancaps" subcategory = "movie" directory_fmt = ("{category}", "{movie_id} {movie}") @@ -229,22 +316,31 @@ class FancapsMovieExtractor(Extractor): pattern = MOVIE_BASE_PATTERN + r"MovieImages\.php\?(?:.*&)?movieid=(\d+)" test = ( ("https://fancaps.net/movies/MovieImages.php?movieid=4156&page=1"), - ("https://fancaps.net/movies/MovieImages.php?name=Elemental_2023&movieid=4156&page=2") + ( + "https://fancaps.net/movies/MovieImages.php" + "?name=Elemental_2023&movieid=4156&page=2" + ), ) def __init__(self, match): Extractor.__init__(self, match) self.movie_id = match.group(1) self.movie_url = self.root + self.movie_id - self._base_original = "https://cdni.fancaps.net/file/fancaps-movieimages/" - self._base_fallback = ("https://mvcdn.fancaps.net/", "https://moviethumbs.fancaps.net/") + self._base_original = "https://cdni.fancaps.net/file/" \ + "fancaps-movieimages/" + self._base_fallback = ( + "https://mvcdn.fancaps.net/", + "https://moviethumbs.fancaps.net/", + ) def metadata(self, page): return { "movie_id": self.movie_id, - "movie": text.extr(page, '

', "

").strip().replace("Images from ", ''), + "movie": text.extr(page, '

', "

") + .strip() + .replace("Images from ", ""), } - + def _image_fallback(self, filename): for base_url in self._base_fallback: yield base_url + filename @@ -257,18 +353,22 @@ def items(self): if metadata is None: metadata = self.metadata(page) - image_ids_gen = text.extract_iter(page, '
Date: Mon, 21 Aug 2023 17:45:06 +0700 Subject: [PATCH 3/5] Here is a nicer code. Mr. flake8 --- .vscode/settings.json | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 33fe63f7c4..0000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "python.linting.flake8Enabled": true, - "python.linting.enabled": true -} \ No newline at end of file From 10a0f1ba7c7aafc704f34640fff07a68697f1c95 Mon Sep 17 00:00:00 2001 From: Hacker 17082006 Date: Mon, 21 Aug 2023 17:51:30 +0700 Subject: [PATCH 4/5] 'Tv' instead of 'TV' --- gallery_dl/extractor/fancaps.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/fancaps.py b/gallery_dl/extractor/fancaps.py index 58bf00a164..472d51ac1c 100644 --- a/gallery_dl/extractor/fancaps.py +++ b/gallery_dl/extractor/fancaps.py @@ -170,7 +170,7 @@ def items(self): page_idx += 1 -class FancapsTVEpisodeExtractor(Extractor): +class FancapsTvEpisodeExtractor(Extractor): """Extractor for a TV episode on fancaps.net""" category = "fancaps" @@ -257,7 +257,7 @@ def items(self): page_idx += 1 -class FancapsTVSeriesExtractor(Extractor): +class FancapsTvSeriesExtractor(Extractor): """Extractor for a TV series on fancaps.net""" category = "fancaps" @@ -295,7 +295,7 @@ def items(self): "https://fancaps.net/tv/episodeimages.php" f"?{episode_id}" ), { - "_extractor": FancapsTVEpisodeExtractor + "_extractor": FancapsTvEpisodeExtractor } if re.search('Next', page) is not None: From 92887a91a2e7181e5d28383cc38f58e430809183 Mon Sep 17 00:00:00 2001 From: Hacker 17082006 Date: Mon, 21 Aug 2023 17:57:18 +0700 Subject: [PATCH 5/5] Add supportedsites info --- docs/supportedsites.md | 5 +++++ scripts/supportedsites.py | 1 + 2 files changed, 6 insertions(+) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e095743f98..8a1aaa91f0 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -667,6 +667,11 @@ Consider all sites to be NSFW unless otherwise known. Creators, Posts Cookies + + FanCaps.net + https://fancaps.net/ + Movie, TV, Anime Images, Screencaps, Screenshots, Wallpapers + Pixnet https://www.pixnet.net/ diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 5415276625..fe56e962a4 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -41,6 +41,7 @@ "exhentai" : "ExHentai", "fallenangels" : "Fallen Angels Scans", "fanbox" : "pixivFANBOX", + "fancaps" : "FanCaps.net", "fashionnova" : "Fashion Nova", "furaffinity" : "Fur Affinity", "hbrowse" : "HBrowse",