Skip to content

Commit

Permalink
[piczel] fix extraction (#6735)
Browse files – browse the repository at this point in the history
- fix pagination
- update API endpoints
- provide 'count' metadata field
- use BASE_PATTERN and self.groups[…]
  • Loading branch information
mikf committed Dec 27, 2024
1 parent 167a726 commit bc7e956
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 44 deletions.
49 changes: 18 additions & 31 deletions gallery_dl/extractor/piczel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from .common import Extractor, Message
from .. import text

BASE_PATTERN = r"(?:https?://)?(?:www\.)?piczel\.tv"


class PiczelExtractor(Extractor):
"""Base class for piczel extractors"""
Expand All @@ -30,6 +32,7 @@ def items(self):
if post["multi"]:
images = post["images"]
del post["images"]
post["count"] = len(images)
yield Message.Directory, post
for post["num"], image in enumerate(images):
if "id" in image:
Expand All @@ -39,6 +42,7 @@ def items(self):
yield Message.Url, url, text.nameext_from_url(url, post)

else:
post["count"] = 1
yield Message.Directory, post
post["num"] = 0
url = post["image"]["url"]
Expand All @@ -47,35 +51,27 @@ def items(self):
def posts(self):
"""Return an iterable with all relevant post objects"""

def _pagination(self, url, folder_id=None):
params = {
"from_id" : None,
"folder_id": folder_id,
}
def _pagination(self, url, pnum=1):
params = {"page": pnum}

while True:
data = self.request(url, params=params).json()
if not data:
return
params["from_id"] = data[-1]["id"]

for post in data:
if not folder_id or folder_id == post["folder_id"]:
yield post
yield from data["data"]

params["page"] = data["meta"]["next_page"]
if not params["page"]:
return


class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$"
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/?$"
example = "https://piczel.tv/gallery/USER"

def __init__(self, match):
PiczelExtractor.__init__(self, match)
self.user = match.group(1)

def posts(self):
url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
url = "{}/api/users/{}/gallery".format(self.root_api, self.groups[0])
return self._pagination(url)


Expand All @@ -84,29 +80,20 @@ class PiczelFolderExtractor(PiczelExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}")
archive_fmt = "f{folder[id]}_{id}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv"
r"/gallery/(?!image)([^/?#]+)/(\d+)")
pattern = BASE_PATTERN + r"/gallery/(?!image/)[^/?#]+/(\d+)"
example = "https://piczel.tv/gallery/USER/12345"

def __init__(self, match):
PiczelExtractor.__init__(self, match)
self.user, self.folder_id = match.groups()

def posts(self):
url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
return self._pagination(url, int(self.folder_id))
url = "{}/api/gallery/folder/{}".format(self.root_api, self.groups[0])
return self._pagination(url)


class PiczelImageExtractor(PiczelExtractor):
"""Extractor for individual images"""
subcategory = "image"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)"
pattern = BASE_PATTERN + r"/gallery/image/(\d+)"
example = "https://piczel.tv/gallery/image/12345"

def __init__(self, match):
PiczelExtractor.__init__(self, match)
self.image_id = match.group(1)

def posts(self):
url = "{}/api/gallery/{}".format(self.root_api, self.image_id)
url = "{}/api/gallery/{}".format(self.root_api, self.groups[0])
return (self.request(url).json(),)
81 changes: 68 additions & 13 deletions test/results/piczel.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,35 @@

__tests__ = (
{
"#url" : "https://piczel.tv/gallery/Bikupan",
"#category": ("", "piczel", "user"),
"#class" : piczel.PiczelUserExtractor,
"#range" : "1-100",
"#count" : ">= 100",
"#url" : "https://piczel.tv/gallery/Bikupan",
"#class": piczel.PiczelUserExtractor,
"#range": "1-100",
"#count": ">= 100",
},

{
"#url" : "https://piczel.tv/gallery/Lulena/1114",
"#category": ("", "piczel", "folder"),
"#class" : piczel.PiczelFolderExtractor,
"#count" : ">= 4",
"#url" : "https://piczel.tv/gallery/Lulena/1114",
"#class": piczel.PiczelFolderExtractor,
"#urls" : (
"https://piczel.tv/static/uploads/gallery_image/32920/image/11194/1544126403-Lulena.png",
"https://piczel.tv/static/uploads/gallery_image/32920/image/8008/1533616260-Lulena.png",
"https://piczel.tv/static/uploads/plain_image/32920/image/3761/3761-Lulena.png",
"https://piczel.tv/static/uploads/plain_image/32920/image/3762/3762-Lulena.png",
"https://piczel.tv/static/uploads/gallery_image/32920/image/7991/1533513024-Lulena.png",
"https://piczel.tv/static/uploads/gallery_image/32920/image/7806/1532236348-Lulena.png",
"https://piczel.tv/static/uploads/gallery_image/32920/image/7800/1532235785-Lulena.png",
),

"folder_id": 1114,
},

{
"#url" : "https://piczel.tv/gallery/image/7807",
"#category": ("", "piczel", "image"),
"#class" : piczel.PiczelImageExtractor,
"#pattern" : r"https://(\w+\.)?piczel\.tv/static/uploads/gallery_image/32920/image/7807/1532236438-Lulena\.png",
"#url" : "https://piczel.tv/gallery/image/7807",
"#class": piczel.PiczelImageExtractor,
"#urls" : "https://piczel.tv/static/uploads/gallery_image/32920/image/7807/1532236438-Lulena.png",
"#sha1_content": "df9a053a24234474a19bce2b7e27e0dec23bff87",

"count" : 1,
"created_at" : "2018-07-22T05:13:58.000Z",
"date" : "dt:2018-07-22 05:13:58",
"description" : None,
Expand All @@ -54,4 +62,51 @@
"views" : int,
},

{
"#url" : "https://piczel.tv/gallery/image/8008",
"#comment": "multi",
"#class" : piczel.PiczelImageExtractor,
"#urls" : (
"https://piczel.tv/static/uploads/gallery_image/32920/image/8008/1533616260-Lulena.png",
"https://piczel.tv/static/uploads/plain_image/32920/image/3761/3761-Lulena.png",
"https://piczel.tv/static/uploads/plain_image/32920/image/3762/3762-Lulena.png",
),

"count" : 3,
"created_at" : "2018-08-07T04:31:00.000Z",
"curated" : False,
"date" : "dt:2018-08-07 04:31:00",
"description": "8/7/18",
"extension" : "png",
"favorites_count": range(3, 10),
"folder_id" : 1114,
"width" : None,
"height" : None,
"id" : 8008,
"is_flash" : False,
"is_video" : False,
"multi" : True,
"nsfw" : True,
"num" : {0, 1, 2},
"password_protected" : False,
"published_at" : "2018-08-07T04:31:00.000Z",
"rendered_description": "<p>8/7/18</p>",
"status" : "published",
"thumbnail" : None,
"title" : "❤",
"views" : 314,
"tags" : [
"original",
"Orc",
"tanlines",
],
"user" : {
"follower_count": range(15, 25),
"id" : 32920,
"premium?": False,
"role" : "user",
"username": "Lulena",
},
},

)

0 comments on commit bc7e956

Please sign in to comment.