-
-
Notifications
You must be signed in to change notification settings - Fork 993
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
caf31e7
commit 3b60dab
Showing
3 changed files
with
201 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -168,6 +168,7 @@ | |
"webmshare", | ||
"webtoons", | ||
"weibo", | ||
"weverse", | ||
"wikiart", | ||
"wikifeet", | ||
"xhamster", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
# This program is free software; you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License version 2 as | ||
# published by the Free Software Foundation. | ||
|
||
"""Extractors for https://weverse.io/""" | ||
|
||
from .common import Extractor, Message | ||
from .. import text, util, exception | ||
from ..cache import cache | ||
import binascii | ||
import hashlib | ||
import hmac | ||
import time | ||
import urllib.parse | ||
import uuid | ||
|
||
# Optional scheme and optional "m." subdomain, followed by the weverse.io host
BASE_PATTERN = r"(?:https?://)?(?:m\.)?weverse\.io"
# Captures the first path segment after the host (group 1)
COMMUNITY_PATTERN = BASE_PATTERN + r"/(\w+)"

# Captures exactly 32 lowercase hexadecimal characters
MEMBER_ID_PATTERN = r"([a-f0-9]{32})"
# Captures one digit, a hyphen, and exactly nine digits (e.g. "1-123456789")
POST_ID_PATTERN = r"(\d-\d{9})"
|
||
|
||
class WeverseExtractor(Extractor):
    """Base class for weverse extractors"""
    category = "weverse"
    cookies_domain = ".weverse.io"
    cookies_names = ("we2_access_token",)
    root = "https://weverse.io"
    request_interval = (1.0, 2.0)

    def _init(self):
        """Log in and create the API client if an access token is available"""
        self.login()
        # NOTE: without an access token 'self.api' is left unset
        if self.access_token:
            self.api = WeverseAPI(self, self.access_token)

    def login(self):
        """Set 'self.access_token' from config, cookies, or credentials

        An 'access-token' config value takes precedence. Otherwise the
        token is read from the 'we2_access_token' cookie, logging in with
        username and password first when that cookie is not present.
        """
        token = self.config("access-token")
        if token:
            self.access_token = token
            return

        if not self.cookies_check(self.cookies_names):
            username, password = self._get_auth_info()
            if username:
                self.cookies_update(
                    self._login_impl(username, password), self.cookies_domain)

        self.access_token = self.cookies.get(self.cookies_names[0])

    @cache(maxage=365*24*3600, keyarg=1)
    def _login_impl(self, username, password):
        """Request an access token and return it as a cookie dict

        Raises AuthenticationError if the response has no 'accessToken'.
        """
        endpoint = ("https://accountapi.weverse.io"
                    "/web/api/v2/auth/token/by-credentials")
        data = {"email": username, "password": password}
        headers = {
            "x-acc-app-secret": "5419526f1c624b38b10787e5c10b2a7a",
            "x-acc-app-version": "2.2.20-alpha.0",
            "x-acc-language": "en",
            "x-acc-service-id": "weverse",
            # random per-request ID; the 'uuid' module has no uuid64()
            "x-acc-trace-id": str(uuid.uuid4()),
        }
        res = self.request(
            endpoint, method="POST", data=data, headers=headers).json()
        if "accessToken" not in res:
            raise exception.AuthenticationError()
        return {self.cookies_names[0]: res["accessToken"]}
|
||
|
||
class WeversePostExtractor(WeverseExtractor):
    """Extractor for weverse posts"""
    subcategory = "post"
    directory_fmt = ("{category}", "{community[communityName]}",
                     "{author_name}", "{postId}")
    filename_fmt = "{category}_{filename}.{extension}"
    archive_fmt = "{postId}"
    pattern = COMMUNITY_PATTERN + r"/(?:artist|fanpost)/" + POST_ID_PATTERN
    example = "https://weverse.io/abcdef/artist/1-123456789"

    def __init__(self, match):
        WeverseExtractor.__init__(self, match)
        self.community_keyword = match.group(1)
        self.post_id = match.group(2)

    def items(self):
        """Yield a directory message and one URL message per attachment

        Posts without any 'extension' or 'attachment' content are skipped,
        as are posts whose metadata could not be fetched.
        """
        data = self.api.post(self.post_id)
        if not data:
            # the API returns nothing when the request failed
            return

        if "publishedAt" in data:
            # 'publishedAt' is divided by 1000 before timestamp parsing
            data["date"] = text.parse_timestamp(data["publishedAt"] / 1000)

        # remove the raw media fields from the metadata;
        # 'extension' is reused below for the file extension
        extension = data.pop("extension", None)
        attachments = data.pop("attachment", None)

        # skip posts with no media
        if not extension and not attachments:
            return
        attachments = attachments or {}

        author = data["author"]
        official = author.get("artistOfficialProfile") or {}
        data["author_name"] = (
            official.get("officialName") or author["profileName"])

        yield Message.Directory, data
        for kind, entries in attachments.items():
            if kind == "photo":
                for photo in entries.values():
                    url = photo["url"]
                    data["filename"] = photo["photoId"]
                    data["extension"] = text.ext_from_url(url)
                    yield Message.Url, url, data
            elif kind == "video":
                for video in entries.values():
                    best_video = self.api.video(video["videoId"])
                    if not best_video:
                        # no download info available for this video
                        continue
                    url = best_video["url"]
                    data["filename"] = video["videoId"]
                    data["extension"] = text.ext_from_url(url)
                    yield Message.Url, url, data
|
||
|
||
class WeverseAPI():
    """Interface for the Weverse API"""

    BASE_API_URL = "https://global.apis.naver.com"

    def __init__(self, extractor, access_token):
        self.extractor = extractor
        self.headers = {"Authorization": "Bearer " + access_token}

    def _endpoint_with_params(self, endpoint, params):
        """Return 'endpoint' with 'params' appended as a query string"""
        delimiter = "&" if "?" in endpoint else "?"
        return endpoint + delimiter + urllib.parse.urlencode(params)

    def _message_digest(self, endpoint, params, timestamp):
        """Return the base64-encoded HMAC-SHA1 digest for a request

        The signed message is the first 255 characters of the endpoint
        (including its query string) followed by 'timestamp'.
        """
        key = b"1b9cb6378d959b45714bec49971ade22e6e24e42"
        url = self._endpoint_with_params(endpoint, params)
        message = "{}{}".format(url[:255], timestamp).encode()
        digest = hmac.new(key, message, hashlib.sha1).digest()
        return binascii.b2a_base64(digest).rstrip().decode()

    def post(self, post_id):
        """Return the API response for the post with the given ID"""
        endpoint = "/post/v1.0/post-{}".format(post_id)
        params = {"fieldSet": "postV1"}
        return self._call(endpoint, params)

    def video(self, video_id):
        """Return the download entry with the greatest 'resolution' value

        Returns None if the request failed or no entries are listed.
        """
        endpoint = "/cvideo/v1.0/cvideo-{}/downloadInfo".format(video_id)
        response = self._call(endpoint)
        if not response:
            return None
        # NOTE(review): assumes 'resolution' values compare in quality
        # order (i.e. are numeric) - confirm against the API response
        return max(
            response.get("downloadInfo") or (),
            key=lambda video: video["resolution"],
            default=None,
        )

    def _call(self, endpoint, params=None):
        """Send a signed GET request and return the decoded JSON response

        Returns None after logging a warning if the request raised
        an HttpError.
        """
        if params is None:
            params = {}
        params = util.combine_dict({
            "appId": "be4d79eb8fc7bd008ee82c8ec4ff6fd4",
            "language": "en",
            "platform": "WEB",
            "wpf": "pc"
        }, params)
        # the digest covers the parameters above; the timestamp and the
        # digest itself are added to the query afterwards
        timestamp = int(time.time() * 1000)
        message_digest = self._message_digest(endpoint, params, timestamp)
        params = util.combine_dict(params, {
            "wmsgpad": timestamp,
            "wmd": message_digest
        })
        try:
            return self.extractor.request(
                self.BASE_API_URL + "/weverse/wevweb" + endpoint,
                params=params, headers=self.headers,
            ).json()
        except exception.HttpError as exc:
            self.extractor.log.warning(exc)
            return None