Skip to content

Commit

Permalink
refactor(parsing): simplify emojify() and the map used by it
Browse files: browse the repository at this point in the history
Signed-off-by: Rongrong <[email protected]>
  • Loading branch information
Rongronggg9 committed Jun 6, 2024
1 parent d8dde05 commit 84ce41f
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 111 deletions.
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Enhancements

- **Print Telegram user info of bot**: Print the bot's Telegram user info when the bot is started. This helps bot managers find the bot's username and user ID when deploying the bot.
- **Minor refactor**: Some internal functions have been refactored to improve performance and maintainability.

### Bug fixes

Expand Down
1 change: 1 addition & 0 deletions docs/CHANGELOG.zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### 增强

- **打印 bot 的 Telegram 用户信息**: 在 bot 启动时打印 bot 的 Telegram 用户信息。这是为了帮助 bot 管理员在部署 bot 时找到 bot 的用户名和用户 ID。
- **次要的重构**: 重构了一些内部函数以提高性能和可维护性。

### Bug 修复

Expand Down
103 changes: 0 additions & 103 deletions src/parsing/emojify.json

This file was deleted.

11 changes: 3 additions & 8 deletions src/parsing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Optional, Sequence, Union, Final, Iterable

import re
import json
import string
from contextlib import suppress
from bs4.element import Tag
Expand All @@ -11,9 +10,9 @@
from telethon.tl.types import TypeMessageEntity
from functools import partial
from urllib.parse import urljoin
from os import path
from itertools import chain

from .weibo_emojify_map import EMOJIFY_MAP
from .. import log
from ..aio_helper import run_async
from ..compat import parsing_utils_html_validator_minify, INT64_T_MAX
Expand Down Expand Up @@ -83,10 +82,6 @@
sorted(set(SPACES + INVALID_CHARACTERS + string.punctuation + string.whitespace))
)

# load emoji dict
with open(path.join(path.dirname(__file__), 'emojify.json'), 'r', encoding='utf-8') as emojify_json:
EMOJI_DICT = json.load(emojify_json)

# Pre-bound regex substitution helpers: each is `pattern.sub` with the replacement already
# supplied via `partial`, so callers pass only the input string.

# Replaces every character in the INVALID_CHARACTERS class with a single space.
replaceInvalidCharacter = partial(re.compile(rf'[{INVALID_CHARACTERS}]').sub, ' ')  # use initially
# Replaces every character in SPACES except its first one (note the `[1:]` slice) with a plain space.
# NOTE(review): SPACES is defined outside this view; presumably its first character is the ordinary space.
replaceSpecialSpace = partial(re.compile(rf'[{SPACES[1:]}]').sub, ' ')  # use carefully
# Normalises any `<br>` / `<br/>` / `<br />` tag, together with the whitespace around it, to '<br>'.
stripBr = partial(re.compile(r'\s*<br\s*/?\s*>\s*').sub, '<br>')
Expand Down Expand Up @@ -121,9 +116,9 @@ def resolve_relative_link(base: Optional[str], url: Optional[str]) -> str:

# Replace emoji aliases and Weibo emoticon phrases in `xml` with emoji characters and return the result.
# NOTE(review): this is a rendered diff hunk with its indentation stripped; the removed (pre-commit)
# and added (post-commit) versions of the loop lines appear one after another below.
def emojify(xml):
# `emojize` is not defined in this view; presumably the `emoji` package helper that expands aliases - TODO confirm.
xml = emojize(xml, language='alias', variant='emoji_type')
# Removed by this commit: loop over EMOJI_DICT, which was loaded from the now-deleted emojify.json.
for emoticon, emoji in EMOJI_DICT.items():
# Added by this commit: loop over EMOJIFY_MAP, imported from .weibo_emojify_map.
for emoticon_phrase, emoji in EMOJIFY_MAP.items():
# emojify weibo emoticons, get all here: https://api.weibo.com/2/emotions.json?source=1362404091
# Removed by this commit: the old keys had no brackets, so the "[...]" wrapper was built on every iteration.
xml = xml.replace(f'[{emoticon}]', emoji)
# Added by this commit: the new keys already contain the brackets, so they are used as-is.
xml = xml.replace(emoticon_phrase, emoji)
return xml


Expand Down
107 changes: 107 additions & 0 deletions src/parsing/weibo_emojify_map.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from __future__ import annotations

from typing import Final

# Maps a Weibo emoticon phrase, including its surrounding square brackets, to the text that
# replaces it. Keys are stored with the brackets already attached so that a caller can pass
# each key straight to `str.replace` without building the bracketed form itself.
EMOJIFY_MAP: Final[dict[str, str]] = {
    # --- faces and reactions ---
    '[微笑]': '🙂',
    '[可爱]': '😊',
    '[太开心]': '😆',
    '[鼓掌]': '👏',
    '[嘻嘻]': '😁',
    '[哈哈]': '😄',
    '[笑cry]': '😂',
    '[挤眼]': '😜',
    '[馋嘴]': '😋',
    '[黑线]': '😑',
    '[汗]': '😓',
    '[哼]': '😠',
    '[怒]': '😡',
    '[可怜]': '🥺',
    '[失望]': '😞',
    '[悲伤]': '😢',
    '[泪]': '😭',
    '[害羞]': '😳',
    '[爱你]': '🥰',
    '[亲亲]': '😚',
    '[色]': '😍',
    '[阴险]': '😏',
    '[偷笑]': '🤭',
    '[酷]': '😎',
    '[并不简单]': '🧐',
    '[思考]': '🤔',
    '[晕]': '😵',
    '[骷髅]': '💀',
    '[嘘]': '🤫',
    '[闭嘴]': '🤐',
    '[傻眼]': '😮',
    '[吃惊]': '😲',
    '[吐]': '🤮',
    '[感冒]': '😷',
    '[生病]': '🤒',
    '[拜拜]': '👋',
    '[鄙视]': '🖕',
    '[白眼]': '🙄',
    '[抓狂]': '😖',
    '[怒骂]': '🤬',
    '[钱]': '🤑',
    '[哈欠]': '🥱',
    '[困]': '😴',
    '[睡]': '😪',
    # --- memes, affection and hearts ---
    '[吃瓜]': '🍉',
    '[酸]': '🍋',
    '[喵喵]': '🐱',
    '[抱抱]': '🤗',
    '[摊手]': '🤷',
    '[跪了]': '🧎',
    '[鲜花]': '🌹',
    '[给你小心心]': '💝',
    '[心]': '❤',
    '[伤心]': '💔',
    # --- hand gestures ---
    '[握手]': '🤝',
    '[赞]': '👍',
    '[good]': '👍',
    '[弱]': '👎',
    '[NO]': '✋',
    '[耶]': '✌',
    '[拳头]': '✊',
    '[ok]': '👌',
    '[加油]': '💪',
    '[haha]': '🤟',
    # --- animals, sky and weather ---
    '[熊猫]': '🐼',
    '[兔子]': '🐰',
    '[猪头]': '🐷',
    '[太阳]': '🌞',
    '[月亮]': '🌙',
    '[浮云]': '☁',
    '[下雨]': '🌧',
    '[微风]': '🍃',
    # --- people, objects, food and everything else ---
    '[围观]': '👨‍👧‍👦',
    '[飞机]': '✈',
    '[照相机]': '📷',
    '[话筒]': '🎙',
    '[蜡烛]': '🕯',
    '[音乐]': '🎵',
    '[可乐]': '🥤',
    '[干杯]': '🍻',
    '[蛋糕]': '🎂',
    '[礼物]': '🎁',
    '[钟]': '⏰',
    '[肥皂]': '🧼',
    '[绿丝带]': '🎗',
    '[围脖]': '🧣',
    '[圣诞老人]': '🎅',
    '[文明遛狗]': '🐕',
    '[最右]': ' →_→ ',  # the only plain-text replacement; padded with one space on each side
    '[五仁月饼]': '🥮',
    '[弗莱见钱眼开]': '🤑',
    '[棒棒糖]': '🍭',
    '[炸鸡腿]': '🍗',
    '[点亮平安灯]': '🏮',
    '[点亮橙色]': '🖐',
    '[看涨]': '📈',
    '[看跌]': '📉',
    '[星星]': '⭐',
    '[空星]': '★',
    '[全家福]': '👪',
    '[圆月]': '🌕',
}

0 comments on commit 84ce41f

Please sign in to comment.