Skip to content

Commit

Permalink
feat(parser): blockquote
Browse files Browse the repository at this point in the history
Signed-off-by: Rongrong <[email protected]>
  • Loading branch information
Rongronggg9 committed Nov 4, 2023
1 parent b5680cb commit 2a460f6
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
6 changes: 5 additions & 1 deletion src/parsing/html_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from url_normalize import url_normalize

-__all__ = ["HtmlTree", "Text", "Link", "Bold", "Italic", "Underline", "Strike", "Code", "Pre", "Br", "Hr",
+__all__ = ["HtmlTree", "Text", "Link", "Bold", "Italic", "Underline", "Strike", "Blockquote", "Code", "Pre", "Br", "Hr",
"ListItem", "OrderedList", "UnorderedList", "TypeTextContent"]

TypeTextContent = Union["Text", str, list["Text"]]
Expand Down Expand Up @@ -224,6 +224,10 @@ class Strike(TagWithoutParam):
tag = 's'


class Blockquote(TagWithoutParam):
    """Node for the HTML ``blockquote`` tag.

    All behaviour is inherited from ``TagWithoutParam``; this class only
    sets the tag name.
    """

    tag = 'blockquote'


class Code(TagWithOptionalParam):
tag = 'code'
attr = 'class'
Expand Down
9 changes: 3 additions & 6 deletions src/parsing/html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,9 @@ async def _parse_item(self, soup: Union[PageElement, BeautifulSoup, Tag, Navigab

 if tag == 'blockquote':
     quote = await self._parse_item(soup.children)
-    if not quote:
-        return None
-    quote.strip()
-    if quote.is_empty():
-        return None
-    return Text([Hr(), quote, Hr()])
+    if quote:
+        return Blockquote(quote)
+    return None

if tag == 'q':
quote = await self._parse_item(soup.children)
Expand Down

0 comments on commit 2a460f6

Please sign in to comment.