Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: an error on empty paragraph surrounded by non-paragraphs #474

Merged
merged 2 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/users/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Note that there is currently no guarantee for a stable Markdown formatting style
- Incorrect line wrap on lines right after a hard break.
Thank you, [MDW](https://github.com/mdeweerd), for the issue.
- Adding an extra leading space to paragraphs that start with space in line wrap modes.
- An error on empty paragraph (Unicode space only) surrounded by non-paragraph elements.
Thank you, [Nico Schlömer](https://github.com/nschloe), for the issue.
- Added
- Plugin interface: `mdformat.plugins.ParserExtensionInterface.add_cli_argument_group`.
With this, plugins can now read CLI arguments merged with values from `.mdformat.toml`.
Expand Down
29 changes: 14 additions & 15 deletions src/mdformat/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@

# Shared module-level context manager instance
# (presumably `contextlib.nullcontext` -- import not visible here).
NULL_CTX = nullcontext()
# Shared empty mapping wrapped in a mapping proxy
# (presumably `types.MappingProxyType`, i.e. read-only -- confirm against imports).
EMPTY_MAP: MappingProxyType = MappingProxyType({})

# Matches one line ending: "\r\n", a lone "\r", or a lone "\n".
RE_NEWLINES = re.compile(r"\r\n|\r|\n")
# Matches a single space directly followed by an attribute-less opening
# HTML tag such as "<p>"; the tag itself is captured in group 1.
RE_HTML_START_SPACE_PREFIX = re.compile(r" (<[a-zA-Z][-a-zA-Z0-9]*>)")
# Matches a closing HTML tag such as "</p>" directly followed by a single
# space; the tag itself is captured in group 1.
RE_HTML_END_SPACE_SUFFIX = re.compile(r"(</[a-zA-Z][-a-zA-Z0-9]*>) ")


def build_mdit(
Expand Down Expand Up @@ -62,17 +65,11 @@ def is_md_equal(
for key, text in [("md1", md1), ("md2", md2)]:
html = mdit.render(text)

# The HTML can start with whitespace if Markdown starts with raw HTML
# preceded by whitespace. This whitespace should be safe to lstrip.
# Also, the trailing newline we add at the end of a document that ends
# in a raw html block not followed by a newline, seems to propagate to
# an HTML rendering. This newline should be safe to rstrip.
html = html.strip()

# Remove codeblocks because code formatter plugins do arbitrary changes.
for codeclass in codeformatters:
if codeformatters:
langs_re = "|".join(re.escape(lang) for lang in codeformatters)
html = re.sub(
f'<code class="language-{codeclass}">.*</code>',
rf'<code class="language-(?:{langs_re})">.*</code>',
"",
html,
flags=re.DOTALL,
Expand All @@ -85,17 +82,19 @@ def is_md_equal(
html = html.replace("<p> ", "<p>")
html = html.replace(" </p>", "</p>")

# Also strip whitespace leading/trailing the <p> elements so that we can
# safely remove empty paragraphs below without introducing extra whitespace.
html = html.replace(" <p>", "<p>")
html = html.replace("</p> ", "</p>")
# Also remove whitespace preceding opening tags, and trailing
# closing tags, so that we can safely remove empty paragraphs
# below without introducing extra whitespace.
html = RE_HTML_END_SPACE_SUFFIX.sub(r"\g<1>", html)
html = RE_HTML_START_SPACE_PREFIX.sub(r"\g<1>", html)

# empty p elements should be ignored by user agents
# (https://www.w3.org/TR/REC-html40/struct/text.html#edef-P)
html = html.replace("<p></p>", "")

# If it's nothing but whitespace, it's equal
html = re.sub(r"^\s+$", "", html)
# Leading and trailing whitespace should be safe to ignore. This
# also makes any documents that are whitespace-only equal.
html = html.strip()

html_texts[key] = html

Expand Down
2 changes: 1 addition & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_fmt_string():
pytest.param("a\n\n\xa0\n\nb"), # lone NBSP between two paragraphs
pytest.param("\xa0\n\n# heading"), # lone NBSP followed by a heading
pytest.param(
"```\na\n```\n\u2003\n# A\n", marks=pytest.mark.xfail()
"```\na\n```\n\u2003\n# A\n"
), # em space surrounded by code and header
],
)
Expand Down
23 changes: 23 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from mdformat._util import is_md_equal


def test_is_md_equal():
    """Check that `is_md_equal` only ignores code blocks of listed languages.

    The two documents differ in the body of their ``js`` fenced code block
    and in trailing whitespace at the end of the document.
    """
    baseline = """
paragraph

```js
console.log()
```

paragr
"""
    scrambled_code = """
paragraph

```js
bonsole.l()g
```

paragr"""
    # With no code formatters declared, the differing code block must make
    # the documents compare as unequal.
    assert not is_md_equal(baseline, scrambled_code)

    # Once "js" is declared as a code-formatted language, the contents of
    # its code block are disregarded and the documents compare as equal.
    formatted_langs = ("js", "go")
    assert is_md_equal(baseline, scrambled_code, codeformatters=formatted_langs)