Skip to content

Commit 27e2ba3

Browse files
committed
fix: an error on empty paragraph surrounded by non-paragraphs
1 parent b8c3561 commit 27e2ba3

File tree

3 files changed

+14
-14
lines changed

3 files changed

+14
-14
lines changed

docs/users/changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Note that there is currently no guarantee for a stable Markdown formatting style
1212
- Incorrect line wrap on lines right after a hard break.
1313
Thank you, [MDW](https://github.com/mdeweerd), for the issue.
1414
- Adding an extra leading space to paragraphs that start with space in line wrap modes.
15+
- An error on an empty paragraph (Unicode space only) surrounded by non-paragraph elements.
16+
Thank you, [Nico Schlömer](https://github.com/nschloe), for the issue.
1517
- Added
1618
- Plugin interface: `mdformat.plugins.ParserExtensionInterface.add_cli_argument_group`.
1719
With this, plugins can now read CLI arguments merged with values from `.mdformat.toml`.

src/mdformat/_util.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313

1414
NULL_CTX = nullcontext()
1515
EMPTY_MAP: MappingProxyType = MappingProxyType({})
16+
1617
RE_NEWLINES = re.compile(r"\r\n|\r|\n")
18+
RE_HTML_START_SPACE_PREFIX = re.compile(r" (<[a-zA-Z][-a-zA-Z0-9]*>)")
19+
RE_HTML_END_SPACE_SUFFIX = re.compile(r"(</[a-zA-Z][-a-zA-Z0-9]*>) ")
1720

1821

1922
def build_mdit(
@@ -62,13 +65,6 @@ def is_md_equal(
6265
for key, text in [("md1", md1), ("md2", md2)]:
6366
html = mdit.render(text)
6467

65-
# The HTML can start with whitespace if Markdown starts with raw HTML
66-
# preceded by whitespace. This whitespace should be safe to lstrip.
67-
# Also, the trailing newline we add at the end of a document that ends
68-
# in a raw html block not followed by a newline, seems to propagate to
69-
# an HTML rendering. This newline should be safe to rstrip.
70-
html = html.strip()
71-
7268
# Remove codeblocks because code formatter plugins do arbitrary changes.
7369
for codeclass in codeformatters:
7470
html = re.sub(
@@ -85,17 +81,19 @@ def is_md_equal(
8581
html = html.replace("<p> ", "<p>")
8682
html = html.replace(" </p>", "</p>")
8783

88-
# Also strip whitespace leading/trailing the <p> elements so that we can
89-
# safely remove empty paragraphs below without introducing extra whitespace.
90-
html = html.replace(" <p>", "<p>")
91-
html = html.replace("</p> ", "</p>")
84+
# Also remove whitespace preceding opening tags and following
85+
# closing tags, so that we can safely remove empty paragraphs
86+
# below without introducing extra whitespace.
87+
html = RE_HTML_END_SPACE_SUFFIX.sub(r"\g<1>", html)
88+
html = RE_HTML_START_SPACE_PREFIX.sub(r"\g<1>", html)
9289

9390
# empty p elements should be ignored by user agents
9491
# (https://www.w3.org/TR/REC-html40/struct/text.html#edef-P)
9592
html = html.replace("<p></p>", "")
9693

97-
# If it's nothing but whitespace, it's equal
98-
html = re.sub(r"^\s+$", "", html)
94+
# Leading and trailing whitespace should be safe to ignore. This
95+
# also makes any documents that are whitespace-only equal.
96+
html = html.strip()
9997

10098
html_texts[key] = html
10199

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_fmt_string():
5454
pytest.param("a\n\n\xa0\n\nb"), # lone NBSP between two paragraphs
5555
pytest.param("\xa0\n\n# heading"), # lone NBSP followed by a heading
5656
pytest.param(
57-
"```\na\n```\n\u2003\n# A\n", marks=pytest.mark.xfail()
57+
"```\na\n```\n\u2003\n# A\n"
5858
), # em space surrounded by code and header
5959
],
6060
)

0 commit comments

Comments
 (0)