1313
1414NULL_CTX = nullcontext ()
1515EMPTY_MAP : MappingProxyType = MappingProxyType ({})
16+
1617RE_NEWLINES = re .compile (r"\r\n|\r|\n" )
18+ RE_HTML_START_SPACE_PREFIX = re .compile (r" (<[a-zA-Z][-a-zA-Z0-9]*>)" )
19+ RE_HTML_END_SPACE_SUFFIX = re .compile (r"(</[a-zA-Z][-a-zA-Z0-9]*>) " )
1720
1821
1922def build_mdit (
@@ -62,13 +65,6 @@ def is_md_equal(
6265 for key , text in [("md1" , md1 ), ("md2" , md2 )]:
6366 html = mdit .render (text )
6467
65- # The HTML can start with whitespace if Markdown starts with raw HTML
66- # preceded by whitespace. This whitespace should be safe to lstrip.
67- # Also, the trailing newline we add at the end of a document that ends
68- # in a raw html block not followed by a newline, seems to propagate to
69- # an HTML rendering. This newline should be safe to rstrip.
70- html = html .strip ()
71-
7268 # Remove codeblocks because code formatter plugins do arbitrary changes.
7369 for codeclass in codeformatters :
7470 html = re .sub (
@@ -85,17 +81,19 @@ def is_md_equal(
8581 html = html .replace ("<p> " , "<p>" )
8682 html = html .replace (" </p>" , "</p>" )
8783
88- # Also strip whitespace leading/trailing the <p> elements so that we can
89- # safely remove empty paragraphs below without introducing extra whitespace.
90- html = html .replace (" <p>" , "<p>" )
91- html = html .replace ("</p> " , "</p>" )
84+ # Also remove a space preceding an opening tag, and a space
85+ # following a closing tag, so that we can safely remove empty
86+ # paragraphs below without introducing extra whitespace.
87+ html = RE_HTML_END_SPACE_SUFFIX .sub (r"\g<1>" , html )
88+ html = RE_HTML_START_SPACE_PREFIX .sub (r"\g<1>" , html )
9289
9390 # empty p elements should be ignored by user agents
9491 # (https://www.w3.org/TR/REC-html40/struct/text.html#edef-P)
9592 html = html .replace ("<p></p>" , "" )
9693
97- # If it's nothing but whitespace, it's equal
98- html = re .sub (r"^\s+$" , "" , html )
94+ # Leading and trailing whitespace should be safe to ignore. This
95+ # also makes any documents that are whitespace-only equal.
96+ html = html .strip ()
9997
10098 html_texts [key ] = html
10199
0 commit comments