Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions tools/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,21 @@
_RE_LIQUID_INCLUDE: Pattern[str] = re.compile(r"( \{% include.+%\})")


_PRE_PARSE_REPLACEMENTS: tuple[str, str] = (
"https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)",
"https://en.wikipedia.org/wiki/Continuous_uniform_distribution",
)
"""
Replacement to apply *prior* to parsing as markdown.

**HACK**: The closing parenthesis in the original URL breaks markdown parsing, so it is replaced with the redirect-resolved Wikipedia URL.

TODO
----
Remove if this gets fixed upstream, via https://github.com/vega/vega/pull/3996
"""


class RSTRenderer(_RSTRenderer):
def __init__(self) -> None:
super().__init__()
Expand Down Expand Up @@ -68,8 +83,11 @@ def __init__(
super().__init__(renderer, block, inline, plugins)

def __call__(self, s: str) -> str:
s = super().__call__(s) # pyright: ignore[reportAssignmentType]
return unescape(s).replace(r"\ ,", ",").replace(r"\ ", " ")
r = super().__call__(s)
if isinstance(r, str):
return unescape(r).replace(r"\ ,", ",").replace(r"\ ", " ")
msg = f"Expected `str` but got {type(r).__name__!r}"
raise TypeError(msg)

def render_tokens(self, tokens: Iterable[Token], /) -> str:
"""
Expand Down Expand Up @@ -129,32 +147,26 @@ def process_text(self, text: str, state: InlineState) -> None:
Removes `liquid`_ templating markup.

.. _liquid:
https://shopify.github.io/liquid/
https://shopify.github.io/liquid/
"""
state.append_token({"type": "text", "raw": _RE_LIQUID_INCLUDE.sub(r"", text)})


def read_ast_tokens(
source: Url | Path, /, replacements: list[tuple[str, str]] | None = None
) -> list[Token]:
def read_ast_tokens(source: Url | Path, /) -> list[Token]:
"""
Read from ``source``, drop ``BlockState``.

Factored out to provide accurate typing.
"""
markdown = _Markdown(renderer=None, inline=InlineParser())
if isinstance(source, Path):
token_text = source.read_text()
text = source.read_text()
else:
with request.urlopen(source) as response:
token_text = response.read().decode("utf-8")

# Apply replacements
if replacements:
for replacement in replacements:
token_text = token_text.replace(replacement[0], replacement[1])
text = response.read().decode("utf-8")

tokens = markdown.parse(token_text, markdown.block.state_cls())
text = text.replace(*_PRE_PARSE_REPLACEMENTS)
tokens: Any = markdown.parse(text)
return tokens[0]


Expand Down
21 changes: 4 additions & 17 deletions tools/vega_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,6 @@
EXPRESSIONS_DOCS_URL: LiteralString = f"{VEGA_DOCS_URL}expressions/"
EXPRESSIONS_URL_TEMPLATE = "https://raw.githubusercontent.com/vega/vega/refs/tags/{version}/docs/docs/expressions.md"

# Replacements to apply prior to parsing as markdown
PRE_PARSE_REPLACEMENTS = [
# Closing paren messes up markdown parsing, replace with equivalent wikipedia URL
(
"https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)",
"https://en.wikipedia.org/wiki/Continuous_uniform_distribution",
)
]

# NOTE: Regex patterns
FUNCTION_DEF_LINE: Pattern[str] = re.compile(
Expand Down Expand Up @@ -939,15 +931,13 @@ def italics_to_backticks(s: str, names: Iterable[str], /) -> str:
return re.sub(pattern, r"\g<not_link_start>``\g<name>``\g<not_link_end>", s)


def parse_expressions(
source: Url | Path, /, replacements: list[tuple[str, str]] | None = None
) -> Iterator[VegaExprDef]:
def parse_expressions(source: Url | Path, /) -> Iterator[VegaExprDef]:
"""
Download remote or read local `.md` resource and eagerly parse signatures of relevant definitions.

Yields with docs to ensure each can use all remapped names, regardless of the order they appear.
"""
tokens = read_ast_tokens(source, replacements=replacements)
tokens = read_ast_tokens(source)
expr_defs = tuple(VegaExprDef.from_tokens(tokens))
VegaExprDef.remap_title.refresh()
for expr_def in expr_defs:
Expand All @@ -971,7 +961,7 @@ def write_expr_module(version: str, output: Path, *, header: str) -> None:
# Retrieve all of the links used in expr method docstrings,
# so we can include them in the class docstrings, so that sphinx
# will find them.
expr_defs = parse_expressions(url, replacements=PRE_PARSE_REPLACEMENTS)
expr_defs = parse_expressions(url)

links = {}
rst_renderer = RSTRenderer()
Expand Down Expand Up @@ -1001,10 +991,7 @@ def write_expr_module(version: str, output: Path, *, header: str) -> None:
)
contents = chain(
content,
(
expr_def.render()
for expr_def in parse_expressions(url, replacements=PRE_PARSE_REPLACEMENTS)
),
(expr_def.render() for expr_def in parse_expressions(url)),
[MODULE_POST],
)
print(f"Generating\n {url!s}\n ->{output!s}")
Expand Down