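"""Patched MyST-NB notebook parser.

Adapts MyST-NB's notebook parser and ``nb_to_tokens`` (v0.13.2) so that every
notebook cell is preceded by a fenced ``{cell_meta}`` block carrying the
cell's metadata as JSON (see the "Patched here" section below).
"""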

import json

# The star import re-uses the helpers and types pulled in by myst_nb.parser
# (MystParser, Token, nbf, nodes, logging, get_nb_converter, ...).
from myst_nb.parser import *

SPHINX_LOGGER = logging.getLogger(__name__)

# Mostly taken and slightly adapted from https://github.com/executablebooks/MyST-NB/blob/v0.13.2/myst_nb/parser.py


class PatchedNotebookParser(MystParser):
    """Docutils parser for Markedly Structured Text (MyST) and Jupyter Notebooks."""

    supported = ("myst-nb",)
    translate_section_name = None

    config_section = "myst-nb parser"
    config_section_dependencies = ("parsers",)

    def parse(
        self, inputstring: str, document: nodes.document, renderer: str = "sphinx"
    ) -> None:
        self.reporter = document.reporter
        self.env = document.settings.env  # type: BuildEnvironment

        converter = get_nb_converter(
            self.env.doc2path(self.env.docname, True),
            self.env,
            inputstring.splitlines(keepends=True),
        )

        if converter is None:
            # Read the notebook as a text document
            super().parse(inputstring, document=document)
            return

        try:
            ntbk = converter.func(inputstring)
        except Exception as error:
            SPHINX_LOGGER.error(
                "MyST-NB: Conversion to notebook failed: %s",
                error,
                # exc_info=True,
                location=(self.env.docname, 1),
            )
            return

        # add outputs to notebook from the cache
        if self.env.config["jupyter_execute_notebooks"] != "off":
            ntbk = generate_notebook_outputs(
                self.env, ntbk, show_traceback=self.env.config["execution_show_tb"]
            )

        # Parse the notebook content to a list of syntax tokens and an env
        # containing global data like reference definitions
        md_parser, env, tokens = patched_nb_to_tokens(  # <-- patched here
            ntbk,
            (
                self.env.myst_config  # type: ignore[attr-defined]
                if converter is None
                else converter.config
            ),
            self.env.config["nb_render_plugin"],
        )

        # Write the notebook's output to disk
        path_doc = nb_output_to_disc(ntbk, document)

        # Update our glue key list with new ones defined in this page
        glue_domain = NbGlueDomain.from_env(self.env)
        glue_domain.add_notebook(ntbk, path_doc)

        # Render the Markdown tokens to docutils AST
        tokens_to_docutils(md_parser, env, tokens, document)


def patched_nb_to_tokens(
    ntbk: nbf.NotebookNode, config: MdParserConfig, renderer_plugin: str
) -> Tuple[MarkdownIt, Dict[str, Any], List[Token]]:
    """Parse the notebook content to a list of syntax tokens and an env,
    containing global data like reference definitions.
    """
    md = default_parser(config)
    # setup the markdown parser
    # Note we disable front matter parsing,
    # because this is taken from the actual notebook metadata
    md.disable("front_matter", ignoreInvalid=True)
    md.renderer = SphinxNBRenderer(md)
    # make a sandbox where all the parsing global data,
    # like reference definitions, will be stored
    env: Dict[str, Any] = {}
    rules = md.core.ruler.get_active_rules()

    # First only run pre-inline chains,
    # so we can collect all reference definitions, etc, before assessing references
    def parse_block(src, start_line):
        with md.reset_rules():
            # enable only rules up to block
            md.core.ruler.enableOnly(rules[: rules.index("inline")])
            tokens = md.parse(src, env)
        for token in tokens:
            if token.map:
                token.map = [start_line + token.map[0], start_line + token.map[1]]
        for dup_ref in env.get("duplicate_refs", []):
            if "fixed" not in dup_ref:
                dup_ref["map"] = [
                    start_line + dup_ref["map"][0],
                    start_line + dup_ref["map"][1],
                ]
                dup_ref["fixed"] = True
        return tokens

    block_tokens = []
    source_map = ntbk.metadata.get("source_map", None)

    # get language lexer name
    langinfo = ntbk.metadata.get("language_info", {})
    lexer = langinfo.get("pygments_lexer", langinfo.get("name", None))
    if lexer is None:
        # fall back to the kernelspec language
        lexer = ntbk.metadata.get("kernelspec", {}).get("language", None)
        # TODO log warning if lexer is still None

    for cell_index, nb_cell in enumerate(ntbk.cells):
        # if the source_map has been stored (for text-based notebooks),
        # we use that to define the starting line for each cell;
        # otherwise, we set a pseudo base that represents the cell index
        start_line = source_map[cell_index] if source_map else (cell_index + 1) * 10000
        start_line += 1  # use base 1 rather than 0
        # (without a source_map: cell 0 -> pseudo line 10001, cell 1 -> 20001, ...)

        # Skip empty cells
        if len(nb_cell["source"].strip()) == 0:
            continue

        # skip cells tagged for removal
        # TODO this logic should be deferred to a transform
        tags = nb_cell.metadata.get("tags", [])
        if ("remove_cell" in tags) or ("remove-cell" in tags):
            continue

        ### Patched here

        # Add a Token with a cell_meta directive, i.e.:
        #
        # ```cell_meta
        # {"slideshow": {"slide_type": "slide"}}
        # ```
        block_tokens.append(
            Token(
                type="fence",
                tag="code",
                nesting=0,
                attrs={},
                map=[start_line, start_line],
                level=0,
                children=None,
                content=json.dumps(nb_cell.metadata),
                markup="```",
                info="{cell_meta}",
                meta={},
                block=True,
                hidden=False,
            )
        )

        ### / Patched here
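
        # NOTE: the ``{cell_meta}`` fence above is only meaningful if a
        # matching ``cell_meta`` directive (or renderer rule) is registered
        # elsewhere in the Sphinx setup; that registration is assumed to
        # live outside this module.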

        if nb_cell["cell_type"] == "markdown":
            # we add the cell index to tokens,
            # so they can be included in the error logging
            block_tokens.extend(parse_block(nb_cell["source"], start_line))

        elif nb_cell["cell_type"] == "code":
            # here we do nothing but store the cell as a custom token
            block_tokens.append(
                Token(
                    "nb_code_cell",
                    "",
                    0,
                    meta={"cell": nb_cell, "lexer": lexer, "renderer": renderer_plugin},
                    map=[start_line, start_line],
                )
            )

    # Now all definitions have been gathered,
    # we run inline and post-inline chains, to expand the text.
    # Note we assume here that these rules never require the actual source text,
    # only acting on the existing tokens
    state = StateCore("", md, env, block_tokens)
    with md.reset_rules():
        md.core.ruler.enableOnly(rules[rules.index("inline") :])
        md.core.process(state)

    # Add the front matter.
    # Note that myst_parser serialises dict/list like keys, when rendering to
    # docutils docinfo. These could be read back with `json.loads`.
    state.tokens = [
        Token(
            "front_matter",
            "",
            0,
            map=[0, 0],
            content=({k: v for k, v in ntbk.metadata.items()}),  # type: ignore[arg-type]
        )
    ] + state.tokens

    # If there are widgets, this will embed the state of all widgets in a script
    if contains_widgets(ntbk):
        state.tokens.append(
            Token(
                "jupyter_widget_state",
                "",
                0,
                map=[0, 0],
                meta={"state": get_widgets(ntbk)},
            )
        )

    return md, env, state.tokens
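

# Usage sketch (an assumption, not part of the adapted MyST-NB code): in a
# local Sphinx extension this parser could replace MyST-NB's stock notebook
# parser. ``app.add_source_parser`` is standard Sphinx API; the need for
# ``override=True`` and the extension load order are assumptions here.
#
# def setup(app):
#     app.setup_extension("myst_nb")  # ensure myst_nb has registered first
#     app.add_source_parser(PatchedNotebookParser, override=True)
#     return {"parallel_read_safe": True}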