Skip to content

Commit 688e421

Browse files
authored
Refactor parsing process (#59)
* Add initial implementation * add some more comments * Update parser.py * add comments * update mistletoe/myst_parser versions * Update basic.ipynb * fix pre-commit * try adding widget state after metadata
1 parent 2207e30 commit 688e421

3 files changed

Lines changed: 134 additions & 64 deletions

File tree

docs/use/basic.ipynb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
"![](../_static/logo.png)\n",
1818
"\n",
1919
"because MyST-NB is using the MyST-markdown parser, you can include rich markdown with Sphinx\n",
20-
"in your notebook. For example, here's a note block:\n",
20+
"in your notebook.[^note] For example, here's a note block:\n",
21+
"\n",
22+
"[^note]: Even footnotes!\n",
2123
"\n",
2224
"`````{note}\n",
2325
"Wow, a note! It was generated with this code:\n",
@@ -256,7 +258,7 @@
256258
"name": "python",
257259
"nbconvert_exporter": "python",
258260
"pygments_lexer": "ipython3",
259-
"version": "3.7.3"
261+
"version": "3.7.6-final"
260262
},
261263
"widgets": {
262264
"application/vnd.jupyter.widget-state+json": {

myst_nb/parser.py

Lines changed: 129 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
from docutils import nodes
22
import nbformat as nbf
33
from pathlib import Path
4+
from sphinx.util import logging
45

5-
from myst_parser.docutils_renderer import SphinxRenderer, dict_to_docinfo
6-
from myst_parser.block_tokens import Document
6+
from myst_parser.docutils_renderer import SphinxRenderer
77
from myst_parser.sphinx_parser import MystParser
8+
9+
from mistletoe.base_elements import BlockToken, Position, SourceLines
10+
from mistletoe.parse_context import ParseContext, get_parse_context, set_parse_context
11+
from mistletoe.block_tokenizer import tokenize_block
12+
from mistletoe.block_tokens import Document, FrontMatter
13+
814
from jupyter_sphinx.ast import get_widgets, JupyterWidgetStateNode
915
from jupyter_sphinx.execute import contains_widgets, write_notebook_output
1016

1117

18+
SPHINX_LOGGER = logging.getLogger(__name__)
19+
20+
1221
class NotebookParser(MystParser):
1322
"""Docutils parser for IPynb + CommonMark + Math + Tables + RST Extensions """
1423

@@ -21,6 +30,92 @@ class NotebookParser(MystParser):
2130
config_section_dependencies = ("parsers",)
2231

2332
def parse(self, inputstring, document):
33+
34+
# de-serialize the notebook
35+
ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)
36+
37+
# This is a contaner for top level markdown tokens
38+
# which we will add to as we walk the document
39+
mkdown_tokens = [] # type: list[BlockToken]
40+
41+
# First we ensure that we are using a 'clean' global context
42+
# for parsing, which is set up with the MyST parsing tokens
43+
# the logger will report on duplicate link/footnote definitions, etc
44+
parse_context = ParseContext(
45+
find_blocks=SphinxNBRenderer.default_block_tokens,
46+
find_spans=SphinxNBRenderer.default_span_tokens,
47+
logger=SPHINX_LOGGER,
48+
)
49+
set_parse_context(parse_context)
50+
51+
for cell_index, nb_cell in enumerate(ntbk.cells):
52+
53+
# Skip empty cells
54+
if len(nb_cell["source"].strip()) == 0:
55+
continue
56+
57+
# skip cells tagged for removal
58+
tags = nb_cell.metadata.get("tags", [])
59+
if "remove_cell" in tags:
60+
continue
61+
62+
if nb_cell["cell_type"] == "markdown":
63+
64+
# we add the document path and cell index
65+
# to the source lines, so they can be included in the error logging
66+
# NOTE: currently the logic to report metadata is not written
67+
# into SphinxRenderer, but this will be introduced in a later update
68+
lines = SourceLines(
69+
nb_cell["source"],
70+
uri=document["source"],
71+
metadata={"cell_index": cell_index},
72+
standardize_ends=True,
73+
)
74+
75+
# parse the source markdown text;
76+
# at this point span/inline level tokens are not yet processed, but
77+
# link/footnote definitions are collected/stored in the global context
78+
mkdown_tokens.extend(tokenize_block(lines))
79+
80+
# TODO for md cells, think of a way to implement the previous
81+
# `if "hide_input" in tags:` logic
82+
83+
elif nb_cell["cell_type"] == "code":
84+
# here we do nothing but store the cell as a custom token
85+
mkdown_tokens.append(
86+
NbCodeCell(
87+
cell=nb_cell,
88+
position=Position(
89+
line_start=0,
90+
uri=document["source"],
91+
data={"cell_index": cell_index},
92+
),
93+
)
94+
)
95+
96+
# Now all definitions have been gathered, we walk the tokens and
97+
# process any inline text
98+
for token in mkdown_tokens + list(
99+
get_parse_context().foot_definitions.values()
100+
):
101+
token.expand_spans()
102+
103+
# If there are widgets, this will embed the state of all widgets in a script
104+
if contains_widgets(ntbk):
105+
mkdown_tokens.insert(0, JupyterWidgetState(state=get_widgets(ntbk)))
106+
107+
# create the front matter token
108+
front_matter = FrontMatter(content=ntbk.metadata, position=None)
109+
110+
# Finally, we create the top-level markdown document
111+
markdown_doc = Document(
112+
children=mkdown_tokens,
113+
front_matter=front_matter,
114+
link_definitions=parse_context.link_definitions,
115+
footnotes=parse_context.foot_definitions,
116+
footref_order=parse_context.foot_references,
117+
)
118+
24119
self.reporter = document.reporter
25120
self.config = self.default_config.copy()
26121
try:
@@ -29,8 +124,6 @@ def parse(self, inputstring, document):
29124
except AttributeError:
30125
pass
31126

32-
ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)
33-
34127
# Write the notebook's output to disk
35128
path_doc = Path(document.settings.env.docname)
36129
doc_relpath = path_doc.parent
@@ -39,69 +132,44 @@ def parse(self, inputstring, document):
39132
output_dir = build_dir.joinpath("jupyter_execute", doc_relpath)
40133
write_notebook_output(ntbk, str(output_dir), doc_filename)
41134

42-
# Parse notebook-level metadata as front-matter
43-
# For now, only keep key/val pairs that point to int/float/string
44-
metadata = ntbk.metadata
45-
docinfo = dict_to_docinfo(metadata)
46-
document += docinfo
135+
# render the Markdown AST to docutils AST
136+
renderer = SphinxNBRenderer(
137+
parse_context=parse_context, document=document, current_node=None
138+
)
139+
renderer.render(markdown_doc)
140+
141+
142+
class JupyterWidgetState(BlockToken):
143+
def __init__(self, state):
144+
self.state = state
145+
146+
147+
class NbCodeCell(BlockToken):
148+
def __init__(self, cell, position):
149+
self.cell = cell
150+
self.position = position
151+
152+
153+
class SphinxNBRenderer(SphinxRenderer):
154+
def __init__(self, *args, **kwargs):
155+
super().__init__(*args, **kwargs)
156+
self.render_map["NbCodeCell"] = self.render_nb_code_cell
157+
self.render_map["JupyterWidgetState"] = self.render_jupyter_widget_state
158+
159+
def render_jupyter_widget_state(self, token):
160+
self.document.append(JupyterWidgetStateNode(state=token.state))
161+
162+
def render_nb_code_cell(self, token: NbCodeCell):
163+
"""Render a Jupyter notebook cell."""
164+
cell = token.cell
165+
tags = cell.metadata.get("tags", [])
47166

48-
# If there are widgets, this will embed the state of all widgets in a script
49-
if contains_widgets(ntbk):
50-
document.append(JupyterWidgetStateNode(state=get_widgets(ntbk)))
51-
renderer = SphinxRenderer(document=document, current_node=None)
52-
with renderer:
53-
# Loop through cells and render them
54-
for ii, cell in enumerate(ntbk.cells):
55-
# Skip empty cells
56-
if len(cell["source"]) == 0:
57-
continue
58-
try:
59-
_render_cell(cell, renderer)
60-
except Exception as exc:
61-
source = cell["source"][:50]
62-
if len(cell["source"]) > 50:
63-
source = source + "..."
64-
msg_node = self.reporter.error(
65-
(
66-
f"\nError parsing notebook cell #{ii+1}: {exc}\n"
67-
f"Type: {cell['cell_type']}\n"
68-
f"Source:\n{source}\n\n"
69-
)
70-
)
71-
msg_node += nodes.literal_block(cell["source"], cell["source"])
72-
renderer.current_node += [msg_node]
73-
continue
74-
75-
76-
def _render_cell(cell, renderer):
77-
"""Render a cell with a SphinxRenderer instance.
78-
79-
Returns nothing because the renderer updates itself.
80-
"""
81-
tags = cell.metadata.get("tags", [])
82-
if "remove_cell" in tags:
83-
return
84-
85-
# If a markdown cell, simply call the Myst parser and append children
86-
if cell["cell_type"] == "markdown":
87-
document = Document.read(cell["source"], front_matter=False)
88-
# Check for tag-specific behavior because markdown isn't wrapped in a cell
89-
if "hide_input" in tags:
90-
container = nodes.container()
91-
container["classes"].extend(["toggle"])
92-
with renderer.current_node_context(container, append=True):
93-
renderer.render(document)
94-
else:
95-
renderer.render(document)
96-
97-
# If a code cell, convert the code + outputs
98-
elif cell["cell_type"] == "code":
99167
# Cell container will wrap whatever is in the cell
100168
classes = ["cell"]
101169
for tag in tags:
102170
classes.append(f"tag_{tag}")
103171
sphinx_cell = CellNode(classes=classes, cell_type=cell["cell_type"])
104-
renderer.current_node += sphinx_cell
172+
self.current_node += sphinx_cell
105173
if "remove_input" not in tags:
106174
cell_input = CellInputNode(classes=["cell_input"])
107175
sphinx_cell += cell_input

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
python_requires=">=3.6",
4242
package_data={"myst_nb": ["_static/mystnb.css"]},
4343
install_requires=[
44-
"myst-parser~=0.5",
44+
"myst-parser~=0.6.0",
4545
"docutils>=0.15",
4646
"sphinx>=2,<3",
4747
"jupyter_sphinx==0.2.4a1",

0 commit comments

Comments
 (0)