11from docutils import nodes
22import nbformat as nbf
33from pathlib import Path
4+ from sphinx .util import logging
45
5- from myst_parser .docutils_renderer import SphinxRenderer , dict_to_docinfo
6- from myst_parser .block_tokens import Document
6+ from myst_parser .docutils_renderer import SphinxRenderer
77from myst_parser .sphinx_parser import MystParser
8+
9+ from mistletoe .base_elements import BlockToken , Position , SourceLines
10+ from mistletoe .parse_context import ParseContext , get_parse_context , set_parse_context
11+ from mistletoe .block_tokenizer import tokenize_block
12+ from mistletoe .block_tokens import Document , FrontMatter
13+
814from jupyter_sphinx .ast import get_widgets , JupyterWidgetStateNode
915from jupyter_sphinx .execute import contains_widgets , write_notebook_output
1016
1117
18+ SPHINX_LOGGER = logging .getLogger (__name__ )
19+
20+
1221class NotebookParser (MystParser ):
1322 """Docutils parser for IPynb + CommonMark + Math + Tables + RST Extensions """
1423
@@ -21,6 +30,92 @@ class NotebookParser(MystParser):
2130 config_section_dependencies = ("parsers" ,)
2231
2332 def parse (self , inputstring , document ):
33+
34+ # de-serialize the notebook
35+ ntbk = nbf .reads (inputstring , nbf .NO_CONVERT )
36+
37+ # This is a container for top level markdown tokens
38+ # which we will add to as we walk the document
39+ mkdown_tokens = [] # type: list[BlockToken]
40+
41+ # First we ensure that we are using a 'clean' global context
42+ # for parsing, which is setup with the MyST parsing tokens
43+ # the logger will report on duplicate link/footnote definitions, etc
44+ parse_context = ParseContext (
45+ find_blocks = SphinxNBRenderer .default_block_tokens ,
46+ find_spans = SphinxNBRenderer .default_span_tokens ,
47+ logger = SPHINX_LOGGER ,
48+ )
49+ set_parse_context (parse_context )
50+
51+ for cell_index , nb_cell in enumerate (ntbk .cells ):
52+
53+ # Skip empty cells
54+ if len (nb_cell ["source" ].strip ()) == 0 :
55+ continue
56+
57+ # skip cells tagged for removal
58+ tags = nb_cell .metadata .get ("tags" , [])
59+ if "remove_cell" in tags :
60+ continue
61+
62+ if nb_cell ["cell_type" ] == "markdown" :
63+
64+ # we add the document path and cell index
65+ # to the source lines, so they can be included in the error logging
66+ # NOTE: currently the logic to report metadata is not written
67+ # into SphinxRenderer, but this will be introduced in a later update
68+ lines = SourceLines (
69+ nb_cell ["source" ],
70+ uri = document ["source" ],
71+ metadata = {"cell_index" : cell_index },
72+ standardize_ends = True ,
73+ )
74+
75+ # parse the source markdown text;
76+ # at this point span/inline level tokens are not yet processed, but
77+ # link/footnote definitions are collected/stored in the global context
78+ mkdown_tokens .extend (tokenize_block (lines ))
79+
80+ # TODO for md cells, think of a way to implement the previous
81+ # `if "hide_input" in tags:` logic
82+
83+ elif nb_cell ["cell_type" ] == "code" :
84+ # here we do nothing but store the cell as a custom token
85+ mkdown_tokens .append (
86+ NbCodeCell (
87+ cell = nb_cell ,
88+ position = Position (
89+ line_start = 0 ,
90+ uri = document ["source" ],
91+ data = {"cell_index" : cell_index },
92+ ),
93+ )
94+ )
95+
96+ # Now all definitions have been gathered, we walk the tokens and
97+ # process any inline text
98+ for token in mkdown_tokens + list (
99+ get_parse_context ().foot_definitions .values ()
100+ ):
101+ token .expand_spans ()
102+
103+ # If there are widgets, this will embed the state of all widgets in a script
104+ if contains_widgets (ntbk ):
105+ mkdown_tokens .insert (0 , JupyterWidgetState (state = get_widgets (ntbk )))
106+
107+ # create the front matter token
108+ front_matter = FrontMatter (content = ntbk .metadata , position = None )
109+
110+ # Finally, we create the top-level markdown document
111+ markdown_doc = Document (
112+ children = mkdown_tokens ,
113+ front_matter = front_matter ,
114+ link_definitions = parse_context .link_definitions ,
115+ footnotes = parse_context .foot_definitions ,
116+ footref_order = parse_context .foot_references ,
117+ )
118+
24119 self .reporter = document .reporter
25120 self .config = self .default_config .copy ()
26121 try :
@@ -29,8 +124,6 @@ def parse(self, inputstring, document):
29124 except AttributeError :
30125 pass
31126
32- ntbk = nbf .reads (inputstring , nbf .NO_CONVERT )
33-
34127 # Write the notebook's output to disk
35128 path_doc = Path (document .settings .env .docname )
36129 doc_relpath = path_doc .parent
@@ -39,69 +132,44 @@ def parse(self, inputstring, document):
39132 output_dir = build_dir .joinpath ("jupyter_execute" , doc_relpath )
40133 write_notebook_output (ntbk , str (output_dir ), doc_filename )
41134
42- # Parse notebook-level metadata as front-matter
43- # For now, only keep key/val pairs that point to int/float/string
44- metadata = ntbk .metadata
45- docinfo = dict_to_docinfo (metadata )
46- document += docinfo
135+ # render the Markdown AST to docutils AST
136+ renderer = SphinxNBRenderer (
137+ parse_context = parse_context , document = document , current_node = None
138+ )
139+ renderer .render (markdown_doc )
140+
141+
class JupyterWidgetState(BlockToken):
    """Block token that carries the widget state of a notebook.

    The parser inserts one of these at the front of the token list when
    the notebook contains widgets; ``SphinxNBRenderer`` registers a
    handler for it under the name ``"JupyterWidgetState"``.
    """

    def __init__(self, state):
        """Keep ``state`` on the token so the renderer can read it back."""
        self.state = state
145+
146+
class NbCodeCell(BlockToken):
    """Block token that wraps a notebook code cell without parsing it.

    The cell is stored untouched so that it can be converted later by the
    renderer handler registered under the name ``"NbCodeCell"``.
    """

    def __init__(self, cell, position):
        """Record the notebook cell and the position it was created with."""
        self.position = position
        self.cell = cell
151+
152+
153+ class SphinxNBRenderer (SphinxRenderer ):
def __init__(self, *args, **kwargs):
    """Initialise the base renderer, then register the notebook tokens.

    All arguments are forwarded unchanged to the parent constructor.
    """
    super().__init__(*args, **kwargs)
    # Map the custom token class names onto their render methods.
    self.render_map.update(
        {
            "NbCodeCell": self.render_nb_code_cell,
            "JupyterWidgetState": self.render_jupyter_widget_state,
        }
    )
158+
def render_jupyter_widget_state(self, token):
    """Append a widget-state node built from ``token.state`` to the document."""
    widget_node = JupyterWidgetStateNode(state=token.state)
    self.document.append(widget_node)
161+
162+ def render_nb_code_cell (self , token : NbCodeCell ):
163+ """Render a Jupyter notebook cell."""
164+ cell = token .cell
165+ tags = cell .metadata .get ("tags" , [])
47166
48- # If there are widgets, this will embed the state of all widgets in a script
49- if contains_widgets (ntbk ):
50- document .append (JupyterWidgetStateNode (state = get_widgets (ntbk )))
51- renderer = SphinxRenderer (document = document , current_node = None )
52- with renderer :
53- # Loop through cells and render them
54- for ii , cell in enumerate (ntbk .cells ):
55- # Skip empty cells
56- if len (cell ["source" ]) == 0 :
57- continue
58- try :
59- _render_cell (cell , renderer )
60- except Exception as exc :
61- source = cell ["source" ][:50 ]
62- if len (cell ["source" ]) > 50 :
63- source = source + "..."
64- msg_node = self .reporter .error (
65- (
66- f"\n Error parsing notebook cell #{ ii + 1 } : { exc } \n "
67- f"Type: { cell ['cell_type' ]} \n "
68- f"Source:\n { source } \n \n "
69- )
70- )
71- msg_node += nodes .literal_block (cell ["source" ], cell ["source" ])
72- renderer .current_node += [msg_node ]
73- continue
74-
75-
76- def _render_cell (cell , renderer ):
77- """Render a cell with a SphinxRenderer instance.
78-
79- Returns nothing because the renderer updates itself.
80- """
81- tags = cell .metadata .get ("tags" , [])
82- if "remove_cell" in tags :
83- return
84-
85- # If a markdown cell, simply call the Myst parser and append children
86- if cell ["cell_type" ] == "markdown" :
87- document = Document .read (cell ["source" ], front_matter = False )
88- # Check for tag-specific behavior because markdown isn't wrapped in a cell
89- if "hide_input" in tags :
90- container = nodes .container ()
91- container ["classes" ].extend (["toggle" ])
92- with renderer .current_node_context (container , append = True ):
93- renderer .render (document )
94- else :
95- renderer .render (document )
96-
97- # If a code cell, convert the code + outputs
98- elif cell ["cell_type" ] == "code" :
99167 # Cell container will wrap whatever is in the cell
100168 classes = ["cell" ]
101169 for tag in tags :
102170 classes .append (f"tag_{ tag } " )
103171 sphinx_cell = CellNode (classes = classes , cell_type = cell ["cell_type" ])
104- renderer .current_node += sphinx_cell
172+ self .current_node += sphinx_cell
105173 if "remove_input" not in tags :
106174 cell_input = CellInputNode (classes = ["cell_input" ])
107175 sphinx_cell += cell_input
0 commit comments