Skip to content

Commit b46c2f1

Browse files
authored
Add glue and paste functionality (#66)
committed on behalf of @choldgraf

This adds prototype "glue and paste" functionality to MyST-NB. It closes #4.

You glue things into a notebook's metadata like this:

```python
from myst_nb import glue
glue("your-key", an_object)
```

This runs IPython's display on the object, then stores the resulting mimebundle under the key you specify. When the notebooks are parsed with `MyST-NB`, it builds up a registry of all the keys across all notebooks, so that you can then refer to them in the following ways.

You can paste it in markdown with a directive like this:

````
```{paste} your-key
```
````

Or you can add it in-line like this:

```
{paste}`your-key`
```

optionally with a format string:

```
{paste}`your-key:format-string`
```

See the documentation for more details.
1 parent e79a627 commit b46c2f1

14 files changed

Lines changed: 1066 additions & 28 deletions

File tree

docs/use/basic.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@
258258
"name": "python",
259259
"nbconvert_exporter": "python",
260260
"pygments_lexer": "ipython3",
261-
"version": "3.7.6-final"
261+
"version": "3.8.0"
262262
},
263263
"widgets": {
264264
"application/vnd.jupyter.widget-state+json": {

docs/use/glue.ipynb

Lines changed: 280 additions & 0 deletions
Large diffs are not rendered by default.

docs/use/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ basic
88
interactive
99
hiding
1010
markdown
11+
glue
1112
```

myst_nb/__init__.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
JupyterCell,
88
)
99

10-
# from ipywidgets import embed
1110
from pathlib import Path
11+
import json
1212

1313
from .parser import (
1414
NotebookParser,
@@ -18,6 +18,7 @@
1818
CellOutputBundleNode,
1919
)
2020
from .transform import CellOutputsToNodes
21+
from .glue import Paste, paste_role, PasteNodesToDocutils
2122

2223

2324
def static_path(app):
@@ -35,6 +36,17 @@ def update_togglebutton_classes(app, config):
3536
config.togglebutton_selector += f", {selector}"
3637

3738

39+
def init_glue_cache(app):
    """Make sure the build environment carries a ``glue_data`` mapping.

    An existing ``app.env.glue_data`` is left untouched; otherwise it is
    initialised to an empty dict.
    """
    env = app.env
    try:
        env.glue_data
    except AttributeError:
        env.glue_data = {}
42+
43+
44+
def save_glue_cache(app, env):
    """Serialise ``env.glue_data`` as JSON into the doctree directory.

    The data is written to ``glue_cache.json`` inside ``env.doctreedir``.
    ``app`` is accepted but not used by this function.
    """
    cache_file = Path(env.doctreedir) / "glue_cache.json"
    with cache_file.open("w") as stream:
        json.dump(env.glue_data, stream)
48+
49+
3850
def setup(app):
3951
"""Initialize Sphinx extension."""
4052
# Sllow parsing ipynb files
@@ -82,13 +94,18 @@ def visit_element_html(self, node):
8294
)
8395

8496
# Register our post-transform which will convert output bundles to nodes
97+
app.add_post_transform(PasteNodesToDocutils)
8598
app.add_post_transform(CellOutputsToNodes)
8699

100+
app.connect("builder-inited", init_glue_cache)
87101
app.connect("builder-inited", static_path)
88102
app.connect("config-inited", update_togglebutton_classes)
103+
app.connect("env-updated", save_glue_cache)
89104
app.add_css_file("mystnb.css")
90105
# We use `execute` here instead of `jupyter-execute`
91106
app.add_directive("execute", JupyterCell)
107+
app.add_directive("paste", Paste)
108+
app.add_role("paste", paste_role)
92109
app.setup_extension("jupyter_sphinx")
93110

94111
return {"version": __version__, "parallel_read_safe": True}

myst_nb/_static/mystnb.css

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,10 @@ div.cell_output table {
100100
div.cell_output tbody tr:hover {
101101
background: rgba(66, 165, 245, 0.2);
102102
}
103+
104+
105+
/* Inline text from `paste` operation */
106+
107+
span.pasted-text {
108+
font-weight: bold;
109+
}

myst_nb/glue.py

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
import nbformat as nbf
2+
from pathlib import Path
3+
import json
4+
import IPython
5+
from IPython.display import display as ipy_display
6+
from sphinx.util.docutils import SphinxDirective
7+
from sphinx.transforms import SphinxTransform
8+
from docutils import nodes
9+
from docutils.parsers.rst import directives
10+
from sphinx.util import logging
11+
12+
from .parser import CellNode, CellInputNode, CellOutputBundleNode
13+
14+
SPHINX_LOGGER = logging.getLogger(__name__)
15+
16+
GLUE_PREFIX = "application/papermill.record/"
17+
18+
19+
def glue(name, variable, display=True):
    """Glue a variable into the notebook's cell metadata.

    Parameters
    ----------
    name: string
        A unique name for the variable. You can use this name to refer to the
        variable later on.
    variable: python object
        A Python object whose *display value* should be stored. The object
        itself is not stored - only the mimebundle that IPython's formatters
        produce for it.
    display: bool
        When true, the mime types are emitted unchanged. When false, every mime
        type is prefixed with ``GLUE_PREFIX`` before being emitted.
    """
    bundle, meta = IPython.core.formatters.format_display_data(variable)
    prefix = GLUE_PREFIX if not display else ""

    # Record the glue name (and the prefix used) alongside the output.
    meta["scrapbook"] = {"name": name, "mime_prefix": prefix}

    prefixed_bundle = {}
    for mimetype, payload in bundle.items():
        prefixed_bundle[prefix + mimetype] = payload
    ipy_display(prefixed_bundle, raw=True, metadata=meta)
41+
42+
43+
def read_glue_cache(path):
    """Read a glue cache generated by a Sphinx build.

    Parameters
    ----------
    path : str
        Path to a doctree dir, or directly to a glue cache .json file.

    Returns
    -------
    data : dictionary
        A dictionary containing the JSON data in your glue cache.

    Raises
    ------
    FileNotFoundError
        If no file exists at the resolved cache path.
    """
    path = Path(path)
    if path.is_dir():
        # Assume our folder is doctrees and append the glue data name to it.
        path = path.joinpath("glue_cache.json")
    if not path.exists():
        raise FileNotFoundError(f"A glue cache was not found at: {path}")

    # Use a context manager so the file handle is closed deterministically.
    with path.open() as handle:
        return json.load(handle)
65+
66+
67+
def find_glued_key(path_ntbk, key):
    """Find an output mimebundle in a notebook based on a key.

    Parameters
    ----------
    path_ntbk : path
        The path to a Jupyter Notebook that has variables "glued" in it.
    key : string
        The unique string to use as a look-up in `path_ntbk`.

    Returns
    -------
    mimebundle
        The output mimebundle associated with the given key, with the glue
        prefix removed from its mime types.

    Raises
    ------
    KeyError
        If the key is not found in the notebook, or is glued more than once.
    """
    # Read in the notebook
    if isinstance(path_ntbk, Path):
        path_ntbk = str(path_ntbk)
    ntbk = nbf.read(path_ntbk, nbf.NO_CONVERT)

    matches = []
    for cell in ntbk.cells:
        if cell.cell_type != "code":
            continue

        # If we have outputs, look for scrapbook metadata and reference the key
        for output in cell["outputs"]:
            meta = output.get("metadata", {})
            if "scrapbook" not in meta:
                continue
            this_key = meta["scrapbook"]["name"].replace(GLUE_PREFIX, "")
            if key != this_key:
                continue
            # Keep every mime type of the bundle; only strip the glue prefix.
            matches.append(
                {
                    mimetype.replace(GLUE_PREFIX, ""): val
                    for mimetype, val in output["data"].items()
                }
            )

    # Report the *requested* key: no loop variable is guaranteed to be bound
    # here (e.g. when the notebook has no glued outputs at all).
    if not matches:
        raise KeyError(f"Did not find key {key} in notebook {path_ntbk}")
    if len(matches) > 1:
        raise KeyError(
            f"Multiple variables found for key: {key} in notebook {path_ntbk}"
        )
    return matches[0]
107+
108+
109+
def find_all_keys(ntbk, keys=None, path=None, logger=None):
    """Find all `glue` keys in a notebook and return a dictionary with key: outputs.

    Parameters
    ----------
    ntbk : str, Path or notebook object
        A path to a notebook (read with nbformat), or an already-loaded
        notebook exposing ``.cells``.
    keys : dict, optional
        An existing ``{key: output}`` mapping. It is updated in place and
        returned; a new dict is created when omitted.
    path : str, optional
        Passed as the first element of the ``location`` tuple of any warning.
    logger : logger, optional
        Object whose ``warning(msg, location=...)`` is called when a key is
        over-written. When omitted, the message is printed instead.

    Returns
    -------
    keys : dict
        Mapping of glue name to the notebook output that carries it.
    """
    if isinstance(ntbk, (str, Path)):
        ntbk = nbf.read(str(ntbk), nbf.NO_CONVERT)

    if keys is None:
        keys = {}

    for cell in ntbk.cells:
        if cell.cell_type != "code":
            continue

        # Tolerate code cells that carry no "outputs" entry at all.
        for output in cell.get("outputs", []):
            meta = output.get("metadata", {})
            if "scrapbook" not in meta:
                continue
            this_key = meta["scrapbook"]["name"]
            if this_key in keys:
                msg = f"Over-writing pre-existing glue key: `{this_key}`"
                if logger is None:
                    print(msg)
                else:
                    logger.warning(msg, location=(path, None))
            keys[this_key] = output
    return keys
133+
134+
135+
# Role and directive for pasting
136+
class Paste(SphinxDirective):
    """Directive that emits a placeholder ``PasteNode`` for a glued key.

    The single required argument is the key; the node is created with kind
    ``"directive"`` and a ``(path, lineno)`` location for log messages.
    """

    has_content = False
    required_arguments = 1
    final_argument_whitespace = True

    option_spec = {"id": directives.unchanged}

    def run(self):
        # TODO: Figure out how to report cell number in the location
        #       currently, line numbers in ipynb files are not reliable
        source, line = self.state_machine.get_source_and_line(self.lineno)
        # Line numbers are unreliable for notebooks, so drop them there.
        is_notebook = source.endswith(".ipynb")
        # Strip the suffix from the path so it is printed properly in logs.
        location = (
            str(Path(source).with_suffix("")),
            None if is_notebook else line,
        )
        return [PasteNode(self.arguments[0], "directive", location=location)]
153+
154+
155+
def paste_role(name, rawtext, text, lineno, inliner, options=None, content=None):
    """Role that emits a placeholder ``PasteNode`` for a glued key.

    Parameters follow the docutils role-function signature. ``text`` is used
    as the node's key (optionally ``key:format-string``); ``options`` and
    ``content`` are accepted but not used, so they default to ``None`` rather
    than to shared mutable objects.

    Returns
    -------
    tuple
        ``([PasteNode], [])`` - the created node and an empty list.
    """
    path = inliner.document.current_source
    if path:
        # Remove line number if we have a notebook because it is unreliable
        if path.endswith(".ipynb"):
            lineno = None
        path = str(Path(path).with_suffix(""))
    else:
        # No source recorded on the document: keep only the line number.
        path = None
    return [PasteNode(text, "role", location=(path, lineno))], []
162+
163+
164+
# Transform to replace nodes with outputs
165+
class PasteNode(nodes.container):
    """Represent a MimeBundle in the Sphinx AST, to be transformed later.

    Parameters
    ----------
    key : str
        The glue key to paste, optionally followed by ``:format-string``.
    kind : str
        How the node was created: ``"role"`` or ``"directive"``.
    location : tuple, optional
        ``(path, lineno)`` used as the location of log messages.
    rawsource, *children, **attributes
        Forwarded unchanged to ``nodes.container``.
    """

    def __init__(self, key, kind, location=None, rawsource="", *children, **attributes):
        self.key = key
        self.kind = kind
        self.location = location
        # Forward rawsource and children instead of silently discarding them.
        super().__init__(rawsource, *children, **attributes)
173+
174+
175+
class PasteNodesToDocutils(SphinxTransform):
    """Replace each ``PasteNode`` with nodes built from the glued output.

    Outputs are looked up by key in ``app.env.glue_data``. Roles become an
    inline text node (only ``text/plain`` is supported); directives become a
    ``CellNode`` wrapping the whole output bundle, to be rendered later.
    """

    default_priority = 699  # must be applied before CellOutputsToNodes

    def apply(self):
        glue_data = self.app.env.glue_data
        # Snapshot the matches first: nodes are replaced inside the loop, and
        # the tree must not change underneath a lazy traversal.
        for paste_node in list(self.document.traverse(PasteNode)):

            # First check if we have both key:format in the key
            parts = paste_node.key.rsplit(":", 1)
            key = parts[0]
            formatting = parts[1] if len(parts) == 2 else None

            if key not in glue_data:
                SPHINX_LOGGER.warning(
                    f"Couldn't find key `{key}` in keys defined across all pages.",
                    location=paste_node.location,
                )
                continue

            # Grab the output for this key and replace `glue` specific prefix info.
            # The copy is shallow, but "data" is rebound (not mutated), so the
            # entry stored in glue_data is left untouched.
            output = glue_data[key].copy()
            output["data"] = {
                mimetype.replace(GLUE_PREFIX, ""): val
                for mimetype, val in output["data"].items()
            }

            # Roles will be parsed as text, with some formatting fanciness
            if paste_node.kind == "role":
                # Currently only plain text is supported
                if "text/plain" not in output["data"]:
                    SPHINX_LOGGER.warning(
                        f"Couldn't find compatible output format for key `{key}`",
                        location=paste_node.location,
                    )
                    # Nothing to substitute: skip, as for a missing key,
                    # rather than reusing an unbound or stale replacement node.
                    continue
                text = output["data"]["text/plain"].strip("'")
                # If formatting is specified, see if we have a number of some kind
                if formatting:
                    try:
                        newtext = float(text)
                        text = f"{newtext:>{formatting}}"
                    except ValueError:
                        # Not a number (or not a valid format spec): keep the
                        # text as it is.
                        pass
                out_node = nodes.inline(text, text, classes=["pasted-text"])
            # Directives will have the whole output chunk deposited and rendered later
            elif paste_node.kind == "directive":
                output_node = CellOutputBundleNode(outputs=[output])
                out_node = CellNode()
                out_node += CellInputNode()
                out_node += output_node
            else:
                SPHINX_LOGGER.error(
                    (
                        "`kind` must be one of `role` or `directive`, "
                        f"not `{paste_node.kind}`"
                    ),
                    location=paste_node.location,
                )
                continue

            paste_node.replace_self(out_node)

myst_nb/parser.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class NotebookParser(MystParser):
3030
config_section_dependencies = ("parsers",)
3131

3232
def parse(self, inputstring, document):
33+
from .glue import find_all_keys, GLUE_PREFIX
3334

3435
# de-serialize the notebook
3536
ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)
@@ -124,14 +125,45 @@ def parse(self, inputstring, document):
124125
except AttributeError:
125126
pass
126127

127-
# Write the notebook's output to disk
128+
# Remove all the mime prefixes from "glue" step.
129+
# This way, writing properly captures the glued images
130+
replace_mime = []
131+
for cell in ntbk.cells:
132+
if hasattr(cell, "outputs"):
133+
for out in cell.outputs:
134+
if "data" in out:
135+
# Only do the mimebundle replacing for the scrapbook outputs
136+
if out.get("metadata", {}).get("scrapbook", {}).get("name"):
137+
out["data"] = {
138+
key.replace(GLUE_PREFIX, ""): val
139+
for key, val in out["data"].items()
140+
}
141+
replace_mime.append(out)
142+
143+
# Write the notebook's output to disk. This changes metadata in notebook cells
128144
path_doc = Path(document.settings.env.docname)
129145
doc_relpath = path_doc.parent
130146
doc_filename = path_doc.name
131147
build_dir = Path(document.settings.env.app.outdir).parent
132148
output_dir = build_dir.joinpath("jupyter_execute", doc_relpath)
133149
write_notebook_output(ntbk, str(output_dir), doc_filename)
134150

151+
# Now add back the mime prefixes to the right outputs so they aren't rendered
152+
# until called from the role/directive
153+
for out in replace_mime:
154+
out["data"] = {
155+
f"{GLUE_PREFIX}{key}": val for key, val in out["data"].items()
156+
}
157+
158+
# Update our glue key list with new ones defined in this page
159+
new_keys = find_all_keys(
160+
ntbk,
161+
keys=document.settings.env.glue_data,
162+
path=str(path_doc),
163+
logger=SPHINX_LOGGER,
164+
)
165+
document.settings.env.glue_data.update(new_keys)
166+
135167
# render the Markdown AST to docutils AST
136168
renderer = SphinxNBRenderer(
137169
parse_context=parse_context, document=document, current_node=None

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
"pytest-cov",
5959
"pytest-regressions",
6060
"beautifulsoup4",
61+
"ipython",
6162
],
6263
"rtd": [
6364
"sphinxcontrib-bibtex",

0 commit comments

Comments
 (0)