|
| 1 | +import nbformat as nbf |
| 2 | +from pathlib import Path |
| 3 | +import json |
| 4 | +import IPython |
| 5 | +from IPython.display import display as ipy_display |
| 6 | +from sphinx.util.docutils import SphinxDirective |
| 7 | +from sphinx.transforms import SphinxTransform |
| 8 | +from docutils import nodes |
| 9 | +from docutils.parsers.rst import directives |
| 10 | +from sphinx.util import logging |
| 11 | + |
| 12 | +from .parser import CellNode, CellInputNode, CellOutputBundleNode |
| 13 | + |
| 14 | +SPHINX_LOGGER = logging.getLogger(__name__) |
| 15 | + |
| 16 | +GLUE_PREFIX = "application/papermill.record/" |
| 17 | + |
| 18 | + |
def glue(name, variable, display=True):
    """Attach the display data of a variable to the current cell output.

    The variable is formatted into a mimebundle, tagged with ``scrapbook``
    metadata holding ``name``, and emitted as a raw display output.

    Parameters
    ----------
    name: string
        A unique name for the variable. It is stored in the output metadata so
        the output can be looked up by this name later on.
    variable: python object
        The object whose *display value* should be stored. This is not the
        object itself, but what IPython produces when formatting it for display.
    display: bool
        If True, the mime types are emitted unchanged. If False, every mime type
        is prefixed with ``GLUE_PREFIX``.
    """
    bundle, meta = IPython.core.formatters.format_display_data(variable)

    if display:
        prefix = ""
    else:
        prefix = GLUE_PREFIX

    # Record the name and the prefix in use so readers can find the output again.
    meta["scrapbook"] = {"name": name, "mime_prefix": prefix}

    prefixed_bundle = {prefix + mime: payload for mime, payload in bundle.items()}
    ipy_display(prefixed_bundle, raw=True, metadata=meta)
| 41 | + |
| 42 | + |
def read_glue_cache(path):
    """Read a glue cache generated by Sphinx build.

    Parameters
    ----------
    path : str
        Path to a doctree dir, or directly to a glue cache .json file.

    Returns
    -------
    data : dictionary
        A dictionary containing the JSON data in your glue cache.

    Raises
    ------
    FileNotFoundError
        If no file exists at the resolved cache location.
    """
    path = Path(path)
    if path.is_dir():
        # Assume our folder is doctrees and append the glue data name to it.
        path = path.joinpath("glue_cache.json")
    if not path.exists():
        raise FileNotFoundError(f"A glue cache was not found at: {path}")

    # Use a context manager so the file handle is closed deterministically.
    with path.open() as handle:
        return json.load(handle)
| 65 | + |
| 66 | + |
def find_glued_key(path_ntbk, key):
    """Find an output mimebundle in a notebook based on a key.

    Parameters
    ----------
    path_ntbk : path
        The path to a Jupyter Notebook that has variables "glued" in it.
    key : string
        The unique string to use as a look-up in `path_ntbk`.

    Returns
    -------
    mimebundle
        The output mimebundle (mime type -> data) associated with the given
        key, with any ``GLUE_PREFIX`` removed from the mime types.

    Raises
    ------
    KeyError
        If the key is not found in the notebook, or is found more than once.
    """
    # Read in the notebook
    if isinstance(path_ntbk, Path):
        path_ntbk = str(path_ntbk)
    ntbk = nbf.read(path_ntbk, nbf.NO_CONVERT)

    outputs = []
    for cell in ntbk.cells:
        if cell.cell_type != "code":
            continue

        # If we have outputs, look for scrapbook metadata and reference the key
        for output in cell["outputs"]:
            meta = output.get("metadata", {})
            if "scrapbook" not in meta:
                continue
            this_key = meta["scrapbook"]["name"].replace(GLUE_PREFIX, "")
            if this_key != key:
                continue
            # Keep one entry per mime type; only the glue prefix is stripped.
            bundle = {
                mime.replace(GLUE_PREFIX, ""): val
                for mime, val in output["data"].items()
            }
            outputs.append(bundle)

    # Report the *requested* key: it is always defined, even when the notebook
    # contains no glued outputs at all.
    if not outputs:
        raise KeyError(f"Did not find key {key} in notebook {path_ntbk}")
    if len(outputs) > 1:
        raise KeyError(f"Multiple variables found for key: {key}.")
    return outputs[0]
| 107 | + |
| 108 | + |
def find_all_keys(ntbk, keys=None, path=None, logger=None):
    """Collect every glued output of a notebook into a ``name -> output`` dict.

    Parameters
    ----------
    ntbk : str, Path or notebook object
        A notebook path (read with nbformat) or an already loaded notebook
        exposing ``cells``.
    keys : dict, optional
        Existing mapping to update in place. A new dict is created when omitted.
    path : optional
        Used only as the ``location`` of warnings sent to ``logger``.
    logger : optional
        Object with a ``warning(msg, location=...)`` method. When omitted,
        overwrite messages are printed instead.

    Returns
    -------
    dict
        The mapping of glue names to their output, later outputs winning.
    """
    if isinstance(ntbk, (str, Path)):
        ntbk = nbf.read(str(ntbk), nbf.NO_CONVERT)

    if keys is None:
        keys = {}

    code_cells = (cell for cell in ntbk.cells if cell.cell_type == "code")
    for code_cell in code_cells:
        for cell_output in code_cell["outputs"]:
            output_meta = cell_output.get("metadata", {})
            if "scrapbook" not in output_meta:
                continue

            glue_name = output_meta["scrapbook"]["name"]
            if glue_name in keys:
                # Announce that an earlier value for this name is being replaced.
                msg = f"Over-writing pre-existing glue key: `{glue_name}`"
                if logger is not None:
                    logger.warning(msg, location=(path, None))
                else:
                    print(msg)
            keys[glue_name] = cell_output
    return keys
| 133 | + |
| 134 | + |
| 135 | +# Role and directive for pasting |
class Paste(SphinxDirective):
    """Directive that emits a ``PasteNode`` placeholder for a glued key.

    The single required argument is the key; it is passed to the node
    unchanged together with the kind ``"directive"``.
    """

    has_content = False
    required_arguments = 1
    final_argument_whitespace = True

    option_spec = {"id": directives.unchanged}

    def run(self):
        """Return a one-element list holding the placeholder node."""
        # TODO: Figure out how to report cell number in the location
        # currently, line numbers in ipynb files are not reliable
        source, line = self.state_machine.get_source_and_line(self.lineno)

        # Line numbers are unreliable for notebooks, so drop them there.
        from_notebook = source.endswith(".ipynb")
        reported_line = None if from_notebook else line

        # The location used in logs carries the path without its file suffix.
        stem_path = str(Path(source).with_suffix(""))
        placeholder = PasteNode(
            self.arguments[0], "directive", location=(stem_path, reported_line)
        )
        return [placeholder]
| 153 | + |
| 154 | + |
def paste_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
    """Role function that emits a ``PasteNode`` placeholder for ``text``.

    Returns the usual role result: a list with the placeholder node (kind
    ``"role"``) and an empty list of system messages. ``name``, ``rawtext``,
    ``options`` and ``content`` are accepted but not used.
    """
    source = inliner.document.current_source

    # Line numbers are unreliable for notebooks, so drop them there.
    reported_line = None if source.endswith(".ipynb") else lineno

    # The location used in logs carries the path without its file suffix.
    location = (str(Path(source).with_suffix("")), reported_line)
    placeholder = PasteNode(text, "role", location=location)
    return [placeholder], []
| 162 | + |
| 163 | + |
| 164 | +# Transform to replace nodes with outputs |
class PasteNode(nodes.container):
    """Placeholder for a glued output in the doctree, to be transformed later.

    Stores the glue ``key``, the ``kind`` of construct that created it and an
    optional ``location`` for log messages.
    """

    def __init__(self, key, kind, location=None, rawsource="", *children, **attributes):
        # Only the keyword attributes reach the container; ``rawsource`` and
        # ``children`` are accepted but not forwarded.
        super().__init__("", **attributes)
        self.location = location
        self.kind = kind
        self.key = key
| 173 | + |
| 174 | + |
class PasteNodesToDocutils(SphinxTransform):
    """Replace each ``PasteNode`` in the document with nodes built from glue data.

    Glued outputs are read from ``self.app.env.glue_data`` (populated
    elsewhere; presumably a mapping of glue key -> notebook output).
    A node that cannot be resolved is reported through the Sphinx logger and
    left in place rather than replaced.
    """

    default_priority = 699  # must be applied before CellOutputsToNodes

    def apply(self):
        """Resolve every ``PasteNode`` of the current document."""
        glue_data = self.app.env.glue_data

        # Snapshot the matches: nodes are swapped out of the tree inside the loop.
        for paste_node in list(self.document.traverse(PasteNode)):

            # First check if we have both key:format in the key
            parts = paste_node.key.rsplit(":", 1)
            if len(parts) == 2:
                key, formatting = parts
            else:
                key = parts[0]
                formatting = None

            if key not in glue_data:
                SPHINX_LOGGER.warning(
                    f"Couldn't find key `{key}` in keys defined across all pages.",
                    location=paste_node.location,
                )
                continue

            # Grab the output for this key and replace `glue` specific prefix info
            output = glue_data.get(key).copy()
            output["data"] = {
                mime.replace(GLUE_PREFIX, ""): val
                for mime, val in output["data"].items()
            }

            # Roles will be parsed as text, with some formatting fanciness
            if paste_node.kind == "role":
                # Currently only plain text is supported
                if "text/plain" not in output["data"]:
                    SPHINX_LOGGER.warning(
                        f"Couldn't find compatible output format for key `{key}`",
                        location=paste_node.location,
                    )
                    # Nothing to substitute: skip instead of reusing a stale or
                    # undefined replacement node.
                    continue
                text = output["data"]["text/plain"].strip("'")
                # If formatting is specified, see if we have a number of some kind
                if formatting:
                    try:
                        newtext = float(text)
                        text = f"{newtext:>{formatting}}"
                    except ValueError:
                        # Not a number (or unusable format spec): keep raw text.
                        pass
                out_node = nodes.inline(text, text, classes=["pasted-text"])
            # Directives will have the whole output chunk deposited and rendered later
            elif paste_node.kind == "directive":
                output_node = CellOutputBundleNode(outputs=[output])
                out_node = CellNode()
                out_node += CellInputNode()
                out_node += output_node
            else:
                SPHINX_LOGGER.error(
                    (
                        "`kind` must by one of `role` or `directive`,"
                        f"not `{paste_node.kind}`"
                    ),
                    location=paste_node.location,
                )
                # No replacement exists for an unknown kind.
                continue

            paste_node.replace_self(out_node)
0 commit comments