|
1 | 1 | from collections import defaultdict |
| 2 | +from copy import deepcopy |
2 | 3 | from datetime import datetime |
3 | 4 | from itertools import count |
4 | 5 | from json import dumps |
5 | 6 |
|
| 7 | +from nbdime import diff |
| 8 | + |
6 | 9 | from redspot import load |
7 | 10 |
|
8 | 11 |
|
def main(path, outdir):
    """Export the filtered notebook snapshots recorded at *path* into *outdir*.

    The records produced by ``load(path)`` are passed through
    ``_filter_by_kind``, ``_filter_by_diff`` and ``_filter_void``, in that
    order. Each surviving ``(panel, notebook)`` pair is serialised as JSON to
    ``<outdir>/<YYYY-mmdd-HHMM>_<uuid>/<n>.ipynb``, where ``n`` is a per-panel
    counter starting at 0.

    Args:
        path: Location of the recording; forwarded unchanged to ``load``.
        outdir: Base output directory. It must support ``/`` joining and the
            ``mkdir`` / ``write_text`` methods (e.g. ``pathlib.Path``).

    Returns:
        Always ``0``.

    Raises:
        ValueError: If a panel id contains no hyphen or its leading part is
            not an integer.
    """
    counters = defaultdict(count)
    stream = load(path)
    stream = _filter_by_kind(stream)
    stream = _filter_by_diff(stream)
    stream = _filter_void(stream)
    for panel, data in stream:
        # Split on the first hyphen only: the leading part is the timestamp,
        # everything after it is the id, which may itself contain hyphens.
        time, uuid = panel.split("-", 1)
        # The timestamp is in milliseconds; fromtimestamp() expects seconds.
        ymdt = datetime.fromtimestamp(int(time) / 1000)
        # Format the date first and append the id afterwards so that a "%"
        # inside the id is never interpreted as a strftime directive.
        outd = outdir / f"{ymdt:%Y-%m%d-%H%M}_{uuid}"
        name = f"{next(counters[panel])}.ipynb"
        outd.mkdir(parents=True, exist_ok=True)
        (outd / name).write_text(dumps(data))
    return 0
| 26 | + |
| 27 | + |
def _filter_void(stream):
    """Drop panels that appear only once in *stream*.

    Each incoming notebook is deep-copied and held back until either another
    notebook for the same panel arrives or the stream ends. A panel seen a
    single time is never emitted; for every other panel all of its notebooks
    are emitted, the final one of each panel only after the stream is
    exhausted.

    Args:
        stream: Iterable of ``(panel, notebook)`` pairs.

    Yields:
        ``(panel, notebook_copy)`` pairs; the notebooks are deep copies of the
        ones received.
    """
    held = {}          # panel -> copy of the most recent notebook, not yet emitted
    repeated = set()   # panels that have been seen at least twice
    for panel, notebook in stream:
        if panel in held:
            repeated.add(panel)
            # A newer snapshot arrived, so the held one can be released.
            yield panel, held[panel]
        held[panel] = deepcopy(notebook)
    # Flush the last snapshot of every panel that was seen more than once.
    for panel, snapshot in held.items():
        if panel in repeated:
            yield panel, snapshot
| 39 | + |
| 40 | + |
def _filter_by_diff(stream):
    """Pass on only the notebooks that visibly differ from their predecessor.

    For each panel the previous notebook is remembered as a deep copy; the
    first notebook of a panel is compared against an empty notebook
    (``{"cells": [], "metadata": {}}``). The remembered copy is refreshed on
    every record, whether or not the record is emitted.

    Args:
        stream: Iterable of ``(panel, notebook)`` pairs.

    Yields:
        The ``(panel, notebook)`` pairs for which ``_has_visible_change``
        accepts the diff against the previously seen notebook of that panel.
    """
    previous = {}
    for panel, notebook in stream:
        if panel in previous:
            before = previous[panel]
        else:
            before = {"cells": [], "metadata": {}}
        changes = diff(before, notebook)
        # Snapshot before yielding so later mutation by the consumer cannot
        # alter the baseline used for the next comparison.
        previous[panel] = deepcopy(notebook)
        if not _has_visible_change(changes):
            continue
        yield panel, notebook
| 48 | + |
| 49 | + |
def _filter_by_kind(stream):
    """Decide, from the event kind, when a panel's notebook is emitted.

    The latest notebook of every panel is kept as pending. It is emitted (and
    removed from the pending set) when the event kind is listed in
    ``_yield_immediately``, or when it is listed in ``_yield_if_cell_changed``
    and the cell-change detector answers true for ``(panel, args)``. Whatever
    is still pending when the stream ends is emitted last.

    Args:
        stream: Iterable of 5-tuples ``(_, panel, kind, args, notebook)``; the
            first element is ignored.

    Yields:
        ``(panel, notebook)`` pairs.
    """
    pending = {}
    switched_cell = _cell_change_detector()
    for _, panel, kind, args, notebook in stream:
        pending[panel] = notebook
        if kind in _yield_immediately:
            emit = True
        elif kind in _yield_if_cell_changed:
            emit = switched_cell(panel, args)
        else:
            emit = False
        if emit:
            yield panel, pending.pop(panel)
    # Flush the notebooks that never met an emitting event.
    yield from pending.items()
| 61 | + |
| 62 | + |
def _has_visible_change(delta):
    """Tell whether *delta* holds a ``"patch"`` operation on the ``"cells"`` key.

    Args:
        delta: Iterable of mappings, each providing an ``"op"`` and a ``"key"``
            entry.

    Returns:
        ``True`` as soon as one entry has ``op == "patch"`` and
        ``key == "cells"``; ``False`` if none does (including an empty delta).
    """
    wanted = ("patch", "cells")
    # Both fields are read for every inspected entry, matched as a pair.
    return any((entry["op"], entry["key"]) == wanted for entry in delta)
| 69 | + |
| 70 | + |
def _cell_change_detector():
    """Build a stateful predicate that reports when a panel's cell changes.

    Returns:
        A callable ``detector(panel, args)``. It compares ``args.get("cell")``
        with the value remembered from the previous call for the same panel
        and returns ``True`` only when the two differ, updating the remembered
        value. The first call for a panel has nothing to compare against: it
        records the cell and returns ``False``.
    """
    last_cell = {}

    def detector(panel, args):
        cell = args.get("cell")
        if panel not in last_cell:
            # Record the first cell seen for this panel. Without this the
            # mapping stays empty forever and no change is ever reported.
            last_cell[panel] = cell
            return False
        if last_cell[panel] != cell:
            last_cell[panel] = cell
            return True
        return False

    return detector
| 83 | + |
| 84 | + |
# Event kinds for which _filter_by_kind emits the panel's notebook as soon as
# the event is seen.
_yield_immediately = (
    "INotebookModel.changed:cellsChange",
    "ISharedCell.changed:executionCountChange",
)

# Event kinds for which _filter_by_kind emits the panel's notebook only when
# its cell-change detector returns true for the event's panel and args.
_yield_if_cell_changed = (
    "ISharedCell.changed:attachmentsChange",
    "ISharedCell.changed:outputsChange",
    "ISharedCell.changed:sourceChange",
)
0 commit comments