diff --git a/examples/Finding and Correcting Mistakes.ipynb b/examples/Finding and Correcting Mistakes.ipynb
new file mode 100644
index 0000000..d479773
--- /dev/null
+++ b/examples/Finding and Correcting Mistakes.ipynb
@@ -0,0 +1,1321 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1e19f5f8",
+ "metadata": {},
+ "source": [
+ "# **Finding and Correcting Mistakes with FiftyOne**\n",
+ "\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc32a348",
+ "metadata": {},
+ "source": [
+ "## Find and Removing Duplicates"
+ ]
+ },
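+ {
+ "cell_type": "markdown",
+ "id": "a3f9d210",
+ "metadata": {},
+ "source": [
+ "The cells below load the CIFAR-10 test split, use the FiftyOne Brain to rank samples by uniqueness, tag near-duplicates for removal in the App, and export the cleaned dataset."
+ ]
+ },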
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "4a038291",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/dan/.local/lib/python3.10/site-packages/scipy/__init__.py:132: UserWarning: A NumPy version >=1.21.6 and <1.28.0 is required for this version of SciPy (detected version 1.21.3)\n",
+ " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Split 'test' already downloaded\n",
+ "Loading 'cifar10' split 'test'\n",
+ " 100% |█████████████| 10000/10000 [2.9s elapsed, 0s remaining, 3.4K samples/s] \n",
+ "Dataset 'cifar10-test' created\n"
+ ]
+ }
+ ],
+ "source": [
+ "import fiftyone as fo\n",
+ "import fiftyone.zoo as foz\n",
+ "\n",
+ "# Load the CIFAR-10 test split\n",
+ "# Downloads the dataset from the web if necessary\n",
+ "dataset = foz.load_zoo_dataset(\"cifar10\", split=\"test\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "71baae73",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/dan/.local/lib/python3.10/site-packages/scipy/__init__.py:132: UserWarning: A NumPy version >=1.21.6 and <1.28.0 is required for this version of SciPy (detected version 1.21.3)\n",
+ " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "session = fo.launch_app(dataset)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "806e5c7e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing embeddings...\n",
+ " 0% ||------------| 16/10000 [3.1s elapsed, 32.6m remaining, 5.1 samples/s] "
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/dan/Documents/fiftyone/fiftyone/utils/torch.py:695: RuntimeWarning: overflow encountered in exp\n",
+ " odds = np.exp(logits)\n",
+ "/home/dan/Documents/fiftyone/fiftyone/utils/torch.py:696: RuntimeWarning: invalid value encountered in true_divide\n",
+ " odds /= np.sum(odds, axis=1, keepdims=True)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 100% |█████████████| 10000/10000 [5.7s elapsed, 0s remaining, 4.3K samples/s] \n",
+ "Computing uniqueness...\n",
+ "Uniqueness computation complete\n"
+ ]
+ }
+ ],
+ "source": [
+ "import fiftyone.brain as fob\n",
+ "\n",
+ "fob.compute_uniqueness(dataset)"
+ ]
+ },
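+ {
+ "cell_type": "markdown",
+ "id": "b7e41c55",
+ "metadata": {},
+ "source": [
+ "`compute_uniqueness()` populates a scalar `uniqueness` field in `[0, 1]` on each sample (higher means more unique). A quick sanity check, assuming the cell above completed:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b7e41c56",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Peek at the computed `uniqueness` field\n",
+ "print(dataset.bounds(\"uniqueness\"))  # (min, max) over all samples\n",
+ "print(dataset.first().uniqueness)"
+ ]
+ },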
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "cd1e0585",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Sort in increasing order of uniqueness (least unique first)\n",
+ "dups_view = dataset.sort_by(\"uniqueness\")\n",
+ "\n",
+ "# Open view in the App\n",
+ "session.view = dups_view"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "cf4265ba",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Get currently selected images from App\n",
+ "dup_ids = session.selected\n",
+ "\n",
+ "# Mark as duplicates\n",
+ "dups_view = dataset.select(dup_ids)\n",
+ "dups_view.tag_samples(\"dups\")\n",
+ "\n",
+ "# Visualize duplicates-only in App\n",
+ "session.view = dups_view"
+ ]
+ },
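+ {
+ "cell_type": "markdown",
+ "id": "c9d20481",
+ "metadata": {},
+ "source": [
+ "Selecting duplicates by hand in the App works well for small batches. Alternatively, you can tag duplicate candidates programmatically; a minimal sketch, where the `0.2` uniqueness threshold is an assumption you would tune for your data:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c9d20482",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from fiftyone import ViewField as F\n",
+ "\n",
+ "# Hypothetical threshold: tag the least-unique samples as duplicate candidates\n",
+ "dataset.match(F(\"uniqueness\") < 0.2).tag_samples(\"dups\")"
+ ]
+ },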
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "5e6dba02",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from fiftyone import ViewField as F\n",
+ "clean_view = dataset.sort_by(\"uniqueness\").match_tags(\"dups\", bool=False)\n",
+ "\n",
+ "# Open view in the App\n",
+ "session.view = clean_view\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cfa84211",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import fiftyone as fo\n",
+ "\n",
+ "export_dir = \"/path/for/image-classification-dir-tree\"\n",
+ "label_field = \"ground_truth\" # for example\n",
+ "\n",
+ "# Export the dataset\n",
+ "clean_view.export(\n",
+ " export_dir=export_dir,\n",
+ " dataset_type=fo.types.ImageClassificationDirectoryTree,\n",
+ " label_field=label_field,\n",
+ ")"
+ ]
+ },
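+ {
+ "cell_type": "markdown",
+ "id": "d4a7e9b2",
+ "metadata": {},
+ "source": [
+ "To verify the round trip, the exported directory tree can be reloaded as a new dataset. A sketch, assuming the export above was run with a real `export_dir`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d4a7e9b3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Re-import the exported classification directory tree\n",
+ "clean_dataset = fo.Dataset.from_dir(\n",
+ "    dataset_dir=export_dir,\n",
+ "    dataset_type=fo.types.ImageClassificationDirectoryTree,\n",
+ ")\n",
+ "print(clean_dataset)"
+ ]
+ },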
+ {
+ "cell_type": "markdown",
+ "id": "45831de7",
+ "metadata": {},
+ "source": [
+ "## Finding Classification Mistakes"
+ ]
+ },
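+ {
+ "cell_type": "markdown",
+ "id": "e2b8f310",
+ "metadata": {},
+ "source": [
+ "This section artificially corrupts 10% of the CIFAR-10 ground truth labels, runs a pretrained ResNet-50 over a subset of the samples, and then uses the FiftyOne Brain's `compute_mistakenness()` to surface the corrupted labels."
+ ]
+ },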
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "8eb5b338",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Cloning into 'PyTorch_CIFAR10'...\n",
+ "remote: Enumerating objects: 82, done.\u001b[K\n",
+ "remote: Counting objects: 100% (82/82), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (57/57), done.\u001b[K\n",
+ "remote: Total 82 (delta 13), reused 60 (delta 12), pack-reused 0\u001b[K\n",
+ "Receiving objects: 100% (82/82), 5.16 MiB | 14.05 MiB/s, done.\n",
+ "Resolving deltas: 100% (13/13), done.\n",
+ "Note: switching to '2a2e76a56f943b70403796387d968704e74971ae'.\n",
+ "\n",
+ "You are in 'detached HEAD' state. You can look around, make experimental\n",
+ "changes and commit them, and you can discard any commits you make in this\n",
+ "state without impacting any branches by switching back to a branch.\n",
+ "\n",
+ "If you want to create a new branch to retain commits you create, you may\n",
+ "do so (now or later) by using -c with the switch command. Example:\n",
+ "\n",
+ " git switch -c \n",
+ "\n",
+ "Or undo this operation with:\n",
+ "\n",
+ " git switch -\n",
+ "\n",
+ "Turn off this advice by setting config variable advice.detachedHead to false\n",
+ "\n",
+ "/home/dan/.local/lib/python3.10/site-packages/scipy/__init__.py:132: UserWarning: A NumPy version >=1.21.6 and <1.28.0 is required for this version of SciPy (detected version 1.21.3)\n",
+ " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n",
+ "Downloading '1dGfpeFK_QG0kV-U6QDHMX2EOGXPqaNzu' to 'PyTorch_CIFAR10/cifar10_models/state_dicts/resnet50.pt'\n",
+ " "
+ ]
+ }
+ ],
+ "source": [
+ "# Download the software\n",
+ "!git clone --depth 1 --branch v2.1 https://github.com/huyvnphan/PyTorch_CIFAR10.git\n",
+ "\n",
+ "# Download the pretrained model (90MB)\n",
+ "!eta gdrive download --public \\\n",
+ " 1dGfpeFK_QG0kV-U6QDHMX2EOGXPqaNzu \\\n",
+ " PyTorch_CIFAR10/cifar10_models/state_dicts/resnet50.pt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "8bc39b13",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Split 'test' already downloaded\n",
+ "Loading existing dataset 'cifar10-test'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use\n",
+ " 100% |███████████████████| 50/50 [5.2s elapsed, 0s remaining, 10.3 samples/s] \n"
+ ]
+ }
+ ],
+ "source": [
+ "import sys\n",
+ "\n",
+ "import numpy as np\n",
+ "import torch\n",
+ "import torchvision\n",
+ "from torch.utils.data import DataLoader\n",
+ "\n",
+ "import fiftyone.utils.torch as fout\n",
+ "\n",
+ "sys.path.append(\"./PyTorch_CIFAR10\")\n",
+ "from cifar10_models import resnet50\n",
+ "\n",
+ "import random\n",
+ "\n",
+ "import fiftyone as fo\n",
+ "import fiftyone.zoo as foz\n",
+ "\n",
+ "# Load the CIFAR-10 test split\n",
+ "# Downloads the dataset from the web if necessary\n",
+ "dataset = foz.load_zoo_dataset(\"cifar10\", split=\"test\")\n",
+ "\n",
+ "# Get the CIFAR-10 classes list\n",
+ "classes = dataset.default_classes\n",
+ "\n",
+ "# Artificially corrupt 10% of the labels\n",
+ "_num_mistakes = int(0.1 * len(dataset))\n",
+ "for sample in dataset.take(_num_mistakes):\n",
+ " mistake = random.randint(0, 9)\n",
+ " while classes[mistake] == sample.ground_truth.label:\n",
+ " mistake = random.randint(0, 9)\n",
+ "\n",
+ " sample.tags.append(\"mistake\")\n",
+ " sample.ground_truth = fo.Classification(label=classes[mistake])\n",
+ " sample.save()\n",
+ " \n",
+ "\n",
+ "def make_cifar10_data_loader(image_paths, sample_ids, batch_size):\n",
+ " mean = [0.4914, 0.4822, 0.4465]\n",
+ " std = [0.2023, 0.1994, 0.2010]\n",
+ " transforms = torchvision.transforms.Compose(\n",
+ " [\n",
+ " torchvision.transforms.ToTensor(),\n",
+ " torchvision.transforms.Normalize(mean, std),\n",
+ " ]\n",
+ " )\n",
+ " dataset = fout.TorchImageDataset(\n",
+ " image_paths, sample_ids=sample_ids, transform=transforms\n",
+ " )\n",
+ " return DataLoader(dataset, batch_size=batch_size, num_workers=4)\n",
+ "\n",
+ "\n",
+ "def predict(model, imgs):\n",
+ " logits = model(imgs).detach().cpu().numpy()\n",
+ " predictions = np.argmax(logits, axis=1)\n",
+ " odds = np.exp(logits)\n",
+ " confidences = np.max(odds, axis=1) / np.sum(odds, axis=1)\n",
+ " return predictions, confidences, logits\n",
+ "\n",
+ "\n",
+ "#\n",
+ "# Load a model\n",
+ "#\n",
+ "# Model performance numbers are available at:\n",
+ "# https://github.com/huyvnphan/PyTorch_CIFAR10\n",
+ "#\n",
+ "\n",
+ "model = resnet50(pretrained=True)\n",
+ "model_name = \"resnet50\"\n",
+ "\n",
+ "#\n",
+ "# Extract a few images to process\n",
+ "# (some of these will have been manipulated above)\n",
+ "#\n",
+ "\n",
+ "num_samples = 1000\n",
+ "batch_size = 20\n",
+ "view = dataset.take(num_samples)\n",
+ "image_paths, sample_ids = zip(*[(s.filepath, s.id) for s in view.iter_samples()])\n",
+ "data_loader = make_cifar10_data_loader(image_paths, sample_ids, batch_size)\n",
+ "\n",
+ "#\n",
+ "# Perform prediction and store results in dataset\n",
+ "#\n",
+ "\n",
+ "with fo.ProgressBar() as pb:\n",
+ " for imgs, sample_ids in pb(data_loader):\n",
+ " predictions, _, logits_ = predict(model, imgs)\n",
+ "\n",
+ " # Add predictions to your FiftyOne dataset\n",
+ " for sample_id, prediction, logits in zip(sample_ids, predictions, logits_):\n",
+ " sample = dataset[sample_id]\n",
+ " sample.tags.append(\"processed\")\n",
+ " sample[model_name] = fo.Classification(\n",
+ " label=classes[prediction], logits=logits,\n",
+ " )\n",
+ " sample.save()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "b8f2d617",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Added predictions to 1900 samples\n",
+ "366 of these samples have label mistakes\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Count the number of samples with the `processed` tag\n",
+ "num_processed = len(dataset.match_tags(\"processed\"))\n",
+ "\n",
+ "# Count the number of samples with both `processed` and `mistake` tags\n",
+ "num_corrupted = len(dataset.match_tags(\"processed\").match_tags(\"mistake\"))\n",
+ "\n",
+ "print(\"Added predictions to %d samples\" % num_processed)\n",
+ "print(\"%d of these samples have label mistakes\" % num_corrupted)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "6a5d392e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Computing mistakenness...\n",
+ " 100% |███████████████| 1900/1900 [2.5s elapsed, 0s remaining, 833.6 samples/s] \n",
+ "Mistakenness computation complete\n"
+ ]
+ }
+ ],
+ "source": [
+ "import fiftyone.brain as fob\n",
+ "\n",
+ "# Get samples for which we added predictions\n",
+ "h_view = dataset.match_tags(\"processed\")\n",
+ "\n",
+ "# Compute mistakenness\n",
+ "fob.compute_mistakenness(h_view, model_name, label_field=\"ground_truth\", use_logits=True)"
+ ]
+ },
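+ {
+ "cell_type": "markdown",
+ "id": "f6c1a2d7",
+ "metadata": {},
+ "source": [
+ "`compute_mistakenness()` populates a `mistakenness` field on each sample; higher scores indicate labels that are more likely to be wrong. A quick peek at the range, assuming the cell above completed:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f6c1a2d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Range of the computed mistakenness scores\n",
+ "print(h_view.bounds(\"mistakenness\"))"
+ ]
+ },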
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "a065c98a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Sort by likelihood of mistake (most likely first)\n",
+ "mistake_view = (dataset\n",
+ " .match_tags(\"processed\")\n",
+ " .sort_by(\"mistakenness\", reverse=True)\n",
+ ")\n",
+ "# Show only the samples for which we added label mistakes\n",
+ "session.view = mistake_view"
+ ]
+ },
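+ {
+ "cell_type": "markdown",
+ "id": "0a9b3c4d",
+ "metadata": {},
+ "source": [
+ "Since we know exactly which labels were corrupted (the `mistake` tag), we can check how well the ranking surfaces them. A minimal sketch:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0a9b3c4e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Of the top `num_corrupted` most-mistaken samples, how many are known mistakes?\n",
+ "top_view = mistake_view.limit(num_corrupted)\n",
+ "num_found = len(top_view.match_tags(\"mistake\"))\n",
+ "print(\"%d of the top %d samples are known mistakes\" % (num_found, num_corrupted))"
+ ]
+ },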
+ {
+ "cell_type": "markdown",
+ "id": "1ab2eb66",
+ "metadata": {},
+ "source": [
+ "## Find and Correcting Detection Mistakes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "0705c23f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Overwriting existing directory '/home/dan/fiftyone/coco-2017/validation'\n",
+ "Downloading split 'validation' to '/home/dan/fiftyone/coco-2017/validation' if necessary\n",
+ "Found annotations at '/home/dan/fiftyone/coco-2017/raw/instances_val2017.json'\n",
+ "Downloading 1000 images\n",
+ " 100% |████████████████| 1000/1000 [13.0s elapsed, 0s remaining, 61.9 images/s] \n",
+ "Writing annotations for 1000 downloaded samples to '/home/dan/fiftyone/coco-2017/validation/labels.json'\n",
+ "Dataset info written to '/home/dan/fiftyone/coco-2017/info.json'\n",
+ "Loading 'coco-2017' split 'validation'\n",
+ " 100% |███████████████| 1000/1000 [3.2s elapsed, 0s remaining, 316.4 samples/s] \n",
+ "Dataset 'Find Mistakes' created\n"
+ ]
+ }
+ ],
+ "source": [
+ "dataset = foz.load_zoo_dataset(\"coco-2017\", split=\"validation\", max_samples=1000, overwrite=True, dataset_name=\"Find Mistakes\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "c2e829c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 100% |███████████████| 1000/1000 [1.5s elapsed, 0s remaining, 694.9 samples/s] \n",
+ "Max IoU range: (0.000000, 0.951640)\n"
+ ]
+ }
+ ],
+ "source": [
+ "import fiftyone.utils.iou as foui\n",
+ "\n",
+ "foui.compute_max_ious(dataset, \"ground_truth\", iou_attr=\"max_iou\", classwise=True)\n",
+ "print(\"Max IoU range: (%f, %f)\" % dataset.bounds(\"ground_truth.detections.max_iou\"))"
+ ]
+ },
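+ {
+ "cell_type": "markdown",
+ "id": "1b2c3d4e",
+ "metadata": {},
+ "source": [
+ "With `classwise=True`, IoUs are only computed between objects with the same label, so each detection's `max_iou` attribute records its highest overlap with another object of its own class. The cell below filters to the detections whose `max_iou` exceeds a chosen threshold."
+ ]
+ },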
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "6f3c4764",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dataset: Find Mistakes\n",
+ "Media type: image\n",
+ "Num samples: 7\n",
+ "Sample fields:\n",
+ " id: fiftyone.core.fields.ObjectIdField\n",
+ " filepath: fiftyone.core.fields.StringField\n",
+ " tags: fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)\n",
+ " metadata: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)\n",
+ " ground_truth: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)\n",
+ "View stages:\n",
+ " 1. FilterLabels(field='ground_truth', filter={'$gt': ['$$this.max_iou', 0.75]}, only_matches=True, trajectories=False)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Retrieve detections that overlap above a chosen threshold\n",
+ "dups_view = dataset.filter_labels(\"ground_truth\", F(\"max_iou\") > 0.75)\n",
+ "print(dups_view)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "80870d02",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "session.view = dups_view"
+ ]
+ },
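+ {
+ "cell_type": "markdown",
+ "id": "2c3d4e5f",
+ "metadata": {},
+ "source": [
+ "In the App, select the spurious overlapping boxes and tag them as `dups`; the next cell deletes every label carrying that tag."
+ ]
+ },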
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "695d5c14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset.delete_labels(tags=\"dups\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "3b1aac63",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Verify that tagged labels were deleted\n",
+ "print(dataset.count_label_tags())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "de5d8dfc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "

\n",
+ "
\n",
+ "\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "session.view = dups_view"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "27da799c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 0% ||--------------| 0/1000 [15.5ms elapsed, ? remaining, ? samples/s] "
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 100% |███████████████| 1000/1000 [1.6s elapsed, 0s remaining, 646.2 samples/s] \n",
+ "Found 6 duplicates\n"
+ ]
+ }
+ ],
+ "source": [
+ "dup_ids = foui.find_duplicates(\n",
+ " dataset, \"ground_truth\", iou_thresh=0.75, classwise=True\n",
+ ")\n",
+ "print(\"Found %d duplicates\" % len(dup_ids))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "56592e9a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Cleanup any previous tags (if necessary)\n",
+ "dataset.untag_labels(\"dups\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "910615cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Tag the automatically selected duplicates\n",
+ "dataset.select_labels(ids=dup_ids).tag_labels(\"duplicate\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "0b97a826",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "session.view = dataset.match_labels(ids=dup_ids)"
+ ]
+ },
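+ {
+ "cell_type": "markdown",
+ "id": "3d4e5f60",
+ "metadata": {},
+ "source": [
+ "From here you can either delete the tagged duplicates outright, as in the manual workflow above, or hand them to an annotation tool for editing (the remaining cells). A sketch of the first option, applied to a clone so the annotation workflow below still has duplicates to work with:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d4e5f61",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Delete the tagged duplicates on a clone of the dataset\n",
+ "cleaned = dataset.clone(\"Find Mistakes (cleaned)\")\n",
+ "cleaned.delete_labels(tags=\"duplicate\")\n",
+ "print(cleaned.count_label_tags())  # should be {}"
+ ]
+ },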
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0b80780",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "anno_key = \"remove_dups\"\n",
+ "dups_view.annotate(anno_key, label_field=\"ground_truth\", launch_editor=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "51cf455f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dataset.load_annotations(anno_key)"
+ ]
+ },
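+ {
+ "cell_type": "markdown",
+ "id": "4e5f6071",
+ "metadata": {},
+ "source": [
+ "Once the edited labels have been merged back, the run record can be deleted. A minimal cleanup sketch (this removes the run bookkeeping, not the labels themselves):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4e5f6072",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Remove the annotation run record from the dataset\n",
+ "dataset.delete_annotation_run(anno_key)"
+ ]
+ }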
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}