From 8931ac0c92e36652520ae285a9985e884faeef9d Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Mon, 18 Aug 2025 16:15:26 +0200 Subject: [PATCH 01/10] updated loading in main demo to use transformers bridge --- demos/Main_Demo.ipynb | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/demos/Main_Demo.ipynb b/demos/Main_Demo.ipynb index 41853de67..a2da5c2bb 100644 --- a/demos/Main_Demo.ipynb +++ b/demos/Main_Demo.ipynb @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -162,7 +162,7 @@ "from transformer_lens.hook_points import (\n", " HookPoint,\n", ") # Hooking utilities\n", - "from transformer_lens import HookedTransformer, FactoredMatrix" + "from transformer_lens.model_bridge import TransformerBridge" ] }, { @@ -249,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -258,27 +258,22 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Using pad_token, but it is not set yet.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded pretrained model gpt2-small into HookedTransformer\n" + "The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n", + "The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n" ] } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", - "model = HookedTransformer.from_pretrained(\"gpt2-small\", device=device)" + "model = TransformerBridge.boot_transformers(\"gpt2-small\", device=device)\n", + "model.enable_compatibility_mode()" ] }, { @@ -2996,7 +2991,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.12.7" }, "orig_nbformat": 4, "vscode": { From 9c51e28dc1ae4765a29e191a45e69127c27edcca Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Fri, 22 Aug 2025 23:42:40 +0200 Subject: [PATCH 02/10] updated model name --- demos/Main_Demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/Main_Demo.ipynb b/demos/Main_Demo.ipynb index a2da5c2bb..b28b90493 100644 --- a/demos/Main_Demo.ipynb +++ b/demos/Main_Demo.ipynb @@ -272,7 +272,7 @@ ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", - "model = TransformerBridge.boot_transformers(\"gpt2-small\", device=device)\n", + "model = TransformerBridge.boot_transformers(\"gpt2\", device=device)\n", "model.enable_compatibility_mode()" ] }, From 832264f601017ada9bb0ab7f89502dd013476f86 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 14 Oct 2025 10:25:14 +0200 Subject: [PATCH 03/10] updated imports --- demos/Main_Demo.ipynb | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/demos/Main_Demo.ipynb b/demos/Main_Demo.ipynb index b28b90493..8552928f9 100644 --- a/demos/Main_Demo.ipynb +++ b/demos/Main_Demo.ipynb @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -162,6 +162,7 @@ "from transformer_lens.hook_points import (\n", " HookPoint,\n", ") # Hooking utilities\n", + "from transformer_lens import FactoredMatrix\n", "from transformer_lens.model_bridge import TransformerBridge" ] }, @@ -1004,7 +1005,7 @@ }, { "cell_type": "code", - "execution_count": 312, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1024,7 +1025,7 @@ ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", - "distilgpt2 = HookedTransformer.from_pretrained(\"distilgpt2\", device=device)" + "distilgpt2 = TransformerBridge.boot_transformers(\"distilgpt2\", device=device)" ] }, { @@ -2977,7 +2978,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": "transformer-lens-MmxDhO8d-py3.11", "language": "python", "name": "python3" }, @@ -2991,14 +2992,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.11.9" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "eb812820b5094695c8a581672e17220e30dd2c15d704c018326e3cc2e1a566f1" - } - } + "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 From 8f4f7f0dc5721281d9e360c3cf30854c35fd0e2d Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 14 Oct 2025 17:44:26 +0200 Subject: [PATCH 04/10] updated some cells --- demos/Main_Demo.ipynb | 67 ++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/demos/Main_Demo.ipynb b/demos/Main_Demo.ipynb index 8552928f9..505293f28 100644 --- a/demos/Main_Demo.ipynb +++ b/demos/Main_Demo.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -102,27 +102,27 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 13, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -135,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -175,16 +175,16 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 16, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -202,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 298, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -250,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -259,22 +259,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n", - "The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n" + "Loaded pretrained model gpt2 into HookedTransformer\n" ] } ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "model = TransformerBridge.boot_transformers(\"gpt2\", device=device)\n", - "model.enable_compatibility_mode()" + "model.enable_compatibility_mode(disable_warnings=True)" ] }, { @@ -291,14 +290,14 @@ }, { "cell_type": "code", - "execution_count": 301, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Model loss: tensor(4.1758)\n" + "Model loss: tensor(4.1763)\n" ] } ], @@ -329,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 302, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -363,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 303, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -384,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 304, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -397,26 +396,22 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 304, - "metadata": { - "text/html": { - "Content-Type": "text/html" - } - }, + "execution_count": 15, + "metadata": {}, "output_type": "execute_result" } ], From c05754d7b6ba557ec0531a0b3c04b485c21fe6d8 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 14 Oct 2025 18:45:07 +0200 Subject: [PATCH 05/10] reran demo --- demos/Main_Demo.ipynb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/demos/Main_Demo.ipynb b/demos/Main_Demo.ipynb index 505293f28..5007a5bf8 100644 --- a/demos/Main_Demo.ipynb +++ b/demos/Main_Demo.ipynb @@ -429,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -437,7 +437,7 @@ "attn_layer = 0\n", "_, gpt2_attn_cache = model.run_with_cache(gpt2_tokens, remove_batch_dim=True, stop_at_layer=attn_layer + 1, names_filter=[attn_hook_name])\n", "gpt2_attn = gpt2_attn_cache[attn_hook_name]\n", - "assert torch.equal(gpt2_attn, attention_pattern)" + "assert torch.allclose(gpt2_attn, attention_pattern)" ] }, { @@ -480,7 +480,7 @@ }, { "cell_type": "code", - "execution_count": 305, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -489,7 +489,7 @@ "text": [ "Shape of the value tensor: torch.Size([1, 33, 12, 64])\n", "Original Loss: 3.999\n", - "Ablated Loss: 5.453\n" + "Ablated Loss: 5.455\n" ] } ], @@ -559,15 +559,15 @@ }, { "cell_type": "code", - "execution_count": 306, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Clean logit difference: 4.276\n", - "Corrupted logit difference: -2.738\n" + "Clean logit difference: 4.278\n", + "Corrupted logit difference: -2.736\n" ] } ], From 1baeefa6ecb0bcd091dccfb5167c1b61d9907469 Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Tue, 14 Oct 2025 21:20:12 +0200 Subject: [PATCH 06/10] updated some cells --- demos/Main_Demo.ipynb | 115 +++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 51 deletions(-) diff --git a/demos/Main_Demo.ipynb b/demos/Main_Demo.ipynb index 5007a5bf8..21b5dd335 100644 --- a/demos/Main_Demo.ipynb +++ b/demos/Main_Demo.ipynb @@ -608,13 +608,23 @@ }, { "cell_type": "code", - "execution_count": 307, + "execution_count": 21, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "980e183587f54a03bb4ead134831c94d", + "model_id": "6e15ca66b5084627b12152102397b2af", "version_major": 2, "version_minor": 0 }, @@ -668,7 +678,7 @@ }, { "cell_type": "code", - "execution_count": 308, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -678,9 +688,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -754,7 +764,7 @@ }, { "cell_type": "code", - "execution_count": 309, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -764,9 +774,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -829,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 310, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -839,9 +849,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -913,32 +923,50 @@ }, { "cell_type": "code", - "execution_count": 311, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "metadata": { - "text/html": { - "Content-Type": "text/html" - } - }, + "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "tensor([[[-1.8241e-02, -1.6308e-01, 7.8980e-02, ..., -1.6379e-01,\n", + " -1.3369e-01, -2.0370e-01],\n", + " [ 2.0590e-01, -4.5152e-01, 3.8969e-05, ..., 9.6852e-02,\n", + " -3.2458e-01, 1.7931e-01],\n", + " [ 8.7596e-02, 1.1502e-01, -2.6019e-01, ..., 1.9008e-02,\n", + " 3.1360e-01, -1.5783e-01],\n", + " ...,\n", + " [ 5.1349e-01, 3.3470e-01, -3.0922e-01, ..., 5.6531e-01,\n", + " 5.3221e-01, -3.3371e-01],\n", + " [ 3.9106e-01, 1.1724e-01, 1.5566e-02, ..., 1.3414e-01,\n", + " 4.6405e-01, -1.5158e-01],\n", + " [ 4.1897e-01, -2.5493e-01, 1.6319e-01, ..., -5.1418e-02,\n", + " -5.8637e-02, -4.2457e-01]]])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1000,24 +1028,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using pad_token, but it is not set yet.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loaded pretrained model distilgpt2 into HookedTransformer\n" - ] - } - ], + "outputs": [], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "distilgpt2 = TransformerBridge.boot_transformers(\"distilgpt2\", device=device)" @@ -1025,7 +1038,7 @@ }, { "cell_type": "code", - "execution_count": 313, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1035,9 +1048,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -1216,7 +1229,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1224,16 +1237,16 @@ "output_type": "stream", "text": [ "blocks.0.attn.W_Q torch.Size([12, 768, 64])\n", - "blocks.0.attn.W_O torch.Size([12, 64, 768])\n", - "blocks.0.attn.b_Q torch.Size([12, 64])\n", - "blocks.0.attn.b_O torch.Size([768])\n", "blocks.0.attn.W_K torch.Size([12, 768, 64])\n", "blocks.0.attn.W_V torch.Size([12, 768, 64])\n", + "blocks.0.attn.W_O torch.Size([12, 64, 768])\n", + "blocks.0.attn.b_Q torch.Size([12, 64])\n", "blocks.0.attn.b_K torch.Size([12, 64])\n", "blocks.0.attn.b_V torch.Size([12, 64])\n", + "blocks.0.attn.b_O torch.Size([768])\n", "blocks.0.mlp.W_in torch.Size([768, 3072])\n", - "blocks.0.mlp.b_in torch.Size([3072])\n", "blocks.0.mlp.W_out torch.Size([3072, 768])\n", + "blocks.0.mlp.b_in torch.Size([3072])\n", "blocks.0.mlp.b_out torch.Size([768])\n" ] } From fbc07d25a6c7cc70891fc91209001c8bbc493dfe Mon Sep 17 00:00:00 2001 From: Bryce Meyer Date: Thu, 16 Oct 2025 03:12:42 +0200 Subject: [PATCH 07/10] reran some cells --- demos/Main_Demo.ipynb | 188 ++++++++++++++++++++---------------------- 1 file changed, 91 insertions(+), 97 deletions(-) diff --git a/demos/Main_Demo.ipynb b/demos/Main_Demo.ipynb index 21b5dd335..c53acd10e 100644 --- a/demos/Main_Demo.ipynb +++ b/demos/Main_Demo.ipynb @@ -108,18 +108,18 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -181,7 +181,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -250,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -259,9 +259,18 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`torch_dtype` is deprecated! Use `dtype` instead!\n", + "The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n", + "`torch_dtype` is deprecated! Use `dtype` instead!\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -290,14 +299,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Model loss: tensor(4.1763)\n" + "Model loss: tensor(4.1763, device='mps:0')\n" ] } ], @@ -328,14 +337,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "cpu\n" + "mps:0\n" ] } ], @@ -362,7 +371,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -383,7 +392,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -396,21 +405,21 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -429,7 +438,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -480,13 +489,14 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "Shape of the value tensor: torch.Size([1, 33, 12, 64])\n", "Shape of the value tensor: torch.Size([1, 33, 12, 64])\n", "Original Loss: 3.999\n", "Ablated Loss: 5.455\n" @@ -559,7 +569,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -608,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -624,7 +634,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6e15ca66b5084627b12152102397b2af", + "model_id": "9a99f95470e643318b60f868f7672fb2", "version_major": 2, "version_minor": 0 }, @@ -678,7 +688,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -688,9 +698,9 @@ "\n", "\n", "
\n", - "
\n", - "
\n", - "
" ], "text/plain": [ - "" + "" ] }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "tensor([[[-1.8241e-02, -1.6308e-01, 7.8980e-02, ..., -1.6379e-01,\n", - " -1.3369e-01, -2.0370e-01],\n", - " [ 2.0590e-01, -4.5152e-01, 3.8969e-05, ..., 9.6852e-02,\n", - " -3.2458e-01, 1.7931e-01],\n", - " [ 8.7596e-02, 1.1502e-01, -2.6019e-01, ..., 1.9008e-02,\n", - " 3.1360e-01, -1.5783e-01],\n", - " ...,\n", - " [ 5.1349e-01, 3.3470e-01, -3.0922e-01, ..., 5.6531e-01,\n", - " 5.3221e-01, -3.3371e-01],\n", - " [ 3.9106e-01, 1.1724e-01, 1.5566e-02, ..., 1.3414e-01,\n", - " 4.6405e-01, -1.5158e-01],\n", - " [ 4.1897e-01, -2.5493e-01, 1.6319e-01, ..., -5.1418e-02,\n", - " -5.8637e-02, -4.2457e-01]]])" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -1028,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -1038,7 +1026,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1048,9 +1036,9 @@ "\n", "\n", "
\n", - "
" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -181,7 +181,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -250,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -306,7 +306,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Model loss: tensor(4.1763, device='mps:0')\n" + "Model loss: tensor(4.1763)\n" ] } ], @@ -344,7 +344,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "mps:0\n" + "cpu\n" ] } ], @@ -405,18 +405,18 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 13, @@ -634,7 +634,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9a99f95470e643318b60f868f7672fb2", + "model_id": "3373800f248c4dcc8af26bf80b4df76c", "version_major": 2, "version_minor": 0 }, @@ -698,9 +698,9 @@ "\n", "\n", "
\n", - "
\n", - "
\n", - "
" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1036,9 +1036,9 @@ "\n", "\n", "
\n", - "
" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -181,7 +181,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 6, @@ -405,18 +405,18 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, "execution_count": 13, @@ -634,7 +634,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3373800f248c4dcc8af26bf80b4df76c", + "model_id": "b761eaa1ee6e4044915cc23b5b6bcc28", "version_major": 2, "version_minor": 0 }, @@ -698,9 +698,9 @@ "\n", "\n", "
\n", - "
\n", - "
\n", - "
" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1036,9 +1036,9 @@ "\n", "\n", "
\n", - "
" ], "text/plain": [ - "" + "" ] }, - "execution_count": 3, + "execution_count": 216, "metadata": {}, "output_type": "execute_result" } @@ -135,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 217, "metadata": {}, "outputs": [], "source": [ @@ -153,7 +153,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 218, "metadata": {}, "outputs": [], "source": [ @@ -175,16 +175,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 219, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 6, + "execution_count": 219, "metadata": {}, "output_type": "execute_result" } @@ -202,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 220, "metadata": {}, "outputs": [], "source": [ @@ -259,18 +259,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 222, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "`torch_dtype` is deprecated! Use `dtype` instead!\n", - "The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n", - "`torch_dtype` is deprecated! Use `dtype` instead!\n" - ] - }, { "name": "stdout", "output_type": "stream", @@ -299,7 +290,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 223, "metadata": {}, "outputs": [ { @@ -337,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 224, "metadata": {}, "outputs": [ { @@ -371,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 225, "metadata": {}, "outputs": [ { @@ -392,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 226, "metadata": {}, "outputs": [ { @@ -405,21 +396,21 @@ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 13, + "execution_count": 226, "metadata": {}, "output_type": "execute_result" } @@ -438,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 227, "metadata": {}, "outputs": [], "source": [ @@ -489,7 +480,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 228, "metadata": {}, "outputs": [ { @@ -569,7 +560,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 229, "metadata": {}, "outputs": [ { @@ -618,23 +609,13 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 230, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b761eaa1ee6e4044915cc23b5b6bcc28", + "model_id": "9a90bf287fc348f3a4f198f75180182a", "version_major": 2, "version_minor": 0 }, @@ -688,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 231, "metadata": {}, "outputs": [ { @@ -698,9 +679,9 @@ "\n", "\n", "
\n", - "
\n", - "
\n", - "
" ], "text/plain": [ - "" + "" ] }, "metadata": {}, @@ -1016,7 +997,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 235, "metadata": {}, "outputs": [], "source": [ @@ -1026,7 +1007,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 236, "metadata": {}, "outputs": [ { @@ -1036,9 +1017,9 @@ "\n", "\n", "
\n", - "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2162,7 +2143,7 @@ }, { "cell_type": "code", - "execution_count": 338, + "execution_count": 261, "metadata": {}, "outputs": [ { @@ -2172,9 +2153,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2218,7 +2199,7 @@ }, { "cell_type": "code", - "execution_count": 339, + "execution_count": 262, "metadata": {}, "outputs": [ { @@ -2236,7 +2217,7 @@ }, { "cell_type": "code", - "execution_count": 340, + "execution_count": 263, "metadata": {}, "outputs": [ { @@ -2256,7 +2237,7 @@ }, { "cell_type": "code", - "execution_count": 341, + "execution_count": 264, "metadata": {}, "outputs": [ { @@ -2266,9 +2247,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2313,7 +2294,7 @@ }, { "cell_type": "code", - "execution_count": 342, + "execution_count": 265, "metadata": {}, "outputs": [ { @@ -2323,9 +2304,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2362,7 +2343,7 @@ }, { "cell_type": "code", - "execution_count": 343, + "execution_count": 266, "metadata": {}, "outputs": [ { @@ -2399,30 +2380,16 @@ }, { "cell_type": "code", - "execution_count": 344, + "execution_count": 267, "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f16e699caef243e3bd730cd876600c4a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/50 [00:00\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2687,9 +2654,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2726,9 +2693,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2765,9 +2732,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2804,9 +2771,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ] @@ -2878,7 +2845,7 @@ }, { "cell_type": "code", - "execution_count": 349, + "execution_count": 272, "metadata": {}, "outputs": [], "source": [ @@ -2901,7 +2868,7 @@ }, { "cell_type": "code", - "execution_count": 350, + "execution_count": 273, "metadata": {}, "outputs": [], "source": [ @@ -2929,7 +2896,7 @@ }, { "cell_type": "code", - "execution_count": 351, + "execution_count": 274, "metadata": {}, "outputs": [ { @@ -2939,9 +2906,9 @@ "\n", "\n", "
\n", - "
\n", + " }) };
\n", "\n", "" ]