From 3b82bd47f9093f6b1d5d789b0dbd6c995761eeff Mon Sep 17 00:00:00 2001
From: xuhangscut <945440358@qq.com>
Date: Fri, 3 Jan 2025 15:06:36 +0800
Subject: [PATCH] update bert demo, add annotation

---
 .../2.BERT/bert_emotect_finetune.ipynb        | 676 ++++++------------
 .../2.BERT/bert_introduction.ipynb            |  37 +-
 2 files changed, 236 insertions(+), 477 deletions(-)
diff --git a/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb b/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb
index e6b51b7..c9ea7d0 100644
--- a/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb
+++ b/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb
@@ -4,312 +4,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# 基于MindSpore实现BERT对话情绪识别"
+    "# 基于MindSpore实现BERT对话情绪识别"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "该实验可进行在线体验，在线体验链接 (https://pangu.huaweicloud.com/gallery/asset-detail.html?id=5443b528-0dd5-4909-ac4f-1c9cf839e2aa)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 环境配置"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "> 此为在线运行平台配置python3.9的指南，如在其他环境平台运行案例，请根据实际情况修改如下代码"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "第一步：设置python版本为3.9.0"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%capture captured_output\n",
-    "!/home/ma-user/anaconda3/bin/conda create -n python-3.9.0 python=3.9.0 -y --override-channels --channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main\n",
-    "!/home/ma-user/anaconda3/envs/python-3.9.0/bin/pip install ipykernel"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "import os\n",
-    "\n",
-    "data = {\n",
-    "   \"display_name\": \"python-3.9.0\",\n",
-    "   \"env\": {\n",
-    "      \"PATH\": \"/home/ma-user/anaconda3/envs/python-3.9.0/bin:/home/ma-user/anaconda3/envs/python-3.7.10/bin:/modelarts/authoring/notebook-conda/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/home/ma-user/modelarts/ma-cli/bin:/home/ma-user/modelarts/ma-cli/bin\"\n",
-    "   },\n",
-    "   \"language\": \"python\",\n",
-    "   \"argv\": [\n",
-    "      \"/home/ma-user/anaconda3/envs/python-3.9.0/bin/python\",\n",
-    "      \"-m\",\n",
-    "      \"ipykernel\",\n",
-    "      \"-f\",\n",
-    "      \"{connection_file}\"\n",
-    "   ]\n",
-    "}\n",
-    "\n",
-    "if not os.path.exists(\"/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/\"):\n",
-    "    os.mkdir(\"/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/\")\n",
-    "\n",
-    "with open('/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/kernel.json', 'w') as f:\n",
-    "    json.dump(data, f, indent=4)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 注：以上代码运行完成后，需要重新设置kernel为python-3.9.0"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "<div align=center><img src=\"https://mindspore-demo.obs.cn-north-4.myhuaweicloud.com/imgs/ai-gallery/change-kernel.PNG\"></div>"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "第二步：安装MindSpore框架和MindNLP套件"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n",
-      "Collecting mindspore==2.2.14\n",
-      "  Downloading https://ms-release.obs.cn-north-4.myhuaweicloud.com/2.2.14/MindSpore/unified/x86_64/mindspore-2.2.14-cp39-cp39-linux_x86_64.whl (743.0 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m743.0/743.0 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting numpy>=1.17.0 (from mindspore==2.2.14)\n",
-      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/b1/e3/24d289c5a3255bf52824bd52295e9a7923cad8ae5ec29539fc971e1122f6/numpy-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.5/19.5 MB\u001b[0m \u001b[31m67.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting protobuf>=3.13.0 (from mindspore==2.2.14)\n",
-      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/27/e4/8dc4546be46873f8950cb44cdfe19b79d66d26e53c4ee5e3440406257fcd/protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl (309 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m24.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hRequirement already satisfied: asttokens>=2.0.4 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore==2.2.14) (2.4.1)\n",
-      "Collecting pillow>=6.2.0 (from mindspore==2.2.14)\n",
-      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/32/3f/c02268d0c6fb6b3958bdda673c17b315c821d97df29ae6969f20fb49388a/pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting scipy>=1.5.4 (from mindspore==2.2.14)\n",
-      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/35/f5/d0ad1a96f80962ba65e2ce1de6a1e59edecd1f0a7b55990ed208848012e0/scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.6 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.6/38.6 MB\u001b[0m \u001b[31m45.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore==2.2.14) (24.1)\n",
-      "Requirement already satisfied: psutil>=5.6.1 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore==2.2.14) (6.0.0)\n",
-      "Collecting astunparse>=1.6.3 (from mindspore==2.2.14)\n",
-      "  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2b/03/13dde6512ad7b4557eb792fbcf0c653af6076b81e5941d36ec61f7ce6028/astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n",
-      "Requirement already satisfied: six>=1.12.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from asttokens>=2.0.4->mindspore==2.2.14) (1.16.0)\n",
-      "Requirement already satisfied: wheel<1.0,>=0.23.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from astunparse>=1.6.3->mindspore==2.2.14) (0.43.0)\n",
-      "Installing collected packages: protobuf, pillow, numpy, astunparse, scipy, mindspore\n",
-      "Successfully installed astunparse-1.6.3 mindspore-2.2.14 numpy-2.0.1 pillow-10.4.0 protobuf-5.27.2 scipy-1.13.1\n"
-     ]
-    }
-   ],
-   "source": [
-    "!pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/2.2.14/MindSpore/unified/x86_64/mindspore-2.2.14-cp39-cp39-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple\n",
-      "Collecting mindnlp\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/72/37/ef313c23fd587c3d1f46b0741c98235aecdfd93b4d6d446376f3db6a552c/mindnlp-0.3.1-py3-none-any.whl (5.7 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m43.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-      "\u001b[?25hRequirement already satisfied: mindspore in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindnlp) (2.2.14)\n",
-      "Collecting tqdm (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/18/eb/fdb7eb9e48b7b02554e1664afd3bd3f117f6b6d6c5881438a0b055554f9b/tqdm-4.66.4-py3-none-any.whl (78 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting requests (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl (64 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting datasets (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/60/2d/963b266bb8f88492d5ab4232d74292af8beb5b6fdae97902df9e284d4c32/datasets-2.20.0-py3-none-any.whl (547 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-      "\u001b[?25hCollecting evaluate (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/c2/d6/ff9baefc8fc679dcd9eb21b29da3ef10c81aa36be630a7ae78e4611588e1/evaluate-0.4.2-py3-none-any.whl (84 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting tokenizers (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/0f/cb/8fc733c8f251bac1e5c4ae52458c353b3faa98f41d734c226cad3783da03/tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m72.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
-      "\u001b[?25hCollecting safetensors (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/38/7f/3ba803bd6d726d65e480bee2aaeea79580d2e4836e4c6ebc27144c62ce51/safetensors-0.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting sentencepiece (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/5f/01/c95e42eb86282b2c79305d3e0b0ca5a743f85a61262bb7130999c70b9374/sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m39.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting regex (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/36/67/851cf82e2c47d46846cca15ba84f845e876257a54cb82f229d335cd5c67e/regex-2024.7.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (775 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m775.9/775.9 kB\u001b[0m \u001b[31m21.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting addict (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/6a/00/b08f23b7d7e1e14ce01419a467b583edbb93c6cdb8654e54a9cc579cd61f/addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
-      "Collecting ml-dtypes (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/23/1c/06b52d3dcd75a81f6ca1e56514db6b21fe928f159cc5302428c1fed46562/ml_dtypes-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-      "\u001b[?25hCollecting pyctcdecode (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/a5/8a/93e2118411ae5e861d4f4ce65578c62e85d0f1d9cb389bd63bd57130604e/pyctcdecode-0.5.0-py2.py3-none-any.whl (39 kB)\n",
-      "Collecting jieba (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/c6/cb/18eeb235f833b726522d7ebed54f2278ce28ba9438e3135ab0278d9792a2/jieba-0.42.1.tar.gz (19.2 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.2/19.2 MB\u001b[0m \u001b[31m66.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25ldone\n",
-      "\u001b[?25hCollecting pytest==7.2.0 (from mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/67/68/a5eb36c3a8540594b6035e6cdae40c1ef1b6a2bfacbecc3d1a544583c078/pytest-7.2.0-py3-none-any.whl (316 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.8/316.8 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting attrs>=19.2.0 (from pytest==7.2.0->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/e0/44/827b2a91a5816512fcaf3cc4ebc465ccd5d598c45cefa6703fcf4a79018f/attrs-23.2.0-py3-none-any.whl (60 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting iniconfig (from pytest==7.2.0->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl (5.9 kB)\n",
-      "Requirement already satisfied: packaging in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from pytest==7.2.0->mindnlp) (24.1)\n",
-      "Collecting pluggy<2.0,>=0.12 (from pytest==7.2.0->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl (20 kB)\n",
-      "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from pytest==7.2.0->mindnlp) (1.2.2)\n",
-      "Collecting tomli>=1.0.0 (from pytest==7.2.0->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl (12 kB)\n",
-      "Collecting filelock (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ae/f0/48285f0262fe47103a4a45972ed2f9b93e4c80b8fd609fa98da78b2a5706/filelock-3.15.4-py3-none-any.whl (16 kB)\n",
-      "Requirement already satisfied: numpy>=1.17 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from datasets->mindnlp) (2.0.1)\n",
-      "Collecting pyarrow>=15.0.0 (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/af/61/bcd9b58e38ead6ad42b9ed00da33a3f862bc1d445e3d3164799c25550ac2/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (39.9 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting pyarrow-hotfix (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/e4/f4/9ec2222f5f5f8ea04f66f184caafd991a39c8782e31f5b0266f101cb68ca/pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
-      "Collecting dill<0.3.9,>=0.3.0 (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl (116 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting pandas (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/bb/30/f6f1f1ac36250f50c421b1b6af08c35e5a8b5a84385ef928625336b93e6f/pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting xxhash (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/63/93/812d78f70145c68c4e64533f4d625bea01236f27698febe15f0ceebc1566/xxhash-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (193 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m193.8/193.8 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting multiprocess (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl (133 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m133.4/133.4 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting fsspec<=2024.5.0,>=2023.1.0 (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ba/a3/16e9fe32187e9c8bc7f9b7bcd9728529faa725231a0c96f2f98714ff2fc5/fsspec-2024.5.0-py3-none-any.whl (316 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.1/316.1 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting aiohttp (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/4c/e6/061ab7e0084b7443f9bd7092853b5d0f97029157a58fcc8749cdad8aef0f/aiohttp-3.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting huggingface-hub>=0.21.2 (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/0f/36/83c0f0c7a5ec75738241c4c0c066097e4f74729716961db6a2905395015c/huggingface_hub-0.24.3-py3-none-any.whl (417 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m417.3/417.3 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting pyyaml>=5.1 (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/7d/39/472f2554a0f1e825bd7c5afc11c817cd7a2f3657460f7159f691fbb37c51/PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (738 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m738.9/738.9 kB\u001b[0m \u001b[31m38.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0m\n",
-      "\u001b[?25hCollecting charset-normalizer<4,>=2 (from requests->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/98/69/5d8751b4b670d623aa7a47bef061d69c279e9f922f6705147983aa76c3ce/charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.3/142.3 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting idna<4,>=2.5 (from requests->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl (66 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.8/66.8 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting urllib3<3,>=1.21.1 (from requests->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ca/1c/89ffc63a9605b583d5df2be791a27bc1a42b7c32bab68d3c8f2f73a98cd4/urllib3-2.2.2-py3-none-any.whl (121 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.4/121.4 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting certifi>=2017.4.17 (from requests->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/1c/d5/c84e1a17bf61d4df64ca866a1c9a913874b4e9bdc131ec689a0ad013fb36/certifi-2024.7.4-py3-none-any.whl (162 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m163.0/163.0 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hRequirement already satisfied: protobuf>=3.13.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (5.27.2)\n",
-      "Requirement already satisfied: asttokens>=2.0.4 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (2.4.1)\n",
-      "Requirement already satisfied: pillow>=6.2.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (10.4.0)\n",
-      "Requirement already satisfied: scipy>=1.5.4 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (1.13.1)\n",
-      "Requirement already satisfied: psutil>=5.6.1 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (6.0.0)\n",
-      "Requirement already satisfied: astunparse>=1.6.3 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (1.6.3)\n",
-      "Collecting numpy>=1.17 (from datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/54/30/c2a907b9443cf42b90c17ad10c1e8fa801975f01cb9764f3f8eb8aea638b/numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m70.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
-      "\u001b[?25hCollecting pygtrie<3.0,>=2.1 (from pyctcdecode->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ec/cd/bd196b2cf014afb1009de8b0f05ecd54011d881944e62763f3c1b1e8ef37/pygtrie-2.5.0-py3-none-any.whl (25 kB)\n",
-      "Collecting hypothesis<7,>=6.14 (from pyctcdecode->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/dd/b6/619043aa33150cfbb2491f7d712a5a955cd3702056c6e436454477b5c18b/hypothesis-6.108.5-py3-none-any.whl (465 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m465.2/465.2 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-      "\u001b[?25hRequirement already satisfied: six>=1.12.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from asttokens>=2.0.4->mindspore->mindnlp) (1.16.0)\n",
-      "Requirement already satisfied: wheel<1.0,>=0.23.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from astunparse>=1.6.3->mindspore->mindnlp) (0.43.0)\n",
-      "Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/07/b1/d9455cf313df7b2fe6c60a871eb96801b6e8fbdc7d736f6576492b4c97b3/aiohappyeyeballs-2.3.2-py3-none-any.whl (11 kB)\n",
-      "Collecting aiosignal>=1.1.2 (from aiohttp->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/76/ac/a7305707cb852b7e16ff80eaf5692309bde30e2b1100a1fcacdc8f731d97/aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
-      "Collecting frozenlist>=1.1.1 (from aiohttp->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/70/b0/6f1ebdabfb604e39a0f84428986b89ab55f246b64cddaa495f2c953e1f6b/frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (240 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m240.7/240.7 kB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting multidict<7.0,>=4.5 (from aiohttp->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/39/a9/1f8d42c8103bcb1da6bb719f1bc018594b5acc8eae56b3fec4720ebee225/multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (123 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.8/123.8 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting yarl<2.0,>=1.0 (from aiohttp->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/69/ea/d7e961ea9b1b818a43b155ee512117be6ab9ab67c1e94967b2e64126e8e4/yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (304 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m304.3/304.3 kB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting async-timeout<5.0,>=4.0 (from aiohttp->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/a7/fa/e01228c2938de91d47b307831c62ab9e4001e747789d0b05baf779a6488c/async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n",
-      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from huggingface-hub>=0.21.2->datasets->mindnlp) (4.12.2)\n",
-      "Collecting sortedcontainers<3.0.0,>=2.1.0 (from hypothesis<7,>=6.14->pyctcdecode->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n",
-      "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from pandas->datasets->mindnlp) (2.9.0.post0)\n",
-      "Collecting pytz>=2020.1 (from pandas->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl (505 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m505.5/505.5 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting tzdata>=2022.7 (from pandas->datasets->mindnlp)\n",
-      "  Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl (345 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m345.4/345.4 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hBuilding wheels for collected packages: jieba\n",
-      "  Building wheel for jieba (setup.py) ... \u001b[?25ldone\n",
-      "\u001b[?25h  Created wheel for jieba: filename=jieba-0.42.1-py3-none-any.whl size=19314459 sha256=99e99961421c6a7516fb6dae30f85adc7b7643e3245afc9a1e32608d5e4fd5a6\n",
-      "  Stored in directory: /home/ma-user/.cache/pip/wheels/2d/22/9e/9af7e8c2773513ac75905acfb75073922bcc1aa176f730a0c9\n",
-      "Successfully built jieba\n",
-      "Installing collected packages: sortedcontainers, sentencepiece, pytz, pygtrie, jieba, addict, xxhash, urllib3, tzdata, tqdm, tomli, safetensors, regex, pyyaml, pyarrow-hotfix, pluggy, numpy, multidict, iniconfig, idna, fsspec, frozenlist, filelock, dill, charset-normalizer, certifi, attrs, async-timeout, aiohappyeyeballs, yarl, requests, pytest, pyarrow, pandas, multiprocess, ml-dtypes, hypothesis, aiosignal, pyctcdecode, huggingface-hub, aiohttp, tokenizers, datasets, evaluate, mindnlp\n",
-      "  Attempting uninstall: numpy\n",
-      "    Found existing installation: numpy 2.0.1\n",
-      "    Uninstalling numpy-2.0.1:\n",
-      "      Successfully uninstalled numpy-2.0.1\n",
-      "Successfully installed addict-2.4.0 aiohappyeyeballs-2.3.2 aiohttp-3.10.0 aiosignal-1.3.1 async-timeout-4.0.3 attrs-23.2.0 certifi-2024.7.4 charset-normalizer-3.3.2 datasets-2.20.0 dill-0.3.8 evaluate-0.4.2 filelock-3.15.4 frozenlist-1.4.1 fsspec-2024.5.0 huggingface-hub-0.24.3 hypothesis-6.108.5 idna-3.7 iniconfig-2.0.0 jieba-0.42.1 mindnlp-0.3.1 ml-dtypes-0.4.0 multidict-6.0.5 multiprocess-0.70.16 numpy-1.26.4 pandas-2.2.2 pluggy-1.5.0 pyarrow-17.0.0 pyarrow-hotfix-0.6 pyctcdecode-0.5.0 pygtrie-2.5.0 pytest-7.2.0 pytz-2024.1 pyyaml-6.0.1 regex-2024.7.24 requests-2.32.3 safetensors-0.4.3 sentencepiece-0.2.0 sortedcontainers-2.4.0 tokenizers-0.19.1 tomli-2.0.1 tqdm-4.66.4 tzdata-2024.1 urllib3-2.2.2 xxhash-3.4.1 yarl-1.9.4\n"
-     ]
-    }
-   ],
-   "source": [
-    "!pip install mindnlp"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 注：MindNLP whl包下载链接为：[MindNLP](https://repo.mindspore.cn/mindspore-lab/mindnlp/newest/any/)"
+    "> 本案例的运行环境为 Python 3.9、MindSpore 2.4.1、MindNLP 0.4.1（在线运行平台）；如在其他环境平台运行本案例，请根据实际情况安装相应版本的依赖包"
    ]
   },
   {
@@ -330,12 +32,12 @@
     "\n",
     "对话情绪识别（Emotion Detection，简称EmoTect），专注于识别智能对话场景中用户的情绪，针对智能对话场景中的用户文本，自动判断该文本的情绪类别并给出相应的置信度，情绪类型分为积极、消极、中性。 对话情绪识别适用于聊天、客服等多个场景，能够帮助企业更好地把握对话质量、改善产品的用户交互体验，也能分析客服服务质量、降低人工质检成本。\n",
     "\n",
-    "下面以一个文本情感分类任务为例子来说明BERT模型的整个应用过程。"
+    "下面以一个文本情感分类任务为例子来说明BERT模型的整个应用过程。\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {
     "tags": []
    },
@@ -344,25 +46,39 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n",
       "Building prefix dict from the default dictionary ...\n",
       "Dumping model to file cache /tmp/jieba.cache\n",
-      "Loading model cost 0.782 seconds.\n",
-      "Prefix dict has been built successfully.\n"
+      "Loading model cost 1.321 seconds.\n",
+      "Prefix dict has been built successfully.\n",
+      "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/Cython/Compiler/Main.py:384: FutureWarning: Cython directive 'language_level' not set, using '3str' for now (Py3). This has changed from earlier releases! File: /home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/mindnlp/transformers/models/graphormer/algos_graphormer.pyx\n",
+      "  tree = Parsing.p_module(s, pxd, full_module_name)\n"
      ]
     }
    ],
    "source": [
-    "import os\n",
-    "\n",
     "import mindspore\n",
-    "from mindspore.dataset import text, GeneratorDataset, transforms\n",
-    "from mindspore import nn, context\n",
+    "from mindspore.dataset import GeneratorDataset, transforms\n",
     "\n",
-    "from mindnlp._legacy.engine import Trainer, Evaluator\n",
-    "from mindnlp._legacy.engine.callbacks import CheckpointCallback, BestModelCallback\n",
-    "from mindnlp._legacy.metrics import Accuracy"
+    "from mindnlp.engine import Trainer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 数据集\n",
+    "\n",
+    "这里提供一份已标注的、经过分词预处理的机器人聊天数据集，来自于百度飞桨团队。数据由两列组成，以制表符（'\\t'）分隔，第一列是情绪分类的类别（0表示消极；1表示中性；2表示积极），第二列是以空格分词的中文文本，如下示例，文件为 utf8 编码。\n",
+    "\n",
+    "label--text_a\n",
+    "\n",
+    "0--谁骂人了？我从来不骂人，我骂的都不是人，你是人吗 ？\n",
+    "\n",
+    "1--我有事等会儿就回来和你聊\n",
+    "\n",
+    "2--我见到你很高兴谢谢你帮我\n",
+    "\n",
+    "这部分主要包括数据集读取，数据格式转换，数据 Tokenize 处理和 pad 操作。"
    ]
   },
   {
@@ -398,25 +114,6 @@
     "        return len(self._labels)"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 数据集\n",
-    "\n",
-    "这里提供一份已标注的、经过分词预处理的机器人聊天数据集，来自于百度飞桨团队。数据由两列组成，以制表符（'\\t'）分隔，第一列是情绪分类的类别（0表示消极；1表示中性；2表示积极），第二列是以空格分词的中文文本，如下示例，文件为 utf8 编码。\n",
-    "\n",
-    "label--text_a\n",
-    "\n",
-    "0--谁骂人了？我从来不骂人，我骂的都不是人，你是人吗 ？\n",
-    "\n",
-    "1--我有事等会儿就回来和你聊\n",
-    "\n",
-    "2--我见到你很高兴谢谢你帮我\n",
-    "\n",
-    "这部分主要包括数据集读取，数据格式转换，数据 Tokenize 处理和 pad 操作。"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 5,
@@ -428,16 +125,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2024-07-31 09:59:47--  https://baidu-nlp.bj.bcebos.com/emotion_detection-dataset-1.0.0.tar.gz\n",
-      "Resolving proxy.modelarts.com (proxy.modelarts.com)... 192.168.6.3\n",
-      "Connecting to proxy.modelarts.com (proxy.modelarts.com)|192.168.6.3|:80... connected.\n",
+      "--2025-01-03 11:44:42--  https://baidu-nlp.bj.bcebos.com/emotion_detection-dataset-1.0.0.tar.gz\n",
+      "Resolving proxy-notebook.modelarts.com (proxy-notebook.modelarts.com)... 192.168.0.33\n",
+      "Connecting to proxy-notebook.modelarts.com (proxy-notebook.modelarts.com)|192.168.0.33|:8083... connected.\n",
       "Proxy request sent, awaiting response... 200 OK\n",
       "Length: 1710581 (1.6M) [application/x-gzip]\n",
       "Saving to: ‘emotion_detection.tar.gz’\n",
       "\n",
-      "emotion_detection.t 100%[===================>]   1.63M  1.71MB/s    in 1.0s    \n",
+      "emotion_detection.t 100%[===================>]   1.63M  7.10MB/s    in 0.2s    \n",
       "\n",
-      "2024-07-31 09:59:48 (1.71 MB/s) - ‘emotion_detection.tar.gz’ saved [1710581/1710581]\n",
+      "2025-01-03 11:44:42 (7.10 MB/s) - ‘emotion_detection.tar.gz’ saved [1710581/1710581]\n",
       "\n",
       "data/\n",
       "data/test.tsv\n",
@@ -471,8 +168,6 @@
    },
    "outputs": [],
    "source": [
-    "import numpy as np\n",
-    "\n",
     "def process_dataset(source, tokenizer, max_seq_len=64, batch_size=32, shuffle=True):\n",
     "    is_ascend = mindspore.get_context('device_target') == 'Ascend'\n",
     "\n",
@@ -490,7 +185,7 @@
     "    # map dataset\n",
     "    dataset = dataset.map(operations=tokenize_and_pad, input_columns=\"text_a\", output_columns=['input_ids', 'attention_mask'])\n",
     "    dataset = dataset.map(operations=[type_cast_op], input_columns=\"label\", output_columns='labels')\n",
-    "    # batch dataset\n",
+    "    # batch dataset\n",
     "    if is_ascend:\n",
     "        dataset = dataset.batch(batch_size)\n",
     "    else:\n",
@@ -518,10 +213,12 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 49.0/49.0 [00:00<00:00, 214kB/s]\n",
-      "107kB [00:11, 9.56kB/s] \n",
-      "263kB [00:06, 41.9kB/s] \n",
-      "624B [00:00, 1.76MB/s]                   \n"
+      "100%|██████████| 49.0/49.0 [00:00<00:00, 61.5kB/s]\n",
+      "107kB [00:00, 823kB/s] \n",
+      "263kB [00:00, 588kB/s] \n",
+      "624B [00:00, 713kB/s]                    \n",
+      "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/mindnlp/transformers/tokenization_utils_base.py:1526: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted, and will be then set to `False` by default. \n",
+      "  warnings.warn(\n"
      ]
     }
    ],
@@ -589,32 +286,52 @@
    "cell_type": "code",
    "execution_count": 11,
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mindspore.dataset.engine.datasets.BatchDataset"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(dataset_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[Tensor(shape=[32, 18], dtype=Int64, value=\n",
-      "[[ 101,  872, 4339 ...    0,    0,    0],\n",
-      " [ 101,  872, 2849 ...    0,    0,    0],\n",
-      " [ 101,  679, 2190 ...    0,    0,    0],\n",
+      "{'input_ids': Tensor(shape=[32, 64], dtype=Int64, value=\n",
+      "[[ 101, 2769, 3221 ...    0,    0,    0],\n",
+      " [ 101, 1091, 1139 ...    0,    0,    0],\n",
+      " [ 101, 2828, 6929 ...    0,    0,    0],\n",
       " ...\n",
-      " [ 101, 1063, 1921 ...    0,    0,    0],\n",
-      " [ 101, 8275, 8331 ...    0,    0,    0],\n",
-      " [ 101, 3221, 2207 ...    0,    0,    0]]), Tensor(shape=[32, 18], dtype=Int64, value=\n",
+      " [ 101,  671, 4157 ...    0,    0,    0],\n",
+      " [ 101, 2769, 6432 ...    0,    0,    0],\n",
+      " [ 101, 2207, 4908 ...    0,    0,    0]]), 'attention_mask': Tensor(shape=[32, 64], dtype=Int64, value=\n",
       "[[1, 1, 1 ... 0, 0, 0],\n",
       " [1, 1, 1 ... 0, 0, 0],\n",
       " [1, 1, 1 ... 0, 0, 0],\n",
       " ...\n",
       " [1, 1, 1 ... 0, 0, 0],\n",
       " [1, 1, 1 ... 0, 0, 0],\n",
-      " [1, 1, 1 ... 0, 0, 0]]), Tensor(shape=[32], dtype=Int32, value= [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, \n",
-      " 1, 1, 0, 1, 1, 1, 1, 1])]\n"
+      " [1, 1, 1 ... 0, 0, 0]]), 'labels': Tensor(shape=[32], dtype=Int32, value= [0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, \n",
+      " 0, 1, 1, 1, 0, 1, 1, 1])}\n"
      ]
     }
    ],
    "source": [
-    "print(next(dataset_train.create_tuple_iterator()))"
+    "print(next(dataset_train.create_dict_iterator()))"
    ]
   },
   {
@@ -628,7 +345,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
    "metadata": {
     "tags": []
    },
@@ -637,241 +354,265 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 392M/392M [00:30<00:00, 13.4MB/s] \n",
-      "The following parameters in checkpoint files are not loaded:\n",
-      "['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']\n",
-      "The following parameters in models are missing parameter:\n",
-      "['classifier.weight', 'classifier.bias']\n"
+      "100%|██████████| 392M/392M [00:34<00:00, 11.8MB/s] \n",
+      "[WARNING] DEVICE(32558,ffff8291f0b0,python):2025-01-03-11:45:51.479.694 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:188] CheckVmmDriverVersion] Driver version is less than 24.0.0, vmm is disabled by default, drvier_version: 23.0.6\n",
+      "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
      ]
     }
    ],
    "source": [
     "from mindnlp.transformers import BertForSequenceClassification, BertModel\n",
-    "from mindnlp._legacy.amp import auto_mixed_precision\n",
     "\n",
     "# set bert config and define parameters for training\n",
-    "model = BertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=3)\n",
-    "model = auto_mixed_precision(model, 'O1')\n",
-    "\n",
-    "optimizer = nn.Adam(model.trainable_params(), learning_rate=2e-5)"
+    "model = BertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=3)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
-    "metric = Accuracy()\n",
-    "# define callbacks to save checkpoints\n",
-    "ckpoint_cb = CheckpointCallback(save_path='checkpoint', ckpt_name='bert_emotect', epochs=1, keep_checkpoint_max=2)\n",
-    "best_model_cb = BestModelCallback(save_path='checkpoint', ckpt_name='bert_emotect_best', auto_load=True)\n",
+    "from mindnlp.engine import TrainingArguments\n",
     "\n",
-    "trainer = Trainer(network=model, train_dataset=dataset_train,\n",
-    "                  eval_dataset=dataset_val, metrics=metric,\n",
-    "                  epochs=5, optimizer=optimizer, callbacks=[ckpoint_cb, best_model_cb])"
+    "training_args = TrainingArguments(\n",
+    "    output_dir=\"bert_emotect_finetune\",\n",
+    "    evaluation_strategy=\"epoch\",\n",
+    "    save_strategy=\"epoch\",\n",
+    "    logging_strategy=\"epoch\",\n",
+    "    load_best_model_at_end=True,\n",
+    "    num_train_epochs=3.0\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "tags": []
-   },
+   "execution_count": 15,
+   "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The train will start from the checkpoint saved in 'checkpoint'.\n"
-     ]
-    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Epoch 0:   0%|          | 0/302 [00:00<?, ?it/s][WARNING] KERNEL(4746,7fbca17fe700,python):2024-07-31-10:01:50.035.610 [mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel.cc:40] CheckDeviceSm] It is recommended to use devices with a computing capacity >= 7, but the current device's computing capacity is 6\n",
-      "Epoch 0: 100%|██████████| 302/302 [01:57<00:00,  2.56it/s, loss=0.3391044] \n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Checkpoint: 'bert_emotect_epoch_0.ckpt' has been saved in epoch: 0.\n"
+      "Downloading builder script: 4.20kB [00:00, 4.84MB/s]\n"
      ]
-    },
+    }
+   ],
+   "source": [
+    "from mindnlp import evaluate\n",
+    "import numpy as np\n",
+    "\n",
+    "metric = evaluate.load(\"accuracy\")\n",
+    "\n",
+    "def compute_metrics(eval_pred):\n",
+    "    logits, labels = eval_pred\n",
+    "    predictions = np.argmax(logits, axis=-1)\n",
+    "    return metric.compute(predictions=predictions, references=labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trainer = Trainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=dataset_train,\n",
+    "    eval_dataset=dataset_val,\n",
+    "    compute_metrics=compute_metrics\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Evaluate: 100%|██████████| 34/34 [00:04<00:00,  7.48it/s]\n"
+      "  0%|          | 0/906 [00:00<?, ?it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Evaluate Score: {'Accuracy': 0.9342592592592592}\n",
-      "---------------Best Model: 'bert_emotect_best.ckpt' has been saved in epoch: 0.---------------\n"
+      "-\r"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Epoch 1: 100%|██████████| 302/302 [01:55<00:00,  2.61it/s, loss=0.18814266]\n"
+      "  0%|          | 1/906 [01:06<16:42:36, 66.47s/it]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Checkpoint: 'bert_emotect_epoch_1.ckpt' has been saved in epoch: 1.\n"
+      "|\r"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Evaluate: 100%|██████████| 34/34 [00:03<00:00,  8.96it/s]\n"
+      " 33%|███▎      | 301/906 [02:57<02:57,  3.40it/s] "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Evaluate Score: {'Accuracy': 0.9648148148148148}\n",
-      "---------------Best Model: 'bert_emotect_best.ckpt' has been saved in epoch: 1.---------------\n"
+      "/\r"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Epoch 2: 100%|██████████| 302/302 [01:56<00:00,  2.60it/s, loss=0.12776488] \n"
+      " 33%|███▎      | 302/906 [02:59<09:24,  1.07it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The maximum number of stored checkpoints has been reached.\n",
-      "Checkpoint: 'bert_emotect_epoch_2.ckpt' has been saved in epoch: 2.\n"
+      "{'loss': 0.3404, 'learning_rate': 3.3333333333333335e-05, 'epoch': 1.0}\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Evaluate: 100%|██████████| 34/34 [00:03<00:00,  9.82it/s]\n"
+      "\n",
+      "  0%|          | 0/34 [00:00<?, ?it/s]\u001b[A\n",
+      "  6%|▌         | 2/34 [00:01<00:29,  1.09it/s]\u001b[A\n",
+      " 18%|█▊        | 6/34 [00:01<00:07,  3.87it/s]\u001b[A\n",
+      " 29%|██▉       | 10/34 [00:02<00:03,  7.16it/s]\u001b[A\n",
+      " 41%|████      | 14/34 [00:02<00:01, 10.83it/s]\u001b[A\n",
+      " 53%|█████▎    | 18/34 [00:02<00:01, 14.71it/s]\u001b[A\n",
+      " 65%|██████▍   | 22/34 [00:02<00:00, 18.56it/s]\u001b[A\n",
+      " 76%|███████▋  | 26/34 [00:02<00:00, 21.52it/s]\u001b[A\n",
+      " 88%|████████▊ | 30/34 [00:02<00:00, 24.98it/s]\u001b[A\n",
+      "100%|██████████| 34/34 [00:02<00:00, 26.66it/s]\u001b[A\n",
+      "                                                 \n",
+      " 33%|███▎      | 302/906 [03:04<09:24,  1.07it/s]A\n",
+      "                                               \u001b[A"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Evaluate Score: {'Accuracy': 0.9796296296296296}\n",
-      "---------------Best Model: 'bert_emotect_best.ckpt' has been saved in epoch: 2.---------------\n"
+      "{'eval_loss': 0.2110523134469986, 'eval_accuracy': 0.9092592592592592, 'eval_runtime': 4.8961, 'eval_samples_per_second': 6.944, 'eval_steps_per_second': 1.021, 'epoch': 1.0}\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Epoch 3: 100%|██████████| 302/302 [01:56<00:00,  2.60it/s, loss=0.08596088] \n"
+      " 67%|██████▋   | 604/906 [04:43<01:22,  3.67it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The maximum number of stored checkpoints has been reached.\n",
-      "Checkpoint: 'bert_emotect_epoch_3.ckpt' has been saved in epoch: 3.\n"
+      "{'loss': 0.187, 'learning_rate': 1.6666666666666667e-05, 'epoch': 2.0}\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Evaluate: 100%|██████████| 34/34 [00:03<00:00, 10.29it/s]\n"
+      "\n",
+      "  0%|          | 0/34 [00:00<?, ?it/s]\u001b[A\n",
+      "  9%|▉         | 3/34 [00:00<00:02, 15.14it/s]\u001b[A\n",
+      " 15%|█▍        | 5/34 [00:00<00:02, 12.00it/s]\u001b[A\n",
+      " 21%|██        | 7/34 [00:00<00:02, 11.18it/s]\u001b[A\n",
+      " 26%|██▋       | 9/34 [00:00<00:02, 10.99it/s]\u001b[A\n",
+      " 38%|███▊      | 13/34 [00:00<00:01, 16.43it/s]\u001b[A\n",
+      " 50%|█████     | 17/34 [00:01<00:00, 20.48it/s]\u001b[A\n",
+      " 62%|██████▏   | 21/34 [00:01<00:00, 23.40it/s]\u001b[A\n",
+      " 71%|███████   | 24/34 [00:01<00:00, 24.66it/s]\u001b[A\n",
+      " 82%|████████▏ | 28/34 [00:01<00:00, 26.50it/s]\u001b[A\n",
+      "                                                 A\n",
+      " 67%|██████▋   | 604/906 [04:44<01:22,  3.67it/s]\n",
+      "100%|██████████| 34/34 [00:01<00:00, 27.82it/s]\u001b[A\n",
+      "                                               \u001b[A"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Evaluate Score: {'Accuracy': 0.9916666666666667}\n",
-      "---------------Best Model: 'bert_emotect_best.ckpt' has been saved in epoch: 3.---------------\n"
+      "{'eval_loss': 0.13068892061710358, 'eval_accuracy': 0.9453703703703704, 'eval_runtime': 1.7879, 'eval_samples_per_second': 19.016, 'eval_steps_per_second': 2.797, 'epoch': 2.0}\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Epoch 4: 100%|██████████| 302/302 [01:56<00:00,  2.60it/s, loss=0.062058248]\n"
+      "100%|██████████| 906/906 [06:28<00:00,  3.41it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The maximum number of stored checkpoints has been reached.\n",
-      "Checkpoint: 'bert_emotect_epoch_4.ckpt' has been saved in epoch: 4.\n"
+      "{'loss': 0.1093, 'learning_rate': 0.0, 'epoch': 3.0}\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Evaluate: 100%|██████████| 34/34 [00:03<00:00, 10.11it/s]\n"
+      "\n",
+      "  0%|          | 0/34 [00:00<?, ?it/s]\u001b[A\n",
+      "  9%|▉         | 3/34 [00:00<00:01, 16.69it/s]\u001b[A\n",
+      " 15%|█▍        | 5/34 [00:00<00:02, 13.72it/s]\u001b[A\n",
+      " 21%|██        | 7/34 [00:00<00:02, 12.48it/s]\u001b[A\n",
+      " 26%|██▋       | 9/34 [00:00<00:02, 12.29it/s]\u001b[A\n",
+      " 32%|███▏      | 11/34 [00:00<00:01, 12.15it/s]\u001b[A\n",
+      " 47%|████▋     | 16/34 [00:00<00:00, 19.98it/s]\u001b[A\n",
+      " 62%|██████▏   | 21/34 [00:01<00:00, 25.94it/s]\u001b[A\n",
+      " 76%|███████▋  | 26/34 [00:01<00:00, 30.30it/s]\u001b[A\n",
+      "                                                 A\n",
+      "100%|██████████| 906/906 [06:30<00:00,  3.41it/s]\n",
+      "100%|██████████| 34/34 [00:01<00:00, 33.33it/s]\u001b[A\n",
+      "                                               \u001b[A"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Evaluate Score: {'Accuracy': 0.9907407407407407}\n",
-      "Loading best model from 'checkpoint' with '['Accuracy']': [0.9916666666666667]...\n",
-      "---------------The model is already load the best model from 'bert_emotect_best.ckpt'.---------------\n"
+      "{'eval_loss': 0.05388114973902702, 'eval_accuracy': 0.9851851851851852, 'eval_runtime': 1.5974, 'eval_samples_per_second': 21.285, 'eval_steps_per_second': 3.13, 'epoch': 3.0}\n"
      ]
-    }
-   ],
-   "source": [
-    "# start training\n",
-    "trainer.run(tgt_columns=\"labels\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 模型验证\n",
-    "\n",
-    "将验证数据集加再进训练好的模型，对数据集进行验证，查看模型在验证数据上面的效果，此处的评价指标为准确率。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
+    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Evaluate: 100%|██████████| 33/33 [00:03<00:00,  9.23it/s]"
+      "100%|██████████| 906/906 [06:39<00:00,  2.27it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Evaluate Score: {'Accuracy': 0.9102316602316602}\n"
+      "{'train_runtime': 399.6651, 'train_samples_per_second': 72.541, 'train_steps_per_second': 2.267, 'train_loss': 0.2122221982505411, 'epoch': 3.0}\n"
      ]
     },
     {
@@ -880,11 +621,21 @@
      "text": [
       "\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "TrainOutput(global_step=906, training_loss=0.2122221982505411, metrics={'train_runtime': 399.6651, 'train_samples_per_second': 72.541, 'train_steps_per_second': 2.267, 'train_loss': 0.2122221982505411, 'epoch': 3.0})"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "evaluator = Evaluator(network=model, eval_dataset=dataset_test, metrics=metric)\n",
-    "evaluator.run(tgt_columns=\"labels\")"
+    "# start training\n",
+    "trainer.train()"
    ]
   },
   {
@@ -898,7 +649,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 18,
    "metadata": {
     "tags": []
    },
@@ -909,7 +660,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 19,
    "metadata": {
     "tags": []
    },
@@ -929,7 +680,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 20,
    "metadata": {
     "tags": []
    },
@@ -967,12 +718,12 @@
    "source": [
     "## 自定义推理数据集\n",
     "\n",
-    "自己输入推理数据，展示模型的泛化能力。"
+    "尝试输入自定义推理数据，展示模型的泛化能力。"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 21,
    "metadata": {
     "tags": []
    },
@@ -981,7 +732,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "inputs: '家人们咱就是说一整个无语住了 绝绝子叠buff', predict: '中性'\n"
+      "inputs: '家人们咱就是说一整个无语住了 绝绝子叠buff', predict: '消极'\n"
      ]
     }
    ],
@@ -991,21 +742,10 @@
   }
  ],
  "metadata": {
-  "AIGalleryInfo": {
-   "item_id": "5443b528-0dd5-4909-ac4f-1c9cf839e2aa"
-  },
-  "flavorInfo": {
-   "architecture": "X86_64",
-   "category": "GPU"
-  },
-  "imageInfo": {
-   "id": "e1a07296-22a8-4f05-8bc8-e936c8e54202",
-   "name": "mindspore1.7.0-cuda10.1-py3.7-ubuntu18.04"
-  },
   "kernelspec": {
-   "display_name": "python-3.9.0",
+   "display_name": "MindSpore",
    "language": "python",
-   "name": "python-3.9.0"
+   "name": "mindspore"
   },
   "language_info": {
    "codemirror_mode": {
@@ -1017,7 +757,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.0"
+   "version": "3.9.10"
   }
  },
  "nbformat": 4,
diff --git a/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb b/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb
index f17db12..1f5d6b2 100644
--- a/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb
+++ b/Season1.step_into_chatgpt/2.BERT/bert_introduction.ipynb
@@ -451,15 +451,18 @@
     "\n",
     "    def construct(self, input_ids, token_type_ids=None, position_ids=None):\n",
     "        seq_len = input_ids.shape[1]\n",
+    "        #若未提供position_ids，则生成取值为0到seq_len-1的位置索引，并扩展为与input_ids相同的形状\n",
     "        if position_ids is None:\n",
     "            position_ids = mnp.arange(seq_len)\n",
     "            position_ids = position_ids.expand_dims(0).expand_as(input_ids)\n",
+    "        #初始化token_type_ids，使其为元素值全为0的张量，与input_ids形状相同\n",
     "        if token_type_ids is None:\n",
     "            token_type_ids = ops.zeros_like(input_ids)\n",
     "        \n",
     "        words_embeddings = self.word_embeddings(input_ids)\n",
     "        position_embeddings = self.position_embeddings(position_ids)\n",
     "        token_type_embeddings = self.token_type_embeddings(token_type_ids)\n",
+    "        #BERT的输入embeddings为词嵌入、位置嵌入、token类型嵌入三者相加，随后进行层归一化和dropout\n",
     "        embeddings = words_embeddings + position_embeddings + token_type_embeddings\n",
     "        embeddings = self.layer_norm(embeddings)\n",
     "        embeddings = self.dropout(embeddings)\n",
@@ -508,6 +511,7 @@
     "    \"\"\"\n",
     "    def __init__(self,  config):\n",
     "        super().__init__()\n",
+    "        #检查隐藏层大小是否能被注意力头数量整除，每个注意力头需要相同大小的输入\n",
     "        if config.hidden_size % config.num_attention_heads != 0:\n",
     "            raise ValueError(\n",
     "                f\"The hidden size {config.hidden_size} is not a multiple of the number of attention \"\n",
@@ -518,7 +522,8 @@
     "        self.num_attention_heads = config.num_attention_heads\n",
     "        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)\n",
     "        self.all_head_size = self.num_attention_heads * self.attention_head_size\n",
-    "\n",
+    "        \n",
+    "        #定义生成Q,K,V的全连接层\n",
     "        self.query = nn.Dense(config.hidden_size, self.all_head_size, \\\n",
     "            weight_init=TruncatedNormal(config.initializer_range))\n",
     "        self.key = nn.Dense(config.hidden_size, self.all_head_size, \\\n",
@@ -529,7 +534,8 @@
     "        self.dropout = Dropout(config.attention_probs_dropout_prob)\n",
     "        self.softmax = nn.Softmax(-1)\n",
     "        self.matmul = Matmul()\n",
-    "\n",
+    "    \n",
+    "    #转换注意力计算的张量形状，便于并行化计算\n",
     "    def transpose_for_scores(self, input_x):\n",
     "        \"\"\"\n",
     "        transpose for scores\n",
@@ -540,6 +546,7 @@
     "        return input_x.transpose(0, 2, 1, 3)\n",
     "\n",
     "    def construct(self, hidden_states, attention_mask=None, head_mask=None):\n",
+    "        #获取Q,K,V并转换张量形状\n",
     "        mixed_query_layer = self.query(hidden_states)\n",
     "        mixed_key_layer = self.key(hidden_states)\n",
     "        mixed_value_layer = self.value(hidden_states)\n",
@@ -547,8 +554,10 @@
     "        key_layer = self.transpose_for_scores(mixed_key_layer)\n",
     "        value_layer = self.transpose_for_scores(mixed_value_layer)\n",
     "\n",
+    "        #计算注意力分数\n",
     "        attention_scores = self.matmul(query_layer, key_layer.swapaxes(-1, -2))\n",
     "        attention_scores = attention_scores / ops.sqrt(Tensor(self.attention_head_size, mstype.float32))\n",
+    "        #如果提供了掩码，将其加到注意力分数上以屏蔽对应位置（假定被屏蔽位置的掩码值为很大的负数，待确认）\n",
     "        if attention_mask is not None:\n",
     "            attention_scores = attention_scores + attention_mask\n",
     "\n",
@@ -558,7 +567,8 @@
     "\n",
     "        if head_mask is not None:\n",
     "            attention_probs = attention_probs * head_mask\n",
-    "\n",
+    "        \n",
+    "        #用注意力概率对V加权求和得到上下文向量，并转换回原来的形状\n",
     "        context_layer = self.matmul(attention_probs, value_layer)\n",
     "        context_layer = context_layer.transpose(0, 2, 1, 3)\n",
     "        new_context_layer_shape = context_layer.shape[:-2] + (self.all_head_size,)\n",
@@ -603,6 +613,7 @@
     "    def construct(self, hidden_states, input_tensor):\n",
     "        hidden_states = self.dense(hidden_states)\n",
     "        hidden_states = self.dropout(hidden_states)\n",
+    "        #残差连接与层归一化\n",
     "        hidden_states = self.layer_norm(hidden_states + input_tensor)\n",
     "        return hidden_states"
    ]
@@ -634,6 +645,7 @@
     "\n",
     "    def construct(self, hidden_states):\n",
     "        hidden_states = self.dense(hidden_states)\n",
+    "        #激活函数，增加非线性部分\n",
     "        hidden_states = self.intermediate_act_fn(hidden_states)\n",
     "        return hidden_states"
    ]
@@ -667,6 +679,7 @@
     "    def construct(self, hidden_states, input_tensor):\n",
     "        hidden_states = self.dense(hidden_states)\n",
     "        hidden_states = self.dropout(hidden_states)\n",
+    "        #残差连接与层归一化\n",
     "        hidden_states = self. layer_norm(hidden_states + input_tensor)\n",
     "        return hidden_states"
    ]
@@ -697,10 +710,12 @@
     "    \"\"\"\n",
     "    def __init__(self, config):\n",
     "        super().__init__()\n",
+    "        #BertAttention由BertSelfAttention, BertSelfOutput构成\n",
     "        self.attention = BertAttention(config)\n",
     "        self.intermediate = BertIntermediate(config)\n",
     "        self.output = BertOutput(config)\n",
-    "\n",
+    "    \n",
+    "    #将attention层、feedforward层和最终的Add&Norm按顺序执行\n",
     "    def construct(self, hidden_states, attention_mask=None, head_mask=None):\n",
     "        attention_outputs = self.attention(hidden_states, attention_mask, head_mask)\n",
     "        attention_output = attention_outputs[0]\n",
@@ -723,6 +738,7 @@
     "    def construct(self, hidden_states, attention_mask=None, head_mask=None):\n",
     "        all_hidden_states = ()\n",
     "        all_attentions = ()\n",
+    "        #hidden_states为前一层的输出；逐层前向计算，并按需收集各层的隐藏状态\n",
     "        for i, layer_module in enumerate(self.layer):\n",
     "            if self.output_hidden_states:\n",
     "                all_hidden_states += (hidden_states,)\n",
@@ -783,7 +799,7 @@
     "            activation='tanh', weight_init=TruncatedNormal(config.initializer_range))\n",
     "\n",
     "    def construct(self, hidden_states):\n",
-    "\n",
+    "        #BERT pooler只关注句首的[CLS]\n",
     "        first_token_tensor = hidden_states[:, 0]\n",
     "        pooled_output = self.dense(first_token_tensor)\n",
     "        return pooled_output"
@@ -929,6 +945,7 @@
     "    def construct(self, hidden_states, masked_lm_positions):\n",
     "\n",
     "        batch_size, seq_len, hidden_size = hidden_states.shape\n",
+    "        #判断是否提供了需要预测的token位置，如果提供了可以仅预测masked的部分，提高预测效率\n",
     "        if masked_lm_positions is not None:\n",
     "            flat_offsets = mnp.arange(batch_size) * seq_len\n",
     "            flat_position = (masked_lm_positions + flat_offsets.reshape(-1, 1)).reshape(-1)\n",
@@ -991,6 +1008,7 @@
     "        self.dense = nn.Dense(config.hidden_size, config.hidden_size, activation='tanh', weight_init=TruncatedNormal(config.initializer_range))\n",
     "    \n",
     "    def construct(self, hidden_states):\n",
+    "        #BERT pooler只关注句首的[CLS]\n",
     "        first_token_tensor = hidden_states[:, 0]\n",
     "        pooled_output = self.dense(first_token_tensor)\n",
     "        return pooled_output"
@@ -1081,8 +1099,9 @@
     "            position_ids=position_ids,\n",
     "            head_mask=head_mask\n",
     "        )\n",
-    "\n",
+    "        #得到序列输出和池化输出，序列输出对应Masked LM任务，池化输出对应NSP任务\n",
     "        sequence_output, pooled_output = outputs[:2]\n",
+    "        #序列输出和池化输出分别经过对应的全连接层\n",
     "        prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output, masked_lm_positions)\n",
     "\n",
     "        outputs = (prediction_scores, seq_relationship_score,) + outputs[2:]\n",
@@ -1094,9 +1113,9 @@
  "metadata": {
   "celltoolbar": "幻灯片",
   "kernelspec": {
-   "display_name": "python-3.9.0",
+   "display_name": "MindSpore",
    "language": "python",
-   "name": "python-3.9.0"
+   "name": "mindspore"
   },
   "language_info": {
    "codemirror_mode": {
@@ -1108,7 +1127,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.0"
+   "version": "3.9.10"
   }
  },
  "nbformat": 4,