From 3b82bd47f9093f6b1d5d789b0dbd6c995761eeff Mon Sep 17 00:00:00 2001 From: xuhangscut <945440358@qq.com> Date: Fri, 3 Jan 2025 15:06:36 +0800 Subject: [PATCH] update bert demo, add annotation --- .../2.BERT/bert_emotect_finetune.ipynb | 676 ++++++------------ .../2.BERT/bert_introduction.ipynb | 37 +- 2 files changed, 236 insertions(+), 477 deletions(-) diff --git a/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb b/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb index e6b51b7..c9ea7d0 100644 --- a/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb +++ b/Season1.step_into_chatgpt/2.BERT/bert_emotect_finetune.ipynb @@ -4,312 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 基于MindSpore实现BERT对话情绪识别" + "### 基于MindSpore实现BERT对话情绪识别" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "该实验可进行在线体验,在线体验链接 (https://pangu.huaweicloud.com/gallery/asset-detail.html?id=5443b528-0dd5-4909-ac4f-1c9cf839e2aa)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 环境配置" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "> 此为在线运行平台配置python3.9的指南,如在其他环境平台运行案例,请根据实际情况修改如下代码" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "第一步:设置python版本为3.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture captured_output\n", - "!/home/ma-user/anaconda3/bin/conda create -n python-3.9.0 python=3.9.0 -y --override-channels --channel https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main\n", - "!/home/ma-user/anaconda3/envs/python-3.9.0/bin/pip install ipykernel" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os\n", - "\n", - "data = {\n", - " \"display_name\": \"python-3.9.0\",\n", - " \"env\": {\n", - " \"PATH\": 
\"/home/ma-user/anaconda3/envs/python-3.9.0/bin:/home/ma-user/anaconda3/envs/python-3.7.10/bin:/modelarts/authoring/notebook-conda/bin:/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/home/ma-user/modelarts/ma-cli/bin:/home/ma-user/modelarts/ma-cli/bin\"\n", - " },\n", - " \"language\": \"python\",\n", - " \"argv\": [\n", - " \"/home/ma-user/anaconda3/envs/python-3.9.0/bin/python\",\n", - " \"-m\",\n", - " \"ipykernel\",\n", - " \"-f\",\n", - " \"{connection_file}\"\n", - " ]\n", - "}\n", - "\n", - "if not os.path.exists(\"/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/\"):\n", - " os.mkdir(\"/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/\")\n", - "\n", - "with open('/home/ma-user/anaconda3/share/jupyter/kernels/python-3.9.0/kernel.json', 'w') as f:\n", - " json.dump(data, f, indent=4)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 注:以上代码运行完成后,需要重新设置kernel为python-3.9.0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "第二步:安装MindSpore框架和MindNLP套件" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple\n", - "Collecting mindspore==2.2.14\n", - " Downloading https://ms-release.obs.cn-north-4.myhuaweicloud.com/2.2.14/MindSpore/unified/x86_64/mindspore-2.2.14-cp39-cp39-linux_x86_64.whl (743.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m743.0/743.0 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting numpy>=1.17.0 (from mindspore==2.2.14)\n", - " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/b1/e3/24d289c5a3255bf52824bd52295e9a7923cad8ae5ec29539fc971e1122f6/numpy-2.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (19.5 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.5/19.5 MB\u001b[0m \u001b[31m67.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting protobuf>=3.13.0 (from mindspore==2.2.14)\n", - " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/27/e4/8dc4546be46873f8950cb44cdfe19b79d66d26e53c4ee5e3440406257fcd/protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl (309 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.3/309.3 kB\u001b[0m \u001b[31m24.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: asttokens>=2.0.4 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore==2.2.14) (2.4.1)\n", - "Collecting pillow>=6.2.0 (from mindspore==2.2.14)\n", - " Downloading 
https://pypi.tuna.tsinghua.edu.cn/packages/32/3f/c02268d0c6fb6b3958bdda673c17b315c821d97df29ae6969f20fb49388a/pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting scipy>=1.5.4 (from mindspore==2.2.14)\n", - " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/35/f5/d0ad1a96f80962ba65e2ce1de6a1e59edecd1f0a7b55990ed208848012e0/scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (38.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.6/38.6 MB\u001b[0m \u001b[31m45.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore==2.2.14) (24.1)\n", - "Requirement already satisfied: psutil>=5.6.1 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore==2.2.14) (6.0.0)\n", - "Collecting astunparse>=1.6.3 (from mindspore==2.2.14)\n", - " Downloading https://pypi.tuna.tsinghua.edu.cn/packages/2b/03/13dde6512ad7b4557eb792fbcf0c653af6076b81e5941d36ec61f7ce6028/astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n", - "Requirement already satisfied: six>=1.12.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from asttokens>=2.0.4->mindspore==2.2.14) (1.16.0)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from astunparse>=1.6.3->mindspore==2.2.14) (0.43.0)\n", - "Installing collected packages: protobuf, pillow, numpy, astunparse, scipy, mindspore\n", - "Successfully installed astunparse-1.6.3 mindspore-2.2.14 numpy-2.0.1 pillow-10.4.0 protobuf-5.27.2 scipy-1.13.1\n" - ] 
- } - ], - "source": [ - "!pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/2.2.14/MindSpore/unified/x86_64/mindspore-2.2.14-cp39-cp39-linux_x86_64.whl --trusted-host ms-release.obs.cn-north-4.myhuaweicloud.com -i https://pypi.tuna.tsinghua.edu.cn/simple" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple\n", - "Collecting mindnlp\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/72/37/ef313c23fd587c3d1f46b0741c98235aecdfd93b4d6d446376f3db6a552c/mindnlp-0.3.1-py3-none-any.whl (5.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.7/5.7 MB\u001b[0m \u001b[31m43.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: mindspore in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindnlp) (2.2.14)\n", - "Collecting tqdm (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/18/eb/fdb7eb9e48b7b02554e1664afd3bd3f117f6b6d6c5881438a0b055554f9b/tqdm-4.66.4-py3-none-any.whl (78 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting requests (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl (64 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.9/64.9 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting datasets (from mindnlp)\n", - " Downloading 
http://repo.myhuaweicloud.com/repository/pypi/packages/60/2d/963b266bb8f88492d5ab4232d74292af8beb5b6fdae97902df9e284d4c32/datasets-2.20.0-py3-none-any.whl (547 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m547.8/547.8 kB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting evaluate (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/c2/d6/ff9baefc8fc679dcd9eb21b29da3ef10c81aa36be630a7ae78e4611588e1/evaluate-0.4.2-py3-none-any.whl (84 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting tokenizers (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/0f/cb/8fc733c8f251bac1e5c4ae52458c353b3faa98f41d734c226cad3783da03/tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m72.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting safetensors (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/38/7f/3ba803bd6d726d65e480bee2aaeea79580d2e4836e4c6ebc27144c62ce51/safetensors-0.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting sentencepiece (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/5f/01/c95e42eb86282b2c79305d3e0b0ca5a743f85a61262bb7130999c70b9374/sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl 
(1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m39.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting regex (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/36/67/851cf82e2c47d46846cca15ba84f845e876257a54cb82f229d335cd5c67e/regex-2024.7.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (775 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m775.9/775.9 kB\u001b[0m \u001b[31m21.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting addict (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/6a/00/b08f23b7d7e1e14ce01419a467b583edbb93c6cdb8654e54a9cc579cd61f/addict-2.4.0-py3-none-any.whl (3.8 kB)\n", - "Collecting ml-dtypes (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/23/1c/06b52d3dcd75a81f6ca1e56514db6b21fe928f159cc5302428c1fed46562/ml_dtypes-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.2/2.2 MB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting pyctcdecode (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/a5/8a/93e2118411ae5e861d4f4ce65578c62e85d0f1d9cb389bd63bd57130604e/pyctcdecode-0.5.0-py2.py3-none-any.whl (39 kB)\n", - "Collecting jieba (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/c6/cb/18eeb235f833b726522d7ebed54f2278ce28ba9438e3135ab0278d9792a2/jieba-0.42.1.tar.gz (19.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m19.2/19.2 MB\u001b[0m \u001b[31m66.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25h 
Preparing metadata (setup.py) ... \u001b[?25ldone\n", - "\u001b[?25hCollecting pytest==7.2.0 (from mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/67/68/a5eb36c3a8540594b6035e6cdae40c1ef1b6a2bfacbecc3d1a544583c078/pytest-7.2.0-py3-none-any.whl (316 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.8/316.8 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting attrs>=19.2.0 (from pytest==7.2.0->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/e0/44/827b2a91a5816512fcaf3cc4ebc465ccd5d598c45cefa6703fcf4a79018f/attrs-23.2.0-py3-none-any.whl (60 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting iniconfig (from pytest==7.2.0->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl (5.9 kB)\n", - "Requirement already satisfied: packaging in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from pytest==7.2.0->mindnlp) (24.1)\n", - "Collecting pluggy<2.0,>=0.12 (from pytest==7.2.0->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl (20 kB)\n", - "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from pytest==7.2.0->mindnlp) (1.2.2)\n", - "Collecting tomli>=1.0.0 (from pytest==7.2.0->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl (12 kB)\n", - "Collecting filelock (from 
datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ae/f0/48285f0262fe47103a4a45972ed2f9b93e4c80b8fd609fa98da78b2a5706/filelock-3.15.4-py3-none-any.whl (16 kB)\n", - "Requirement already satisfied: numpy>=1.17 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from datasets->mindnlp) (2.0.1)\n", - "Collecting pyarrow>=15.0.0 (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/af/61/bcd9b58e38ead6ad42b9ed00da33a3f862bc1d445e3d3164799c25550ac2/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (39.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.9/39.9 MB\u001b[0m \u001b[31m56.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting pyarrow-hotfix (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/e4/f4/9ec2222f5f5f8ea04f66f184caafd991a39c8782e31f5b0266f101cb68ca/pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n", - "Collecting dill<0.3.9,>=0.3.0 (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl (116 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pandas (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/bb/30/f6f1f1ac36250f50c421b1b6af08c35e5a8b5a84385ef928625336b93e6f/pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m57.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - 
"\u001b[?25hCollecting xxhash (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/63/93/812d78f70145c68c4e64533f4d625bea01236f27698febe15f0ceebc1566/xxhash-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (193 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m193.8/193.8 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting multiprocess (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl (133 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m133.4/133.4 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting fsspec<=2024.5.0,>=2023.1.0 (from fsspec[http]<=2024.5.0,>=2023.1.0->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ba/a3/16e9fe32187e9c8bc7f9b7bcd9728529faa725231a0c96f2f98714ff2fc5/fsspec-2024.5.0-py3-none-any.whl (316 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m316.1/316.1 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting aiohttp (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/4c/e6/061ab7e0084b7443f9bd7092853b5d0f97029157a58fcc8749cdad8aef0f/aiohttp-3.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting huggingface-hub>=0.21.2 (from datasets->mindnlp)\n", - " Downloading 
http://repo.myhuaweicloud.com/repository/pypi/packages/0f/36/83c0f0c7a5ec75738241c4c0c066097e4f74729716961db6a2905395015c/huggingface_hub-0.24.3-py3-none-any.whl (417 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m417.3/417.3 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pyyaml>=5.1 (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/7d/39/472f2554a0f1e825bd7c5afc11c817cd7a2f3657460f7159f691fbb37c51/PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (738 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m738.9/738.9 kB\u001b[0m \u001b[31m38.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0m\n", - "\u001b[?25hCollecting charset-normalizer<4,>=2 (from requests->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/98/69/5d8751b4b670d623aa7a47bef061d69c279e9f922f6705147983aa76c3ce/charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.3/142.3 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting idna<4,>=2.5 (from requests->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/e5/3e/741d8c82801c347547f8a2a06aa57dbb1992be9e948df2ea0eda2c8b79e8/idna-3.7-py3-none-any.whl (66 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.8/66.8 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting urllib3<3,>=1.21.1 (from requests->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ca/1c/89ffc63a9605b583d5df2be791a27bc1a42b7c32bab68d3c8f2f73a98cd4/urllib3-2.2.2-py3-none-any.whl (121 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.4/121.4 kB\u001b[0m \u001b[31m14.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting certifi>=2017.4.17 (from requests->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/1c/d5/c84e1a17bf61d4df64ca866a1c9a913874b4e9bdc131ec689a0ad013fb36/certifi-2024.7.4-py3-none-any.whl (162 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m163.0/163.0 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: protobuf>=3.13.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (5.27.2)\n", - "Requirement already satisfied: asttokens>=2.0.4 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (2.4.1)\n", - "Requirement already satisfied: pillow>=6.2.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (10.4.0)\n", - "Requirement already satisfied: scipy>=1.5.4 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (1.13.1)\n", - "Requirement already satisfied: psutil>=5.6.1 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (6.0.0)\n", - "Requirement already satisfied: astunparse>=1.6.3 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from mindspore->mindnlp) (1.6.3)\n", - "Collecting numpy>=1.17 (from datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/54/30/c2a907b9443cf42b90c17ad10c1e8fa801975f01cb9764f3f8eb8aea638b/numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m70.3 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting pygtrie<3.0,>=2.1 (from pyctcdecode->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/ec/cd/bd196b2cf014afb1009de8b0f05ecd54011d881944e62763f3c1b1e8ef37/pygtrie-2.5.0-py3-none-any.whl (25 kB)\n", - "Collecting hypothesis<7,>=6.14 (from pyctcdecode->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/dd/b6/619043aa33150cfbb2491f7d712a5a955cd3702056c6e436454477b5c18b/hypothesis-6.108.5-py3-none-any.whl (465 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m465.2/465.2 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: six>=1.12.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from asttokens>=2.0.4->mindspore->mindnlp) (1.16.0)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from astunparse>=1.6.3->mindspore->mindnlp) (0.43.0)\n", - "Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/07/b1/d9455cf313df7b2fe6c60a871eb96801b6e8fbdc7d736f6576492b4c97b3/aiohappyeyeballs-2.3.2-py3-none-any.whl (11 kB)\n", - "Collecting aiosignal>=1.1.2 (from aiohttp->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/76/ac/a7305707cb852b7e16ff80eaf5692309bde30e2b1100a1fcacdc8f731d97/aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", - "Collecting frozenlist>=1.1.1 (from aiohttp->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/70/b0/6f1ebdabfb604e39a0f84428986b89ab55f246b64cddaa495f2c953e1f6b/frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (240 kB)\n", - 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m240.7/240.7 kB\u001b[0m \u001b[31m24.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting multidict<7.0,>=4.5 (from aiohttp->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/39/a9/1f8d42c8103bcb1da6bb719f1bc018594b5acc8eae56b3fec4720ebee225/multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (123 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.8/123.8 kB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting yarl<2.0,>=1.0 (from aiohttp->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/69/ea/d7e961ea9b1b818a43b155ee512117be6ab9ab67c1e94967b2e64126e8e4/yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (304 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m304.3/304.3 kB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting async-timeout<5.0,>=4.0 (from aiohttp->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/a7/fa/e01228c2938de91d47b307831c62ab9e4001e747789d0b05baf779a6488c/async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from huggingface-hub>=0.21.2->datasets->mindnlp) (4.12.2)\n", - "Collecting sortedcontainers<3.0.0,>=2.1.0 (from hypothesis<7,>=6.14->pyctcdecode->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in 
/home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages (from pandas->datasets->mindnlp) (2.9.0.post0)\n", - "Collecting pytz>=2020.1 (from pandas->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl (505 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m505.5/505.5 kB\u001b[0m \u001b[31m18.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting tzdata>=2022.7 (from pandas->datasets->mindnlp)\n", - " Downloading http://repo.myhuaweicloud.com/repository/pypi/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl (345 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m345.4/345.4 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hBuilding wheels for collected packages: jieba\n", - " Building wheel for jieba (setup.py) ... 
\u001b[?25ldone\n", - "\u001b[?25h Created wheel for jieba: filename=jieba-0.42.1-py3-none-any.whl size=19314459 sha256=99e99961421c6a7516fb6dae30f85adc7b7643e3245afc9a1e32608d5e4fd5a6\n", - " Stored in directory: /home/ma-user/.cache/pip/wheels/2d/22/9e/9af7e8c2773513ac75905acfb75073922bcc1aa176f730a0c9\n", - "Successfully built jieba\n", - "Installing collected packages: sortedcontainers, sentencepiece, pytz, pygtrie, jieba, addict, xxhash, urllib3, tzdata, tqdm, tomli, safetensors, regex, pyyaml, pyarrow-hotfix, pluggy, numpy, multidict, iniconfig, idna, fsspec, frozenlist, filelock, dill, charset-normalizer, certifi, attrs, async-timeout, aiohappyeyeballs, yarl, requests, pytest, pyarrow, pandas, multiprocess, ml-dtypes, hypothesis, aiosignal, pyctcdecode, huggingface-hub, aiohttp, tokenizers, datasets, evaluate, mindnlp\n", - " Attempting uninstall: numpy\n", - " Found existing installation: numpy 2.0.1\n", - " Uninstalling numpy-2.0.1:\n", - " Successfully uninstalled numpy-2.0.1\n", - "Successfully installed addict-2.4.0 aiohappyeyeballs-2.3.2 aiohttp-3.10.0 aiosignal-1.3.1 async-timeout-4.0.3 attrs-23.2.0 certifi-2024.7.4 charset-normalizer-3.3.2 datasets-2.20.0 dill-0.3.8 evaluate-0.4.2 filelock-3.15.4 frozenlist-1.4.1 fsspec-2024.5.0 huggingface-hub-0.24.3 hypothesis-6.108.5 idna-3.7 iniconfig-2.0.0 jieba-0.42.1 mindnlp-0.3.1 ml-dtypes-0.4.0 multidict-6.0.5 multiprocess-0.70.16 numpy-1.26.4 pandas-2.2.2 pluggy-1.5.0 pyarrow-17.0.0 pyarrow-hotfix-0.6 pyctcdecode-0.5.0 pygtrie-2.5.0 pytest-7.2.0 pytz-2024.1 pyyaml-6.0.1 regex-2024.7.24 requests-2.32.3 safetensors-0.4.3 sentencepiece-0.2.0 sortedcontainers-2.4.0 tokenizers-0.19.1 tomli-2.0.1 tqdm-4.66.4 tzdata-2024.1 urllib3-2.2.2 xxhash-3.4.1 yarl-1.9.4\n" - ] - } - ], - "source": [ - "!pip install mindnlp" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 注:MindNLP whl包下载链接为:[MindNLP](https://repo.mindspore.cn/mindspore-lab/mindnlp/newest/any/)" + "> 此为在线运行平台配置python3.9 
mindspore2.4.1 mindnlp0.4.1的指南,如在其他环境平台运行案例,请根据实际情况安装依赖包" ] }, { @@ -330,12 +32,12 @@ "\n", "对话情绪识别(Emotion Detection,简称EmoTect),专注于识别智能对话场景中用户的情绪,针对智能对话场景中的用户文本,自动判断该文本的情绪类别并给出相应的置信度,情绪类型分为积极、消极、中性。 对话情绪识别适用于聊天、客服等多个场景,能够帮助企业更好地把握对话质量、改善产品的用户交互体验,也能分析客服服务质量、降低人工质检成本。\n", "\n", - "下面以一个文本情感分类任务为例子来说明BERT模型的整个应用过程。" + "下面以一个文本情感分类任务为例子来说明BERT模型的整个应用过程。\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "tags": [] }, @@ -344,25 +46,39 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/ma-user/anaconda3/envs/python-3.9.0/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", "Building prefix dict from the default dictionary ...\n", "Dumping model to file cache /tmp/jieba.cache\n", - "Loading model cost 0.782 seconds.\n", - "Prefix dict has been built successfully.\n" + "Loading model cost 1.321 seconds.\n", + "Prefix dict has been built successfully.\n", + "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/Cython/Compiler/Main.py:384: FutureWarning: Cython directive 'language_level' not set, using '3str' for now (Py3). This has changed from earlier releases! 
File: /home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/mindnlp/transformers/models/graphormer/algos_graphormer.pyx\n", + " tree = Parsing.p_module(s, pxd, full_module_name)\n" ] } ], "source": [ - "import os\n", - "\n", "import mindspore\n", - "from mindspore.dataset import text, GeneratorDataset, transforms\n", - "from mindspore import nn, context\n", + "from mindspore.dataset import GeneratorDataset, transforms\n", "\n", - "from mindnlp._legacy.engine import Trainer, Evaluator\n", - "from mindnlp._legacy.engine.callbacks import CheckpointCallback, BestModelCallback\n", - "from mindnlp._legacy.metrics import Accuracy" + "from mindnlp.engine import Trainer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 数据集\n", + "\n", + "这里提供一份已标注的、经过分词预处理的机器人聊天数据集,来自于百度飞桨团队。数据由两列组成,以制表符('\\t')分隔,第一列是情绪分类的类别(0表示消极;1表示中性;2表示积极),第二列是以空格分词的中文文本,如下示例,文件为 utf8 编码。\n", + "\n", + "label--text_a\n", + "\n", + "0--谁骂人了?我从来不骂人,我骂的都不是人,你是人吗 ?\n", + "\n", + "1--我有事等会儿就回来和你聊\n", + "\n", + "2--我见到你很高兴谢谢你帮我\n", + "\n", + "这部分主要包括数据集读取,数据格式转换,数据 Tokenize 处理和 pad 操作。" ] }, { @@ -398,25 +114,6 @@ " return len(self._labels)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 数据集\n", - "\n", - "这里提供一份已标注的、经过分词预处理的机器人聊天数据集,来自于百度飞桨团队。数据由两列组成,以制表符('\\t')分隔,第一列是情绪分类的类别(0表示消极;1表示中性;2表示积极),第二列是以空格分词的中文文本,如下示例,文件为 utf8 编码。\n", - "\n", - "label--text_a\n", - "\n", - "0--谁骂人了?我从来不骂人,我骂的都不是人,你是人吗 ?\n", - "\n", - "1--我有事等会儿就回来和你聊\n", - "\n", - "2--我见到你很高兴谢谢你帮我\n", - "\n", - "这部分主要包括数据集读取,数据格式转换,数据 Tokenize 处理和 pad 操作。" - ] - }, { "cell_type": "code", "execution_count": 5, @@ -428,16 +125,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "--2024-07-31 09:59:47-- https://baidu-nlp.bj.bcebos.com/emotion_detection-dataset-1.0.0.tar.gz\n", - "Resolving proxy.modelarts.com (proxy.modelarts.com)... 192.168.6.3\n", - "Connecting to proxy.modelarts.com (proxy.modelarts.com)|192.168.6.3|:80... 
connected.\n", + "--2025-01-03 11:44:42-- https://baidu-nlp.bj.bcebos.com/emotion_detection-dataset-1.0.0.tar.gz\n", + "Resolving proxy-notebook.modelarts.com (proxy-notebook.modelarts.com)... 192.168.0.33\n", + "Connecting to proxy-notebook.modelarts.com (proxy-notebook.modelarts.com)|192.168.0.33|:8083... connected.\n", "Proxy request sent, awaiting response... 200 OK\n", "Length: 1710581 (1.6M) [application/x-gzip]\n", "Saving to: ‘emotion_detection.tar.gz’\n", "\n", - "emotion_detection.t 100%[===================>] 1.63M 1.71MB/s in 1.0s \n", + "emotion_detection.t 100%[===================>] 1.63M 7.10MB/s in 0.2s \n", "\n", - "2024-07-31 09:59:48 (1.71 MB/s) - ‘emotion_detection.tar.gz’ saved [1710581/1710581]\n", + "2025-01-03 11:44:42 (7.10 MB/s) - ‘emotion_detection.tar.gz’ saved [1710581/1710581]\n", "\n", "data/\n", "data/test.tsv\n", @@ -471,8 +168,6 @@ }, "outputs": [], "source": [ - "import numpy as np\n", - "\n", "def process_dataset(source, tokenizer, max_seq_len=64, batch_size=32, shuffle=True):\n", " is_ascend = mindspore.get_context('device_target') == 'Ascend'\n", "\n", @@ -490,7 +185,7 @@ " # map dataset\n", " dataset = dataset.map(operations=tokenize_and_pad, input_columns=\"text_a\", output_columns=['input_ids', 'attention_mask'])\n", " dataset = dataset.map(operations=[type_cast_op], input_columns=\"label\", output_columns='labels')\n", - " # batch dataset\n", + " # batch dataset\n", " if is_ascend:\n", " dataset = dataset.batch(batch_size)\n", " else:\n", @@ -518,10 +213,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 49.0/49.0 [00:00<00:00, 214kB/s]\n", - "107kB [00:11, 9.56kB/s] \n", - "263kB [00:06, 41.9kB/s] \n", - "624B [00:00, 1.76MB/s] \n" + "100%|██████████| 49.0/49.0 [00:00<00:00, 61.5kB/s]\n", + "107kB [00:00, 823kB/s] \n", + "263kB [00:00, 588kB/s] \n", + "624B [00:00, 713kB/s] \n", + 
"/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/mindnlp/transformers/tokenization_utils_base.py:1526: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted, and will be then set to `False` by default. \n", + " warnings.warn(\n" ] } ], @@ -589,32 +286,52 @@ "cell_type": "code", "execution_count": 11, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mindspore.dataset.engine.datasets.BatchDataset" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(dataset_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[Tensor(shape=[32, 18], dtype=Int64, value=\n", - "[[ 101, 872, 4339 ... 0, 0, 0],\n", - " [ 101, 872, 2849 ... 0, 0, 0],\n", - " [ 101, 679, 2190 ... 0, 0, 0],\n", + "{'input_ids': Tensor(shape=[32, 64], dtype=Int64, value=\n", + "[[ 101, 2769, 3221 ... 0, 0, 0],\n", + " [ 101, 1091, 1139 ... 0, 0, 0],\n", + " [ 101, 2828, 6929 ... 0, 0, 0],\n", " ...\n", - " [ 101, 1063, 1921 ... 0, 0, 0],\n", - " [ 101, 8275, 8331 ... 0, 0, 0],\n", - " [ 101, 3221, 2207 ... 0, 0, 0]]), Tensor(shape=[32, 18], dtype=Int64, value=\n", + " [ 101, 671, 4157 ... 0, 0, 0],\n", + " [ 101, 2769, 6432 ... 0, 0, 0],\n", + " [ 101, 2207, 4908 ... 0, 0, 0]]), 'attention_mask': Tensor(shape=[32, 64], dtype=Int64, value=\n", "[[1, 1, 1 ... 0, 0, 0],\n", " [1, 1, 1 ... 0, 0, 0],\n", " [1, 1, 1 ... 0, 0, 0],\n", " ...\n", " [1, 1, 1 ... 0, 0, 0],\n", " [1, 1, 1 ... 0, 0, 0],\n", - " [1, 1, 1 ... 0, 0, 0]]), Tensor(shape=[32], dtype=Int32, value= [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, \n", - " 1, 1, 0, 1, 1, 1, 1, 1])]\n" + " [1, 1, 1 ... 
0, 0, 0]]), 'labels': Tensor(shape=[32], dtype=Int32, value= [0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, \n", + " 0, 1, 1, 1, 0, 1, 1, 1])}\n" ] } ], "source": [ - "print(next(dataset_train.create_tuple_iterator()))" + "print(next(dataset_train.create_dict_iterator()))" ] }, { @@ -628,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "tags": [] }, @@ -637,241 +354,265 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 392M/392M [00:30<00:00, 13.4MB/s] \n", - "The following parameters in checkpoint files are not loaded:\n", - "['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']\n", - "The following parameters in models are missing parameter:\n", - "['classifier.weight', 'classifier.bias']\n" + "100%|██████████| 392M/392M [00:34<00:00, 11.8MB/s] \n", + "[WARNING] DEVICE(32558,ffff8291f0b0,python):2025-01-03-11:45:51.479.694 [mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_vmm_adapter.h:188] CheckVmmDriverVersion] Driver version is less than 24.0.0, vmm is disabled by default, drvier_version: 23.0.6\n", + "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.bias', 'classifier.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "from mindnlp.transformers import BertForSequenceClassification, BertModel\n", - "from mindnlp._legacy.amp import auto_mixed_precision\n", "\n", "# set bert config and define parameters for training\n", - "model = BertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=3)\n", - "model = auto_mixed_precision(model, 
'O1')\n", - "\n", - "optimizer = nn.Adam(model.trainable_params(), learning_rate=2e-5)" + "model = BertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=3)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "tags": [] }, "outputs": [], "source": [ - "metric = Accuracy()\n", - "# define callbacks to save checkpoints\n", - "ckpoint_cb = CheckpointCallback(save_path='checkpoint', ckpt_name='bert_emotect', epochs=1, keep_checkpoint_max=2)\n", - "best_model_cb = BestModelCallback(save_path='checkpoint', ckpt_name='bert_emotect_best', auto_load=True)\n", + "from mindnlp.engine import TrainingArguments\n", "\n", - "trainer = Trainer(network=model, train_dataset=dataset_train,\n", - " eval_dataset=dataset_val, metrics=metric,\n", - " epochs=5, optimizer=optimizer, callbacks=[ckpoint_cb, best_model_cb])" + "training_args = TrainingArguments(\n", + " output_dir=\"bert_emotect_finetune\",\n", + " evaluation_strategy=\"epoch\",\n", + " save_strategy=\"epoch\",\n", + " logging_strategy=\"epoch\",\n", + " load_best_model_at_end=True,\n", + " num_train_epochs=3.0\n", + ")" ] }, { "cell_type": "code", - "execution_count": 14, - "metadata": { - "tags": [] - }, + "execution_count": 15, + "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The train will start from the checkpoint saved in 'checkpoint'.\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "Epoch 0: 0%| | 0/302 [00:00= 7, but the current device's computing capacity is 6\n", - "Epoch 0: 100%|██████████| 302/302 [01:57<00:00, 2.56it/s, loss=0.3391044] \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Checkpoint: 'bert_emotect_epoch_0.ckpt' has been saved in epoch: 0.\n" + "Downloading builder script: 4.20kB [00:00, 4.84MB/s]\n" ] - }, + } + ], + "source": [ + "from mindnlp import evaluate\n", + "import numpy as np\n", + "\n", + "metric = evaluate.load(\"accuracy\")\n", 
+ "\n", + "def compute_metrics(eval_pred):\n", + " logits, labels = eval_pred\n", + " predictions = np.argmax(logits, axis=-1)\n", + " return metric.compute(predictions=predictions, references=labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " train_dataset=dataset_train,\n", + " eval_dataset=dataset_val,\n", + " compute_metrics=compute_metrics\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "tags": [] + }, + "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Evaluate: 100%|██████████| 34/34 [00:04<00:00, 7.48it/s]\n" + " 0%| | 0/906 [00:00