diff --git a/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb b/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb index b394a84..1b8bb2c 100644 --- a/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb +++ b/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb @@ -8,7 +8,7 @@ "
\n", "In this Notebook, we'll implement an agent that plays OpenAI Taxi-V2.\n", "\n", - "\"Taxi\"/\n", + "\"Taxi\"/\n", "\n", "The goal of this game is that our agent must pick up the passenger at one location and drop him off to the goal as fast as possible.\n", "\n", @@ -77,23 +77,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from IPython.display import HTML\n", "HTML('')\n" @@ -113,10 +99,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", @@ -135,24 +119,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---------+\n", - "|\u001b[35mR\u001b[0m: | : :G|\n", - "| : : : : |\n", - "| : : : : |\n", - "| | : | : |\n", - "|\u001b[43mY\u001b[0m| : |\u001b[34;1mB\u001b[0m: |\n", - "+---------+\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "env = gym.make(\"Taxi-v2\")\n", "env.render()" @@ -169,18 +138,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Action size 6\n", - "State size 500\n" - ] - } - ], + "outputs": [], "source": [ "action_size = env.action_space.n\n", "print(\"Action size \", action_size)\n", @@ -191,23 +151,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0. 0. 0. 0. 0. 0.]\n", - " [0. 0. 0. 0. 0. 0.]\n", - " [0. 0. 0. 0. 0. 0.]\n", - " ...\n", - " [0. 0. 0. 0. 0. 0.]\n", - " [0. 0. 0. 0. 0. 0.]\n", - " [0. 0. 0. 0. 0. 0.]]\n" - ] - } - ], + "outputs": [], "source": [ "qtable = np.zeros((state_size, action_size))\n", "print(qtable)" @@ -223,10 +169,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "total_episodes = 50000 # Total episodes\n", @@ -254,10 +198,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "# 2 For life or until learning is stopped\n", @@ -309,17 +251,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Score over time: 8.2\n" - ] - } - ], + "outputs": [], "source": [ "env.reset()\n", "rewards = []\n", @@ -348,15 +282,13 @@ " break\n", " state = new_state\n", "env.close()\n", - "print (\"Score over time: \" + str(sum(rewards)/total_test_episodes))" + "print (\"Score over time: \", np.mean(rewards))" ] }, { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [] } @@ -377,7 +309,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.7.7" } }, "nbformat": 4,