simoninithomas · goelakash · Jul 30, 2020
diff --git a/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb b/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb
@@ -8,7 +8,7 @@
     "<br> \n",
     "In this Notebook, we'll implement an agent <b>that plays OpenAI Taxi-V2.</b>\n",
     "\n",
-    "<img src=\"taxi.png\" alt=\"Taxi\"/>\n",
+    "<img src=\"../../docs/assets/img/video%20projects/taxi.png\" alt=\"Taxi\"/>\n",
     "\n",
     "The goal of this game is that our agent must <b>pick up the passenger at one location and drop him off to the goal as fast as possible.</b>\n",
     "\n",
@@ -77,23 +77,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/q2ZOEFAaaI0?showinfo=0\" frameborder=\"0\" allow=\"autoplay; encrypted-media\" allowfullscreen></iframe>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "execution_count": 1,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from IPython.display import HTML\n",
     "HTML('<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/q2ZOEFAaaI0?showinfo=0\" frameborder=\"0\" allow=\"autoplay; encrypted-media\" allowfullscreen></iframe>')\n"
@@ -113,10 +99,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "import numpy as np\n",
@@ -135,24 +119,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "+---------+\n",
-      "|\u001b[35mR\u001b[0m: | : :G|\n",
-      "| : : : : |\n",
-      "| : : : : |\n",
-      "| | : | : |\n",
-      "|\u001b[43mY\u001b[0m| : |\u001b[34;1mB\u001b[0m: |\n",
-      "+---------+\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "env = gym.make(\"Taxi-v2\")\n",
     "env.render()"
@@ -169,18 +138,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Action size  6\n",
-      "State size  500\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "action_size = env.action_space.n\n",
     "print(\"Action size \", action_size)\n",
@@ -191,23 +151,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[0. 0. 0. 0. 0. 0.]\n",
-      " [0. 0. 0. 0. 0. 0.]\n",
-      " [0. 0. 0. 0. 0. 0.]\n",
-      " ...\n",
-      " [0. 0. 0. 0. 0. 0.]\n",
-      " [0. 0. 0. 0. 0. 0.]\n",
-      " [0. 0. 0. 0. 0. 0.]]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "qtable = np.zeros((state_size, action_size))\n",
     "print(qtable)"
@@ -223,10 +169,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "total_episodes = 50000        # Total episodes\n",
@@ -254,10 +198,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# 2 For life or until learning is stopped\n",
@@ -309,17 +251,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Score over time: 8.2\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "env.reset()\n",
     "rewards = []\n",
@@ -348,15 +282,13 @@
     "            break\n",
     "        state = new_state\n",
     "env.close()\n",
-    "print (\"Score over time: \" +  str(sum(rewards)/total_test_episodes))"
+    "print (\"Score over time: \", np.mean(rewards))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   }
@@ -377,7 +309,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.4"
+   "version": "3.7.7"
   }
  },
  "nbformat": 4,