diff --git a/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb b/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb
index b394a84..1b8bb2c 100644
--- a/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb
+++ b/Q learning/Taxi-v2/Q Learning with OpenAI Taxi-v2 video version.ipynb
@@ -8,7 +8,7 @@
"
\n",
"In this Notebook, we'll implement an agent that plays OpenAI Taxi-V2.\n",
"\n",
- "
\n",
+ "
\n",
"\n",
"The goal of this game is that our agent must pick up the passenger at one location and drop him off to the goal as fast as possible.\n",
"\n",
@@ -77,23 +77,9 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ],
- "text/plain": [
- ""
- ]
- },
- "execution_count": 1,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from IPython.display import HTML\n",
"HTML('')\n"
@@ -113,10 +99,8 @@
},
{
"cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
@@ -135,24 +119,9 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+---------+\n",
- "|\u001b[35mR\u001b[0m: | : :G|\n",
- "| : : : : |\n",
- "| : : : : |\n",
- "| | : | : |\n",
- "|\u001b[43mY\u001b[0m| : |\u001b[34;1mB\u001b[0m: |\n",
- "+---------+\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"env = gym.make(\"Taxi-v2\")\n",
"env.render()"
@@ -169,18 +138,9 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Action size 6\n",
- "State size 500\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"action_size = env.action_space.n\n",
"print(\"Action size \", action_size)\n",
@@ -191,23 +151,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[[0. 0. 0. 0. 0. 0.]\n",
- " [0. 0. 0. 0. 0. 0.]\n",
- " [0. 0. 0. 0. 0. 0.]\n",
- " ...\n",
- " [0. 0. 0. 0. 0. 0.]\n",
- " [0. 0. 0. 0. 0. 0.]\n",
- " [0. 0. 0. 0. 0. 0.]]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"qtable = np.zeros((state_size, action_size))\n",
"print(qtable)"
@@ -223,10 +169,8 @@
},
{
"cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"total_episodes = 50000 # Total episodes\n",
@@ -254,10 +198,8 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"# 2 For life or until learning is stopped\n",
@@ -309,17 +251,9 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Score over time: 8.2\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"env.reset()\n",
"rewards = []\n",
@@ -348,15 +282,13 @@
" break\n",
" state = new_state\n",
"env.close()\n",
- "print (\"Score over time: \" + str(sum(rewards)/total_test_episodes))"
+ "print (\"Score over time: \", np.mean(rewards))"
]
},
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": []
}
@@ -377,7 +309,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.4"
+ "version": "3.7.7"
}
},
"nbformat": 4,