Skip to content

Commit 101f986

Browse files
Updated notebooks
1 parent 8a1fccb commit 101f986

16 files changed

+92
-74
lines changed

Chapter01/06_neural_evolutionary_agent.ipynb

Lines changed: 6 additions & 6 deletions
Large diffs are not rendered by default.

Chapter02/2_value_based_rl.ipynb

Lines changed: 14 additions & 14 deletions
Large diffs are not rendered by default.

Chapter02/3_temporal_difference_learning.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/4_monte_carlo_prediction_and_control_rl.ipynb

Lines changed: 2 additions & 2 deletions
Large diffs are not rendered by default.

Chapter02/5_sarsa_sarsa_lambda.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/6_q_learning.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

Chapter02/7_policy_gradients.ipynb

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,13 @@
321321
"Episode#:0 ep_reward:-169.0\r",
322322
"Episode#:0 ep_reward:-170.0\r",
323323
"Episode#:0 ep_reward:-171.0\r",
324-
"Episode#:0 ep_reward:-172.0\r",
324+
"Episode#:0 ep_reward:-172.0\r"
325+
]
326+
},
327+
{
328+
"name": "stdout",
329+
"output_type": "stream",
330+
"text": [
325331
"Episode#:0 ep_reward:-173.0\r",
326332
"Episode#:0 ep_reward:-174.0\r",
327333
"Episode#:0 ep_reward:-175.0\r",
@@ -531,7 +537,13 @@
531537
"Episode#:1 ep_reward:-165.0\r",
532538
"Episode#:1 ep_reward:-166.0\r",
533539
"Episode#:1 ep_reward:-167.0\r",
534-
"Episode#:1 ep_reward:-168.0\r",
540+
"Episode#:1 ep_reward:-168.0\r"
541+
]
542+
},
543+
{
544+
"name": "stdout",
545+
"output_type": "stream",
546+
"text": [
535547
"Episode#:1 ep_reward:-169.0\r",
536548
"Episode#:1 ep_reward:-170.0\r",
537549
"Episode#:1 ep_reward:-171.0\r",

Chapter02/8_actor_critic_agent.ipynb

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -163,37 +163,29 @@
163163
"Episode#:0 ep_reward:9.0\r",
164164
"Episode#:0 ep_reward:10.0\r",
165165
"Episode#:0 ep_reward:11.0\r",
166-
"Episode#:0 ep_reward:12.0\r",
167-
"Episode#:0 ep_reward:13.0\r",
168-
"Episode#:0 ep_reward:14.0\r",
169-
"Episode#:0 ep_reward:15.0\r"
166+
"Episode#:0 ep_reward:12.0\r"
170167
]
171168
},
172169
{
173170
"name": "stdout",
174171
"output_type": "stream",
175172
"text": [
173+
"Episode#:0 ep_reward:13.0\r",
174+
"Episode#:0 ep_reward:14.0\r",
175+
"Episode#:0 ep_reward:15.0\r",
176176
"Episode#:0 ep_reward:16.0\r",
177177
"Episode#:0 ep_reward:17.0\r",
178178
"Episode#:0 ep_reward:18.0\r",
179-
"Episode#:0 ep_reward:19.0\r",
179+
"Episode#:0 ep_reward:19.0\r\n",
180+
"\n",
180181
"Episode#:0 ep_reward:20.0\r",
181-
"Episode#:0 ep_reward:21.0\r",
182-
"Episode#:0 ep_reward:22.0\r",
183-
"Episode#:0 ep_reward:23.0\r",
184-
"Episode#:0 ep_reward:24.0\r"
182+
"Episode#:1 ep_reward:1.0\r"
185183
]
186184
},
187185
{
188186
"name": "stdout",
189187
"output_type": "stream",
190188
"text": [
191-
"Episode#:0 ep_reward:25.0\r",
192-
"Episode#:0 ep_reward:26.0\r",
193-
"Episode#:0 ep_reward:27.0\r\n",
194-
"\n",
195-
"Episode#:0 ep_reward:28.0\r",
196-
"Episode#:1 ep_reward:1.0\r",
197189
"Episode#:1 ep_reward:2.0\r",
198190
"Episode#:1 ep_reward:3.0\r",
199191
"Episode#:1 ep_reward:4.0\r",
@@ -203,20 +195,34 @@
203195
"Episode#:1 ep_reward:8.0\r",
204196
"Episode#:1 ep_reward:9.0\r",
205197
"Episode#:1 ep_reward:10.0\r",
206-
"Episode#:1 ep_reward:11.0\r"
198+
"Episode#:1 ep_reward:11.0\r",
199+
"Episode#:1 ep_reward:12.0\r",
200+
"Episode#:1 ep_reward:13.0\r"
207201
]
208202
},
209203
{
210204
"name": "stdout",
211205
"output_type": "stream",
212206
"text": [
213-
"Episode#:1 ep_reward:12.0\r",
214-
"Episode#:1 ep_reward:13.0\r",
215207
"Episode#:1 ep_reward:14.0\r",
216208
"Episode#:1 ep_reward:15.0\r",
217-
"Episode#:1 ep_reward:16.0\r\n",
209+
"Episode#:1 ep_reward:16.0\r",
210+
"Episode#:1 ep_reward:17.0\r",
211+
"Episode#:1 ep_reward:18.0\r",
212+
"Episode#:1 ep_reward:19.0\r",
213+
"Episode#:1 ep_reward:20.0\r",
214+
"Episode#:1 ep_reward:21.0\r",
215+
"Episode#:1 ep_reward:22.0\r"
216+
]
217+
},
218+
{
219+
"name": "stdout",
220+
"output_type": "stream",
221+
"text": [
222+
"Episode#:1 ep_reward:23.0\r",
223+
"Episode#:1 ep_reward:24.0\r\n",
218224
"\n",
219-
"Episode#:1 ep_reward:17.0\r"
225+
"Episode#:1 ep_reward:25.0\r"
220226
]
221227
}
222228
],

Chapter03/1_double_dqn.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@
7272
"name": "stdout",
7373
"output_type": "stream",
7474
"text": [
75-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DoubleDQN/CartPole-v0/20210520-051949\n"
75+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DoubleDQN/CartPole-v0/20210523-000945\n"
7676
]
7777
}
7878
],
@@ -216,14 +216,14 @@
216216
"name": "stdout",
217217
"output_type": "stream",
218218
"text": [
219-
"Episode#0 Reward:13.0\n"
219+
"Episode#0 Reward:29.0\n"
220220
]
221221
},
222222
{
223223
"name": "stdout",
224224
"output_type": "stream",
225225
"text": [
226-
"Episode#1 Reward:19.0\n"
226+
"Episode#1 Reward:28.0\n"
227227
]
228228
}
229229
],

Chapter03/1_dqn.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
"name": "stdout",
8181
"output_type": "stream",
8282
"text": [
83-
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DQN/CartPole-v0/20210520-051953\n"
83+
"Saving training logs to:logs/TFRL-Cookbook-Ch3-DQN/CartPole-v0/20210523-000951\n"
8484
]
8585
}
8686
],
@@ -221,14 +221,14 @@
221221
"name": "stdout",
222222
"output_type": "stream",
223223
"text": [
224-
"Episode#0 Reward:15.0\n"
224+
"Episode#0 Reward:17.0\n"
225225
]
226226
},
227227
{
228228
"name": "stdout",
229229
"output_type": "stream",
230230
"text": [
231-
"Episode#1 Reward:14.0\n"
231+
"Episode#1 Reward:24.0\n"
232232
]
233233
}
234234
],

0 commit comments

Comments
 (0)