Skip to content

Instantly share code, notes, and snippets.

@phyous
Created September 4, 2020 19:00
Show Gist options
  • Save phyous/d859000185b9b1b6b7c6f21a83c8da66 to your computer and use it in GitHub Desktop.
Save phyous/d859000185b9b1b6b7c6f21a83c8da66 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Monte Carlo Prediction"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import sys\n",
"import gym\n",
"import numpy as np\n",
"from collections import defaultdict\n",
"from plot_utils import plot_blackjack_values, plot_policy"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"env = gym.make('Blackjack-v0')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def play_episode(env):\n",
" \"\"\"\n",
" Plays a single episode with a set policy in the environment given. Records the state, action \n",
" and reward for each step and returns the all timesteps for the episode.\n",
" \"\"\"\n",
" episode = []\n",
" state = env.reset()\n",
" while True:\n",
" probs = [0.8, 0.2] if state[0] > 18 else [0.2, 0.8]\n",
" action = np.random.choice(np.arange(2), p=probs)\n",
" next_state, reward, done, info = env.step(action)\n",
" episode.append((state, action, reward))\n",
" state = next_state\n",
" if done:\n",
" break\n",
" return episode"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def update_Q(episode, Q,returns_sum, N, gamma=1.0):\n",
" \"\"\"\n",
" For each time step in the episode we carry out the first visit monte carlo method, checking if this is \n",
" the first index of this state. Get the discounted reward and add it to the total reward for that \n",
" state/action pair. Increment the times we have seen this state action pair and finally update the Q values\n",
" \"\"\"\n",
" \n",
" for s, a, r in episode:\n",
" first_occurence_idx = next(i for i,x in enumerate(episode) if x[0] == s)\n",
" G = sum([x[2]*(gamma**i) for i,x in enumerate(episode[first_occurence_idx:])])\n",
" returns_sum[s][a] += G\n",
" N[s][a] += 1.0\n",
" Q[s][a] = returns_sum[s][a] / N[s][a]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def mc_predict(env, num_episodes, gamma=1.0):\n",
"\n",
" \"\"\"\n",
" This is the primary method. Plays through several episodes of the environment. \n",
" \"\"\"\n",
" returns_sum = defaultdict(lambda: np.zeros(env.action_space.n))\n",
" N = defaultdict(lambda: np.zeros(env.action_space.n))\n",
" Q = defaultdict(lambda: np.zeros(env.action_space.n))\n",
" \n",
" for i_episode in range(1, num_episodes+1):\n",
" if i_episode % 1000 == 0:\n",
" print(\"\\rEpisode {}/{}.\".format(i_episode, num_episodes), end=\"\")\n",
" sys.stdout.flush()\n",
" \n",
" episode = play_episode(env)\n",
"\n",
" update_Q(episode, Q, returns_sum, N)\n",
" \n",
" return Q "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode 500000/500000."
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1440x1440 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"#predict the policy values for our test policy\n",
"Q = mc_predict(env, 500000)\n",
"\n",
"#get the state value function for our test policy\n",
"V_to_plot = dict((k,(k[0]>18)*(np.dot([0.8, 0.2],v)) + (k[0]<=18)*(np.dot([0.2, 0.8],v))) \\\n",
" for k, v in Q.items())\n",
"\n",
"# plot the state value functions\n",
"plot_blackjack_values(V_to_plot)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"## E1\n",
"[((21, 4, True), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:4|ace:True], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.])})\n",
"\n",
"\n",
"## E2\n",
"[((15, 6, False), 1, 0.0), ((17, 6, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:15|dealer:6|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:17|dealer:6|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E3\n",
"[((21, 9, True), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:9|ace:True], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.])})\n",
"\n",
"\n",
"## E4\n",
"[((8, 4, False), 1, 0.0), ((16, 4, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:8|dealer:4|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:16|dealer:4|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.])})\n",
"\n",
"\n",
"## E5\n",
"[((14, 10, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E6\n",
"[((6, 9, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:6|dealer:9|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 1.]), (6, 9, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.])})\n",
"\n",
"\n",
"## E7\n",
"[((17, 10, False), 1, 0.0), ((19, 10, False), 0, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:17|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:19|dealer:10|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E8\n",
"[((14, 10, False), 1, 0.0), ((16, 10, False), 0, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:16|dealer:10|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E9\n",
"[((20, 10, False), 0, 0.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:10|ace:False], action:0, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.])})\n",
"\n",
"\n",
"## E10\n",
"[((21, 2, True), 1, 0.0), ((17, 2, False), 1, 0.0), ((18, 2, False), 1, -1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:2|ace:True], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:17|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:18|dealer:2|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E11\n",
"[((20, 8, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:8|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.])})\n",
"\n",
"\n",
"## E12\n",
"[((21, 3, True), 0, 0.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:3|ace:True], action:0, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.])})\n",
"\n",
"\n",
"## E13\n",
"[((11, 2, False), 1, 0.0), ((21, 2, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:11|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:21|dealer:2|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.])})\n",
"\n",
"\n",
"## E14\n",
"[((14, 2, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:2|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E15\n",
"[((18, 8, False), 1, 0.0), ((21, 8, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:8|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:21|dealer:8|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 1.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E16\n",
"[((17, 10, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:17|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E17\n",
"[((20, 6, False), 0, 0.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:6|ace:False], action:0, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.])})\n",
"\n",
"\n",
"## E18\n",
"[((19, 7, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:19|dealer:7|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"## E19\n",
"[((10, 10, False), 1, 0.0), ((20, 10, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:10|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 0.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([0., 0.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:20|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E20\n",
"[((12, 3, False), 1, 0.0), ((21, 3, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:3|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:21|dealer:3|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.])})\n",
"\n",
"\n",
"## E21\n",
"[((10, 7, False), 1, 0.0), ((16, 7, False), 1, 0.0), ((18, 7, False), 1, 0.0), ((19, 7, False), 0, 1.0)]\n",
"enum:[0, 1, 2, 3]\n",
"first_idx: 0\n",
"STEP: state[sum:10|dealer:7|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2, 3]\n",
"first_idx: 1\n",
"STEP: state[sum:16|dealer:7|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2, 3]\n",
"first_idx: 2\n",
"STEP: state[sum:18|dealer:7|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2, 3]\n",
"first_idx: 3\n",
"STEP: state[sum:19|dealer:7|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.])})\n",
"\n",
"\n",
"## E22\n",
"[((19, 5, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:19|dealer:5|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.])})\n",
"\n",
"\n",
"## E23\n",
"[((12, 7, False), 1, 0.0), ((21, 7, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:7|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:21|dealer:7|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"## E24\n",
"[((16, 6, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:16|dealer:6|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E25\n",
"[((12, 10, False), 1, 0.0), ((18, 10, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:18|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E26\n",
"[((18, 6, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:6|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([1., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([ 0., -1.]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.])})\n",
"\n",
"\n",
"## E27\n",
"[((20, 10, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:10|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.])})\n",
"\n",
"\n",
"## E28\n",
"[((12, 5, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:5|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E29\n",
"[((14, 8, False), 1, 0.0), ((18, 8, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:8|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 1.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([0., 1.]), (14, 8, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 0., -1.]), (14, 8, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:18|dealer:8|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([0., 1.]), (14, 8, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 0., -1.]), (14, 8, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E30\n",
"[((12, 5, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:5|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E31\n",
"[((9, 7, False), 1, 0.0), ((19, 7, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:9|dealer:7|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 0.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([1., 0.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:19|dealer:7|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0., -1.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E32\n",
"[((18, 2, False), 1, 0.0), ((21, 2, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 2.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([0., 0.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:21|dealer:2|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 2.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([0., 0.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E33\n",
"[((8, 2, False), 1, 0.0), ((18, 2, False), 1, 0.0), ((19, 2, False), 1, -1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:8|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 2.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([0., 0.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:18|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:19|dealer:2|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E34\n",
"[((13, 7, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:13|dealer:7|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E35\n",
"[((14, 8, True), 1, 0.0), ((20, 8, True), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:8|ace:True], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:20|dealer:8|ace:True], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([1., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([-1., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.])})\n",
"\n",
"\n",
"## E36\n",
"[((16, 10, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:16|dealer:10|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([0., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.])})\n",
"\n",
"\n",
"## E37\n",
"[((11, 2, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:11|dealer:2|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.])})\n",
"\n",
"\n",
"## E38\n",
"[((12, 10, True), 1, 0.0), ((12, 10, False), 1, 0.0), ((13, 10, False), 1, 0.0), ((14, 10, False), 1, -1.0)]\n",
"enum:[0, 1, 2, 3]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:10|ace:True], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 1.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2, 3]\n",
"first_idx: 1\n",
"STEP: state[sum:12|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2, 3]\n",
"first_idx: 2\n",
"STEP: state[sum:13|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 2.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2, 3]\n",
"first_idx: 3\n",
"STEP: state[sum:14|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E39\n",
"[((11, 3, False), 1, 0.0), ((21, 3, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:11|dealer:3|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 0.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:21|dealer:3|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([0., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([ 0., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E40\n",
"[((14, 10, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:10|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E41\n",
"[((14, 4, False), 1, 0.0), ((17, 4, False), 0, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:4|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:17|dealer:4|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E42\n",
"[((18, 3, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:3|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E43\n",
"[((13, 3, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:13|dealer:3|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 1.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E44\n",
"[((12, 5, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:5|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E45\n",
"[((9, 8, False), 1, 0.0), ((18, 8, False), 0, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:9|dealer:8|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([0., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([ 0., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:18|dealer:8|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E46\n",
"[((5, 2, False), 1, 0.0), ((10, 2, False), 1, 0.0), ((19, 2, False), 0, 1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:5|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:10|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([0., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 0., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:19|dealer:2|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 3.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.])})\n",
"\n",
"\n",
"## E47\n",
"[((14, 10, False), 1, 0.0), ((18, 10, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:18|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.])})\n",
"\n",
"\n",
"## E48\n",
"[((9, 6, False), 1, 0.0), ((14, 6, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:9|dealer:6|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:14|dealer:6|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 1.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 0., 
-1.])})\n",
"\n",
"\n",
"## E49\n",
"[((14, 4, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:4|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 0., 
-1.])})\n",
"\n",
"\n",
"## E50\n",
"[((19, 3, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:19|dealer:3|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.]), (19, 3, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([1., 0.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 0., 
-1.]), (19, 3, False): array([1., 0.])})\n",
"\n",
"\n",
"## E51\n",
"[((18, 6, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:6|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.]), (19, 3, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 
0., -1.]), (19, 3, False): array([1., 0.])})\n",
"\n",
"\n",
"## E52\n",
"[((13, 5, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:13|dealer:5|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 
0., -1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E53\n",
"[((12, 4, False), 1, 0.0), ((13, 4, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:4|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 
0., -1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:13|dealer:4|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 
0., -1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.])})\n",
"\n",
"\n",
"## E54\n",
"[((13, 1, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:13|dealer:1|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 
0., -1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E55\n",
"[((13, 7, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:13|dealer:7|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 
0., -1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E56\n",
"[((13, 6, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:13|dealer:6|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([ 
0., -1.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.])})\n",
"\n",
"\n",
"## E57\n",
"[((14, 6, False), 1, 0.0), ((20, 6, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:14|dealer:6|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([1., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0., 0.]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:20|dealer:6|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 0.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([0., 0.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.])})\n",
"\n",
"\n",
"## E58\n",
"[((16, 10, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:16|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.])})\n",
"\n",
"\n",
"## E59\n",
"[((11, 8, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:11|dealer:8|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E60\n",
"[((21, 10, True), 0, 0.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:10|ace:True], action:0, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 1.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 1.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.])})\n",
"\n",
"\n",
"## E61\n",
"[((11, 2, False), 1, 0.0), ((15, 2, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:11|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:15|dealer:2|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E62\n",
"[((21, 6, True), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:6|ace:True], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.])})\n",
"\n",
"\n",
"## E63\n",
"[((11, 10, False), 1, 0.0), ((15, 10, False), 1, 0.0), ((19, 10, False), 0, 1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:11|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:15|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([1., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([-1., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:19|dealer:10|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([ 0., -1.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.])})\n",
"\n",
"\n",
"## E64\n",
"[((9, 8, False), 1, 0.0), ((19, 8, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:9|dealer:8|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:19|dealer:8|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 1.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.])})\n",
"\n",
"\n",
"## E65\n",
"[((13, 10, False), 1, 0.0), ((21, 10, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:13|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:21|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E66\n",
"[((21, 7, True), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:7|ace:True], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.])})\n",
"\n",
"\n",
"## E67\n",
"[((16, 5, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:16|dealer:5|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.])})\n",
"\n",
"\n",
"## E68\n",
"[((8, 1, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:8|dealer:1|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 0.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., 0.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.])})\n",
"\n",
"\n",
"## E69\n",
"[((17, 4, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:17|dealer:4|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.])})\n",
"\n",
"\n",
"## E70\n",
"[((18, 9, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:9|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.])})\n",
"\n",
"\n",
"## E71\n",
"[((13, 2, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:13|dealer:2|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([0., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([ 0., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.])})\n",
"\n",
"\n",
"## E72\n",
"[((12, 10, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:10|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.])})\n",
"\n",
"\n",
"## E73\n",
"[((20, 5, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:5|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.])})\n",
"\n",
"\n",
"## E74\n",
"[((16, 3, False), 1, 0.0), ((17, 3, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:16|dealer:3|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:17|dealer:3|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E75\n",
"[((12, 8, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:8|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E76\n",
"[((6, 6, False), 1, 0.0), ((8, 6, False), 1, 0.0), ((18, 6, False), 1, -1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:6|dealer:6|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:8|dealer:6|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 1.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:18|dealer:6|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([ 0., -1.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E77\n",
"[((15, 2, False), 1, 0.0), ((16, 2, False), 1, 0.0), ((21, 2, False), 0, 1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:15|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:16|dealer:2|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([2., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:21|dealer:2|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.])})\n",
"\n",
"\n",
"## E78\n",
"[((20, 7, False), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:7|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 2.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1., -1.]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): 
array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"## E79\n",
"[((12, 10, False), 1, 0.0), ((14, 10, False), 1, 0.0), ((19, 10, False), 0, 1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 4.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 3.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1., -1.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.33333333]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:14|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([2., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 3.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0., 0.]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.33333333]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:19|dealer:10|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([0., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 3.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([ 0., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.33333333]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"## E80\n",
"[((10, 10, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:10|dealer:10|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 3.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.33333333]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"## E81\n",
"[((12, 10, False), 1, 0.0), ((13, 10, False), 1, 0.0), ((14, 10, False), 0, -1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 2.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:13|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([1., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:14|dealer:10|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1., -1.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"## E82\n",
"[((10, 10, False), 1, 0.0), ((20, 10, False), 0, 0.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:10|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([2., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.5, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:20|dealer:10|ace:False], action:0, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 2.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"## E83\n",
"[((18, 6, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:6|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.])})\n",
"\n",
"\n",
"## E84\n",
"[((12, 9, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:12|dealer:9|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 2.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0., -1.]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E85\n",
"[((17, 10, False), 1, 0.0), ((19, 10, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:17|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([3., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.33333333, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:19|dealer:10|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([1., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([1., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E86\n",
"[((20, 8, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:8|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E87\n",
"[((4, 5, False), 1, 0.0), ((14, 5, False), 0, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:4|dealer:5|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:14|dealer:5|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.])})\n",
"\n",
"\n",
"## E88\n",
"[((6, 4, False), 1, 0.0), ((16, 4, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:6|dealer:4|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 0.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:16|dealer:4|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E89\n",
"[((21, 7, True), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:7|ace:True], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E90\n",
"[((21, 8, True), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:8|ace:True], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.])})\n",
"\n",
"\n",
"## E91\n",
"[((7, 4, False), 1, 0.0), ((13, 4, False), 1, 0.0), ((18, 4, False), 1, -1.0)]\n",
"enum:[0, 1, 2]\n",
"first_idx: 0\n",
"STEP: state[sum:7|dealer:4|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([1., 0.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 1\n",
"STEP: state[sum:13|dealer:4|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1, 2]\n",
"first_idx: 2\n",
"STEP: state[sum:18|dealer:4|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 1.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E92\n",
"[((18, 7, False), 1, 0.0), ((21, 7, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:7|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 0.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:21|dealer:7|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 1.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E93\n",
"[((17, 3, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:17|dealer:3|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 1.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([ 0., -1.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E94\n",
"[((9, 7, False), 1, 0.0), ((19, 7, False), 0, 1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:9|dealer:7|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([2., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:19|dealer:7|ace:False], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([3., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([1., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E95\n",
"[((19, 8, False), 0, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:19|dealer:8|ace:False], action:0, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([3., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([2., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([0., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E96\n",
"[((20, 3, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:3|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([3., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([2., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.]), (20, 3, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([0., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.]), (20, 3, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E97\n",
"[((20, 4, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:20|dealer:4|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([0., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([3., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([2., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.]), (20, 3, False): array([0., 1.]), (20, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 0., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([0., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.]), (20, 3, False): array([ 0., -1.]), (20, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E98\n",
"[((21, 2, True), 0, 1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:21|dealer:2|ace:True], action:0, reward:1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([1., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([3., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 2.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([2., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.]), (20, 3, False): array([0., 1.]), (20, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 1., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([0., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.]), (20, 3, False): array([ 0., -1.]), (20, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E99\n",
"[((18, 10, False), 1, -1.0)]\n",
"enum:[0]\n",
"first_idx: 0\n",
"STEP: state[sum:18|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([1., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([3., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 3.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([2., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.]), (20, 3, False): array([0., 1.]), (20, 4, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 1., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([0., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.]), (20, 3, False): array([ 0., -1.]), (20, 4, False): array([ 0., -1.])})\n",
"\n",
"\n",
"## E100\n",
"[((7, 10, False), 1, 0.0), ((17, 10, False), 1, -1.0)]\n",
"enum:[0, 1]\n",
"first_idx: 0\n",
"STEP: state[sum:7|dealer:10|ace:False], action:1, reward:0.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 3.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([1., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([3., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 3.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([2., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.]), (20, 3, False): array([0., 1.]), (20, 4, False): array([0., 1.]), (7, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.33333333]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 1., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([0., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.]), (20, 3, False): array([ 0., -1.]), (20, 4, False): array([ 0., -1.]), (7, 10, False): array([ 
0., -1.])})\n",
"\n",
"\n",
"enum:[0, 1]\n",
"first_idx: 1\n",
"STEP: state[sum:17|dealer:10|ace:False], action:1, reward:-1.0\n",
"N: defaultdict(<function <lambda> at 0x1320055f0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([0., 1.]), (17, 6, False): array([0., 1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([1., 1.]), (14, 10, False): array([2., 5.]), (6, 9, False): array([1., 0.]), (17, 10, False): array([0., 4.]), (19, 10, False): array([4., 0.]), (16, 10, False): array([2., 1.]), (20, 10, False): array([3., 1.]), (21, 2, True): array([1., 1.]), (17, 2, False): array([0., 1.]), (18, 2, False): array([0., 3.]), (20, 8, False): array([2., 0.]), (21, 3, True): array([1., 0.]), (11, 2, False): array([1., 2.]), (21, 2, False): array([3., 0.]), (14, 2, False): array([0., 1.]), (18, 8, False): array([1., 2.]), (21, 8, False): array([0., 1.]), (20, 6, False): array([2., 0.]), (19, 7, False): array([3., 1.]), (10, 10, False): array([1., 2.]), (12, 3, False): array([0., 1.]), (21, 3, False): array([1., 1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 2.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([1., 1.]), (16, 6, False): array([0., 1.]), (12, 10, False): array([1., 4.]), (18, 10, False): array([0., 3.]), (18, 6, False): array([1., 3.]), (12, 5, False): array([1., 2.]), (14, 8, False): array([0., 1.]), (9, 7, False): array([0., 2.]), (8, 2, False): array([0., 1.]), (19, 2, False): array([1., 1.]), (13, 7, False): array([0., 2.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([0., 1.]), (13, 10, False): array([0., 3.]), (11, 3, False): array([0., 1.]), (14, 4, False): array([0., 2.]), (17, 4, False): array([1., 1.]), (18, 3, False): array([0., 1.]), (13, 3, False): array([1., 0.]), (9, 8, False): array([0., 2.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([0., 1.]), (14, 6, False): array([0., 2.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([0., 1.]), 
(12, 4, False): array([0., 1.]), (13, 4, False): array([1., 1.]), (13, 1, False): array([0., 1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([1., 0.]), (21, 10, True): array([1., 0.]), (15, 2, False): array([0., 2.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([2., 0.]), (21, 10, False): array([0., 1.]), (21, 7, True): array([2., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([0., 1.]), (17, 3, False): array([0., 2.]), (12, 8, False): array([1., 0.]), (6, 6, False): array([0., 1.]), (8, 6, False): array([0., 1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([1., 0.]), (4, 5, False): array([0., 1.]), (14, 5, False): array([1., 0.]), (6, 4, False): array([0., 1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([0., 1.]), (18, 4, False): array([0., 1.]), (20, 3, False): array([0., 1.]), (20, 4, False): array([0., 1.]), (7, 10, False): array([0., 1.])})\n",
"Q: defaultdict(<function <lambda> at 0x1330e2cb0>, {(21, 4, True): array([1., 0.]), (15, 6, False): array([ 0., -1.]), (17, 6, False): array([ 0., -1.]), (21, 9, True): array([1., 0.]), (8, 4, False): array([0., 1.]), (16, 4, False): array([ 1., -1.]), (14, 10, False): array([-1. , -0.6]), (6, 9, False): array([1., 0.]), (17, 10, False): array([ 0. , -0.5]), (19, 10, False): array([0.5, 0. ]), (16, 10, False): array([ 0., -1.]), (20, 10, False): array([-0.33333333, -1. ]), (21, 2, True): array([ 1., -1.]), (17, 2, False): array([ 0., -1.]), (18, 2, False): array([ 0. , -0.33333333]), (20, 8, False): array([0., 0.]), (21, 3, True): array([0., 0.]), (11, 2, False): array([-1., 0.]), (21, 2, False): array([1., 0.]), (14, 2, False): array([ 0., -1.]), (18, 8, False): array([-1., -1.]), (21, 8, False): array([ 0., -1.]), (20, 6, False): array([0.5, 0. ]), (19, 7, False): array([ 1., -1.]), (10, 10, False): array([-1. , -0.5]), (12, 3, False): array([0., 1.]), (21, 3, False): array([ 1., -1.]), (10, 7, False): array([0., 1.]), (16, 7, False): array([0., 1.]), (18, 7, False): array([0., 0.]), (19, 5, False): array([1., 0.]), (12, 7, False): array([0., 1.]), (21, 7, False): array([ 1., -1.]), (16, 6, False): array([ 0., -1.]), (12, 10, False): array([-1. 
, -0.5]), (18, 10, False): array([ 0., -1.]), (18, 6, False): array([ 1., -1.]), (12, 5, False): array([ 1., -1.]), (14, 8, False): array([ 0., -1.]), (9, 7, False): array([0., 0.]), (8, 2, False): array([ 0., -1.]), (19, 2, False): array([ 1., -1.]), (13, 7, False): array([ 0., -1.]), (14, 8, True): array([0., 1.]), (20, 8, True): array([1., 0.]), (12, 10, True): array([ 0., -1.]), (13, 10, False): array([ 0., -1.]), (11, 3, False): array([ 0., -1.]), (14, 4, False): array([ 0., -1.]), (17, 4, False): array([-1., -1.]), (18, 3, False): array([ 0., -1.]), (13, 3, False): array([-1., 0.]), (9, 8, False): array([0., 0.]), (5, 2, False): array([0., 1.]), (10, 2, False): array([0., 1.]), (9, 6, False): array([ 0., -1.]), (14, 6, False): array([0., 0.]), (19, 3, False): array([1., 0.]), (13, 5, False): array([ 0., -1.]), (12, 4, False): array([0., 1.]), (13, 4, False): array([ 1., -1.]), (13, 1, False): array([ 0., -1.]), (13, 6, False): array([1., 0.]), (11, 8, False): array([-1., 0.]), (21, 10, True): array([0., 0.]), (15, 2, False): array([0., 0.]), (21, 6, True): array([1., 0.]), (11, 10, False): array([0., 1.]), (15, 10, False): array([0., 1.]), (19, 8, False): array([0., 0.]), (21, 10, False): array([ 0., -1.]), (21, 7, True): array([1., 0.]), (16, 5, False): array([1., 0.]), (8, 1, False): array([1., 0.]), (18, 9, False): array([1., 0.]), (13, 2, False): array([1., 0.]), (20, 5, False): array([1., 0.]), (16, 3, False): array([ 0., -1.]), (17, 3, False): array([ 0., -1.]), (12, 8, False): array([-1., 0.]), (6, 6, False): array([ 0., -1.]), (8, 6, False): array([ 0., -1.]), (16, 2, False): array([0., 1.]), (20, 7, False): array([1., 0.]), (12, 9, False): array([-1., 0.]), (4, 5, False): array([ 0., -1.]), (14, 5, False): array([-1., 0.]), (6, 4, False): array([ 0., -1.]), (21, 8, True): array([1., 0.]), (7, 4, False): array([ 0., -1.]), (18, 4, False): array([ 0., -1.]), (20, 3, False): array([ 0., -1.]), (20, 4, False): array([ 0., -1.]), (7, 10, False): array([ 
0., -1.])})\n",
"\n",
"\n"
]
}
],
"source": [
"# First-visit Monte Carlo estimate of the action-value table Q, run for a\n",
"# handful of episodes with a verbose per-step trace (the full N and Q tables\n",
"# are printed after every update, so keep num_episodes small).\n",
"num_episodes = 100\n",
"gamma = 1\n",
"\n",
"# Each table maps a state to an array with one slot per action; states that\n",
"# have not been seen yet start out as all zeros.\n",
"returns_sum = defaultdict(lambda: np.zeros(env.action_space.n))\n",
"N = defaultdict(lambda: np.zeros(env.action_space.n))\n",
"Q = defaultdict(lambda: np.zeros(env.action_space.n))\n",
"\n",
"for i_episode in range(1, num_episodes+1):\n",
"    print(f\"## E{i_episode}\")\n",
"    if i_episode % 1000 == 0:\n",
"        print(\"\\rEpisode {}/{}.\".format(i_episode, num_episodes), end=\"\")\n",
"        sys.stdout.flush()\n",
"\n",
"    episode = play_episode(env)\n",
"    print(episode)\n",
"    for t, (s, a, r) in enumerate(episode):\n",
"        print(f\"enum:{list(range(len(episode)))}\")\n",
"        # Index of the first step in this episode that took action a in state s.\n",
"        first_occurence_idx = next(\n",
"            i for i, x in enumerate(episode) if x[0] == s and x[1] == a\n",
"        )\n",
"        print(f\"first_idx: {first_occurence_idx}\")\n",
"        if first_occurence_idx != t:\n",
"            # First-visit MC: a (state, action) pair that already appeared\n",
"            # earlier in this episode must not be counted a second time.\n",
"            continue\n",
"        # Discounted return from the current step to the end of the episode.\n",
"        G = sum(x[2] * (gamma ** i) for i, x in enumerate(episode[t:]))\n",
"        returns_sum[s][a] += G\n",
"        N[s][a] += 1.0\n",
"        # Q is the running average of the first-visit returns seen so far.\n",
"        Q[s][a] = returns_sum[s][a] / N[s][a]\n",
"        print(f\"STEP: state[sum:{s[0]}|dealer:{s[1]}|ace:{s[2]}], action:{a}, reward:{r}\")\n",
"        print(f\"N: {N}\")\n",
"        print(f\"Q: {Q}\")\n",
"        print(\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0., 0.])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the zero array that the defaultdict tables use for an unseen state:\n",
"# one entry per action in the environment's action space.\n",
"np.zeros(shape=env.action_space.n)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the environment's action space (the original cell was an unfinished\n",
"# `env.` attribute access, which is a SyntaxError on Restart & Run All).\n",
"env.action_space"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment