Skip to content

Instantly share code, notes, and snippets.

@izmailovpavel
Created September 3, 2017 21:09
Show Gist options
  • Save izmailovpavel/3bfc17e1f22e603f57718f03806ff64f to your computer and use it in GitHub Desktop.
Save izmailovpavel/3bfc17e1f22e603f57718f03806ff64f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Simplified skiing\n",
"\n",
"We have a strip of shape 50x5, and a skier is moving down it. Every 5 rows there is a gate. Possibly, we will add trees later. At the end, the skier is told how many gates he went through. At each time step, the skier observes the number of rows to the next gate, the position of the next gate, and his own position.\n",
"\n",
"We want to use this example to test the RLGAN idea."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.preprocessing import OneHotEncoder"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def one_hot_many_features(arr, n_values):\n",
"    \"\"\"Jointly one-hot encode several categorical columns.\n",
"\n",
"    Every row of `arr` is treated as the digits of a mixed-radix number\n",
"    (first column most significant, radices taken from `n_values`). The\n",
"    result has one column per possible digit combination and a single 1 per\n",
"    row, at the position of that row's combined index.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    arr : array-like of int, shape (n_samples, n_features)\n",
"        Categorical codes; column j must lie in [0, n_values[j]).\n",
"    n_values : sequence of int\n",
"        Number of categories of each column. It is not modified.\n",
"\n",
"    Returns\n",
"    -------\n",
"    ndarray of float64, shape (n_samples, prod(n_values))\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If `arr` is not 2-D with one column per entry of `n_values`, or if\n",
"        a code falls outside its column's range.\n",
"    \"\"\"\n",
"    arr = np.asarray(arr).astype(np.int64)\n",
"    # Work on a copy so the caller's list is never mutated.\n",
"    radices = np.array(list(n_values), dtype=np.int64)\n",
"    if arr.ndim != 2 or arr.shape[1] != radices.size:\n",
"        raise ValueError('arr must have shape (n_samples, {}), got {}'.format(radices.size, arr.shape))\n",
"    # An out-of-range code would silently alias a different combination.\n",
"    if np.any(arr < 0) or np.any(arr >= radices):\n",
"        raise ValueError('every column of arr must lie in [0, n_values[j])')\n",
"    total_n_values = int(np.prod(radices))\n",
"    # weights[j] is the product of the radices to the right of column j.\n",
"    weights = np.append(np.cumprod(radices[::-1])[::-1][1:], 1)\n",
"    combined_features = arr.dot(weights)\n",
"    # Plain NumPy one-hot: OneHotEncoder(n_values=...) no longer exists in\n",
"    # current scikit-learn releases.\n",
"    new_features = np.zeros((arr.shape[0], total_n_values))\n",
"    new_features[np.arange(arr.shape[0]), combined_features] = 1.0\n",
"    return new_features\n",
"\n",
"class SimpleSkiGame:\n",
"    \"\"\"Random-policy simulator for the simplified skiing task.\n",
"\n",
"    The slope is a strip `strip_width` cells wide holding `num_gates` gates,\n",
"    one every `gate_freq` rows, i.e. `num_gates * gate_freq` rows in total.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, strip_width=5, num_gates=10, gate_freq=5):\n",
"        self.strip_width = strip_width\n",
"        self.num_gates = num_gates\n",
"        self.gate_freq = gate_freq\n",
"        self.strip_len = self.num_gates * self.gate_freq\n",
"\n",
"    def generate_game(self):\n",
"        \"\"\"Simulate one descent with uniformly random actions.\n",
"\n",
"        Returns\n",
"        -------\n",
"        states : ndarray, shape (strip_len - 1, strip_width**2 * gate_freq * 3)\n",
"            One-hot encoding of (gate column, skier column, rows to the next\n",
"            gate, action) for every step that is followed by an action.\n",
"        reward : int\n",
"            Number of gate rows on which the skier stood in the gate's column.\n",
"        \"\"\"\n",
"        gates = np.random.randint(low=0, high=self.strip_width, size=self.num_gates)\n",
"        initial_position = np.random.randint(low=0, high=self.strip_width)\n",
"        # Actions are drawn as -1 / 0 / +1 column shifts (left / down / right).\n",
"        actions = np.random.randint(low=-1, high=2, size=self.strip_len - 1)\n",
"\n",
"        trajectory = [initial_position]\n",
"        for a in actions:\n",
"            # A move past either edge leaves the skier on the edge column.\n",
"            next_position = min(max(trajectory[-1] + a, 0), self.strip_width - 1)\n",
"            trajectory.append(next_position)\n",
"        trajectory = np.array(trajectory)\n",
"\n",
"        # Each gate sits on the last row of its gate_freq-row segment.\n",
"        trajectory_at_gates = trajectory.reshape((-1, self.gate_freq))[:, -1]\n",
"        reward = np.sum(trajectory_at_gates == gates)\n",
"\n",
"        # Rows left to the next gate: gate_freq - 1, ..., 1, 0 in every segment\n",
"        # (derived from gate_freq so that non-default frequencies work).\n",
"        dist_to_gate = np.tile(np.arange(self.gate_freq - 1, -1, -1), self.num_gates)\n",
"        gate_per_row = np.repeat(gates, self.gate_freq)\n",
"\n",
"        # The final row has no action after it, so it is dropped; actions are\n",
"        # shifted from -1..1 to the categorical codes 0..2.\n",
"        states = self.transform_state_action_to_onehot(\n",
"            gate_per_row[:-1, None],\n",
"            trajectory[:-1, None],\n",
"            dist_to_gate[:-1, None],\n",
"            actions[:, None] + 1)\n",
"        return states, reward\n",
"\n",
"    def transform_state_action_to_onehot(self, gates, trajectory, dist_to_gate, actions):\n",
"        \"\"\"One-hot encode aligned (gate, position, distance, action) columns.\n",
"\n",
"        Each argument is an (n, 1) column of integer codes; the result has one\n",
"        row per step and strip_width * strip_width * gate_freq * 3 columns.\n",
"        \"\"\"\n",
"        state_action_features = np.hstack([gates, trajectory, dist_to_gate, actions])\n",
"        # The trailing 3 is the number of actions (left, down, right).\n",
"        states = one_hot_many_features(state_action_features,\n",
"                                       [self.strip_width, self.strip_width, self.gate_freq, 3])\n",
"        return states"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Simulate a single random game: s holds the one-hot state-action features,\n",
"# r the number of gates passed.\n",
"game = SimpleSkiGame()\n",
"s, r = game.generate_game()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate many games"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_games = 500\n",
"game = SimpleSkiGame()\n",
"states = []\n",
"rewards = []\n",
"for i in range(n_games):\n",
"    s, r = game.generate_game()\n",
"    states.append(s)\n",
"    rewards.append(r)\n",
"# Stack into (games, steps, features) and a (games, 1) column of rewards.\n",
"states = np.array(states)\n",
"rewards = np.array(rewards)[:, None]\n",
"assert states.shape[:2] == (n_games, game.strip_len - 1)\n",
"assert rewards.shape == (n_games, 1)"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(500, 1)"
]
},
"execution_count": 177,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rewards.shape"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(500, 49, 375)"
]
},
"execution_count": 178,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"states.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Solve"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": 205,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Keep the whole dataset inside the graph as non-trainable variables.\n",
"S = tf.Variable(initial_value=states, trainable=False)\n",
"r = tf.Variable(initial_value=rewards.astype(float), trainable=False)\n",
"# The first num_tr games form the training split, the remainder the test split.\n",
"num_tr = 400\n",
"S_tr = S[:num_tr]\n",
"S_te = S[num_tr:]\n",
"r_tr = r[:num_tr]\n",
"r_te = r[num_tr:]\n",
"# One float64 weight per one-hot feature, initialised to zero.\n",
"w = tf.Variable(initial_value=tf.zeros((states.shape[-1]), dtype=tf.float64))"
]
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def predict_rewards(S, w):\n",
"    \"\"\"Linear reward model: predicted total reward of every game.\n",
"\n",
"    Contracts the features `S` (indexed game, time step, feature) with the\n",
"    weight vector `w` over both the time and the feature axes, and returns\n",
"    the per-game sums as a column of shape (games, 1).\n",
"    \"\"\"\n",
"    per_game_total = tf.einsum('gtf,f->g', S, w)\n",
"    return per_game_total[:, None]\n",
"\n",
"def compute_loss(r, predicted_rewards):\n",
"    \"\"\"Mean squared error between true rewards `r` and `predicted_rewards`.\"\"\"\n",
"    residual = r - predicted_rewards\n",
"    return tf.reduce_mean(residual ** 2)"
]
},
{
"cell_type": "code",
"execution_count": 208,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Predicted total reward per game for the training and the test split.\n",
"pred_tr = predict_rewards(S_tr, w)\n",
"pred_te = predict_rewards(S_te, w)"
]
},
{
"cell_type": "code",
"execution_count": 209,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Mean squared error of the predicted rewards on each split.\n",
"tr_loss = compute_loss(r_tr, pred_tr)\n",
"te_loss = compute_loss(r_te, pred_te)"
]
},
{
"cell_type": "code",
"execution_count": 210,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Despite the name `sgd`, tr_loss averages over the whole training split,\n",
"# so every step is a full-batch gradient-descent update.\n",
"sgd = tf.train.GradientDescentOptimizer(learning_rate=1e-1)\n",
"train_op = sgd.minimize(tr_loss)"
]
},
{
"cell_type": "code",
"execution_count": 211,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Open a session and run the initializer for the graph's global variables.\n",
"sess = tf.Session()\n",
"sess.run(tf.global_variables_initializer())"
]
},
{
"cell_type": "code",
"execution_count": 212,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train loss: 6.3425\n",
"Test loss: 2.25397945338\n",
"Train loss: 0.379442088898\n",
"Test loss: 0.595460997003\n",
"Train loss: 0.153750168329\n",
"Test loss: 0.338179879635\n",
"Train loss: 0.0837601282732\n",
"Test loss: 0.234497723606\n",
"Train loss: 0.0546310347671\n",
"Test loss: 0.183248186286\n",
"Train loss: 0.0399317846675\n",
"Test loss: 0.154180699614\n",
"Train loss: 0.0314565089633\n",
"Test loss: 0.135889983082\n",
"Train loss: 0.026074058861\n",
"Test loss: 0.123395368644\n",
"Train loss: 0.0223974063954\n",
"Test loss: 0.114284569812\n",
"Train loss: 0.0197402973611\n",
"Test loss: 0.107293189233\n",
"Train loss: 0.0177331251402\n",
"Test loss: 0.101713545277\n",
"Train loss: 0.0161628263052\n",
"Test loss: 0.0971257320751\n",
"Train loss: 0.014899440224\n",
"Test loss: 0.0932667862562\n",
"Train loss: 0.0138597835011\n",
"Test loss: 0.0899635627337\n",
"Train loss: 0.0129883488206\n",
"Test loss: 0.0870967155535\n",
"Train loss: 0.0122467314998\n",
"Test loss: 0.0845805740204\n",
"Train loss: 0.0116075102425\n",
"Test loss: 0.0823514607107\n",
"Train loss: 0.0110505631841\n",
"Test loss: 0.0803606499605\n",
"Train loss: 0.0105607699468\n",
"Test loss: 0.0785699614539\n",
"Train loss: 0.0101265314761\n",
"Test loss: 0.0769488988728\n",
"Train loss: 0.0097387884921\n",
"Test loss: 0.0754727242866\n",
"Train loss: 0.00939035324234\n",
"Test loss: 0.0741211181998\n",
"Train loss: 0.00907544361886\n",
"Test loss: 0.0728772184349\n",
"Train loss: 0.00878935133292\n",
"Test loss: 0.0717269120985\n",
"Train loss: 0.00852820096685\n",
"Test loss: 0.0706583018814\n",
"Train loss: 0.00828877194043\n",
"Test loss: 0.0696612958774\n",
"Train loss: 0.00806836487412\n",
"Test loss: 0.068727287151\n",
"Train loss: 0.00786469983166\n",
"Test loss: 0.0678488999665\n",
"Train loss: 0.00767583782069\n",
"Test loss: 0.0670197864754\n",
"Train loss: 0.00750011950963\n",
"Test loss: 0.0662344622183\n",
"Train loss: 0.00733611685951\n",
"Test loss: 0.065488171894\n",
"Train loss: 0.00718259456421\n",
"Test loss: 0.0647767790094\n",
"Train loss: 0.00703847902451\n",
"Test loss: 0.0640966745606\n",
"Train loss: 0.00690283317036\n",
"Test loss: 0.0634447010142\n",
"Train loss: 0.00677483586769\n",
"Test loss: 0.0628180886843\n",
"Train loss: 0.00665376495185\n",
"Test loss: 0.0622144022209\n",
"Train loss: 0.0065389831547\n",
"Test loss: 0.0616314953997\n",
"Train loss: 0.0064299263591\n",
"Test loss: 0.0610674727634\n",
"Train loss: 0.0063260937389\n",
"Test loss: 0.0605206569501\n",
"Train loss: 0.00622703943767\n",
"Test loss: 0.059989560765\n"
]
}
],
"source": [
"num_iter = 400\n",
"for i in range(num_iter):\n",
"    _, loss_val = sess.run([train_op, tr_loss])\n",
"    if i % 10:\n",
"        continue\n",
"    # Report progress on every 10th iteration only.\n",
"    print('Train loss:', loss_val)\n",
"    print('Test loss:', sess.run(te_loss))"
]
},
{
"cell_type": "code",
"execution_count": 214,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.059523919593294561"
]
},
"execution_count": 214,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sess.run(te_loss)"
]
},
{
"cell_type": "code",
"execution_count": 213,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"w_model = sess.run(w)"
]
},
{
"cell_type": "code",
"execution_count": 204,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sess.close()"
]
},
{
"cell_type": "code",
"execution_count": 284,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[['.' '.' '.' '.' '.']\n",
" ['.' '.' '.' '.' '.']\n",
" ['.' '.' '.' '.' '.']\n",
" ['.' '.' '.' 'S' '.']\n",
" ['.' '.' '.' '.' 'G']]\n",
"left : [-0.15964259]\n",
"down : [-0.12028602]\n",
"right : [ 0.50504172]\n"
]
}
],
"source": [
"# NOTE: this cell uses `gen_pic`, which is defined in a later cell, together\n",
"# with `game` and `w_model` from earlier cells; run those cells first.\n",
"gate = 4  # 0 ... 4, position of gate\n",
"pos = 3  # 0 ... 4, position of skier\n",
"dist = 1  # 0 ... 4, vertical distance to gate\n",
"\n",
"# One-row (1, 1) columns, the layout transform_state_action_to_onehot stacks.\n",
"gates = np.array([[gate]])\n",
"trajectory = np.array([[pos]])\n",
"dist_to_gate = np.array([[dist]])\n",
"\n",
"print(gen_pic(gate, pos, dist))\n",
"\n",
"# Score of each action in this state under the fitted linear model.\n",
"for a, action in enumerate(['left', 'down', 'right']):\n",
"    actions = np.array([[a]])  # 0 ... 2, left down right\n",
"    print(action, ': ', end='')\n",
"    print(game.transform_state_action_to_onehot(gates, trajectory, dist_to_gate, actions).dot(w_model))"
]
},
{
"cell_type": "code",
"execution_count": 272,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def gen_pic(gate, pos_x, pos_y, size=5):\n",
"    \"\"\"Draw the rows leading to the next gate as a grid of characters.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    gate : int\n",
"        Column of the gate; the gate is drawn on the bottom row.\n",
"    pos_x : int\n",
"        Column of the skier.\n",
"    pos_y : int\n",
"        Number of rows between the skier and the gate row (0 = on that row).\n",
"    size : int, optional\n",
"        Side length of the square grid.\n",
"\n",
"    Returns\n",
"    -------\n",
"    ndarray of shape (size, size) holding '.' for empty cells, 'S' for the\n",
"    skier and 'G' for the gate. If both share a cell, 'G' is shown.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If any coordinate lies outside 0 ... size - 1.\n",
"    \"\"\"\n",
"    # Reject out-of-range values: negative indices would otherwise wrap\n",
"    # around and draw the marker in the wrong cell without any error.\n",
"    for label, value in (('gate', gate), ('pos_x', pos_x), ('pos_y', pos_y)):\n",
"        if not 0 <= value < size:\n",
"            raise ValueError('{} must be in [0, {}), got {}'.format(label, size, value))\n",
"    res = np.full((size, size), '.')\n",
"    res[size - 1 - pos_y, pos_x] = 'S'\n",
"    res[size - 1, gate] = 'G'\n",
"    return res"
]
},
{
"cell_type": "code",
"execution_count": 273,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([['.', '.', '.', '.', '.'],\n",
" ['.', '.', '.', '.', '.'],\n",
" ['.', '.', '.', '.', '.'],\n",
" ['S', '.', '.', '.', '.'],\n",
" ['.', 'G', '.', '.', '.']],\n",
" dtype='<U1')"
]
},
"execution_count": 273,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment