Skip to content

Instantly share code, notes, and snippets.

@vlad17
Created Oct 21, 2017
Embed
What would you like to do?
accept-process-output(#<process ycmd-server> 0 100 t)
ycmd--start-server("|\257}\n\362\301\325OW\333&\370I\263\324")
ycmd-open()
ycmd--request("/event_notification" (("event_name" . "FileReadyToParse") ("file_data" ("/home/vlad/Documents/cal/courses-year1/cs294-112/cal-deeprl-hw4/controllers.py" ("contents" . "import tensorflow as tf\nimport numpy as np\nimport time\nfrom utils import get_ac_dim, get_ob_dim, build_mlp\n\n\nclass Controller:\n def __init__(self):\n pass\n\n def get_action(self, state):\n raise NotImplementedError\n\n def fit(self, data):\n pass\n\n def reset(self, nstates):\n pass\n\nclass RandomController(Controller):\n def __init__(self, env):\n super().__init__()\n self.ac_space = env.action_space\n\n def get_action(self, states):\n nstates = len(states)\n return self._sample_n(nstates)\n\n def _sample_n(self, n):\n return np.random.uniform(\n low=self.ac_space.low,\n high=self.ac_space.high,\n size=(n,) + self.ac_space.shape)\n\n\nclass MPCcontroller(Controller):\n def __init__(self,\n env,\n dyn_model,\n horizon=5,\n cost_fn=None,\n num_simulated_paths=10,\n sess=None,\n policy=None):\n super().__init__()\n self.ac_dim = get_ac_dim(env)\n self.ac_space = env.action_space\n self.sess = sess\n self.num_simulated_paths = num_simulated_paths\n\n # compute the rollout in full TF to keep all computation on the GPU\n # a = action dim\n # s = state dim\n # n = batch size = num states to get MPC actions for * simulated rollouts\n # i = number of states in batch for get_action\n self.input_state_ph_is = tf.placeholder(\n tf.float32, [None, get_ob_dim(env)], 'mpc_input_state')\n state_ns = tf.tile(self.input_state_ph_is, (num_simulated_paths, 1))\n # use the specified policy during MPC rollouts\n ac_space = env.action_space\n if policy is None:\n policy = self._create_random_policy(ac_space)\n self.initial_action_na = policy(state_ns, is_initial=True)\n self.input_action_ph_na = tf.placeholder(\n tf.float32, [None, self.ac_dim], 'mpc_input_action')\n def body(t, state_ns, action_na, costs):\n next_state_ns = dyn_model.predict_tf(state_ns, action_na)\n next_costs = cost_fn(state_ns, action_na, next_state_ns, costs)\n next_action_na = policy(next_state_ns, is_initial=False)\n return [t + 1, next_state_ns, next_action_na, next_costs]\n n = tf.shape(state_ns)[0]\n loop_vars = [\n tf.constant(0),\n state_ns,\n self.input_action_ph_na,\n tf.zeros((n,))]\n self.loop = tf.while_loop(lambda t, _, __, ___: t < horizon, body,\n loop_vars, back_prop=False)\n\n @staticmethod\n def _create_random_policy(ac_space):\n def policy(state_ns, **_):\n n = tf.shape(state_ns)[0]\n ac_dim = ac_space.low.shape\n ac_na = tf.random_uniform((n,) + ac_dim)\n ac_na *= (ac_space.high - ac_space.low)\n ac_na += ac_space.low\n return ac_na\n return policy\n\n def get_action(self, states):\n nstates = len(states)\n\n action_na = self.sess.run(self.initial_action_na,\n feed_dict={self.input_state_ph_is: states})\n _, _, _, trajectory_costs_n = self.sess.run(self.loop, feed_dict={\n self.input_state_ph_is: states,\n self.input_action_ph_na: action_na})\n\n # p = num simulated paths, i = nstates\n # note b/c of the way tf.tile works we need to reshape by p then i\n per_state_simulation_costs_ip = trajectory_costs_n.reshape(\n self.num_simulated_paths, nstates).T\n best_ac_ix_i = per_state_simulation_costs_ip.argmin(axis=1)\n action_samples_ipa = np.swapaxes(action_na.reshape(\n self.num_simulated_paths, nstates, self.ac_dim), 0, 1)\n best_ac_ia = action_samples_ipa[np.arange(nstates), best_ac_ix_i, :]\n\n return best_ac_ia\n\nclass BPTT(Controller):\n def __init__(self,\n env,\n dyn_model,\n horizon=None,\n cost_fn=None,\n learning_rate=None,\n depth=None,\n width=None,\n batch_size=None,\n epochs=None,\n sess=None):\n super().__init__()\n self.sess = sess\n self.batch_size = batch_size\n self.epochs = epochs\n self.ac_space = env.action_space\n self.ob_dim = get_ob_dim(env)\n self.ac_dim = get_ac_dim(env)\n self.width = width\n self.depth = depth\n\n # rnn used by policy\n self.rnn = tf.contrib.rnn.OutputProjectionWrapper(\n tf.nn.rnn_cell.MultiRNNCell(\n [tf.nn.rnn_cell.GRUCell(width) for _ in range(depth)]),\n self.ac_dim,\n activation=tf.sigmoid)\n \n # a = action dim\n # s = state dim\n # n = batch size\n # h = hidden unit size\n self.initial_rnn_state_list_nh = [\n tf.placeholder(tf.float32, [None, width]) for _ in range(depth)]\n self.input_state_ph_ns = tf.placeholder(\n tf.float32, [None, self.ob_dim])\n self.policy_action_na, self.resulting_rnn_state_nh = self._rnn_policy(\n self.input_state_ph_ns, self.initial_rnn_state_list_nh)\n self.maintained_rnn_state = None\n\n # compute the rollout in full TF to keep all computation on the GPU\n # reuse the policy network for BPTT model-based optimization\n self.bptt_initial_state_ph_ns = tf.placeholder(\n tf.float32, [batch_size, self.ob_dim], \"bptt_input_state\")\n def body(t, state_ns, rnn_state_nh, costs_n):\n action_na, next_rnn_state_nh = self._rnn_policy(\n state_ns, rnn_state_nh)\n next_state_ns = dyn_model.predict_tf(state_ns, action_na)\n next_costs_n = cost_fn(state_ns, action_na, next_state_ns, costs_n)\n return [t + 1, next_state_ns, next_rnn_state_nh, next_costs_n]\n loop_vars = [\n tf.constant(0),\n self.bptt_initial_state_ph_ns,\n self.rnn.zero_state(batch_size, tf.float32),\n tf.zeros((batch_size,))]\n _, _, _, costs_n = tf.while_loop(\n lambda t, _, __, ___: t < horizon, body, loop_vars)\n self.mean_cost = tf.reduce_mean(costs_n)\n policy_vars = self.rnn.trainable_variables\n self.update_op = tf.train.AdamOptimizer(learning_rate).minimize(\n self.mean_cost, var_list=policy_vars)\n\n def fit(self, data):\n all_obs = data.stationary_obs()\n nexamples = len(all_obs)\n nbatches = max(nexamples // self.batch_size, 1)\n batches = np.random.randint(nexamples, size=(\n self.epochs * nbatches, self.batch_size))\n for batch_idx in batches:\n input_states_sample = all_obs[batch_idx]\n self.sess.run(self.update_op, feed_dict={\n self.bptt_initial_state_ph_ns: input_states_sample})\n\n def reset(self, nstates):\n self.maintained_rnn_state = [\n np.zeros((nstates, self.width))\n for _ in range(self.depth)]\n\n def get_action(self, states_ns):\n feed_dict = {\n self.input_state_ph_ns: states_ns}\n for layer_state_ph, layer_state in zip(self.initial_rnn_state_list_nh,\n self.maintained_rnn_state):\n feed_dict[layer_state_ph] = layer_state\n action_na, next_rnn_state_nh = self.sess.run(\n [self.policy_action_na, self.resulting_rnn_state_nh],\n feed_dict=feed_dict)\n self.maintained_rnn_state = next_rnn_state_nh\n return action_na\n\n def _rnn_policy(self, state_ns, rnn_state_nh):\n ac_na, next_rnn_state_nh = self.rnn(state_ns, rnn_state_nh)\n ac_na *= (self.ac_space.high - self.ac_space.low)\n ac_na += self.ac_space.low\n return ac_na, next_rnn_state_nh\n\nclass MPCMF(Controller):\n def __init__(self,\n env,\n dyn_model,\n horizon=None,\n cost_fn=None,\n num_simulated_paths=None,\n learning_rate=None,\n depth=None,\n width=None,\n batch_size=None,\n epochs=None,\n sess=None):\n super().__init__()\n self.sess = sess\n self.batch_size = batch_size\n self.epochs = epochs\n self.ob_dim = get_ob_dim(env)\n self.ac_dim = get_ac_dim(env)\n self.width = width\n self.depth = depth\n self.ac_space = env.action_space\n\n # create placeholder for training an MPC learner\n # a = action dim\n # s = state dim\n # n = batch size\n self.input_state_ph_ns = tf.placeholder(\n tf.float32, [None, self.ob_dim])\n self.policy_action_na = self._policy(\n self.input_state_ph_ns, is_initial=False, reuse=None)\n self.expert_action_ph_na = tf.placeholder(\n tf.float32, [None, self.ac_dim])\n mse = tf.losses.mean_squared_error(\n self.expert_action_ph_na,\n self.policy_action_na)\n \n # use the learner value to expand the MPC (first action is random)\n self.mpc = MPCcontroller(\n env, dyn_model, horizon, cost_fn, num_simulated_paths, sess,\n self._policy)\n \n self.update_op = tf.train.AdamOptimizer(learning_rate).minimize(mse)\n\n def _policy(self, state_ns, is_initial=True, reuse=True):\n def exploit_policy(state_ns):\n ac_na = build_mlp(\n state_ns, scope='mf_policy_mean',\n n_layers=self.depth, size=self.width, activation=tf.nn.relu,\n output_activation=tf.sigmoid, reuse=reuse)\n ac_na *= (self.ac_space.high - self.ac_space.low)\n ac_na += self.ac_space.low\n return ac_na\n random_policy = MPCcontroller._create_random_policy(self.ac_space)\n if is_initial:\n return random_policy(state_ns)\n else:\n return exploit_policy(state_ns)\n\n def fit(self, data):\n all_obs = data.stationary_obs()\n all_acs = data.stationary_acs()\n nexamples = len(all_obs)\n assert nexamples == len(all_acs), (nexamples, len(all_acs))\n per_epoch = max(nexamples // self.batch_size, 1)\n batches = np.random.randint(nexamples, size=(\n self.epochs * per_epoch, self.batch_size))\n for i, batch_idx in enumerate(batches, 1):\n input_states_sample = all_obs[batch_idx]\n label_actions_sample = all_acs[batch_idx]\n self.sess.run(self.update_op, feed_dict={\n self.input_state_ph_ns: input_states_sample,\n self.expert_action_ph_na: label_actions_sample})\n\n def get_action(self, states_ns):\n return self.mpc.get_action(states_ns)\n") ("filetypes" "python"))) ("filepath" . "/home/vlad/Documents/cal/courses-year1/cs294-112/cal-deeprl-hw4/controllers.py") ("line_num" . 1) ("column_num" . 1)) :parser json-read)
ycmd-notify-file-ready-to-parse()
ycmd--conditional-parse(mode-enabled)
ycmd-mode()
ycmd--maybe-enable-mode()
global-ycmd-mode-enable-in-buffers()
run-hooks(change-major-mode-after-body-hook prog-mode-hook python-mode-hook)
apply(run-hooks (change-major-mode-after-body-hook prog-mode-hook python-mode-hook))
run-mode-hooks(python-mode-hook)
python-mode()
set-auto-mode-0(python-mode nil)
set-auto-mode()
#[0 "\300 \207" [set-auto-mode] 1 "\n\n(fn)"]()
funcall(#[0 "\300 \207" [set-auto-mode] 1 "\n\n(fn)"])
normal-mode(t)
after-find-file(nil t)
find-file-noselect-1(#<buffer controllers.py> "~/Documents/cal/courses-year1/cs294-112/cal-deeprl-hw4/controllers.py" nil nil "~/Documents/cal/courses-year1/cs294-112/cal-deeprl-hw4/controllers.py" (5111978 64769))
find-file-noselect("/home/vlad/Documents/cal/courses-year1/cs294-112/cal-deeprl-hw4/controllers.py" nil nil)
ido-file-internal(raise-frame)
ido-find-file()
call-interactively(ido-find-file nil nil)
command-execute(ido-find-file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment