Dohyeong Kim kimbring2

## gist:f9c166edf67c8a271d5d54698df2ddf6
from pysc2.env import sc2_env, available_actions_printer
from pysc2.env.environment import StepType
from absl import flags

FLAGS = flags.FLAGS
FLAGS(sys.argv)

class SC2Environment:
  """See PySC2 environment."""
  def __init__(self, settings):

## dqn_snake.py
#!/usr/bin/env python
from __future__ import print_function

import tensorflow as tf
import cv2
import sys
import random
import numpy as np
from collections import deque

## cartpole-test-3.py
'''
Policy gradient algorithm: instead of choosing the action as a deterministic function of the sign of
the weighted sum, choose the action randomly, with a distribution over the two available actions
that depends on the numerical output of the inner product.

Policy gradient prescribes a principled parameter update rule [1, 2].

Your goal is to implement this algorithm for the simple linear model, and see how long it takes to converge.
'''

## cartpole-test-2.py
'''
The hill-climbing algorithm: Start with a random setting of the parameters,
add a small amount of noise to the parameters, and evaluate the new parameter configuration.
If it performs better than the old configuration, discard the old configuration and accept the new one.
Repeat this process for some number of iterations. How long does it take to achieve perfect performance?
'''

import gym
import random

## cartpole-test-1.py
'''
The random guessing algorithm: generate 10,000 random configurations of the model's parameters,
and pick the one that achieves the best cumulative reward. It is important to choose the distribution
over the parameters correctly.
'''

import gym
import random

env = gym.make('CartPole-v0')
	from pysc2.env import sc2_env, available_actions_printer
	from pysc2.env.environment import StepType
	from absl import flags

	FLAGS = flags.FLAGS
	FLAGS(sys.argv)

	class SC2Environment:
	"""See PySC2 environment."""
	def __init__(self, settings):
	#!/usr/bin/env python
	from __future__ import print_function

	import tensorflow as tf
	import cv2
	import sys
	import random
	import numpy as np
	from collections import deque
	'''
	Policy gradient algorithm: instead of choosing the action as a deterministic function of the sign of
	the weighted sum, choose the action randomly, with a distribution over the two available actions
	that depends on the numerical output of the inner product.

	Policy gradient prescribes a principled parameter update rule [1, 2].

	Your goal is to implement this algorithm for the simple linear model, and see how long it takes to converge.
	'''
	'''
	The hill-climbing algorithm: Start with a random setting of the parameters,
	add a small amount of noise to the parameters, and evaluate the new parameter configuration.
	If it performs better than the old configuration, discard the old configuration and accept the new one.
	Repeat this process for some number of iterations. How long does it take to achieve perfect performance?
	'''

	import gym
	import random
	'''
	The random guessing algorithm: generate 10,000 random configurations of the model's parameters,
	and pick the one that achieves the best cumulative reward. It is important to choose the distribution
	over the parameters correctly.
	'''

	import gym
	import random

	env = gym.make('CartPole-v0')