@pyliaorachel
pyliaorachel / regex_useful.md
Last active July 20, 2023 06:13
Some Regular Expressions that may be useful for data cleaning.
Punctuation, US-ASCII

/[!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]/

Punctuation, including Unicode ranges (\u2000-\u206F: General Punctuation, \u2E00-\u2E7F: Supplemental Punctuation)

/[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]/
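A minimal Python sketch of how these patterns might be applied for cleaning; the re module usage, variable names, and the sample string are illustrative, not part of the gist:

import re

# US-ASCII punctuation (first pattern above)
ascii_punct = re.compile(r'[!"#$%&()*+,\-./:;<=>?@\[\]^_`{|}~]')

# ASCII punctuation plus the Unicode General Punctuation (U+2000-U+206F)
# and Supplemental Punctuation (U+2E00-U+2E7F) blocks (second pattern above)
unicode_punct = re.compile(
    r"[\u2000-\u206F\u2E00-\u2E7F\\'!\"#$%&()*+,\-./:;<=>?@\[\]^_`{|}~]"
)

text = "Hello, world… 'quoted' text"
print(unicode_punct.sub("", text))  # strip punctuation before further cleaning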

Chinese characters
@pyliaorachel
pyliaorachel / Git_basics.md
Last active August 2, 2018 03:39
Git Basics

Basic Flow

If you're not using any branching (i.e. you only work on the master branch), you can follow the simple, basic Git flow below.

Step 0: install git & configure

Install:

  • macOS: open your terminal and run $ brew install git
  • Windows: install Git Bash via Git for Windows, then right-click anywhere on the desktop and choose Git Bash
@pyliaorachel
pyliaorachel / dqn_modify_reward.py
Created June 15, 2018 05:12
OpenAI Gym CartPole - Deep Q-Learning (modify reward)
...
next_state, reward, done, info = env.step(action)
# Reshape the reward to speed up training
x, v, theta, omega = next_state
r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 # the closer the cart stays to the center, the better
r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 # the more upright the pole, the better
reward = r1 + r2
dqn.store_transition(state, action, reward, next_state)
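For context, a minimal sketch of the training loop this reward-shaping snippet would sit in; the episode loop, the memory_capacity check, and the dqn.learn() call are assumptions based on standard Gym/DQN usage rather than lines taken from the gist:

for i_episode in range(n_episodes):
    state = env.reset()
    t = 0
    while True:
        action = dqn.choose_action(state)
        next_state, reward, done, info = env.step(action)

        # ... reward shaping from the snippet above goes here ...

        dqn.store_transition(state, action, reward, next_state)
        if dqn.memory_counter > memory_capacity:  # start learning once the replay memory is full
            dqn.learn()

        state = next_state
        t += 1
        if done:
            print(f'Episode {i_episode} finished after {t} timesteps')
            break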
@pyliaorachel
pyliaorachel / cartpole_env_extract.py
Created June 15, 2018 05:11
OpenAI Gym CartPole - Deep Q-Learning (cartpole env)
...
if not done:
    reward = 1.0
elif self.steps_beyond_done is None:
    # Pole just fell!
    self.steps_beyond_done = 0
    reward = 1.0
else:
    self.steps_beyond_done += 1
    reward = 0.0
@pyliaorachel
pyliaorachel / dqn_train.py
Created June 15, 2018 05:09
OpenAI Gym CartPole - Deep Q-Learning (train)
env = gym.make('CartPole-v0')
# Environment parameters
n_actions = env.action_space.n
n_states = env.observation_space.shape[0]
# Hyperparameters
n_hidden = 50
batch_size = 32
lr = 0.01 # learning rate
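The preview stops partway through the hyperparameters; based on the DQN constructor shown further down, the rest of the setup presumably looks something like the sketch below (the specific values are illustrative, not taken from the gist):

epsilon = 0.1              # exploration rate for epsilon-greedy
gamma = 0.9                # reward discount factor
target_replace_iter = 100  # learn steps between target-network updates
memory_capacity = 2000     # size of the experience replay buffer
n_episodes = 400           # number of training episodes

# Matches the DQN __init__ signature shown below
dqn = DQN(n_states, n_actions, n_hidden, batch_size, lr, epsilon, gamma,
          target_replace_iter, memory_capacity)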
@pyliaorachel
pyliaorachel / dqn_learn.py
Created June 15, 2018 05:08
OpenAI Gym CartPole - Deep Q-Learning (dqn learn)
def learn(self):
    # Randomly sample batch_size experiences from memory
    sample_index = np.random.choice(self.memory_capacity, self.batch_size)
    b_memory = self.memory[sample_index, :]
    b_state = torch.FloatTensor(b_memory[:, :self.n_states])
    b_action = torch.LongTensor(b_memory[:, self.n_states:self.n_states+1].astype(int))
    b_reward = torch.FloatTensor(b_memory[:, self.n_states+1:self.n_states+2])
    b_next_state = torch.FloatTensor(b_memory[:, -self.n_states:])
    # Compute the gap between the Q values from the current eval net and the target net
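The preview ends at the loss computation; a typical completion of this learn step, assuming the standard DQN update (the exact lines in the full gist may differ):

    # Q values the eval net currently assigns to the actions that were actually taken
    q_eval = self.eval_net(b_state).gather(1, b_action)
    # Target Q values come from the frozen target net; detach so no gradients flow through it
    q_next = self.target_net(b_next_state).detach()
    q_target = b_reward + self.gamma * q_next.max(1)[0].view(self.batch_size, 1)
    loss = self.loss_func(q_eval, q_target)

    # Backpropagate and update the eval net
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()

    # Every target_replace_iter learn steps, copy the eval net weights into the target net
    self.learn_step_counter += 1
    if self.learn_step_counter % self.target_replace_iter == 0:
        self.target_net.load_state_dict(self.eval_net.state_dict())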
@pyliaorachel
pyliaorachel / dqn_store_transition.py
Created June 15, 2018 05:07
OpenAI Gym CartPole - Deep Q-Learning (dqn store transition)
def store_transition(self, state, action, reward, next_state):
    # Pack the experience into a single row
    transition = np.hstack((state, [action, reward], next_state))
    # Store it in memory; old experiences may be overwritten once memory is full
    index = self.memory_counter % self.memory_capacity
    self.memory[index, :] = transition
    self.memory_counter += 1
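To make the memory layout concrete, an illustrative example with made-up numbers, assuming CartPole's 4-dimensional state:

state      = [0.02, 0.10, -0.03, 0.20]   # x, v, theta, omega
next_state = [0.03, 0.30, -0.02, 0.00]
transition = np.hstack((state, [1, 0.9], next_state))  # action=1, shaped reward=0.9
# -> a 10-element row laid out as [state (4) | action | reward | next_state (4)],
#    exactly the layout that learn() slices back apart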
@pyliaorachel
pyliaorachel / dqn_choose_action.py
Created June 15, 2018 05:07
OpenAI Gym CartPole - Deep Q-Learning (dqn choose action)
def choose_action(self, state):
    x = torch.unsqueeze(torch.FloatTensor(state), 0)
    # epsilon-greedy
    if np.random.uniform() < self.epsilon:  # explore: pick a random action
        action = np.random.randint(0, self.n_actions)
    else:  # exploit: pick the best action under the current policy
        actions_value = self.eval_net(x)  # score every action with the current eval net
        action = torch.max(actions_value, 1)[1].data.numpy()[0]  # pick the highest-scoring action
    return action
@pyliaorachel
pyliaorachel / dqn.py
Created June 15, 2018 05:06
OpenAI Gym CartPole - Deep Q-Learning (dqn framework)
class DQN(object):
    def __init__(self, n_states, n_actions, n_hidden, batch_size, lr, epsilon, gamma, target_replace_iter, memory_capacity):
        self.eval_net, self.target_net = Net(n_states, n_actions, n_hidden), Net(n_states, n_actions, n_hidden)
        self.memory = np.zeros((memory_capacity, n_states * 2 + 2))  # each experience row holds (state + next state + reward + action)
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=lr)
        self.loss_func = nn.MSELoss()
        self.memory_counter = 0
        self.learn_step_counter = 0  # counts learn steps so we know when to update the target network
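The constructor preview is cut off here; the other methods reference several hyperparameters as attributes (self.n_states, self.epsilon, self.batch_size, and so on), so the remainder of __init__ presumably stores them, roughly:

        # Assumed: keep the hyperparameters around for choose_action / store_transition / learn
        self.n_states = n_states
        self.n_actions = n_actions
        self.batch_size = batch_size
        self.epsilon = epsilon
        self.gamma = gamma
        self.target_replace_iter = target_replace_iter
        self.memory_capacity = memory_capacity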
@pyliaorachel
pyliaorachel / net.py
Created June 15, 2018 05:00
OpenAI Gym CartPole - Deep Q-Learning (create net)
class Net(nn.Module):
    def __init__(self, n_states, n_actions, n_hidden):
        super(Net, self).__init__()
        # Input layer (state) to hidden layer, hidden layer to output layer (action)
        self.fc1 = nn.Linear(n_states, n_hidden)
        self.out = nn.Linear(n_hidden, n_actions)

    def forward(self, x):
        x = self.fc1(x)
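The preview cuts off inside forward; a typical completion for this two-layer net, assuming a ReLU activation and that torch.nn.functional is imported as F (the full gist may differ):

        x = F.relu(x)                # hidden-layer activation (assumed)
        actions_value = self.out(x)  # one Q value per action
        return actions_value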