pyliaorachel /
Last active July 20, 2023 06:13
Some Regular Expressions that may be useful for data cleaning.
Punctuation, US-ASCII


Punctuation, including Unicode (\u2000-\u206F: general punctuation, \u2E00-\u2E7F: supplemental punctuation)


Chinese characters
Git Basics

Basic Flow

If you're not using any branching (i.e. you only work on the master branch), you can follow the simple, basic Git flow below.

Step 0: Install and configure Git


  • MacOS: open your terminal, $ brew install git
  • Windows: install Git Bash (part of Git for Windows), then right-click anywhere on the desktop and choose Git Bash
OpenAI Gym CartPole - Deep Q-Learning (modify reward)
# Advance the environment by one step with the chosen action.
next_state, reward, done, info = env.step(action)
# Modify the reward to speed up training
# presumably (cart position, cart velocity, pole angle, pole angular velocity) — confirm against the CartPole observation layout
x, v, theta, omega = next_state
r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 # the closer the cart is to the center, the better
r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 # the more upright the pole, the better
reward = r1 + r2
# The shaped reward (not the one returned by env.step) is what gets stored.
dqn.store_transition(state, action, reward, next_state)
OpenAI Gym CartPole - Deep Q-Learning (cartpole env)
# Reward assignment excerpt from the CartPole environment.
# NOTE(review): indentation is lost in this excerpt and an `else:` branch
# header appears to be missing before the last two statements — confirm
# against the original environment source before relying on this listing.
if not done:
reward = 1.0
elif self.steps_beyond_done is None:
# Pole just fell!
self.steps_beyond_done = 0
reward = 1.0
self.steps_beyond_done += 1
reward = 0.0
OpenAI Gym CartPole - Deep Q-Learning (train)
# Create the CartPole-v0 environment.
env = gym.make('CartPole-v0')
# Environment parameters
n_actions = env.action_space.n # size of the action space
n_states = env.observation_space.shape[0] # length of the observation vector
# Hyper parameters
n_hidden = 50 # presumably the hidden-layer width handed to the network — confirm at the DQN call site
batch_size = 32 # presumably the number of experiences sampled per learning step — confirm at the DQN call site
lr = 0.01 # learning rate
OpenAI Gym CartPole - Deep Q-Learning (dqn learn)
def learn(self):
# Excerpt: only the minibatch sampling step of learn() is visible here.
# Randomly sample batch_size experiences
# NOTE(review): indices are drawn from the full memory_capacity range, so
# rows that have not been written yet could be sampled — confirm that
# learning only starts once the memory has been filled.
sample_index = np.random.choice(self.memory_capacity, self.batch_size)
b_memory = self.memory[sample_index, :]
# Split each sampled row into its parts: the first n_states columns are the
# state, the next column is the action, the next is the reward, and the last
# n_states columns are the next state.
b_state = torch.FloatTensor(b_memory[:, :self.n_states])
b_action = torch.LongTensor(b_memory[:, self.n_states:self.n_states+1].astype(int))
b_reward = torch.FloatTensor(b_memory[:, self.n_states+1:self.n_states+2])
b_next_state = torch.FloatTensor(b_memory[:, -self.n_states:])
# Compute the gap between the Q values given by the current eval net and the target net
OpenAI Gym CartPole - Deep Q-Learning (dqn store transition)
def store_transition(self, state, action, reward, next_state):
    """Write one experience into the replay memory.

    The experience is flattened into a single row laid out as ``state``,
    then ``action`` and ``reward``, then ``next_state``. Rows are written
    in a circular fashion, so once ``memory_counter`` exceeds
    ``memory_capacity`` the oldest rows are overwritten.
    """
    # Circular write position inside the fixed-size memory.
    slot = self.memory_counter % self.memory_capacity
    # Pack the experience into one flat row and store it.
    self.memory[slot, :] = np.hstack((state, [action, reward], next_state))
    self.memory_counter += 1
OpenAI Gym CartPole - Deep Q-Learning (dqn choose action)
def choose_action(self, state):
    """Pick an action for ``state`` using an epsilon-greedy policy.

    When a uniform random draw is below ``self.epsilon``, a random action
    index in ``[0, self.n_actions)`` is chosen. Otherwise the action with
    the highest score from ``self.eval_net`` is chosen.

    Args:
        state: A single observation, convertible to a float tensor.

    Returns:
        The chosen action index as a plain ``int``.
    """
    # Add a leading batch dimension so the net receives a batch of one state.
    x = torch.unsqueeze(torch.FloatTensor(state), 0)

    # epsilon-greedy
    if np.random.uniform() < self.epsilon:  # explore: random action
        action = int(np.random.randint(0, self.n_actions))
    else:  # exploit: best action under the current policy
        # Action selection needs no gradients.
        with torch.no_grad():
            actions_value = self.eval_net(x)  # score of every action from the eval net
        action = int(torch.max(actions_value, 1)[1][0])  # index of the highest score

    # The chosen action must be handed back to the caller.
    return action
OpenAI Gym CartPole - Deep Q-Learning (dqn framework)
class DQN(object):
    """Deep Q-Network agent state: two networks, a replay memory and an optimizer."""

    def __init__(self, n_states, n_actions, n_hidden, batch_size, lr, epsilon, gamma, target_replace_iter, memory_capacity):
        """Build the networks, replay memory, optimizer and counters.

        Args:
            n_states: Length of one state vector.
            n_actions: Number of available actions.
            n_hidden: Hidden-layer size passed to ``Net``.
            batch_size: Number of experiences sampled per learning step.
            lr: Learning rate for the Adam optimizer.
            epsilon: Threshold used by the epsilon-greedy action choice.
            gamma: Stored on the instance; not used in this constructor.
            target_replace_iter: Stored on the instance; not used in this
                constructor.
            memory_capacity: Number of rows in the replay memory.
        """
        # Two networks with identical architecture: eval net and target net.
        self.eval_net, self.target_net = Net(n_states, n_actions, n_hidden), Net(n_states, n_actions, n_hidden)

        # Each experience row holds (state + next state + reward + action).
        self.memory = np.zeros((memory_capacity, n_states * 2 + 2))
        # Only the eval net's parameters are handed to the optimizer.
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=lr)
        self.loss_func = nn.MSELoss()
        self.memory_counter = 0
        self.learn_step_counter = 0  # lets the target network know when it should be updated

        # Keep the configuration on the instance: the agent's methods read
        # these values from ``self`` (e.g. self.n_states, self.batch_size,
        # self.memory_capacity, self.epsilon, self.n_actions).
        self.n_states = n_states
        self.n_actions = n_actions
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        self.lr = lr
        self.epsilon = epsilon
        self.gamma = gamma
        self.target_replace_iter = target_replace_iter
        self.memory_capacity = memory_capacity
OpenAI Gym CartPole - Deep Q-Learning (create net)
class Net(nn.Module):
# Build the two linear layers of the network.
def __init__(self, n_states, n_actions, n_hidden):
super(Net, self).__init__()
# Input layer (state) to hidden layer, hidden layer to output layer (action)
self.fc1 = nn.Linear(n_states, n_hidden)
self.out = nn.Linear(n_hidden, n_actions)
def forward(self, x):
x = self.fc1(x)