Ravish Chawla (ravishchawla) - GitHub Gists
import time
from collections import deque
import numpy as np

# `env` and `brain_name` come from the Unity ML-Agents environment setup.
def multi_ddpg(n_episodes=5000, max_t=2000):
    init_time = time.time()
    scores_deque = deque(maxlen=100)
    scores = []
    max_score = -np.inf
    for i_episode in range(1, n_episodes + 1):
        ep_init_time = time.time()
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
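        # The gist preview cuts off here. A hedged sketch of how the episode
        # loop typically continues in this multi-agent DDPG setup; `agent`
        # and the Unity ML-Agents fields below are assumptions, not taken
        # from the original gist.
        ep_scores = np.zeros(len(env_info.agents))
        for t in range(max_t):
            actions = agent.act(states)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            agent.step(states, actions, rewards, next_states, dones)
            states = next_states
            ep_scores += rewards
            if np.any(dones):
                break
        scores_deque.append(np.mean(ep_scores))
        scores.append(np.mean(ep_scores))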
class Agent():
    """Interacts with and learns from the environment."""

    def __init__(self, state_size, action_size, replay_memory, batch_size, random_seed):
        """Initialize an Agent object.
        - Instantiate the Actor and Critic networks, Replay Memory, and a Noise process
        """

    def step(self, state, action, reward, next_state, done):
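        # The preview truncates here. A hedged sketch of what `step` usually
        # does in DDPG implementations of this shape: store the experience,
        # then learn from a random batch once enough samples have accumulated.
        # `self.memory`, `add`, `sample`, and `learn` are assumed names.
        self.memory.add(state, action, reward, next_state, done)
        if len(self.memory) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences, gamma=0.99)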
@ravishchawla
ravishchawla / ddpg_hyperparameters.csv
Created November 9, 2019 02:54
DDPG Hyperparameters
Hyperparameter Value
Replay Buffer Size 1e5
Minibatch Size 128
Discount Rate 0.99
TAU 1e-3
Actor Learning Rate 1e-4
Critic Learning Rate 1e-4
L2 Weight Decay 0
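For context, values like these typically live as module-level constants in the DDPG agent code; a minimal sketch (the constant names are assumptions, not taken from the gist):

BUFFER_SIZE = int(1e5)  # Replay Buffer Size
BATCH_SIZE = 128        # Minibatch Size
GAMMA = 0.99            # Discount Rate
TAU = 1e-3              # soft-update interpolation factor for target networks
LR_ACTOR = 1e-4         # Actor Learning Rate
LR_CRITIC = 1e-4        # Critic Learning Rate
WEIGHT_DECAY = 0        # L2 Weight Decay (commonly applied to the critic optimizer)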
@ravishchawla
ravishchawla / duel-network-4.csv
Last active September 27, 2019 17:03
Duel Network hyperparameters
Hyperparameter Value
Number of Episodes 2000
Number of Timesteps 1000
Print Checkpoint step every 4
Training Batch Size 64
Discount Rate / Gamma 0.99
Learning Rate / alpha 5e-4
Number of Hidden Layers 2
Fully Connected Layer 1 Units 64
Fully Connected Layer 2 Units 64
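The layer sizes above imply a network of this shape. A minimal PyTorch sketch of a dueling Q-network with two 64-unit hidden layers; the class and attribute names are assumptions, not the gist's own code:

import torch
import torch.nn as nn
import torch.nn.functional as F

class DuelingQNetwork(nn.Module):
    def __init__(self, state_size, action_size):
        super().__init__()
        self.fc1 = nn.Linear(state_size, 64)          # Fully Connected Layer 1
        self.fc2 = nn.Linear(64, 64)                  # Fully Connected Layer 2
        self.value = nn.Linear(64, 1)                 # state-value stream V(s)
        self.advantage = nn.Linear(64, action_size)   # advantage stream A(s, a)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        v = self.value(x)
        a = self.advantage(x)
        # Combine streams: Q(s, a) = V(s) + A(s, a) - mean over actions of A(s, a)
        return v + a - a.mean(dim=1, keepdim=True)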
@ravishchawla
ravishchawla / duel-network-3.py
Last active September 27, 2019 13:57
Dueling Q Network training
dqn_agent = Agent(state_size, action_size, 1024)
scores, discount = [], EPS  # `discount` holds the epsilon-greedy exploration rate
for ite in range(1, num_iterations + 1):
    env_info = env.reset(train_mode=True)[brain_name]
    score = 0
    state = env_info.vector_observations[0]
    for t_step in range(max_timesteps):
        action = dqn_agent.act(state, discount)
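        # The preview truncates here. A hedged sketch of how the inner loop
        # usually proceeds against the Unity ML-Agents API; EPS_DECAY and
        # EPS_MIN are assumed names for the epsilon schedule, not the gist's.
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        dqn_agent.step(state, action, reward, next_state, done)
        state = next_state
        score += reward
        if done:
            break
    discount = max(EPS_MIN, discount * EPS_DECAY)
    scores.append(score)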
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import pandas as pd

# Grid-search the Random Forest hyperparameters with 5-fold CV, scored on R^2.
params = {'n_estimators': [10, 50, 100], 'max_depth': [5, 10, 30, 80],
          'max_features': [1, 3, 8, 15], 'min_samples_split': [3, 5, 10, 30, 50, 100]}
g_rfm = RandomForestRegressor(random_state=1024)
g_src = GridSearchCV(g_rfm, params, verbose=10, cv=5, scoring='r2')
g_src.fit(X_train, y_train)
print(g_src.best_params_)

# Refit with the best parameters, then rank feature importances.
tuned_rf_model = RandomForestRegressor(max_depth=30, max_features=3,
                                       min_samples_split=100, n_estimators=100)
tuned_rf_model.fit(X_train, y_train)
pd.DataFrame(list(zip(transaction_data_only.columns[2:], tuned_rf_model.feature_importances_)),
             columns=['Attribute', 'Feature Importance']).sort_values(by='Feature Importance', ascending=False)
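GridSearchCV also records every combination it tried; a quick way to inspect more than the single best result, assuming the fit above has run:

cv_results = pd.DataFrame(g_src.cv_results_)
print(cv_results[['params', 'mean_test_score', 'rank_test_score']].sort_values('rank_test_score').head())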
transcript_portfolio = pd.merge(transcript, portfolio, left_on='offer_id', right_on='id', how='left')
transcript_by_group = transcript_portfolio.groupby(['person', 'offer_id'])
completion_details = []

# Go through each group in the transaction grouping. Because iterating can be
# slow, we use vectorized operations inside the main loop.
for i, g in transcript_by_group:
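    # The preview truncates here. A hypothetical sketch of the kind of
    # per-group bookkeeping this loop might do; the event flag column
    # follows the dummies created below, and the original gist's exact
    # completion logic is not shown:
    completed = 'event_offer_completed' in g and g['event_offer_completed'].sum() > 0
    completion_details.append({'person': i[0], 'offer_id': i[1], 'completed': completed})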
# Cleaning the *Transcript* dataset
transcript_event = transcript['event'].str.get_dummies()
transcript_event.columns = ['event_' + '_'.join(col.split(' ')) for col in transcript_event.columns]

# Standardize "offer id" keys to "offer_id" in the value dictionaries.
def transcript_value_clean(x_dict):
    if 'offer id' in x_dict:
        x_dict['offer_id'] = x_dict['offer id']
        del x_dict['offer id']
    return x_dict
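# A hedged usage sketch: in this dataset the offer details live in a
# dictionary-valued `value` column, so the cleaner would be applied
# row-wise (the column name is an assumption based on the function name):
transcript['value'] = transcript['value'].apply(transcript_value_clean)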
# Cleaning the *Profile* dataset
profile = profile.dropna(axis=0, subset=['gender', 'income'])
profile_gender = profile['gender'].str.get_dummies()
profile_gender.columns = ['gender_' + col for col in profile_gender.columns]

# Split the became_member_on date (YYYYMMDD) into integer year, month, and day.
profile_date = profile['became_member_on']
profile_year = profile_date.astype(str).str[0:4].astype(int).rename('member_year')
profile_month = profile_date.astype(str).str[4:6].astype(int).rename('member_month')
profile_day = profile_date.astype(str).str[6:8].astype(int).rename('member_day')
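# A hedged sketch of how these engineered columns might be reassembled into
# a cleaned profile frame (the exact combination step is not in the gist):
profile_clean = pd.concat(
    [profile.drop(columns=['gender', 'became_member_on']),
     profile_gender, profile_year, profile_month, profile_day], axis=1)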