Ravish Chawla (ravishchawla) - GitHub Gists
import time
from collections import deque
import numpy as np

# `env` and `brain_name` come from the Unity ML-Agents environment setup.
def multi_ddpg(n_episodes=5000, max_t=2000):
    init_time = time.time()
    scores_deque = deque(maxlen=100)
    scores = []
    max_score = -np.inf
    for i_episode in range(1, n_episodes + 1):
        ep_init_time = time.time()
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations
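        # The gist preview cuts off here. A hedged sketch of how the episode
        # loop typically continues in this multi-agent DDPG setup; `agent`
        # and the Unity ML-Agents fields below are assumptions, not taken
        # from the original gist.
        ep_scores = np.zeros(len(env_info.agents))
        for t in range(max_t):
            actions = agent.act(states)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            agent.step(states, actions, rewards, next_states, dones)
            states = next_states
            ep_scores += rewards
            if np.any(dones):
                break
        scores_deque.append(np.mean(ep_scores))
        scores.append(np.mean(ep_scores))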
class Agent():
    """Interacts with and learns from the environment."""

    def __init__(self, state_size, action_size, replay_memory, batch_size, random_seed):
        """Initialize an Agent object.
        - Instantiate the Actor and Critic networks, Replay Memory, and a Noise process
        """

    def step(self, state, action, reward, next_state, done):
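        # The preview truncates here. A hedged sketch of what `step` usually
        # does in DDPG implementations of this shape: store the experience,
        # then learn from a random batch once enough samples have accumulated.
        # `self.memory`, `add`, `sample`, and `learn` are assumed names.
        self.memory.add(state, action, reward, next_state, done)
        if len(self.memory) > self.batch_size:
            experiences = self.memory.sample()
            self.learn(experiences, gamma=0.99)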
@ravishchawla
ravishchawla / ddpg_hyperparameters.csv
Created November 9, 2019 02:54
DDPG Hyperparameters
Hyperparameter Value
Replay Buffer Size 1e5
Minibatch Size 128
Discount Rate 0.99
TAU 1e-3
Actor Learning Rate 1e-4
Critic Learning Rate 1e-4
L2 Weight Decay 0
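For context, values like these typically live as module-level constants in the DDPG agent code; a minimal sketch (the constant names are assumptions, not taken from the gist):

BUFFER_SIZE = int(1e5)  # Replay Buffer Size
BATCH_SIZE = 128        # Minibatch Size
GAMMA = 0.99            # Discount Rate
TAU = 1e-3              # soft-update interpolation factor for target networks
LR_ACTOR = 1e-4         # Actor Learning Rate
LR_CRITIC = 1e-4        # Critic Learning Rate
WEIGHT_DECAY = 0        # L2 Weight Decay (commonly applied to the critic optimizer)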
@ravishchawla
ravishchawla / duel-network-4.csv
Last active September 27, 2019 17:03
Duel Network hyperparameters
Hyperparameter Value
Number of Episodes 2000
Number of Timesteps 1000
Print Checkpoint step every 4
Training Batch Size 64
Discount Rate / Gamma 0.99
Learning Rate / alpha 5e-4
Number of Hidden Layers 2
Fully Connected Layer 1 Units 64
Fully Connected Layer 2 Units 64
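The layer sizes above imply a network of this shape. A minimal PyTorch sketch of a dueling Q-network with two 64-unit hidden layers; the class and attribute names are assumptions, not the gist's own code:

import torch
import torch.nn as nn
import torch.nn.functional as F

class DuelingQNetwork(nn.Module):
    def __init__(self, state_size, action_size):
        super().__init__()
        self.fc1 = nn.Linear(state_size, 64)          # Fully Connected Layer 1
        self.fc2 = nn.Linear(64, 64)                  # Fully Connected Layer 2
        self.value = nn.Linear(64, 1)                 # state-value stream V(s)
        self.advantage = nn.Linear(64, action_size)   # advantage stream A(s, a)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        v = self.value(x)
        a = self.advantage(x)
        # Combine streams: Q(s, a) = V(s) + A(s, a) - mean over actions of A(s, a)
        return v + a - a.mean(dim=1, keepdim=True)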
@ravishchawla
ravishchawla / duel-network-3.py
Last active September 27, 2019 13:57
Dueling Q Network training
dqn_agent = Agent(state_size, action_size, 1024)
scores, discount = [], EPS  # `discount` holds the epsilon-greedy exploration rate
for ite in range(1, num_iterations + 1):
    env_info = env.reset(train_mode=True)[brain_name]
    score = 0
    state = env_info.vector_observations[0]
    for t_step in range(max_timesteps):
        action = dqn_agent.act(state, discount)
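        # The preview truncates here. A hedged sketch of how the inner loop
        # usually proceeds against the Unity ML-Agents API; EPS_DECAY and
        # EPS_MIN are assumed names for the epsilon schedule, not the gist's.
        env_info = env.step(action)[brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        dqn_agent.step(state, action, reward, next_state, done)
        state = next_state
        score += reward
        if done:
            break
    discount = max(EPS_MIN, discount * EPS_DECAY)
    scores.append(score)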
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import pandas as pd

# Grid-search the Random Forest hyperparameters with 5-fold CV, scored on R^2.
params = {'n_estimators': [10, 50, 100], 'max_depth': [5, 10, 30, 80],
          'max_features': [1, 3, 8, 15], 'min_samples_split': [3, 5, 10, 30, 50, 100]}
g_rfm = RandomForestRegressor(random_state=1024)
g_src = GridSearchCV(g_rfm, params, verbose=10, cv=5, scoring='r2')
g_src.fit(X_train, y_train)
print(g_src.best_params_)

# Refit with the best parameters, then rank feature importances.
tuned_rf_model = RandomForestRegressor(max_depth=30, max_features=3,
                                       min_samples_split=100, n_estimators=100)
tuned_rf_model.fit(X_train, y_train)
pd.DataFrame(list(zip(transaction_data_only.columns[2:], tuned_rf_model.feature_importances_)),
             columns=['Attribute', 'Feature Importance']).sort_values(by='Feature Importance', ascending=False)
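GridSearchCV also records every combination it tried; a quick way to inspect more than the single best result, assuming the fit above has run:

cv_results = pd.DataFrame(g_src.cv_results_)
print(cv_results[['params', 'mean_test_score', 'rank_test_score']].sort_values('rank_test_score').head())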
transcript_portfolio = pd.merge(transcript, portfolio, left_on='offer_id', right_on='id', how='left')
transcript_by_group = transcript_portfolio.groupby(['person', 'offer_id'])
completion_details = []

# Go through each group in the transaction grouping. Because iterating can be
# slow, we use vectorized operations inside the main loop.
for i, g in transcript_by_group:
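    # The preview truncates here. A hypothetical sketch of the kind of
    # per-group bookkeeping this loop might do; the event flag column
    # follows the dummies created below, and the original gist's exact
    # completion logic is not shown:
    completed = 'event_offer_completed' in g and g['event_offer_completed'].sum() > 0
    completion_details.append({'person': i[0], 'offer_id': i[1], 'completed': completed})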
# Cleaning the *Transcript* dataset
transcript_event = transcript['event'].str.get_dummies()
transcript_event.columns = ['event_' + '_'.join(col.split(' ')) for col in transcript_event.columns]

# Standardize "offer id" keys to "offer_id" in the value dictionaries.
def transcript_value_clean(x_dict):
    if 'offer id' in x_dict:
        x_dict['offer_id'] = x_dict['offer id']
        del x_dict['offer id']
    return x_dict
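# A hedged usage sketch: in this dataset the offer details live in a
# dictionary-valued `value` column, so the cleaner would be applied
# row-wise (the column name is an assumption based on the function name):
transcript['value'] = transcript['value'].apply(transcript_value_clean)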
# Cleaning the *Profile* dataset
profile = profile.dropna(axis=0, subset=['gender', 'income'])
profile_gender = profile['gender'].str.get_dummies()
profile_gender.columns = ['gender_' + col for col in profile_gender.columns]

# Split the became_member_on date (YYYYMMDD) into integer year, month, and day.
profile_date = profile['became_member_on']
profile_year = profile_date.astype(str).str[0:4].astype(int).rename('member_year')
profile_month = profile_date.astype(str).str[4:6].astype(int).rename('member_month')
profile_day = profile_date.astype(str).str[6:8].astype(int).rename('member_day')
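# A hedged sketch of how these engineered columns might be reassembled into
# a cleaned profile frame (the exact combination step is not in the gist):
profile_clean = pd.concat(
    [profile.drop(columns=['gender', 'became_member_on']),
     profile_gender, profile_year, profile_month, profile_day], axis=1)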