Mohamed Gaber (MoGaber)

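# imports assumed by the snippets below (not shown in the gist previews)
import datetime

import pandas as pd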
full_data["time_taken"][full_data["time_taken"] < datetime.timedelta( days=0)]
# turn the email sent date into a datetime value instead of a string
date_vectorizer = lambda x: datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S UTC+0000")
full_data["emails_date_sent"] = full_data["emails_date_sent"].apply(date_vectorizer)
# join each email/question pair with any answer that professional gave to that question
full_data = pd.merge(emails_questions, answers, how='left', on=["professional_id", "question_id"])
full_data = full_data.rename(columns={"id": "email_id"})
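# not in the original gist: a plausible sketch of how time_taken could be derived,
# assuming answers_date_added has been parsed to datetime the same way as emails_date_sent
full_data["time_taken"] = full_data["answers_date_added"] - full_data["emails_date_sent"]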
# get only emails that were sent immediately after getting the question (ignore the weekly newsletter emails)
immediate_emails = emails[emails["emails_frequency_level"]=="email_notification_immediate"]
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# define the keras model
model = Sequential()
model.add(Dense(12, input_dim=12, activation='relu'))   # first hidden layer: 12 neurons, expecting 12 input features
model.add(Dense(8, activation='relu'))                   # hidden layer: 8 neurons with ReLU activation
model.add(Dense(8, activation='relu'))                   # hidden layer: 8 neurons with ReLU activation
model.add(Dense(1, activation='sigmoid'))                # output layer: 1 neuron with sigmoid activation for binary classification
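# not in the original gist: a minimal sketch of compiling and training the model,
# assuming X (the 12 feature columns) and y (the binary label) are the prepared arrays
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=50, batch_size=32, validation_split=0.2)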
med = model_dataset["avg_time_taken"].median()
model_dataset["avg_time_taken"] = model_dataset["avg_time_taken"].fillna(med)
model_dataset["avg_time_taken"]= model_dataset["avg_time_taken"][model_dataset["avg_time_taken"].notnull()].apply(lambda x: x.total_seconds()/3600)
model_dataset["avg_time_taken"].describe()
# drop identifier, raw date/text, and intermediate columns that won't be used as model features
model_dataset = model_dataset.drop(columns=["email_id", "professional_id", "emails_date_sent", "emails_frequency_level",
                                            "question_id", "answers_id", "answers_date_added", "answers_body", "time_taken"])
import seaborn as sns

# visualize missing values per column: blue = not missing, yellow = missing
cols = model_dataset.columns
colours = ['#000099', '#ffff00']
sns.heatmap(model_dataset[cols].isnull(), cmap=sns.color_palette(colours))
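# not in the original gist: a quick numeric complement to the heatmap,
# showing the fraction of missing values in each column
model_dataset.isnull().mean().sort_values(ascending=False)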
# get the answers-to-emails ratio (#answers / #emails sent) for each professional
prof_emls = immediate_emails["emails_recipient_id"].value_counts().reset_index().rename(
    columns={"index": "professional_id", "emails_recipient_id": "num_emails"})
professionals_dataset = pd.merge(professionals_dataset, prof_emls, how='left', on=["professional_id"])
professionals_dataset["answrs_emails_ratio"] = professionals_dataset["number_q_answered"] / professionals_dataset["num_emails"]

# get the number of groups that each professional is following
prof_grp = group_members["group_memberships_user_id"].value_counts().reset_index().rename(
    columns={"index": "professional_id", "group_memberships_user_id": "num_groups"})
professionals_dataset = pd.merge(professionals_dataset, prof_grp, how='left', on=["professional_id"])

# get the number of schools that each professional is following
prof_schl = schools_members["school_memberships_user_id"].value_counts().reset_index().rename(
    columns={"index": "professional_id", "school_memberships_user_id": "num_schools"})
professionals_dataset = pd.merge(professionals_dataset, prof_schl, how='left', on=["professional_id"])
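# not in the original gist: a plausible follow-up, assuming professionals that never appear in the
# email, group, or school tables simply have zero of each
for col in ["num_emails", "num_groups", "num_schools"]:
    professionals_dataset[col] = professionals_dataset[col].fillna(0)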