Created
April 16, 2018 22:15
-
-
Save duncangh/5317f377c7112af9220028bf99143d0f to your computer and use it in GitHub Desktop.
cron-email
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib | |
matplotlib.use('Agg') | |
import prettyplotlib as ppl | |
import brewer2mpl | |
from matplotlib.colors import Normalize | |
from ggplot import * | |
from datetime import datetime, timedelta, time | |
from hdfs import InsecureClient | |
from email.MIMEMultipart import MIMEMultipart | |
from email.MIMEText import MIMEText | |
from email.MIMEImage import MIMEImage | |
# HDFS directory under which the metrics, plots and model files are looked up.
ROOT_DIR = "/app/euclid/maxwell/"

# Sender and recipient of the report e-mail.
FROM = 'xiangyus@uber.com'
TO = 'marketing-tech-eng-report-group@uber.com'

# Local working directory: HDFS downloads are written here and report.html
# is read from here.
CUR_DIR = '/home/xiangyus/maxwell_report/'

# city_id -> display name of the cities covered by the feature-weight charts.
TOP_CITY = {
    12: 'Los Angeles',
    5: 'New York City',
    7: 'Chicago',
    14: 'Miami',
    1: 'San Francisco',
}
def get_recent_week(delta):
    '''Return the Monday `delta` weeks before the current week as 'YYYY-MM-DD'.

    The current week is determined from the local clock (datetime.now());
    delta=0 yields the Monday of the current week, delta=1 the Monday of the
    previous week, and so on.
    '''
    now = datetime.now()
    # weekday() is 0 on Monday, so this steps back to the start of the week.
    monday_this_week = now - timedelta(days=now.weekday())
    target = monday_this_week - timedelta(weeks=delta)
    return target.strftime('%Y-%m-%d')
def get_rmse(client, week):
    '''Read the RMSE and correlation recorded for `week`.

    Opens ROOT_DIR + "metrics/mmm/<week>.metrics" through `client.read` and
    consumes the first two items of the reader: the first is taken as the
    RMSE entry, the second as the correlation entry. Each entry is expected
    to have the form "<label>:<value>".

    Args:
        client: HDFS client whose `read(path)` is usable as a context manager.
        week: week identifier used in the file name.

    Returns:
        Tuple (rmse, corr) of strings with surrounding whitespace (including
        a trailing newline) removed, so they can be embedded in HTML or
        passed to float() directly.

    Raises:
        StopIteration: if the reader yields fewer than two entries.
        IndexError: if an entry contains no ":" separator.
    '''
    file_name = ROOT_DIR + "metrics/mmm/{}.metrics".format(week)
    with client.read(file_name) as reader:
        # NOTE(review): presumably the reader yields one text line per step;
        # confirm against the HdfsCLI version in use.
        # next() works on both Python 2 and Python 3 iterators.
        rmse_line = next(reader)
        corr_line = next(reader)
    return rmse_line.split(":")[1].strip(), corr_line.split(":")[1].strip()
def get_goodness_plot(client, week):
    '''Download the goodness plot of `week` and wrap it as an inline image.

    The PNG is copied from ROOT_DIR + "plots/mmm/<week>.png" to
    CUR_DIR + "<week>.png" and returned as a MIMEImage whose Content-ID is
    <goodness_plot>, so an HTML body can reference it as cid:goodness_plot.
    '''
    hdfs_path = ROOT_DIR + "plots/mmm/{}.png".format(week)
    local_path = CUR_DIR + "{}.png".format(week)
    # True is the third positional argument of download(); in HdfsCLI's
    # Client.download signature that position is the overwrite flag.
    client.download(hdfs_path, local_path, True)

    with open(local_path, 'rb') as png:
        image_part = MIMEImage(png.read(), 'png')
    image_part.add_header('Content-ID', '<goodness_plot>')
    return image_part
def get_recent_weeks_rmse(client, num):
    '''Plot the RMSE of the most recent `num` weeks as an inline image.

    For each of the last `num` weeks (most recent first) the RMSE and
    correlation are fetched with get_rmse(), each week/RMSE pair is echoed
    to stdout, and the RMSE series is drawn as a ggplot line chart.

    Args:
        client: HDFS client handed through to get_rmse().
        num: number of past weeks to include.

    Returns:
        MIMEImage of the chart with Content-ID <rmse_plot>.
    '''
    weeks = []
    rmse_values = []
    corr_values = []
    for offset in range(1, num + 1):
        week_str = get_recent_week(offset)
        rmse, corr = get_rmse(client, week_str)
        rmse_values.append(float(rmse))
        corr_values.append(float(corr))
        weeks.append(datetime.strptime(week_str, '%Y-%m-%d'))

    for week_start, week_rmse in zip(weeks, rmse_values):
        print("{} : {}".format(week_start, week_rmse))

    df = pd.DataFrame({
        "RMSE": rmse_values,
        "CORR": corr_values,
        "PredictionWeek": weeks,
    })
    plot = ggplot(aes(x="PredictionWeek", y="RMSE"), data=df) + geom_line()

    # Write next to the other report artifacts instead of the process's
    # working directory, which is not under this script's control when it is
    # started by cron.
    image_path = CUR_DIR + 'rmse_plot.png'
    plot.save(image_path)
    with open(image_path, 'rb') as fp:
        msg_img = MIMEImage(fp.read(), 'png')
    msg_img.add_header('Content-ID', '<rmse_plot>')
    return msg_img
def get_feature_weight(client, week):
    '''Download the feature-weight CSV of `week` and return its local path.

    Copies ROOT_DIR + 'metrics/mmm/<week>_feature_weights.csv' to
    CUR_DIR + '<week>_feature_weights.csv'.
    '''
    hdfs_path = ROOT_DIR + 'metrics/mmm/{}_feature_weights.csv'.format(week)
    csv_path = CUR_DIR + '{}_feature_weights.csv'.format(week)
    # True is the third positional argument of download(); in HdfsCLI's
    # Client.download signature that position is the overwrite flag.
    client.download(hdfs_path, csv_path, True)
    return csv_path
def get_stacked_feature_weights(client, num):
    '''Build per-city stacked bar charts of feature weights over recent weeks.

    Downloads the feature-weight CSV of each of the last `num` weeks, keeps
    the rows whose city_id is a key of TOP_CITY, and draws one chart per
    city: one bar per week, one stacked segment per CSV column other than
    city_id. City names and the collected weights are echoed to stdout.

    Args:
        client: HDFS client handed through to get_feature_weight().
        num: number of past weeks to include.

    Returns:
        Tuple (weights_table, plots):
          * weights_table - HTML rows (header row plus one row per city) to
            be placed inside a <table>; each row references its chart as
            cid:<city_id>.
          * plots - dict mapping city_id to the MIMEImage of that city's
            chart, with Content-ID <city_id>.

    Raises:
        IndexError: if a week's CSV has no row for one of the TOP_CITY ids.
    '''
    import matplotlib.pyplot as plt

    # week string -> DataFrame restricted to the top cities
    feature_weights = {}
    for offset in range(1, num + 1):
        week_str = get_recent_week(offset)
        all_df = pd.read_csv(get_feature_weight(client, week_str))
        feature_weights[week_str] = all_df[all_df.city_id.isin(list(TOP_CITY))]

    plots = {}
    for city, city_name in TOP_CITY.items():
        print("City: {}".format(city_name))

        # Collect, per column, the city's weight for every week. `weeks` and
        # each weights[col] list are filled in the same iteration order, so
        # they stay aligned.
        weeks = []
        weights = {}
        for week_str, frame in feature_weights.items():
            weeks.append(datetime.strptime(week_str, '%Y-%m-%d'))
            city_rows = frame[frame['city_id'] == city]
            for col in city_rows.columns.values:
                if col == 'city_id':
                    continue
                weights.setdefault(col, []).append(float(city_rows[col].iloc[0]))

        image_path = CUR_DIR + '{}.png'.format(city)
        fig, ax = plt.subplots(1, figsize=(18, 6))
        try:
            # Running top edge of the stack; None until the first column is drawn.
            bottoms = None
            for col, values in weights.items():
                print("{} {}".format(col, values))
                if bottoms is None:
                    ax.bar(weeks, values, width=2.75, label=col)
                    bottoms = list(values)
                else:
                    ax.bar(weeks, values, width=2.75, label=col, bottom=bottoms)
                    bottoms = [low + height for low, height in zip(bottoms, values)]
            ax.legend(bbox_to_anchor=(1, 1))
            fig.savefig(image_path)
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # without this one figure per city would accumulate.
            plt.close(fig)

        with open(image_path, 'rb') as fp:
            msg_img = MIMEImage(fp.read(), 'png')
        msg_img.add_header('Content-ID', '<{}>'.format(city))
        plots[city] = msg_img

    # Construct HTML: header row wrapped in <tr>, <img> tag properly closed.
    rows = ['<tr><th>City</th><th>Plot</th></tr>']
    for city_id, city_name in TOP_CITY.items():
        rows.append(
            '<tr><td>{}</td><td><img src="cid:{}" height="200" width="622"></td></tr>'
            .format(city_name, city_id)
        )
    return ''.join(rows), plots
def get_cov_heatmap(client, week):
    '''Render the covariance matrix of `week` as a heatmap inline image.

    Downloads ROOT_DIR + 'model/mmm/<week>_cov_matrix' to
    CUR_DIR + '<week>_cov_feature', loads it with pandas.read_csv and draws
    it with prettyplotlib's pcolormesh using the Blues colormap, normalised
    to the data's min/max.

    Args:
        client: HDFS client exposing `download(remote, local, overwrite)`.
        week: week identifier used in the file names.

    Returns:
        MIMEImage of the heatmap with Content-ID <heatmap_plot>.
    '''
    import matplotlib.pyplot as plt

    remote_file = ROOT_DIR + 'model/mmm/{}_cov_matrix'.format(week)
    local_file = CUR_DIR + '{}_cov_feature'.format(week)
    client.download(remote_file, local_file, True)

    # .values returns the same ndarray as the deprecated DataFrame.as_matrix()
    # and exists in every pandas release.
    data = pd.read_csv(local_file).values

    # Write next to the other report artifacts rather than into the
    # process's working directory.
    image_path = CUR_DIR + 'heatmap_plot.png'
    fig, ax = plt.subplots()
    try:
        ppl.pcolormesh(
            fig, ax, data,
            cmap=plt.cm.Blues,
            norm=Normalize(vmin=data.min(), vmax=data.max()),
        )
        fig.savefig(image_path, dpi=500)
    finally:
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)

    with open(image_path, 'rb') as fp:
        msg_img = MIMEImage(fp.read(), 'png')
    msg_img.add_header('Content-ID', '<heatmap_plot>')
    return msg_img
def construct_email(html_string):
    '''Create the report message with `html_string` as its HTML body.

    Returns a 'related' MIMEMultipart whose Subject is fixed and whose
    From/To come from the module constants FROM and TO. Further parts (for
    example inline images) can be attached to the returned object.
    '''
    message = MIMEMultipart('related')
    headers = (
        ('Subject', 'Maxwell Pipeline Daily Summary'),
        ('From', FROM),
        ('To', TO),
    )
    for header_name, header_value in headers:
        message[header_name] = header_value
    message.attach(MIMEText(html_string, 'html'))
    return message
if __name__ == "__main__":
    # Script entry point: gather last week's metrics and plots from HDFS,
    # fill the report.html template, and mail the result.
    import os
    import smtplib

    client = InsecureClient("http://hadoopmaster06-sjc1:50070", "mars")
    recent_week = get_recent_week(1)

    with open(CUR_DIR + "report.html") as f:
        html_string = f.read()

    rmse, corr = get_rmse(client, recent_week)
    goodness_plot = get_goodness_plot(client, recent_week)
    rmse_plot = get_recent_weeks_rmse(client, 20)
    weights_plots_html, plots = get_stacked_feature_weights(client, 5)
    heatmap_plot = get_cov_heatmap(client, recent_week)

    html_string = html_string.format(
        week=recent_week,
        rmse=rmse,
        corr=corr,
        feature_weights_plot=weights_plots_html,
    )

    # Named `message` rather than `email` so the stdlib package name is not
    # shadowed.
    message = construct_email(html_string)
    message.attach(goodness_plot)
    message.attach(rmse_plot)
    message.attach(heatmap_plot)
    for city_id in plots:
        message.attach(plots[city_id])

    # SECURITY: a credential committed to source control must be treated as
    # compromised. Set MAXWELL_SMTP_PASSWORD in the cron environment, rotate
    # the password, and then delete the literal fallback below. The fallback
    # is kept only so existing deployments keep working until that is done.
    smtp_password = os.environ.get('MAXWELL_SMTP_PASSWORD', 'hnxealvlrfaqzvjw')

    smtp = smtplib.SMTP('smtp.gmail.com', 587)
    try:
        smtp.ehlo()
        smtp.starttls()
        # The sender account doubles as the SMTP login.
        smtp.login(FROM, smtp_password)
        smtp.sendmail(FROM, TO, message.as_string())
    finally:
        # Always release the connection; a server that already hung up must
        # not mask the original error.
        try:
            smtp.quit()
        except smtplib.SMTPServerDisconnected:
            pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html>
<head>
    <!-- Template for the Maxwell report e-mail. It is filled in with Python
         str.format: the doubled braces in the style rule are literal CSS
         braces, and the single-brace names (week, rmse, corr,
         feature_weights_plot) are substitution fields. Images are referenced
         by Content-ID (cid:...) and must be attached to the message. -->
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css">
    <style>body{{ margin:0 100px; background:whitesmoke; }}</style>
</head>
<body>
    <h1>Maxwell Pipeline Summary - {week}</h1>

    <!-- *** Section 1 *** -->
    <h2>Recent Model Accuracy</h2>
    <table class="table table-striped">
        <tr>
            <td>RMSE</td>
            <td>{rmse}</td>
        </tr>
        <tr>
            <td>Corr</td>
            <td>{corr}</td>
        </tr>
        <tr>
            <td>Goodness Plot</td>
            <td><img src="cid:goodness_plot" height="342" width="342"></td>
        </tr>
    </table>

    <!-- *** Section 2 *** -->
    <h2>Overall Model Performance</h2>
    <table class="table table-striped">
        <tr>
            <td>RMSE Plot</td>
            <td><img src="cid:rmse_plot" height="342" width="342"></td>
        </tr>
    </table>

    <h2>Channel Weights Analysis(Top 5)</h2>
    <table class="table table-striped">
        {feature_weights_plot}
    </table>

    <h2>Cov Matrix Heatmap</h2>
    <table class="table table-striped">
        <tr>
            <td>Heatmap Plot</td>
            <td><img src="cid:heatmap_plot" height="542" width="622"></td>
        </tr>
    </table>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment