Skip to content

Instantly share code, notes, and snippets.

@duncangh
Created April 16, 2018 22:15
Show Gist options
  • Save duncangh/5317f377c7112af9220028bf99143d0f to your computer and use it in GitHub Desktop.
Save duncangh/5317f377c7112af9220028bf99143d0f to your computer and use it in GitHub Desktop.
cron-email
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import prettyplotlib as ppl
import brewer2mpl
from matplotlib.colors import Normalize
from ggplot import *
from datetime import datetime, timedelta, time
from hdfs import InsecureClient
from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
from email.MIMEImage import MIMEImage
# Prefix of every remote path requested through the HDFS client.
ROOT_DIR = '/app/euclid/maxwell/'

# Local working directory: downloads are written here and report.html is
# read from here.
CUR_DIR = '/home/xiangyus/maxwell_report/'

# Sender and recipient, used for the message headers and the SMTP envelope.
FROM = 'xiangyus@uber.com'
TO = 'marketing-tech-eng-report-group@uber.com'

# city_id -> display name of the cities that get a per-city weights plot.
TOP_CITY = {
    12: 'Los Angeles',
    5: 'New York City',
    7: 'Chicago',
    14: 'Miami',
    1: 'San Francisco',
}
def get_recent_week(delta, today=None):
    '''Return the Monday opening the week ``delta`` weeks back, as a string.

    Args:
        delta: Number of whole weeks to go back; 0 means the current week.
        today: Optional reference ``datetime``. Defaults to ``datetime.now()``
            (naive local time); pass an explicit value to rebuild the report
            for a past date or to get reproducible output.

    Returns:
        The date formatted as ``'%Y-%m-%d'``.
    '''
    if today is None:
        today = datetime.now()
    # weekday() is 0 on Monday, so subtracting it rewinds to this week's Monday.
    monday = today - timedelta(days=today.weekday())
    return (monday - timedelta(weeks=delta)).strftime('%Y-%m-%d')
def get_rmse(client, week):
    '''Read the RMSE and correlation stored for one prediction week.

    The first two lines of ``metrics/mmm/<week>.metrics`` are consumed; each
    is split on its first colon and the text after it is returned.

    Args:
        client: Object whose ``read(path)`` is a context manager yielding an
            iterator over the file's lines (the script passes an hdfs
            ``InsecureClient``).
        week: Week identifier used in the remote file name.

    Returns:
        ``(rmse, corr)`` as strings with surrounding whitespace removed.

    Raises:
        StopIteration: If the file holds fewer than two lines.
        IndexError: If one of the two lines contains no colon.
    '''
    file_name = ROOT_DIR + "metrics/mmm/{}.metrics".format(week)
    with client.read(file_name) as reader:
        # The next() builtin works on Python 2 and 3 iterators alike,
        # whereas the .next() method exists on Python 2 only.
        rmse_line = next(reader)
        corr_line = next(reader)
    # Split once so a colon inside the value cannot truncate it, and strip
    # the trailing newline so the value can go straight into the HTML report.
    rmse = rmse_line.split(":", 1)[1].strip()
    corr = corr_line.split(":", 1)[1].strip()
    return rmse, corr
def get_goodness_plot(client, week):
    '''Download the goodness plot of ``week`` and wrap it as an inline image.

    The PNG is copied from ``plots/mmm/`` under ROOT_DIR into CUR_DIR and
    returned as a ``MIMEImage`` whose Content-ID is ``<goodness_plot>``.
    '''
    png_name = "{}.png".format(week)
    source_path = ROOT_DIR + "plots/mmm/" + png_name
    target_path = CUR_DIR + png_name
    # Third positional argument is passed as True; presumably the client's
    # overwrite flag (confirm against the hdfs client in use).
    client.download(source_path, target_path, True)
    with open(target_path, 'rb') as image_file:
        image_bytes = image_file.read()
    attachment = MIMEImage(image_bytes, 'png')
    attachment.add_header('Content-ID', '<goodness_plot>')
    return attachment
def get_recent_weeks_rmse(client, num):
    '''Plot the RMSE of the ``num`` most recent weeks and wrap the PNG.

    Args:
        client: Client handed to ``get_rmse`` to read each week's metrics.
        num: Number of weeks to include, counted back from last week.

    Returns:
        A ``MIMEImage`` with Content-ID ``<rmse_plot>`` holding the line plot
        of RMSE against prediction week.
    '''
    weeks = []
    rmse_arr = []
    corr_arr = []
    # Offsets start at 1: offset 0 would be the current week, which is skipped.
    for offset in range(1, num + 1):
        cur = get_recent_week(offset)
        rmse, corr = get_rmse(client, cur)
        rmse_arr.append(float(rmse))
        corr_arr.append(float(corr))
        weeks.append(datetime.strptime(cur, '%Y-%m-%d'))
    # Progress output; this call form prints the same text on Python 2 and 3.
    for week_start, week_rmse in zip(weeks, rmse_arr):
        print("{} : {}".format(week_start, week_rmse))
    df = pd.DataFrame({
        "RMSE": rmse_arr,
        "CORR": corr_arr,
        "PredictionWeek": weeks,
    })
    plot = ggplot(aes(x="PredictionWeek", y="RMSE"), data=df) + geom_line()
    # Write next to the other report artifacts under a dedicated name instead
    # of a cwd-relative 'test.png' that get_cov_heatmap writes as well.
    plot_file = CUR_DIR + 'rmse_plot.png'
    plot.save(plot_file)
    with open(plot_file, 'rb') as fp:
        msgImg = MIMEImage(fp.read(), 'png')
    msgImg.add_header('Content-ID', '<rmse_plot>')
    return msgImg
def get_feature_weight(client, week):
    '''Download the feature-weights CSV of ``week`` and return its local path.

    The file ``<week>_feature_weights.csv`` is copied from ``metrics/mmm/``
    under ROOT_DIR into CUR_DIR.
    '''
    csv_name = '{}_feature_weights.csv'.format(week)
    destination = CUR_DIR + csv_name
    client.download(ROOT_DIR + 'metrics/mmm/' + csv_name, destination, True)
    return destination
def _load_top_city_weights(client, num):
    '''Return {week string: weight rows of the TOP_CITY cities} for ``num`` weeks.'''
    feature_weights = {}
    for offset in range(1, num + 1):
        cur = get_recent_week(offset)
        all_df = pd.read_csv(get_feature_weight(client, cur))
        feature_weights[cur] = all_df[all_df.city_id.isin(list(TOP_CITY))]
    return feature_weights


def _collect_city_series(feature_weights, city):
    '''Return (weeks, {column: [weight per week]}) for one city, oldest first.'''
    weeks = []
    weights = {}
    # Keys are '%Y-%m-%d' strings, so a plain sort is chronological and makes
    # the output independent of dict ordering.
    for cur_week in sorted(feature_weights):
        frame = feature_weights[cur_week]
        selected = frame[frame['city_id'] == city]
        if selected.empty:
            # No row for this city that week: skip the week entirely so the
            # week list and every weight list stay the same length.
            continue
        weeks.append(datetime.strptime(cur_week, '%Y-%m-%d'))
        for col in selected.columns.values:
            if col == 'city_id':
                continue
            weights.setdefault(col, []).append(float(selected[col].iloc[0]))
    return weeks, weights


def _plot_city_weights(city, weeks, weights):
    '''Save a stacked bar chart for one city and return it as a MIMEImage.'''
    # Imported here, as in the original function, so it runs after the
    # module-level matplotlib.use('Agg') call.
    import matplotlib.pyplot as plt
    file_name = str(city) + '.png'
    f, ax = plt.subplots(1, figsize=(18, 6))
    try:
        # Running top of the stack; zeros are equivalent to bar()'s default.
        bottom = [0.0] * len(weeks)
        for col in weights:
            print("{} {}".format(col, weights[col]))
            ax.bar(weeks, weights[col], width=2.75, label=col, bottom=bottom)
            bottom = [i + j for i, j in zip(bottom, weights[col])]
        ax.legend(bbox_to_anchor=(1, 1))
        f.savefig(file_name)
    finally:
        # One figure is created per city; close it so figures do not pile up.
        plt.close(f)
    with open(file_name, 'rb') as fp:
        msgImg = MIMEImage(fp.read(), 'png')
    # The Content-ID is the city id, matching the cid: reference in the table.
    msgImg.add_header('Content-ID', '<{}>'.format(city))
    return msgImg


def _weights_table_html():
    '''Return the table rows that reference each city's plot by Content-ID.'''
    # The first column shows the city name, so the header says "City".
    rows = ['<tr><th>City</th><th>Plot</th></tr>']
    for city_id in TOP_CITY:
        rows.append(
            '<tr><td>{}</td><td><img src="cid:{}" height="200" width="622"></td></tr>'.format(
                TOP_CITY[city_id], city_id))
    return ''.join(rows)


def get_stacked_feature_weights(client, num):
    '''Plot recent weeks' feature-weight changes for the TOP_CITY cities.

    Args:
        client: Client handed to ``get_feature_weight`` to download the CSVs.
        num: Number of weeks to include, counted back from last week.

    Returns:
        ``(weights_table, plots)``: an HTML fragment of table rows (meant to
        sit inside a ``<table>``) and a dict mapping city id to the
        ``MIMEImage`` of that city's stacked bar chart.
    '''
    feature_weights = _load_top_city_weights(client, num)
    plots = {}
    for city in TOP_CITY:
        print("City: {}".format(TOP_CITY[city]))
        weeks, weights = _collect_city_series(feature_weights, city)
        plots[city] = _plot_city_weights(city, weeks, weights)
    return _weights_table_html(), plots
def get_cov_heatmap(client, week):
    '''Render the covariance matrix of ``week`` as a heatmap image.

    Args:
        client: Client used to download ``model/mmm/<week>_cov_matrix``.
        week: Week identifier used in the remote and local file names.

    Returns:
        A ``MIMEImage`` with Content-ID ``<heatmap_plot>``.
    '''
    # Imported here, as in the original, so it runs after the module-level
    # matplotlib.use('Agg') call.
    import matplotlib.pyplot as plt
    remote_file = ROOT_DIR + 'model/mmm/{}_cov_matrix'.format(week)
    local_file = CUR_DIR + '{}_cov_feature'.format(week)
    client.download(remote_file, local_file, True)
    # NOTE(review): read_csv treats the first line as a header row; confirm
    # the matrix file really starts with one.
    # .values replaces DataFrame.as_matrix(), which newer pandas removed.
    data = pd.read_csv(local_file).values
    # Dedicated file next to the other artifacts, instead of the cwd-relative
    # 'test.png' that get_recent_weeks_rmse writes as well.
    image_file = CUR_DIR + '{}_cov_heatmap.png'.format(week)
    fig, ax = plt.subplots()
    try:
        ppl.pcolormesh(fig, ax, data, cmap=plt.cm.Blues,
                       norm=Normalize(vmin=data.min(), vmax=data.max()))
        fig.savefig(image_file, dpi=500)
    finally:
        # Release the figure even when rendering or saving fails.
        plt.close(fig)
    with open(image_file, 'rb') as fp:
        msgImg = MIMEImage(fp.read(), 'png')
    msgImg.add_header('Content-ID', '<heatmap_plot>')
    return msgImg
def construct_email(html_string):
    '''Build the multipart/related message that carries the HTML report.

    The Subject is fixed, From and To come from the module constants, and
    ``html_string`` is attached as the HTML part. Callers attach the inline
    images to the returned message afterwards.
    '''
    message = MIMEMultipart('related')
    headers = (
        ('Subject', 'Maxwell Pipeline Daily Summary'),
        ('From', FROM),
        ('To', TO),
    )
    for header_name, header_value in headers:
        message[header_name] = header_value
    message.attach(MIMEText(html_string, 'html'))
    return message
def main():
    '''Assemble the Maxwell report for last week and e-mail it.

    Reads the ``report.html`` template from CUR_DIR, fills in the metrics and
    the per-city table, attaches every plot as an inline image and sends the
    message from FROM to TO through Gmail's SMTP server.

    Raises:
        RuntimeError: If the ``MAXWELL_SMTP_PASSWORD`` environment variable
            is not set.
    '''
    # Local imports keep this entry point self-contained.
    import os
    import smtplib

    # The SMTP password must not live in source control. It is read from the
    # environment, and checked before any expensive work so a misconfigured
    # cron job fails fast. The password that used to be embedded here should
    # be treated as leaked and rotated.
    password = os.environ.get('MAXWELL_SMTP_PASSWORD')
    if not password:
        raise RuntimeError(
            'Set the MAXWELL_SMTP_PASSWORD environment variable to the SMTP '
            'password of {}'.format(FROM))

    client = InsecureClient("http://hadoopmaster06-sjc1:50070", "mars")
    recent_week = get_recent_week(1)
    with open(CUR_DIR + "report.html") as f:
        html_string = f.read()

    rmse, corr = get_rmse(client, recent_week)
    goodness_plot = get_goodness_plot(client, recent_week)
    rmse_plot = get_recent_weeks_rmse(client, 20)
    weights_plots_html, plots = get_stacked_feature_weights(client, 5)
    heatmap_plot = get_cov_heatmap(client, recent_week)

    html_string = html_string.format(
        week=recent_week,
        rmse=rmse,
        corr=corr,
        feature_weights_plot=weights_plots_html,
    )

    # Named "message" so it does not shadow the stdlib email package.
    message = construct_email(html_string)
    message.attach(goodness_plot)
    message.attach(rmse_plot)
    message.attach(heatmap_plot)
    for city_id in plots:
        message.attach(plots[city_id])

    smtp = smtplib.SMTP('smtp.gmail.com', 587)
    try:
        smtp.ehlo()
        smtp.starttls()
        smtp.login(FROM, password)
        smtp.sendmail(FROM, TO, message.as_string())
        smtp.quit()
    finally:
        # close() is safe after quit() and also releases the socket when an
        # earlier step raised.
        smtp.close()


if __name__ == "__main__":
    main()
<html>
<head>
  <!-- Template of the report e-mail. It is filled in with Python's
       str.format, so literal braces in this file must stay doubled. -->
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css">
  <style>body{{ margin:0 100px; background:whitesmoke; }}</style>
</head>
<body>
  <h1>Maxwell Pipeline Summary - {week}</h1>

  <!-- *** Section 1 *** -->
  <h2>Recent Model Accuracy</h2>
  <table class="table table-striped">
    <tr>
      <td>RMSE</td>
      <td>{rmse}</td>
    </tr>
    <tr>
      <td>Corr</td>
      <td>{corr}</td>
    </tr>
    <tr>
      <td>Goodness Plot</td>
      <td><img src="cid:goodness_plot" alt="Goodness plot" height="342" width="342"></td>
    </tr>
  </table>

  <!-- *** Section 2 *** -->
  <h2>Overall Model Performance</h2>
  <table class="table table-striped">
    <tr>
      <td>RMSE Plot</td>
      <td><img src="cid:rmse_plot" alt="RMSE plot" height="342" width="342"></td>
    </tr>
  </table>

  <!-- *** Section 3 *** -->
  <h2>Channel Weights Analysis(Top 5)</h2>
  <table class="table table-striped">
    {feature_weights_plot}
  </table>

  <!-- *** Section 4 *** -->
  <h2>Cov Matrix Heatmap</h2>
  <table class="table table-striped">
    <tr>
      <td>Heatmap Plot</td>
      <td><img src="cid:heatmap_plot" alt="Covariance matrix heatmap" height="542" width="622"></td>
    </tr>
  </table>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment