This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the run id back from the temporary view registered by the Scala cell.
# The Scala side stored Option#toString output, which may wrap the numeric id
# (e.g. "Some(RunId(123))") — TODO confirm; we extract the first digit run.
runId = spark.table("run_id").head()["run_id"]
runId = int(re.findall(r'\d+', runId)[0])
data = [[runId]]

# Derive the notebook name from the full workspace path (last "/" component).
notebook_path = spark.table("notebook_path").head()["notebook_path"]
path_split = notebook_path.split("/")
nb_name = path_split[-1]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Lookup table: dashboard display name -> name of the HTML file it is
# published as. Add an entry here for every dashboard to export.
dashboards = {
    'Network Graph Dashboard': 'network_graph',
}
# Get JSON response from HTTP export request | |
response = export_notebook(run_id) | |
# For each dashboard, get content and upload to S3 | |
for view in response.get("views"): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%scala
// Expose the current job run id and notebook path to the Python cells of
// this notebook by publishing each as a single-row temporary view.
val currentRun = dbutils.notebook.getContext.currentRunId.toString
Seq(currentRun).toDF("run_id").createOrReplaceTempView("run_id")

val nbPath = dbutils.notebook.getContext().notebookPath.get
Seq(nbPath).toDF("notebook_path").createOrReplaceTempView("notebook_path")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Launch the dashboard notebook and wait for it to finish (180s timeout).
notebook_name = 'network_graph'
dbutils.notebook.run(notebook_name, 180)

# The child notebook publishes its run id to a global temp view named
# 'run_id_<notebook_name>'; read the single stored value back out.
global_temp_db = spark.conf.get("spark.sql.globalTempDatabase")
run_id_table = 'run_id_' + notebook_name
run_id = table("{}.{}".format(global_temp_db, run_id_table)).first()[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Databricks REST API access configuration.
# NOTE(review): DOMAIN looks like a placeholder ('ACCOUNT...') — confirm it is
# substituted with the real workspace host per deployment.
DOMAIN = 'ACCOUNT.cloud.databricks.com'
# API token pulled from the secret store, never hard-coded.
TOKEN = dbutils.secrets.get(scope="databricks", key="token")
# Jobs runs/export endpoint; the caller appends the run_id.
# (.format used for consistency with the rest of the file.)
BASE_URL = 'https://{}/api/2.0/jobs/runs/export?run_id='.format(DOMAIN)
# Exports notebook with given run id as a JSON object | |
def export_notebook(run_id): | |
views_to_export = '&views_to_export=DASHBOARDS' | |
response = requests.get( | |
BASE_URL + str(run_id) + views_to_export, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AWS credentials for publishing dashboards to S3, all read from the same
# secret scope, plus the destination bucket for the static pages.
_S3_SCOPE = "aws-s3"
ACCESS_KEY = dbutils.secrets.get(scope=_S3_SCOPE, key="access_key")
SECRET_KEY = dbutils.secrets.get(scope=_S3_SCOPE, key="secret_key")
ENCODED_SECRET_KEY = dbutils.secrets.get(scope=_S3_SCOPE, key="encoded_secret_key")
AWS_BUCKET_NAME = "bucket-static-webpages"
def upload_to_s3(file_name, file_content): | |
# Check if file_name is a key in dashboards dictionary | |
if file_name not in dashboards: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The four Grand Slam tournaments, in calendar order.
tourneys = ['Australian Open','Roland Garros','Wimbledon','US Open']
# Finals only, restricted to rows where the winner's rank is known.
# notna() replaces ~np.isnan(...): same result on numeric columns, and it
# also works on nullable/object dtypes where np.isnan would raise.
tennis_df_1 = tennis_df[tennis_df['winner_rank'].notna() & (tennis_df['round']=='F')].copy()
plt.figure(figsize=(20,4))
# Create one plot for each Grand Slam | |
for i in range(1,5): | |
plt.subplot(1,4,i) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Figure with one subplot per country: Grand Slam results over time for a
# hand-picked set of nations. `i` tracks the current subplot slot.
plt.figure(figsize=(20, 4))
countries = {
    'ARG': 'Argentina',
    'ESP': 'Spain',
    'SUI': 'Switzerland',
    'USA': 'United States',
    'SRB': 'Serbia',
}
colors = ['blue', 'magenta', 'red', 'grey', 'black']
i = 1
for k,v in countries.items(): | |
plt.subplot(1,5,i) | |
s = tennis_df[(tennis_df['tourney_level'] == 'G') & (tennis_df['winner_ioc'].isin([k]))].groupby(['tourney_year','winner_ioc'], as_index=False).agg('count') | |
plt.plot(s['tourney_year'], s['tourney_id'], color=colors[i-1], linestyle='dashed', marker='o', markerfacecolor='blue', markersize=2) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Function that plots the history of wins of a particular player | |
def plot_history_player(player): | |
# Create dataframe with winner_name = player | |
pldf_1 = tennis_df[(tennis_df['winner_name'] == player)].groupby(['tourney_year','tourney_level'], as_index=False).agg(['count']) | |
pldf_2 = pldf_1['tourney_id'].reset_index() | |
fig = plt.figure(figsize=(15,5)) | |
ax = fig.add_subplot(111) | |
ax.set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, 5)))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Unique number of players that won a Grand Slam final in each 11-year era.
def _unique_gs_winners(first_year, last_year):
    """Return a Series counting distinct GS final winners in [first_year, last_year]."""
    era = tennis_df[(tennis_df['round'] == 'F')
                    & (tennis_df['tourney_level'].isin(['G']))
                    & (tennis_df['tourney_year'].between(first_year, last_year))]
    return era.agg({'winner_name': 'nunique'})

s = _unique_gs_winners('1975', '1985')
t = _unique_gs_winners('1986', '1996')
u = _unique_gs_winners('1997', '2007')
v = _unique_gs_winners('2008', '2018')

# Re-label each count under its era name (add labelled entry, drop the raw one).
s['1975-1985'] = s['winner_name']
s = s.drop('winner_name')
t['1986-1996'] = t['winner_name']
t = t.drop('winner_name')
NewerOlder