Nicolas Escobar (nescobar) · Asuncion, Paraguay
import re

# Get run_id from temporary view
runId = spark.table("run_id").head()["run_id"]
runId = re.findall(r'\d+', runId)[0]
runId = int(runId)
data = [[runId]]
# Get notebook name
notebook_path = spark.table("notebook_path").head()["notebook_path"]
path_split = notebook_path.split("/")
nb_name = path_split[-1]
# Maps dashboard names to HTML file names
dashboards = {
    'Network Graph Dashboard': 'network_graph'
}
# Get JSON response from HTTP export request
response = export_notebook(runId)
# For each dashboard, get content and upload to S3
for view in response.get("views"):
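    # (assumed completion: the gist preview truncates the loop body here)
    # Each exported view carries a name, a type, and its rendered HTML content;
    # only dashboards declared in the mapping above are uploaded.
    if view.get("type") == "DASHBOARD" and view.get("name") in dashboards:
        upload_to_s3(view.get("name"), view.get("content"))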
%scala
// Expose the current run id and notebook path to the Python cells via temp views
val runId = dbutils.notebook.getContext.currentRunId.toString
Seq(runId).toDF("run_id").createOrReplaceTempView("run_id")
val notebookPath = dbutils.notebook.getContext().notebookPath.get
Seq(notebookPath).toDF("notebook_path").createOrReplaceTempView("notebook_path")
# Run notebook
notebook_name = 'network_graph'
dbutils.notebook.run(notebook_name, 180)
# Get run_id from the notebook's global temporary view
global_temp_db = spark.conf.get("spark.sql.globalTempDatabase")
run_id_table = 'run_id_{}'.format(notebook_name)
run_id = spark.table(global_temp_db + "." + run_id_table).first()[0]
nescobar / export_notebook.py (last active September 12, 2020)
Exports a Databricks notebook
import requests

# Databricks access credentials
DOMAIN = 'ACCOUNT.cloud.databricks.com'
TOKEN = dbutils.secrets.get(scope="databricks", key="token")
BASE_URL = 'https://%s/api/2.0/jobs/runs/export?run_id=' % (DOMAIN)

# Exports the notebook with the given run id as a JSON object
def export_notebook(run_id):
    views_to_export = '&views_to_export=DASHBOARDS'
    response = requests.get(
        BASE_URL + str(run_id) + views_to_export,
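        # (assumed completion: the gist preview truncates the call here)
        # The Jobs API accepts the personal access token as a bearer token
        headers={'Authorization': 'Bearer %s' % TOKEN})
    return response.json()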
nescobar / upload_to_s3.py (last active September 16, 2020)
Saves a file in DBFS and then uploads it to S3
# AWS
ACCESS_KEY = dbutils.secrets.get(scope="aws-s3", key="access_key")
SECRET_KEY = dbutils.secrets.get(scope="aws-s3", key="secret_key")
ENCODED_SECRET_KEY = dbutils.secrets.get(scope="aws-s3", key="encoded_secret_key")
AWS_BUCKET_NAME = "bucket-static-webpages"

def upload_to_s3(file_name, file_content):
    # Check if file_name is a key in the dashboards dictionary
    if file_name not in dashboards:
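        # (assumed completion: the gist preview truncates here)
        return
    # Write the HTML to DBFS, then copy it to the S3 bucket; using the
    # URL-encoded secret key in the s3a URI is an assumption based on the
    # ENCODED_SECRET_KEY variable above.
    local_path = '/tmp/%s.html' % dashboards[file_name]
    dbutils.fs.put(local_path, file_content, True)
    dbutils.fs.cp(local_path, 's3a://%s:%s@%s/%s.html' % (ACCESS_KEY, ENCODED_SECRET_KEY, AWS_BUCKET_NAME, dashboards[file_name]))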
# List of Grand Slams
tourneys = ['Australian Open','Roland Garros','Wimbledon','US Open']
# Create dataframe with data of finals where winner_rank values are not null
tennis_df_1 = tennis_df[~np.isnan(tennis_df['winner_rank']) & (tennis_df['round']=='F')].copy()
plt.figure(figsize=(20,4))
# Create one plot for each Grand Slam
for i in range(1,5):
plt.subplot(1,4,i)
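    # (assumed completion: the gist preview truncates the loop body here)
    # Plot the rank of each final's winner over the years for this Grand Slam;
    # the tourney_name column is an assumption based on the ATP dataset used above.
    gs = tennis_df_1[tennis_df_1['tourney_name'] == tourneys[i-1]]
    plt.plot(gs['tourney_year'], gs['winner_rank'], marker='o', markersize=2)
    plt.title(tourneys[i-1])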
plt.figure(figsize=(20,4))
countries = {'ARG':'Argentina','ESP':'Spain','SUI':'Switzerland','USA':'United States','SRB':'Serbia'}
colors = ['blue','magenta','red','grey','black']
i = 1
# One subplot per country: Grand Slam match wins per year
for k, v in countries.items():
    plt.subplot(1,5,i)
    s = tennis_df[(tennis_df['tourney_level'] == 'G') & (tennis_df['winner_ioc'].isin([k]))].groupby(['tourney_year','winner_ioc'], as_index=False).agg('count')
    plt.plot(s['tourney_year'], s['tourney_id'], color=colors[i-1], linestyle='dashed', marker='o', markerfacecolor='blue', markersize=2)
    i += 1  # advance to the next subplot and color (assumed; the preview cuts off here)
nescobar / atp_player_wins_history.py (created October 17, 2018)
Function that plots the history of wins of a particular player
# Function that plots the history of wins of a particular player
def plot_history_player(player):
    # Create dataframe with winner_name = player
    pldf_1 = tennis_df[(tennis_df['winner_name'] == player)].groupby(['tourney_year','tourney_level'], as_index=False).agg(['count'])
    pldf_2 = pldf_1['tourney_id'].reset_index()
    fig = plt.figure(figsize=(15,5))
    ax = fig.add_subplot(111)
    ax.set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, 5))))
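    # (assumed completion: the gist preview truncates here)
    # One line per tournament level, drawn with the five-colour cycle set above;
    # the column names assume the grouped counts reset to tourney_year / tourney_level / count.
    for level in pldf_2['tourney_level'].unique():
        lv = pldf_2[pldf_2['tourney_level'] == level]
        ax.plot(lv['tourney_year'], lv['count'], marker='o', markersize=3, label=level)
    ax.legend(title='Tourney level')
    ax.set_title('Wins per year: %s' % player)
    plt.show()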
# Unique number of players that won GS and Masters per period
s = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1975','1985'))].agg({'winner_name':'nunique'})
t = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1986','1996'))].agg({'winner_name':'nunique'})
u = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1997','2007'))].agg({'winner_name':'nunique'})
v = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('2008','2018'))].agg({'winner_name':'nunique'})
s['1975-1985'] = s['winner_name']
s=s.drop('winner_name')
t['1986-1996'] = t['winner_name']
t=t.drop('winner_name')
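# (assumed continuation: the gist preview truncates here)
u['1997-2007'] = u['winner_name']
u = u.drop('winner_name')
v['2008-2018'] = v['winner_name']
v = v.drop('winner_name')
# Combine the four periods into one series and compare them in a bar chart
# (assumes pandas is imported as pd elsewhere in the notebook)
periods = pd.concat([s, t, u, v])
periods.plot(kind='bar', figsize=(8,4), title='Unique Grand Slam champions per period')
plt.show()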