Nicolas Escobar nescobar

## store_run_id_global.py
# Get run_id from temporary view
runId = spark.table("run_id").head()["run_id"]
runId = re.findall(r'\d+', runId)[0]
runId = int(runId)
data = [[runId]]

# Get notebook name
notebook_path = spark.table("notebook_path").head()["notebook_path"]
path_split = notebook_path.split("/")
nb_name = path_split[len(path_split)-1]

## run_upload_s3.py
# Maps dashboards to HTML files
dashboards = {
  'Network Graph Dashboard' : 'network_graph'
}

# Get JSON response from HTTP export request
response = export_notebook(run_id)

# For each dashboard, get content and upload to S3
for view in response.get("views"):

## scala_get_run_id.scala
%scala
val runId = dbutils.notebook.getContext.currentRunId.toString
Seq(runId).toDF("run_id").createOrReplaceTempView("run_id")

val notebookPath = dbutils.notebook.getContext().notebookPath.get
Seq(notebookPath).toDF("notebook_path").createOrReplaceTempView("notebook_path")

## run_notebook_run_id.py
# Run notebook
notebook_name = 'network_graph'
dbutils.notebook.run(notebook_name, 180)

# Get run_id from notebook
global_temp_db = spark.conf.get("spark.sql.globalTempDatabase")
run_id_table = 'run_id_{}'.format(notebook_name)
run_id = table(global_temp_db + "." + run_id_table).first()[0]

## export_notebook.py
# Databricks access credentials
DOMAIN = 'ACCOUNT.cloud.databricks.com'
TOKEN = dbutils.secrets.get(scope="databricks", key="token")
BASE_URL = 'https://%s/api/2.0/jobs/runs/export?run_id=' % (DOMAIN)

# Exports notebook with given run id as a JSON object
def export_notebook(run_id):
 views_to_export = '&views_to_export=DASHBOARDS'
 response = requests.get(
   BASE_URL + str(run_id) + views_to_export,

## upload_to_s3.py
# AWS
ACCESS_KEY = dbutils.secrets.get(scope="aws-s3", key="access_key")
SECRET_KEY = dbutils.secrets.get(scope="aws-s3", key="secret_key")
ENCODED_SECRET_KEY = dbutils.secrets.get(scope="aws-s3", key="encoded_secret_key")
AWS_BUCKET_NAME = "bucket-static-webpages"

def upload_to_s3(file_name, file_content):

  # Check if file_name is a key in dashboards dictionary
  if file_name not in dashboards:

## atp_winners_rankings.py
# List of Grand Slams
tourneys = ['Australian Open','Roland Garros','Wimbledon','US Open']

# Create dataframe with data of finals where winner_rank values are not null
tennis_df_1 = tennis_df[~np.isnan(tennis_df['winner_rank']) & (tennis_df['round']=='F')].copy()
plt.figure(figsize=(20,4))

# Create one plot for each Grand Slam
for i in range(1,5):
    plt.subplot(1,4,i)

## atp_countries_evolution.py
plt.figure(figsize=(20,4))

countries = {'ARG':'Argentina','ESP':'Spain','SUI':'Switzerland','USA':'United States','SRB':'Serbia'}
colors = ['blue','magenta','red','grey','black']

i=1
for k,v in countries.items():
    plt.subplot(1,5,i)
    s = tennis_df[(tennis_df['tourney_level'] == 'G') & (tennis_df['winner_ioc'].isin([k]))].groupby(['tourney_year','winner_ioc'], as_index=False).agg('count')
    plt.plot(s['tourney_year'], s['tourney_id'], color=colors[i-1], linestyle='dashed', marker='o', markerfacecolor='blue', markersize=2)

## atp_player_wins_history.py
# Function that plots the history of wins of a particular player
def plot_history_player(player):

    # Create dataframe with winner_name = player
    pldf_1 = tennis_df[(tennis_df['winner_name'] == player)].groupby(['tourney_year','tourney_level'], as_index=False).agg(['count'])
    pldf_2 = pldf_1['tourney_id'].reset_index()

    fig = plt.figure(figsize=(15,5))
    ax = fig.add_subplot(111)
    ax.set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, 5))))

## atp_unique_gs_winners.py
# Unique number of players that won GS and Masters per period
s = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1975','1985'))].agg({'winner_name':'nunique'})
t = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1986','1996'))].agg({'winner_name':'nunique'})
u = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1997','2007'))].agg({'winner_name':'nunique'})
v = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('2008','2018'))].agg({'winner_name':'nunique'})

s['1975-1985'] = s['winner_name']
s=s.drop('winner_name')
t['1986-1996'] = t['winner_name']
t=t.drop('winner_name')
	# Get run_id from temporary view
	runId = spark.table("run_id").head()["run_id"]
	runId = re.findall(r'\d+', runId)[0]
	runId = int(runId)
	data = [[runId]]

	# Get notebook name
	notebook_path = spark.table("notebook_path").head()["notebook_path"]
	path_split = notebook_path.split("/")
	nb_name = path_split[len(path_split)-1]
	# Maps dashboards to HTML files
	dashboards = {
	'Network Graph Dashboard' : 'network_graph'
	}

	# Get JSON response from HTTP export request
	response = export_notebook(run_id)

	# For each dashboard, get content and upload to S3
	for view in response.get("views"):
	%scala
	val runId = dbutils.notebook.getContext.currentRunId.toString
	Seq(runId).toDF("run_id").createOrReplaceTempView("run_id")

	val notebookPath = dbutils.notebook.getContext().notebookPath.get
	Seq(notebookPath).toDF("notebook_path").createOrReplaceTempView("notebook_path")
	# Run notebook
	notebook_name = 'network_graph'
	dbutils.notebook.run(notebook_name, 180)

	# Get run_id from notebook
	global_temp_db = spark.conf.get("spark.sql.globalTempDatabase")
	run_id_table = 'run_id_{}'.format(notebook_name)
	run_id = table(global_temp_db + "." + run_id_table).first()[0]
	# Databricks access credentials
	DOMAIN = 'ACCOUNT.cloud.databricks.com'
	TOKEN = dbutils.secrets.get(scope="databricks", key="token")
	BASE_URL = 'https://%s/api/2.0/jobs/runs/export?run_id=' % (DOMAIN)

	# Exports notebook with given run id as a JSON object
	def export_notebook(run_id):
	views_to_export = '&views_to_export=DASHBOARDS'
	response = requests.get(
	BASE_URL + str(run_id) + views_to_export,
	# AWS
	ACCESS_KEY = dbutils.secrets.get(scope="aws-s3", key="access_key")
	SECRET_KEY = dbutils.secrets.get(scope="aws-s3", key="secret_key")
	ENCODED_SECRET_KEY = dbutils.secrets.get(scope="aws-s3", key="encoded_secret_key")
	AWS_BUCKET_NAME = "bucket-static-webpages"

	def upload_to_s3(file_name, file_content):

	# Check if file_name is a key in dashboards dictionary
	if file_name not in dashboards:
	# List of Grand Slams
	tourneys = ['Australian Open','Roland Garros','Wimbledon','US Open']

	# Create dataframe with data of finals where winner_rank values are not null
	tennis_df_1 = tennis_df[~np.isnan(tennis_df['winner_rank']) & (tennis_df['round']=='F')].copy()
	plt.figure(figsize=(20,4))

	# Create one plot for each Grand Slam
	for i in range(1,5):
	plt.subplot(1,4,i)
	plt.figure(figsize=(20,4))

	countries = {'ARG':'Argentina','ESP':'Spain','SUI':'Switzerland','USA':'United States','SRB':'Serbia'}
	colors = ['blue','magenta','red','grey','black']

	i=1
	for k,v in countries.items():
	plt.subplot(1,5,i)
	s = tennis_df[(tennis_df['tourney_level'] == 'G') & (tennis_df['winner_ioc'].isin([k]))].groupby(['tourney_year','winner_ioc'], as_index=False).agg('count')
	plt.plot(s['tourney_year'], s['tourney_id'], color=colors[i-1], linestyle='dashed', marker='o', markerfacecolor='blue', markersize=2)
	# Function that plots the history of wins of a particular player
	def plot_history_player(player):

	# Create dataframe with winner_name = player
	pldf_1 = tennis_df[(tennis_df['winner_name'] == player)].groupby(['tourney_year','tourney_level'], as_index=False).agg(['count'])
	pldf_2 = pldf_1['tourney_id'].reset_index()

	fig = plt.figure(figsize=(15,5))
	ax = fig.add_subplot(111)
	ax.set_prop_cycle(plt.cycler('color', plt.cm.jet(np.linspace(0, 1, 5))))
	# Unique number of players that won GS and Masters per period
	s = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1975','1985'))].agg({'winner_name':'nunique'})
	t = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1986','1996'))].agg({'winner_name':'nunique'})
	u = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('1997','2007'))].agg({'winner_name':'nunique'})
	v = tennis_df[(tennis_df['round']=='F')&(tennis_df['tourney_level'].isin(['G']))&(tennis_df['tourney_year'].between('2008','2018'))].agg({'winner_name':'nunique'})

	s['1975-1985'] = s['winner_name']
	s=s.drop('winner_name')
	t['1986-1996'] = t['winner_name']
	t=t.drop('winner_name')