Created
April 12, 2022 16:34
-
-
Save daanalytics/56cc78a2e6b1b844529939504869b102 to your computer and use it in GitHub Desktop.
F1ErgastDeveloperAPI.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/daanalytics/56cc78a2e6b1b844529939504869b102/f1ergastdeveloperapi.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "8388558f", | |
"metadata": { | |
"id": "8388558f" | |
}, | |
"source": [ | |
"**Import the necessary libraries, like Pandas and the Snowflake connector.**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "82408a7d", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2022-04-12T16:01:26.926890Z", | |
"iopub.status.busy": "2022-04-12T16:01:26.926657Z", | |
"iopub.status.idle": "2022-04-12T16:01:26.930304Z", | |
"shell.execute_reply": "2022-04-12T16:01:26.929638Z", | |
"shell.execute_reply.started": "2022-04-12T16:01:26.926865Z" | |
}, | |
"tags": [], | |
"id": "82408a7d" | |
}, | |
"outputs": [], | |
"source": [ | |
"#!pip install \"snowflake-connector-python[pandas]\"\n", | |
"\n", | |
"import pandas as pd\n", | |
"from pandas.io.json import json_normalize\n", | |
"import json\n", | |
"import requests\n", | |
"import snowflake.connector\n", | |
"from snowflake.connector.pandas_tools import write_pandas, pd_writer\n", | |
"from datetime import datetime" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "1820019d", | |
"metadata": { | |
"id": "1820019d" | |
}, | |
"source": [ | |
"**Call the Ergast API Url to retrieve the Circuits Data.**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "fa1602bc", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2022-04-12T16:01:29.497787Z", | |
"iopub.status.busy": "2022-04-12T16:01:29.497560Z", | |
"iopub.status.idle": "2022-04-12T16:01:29.661705Z", | |
"shell.execute_reply": "2022-04-12T16:01:29.661113Z", | |
"shell.execute_reply.started": "2022-04-12T16:01:29.497764Z" | |
}, | |
"tags": [], | |
"id": "fa1602bc", | |
"outputId": "691d7e9f-b5bc-436a-d589-3881c76a221f" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>circuitId</th>\n", | |
" <th>url</th>\n", | |
" <th>circuitName</th>\n", | |
" <th>lat</th>\n", | |
" <th>long</th>\n", | |
" <th>locality</th>\n", | |
" <th>country</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>adelaide</td>\n", | |
" <td>http://en.wikipedia.org/wiki/Adelaide_Street_C...</td>\n", | |
" <td>Adelaide Street Circuit</td>\n", | |
" <td>-34.9272</td>\n", | |
" <td>138.617</td>\n", | |
" <td>Adelaide</td>\n", | |
" <td>Australia</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>ain-diab</td>\n", | |
" <td>http://en.wikipedia.org/wiki/Ain-Diab_Circuit</td>\n", | |
" <td>Ain Diab</td>\n", | |
" <td>33.5786</td>\n", | |
" <td>-7.6875</td>\n", | |
" <td>Casablanca</td>\n", | |
" <td>Morocco</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>aintree</td>\n", | |
" <td>http://en.wikipedia.org/wiki/Aintree_Motor_Rac...</td>\n", | |
" <td>Aintree</td>\n", | |
" <td>53.4769</td>\n", | |
" <td>-2.94056</td>\n", | |
" <td>Liverpool</td>\n", | |
" <td>UK</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>albert_park</td>\n", | |
" <td>http://en.wikipedia.org/wiki/Melbourne_Grand_P...</td>\n", | |
" <td>Albert Park Grand Prix Circuit</td>\n", | |
" <td>-37.8497</td>\n", | |
" <td>144.968</td>\n", | |
" <td>Melbourne</td>\n", | |
" <td>Australia</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>americas</td>\n", | |
" <td>http://en.wikipedia.org/wiki/Circuit_of_the_Am...</td>\n", | |
" <td>Circuit of the Americas</td>\n", | |
" <td>30.1328</td>\n", | |
" <td>-97.6411</td>\n", | |
" <td>Austin</td>\n", | |
" <td>USA</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" circuitId url \\\n", | |
"0 adelaide http://en.wikipedia.org/wiki/Adelaide_Street_C... \n", | |
"1 ain-diab http://en.wikipedia.org/wiki/Ain-Diab_Circuit \n", | |
"2 aintree http://en.wikipedia.org/wiki/Aintree_Motor_Rac... \n", | |
"3 albert_park http://en.wikipedia.org/wiki/Melbourne_Grand_P... \n", | |
"4 americas http://en.wikipedia.org/wiki/Circuit_of_the_Am... \n", | |
"\n", | |
" circuitName lat long locality country \n", | |
"0 Adelaide Street Circuit -34.9272 138.617 Adelaide Australia \n", | |
"1 Ain Diab 33.5786 -7.6875 Casablanca Morocco \n", | |
"2 Aintree 53.4769 -2.94056 Liverpool UK \n", | |
"3 Albert Park Grand Prix Circuit -37.8497 144.968 Melbourne Australia \n", | |
"4 Circuit of the Americas 30.1328 -97.6411 Austin USA " | |
] | |
}, | |
"execution_count": 58, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"limit = 100\n", | |
"offset = 0\n", | |
" \n", | |
"# Ergast API Url\n", | |
"api_url = f'https://ergast.com/api/f1/circuits.json'+ \"?limit=\" + str(limit) + \"&offset=\" + str(offset)\n", | |
" \n", | |
"api_response = requests.get(api_url) \n", | |
" \n", | |
"# Parsing JSON data\n", | |
"api_data = json.loads(api_response.text)\n", | |
" \n", | |
"total_circuit_records = int(api_data[\"MRData\"][\"total\"])\n", | |
" \n", | |
"df_circuits = pd.DataFrame() # Start with empty DataFrame\n", | |
" \n", | |
"# Loop through the API website until all records are retrieved \n", | |
"while (offset < total_circuit_records):\n", | |
" \n", | |
" offset = offset + 100\n", | |
" df_circuits_subset = pd.DataFrame.from_dict(pd.json_normalize(api_data[\"MRData\"][\"CircuitTable\"][\"Circuits\"]), orient='columns')\n", | |
" df_circuits_frames = [df_circuits, df_circuits_subset]\n", | |
" df_circuits = pd.concat(df_circuits_frames)\n", | |
" \n", | |
" df_circuits.rename(columns = {'Location.lat':'lat', 'Location.long':'long', 'Location.locality':'locality', 'Location.country':'country'}, inplace = True)\n", | |
" \n", | |
"df_circuits.head() " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "37336cd1", | |
"metadata": { | |
"id": "37336cd1" | |
}, | |
"source": [ | |
"**Connect to Snowflake, create a table and load the Circuits-Data into this table**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "88d8cfb6", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2022-04-12T16:01:33.428952Z", | |
"iopub.status.busy": "2022-04-12T16:01:33.428711Z", | |
"iopub.status.idle": "2022-04-12T16:01:34.423925Z", | |
"shell.execute_reply": "2022-04-12T16:01:34.423365Z", | |
"shell.execute_reply.started": "2022-04-12T16:01:33.428927Z" | |
}, | |
"tags": [], | |
"id": "88d8cfb6", | |
"outputId": "f8d38e39-6837-4a93-edf8-f88761f148f5" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<snowflake.connector.cursor.SnowflakeCursor at 0x7fd4d5db0ee0>" | |
] | |
}, | |
"execution_count": 59, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Setting up connection to Snowflake \n", | |
"cred_location = '/home/ftp/daanalytics/creds/'\n", | |
"cred_file = 'F1cred.json' \n", | |
"\n", | |
"connect = json.loads(open(str(cred_location+cred_file)).read())\n", | |
" \n", | |
"# Snowflake credentials\n", | |
"username = connect['secrets']['username']\n", | |
"password = connect['secrets']['password']\n", | |
"account = connect['secrets']['account']\n", | |
"role = connect['secrets']['role']\n", | |
"\n", | |
"# Connect to Snowflake\n", | |
"conn = snowflake.connector.connect(\n", | |
" user = username,\n", | |
" password = password,\n", | |
" account = account,\n", | |
" role = role\n", | |
" )\n", | |
"\n", | |
"sfc = conn.cursor()\n", | |
" \n", | |
"# Connect to DEMO_DB database\n", | |
"demo_db = connect['secrets']['database'].upper()\n", | |
"sfc.execute(\"USE DATABASE \" + demo_db)\n", | |
"\n", | |
"# Connect to PRE_F1PY schema\n", | |
"f1_pre_schema = 'PRE_F1APIPY'\n", | |
"\n", | |
"sfc.execute(\"CREATE SCHEMA IF NOT EXISTS \" + f1_pre_schema)\n", | |
"sfc.execute(\"USE SCHEMA \" + f1_pre_schema)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "a95051fc", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2022-04-12T16:01:34.435772Z", | |
"iopub.status.busy": "2022-04-12T16:01:34.435571Z", | |
"iopub.status.idle": "2022-04-12T16:01:34.700907Z", | |
"shell.execute_reply": "2022-04-12T16:01:34.700350Z", | |
"shell.execute_reply.started": "2022-04-12T16:01:34.435750Z" | |
}, | |
"tags": [], | |
"id": "a95051fc", | |
"outputId": "92c44657-1238-40b4-854f-e2f4208c576b" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<snowflake.connector.cursor.SnowflakeCursor at 0x7fd4d5db0ee0>" | |
] | |
}, | |
"execution_count": 60, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"table_name = 'Circuits'\n", | |
"f1_pre_table = 'PRE_F1APIPY_' + table_name.upper()\n", | |
" \n", | |
"# Create a CREATE TABLE SQL-statement\n", | |
"create_tbl_sql = \"CREATE TABLE IF NOT EXISTS \" + demo_db + \".\" + f1_pre_schema + \".\" + f1_pre_table + \" (\\n\"\n", | |
" \n", | |
"# Add Insert Date to DataFrame\n", | |
"df_circuits.insert(0, 'pre_insert_date', pd.Timestamp(datetime.now(), unit=\"ms\", tz=\"UTC\").round(\"ms\")) # Current Timestamp\n", | |
"\n", | |
" # Iterating trough the columns\n", | |
"df_circuits.columns = map(lambda x: str(x).upper(), df_circuits.columns)\n", | |
"df_circuits.columns\n", | |
" \n", | |
"for col in df_circuits.columns:\n", | |
" column_name = col.upper()\n", | |
" \n", | |
" if (df_circuits[col].dtype.name == \"int\" or df_circuits[col].dtype.name == \"int64\"):\n", | |
" create_tbl_sql = create_tbl_sql + column_name + \" int\"\n", | |
" elif df_circuits[col].dtype.name == \"object\":\n", | |
" create_tbl_sql = create_tbl_sql + column_name + \" varchar(16777216)\"\n", | |
" elif df_circuits[col].dtype.name == \"datetime64[ns]\":\n", | |
" create_tbl_sql = create_tbl_sql + column_name + \" datetime\"\n", | |
" elif df_circuits[col].dtype.name == \"float64\":\n", | |
" create_tbl_sql = create_tbl_sql + column_name + \" float8\"\n", | |
" elif df_circuits[col].dtype.name == \"bool\":\n", | |
" create_tbl_sql = create_tbl_sql + column_name + \" boolean\"\n", | |
" else:\n", | |
" create_tbl_sql = create_tbl_sql + column_name + \" varchar(16777216)\"\n", | |
" \n", | |
" # Deciding next steps. Either column is not the last column (add comma) else end create_tbl_statement\n", | |
" if df_circuits[col].name != df_circuits.columns[-1]:\n", | |
" create_tbl_sql = create_tbl_sql + \",\\n\"\n", | |
" else:\n", | |
" create_tbl_sql = create_tbl_sql + \")\"\n", | |
" \n", | |
"#Execute the SQL statement to create the table\n", | |
"sfc.execute(create_tbl_sql) \n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b81522b3-d5f1-44b3-ba47-b53ab2c31826", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2022-04-12T16:01:37.006073Z", | |
"iopub.status.busy": "2022-04-12T16:01:37.005836Z", | |
"iopub.status.idle": "2022-04-12T16:01:39.575455Z", | |
"shell.execute_reply": "2022-04-12T16:01:39.574804Z", | |
"shell.execute_reply.started": "2022-04-12T16:01:37.006047Z" | |
}, | |
"tags": [], | |
"id": "b81522b3-d5f1-44b3-ba47-b53ab2c31826", | |
"outputId": "23ca2576-df62-46b0-835e-40991ec3b121" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(True,\n", | |
" 1,\n", | |
" 79,\n", | |
" [('xjcuf/file0.txt', 'LOADED', 79, 79, 1, 0, None, None, None, None)])" | |
] | |
}, | |
"execution_count": 61, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Re-load table\n", | |
"sfc.execute('TRUNCATE TABLE IF EXISTS ' + f1_pre_table) \n", | |
"\n", | |
"# Write the data from the DataFrame to the f1_pre_table.\n", | |
"write_pandas(\n", | |
" conn = conn,\n", | |
" df = df_circuits,\n", | |
" table_name = f1_pre_table,\n", | |
" database = demo_db,\n", | |
" schema = f1_pre_schema\n", | |
" ) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "06c55f49-312e-4acc-935b-5dcd8f6f48d8", | |
"metadata": { | |
"id": "06c55f49-312e-4acc-935b-5dcd8f6f48d8" | |
}, | |
"outputs": [], | |
"source": [ | |
"" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.6" | |
}, | |
"colab": { | |
"name": "F1ErgastDeveloperAPI.ipynb", | |
"provenance": [], | |
"include_colab_link": true | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment