Skip to content

Instantly share code, notes, and snippets.

@daanalytics
Created April 12, 2022 16:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save daanalytics/56cc78a2e6b1b844529939504869b102 to your computer and use it in GitHub Desktop.
Save daanalytics/56cc78a2e6b1b844529939504869b102 to your computer and use it in GitHub Desktop.
F1ErgastDeveloperAPI.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/daanalytics/56cc78a2e6b1b844529939504869b102/f1ergastdeveloperapi.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"id": "8388558f",
"metadata": {
"id": "8388558f"
},
"source": [
"**Import the necessary libraries, like Pandas and the Snowflake connector.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82408a7d",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-12T16:01:26.926890Z",
"iopub.status.busy": "2022-04-12T16:01:26.926657Z",
"iopub.status.idle": "2022-04-12T16:01:26.930304Z",
"shell.execute_reply": "2022-04-12T16:01:26.929638Z",
"shell.execute_reply.started": "2022-04-12T16:01:26.926865Z"
},
"tags": [],
"id": "82408a7d"
},
"outputs": [],
"source": [
"#!pip install \"snowflake-connector-python[pandas]\"\n",
"\n",
"import pandas as pd\n",
"from pandas.io.json import json_normalize\n",
"import json\n",
"import requests\n",
"import snowflake.connector\n",
"from snowflake.connector.pandas_tools import write_pandas, pd_writer\n",
"from datetime import datetime"
]
},
{
"cell_type": "markdown",
"id": "1820019d",
"metadata": {
"id": "1820019d"
},
"source": [
"**Call the Ergast API Url to retrieve the Circuits Data.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa1602bc",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-12T16:01:29.497787Z",
"iopub.status.busy": "2022-04-12T16:01:29.497560Z",
"iopub.status.idle": "2022-04-12T16:01:29.661705Z",
"shell.execute_reply": "2022-04-12T16:01:29.661113Z",
"shell.execute_reply.started": "2022-04-12T16:01:29.497764Z"
},
"tags": [],
"id": "fa1602bc",
"outputId": "691d7e9f-b5bc-436a-d589-3881c76a221f"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>circuitId</th>\n",
" <th>url</th>\n",
" <th>circuitName</th>\n",
" <th>lat</th>\n",
" <th>long</th>\n",
" <th>locality</th>\n",
" <th>country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>adelaide</td>\n",
" <td>http://en.wikipedia.org/wiki/Adelaide_Street_C...</td>\n",
" <td>Adelaide Street Circuit</td>\n",
" <td>-34.9272</td>\n",
" <td>138.617</td>\n",
" <td>Adelaide</td>\n",
" <td>Australia</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ain-diab</td>\n",
" <td>http://en.wikipedia.org/wiki/Ain-Diab_Circuit</td>\n",
" <td>Ain Diab</td>\n",
" <td>33.5786</td>\n",
" <td>-7.6875</td>\n",
" <td>Casablanca</td>\n",
" <td>Morocco</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>aintree</td>\n",
" <td>http://en.wikipedia.org/wiki/Aintree_Motor_Rac...</td>\n",
" <td>Aintree</td>\n",
" <td>53.4769</td>\n",
" <td>-2.94056</td>\n",
" <td>Liverpool</td>\n",
" <td>UK</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>albert_park</td>\n",
" <td>http://en.wikipedia.org/wiki/Melbourne_Grand_P...</td>\n",
" <td>Albert Park Grand Prix Circuit</td>\n",
" <td>-37.8497</td>\n",
" <td>144.968</td>\n",
" <td>Melbourne</td>\n",
" <td>Australia</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>americas</td>\n",
" <td>http://en.wikipedia.org/wiki/Circuit_of_the_Am...</td>\n",
" <td>Circuit of the Americas</td>\n",
" <td>30.1328</td>\n",
" <td>-97.6411</td>\n",
" <td>Austin</td>\n",
" <td>USA</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" circuitId url \\\n",
"0 adelaide http://en.wikipedia.org/wiki/Adelaide_Street_C... \n",
"1 ain-diab http://en.wikipedia.org/wiki/Ain-Diab_Circuit \n",
"2 aintree http://en.wikipedia.org/wiki/Aintree_Motor_Rac... \n",
"3 albert_park http://en.wikipedia.org/wiki/Melbourne_Grand_P... \n",
"4 americas http://en.wikipedia.org/wiki/Circuit_of_the_Am... \n",
"\n",
" circuitName lat long locality country \n",
"0 Adelaide Street Circuit -34.9272 138.617 Adelaide Australia \n",
"1 Ain Diab 33.5786 -7.6875 Casablanca Morocco \n",
"2 Aintree 53.4769 -2.94056 Liverpool UK \n",
"3 Albert Park Grand Prix Circuit -37.8497 144.968 Melbourne Australia \n",
"4 Circuit of the Americas 30.1328 -97.6411 Austin USA "
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"limit = 100\n",
"offset = 0\n",
" \n",
"# Ergast API Url\n",
"api_url = f'https://ergast.com/api/f1/circuits.json'+ \"?limit=\" + str(limit) + \"&offset=\" + str(offset)\n",
" \n",
"api_response = requests.get(api_url) \n",
" \n",
"# Parsing JSON data\n",
"api_data = json.loads(api_response.text)\n",
" \n",
"total_circuit_records = int(api_data[\"MRData\"][\"total\"])\n",
" \n",
"df_circuits = pd.DataFrame() # Start with empty DataFrame\n",
" \n",
"# Loop through the API website until all records are retrieved \n",
"while (offset < total_circuit_records):\n",
" \n",
" offset = offset + 100\n",
" df_circuits_subset = pd.DataFrame.from_dict(pd.json_normalize(api_data[\"MRData\"][\"CircuitTable\"][\"Circuits\"]), orient='columns')\n",
" df_circuits_frames = [df_circuits, df_circuits_subset]\n",
" df_circuits = pd.concat(df_circuits_frames)\n",
" \n",
" df_circuits.rename(columns = {'Location.lat':'lat', 'Location.long':'long', 'Location.locality':'locality', 'Location.country':'country'}, inplace = True)\n",
" \n",
"df_circuits.head() "
]
},
{
"cell_type": "markdown",
"id": "37336cd1",
"metadata": {
"id": "37336cd1"
},
"source": [
"**Connect to Snowflake, create a table and load the Circuits-Data into this table**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88d8cfb6",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-12T16:01:33.428952Z",
"iopub.status.busy": "2022-04-12T16:01:33.428711Z",
"iopub.status.idle": "2022-04-12T16:01:34.423925Z",
"shell.execute_reply": "2022-04-12T16:01:34.423365Z",
"shell.execute_reply.started": "2022-04-12T16:01:33.428927Z"
},
"tags": [],
"id": "88d8cfb6",
"outputId": "f8d38e39-6837-4a93-edf8-f88761f148f5"
},
"outputs": [
{
"data": {
"text/plain": [
"<snowflake.connector.cursor.SnowflakeCursor at 0x7fd4d5db0ee0>"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Setting up connection to Snowflake \n",
"cred_location = '/home/ftp/daanalytics/creds/'\n",
"cred_file = 'F1cred.json' \n",
"\n",
"connect = json.loads(open(str(cred_location+cred_file)).read())\n",
" \n",
"# Snowflake credentials\n",
"username = connect['secrets']['username']\n",
"password = connect['secrets']['password']\n",
"account = connect['secrets']['account']\n",
"role = connect['secrets']['role']\n",
"\n",
"# Connect to Snowflake\n",
"conn = snowflake.connector.connect(\n",
" user = username,\n",
" password = password,\n",
" account = account,\n",
" role = role\n",
" )\n",
"\n",
"sfc = conn.cursor()\n",
" \n",
"# Connect to DEMO_DB database\n",
"demo_db = connect['secrets']['database'].upper()\n",
"sfc.execute(\"USE DATABASE \" + demo_db)\n",
"\n",
"# Connect to PRE_F1PY schema\n",
"f1_pre_schema = 'PRE_F1APIPY'\n",
"\n",
"sfc.execute(\"CREATE SCHEMA IF NOT EXISTS \" + f1_pre_schema)\n",
"sfc.execute(\"USE SCHEMA \" + f1_pre_schema)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a95051fc",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-12T16:01:34.435772Z",
"iopub.status.busy": "2022-04-12T16:01:34.435571Z",
"iopub.status.idle": "2022-04-12T16:01:34.700907Z",
"shell.execute_reply": "2022-04-12T16:01:34.700350Z",
"shell.execute_reply.started": "2022-04-12T16:01:34.435750Z"
},
"tags": [],
"id": "a95051fc",
"outputId": "92c44657-1238-40b4-854f-e2f4208c576b"
},
"outputs": [
{
"data": {
"text/plain": [
"<snowflake.connector.cursor.SnowflakeCursor at 0x7fd4d5db0ee0>"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"table_name = 'Circuits'\n",
"f1_pre_table = 'PRE_F1APIPY_' + table_name.upper()\n",
" \n",
"# Create a CREATE TABLE SQL-statement\n",
"create_tbl_sql = \"CREATE TABLE IF NOT EXISTS \" + demo_db + \".\" + f1_pre_schema + \".\" + f1_pre_table + \" (\\n\"\n",
" \n",
"# Add Insert Date to DataFrame\n",
"df_circuits.insert(0, 'pre_insert_date', pd.Timestamp(datetime.now(), unit=\"ms\", tz=\"UTC\").round(\"ms\")) # Current Timestamp\n",
"\n",
" # Iterating trough the columns\n",
"df_circuits.columns = map(lambda x: str(x).upper(), df_circuits.columns)\n",
"df_circuits.columns\n",
" \n",
"for col in df_circuits.columns:\n",
" column_name = col.upper()\n",
" \n",
" if (df_circuits[col].dtype.name == \"int\" or df_circuits[col].dtype.name == \"int64\"):\n",
" create_tbl_sql = create_tbl_sql + column_name + \" int\"\n",
" elif df_circuits[col].dtype.name == \"object\":\n",
" create_tbl_sql = create_tbl_sql + column_name + \" varchar(16777216)\"\n",
" elif df_circuits[col].dtype.name == \"datetime64[ns]\":\n",
" create_tbl_sql = create_tbl_sql + column_name + \" datetime\"\n",
" elif df_circuits[col].dtype.name == \"float64\":\n",
" create_tbl_sql = create_tbl_sql + column_name + \" float8\"\n",
" elif df_circuits[col].dtype.name == \"bool\":\n",
" create_tbl_sql = create_tbl_sql + column_name + \" boolean\"\n",
" else:\n",
" create_tbl_sql = create_tbl_sql + column_name + \" varchar(16777216)\"\n",
" \n",
" # Deciding next steps. Either column is not the last column (add comma) else end create_tbl_statement\n",
" if df_circuits[col].name != df_circuits.columns[-1]:\n",
" create_tbl_sql = create_tbl_sql + \",\\n\"\n",
" else:\n",
" create_tbl_sql = create_tbl_sql + \")\"\n",
" \n",
"#Execute the SQL statement to create the table\n",
"sfc.execute(create_tbl_sql) \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b81522b3-d5f1-44b3-ba47-b53ab2c31826",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-12T16:01:37.006073Z",
"iopub.status.busy": "2022-04-12T16:01:37.005836Z",
"iopub.status.idle": "2022-04-12T16:01:39.575455Z",
"shell.execute_reply": "2022-04-12T16:01:39.574804Z",
"shell.execute_reply.started": "2022-04-12T16:01:37.006047Z"
},
"tags": [],
"id": "b81522b3-d5f1-44b3-ba47-b53ab2c31826",
"outputId": "23ca2576-df62-46b0-835e-40991ec3b121"
},
"outputs": [
{
"data": {
"text/plain": [
"(True,\n",
" 1,\n",
" 79,\n",
" [('xjcuf/file0.txt', 'LOADED', 79, 79, 1, 0, None, None, None, None)])"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Re-load table\n",
"sfc.execute('TRUNCATE TABLE IF EXISTS ' + f1_pre_table) \n",
"\n",
"# Write the data from the DataFrame to the f1_pre_table.\n",
"write_pandas(\n",
" conn = conn,\n",
" df = df_circuits,\n",
" table_name = f1_pre_table,\n",
" database = demo_db,\n",
" schema = f1_pre_schema\n",
" ) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "06c55f49-312e-4acc-935b-5dcd8f6f48d8",
"metadata": {
"id": "06c55f49-312e-4acc-935b-5dcd8f6f48d8"
},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"colab": {
"name": "F1ErgastDeveloperAPI.ipynb",
"provenance": [],
"include_colab_link": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment