Skip to content

Instantly share code, notes, and snippets.

@grhervas
Created July 9, 2022 11:08
Show Gist options
  • Save grhervas/739d7f5fd65ccdbad569464ba52616bf to your computer and use it in GitHub Desktop.
Save grhervas/739d7f5fd65ccdbad569464ba52616bf to your computer and use it in GitHub Desktop.
Python script to get Notion data using its API into Power BI.
import requests
import json
import pandas as pd
import locale
import numpy
# Use it to get correct date time format according to your time zone
# (Spain in this case)
locale.setlocale(locale.LC_TIME, 'es_ES.utf8')
def read_notion_db(db_id: str, auth_token: str):
"""
Get full database Notion object in json format.
Given the Notion's database URL ID and the secret token,
iterates through the paginated records (calling multiple
POST requests) and return full results in json format.
Parameters
----------
db_id : str
The ID for the Notion database (you can obtain this from
the URL).
auth_token : str
Secret bearer token for the database integration.
Returns
-------
results : list
Concatenated results in semi-structurated json format.
"""
url = f"https://api.notion.com/v1/databases/{db_id}/query"
payload={}
headers = {
'Authorization': f'Bearer {auth_token}',
'Content-Type': 'application/json',
'Notion-Version': '2022-02-22'
}
results = []
# Iterate through paginated records to get all the records
while True:
response = requests.request("POST", url, headers=headers, json=payload)
json_results = response.json()
results.extend(json_results["results"])
if json_results["has_more"] is False:
break
else:
payload = {"start_cursor": json_results["next_cursor"]}
return results
# Here you should define your arguments
AUTH_TOKEN = "your-secret-token"
DB_EXP = "your-db-id"
results_exp = read_notion_db(DB_EXP, AUTH_TOKEN)
# Read json into DataFrame
df_exp = pd.json_normalize(results_exp,
meta="properties")
# This part is specific data transformation related to my Notion
# db structure where I obtain 3 separate datasets (Income,
# Expenditure and Investment) coming from 3 different Notion dbs.
# Get just the required cols
df_exp = df_exp.filter(regex="(Importe.number)|(Fecha.date.start)|(Categoría.select.name)|(Concepto.title)$",
axis=1).copy()
# Rename columns
df_exp.columns = df_exp.columns.str.extract("properties\.(\w*)\..*", expand=False)
# Extract semi-structured text data
df_exp["Concepto"] = df_exp["Concepto"].apply(lambda x: x[0]["text"]["content"])
# Each time a print is called in the script, Power BI identify it as a data object
# adding it to the model
print(df_exp.sort_values("Fecha"))
DB_INC = "another-db-id"
results_inc = read_notion_db(DB_INC, AUTH_TOKEN)
# Read json into DataFrame
df_inc = pd.json_normalize(results_inc,
meta="properties")
# Get just the required cols
df_inc = df_inc.filter(regex="(Importe.number)|(Fecha.date.start)|(Concepto.title)$",
axis=1).copy()
# Rename columns
df_inc.columns = df_inc.columns.str.extract("properties\.(\w*)\..*", expand=False)
# Extract semi-structured text data
df_inc["Concepto"] = df_inc["Concepto"].apply(lambda x: x[0]["text"]["content"])
print(df_inc.sort_values("Fecha"))
DB_INV = "yet-another-db-id"
results_inv = read_notion_db(DB_INV, AUTH_TOKEN)
# Read json into DataFrame
df_inv = pd.json_normalize(results_inv,
meta="properties")
# Get just the required cols
df_inv = df_inv.filter(regex="(Inversión.number)|(Name.title)$",
axis=1).copy()
# Rename columns
df_inv.columns = df_inv.columns.str.extract("properties\.(\w*)\..*", expand=False)
# Extract semi-structured text data
df_inv["Name"] = df_inv["Name"].apply(lambda x: x[0]["text"]["content"])
# Rename cols
df_inv.rename(columns={"Name": "Balance"}, inplace=True)
# Create date col
df_inv["Fecha"] = pd.to_datetime(df_inv["Balance"].str.lower(), format="%B %Y")
print(df_inv.sort_values("Fecha"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment