Created
July 9, 2022 11:08
-
-
Save grhervas/739d7f5fd65ccdbad569464ba52616bf to your computer and use it in GitHub Desktop.
Python script to get Notion data using its API into Power BI.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import pandas as pd | |
import locale | |
import numpy | |
# Use it to get correct date time format according to your time zone | |
# (Spain in this case) | |
locale.setlocale(locale.LC_TIME, 'es_ES.utf8') | |
def read_notion_db(db_id: str, auth_token: str): | |
""" | |
Get full database Notion object in json format. | |
Given the Notion's database URL ID and the secret token, | |
iterates through the paginated records (calling multiple | |
POST requests) and return full results in json format. | |
Parameters | |
---------- | |
db_id : str | |
The ID for the Notion database (you can obtain this from | |
the URL). | |
auth_token : str | |
Secret bearer token for the database integration. | |
Returns | |
------- | |
results : list | |
Concatenated results in semi-structurated json format. | |
""" | |
url = f"https://api.notion.com/v1/databases/{db_id}/query" | |
payload={} | |
headers = { | |
'Authorization': f'Bearer {auth_token}', | |
'Content-Type': 'application/json', | |
'Notion-Version': '2022-02-22' | |
} | |
results = [] | |
# Iterate through paginated records to get all the records | |
while True: | |
response = requests.request("POST", url, headers=headers, json=payload) | |
json_results = response.json() | |
results.extend(json_results["results"]) | |
if json_results["has_more"] is False: | |
break | |
else: | |
payload = {"start_cursor": json_results["next_cursor"]} | |
return results | |
# Here you should define your arguments | |
AUTH_TOKEN = "your-secret-token" | |
DB_EXP = "your-db-id" | |
results_exp = read_notion_db(DB_EXP, AUTH_TOKEN) | |
# Read json into DataFrame | |
df_exp = pd.json_normalize(results_exp, | |
meta="properties") | |
# This part is specific data transformation related to my Notion | |
# db structure where I obtain 3 separate datasets (Income, | |
# Expenditure and Investment) coming from 3 different Notion dbs. | |
# Get just the required cols | |
df_exp = df_exp.filter(regex="(Importe.number)|(Fecha.date.start)|(Categoría.select.name)|(Concepto.title)$", | |
axis=1).copy() | |
# Rename columns | |
df_exp.columns = df_exp.columns.str.extract("properties\.(\w*)\..*", expand=False) | |
# Extract semi-structured text data | |
df_exp["Concepto"] = df_exp["Concepto"].apply(lambda x: x[0]["text"]["content"]) | |
# Each time a print is called in the script, Power BI identify it as a data object | |
# adding it to the model | |
print(df_exp.sort_values("Fecha")) | |
DB_INC = "another-db-id" | |
results_inc = read_notion_db(DB_INC, AUTH_TOKEN) | |
# Read json into DataFrame | |
df_inc = pd.json_normalize(results_inc, | |
meta="properties") | |
# Get just the required cols | |
df_inc = df_inc.filter(regex="(Importe.number)|(Fecha.date.start)|(Concepto.title)$", | |
axis=1).copy() | |
# Rename columns | |
df_inc.columns = df_inc.columns.str.extract("properties\.(\w*)\..*", expand=False) | |
# Extract semi-structured text data | |
df_inc["Concepto"] = df_inc["Concepto"].apply(lambda x: x[0]["text"]["content"]) | |
print(df_inc.sort_values("Fecha")) | |
DB_INV = "yet-another-db-id" | |
results_inv = read_notion_db(DB_INV, AUTH_TOKEN) | |
# Read json into DataFrame | |
df_inv = pd.json_normalize(results_inv, | |
meta="properties") | |
# Get just the required cols | |
df_inv = df_inv.filter(regex="(Inversión.number)|(Name.title)$", | |
axis=1).copy() | |
# Rename columns | |
df_inv.columns = df_inv.columns.str.extract("properties\.(\w*)\..*", expand=False) | |
# Extract semi-structured text data | |
df_inv["Name"] = df_inv["Name"].apply(lambda x: x[0]["text"]["content"]) | |
# Rename cols | |
df_inv.rename(columns={"Name": "Balance"}, inplace=True) | |
# Create date col | |
df_inv["Fecha"] = pd.to_datetime(df_inv["Balance"].str.lower(), format="%B %Y") | |
print(df_inv.sort_values("Fecha")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment