Last active
May 5, 2023 16:18
-
-
Save agent87/5337a1781f7d7e10d363e4785608ab59 to your computer and use it in GitHub Desktop.
A Notion API script that turns your Notion page into your CMS.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import html
import os

import pandas as pd
from notion_client import Client
"""
This is a Notion script that turns your Notion page into a content management system.
Essentially, you can publish content directly to your personal blog: this script renders/converts the blocks into HTML content.
Currently supported blocks are paragraphs, headings, images, code blocks and quotes.
Install the required packages by running: pip install notion_client pandas
If you would like to see this script in action, check out
https://kayarn.notion.site/Kayarn-499adcff95ce460394e50afa83c610be
and compare it with www.kayarn.com.
For more information visit www.kayarn.com or reach out to arnauldkayonga1@gmail.com
Cheers!
"""
class notion:
    """Helpers for reading blog pages out of a Notion database.

    The API client and the database id are created once, when the class body
    is executed, from the ``API_TOKEN`` and ``DATABASE_ID`` environment
    variables; a missing variable raises ``KeyError`` at that point.  Every
    helper is static and is meant to be called on the class itself, e.g.
    ``notion.store_pages_to_dataframe()``.
    """

    client = Client(auth=os.environ['API_TOKEN'])
    database_id = os.environ['DATABASE_ID']

    @staticmethod
    def fetch_all_pages_in_db() -> dict:
        """Query the configured database and return the response dict.

        The API returns results in pages; this follows ``has_more`` /
        ``next_cursor`` until the listing is exhausted so that ``'results'``
        in the returned dict holds every page, not just the first batch.
        """
        response = notion.client.databases.query(database_id=notion.database_id)
        pages = list(response['results'])
        while response.get('has_more'):
            response = notion.client.databases.query(
                database_id=notion.database_id,
                start_cursor=response['next_cursor'],
            )
            pages.extend(response['results'])
        # Keep the shape of a single response so existing callers that read
        # ``['results']`` keep working.
        merged = dict(response)
        merged['results'] = pages
        return merged

    @staticmethod
    def fetch_all_blocks_in_page(page_id: str) -> list:
        """Return every child block of ``page_id`` as a list of block dicts.

        Follows the API's pagination cursor so long pages are not truncated.
        """
        response = notion.client.blocks.children.list(page_id)
        blocks = list(response['results'])
        while response.get('has_more'):
            response = notion.client.blocks.children.list(
                page_id,
                start_cursor=response['next_cursor'],
            )
            blocks.extend(response['results'])
        return blocks

    @staticmethod
    def store_pages_to_dataframe() -> pd.DataFrame:
        """Build a DataFrame of published pages, newest first.

        One row is produced per page in the database.  ``CONTENT`` is kept as
        a column for callers to fill in later; this method never populates it.
        Rows whose ``PUBLISHED`` value is not ``'Published'`` are dropped from
        the result.  Pages with an empty title/description or an unset
        date/select yield ``''`` / ``None`` for that field instead of raising.
        """
        columns = [
            'TITLE', 'AUTHOR', 'PUBLISHED_ON', 'PUBLISHED', 'DESCRIPTION',
            'TAGS', 'TYPE', 'ID', 'CONTENT', 'LAST_EDITED_TIME',
            'COVER_IMG_URL', 'URL',
        ]
        # Rows are collected in a list and the frame is built once:
        # DataFrame.append was removed in pandas 2.0.
        records = []
        for page in notion.fetch_all_pages_in_db()['results']:
            props = page['properties']
            title = notion._first_plain_text(props['Title']['title'])
            records.append({
                'TITLE': title,
                'AUTHOR': notion.process_athor(props['Author']['people']),
                'PUBLISHED_ON': (props['Published on']['date'] or {}).get('start'),
                'PUBLISHED': notion._select_name(props['Published']['select']),
                'DESCRIPTION': notion._first_plain_text(props['Short Description']['rich_text']),
                'TAGS': [tag['name'] for tag in props['Tags']['multi_select']],
                'TYPE': notion._select_name(props['Type']['select']),
                'ID': page['id'],
                'LAST_EDITED_TIME': page['last_edited_time'],
                'COVER_IMG_URL': notion.process_cover_img_url(page['cover']),
                # URL slug: lower-cased title with spaces turned into dashes.
                'URL': str(title).lower().replace(" ", "-"),
            })
        dataframe = pd.DataFrame(records, columns=columns)
        # Convert the datatype so sorting is chronological, not lexical.
        dataframe['PUBLISHED_ON'] = pd.to_datetime(dataframe['PUBLISHED_ON'])
        dataframe = dataframe.sort_values(by=['PUBLISHED_ON'], ascending=False)
        return dataframe[dataframe['PUBLISHED'] == 'Published']

    @staticmethod
    def process_athor(author_dict: list) -> str:
        """Format a list of people objects as ``"A,B&C"`` (or ``"A"`` for one).

        Entries without a ``name`` are skipped; an empty list gives ``''``.
        """
        authors = [person['name'] for person in author_dict if person.get('name')]
        if len(authors) > 1:
            return ",".join(authors[:-1]) + "&" + authors[-1]
        return ' '.join(authors)

    @staticmethod
    def process_cover_img_url(cover_img_dict: dict) -> str:
        """Return the cover image URL, or ``None`` when there is no usable cover.

        Handles the ``external`` and ``file`` cover types; any other type, a
        missing cover (``None``) or a missing URL yields ``None``.
        """
        if not cover_img_dict:
            return None
        cover_type = cover_img_dict.get('type')
        if cover_type not in ('external', 'file'):
            return None
        return (cover_img_dict.get(cover_type) or {}).get('url')

    @staticmethod
    def _first_plain_text(rich_text: list) -> str:
        """Return the plain text of the first rich-text item, or ``''`` if empty."""
        return rich_text[0]['plain_text'] if rich_text else ''

    @staticmethod
    def _select_name(select: dict) -> str:
        """Return the option name of a select property, or ``None`` when unset."""
        return select['name'] if select else None
class converter:
    """Render a list of Notion block dicts as an HTML string.

    Supported block types are paragraphs, headings (levels 1-3), code blocks,
    quotes and images; any other block type is skipped.  All text taken from
    the blocks is HTML-escaped before being placed in the output.  Every
    helper is static and is called on the class, e.g. ``converter.to_html(blocks)``.
    """

    # Annotation flag -> wrapping tag, applied in this order (innermost first).
    _ANNOTATION_TAGS = (
        ('bold', 'b'),
        ('italic', 'i'),
        ('strikethrough', 's'),
        ('underline', 'u'),
        ('code', 'code'),
    )

    @staticmethod
    def to_html(blocks: list) -> str:
        """Convert ``blocks`` to HTML, one fragment per supported block.

        Fragments are joined with ``"<br>\\n"``; an empty list gives ``''``.
        """
        fragments = []
        for block in blocks:
            kind = block['type']
            if kind == 'paragraph':
                fragments.append(converter.process_paragraph(block['paragraph']['rich_text']))
            elif kind in ('heading_1', 'heading_2', 'heading_3'):
                # The trailing digit of the block type is the heading level.
                tag = 'h' + kind[-1]
                text = converter._plain_text(block[kind]['rich_text'])
                fragments.append(f"<{tag}>{text}</{tag}>")
            elif kind == 'code':
                fragments.append(converter.process_code(block['code']['rich_text'], block['code']['language']))
            elif kind == 'quote':
                fragments.append(converter.process_qoute(block['quote']['rich_text']))
            elif kind == 'image':
                fragments.append(converter.process_image_link(block))
        return "<br>\n".join(fragments)

    @staticmethod
    def process_paragraph(rich_text) -> str:
        """Render rich-text items with their bold/italic/etc. annotations.

        Items are concatenated without a separator so that a word split
        across two differently-styled items is not broken by whitespace.
        """
        pieces = []
        for text in rich_text:
            annotations = text['annotations']
            piece = html.escape(text['plain_text'], quote=False)
            for flag, tag in converter._ANNOTATION_TAGS:
                if annotations[flag]:
                    piece = f"<{tag}>{piece}</{tag}>"
            color = annotations['color']
            if color != "default":
                suffix = '_background'
                if color.endswith(suffix):
                    # e.g. "blue_background" is a highlight, not a text colour.
                    css = f"background-color:{color[:-len(suffix)]}"
                else:
                    css = f"color:{color}"
                piece = f"<span style='{css}'>{piece}</span>"
            pieces.append(piece)
        return "".join(pieces)

    @staticmethod
    def process_code(rich_text, language) -> str:
        """Render a code block as ``<pre><code class="language-…">``.

        The code text is escaped so markup inside a sample is shown, not
        interpreted, and items are joined verbatim because whitespace is
        significant inside ``<pre>``.
        """
        code = converter._plain_text(rich_text)
        css_class = html.escape(f"language-{language}", quote=True)
        return f'<pre><code class="{css_class}">{code}</code></pre>'

    @staticmethod
    def process_qoute(rich_text) -> str:
        """Render a quote block's plain text inside ``<blockquote>``."""
        return f'<blockquote>{converter._plain_text(rich_text)}</blockquote>'

    @staticmethod
    def process_image_link(img: dict) -> str:
        """Render an image block as a ``<figure>``.

        The URL is read from the ``file`` entry or, failing that, the
        ``external`` entry.  The ``<small>`` caption line is emitted only
        when the block has a non-empty caption.
        """
        image = img['image']
        source = image.get('file') or image.get('external') or {}
        url = html.escape(source.get('url', ''), quote=True)
        caption = converter._plain_text(image.get('caption') or [])
        lines = ["", "<figure>", f"<img src='{url}' />"]
        if caption:
            lines.append(f"<small>{caption}</small>")
        lines.extend(["</figure>", ""])
        return "\n".join(lines)

    @staticmethod
    def _plain_text(rich_text) -> str:
        """Concatenate and HTML-escape the plain text of all rich-text items."""
        return html.escape("".join(item['plain_text'] for item in rich_text), quote=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pandas | |
notion_client |
I just added the requirements file.
As for the "mb", I am not sure what you mean.
I mean this: https://shorturl.at/fzU05. Also, you said the required libraries can be installed using pip install notion_client,
but as the number of packages grows you can't keep adding pip install <thelibrary>
over and over again. Instead, you create a configuration file for them, which is requirements.txt,
and then whoever clones this code can run pip install -r requirements.txt
and it will install all the required packages. I hope that was clearer.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
COOL 👍
It'd be a lot cooler if you added a requirements file, as in
requirements.txt,
and maybe a person would run pip install -e for the dependencies, I think.