agent87/notion_sdk.py

## notion_sdk.py
from notion_client import Client
import os
import pandas as pd

"""
This is a Notion script that turns your Notion pages into a content management system.
Essentially, you can publish content directly to your personal blog, where this script renders/converts the blocks into HTML content.
Currently supported blocks are paragraphs, headings, images, code blocks and quotes.

Install the required packages by running !pip install notion_client

If you would like to see this script in action, check out

https://kayarn.notion.site/Kayarn-499adcff95ce460394e50afa83c610be

and compare it with www.kayarn.com.

For more information visit www.kayarn.com or reach out to arnauldkayonga1@gmail.com

Cheers!

"""

class notion:
    """Read blog posts from a Notion database through ``notion_client``.

    Every function is static and is meant to be called on the class itself,
    e.g. ``notion.store_pages_to_dataframe()``.
    """

    # NOTE: both values are read while the class body executes, i.e. when the
    # module is imported; a missing environment variable raises KeyError there.
    client = Client(auth=os.environ['API_TOKEN'])
    database_id = os.environ['DATABASE_ID']

    # Column order of the frame built by store_pages_to_dataframe().
    # 'CONTENT' is declared but is not filled in by this class.
    _COLUMNS = [
        'TITLE', 'AUTHOR', 'PUBLISHED_ON', 'PUBLISHED', 'DESCRIPTION', 'TAGS',
        'TYPE', 'ID', 'CONTENT', 'LAST_EDITED_TIME', 'COVER_IMG_URL', 'URL',
    ]

    @staticmethod
    def _paginate(endpoint, **params) -> dict:
        """Call a paginated Notion list endpoint until it is exhausted.

        Returns a copy of the first response whose 'results' list holds the
        results of every page, so callers can keep reading
        ``response['results']``.
        """
        first_page = endpoint(**params)
        results = list(first_page['results'])
        page = first_page
        # Notion caps each response and signals further data through
        # 'has_more' / 'next_cursor'.
        while page.get('has_more') and page.get('next_cursor'):
            page = endpoint(start_cursor=page['next_cursor'], **params)
            results.extend(page['results'])
        merged = dict(first_page)
        merged.update(results=results, has_more=False, next_cursor=None)
        return merged

    @staticmethod
    def _plain_text(rich_text) -> str:
        """Concatenate the 'plain_text' of every run ('' when there is none)."""
        return "".join(run['plain_text'] for run in rich_text or [])

    @staticmethod
    def _page_to_row(page: dict) -> dict:
        """Flatten one page object into a row of the posts dataframe.

        Unset date/select properties yield None and empty text properties
        yield '' instead of raising, so one unfinished draft cannot break the
        whole listing.
        """
        props = page['properties']
        title = notion._plain_text(props['Title']['title'])
        return {
            'TITLE': title,
            'AUTHOR': notion.process_athor(props['Author']['people']),
            'PUBLISHED_ON': (props['Published on']['date'] or {}).get('start'),
            'PUBLISHED': (props['Published']['select'] or {}).get('name'),
            'DESCRIPTION': notion._plain_text(props['Short Description']['rich_text']),
            'TAGS': [tag['name'] for tag in props['Tags']['multi_select']],
            'TYPE': (props['Type']['select'] or {}).get('name'),
            'ID': page['id'],
            'LAST_EDITED_TIME': page['last_edited_time'],
            'COVER_IMG_URL': notion.process_cover_img_url(page['cover']),
            # Slug: lower-cased title with spaces replaced by dashes.
            'URL': title.lower().replace(" ", "-"),
        }

    @staticmethod
    def fetch_all_pages_in_db():
        """Query the configured database and return the response dict.

        All result pages are followed; the combined list is under 'results'.
        """
        return notion._paginate(
            notion.client.databases.query,
            database_id=notion.database_id,
        )

    @staticmethod
    def fetch_all_blocks_in_page(page_id : str) -> list:
        """Return the list of child block objects of the page ``page_id``."""
        return notion._paginate(
            notion.client.blocks.children.list,
            block_id=page_id,
        )['results']

    @staticmethod
    def store_pages_to_dataframe() -> pd.DataFrame:
        """Build a dataframe of published posts, newest first.

        Returns:
            One row per page whose 'Published' select equals 'Published',
            sorted by 'PUBLISHED_ON' in descending order.
        """
        raw_pages_content : dict = notion.fetch_all_pages_in_db()
        rows = [notion._page_to_row(page) for page in raw_pages_content['results']]
        # Build the frame in one go: DataFrame.append no longer exists in
        # pandas 2.x and copied the whole frame for every row.
        dataframe : pd.DataFrame = pd.DataFrame(rows, columns=notion._COLUMNS)

        # Convert the datatype so the sort below is chronological.
        dataframe['PUBLISHED_ON'] = pd.to_datetime(dataframe['PUBLISHED_ON'])

        # Sort the dataframe by publication date, newest first.
        dataframe = dataframe.sort_values(by=['PUBLISHED_ON'], ascending=False)

        return dataframe[dataframe['PUBLISHED'] == 'Published']

    @staticmethod
    def process_athor(author_dict: dict) -> str:
        """Format the author names as one string.

        Args:
            author_dict: iterable of people objects; entries without a
                'name' are skipped.

        Returns:
            '' for no author, the bare name for one author, otherwise the
            names joined with ',' and the last one attached with '&'.
        """
        authors = [author['name'] for author in author_dict if author.get('name')]
        if len(authors) > 1:
            return ",".join(authors[:-1]) + "&" + authors[-1]
        return ' '.join(authors)

    @staticmethod
    def process_cover_img_url(cover_img_dict: dict) -> "str | None":
        """Return the cover image URL, or None when it cannot be determined.

        Args:
            cover_img_dict: the page's 'cover' value; may be None.
        """
        if not cover_img_dict:
            return None
        try:
            kind = cover_img_dict['type']
            if kind in ("external", "file"):
                return cover_img_dict[kind]['url']
        except (KeyError, TypeError):
            # Malformed cover object: treat it as "no cover".
            pass
        return None


class converter:
    """Render Notion block objects as HTML fragments.

    Every function is static and is meant to be called on the class itself,
    e.g. ``converter.to_html(blocks)``.
    """

    # (annotation flag, HTML tag) pairs, applied innermost first.
    _INLINE_TAGS = (
        ('bold', 'b'),
        ('italic', 'i'),
        ('strikethrough', 's'),
        ('underline', 'u'),
        ('code', 'code'),
    )

    @staticmethod
    def _join_plain_text(rich_text) -> str:
        """Concatenate the 'plain_text' of every run ('' when there is none)."""
        return "".join(run['plain_text'] for run in rich_text or [])

    @staticmethod
    def to_html(blocks : dict):
        """Convert an iterable of block objects into one HTML string.

        Paragraph, heading_1..3, code, quote and image blocks are rendered;
        any other block type is skipped. Fragments are joined with "<br>\\n".
        """
        fragments = []
        for block in blocks:
            kind = block['type']
            if kind == 'paragraph':
                fragments.append(converter.process_paragraph(block['paragraph']['rich_text']))
            elif kind in ('heading_1', 'heading_2', 'heading_3'):
                tag = 'h' + kind[-1]
                # Join every run: an empty heading no longer raises and a
                # multi-run heading is no longer truncated to its first run.
                text = converter._join_plain_text(block[kind]['rich_text'])
                fragments.append(f"<{tag}>{text}</{tag}>")
            elif kind == 'code':
                fragments.append(converter.process_code(block['code']['rich_text'], block['code']['language']))
            elif kind in ('quote', 'qoute'):
                # 'quote' is the block type Notion sends; the old misspelling
                # is still accepted for callers that relied on it.
                fragments.append(converter.process_qoute(block[kind]['rich_text']))
            elif kind == 'image':
                fragments.append(converter.process_image_link(block))

        return "<br>\n".join(fragments)

    @staticmethod
    def process_paragraph(rich_text):
        """Render rich-text runs as inline HTML.

        Each run is wrapped according to its 'annotations' (bold, italic,
        strikethrough, underline, code, colour). Runs are concatenated
        without a separator so no whitespace appears in the middle of a word.
        """
        pieces = []
        for text in rich_text:
            plain_text = text['plain_text']
            annotations = text['annotations']
            for flag, tag in converter._INLINE_TAGS:
                if annotations[flag]:
                    plain_text = f"<{tag}>{plain_text}</{tag}>"
            color = annotations['color']
            if color != "default":
                suffix = '_background'
                if color.endswith(suffix):
                    # e.g. 'red_background' is a highlight, not a text colour.
                    style = f"background-color:{color[:-len(suffix)]}"
                else:
                    style = f"color:{color}"
                plain_text = f"<span style='{style}'>{plain_text}</span>"
            pieces.append(plain_text)

        return "".join(pieces)

    @staticmethod
    def process_code(rich_text, language):
        """Render a code block as ``<pre><code class="language-...">``.

        The code text is HTML-escaped so that '<', '>' and '&' are shown
        literally instead of being interpreted as markup.
        """
        import html

        codes = html.escape(converter._join_plain_text(rich_text), quote=False)
        return f'<pre><code class="language-{language}">{codes}</code></pre>'

    @staticmethod
    def process_qoute(rich_text):
        """Render a quote block as ``<blockquote>`` (name kept for callers)."""
        quotes = converter._join_plain_text(rich_text)
        return f'<blockquote>{quotes}</blockquote>'

    @staticmethod
    def process_image_link(img : dict) -> str:
        """Render an image block as ``<figure>`` with its caption.

        Both Notion-hosted ('file') and 'external' images are supported; a
        missing caption renders as an empty ``<small>`` element.
        """
        image = img['image']
        url = image[image.get('type', 'file')]['url']
        caption = converter._join_plain_text(image.get('caption'))
        # Spelled out line by line so the emitted whitespace does not depend
        # on how this source file is indented.
        return (
            "\n"
            "            <figure>\n"
            f"                <img src='{url}' />\n"
            f"                <small>{caption}</small>\n"
            "            </figure>\n"
            "        "
        )

## requirements.txt
pandas
notion_client
	from notion_client import Client
	import os
	import pandas as pd

	"""
	This is a Notion script that turns your Notion pages into a content management system.
	Essentially, you can publish content directly to your personal blog, where this script renders/converts the blocks into HTML content.
	Currently supported blocks are paragraphs, headings, images, code blocks and quotes.

	Install the required packages by running !pip install notion_client

	If you would like to see this script in action, check out

	https://kayarn.notion.site/Kayarn-499adcff95ce460394e50afa83c610be

	and compare it with www.kayarn.com.

	For more information visit www.kayarn.com or reach out to arnauldkayonga1@gmail.com

	Cheers!

	"""

	class notion:
	    """Read blog posts from a Notion database through ``notion_client``.

	    Every function is static and is meant to be called on the class itself,
	    e.g. ``notion.store_pages_to_dataframe()``.
	    """

	    # NOTE: both values are read while the class body executes, i.e. when the
	    # module is imported; a missing environment variable raises KeyError there.
	    client = Client(auth=os.environ['API_TOKEN'])
	    database_id = os.environ['DATABASE_ID']

	    # Column order of the frame built by store_pages_to_dataframe().
	    # 'CONTENT' is declared but is not filled in by this class.
	    _COLUMNS = [
	        'TITLE', 'AUTHOR', 'PUBLISHED_ON', 'PUBLISHED', 'DESCRIPTION', 'TAGS',
	        'TYPE', 'ID', 'CONTENT', 'LAST_EDITED_TIME', 'COVER_IMG_URL', 'URL',
	    ]

	    @staticmethod
	    def _paginate(endpoint, **params) -> dict:
	        """Call a paginated Notion list endpoint until it is exhausted.

	        Returns a copy of the first response whose 'results' list holds the
	        results of every page, so callers can keep reading
	        ``response['results']``.
	        """
	        first_page = endpoint(**params)
	        results = list(first_page['results'])
	        page = first_page
	        # Notion caps each response and signals further data through
	        # 'has_more' / 'next_cursor'.
	        while page.get('has_more') and page.get('next_cursor'):
	            page = endpoint(start_cursor=page['next_cursor'], **params)
	            results.extend(page['results'])
	        merged = dict(first_page)
	        merged.update(results=results, has_more=False, next_cursor=None)
	        return merged

	    @staticmethod
	    def _plain_text(rich_text) -> str:
	        """Concatenate the 'plain_text' of every run ('' when there is none)."""
	        return "".join(run['plain_text'] for run in rich_text or [])

	    @staticmethod
	    def _page_to_row(page: dict) -> dict:
	        """Flatten one page object into a row of the posts dataframe.

	        Unset date/select properties yield None and empty text properties
	        yield '' instead of raising, so one unfinished draft cannot break the
	        whole listing.
	        """
	        props = page['properties']
	        title = notion._plain_text(props['Title']['title'])
	        return {
	            'TITLE': title,
	            'AUTHOR': notion.process_athor(props['Author']['people']),
	            'PUBLISHED_ON': (props['Published on']['date'] or {}).get('start'),
	            'PUBLISHED': (props['Published']['select'] or {}).get('name'),
	            'DESCRIPTION': notion._plain_text(props['Short Description']['rich_text']),
	            'TAGS': [tag['name'] for tag in props['Tags']['multi_select']],
	            'TYPE': (props['Type']['select'] or {}).get('name'),
	            'ID': page['id'],
	            'LAST_EDITED_TIME': page['last_edited_time'],
	            'COVER_IMG_URL': notion.process_cover_img_url(page['cover']),
	            # Slug: lower-cased title with spaces replaced by dashes.
	            'URL': title.lower().replace(" ", "-"),
	        }

	    @staticmethod
	    def fetch_all_pages_in_db():
	        """Query the configured database and return the response dict.

	        All result pages are followed; the combined list is under 'results'.
	        """
	        return notion._paginate(
	            notion.client.databases.query,
	            database_id=notion.database_id,
	        )

	    @staticmethod
	    def fetch_all_blocks_in_page(page_id : str) -> list:
	        """Return the list of child block objects of the page ``page_id``."""
	        return notion._paginate(
	            notion.client.blocks.children.list,
	            block_id=page_id,
	        )['results']

	    @staticmethod
	    def store_pages_to_dataframe() -> pd.DataFrame:
	        """Build a dataframe of published posts, newest first.

	        Returns:
	            One row per page whose 'Published' select equals 'Published',
	            sorted by 'PUBLISHED_ON' in descending order.
	        """
	        raw_pages_content : dict = notion.fetch_all_pages_in_db()
	        rows = [notion._page_to_row(page) for page in raw_pages_content['results']]
	        # Build the frame in one go: DataFrame.append no longer exists in
	        # pandas 2.x and copied the whole frame for every row.
	        dataframe : pd.DataFrame = pd.DataFrame(rows, columns=notion._COLUMNS)

	        # Convert the datatype so the sort below is chronological.
	        dataframe['PUBLISHED_ON'] = pd.to_datetime(dataframe['PUBLISHED_ON'])

	        # Sort the dataframe by publication date, newest first.
	        dataframe = dataframe.sort_values(by=['PUBLISHED_ON'], ascending=False)

	        return dataframe[dataframe['PUBLISHED'] == 'Published']

	    @staticmethod
	    def process_athor(author_dict: dict) -> str:
	        """Format the author names as one string.

	        Args:
	            author_dict: iterable of people objects; entries without a
	                'name' are skipped.

	        Returns:
	            '' for no author, the bare name for one author, otherwise the
	            names joined with ',' and the last one attached with '&'.
	        """
	        authors = [author['name'] for author in author_dict if author.get('name')]
	        if len(authors) > 1:
	            return ",".join(authors[:-1]) + "&" + authors[-1]
	        return ' '.join(authors)

	    @staticmethod
	    def process_cover_img_url(cover_img_dict: dict) -> "str | None":
	        """Return the cover image URL, or None when it cannot be determined.

	        Args:
	            cover_img_dict: the page's 'cover' value; may be None.
	        """
	        if not cover_img_dict:
	            return None
	        try:
	            kind = cover_img_dict['type']
	            if kind in ("external", "file"):
	                return cover_img_dict[kind]['url']
	        except (KeyError, TypeError):
	            # Malformed cover object: treat it as "no cover".
	            pass
	        return None


	class converter:
	    """Render Notion block objects as HTML fragments.

	    Every function is static and is meant to be called on the class itself,
	    e.g. ``converter.to_html(blocks)``.
	    """

	    # (annotation flag, HTML tag) pairs, applied innermost first.
	    _INLINE_TAGS = (
	        ('bold', 'b'),
	        ('italic', 'i'),
	        ('strikethrough', 's'),
	        ('underline', 'u'),
	        ('code', 'code'),
	    )

	    @staticmethod
	    def _join_plain_text(rich_text) -> str:
	        """Concatenate the 'plain_text' of every run ('' when there is none)."""
	        return "".join(run['plain_text'] for run in rich_text or [])

	    @staticmethod
	    def to_html(blocks : dict):
	        """Convert an iterable of block objects into one HTML string.

	        Paragraph, heading_1..3, code, quote and image blocks are rendered;
	        any other block type is skipped. Fragments are joined with "<br>\\n".
	        """
	        fragments = []
	        for block in blocks:
	            kind = block['type']
	            if kind == 'paragraph':
	                fragments.append(converter.process_paragraph(block['paragraph']['rich_text']))
	            elif kind in ('heading_1', 'heading_2', 'heading_3'):
	                tag = 'h' + kind[-1]
	                # Join every run: an empty heading no longer raises and a
	                # multi-run heading is no longer truncated to its first run.
	                text = converter._join_plain_text(block[kind]['rich_text'])
	                fragments.append(f"<{tag}>{text}</{tag}>")
	            elif kind == 'code':
	                fragments.append(converter.process_code(block['code']['rich_text'], block['code']['language']))
	            elif kind in ('quote', 'qoute'):
	                # 'quote' is the block type Notion sends; the old misspelling
	                # is still accepted for callers that relied on it.
	                fragments.append(converter.process_qoute(block[kind]['rich_text']))
	            elif kind == 'image':
	                fragments.append(converter.process_image_link(block))

	        return "<br>\n".join(fragments)

	    @staticmethod
	    def process_paragraph(rich_text):
	        """Render rich-text runs as inline HTML.

	        Each run is wrapped according to its 'annotations' (bold, italic,
	        strikethrough, underline, code, colour). Runs are concatenated
	        without a separator so no whitespace appears in the middle of a word.
	        """
	        pieces = []
	        for text in rich_text:
	            plain_text = text['plain_text']
	            annotations = text['annotations']
	            for flag, tag in converter._INLINE_TAGS:
	                if annotations[flag]:
	                    plain_text = f"<{tag}>{plain_text}</{tag}>"
	            color = annotations['color']
	            if color != "default":
	                suffix = '_background'
	                if color.endswith(suffix):
	                    # e.g. 'red_background' is a highlight, not a text colour.
	                    style = f"background-color:{color[:-len(suffix)]}"
	                else:
	                    style = f"color:{color}"
	                plain_text = f"<span style='{style}'>{plain_text}</span>"
	            pieces.append(plain_text)

	        return "".join(pieces)

	    @staticmethod
	    def process_code(rich_text, language):
	        """Render a code block as ``<pre><code class="language-...">``.

	        The code text is HTML-escaped so that '<', '>' and '&' are shown
	        literally instead of being interpreted as markup.
	        """
	        import html

	        codes = html.escape(converter._join_plain_text(rich_text), quote=False)
	        return f'<pre><code class="language-{language}">{codes}</code></pre>'

	    @staticmethod
	    def process_qoute(rich_text):
	        """Render a quote block as ``<blockquote>`` (name kept for callers)."""
	        quotes = converter._join_plain_text(rich_text)
	        return f'<blockquote>{quotes}</blockquote>'

	    @staticmethod
	    def process_image_link(img : dict) -> str:
	        """Render an image block as ``<figure>`` with its caption.

	        Both Notion-hosted ('file') and 'external' images are supported; a
	        missing caption renders as an empty ``<small>`` element.
	        """
	        image = img['image']
	        url = image[image.get('type', 'file')]['url']
	        caption = converter._join_plain_text(image.get('caption'))
	        # Spelled out line by line so the emitted whitespace does not depend
	        # on how this source file is indented.
	        return (
	            "\n"
	            "            <figure>\n"
	            f"                <img src='{url}' />\n"
	            f"                <small>{caption}</small>\n"
	            "            </figure>\n"
	            "        "
	        )