Skip to content

Instantly share code, notes, and snippets.

@blopa
Created April 17, 2021 14:34
Show Gist options
  • Save blopa/a6e49c798cf0fd5c875025c6ae282064 to your computer and use it in GitHub Desktop.
Save blopa/a6e49c798cf0fd5c875025c6ae282064 to your computer and use it in GitHub Desktop.
jekyll-markdown-script.py
from bs4 import BeautifulSoup
import unidecode
import pprint
import json
import yaml
import os
# Module-level accumulators shared by create_posts() and main().

# PHP source text of the redirect map. create_posts() appends one
# "<id> => '<url>', " entry per translated post and main() appends the
# closing "];" before writing the result to arrays.php.
PHP_ARRAYS = "<?php $redirect_map = ["
# Every tag name seen while converting posts (duplicates included); main()
# removes the duplicates before dumping the list to tags.json.
TAGS = []
# Every category name seen while converting posts (duplicates included);
# main() removes the duplicates before dumping the list to categories.json.
CATEGORIES = []
def _write_output(path, text, label):
    """Write *text* to *path* as UTF-8 and report it on stdout."""
    # The context manager closes the file even when the write raises.
    with open(path, mode="w", encoding="utf-8") as handle:
        handle.write(text)
    print("Saving " + label + " file: " + path)


def main():
    """Convert every file in ``_posts`` and write the summary files.

    Each file is passed to create_posts(); afterwards, depending on the
    switches below, the accumulated redirect map is written to
    ``arrays.php`` and the de-duplicated tag and category names are written
    to ``tags.json`` and ``categories.json`` in the working directory.
    """
    global PHP_ARRAYS  # rebound below with "+=", so it must be declared

    languages = ["br", "en"]
    add_php_redirect_array = True
    create_tags_json = True
    create_categories_json = True

    # To convert a single post instead of the whole directory:
    # create_posts("2008-09-29-iphone.md", False, True, False, ["en", "br"])

    # os.listdir() returns names in arbitrary order; sorting keeps the
    # generated redirect map stable between runs.
    for f in sorted(os.listdir("_posts")):
        print("Processing file: " + f)
        create_posts(f, False, True, add_php_redirect_array, languages)

    if add_php_redirect_array:
        PHP_ARRAYS += "];"
        _write_output("arrays.php", PHP_ARRAYS, "PHP")

    if create_tags_json:
        # dict.fromkeys() drops duplicates while keeping first-seen order.
        unique_tags = list(dict.fromkeys(TAGS))
        _write_output("tags.json", json.dumps(unique_tags), "TAGS")

    if create_categories_json:
        unique_categories = list(dict.fromkeys(CATEGORIES))
        _write_output(
            "categories.json", json.dumps(unique_categories), "CATEGORIES"
        )
# Original blog category name -> category name used in the generated posts.
_CATEGORY_NAMES = {
    "geral": "general",
    "celulares": "cellphones",
    "engraçado": "funny",
    "hardware": "hardware",
    "dicas": "tips",
    "games": "games",
    "sistemas": "software",
    "filmes": "movies",
    "música": "music",
    "notícias": "news",
    "nada": "general",
    "programação": "coding",
    "bandas": "music",
    "shows": "music",
    "review": "review",
    "unboxing": "unboxing",
    "eventos": "events",
    "software": "software",
    "viagens": "trips",
}


def get_proper_category_name(category):
    """Return the output category name for *category*.

    Known names are looked up in ``_CATEGORY_NAMES``; any other value is
    returned unchanged. The lookup is exact, so callers are expected to
    lower-case the name first.
    """
    return _CATEGORY_NAMES.get(category, category)
# Tag names that are placeholders rather than real tags; never exported.
_PLACEHOLDER_TAGS = ("Add new tag", "Adicionar nova tag")

# Marker that closes one translated segment of a title or post body.
_TRANSLATION_END = "<!--:-->"

# Single-character clean-up applied to a lower-cased title when building the
# file-name slug: None drops the character, "-" replaces it with a hyphen.
_SLUG_CHAR_TABLE = str.maketrans({
    "'": None,
    "’": None,
    "`": None,
    "…": None,
    '"': None,
    "-": None,
    " ": "-",
    "?": None,
    ":": None,
    ",": None,
    ";": None,
    "!": None,
    ".": "-",
    "\\": None,
    "/": None,
    "$": None,
})

# HTML entities stripped from the slug. They are listed without the trailing
# ";" because _SLUG_CHAR_TABLE has already removed every semicolon.
_SLUG_ENTITIES = (
    "&#8217", "&#8221", "&#8230", "&#8211", "&#038", "&#8216", "&#8220",
)

# HTML entities decoded (in this order) in the human-readable title.
_TITLE_ENTITIES = (
    ("&#8217;", "’"),
    ("&#8221;", '"'),
    ("&#8230;", "..."),
    ("&#8211;", "-"),
    ("&#038;", "&"),
    ("&#8216;", "`"),
    ("&#8220;", '"'),
)


def _split_translations(text, languages):
    """Return the non-empty translated segments of *text*.

    Segments are separated by "<!--:-->". Text that contains no such marker
    is treated as untranslated and is duplicated once per language, each
    copy prefixed with that language's "<!--:lang-->" marker.
    """
    text = text.strip().strip("'").strip('"')
    if _TRANSLATION_END not in text:
        text = "".join(
            "<!--:" + lang + "-->" + text + _TRANSLATION_END
            for lang in languages
        )
    return [segment for segment in text.split(_TRANSLATION_END) if segment]


def _slugify_title(title):
    """Return the ASCII file-name slug for *title*."""
    slug = title.lower().translate(_SLUG_CHAR_TABLE)
    for entity in _SLUG_ENTITIES:
        slug = slug.replace(entity, "")
    slug = unidecode.unidecode(slug)
    return (
        slug.replace("--", "-")
        .replace("]", "-")
        .replace("[", "-")
        .replace(")", "-")
        .replace("(", "-")
    )


def _decode_title_entities(title):
    """Return *title* with the known HTML entities replaced by characters."""
    for entity, char in _TITLE_ENTITIES:
        title = title.replace(entity, char)
    return title


def create_posts(filename, use_categories, check_translation, add_php_redirect_array, languages):
    """Convert one post from ``_posts/`` into one file per language in ``posts/``.

    The source file is split into YAML front matter and body. A new front
    matter is built from it, the title and body are split into their
    translated segments, and one ``<date>-<slug>.<lang>.md`` file is written
    for every language that has a title. Tag and category names are appended
    to the module-level TAGS and CATEGORIES lists.

    Args:
        filename: Name of a file inside ``_posts/``. Its first 11 characters
            are used as the date prefix (e.g. ``2008-09-29-``).
        use_categories: When true, every category is written to the
            ``categories`` list. When false, only the first one is (and only
            if the post has no ``category`` key) and all categories are
            added to the tags instead.
        check_translation: When true, the title is split per language. When
            false, the raw title goes into the shared front matter; no
            per-language title exists then, so no file is written.
        add_php_redirect_array: When true and the post has an ``id``, a
            redirect entry per translated title is appended to PHP_ARRAYS.
        languages: Language codes used in the ``<!--:lang-->`` markers.
            ``"br"`` is written out as ``"pt-br"``.

    Files whose front matter is missing, is not valid YAML, or is not a
    mapping are reported on stdout and skipped.
    """
    global PHP_ARRAYS  # rebound below with "+=", so it must be declared

    with open("_posts/" + filename, mode="r", encoding="UTF8") as source:
        # maxsplit=2 keeps a "---" inside the post body from cutting it short.
        content = source.read().split("---", 2)
    if len(content) < 3:
        print("Skipping " + filename + ": no front matter found")
        return

    try:
        yml = yaml.safe_load(content[1])
    except yaml.YAMLError as exc:
        print(exc)
        print("Skipping " + filename + ": front matter is not valid YAML")
        return
    if not isinstance(yml, dict):
        print("Skipping " + filename + ": front matter is not a mapping")
        return

    general = get_proper_category_name("geral")
    categories = yml.get("categories") or []
    has_category = "category" in yml
    has_categories = bool(categories)

    # ---- front matter shared by every language ----------------------------
    front = ["---\n"]
    if "id" in yml:
        front.append("id: " + str(yml["id"]) + "\n")
    if has_category:
        category_name = get_proper_category_name(yml["category"].lower())
        CATEGORIES.append(category_name)
        front.append("category: [" + category_name + "]\n")
    if "author" in yml:
        front.append(
            "author: "
            + yml["author"].replace("blopa", "Pablo Montenegro")
            + "\n"
        )

    titles_translations = []
    if "title" in yml:
        if check_translation:
            titles_translations = _split_translations(
                str(yml["title"]), languages
            )
        else:
            front.append("title: " + str(yml["title"]) + "\n")

    if has_categories:
        if use_categories:
            names = [get_proper_category_name(c.lower()) for c in categories]
            CATEGORIES.extend(names)
            front.append("categories: [" + ", ".join(names) + "]\n")
        elif not has_category:
            first = get_proper_category_name(categories[0].lower())
            CATEGORIES.append(first)
            front.append("categories: [" + first + "]\n")

    if "tags" in yml:
        # "==" based membership test: comparing strings with "is" checks
        # object identity and would let the placeholders through.
        tag_names = [
            str(tag).replace("'", "").lower()
            for tag in (yml["tags"] or [])
            if tag not in _PLACEHOLDER_TAGS
        ]
        TAGS.extend(tag_names)
        if not use_categories:
            # Categories are listed as tags too (but recorded as categories).
            extra = [get_proper_category_name(c.lower()) for c in categories]
            CATEGORIES.extend(extra)
            tag_names = tag_names + extra
        front.append(
            "tags: [" + ", ".join("'" + n + "'" for n in tag_names) + "]\n"
        )
    else:
        TAGS.append("geral")
        front.append("tags: ['geral']\n")

    # Default category, written at most once so the front matter never ends
    # up with a duplicate "categories" key.
    if not has_categories and ("tags" not in yml or not has_category):
        CATEGORIES.append(general)
        front.append("categories: ['" + general + "']\n")

    new_content = "".join(front)
    posts_translations = _split_translations(content[2], languages)

    # ---- values that do not depend on the language ------------------------
    date_prefix = filename[:11]                      # "yyyy-mm-dd-"
    url_date = date_prefix.replace("-", "/")         # "yyyy/mm/dd/"
    iso_date = url_date[:10].replace("/", "-")       # "yyyy-mm-dd"

    # Category segment of the redirect URL.
    # NOTE(review): the original left the use_categories case as a TODO; the
    # first category is used for both modes here - confirm this is the
    # intended URL.
    if has_category:
        url_category = yml["category"]
    elif has_categories:
        url_category = unidecode.unidecode(categories[0]).lower()
    else:
        url_category = None

    # ---- per-language output ----------------------------------------------
    files = {}
    filenames = {}
    for lang in languages:
        lang_key = "pt-br" if lang == "br" else lang
        marker = "<!--:" + lang + "-->"
        file_ending = "." + lang_key + ".md"

        for tit in titles_translations:
            if not tit.startswith(marker):
                continue
            files.setdefault(lang_key, new_content)
            san_title = tit.replace(marker, "").replace("'", "`")

            post_filename = date_prefix + _slugify_title(san_title) + file_ending
            while "--" in post_filename:
                post_filename = post_filename.replace("--", "-")
            post_filename = post_filename.replace("-" + file_ending, file_ending)
            filenames[lang_key] = post_filename
            clean_filename = post_filename[11:].replace(file_ending, "")

            if "id" in yml and add_php_redirect_array:
                if url_category is None:
                    print(
                        "No category for post id " + str(yml["id"])
                        + "; redirect entry skipped"
                    )
                else:
                    PHP_ARRAYS += (
                        str(yml["id"])
                        + " => 'https://pablomontenegro.com.br/"
                        + lang_key + "/blog/" + url_category + "/"
                        + url_date + clean_filename + "', "
                    )

            clean_title = _decode_title_entities(san_title)
            files[lang_key] += (
                "path: " + clean_filename + "\n"
                + "cover: ../../../src/images/default-cover.png\n"
                + "date: " + iso_date + "\n"
                + "excerpt: '" + clean_title + "'\n"
                + "title: '" + clean_title + "'\n---\n"
            )

        for p in posts_translations:
            con = p.strip()
            if not con.startswith(marker):
                continue
            files.setdefault(lang_key, new_content)
            san_con = con.replace(marker, "")
            # NOTE(review): no parser is named, so bs4 picks whichever one is
            # installed. Left as in the original because naming one could
            # change the generated markup.
            san_con = BeautifulSoup(san_con).prettify()
            files[lang_key] += san_con + "\n"

    # ---- write the results ------------------------------------------------
    os.makedirs("posts", exist_ok=True)
    for lang_key, text in files.items():
        if lang_key not in filenames:
            # Body text without a title: there is no file name to write to.
            print(
                "Skipping " + filename + " (" + lang_key
                + "): no title for this language"
            )
            continue
        fullpath = "posts/" + filenames[lang_key]
        with open(fullpath, mode="w", encoding="utf-8") as final_file:
            final_file.write(text)
        print("Saving file: " + fullpath)
# Run the conversion only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment