Skip to content

Instantly share code, notes, and snippets.

@Moving-Electrons
Last active January 8, 2019 00:15
Show Gist options
  • Save Moving-Electrons/72771486ee9e70d0d6f3b48a4215d4a8 to your computer and use it in GitHub Desktop.
Save Moving-Electrons/72771486ee9e70d0d6f3b48a4215d4a8 to your computer and use it in GitHub Desktop.
Python 3 script for exporting Bear App documents to the Pelican site generator. The script takes a zip file (passed as an argument) containing the files exported from Bear App in Markdown format (i.e. the MD file and its images), reformats the Markdown as needed and moves the files to the Pelican content folders. More information at www.movingelectrons.net
import sys
import re
import os
import zipfile
import shutil
# User configuration
# ------------------
# Path placed in front of every image file name when image links are
# rewritten in the markdown file, i.e.: ![](IMAGE_LNK<image_filename>).
IMAGE_LNK = '/images/'
# Pelican content folders (replace the placeholders with real paths):
POSTS_FOLDER = 'YOUR PELICAN POST FOLDER PATH'
IMAGES_FOLDER = 'YOUR PELICAN IMAGES FOLDER PATH'
# NOTE(review): FILES_FOLDER is not referenced by the code visible in this file.
FILES_FOLDER = 'YOUR PELICAN FILES FOLDER PATH'
# ------------------
# File extensions treated as markdown documents.
MD_EXT_LIST = [
    '.md',
    '.markdown',
]
# File extensions treated as images (lower- and upper-case spellings).
IMG_EXT_LIST = [
    '.jpg', '.jpeg', '.png',
    '.JPG', '.JPEG', '.PNG',
]
def image_path(matchedObject):
    '''
    Builds the replacement text for one markdown image reference so that the image
    file name is prefixed with IMAGE_LNK. Meant to be passed to re.sub as the
    replacement callback, which calls it once per match.

    :param matchedObject: Regex match object. Group 1 is the opening of the image tag
        (emitted unchanged when there is no description), group 2 is the image file
        name part and group 3 is the optional description.
    :return: String with the rewritten markdown image reference.
    '''
    description = matchedObject.group(3)
    target = IMAGE_LNK + matchedObject.group(2)

    if not description:
        # No description: keep the original opening of the tag, only the path changes.
        return matchedObject.group(1) + target + ')'

    # With a description, it is used both as alt text and as the title, like so:
    # ![Description](path/image.jpg "Description")
    return '![' + description + '](' + target + ' "' + description + '")'
def process_markdown(linesList, mdFilePath):
    '''
    Writes a Pelican-ready copy of a markdown document into POSTS_FOLDER: image
    references at the start of a line are rewritten through image_path, and the
    resulting file is named after the document's "Slug" metadata line.

    The file is first written under the base name of mdFilePath and then renamed to
    "<slug with hyphens replaced by spaces>.md". If several "Slug" lines are present,
    the last non-empty one wins.

    :param linesList: List of newline-terminated string lines. Usually the result of
        readlines().
    :param mdFilePath: Path of the markdown file including extension. Only its base
        name is used.
    :return: Nothing. When no usable "Slug" line is found an error message is printed
        and the file keeps its original name.
    '''
    mdFileName = os.path.basename(mdFilePath)
    writtenPath = os.path.join(POSTS_FOLDER, mdFileName)

    # Compiled once instead of being looked up again for every line.
    # Any amount of blanks (including none) may follow the colon, so both
    # "Slug: my-post" and "Slug:my-post" yield "my-post".
    slugPattern = re.compile(r'^Slug:[ \t]*(.*)')
    # The following regex works for images referenced in any of the following ways:
    # ![](folder/n-folder/image.ext),
    # ![](image.ext)
    # ![](image.ext "image description")
    # ![](/folder/image.ext)<image description>
    imagePattern = re.compile(r'(^\!\[.*\]\()(?:[^\/]*\/){0,}([^<\)]*)\)(?:<(.*)>){0,1}')

    # Stays None until a usable slug is seen; replaces relying on UnboundLocalError.
    fileName = None

    with open(writtenPath, 'w') as newFile:
        for line in linesList:
            slugFound = slugPattern.match(line)
            if slugFound is not None:
                slug = slugFound.group(1).strip()
                # An empty slug would produce a file called just ".md"; ignore it.
                if slug:
                    fileName = slug.replace('-', ' ') + '.md'
            # If the line holds no image reference, sub() returns it unchanged.
            newFile.write(imagePattern.sub(image_path, line))

    if fileName is None:
        print('Error: "Slug" not detected in Markdown file. File could not be renamed.')
        return

    # os.replace overwrites an existing destination on every platform, so the script
    # can be re-run for the same post (os.rename fails on Windows in that case).
    os.replace(writtenPath, os.path.join(POSTS_FOLDER, fileName))
def _read_markdown_body(mdPath):
    '''Returns the lines of the file at mdPath without the first one (the Bear document title).'''
    with open(mdPath, 'r') as mdFile:
        return mdFile.readlines()[1:]


def _extract_zip_member(archive, memberPath, destFolder):
    '''Copies one zip member into destFolder under its base name and returns the new path.'''
    destPath = os.path.join(destFolder, os.path.basename(memberPath))
    with archive.open(memberPath) as zippedFile, open(destPath, 'wb') as destFile:
        shutil.copyfileobj(zippedFile, destFile)
    return destPath


def main():
    '''
    Takes document files exported from Bear, processes them and copies them to the
    appropriate Pelican content folder.

    The first command line argument can be either a single markdown file or a zip
    file with markdown files and image files. Extensions are compared
    case-insensitively. Problems with the argument are reported with a printed
    message; nothing is raised for them.
    '''
    # Checked up front so that an IndexError raised while processing is not
    # misreported as a missing argument.
    if len(sys.argv) < 2:
        print('No argument passed to the script.')
        return

    mainFilePath = sys.argv[1]
    mainFileExt = os.path.splitext(mainFilePath)[1].lower()

    if mainFileExt in MD_EXT_LIST:
        print('Markdown passed as argument. Processing...')
        process_markdown(_read_markdown_body(mainFilePath), mainFilePath)
        return

    if mainFileExt != '.zip':
        print('File passed as an argument is not a markdown file (.md extension) or a zip file (.zip extension).')
        return

    print('Zip file passed as argument. Processing...')
    with zipfile.ZipFile(mainFilePath, 'r') as myZip:
        for zipItemPath in myZip.namelist():
            # Skips aux files introduced by Mac OS:
            if '__MACOSX' in zipItemPath:
                continue

            fileExtension = os.path.splitext(zipItemPath)[1].lower()

            if fileExtension in MD_EXT_LIST:
                print('Processing markdown file...')
                # The markdown file is first unzipped into the posts folder and then
                # read back from disk (using io.TextIOWrapper was problematic).
                extractedPath = _extract_zip_member(myZip, zipItemPath, POSTS_FOLDER)
                process_markdown(_read_markdown_body(extractedPath), zipItemPath)
            elif fileExtension in IMG_EXT_LIST:
                print('Moving image file...')
                _extract_zip_member(myZip, zipItemPath, IMAGES_FOLDER)
            # TODO: decide what to do with any other file type found in the zip file.
            # For now such entries (directories included) are ignored.


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment