# Moving-Electrons/bear_to_pelican.py

## bear_to_pelican.py
import sys
import re
import os
import zipfile
import shutil

# Constant Definition
# -------------------
# The three *_FOLDER values below are placeholders and must be edited before running the script.

# Path to be prepended to image links in markdown file, like so: ![](IMAGE_LNK<image_filename>).
IMAGE_LNK = '/images/'

# Pelican Content Folders:
# POSTS_FOLDER receives the processed markdown files.
POSTS_FOLDER = 'YOUR PELICAN POST FOLDER PATH'
# IMAGES_FOLDER receives the image files found inside a zip export.
IMAGES_FOLDER = 'YOUR PELICAN IMAGES FOLDER PATH'
# NOTE(review): FILES_FOLDER is not referenced anywhere in the visible code.
FILES_FOLDER = 'YOUR PELICAN FILES FOLDER PATH'
# -------------------


# File extensions handled as markdown documents.
MD_EXT_LIST = ['.md', '.markdown']
# File extensions handled as images. The lookup against this list is a plain membership test,
# which is why lower and upper case variants are both listed.
IMG_EXT_LIST = ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']


def image_path(matchedObject):
    '''
    Builds the replacement text for one markdown image reference so that it points to
    IMAGE_LNK. Meant to be passed to re.sub as the replacement callback.

    :param matchedObject: Match object (not a string) exposing three groups: group 1 is the
        opening of the reference up to and including "(", group 2 is the image target to be
        appended to IMAGE_LNK, and group 3 is the optional description (None or empty when
        the reference has none).
    :return: String with the rewritten markdown image reference.
    '''

    opening, imageName, description = matchedObject.group(1, 2, 3)

    if not description:
        # No <Description>: keeps the original opening and only swaps the path.
        return ''.join([opening, IMAGE_LNK, imageName, ')'])

    # With a <Description>, it is used both as alt text and as title, like so:
    # ![Description](path/image.jpg "Description")
    return '![{0}]({1}{2} "{0}")'.format(description, IMAGE_LNK, imageName)

def process_markdown(linesList, mdFilePath):
    '''
    Rewrites the image references of a markdown document and saves the result in
    POSTS_FOLDER, named after the document's slug.

    Every line is written to POSTS_FOLDER/<basename of mdFilePath> after its image
    references went through image_path. If a "Slug:" line was found, the written file is
    then renamed to "<slug with dashes replaced by spaces>.md". Otherwise an error message
    is printed and the file keeps the basename of mdFilePath.

    :param linesList: List of newline-terminated string lines. Usually the result of readlines().
    :param mdFilePath: Path of the markdown file including extension. Only its basename is used.
    :return: Nothing.
    '''

    # Both patterns are compiled once here instead of being rebuilt for every line.
    # "\s*" (instead of a single mandatory character) keeps the first letter of the slug
    # when there is no space after the colon.
    slugPattern = re.compile(r'^Slug:\s*(.*)')

    # The following regex works for images referenced in any of the following ways:
    # ![](folder/n-folder/image.ext),
    # ![](image.ext)
    # ![](image.ext "image description")
    # ![](/folder/image.ext)<image description>
    imagePattern = re.compile(r'(^\!\[.*\]\()(?:[^\/]*\/){0,}([^<\)]*)\)(?:<(.*)>){0,1}')

    mdFileName = os.path.basename(mdFilePath)
    writtenPath = os.path.join(POSTS_FOLDER, mdFileName)

    # Stays None when no usable slug is found, so no exception is needed to detect it.
    fileName = None

    with open(writtenPath, 'w') as newFile:

        for line in linesList:

            # Extracting filename from slug (if several slug lines exist, the last one wins):
            slugFound = slugPattern.match(line)
            if slugFound is not None:
                slug = slugFound.group(1).strip()
                if slug:
                    fileName = slug.replace('-', ' ') + '.md'

            # If there is a match, the matching section of the line is substituted by the
            # result of image_path. If there is no match, the line doesn't change.
            newFile.write(imagePattern.sub(image_path, line))

    if fileName is None:
        print('Error: "Slug" not detected in Markdown file. File could not be renamed.')
        return

    os.rename(writtenPath, os.path.join(POSTS_FOLDER, fileName))


def _extract_member(archive, memberPath, destFolder):
    '''Copies one zip member into destFolder under its basename and returns the new path.'''
    destPath = os.path.join(destFolder, os.path.basename(memberPath))
    with archive.open(memberPath) as zippedFile, open(destPath, 'wb') as destFile:
        shutil.copyfileobj(zippedFile, destFile)
    return destPath


def _read_without_title(mdPath):
    '''Returns the lines of mdPath minus the first one, which has the Bear document title.'''
    with open(mdPath, 'r') as mdFile:
        return mdFile.readlines()[1:]


def main():
    '''
    Takes document files exported from Bear, processes them and copies them to the
    appropriate Pelican content folder.

    The first command line argument can be either a single MD file or a ZIP file with MD
    files and image files. Markdown files go through process_markdown; images inside the zip
    are copied to IMAGES_FOLDER. A message is printed when the argument is missing or has
    an unsupported extension.
    '''

    # Checked explicitly (instead of wrapping everything in "except IndexError") so an
    # unrelated IndexError raised while processing is not reported as a missing argument.
    if len(sys.argv) < 2:
        print('No argument passed to the script.')
        return

    mainFilePath = sys.argv[1]
    # Lower-cased so that e.g. ".MD" or ".ZIP" are accepted too.
    mainFileExt = os.path.splitext(mainFilePath)[1].lower()

    if mainFileExt in MD_EXT_LIST:

        print('Markdown passed as argument. Processing...')
        process_markdown(_read_without_title(mainFilePath), mainFilePath)

    elif mainFileExt == '.zip':

        print('Zip file passed as argument. Processing...')
        with zipfile.ZipFile(mainFilePath, 'r') as myZip:

            for zipItemPath in myZip.namelist():

                # Skips aux files introduced by Mac OS:
                if '__MACOSX' in zipItemPath:
                    continue

                fileExtension = os.path.splitext(zipItemPath)[1].lower()

                if fileExtension in MD_EXT_LIST:

                    print('Processing markdown file...')
                    # The MD file is first extracted to the posts folder and then read back
                    # from disk (using io.TextIOWrapper on the zipped file was problematic).
                    extractedPath = _extract_member(myZip, zipItemPath, POSTS_FOLDER)
                    process_markdown(_read_without_title(extractedPath), zipItemPath)

                elif fileExtension in IMG_EXT_LIST:

                    print('Moving image file...')
                    _extract_member(myZip, zipItemPath, IMAGES_FOLDER)

                # Todo > To be implemented: action to be taken when the zip file has file
                # types other than MD and images. For now they are silently ignored.

    else:
        # Parameter is not .md or .zip
        print('File passed as an argument is not a markdown file (.md extension) or a zip file (.zip extension).')

# Runs the conversion only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
	import sys
	import re
	import os
	import zipfile
	import shutil

	# Constant Definition
	# -------------------
	# The three *_FOLDER values below are placeholders and must be edited before running the script.

	# Path to be prepended to image links in markdown file, like so: ![](IMAGE_LNK<image_filename>).
	IMAGE_LNK = '/images/'

	# Pelican Content Folders:
	# POSTS_FOLDER receives the processed markdown files.
	POSTS_FOLDER = 'YOUR PELICAN POST FOLDER PATH'
	# IMAGES_FOLDER receives the image files found inside a zip export.
	IMAGES_FOLDER = 'YOUR PELICAN IMAGES FOLDER PATH'
	# NOTE(review): FILES_FOLDER is not referenced anywhere in the visible code.
	FILES_FOLDER = 'YOUR PELICAN FILES FOLDER PATH'
	# -------------------


	# File extensions handled as markdown documents.
	MD_EXT_LIST = ['.md', '.markdown']
	# File extensions handled as images.
	IMG_EXT_LIST = ['.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG']


	def image_path(matchedObject):
	    '''
	    Builds the replacement text for one markdown image reference so that it points to
	    IMAGE_LNK. Meant to be passed to re.sub as the replacement callback.

	    :param matchedObject: Match object (not a string) exposing three groups: group 1 is
	        the opening of the reference up to and including "(", group 2 is the image target
	        to be appended to IMAGE_LNK, and group 3 is the optional description (None or
	        empty when the reference has none).
	    :return: String with the rewritten markdown image reference.
	    '''

	    if matchedObject.group(3):
	        # If <Description> is present, it is used as alt text and as title, like so:
	        # ![Description](path/image.jpg "Description")
	        imageReference = ('![' + matchedObject.group(3) + '](' + IMAGE_LNK
	                          + matchedObject.group(2) + ' "' + matchedObject.group(3) + '")')
	    else:
	        # No description: keeps the original opening and only swaps the path.
	        imageReference = matchedObject.group(1) + IMAGE_LNK + matchedObject.group(2) + ')'

	    return imageReference

	def process_markdown(linesList, mdFilePath):
	    '''
	    Rewrites the image references of a markdown document and saves the result in
	    POSTS_FOLDER, named after the document's slug.

	    Every line is written to POSTS_FOLDER/<basename of mdFilePath> after its image
	    references went through image_path. If a "Slug:" line was found, the written file is
	    then renamed to "<slug with dashes replaced by spaces>.md". Otherwise an error message
	    is printed and the file keeps the basename of mdFilePath.

	    :param linesList: List of newline-terminated string lines. Usually the result of readlines().
	    :param mdFilePath: Path of the markdown file including extension. Only its basename is used.
	    :return: Nothing.
	    '''

	    # Both patterns are compiled once here instead of being rebuilt for every line.
	    # "\s*" (instead of a single mandatory character) keeps the first letter of the slug
	    # when there is no space after the colon.
	    slugPattern = re.compile(r'^Slug:\s*(.*)')

	    # The following regex works for images referenced in any of the following ways:
	    # ![](folder/n-folder/image.ext),
	    # ![](image.ext)
	    # ![](image.ext "image description")
	    # ![](/folder/image.ext)<image description>
	    # (the "*" quantifiers are required; without them only one-character names match).
	    imagePattern = re.compile(r'(^\!\[.*\]\()(?:[^\/]*\/){0,}([^<\)]*)\)(?:<(.*)>){0,1}')

	    mdFileName = os.path.basename(mdFilePath)
	    writtenPath = os.path.join(POSTS_FOLDER, mdFileName)

	    # Stays None when no usable slug is found, so no exception is needed to detect it.
	    fileName = None

	    with open(writtenPath, 'w') as newFile:

	        for line in linesList:

	            # Extracting filename from slug (if several slug lines exist, the last one wins):
	            slugFound = slugPattern.match(line)
	            if slugFound is not None:
	                slug = slugFound.group(1).strip()
	                if slug:
	                    fileName = slug.replace('-', ' ') + '.md'

	            # If there is a match, the matching section of the line is substituted by the
	            # result of image_path. If there is no match, the line doesn't change.
	            newFile.write(imagePattern.sub(image_path, line))

	    if fileName is None:
	        print('Error: "Slug" not detected in Markdown file. File could not be renamed.')
	        return

	    os.rename(writtenPath, os.path.join(POSTS_FOLDER, fileName))


	def _extract_member(archive, memberPath, destFolder):
	    '''Copies one zip member into destFolder under its basename and returns the new path.'''
	    destPath = os.path.join(destFolder, os.path.basename(memberPath))
	    with archive.open(memberPath) as zippedFile, open(destPath, 'wb') as destFile:
	        shutil.copyfileobj(zippedFile, destFile)
	    return destPath


	def _read_without_title(mdPath):
	    '''Returns the lines of mdPath minus the first one, which has the Bear document title.'''
	    with open(mdPath, 'r') as mdFile:
	        return mdFile.readlines()[1:]


	def main():
	    '''
	    Takes document files exported from Bear, processes them and copies them to the
	    appropriate Pelican content folder.

	    The first command line argument can be either a single MD file or a ZIP file with MD
	    files and image files. Markdown files go through process_markdown; images inside the
	    zip are copied to IMAGES_FOLDER. A message is printed when the argument is missing or
	    has an unsupported extension.
	    '''

	    # Checked explicitly (instead of wrapping everything in "except IndexError") so an
	    # unrelated IndexError raised while processing is not reported as a missing argument.
	    if len(sys.argv) < 2:
	        print('No argument passed to the script.')
	        return

	    mainFilePath = sys.argv[1]
	    # Lower-cased so that e.g. ".MD" or ".ZIP" are accepted too.
	    mainFileExt = os.path.splitext(mainFilePath)[1].lower()

	    if mainFileExt in MD_EXT_LIST:

	        print('Markdown passed as argument. Processing...')
	        process_markdown(_read_without_title(mainFilePath), mainFilePath)

	    elif mainFileExt == '.zip':

	        print('Zip file passed as argument. Processing...')
	        with zipfile.ZipFile(mainFilePath, 'r') as myZip:

	            for zipItemPath in myZip.namelist():

	                # Skips aux files introduced by Mac OS:
	                if '__MACOSX' in zipItemPath:
	                    continue

	                fileExtension = os.path.splitext(zipItemPath)[1].lower()

	                if fileExtension in MD_EXT_LIST:

	                    print('Processing markdown file...')
	                    # The MD file is first extracted to the posts folder and then read back
	                    # from disk (using io.TextIOWrapper on the zipped file was problematic).
	                    extractedPath = _extract_member(myZip, zipItemPath, POSTS_FOLDER)
	                    process_markdown(_read_without_title(extractedPath), zipItemPath)

	                elif fileExtension in IMG_EXT_LIST:

	                    print('Moving image file...')
	                    _extract_member(myZip, zipItemPath, IMAGES_FOLDER)

	                # Todo > To be implemented: action to be taken when the zip file has file
	                # types other than MD and images. For now they are silently ignored.

	    else:
	        # Parameter is not .md or .zip
	        print('File passed as an argument is not a markdown file (.md extension) or a zip file (.zip extension).')


	# Runs the conversion only when this file is executed as a script, not when it is imported.
	if __name__ == '__main__':
	    main()