# nathanlesage/conv.py (secret gist)

## conv.py
# Convert excerpts to Zettels

# ATTENTION! WHILE THIS SCRIPT SADLY DOES NOT KILL FASCISTS,
# IT MAY CERTAINLY KILL YOUR NOTES IF HANDLED WRONGLY! MAKE
# SURE TO ONLY TEST ON A BACKUP COPY! NEVER ON PRODUCTION.
# I DID NOT CHECK THE SCRIPT AGAIN BEFORE UPLOADING!
# IN CASE OF AN EMERGENCY, I HAVE NO IDEA WHAT TO DO!

import os
import sys
import re

# --- Command line handling -------------------------------------------------
# Usage: conv.py <input_dir> [<output_dir>]
if len(sys.argv) < 2:
    print("No path given! Please enter a valid path to split files from!")
    # sys.exit instead of the interactive-only exit() helper, and a non-zero
    # status so callers can tell that nothing was converted.
    sys.exit(1)

input_dir = sys.argv[1] # First argv is command name, second is the dir
output_dir = './out'

# Statistics and global variables (modified by splitFile and the main loop)
files_created = 0
files_processed = 0
# Base for the numeric IDs written into every generated note; splitFile
# increments it before each use. NOTE(review): looks like a YYYYMMDDhhmmss
# timestamp (2017-11-23 00:00:00) -- confirm.
file_id = 20171123000000
warnings = [] # Holds all warnings
errors = [] # Holds all errors

if len(sys.argv) > 2:
    print("Output dir given! Using " + sys.argv[2])
    output_dir = sys.argv[2]

# Make sure the output directory exists
if not os.path.exists(output_dir):
    print("Output directory does not exist! Creating ...")
    os.makedirs(output_dir)

print("Searching directory " + input_dir)

# First we have to parse all tags (tags.txt simply contains tags one wants to
# find within the files, separated by newlines). The file is looked up in the
# current working directory, not next to the script.
tags_path = os.path.join(os.getcwd(), 'tags.txt')
try:
    with open(tags_path, 'r', encoding="utf-8", errors="surrogateescape") as f:
        tags = f.read().splitlines()
except OSError as err:
    print("Could not read tags file " + tags_path + ": " + str(err))
    sys.exit(1)

# Some post-processing: strip surrounding whitespace, remove tags < 2
# characters and make unique (sorted so every run sees the same order)
tags = sorted({tag.strip() for tag in tags if len(tag.strip()) > 1})

print("There are " + str(len(tags)) + " tags to choose from!")

def _write_zettel(outdir, title, body_lines, section_tags, index_title, author):
    """Write a single section as its own note file.

    The note consists of a backlink to the index, a freshly allocated ID, the
    heading, the section text, the matched tags (if any) and a literature
    footer naming ``author``.

    Increments the global ``file_id`` (even when writing fails) and, on
    success, the global ``files_created``. Failures are recorded in the global
    ``errors`` list.

    Returns ``(filename, file_id)`` for the index, or ``None`` when the
    section has no title or the file could not be written.
    """
    global file_id
    global files_created

    # Text before the first heading, or below an empty heading, has no title
    # and is not turned into a note.
    if not title:
        return None

    file_id += 1
    parts = [
        'Back to index: [[' + index_title + ']]\n\n' + str(file_id) + '\n\n',
        '# ' + title + '\n\n',
        ''.join(line + '\n' for line in body_lines),
    ]
    # Only emit the tag line when something matched; an empty list would
    # otherwise leave a lone "#" in the note.
    if section_tags:
        parts.append('\n\n#' + ' #'.join(sorted(section_tags)))
    parts.append('\n\n## Literature\n\n* ' + author)
    # Remove > 2 newlines and strip to maximum 2 nl
    content = re.sub(r'\n{3,}', '\n\n', ''.join(parts))

    # Keep only letters, digits, space, dash and underscore so the title is
    # safe to use as a filename.
    filename = "".join(
        c for c in title if c.isalpha() or c.isdigit() or c in ' -_'
    ).rstrip()
    try:
        # surrogateescape mirrors the reader, so bytes that were not valid
        # UTF-8 in the source are written back unchanged instead of raising.
        with open(os.path.join(outdir, filename + '.md'), "w",
                  encoding="utf-8", errors="surrogateescape") as f:
            f.write(content)
    except OSError:
        errors.append("Could not create file " + filename + "!")
        return None

    files_created += 1
    return (filename, file_id)


def splitFile(infile, outdir, author):
    """Split one excerpt file into one note per ``####`` section plus an index.

    Every line starting with ``####`` opens a new section; the text up to the
    next such line (or the end of the file) becomes one note in ``outdir``.
    If at least one note was written, an index file named ``§§ <basename of
    infile>`` linking to all of them is written as well.

    Parameters:
        infile:  path of the file to split.
        outdir:  directory the notes and the index are written to.
        author:  fallback literature entry; replaced by the first bullet
                 below a "Literature"/"References" heading if the file has one.

    Reads the global ``tags`` and updates the globals ``file_id``,
    ``files_created``, ``warnings`` and ``errors``. Returns ``None``.
    """
    global files_created

    # Read the file
    if not os.path.exists(infile):
        errors.append("File " + infile + " does not exist!")
        return
    with open(infile, 'r', encoding='utf-8', errors="surrogateescape") as f:
        text = f.read()

    indexfile = '§§ ' + os.path.basename(infile) # The index file
    index_title = os.path.splitext(indexfile)[0] # Index name without extension
    # All files that should be added to the index file
    files_for_index = []

    # Is a correct literature/references thingy in the file? If so, overwrite
    # the filename-based author var
    m = re.search(r"#{1,6} (?:Literature?|References?)\n{1,3}\* (.+)", text)
    if m:
        author = m.group(1) # First match contains the author
    else:
        warnings.append("No Literature found in file " + os.path.basename(infile))

    # Lines are lowercased before searching, so compare against lowercased
    # tags too; the tag is still emitted exactly as written in tags.txt.
    tag_needles = [(tag, tag.lower()) for tag in tags]

    title = ''
    body_lines = []
    section_tags = set()
    for line in text.splitlines():
        if line.startswith('####'):
            # A new heading closes the section collected so far
            entry = _write_zettel(outdir, title, body_lines, section_tags,
                                  index_title, author)
            if entry is not None:
                files_for_index.append(entry)
            # Heading as filename + title (skips "#### ")
            title = line[5:]
            body_lines = []
            section_tags = set()
            continue # Don't re-add the heading line itself
        body_lines.append(line)
        # Find tags within this part of the document
        line_lower = line.lower()
        section_tags.update(
            tag for tag, needle in tag_needles if needle in line_lower
        )

    # The last section is not followed by another heading, so it has to be
    # written here; it contains everything up to the end of the file.
    entry = _write_zettel(outdir, title, body_lines, section_tags,
                          index_title, author)
    if entry is not None:
        files_for_index.append(entry)

    # After the file has been split, generate the index file, but only if any
    # files have been generated
    if files_for_index:
        index_contents = '# ' + index_title + '\n\n'
        index_contents += ''.join(
            '* [[' + str(note_id) + ']] ' + name + '\n'
            for name, note_id in files_for_index
        )
        index_contents += '\n\n## Literature\n\n* ' + author
        try:
            with open(os.path.join(outdir, indexfile), 'w',
                      encoding="utf-8", errors="surrogateescape") as f:
                f.write(index_contents)
        except OSError:
            errors.append("Could not create file " + indexfile + "!")
        else:
            files_created += 1


# Walk the input tree top-down and split every file that is found.
for dirname, dirnames, filenames in os.walk(input_dir):
    # First remove dot-directories. os.walk only skips a directory if it is
    # removed from this very list object, so assign to the slice; removing
    # entries while looping over the list would skip the entry after each
    # removal.
    dirnames[:] = [sub for sub in dirnames if not sub.startswith('.')]

    # Now deal with all files
    for filename in filenames:
        files_processed += 1
        # "\r" rewrites the same console line as a simple progress display
        print("\rProcessed " + str(files_processed) + " files!", end="")
        # Default author is the name of the directory containing the file
        author = os.path.basename(dirname)
        if author == 'Single Page Excerpts':
            # For all my single page excerpts use the filename (without its
            # extension) instead:
            author = os.path.splitext(filename)[0]
        splitFile(os.path.join(dirname, filename), output_dir, author)

# Terminate the progress line before printing the summary
print("")
print("In total, " + str(files_created) + " files were created.")

if warnings:
    print("There were " + str(len(warnings)) + " warnings:")
    for warn in warnings:
        print(warn)
else:
    print("No warnings encountered.")

if errors:
    print("There were " + str(len(errors)) + " errors:")
    for err in errors:
        print(err)
else:
    print("No errors encountered.")

print("Done!")
	# Convert excerpts to Zettels

	# ATTENTION! WHILE THIS SCRIPT SADLY DOES NOT KILL FASCISTS,
	# IT MAY CERTAINLY KILL YOUR NOTES IF HANDLED WRONGLY! MAKE
	# SURE TO ONLY TEST ON A BACKUP COPY! NEVER ON PRODUCTION.
	# I DID NOT CHECK THE SCRIPT AGAIN BEFORE UPLOADING!
	# IN CASE OF AN EMERGENCY, I HAVE NO IDEA WHAT TO DO!

	import os
	import sys
	import re

	# --- Command line handling -------------------------------------------------
	# Usage: conv.py <input_dir> [<output_dir>]
	if len(sys.argv) < 2:
		print("No path given! Please enter a valid path to split files from!")
		# sys.exit instead of the interactive-only exit() helper, and a
		# non-zero status so callers can tell that nothing was converted.
		sys.exit(1)

	input_dir = sys.argv[1] # First argv is command name, second is the dir
	output_dir = './out'

	# Statistics and global variables (modified by splitFile and the main loop)
	files_created = 0
	files_processed = 0
	# Base for the numeric IDs written into every generated note; splitFile
	# increments it before each use. NOTE(review): looks like a YYYYMMDDhhmmss
	# timestamp (2017-11-23 00:00:00) -- confirm.
	file_id = 20171123000000
	warnings = [] # Holds all warnings
	errors = [] # Holds all errors

	if len(sys.argv) > 2:
		print("Output dir given! Using " + sys.argv[2])
		output_dir = sys.argv[2]

	# Make sure the output directory exists
	if not os.path.exists(output_dir):
		print("Output directory does not exist! Creating ...")
		os.makedirs(output_dir)

	print("Searching directory " + input_dir)

	# First we have to parse all tags (tags.txt simply contains tags one wants
	# to find within the files, separated by newlines). The file is looked up
	# in the current working directory, not next to the script.
	tags_path = os.path.join(os.getcwd(), 'tags.txt')
	try:
		with open(tags_path, 'r', encoding="utf-8", errors="surrogateescape") as f:
			tags = f.read().splitlines()
	except OSError as err:
		print("Could not read tags file " + tags_path + ": " + str(err))
		sys.exit(1)

	# Some post-processing: strip surrounding whitespace, remove tags < 2
	# characters and make unique (sorted so every run sees the same order)
	tags = sorted({tag.strip() for tag in tags if len(tag.strip()) > 1})

	print("There are " + str(len(tags)) + " tags to choose from!")

	def _write_zettel(outdir, title, body_lines, section_tags, index_title, author):
		"""Write a single section as its own note file.

		The note consists of a backlink to the index, a freshly allocated ID,
		the heading, the section text, the matched tags (if any) and a
		literature footer naming ``author``.

		Increments the global ``file_id`` (even when writing fails) and, on
		success, the global ``files_created``. Failures are recorded in the
		global ``errors`` list.

		Returns ``(filename, file_id)`` for the index, or ``None`` when the
		section has no title or the file could not be written.
		"""
		global file_id
		global files_created

		# Text before the first heading, or below an empty heading, has no
		# title and is not turned into a note.
		if not title:
			return None

		file_id += 1
		parts = [
			'Back to index: [[' + index_title + ']]\n\n' + str(file_id) + '\n\n',
			'# ' + title + '\n\n',
			''.join(line + '\n' for line in body_lines),
		]
		# Only emit the tag line when something matched; an empty list would
		# otherwise leave a lone "#" in the note.
		if section_tags:
			parts.append('\n\n#' + ' #'.join(sorted(section_tags)))
		parts.append('\n\n## Literature\n\n* ' + author)
		# Remove > 2 newlines and strip to maximum 2 nl
		content = re.sub(r'\n{3,}', '\n\n', ''.join(parts))

		# Keep only letters, digits, space, dash and underscore so the title
		# is safe to use as a filename.
		filename = "".join(
			c for c in title if c.isalpha() or c.isdigit() or c in ' -_'
		).rstrip()
		try:
			# surrogateescape mirrors the reader, so bytes that were not valid
			# UTF-8 in the source are written back unchanged instead of raising.
			with open(os.path.join(outdir, filename + '.md'), "w",
					encoding="utf-8", errors="surrogateescape") as f:
				f.write(content)
		except OSError:
			errors.append("Could not create file " + filename + "!")
			return None

		files_created += 1
		return (filename, file_id)


	def splitFile(infile, outdir, author):
		"""Split one excerpt file into one note per ``####`` section plus an index.

		Every line starting with ``####`` opens a new section; the text up to
		the next such line (or the end of the file) becomes one note in
		``outdir``. If at least one note was written, an index file named
		``§§ <basename of infile>`` linking to all of them is written as well.

		Parameters:
			infile:  path of the file to split.
			outdir:  directory the notes and the index are written to.
			author:  fallback literature entry; replaced by the first bullet
			         below a "Literature"/"References" heading if the file
			         has one.

		Reads the global ``tags`` and updates the globals ``file_id``,
		``files_created``, ``warnings`` and ``errors``. Returns ``None``.
		"""
		global files_created

		# Read the file
		if not os.path.exists(infile):
			errors.append("File " + infile + " does not exist!")
			return
		with open(infile, 'r', encoding='utf-8', errors="surrogateescape") as f:
			text = f.read()

		indexfile = '§§ ' + os.path.basename(infile) # The index file
		index_title = os.path.splitext(indexfile)[0] # Index name without extension
		# All files that should be added to the index file
		files_for_index = []

		# Is a correct literature/references thingy in the file? If so,
		# overwrite the filename-based author var. The "|" must be a plain
		# alternation; an escaped "\|" would only match a literal pipe.
		m = re.search(r"#{1,6} (?:Literature?|References?)\n{1,3}\* (.+)", text)
		if m:
			author = m.group(1) # First match contains the author
		else:
			warnings.append("No Literature found in file " + os.path.basename(infile))

		# Lines are lowercased before searching, so compare against lowercased
		# tags too; the tag is still emitted exactly as written in tags.txt.
		tag_needles = [(tag, tag.lower()) for tag in tags]

		title = ''
		body_lines = []
		section_tags = set()
		for line in text.splitlines():
			if line.startswith('####'):
				# A new heading closes the section collected so far
				entry = _write_zettel(outdir, title, body_lines, section_tags,
						index_title, author)
				if entry is not None:
					files_for_index.append(entry)
				# Heading as filename + title (skips "#### ")
				title = line[5:]
				body_lines = []
				section_tags = set()
				continue # Don't re-add the heading line itself
			body_lines.append(line)
			# Find tags within this part of the document
			line_lower = line.lower()
			section_tags.update(
				tag for tag, needle in tag_needles if needle in line_lower
			)

		# The last section is not followed by another heading, so it has to be
		# written here; it contains everything up to the end of the file.
		entry = _write_zettel(outdir, title, body_lines, section_tags,
				index_title, author)
		if entry is not None:
			files_for_index.append(entry)

		# After the file has been split, generate the index file, but only if
		# any files have been generated
		if files_for_index:
			index_contents = '# ' + index_title + '\n\n'
			index_contents += ''.join(
				'* [[' + str(note_id) + ']] ' + name + '\n'
				for name, note_id in files_for_index
			)
			index_contents += '\n\n## Literature\n\n* ' + author
			try:
				with open(os.path.join(outdir, indexfile), 'w',
						encoding="utf-8", errors="surrogateescape") as f:
					f.write(index_contents)
			except OSError:
				errors.append("Could not create file " + indexfile + "!")
			else:
				files_created += 1


	# Walk the input tree top-down and split every file that is found.
	for dirname, dirnames, filenames in os.walk(input_dir):
		# First remove dot-directories. os.walk only skips a directory if it
		# is removed from this very list object, so assign to the slice;
		# removing entries while looping over the list would skip the entry
		# after each removal.
		dirnames[:] = [sub for sub in dirnames if not sub.startswith('.')]

		# Now deal with all files
		for filename in filenames:
			files_processed += 1
			# "\r" rewrites the same console line as a simple progress display
			print("\rProcessed " + str(files_processed) + " files!", end="")
			# Default author is the name of the directory containing the file
			author = os.path.basename(dirname)
			if author == 'Single Page Excerpts':
				# For all my single page excerpts use the filename (without
				# its extension) instead:
				author = os.path.splitext(filename)[0]
			splitFile(os.path.join(dirname, filename), output_dir, author)

	# Terminate the progress line before printing the summary
	print("")
	print("In total, " + str(files_created) + " files were created.")

	if warnings:
		print("There were " + str(len(warnings)) + " warnings:")
		for warn in warnings:
			print(warn)
	else:
		print("No warnings encountered.")

	if errors:
		print("There were " + str(len(errors)) + " errors:")
		for err in errors:
			print(err)
	else:
		print("No errors encountered.")

	print("Done!")