Skip to content

Instantly share code, notes, and snippets.

@nathanlesage
Created February 22, 2020 08:10
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save nathanlesage/7fa183a6b333ca6e4056e812c975acf0 to your computer and use it in GitHub Desktop.
Save nathanlesage/7fa183a6b333ca6e4056e812c975acf0 to your computer and use it in GitHub Desktop.
Python converter Excerpts -> Zettels
# Convert excerpts to Zettels
# ATTENTION! WHILE THIS SCRIPT SADLY DOES NOT KILL FASCISTS,
# IT MAY CERTAINLY KILL YOUR NOTES IF HANDLED WRONGLY! MAKE
# SURE TO ONLY TEST ON A BACKUP COPY! NEVER ON PRODUCTION.
# I DID NOT CHECK THE SCRIPT AGAIN BEFORE UPLOADING!
# IN CASE OF AN EMERGENCY, I HAVE NO IDEA WHAT TO DO!
import os
import sys
import re
# --- Script setup: parse command-line arguments and load the tag list ---

# Require at least one argument: the directory holding the files to split.
if len(sys.argv) < 2:
    print("No path given! Please enter a valid path to split files from!")
    sys.exit(1)  # sys.exit with a non-zero status; exit() is a site-module
                 # convenience and not guaranteed to exist in every runtime

input_dir = sys.argv[1]  # First argv is command name, second is the dir
output_dir = './out'

# Statistics and global variables
files_created = 0
files_processed = 0
file_id = 20171123000000  # timestamp-like counter, incremented per created note
warnings = []  # Holds all warnings
errors = []  # Holds all errors

# Optional second argument overrides the default output directory.
if len(sys.argv) > 2:
    print("Output dir given! Using " + sys.argv[2])
    output_dir = sys.argv[2]

# Make sure the output directory exists
if not os.path.exists(output_dir):
    print("Output directory does not exist! Creating ...")
    os.makedirs(output_dir)

print("Searching directory " + input_dir)

# First we have to parse all tags (tags.txt simply contains tags one wants to
# find within the files, separated by newlines). Use a context manager so the
# handle is closed even if reading fails.
with open(os.path.join(os.getcwd(), 'tags.txt'), 'r', encoding="utf-8",
          errors="surrogateescape") as f:
    tags = f.read().splitlines()

# Some post-processing: Remove tags < 2 characters and make unique
tags = [i for i in tags if len(i) > 1]
tags = list(set(tags))
print("There are " + str(len(tags)) + " tags to choose from!")
def splitFile(infile, outdir, author):
    """Split one excerpt file into per-heading Zettel notes plus an index file.

    Every line starting with '####' begins a new note. Each finished note gets
    a back-link to the index file, a fresh numeric ID, every tag (from the
    module-level ``tags`` list) found in its lines, and a Literature section
    naming *author*. Notes and an index file ('§§ <basename>') are written
    into *outdir*.

    Side effects: increments the module globals ``file_id`` and
    ``files_created`` and appends to the module-level ``warnings`` /
    ``errors`` lists.
    """
    # Declare global ID variable and files_created var to be able to modify them
    global file_id
    global files_created

    # Read the file; record an error and bail out if it does not exist.
    if not os.path.exists(infile):
        errors.append("File " + infile + " does not exist!")
        return
    with open(infile, 'r', encoding='utf-8', errors="surrogateescape") as fh:
        text = fh.read()

    file_tags = []  # tags found inside the note currently being collected
    indexfile = '§§ ' + os.path.basename(infile)  # The index file
    files_for_index = []  # (filename, id) pairs to list in the index file

    # Is a correct literature/references section in the file? If so, overwrite
    # the filename-based author var.
    m = re.findall(r"#{1,6} (?:Literature?|References?)\n{1,3}\* (.+)", text)
    if m:
        author = m[0]  # First match contains the author
    else:
        warnings.append("No Literature found in file " + os.path.basename(infile))

    def _flush_note(note_title, note_content, note_tags):
        """Write one finished note to *outdir* and register it for the index."""
        global file_id, files_created
        # Make the tags unique by piping them through a set.
        note_tags = list(set(note_tags))
        # Create a backlink to the index file, and an ID for this file.
        file_id += 1
        body = ('Back to index: [[' + indexfile[:-3] + ']]\n\n'
                + str(file_id) + '\n\n' + note_content)
        # Append the tags and the author information.
        body = body + '\n\n#' + ' #'.join(note_tags)
        body = body + '\n\n## Literature\n\n* ' + author
        # Collapse runs of 3+ newlines down to exactly two.
        body = re.sub(r'\n{3,}', '\n\n', body)
        # Make the title filename-safe (keep letters, digits, space, -, _).
        fname = "".join(c for c in note_title
                        if c.isalpha() or c.isdigit() or c in ' -_').rstrip()
        try:
            # Context manager closes the handle even on a write failure.
            with open(os.path.join(outdir, fname + '.md'), "w",
                      encoding="utf-8") as out:
                out.write(body)
            files_created += 1
            files_for_index.append((fname, file_id))
        except OSError:  # was a bare except: — catch only I/O failures
            errors.append("Could not create file " + fname + "!")

    # Walk the file line by line; every '####' heading closes the previous
    # note (writing it to disk) and opens a new one.
    content = ''
    title = ''
    for line in text.splitlines():
        if line.startswith('####'):
            # First write a file with the current title + content.
            if len(title) > 0 and len(content) > 0:
                _flush_note(title, content, file_tags)
            # Heading becomes the next note's filename + title
            # (line[5:] assumes the '#### ' prefix including the space).
            title = line[5:]
            content = '# ' + title + '\n\n'
            file_tags = []  # Reset the tags for the new note
            continue  # Don't re-add the heading line itself
        # Append each non-heading line to the current note.
        content = content + line + '\n'
        # Find tags within this part of the document (case-insensitive).
        line_lower = line.lower()
        for tag in tags:
            if line_lower.find(tag) > -1:
                file_tags.append(tag)

    # BUG FIX: the original only flushed a note when the *next* heading
    # appeared, silently dropping the final section of every file. Flush the
    # trailing note as well.
    if len(title) > 0 and len(content) > 0:
        _flush_note(title, content, file_tags)

    # After the file has been split, generate the index file, but only if any
    # notes were actually written.
    if len(files_for_index) > 0:
        index_contents = '# ' + indexfile[:-3]  # Exclude the .md in the title
        index_contents += '\n\n'
        for name, note_id in files_for_index:  # note_id: don't shadow id()
            index_contents += '* [[' + str(note_id) + ']] ' + name + '\n'
        index_contents += '\n\n## Literature\n\n* ' + author
        with open(os.path.join(outdir, indexfile), 'w',
                  encoding="utf-8") as out:
            out.write(index_contents)
        files_created += 1
# --- Main driver: walk the input tree and split every file found ---
for dirname, dirnames, filenames in os.walk(input_dir):
    # Prune dot-directories in place so os.walk does not descend into them.
    # BUG FIX: the original removed items from `dirnames` while iterating it,
    # which skips the element after each removal (consecutive dot-directories
    # survived). Slice assignment keeps the in-place pruning semantics that
    # os.walk requires while filtering correctly.
    dirnames[:] = [d for d in dirnames if not d.startswith('.')]
    # Now deal with all files
    for filename in filenames:
        files_processed += 1
        # '\r' rewrites the same console line for a live progress counter.
        print("\rProcessed " + str(files_processed) + " files!", end="")
        author = os.path.basename(dirname)
        if author == 'Single Page Excerpts':
            # For all my single page excerpts use the filename instead:
            author = filename[:-3]  # strip the '.md' extension
        splitFile(os.path.join(dirname, filename), output_dir, author)

print("")  # terminate the progress line
print("In total, " + str(files_created) + " files were created.")

# Report everything collected during the run.
if len(warnings) > 0:
    print("There were " + str(len(warnings)) + " warnings:")
    for warn in warnings:
        print(warn)
else:
    print("No warnings encountered.")

if len(errors) > 0:
    print("There were " + str(len(errors)) + " errors:")
    for err in errors:
        print(err)
else:
    print("No errors encountered.")

print("Done!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment