-
-
Save nathanlesage/7fa183a6b333ca6e4056e812c975acf0 to your computer and use it in GitHub Desktop.
Python converter Excerpts -> Zettels
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert excerpts to Zettels
# ATTENTION! WHILE THIS SCRIPT SADLY DOES NOT KILL FASCISTS,
# IT MAY CERTAINLY KILL YOUR NOTES IF HANDLED WRONGLY! MAKE
# SURE TO ONLY TEST ON A BACKUP COPY! NEVER ON PRODUCTION.
# I DID NOT CHECK THE SCRIPT AGAIN BEFORE UPLOADING!
# IN CASE OF AN EMERGENCY, I HAVE NO IDEA WHAT TO DO!
import os | |
import sys | |
import re | |
# --- Command line handling -------------------------------------------------
# The script needs at least the directory to read excerpts from. Exit with a
# non-zero status so calling shells/scripts can detect the usage error
# (sys.exit is used because the bare exit() helper is only provided by the
# optional `site` module).
if len(sys.argv) < 2:
    print("No path given! Please enter a valid path to split files from!")
    sys.exit(1)

input_dir = sys.argv[1]  # First argv is command name, second is the dir
output_dir = './out'

# Statistics and global variables
files_created = 0
files_processed = 0
file_id = 20171123000000  # Running ID; incremented once per generated Zettel
warnings = []  # Holds all warnings
errors = []  # Holds all errors

# An optional second argument overrides the default output directory
if len(sys.argv) > 2:
    print("Output dir given! Using " + sys.argv[2])
    output_dir = sys.argv[2]

# Make sure the output directory exists
if not os.path.exists(output_dir):
    print("Output directory does not exist! Creating ...")
    os.makedirs(output_dir)

print("Searching directory " + input_dir)

# First we have to parse all tags (tags.txt simply contains tags one wants to
# find within the files, separated by newlines). The file is looked up in the
# current working directory. The context manager guarantees the handle is
# closed even if reading fails.
with open(os.path.join(os.getcwd(), 'tags.txt'), 'r', encoding="utf-8", errors="surrogateescape") as f:
    tags = f.read().splitlines()

# Some post-processing: Remove tags < 2 characters and make unique. Sorting
# keeps the list order stable between runs.
# NOTE: tags are later searched for in lower-cased lines, so entries in
# tags.txt containing upper-case characters will never match.
tags = sorted({tag for tag in tags if len(tag) > 1})

print("There are " + str(len(tags)) + " tags to choose from!")
# Matches a heading line that opens the literature/references section of an
# excerpt file (same heading pattern that is used to detect the author).
_LITERATURE_HEADING = re.compile(r"#{1,6} (?:Literature?|References?)")


def _writeZettel(outdir, indexfile, title, content, file_tags, author):
    """Write one Zettel to ``outdir`` and return its index entry.

    Consumes the next global ``file_id``, decorates ``content`` with a
    backlink to the index file, the ID, the found tags and the literature
    entry, and writes the result to ``<sanitised title>.md``.

    Returns a ``(filename, file_id)`` tuple on success. On failure the
    problem is recorded in the global ``errors`` list and ``None`` is
    returned.
    """
    global file_id
    global files_created

    # Create a backlink to the index file, and also an ID for this file
    file_id += 1
    body = 'Back to index: [[' + indexfile[:-3] + ']]\n\n' + str(file_id) + '\n\n' + content

    # Append the tags (unique, in stable order). Without any tags nothing is
    # appended, otherwise a lone "#" would end up in the Zettel.
    if file_tags:
        body += '\n\n#' + ' #'.join(sorted(set(file_tags)))

    # Append the author information
    body += '\n\n## Literature\n\n* ' + author

    # Remove > 2 newlines and strip to maximum 2 nl
    body = re.sub(r'\n{3,}', '\n\n', body)

    # Keep only characters that are safe in a filename
    filename = "".join(
        c for c in title if c.isalpha() or c.isdigit() or c in ' -_'
    ).rstrip()

    try:
        with open(os.path.join(outdir, filename + '.md'), "w", encoding="utf-8") as f:
            f.write(body)
    except (OSError, UnicodeError):
        # UnicodeError covers surrogate-escaped bytes from the (lenient)
        # read that cannot be encoded as strict UTF-8 on write.
        errors.append("Could not create file " + filename + "!")
        return None

    files_created += 1
    return (filename, file_id)


def splitFile(infile, outdir, author):
    """Split one excerpt file into single Zettel files plus an index file.

    Every heading of level four (a line starting with ``####``) starts a new
    Zettel that runs until the next such heading, the literature/references
    heading, or the end of the file. Each Zettel gets a backlink to the
    index, a unique ID, all tags (from the global ``tags`` list) found in its
    lines and a literature entry. If at least one Zettel was written, an
    index file named ``§§ <basename of infile>`` linking to all of them is
    created as well.

    Parameters:
        infile:  Path of the excerpt file to split.
        outdir:  Directory the Zettel and index files are written to.
        author:  Fallback literature entry; overridden by the first bullet
                 of a ``Literature``/``References`` section if the file has
                 one.

    Returns nothing. Problems are collected in the global ``errors`` and
    ``warnings`` lists; the global counters ``file_id`` and ``files_created``
    are updated.
    """
    # Declare global files_created var to be able to modify it
    global files_created

    # Read the file
    if not os.path.exists(infile):
        errors.append("File " + infile + " does not exist!")
        return

    with open(infile, 'r', encoding='utf-8', errors="surrogateescape") as f:
        file = f.read()

    indexfile = '§§ ' + os.path.basename(infile)  # The index file

    # All files that should be added to the index file
    files_for_index = []

    # Is a correct literature/references thingy in the file? If so, overwrite
    # the filename-based author var
    m = re.findall(r"#{1,6} (?:Literature?|References?)\n{1,3}\* (.+)", file)
    if m:
        author = m[0]  # First match contains the author
    else:
        warnings.append("No Literature found in file " + os.path.basename(infile))

    # State of the section that is currently being collected
    title = ''
    content = ''
    file_tags = []

    def flush():
        # Write the collected section (if there is one) and remember it for
        # the index file.
        if title and content:
            entry = _writeZettel(outdir, indexfile, title, content, file_tags, author)
            if entry is not None:
                files_for_index.append(entry)

    for line in file.splitlines():
        if _LITERATURE_HEADING.fullmatch(line):
            # The literature section ends the current Zettel; its own lines
            # are not part of any Zettel (the entry is re-added on write).
            flush()
            title = ''
            content = ''
            file_tags = []
            continue

        if line.startswith('####'):
            # First write a file with current title + content
            flush()
            # Heading as filename + title. Strip all leading hashes and the
            # surrounding whitespace so deeper headings or a missing space
            # after the hashes do not mangle the title.
            title = line.lstrip('#').strip()
            content = '# ' + title + '\n\n'
            # Reset the file tags
            file_tags = []
            continue  # Don't re-add the respective line again

        # Append each line afterwards
        content = content + line + '\n'

        # Find tags within this part of the document
        line_lower = line.lower()
        file_tags.extend(tag for tag in tags if tag in line_lower)

    # The last section is not followed by another heading, so it has to be
    # written explicitly - otherwise the final excerpt would be lost.
    flush()

    # After the file has been split, generate the index file, but only if any
    # files have been generated
    if files_for_index:
        index_contents = '# ' + indexfile[:-3]  # Exclude the .md in the title
        index_contents += '\n\n'
        index_contents += ''.join(
            '* [[' + str(zettel_id) + ']] ' + name + '\n'
            for name, zettel_id in files_for_index
        )
        index_contents += '\n\n## Literature\n\n* ' + author
        try:
            with open(os.path.join(outdir, indexfile), 'w', encoding="utf-8") as f:
                f.write(index_contents)
        except (OSError, UnicodeError):
            errors.append("Could not create file " + indexfile + "!")
        else:
            files_created += 1
# Walk the input directory recursively and split every file that is found
for dirname, dirnames, filenames in os.walk(input_dir):
    # First remove dot-directories. The list is replaced in place (slice
    # assignment) so os.walk does not descend into them; removing entries
    # while looping over the same list would skip the entry following each
    # removed one.
    dirnames[:] = [sub for sub in dirnames if not sub.startswith('.')]

    # Now deal with all files
    for filename in filenames:
        files_processed += 1
        print("\rProcessed " + str(files_processed) + " files!", end="")
        # By default the containing directory's name is used as the author
        author = os.path.basename(dirname)
        if author == 'Single Page Excerpts':
            # For all my single page excerpts use the filename instead
            # (without its three-character extension such as ".md"):
            author = filename[:-3]
        splitFile(os.path.join(dirname, filename), output_dir, author)

# Terminate the "\r"-updated progress line
print("")
print("In total, " + str(files_created) + " files were created.")

# Report everything that was collected while splitting
if warnings:
    print("There were " + str(len(warnings)) + " warnings:")
    for warn in warnings:
        print(warn)
else:
    print("No warnings encountered.")

if errors:
    print("There were " + str(len(errors)) + " errors:")
    for err in errors:
        print(err)
else:
    print("No errors encountered.")

print("Done!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment