Skip to content

Instantly share code, notes, and snippets.

@czpython
Created April 28, 2013 07:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save czpython/5476222 to your computer and use it in GitHub Desktop.
Save czpython/5476222 to your computer and use it in GitHub Desktop.
Parses the interview files provided by http://usesthis.com
# -*- coding: utf-8 -*-
from re import match, findall, DOTALL
from os import listdir
from os.path import dirname, abspath
from unipath import Path
import yaml
import markdown
# Location of the interview posts, resolved relative to this script's own
# absolute path: <ancestor(1) of this file>/usesthis/_posts.
_SCRIPT_DIRECTORY = Path(abspath(__file__)).ancestor(1)
INTERVIEW_FILES_DIRECTORY = _SCRIPT_DIRECTORY.child('usesthis').child('_posts')
# Only directory entries whose name ends with this suffix are parsed.
INTERVIEW_FILE_EXTENSION = ".interview"

# Front-matter keys that are removed from every parsed interview.
UNWANTED_KEYS = ['slug', 'summary', 'layout', 'credits']

# Captures the YAML front matter (named group ``yml``) at the very start of a
# file, from the opening '---' up to, but not including, the closing '---'.
# Meant to be matched with re.DOTALL so '.' spans newlines.  The quantifier is
# lazy (``.*?``) so the header stops at the FIRST closing '---'; a greedy
# ``.*`` would run on to the last '---' line in the file and swallow any
# horizontal rule (and the body before it) into the YAML.
HEADER_REGEX = r"(?P<yml>^---.*?)\n---"

# Matches reference-style markdown links ``[text][ref]``.  Group 1 is the link
# text, group 2 the (optional) reference name; ``[text][]`` leaves group 2
# empty.  Raw string so the backslash escapes reach the regex engine verbatim.
WORDS_REGEX = r'\[([^\[\(\)]+)\]\[([a-z0-9\.\-]+)?\]'
def main():
    """Parse every interview file and return the extracted metadata.

    For each name returned by ``get_files()`` the file is read from
    ``INTERVIEW_FILES_DIRECTORY``, its YAML front matter is loaded into a
    dict, the keys listed in ``UNWANTED_KEYS`` are dropped, and a ``'words'``
    entry is added containing the non-empty reference names of every
    ``[text][ref]`` link found anywhere in the file.

    Returns:
        list: one dict per interview file, in the order ``get_files()``
        returned the names.

    Raises:
        ValueError: if a file does not start with a ``---`` front-matter
            block (previously this surfaced as an AttributeError on None).
    """
    interviews = []
    for filename in get_files():
        with open(INTERVIEW_FILES_DIRECTORY.child(filename), 'r') as md_file:
            md_string = md_file.read()

        header = match(HEADER_REGEX, md_string, DOTALL)
        if header is None:
            raise ValueError(
                "No YAML front matter found in interview file: %s" % filename
            )

        # safe_load instead of load: the header is plain data, and a bare
        # yaml.load() can construct arbitrary objects and requires an explicit
        # Loader argument on current PyYAML releases.  An empty header loads
        # as None, so fall back to an empty dict.
        interview = yaml.safe_load(header.group('yml')) or {}

        # Remove unwanted keys
        for key in UNWANTED_KEYS:
            interview.pop(key, None)

        # Keep only links that name a reference explicitly; ``[text][]``
        # yields an empty second group and is skipped.
        interview['words'] = [
            ref for _text, ref in findall(WORDS_REGEX, md_string) if ref
        ]
        interviews.append(interview)
    return interviews
def get_files(directory=None, extension=None):
    """Return the names of the interview files found in a directory.

    Args:
        directory: directory to scan.  Defaults to
            ``INTERVIEW_FILES_DIRECTORY`` when omitted.
        extension: filename suffix an entry must end with to be kept.
            Defaults to ``INTERVIEW_FILE_EXTENSION`` when omitted.

    Returns:
        list: bare entry names (no directory component), sorted so the
        result does not depend on the arbitrary order of ``listdir``.
    """
    if directory is None:
        directory = INTERVIEW_FILES_DIRECTORY
    if extension is None:
        extension = INTERVIEW_FILE_EXTENSION
    return sorted(
        name for name in listdir(directory) if name.endswith(extension)
    )
if __name__ == "__main__":
    # Script entry point: parse the interviews when run directly.  The list
    # returned by main() is not used here.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment