Skip to content

Instantly share code, notes, and snippets.

@kmelve
Last active August 29, 2015 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmelve/98189d154376ac966e05 to your computer and use it in GitHub Desktop.
Save kmelve/98189d154376ac966e05 to your computer and use it in GitHub Desktop.
atekst parser
# -*- coding: utf-8 -*-
import os
# make list with the files
def loop_files(directory):
    """Return the names of the text files located directly in *directory*.

    Only regular files whose name ends in ``.txt`` are kept.  The returned
    values are bare file names (they are NOT joined with *directory*), in
    the order ``os.listdir`` yields them.  The list is also printed to
    stdout as a quick sanity check.
    """
    files = [
        fn for fn in os.listdir(directory)
        # Require the dot so names that merely end in "txt" are skipped, and
        # drop directories, which cannot be opened as text files later on.
        if fn.endswith('.txt') and os.path.isfile(os.path.join(directory, fn))
    ]
    # A single-argument print() emits the same text on Python 2 and 3.  The
    # double space mirrors what the old statement `print 'Files: ', ...`
    # produced.
    print('Files:  ' + ', '.join(files))
    return files
def line_parser(file):
    """Split the text file at path *file* into a list of article strings.

    A line that starts with 78 or more ``=`` characters is treated as a
    separator: it is discarded and starts a new item.  Every other line is
    appended to the current item after removing all ``>`` characters and
    newline characters, so each item ends up as one unbroken string.

    Item 0 starts out as the literal string ``"first"`` and collects
    everything before the first separator (the original author's note says
    this is the index); that prefix is kept for compatibility with existing
    output.  The number of items is printed to stdout.

    Returns the list of items; a file without separators yields one item.
    """
    separator = '=' * 78
    # Collect each article as a list of fragments and join once at the end,
    # instead of growing a string with += for every line.
    chunks = [['first']]
    with open(file, 'r') as f:
        for line in f:
            if line.startswith(separator):
                chunks.append([])  # separator line: open a new article
            else:
                # str.replace works on both Python 2 and 3, unlike the
                # two-argument str.translate(None, ...) form (Python 2 only).
                chunks[-1].append(line.replace('>', '').replace('\n', ''))
    articles = [''.join(parts) for parts in chunks]
    # Single-argument print() gives identical output on Python 2 and 3.
    print("You've got %d articles. Good for you!" % len(articles))
    return articles
# Directory that is scanned for .txt files.  Named source_dir rather than
# `dir` so the builtin dir() is not shadowed.
source_dir = '.'
for f in loop_files(source_dir):
    # loop_files returns bare file names, so join them with the directory;
    # otherwise any source_dir other than the current one fails to open.
    line_parser(os.path.join(source_dir, f))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment