Last active
August 29, 2015 14:13
-
-
Save kmelve/98189d154376ac966e05 to your computer and use it in GitHub Desktop.
atekst parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os | |
# Build a list of the text files in a directory
def loop_files(directory):
    """Return the names of the ``.txt`` files found directly in *directory*.

    The entries are bare names as produced by ``os.listdir`` (not paths), so
    callers must join them with *directory* before opening them unless
    *directory* is the current working directory. The names are sorted so the
    result does not depend on the listing order of the file system.

    The list is also printed to the terminal as a sanity check.
    """
    # Match on '.txt' rather than 'txt' so that names which merely end in the
    # letters "txt" (e.g. 'notatxt') are not mistaken for text files.
    files = sorted(fn for fn in os.listdir(directory) if fn.endswith('.txt'))
    # Printing one pre-built string gives the same output on Python 2 and 3.
    print('Files:  ' + ', '.join(files))
    return files
def line_parser(file):
    """Split a text export into articles and return them as a list of strings.

    The file is read line by line. A line that starts with 78 equals signs is
    treated as a separator: it is dropped and a new, empty article is started.
    Every other line is appended to the current article after removing all
    ``>`` characters and newlines, so each article ends up as one long string.

    Item 0 is seeded with the literal text ``"first"`` and collects everything
    before the first separator (the original author's note says this is the
    index of the export). The returned list therefore always has at least one
    item, and the count that is printed includes it.
    """
    separator = '=' * 78
    # Collect the pieces of each article in a list and join once at the end,
    # instead of growing a string with += for every line.
    chunks = [['first']]
    with open(file, 'r') as f:
        for line in f:
            if line.startswith(separator):
                chunks.append([])  # separator line: open a new article
            else:
                # str.replace works on both Python 2 and 3, unlike the
                # two-argument str.translate(None, ...) form.
                chunks[-1].append(line.replace('>', '').replace('\n', ''))
    articles = [''.join(parts) for parts in chunks]
    # Printing one pre-built string gives the same output on Python 2 and 3.
    print("You've got %d articles. Good for you!" % len(articles))
    return articles
# Directory that holds the text files to parse. Named source_dir rather than
# dir so that the builtin dir() is not shadowed.
source_dir = '.'  # choose source directory
for filename in loop_files(source_dir):
    # loop_files() returns bare file names, so join each one with the source
    # directory; otherwise opening fails whenever source_dir is not the
    # current working directory.
    line_parser(os.path.join(source_dir, filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment