Skip to content

Instantly share code, notes, and snippets.

@aoloe
Created October 23, 2014 08:07
Show Gist options
  • Save aoloe/bba670bfc947de55046f to your computer and use it in GitHub Desktop.
Save aoloe/bba670bfc947de55046f to your computer and use it in GitHub Desktop.
import glob
import re
import csv
# Build toc.csv: one "id,title" row for every saved page in the current
# directory whose file name looks like "index.php?id=<number>".

# Raw strings keep the regex escapes (\?, \d, \|) away from Python's own
# string-escape processing; the dot is escaped so it matches only a literal ".".
p_id = re.compile(r'index\.php\?id=(\d+)')
p_title = re.compile(r'<title>(.*) \| (.*)</title>')


def page_id(filename):
    """Return the page id embedded in *filename*, or None.

    The id is returned as a string of digits (e.g. "12" for
    "index.php?id=12"). None is returned when the name does not start with
    "index.php?id=" followed by at least one digit.
    """
    found = p_id.match(filename)
    return found.group(1) if found else None


def page_title(content):
    """Return the text after the last " | " in the <title> element, or None.

    None is returned when *content* has no single-line <title> element
    containing " | ".
    """
    found = p_title.search(content)
    return found.group(2) if found else None


def write_toc(rows, path='toc.csv'):
    """Write *rows* (an iterable of [id, title] sequences) to *path* as CSV."""
    # The csv module writes str, so the file must be opened in text mode;
    # newline='' lets the writer control line endings itself.
    with open(path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerows(rows)


toc = []
# In a glob pattern "?" is a single-character wildcard, so this can match
# more names than p_id accepts; page_id() filters those out below.
for filename in glob.glob('index.php?id*'):
    print(filename)
    number = page_id(filename)
    if number is None:
        print('  skipped, no numeric id in file name:', filename)
        continue
    print(number)
    with open(filename, "r") as page:
        content = page.read()
    title = page_title(content)
    if title is None:
        print('  skipped, no "<title>... | ...</title>" found:', filename)
        continue
    print(title)
    toc.append([number, title])

write_toc(toc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment