Skip to content

Instantly share code, notes, and snippets.

@ehudbaumatz
Last active July 19, 2016 08:17
Show Gist options
  • Save ehudbaumatz/d1abae23afb56f40c3c001ab43c365ef to your computer and use it in GitHub Desktop.
Save ehudbaumatz/d1abae23afb56f40c3c001ab43c365ef to your computer and use it in GitHub Desktop.
from readability import Document
import os
import sys
import io
# Batch-convert every file in the input directory (first command-line
# argument) into the output directory (second argument), keeping file names.
# Files are read and written as UTF-8. Each file's text is passed through
# readability's Document and replaced by "<short_title()>\n<summary()>";
# files whose name contains 'batch_fails' skip that step and are written
# through unchanged.
# NOTE(review): the original indentation was lost in extraction; this assumes
# the write happens for every file (including 'batch_fails' ones) -- confirm.
if len(sys.argv) < 3:
    # Previously a missing output directory raised IndexError inside the
    # per-file try block and was swallowed, so the script silently did nothing.
    sys.exit('usage: %s INPUT_DIR OUTPUT_DIR' % sys.argv[0])

input_dir = sys.argv[1]
output_dir = sys.argv[2]

for fn in os.listdir(input_dir):
    try:
        # Read the whole input first so its handle is closed before writing.
        with io.open(os.path.join(input_dir, fn), encoding='utf-8') as f:
            text = f.read()

        if 'batch_fails' not in fn:
            doc = Document(text)
            readable_article = doc.summary()
            readable_title = doc.short_title()
            text = readable_title + '\n' + readable_article

        with io.open(os.path.join(output_dir, fn), 'w', encoding='utf-8') as writer:
            writer.write(text)
    except Exception as exc:
        # Stay best-effort: one unreadable or unparsable file must not abort
        # the batch, but report it instead of hiding the failure. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        # %r keeps the message safe for non-ASCII names and error text.
        sys.stderr.write('skipping %r: %r\n' % (fn, exc))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment