Skip to content

Instantly share code, notes, and snippets.

@gnufs
Created June 21, 2014 23:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gnufs/199fa9f1ae2df1c02a55 to your computer and use it in GitHub Desktop.
Save gnufs/199fa9f1ae2df1c02a55 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys, datetime
import requests
from StringIO import StringIO
from lxml.html import parse
import PyRSS2Gen
# Archive page listing the columnist's articles. get_page() downloads it and
# make_rss() reuses it as the channel link of the generated feed.
PAGE = 'http://www.dailysabah.com/columns/dogan-eskinat/archive'
# XPath queries
# Each query is a %-format template: explore() substitutes the 1-based
# position of an <li> in the first <ul> of the listing for %d.
# Text of the first <span> inside the entry's link.
TITLE = '/html/body/div[1]/section/div/div[1]/div/ul[1]/li[%d]/a/span[1]/text()'
# href attribute of the entry's link; explore() prefixes it with the site root.
LINK = '/html/body/div[1]/section/div/div[1]/div/ul[1]/li[%d]/a/@href'
# Text of the second <span> inside the entry's link.
DATE = '/html/body/div[1]/section/div/div[1]/div/ul[1]/li[%d]/a/span[2]/text()'
# Text nodes directly inside the link itself; explore() uses the third match.
DESCRIPTION = '/html/body/div[1]/section/div/div[1]/div/ul[1]/li[%d]/a/text()'
def get_page():
    """Download the archive page at PAGE and return it as a file-like object.

    Returns:
        A StringIO wrapping the raw response body, suitable for explore().

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with a 4xx/5xx status.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(PAGE, timeout=30)
    # Fail here on an HTTP error instead of handing an error page to the parser.
    response.raise_for_status()
    return StringIO(response.content)
def explore(content):
    """Extract up to ten article entries from the archive page.

    Args:
        content: file-like object holding the page HTML, as returned by
            get_page().

    Returns:
        A list of dicts with the keys 'title', 'link', 'date' and
        'description', in the order the entries appear on the page.

    Raises:
        IndexError: if no entry is found at all, or if an entry that is
            present lacks one of the expected nodes.
    """
    root = parse(content).getroot()
    articles = []
    for index in range(1, 11):
        titles = root.xpath(TITLE % index)
        if not titles:
            # The list holds fewer than ten entries: keep what was collected
            # instead of failing on the first missing <li>.
            break
        articles.append({
            'title' : titles[0],
            'link' : 'http://www.dailysabah.com/' + root.xpath(LINK % index)[0],
            'date' : root.xpath(DATE % index)[0],
            # Third text node directly inside the link; presumably the first
            # two are the whitespace around the spans -- TODO confirm.
            'description' : root.xpath(DESCRIPTION % index)[2],
        })
    if not articles:
        # Nothing matched: most likely the page layout changed. Keep failing
        # loudly so an empty feed is never produced silently.
        raise IndexError('no articles found at the expected XPath locations')
    print(articles)
    return articles
def make_rss(articles):
    """Render the scraped articles as an RSS document.

    Args:
        articles: iterable of dicts with the keys 'title', 'link',
            'description' and 'date', as produced by explore().

    Returns:
        The feed as serialised by PyRSS2Gen's RSS2.to_xml().
    """
    rss_items = [
        PyRSS2Gen.RSSItem(
            title=article['title'],
            link=article['link'],
            description=article['description'],
            guid=PyRSS2Gen.Guid(article['link']),
            # NOTE(review): this is the date text exactly as scraped from the
            # page; it is presumably not in RFC 822 format -- confirm.
            pubDate=article['date'],
        )
        for article in articles
    ]
    rss = PyRSS2Gen.RSS2(
        title='Dogan Eskinat',
        link=PAGE,
        description='Latest articles written by Dogan Eskinat on Daily Sabah',
        # PyRSS2Gen labels datetimes as GMT without converting them, so the
        # build time must be taken in UTC rather than local time.
        lastBuildDate=datetime.datetime.utcnow(),
        items=rss_items,
    )
    return rss.to_xml()
def main():
    """Fetch the archive, print the RSS feed and save it to a file.

    The output path is taken from the first command-line argument; the
    script exits with a usage message when it is missing.
    """
    # Check the argument before doing any network work.
    if len(sys.argv) < 2:
        sys.exit('usage: %s OUTPUT_FILE' % sys.argv[0])
    content = get_page()
    articles = explore(content)
    # Build the feed once so the printed and the saved copy are identical
    # (each make_rss() call stamps its own lastBuildDate).
    feed = make_rss(articles)
    print(feed)
    # The with-block closes and flushes the file even if the write fails.
    with open(sys.argv[1], 'w') as output:
        output.write(feed)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment