looselycoupled/xbus-502-rss-example.py

## xbus-502-rss-example.py
#!/usr/bin/env python
"""
Model answer for RSS exercise

NOTE: In order to use this program, be sure to create a sub-directory
called 'articles'

"""
##########################################################################
## Imports
##########################################################################

import os
import re

import requests
import feedparser


##########################################################################
## Module Variables/Constants
##########################################################################

# Feed that main() parses to find the articles to download
RSS_URL = 'http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'


##########################################################################
## Functions
##########################################################################

def slugify(value):
    """
    Build an ASCII slug from ``value``.

    Accented characters are decomposed (NFKD) so their base letters survive
    the ASCII conversion; any remaining non-ASCII characters are dropped.
    Characters that aren't alphanumerics, underscores, hyphens, or
    whitespace are removed, the result is stripped and lowercased, and each
    run of whitespace and/or hyphens is collapsed into a single hyphen.

    Note: This is not production code

    :param value: text to convert (e.g. an article title)
    :returns: the slug; may be empty if no usable characters remain
    """
    # imported locally so the function does not depend on a new
    # module-level import
    import unicodedata

    # decompose accented characters so that e-acute becomes "e" plus a
    # combining mark; the ASCII round-trip then discards only the mark
    value = unicodedata.normalize('NFKD', value)
    value = value.encode('ascii', 'ignore').decode('ascii')

    # raw strings keep the regex escapes out of Python's own string-escape
    # processing (non-raw versions trigger invalid-escape warnings)
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    return re.sub(r'[-\s]+', '-', value)


def save_article(title, content):
    """
    Save HTML content using a slugged version of the title as the basis for
    the filename

    The file is written as UTF-8 to ``articles/<slug>.html`` under the
    current working directory. The ``articles`` directory is created if it
    does not exist yet; an existing file with the same name is overwritten.

    :param title: article title, passed through ``slugify``
    :param content: article HTML as text
    """
    directory = os.path.join(os.getcwd(), 'articles')

    # create the target directory on demand instead of failing in open()
    os.makedirs(directory, exist_ok=True)

    path = os.path.join(directory, '%s.html' % slugify(title))

    # binary mode plus an explicit encode avoids newline translation; the
    # context manager closes the file even if the write fails
    with open(path, 'wb') as f:
        f.write(content.encode('utf-8'))


def main():
    """
    Main execution

    Parses the feed at ``RSS_URL``, downloads the page behind each entry and
    stores it with ``save_article``. A failed download is reported on stdout
    and the loop moves on to the next entry.
    """
    # grab RSS data and parse it
    feed = feedparser.parse(RSS_URL)

    # loop through each article/RSS item
    for entry in feed.entries:

        # for clarity, assign the entry ID (assumed to be the article url)
        # into a new variable
        url = entry['id']

        # fetch article using url; the timeout keeps one unresponsive server
        # from stalling the run, and network errors are caught so a single
        # bad entry does not abort the remaining downloads
        try:
            r = requests.get(url, timeout=30)
        except requests.RequestException as err:
            print('Could not retrieve file: {} ({})'.format(url, err))
            continue

        # save to disk or print an error message
        if r.ok:
            save_article(entry['title'], r.text)
        else:
            print('Could not retrieve file: {}'.format(url))


##########################################################################
## Execution
##########################################################################

# only start the download when run as a script, not when imported
if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	"""
	Model answer for RSS exercise

	NOTE: In order to use this program, be sure to create a sub-directory
	called 'articles'

	"""
	##########################################################################
	## Imports
	##########################################################################

	import os
	import re

	import requests
	import feedparser


	##########################################################################
	## Module Variables/Constants
	##########################################################################

	# Feed that main() parses to find the articles to download
	RSS_URL = 'http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'


	##########################################################################
	## Functions
	##########################################################################

	def slugify(value):
		"""
		Build an ASCII slug from ``value``.

		Accented characters are decomposed (NFKD) so their base letters
		survive the ASCII conversion; any remaining non-ASCII characters are
		dropped. Characters that aren't alphanumerics, underscores, hyphens,
		or whitespace are removed, the result is stripped and lowercased, and
		each run of whitespace and/or hyphens is collapsed into a single
		hyphen.

		Note: This is not production code

		:param value: text to convert (e.g. an article title)
		:returns: the slug; may be empty if no usable characters remain
		"""
		# imported locally so the function does not depend on a new
		# module-level import
		import unicodedata

		# decompose accented characters so that e-acute becomes "e" plus a
		# combining mark; the ASCII round-trip then discards only the mark
		value = unicodedata.normalize('NFKD', value)
		value = value.encode('ascii', 'ignore').decode('ascii')

		# raw strings keep the regex escapes out of Python's own
		# string-escape processing
		value = re.sub(r'[^\w\s-]', '', value).strip().lower()
		return re.sub(r'[-\s]+', '-', value)


	def save_article(title, content):
		"""
		Save HTML content using a slugged version of the title as the basis
		for the filename

		The file is written as UTF-8 to ``articles/<slug>.html`` under the
		current working directory. The ``articles`` directory is created if
		it does not exist yet; an existing file with the same name is
		overwritten.

		:param title: article title, passed through ``slugify``
		:param content: article HTML as text
		"""
		directory = os.path.join(os.getcwd(), 'articles')

		# create the target directory on demand instead of failing in open()
		os.makedirs(directory, exist_ok=True)

		path = os.path.join(directory, '%s.html' % slugify(title))

		# binary mode plus an explicit encode avoids newline translation; the
		# context manager closes the file even if the write fails
		with open(path, 'wb') as f:
			f.write(content.encode('utf-8'))


	def main():
		"""
		Main execution

		Parses the feed at ``RSS_URL``, downloads the page behind each entry
		and stores it with ``save_article``. A failed download is reported on
		stdout and the loop moves on to the next entry.
		"""
		# grab RSS data and parse it
		feed = feedparser.parse(RSS_URL)

		# loop through each article/RSS item
		for entry in feed.entries:

			# for clarity, assign the entry ID (assumed to be the article
			# url) into a new variable
			url = entry['id']

			# fetch article using url; the timeout keeps one unresponsive
			# server from stalling the run, and network errors are caught so
			# a single bad entry does not abort the remaining downloads
			try:
				r = requests.get(url, timeout=30)
			except requests.RequestException as err:
				print('Could not retrieve file: {} ({})'.format(url, err))
				continue

			# save to disk or print an error message
			if r.ok:
				save_article(entry['title'], r.text)
			else:
				print('Could not retrieve file: {}'.format(url))


	##########################################################################
	## Execution
	##########################################################################

	# only start the download when run as a script, not when imported
	if __name__ == '__main__':
		main()