igrigorik/parse_delicious_xml.py

## parse_delicious_xml.py
#!/usr/bin/env python
# encoding: utf-8
"""
parse_delicious_xml.py

First download your bookmarks and restrict to specific tags if desired:

$ curl --user 'username':'password' -o delicious.xml 'https://api.del.icio.us/v1/posts/all'

Extract and count most common domains:

$ python parse_delicious_xml.py | sort | uniq -c | sort -nr > ranked_delicious_domains.txt

Created by Peter Skomoroch on 2010-08-15.
Copyright (c) 2010 Data Wrangling LLC. All rights reserved.
"""

import sys
import os
from BeautifulSoup import BeautifulStoneSoup
# Read the whole export up front. The with-block closes the file handle as
# soon as the contents are in memory (the original left it open).
# 'rU' is kept so newline handling stays the same on Python 2.
with open('delicious.xml', 'rU') as xml_file:
    xml = xml_file.read()
soup = BeautifulStoneSoup(xml)

# Every <post> element is treated as one bookmark whose URL is in "href".
posts = soup.findAll('post')

for post in posts:
    # "scheme://host/path".split('/') -> ['scheme:', '', 'host', ...],
    # so index 2 is the host part of the URL.
    href = post.get('href', '')
    parts = href.split('/')
    if len(parts) < 3:
        # No "//host" component (missing href, "mailto:...", etc.). Report it
        # on stderr so the stdout stream fed to sort/uniq stays clean, and
        # keep going instead of aborting the whole run with an exception.
        sys.stderr.write('skipping post without a host: %r\n' % href)
        continue
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid syntax on Python 3.
    print(parts[2])
	#!/usr/bin/env python
	# encoding: utf-8
	"""
	parse_delicious_xml.py

	First download your bookmarks and restrict to specific tags if desired:

	$ curl --user 'username':'password' -o delicious.xml 'https://api.del.icio.us/v1/posts/all'

	Extract and count most common domains:

	$ python parse_delicious_xml.py | sort | uniq -c | sort -nr > ranked_delicious_domains.txt

	Created by Peter Skomoroch on 2010-08-15.
	Copyright (c) 2010 Data Wrangling LLC. All rights reserved.
	"""

	import sys
	import os
	from BeautifulSoup import BeautifulStoneSoup
	# Read the whole export up front. The with-block closes the file handle as
	# soon as the contents are in memory (the original left it open).
	# 'rU' is kept so newline handling stays the same on Python 2.
	with open('delicious.xml', 'rU') as xml_file:
	    xml = xml_file.read()
	soup = BeautifulStoneSoup(xml)

	# Every <post> element is treated as one bookmark whose URL is in "href".
	posts = soup.findAll('post')

	for post in posts:
	    # "scheme://host/path".split('/') -> ['scheme:', '', 'host', ...],
	    # so index 2 is the host part of the URL.
	    href = post.get('href', '')
	    parts = href.split('/')
	    if len(parts) < 3:
	        # No "//host" component (missing href, "mailto:...", etc.). Report
	        # it on stderr so the stdout stream fed to sort/uniq stays clean,
	        # and keep going instead of aborting the run with an exception.
	        sys.stderr.write('skipping post without a host: %r\n' % href)
	        continue
	    # The loop body must be nested under the for statement; the original
	    # had it at the same indent, leaving the loop without a body.
	    print(parts[2])