Skip to content

Instantly share code, notes, and snippets.

@whosaysni
Created June 7, 2013 05:32
Show Gist options
  • Save whosaysni/5727233 to your computer and use it in GitHub Desktop.
Save whosaysni/5727233 to your computer and use it in GitHub Desktop.
Fetches the issues feed, and each issue's comments feed, from a Google Code hosted project and saves them into a .tgz archive.
# coding: utf-8
import os
import sys
import urllib
import tarfile
import time
from xml.etree import ElementTree as ET
from StringIO import StringIO
PROJECT_NAME = 'sahanadocsjp'
def FileTarInfo(name, buf, mode='0755', mtime=None, type_=tarfile.REGTYPE):
    """Build a ``tarfile.TarInfo`` header describing an in-memory buffer.

    Args:
        name: Member name to record in the archive.
        buf: The content that will be stored under ``name``; only its
            length is used here (to fill in the header's size field).
        mode: Permission bits, either as an octal string such as
            ``'0755'`` or as an integer that is used as-is.
        mtime: Modification time in seconds since the epoch. Defaults to
            the current time when ``None``.
        type_: Tar member type constant. Defaults to a regular file.

    Returns:
        A ``tarfile.TarInfo`` with size, mtime, mode, type, uid and gid set.
    """
    tar_info = tarfile.TarInfo(name)
    tar_info.size = len(buf)
    tar_info.mtime = time.time() if mtime is None else mtime
    # Octal strings are parsed; integers are assumed to already be mode bits.
    tar_info.mode = mode if isinstance(mode, int) else int(mode, 8)
    tar_info.type = type_
    # os.getuid/os.getgid are not available on every platform; when they
    # are missing, keep the uid/gid that TarInfo was constructed with.
    if hasattr(os, 'getuid'):
        tar_info.uid = os.getuid()
    if hasattr(os, 'getgid'):
        tar_info.gid = os.getgid()
    return tar_info
def _read_url(url):
    """Return the whole response body of ``url``, always closing the response."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def do_job(project_name, archive_filename=None, **kargs):
    """Archive a project's issues feed and the comments feed of each issue.

    Downloads the issues feed of ``project_name`` and stores it as
    ``issues.xml`` in a gzip-compressed tar archive. Then, for every entry
    id found in that feed, downloads the matching comments feed and stores
    it as ``issue_comments_<id>.xml`` in the same archive. Progress
    messages are written to stderr.

    Args:
        project_name: Project name interpolated into the feed URLs.
        archive_filename: Path of the archive to create. When empty or
            ``None``, ``'<project_name>_issues.tgz'`` is used.
        **kargs: Accepted for call compatibility; not used.
    """
    # preparation
    base_url = (
        'https://code.google.com/feeds/issues/p/%s/issues/' % (project_name,))
    # url for issue feed (the query string asks for max-results=200)
    issues_url = base_url + 'full?max-results=200'
    atom_namespace_prefix = '{http://www.w3.org/2005/Atom}'
    atom_entry_tagname = atom_namespace_prefix + 'entry'
    # xpath for issue_id search
    atom_id_xpath = '/'.join([atom_entry_tagname, atom_namespace_prefix + 'id'])
    # archive_filename
    if not archive_filename:
        archive_filename = '%s_issues.tgz' % (project_name,)

    # load issue feed (before creating the archive, so a failed download
    # does not leave an empty archive behind)
    sys.stderr.write('Loading issues: %s\n' % (issues_url,))
    issues_feed_buf = _read_url(issues_url)

    archive_file = tarfile.open(archive_filename, 'w:gz')
    try:
        archive_file.addfile(
            FileTarInfo('issues.xml', issues_feed_buf),
            StringIO(issues_feed_buf))
        sys.stderr.write('Saved issues.xml\n')

        # analyze issues tree to extract entry ids
        issues_tree = ET.fromstring(issues_feed_buf)
        entries = issues_tree.findall(atom_id_xpath)
        sys.stderr.write('Found %d entries.\n' % (len(entries),))

        for entry in entries:
            # Use the last path segment of the entry id as the issue id
            # instead of slicing at a fixed prefix length, so the result
            # does not depend on the exact scheme/prefix of the id URL.
            # NOTE(review): assumes entry ids end in '/<issue id>', as the
            # original prefix slice also did -- confirm against a real feed.
            issue_id_bits = entry.text.rstrip('/').rsplit('/', 1)[-1]

            # load
            # Built by concatenation so a '%' in project_name cannot be
            # misread as a format directive.
            comments_url = base_url + issue_id_bits + '/comments/full'
            sys.stderr.write(
                'Loading comments for issue %s (%s)...\n'
                % (issue_id_bits, comments_url))
            comments_feed_buf = _read_url(comments_url)

            # write
            comments_filename = 'issue_comments_%s.xml' % (issue_id_bits,)
            archive_file.addfile(
                FileTarInfo(comments_filename, comments_feed_buf),
                StringIO(comments_feed_buf))
            sys.stderr.write('Saved %s\n' % (comments_filename,))
    finally:
        # Close the archive even when a download or parse step fails, so
        # the file handle is released.
        archive_file.close()
if __name__ == '__main__':
    # Command line: <project_name> [archive_filename].
    # Any other argument count is a usage error; passing extra arguments
    # straight through would make do_job() raise TypeError.
    cli_args = sys.argv[1:]
    if 1 <= len(cli_args) <= 2:
        do_job(*cli_args)
    else:
        sys.stderr.write(
            'Usage: %s <project_name> [archive_filename]\n' % sys.argv[0])
        # Non-zero status so calling shells/scripts can detect the misuse.
        sys.exit(2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment