deeplook/list_repo_issues.py

## list_repo_issues.py
#!/usr/bin/env python

"""List number of issues for project repositories on BitBucket or GitHub.

This will list the number of issues for public repositories of a given
member with a URL like https://github.com/okfn or https://bitbucket.org/okfn.
It was written with the idea of getting insight quickly into the "issue
activity" of all projects of a repository owner.

This script uses a really simple HTML scraping approach. For anything
better capable of accessing and displaying more complex repository data
(like issue titles or anything else) you might consider writing a similar
tool using existing (or emerging) APIs listed here:

http://confluence.atlassian.com/display/BITBUCKET/Using+the+Bitbucket+REST+APIs
http://develop.github.com/p/libraries.html
"""

import re
import os
import sys
import urllib


# Module metadata.
__version__ = "0.2.2"
__license__ = "GNU General Public Licence v3 (GPLv3)"
__author__ = "Dinu Gherman"
__date__ = "2011-09-01"


def list_issues_github(url):
    """List open/closed issue counts for all repositories of a GitHub owner.

    The owner page at ``url`` is scraped for repository names; then the
    ``/issues`` page of each repository is scraped for snippets of the
    form "<number> <word> issues". One line per repository is printed to
    stdout.

    url -- owner page URL, e.g. https://github.com/okfn (a trailing slash
           is tolerated).

    Returns None. Errors raised while fetching pages are not caught.
    """
    # Normalise the URL so that the owner name is the last path segment and
    # the per-repository URLs built below never contain "//".
    base = url.rstrip("/")
    owner = base.rsplit("/", 1)[-1]

    print("Number of open/closed public issues for the GitHub repository")
    print(url)
    print("")

    html = urllib.urlopen(base).read()
    # Repository links look like "/<owner>/<repo>" inside an <h3> element.
    repo_pattern = r"<h3>.*?/%s/([\w+\-]+).*?</h3>" % re.escape(owner)
    repo_names = sorted(set(re.findall(repo_pattern, html, re.S)))

    # Column widths for the running number and the repository name.
    name_width = max([len(name) for name in repo_names] or [0])
    count_width = len(str(len(repo_names)))
    fmt = "%%%ds %%-%ds" % (count_width, name_width)

    for i, name in enumerate(repo_names):
        issues_url = "%s/%s/issues" % (base, name)
        html = urllib.urlopen(issues_url).read()
        found = re.findall(r"\d+\s+[\w+\-]+\s+issues", html, re.S)
        # Collapse whitespace runs and drop duplicate snippets.
        unique = set(re.sub(r"\s+", " ", text) for text in found)
        # False sorts before True, so snippets containing a "c" (such as
        # "closed") come after the others (such as "open").
        issues = sorted(unique, key=lambda text: ("c" in text, text))
        print("%s %s" % (fmt % (i + 1, name), ", ".join(issues)))


def list_issues_bitbucket(url):
    """List issue counts for all repositories of a BitBucket owner.

    The owner page at ``url`` is scraped for repository names; then the
    ``/issues`` page of each repository is scraped for "Issues (<number>)"
    snippets. One line per repository is printed to stdout.

    url -- owner page URL, e.g. https://bitbucket.org/okfn (a trailing
           slash is tolerated).

    Returns None. Errors raised while fetching pages are not caught.
    """
    # Strip a trailing slash so the per-repository URLs built below never
    # contain "//".
    base = url.rstrip("/")

    print("Number of all/open public issues for the BitBucket repository")
    print(url)
    print("(The bigger issue number is for all, the smaller for open issues.)")
    print("")

    html = urllib.urlopen(base).read()
    # Repository names are the link texts of <a> elements inside <h3>.
    repo_pattern = r"<h3>\s*<a.*?>([\w+\-]+)</a>\s*</h3>"
    repo_names = sorted(set(re.findall(repo_pattern, html, re.S)))

    # Column widths for the running number and the repository name.
    name_width = max([len(name) for name in repo_names] or [0])
    count_width = len(str(len(repo_names)))
    fmt = "%%%ds %%-%ds" % (count_width, name_width)

    for i, name in enumerate(repo_names):
        issues_url = "%s/%s/issues" % (base, name)
        html = urllib.urlopen(issues_url).read()
        issues = re.findall(r"(Issues\s+\(\d+\))", html, re.S)
        print("%s %s" % (fmt % (i + 1, name), ", ".join(issues)))

if __name__ == "__main__":
    # Command-line entry point: every argument is treated as an owner URL
    # and dispatched on the host name it contains.
    urls = sys.argv[1:]
    for url in urls:
        if "github.com" in url:
            list_issues_github(url)
        elif "bitbucket.org" in url:
            list_issues_bitbucket(url)
        else:
            # Tell the user instead of skipping the argument silently.
            sys.stderr.write("Skipping unsupported URL: %s\n" % url)

    # Without arguments, print a short usage message.
    if not urls:
        prog = os.path.basename(sys.argv[0])
        print("%s - list # of issues for projects on BitBucket/GitHub" % prog)
        print("Usage: %s REPOSITORY_URL [REPOSITORY_URL ...]" % prog)
        print("Examples:")
        print("  %s https://github.com/okfn" % prog)
        print("  %s https://bitbucket.org/okfn" % prog)
	#!/usr/bin/env python

	"""List number of issues for project repositories on BitBucket or GitHub.

	This will list the number of issues for public repositories of a given
	member with a URL like https://github.com/okfn or https://bitbucket.org/okfn.
	It was written with the idea of getting insight quickly into the "issue
	activity" of all projects of a repository owner.

	This script uses a really simple HTML scraping approach. For anything
	better capable of accessing and displaying more complex repository data
	(like issue titles or anything else) you might consider writing a similar
	tool using existing (or emerging) APIs listed here:

	http://confluence.atlassian.com/display/BITBUCKET/Using+the+Bitbucket+REST+APIs
	http://develop.github.com/p/libraries.html
	"""

	import re
	import os
	import sys
	import urllib


	# Module metadata.
	__version__ = "0.2.2"
	__license__ = "GNU General Public Licence v3 (GPLv3)"
	__author__ = "Dinu Gherman"
	__date__ = "2011-09-01"


	def list_issues_github(url):
	    """List open/closed issue counts for all repositories of a GitHub owner.

	    The owner page at ``url`` is scraped for repository names; then the
	    ``/issues`` page of each repository is scraped for snippets of the
	    form "<number> <word> issues". One line per repository is printed to
	    stdout.

	    url -- owner page URL, e.g. https://github.com/okfn (a trailing slash
	           is tolerated).

	    Returns None. Errors raised while fetching pages are not caught.
	    """
	    # Normalise the URL so that the owner name is the last path segment and
	    # the per-repository URLs built below never contain "//".
	    base = url.rstrip("/")
	    owner = base.rsplit("/", 1)[-1]

	    print("Number of open/closed public issues for the GitHub repository")
	    print(url)
	    print("")

	    html = urllib.urlopen(base).read()
	    # Repository links look like "/<owner>/<repo>" inside an <h3> element;
	    # the non-greedy ".*?" lets the match span the surrounding markup.
	    repo_pattern = r"<h3>.*?/%s/([\w+\-]+).*?</h3>" % re.escape(owner)
	    repo_names = sorted(set(re.findall(repo_pattern, html, re.S)))

	    # Column widths for the running number and the repository name.
	    name_width = max([len(name) for name in repo_names] or [0])
	    count_width = len(str(len(repo_names)))
	    fmt = "%%%ds %%-%ds" % (count_width, name_width)

	    for i, name in enumerate(repo_names):
	        issues_url = "%s/%s/issues" % (base, name)
	        html = urllib.urlopen(issues_url).read()
	        found = re.findall(r"\d+\s+[\w+\-]+\s+issues", html, re.S)
	        # Collapse whitespace runs and drop duplicate snippets.
	        unique = set(re.sub(r"\s+", " ", text) for text in found)
	        # False sorts before True, so snippets containing a "c" (such as
	        # "closed") come after the others (such as "open").
	        issues = sorted(unique, key=lambda text: ("c" in text, text))
	        print("%s %s" % (fmt % (i + 1, name), ", ".join(issues)))


	def list_issues_bitbucket(url):
	    """List issue counts for all repositories of a BitBucket owner.

	    The owner page at ``url`` is scraped for repository names; then the
	    ``/issues`` page of each repository is scraped for "Issues (<number>)"
	    snippets. One line per repository is printed to stdout.

	    url -- owner page URL, e.g. https://bitbucket.org/okfn (a trailing
	           slash is tolerated).

	    Returns None. Errors raised while fetching pages are not caught.
	    """
	    # Strip a trailing slash so the per-repository URLs built below never
	    # contain "//".
	    base = url.rstrip("/")

	    print("Number of all/open public issues for the BitBucket repository")
	    print(url)
	    print("(The bigger issue number is for all, the smaller for open issues.)")
	    print("")

	    html = urllib.urlopen(base).read()
	    # Repository names are the link texts of <a> elements inside <h3>;
	    # "\s*" and ".*?" allow for whitespace and attributes in the markup.
	    repo_pattern = r"<h3>\s*<a.*?>([\w+\-]+)</a>\s*</h3>"
	    repo_names = sorted(set(re.findall(repo_pattern, html, re.S)))

	    # Column widths for the running number and the repository name.
	    name_width = max([len(name) for name in repo_names] or [0])
	    count_width = len(str(len(repo_names)))
	    fmt = "%%%ds %%-%ds" % (count_width, name_width)

	    for i, name in enumerate(repo_names):
	        issues_url = "%s/%s/issues" % (base, name)
	        html = urllib.urlopen(issues_url).read()
	        issues = re.findall(r"(Issues\s+\(\d+\))", html, re.S)
	        print("%s %s" % (fmt % (i + 1, name), ", ".join(issues)))


	if __name__ == "__main__":
	    # Command-line entry point: every argument is treated as an owner URL
	    # and dispatched on the host name it contains.
	    urls = sys.argv[1:]
	    for url in urls:
	        if "github.com" in url:
	            list_issues_github(url)
	        elif "bitbucket.org" in url:
	            list_issues_bitbucket(url)
	        else:
	            # Tell the user instead of skipping the argument silently.
	            sys.stderr.write("Skipping unsupported URL: %s\n" % url)

	    # Without arguments, print a short usage message.
	    if not urls:
	        prog = os.path.basename(sys.argv[0])
	        print("%s - list # of issues for projects on BitBucket/GitHub" % prog)
	        print("Usage: %s REPOSITORY_URL [REPOSITORY_URL ...]" % prog)
	        print("Examples:")
	        print("  %s https://github.com/okfn" % prog)
	        print("  %s https://bitbucket.org/okfn" % prog)