Created
January 11, 2012 22:31
-
-
Save turbolent/1597185 to your computer and use it in GitHub Desktop.
neko.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib import urlopen, unquote | |
from urlparse import urljoin | |
from collections import defaultdict | |
from os.path import expanduser, basename, splitext | |
from optparse import OptionParser | |
from xml.etree.ElementTree import XMLParser | |
import re | |
from pickle import dump, load | |
# Global package index. build() stores each Package under the first
# space-separated token of its info cell, and Package.deps entries are
# looked up by the same keys.
packages = dict()
class Package:
    """A single entry of the package index.

    Instances carry no constructor; build() assigns these attributes:
    name, version, url, and deps (a list of keys into the module-level
    ``packages`` dict).
    """

    def dependencies(self, deps=None):
        """Return the set of packages this package transitively depends on.

        ``deps`` is an optional iterable of Package objects to treat as
        already visited: they are neither returned nor traversed.  The
        package itself is never part of the result, even when the
        dependency graph contains cycles.

        Raises KeyError if a dependency key is missing from ``packages``.
        """
        # Copy so the caller's collection is never mutated and no state is
        # shared between calls (the old default was a shared mutable set).
        seen = set(deps) if deps is not None else set()
        result = set()
        if self in seen:
            return result
        seen.add(self)

        # Iterative depth-first walk; the visited set guarantees each
        # package is expanded at most once, which also breaks cycles.
        pending = [self]
        while pending:
            current = pending.pop()
            for key in current.deps:
                pkg = packages[key]
                if pkg in seen:
                    continue
                seen.add(pkg)
                result.add(pkg)
                pending.append(pkg)
        return result
# XHTML namespace used to qualify element names in the index page.
_XHTML_NS = "http://www.w3.org/1999/xhtml"

# "<name>-<version>", where the version may carry a cvs/svn/r prefix.
_HYPHEN_VERSION = re.compile(r'([^-]+)-((cvs|svn)?-?r?\d.*)', re.I)

# "<name><version>" with no separator, e.g. "foo1.2".
_TRAILING_VERSION = re.compile(r'([^\d]+)(\d.*)')


def _split_name_version(stem):
    """Split a file name stem into (name, version); version may be None."""
    match = _HYPHEN_VERSION.match(stem)
    if match:
        return match.group(1), match.group(2)
    if '-' in stem:
        # Fall back to treating everything after the last hyphen as version.
        name, version = stem.rsplit('-', 1)
        return name, version
    match = _TRAILING_VERSION.match(stem)
    if match:
        return match.group(1), match.group(2)
    return stem, None


def build(url):
    """Download the index page at ``url`` and fill the global ``packages``.

    The page is parsed as XHTML.  Every table row whose fourth cell reads
    'tardist' yields one Package: the link in the third cell gives the
    download URL (and, via its file name, name and version), and the text
    of the last cell gives the index key (first space-separated token) and
    the comma-separated dependency keys (last token), unless that text
    ends with 'no dependencies'.

    Rows that are too short, have no link, or have an empty info cell are
    skipped.  Errors from fetching or parsing the page propagate.
    """
    global packages

    response = urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    parser = XMLParser()
    # HTML entities that are not predefined in XML.
    parser.entity["nbsp"] = u"\u00a0"
    parser.entity["copy"] = u"\u00a9"  # was 0x169 (u-tilde), not (c)
    parser.feed(content)
    root = parser.close()

    for row in root.findall('.//{%s}tr' % _XHTML_NS):
        cells = list(row)
        # The type column is the fourth cell, so four cells are required.
        if len(cells) < 4 or cells[3].text != 'tardist':
            continue

        links = list(cells[2])
        href = links[0].get('href') if links else None
        if not href:
            continue

        info = (cells[-1].text or '').strip()
        if not info:
            # Without info text there is no key to index the package under.
            continue
        infos = info.split(' ')

        stem = splitext(basename(unquote(href)))[0]
        name, version = _split_name_version(stem)

        pkg = Package()
        pkg.name = name
        pkg.version = version
        pkg.url = href
        if info.endswith('no dependencies'):
            pkg.deps = []
        else:
            pkg.deps = infos[-1].split(',')
        packages[infos[0]] = pkg
# Help text passed to OptionParser as the program description.
description = "\n".join([
    "Lists all URLs required for given nekoware packages.",
    "First run requires -b. Sample use:",
    "neko.py firefox git emacs | wget -c -i -",
])
def main():
    """Command-line entry point.

    With -b/--build the index is downloaded from --url and pickled to
    --path; otherwise the pickled index is loaded from --path.  For each
    package name given on the command line, the absolute URL of the
    package and of every transitive dependency is printed, one per line.
    Without package names the help text is printed instead.

    A missing or unreadable cache file and unknown package names are
    reported through the option parser, which exits with status 2.
    """
    global packages

    parser = OptionParser(usage="Usage: %prog [OPTION...] [NAME]...",
                          description=description)
    parser.add_option("-u", "--url", dest="url",
                      help="use URL to build index [default: %default]",
                      metavar="URL",
                      default="http://nekoware.dustytech.net/index.php?path=current/")
    parser.add_option("-p", "--path", dest="path",
                      help="cache index at PATH [default: %default]",
                      metavar="PATH",
                      default="~/.nekoware")
    parser.add_option("-b", "--build",
                      action="store_true", dest="build", default=False,
                      help="download URL and build index, otherwise use cache")
    (options, args) = parser.parse_args()

    path = expanduser(options.path)
    if options.build:
        build(options.url)
        with open(path, 'wb') as cache:
            dump(packages, cache)
    else:
        try:
            cache = open(path, 'rb')
        except IOError as err:
            parser.error("cannot read index cache %s (%s); "
                         "run with -b first" % (path, err))
        with cache:
            # NOTE(security): unpickling executes code embedded in the file;
            # only point --path at a cache this tool wrote itself.
            packages = load(cache)

    if not args:
        parser.print_help()
        return

    # Validate every name up front so no partial URL list is emitted.
    unknown = [name for name in args if name not in packages]
    if unknown:
        parser.error("unknown package(s): %s" % ", ".join(unknown))

    for name in args:
        package = packages[name]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(urljoin(options.url, package.url))
        for dep in package.dependencies():
            print(urljoin(options.url, dep.url))
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects beyond its definitions.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment