Skip to content

Instantly share code, notes, and snippets.

@jjhelmus
Last active September 22, 2016 16:28
Show Gist options
  • Save jjhelmus/869d6827ac8e0275437e7643989974e4 to your computer and use it in GitHub Desktop.
Save jjhelmus/869d6827ac8e0275437e7643989974e4 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python3
""" Find conda packages which use a binary prefix. """
import argparse
import bz2
import json
import os
import tarfile
import urllib.request
try:
from packaging.version import parse as parse_version
except ImportError:
from pip._vendor.packaging.version import parse as parse_version
def get_channel_index(channel):
    """ Return the channel index for all platforms.

    For each supported platform the channel's ``repodata.json.bz2`` file is
    downloaded from conda.anaconda.org, decompressed and parsed.

    Parameters
    ----------
    channel : str
        Name of the conda channel, e.g. 'conda-forge'.

    Returns
    -------
    index : dict
        Maps ``platform + filename`` to the package's repodata entry.  Each
        entry additionally gets a 'url' key holding its download location.

    """
    platforms = ['linux-64', 'osx-64', 'win-32', 'win-64', 'linux-32']
    # the channel root does not depend on the platform, build it only once
    channel_url = 'https://conda.anaconda.org/%s' % channel
    index = {}
    for platform in platforms:
        url = '%s/%s/repodata.json.bz2' % (channel_url, platform)
        # use a context manager so the HTTP connection is always released
        with urllib.request.urlopen(url) as response:
            compressed = response.read()
        repodata = json.loads(bz2.decompress(compressed).decode('utf-8'))
        for fn, info in repodata['packages'].items():
            # add a download url to the package
            info['url'] = channel_url + '/' + info['subdir'] + '/' + fn
            # the same filename can exist on several platforms, so the
            # platform is prefixed to keep the index keys unique
            index[platform + fn] = info
    return index
def find_latest_versions(index, package_name):
    """ Return the latest version and packages from a conda channel index.

    Parameters
    ----------
    index : dict
        Channel index whose values are package entries with at least the
        'name' and 'version' keys.
    package_name : str
        Name of the package to look up.

    Returns
    -------
    latest_ver : str or None
        Version string of the newest release, exactly as it appears in the
        index.  None when the index has no entry for the package.
    entries : list
        All index entries of the package which have that version.  Empty
        when the index has no entry for the package.

    """
    candidates = [v for v in index.values() if v['name'] == package_name]
    if not candidates:
        # max() of an empty sequence raises; report "nothing found" instead
        # so that callers can skip the package
        return None, []
    try:
        # Pick the entry with the highest parsed version but keep its raw
        # version string: the parsed version may print in a normalized form
        # which would not match any entry in the index.
        newest = max(candidates, key=lambda v: parse_version(v['version']))
        latest_ver = newest['version']
    except ValueError:
        # parse_version may reject version strings it cannot interpret
        # (packaging's InvalidVersion derives from ValueError); fall back
        # to comparing the version strings lexicographically
        latest_ver = max(v['version'] for v in candidates)
    entries = [v for v in candidates if v['version'] == latest_ver]
    return latest_ver, entries
def parse_arguments():
    """ Parse command line arguments.

    Returns
    -------
    args : argparse.Namespace
        Namespace with the attributes ``packages`` (list of names, empty to
        check every package), ``skip`` (file name or None), ``verb`` (bool),
        ``channel`` (str), ``json`` (file name or None) and ``directory``
        (package cache location, ``pkg_cache`` in the current working
        directory by default).

    """
    parser = argparse.ArgumentParser(
        description="Find conda packages which use a prefix")
    parser.add_argument(
        'packages', nargs='*',
        help=('Name of packages to check, leave blank to check all packages '
              'on the channel'))
    parser.add_argument(
        '--skip', '-s', action='store', help=(
            'file containing list of packages to skip when checking for '
            'prefixes'))
    parser.add_argument(
        '--verb', '-v', action='store_true', help='verbose output')
    parser.add_argument(
        '--channel', '-c', action='store', default='conda-forge',
        help='Conda channel to check. Default is conda-forge')
    # the file receives the names of the packages found to use a binary
    # prefix, so the help text says so
    parser.add_argument(
        '--json', action='store',
        help='Save packages which use a binary prefix to json file.')
    parser.add_argument(
        '--directory', '-d', action='store',
        default=os.path.join(os.getcwd(), 'pkg_cache'),
        help='where to store packages')
    return parser.parse_args()
def _download_to_cache(url, filename):
    """ Download url to filename unless the file already exists. """
    if os.path.exists(filename):
        return
    print("Downloading:", filename)
    # Read the complete payload before creating the file so that a failed
    # download does not leave a truncated file behind in the cache.
    with urllib.request.urlopen(url) as response:
        payload = response.read()
    with open(filename, 'wb') as f:
        f.write(payload)


def _has_binary_prefix(filename):
    """ Return True if info/has_prefix in the archive lists a binary file. """
    with tarfile.open(filename) as tf:
        try:
            member = tf.getmember('info/has_prefix')
        except KeyError:
            # archive has no info/has_prefix member: no prefix at all
            return False
        return b' binary ' in tf.extractfile(member).read()


def find_prefix_packages(index, package_names, verbose, cache_dir):
    """ Return a list of packages which use a binary prefix.

    For the latest version of every package, one build per platform is
    downloaded into the cache (if not already there) and inspected.  A
    package counts as using a binary prefix if any of these builds does.
    A summary of the counts is printed at the end.

    Parameters
    ----------
    index : dict
        Channel index; entries need the 'name', 'version', 'platform',
        'md5' and 'url' keys.
    package_names : iterable of str
        Names of the packages to check.
    verbose : bool
        True to also report packages which do not use a binary prefix.
    cache_dir : str
        Existing directory in which downloaded packages are stored.

    Returns
    -------
    pkgs_with_bin_prefix : list of str
        Names of the packages which use a binary prefix, sorted by name.

    """
    pkgs_with_bin_prefix = []
    pkgs_with_no_bin_prefix = []
    for package_name in sorted(package_names):
        _, entries = find_latest_versions(index, package_name)
        if not entries:
            print(package_name + " : Missing any entries. Skipping...")
            continue

        uses_prefix = []
        for platform in {e['platform'] for e in entries}:
            platform_entries = [e for e in entries
                                if e['platform'] == platform]
            # pick the entry with the smallest md5 so we try the same
            # package each time
            chosen = min(platform_entries, key=lambda e: e['md5'])
            url = chosen['url']
            filename = os.path.join(
                cache_dir, platform + '-' + url.split('/')[-1])
            _download_to_cache(url, filename)
            uses_prefix.append(_has_binary_prefix(filename))

        if any(uses_prefix):
            print(package_name, "uses a binary prefix")
            pkgs_with_bin_prefix.append(package_name)
        else:
            pkgs_with_no_bin_prefix.append(package_name)
            if verbose:
                print(package_name, "does NOT use a binary prefix")

    print("Uses a binary prefix:", len(pkgs_with_bin_prefix))
    print("Does NOT use a binary prefix:", len(pkgs_with_no_bin_prefix))
    print("Total:", len(pkgs_with_bin_prefix) + len(pkgs_with_no_bin_prefix))
    return pkgs_with_bin_prefix
def main():
    """ Check the packages of a channel for binary prefixes.

    Parses the command line, downloads the channel index, determines which
    packages to check (all packages of the channel when none are named,
    minus those listed in the skip file) and optionally saves the names of
    the packages which use a binary prefix to a json formatted file.
    """
    args = parse_arguments()

    # create somewhere to store downloaded packages; exist_ok avoids failing
    # when the directory already exists or appears between check and creation
    os.makedirs(args.directory, exist_ok=True)

    # determine package names to check
    index = get_channel_index(args.channel)
    package_names = set(args.packages)
    if not package_names:   # no package names given on command line
        package_names = {info['name'] for info in index.values()}

    # remove skipped packages, the skip file holds one package name per line
    if args.skip is not None:
        with open(args.skip) as f:
            pkgs_to_skip = {line.strip() for line in f}
        package_names -= pkgs_to_skip

    # find packages which use a binary prefix
    pkgs_with_bin_prefix = find_prefix_packages(
        index, package_names, args.verb, args.directory)

    # save pkgs_with_bin_prefix to json formatted file if specified
    if args.json is not None:
        with open(args.json, 'w') as f:
            json.dump(pkgs_with_bin_prefix, f)


if __name__ == "__main__":
    main()
@jakirkham
Copy link

Made a tweak to your gist to handle this case of the binary prefix.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment