Skip to content

Instantly share code, notes, and snippets.

@winhamwr
Created July 21, 2011 14:32
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save winhamwr/1097312 to your computer and use it in GitHub Desktop.
Save winhamwr/1097312 to your computer and use it in GitHub Desktop.
Python script to install some pip-based requirements reliably in a virtualenv
#! /usr/bin/env python
"""
Update a set of virtualenvs based on a given set of pip requirements files
whenever those files change. Keeps track of requirements file changes by md5
hashing the files and storing that hash in the virtualenv. That means that
only updates to the files themselves will trigger updating (changes to a
repository won't be caught).
Optionally also supports uploading/downloading a full bundle of all requirements
to/from Amazon S3 based on the requirements hash in order to ensure that
problems with bitbucket/github/pypi/some-random-library-owner's-site don't
break subsequent builds that require the exact same requirements.
Set the environment variable ``REQ_FILES`` as a comma-separated list of paths
to your requirements files.
"""
from __future__ import with_statement
import logging
import hashlib
import subprocess
import re
import tempfile
from optparse import OptionParser
from os import environ, path, remove
from shutil import rmtree
# If we don't have pexpect already, we have to install it here, because of
# bootstrapping problems.
try:
    import pexpect
except ImportError:
    # Single-argument print() call form behaves identically on Python 2
    # while staying forward-compatible with Python 3.
    print('Installing pexpect.')
    subprocess.call(['pip', 'install', 'pexpect>=2.4'])
    import pexpect
# If we don't have boto already, we have to install it here, because of
# bootstrapping problems.
try:
    from boto.exception import S3CreateError
    from boto.s3.connection import S3Connection
    from boto.s3.bucket import Bucket
except ImportError:
    # Single-argument print() call form behaves identically on Python 2
    # while staying forward-compatible with Python 3.
    print('Installing boto.')
    subprocess.call(['pip', 'install', 'boto==2.0'])
    from boto.exception import S3CreateError
    from boto.s3.connection import S3Connection
    from boto.s3.bucket import Bucket
# Try to set the boto connection timeout to fail quickly in boto>=2.0
try:
    from boto import config
except ImportError:
    # boto < 2.0 exposes no ``config`` module; skip the timeout tweak.
    pass
else:
    if not config.has_section('Boto'):
        config.add_section('Boto')
    config.set('Boto', 'http_socket_timeout', '5')
# Requirements files to track, relative to the workspace root. Overridable
# via the ``REQ_FILES`` environment variable (comma-separated paths).
#
# Bug fix: this module imports ``environ`` directly (``from os import
# environ``) and never imports ``os``, so the previous ``os.environ``
# reference raised NameError whenever REQ_FILES was set.
if environ.get('REQ_FILES'):
    req_string = environ.get('REQ_FILES')
    req_paths = req_string.split(',')
    assert len(req_paths) > 0
    REQ_FILES = [rp.strip() for rp in req_paths]
else:
    REQ_FILES = [
        'config/ec2_scripts/tpl/pstat/src_requirements.txt',
        'config/ec2_scripts/tpl/pstat/dev_requirements.txt',
        'config/ec2_scripts/tpl/pstat/requirements.txt',
    ]
# Module-level logger for this script: INFO and above, emitted through a
# StreamHandler (stderr by default) with a "[name]LEVEL:message" format.
logger = logging.getLogger('update_reqs')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('[%(name)s]%(levelname)s:%(message)s')
stderr_handler = logging.StreamHandler()
stderr_handler.setFormatter(formatter)
logger.addHandler(stderr_handler)
def files_hash(files_base, files):
    """
    Return the hex md5 digest of the concatenated contents of ``files``.

    :param files_base: directory the relative paths in ``files`` are joined
        against.
    :param files: iterable of file paths relative to ``files_base``; hashed
        in iteration order, so order matters.
    :returns: hex digest string covering all files.
    """
    m = hashlib.md5()
    for file_path in files:
        full_path = path.join(files_base, file_path)
        # Read in binary mode so the digest is platform-independent
        # (no newline translation) and md5 is fed raw bytes, which is
        # required on Python 3.
        with open(full_path, 'rb') as f:
            m.update(f.read())
    return m.hexdigest()
def files_changed(hashcode, hash_path):
    """
    Report whether the stored requirements hash is stale.

    Returns True when no hash file exists at ``hash_path`` or its contents
    differ from ``hashcode``; False when they match exactly.
    """
    if not path.exists(hash_path):
        return True
    with open(hash_path, 'r') as stored:
        return stored.read() != hashcode
def create_hash_file(hashcode, hash_path):
    """
    Persist ``hashcode`` at ``hash_path`` (overwriting any existing file)
    so later runs can detect whether the requirements files changed.
    """
    with open(hash_path, 'w') as out:
        out.write(hashcode)
def pip_install_from_bundle(workspace, pip, bundle_path, retries=1, restore_backup_src=False):
    """
    Install the given pybundle.

    :param workspace: workspace root, passed through unchanged on retry.
    :param pip: path to the virtualenv's pip executable.
    :param bundle_path: path to the ``.pybundle`` file to install.
    :param retries: how many times to wipe the virtualenv and retry after a
        failed install.
    :param restore_backup_src: internal recursion flag; set True once an
        existing ``src`` dir has been backed up so a final failure can
        restore it.

    Exits the process with pip's return code when all retries fail.
    """
    build_dir = tempfile.mkdtemp(prefix='venv_build')
    # pip lives at <venv>/bin/pip, so the venv root is two directories up.
    venv_path = path.abspath(path.join(pip, '../..'))
    existing_src_path = path.join(venv_path, 'src')
    # We need to move the existing src dir because pip will not overwrite any
    # existing files. That means no existing src packages will get updated
    if path.isdir(existing_src_path):
        logger.info('Backing up existing src dir for update: %s', existing_src_path)
        if path.isdir('%s.bak' % existing_src_path):
            logger.warning(
                "A backup-ed src directory already exists. Deleting the backup")
            subprocess.call(['rm', '-r', '%s.bak' % existing_src_path])
        mv_src_cmd = ['mv', '-f', existing_src_path, '%s.bak' % existing_src_path]
        subprocess.call(mv_src_cmd)
        # If the entire pip installation fails after retries, we need to
        # restore the original src dir back to its original place to maintain
        # a working virtualenv
        restore_backup_src = True
    pip_cmd = [pip, 'install', bundle_path, '--build', build_dir]
    pip_returncode = subprocess.call(pip_cmd)
    # Delete the build dir either way
    rmtree(build_dir)
    if pip_returncode == 0:
        # Success
        return
    if retries < 1:
        logger.error(
            "Pip installation failed with code %s even on fresh venv",
            pip_returncode)
        if restore_backup_src:
            # Put the backed-up src checkout back so the venv stays usable.
            logger.info('Restoring original src to: %s', existing_src_path)
            mv_src_cmd = ['mv', '-f', '%s.bak' % existing_src_path, existing_src_path]
            subprocess.call(mv_src_cmd)
        exit(pip_returncode)
    # Error during install, blow away the virtualenv and retry
    logger.warning(
        "Reqs install failed with error code: %s", pip_returncode)
    logger.warning(
        "Wiping the virtualenv and trying up to %s more time(s)",
        retries)
    logger.warning("Wiping virtualenv at %s", venv_path)
    subprocess.call(['rm', '-r', '-f', venv_path])
    pip = setup_venv(venv_path)
    return pip_install_from_bundle(
        workspace, pip, bundle_path, retries=retries-1, restore_backup_src=restore_backup_src)
def pip_get_bundle(pip, bundle_hash_name, s3_bucket):
    """
    Download the bundle named ``bundle_hash_name`` from ``s3_bucket`` into a
    fresh temp file and return that file's path.

    NOTE(review): when no matching key exists the temp file is returned
    empty -- callers are expected to check ``s3_bundle_exists`` first.
    """
    _, local_path = tempfile.mkstemp(suffix='.pybundle')
    key = s3_bucket.get_key(bundle_hash_name)
    if key:
        key.get_contents_to_filename(local_path)
    return local_path
def pip_create_bundle(workspace, pip, retries=1):
    """
    Build a pybundle covering every file in ``REQ_FILES`` and return its
    path, retrying up to ``retries`` times before exiting the process.
    """
    _, bundle_path = tempfile.mkstemp(suffix='.pybundle')
    bundle_cmd = [pip, 'bundle', bundle_path]
    for req_f in REQ_FILES:
        bundle_cmd += ['-r', path.abspath(path.join(workspace, req_f))]
    if subprocess.call(bundle_cmd) == 0:
        return bundle_path
    # Bundling failed; discard the partial file before deciding what to do.
    remove(bundle_path)
    if retries < 1:
        # We're out of retries
        logger.error("Error creating the bundle")
        exit(subprocess.call(bundle_cmd))
    # Let's try again
    logger.info(
        "There was an error creating the bundle. Retrying %s more time(s)",
        retries)
    return pip_create_bundle(workspace, pip, retries=retries - 1)
def pip_upload_bundle(bundle_path, bundle_hash_name, s3_bucket, retries=1):
    """
    Upload the given bundle to s3 with the key ``<hashcode>.pybundle``

    :param bundle_path: local path of the bundle file to upload.
    :param bundle_hash_name: S3 key name (``<hash>.pybundle``).
    :param s3_bucket: boto Bucket to upload into.
    :param retries: number of retry attempts before re-raising the error.
    """
    key = s3_bucket.new_key(bundle_hash_name)
    try:
        key.set_contents_from_filename(bundle_path)
    except Exception:
        logger.warning("There was an error uploading the file")
        if retries < 1:
            # Fixed log-message typo ("exhasted upload").
            logger.error("Retries exhausted uploading the file")
            raise
        # Retry only on failure; a successful upload returns immediately.
        return pip_upload_bundle(
            bundle_path, bundle_hash_name, s3_bucket, retries=retries - 1)
def s3_bundle_exists(bundle_hash_name, s3_bucket):
    """
    Check whether ``s3_bucket`` already holds a key named
    ``bundle_hash_name``.

    Returns the key object (truthy) when present, otherwise ``None``.
    """
    key = s3_bucket.get_key(bundle_hash_name)
    return key
def get_bucket(s3_bucket, s3conn):
    """
    Ensures the given S3 bucket exists by creating a public bucket
    if it doesn't.

    :param s3_bucket: bucket name string.
    :param s3conn: an open boto S3 connection.
    :returns: a boto ``Bucket`` handle for the (possibly just-created)
        bucket.
    """
    try:
        s3conn.create_bucket(s3_bucket, policy='public-read')
    except S3CreateError:
        # The bucket already exists; fall through and use it as-is.
        pass
    return Bucket(s3conn, name=s3_bucket)
def pip_install(workspace, pip, hash_path, use_pexpect=True):
    """
    Run ``pip install -r <file>`` for every file in ``REQ_FILES``.

    :param workspace: directory the requirements paths are relative to.
    :param pip: path to the virtualenv's pip executable.
    :param hash_path: stored-hash file; deleted on failure so the next run
        re-attempts the install.
    :param use_pexpect: when True, drive pip through pexpect so interactive
        "What to do?" VCS prompts are auto-answered with (w)ipe.
    :returns: 0 on success, or the first failing pip return code.
    """
    base_pip_cmd = [pip, 'install']
    for req_f in REQ_FILES:
        req_f = path.abspath(path.join(workspace, req_f))
        pip_cmd = base_pip_cmd + ['-r', req_f]
        logger.info("Calling pip command: %s" % pip_cmd)
        if not use_pexpect:
            returncode = subprocess.call(pip_cmd)
        else:
            # Call pip command, using pexpect to automatically wipe
            # repositories as necessary.
            proc = pexpect.spawn(' '.join(pip_cmd))
            proc.setecho(True)
            while True:
                try:
                    i = proc.expect(
                        [
                            pexpect.EOF,
                            re.escape('What to do? (s)witch, (i)gnore, (w)ipe, (b)ackup'),
                            # Flush output every time there is a relevant action.
                            # Don't include "Obtaining", because that will mask
                            # the "What to do?" action.
                            'Downloading .*$',
                            'Cloning .*$',
                            'Deleting .*$',
                            'Installing .*$',
                            'Running .*$',
                            'Updating .*$',
                        ],
                        timeout=10*60,
                    )
                    if i == 0: # End of file.
                        print proc.before,
                        break
                    elif i == 1:
                        # pip found a changed VCS checkout and is asking what
                        # to do; always wipe so the fresh revision is used.
                        print proc.before,
                        print proc.after,
                        print 'pexpect: Automatically choosing (w)ipe.'
                        proc.sendline('w')
                    else:
                        # Send any other line to stdout.
                        print proc.before,
                        print proc.after,
                except pexpect.TIMEOUT:
                    # No recognized output for 10 minutes; give up on this
                    # process and fall through to check its exit status.
                    print 'pexpect: Timed out.'
                    break
            proc.close()
            returncode = proc.exitstatus
        if returncode != 0:
            logger.error(
                "Reqs install exited with code %s. ABORTING" % returncode)
            # Drop the stored hash so the next run retries this install.
            subprocess.call(['rm', '-f', hash_path])
            return returncode
    # Success
    return 0
def setup_venv(venv_path):
    """
    Ensure a virtualenv exists at ``venv_path`` with a pinned pip installed,
    and return the path to that virtualenv's pip executable.
    """
    if path.isdir(venv_path):
        logger.info("Virtualenv already exists at: %s" % venv_path)
    else:
        subprocess.call(['virtualenv', venv_path])
    # Pin pip inside the venv so installs behave the same on every machine.
    easy_install = path.join(venv_path, 'bin', 'easy_install')
    subprocess.call([easy_install, 'pip==0.8.1'])
    return path.join(venv_path, 'bin', 'pip')
def _do_full_local_install(pip, hash_path, venv_path, workspace):
    """
    Run a plain (non-bundle) pip install of all requirements, retrying once
    on a cleaned-up virtualenv before exiting with the failing return code.
    """
    pip_returncode = pip_install(workspace, pip, hash_path)
    if pip_returncode == 0:
        return
    # There was an error during the pip install. Wipe the src checkouts and
    # give it one more try on a freshly-prepared virtualenv.
    logger.warning(
        "First attempt at reqs install failed with error code: %s" % pip_returncode)
    logger.warning("Removing the virtualenv src folder and trying again")
    subprocess.call(
        ['rm', '-r', '-f', path.abspath(path.join(venv_path, 'src'))])
    pip = setup_venv(venv_path)
    pip_returncode = pip_install(workspace, pip, hash_path)
    if pip_returncode != 0:
        logger.error(
            "Pip installation failed with code %s even on fresh venv" % pip_returncode)
        exit(pip_returncode)
def main():
    """
    Entry point: parse options and, when the requirements hash has changed,
    update the active virtualenv (locally or via an S3 pybundle) and save
    the new hash.
    """
    parser = OptionParser("usage: %prog [options]")
    parser.add_option('--get-s3-bundle', action="store_true", default=False,
        help="Use a pip pybundle from the 's3-bucket` if it exists")
    parser.add_option('--make-s3-bundle', action="store_true", default=False,
        help="If not already on S3, create a pybundle and upload it")
    parser.add_option('--s3-bucket', default=None,
        help="S3 bucket to use for making/getting the bundle")
    parser.add_option('--s3-access-key', default=None,
        help="S3 access key to use for making the bundle")
    parser.add_option('--s3-secret-key', default=None,
        help="S3 secret access key to use for making the bundle")
    options, _ = parser.parse_args()
    # Validate option combinations before doing any work.
    if options.make_s3_bundle:
        required_params = ['s3_bucket', 's3_access_key', 's3_secret_key']
        if any([getattr(options, param) is None for param in required_params]):
            parser.error((
                "s3-bucket, s3-access-key and s3-secret-key are required "
                "when using --make-s3-bundle"))
    elif options.get_s3_bundle:
        if options.s3_bucket is None:
            parser.error("s3-bucket is required when using --get-s3-bundle")
    # The target virtualenv is whichever one is currently activated.
    venv_path = environ.get('VIRTUAL_ENV')
    if venv_path is None:
        parser.error(("update_reqs.py must be run from within a "
            "virtualenvironment. No VIRTUAL_ENV environment variable "
            "found"))
    hash_path = path.join(venv_path, 'reqs_hash.txt')
    # Workspace root is assumed to be the parent of this script's directory.
    workspace = path.abspath(
        path.join(path.dirname(__file__), '..'))
    hashcode = files_hash(workspace, REQ_FILES)
    bundle_hash_name = hashcode + '.pybundle'
    if files_changed(hashcode, hash_path):
        logger.info((
            'Requirements hash at %s does not exist or does not match. '
            'Updating requirements') % hash_path)
        pip = setup_venv(venv_path)
        if options.make_s3_bundle:
            # Install and make the s3 bundle
            s3conn = S3Connection(
                aws_access_key_id=options.s3_access_key,
                aws_secret_access_key=options.s3_secret_key)
            s3_bucket = get_bucket(options.s3_bucket, s3conn)
            if s3_bundle_exists(bundle_hash_name, s3_bucket):
                # The bundle already exists. Just install it
                logger.info("Matching bundle already exists on S3")
                bundle_path = pip_get_bundle(pip, bundle_hash_name, s3_bucket)
                logger.info("Installing from bundle")
                pip_install_from_bundle(workspace, pip, bundle_path)
            else:
                # Do a full install and then upload the bundle
                logger.info("No matching bundle exists on S3")
                logger.info("Creating bundle")
                bundle_path = pip_create_bundle(workspace, pip)
                logger.info("Installing from bundle")
                pip_install_from_bundle(workspace, pip, bundle_path)
                logger.info("Uploading bundle to s3")
                pip_upload_bundle(bundle_path, bundle_hash_name, s3_bucket)
                logger.info("Succesfully uploaded bundle to s3")
            # Both branches leave a temp bundle file behind; clean it up.
            remove(bundle_path)
        else:
            if options.get_s3_bundle:
                # Install from the s3 bundle
                s3conn = S3Connection(
                    aws_access_key_id=options.s3_access_key,
                    aws_secret_access_key=options.s3_secret_key)
                s3_bucket = get_bucket(options.s3_bucket, s3conn)
                if not s3_bundle_exists(bundle_hash_name, s3_bucket):
                    logger.error("Matching bundle doesn't exist on S3.")
                    logger.error("Use --make-s3-bundle to create and upload bundle.")
                    exit(2)
                # The bundle already exists. Just install it
                bundle_path = pip_get_bundle(pip, bundle_hash_name, s3_bucket)
                pip_install_from_bundle(workspace, pip, bundle_path)
                remove(bundle_path)
            else:
                # local install
                _do_full_local_install(pip, hash_path, venv_path, workspace)
        logger.info(
            "Reqs successfully installed. Saving reqs hash to %s" % hash_path)
        create_hash_file(hashcode, hash_path)
        # Stale .pyc files in src checkouts could shadow updated sources.
        # NOTE(review): the pattern is passed as '"*.pyc"' (with literal
        # quotes) in the argv list; find will look for files whose names
        # include the quote characters -- likely matches nothing. Confirm
        # intent before changing.
        logger.info("Deleting the pyc files")
        subprocess.call(
            ['find', venv_path + '/src/', '-name', '"*.pyc"', '-delete'])
    else:
        logger.info(
            'Reqs hash at %s already matches. Not updating.' % hash_path)
# Script entry point.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment