Created
July 21, 2011 14:32
-
-
Save winhamwr/1097312 to your computer and use it in GitHub Desktop.
Python script to install some pip-based requirements reliably in a virtualenv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
""" | |
Update a set of virtualenvs based on a given set of pip requirements files | |
whenever those files change. Keeps track of requirements file changes by md5 | |
hashing the files and storing that hash in the virtualenv. That means that | |
only updates to the files themselves will trigger updating (changes to a | |
repository won't be caught). | |
Optionally also supports uploading/downloading a full bundle of all requirements | |
to/from Amazon S3 based on the requirements hash in order to ensure that | |
problems with bitbucket/github/pypi/some-random-library-owner's-site don't | |
break subsequent builds that require the exact same requirements. | |
Set the environment variable ``REQ_FILES`` as a comma-separated list of paths | |
to your requirements files. | |
""" | |
from __future__ import with_statement | |
import logging | |
import hashlib | |
import subprocess | |
import re | |
import tempfile | |
from optparse import OptionParser | |
from os import environ, path, remove | |
from shutil import rmtree | |
# If we don't have pexpect already, we have to install it here, because of
# bootstrapping problems.
try:
    import pexpect
except ImportError:
    # Install into whatever environment `pip` on PATH targets, then retry.
    print 'Installing pexpect.'
    subprocess.call(['pip', 'install', 'pexpect>=2.4'])
    import pexpect
# If we don't have boto already, we have to install it here, because of
# bootstrapping problems.
try:
    from boto.exception import S3CreateError
    from boto.s3.connection import S3Connection
    from boto.s3.bucket import Bucket
except ImportError:
    print 'Installing boto.'
    subprocess.call(['pip', 'install', 'boto==2.0'])
    from boto.exception import S3CreateError
    from boto.s3.connection import S3Connection
    from boto.s3.bucket import Bucket
# Try to set the boto connection timeout to fail quickly in boto>=2.0
# (boto < 2.0 has no `config` object, hence the ImportError guard).
try:
    from boto import config
    if not config.has_section('Boto'):
        config.add_section('Boto')
    config.set('Boto', 'http_socket_timeout', '5')
except ImportError:
    pass
# Requirements files (relative to the workspace root) whose combined md5
# hash decides whether the virtualenv needs updating. Override with the
# ``REQ_FILES`` environment variable as a comma-separated list of paths.
# BUG FIX: the original referenced ``os.environ`` but this module only
# does ``from os import environ`` — that raised NameError at import time.
if environ.get('REQ_FILES'):
    req_string = environ.get('REQ_FILES')
    req_paths = req_string.split(',')
    assert len(req_paths) > 0
    REQ_FILES = [rp.strip() for rp in req_paths]
else:
    REQ_FILES = [
        'config/ec2_scripts/tpl/pstat/src_requirements.txt',
        'config/ec2_scripts/tpl/pstat/dev_requirements.txt',
        'config/ec2_scripts/tpl/pstat/requirements.txt',
    ]

# Module-level logger writing to stderr so progress shows up in CI output.
logger = logging.getLogger('update_reqs')
logger.setLevel(logging.INFO)
formatter = logging.Formatter('[%(name)s]%(levelname)s:%(message)s')
stderr_handler = logging.StreamHandler()
stderr_handler.setFormatter(formatter)
logger.addHandler(stderr_handler)
def files_hash(files_base, files):
    """
    Return the hex md5 digest of the concatenated contents of ``files``.

    :param files_base: directory that the relative paths in ``files``
        are joined against.
    :param files: iterable of file paths relative to ``files_base``.
    :returns: 32-character hex digest string.
    """
    m = hashlib.md5()
    for file_path in files:
        full_path = path.join(files_base, file_path)
        # FIX: hash the raw bytes ('rb'). Text mode would translate line
        # endings on some platforms, producing different hashes for
        # identical requirements files.
        with open(full_path, 'rb') as f:
            m.update(f.read())
    return m.hexdigest()
def files_changed(hashcode, hash_path):
    """
    Report whether the requirements changed since the last install.

    Returns True when no stored hash exists at ``hash_path`` or when the
    stored hash differs from ``hashcode``.
    """
    if not path.exists(hash_path):
        return True
    with open(hash_path, 'r') as hash_f:
        stored_hash = hash_f.read()
    return stored_hash != hashcode
def create_hash_file(hashcode, hash_path):
    """Persist ``hashcode`` at ``hash_path``, replacing any previous value."""
    with open(hash_path, 'w') as out:
        out.write(hashcode)
def pip_install_from_bundle(workspace, pip, bundle_path, retries=1, restore_backup_src=False):
    """
    Install the given pybundle.

    The target virtualenv is inferred from the ``pip`` executable path
    (two directories up). On a non-zero pip exit the whole virtualenv is
    wiped and rebuilt via ``setup_venv``, then the install is retried up
    to ``retries`` times; when all retries fail the process exits with
    pip's return code.

    :param workspace: unused here; passed through on recursive retries.
    :param pip: path to the virtualenv's pip executable.
    :param bundle_path: path to the ``.pybundle`` file to install.
    :param retries: number of wipe-and-retry attempts remaining.
    :param restore_backup_src: internal flag carried across retries;
        True when an original ``src`` dir was backed up and must be
        restored if every attempt fails.
    """
    build_dir = tempfile.mkdtemp(prefix='venv_build')
    venv_path = path.abspath(path.join(pip, '../..'))
    existing_src_path = path.join(venv_path, 'src')
    # We need to move the existing src dir because pip will not overwrite any
    # existing files. That means no existing src packages will get updated
    if path.isdir(existing_src_path):
        logger.info('Backing up existing src dir for update: %s', existing_src_path)
        if path.isdir('%s.bak' % existing_src_path):
            logger.warning(
                "A backup-ed src directory already exists. Deleting the backup")
            subprocess.call(['rm', '-r', '%s.bak' % existing_src_path])
        mv_src_cmd = ['mv', '-f', existing_src_path, '%s.bak' % existing_src_path]
        subprocess.call(mv_src_cmd)
        # If the entire pip installation fails after retries, we need to
        # restore the orginal src dir back to its original place to maintain a
        # working virtualenv
        restore_backup_src = True
    pip_cmd = [pip, 'install', bundle_path, '--build', build_dir]
    pip_returncode = subprocess.call(pip_cmd)
    # Delete the build dir either way
    rmtree(build_dir)
    if pip_returncode == 0:
        # Success
        return
    if retries < 1:
        logger.error(
            "Pip installation failed with code %s even on fresh venv",
            pip_returncode)
        if restore_backup_src:
            # Put the backed-up src dir back so the venv stays usable.
            logger.info('Restoring original src to: %s', existing_src_path)
            mv_src_cmd = ['mv', '-f', '%s.bak' % existing_src_path, existing_src_path]
            subprocess.call(mv_src_cmd)
        exit(pip_returncode)
    # Error during install, blow away the virtualenv and retry
    logger.warning(
        "Reqs install failed with error code: %s", pip_returncode)
    logger.warning(
        "Wiping the virtualenv and trying up to %s more time(s)",
        retries)
    logger.warning("Wiping virtualenv at %s", venv_path)
    subprocess.call(['rm', '-r', '-f', venv_path])
    pip = setup_venv(venv_path)
    return pip_install_from_bundle(
        workspace, pip, bundle_path, retries=retries-1, restore_backup_src=restore_backup_src)
def pip_get_bundle(pip, bundle_hash_name, s3_bucket):
    """
    Download the pybundle stored under ``bundle_hash_name`` in
    ``s3_bucket`` into a fresh temp file and return that file's path.
    The file is left empty if no such key exists.
    """
    _, local_path = tempfile.mkstemp(suffix='.pybundle')
    bundle_key = s3_bucket.get_key(bundle_hash_name)
    if bundle_key:
        bundle_key.get_contents_to_filename(local_path)
    return local_path
def pip_create_bundle(workspace, pip, retries=1):
    """
    Create a pybundle covering every file in ``REQ_FILES`` and return
    its path. Retries ``retries`` times on failure; exits the process
    with pip's return code once retries are exhausted.
    """
    _, bundle_path = tempfile.mkstemp(suffix='.pybundle')
    cmd = [pip, 'bundle', bundle_path]
    for req_file in REQ_FILES:
        cmd += ['-r', path.abspath(path.join(workspace, req_file))]
    returncode = subprocess.call(cmd)
    if returncode == 0:
        return bundle_path
    # Failed: drop the partial bundle before deciding what to do next.
    remove(bundle_path)
    if retries < 1:
        # We're out of retries
        logger.error("Error creating the bundle")
        exit(returncode)
    # Let's try again
    logger.info(
        "There was an error creating the bundle. Retrying %s more time(s)",
        retries)
    return pip_create_bundle(workspace, pip, retries=retries-1)
def pip_upload_bundle(bundle_path, bundle_hash_name, s3_bucket, retries=1):
    """
    Upload the given bundle to s3 with the key ``<hashcode>.pybundle``

    :param bundle_path: local path of the pybundle to upload.
    :param bundle_hash_name: S3 key name (``<hashcode>.pybundle``).
    :param s3_bucket: boto Bucket to upload into.
    :param retries: attempts remaining; the original exception is
        re-raised once retries are exhausted.
    """
    key = s3_bucket.new_key(bundle_hash_name)
    try:
        key.set_contents_from_filename(bundle_path)
    except Exception:
        logger.warning("There was an error uploading the file")
        if retries < 1:
            # FIX: corrected garbled log message
            # (was "Retries exhasted upload the file").
            logger.error("Retries exhausted uploading the file")
            raise
        return pip_upload_bundle(
            bundle_path, bundle_hash_name, s3_bucket, retries=retries-1)
def s3_bundle_exists(bundle_hash_name, s3_bucket):
    """
    Look up ``bundle_hash_name`` in ``s3_bucket``.

    Returns the S3 key object when a matching bundle is stored, or None
    (falsy) when it isn't — callers use this as a truth test.
    """
    return s3_bucket.get_key(bundle_hash_name)
def get_bucket(s3_bucket, s3conn):
    """
    Return a Bucket handle named ``s3_bucket`` on ``s3conn``, first
    creating it as a public-read bucket if it doesn't already exist.
    """
    try:
        s3conn.create_bucket(s3_bucket, policy='public-read')
    except S3CreateError:
        # The bucket already exists; nothing to do.
        pass
    return Bucket(s3conn, name=s3_bucket)
def pip_install(workspace, pip, hash_path, use_pexpect=True):
    """
    Run ``pip install -r <file>`` for every requirements file in
    ``REQ_FILES``, stopping at the first failure.

    :param workspace: directory the relative requirements paths are
        joined against.
    :param pip: path to the virtualenv's pip executable.
    :param hash_path: stored-hash file; deleted on failure so the next
        run re-attempts the install.
    :param use_pexpect: when True, drive pip through pexpect so the
        interactive "(s)witch, (i)gnore, (w)ipe, (b)ackup" VCS prompt is
        automatically answered with "wipe".
    :returns: 0 on success, pip's non-zero exit code on failure.
    """
    base_pip_cmd = [pip, 'install']
    for req_f in REQ_FILES:
        req_f = path.abspath(path.join(workspace, req_f))
        pip_cmd = base_pip_cmd + ['-r', req_f]
        logger.info("Calling pip command: %s" % pip_cmd)
        if not use_pexpect:
            returncode = subprocess.call(pip_cmd)
        else:
            # Call pip command, using pexpect to automatically wipe
            # repositories as necessary.
            proc = pexpect.spawn(' '.join(pip_cmd))
            proc.setecho(True)
            while True:
                try:
                    # Index of the first pattern matched in pip's output.
                    i = proc.expect(
                        [
                            pexpect.EOF,
                            re.escape('What to do? (s)witch, (i)gnore, (w)ipe, (b)ackup'),
                            # Flush output every time there is a relevant action.
                            # Don't include "Obtaining", because that will mask
                            # the "What to do?" action.
                            'Downloading .*$',
                            'Cloning .*$',
                            'Deleting .*$',
                            'Installing .*$',
                            'Running .*$',
                            'Updating .*$',
                        ],
                        # Generous 10-minute timeout per matched chunk.
                        timeout=10*60,
                    )
                    if i == 0:  # End of file.
                        print proc.before,
                        break
                    elif i == 1:
                        # The VCS conflict prompt: echo it, then answer "wipe".
                        print proc.before,
                        print proc.after,
                        print 'pexpect: Automatically choosing (w)ipe.'
                        proc.sendline('w')
                    else:
                        # Send any other line to stdout.
                        print proc.before,
                        print proc.after,
                except pexpect.TIMEOUT:
                    print 'pexpect: Timed out.'
                    break
            proc.close()
            returncode = proc.exitstatus
        if returncode != 0:
            logger.error(
                "Reqs install exited with code %s. ABORTING" % returncode)
            # Drop the stored hash so the next run retries this install.
            subprocess.call(['rm', '-f', hash_path])
            return returncode
    # Success
    return 0
def setup_venv(venv_path):
    """
    Ensure a virtualenv exists at ``venv_path`` with pip available and
    return the path to that virtualenv's pip executable.
    """
    # Make sure the virtualenv exists and pip is installed
    if path.isdir(venv_path):
        logger.info("Virtualenv already exists at: %s" % venv_path)
    else:
        subprocess.call(['virtualenv', venv_path])
    # Pin a known-good pip version inside the virtualenv.
    subprocess.call(
        [path.join(venv_path, 'bin', 'easy_install'), 'pip==0.8.1'])
    return path.join(venv_path, 'bin', 'pip')
def _do_full_local_install(pip, hash_path, venv_path, workspace):
    """
    Install requirements directly from their sources (no S3 bundle).

    On a first failure, wipes the virtualenv's ``src`` checkout dir,
    rebuilds the venv and retries once; exits the process with pip's
    return code if the retry also fails.
    """
    returncode = pip_install(workspace, pip, hash_path)
    if returncode == 0:
        return
    # There was an error during the pip install.
    # Blow away the virtualenv and try again
    logger.warning(
        "First attempt at reqs install failed with error code: %s" % returncode)
    logger.warning("Removing the virtualenv src folder and trying again")
    subprocess.call(
        ['rm', '-r', '-f', path.abspath(path.join(venv_path, 'src'))])
    pip = setup_venv(venv_path)
    returncode = pip_install(workspace, pip, hash_path)
    if returncode != 0:
        logger.error(
            "Pip installation failed with code %s even on fresh venv" % returncode)
        exit(returncode)
def main():
    """
    Entry point: refresh the active virtualenv's requirements when the
    md5 hash of the requirements files differs from the hash stored in
    the virtualenv (``reqs_hash.txt``).

    Three install strategies, chosen by CLI options:
    ``--make-s3-bundle`` (install from / create and upload an S3
    pybundle), ``--get-s3-bundle`` (install only from an existing S3
    pybundle), or a plain local pip install otherwise.
    """
    parser = OptionParser("usage: %prog [options]")
    parser.add_option('--get-s3-bundle', action="store_true", default=False,
                      help="Use a pip pybundle from the 's3-bucket` if it exists")
    parser.add_option('--make-s3-bundle', action="store_true", default=False,
                      help="If not already on S3, create a pybundle and upload it")
    parser.add_option('--s3-bucket', default=None,
                      help="S3 bucket to use for making/getting the bundle")
    parser.add_option('--s3-access-key', default=None,
                      help="S3 access key to use for making the bundle")
    parser.add_option('--s3-secret-key', default=None,
                      help="S3 secret access key to use for making the bundle")
    options, _ = parser.parse_args()
    # --make-s3-bundle needs full credentials; --get-s3-bundle only the
    # bucket name.
    if options.make_s3_bundle:
        required_params = ['s3_bucket', 's3_access_key', 's3_secret_key']
        if any([getattr(options, param) is None for param in required_params]):
            parser.error((
                "s3-bucket, s3-access-key and s3-secret-key are required "
                "when using --make-s3-bundle"))
    elif options.get_s3_bundle:
        if options.s3_bucket is None:
            parser.error("s3-bucket is required when using --get-s3-bundle")
    # The stored hash lives inside the virtualenv, so a brand-new venv
    # always triggers a full install.
    venv_path = environ.get('VIRTUAL_ENV')
    if venv_path is None:
        parser.error(("update_reqs.py must be run from within a "
            "virtualenvironment. No VIRTUAL_ENV environment variable "
            "found"))
    hash_path = path.join(venv_path, 'reqs_hash.txt')
    # REQ_FILES paths are relative to the directory above this script.
    workspace = path.abspath(
        path.join(path.dirname(__file__), '..'))
    hashcode = files_hash(workspace, REQ_FILES)
    bundle_hash_name = hashcode + '.pybundle'
    if files_changed(hashcode, hash_path):
        logger.info((
            'Requirements hash at %s does not exist or does not match. '
            'Updating requirements') % hash_path)
        pip = setup_venv(venv_path)
        if options.make_s3_bundle:
            # Install and make the s3 bundle
            s3conn = S3Connection(
                aws_access_key_id=options.s3_access_key,
                aws_secret_access_key=options.s3_secret_key)
            s3_bucket = get_bucket(options.s3_bucket, s3conn)
            if s3_bundle_exists(bundle_hash_name, s3_bucket):
                # The bundle already exists. Just install it
                logger.info("Matching bundle already exists on S3")
                bundle_path = pip_get_bundle(pip, bundle_hash_name, s3_bucket)
                logger.info("Installing from bundle")
                pip_install_from_bundle(workspace, pip, bundle_path)
            else:
                # Do a full install and then upload the bundle
                logger.info("No matching bundle exists on S3")
                logger.info("Creating bundle")
                bundle_path = pip_create_bundle(workspace, pip)
                logger.info("Installing from bundle")
                pip_install_from_bundle(workspace, pip, bundle_path)
                logger.info("Uploading bundle to s3")
                pip_upload_bundle(bundle_path, bundle_hash_name, s3_bucket)
                logger.info("Succesfully uploaded bundle to s3")
            # Both branches leave a temp bundle on disk; clean it up.
            remove(bundle_path)
        else:
            if options.get_s3_bundle:
                # Install from the s3 bundle
                s3conn = S3Connection(
                    aws_access_key_id=options.s3_access_key,
                    aws_secret_access_key=options.s3_secret_key)
                s3_bucket = get_bucket(options.s3_bucket, s3conn)
                if not s3_bundle_exists(bundle_hash_name, s3_bucket):
                    logger.error("Matching bundle doesn't exist on S3.")
                    logger.error("Use --make-s3-bundle to create and upload bundle.")
                    exit(2)
                # The bundle already exists. Just install it
                bundle_path = pip_get_bundle(pip, bundle_hash_name, s3_bucket)
                pip_install_from_bundle(workspace, pip, bundle_path)
                remove(bundle_path)
            else:
                # local install
                _do_full_local_install(pip, hash_path, venv_path, workspace)
        logger.info(
            "Reqs successfully installed. Saving reqs hash to %s" % hash_path)
        create_hash_file(hashcode, hash_path)
        # Stale .pyc files from a previous checkout can mask deleted modules.
        logger.info("Deleting the pyc files")
        # NOTE(review): the embedded quotes in '"*.pyc"' make find match the
        # literal quoted filename, not the *.pyc glob — probably intended as
        # plain '*.pyc'; confirm before changing.
        subprocess.call(
            ['find', venv_path + '/src/', '-name', '"*.pyc"', '-delete'])
    else:
        logger.info(
            'Reqs hash at %s already matches. Not updating.' % hash_path)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment