Skip to content

Instantly share code, notes, and snippets.

@derlin
Last active July 2, 2019 06:35
Show Gist options
  • Save derlin/2196c17e72a344e45d6f8676d0be53db to your computer and use it in GitHub Desktop.
Save derlin/2196c17e72a344e45d6f8676d0be53db to your computer and use it in GitHub Desktop.
Make google BERT easily installable using setuptools

Make google's BERT easy to install using setuptools. Should keep working on later commits.

Usage (assuming a virtualenv):

# clone bert
git clone https://github.com/google-research/bert
# get gist files
wget \
https://gist.githubusercontent.com/derlin/2196c17e72a344e45d6f8676d0be53db/raw/fix_imports.py
wget \
https://gist.githubusercontent.com/derlin/2196c17e72a344e45d6f8676d0be53db/raw/setup.py
# fix imports
python fix_imports.py
# install package
python setup.py install

Optional, make the current environment available in the notebook:

pip install ipykernel
ipython kernel install --user --name=bert-lid
#!/usr/bin/env python3
"""
This script will make bert (https://github.com/google-research/bert) imports relative, e.g.:
import optimization => from . import optimization
This is useful in order to install bert as a package. See also setup.py.
It was tested on commit 0fce551b55caabcfba52c61e18f34b541aef186a .
Note: this is quite straightforward, because all imports are global and there are no
subdirectories.
__author__: Lucy Linder
__date__: July 1, 2019
"""
from os import path
from glob import glob
import re
import fileinput
import argparse
def fix_imports(bert_dir, backup='.bk'):
    """Rewrite top-level imports of sibling bert modules as relative imports.

    Every ``*.py`` file directly inside ``bert_dir`` is edited in place: a line
    starting with ``import <module>``, where ``<module>`` is the name of another
    file of that directory, becomes ``from . import <module>``. All other lines
    are written back unchanged.

    :param bert_dir: directory holding the bert sources (not searched recursively).
    :param backup: suffix of the backup copy kept next to each edited file;
        a falsy value (``None`` or ``''``) keeps no backup.
    """
    # list python modules from bert (only one level)
    python_files = glob(path.join(bert_dir, '*.py'))
    if not python_files:
        # FileInput falls back to reading stdin when it is given no files,
        # which would block forever; there is nothing to fix anyway.
        return

    # module name = file name without its directory and extension
    all_modules = [path.splitext(path.basename(f))[0] for f in python_files]

    # regex matching the import of any of those modules. Names are escaped, and the
    # trailing \b stops a module name from matching as a mere prefix of a longer
    # name (e.g. "modeling" inside "import modelingx").
    regex = re.compile(r'^import (' + '|'.join(map(re.escape, all_modules)) + r')\b')

    with fileinput.FileInput(files=python_files, inplace=True, backup=backup) as f:
        # change all imports to relative imports (note: we could also use
        # import bert.<module> instead). With inplace=True, print() writes to
        # the file currently being read, so every line has to be printed back.
        for line in f:
            print(regex.sub(r'from . import \1', line), end='')
if __name__ == '__main__':
    # Command-line entry point: by default, patch the `bert` checkout that
    # sits in the same directory as this script.
    script_dir = path.dirname(path.realpath(__file__))

    cli = argparse.ArgumentParser()
    cli.add_argument('--bert_dir', default=path.join(script_dir, 'bert'), help='bert directory')
    cli.add_argument('--no-bk', action='store_true', help='edit inplace, without backup.')
    options = cli.parse_args()

    # --no-bk disables the backup copies; otherwise they get the ".bk" suffix
    if options.no_bk:
        backup_suffix = None
    else:
        backup_suffix = '.bk'

    fix_imports(options.bert_dir, backup=backup_suffix)
    print('done.')
#!/usr/bin/env python3
"""
setup.py in order to easily install https://github.com/google-research/bert as a module.
This requires running fix_imports.py first.
It was tested on commit 0fce551b55caabcfba52c61e18f34b541aef186a (ensure you use tensorflow < 2.0)
__author__: Lucy Linder
__date__: July 1, 2019
"""
import setuptools
from os import path
# read the install requirements from bert's own requirements.txt
# (expected at ./bert/requirements.txt, next to this setup.py)
with open(path.join(path.dirname(path.realpath(__file__)), 'bert', 'requirements.txt'), 'r',
          encoding='utf-8') as f:
    requirements = []
    for line in f:
        line = line.strip()
        # skip blank lines and lines that are entirely a comment
        if not line or line.startswith('#'):
            continue
        # drop a trailing " # ..." comment so only the requirement specifier is kept
        requirements.append(line.split(' #', 1)[0].rstrip())
# Package metadata and install configuration for the patched bert checkout.
setuptools.setup(
    name='bert-tensorflow',
    version='1.0.2',
    author='Google Inc.',
    maintainer='Lucy Linder',
    license='Apache License 2.0',
    description='Bert for tensorflow',
    url='https://github.com/google-research/bert',
    # install all packages found next to this setup.py (i.e. the bert/ checkout)
    packages=setuptools.find_packages(),
    # also ship the notebooks that live inside the packages
    package_data={'': ['*.ipynb']},
    # include other files such as html, css, etc
    include_package_data=False,  # if true, read from MANIFEST.in
    zip_safe=True,
    classifiers=[
        'Programming Language :: Python :: 3',
        # official trove classifier for the Apache 2.0 license
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
    ],
    # for testing: nothing extra is needed (an empty list, not an empty requirement string)
    setup_requires=[],
    tests_require=[],
    # regular dependencies
    install_requires=requirements,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment