# Last active October 5, 2018 08:13
# ML installation
#set -eux
# apt needs the apt-transport-https package before it can fetch from HTTPS
# sources, so refresh the package index and install it first.
# -y keeps the install non-interactive, like every other apt-get install in
# this script; without it the run can stop at apt's confirmation prompt.
sudo apt-get update && \
sudo apt-get install -y apt-transport-https
# Cuda support.
# The stub is useful to us both for build-time linking and run-time linking, on CPU-only systems.
# When intended to be used with actual GPUs, make sure to (besides providing access to the host
# CUDA user libraries, either manually or through the use of nvidia-docker) exclude them. One
# convenient way to do so is to obscure its contents by a bind mount:
# docker run .... -v /non-existing-directory:/usr/local/cuda/lib64/stubs:ro ...
# Use a fixed apt repo to stop intermittent failures due to flaky httpredir connections,
# as described by Lionel Chan (the link is missing from this copy of the script).
# NOTE(review): the sed script below is just `s/` -- it has no pattern, no
# replacement and no closing delimiters, so it is not a complete substitute
# command. It is also run without sudo, unlike the apt-get calls around it.
# Everything down to the ImageMagick clean-up is chained to it with `&&`, so
# none of those steps run unless it succeeds -- restore the expression first.
sed -i "s/" /etc/apt/sources.list && \
# Compiler toolchain, then update conda itself, install Python 3.6 and upgrade pip.
sudo apt-get update && sudo apt-get install -y build-essential && \
conda update -y conda && conda install -y python=3.6 && \
pip install --upgrade pip && \
sudo apt-get -y install cmake && \
# Vowpal Wabbit (disabled)
#sudo apt-get install -y libboost-program-options-dev zlib1g-dev libboost-python-dev && \
#cd /usr/lib/x86_64-linux-gnu/ && rm -f libboost_python.a && rm -f && \
#ln -sf && ln -sf libboost_python-py34.a libboost_python.a && \
#pip install vowpalwabbit && \
# Anaconda's scipy is currently behind the main release (1.0)
pip install scipy --upgrade && \
pip install seaborn python-dateutil dask pytagcloud pyyaml joblib \
husl geopy ml_metrics mne pyshp gensim && \
# spaCy from conda-forge, plus the `en` and `en_core_web_lg` model downloads.
conda install -y -c conda-forge spacy && python -m spacy download en && \
python -m spacy download en_core_web_lg && \
# The apt-get version of ImageMagick is out of date and has compatibility issues, so we build from source
sudo apt-get -y install dbus fontconfig fontconfig-config fonts-dejavu-core fonts-droid ghostscript gsfonts hicolor-icon-theme \
libavahi-client3 libavahi-common-data libavahi-common3 libcairo2 libcap-ng0 libcroco3 \
libcups2 libcupsfilters1 libcupsimage2 libdatrie1 libdbus-1-3 libdjvulibre-text libdjvulibre21 libfftw3-double3 libfontconfig1 \
libfreetype6 libgdk-pixbuf2.0-0 libgdk-pixbuf2.0-common libgomp1 libgraphite2-3 libgs9 libgs9-common libharfbuzz0b libijs-0.35 \
libilmbase6 libjasper1 libjbig0 libjbig2dec0 libjpeg62-turbo liblcms2-2 liblqr-1-0 libltdl7 libmagickcore-6.q16-2 \
libmagickcore-6.q16-2-extra libmagickwand-6.q16-2 libnetpbm10 libopenexr6 libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 \
libpaper-utils libpaper1 libpixman-1-0 libpng12-0 librsvg2-2 librsvg2-common libthai-data libthai0 libtiff5 libwmf0.2-7 \
libxcb-render0 libxcb-shm0 netpbm poppler-data p7zip-full && \
# NOTE(review): ~/src is not created anywhere earlier in the visible script, and
# `wget` below has no URL; the next line expects it to have fetched
# ImageMagick.tar.gz into the current directory.
cd ~/src && \
wget && \
# Unpack, enter the extracted ImageMagick-* directory (located via `ls -d`),
# print the location and listing for the log, then configure, build with one
# make job per processor reported by nproc, and install.
tar xzf ImageMagick.tar.gz && cd `ls -d ImageMagick-*` && pwd && ls -al && ./configure && \
make -j $(nproc) && make install && \
# clean up ImageMagick source files
cd ../ && rm -rf ImageMagick*
# Not part of the chain above: runs regardless of whether the build succeeded.
pip install opencv-python
# Tensorflow source build
# Precompile for Tesla k80 and p100. (The "See ..." link is missing from this copy.)
sudo apt-get update && \
sudo apt-get install -y python-software-properties zip
# Append "precise" deb and deb-src entries to the main apt sources list and
# import three signing keys for them.
# NOTE(review): both `deb` lines lack a repository URI and the keyserver is just
# `hkp://` -- they look truncated; restore them before relying on this.
# `apt-key` is also called without sudo here -- confirm whether this script is
# meant to run as root.
sudo sh -c 'echo "deb precise main" | tee -a /etc/apt/sources.list' && \
sudo sh -c 'echo "deb-src precise main" | tee -a /etc/apt/sources.list' && \
apt-key adv --keyserver hkp:// --recv-keys EEA14886 C857C906 2B90D010
# Record the Oracle licence as accepted/seen in debconf before installing
# oracle-java8-installer (presumably so the installer does not prompt).
# NOTE(review): debconf-set-selections runs without sudo.
sudo apt-get update && \
echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \
echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections && \
sudo apt-get install -y oracle-java8-installer && \
# Register a Bazel apt source and its signing key, then install and upgrade Bazel.
# NOTE(review): the repository URI is missing from the `deb` line and `curl` has
# no URL; `apt-key add` also runs without sudo.
sudo sh -c 'echo "deb [arch=amd64] stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list' && \
curl | apt-key add - && \
sudo apt-get update && sudo apt-get install -y bazel && \
sudo apt-get upgrade -y bazel
#sudo apt-get install -y python3-pip python3-dev
# NOTE(review): no package or wheel is named after --upgrade, so this pip call has
# nothing to install. Given the "Tensorflow source build" heading above, it
# presumably installed the built TensorFlow wheel -- confirm and restore.
pip install --upgrade
# Clean up pip wheel and Bazel cache when done.
# NOTE(review): throughout this chain every `git clone` and `pip install git+...`
# is missing its repository URL, and `python install` names no script to run
# (presumably `python setup.py install`). Each of these fails as written and,
# via `&&`, stops the rest of the chain -- restore them before running.
sudo apt-get install -y libfreetype6-dev && \
sudo apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \
# textblob
pip install textblob && \
# word cloud
pip install wordcloud && \
conda install -y -c conda-forge python-igraph && \
# xgboost: clone into ~/src/xgboost, run make, then install from python-package.
cd ~/src && mkdir xgboost && cd xgboost && \
git clone --depth 1 --recursive && cd xgboost && \
make && cd python-package && python install && \
pip install lightgbm && \
# Lasagne: clone, install its requirements.txt, then install the package.
cd ~/src && mkdir Lasagne && cd Lasagne && \
git clone --depth 1 && cd Lasagne && \
pip install -r requirements.txt && python install && \
# keras and keras-rl from source checkouts.
cd ~/src && mkdir keras && cd keras && \
git clone --depth 1 && \
cd keras && python install && \
cd ~/src && mkdir keras-rl && cd keras-rl && \
git clone --depth 1 && \
cd keras-rl && python install && \
pip install git+ && \
# neon: editable install (pip install -e .) from its checkout.
cd ~/src && \
git clone --depth 1 && \
cd neon && pip install -e . && \
# nolearn: README.rst and CHANGES.rst are overwritten with a one-character
# placeholder before installing (presumably because its setup reads them).
cd ~/src && mkdir nolearn && cd nolearn && \
git clone --depth 1 && cd nolearn && \
echo "x" > README.rst && echo "x" > CHANGES.rst && \
python install && \
# Dev branch of Theano
pip install git+git:// --upgrade --no-deps && \
# put theano compiledir inside /tmp (it needs to be in writable dir)
# NOTE(review): the config is written to /.theanorc in the filesystem root and
# without sudo -- confirm whether the script runs as root or ~/.theanorc was meant.
printf "[global]\nbase_compiledir = /tmp/.theano\n" > /.theanorc && \
cd ~/src && git clone --depth 1 && \
cd pybrain && python install && \
# Base ATLAS
sudo apt-get install -y libatlas-base-dev && \
cd ~/src && git clone --depth 1 && \
cd python-Levenshtein && python install && \
cd ~/src && git clone --depth 1 && \
cd hep_ml && pip install . && \
# chainer
pip install chainer
# NLTK Project datasets
# NOTE(review): /usr/share/nltk_data is created (and then written to by the
# downloader) without sudo -- confirm the script is expected to run as root.
mkdir -p /usr/share/nltk_data && \
# NLTK Downloader no longer continues smoothly after an error, so we explicitly list
# the corpora that work
python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \
basque_grammars biocreative_ppi bllip_wsj_no_aux \
book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \
comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \
europarl_raw floresta gazetteers genesis gutenberg \
ieer inaugural indian jeita kimmo large_grammars lin_thesaurus mac_morpho machado \
masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \
mte_teip5 names nps_chat omw opinion_lexicon paradigms \
pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \
pros_cons ptb punkt qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \
sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \
state_union stopwords subjectivity swadesh switchboard tagsets toolbox treebank \
twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \
vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe && \
# Stop-words
pip install stop-words && \
# clean up: pip cache, unneeded apt packages and apt's package cache, and
# everything under ~/src (the source checkouts made so far).
rm -rf ~/.cache/pip/* && \
sudo apt-get autoremove -y && sudo apt-get clean && \
rm -rf ~/src/*
# Make sure the dynamic linker finds the right libstdc++
# Install Basemap via conda temporarily
# (the from-source Basemap/GEOS build is kept below, commented out)
sudo apt-get update && \
#sudo apt-get -y install libgeos-dev && \
#pip install matplotlib && \
#pip install pyshp && \
#pip install pyproj && \
#cd ~/src && git clone && \
#cd basemap/geos-3.3.3 && \
#export GEOS_DIR=/usr/local && \
#./configure --prefix=$GEOS_DIR && \
#make && make install && \
#cd .. && python install && \
conda install -y basemap && \
# Pillow (PIL): install the zlib, lcms2 and webp development packages first,
# then Pillow itself from pip.
sudo apt-get -y install zlib1g-dev liblcms2-dev libwebp-dev && \
pip install Pillow
# opendeep from a source checkout.
# NOTE(review): in this chain every `git clone` is missing its repository URL
# and `python install` / `python develop` name no script (presumably
# `python setup.py ...`); the h2o download commands have likewise lost their URL
# and file-name arguments. Each fails as written and stops the chain via `&&`.
cd ~/src && git clone && \
cd opendeep && python develop && \
# sasl is apparently an ibis dependency
sudo apt-get -y install libsasl2-dev && \
# is psycopg2
# NOTE(review): the comment above is truncated; presumably libpq-dev is wanted
# for psycopg2 -- confirm.
sudo apt-get install -y libpq-dev && \
pip install ibis-framework && \
# Cartopy plus dependencies
yes | conda install -y proj4 && \
pip install packaging && \
cd ~/src && git clone && \
cd Shapely && python install && \
cd ~/src && git clone && \
cd cartopy && python install && \
# MXNet
pip install mxnet && \
# h2o
# This requires python-software-properties and Java, which were installed above.
# The first wget saves to a file named `latest`; the second reads its download
# list from that file (-i latest). The result is unzipped, h2o.jar is copied into
# ~/src/h2o, and any wheel found under the current directory is installed.
cd ~/src && mkdir h2o && cd h2o && \
wget -O latest && \
wget --no-check-certificate -i latest -O && rm latest && \
unzip && rm && cp h2o-*/h2o.jar . && \
pip install `find . -name "*whl"` && \
# Work around (the issue reference is missing from this copy)
pip install numpy --upgrade
# Keras setup
# Keras likes to add a config file in a custom directory when it's
# first imported. This doesn't work with our read-only filesystem, so we
# have it done now.
python -c "from keras.models import Sequential" && \
# Switch to TF backend
# NOTE(review): no command follows the heading above in this copy.
# Scikit-Learn nightly build
# NOTE(review): in this chain every `git clone` is missing its repository URL
# (two keep only a bare `git://`) and `python build` / `python install` name no
# script (presumably `python setup.py ...`). Each fails as written and stops the
# chain via `&&`.
cd ~/src && git clone && \
cd scikit-learn && python build && python install && \
# HDF5 support
conda install -y h5py && \
pip install biopython && \
# PUDB, for local debugging convenience
pip install pudb && \
# Imbalanced-learn
cd ~/src && git clone && \
cd imbalanced-learn && python install && \
# Convex Optimization library
# Latest version fails to install, see
# and
# pip install cvxopt && \
# Profiling and other utilities
pip install line_profiler && \
pip install orderedmultidict && \
pip install smhasher && \
conda install -y -c bokeh datashader && \
# Boruta (python implementation)
cd ~/src && git clone && \
cd boruta_py && python install && \
cd ~/src && git clone git:// && \
cd pyeconometrics && python install && \
# graphviz: the apt package first, then the pip package of the same name.
sudo apt-get install -y graphviz && pip install graphviz && \
# fiona is installed with GDAL_CONFIG set to /usr/bin/gdal-config for that one
# pip command only, after the libgdal1-dev apt package.
sudo apt-get install -y libgdal1-dev && GDAL_CONFIG=/usr/bin/gdal-config pip install fiona && pip install geopandas && \
# Pandoc is a dependency of deap
sudo apt-get install -y pandoc && \
cd ~/src && git clone git:// && \
cd py-earth && python install
#cd ~/src && git clone && cd essentia && \
#./waf configure --mode=release --build-static --with-python --with-cpptests --with-examples --with-vamp && \
#./waf && ./waf install && mv /usr/local/lib/python3.6/site-packages/essentia /opt/conda/lib/python3.6
# Install torch and torchvision from source, so we're using the cuda/cudnn libraries installed above.
# CXXFLAGS and CFLAGS are exported, so they stay set for every later command
# this script runs, not only for these builds.
# NOTE(review): the three `git clone` commands below have no repository URL
# (only the -b tags v0.4.0 and v0.2.1 survive), and `python install` names no
# script (presumably `python setup.py install`). Each fails as written and
# stops the chain via `&&`.
export CXXFLAGS="-std=c++11" && \
export CFLAGS="-std=c99" && \
conda install -y mkl mkl-include setuptools cmake cffi typing && \
conda install -y -c pytorch magma-cuda90 && \
# pytorch, tag v0.4.0
cd ~/src && \
git clone -b v0.4.0 --recursive && \
cd pytorch && \
python install && \
# vision (torchvision), tag v0.2.1
cd ~/src && \
git clone -b v0.2.1 --recursive && \
cd vision && \
python install && \
# PyTorch Audio
sudo apt-get install -y sox libsox-dev libsox-fmt-all && \
pip install cffi && \
cd ~/src && \
git clone && \
cd audio && \
python install && \
# ggpy / ggplot
pip install git+ && \
# Basic cuda support library for python.
pip install pycuda && \
# ~~~~ CLEAN UP ~~~~
# Clears the pip cache, removes unneeded apt packages and apt's package cache,
# and runs conda clean. Removing ~/src is disabled below.
rm -rf ~/.cache/pip/* && \
sudo apt-get autoremove -y && sudo apt-get clean && \
conda clean -i -l -t -y && \
#rm -rf ~/src/*
# One pip/conda install per line, all chained with `&&`: the first failure
# skips everything after it in this chain.
# NOTE(review): the `pip install git+...` lines in this list have lost their
# repository URLs and one `pip install` has no argument at all; each fails as
# written.
pip install --upgrade mpld3 && \
pip install mplleaflet && \
pip install gpxpy && \
pip install arrow && \
pip install vtk && \
pip install nilearn && \
pip install nibabel && \
pip install pronouncing && \
pip install markovify && \
pip install rf_perm_feat_import && \
pip install imgaug && \
pip install preprocessing && \
pip install Baker && \
pip install && \
pip install Geohash && \
# Rewrites `geohash` to `.geohash` inside the installed Geohash package.
# NOTE(review): the sed target ends in `/`, i.e. it names the package directory
# rather than a file (presumably a file name was lost) -- sed -i needs a file.
sed -i -- 's/geohash/.geohash/g' ~/anaconda3/lib/python3.6/site-packages/Geohash/ && \
pip install deap && \
pip install tpot && \
pip install scikit-optimize && \
pip install haversine && \
pip install toolz cytoolz && \
pip install sacred && \
pip install plotly && \
pip install git+ && \
pip install git+ && \
# tflean. Deep learning library featuring a higher-level API for TensorFlow.
pip install git+ && \
pip install fitter && \
pip install langid && \
# Delorean. Useful for dealing with datetime
pip install delorean && \
pip install trueskill && \
pip install heamy && \
pip install vida && \
# Useful data exploration libraries (for missing data and generating reports)
pip install missingno && \
pip install pandas-profiling && \
pip install s2sphere && \
pip install git+ && \
pip install matplotlib-venn && \
pip install pyldavis && \
# Pattern not yet Py3 compatible...
# pip install pattern && \
pip install git+git:// && \
pip install altair && \
pip install pystan && \
pip install ImageHash && \
conda install -y ecos && \
conda install -y CVXcanon && \
pip install fancyimpute && \
pip install git+ && \
pip install tifffile && \
pip install spectral && \
pip install descartes && \
pip install geojson && \
pip install pysal && \
#conda install -y gdal && \
pip install pyflux && \
pip install terminalplot && \
pip install raccoon && \
pip install pydicom && \
pip install wavio && \
pip install SimpleITK && \
pip install hmmlearn && \
pip install bayespy && \
pip install gplearn && \
pip install PyAstronomy && \
pip install squarify && \
pip install fuzzywuzzy && \
pip install python-louvain && \
pip install pyexcel-ods && \
pip install sklearn-pandas && \
pip install stemming && \
conda install -y -c conda-forge fbprophet && \
conda install -y -c conda-forge -c ioam holoviews geoviews && \
pip install hypertools && \
# Nxviz has been causing an installation issue by trying unsuccessfully to remove setuptools.
#pip install nxviz && \
# One pip install per line, chained with `&&`: the first failure skips the rest.
# NOTE(review): the two `git+` installs below have lost their repository URLs
# and fail as written.
pip install py_stringsimjoin && \
pip install speedml && \
pip install nibabel && \
pip install mlens && \
pip install scikit-multilearn && \
pip install -e git+ && \
pip install leven && \
pip install catboost && \
#cd ~/src && git clone --depth=1 && cd MLBox/python-package && python install && \
pip install fastFM && \
pip install lightfm && \
pip install paramnb && \
pip install folium && \
pip install scikit-plot && \
pip install dipy && \
pip install plotnine && \
pip install git+ && \
pip install scikit-surprise && \
pip install pymongo && \
pip install edward && \
pip install geoplot && \
pip install eli5 && \
pip install implicit && \
# The extras spec is quoted: unquoted, `[xgboost]` is a shell bracket pattern
# and the word would be replaced by any matching file name in the current
# directory (e.g. `dask-mlx`).
pip install 'dask-ml[xgboost]'
# kmeans-smote is installed without letting pip pull in its dependencies.
pip install kmeans-smote --no-dependencies && \
# Add google PAIR-code Facets
# The working directory becomes /opt/facets here and stays there for the rest
# of this chain (nothing below changes directory again).
# NOTE(review): `git clone` has no repository URL, so this step fails as written
# and stops the chain; cloning into /opt/ is also done without sudo.
cd /opt/ && git clone && cd facets/ && jupyter nbextension install facets-dist/ --user && \
# Exported for the remainder of this script run only; nothing here persists it
# for later shells.
export PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ && \
pip install --no-dependencies ethnicolr && \
# Update setuptools and add tensorpack
# NOTE(review): the `git+git://` install has lost its repository URL.
pip install --upgrade --ignore-installed setuptools && pip install --no-cache-dir git+git:// && \
pip install pycountry && pip install iso3166 && \
pip install pydash && \
pip install kmodes --no-dependencies && \
pip install librosa && \
pip install polyglot && \
pip install mmh3 && \
pip install fbpca && \
pip install sentencepiece && \
pip install cufflinks && \
pip install glmnet_py && \
pip install lime && \
pip install memory_profiler
# Install cython & cysignals before pyfasttext
pip install --upgrade cython && \
pip install --upgrade cysignals && \
pip install pyfasttext && \
pip install ktext && \
# fastText from a source checkout.
# NOTE(review): `git clone` has no repository URL, so this step fails as written
# and, via `&&`, skips everything after it in this chain.
cd ~/src && git clone --depth=1 && cd fastText && pip install . && \
# hunspell: the libhunspell-dev apt package first, then the pip package.
sudo apt-get install -y libhunspell-dev && pip install hunspell && \
pip install annoy && \
pip install category_encoders && \
pip install google-cloud-bigquery && \
pip install ortools && \
pip install scattertext && \
# Pandas data reader
pip install pandas-datareader && \
pip install pykoko && \
pip install wordsegment && \
pip install pyahocorasick && \
pip install wordbatch && \
pip install emoji && \
# Add Japanese morphological analysis engine
pip install janome && \
pip install wfdb && \
pip install vecstack && \
pip install sklearn-contrib-lightning && \
# yellowbrick machine learning visualization library
pip install yellowbrick && \
pip install mlcrate && \
pip install gym && \
pip install ray && \
# Required to display Altair charts in Jupyter notebook
pip install vega3 && \
jupyter nbextension install --sys-prefix --py vega3 && \
# clean up pip cache
rm -rf ~/.cache/pip/*
# ... and dependencies
# NOTE(review): the heading above is truncated in the original; since a fastai
# source install follows this list, these are presumably its dependencies.
# One install per line, chained with `&&`: the first failure skips the rest of
# the chain, which continues past the end of this list.
pip install bcolz && \
pip install bleach && \
pip install certifi && \
pip install cycler && \
pip install decorator && \
pip install entrypoints && \
pip install html5lib && \
pip install ipykernel && \
pip install ipython && \
pip install ipython-genutils && \
pip install ipywidgets && \
pip install isoweek && \
pip install jedi && \
pip install Jinja2 && \
pip install jsonschema && \
pip install jupyter && \
pip install jupyter-client && \
pip install jupyter-console && \
pip install jupyter-core && \
pip install MarkupSafe && \
pip install matplotlib && \
pip install mistune && \
pip install nbconvert && \
pip install nbformat && \
pip install notebook && \
pip install numpy && \
pip install olefile && \
pip install opencv-python && \
pip install --upgrade pandas && \
pip install pandas_summary && \
pip install pandocfilters && \
pip install pexpect && \
pip install pickleshare && \
pip install Pillow && \
pip install prompt-toolkit && \
pip install ptyprocess && \
pip install Pygments && \
pip install pyparsing && \
pip install python-dateutil==2.6.0 && \
pip install pytz && \
pip install PyYAML && \
pip install pyzmq && \
pip install qtconsole && \
pip install scipy && \
pip install seaborn && \
pip install simplegeneric && \
pip install six && \
pip install terminado && \
# The version spec stays quoted so the shell does not treat `<` as a
# redirection, and is separated from -y by a space so conda receives the flag
# and the spec as two arguments (they used to be glued into `-ytestpath<0.4`).
conda install -y 'testpath<0.4' && \
pip install tornado && \
pip install tqdm && \
pip install traitlets && \
pip install wcwidth && \
pip install webencodings && \
pip install widgetsnbextension && \
# fastai from a source checkout.
# NOTE(review): `git clone` has no repository URL and `python install` names no
# script (presumably `python setup.py install`); both fail as written and, via
# `&&`, skip the rest of this chain.
cd ~/src && git clone --depth=1 && \
cd fastai && python install && \
# clean up pip cache
rm -rf ~/.cache/pip/* && \
# `cd` with no argument returns to $HOME. The `&&` after it is followed only by
# a comment, so the chain continues with the next command line below
# (`pip install flashtext`); removing ~/src is disabled.
cd && #rm -rf ~/src/*
# Please add new pip/apt installs in this block. Don't forget a "&& \" at the end
# of all non-final lines. Thanks!
pip install flashtext && \
pip install marisa-trie && \
pip install pyemd && \
pip install pyupset && \
# NOTE(review): the two `git+` installs below have lost their repository URLs.
pip install -e git+ && \
pip install git+ && \
##### ^^^^ Add new contributions above here
# clean up pip cache
rm -rf ~/.cache/pip/*
# For Facets
# For Theano with MKL
# Temporary fixes and patches:
# Temporary patch for Dask getting downgraded, which breaks Keras
pip install --upgrade dask && \
# Stop jupyter nbconvert trying to rewrite its folder hierarchy
# Creates a `migrated` marker file under ~/.jupyter and under /.jupyter.
# NOTE(review): the first `touch` on each line targets the directory itself
# (the path ends in `/`); presumably a config file name was lost -- confirm.
# /.jupyter is also created in the filesystem root without sudo.
mkdir -p ~/.jupyter && touch ~/.jupyter/ && touch ~/.jupyter/migrated && \
mkdir -p /.jupyter && touch /.jupyter/ && touch /.jupyter/migrated && \
# Stop Matplotlib printing junk to the console on first load
# Replaces any line mentioning the font-cache message with a "# Warning removed
# by Kaggle" comment.
# NOTE(review): the sed target ends in `/`, i.e. it names the matplotlib package
# directory rather than a file (presumably a file name was lost) -- sed -i
# needs a file.
sed -i "s/^.*Matplotlib is building the font cache using fc-list.*$/# Warning removed by Kaggle/g" ~/anaconda3/lib/python3.6/site-packages/matplotlib/ && \
# Make matplotlib output in Jupyter notebooks display correctly
# NOTE(review): the redirection target is the directory /etc/ipython/ itself, so
# the write cannot succeed as written (presumably a config file name was lost);
# /etc/ipython/ is also created and written without sudo.
mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/
# Add BigQuery client proxy settings
# Set backend for matplotlib
