Created
July 17, 2019 08:36
-
-
Save janfreyberg/a959244c3a523e118fd40c839ee1798d to your computer and use it in GitHub Desktop.
PyData superintendent talk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Labelling stack: a PostgreSQL database, the Adminer web UI for inspecting it,
# a Voila-served labelling notebook, and an orchestration script.
# Database credentials are superintendent/superintendent (database "labelling").
# NOTE(review): these are plaintext demo credentials - change them before
# exposing the stack beyond a local machine.
version: '3.1'

services:

  db:
    image: postgres
    restart: always
    environment:
      POSTGRES_USER: superintendent
      POSTGRES_PASSWORD: superintendent
      POSTGRES_DB: labelling
    volumes:
      # Bind-mount the data directory so the database files live on the host.
      - ./postgres-data:/var/lib/postgresql/data
    ports:
      # Port mappings are quoted so YAML always reads them as strings.
      - "5432:5432"

  adminer:
    image: adminer
    restart: always
    ports:
      - "8080:8080"

  notebook:
    # presumably the image built from the accompanying Dockerfile - confirm the tag
    image: voila
    restart: always
    volumes:
      # The labelling notebook is mounted into the app directory as app.ipynb.
      - ./voila-interface.ipynb:/home/anaconda/app/app.ipynb
    ports:
      - "8866:8866"

  orchestrator:
    image: voila
    restart: always
    # Same image as the notebook service, but the entrypoint is replaced so the
    # container runs the mounted orchestrate.py with conda's Python.
    entrypoint: /opt/conda/bin/python orchestrate.py
    volumes:
      - ./orchestrate.py:/home/anaconda/app/orchestrate.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
voila>=0.1.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Labelling data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.datasets import load_digits\n", | |
"from superintendent import SemiSupervisor\n", | |
"import numpy as np\n", | |
"\n", | |
"import matplotlib.pyplot as plt" | |
] | |
}, | |
{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "Load the 8x8 handwritten digits dataset from scikit-learn (`load_digits`; a small MNIST-like dataset, not MNIST itself):"
 ]
},
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"digits = load_digits().data\n", | |
"\n", | |
"print(digits.shape)\n", | |
"\n", | |
"plt.imshow(digits[0, :].reshape(8, 8), cmap='Greys_r')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create the labelling frontend:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"widget = SemiSupervisor.from_images(\n", | |
" features=digits,\n", | |
" options=range(10)\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"widget" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Access the labels you've just created:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# widget.labels\n", | |
"widget.new_labels[:20]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"\n", | |
"---" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Train a model" | |
] | |
}, | |
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Keep features and labels aligned: select the rows that actually have a\n",
  "# label, rather than the first len(y) rows of the feature matrix.\n",
  "labelled = [i for i, label in enumerate(widget.new_labels) if label is not None]\n",
  "y = np.array([int(widget.new_labels[i]) for i in labelled])\n",
  "x = widget.features[labelled]\n",
  "x, y"
 ]
},
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.linear_model import LogisticRegression\n", | |
"model = LogisticRegression(solver='lbfgs', multi_class='ovr', max_iter=1000)\n", | |
"model.fit(x, y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"model.predict_proba(x[:5, :])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Bake training into the labelling process" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"widget = SemiSupervisor.from_images(\n", | |
" features=digits,\n", | |
" labels=widget.new_labels,\n", | |
" options=range(10),\n", | |
" classifier=model,\n", | |
" reorder='entropy',\n", | |
")\n", | |
"widget" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Distribute your labelling\n", | |
"\n", | |
"---" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.datasets import load_digits\n", | |
"import numpy as np\n", | |
"digits = load_digits().data\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"if os.path.isfile(\"demo.db\"):\n", | |
" os.remove(\"demo.db\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from superintendent.distributed import SemiSupervisor\n", | |
"\n", | |
"widget = SemiSupervisor.from_images(\n", | |
" connection_string=\"sqlite:///demo.db\",\n", | |
" options=range(10)\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"widget" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"widget.add_features(digits[:1000, :])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Base image for serving superintendent labelling notebooks with Voila.
# conda's executables are always called by absolute path (/opt/conda/bin/...)
# in this file; the ONBUILD step does the same so it resolves to the same pip.
FROM continuumio/miniconda3:4.6.14-alpine

RUN /opt/conda/bin/pip install --no-cache-dir --upgrade pip

# Application directory: the notebook and any scripts live here.
RUN mkdir /home/anaconda/app
WORKDIR /home/anaconda/app

# Install the runtime requirements before copying the source so this layer
# stays cached when only the package source changes.
COPY docker-requirements.txt docker-requirements.txt
RUN /opt/conda/bin/pip install --no-cache-dir -r docker-requirements.txt

# Install superintendent from the local build context (not from PyPI).
COPY . .
RUN /opt/conda/bin/pip install --no-cache-dir --user .
# Alternative: install the released package from PyPI instead.
# RUN /opt/conda/bin/pip install superintendent

# Images built FROM this one get their own build context copied in and, if it
# contains a requirements.txt, those packages installed with conda's pip.
ONBUILD COPY . .
ONBUILD RUN if [ -f requirements.txt ]; then /opt/conda/bin/pip install --no-cache-dir -r requirements.txt; fi

# Documentation only: the port the compose "notebook" service publishes
# (8866:8866); presumably Voila's default listening port - confirm.
EXPOSE 8866

# Voila is the entrypoint; the notebook name is the overridable default argument.
ENTRYPOINT ["/opt/conda/bin/voila"]
CMD ["app.ipynb"]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment