Skip to content

Instantly share code, notes, and snippets.

@janfreyberg
Created July 17, 2019 08:36
Show Gist options
  • Save janfreyberg/a959244c3a523e118fd40c839ee1798d to your computer and use it in GitHub Desktop.
Save janfreyberg/a959244c3a523e118fd40c839ee1798d to your computer and use it in GitHub Desktop.
Pydata superintendent talk
# Labelling stack: a Postgres database (user/password "superintendent",
# database "labelling"), the Adminer web UI, a Voila notebook front end and an
# orchestrator process.
# NOTE(review): the database credentials are hard-coded here; fine for a demo,
# move them to an env file or secret store before any real deployment.
version: '3.1'

services:

  # PostgreSQL; data is persisted in ./postgres-data on the host.
  db:
    image: postgres
    restart: always
    environment:
      POSTGRES_USER: superintendent
      POSTGRES_PASSWORD: superintendent
      POSTGRES_DB: labelling
    volumes:
      - ./postgres-data:/var/lib/postgresql/data
    ports:
      # Port mappings are quoted so they are always read as strings.
      - '5432:5432'

  # Adminer web UI, published on host port 8080.
  adminer:
    image: adminer
    restart: always
    ports:
      - '8080:8080'

  # Labelling front end: the local notebook is mounted as app.ipynb inside
  # the `voila` image and published on host port 8866.
  notebook:
    image: voila
    restart: always
    volumes:
      - ./voila-interface.ipynb:/home/anaconda/app/app.ipynb
    ports:
      - '8866:8866'

  # Same image as `notebook`, but the entrypoint is replaced so the container
  # runs the mounted orchestrate.py with the conda Python interpreter.
  orchestrator:
    image: voila
    restart: always
    entrypoint: /opt/conda/bin/python orchestrate.py
    volumes:
      - ./orchestrate.py:/home/anaconda/app/orchestrate.py
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Labelling data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_digits\n",
"from superintendent import SemiSupervisor\n",
"import numpy as np\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the handwritten digits dataset (8x8 images) from Scikit-learn:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"digits = load_digits().data\n",
"\n",
"print(digits.shape)\n",
"\n",
"plt.imshow(digits[0, :].reshape(8, 8), cmap='Greys_r')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the labelling frontend:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"widget = SemiSupervisor.from_images(\n",
" features=digits,\n",
" options=range(10)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"widget"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Access the labels you've just created:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# widget.labels\n",
"widget.new_labels[:20]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"---"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train a model"
]
},
{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Build the training set from the items that have been labelled so far.\n",
  "y = np.array([int(label) for label in widget.new_labels if label is not None])\n",
  "# Pick the feature rows at the positions that actually carry a label, so that\n",
  "# x[i] always pairs with y[i] even when earlier items are still unlabelled.\n",
  "x = widget.features[[i for i, label in enumerate(widget.new_labels) if label is not None]]\n",
  "x, y"
 ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"model = LogisticRegression(solver='lbfgs', multi_class='ovr', max_iter=1000)\n",
"model.fit(x, y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.predict_proba(x[:5, :])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Bake training into the labelling process"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"widget = SemiSupervisor.from_images(\n",
" features=digits,\n",
" labels=widget.new_labels,\n",
" options=range(10),\n",
" classifier=model,\n",
" reorder='entropy',\n",
")\n",
"widget"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Distribute your labelling\n",
"\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_digits\n",
"import numpy as np\n",
"digits = load_digits().data\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"if os.path.isfile(\"demo.db\"):\n",
" os.remove(\"demo.db\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from superintendent.distributed import SemiSupervisor\n",
"\n",
"widget = SemiSupervisor.from_images(\n",
" connection_string=\"sqlite:///demo.db\",\n",
" options=range(10)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"widget"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"widget.add_features(digits[:1000, :])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Image that serves a notebook with Voila (see ENTRYPOINT/CMD at the bottom)
# and has the project from the build context installed.
FROM continuumio/miniconda3:4.6.14-alpine
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN /opt/conda/bin/pip install --no-cache-dir --upgrade pip
RUN mkdir /home/anaconda/app
WORKDIR /home/anaconda/app
# Copy and install the dependency list on its own, before the rest of the
# sources, so this layer can be reused while docker-requirements.txt is unchanged.
COPY docker-requirements.txt docker-requirements.txt
RUN /opt/conda/bin/pip install --no-cache-dir -r docker-requirements.txt
# Install the package from the local build context (not from PyPI).
COPY . .
RUN /opt/conda/bin/pip install --no-cache-dir --user .
# Alternative: install the released superintendent package from PyPI instead.
# RUN /opt/conda/bin/pip install superintendent
# Images built FROM this one copy their own build context into the working
# directory and install its requirements.txt when that file exists.
ONBUILD COPY . .
ONBUILD RUN if [ -f requirements.txt ]; then /opt/conda/bin/pip install --no-cache-dir -r requirements.txt; fi
# Serve app.ipynb by default; a different notebook can be passed as the command.
ENTRYPOINT ["/opt/conda/bin/voila"]
CMD ["app.ipynb"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment