janfreyberg/docker-compose.yml

## docker-compose.yml
# Development stack for distributed labelling.
# Postgres credentials: user and password are both "superintendent", database
# is "labelling". These are development defaults only; change them before
# exposing this stack outside a trusted network.
version: '3.1'

services:

  # Postgres database; data files are persisted on the host in ./postgres-data.
  db:
    image: postgres
    restart: always
    environment:
      POSTGRES_USER: superintendent
      POSTGRES_PASSWORD: superintendent
      POSTGRES_DB: labelling
    volumes:
      - ./postgres-data:/var/lib/postgresql/data
    ports:
      # Port mappings are quoted so YAML always reads them as strings.
      - '5432:5432'

  # Adminer, published on host port 8080.
  adminer:
    image: adminer
    restart: always
    ports:
      - '8080:8080'

  # Labelling front end. No `build:` key is given, so an image tagged `voila`
  # must already exist locally (presumably built from voila.Dockerfile).
  # The notebook is mounted as app.ipynb inside /home/anaconda/app.
  notebook:
    image: voila
    restart: always
    volumes:
      - ./voila-interface.ipynb:/home/anaconda/app/app.ipynb
    ports:
      - '8866:8866'

  # Same image as `notebook`, but the entrypoint is overridden to run
  # orchestrate.py with the conda interpreter. The script path is relative,
  # so it depends on the image's working directory being /home/anaconda/app.
  orchestrator:
    image: voila
    restart: always
    entrypoint: /opt/conda/bin/python orchestrate.py
    volumes:
      - ./orchestrate.py:/home/anaconda/app/orchestrate.py

## docker-requirements.txt
voila>=0.1.2

## talk.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Labelling data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_digits\n",
    "from superintendent import SemiSupervisor\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load the handwritten digits dataset (8x8 images, a small MNIST-like set) from scikit-learn:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "digits = load_digits().data\n",
    "\n",
    "print(digits.shape)\n",
    "\n",
    "plt.imshow(digits[0, :].reshape(8, 8), cmap='Greys_r')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create the labelling frontend:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "widget = SemiSupervisor.from_images(\n",
    "    features=digits,\n",
    "    options=range(10)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "widget"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Access the labels you've just created:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# widget.labels\n",
    "widget.new_labels[:20]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train a model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Positions of the items that have a label; entries that are None are skipped.\n",
    "labelled_idx = [i for i, label in enumerate(widget.new_labels) if label is not None]\n",
    "y = np.array([int(widget.new_labels[i]) for i in labelled_idx])\n",
    "# Take the feature rows at those same positions so that x[k] pairs with y[k],\n",
    "# even when the labelled items are not the first len(y) rows.\n",
    "x = widget.features[labelled_idx]\n",
    "x, y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "model = LogisticRegression(solver='lbfgs', multi_class='ovr', max_iter=1000)\n",
    "model.fit(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.predict_proba(x[:5, :])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Bake training into the labelling process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "widget = SemiSupervisor.from_images(\n",
    "    features=digits,\n",
    "    labels=widget.new_labels,\n",
    "    options=range(10),\n",
    "    classifier=model,\n",
    "    reorder='entropy',\n",
    ")\n",
    "widget"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Distribute your labelling\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_digits\n",
    "import numpy as np\n",
    "digits = load_digits().data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "if os.path.isfile(\"demo.db\"):\n",
    "    os.remove(\"demo.db\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from superintendent.distributed import SemiSupervisor\n",
    "\n",
    "widget = SemiSupervisor.from_images(\n",
    "    connection_string=\"sqlite:///demo.db\",\n",
    "    options=range(10)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "widget"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "widget.add_features(digits[:1000, :])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

## voila-interface.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              voila-interface.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## voila.Dockerfile
FROM continuumio/miniconda3:4.6.14-alpine

# All Python tooling is invoked through absolute /opt/conda/bin paths.
RUN /opt/conda/bin/pip install --upgrade pip

# Application directory; the notebook and scripts are expected here.
RUN mkdir /home/anaconda/app
WORKDIR /home/anaconda/app

# Install the base requirements before copying the source tree, so this layer
# is not rebuilt when only the source changes.
COPY docker-requirements.txt docker-requirements.txt
RUN /opt/conda/bin/pip install -r docker-requirements.txt

# Install superintendent from the local source tree (the build context).
# To install the released package from PyPI instead, use the commented line.
COPY . .
RUN /opt/conda/bin/pip install --user .
# RUN /opt/conda/bin/pip install superintendent

# For images built FROM this one: copy their build context and install their
# requirements.txt if present, using the same pip as the steps above.
ONBUILD COPY . .
ONBUILD RUN if [ -f requirements.txt ]; then /opt/conda/bin/pip install -r requirements.txt; fi

# Serve app.ipynb with voila by default; the CMD can be overridden at run time.
ENTRYPOINT ["/opt/conda/bin/voila"]
CMD ["app.ipynb"]
	# Postgres credentials: user and password are both "superintendent" (see POSTGRES_USER / POSTGRES_PASSWORD below)
	version: '3.1'

	services:

	db:
	image: postgres
	restart: always
	environment:
	POSTGRES_USER: superintendent
	POSTGRES_PASSWORD: superintendent
	POSTGRES_DB: labelling
	volumes:
	- ./postgres-data:/var/lib/postgresql/data
	ports:
	- 5432:5432

	adminer:
	image: adminer
	restart: always
	ports:
	- 8080:8080

	notebook:
	image: voila
	restart: always
	volumes:
	- ./voila-interface.ipynb:/home/anaconda/app/app.ipynb
	ports:
	- 8866:8866

	orchestrator:
	image: voila
	restart: always
	entrypoint: /opt/conda/bin/python orchestrate.py
	volumes:
	- ./orchestrate.py:/home/anaconda/app/orchestrate.py
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Labelling data"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from sklearn.datasets import load_digits\n",
	"from superintendent import SemiSupervisor\n",
	"import numpy as np\n",
	"\n",
	"import matplotlib.pyplot as plt"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Load the handwritten digits dataset (8x8 images, a small MNIST-like set) from scikit-learn:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"digits = load_digits().data\n",
	"\n",
	"print(digits.shape)\n",
	"\n",
	"plt.imshow(digits[0, :].reshape(8, 8), cmap='Greys_r')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Create the labelling frontend:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"widget = SemiSupervisor.from_images(\n",
	" features=digits,\n",
	" options=range(10)\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"widget"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Access the labels you've just created:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"# widget.labels\n",
	"widget.new_labels[:20]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"\n",
	"---"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Train a model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Positions of the items that have a label; entries that are None are skipped.\n",
	"labelled_idx = [i for i, label in enumerate(widget.new_labels) if label is not None]\n",
	"y = np.array([int(widget.new_labels[i]) for i in labelled_idx])\n",
	"# Take the feature rows at those same positions so that x[k] pairs with y[k],\n",
	"# even when the labelled items are not the first len(y) rows.\n",
	"x = widget.features[labelled_idx]\n",
	"x, y"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from sklearn.linear_model import LogisticRegression\n",
	"model = LogisticRegression(solver='lbfgs', multi_class='ovr', max_iter=1000)\n",
	"model.fit(x, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"model.predict_proba(x[:5, :])"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Bake training into the labelling process"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"widget = SemiSupervisor.from_images(\n",
	" features=digits,\n",
	" labels=widget.new_labels,\n",
	" options=range(10),\n",
	" classifier=model,\n",
	" reorder='entropy',\n",
	")\n",
	"widget"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Distribute your labelling\n",
	"\n",
	"---"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from sklearn.datasets import load_digits\n",
	"import numpy as np\n",
	"digits = load_digits().data\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import os\n",
	"if os.path.isfile(\"demo.db\"):\n",
	" os.remove(\"demo.db\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from superintendent.distributed import SemiSupervisor\n",
	"\n",
	"widget = SemiSupervisor.from_images(\n",
	" connection_string=\"sqlite:///demo.db\",\n",
	" options=range(10)\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"widget"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"widget.add_features(digits[:1000, :])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}
	FROM continuumio/miniconda3:4.6.14-alpine

	# All Python tooling is invoked through absolute /opt/conda/bin paths.
	RUN /opt/conda/bin/pip install --upgrade pip

	# Application directory; the notebook and scripts are expected here.
	RUN mkdir /home/anaconda/app
	WORKDIR /home/anaconda/app

	# Install the base requirements before copying the source tree, so this layer
	# is not rebuilt when only the source changes.
	COPY docker-requirements.txt docker-requirements.txt
	RUN /opt/conda/bin/pip install -r docker-requirements.txt

	# Install superintendent from the local source tree (the build context).
	# To install the released package from PyPI instead, use the commented line.
	COPY . .
	RUN /opt/conda/bin/pip install --user .
	# RUN /opt/conda/bin/pip install superintendent

	# For images built FROM this one: copy their build context and install their
	# requirements.txt if present, using the same pip as the steps above.
	ONBUILD COPY . .
	ONBUILD RUN if [ -f requirements.txt ]; then /opt/conda/bin/pip install -r requirements.txt; fi

	# Serve app.ipynb with voila by default; the CMD can be overridden at run time.
	ENTRYPOINT ["/opt/conda/bin/voila"]
	CMD ["app.ipynb"]