MaxHalford/showdown.ipynb

## showdown.ipynb
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Showdown"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext watermark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Python implementation: CPython\n",
      "Python version       : 3.10.8\n",
      "IPython version      : 8.12.0\n",
      "\n",
      "river       : 0.15.0\n",
      "scikit-learn: 1.2.2\n",
      "torch       : 2.0.0\n",
      "vowpalwabbit: 9.8.0\n",
      "\n",
      "Compiler    : Clang 14.0.0 (clang-1400.0.29.102)\n",
      "OS          : Darwin\n",
      "Release     : 22.2.0\n",
      "Machine     : arm64\n",
      "Processor   : arm\n",
      "CPU cores   : 8\n",
      "Architecture: 64bit\n",
      "\n"
     ]
    }
   ],
   "source": [
    "%watermark -p river,scikit-learn,torch,vowpalwabbit -mv"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Registry filled by the cells below: display name -> zero-argument factory returning a new model.\n",
    "models = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import river.linear_model\n",
    "import river.optim\n",
    "\n",
    "# River baseline: logistic regression trained with plain SGD (lr=0.01) and no L2 penalty.\n",
    "# NOTE(review): intercept_lr=0 presumably freezes the intercept, mirroring the bias-free\n",
    "# settings used for the other libraries -- confirm against the River documentation.\n",
    "models['River'] = lambda: river.linear_model.LogisticRegression(\n",
    "    optimizer=river.optim.SGD(lr=0.01),\n",
    "    intercept_lr=0,\n",
    "    l2=0.0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "from vowpalwabbit import pyvw\n",
    "import river.base\n",
    "\n",
    "class VW2RiverClassifier(river.base.Classifier):\n",
    "    \"\"\"Adapter exposing a Vowpal Wabbit workspace through River's classifier interface.\n",
    "\n",
    "    Every constructor argument is forwarded unchanged to ``pyvw.Workspace``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, *args, **kwargs):\n",
    "        self.vw = pyvw.Workspace(*args, **kwargs)\n",
    "\n",
    "    def _format_x(self, x):\n",
    "        \"\"\"Render a feature dict as space-separated ``key:value`` pairs.\"\"\"\n",
    "        pairs = [f\"{name}:{value}\" for name, value in x.items()]\n",
    "        return \" \".join(pairs)\n",
    "\n",
    "    def learn_one(self, x, y):\n",
    "        \"\"\"Feed one labelled example to the workspace and return ``self``.\"\"\"\n",
    "        # Map {False, True} to {-1, 1} before building the example line.\n",
    "        signed_label = 2 * int(y) - 1\n",
    "        self.vw.learn(f\"{signed_label} | {self._format_x(x)}\")\n",
    "        return self\n",
    "\n",
    "    def predict_proba_one(self, x):\n",
    "        \"\"\"Return ``{True: p, False: 1 - p}`` where ``p`` is the workspace's prediction for ``x``.\"\"\"\n",
    "        positive = self.vw.predict(\"| \" + self._format_x(x))\n",
    "        return {True: positive, False: 1.0 - positive}\n",
    "\n",
    "# Vowpal Wabbit baseline: SGD on the logistic loss with a logistic link, with the adaptive,\n",
    "# normalized and invariant options turned off, no constant feature and no L1/L2 penalty.\n",
    "# NOTE(review): power_t=0 presumably disables learning-rate decay -- confirm in the VW docs.\n",
    "models['Vowpal Wabbit'] = lambda: VW2RiverClassifier(\n",
    "    sgd=True,\n",
    "    learning_rate=0.01,\n",
    "    loss_function=\"logistic\",\n",
    "    link=\"logistic\",\n",
    "    adaptive=False,\n",
    "    normalized=False,\n",
    "    invariant=False,\n",
    "    noconstant=True,\n",
    "    l2=0.0,\n",
    "    l1=0.0,\n",
    "    power_t=0,\n",
    "    quiet=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sklearn.base\n",
    "import sklearn.exceptions\n",
    "import sklearn.linear_model\n",
    "\n",
    "class SK2RiverClassifier(river.base.Classifier):\n",
    "    \"\"\"Adapter exposing an incremental scikit-learn classifier through River's interface.\n",
    "\n",
    "    ``estimator`` is driven through ``partial_fit`` and ``predict_proba``. ``classes`` is the\n",
    "    label list handed to every ``partial_fit`` call and used to key the returned probabilities.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, estimator: sklearn.base.ClassifierMixin, classes: list):\n",
    "        self.estimator = estimator\n",
    "        self.classes = classes\n",
    "\n",
    "    def learn_one(self, x, y):\n",
    "        \"\"\"Update the estimator with a single example and return ``self``.\"\"\"\n",
    "        row = list(x.values())\n",
    "        self.estimator.partial_fit(X=[row], y=[y], classes=self.classes)\n",
    "        return self\n",
    "\n",
    "    def predict_proba_one(self, x):\n",
    "        \"\"\"Return a ``{class: probability}`` dict, uniform while the estimator is unfitted.\"\"\"\n",
    "        row = list(x.values())\n",
    "        try:\n",
    "            probas = self.estimator.predict_proba([row])[0]\n",
    "        except sklearn.exceptions.NotFittedError:\n",
    "            return {label: 1 / len(self.classes) for label in self.classes}\n",
    "        # Position i of the estimator's output is reported under classes[i].\n",
    "        return {self.classes[i]: proba for i, proba in enumerate(probas)}\n",
    "\n",
    "# scikit-learn baseline: SGDClassifier on the log loss, without penalty or intercept, using a\n",
    "# constant learning rate eta0=0.01. The wrapper trains it one sample at a time via partial_fit.\n",
    "models['scikit-learn'] = lambda: SK2RiverClassifier(\n",
    "    estimator=sklearn.linear_model.SGDClassifier(\n",
    "        loss='log_loss',\n",
    "        penalty=None,\n",
    "        fit_intercept=False,\n",
    "        learning_rate='constant',\n",
    "        eta0=0.01,\n",
    "        max_iter=1,\n",
    "    ),\n",
    "    classes=[False, True]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "class TorchLogisticRegression(torch.nn.Module):\n",
    "    \"\"\"Bias-free linear layer that outputs one raw score (logit) per class.\n",
    "\n",
    "    The weights start at zero. ``forward`` applies no activation: ``torch.nn.CrossEntropyLoss``\n",
    "    expects unnormalized logits and applies ``log_softmax`` itself, so squashing the scores\n",
    "    with a sigmoid first distorts both the loss and its gradients.\n",
    "\n",
    "    Note: with two output rows, an SGD step moves the difference between the two logits twice\n",
    "    as far as a single-weight-vector logistic regression would at the same learning rate.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, n_features: int, n_classes: int = 2):\n",
    "        super().__init__()\n",
    "        self.linear = torch.nn.Linear(n_features, n_classes, bias=False)\n",
    "        self.linear.weight.data.zero_()\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return the class logits computed by the linear layer for ``x``.\"\"\"\n",
    "        return self.linear(x)\n",
    "\n",
    "class Torch2RiverClassifier(river.base.Classifier):\n",
    "    \"\"\"Adapter exposing a PyTorch module through River's classifier interface.\n",
    "\n",
    "    ``model`` must return one logit per entry of ``classes`` for a single-row float32 batch;\n",
    "    logit ``i`` is reported under ``classes[i]``. Training uses cross-entropy and SGD (lr=0.01).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, model: torch.nn.Module, classes: list):\n",
    "        self.model = model\n",
    "        self.classes = classes\n",
    "        self.criterion = torch.nn.CrossEntropyLoss()\n",
    "        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01)\n",
    "\n",
    "    def learn_one(self, x: dict, y):\n",
    "        \"\"\"Take one SGD step on a single example and return ``self``.\"\"\"\n",
    "        features = torch.tensor([list(x.values())], dtype=torch.float32)\n",
    "        target = torch.tensor([y], dtype=torch.long)\n",
    "        # forward pass on raw logits, as CrossEntropyLoss requires\n",
    "        loss = self.criterion(self.model(features), target)\n",
    "        # backward pass and optimization\n",
    "        self.model.zero_grad(set_to_none=True)\n",
    "        loss.backward()\n",
    "        self.optimizer.step()\n",
    "        return self\n",
    "\n",
    "    def predict_proba_one(self, x: dict):\n",
    "        \"\"\"Return ``{class: probability}``; the probabilities are the softmax of the logits.\"\"\"\n",
    "        features = torch.tensor([list(x.values())], dtype=torch.float32)\n",
    "        # Inference only: skip autograd bookkeeping.\n",
    "        with torch.no_grad():\n",
    "            probas = torch.softmax(self.model(features), dim=1)[0]\n",
    "        return dict(zip(self.classes, probas.numpy()))\n",
    "\n",
    "# PyTorch baseline: two-output, bias-free linear model over the 10 generated features.\n",
    "models['PyTorch'] = lambda: Torch2RiverClassifier(\n",
    "    model=TorchLogisticRegression(n_features=10),\n",
    "    classes=[False, True]\n",
    ")\n",
    "\n",
    "# NOTE(review): the final benchmark cell looks up 'PyTorch (compiled)', which no other cell\n",
    "# registers. torch.compile is assumed to be how that variant was built -- confirm.\n",
    "models['PyTorch (compiled)'] = lambda: Torch2RiverClassifier(\n",
    "    model=torch.compile(TorchLogisticRegression(n_features=10)),\n",
    "    classes=[False, True]\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Metrics check"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'dict' object has no attribute 'make_classification'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[27], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m datasets \u001b[39m=\u001b[39m {}\n\u001b[1;32m      6\u001b[0m \u001b[39mfor\u001b[39;00m p \u001b[39min\u001b[39;00m (\u001b[39m10\u001b[39m, \u001b[39m100\u001b[39m, \u001b[39m1000\u001b[39m):\n\u001b[0;32m----> 7\u001b[0m     X, Y \u001b[39m=\u001b[39m datasets\u001b[39m.\u001b[39;49mmake_classification(\n\u001b[1;32m      8\u001b[0m         n_samples\u001b[39m=\u001b[39m\u001b[39m10_000\u001b[39m,\n\u001b[1;32m      9\u001b[0m         n_features\u001b[39m=\u001b[39m\u001b[39m10\u001b[39m,\n\u001b[1;32m     10\u001b[0m         n_informative\u001b[39m=\u001b[39m\u001b[39m5\u001b[39m,\n\u001b[1;32m     11\u001b[0m         n_redundant\u001b[39m=\u001b[39m\u001b[39m5\u001b[39m,\n\u001b[1;32m     12\u001b[0m         n_classes\u001b[39m=\u001b[39m\u001b[39m2\u001b[39m,\n\u001b[1;32m     13\u001b[0m         random_state\u001b[39m=\u001b[39m\u001b[39m42\u001b[39m,\n\u001b[1;32m     14\u001b[0m     )\n\u001b[1;32m     15\u001b[0m     X \u001b[39m=\u001b[39m preprocessing\u001b[39m.\u001b[39mscale(X)\n\u001b[1;32m     16\u001b[0m     X \u001b[39m=\u001b[39m [\u001b[39mdict\u001b[39m(\u001b[39mzip\u001b[39m(\u001b[39mrange\u001b[39m(X\u001b[39m.\u001b[39mshape[\u001b[39m1\u001b[39m]), x)) \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m X]\n",
      "\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'make_classification'"
     ]
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn import preprocessing\n",
    "\n",
    "# Synthetic binary-classification data, stored as data[p] = (X, Y).\n",
    "# NOTE(review): p is only used as the dictionary key -- every iteration calls\n",
    "# make_classification with the same arguments (n_features=10, random_state=42), so the three\n",
    "# entries are generated identically. Confirm whether n_features=p was intended.\n",
    "# The X and Y left over from the last iteration are what the later cells iterate over.\n",
    "data = {}\n",
    "\n",
    "for p in (10, 100, 1000):\n",
    "    X, Y = datasets.make_classification(\n",
    "        n_samples=10_000,\n",
    "        n_features=10,\n",
    "        n_informative=5,\n",
    "        n_redundant=5,\n",
    "        n_classes=2,\n",
    "        random_state=42,\n",
    "    )\n",
    "    X = preprocessing.scale(X)\n",
    "    # One {column index: value} dict per row, the sample format the wrappers consume.\n",
    "    X = [dict(zip(range(X.shape[1]), x)) for x in X]\n",
    "    data[p] = (X, Y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "River           Accuracy: 77.72%\n",
      "Vowpal Wabbit   Accuracy: 77.73%\n",
      "scikit-learn    Accuracy: 77.72%\n",
      "PyTorch         Accuracy: 77.11%\n"
     ]
    }
   ],
   "source": [
    "from river import metrics\n",
    "\n",
    "# Sanity check: run every registered model over the same stream (the global X, Y) and print\n",
    "# its accuracy. Each sample is predicted before the model learns from it.\n",
    "for model_name, model_init in models.items():\n",
    "    model = model_init()  # new instance built by the registered factory\n",
    "    metric = metrics.Accuracy()\n",
    "    for x, y in zip(X, Y):\n",
    "        p = model.predict_one(x)\n",
    "        model.learn_one(x, y)\n",
    "        metric.update(y, p)\n",
    "    print(f\"{model_name:<15} {metric}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "90.5 ms ± 609 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "# Timed body: build a new River model, then predict and learn on every sample in turn.\n",
    "model = models['River']()\n",
    "for x, y in zip(X, Y):\n",
    "    p = model.predict_one(x)\n",
    "    model.learn_one(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.44 s ± 23.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "# Timed body: build a new scikit-learn wrapper, then predict and learn on every sample in turn.\n",
    "model = models['scikit-learn']()\n",
    "for x, y in zip(X, Y):\n",
    "    p = model.predict_one(x)\n",
    "    model.learn_one(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "281 ms ± 1.91 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "# Timed body: build a new Vowpal Wabbit wrapper, then predict and learn on every sample in turn.\n",
    "model = models['Vowpal Wabbit']()\n",
    "for x, y in zip(X, Y):\n",
    "    p = model.predict_one(x)\n",
    "    model.learn_one(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "833 ms ± 9.28 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "# Timed body: same predict-then-learn loop for the PyTorch wrapper; the prediction step runs\n",
    "# under torch.no_grad().\n",
    "model = models['PyTorch']()\n",
    "for x, y in zip(X, Y):\n",
    "    with torch.no_grad():\n",
    "        p = model.predict_one(x)\n",
    "    model.learn_one(x, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 s ± 6.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "\n",
    "# Timed body: predict-then-learn loop for the model registered as 'PyTorch (compiled)'.\n",
    "# NOTE(review): that key must be present in `models` before this cell runs. Unlike the plain\n",
    "# PyTorch benchmark, this cell does not wrap the prediction in torch.no_grad().\n",
    "model = models['PyTorch (compiled)']()\n",
    "for x, y in zip(X, Y):\n",
    "    p = model.predict_one(x)\n",
    "    model.learn_one(x, y)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.11.0 ('.venv': venv)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "14b46bd212fa4dd89e3980db6ba7efbb9fe535833e1e483b914b71733e0a56d2"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"attachments": {},
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Showdown"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"%load_ext watermark"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Python implementation: CPython\n",
	"Python version : 3.10.8\n",
	"IPython version : 8.12.0\n",
	"\n",
	"river : 0.15.0\n",
	"scikit-learn: 1.2.2\n",
	"torch : 2.0.0\n",
	"vowpalwabbit: 9.8.0\n",
	"\n",
	"Compiler : Clang 14.0.0 (clang-1400.0.29.102)\n",
	"OS : Darwin\n",
	"Release : 22.2.0\n",
	"Machine : arm64\n",
	"Processor : arm\n",
	"CPU cores : 8\n",
	"Architecture: 64bit\n",
	"\n"
	]
	}
	],
	"source": [
	"%watermark -p river,scikit-learn,torch,vowpalwabbit -mv"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Models"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Registry filled by the cells below: display name -> zero-argument factory returning a new model.\n",
	"models = {}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [],
	"source": [
	"import river.linear_model\n",
	"import river.optim\n",
	"\n",
	"# River baseline: logistic regression trained with plain SGD (lr=0.01) and no L2 penalty.\n",
	"# NOTE(review): intercept_lr=0 presumably freezes the intercept, mirroring the bias-free\n",
	"# settings used for the other libraries -- confirm against the River documentation.\n",
	"models['River'] = lambda: river.linear_model.LogisticRegression(\n",
	" optimizer=river.optim.SGD(lr=0.01),\n",
	" intercept_lr=0,\n",
	" l2=0.0\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [],
	"source": [
	"from vowpalwabbit import pyvw\n",
	"import river.base\n",
	"\n",
	"class VW2RiverClassifier(river.base.Classifier):\n",
	"    \"\"\"Adapter exposing a Vowpal Wabbit workspace through River's classifier interface.\n",
	"\n",
	"    Every constructor argument is forwarded unchanged to ``pyvw.Workspace``.\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, *args, **kwargs):\n",
	"        self.vw = pyvw.Workspace(*args, **kwargs)\n",
	"\n",
	"    def _format_x(self, x):\n",
	"        \"\"\"Render a feature dict as space-separated ``key:value`` pairs.\"\"\"\n",
	"        return \" \".join(f\"{k}:{v}\" for k, v in x.items())\n",
	"\n",
	"    def learn_one(self, x, y):\n",
	"        \"\"\"Feed one labelled example to the workspace and return ``self``.\"\"\"\n",
	"        # Convert {False, True} to {-1, 1}\n",
	"        y = int(y)\n",
	"        y_vw = 2 * y - 1\n",
	"\n",
	"        ex = self._format_x(x)\n",
	"        ex = f\"{y_vw} | {ex}\"\n",
	"        self.vw.learn(ex)\n",
	"        return self\n",
	"\n",
	"    def predict_proba_one(self, x):\n",
	"        \"\"\"Return ``{True: p, False: 1 - p}`` where ``p`` is the workspace's prediction for ``x``.\"\"\"\n",
	"        ex = \"| \" + self._format_x(x)\n",
	"        y_pred = self.vw.predict(ex)\n",
	"        return {True: y_pred, False: 1.0 - y_pred}\n",
	"\n",
	"# Vowpal Wabbit baseline: SGD on the logistic loss with a logistic link, with the adaptive,\n",
	"# normalized and invariant options turned off, no constant feature and no L1/L2 penalty.\n",
	"# NOTE(review): power_t=0 presumably disables learning-rate decay -- confirm in the VW docs.\n",
	"models['Vowpal Wabbit'] = lambda: VW2RiverClassifier(\n",
	" sgd=True,\n",
	" learning_rate=0.01,\n",
	" loss_function=\"logistic\",\n",
	" link=\"logistic\",\n",
	" adaptive=False,\n",
	" normalized=False,\n",
	" invariant=False,\n",
	" noconstant=True,\n",
	" l2=0.0,\n",
	" l1=0.0,\n",
	" power_t=0,\n",
	" quiet=True,\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [],
	"source": [
	"import sklearn.base\n",
	"import sklearn.exceptions\n",
	"import sklearn.linear_model\n",
	"\n",
	"class SK2RiverClassifier(river.base.Classifier):\n",
	"    \"\"\"Adapter exposing an incremental scikit-learn classifier through River's interface.\n",
	"\n",
	"    ``estimator`` is driven through ``partial_fit`` and ``predict_proba``. ``classes`` is the\n",
	"    label list handed to every ``partial_fit`` call and used to key the returned probabilities.\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, estimator: sklearn.base.ClassifierMixin, classes: list):\n",
	"        self.estimator = estimator\n",
	"        self.classes = classes\n",
	"\n",
	"    def learn_one(self, x, y):\n",
	"        \"\"\"Update the estimator with a single example and return ``self``.\"\"\"\n",
	"        self.estimator.partial_fit(X=[list(x.values())], y=[y], classes=self.classes)\n",
	"        return self\n",
	"\n",
	"    def predict_proba_one(self, x):\n",
	"        \"\"\"Return a ``{class: probability}`` dict, uniform while the estimator is unfitted.\"\"\"\n",
	"        try:\n",
	"            y_pred = self.estimator.predict_proba([list(x.values())])[0]\n",
	"            return {self.classes[i]: p for i, p in enumerate(y_pred)}\n",
	"        except sklearn.exceptions.NotFittedError:\n",
	"            return {c: 1 / len(self.classes) for c in self.classes}\n",
	"\n",
	"# scikit-learn baseline: SGDClassifier on the log loss, without penalty or intercept, using a\n",
	"# constant learning rate eta0=0.01. The wrapper trains it one sample at a time via partial_fit.\n",
	"models['scikit-learn'] = lambda: SK2RiverClassifier(\n",
	" estimator=sklearn.linear_model.SGDClassifier(\n",
	" loss='log_loss',\n",
	" penalty=None,\n",
	" fit_intercept=False,\n",
	" learning_rate='constant',\n",
	" eta0=0.01,\n",
	" max_iter=1,\n",
	" ),\n",
	" classes=[False, True]\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [],
	"source": [
	"import torch\n",
	"\n",
	"class TorchLogisticRegression(torch.nn.Module):\n",
	"    \"\"\"Bias-free linear layer that outputs one raw score (logit) per class.\n",
	"\n",
	"    The weights start at zero. ``forward`` applies no activation: ``torch.nn.CrossEntropyLoss``\n",
	"    expects unnormalized logits and applies ``log_softmax`` itself, so squashing the scores\n",
	"    with a sigmoid first distorts both the loss and its gradients.\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, n_features: int, n_classes: int = 2):\n",
	"        super().__init__()\n",
	"        self.linear = torch.nn.Linear(n_features, n_classes, bias=False)\n",
	"        self.linear.weight.data.zero_()\n",
	"\n",
	"    def forward(self, x):\n",
	"        \"\"\"Return the class logits computed by the linear layer for ``x``.\"\"\"\n",
	"        return self.linear(x)\n",
	"\n",
	"class Torch2RiverClassifier(river.base.Classifier):\n",
	"    \"\"\"Adapter exposing a PyTorch module through River's classifier interface.\n",
	"\n",
	"    ``model`` must return one logit per entry of ``classes`` for a single-row float32 batch;\n",
	"    logit ``i`` is reported under ``classes[i]``. Training uses cross-entropy and SGD (lr=0.01).\n",
	"    \"\"\"\n",
	"\n",
	"    def __init__(self, model: torch.nn.Module, classes: list):\n",
	"        self.model = model\n",
	"        self.classes = classes\n",
	"        self.criterion = torch.nn.CrossEntropyLoss()\n",
	"        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01)\n",
	"\n",
	"    def learn_one(self, x: dict, y):\n",
	"        \"\"\"Take one SGD step on a single example and return ``self``.\"\"\"\n",
	"        x = torch.tensor([list(x.values())], dtype=torch.float32)\n",
	"        y = torch.tensor([y], dtype=torch.long)\n",
	"        # forward pass on raw logits, as CrossEntropyLoss requires\n",
	"        y_pred = self.model(x)\n",
	"        loss = self.criterion(y_pred, y)\n",
	"        # backward pass and optimization\n",
	"        self.model.zero_grad(set_to_none=True)\n",
	"        loss.backward()\n",
	"        self.optimizer.step()\n",
	"        return self\n",
	"\n",
	"    def predict_proba_one(self, x: dict):\n",
	"        \"\"\"Return ``{class: probability}``; the probabilities are the softmax of the logits.\"\"\"\n",
	"        x = torch.tensor([list(x.values())], dtype=torch.float32)\n",
	"        # Inference only: skip autograd bookkeeping.\n",
	"        with torch.no_grad():\n",
	"            y_pred = torch.softmax(self.model(x), dim=1)[0]\n",
	"        return dict(zip(self.classes, y_pred.numpy()))\n",
	"\n",
	"# PyTorch baseline: two-output, bias-free linear model over the 10 generated features.\n",
	"models['PyTorch'] = lambda: Torch2RiverClassifier(\n",
	"    model=TorchLogisticRegression(n_features=10),\n",
	"    classes=[False, True]\n",
	")\n",
	"\n",
	"# NOTE(review): the final benchmark cell looks up 'PyTorch (compiled)', which no other cell\n",
	"# registers. torch.compile is assumed to be how that variant was built -- confirm.\n",
	"models['PyTorch (compiled)'] = lambda: Torch2RiverClassifier(\n",
	"    model=torch.compile(TorchLogisticRegression(n_features=10)),\n",
	"    classes=[False, True]\n",
	")"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Metrics check"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {},
	"outputs": [
	{
	"ename": "AttributeError",
	"evalue": "'dict' object has no attribute 'make_classification'",
	"output_type": "error",
	"traceback": [
	"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
	"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
	"Cell \u001b[0;32mIn[27], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m datasets \u001b[39m=\u001b[39m {}\n\u001b[1;32m 6\u001b[0m \u001b[39mfor\u001b[39;00m p \u001b[39min\u001b[39;00m (\u001b[39m10\u001b[39m, \u001b[39m100\u001b[39m, \u001b[39m1000\u001b[39m):\n\u001b[0;32m----> 7\u001b[0m X, Y \u001b[39m=\u001b[39m datasets\u001b[39m.\u001b[39;49mmake_classification(\n\u001b[1;32m 8\u001b[0m n_samples\u001b[39m=\u001b[39m\u001b[39m10_000\u001b[39m,\n\u001b[1;32m 9\u001b[0m n_features\u001b[39m=\u001b[39m\u001b[39m10\u001b[39m,\n\u001b[1;32m 10\u001b[0m n_informative\u001b[39m=\u001b[39m\u001b[39m5\u001b[39m,\n\u001b[1;32m 11\u001b[0m n_redundant\u001b[39m=\u001b[39m\u001b[39m5\u001b[39m,\n\u001b[1;32m 12\u001b[0m n_classes\u001b[39m=\u001b[39m\u001b[39m2\u001b[39m,\n\u001b[1;32m 13\u001b[0m random_state\u001b[39m=\u001b[39m\u001b[39m42\u001b[39m,\n\u001b[1;32m 14\u001b[0m )\n\u001b[1;32m 15\u001b[0m X \u001b[39m=\u001b[39m preprocessing\u001b[39m.\u001b[39mscale(X)\n\u001b[1;32m 16\u001b[0m X \u001b[39m=\u001b[39m [\u001b[39mdict\u001b[39m(\u001b[39mzip\u001b[39m(\u001b[39mrange\u001b[39m(X\u001b[39m.\u001b[39mshape[\u001b[39m1\u001b[39m]), x)) \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m X]\n",
	"\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'make_classification'"
	]
	}
	],
	"source": [
	"from sklearn import datasets\n",
	"from sklearn import preprocessing\n",
	"\n",
	"# Synthetic binary-classification data, stored as data[p] = (X, Y).\n",
	"# NOTE(review): p is only used as the dictionary key -- every iteration calls\n",
	"# make_classification with the same arguments (n_features=10, random_state=42), so the three\n",
	"# entries are generated identically. Confirm whether n_features=p was intended.\n",
	"# The X and Y left over from the last iteration are what the later cells iterate over.\n",
	"data = {}\n",
	"\n",
	"for p in (10, 100, 1000):\n",
	" X, Y = datasets.make_classification(\n",
	" n_samples=10_000,\n",
	" n_features=10,\n",
	" n_informative=5,\n",
	" n_redundant=5,\n",
	" n_classes=2,\n",
	" random_state=42,\n",
	" )\n",
	" X = preprocessing.scale(X)\n",
	" X = [dict(zip(range(X.shape[1]), x)) for x in X]\n",
	" data[p] = (X, Y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"River Accuracy: 77.72%\n",
	"Vowpal Wabbit Accuracy: 77.73%\n",
	"scikit-learn Accuracy: 77.72%\n",
	"PyTorch Accuracy: 77.11%\n"
	]
	}
	],
	"source": [
	"from river import metrics\n",
	"\n",
	"# Sanity check: run every registered model over the same stream (the global X, Y) and print\n",
	"# its accuracy. Each sample is predicted before the model learns from it.\n",
	"for model_name, model_init in models.items():\n",
	"    model = model_init()\n",
	"    metric = metrics.Accuracy()\n",
	"    for x, y in zip(X, Y):\n",
	"        p = model.predict_one(x)\n",
	"        model.learn_one(x, y)\n",
	"        metric.update(y, p)\n",
	"    # One summary line per model, printed after its pass over the data.\n",
	"    print(f\"{model_name:<15} {metric}\")"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Benchmark"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 67,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"90.5 ms ± 609 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Timed body: build a new River model, then predict and learn on every sample in turn.\n",
	"model = models['River']()\n",
	"for x, y in zip(X, Y):\n",
	" p = model.predict_one(x)\n",
	" model.learn_one(x, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 68,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1.44 s ± 23.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Timed body: build a new scikit-learn wrapper, then predict and learn on every sample in turn.\n",
	"model = models['scikit-learn']()\n",
	"for x, y in zip(X, Y):\n",
	" p = model.predict_one(x)\n",
	" model.learn_one(x, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 69,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"281 ms ± 1.91 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Timed body: build a new Vowpal Wabbit wrapper, then predict and learn on every sample in turn.\n",
	"model = models['Vowpal Wabbit']()\n",
	"for x, y in zip(X, Y):\n",
	" p = model.predict_one(x)\n",
	" model.learn_one(x, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"833 ms ± 9.28 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Timed body: predict-then-learn loop for the PyTorch wrapper; only the prediction step runs\n",
	"# under torch.no_grad(), the learning step needs gradients.\n",
	"model = models['PyTorch']()\n",
	"for x, y in zip(X, Y):\n",
	"    with torch.no_grad():\n",
	"        p = model.predict_one(x)\n",
	"    model.learn_one(x, y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"1 s ± 6.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"\n",
	"# Timed body: predict-then-learn loop for the model registered as 'PyTorch (compiled)'.\n",
	"# NOTE(review): that key must be present in `models` before this cell runs. Unlike the plain\n",
	"# PyTorch benchmark, this cell does not wrap the prediction in torch.no_grad().\n",
	"model = models['PyTorch (compiled)']()\n",
	"for x, y in zip(X, Y):\n",
	" p = model.predict_one(x)\n",
	" model.learn_one(x, y)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3.11.0 ('.venv': venv)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.8"
	},
	"orig_nbformat": 4,
	"vscode": {
	"interpreter": {
	"hash": "14b46bd212fa4dd89e3980db6ba7efbb9fe535833e1e483b914b71733e0a56d2"
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}