Skip to content

Instantly share code, notes, and snippets.

@wd15
Last active May 23, 2022 20:10
Show Gist options
  • Save wd15/3cfcde2aaf878ba74184a9dee42efeec to your computer and use it in GitHub Desktop.
Save wd15/3cfcde2aaf878ba74184a9dee42efeec to your computer and use it in GitHub Desktop.
misc
dask-worker-space
.local

Active Learning Hacking

Repository for active learning hacks

Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Active Learning Notebook\n",
"\n",
"Test out using [modAL](https://modal-python.readthedocs.io/en/latest/index.html) with PyMKS."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Todo List:\n",
"\n",
" - Make it work with data that has already been evaluated, for speedier evaluation\n",
" - Implement with multiple runs and plot uncertainty\n",
" - Implement iGS\n",
" - Make it all functional to some degree\n",
" - PR into PyMKS for an active learning notebook"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import dask.array as da\n",
"from dask_ml.model_selection import train_test_split\n",
"from sklearn.pipeline import Pipeline\n",
"from dask_ml.decomposition import IncrementalPCA\n",
"from dask_ml.preprocessing import PolynomialFeatures\n",
"from sklearn.linear_model import LinearRegression\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.gaussian_process import GaussianProcessRegressor\n",
"from sklearn.gaussian_process.kernels import RBF\n",
"from sklearn.metrics import mean_squared_error as mse\n",
"from sklearn.metrics import r2_score\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"from pymks import (\n",
" generate_multiphase,\n",
" plot_microstructures,\n",
" PrimitiveTransformer,\n",
" TwoPointCorrelation,\n",
" GenericTransformer,\n",
" solve_fe\n",
")\n",
"\n",
"from toolz.curried import curry, pipe, valmap, itemmap, iterate, do, merge_with\n",
"from toolz.curried import map as map_\n",
"from modAL.models import ActiveLearner, CommitteeRegressor, BayesianOptimizer\n",
"from modAL.disagreement import max_std_sampling\n",
"from modAL.models import BayesianOptimizer\n",
"from modAL.acquisition import max_EI\n",
"import tqdm\n",
"import types\n",
"from pymks.fmks.func import sequence"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"x_data = np.arange(50).reshape((50, 1))\n",
"y_data = np.arange(50).reshape((50, 1))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# An integer train_size=25 ended up as test_size=-24 (apparently 1 - train_size)\n",
"# and raised ValueError. A fraction requests the same 25-of-50 split.\n",
"x_0, x_1, y_0, y_1 = train_test_split(x_data, y_data, train_size=0.5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Functions for evaluating learners\n",
"\n",
"The following functions evaluate learners. The `oracle_func` returns the `y_data` value for a queried `x_data` sample. We assume that calling the oracle is expensive, which is why active learning is needed."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"@curry\n",
"def iterate_times(func, times, value):\n",
"    \"\"\"Apply ``func`` to ``value`` repeatedly, ``times`` times, with a progress bar.\n",
"\n",
"    Returns ``func(func(...func(value)))`` with ``times`` applications, or\n",
"    ``value`` itself when ``times`` is 0.\n",
"    \"\"\"\n",
"    # One application per tick so the bar reflects the work actually done.\n",
"    for _ in tqdm.tqdm(range(times)):\n",
"        value = func(value)\n",
"    return value"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"@curry\n",
"def update_learner(oracle_func, x_pool, x_test, y_test, learner):\n",
"    \"\"\"Run one active-learning step and record the resulting test score.\n",
"\n",
"    ``learner.query`` and ``oracle_func`` are chained with ``sequence`` and\n",
"    applied to ``x_pool``; the result is unpacked into ``learner.teach``.\n",
"    The score on ``(x_test, y_test)`` is then appended to ``learner.scores``.\n",
"    Returns the same (mutated) learner.\n",
"    \"\"\"\n",
"    label_query = sequence(learner.query, oracle_func)\n",
"    learner.teach(*label_query(x_pool))\n",
"    learner.scores.append(learner.score(x_test, y_test))\n",
"    return learner\n",
"\n",
"@curry\n",
"def evaluate_learner(oracle_func, x_pool, x_test, y_test, n_query, learner):\n",
"    \"\"\"Score ``learner``, then perform ``n_query`` update steps on it.\n",
"\n",
"    ``learner.scores`` starts with the score before any query and gains one\n",
"    entry per step. Returns the learner after the last step.\n",
"    \"\"\"\n",
"    learner.scores = [learner.score(x_test, y_test)]\n",
"    step = update_learner(oracle_func, x_pool, x_test, y_test)\n",
"    return iterate_times(step, n_query, learner)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate Data Functions\n",
"\n",
"Functions to generate the microstructure. The microstructures are stochastically generated from 6 nominal classes."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def shuffle(data):\n",
"    \"\"\"Return a shuffled copy of the dask array ``data``.\n",
"\n",
"    The data is materialized as a NumPy array, shuffled in place with\n",
"    ``np.random.shuffle`` and wrapped again using the chunks of ``data``.\n",
"    \"\"\"\n",
"    shuffled = np.array(data)\n",
"    np.random.shuffle(shuffled)\n",
"    return da.from_array(shuffled, chunks=data.chunks)\n",
"\n",
"def generate_x(shape, seed):\n",
"    \"\"\"Generate shuffled two-phase microstructures for six grain-size classes.\n",
"\n",
"    ``shape`` is passed to ``generate_multiphase`` once per grain size; ``seed``\n",
"    seeds both the dask and NumPy random generators. Returns a NumPy array of\n",
"    the concatenated, shuffled samples.\n",
"    \"\"\"\n",
"    da.random.seed(seed)\n",
"    np.random.seed(seed)\n",
"\n",
"    grain_sizes = [(15, 2), (2, 15), (7, 7), (9, 3), (3, 9), (2, 2)]\n",
"    classes = [\n",
"        generate_multiphase(\n",
"            shape=shape,\n",
"            grain_size=grain_size,\n",
"            volume_fraction=(0.5, 0.5),\n",
"            chunks=50,\n",
"            percent_variance=0.15,\n",
"        )\n",
"        for grain_size in grain_sizes\n",
"    ]\n",
"    return np.array(shuffle(da.concatenate(classes)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Functions to generate models\n",
"\n",
"Here we use a Gaussian process regression (GPR) model because it provides a predictive standard deviation (`return_std=True`), which the uncertainty-based query strategy relies on."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def pca_steps():\n",
"    \"\"\"Return the pipeline steps that map flattened microstructures to 3 PCA scores.\n",
"\n",
"    Steps: reshape to the grid, discretize into 2 states, compute two-point\n",
"    correlations, flatten, and reduce with an incremental PCA.\n",
"    \"\"\"\n",
"    def to_grid(x):\n",
"        # The grid size comes from the notebook-level x_data, looked up when\n",
"        # the transformer runs rather than when the steps are built.\n",
"        return x.reshape(x.shape[0], x_data.shape[1], x_data.shape[2])\n",
"\n",
"    def to_rows(x):\n",
"        return x.reshape(x.shape[0], -1)\n",
"\n",
"    reshape_step = (\"reshape\", GenericTransformer(to_grid))\n",
"    discretize_step = (\"discritize\", PrimitiveTransformer(n_state=2, min_=0.0, max_=1.0))\n",
"    correlation_step = (\n",
"        \"correlations\",\n",
"        TwoPointCorrelation(periodic_boundary=True, cutoff=31, correlations=[(0, 1), (1, 1)]),\n",
"    )\n",
"    flatten_step = ('flatten', GenericTransformer(to_rows))\n",
"    pca_step = ('pca', IncrementalPCA(n_components=3, svd_solver='full'))\n",
"    return (reshape_step, discretize_step, correlation_step, flatten_step, pca_step)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def make_gp_model():\n",
"    \"\"\"Build the PCA steps followed by cubic polynomial features and a GPR.\n",
"\n",
"    The regressor uses a scaled RBF kernel and 9 optimizer restarts.\n",
"    \"\"\"\n",
"    kernel = 1 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 1e2))\n",
"    regressor = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)\n",
"    return Pipeline(steps=pca_steps() + (\n",
"        ('poly', PolynomialFeatures(degree=3)),\n",
"        ('regressor', regressor),\n",
"    ))\n",
"\n",
"def make_linear_model():\n",
"    \"\"\"Build the PCA steps followed by cubic polynomial features and a linear regression.\"\"\"\n",
"    return Pipeline(steps=pca_steps() + (\n",
"        # The comma after the 'poly' step is required: without it Python tries\n",
"        # to call the ('poly', ...) tuple and raises TypeError.\n",
"        ('poly', PolynomialFeatures(degree=3)),\n",
"        ('regressor', LinearRegression()),\n",
"    ))\n",
"\n",
"def pca_model():\n",
"    \"\"\"Build a pipeline made of the PCA steps only (no regressor).\"\"\"\n",
"    return Pipeline(steps=pca_steps())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## The Oracle\n",
"\n",
"The oracle function is an FE simulation on the 2D grid."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"@curry\n",
"def oracle_from_data(shape, x_data):\n",
"    \"\"\"Run the FE solver on every sample and return one averaged stress value each.\n",
"\n",
"    ``x_data`` is reshaped to ``(-1,) + shape`` before solving. Index 0 of the\n",
"    last axis of the ``'stress'`` result is averaged over each sample.\n",
"    \"\"\"\n",
"    fe_result = solve_fe(\n",
"        x_data.reshape((-1,) + shape),\n",
"        elastic_modulus=(1.3, 2.5),\n",
"        poissons_ratio=(0.42, 0.35),\n",
"        macro_strain=0.001,\n",
"    )\n",
"    y_stress = fe_result['stress'][..., 0]\n",
"    per_sample = y_stress.reshape(y_stress.shape[0], -1)\n",
"    return np.array(da.average(per_sample, axis=1))\n",
"\n",
"@curry\n",
"def oracle_func(x_data, y_data, query_instance):\n",
"    \"\"\"Look up the precomputed label for a queried sample.\n",
"\n",
"    ``query_instance`` is an ``(index, sample)`` pair. Returns the sample as a\n",
"    single row and ``y_data[index]`` as a length-1 array. ``x_data`` is not used.\n",
"    \"\"\"\n",
"    idx, query_value = query_instance\n",
"    x_new = query_value.reshape(1, -1)\n",
"    y_new = np.array([y_data[idx]]).reshape(1)\n",
"    return x_new, y_new"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper Functions"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def plot_parity(y_test, y_predict, label='Testing Data'):\n",
"    \"\"\"Draw a parity plot of predicted against actual values.\n",
"\n",
"    Points are plotted as markers and a black diagonal spans the overall\n",
"    minimum and maximum of both arrays. Returns the ``plt`` module.\n",
"    \"\"\"\n",
"    pred_data = np.array([y_test, y_predict])\n",
"    actual, predicted = pred_data[0], pred_data[1]\n",
"    diagonal = np.min(pred_data), np.max(pred_data)\n",
"\n",
"    plt.plot(actual, predicted, 'o', label=label)\n",
"    plt.plot(diagonal, diagonal, '-', linewidth=3, color='k')\n",
"    plt.title('Goodness of Fit', fontsize=20)\n",
"    plt.xlabel('Actual', fontsize=18)\n",
"    plt.ylabel('Predicted', fontsize=18)\n",
"    plt.legend(loc=2, fontsize=15)\n",
"    return plt"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def three_way_split(x_data, y_data, props, random_state):\n",
"    \"\"\"Split the data into three parts with two successive train/test splits.\n",
"\n",
"    ``props[0]`` and ``props[1]`` are the fractions of the full data set that go\n",
"    into the first and second part; the third part gets the remainder.\n",
"    Returns the three flattened x arrays followed by the three y arrays.\n",
"    \"\"\"\n",
"    # props[1] is relative to the full data set, so rescale it to the\n",
"    # remainder left after the first split.\n",
"    second_fraction = props[1] / (1 - props[0])\n",
"\n",
"    x_first, x_rest, y_first, y_rest = train_test_split(\n",
"        x_data, y_data, train_size=props[0], random_state=random_state\n",
"    )\n",
"    x_second, x_third, y_second, y_third = train_test_split(\n",
"        x_rest, y_rest, train_size=second_fraction, random_state=random_state\n",
"    )\n",
"    return flatten(x_first), flatten(x_second), flatten(x_third), y_first, y_second, y_third"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def flatten(x_data):\n",
"    \"\"\"Reshape ``x_data`` to 2D, keeping the first axis and merging all others.\"\"\"\n",
"    n_samples = x_data.shape[0]\n",
"    return x_data.reshape(n_samples, -1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Greedy Sampling"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"def split_on_ids(arr, ids):\n",
"    \"\"\"Split ``arr`` into the rows selected by ``ids`` and all remaining rows.\n",
"\n",
"    Both parts keep the row order of ``arr``.\n",
"    \"\"\"\n",
"    selected = np.zeros(len(arr), dtype=bool)\n",
"    selected[ids] = True\n",
"    return arr[selected], arr[np.logical_not(selected)]\n",
"\n",
"def calc_distances(d0, d1):\n",
"    \"\"\"Return the matrix of Euclidean distances between rows of ``d0`` and ``d1``.\n",
"\n",
"    Entry ``[i, j]`` is the 2-norm of ``d0[i] - d1[j]``.\n",
"    \"\"\"\n",
"    pairwise_diff = d0[:, None, :] - d1[None, :, :]\n",
"    return np.linalg.norm(pairwise_diff, ord=2, axis=-1)\n",
"\n",
"def calc_distances_nk(labeled_samples, scores):\n",
"    \"\"\"Distances from every unlabeled sample to every labeled sample.\n",
"\n",
"    When nothing has been labeled yet, the mean of all scores is used as the\n",
"    single reference point.\n",
"    \"\"\"\n",
"    labeled, unlabeled = split_on_ids(scores, labeled_samples)\n",
"    if len(labeled) == 0:\n",
"        labeled = np.mean(unlabeled, axis=0)[None]\n",
"    return calc_distances(unlabeled, labeled)\n",
"\n",
"def next_sample(distances_nk, labeled_samples, scores):\n",
"    \"\"\"Pick the unlabeled sample whose nearest labeled sample is farthest away.\n",
"\n",
"    Returns the sample's index into ``scores``.\n",
"    \"\"\"\n",
"    nearest_labeled = np.min(distances_nk, axis=1)\n",
"    all_ids = np.arange(len(scores))\n",
"    _, unlabeled_ids = split_on_ids(all_ids, labeled_samples)\n",
"    return unlabeled_ids[np.argmax(nearest_labeled)]\n",
"\n",
"def next_sample_gsx(labeled_samples, scores):\n",
"    \"\"\"Greedy sampling on a single score array (used for both GSx and GSy).\"\"\"\n",
"    return next_sample(calc_distances_nk(labeled_samples, scores), labeled_samples, scores)\n",
"\n",
"def next_sample_igs(labeled_samples, scores):\n",
"    \"\"\"Greedy sampling on the product of the input and output distances.\n",
"\n",
"    ``scores`` is an ``(x_scores, y_scores)`` pair.\n",
"    \"\"\"\n",
"    x_scores, y_scores = scores\n",
"    combined = (\n",
"        calc_distances_nk(labeled_samples, x_scores)\n",
"        * calc_distances_nk(labeled_samples, y_scores)\n",
"    )\n",
"    return next_sample(combined, labeled_samples, x_scores)\n",
"\n",
"def query_helper(model, x_pool, init_scores, update_scores, next_func):\n",
"    \"\"\"Shared query logic for the greedy sampling strategies.\n",
"\n",
"    The list of already chosen ids and the current scores are kept on\n",
"    ``model.query_data``; ``init_scores()`` creates the scores on the first\n",
"    call, ``update_scores(model, scores)`` refreshes them on every call and\n",
"    ``next_func(labeled_ids, scores)`` picks the next id.\n",
"    Returns ``(next_id, x_pool[next_id])``.\n",
"    \"\"\"\n",
"    if not hasattr(model, 'query_data'):\n",
"        model.query_data = [], init_scores()\n",
"    chosen_ids, scores = model.query_data\n",
"    scores = update_scores(model, scores)\n",
"    next_id = next_func(chosen_ids, scores)\n",
"    model.query_data = (chosen_ids + [next_id], scores)\n",
"    return next_id, x_pool[next_id]\n",
"\n",
"def gsx_query(model, x_pool):\n",
"    \"\"\"GSx query: greedy sampling on the PCA scores of the pool.\"\"\"\n",
"    return query_helper(\n",
"        model,\n",
"        x_pool,\n",
"        init_scores=lambda: pca_model().fit_transform(x_pool),\n",
"        update_scores=lambda m, s: s,\n",
"        next_func=next_sample_gsx,\n",
"    )\n",
"\n",
"def gsy_query(model, x_pool):\n",
"    \"\"\"GSy query: greedy sampling on the model's current predictions for the pool.\"\"\"\n",
"    return query_helper(\n",
"        model,\n",
"        x_pool,\n",
"        init_scores=lambda: None,\n",
"        update_scores=lambda m, s: m.predict(x_pool).reshape(-1, 1),\n",
"        next_func=next_sample_gsx,\n",
"    )\n",
"\n",
"def igs_query(model, x_pool):\n",
"    \"\"\"iGS query: greedy sampling on PCA scores combined with current predictions.\"\"\"\n",
"    return query_helper(\n",
"        model,\n",
"        x_pool,\n",
"        init_scores=lambda: (pca_model().fit_transform(x_pool), None),\n",
"        update_scores=lambda m, s: (s[0], m.predict(x_pool).reshape(-1, 1)),\n",
"        next_func=next_sample_igs,\n",
"    )"
]
},
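{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set up the active learners\n",
"\n",
"All learners use the same GPR pipeline and differ only in their query strategy: maximum predictive standard deviation (`std`), random selection (`random`), Bayesian optimization with expected improvement (`bayes`), and the greedy sampling variants (`gsx`, `gsy`, `igs`)."
]
},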
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"def query_std(model, x_):\n",
"    \"\"\"Query the pool sample with the largest predictive standard deviation.\n",
"\n",
"    Returns ``(index, sample)``.\n",
"    \"\"\"\n",
"    stds = model.predict(x_, return_std=True)[1]\n",
"    best = np.argmax(stds)\n",
"    return best, x_[best]\n",
"\n",
"def query_random(model, x_):\n",
"    \"\"\"Query a uniformly random pool sample. Returns ``(index, sample)``.\"\"\"\n",
"    pick = np.random.randint(0, len(x_))\n",
"    return pick, x_[pick]\n",
"\n",
"def make_ensemble(x_train, y_train):\n",
"    \"\"\"Build a committee of 5 GPR learners, each trained on one fifth of the data.\n",
"\n",
"    The committee queries with ``max_std_sampling`` and gets an R^2 ``score``\n",
"    method attached.\n",
"    \"\"\"\n",
"    x_parts = np.array_split(x_train, 5)\n",
"    y_parts = np.array_split(y_train, 5)\n",
"    committee = [\n",
"        ActiveLearner(\n",
"            estimator=make_gp_model(),\n",
"            X_training=x_part,\n",
"            y_training=y_part\n",
"        )\n",
"        for x_part, y_part in zip(x_parts, y_parts)\n",
"    ]\n",
"    ensemble_learner = CommitteeRegressor(\n",
"        learner_list=committee,\n",
"        query_strategy=max_std_sampling\n",
"    )\n",
"\n",
"    # CommitteeRegressor does not have a score function, so bind one to\n",
"    # this instance.\n",
"    def score(self, x_true, y_true):\n",
"        return r2_score(y_true, self.predict(x_true))\n",
"\n",
"    ensemble_learner.score = types.MethodType(score, ensemble_learner)\n",
"\n",
"    return ensemble_learner\n",
"\n",
"@curry\n",
"def make_learner(x_train, y_train, klass, query_func):\n",
"    \"\"\"Instantiate ``klass`` with a fresh GPR pipeline, ``query_func`` and the training data.\"\"\"\n",
"    return klass(\n",
"        estimator=make_gp_model(),\n",
"        query_strategy=query_func,\n",
"        X_training=x_train,\n",
"        y_training=y_train,\n",
"    )\n",
"\n",
"def make_bayes(x_train, y_train):\n",
"    \"\"\"Build a ``BayesianOptimizer`` learner that queries with ``max_EI``.\"\"\"\n",
"    return make_learner(x_train, y_train, BayesianOptimizer, max_EI)\n",
"\n",
"def make_learners(x_train, y_train):\n",
"    \"\"\"Return a dict of named learners, all trained on the same initial data.\"\"\"\n",
"    make_active = make_learner(x_train, y_train, ActiveLearner)\n",
"\n",
"    return {\n",
"        'std': make_active(query_std),\n",
"        'random': make_active(query_random),\n",
"#        'ensemble': make_ensemble(x_train, y_train),\n",
"        'bayes': make_bayes(x_train, y_train),\n",
"        'gsx': make_active(gsx_query),\n",
"        'igs': make_active(igs_query),\n",
"        'gsy': make_active(gsy_query),\n",
"    }"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"@curry\n",
"def evaluate_item(oracle, x_pool, x_test, y_test, iterations, item):\n",
"    \"\"\"Evaluate one ``(name, learner)`` pair and return ``(name, evaluated learner)``.\"\"\"\n",
"    name, learner = item\n",
"    print('evaluating', name)\n",
"    evaluated = evaluate_learner(oracle, x_pool, x_test, y_test, iterations, learner)\n",
"    return name, evaluated\n",
"\n",
"@curry\n",
"def one_round(x_data, y_data, iterations, seed):\n",
"    \"\"\"Split the data, build all learners and evaluate each for ``iterations`` queries.\n",
"\n",
"    The split is 80% pool, 16% test and the remainder as initial training\n",
"    data. Returns a dict mapping learner name to the evaluated learner.\n",
"    \"\"\"\n",
"    x_pool, x_test, x_train, y_pool, y_test, y_train = three_way_split(\n",
"        x_data, y_data, (0.8, 0.16), seed\n",
"    )\n",
"    oracle = oracle_func(x_pool, y_pool)\n",
"    eval_item = evaluate_item(oracle, x_pool, x_test, y_test, iterations)\n",
"    learners = make_learners(x_train, y_train)\n",
"    return dict(eval_item(item) for item in learners.items())\n",
"\n",
"def multiple_rounds(x_data, y_data, rounds, iterations):\n",
"    \"\"\"Repeat ``one_round`` ``rounds`` times and summarize the score histories.\n",
"\n",
"    Every round uses ``None`` as its seed. Returns a dict mapping learner name\n",
"    to ``(mean, std)`` of the scores across rounds, one value per iteration.\n",
"    \"\"\"\n",
"    scores_per_round = []\n",
"    for _ in range(rounds):\n",
"        learners = one_round(x_data, y_data, iterations, None)\n",
"        scores_per_round.append(valmap(lambda learner: learner.scores, learners))\n",
"\n",
"    # Stack each learner's score lists into a (rounds, n_scores) array.\n",
"    stacked = merge_with(np.vstack)(*scores_per_round)\n",
"    return valmap(lambda s: (np.mean(s, axis=0), np.std(s, axis=0)), stacked)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate the data"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"grid_shape = (41, 41)\n",
"n_sample_per_class = 100\n",
"x_data = generate_x((n_sample_per_class,) + grid_shape, 5)\n",
"y_data = oracle_from_data(grid_shape, x_data)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 2880x288 with 10 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot_microstructures(*x_data[:10], cmap='gray', colorbar=False);"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run the learners"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [1:17:36<00:00, 46.57s/it] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.04it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.31s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.05it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.31s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.05it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.31s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.15s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.05it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.31s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.05it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.31s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.05it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.31s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.05it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.15s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.32s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.05it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.32s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.15s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:35<00:00, 1.05it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.15s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:11<00:00, 1.32s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.13s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating std\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating random\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:36<00:00, 1.04it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating bayes\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsx\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:54<00:00, 1.14s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating igs\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [02:12<00:00, 1.32s/it]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"evaluating gsy\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 100/100 [01:53<00:00, 1.14s/it]\n"
]
}
],
"source": [
"scores = multiple_rounds(x_data, y_data, 10, 100)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"## The results"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.0, 1.0)"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x576 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.style.use('ggplot')\n",
"plt.figure(figsize=(10, 8))\n",
"ax = plt.gca()\n",
"\n",
"# Learners that get a +/- one standard deviation band around the mean curve.\n",
"banded = ['std', 'bayes', 'gsx', 'gsy', 'igs']\n",
"\n",
"# scores maps learner name -> (mean R^2 per iteration, std per iteration).\n",
"for name, (mean_r2, std_r2) in scores.items():\n",
"    steps = np.arange(len(mean_r2))\n",
"    ax.plot(steps, mean_r2, label=name)\n",
"    if name in banded:\n",
"        ax.fill_between(steps, mean_r2 - std_r2, mean_r2 + std_r2, alpha=0.1)\n",
"plt.legend()\n",
"plt.xlabel('iterations')\n",
"plt.ylabel('R^2');\n",
"plt.ylim(0., 1)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.9, 1.0)"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x576 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.style.use('ggplot')\n",
"plt.figure(figsize=(10, 8))\n",
"ax = plt.gca()\n",
"\n",
"# Learners that get a +/- one standard deviation band around the mean curve.\n",
"banded = ['std', 'bayes', 'gsx', 'gsy', 'igs']\n",
"\n",
"# Same curves as the full-range plot, zoomed in on R^2 between 0.9 and 1.\n",
"for name, (mean_r2, std_r2) in scores.items():\n",
"    steps = np.arange(len(mean_r2))\n",
"    ax.plot(steps, mean_r2, label=name)\n",
"    if name in banded:\n",
"        ax.fill_between(steps, mean_r2 - std_r2, mean_r2 + std_r2, alpha=0.1)\n",
"plt.legend()\n",
"plt.xlabel('iterations')\n",
"plt.ylabel('R^2');\n",
"plt.ylim(0.9, 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Check what the accuracy actually looks like"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"y_pred_std = learner_accuracy['std'][1].predict(x_test)\n",
"y_pred_random = learner_accuracy['random'][1].predict(x_test)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<module 'matplotlib.pyplot' from '/nix/store/c8sgkmibi2vyfw75w9vai2917j5smvq7-python3.8-matplotlib-3.3.1/lib/python3.8/site-packages/matplotlib/pyplot.py'>"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plot_parity(y_test, y_pred_random, label='random')"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<module 'matplotlib.pyplot' from '/nix/store/c8sgkmibi2vyfw75w9vai2917j5smvq7-python3.8-matplotlib-3.3.1/lib/python3.8/site-packages/matplotlib/pyplot.py'>"
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plot_parity(y_test, y_pred_std, label='std')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
#
# Nix shell expression: PyMKS (pinned to `pymksVersion`) built against the
# nixpkgs release named by `tag`, plus a few extra Python tools.
#
# $ nix-shell --pure --arg withBoost false --argstr tag 20.09
#
# NOTE(review): `withBoost` is not one of the arguments declared below
# (only `tag` and `pymksVersion` are) -- confirm whether it can be dropped
# from the usage line.
#
{
tag ? "20.09",
pymksVersion ? "cf653e004848c9c68ca31a85add0d1ac8611a93f"
}:
let
# nixpkgs tarball for the requested release tag
pkgs = import (builtins.fetchTarball "https://github.com/NixOS/nixpkgs/archive/${tag}.tar.gz") {};
# PyMKS source tarball at the pinned commit
pymkssrc = builtins.fetchTarball "https://github.com/materialsinnovation/pymks/archive/${pymksVersion}.tar.gz";
# PyMKS built from its own default.nix, with graspi passed as null
pymks = pypkgs.callPackage "${pymkssrc}/default.nix" { graspi = null; };
pypkgs = pkgs.python3Packages;
# Extra packages added to the shell on top of PyMKS' propagated inputs
extra = with pypkgs; [ black pylint flake8 ipywidgets zarr pymks h5py ];
in
(pymks.overridePythonAttrs (old: rec {
propagatedBuildInputs = old.propagatedBuildInputs;
# `rec` lets this line refer to propagatedBuildInputs defined just above
nativeBuildInputs = propagatedBuildInputs ++ extra;
# Shell setup: user-level pip installs go under $PWD/.local; the hook enables
# notebook extensions and pip-installs nbqa, tqdm and modAL (unpinned).
postShellHook = ''
export OMPI_MCA_plm_rsh_agent=${pkgs.openssh}/bin/ssh
SOURCE_DATE_EPOCH=$(date +%s)
export PYTHONUSERBASE=$PWD/.local
export USER_SITE=`python -c "import site; print(site.USER_SITE)"`
export PYTHONPATH=$PYTHONPATH:$USER_SITE
export PATH=$PATH:$PYTHONUSERBASE/bin
jupyter nbextension install --py widgetsnbextension --user > /dev/null 2>&1
jupyter nbextension enable widgetsnbextension --user --py > /dev/null 2>&1
pip install jupyter_contrib_nbextensions --user > /dev/null 2>&1
jupyter contrib nbextension install --user > /dev/null 2>&1
jupyter nbextension enable spellchecker/main > /dev/null 2>&1
pip install --user nbqa
pip install --user tqdm
pip install --user modAL
'';
}))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment