Skip to content

Instantly share code, notes, and snippets.

### Keybase proof
I hereby claim:
* I am spolakh on github.
* I am spolakh (https://keybase.io/spolakh) on keybase.
* I have a public key whose fingerprint is 68F1 FC1A EAFB 405C 88F0 408C 487C 9A5F 09CC 3B27
To claim this, I am signing this object:
@spolakh
spolakh / -
Last active March 31, 2016 12:13
import requests as req
from datetime import timedelta, date
from time import sleep
# Lookup table from a provider name to an integer id.
# NOTE(review): what these ids identify is not visible in this snippet —
# confirm against the code that consumes id_map.
id_map = {'Yandex': 1839501, 'Google': 1839502, 'Mail': 1839503}
def normalize_name(name):
    """Collapse every 'Google Chrome ...' variant into a single name.

    Args:
        name: The name string to normalize.

    Returns:
        'Google Chrome' when ``name`` starts with 'google chrome'
        (compared case-insensitively); otherwise ``name`` unchanged.
    """
    # Compare on the lower-cased name so 'GOOGLE CHROME 49' and
    # 'google chrome' are both folded into the same canonical value.
    if name.lower().startswith('google chrome'):
        return 'Google Chrome'
    return name
{"nbformat_minor": 0, "cells": [{"execution_count": 62, "cell_type": "code", "source": "%pylab inline\nimport numpy as np\nimport pandas\nimport neurolab as nl", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Populating the interactive namespace from numpy and matplotlib\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 55, "cell_type": "code", "source": "from pandas.io.excel import read_excel\ndf = read_excel(\"Concrete_Data.xls\")\n\ndf.columns = ['cement', 'slag', 'fly ash', 'water', 'superpl', 'coarse', 'fine', 'age', 'result']\ndf.head()", "outputs": [{"execution_count": 55, "output_type": "execute_result", "data": {"text/plain": " cement slag fly ash water superpl coarse fine age result\n0 540.0 0.0 0 162 2.5 1040.0 676.0 28 79.986111\n1 540.0 0.0 0 162 2.5 1055.0 676.0 28 61.887366\n2 332.5 142.5 0 228 0.0 932.0 594.0 270 40.269535\n3 332.5 142.5 0 228
{"nbformat_minor": 0, "cells": [{"execution_count": 18, "cell_type": "code", "source": "import numpy\nimport pandas\nfrom sklearn.preprocessing import StandardScaler\n\nX = pandas.DataFrame([[1.0, 2.0], [3.0, 4.0]])\nscaler = StandardScaler(copy=True)\nscaler.fit_transform(X)\nX", "outputs": [{"execution_count": 18, "output_type": "execute_result", "data": {"text/plain": " 0 1\n0 -1 -1\n1 1 1", "text/html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>0</th>\n <th>1</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>-1</td>\n <td>-1</td>\n </tr>\n <tr>\n <th>1</th>\n <td> 1</td>\n <td> 1</td>\n </tr>\n </tbody>\n</table>\n</div>"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "p
{"nbformat_minor": 0, "cells": [{"execution_count": 1, "cell_type": "code", "source": "import numpy\nimport pandas\n\ndef generate_classification_sample(n_samples, n_features, distance=2.0, n_classes=2):\n \"\"\"Generates some test distribution,\n distributions are gaussian with centers at (x, x, x, ... x), where x = class_id * distance\n \"\"\"\n from sklearn.datasets import make_blobs\n\n centers = numpy.zeros((n_classes, n_features))\n centers += numpy.arange(n_classes)[:, numpy.newaxis] * distance\n\n X, y = make_blobs(n_samples=n_samples, n_features=n_features, centers=centers)\n columns = [\"column\" + str(x) for x in range(n_features)]\n X = pandas.DataFrame(X, columns=columns)\n return X, y\n\ndef generate_classification_data(n_classes=2):\n \"\"\" Generates random number of samples and features. \"\"\"\n n_samples = 1000 + numpy.random.poisson(1000)\n n_features = numpy.random.randint(10, 16)\n sample_weight = numpy.ones(n_samples, dtype=float)\n X, y = g
{"metadata": {"kernelspec": {"display_name": "Python 3", "name": "python3", "language": "python"}, "language_info": {"file_extension": ".py", "version": "3.4.3", "nbconvert_exporter": "python", "codemirror_mode": {"name": "ipython", "version": 3}, "name": "python", "pygments_lexer": "ipython3", "mimetype": "text/x-python"}}, "cells": [{"metadata": {}, "source": "#\u0417\u0430\u0434\u0430\u0447\u0430 1", "cell_type": "markdown"}, {"execution_count": 1, "source": "%pylab inline\nimport numpy as np\ndf = np.loadtxt(\"A2P(1).txt\")", "outputs": [{"name": "stdout", "output_type": "stream", "text": "Populating the interactive namespace from numpy and matplotlib\n"}, {"name": "stderr", "output_type": "stream", "text": "WARNING: pylab import has clobbered these variables: ['sqrt']\n`%matplotlib` prevents importing * from pylab and numpy\n"}], "metadata": {"collapsed": false, "trusted": true}, "cell_type": "code"}, {"execution_count": 2, "source": "from sklearn.cross_validation import train_test_split\nprint(df.shape)
{"nbformat_minor": 0, "cells": [{"execution_count": 1, "cell_type": "code", "source": "import numpy as np\nimport pandas as pd\nimport matplotlib.pylab as plt\n%matplotlib inline", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 2, "cell_type": "code", "source": "from rep.utils import train_test_split\nfrom sklearn.metrics import roc_auc_score\n\nsig_data = pd.read_csv('../toy_datasets/toyMC_sig_mass.csv', sep='\\t')\nbck_data = pd.read_csv('../toy_datasets/toyMC_bck_mass.csv', sep='\\t')\n\nlabels = np.array([1] * len(sig_data) + [0] * len(bck_data))\ndata = pd.concat([sig_data, bck_data])\nvariables = [\"FlightDistance\", \"FlightDistanceError\", \"IP\", \"VertexChi2\", \"pt\", \"p0_pt\", \"p1_pt\", \"p2_pt\", 'LifeTime','dira']", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 3, "cell_type": "code", "source": "X_train, X_test, y_train, y_test = train_test_split(data, labels, train_size=0.5)", "outputs": [], "metadata": {"collaps
{"metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"pygments_lexer": "ipython3", "name": "python", "mimetype": "text/x-python", "file_extension": ".py", "nbconvert_exporter": "python", "version": "3.4.3", "codemirror_mode": {"name": "ipython", "version": 3}}}, "cells": [{"metadata": {"trusted": true, "collapsed": false}, "outputs": [{"text": "Populating the interactive namespace from numpy and matplotlib\n", "output_type": "stream", "name": "stdout"}], "execution_count": 151, "cell_type": "code", "source": "%pylab inline\nimport numpy as np\nfrom functools import reduce\nimport scipy.stats as st\nSAMPLE_SIZE = 100\nBS_SIZE = 5000\nMU = 5"}, {"metadata": {}, "cell_type": "markdown", "source": "#\u0417\u0430\u0434\u0430\u043d\u0438\u0435 3\n\u041f\u0443\u043d\u043a\u0442 \u0430): \u0421\u0433\u0435\u043d\u0435\u0440\u0438\u043c \u0432\u044b\u0431\u043e\u0440\u043a\u0443 \u0438 \u043f\u043e\u0441\u0442\u0440\u043e\u0438\u043c \u0434\u043e\u0432\u0
{"metadata": {"kernelspec": {"display_name": "Python 3", "name": "python3", "language": "python"}, "language_info": {"file_extension": ".py", "nbconvert_exporter": "python", "codemirror_mode": {"name": "ipython", "version": 3}, "name": "python", "pygments_lexer": "ipython3", "mimetype": "text/x-python", "version": "3.4.3"}}, "cells": [{"metadata": {"trusted": true, "collapsed": false}, "cell_type": "code", "source": "%pylab inline\nimport numpy as np\nfrom functools import reduce\nimport scipy.stats as st\nSAMPLE_SIZE = 100\nBS_SIZE = 5000\nMU = 5", "outputs": [{"text": "Populating the interactive namespace from numpy and matplotlib\n", "name": "stdout", "output_type": "stream"}], "execution_count": 151}, {"metadata": {}, "cell_type": "markdown", "source": "#\u21163\n\u041f\u0443\u043d\u043a\u0442 \u0430): \u0421\u0433\u0435\u043d\u0435\u0440\u0438\u043c \u0432\u044b\u0431\u043e\u0440\u043a\u0443 \u0438 \u043f\u043e\u0441\u0442\u0440\u043e\u0438\u043c \u0434\u043e\u0432\u0435\u0440\u0438\u0442\u0435\u043b\u04
{"nbformat_minor": 0, "cells": [{"execution_count": 1, "cell_type": "code", "source": "import numpy as np\nimport pandas as pd\nimport matplotlib.pylab as plt\n%matplotlib inline", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "\u041f\u043e\u0434\u0433\u0440\u0443\u0437\u0438\u043c \u0440\u0435\u043f\u043e\u0432\u044b\u0439 \u0434\u0430\u0442\u0430\u0441\u0435\u0442 \u043c\u0430\u0441\u0441, \u043a\u0430\u043a \u0438 \u0432 \u0442\u0443\u0442\u043e\u0440\u0438\u0430\u043b\u0435", "cell_type": "markdown", "metadata": {}}, {"execution_count": 2, "cell_type": "code", "source": "from rep.utils import train_test_split\nfrom sklearn.metrics import roc_auc_score\n\nsig_data = pd.read_csv('../toy_datasets/toyMC_sig_mass.csv', sep='\\t')\nbck_data = pd.read_csv('../toy_datasets/toyMC_bck_mass.csv', sep='\\t')\n\nlabels = np.array([1] * len(sig_data) + [0] * len(bck_data))\ndata = pd.concat([sig_data, bck_data])\nvariables = [\"FlightDistance\", \"FlightDistanceError\", \"IP\", \"VertexC