Skip to content

Instantly share code, notes, and snippets.

@RyotaBannai
Last active January 12, 2019 09:02
Show Gist options
  • Save RyotaBannai/936755846bf387b15f02a3ae07d6394a to your computer and use it in GitHub Desktop.
Save RyotaBannai/936755846bf387b15f02a3ae07d6394a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"from numpy.linalg import inv\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import mean_squared_error"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def dataset_(N=200, idx_outlier=0, ydistance=5):\n",
"    \"\"\"Generate N correlated 2-D points and plant a single outlier.\n",
"\n",
"    Two rows of standard-normal draws are mixed by a random 2x2 matrix and\n",
"    transposed into an (N, 2) array. The generator is seeded with 4, so the\n",
"    result is the same on every call for a given N.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    N : number of points to generate.\n",
"    idx_outlier : row whose second column is overwritten.\n",
"    ydistance : value written into that row's second column.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Array of shape (N, 2).\n",
"    \"\"\"\n",
"    generator = np.random.RandomState(4)\n",
"    mixing = generator.rand(2, 2)\n",
"    latent = generator.randn(2, N)\n",
"    points = np.dot(mixing, latent).T\n",
"    # Slice assignment rather than plain indexing: a row index past the end\n",
"    # selects nothing and leaves the data untouched instead of raising.\n",
"    target_row = slice(idx_outlier, idx_outlier + 1)\n",
"    points[target_row, 1] = ydistance\n",
"    return points"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Sample size and the row that receives the planted outlier.\n",
"N = 50\n",
"inx = 10\n",
"i = dataset_(N, inx)\n",
"\n",
"# Column 0 is the regressor and column 1 the response, both kept as (N, 1) columns.\n",
"Z = i[:, 0].reshape(-1, 1)\n",
"y = i[:, 1].reshape(-1, 1)\n",
"\n",
"\n",
"def b(Z, y):\n",
"    \"\"\"Least-squares coefficients via the normal equations: inv(Z'Z) Z'y.\"\"\"\n",
"    gram = Z.T.dot(Z)\n",
"    return inv(gram).dot(Z.T).dot(y)\n",
"\n",
"\n",
"def ypred(Z, b):\n",
"    \"\"\"Fitted values for design matrix Z and coefficient vector b.\"\"\"\n",
"    return Z.dot(b)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Fit on the full sample; these fitted values are the baseline that every\n",
"# leave-one-out fit is compared against.\n",
"original_ypred = ypred(Z, b(Z, y))\n",
"# Number of fitted coefficients: every data column except the response.\n",
"p = i.shape[1] - 1\n",
"# Residual variance for Cook's distance is SSE / (n - p). A plain mean\n",
"# squared error divides by n instead and would understate s^2.\n",
"s2 = np.sum((y - original_ypred) ** 2) / (len(y) - p)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"outliers, Ds = [], []\n",
"# Flagging threshold 2(p+1)/N, which equals the common 4/N rule when p = 1.\n",
"t = 2 * (p + 1) / N\n",
"for n in range(N):\n",
"    # Refit without the n-th observation.\n",
"    tempZ = np.delete(Z, n, axis=0)\n",
"    tempY = np.delete(y, n, axis=0)\n",
"    tempB = b(tempZ, tempY)\n",
"    # Cook's distance sums the shift in fitted values over ALL N observations,\n",
"    # including the deleted one, so predict on the full design matrix.\n",
"    tempYPred = ypred(Z, tempB)\n",
"\n",
"    # axis=0 keeps a one-element array, so D[0] below and the entries of Ds\n",
"    # have the same type as before.\n",
"    errs = np.sum((original_ypred - tempYPred) ** 2, axis=0)\n",
"    D = errs / (p * s2)\n",
"    Ds.append(D)\n",
"    if D[0] > t:\n",
"        outliers.append(n)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[array([0.1488515]),\n",
" array([0.00753585]),\n",
" array([0.00690743]),\n",
" array([0.00671447]),\n",
" array([0.0046191])]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Five largest distances. sorted() returns a new list, so Ds keeps its\n",
"# observation order and Ds[n] still belongs to observation n.\n",
"sorted(Ds, reverse=True)[:5]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[10]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"outliers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment