@ocoyawale
Forked from roberttreichler/k insurance.ipynb
Created January 29, 2018 04:26
pandas scikit learn
{
"cells": [
{
"cell_type": "code",
"execution_count": 151,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import sklearn as skl\n",
"import scipy.stats as stats\n",
"import pylab\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf\n",
"from sklearn import linear_model\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.cross_validation import cross_val_score\n",
"from sklearn import metrics"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"train_path='D:/kaggle/Insurance/train.csv'\n",
"test_path='D:/kaggle/Insurance/test.csv'\n",
"train=pd.read_csv(train_path)\n",
"test=pd.read_csv(test_path)\n",
"train=pd.DataFrame(train)\n",
"test=pd.DataFrame(test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#EDA\n",
"train.describe(percentiles=[.01,.05,.95,.99])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#list of continuous variables\n",
"cont_var=pd.DataFrame(train,columns=['Hazard','T1_V1','T1_V2','T1_V3','T2_V1','T2_V15','T2_V2','T2_V4','T2_V9'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pd.scatter_matrix(cont_var,diagonal='kde',color='k',alpha=0.3)\n",
"plt.show()\n",
"\n",
"#pd.scatter_matrix(train,diagonal='kde',color='k',alpha=0.3)\n",
"#plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.scatter(train['Hazard'],train['T1_V1'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"stats.probplot(train['Hazard'], dist=\"norm\", plot=pylab)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.scatter(train['Hazard'],train['T1_V2'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"stats.probplot(train['T1_V2'], dist=\"norm\", plot=pylab)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.scatter(train['Hazard'],train['T2_V1'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"stats.probplot(train['T2_V1'], dist=\"norm\", plot=pylab)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.scatter(train['Hazard'],train['T2_V15'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"stats.probplot(train['T2_V15'], dist=\"norm\", plot=pylab)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.scatter(train['Hazard'],train['T2_V2'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"stats.probplot(train['T2_V2'], dist=\"norm\", plot=pylab)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.scatter(train['Hazard'],train['T2_V4'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"stats.probplot(train['T2_V4'], dist=\"norm\", plot=pylab)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.scatter(train['Hazard'],train['T2_V9'])\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"stats.probplot(train['T2_V9'], dist=\"norm\", plot=pylab)\n",
"plt.show()"
]
},
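{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#The scatter/probplot cells above repeat the same two calls per column.\n",
"#A minimal loop sketch over the plotted continuous columns that produces\n",
"#the same figures (titles added so the plots are distinguishable).\n",
"for col in ['T1_V1','T1_V2','T2_V1','T2_V15','T2_V2','T2_V4','T2_V9']:\n",
"    plt.scatter(train['Hazard'],train[col])\n",
"    plt.title(col)\n",
"    plt.show()\n",
"    stats.probplot(train[col], dist='norm', plot=pylab)\n",
"    plt.show()"
]
},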
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#indicator variables\n",
"ind1=pd.get_dummies(train['T1_V4'],prefix='imp_T1_V4') #categorical\n",
"ind2=pd.get_dummies(train['T1_V5'],prefix='imp_T1_V5') #categorical\n",
"ind3=pd.get_dummies(train['T1_V6'],prefix='imp_T1_V6')#categorical\n",
"ind4=pd.get_dummies(train['T1_V7'],prefix='imp_T1_V7')#categorical\n",
"ind5=pd.get_dummies(train['T1_V8'],prefix='imp_T1_V8') #categorical\n",
"ind6=pd.get_dummies(train['T1_V9'],prefix='imp_T1_V9') #categorical\n",
"ind7=pd.get_dummies(train['T1_V10'],prefix='imp_T1_V10') #categorical\n",
"ind8=pd.get_dummies(train['T1_V11'],prefix='imp_T1_V11') #categorical\n",
"ind9=pd.get_dummies(train['T1_V12'],prefix='imp_T1_V12')#categorical\n",
"ind10=pd.get_dummies(train['T1_V13'],prefix='imp_T1_V13') #categorical\n",
"ind11=pd.get_dummies(train['T1_V15'],prefix='imp_T1_V15') #categorical\n",
"ind12=pd.get_dummies(train['T1_V16'],prefix='imp_T1_V16') #categorical\n",
"ind13=pd.get_dummies(train['T1_V17'],prefix='imp_T1_V17') #categorical\n",
"ind14=pd.get_dummies(train['T2_V3'],prefix='imp_T2_V3') #categorical\n",
"ind15=pd.get_dummies(train['T2_V5'],prefix='imp_T2_V5')#categorical\n",
"ind16=pd.get_dummies(train['T2_V7'],prefix='imp_T2_V7') #categorical\n",
"ind17=pd.get_dummies(train['T2_V8'],prefix='imp_T2_V8') #ordinal\n",
"ind18=pd.get_dummies(train['T2_V11'],prefix='imp_T2_V11') #categorical\n",
"ind19=pd.get_dummies(train['T2_V12'],prefix='imp_T2_V12') #categorical\n",
"ind20=pd.get_dummies(train['T2_V13'],prefix='imp_T2_V13') #categorical\n",
"#Transform to same scale, dampen outlier influence, etc.\n",
"train['imp_T1_V1']=np.log(train['T1_V1']+1)\n",
"train['imp_T1_V2']=np.log(train['T1_V2']+1)\n",
"train['imp_T1_V3']=np.log(train['T1_V3']+1)\n",
"train['imp_T1_V14']=np.log(train['T1_V14']+1)\n",
"train['imp_T2_V1']=np.log(train['T2_V1']+1) #0-100\n",
"train['imp_T2_V2']=np.log(train['T2_V2']+1)\n",
"train['imp_T2_V4']=np.log(train['T2_V4']+1)\n",
"train['imp_T2_V6']=np.log(train['T2_V6']+1)\n",
"train['imp_T2_V9']=np.log(train['T2_V9']+1)\n",
"train['imp_T2_V10']=np.log(train['T2_V10']+1)\n",
"train['imp_T2_V14']=np.log(train['T2_V14']+1)\n",
"train['imp_T2_V15']=np.log(train['T2_V15']+1)\n",
"train['imp_hazard']=train['Hazard']\n",
"#train['imp_hazard']=np.where(train['Hazard'] > 100,100,train['Hazard'])"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#indicator variables all \n",
"ind1=pd.get_dummies(train['T1_V4'],prefix='imp_T1_V4') #categorical\n",
"ind2=pd.get_dummies(train['T1_V5'],prefix='imp_T1_V5') #categorical\n",
"ind3=pd.get_dummies(train['T1_V6'],prefix='imp_T1_V6')#categorical\n",
"ind4=pd.get_dummies(train['T1_V7'],prefix='imp_T1_V7')#categorical\n",
"ind5=pd.get_dummies(train['T1_V8'],prefix='imp_T1_V8') #categorical\n",
"ind6=pd.get_dummies(train['T1_V9'],prefix='imp_T1_V9') #categorical\n",
"ind7=pd.get_dummies(train['T1_V10'],prefix='imp_T1_V10') #categorical\n",
"ind8=pd.get_dummies(train['T1_V11'],prefix='imp_T1_V11') #categorical\n",
"ind9=pd.get_dummies(train['T1_V12'],prefix='imp_T1_V12')#categorical\n",
"ind10=pd.get_dummies(train['T1_V13'],prefix='imp_T1_V13') #categorical\n",
"ind11=pd.get_dummies(train['T1_V15'],prefix='imp_T1_V15') #categorical\n",
"ind12=pd.get_dummies(train['T1_V16'],prefix='imp_T1_V16') #categorical\n",
"ind13=pd.get_dummies(train['T1_V17'],prefix='imp_T1_V17') #categorical\n",
"ind14=pd.get_dummies(train['T2_V3'],prefix='imp_T2_V3') #categorical\n",
"ind15=pd.get_dummies(train['T2_V5'],prefix='imp_T2_V5')#categorical\n",
"ind16=pd.get_dummies(train['T2_V7'],prefix='imp_T2_V7') #categorical\n",
"ind17=pd.get_dummies(train['T2_V8'],prefix='imp_T2_V8') #ordinal\n",
"ind18=pd.get_dummies(train['T2_V11'],prefix='imp_T2_V11') #categorical\n",
"ind19=pd.get_dummies(train['T2_V12'],prefix='imp_T2_V12') #categorical\n",
"ind20=pd.get_dummies(train['T2_V13'],prefix='imp_T2_V13') #categorical\n",
"ind21=pd.get_dummies(train['T1_V1'],prefix='imp_T1_V1') #categorical\n",
"ind22=pd.get_dummies(train['T1_V2'],prefix='imp_T1_V2') #categorical\n",
"ind23=pd.get_dummies(train['T1_V3'],prefix='imp_T1_V3') #categorical\n",
"ind24=pd.get_dummies(train['T1_V14'],prefix='imp_T1_V14') #categorical\n",
"ind25=pd.get_dummies(train['T2_V1'],prefix='imp_T2_V1') #categorical\n",
"ind26=pd.get_dummies(train['T2_V2'],prefix='imp_T2_V2') #categorical\n",
"ind27=pd.get_dummies(train['T2_V4'],prefix='imp_T2_V4') #categorical\n",
"ind28=pd.get_dummies(train['T2_V6'],prefix='imp_T2_V6') #categorical\n",
"ind29=pd.get_dummies(train['T2_V9'],prefix='imp_T2_V9') #categorical\n",
"ind30=pd.get_dummies(train['T2_V10'],prefix='imp_T2_V10') #categorical\n",
"ind31=pd.get_dummies(train['T2_V14'],prefix='imp_T2_V14') #categorical\n",
"ind32=pd.get_dummies(train['T2_V15'],prefix='imp_T2_V15') #categorical\n",
"train['imp_hazard']=train['Hazard']\n",
"in1=pd.get_dummies(test['T1_V4'],prefix='imp_T1_V4') #categorical\n",
"in2=pd.get_dummies(test['T1_V5'],prefix='imp_T1_V5') #categorical\n",
"in3=pd.get_dummies(test['T1_V6'],prefix='imp_T1_V6')#categorical\n",
"in4=pd.get_dummies(test['T1_V7'],prefix='imp_T1_V7')#categorical\n",
"in5=pd.get_dummies(test['T1_V8'],prefix='imp_T1_V8') #categorical\n",
"in6=pd.get_dummies(test['T1_V9'],prefix='imp_T1_V9') #categorical\n",
"in7=pd.get_dummies(test['T1_V10'],prefix='imp_T1_V10') #categorical\n",
"in8=pd.get_dummies(test['T1_V11'],prefix='imp_T1_V11') #categorical\n",
"in9=pd.get_dummies(test['T1_V12'],prefix='imp_T1_V12')#categorical\n",
"in10=pd.get_dummies(test['T1_V13'],prefix='imp_T1_V13') #categorical\n",
"in11=pd.get_dummies(test['T1_V15'],prefix='imp_T1_V15') #categorical\n",
"in12=pd.get_dummies(test['T1_V16'],prefix='imp_T1_V16') #categorical\n",
"in13=pd.get_dummies(test['T1_V17'],prefix='imp_T1_V17') #categorical\n",
"in14=pd.get_dummies(test['T2_V3'],prefix='imp_T2_V3') #categorical\n",
"in15=pd.get_dummies(test['T2_V5'],prefix='imp_T2_V5')#categorical\n",
"in16=pd.get_dummies(test['T2_V7'],prefix='imp_T2_V7') #categorical\n",
"in17=pd.get_dummies(test['T2_V8'],prefix='imp_T2_V8') #ordinal\n",
"in18=pd.get_dummies(test['T2_V11'],prefix='imp_T2_V11') #categorical\n",
"in19=pd.get_dummies(test['T2_V12'],prefix='imp_T2_V12') #categorical\n",
"in20=pd.get_dummies(test['T2_V13'],prefix='imp_T2_V13') #categorical\n",
"in21=pd.get_dummies(test['T1_V1'],prefix='imp_T1_V1') #categorical\n",
"in22=pd.get_dummies(test['T1_V2'],prefix='imp_T1_V2') #categorical\n",
"in23=pd.get_dummies(test['T1_V3'],prefix='imp_T1_V3') #categorical\n",
"in24=pd.get_dummies(test['T1_V14'],prefix='imp_T1_V14') #categorical\n",
"in25=pd.get_dummies(test['T2_V1'],prefix='imp_T2_V1') #categorical\n",
"in26=pd.get_dummies(test['T2_V2'],prefix='imp_T2_V2') #categorical\n",
"in27=pd.get_dummies(test['T2_V4'],prefix='imp_T2_V4') #categorical\n",
"in28=pd.get_dummies(test['T2_V6'],prefix='imp_T2_V6') #categorical\n",
"in29=pd.get_dummies(test['T2_V9'],prefix='imp_T2_V9') #categorical\n",
"in30=pd.get_dummies(test['T2_V10'],prefix='imp_T2_V10') #categorical\n",
"in31=pd.get_dummies(test['T2_V14'],prefix='imp_T2_V14') #categorical\n",
"in32=pd.get_dummies(test['T2_V15'],prefix='imp_T2_V15') #categorical\n"
]
},
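{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#The ind1..ind32 / in1..in32 blocks above repeat one get_dummies call per\n",
"#column. A minimal equivalent sketch: build the same dummy frames in dicts\n",
"#keyed by column name instead of numbered variables.\n",
"dummy_cols=['T1_V1','T1_V2','T1_V3','T1_V4','T1_V5','T1_V6','T1_V7','T1_V8','T1_V9','T1_V10','T1_V11','T1_V12','T1_V13','T1_V14','T1_V15','T1_V16','T1_V17','T2_V1','T2_V2','T2_V3','T2_V4','T2_V5','T2_V6','T2_V7','T2_V8','T2_V9','T2_V10','T2_V11','T2_V12','T2_V13','T2_V14','T2_V15']\n",
"train_dummies={c:pd.get_dummies(train[c],prefix='imp_'+c) for c in dummy_cols}\n",
"test_dummies={c:pd.get_dummies(test[c],prefix='imp_'+c) for c in dummy_cols}"
]
},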
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"in1=pd.get_dummies(test['T1_V4'],prefix='imp_T1_V4') #categorical\n",
"in2=pd.get_dummies(test['T1_V5'],prefix='imp_T1_V5') #categorical\n",
"in3=pd.get_dummies(test['T1_V6'],prefix='imp_T1_V6')#categorical\n",
"in4=pd.get_dummies(test['T1_V7'],prefix='imp_T1_V7')#categorical\n",
"in5=pd.get_dummies(test['T1_V8'],prefix='imp_T1_V8') #categorical\n",
"in6=pd.get_dummies(test['T1_V9'],prefix='imp_T1_V9') #categorical\n",
"in7=pd.get_dummies(test['T1_V10'],prefix='imp_T1_V10') #categorical\n",
"in8=pd.get_dummies(test['T1_V11'],prefix='imp_T1_V11') #categorical\n",
"in9=pd.get_dummies(test['T1_V12'],prefix='imp_T1_V12')#categorical\n",
"in10=pd.get_dummies(test['T1_V13'],prefix='imp_T1_V13') #categorical\n",
"in11=pd.get_dummies(test['T1_V15'],prefix='imp_T1_V15') #categorical\n",
"in12=pd.get_dummies(test['T1_V16'],prefix='imp_T1_V16') #categorical\n",
"in13=pd.get_dummies(test['T1_V17'],prefix='imp_T1_V17') #categorical\n",
"in14=pd.get_dummies(test['T2_V3'],prefix='imp_T2_V3') #categorical\n",
"in15=pd.get_dummies(test['T2_V5'],prefix='imp_T2_V5')#categorical\n",
"in16=pd.get_dummies(test['T2_V7'],prefix='imp_T2_V7') #categorical\n",
"in17=pd.get_dummies(test['T2_V8'],prefix='imp_T2_V8') #ordinal\n",
"in18=pd.get_dummies(test['T2_V11'],prefix='imp_T2_V11') #categorical\n",
"in19=pd.get_dummies(test['T2_V12'],prefix='imp_T2_V12') #categorical\n",
"in20=pd.get_dummies(test['T2_V13'],prefix='imp_T2_V13') #categorical\n",
"#Transform to same scale, dampen outlier influence, etc.\n",
"test['imp_T1_V1']=np.log(test['T1_V1']+1)\n",
"test['imp_T1_V2']=np.log(test['T1_V2']+1)\n",
"test['imp_T1_V3']=np.log(test['T1_V3']+1)\n",
"test['imp_T1_V14']=np.log(test['T1_V14']+1)\n",
"test['imp_T2_V1']=np.log(test['T2_V1']+1)\n",
"test['imp_T2_V2']=np.log(test['T2_V2']+1)\n",
"test['imp_T2_V4']=np.log(test['T2_V4']+1)\n",
"test['imp_T2_V6']=np.log(test['T2_V6']+1)\n",
"test['imp_T2_V9']=np.log(test['T2_V9']+1)\n",
"test['imp_T2_V10']=np.log(test['T2_V10']+1)\n",
"test['imp_T2_V14']=np.log(test['T2_V14']+1)\n",
"test['imp_T2_V15']=np.log(test['T2_V15']+1)\n"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_imp=train[['Id','imp_hazard']].join(ind1).join(ind2).join(ind3).join(ind4).join(ind5).join(ind6).join(ind7).join(ind8).join(ind9).join(ind10).join(ind11).join(ind12).join(ind13).join(ind14).join(ind15).join(ind16).join(ind17).join(ind18).join(ind19).join(ind20).join(ind21).join(ind22).join(ind23).join(ind24).join(ind25).join(ind26).join(ind27).join(ind28).join(ind29).join(ind30).join(ind31).join(ind32)\n",
"#train_imp=train[['Id','imp_hazard','imp_T1_V1','imp_T1_V2','imp_T1_V3','imp_T1_V14','imp_T2_V1','imp_T2_V2','imp_T2_V4','imp_T2_V6','imp_T2_V9','imp_T2_V10','imp_T2_V14','imp_T2_V15']].join(ind1).join(ind2).join(ind3).join(ind4).join(ind5).join(ind6).join(ind7).join(ind8).join(ind9).join(ind10).join(ind11).join(ind12).join(ind13).join(ind14).join(ind15).join(ind16).join(ind17).join(ind18).join(ind19).join(ind20)"
]
},
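{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#The long .join() chain above can be collapsed into one pd.concat along\n",
"#columns. A sketch assuming the train_dummies/dummy_cols cell above ran;\n",
"#column order may differ from train_imp, but the contents are the same.\n",
"train_imp_alt=pd.concat([train[['Id','imp_hazard']]]+[train_dummies[c] for c in dummy_cols],axis=1)\n",
"print(train_imp_alt.shape)"
]
},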
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#train_imp2=train_imp.drop(['imp_T1_V4_H','imp_T1_V5_L','imp_T1_V6_N','imp_T1_V7_C','imp_T1_V8_A','imp_T1_V9_G','imp_T1_V10_2','imp_T1_V11_K','imp_T1_V12_A','imp_T1_V13_5','imp_T1_V15_F','imp_T1_V16_O','imp_T1_V17_N','imp_T2_V3_N','imp_T2_V5_F','imp_T2_V7_22','imp_T2_V8_1','imp_T2_V11_N','imp_T2_V12_N','imp_T2_V13_B'],1)"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"test_imp=test[['Id']].join(in1).join(in2).join(in3).join(in4).join(in5).join(in6).join(in7).join(in8).join(in9).join(in10).join(in11).join(in12).join(in13).join(in14).join(in15).join(in16).join(in17).join(in18).join(in19).join(in20).join(in21).join(in22).join(in23).join(in24).join(in25).join(in26).join(in27).join(in28).join(in29).join(in30).join(in31).join(in32)\n",
"#test_imp=test[['Id','imp_T1_V1','imp_T1_V2','imp_T1_V3','imp_T1_V14','imp_T2_V1','imp_T2_V2','imp_T2_V4','imp_T2_V6','imp_T2_V9','imp_T2_V10','imp_T2_V14','imp_T2_V15']].join(in1).join(in2).join(in3).join(in4).join(in5).join(in6).join(in7).join(in8).join(in9).join(in10).join(in11).join(in12).join(in13).join(in14).join(in15).join(in16).join(in17).join(in18).join(in19).join(in20)"
]
},
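{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Caveat: calling get_dummies on train and test separately can yield\n",
"#different column sets if a category level appears in only one file. A\n",
"#defensive sketch that reindexes test_imp to the training feature columns,\n",
"#filling dummies unseen in test with 0, so the model matrices line up.\n",
"feature_cols=[c for c in train_imp.columns if c not in ('Id','imp_hazard')]\n",
"test_imp=test_imp.reindex(columns=['Id']+feature_cols,fill_value=0)"
]
},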
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#test_imp2=test_imp.drop(['imp_T1_V4_H','imp_T1_V5_L','imp_T1_V6_N','imp_T1_V7_C','imp_T1_V8_A','imp_T1_V9_G','imp_T1_V10_2','imp_T1_V11_K','imp_T1_V12_A','imp_T1_V13_5','imp_T1_V15_F','imp_T1_V16_O','imp_T1_V17_N','imp_T2_V3_N','imp_T2_V5_F','imp_T2_V7_22','imp_T2_V8_1','imp_T2_V11_N','imp_T2_V12_N','imp_T2_V13_B'],1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_imp.to_csv('d:/kaggle/Insurance/out_train.csv', index=False)\n",
"test_imp.to_csv('d:/kaggle/Insurance/out_test.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"train_imp2.to_csv('d:/kaggle/Insurance/out_train.csv', index=False)\n",
"test_imp2.to_csv('d:/kaggle/Insurance/out_test.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"x = train_imp2.ix[:,2:392] #predictors train\n",
"y = train_imp2.ix[:,1]# response train\n",
"test_data=test_imp2.ix[:,1:391] # predictors test"
]
},
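{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#cross_val_score is imported at the top but never used, and the metric\n",
"#prints below are all on the training data, so they overstate fit quality.\n",
"#A minimal out-of-sample scoring sketch for one model (the scoring name\n",
"#matches the older sklearn.cross_validation API used in this notebook).\n",
"cv_mse=cross_val_score(RandomForestRegressor(n_estimators=10),x,y,scoring='mean_squared_error',cv=5)\n",
"print('CV MSE: %.2f (+/- %.2f)' % (-cv_mse.mean(),cv_mse.std()))"
]
},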
{
"cell_type": "code",
"execution_count": 157,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#classification\n",
"from sklearn.svm import SVC\n",
"from sklearn.svm import NuSVC\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.naive_bayes import MultinomialNB\n",
"from sklearn.naive_bayes import BernoulliNB\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#SVC\n",
"\n",
"svc = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,gamma=0.0, kernel='rbf', max_iter=-1, probability=False,random_state=None, shrinking=True, tol=0.001, verbose=False)\n",
"\n",
"# Train the model using the training sets\n",
"svc.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', svc.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\" % np.mean((svc.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % svc.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(svc.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(svc.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(svc.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(svc.predict(x),y))\n",
"#print(\"AUC: %.2f\" % metrics.roc_auc_score(svc.predict(x),y))\n",
"#print(\"MAE: %.2f\" % metrics.mean_absolute_error(svc.predict(x),y))\n",
"#print(\"MSE: %.2f\" % metrics.mean_squared_error(svc.predict(x),y))\n",
"#print(\"MedianAE: %.2f\" % metrics.median_absolute_error(svc.predict(x),y))\n",
"#print(\"r^2: %.2f\" % metrics.r2_score(svc.predict(x),y))\n",
"\n",
"train_svc=pd.DataFrame(np.round(np.exp(svc.predict(x))-1,0),columns=['yhat'])\n",
"train_svc2=train_imp2[['Id','imp_hazard']].join(train_svc)\n",
"score_svc=pd.DataFrame(np.round(np.exp(svc.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_svc2=test_imp2[['Id']].join(score_svc)\n",
"\n",
"train_svc2.to_csv('d:/kaggle/Insurance/train_svc2.csv', index=False)\n",
"score_svc2.to_csv('d:/kaggle/Insurance/score_svc2.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#NuSVC\n",
"\n",
"nusvc = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, verbose=False, max_iter=-1, random_state=None)\n",
"\n",
"# Train the model using the training sets\n",
"nusvc.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', nusvc.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\"\n",
"# % np.mean((nusvc.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % nusvc.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(nusvc.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(nusvc.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(nusvc.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(nusvc.predict(x),y))\n",
"#print(\"AUC: %.2f\" % metrics.roc_auc_score(nusvc.predict(x),y))\n",
"#print(\"MAE: %.2f\" % metrics.mean_absolute_error(nusvc.predict(x),y))\n",
"#print(\"MSE: %.2f\" % metrics.mean_squared_error(nusvc.predict(x),y))\n",
"#print(\"MedianAE: %.2f\" % metrics.median_absolute_error(nusvc.predict(x),y))\n",
"#print(\"r^2: %.2f\" % metrics.r2_score(nusvc.predict(x),y))\n",
"\n",
"train_nusvc=pd.DataFrame(np.round(np.exp(nusvc.predict(x))-1,0),columns=['yhat'])\n",
"train_nusvc2=train_imp2[['Id','imp_hazard']].join(train_nusvc)\n",
"score_nusvc=pd.DataFrame(np.round(np.exp(nusvc.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_nusvc2=test_imp2[['Id']].join(score_nusvc)\n",
"\n",
"train_nusvc2.to_csv('d:/kaggle/Insurance/train_nusvc2.csv', index=False)\n",
"score_nusvc2.to_csv('d:/kaggle/Insurance/score_nusvc2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#LinearSVC\n",
"\n",
"lsvc = LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None, max_iter=1000)\n",
"\n",
"# Train the model using the training sets\n",
"lsvc.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', lsvc.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\"\n",
"# % np.mean((lsvc.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % lsvc.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(lsvc.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(lsvc.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(lsvc.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(lsvc.predict(x),y))\n",
"#print(\"AUC: %.2f\" % metrics.roc_auc_score(lsvc.predict(x),y))\n",
"#print(\"MAE: %.2f\" % metrics.mean_absolute_error(lsvc.predict(x),y))\n",
"#print(\"MSE: %.2f\" % metrics.mean_squared_error(lsvc.predict(x),y))\n",
"#print(\"MedianAE: %.2f\" % metrics.median_absolute_error(lsvc.predict(x),y))\n",
"#print(\"r^2: %.2f\" % metrics.r2_score(lsvc.predict(x),y))\n",
"\n",
"train_lsvc=pd.DataFrame(np.round(np.exp(lsvc.predict(x))-1,0),columns=['yhat'])\n",
"train_lsvc2=train_imp2[['Id','imp_hazard']].join(train_lsvc)\n",
"score_lsvc=pd.DataFrame(np.round(np.exp(lsvc.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_lsvc2=test_imp2[['Id']].join(score_lsvc)\n",
"\n",
"train_lsvc2.to_csv('d:/kaggle/Insurance/train_lsvc2.csv', index=False)\n",
"score_lsvc2.to_csv('d:/kaggle/Insurance/score_lsvc2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#random forest classifier\n",
"\n",
"rfc = RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False, class_weight=None)\n",
"# Train the model using the training sets\n",
"rfc=rfc.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', rfc.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\"\n",
"# % np.mean((rfc.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % rfc.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(rfc.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(rfc.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(rfc.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(rfc.predict(x),y))\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(rfc.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(rfc.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(rfc.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(rfc.predict(x),y))\n",
"\n",
"train_rfc=pd.DataFrame(np.round(np.exp(rfc.predict(x))-1,0),columns=['yhat'])\n",
"train_rfc2=train_imp2[['Id','imp_hazard']].join(train_rfc)\n",
"score_rfc=pd.DataFrame(np.round(np.exp(rfc.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_rfc2=test_imp2[['Id']].join(score_rfc)\n",
"\n",
"train_rfc2.to_csv('d:/kaggle/Insurance/train_rfc2.csv', index=False)\n",
"score_rfc2.to_csv('d:/kaggle/Insurance/score_rfc2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#decision Tree\n",
"\n",
"dtc = DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, class_weight=None)\n",
"\n",
"# Train the model using the training sets\n",
"dtc.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', dtc.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\"\n",
"# % np.mean((dtc.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % dtc.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(dtc.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(dtc.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(dtc.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(dtc.predict(x),y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(dtc.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(dtc.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(dtc.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(dtc.predict(x),y))\n",
"\n",
"train_dtc=pd.DataFrame(np.round(np.exp(dtc.predict(x))-1,0),columns=['yhat'])\n",
"train_dtc2=train_imp2[['Id','imp_hazard']].join(train_dtc)\n",
"score_dtc=pd.DataFrame(np.round(np.exp(dtc.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_dtc2=test_imp2[['Id']].join(score_dtc)\n",
"\n",
"train_dtc2.to_csv('d:/kaggle/Insurance/train_dtc2.csv', index=False)\n",
"score_dtc2.to_csv('d:/kaggle/Insurance/score_dtc2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#OLS Regression\n",
"\n",
"# Create linear regression object\n",
"regr_ols = linear_model.LinearRegression()\n",
"\n",
"# Train the model using the training sets\n",
"regr_ols.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_ols.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_ols.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_ols.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_ols.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_ols.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_ols.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_ols.predict(x),y))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Lasso\n",
"regr_lasso = linear_model.Lasso(alpha=.1, copy_X=True, fit_intercept=True, max_iter=1000,\n",
" normalize=False, positive=False, precompute=False, random_state=None,\n",
" selection='cyclic', tol=0.0001, warm_start=False)\n",
"\n",
"# Train the model using the training sets\n",
"regr_lasso.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_lasso.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_lasso.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_lasso.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_lasso.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_lasso.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_lasso.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_lasso.predict(x),y))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Ridge\n",
"regr_ridge = linear_model.Ridge(alpha=.5,copy_X=True, fit_intercept=True, max_iter=None,\n",
" normalize=False, solver='auto', tol=0.001)\n",
" \n",
"# Train the model using the training sets\n",
"regr_ridge.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_ridge.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_ridge.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_ridge.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_ridge.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_ridge.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_ridge.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_ridge.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#elastic net\n",
"regr_enet = linear_model.ElasticNet(alpha=1.0, l1_ratio=0.5, fit_intercept=True, normalize=False, precompute=False, max_iter=1000, copy_X=True, tol=0.0001, warm_start=False, positive=False, random_state=None, selection='cyclic')\n",
"\n",
"# Train the model using the training sets\n",
"regr_enet.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_enet.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_enet.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_enet.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_enet.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_enet.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_enet.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_enet.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#lars\n",
"regr_lars = linear_model.Lars(fit_intercept=True, verbose=False, normalize=True, precompute='auto', n_nonzero_coefs=500, eps=2.2204460492503131e-16, copy_X=True, fit_path=True)\n",
"# Train the model using the training sets\n",
"regr_lars.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_lars.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_lars.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_lars.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_lars.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_lars.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_lars.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_lars.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#lasso lars\n",
"regr_llars = linear_model.LassoLars(alpha=1.0, fit_intercept=True, verbose=False, normalize=True, precompute='auto', max_iter=500, eps=2.2204460492503131e-16, copy_X=True, fit_path=True)\n",
"\n",
"# Train the model using the training sets\n",
"regr_llars.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_llars.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_llars.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_llars.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_llars.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_llars.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_llars.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_llars.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#OrthogonalMatchingPursuit\n",
"regr_omp = linear_model.OrthogonalMatchingPursuit(n_nonzero_coefs=None, tol=None, fit_intercept=True, normalize=True, precompute='auto')\n",
"\n",
"# Train the model using the training sets\n",
"regr_omp.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_omp.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_omp.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_omp.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_omp.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_omp.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_omp.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_omp.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Bayesian Ridge\n",
"regr_bridge = linear_model.BayesianRidge(n_iter=300, tol=0.001, alpha_1=1e-06, alpha_2=1e-06, lambda_1=1e-06, lambda_2=1e-06, compute_score=False, fit_intercept=True, normalize=False, copy_X=True, verbose=False)\n",
"\n",
"# Train the model using the training sets\n",
"regr_bridge.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_bridge.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_bridge.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_bridge.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_bridge.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_bridge.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_bridge.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_bridge.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#passive aggressive regressor\n",
"regr_par = linear_model.PassiveAggressiveRegressor(C=1.0, fit_intercept=True, n_iter=5, shuffle=True, verbose=0, loss='epsilon_insensitive', epsilon=0.1, random_state=None, class_weight=None, warm_start=False)\n",
"\n",
"# Train the model using the training sets\n",
"regr_par.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_par.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_par.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_par.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_par.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_par.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_par.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_par.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#TheilSenRegressor\n",
"regr_tsr = linear_model.TheilSenRegressor(fit_intercept=True, copy_X=True, max_subpopulation=10000.0, n_subsamples=None, max_iter=300, tol=0.001, random_state=None, n_jobs=1, verbose=False)\n",
"\n",
"\n",
"# Train the model using the training sets\n",
"regr_tsr.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_tsr.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_tsr.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_tsr.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_tsr.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_tsr.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_tsr.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_tsr.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Stochastic Gradient Descent Regression\n",
"\n",
"\n",
"regr_sgdr = linear_model.SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,\n",
" fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',\n",
" loss='squared_loss', n_iter=5, penalty='l2', power_t=0.25,\n",
" random_state=None, shuffle=True, verbose=0, warm_start=False)\n",
"# Train the model using the training sets\n",
"regr_sgdr.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', regr_sgdr.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_sgdr.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_sgdr.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_sgdr.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_sgdr.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_sgdr.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_sgdr.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#decision Tree regressor\n",
"\n",
"\n",
"regr_dtr = DecisionTreeRegressor(criterion='mse', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None)\n",
"\n",
"# Train the model using the training sets\n",
"regr_dtr.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', regr_dtr.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_dtr.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_dtr.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_dtr.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_dtr.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_dtr.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_dtr.predict(x),y))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#random forest regressor\n",
"\n",
"\n",
"regr_rfr = RandomForestRegressor(n_estimators=10, criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False)\n",
"\n",
"\n",
"# Train the model using the training sets\n",
"model_rfr = regr_rfr.fit(x, y)\n",
"# The coefficients\n",
"#print('Estimators: \\n', regr_rfr.estimators_)\n",
"#print('feature importances: \\n', regr_rfr.feature_importances_)\n",
"\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((regr_rfr.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % regr_rfr.score(x, y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(regr_rfr.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(regr_rfr.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(regr_rfr.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(regr_rfr.predict(x),y))"
]
},
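{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#The feature_importances_ print above is commented out; a short sketch\n",
"#that ranks the fitted forest's dummy features by importance (top 20).\n",
"order=np.argsort(regr_rfr.feature_importances_)[::-1][:20]\n",
"for i in order:\n",
"    print('%s: %.4f' % (x.columns[i],regr_rfr.feature_importances_[i]))"
]
},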
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Gaussian Naive Bayes\n",
"\n",
"gnb = GaussianNB()\n",
"# Train the model using the training sets\n",
"gnb.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', gnb.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((gnb.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % gnb.score(x, y))\n",
"print(\"Accuracy: %.2f\" % metrics.accuracy_score(gnb.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(gnb.predict(x),y))\n",
"print(\"Precision: %.2f\" % metrics.precision_score(gnb.predict(x),y))\n",
"print(\"Recall: %.2f\" % metrics.recall_score(gnb.predict(x),y))\n",
"#print(\"AUC: %.2f\" % metrics.roc_auc_score(gnb.predict(x),y))\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(gnb.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(gnb.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(gnb.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(gnb.predict(x),y))\n",
"\n",
"train_gnb=pd.DataFrame(np.round(np.exp(gnb.predict(x))-1,0),columns=['yhat'])\n",
"train_gnb2=train_imp2[['Id','imp_hazard']].join(train_gnb)\n",
"score_gnb=pd.DataFrame(np.round(np.exp(gnb.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_gnb2=test_imp2[['Id']].join(score_gnb)\n",
"\n",
"train_gnb2.to_csv('d:/kaggle/Insurance/train_gnb2.csv', index=False)\n",
"score_gnb2.to_csv('d:/kaggle/Insurance/score_gnb2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#multinomial nb\n",
"\n",
"#mnb = MultinomialNB(alpha=1.0, fit_prior=True, class_prior=None)\n",
"mnb = MultinomialNB()\n",
"# Train the model using the training sets\n",
"mnb.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', mnb.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((mnb.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % mnb.score(x, y))\n",
"print(\"Accuracy: %.2f\" % metrics.accuracy_score(mnb.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(mnb.predict(x),y))\n",
"print(\"Precision: %.2f\" % metrics.precision_score(mnb.predict(x),y))\n",
"print(\"Recall: %.2f\" % metrics.recall_score(mnb.predict(x),y))\n",
"#print(\"AUC: %.2f\" % metrics.roc_auc_score(mnb.predict(x),y))\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(mnb.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(mnb.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(mnb.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(mnb.predict(x),y))\n",
"\n",
"train_mnb=pd.DataFrame(np.round(np.exp(mnb.predict(x))-1,0),columns=['yhat'])\n",
"train_mnb2=train_imp2[['Id','imp_hazard']].join(train_mnb)\n",
"score_mnb=pd.DataFrame(np.round(np.exp(mnb.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_mnb2=test_imp2[['Id']].join(score_mnb)\n",
"\n",
"train_mnb2.to_csv('d:/kaggle/Insurance/train_mnb2.csv', index=False)\n",
"score_mnb2.to_csv('d:/kaggle/Insurance/score_mnb2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#BernoulliNB\n",
"\n",
"bnb = BernoulliNB(alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None)\n",
"# Train the model using the training sets\n",
"bnb.fit(x, y)\n",
"# The coefficients\n",
"print('Coefficients: \\n', bnb.coef_)\n",
"# The mean square error\n",
"print(\"Residual sum of squares: %.2f\"\n",
" % np.mean((bnb.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"print('Variance score: %.2f' % bnb.score(x, y))\n",
"print(\"Accuracy: %.2f\" % metrics.accuracy_score(bnb.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(bnb.predict(x),y))\n",
"print(\"Precision: %.2f\" % metrics.precision_score(bnb.predict(x),y))\n",
"print(\"Recall: %.2f\" % metrics.recall_score(bnb.predict(x),y))\n",
"#print(\"AUC: %.2f\" % metrics.roc_auc_score(bnb.predict(x),y))\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(bnb.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(bnb.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(bnb.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(bnb.predict(x),y))\n",
"\n",
"train_bnb=pd.DataFrame(np.round(np.exp(bnb.predict(x))-1,0),columns=['yhat'])\n",
"train_bnb2=train_imp2[['Id','imp_hazard']].join(train_bnb)\n",
"score_bnb=pd.DataFrame(np.round(np.exp(bnb.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_bnb2=test_imp2[['Id']].join(score_bnb)\n",
"\n",
"train_bnb2.to_csv('d:/kaggle/Insurance/train_bnb2.csv', index=False)\n",
"score_bnb2.to_csv('d:/kaggle/Insurance/score_bnb2.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#sgd\n",
"\n",
"sgd = SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,eta0=0.0, fit_intercept=True,l1_ratio=0.15,learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,penalty='l2', \n",
" power_t=0.5, random_state=None, shuffle=True,verbose=0, warm_start=False)\n",
"# Train the model using the training sets\n",
"sgd.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', sgd.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\"\n",
"# % np.mean((sgd.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % sgd.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(sgd.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(sgd.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(sgd.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(sgd.predict(x),y))\n",
"#print(\"AUC: %.2f\" % metrics.roc_auc_score(sgd.predict(x),y))\n",
"#print(\"MAE: %.2f\" % metrics.mean_absolute_error(sgd.predict(x),y))\n",
"#print(\"MSE: %.2f\" % metrics.mean_squared_error(sgd.predict(x),y))\n",
"#print(\"MedianAE: %.2f\" % metrics.median_absolute_error(sgd.predict(x),y))\n",
"#print(\"r^2: %.2f\" % metrics.r2_score(sgd.predict(x),y))\n",
"\n",
"train_sgd=pd.DataFrame(np.round(np.exp(sgd.predict(x))-1,0),columns=['yhat'])\n",
"train_sgd2=train_imp2[['Id','imp_hazard']].join(train_sgd)\n",
"score_sgd=pd.DataFrame(np.round(np.exp(sgd.predict(test_data))-1,0),columns=['Hazard'])\n",
"score_sgd2=test_imp2[['Id']].join(score_sgd)\n",
"\n",
"train_sgd2.to_csv('d:/kaggle/Insurance/train_sgd2.csv', index=False)\n",
"score_sgd2.to_csv('d:/kaggle/Insurance/score_sgd2.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#ExtratreesRegressor\n",
"from sklearn.ensemble import ExtraTreesRegressor\n",
"\n",
"etr =ExtraTreesRegressor(n_estimators=10, criterion='mse', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=False, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False)\n",
"\n",
"\n",
"# Train the model using the training sets\n",
"etr.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', dtc.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\"\n",
"# % np.mean((dtc.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % dtc.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(dtc.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(dtc.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(dtc.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(dtc.predict(x),y))\n",
"\n",
"print(\"MAE: %.2f\" % metrics.mean_absolute_error(etr.predict(x),y))\n",
"print(\"MSE: %.2f\" % metrics.mean_squared_error(etr.predict(x),y))\n",
"print(\"MedianAE: %.2f\" % metrics.median_absolute_error(etr.predict(x),y))\n",
"print(\"r^2: %.2f\" % metrics.r2_score(etr.predict(x),y))\n",
"\n",
"train_etr=pd.DataFrame(etr.predict(x),columns=['yhat'])\n",
"train_etr2=train_imp2[['Id','imp_hazard']].join(train_etr)\n",
"score_etr=pd.DataFrame(etr.predict(test_data),columns=['Hazard'])\n",
"score_etr2=test_imp2[['Id']].join(score_etr)\n",
"\n",
"train_etr2.to_csv('d:/kaggle/Insurance/train_etr2.csv', index=False)\n",
"score_etr2.to_csv('d:/kaggle/Insurance/score_etr2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#GradientBoostingClassifier\n",
"from sklearn.ensemble import GradientBoostingClassifier\n",
"\n",
"gbc = GradientBoostingClassifier(loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, init=None, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False)\n",
"\n",
"# Train the model using the training sets\n",
"gbc.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', dtc.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\"\n",
"# % np.mean((dtc.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % dtc.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(dtc.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(dtc.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(dtc.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(dtc.predict(x),y))\n",
"\n",
"#print(\"MAE: %.2f\" % metrics.mean_absolute_error(gbc.predict(x),y))\n",
"#print(\"MSE: %.2f\" % metrics.mean_squared_error(gbc.predict(x),y))\n",
"#print(\"MedianAE: %.2f\" % metrics.median_absolute_error(gbc.predict(x),y))\n",
"#print(\"r^2: %.2f\" % metrics.r2_score(gbc.predict(x),y))\n",
"\n",
"train_gbc=pd.DataFrame(gbc.predict(x),columns=['yhat'])\n",
"train_gbc2=train_imp2[['Id','imp_hazard']].join(train_gbc)\n",
"score_gbc=pd.DataFrame(gbc.predict(test_data),columns=['Hazard'])\n",
"score_gbc2=test_imp2[['Id']].join(score_gbc)\n",
"\n",
"train_gbc2.to_csv('d:/kaggle/Insurance/train_gbc2.csv', index=False)\n",
"score_gbc2.to_csv('d:/kaggle/Insurance/score_gbc2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#AdaBoostClassifier\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"\n",
"abc = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0, algorithm='SAMME.R', random_state=None)\n",
"# Train the model using the training sets\n",
"abc.fit(x, y)\n",
"# The coefficients\n",
"#print('Coefficients: \\n', dtc.coef_)\n",
"# The mean square error\n",
"#print(\"Residual sum of squares: %.2f\"\n",
"# % np.mean((dtc.predict(x) - y) ** 2))\n",
"# Explained variance score: 1 is perfect prediction\n",
"#print('Variance score: %.2f' % dtc.score(x, y))\n",
"#print(\"Accuracy: %.2f\" % metrics.accuracy_score(dtc.predict(x),y))\n",
"#print(\"avg Precision: %.2f\" % metrics.average_precision_score(dtc.predict(x),y))\n",
"#print(\"Precision: %.2f\" % metrics.precision_score(dtc.predict(x),y))\n",
"#print(\"Recall: %.2f\" % metrics.recall_score(dtc.predict(x),y))\n",
"\n",
"#print(\"MAE: %.2f\" % metrics.mean_absolute_error(gbc.predict(x),y))\n",
"#print(\"MSE: %.2f\" % metrics.mean_squared_error(gbc.predict(x),y))\n",
"#print(\"MedianAE: %.2f\" % metrics.median_absolute_error(gbc.predict(x),y))\n",
"#print(\"r^2: %.2f\" % metrics.r2_score(gbc.predict(x),y))\n",
"\n",
"train_abc=pd.DataFrame(abc.predict(x),columns=['yhat'])\n",
"train_abc2=train_imp2[['Id','imp_hazard']].join(train_abc)\n",
"score_abc=pd.DataFrame(abc.predict(test_data),columns=['Hazard'])\n",
"score_abc2=test_imp2[['Id']].join(score_abc)\n",
"\n",
"train_abc2.to_csv('d:/kaggle/Insurance/train_abc2.csv', index=False)\n",
"score_abc2.to_csv('d:/kaggle/Insurance/score_abc2.csv', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.decomposition import PCA\n",
"\n",
"pca = PCA(copy=True, n_components=None, whiten=False)\n",
"model_pca=pca.fit(x)\n",
"print(pca.explained_variance_ratio_)\n",
"print(pca.components_)\n",
"print(pca.mean_)\n",
"print(pca.n_components_)\n",
"print(pca.noise_variance_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.decomposition import IncrementalPCA\n",
"ipca = IncrementalPCA(copy=True, n_components=None, whiten=False,batch_size=None)\n",
"ipca.fit(x)\n",
"print(ipca.explained_variance_ratio_)\n",
"# print(ipca.explained_variance_)\n",
"print(ipca.components_)\n",
"print(ipca.mean_)\n",
"# print(ipca.var_)\n",
"# print(ipca.n_components_)\n",
"# print(ipca.noise_variance_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.decomposition import RandomizedPCA\n",
"rpca = RandomizedPCA(copy=True, n_components=None, whiten=False,iterated_power=3,random_state=None)\n",
"rpca.fit(x)\n",
"print(rpca.explained_variance_ratio_)\n",
"print(rpca.components_)\n",
"print(rpca.mean_)\n",
"#print(rpca.n_components_)\n",
"#print(rpca.noise_variance_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from sklearn.decomposition import MiniBatchSparsePCA\n",
"mspca = MiniBatchSparsePCA(n_components=None, alpha=1, ridge_alpha=0.01, n_iter=100, callback=None, batch_size=3, verbose=False, shuffle=True, n_jobs=1, method='lars', random_state=None)\n",
"mspca.fit(x)\n",
"#print(mpca.explained_variance_ratio_)\n",
"print(mspca.components_)\n",
"#print(mspca.error_)\n",
"print(mspca.n_iter_)\n",
"#print(mpca.noise_variance_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import statsmodels.api as sm\n",
"\n",
"#statsmodels.discrete.discrete_model.Poisson\n",
"poi_model = sm.Poisson(y, x) # offset=None, exposure=None, missing='none')\n",
"poi_res = poi_model.fit(method=\"newton\")\n",
"print(poi_res.summary())\n",
"#poi_model.predict(test_data)\n",
"poi_res.predict(test_data)\n",
"\n",
"train_poi=pd.DataFrame(exp(poi_res.predict(x)),columns=['yhat'])\n",
"train_poi2=train_imp2[['Id','imp_hazard']].join(train_poi)\n",
"score_poi=pd.DataFrame(exp(poi_res.predict(test_data)),columns=['Hazard'])\n",
"score_poi2=test_imp2[['Id']].join(score_poi)\n",
"\n",
"train_poi2.to_csv('d:/kaggle/Insurance/train_poi2.csv', index=False)\n",
"score_poi2.to_csv('d:/kaggle/Insurance/score_poi2.csv', index=False)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"poisummary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class statsmodels.discrete.discrete_model.Poisson(endog, exog, offset=None, exposure=None, missing='none', **kwargs)\n",
"class statsmodels.discrete.discrete_model.CountResults(model, mlefit, cov_type='nonrobust', cov_kwds=None, use_t=None)\n",
"\n",
"class statsmodels.discrete.discrete_model.NegativeBinomial(endog, exog, loglike_method='nb2', offset=None, exposure=None, missing='none', **kwargs)\n",
"class statsmodels.discrete.discrete_model.NegativeBinomialResults(model, mlefit, cov_type='nonrobust', cov_kwds=None, use_t=None)\n",
"\n",
"\n",
"\n",
"class statsmodels.discrete.discrete_model.Logit(endog, exog, **kwargs)\n",
"class statsmodels.discrete.discrete_model.LogitResults(model, mlefit, cov_type='nonrobust', cov_kwds=None, use_t=None)\n",
"\n",
"class statsmodels.discrete.discrete_model.Probit(endog, exog, **kwargs)\n",
"class statsmodels.discrete.discrete_model.ProbitResults(model, mlefit, cov_type='nonrobust', cov_kwds=None, use_t=None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"class sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver='liblinear', max_iter=100, multi_class='ovr', verbose=0)"
]
},
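{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#A minimal runnable sketch matching the signature noted above: treat high\n",
"#hazard as a binary event and fit a logistic regression. The threshold of\n",
"#2 is an illustrative assumption, not part of the original analysis.\n",
"from sklearn.linear_model import LogisticRegression\n",
"logit = LogisticRegression(penalty='l2', C=1.0)\n",
"logit.fit(x, (y > 2).astype(int))\n",
"print('train accuracy: %.2f' % logit.score(x, (y > 2).astype(int)))"
]
}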
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}