abyalias/gist:e51d059569f91b5920f60803e2cf34cd

## gistfile1.txt

# Logistic regression in R (logistf package): results

> str(iris_qua)
'data.frame':	100 obs. of  5 variables:
 $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
 $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
 $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
 $ Petal.Width : num  1.4 1.5 0.4 1.3 1.5 0.6 1.6 1 1.3 1.4 ...
 $ Species     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...

library(logistf)
model2 <- logistf(Species ~ ., data = iris_qua, family = binomial)
summary(model2)
> summary(model2)
logistf(formula = Species ~ ., data = iris_qua, family = binomial)

Model fitted by Penalized ML
Confidence intervals and p-values by Profile Likelihood Profile Likelihood Profile Likelihood Profile Likelihood Profile Likelihood

                   coef  se(coef)  lower 0.95 upper 0.95    Chisq            p
(Intercept)  -1.9094572 4.2916556 -10.0344572   6.215543      Inf 0.000000e+00
Sepal.Length  0.1949514 1.3531859  -4.0656541   2.625906      Inf 0.000000e+00
Sepal.Width  -0.3821325 1.1902449  -4.3998915   3.334993 27.07594 1.956173e-07
Petal.Length  2.0119360 1.3252200   0.4086152   9.626672  0.00000 1.000000e+00
Petal.Width  -0.0010429 0.5667498  -1.6956076   2.023168  0.00000 1.000000e+00

Likelihood ratio test=-378.9395 on 4 df, p=1, n=100
Wald test = 3.157929 on 4 df, p = 0.5317528

Covariance-Matrix:
           [,1]       [,2]       [,3]        [,4]        [,5]
[1,] 18.4183080 -3.7583069  0.3766262 -0.64117866  0.14506984
[2,] -3.7583069  1.8311121 -1.2127691 -0.74701502 -0.18395215
[3,]  0.3766262 -1.2127691  1.4166830  0.51005296  0.10972201
[4,] -0.6411787 -0.7470150  0.5100530  1.75620810  0.07977398
[5,]  0.1450698 -0.1839522  0.1097220  0.07977398  0.32120528


# Python (scikit-learn LogisticRegression) results

In[49]: iris_df.head(5)

Out[49]:
   Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
0           5.1          3.5           1.4          1.4       0
1           4.9          3.0           1.4          1.5       0
2           4.7          3.2           1.3          0.4       0
3           4.6          3.1           1.5          1.3       0
4           5.0          3.6           1.4          1.5       0

iris_df['Species'].unique()
iris_df['Species'] = iris_df['Species'].astype('category')
x = iris_df.ix[:,0:4]
y = iris_df.ix[:,-1]

In[38]: x.dtypes
Out[38]:
Sepal.Length    float64
Sepal.Width     float64
Petal.Length    float64
Petal.Width     float64
dtype: object

In[39]: y.dtypes
Out[39]: category

y = np.ravel(y)
logistic = LogisticRegression()
model = logistic.fit(x,y)
# Get the model coefficients
model_coef= pd.DataFrame(list(zip(x.columns, np.transpose(model.coef_))))
model_intercept = model.intercept_
# Scores (predicted class probabilities)
logistic.predict_proba(x)

# Regression results

In[53]: model_coef
Out[53]:
              0                   1
0  Sepal.Length    [-0.35408129583]
1   Sepal.Width    [-1.54370811291]
2  Petal.Length      [2.5226864004]
3   Petal.Width  [-0.0491724675344]

In[54]: model_intercept
Out[54]: array([-0.2694529])
# Scores (predicted class probabilities)
In[55]: logistic.predict_proba(x)
Out[55]:
array([[ 0.98228251,  0.01771749],
       [ 0.95998154,  0.04001846],
       [ 0.97375483,  0.02624517],
       [ 0.95090312,  0.04909688],
       [ 0.98431446,  0.01568554],
       [ 0.98097681,  0.01902319],
       [ 0.97572537,  0.02427463],
       [ 0.97217524,  0.02782476],
       [ 0.94460761,  0.05539239]....

	# Logistic regression in R (logistf package): results

	> str(iris_qua)
	'data.frame': 100 obs. of 5 variables:
	$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
	$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
	$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
	$ Petal.Width : num 1.4 1.5 0.4 1.3 1.5 0.6 1.6 1 1.3 1.4 ...
	$ Species : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...

	library(logistf)
	model2 <- logistf(Species ~ ., data = iris_qua, family = binomial)
	summary(model2)
	> summary(model2)
	logistf(formula = Species ~ ., data = iris_qua, family = binomial)

	Model fitted by Penalized ML
	Confidence intervals and p-values by Profile Likelihood Profile Likelihood Profile Likelihood Profile Likelihood Profile Likelihood

	                   coef  se(coef)  lower 0.95 upper 0.95    Chisq            p
	(Intercept)  -1.9094572 4.2916556 -10.0344572   6.215543      Inf 0.000000e+00
	Sepal.Length  0.1949514 1.3531859  -4.0656541   2.625906      Inf 0.000000e+00
	Sepal.Width  -0.3821325 1.1902449  -4.3998915   3.334993 27.07594 1.956173e-07
	Petal.Length  2.0119360 1.3252200   0.4086152   9.626672  0.00000 1.000000e+00
	Petal.Width  -0.0010429 0.5667498  -1.6956076   2.023168  0.00000 1.000000e+00

	Likelihood ratio test=-378.9395 on 4 df, p=1, n=100
	Wald test = 3.157929 on 4 df, p = 0.5317528

	Covariance-Matrix:
	           [,1]       [,2]       [,3]        [,4]        [,5]
	[1,] 18.4183080 -3.7583069  0.3766262 -0.64117866  0.14506984
	[2,] -3.7583069  1.8311121 -1.2127691 -0.74701502 -0.18395215
	[3,]  0.3766262 -1.2127691  1.4166830  0.51005296  0.10972201
	[4,] -0.6411787 -0.7470150  0.5100530  1.75620810  0.07977398
	[5,]  0.1450698 -0.1839522  0.1097220  0.07977398  0.32120528


	# Python (scikit-learn LogisticRegression) results

	In[49]: iris_df.head(5)

	Out[49]:
	   Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
	0           5.1          3.5           1.4          1.4       0
	1           4.9          3.0           1.4          1.5       0
	2           4.7          3.2           1.3          0.4       0
	3           4.6          3.1           1.5          1.3       0
	4           5.0          3.6           1.4          1.5       0

	iris_df['Species'].unique()
	iris_df['Species'] = iris_df['Species'].astype('category')
	x = iris_df.ix[:,0:4]
	y = iris_df.ix[:,-1]

	In[38]: x.dtypes
	Out[38]:
	Sepal.Length float64
	Sepal.Width float64
	Petal.Length float64
	Petal.Width float64
	dtype: object

	In[39]: y.dtypes
	Out[39]: category

	y = np.ravel(y)
	logistic = LogisticRegression()
	model = logistic.fit(x,y)
	# Get the model coefficients
	model_coef= pd.DataFrame(list(zip(x.columns, np.transpose(model.coef_))))
	model_intercept = model.intercept_
	# Scores (predicted class probabilities)
	logistic.predict_proba(x)

	#regression results..

	In[53]: model_coef
	Out[53]:
	              0                   1
	0  Sepal.Length    [-0.35408129583]
	1   Sepal.Width    [-1.54370811291]
	2  Petal.Length      [2.5226864004]
	3   Petal.Width  [-0.0491724675344]

	In[54]: model_intercept
	Out[54]: array([-0.2694529])
	#scores..
	In[55]: logistic.predict_proba(x)
	Out[55]:
	array([[ 0.98228251,  0.01771749],
	       [ 0.95998154,  0.04001846],
	       [ 0.97375483,  0.02624517],
	       [ 0.95090312,  0.04909688],
	       [ 0.98431446,  0.01568554],
	       [ 0.98097681,  0.01902319],
	       [ 0.97572537,  0.02427463],
	       [ 0.97217524,  0.02782476],
	       [ 0.94460761,  0.05539239]....