brentp/betareg.py

## betareg.py
# -*- coding: utf-8 -*-

u"""
Beta regression for modeling rates and proportions.

References
----------
Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression
in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in
Economics and Statistics, 2011.

Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer?
Maximum-likelihood regression with beta-distributed dependent variables."
Psychological methods 11.1 (2006): 54.
"""
import numpy as np
import pandas as pd
import statsmodels.api as sm

from scipy.special import gammaln as lgamma

from statsmodels.base.model import GenericLikelihoodModel
from statsmodels.genmod.families import Binomial

# this is only need while #2024 is open.
class Logit(sm.families.links.Logit):

    """Logit tranform that won't overflow with large numbers."""

    def inverse(self, z):
        return 1 / (1. + np.exp(-z))

_init_example = """

    Beta regression with default of logit-link for exog and log-link
    for precision.

    >>> mod = Beta(endog, exog)
    >>> rslt = mod.fit()
    >>> print rslt.summary()

    We can also specify a formula and a specific structure and use the
    identity-link for phi.

    >>> from sm.families.links import identity
    >>> Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
    >>> mod = Beta.from_formula('iyield ~ C(batch, Treatment(10)) + temp',
    ...                         dat, Z=Z, link_phi=identity())

    In the case of proportion-data, we may think that the precision depends on
    the number of measurements. E.g for sequence data, on the number of
    sequence reads covering a site:

    >>> Z = patsy.dmatrix('~ coverage', df)
    >>> mod = Beta.from_formula('methylation ~ disease + age + gender + coverage', df, Z)
    >>> rslt = mod.fit()

"""

class Beta(GenericLikelihoodModel):

    """Beta Regression.

    This implementation uses `phi` as a precision parameter equal to
    `a + b` from the Beta parameters.
    """

    def __init__(self, endog, exog, Z=None, link=Logit(),
            link_phi=sm.families.links.Log(), **kwds):
        """
        Parameters
        ----------
        endog : array-like
            1d array of endogenous values (i.e. responses, outcomes,
            dependent variables, or 'Y' values).
        exog : array-like
            2d array of exogeneous values (i.e. covariates, predictors,
            independent variables, regressors, or 'X' values). A nobs x k
            array where `nobs` is the number of observations and `k` is
            the number of regressors. An intercept is not included by
            default and should be added by the user. See
            `statsmodels.tools.add_constant`.
        Z : array-like
            2d array of variables for the precision phi.
        link : link
            Any link in sm.families.links for `exog`
        link_phi : link
            Any link in sm.families.links for `Z`

        Examples
        --------
        {example}

        See Also
        --------
        :ref:`links`

        """.format(example=_init_example)
        assert np.all((0 < endog) & (endog < 1))
        if Z is None:
            extra_names = ['phi']
            Z = np.ones((len(endog), 1), dtype='f')
        else:
            extra_names = ['precision-%s' % zc for zc in \
                        (Z.columns if hasattr(Z, 'columns') else range(1, Z.shape[1] + 1))]
        kwds['extra_params_names'] = extra_names

        super(Beta, self).__init__(endog, exog, **kwds)
        self.link = link
        self.link_phi = link_phi

        self.Z = Z
        assert len(self.Z) == len(self.endog)

    def nloglikeobs(self, params):
        """
        Negative log-likelihood.

        Parameters
        ----------

        params : np.ndarray
            Parameter estimates
        """
        return -self._ll_br(self.endog, self.exog, self.Z, params)

    def fit(self, start_params=None, maxiter=100000, maxfun=5000, disp=False,
            method='bfgs', **kwds):
        """
        Fit the model.

        Parameters
        ----------
        start_params : array-like
            A vector of starting values for the regression
            coefficients.  If None, a default is chosen.
        maxiter : integer
            The maximum number of iterations
        disp : bool
            Show convergence stats.
        method : str
            The optimization method to use.
        """

        if start_params is None:
            start_params = sm.GLM(self.endog, self.exog, family=Binomial()
                                 ).fit(disp=False).params
            start_params = np.append(start_params, [0.5] * self.Z.shape[1])

        return super(Beta, self).fit(start_params=start_params,
                                        maxiter=maxiter, maxfun=maxfun,
                                        method=method, disp=disp, **kwds)

    def _ll_br(self, y, X, Z, params):
        nz = self.Z.shape[1]

        Xparams = params[:-nz]
        Zparams = params[-nz:]

        mu = self.link.inverse(np.dot(X, Xparams))
        phi = self.link_phi.inverse(np.dot(Z, Zparams))
        # TODO: derive a and b and constrain to > 0?

        if np.any(phi <= np.finfo(float).eps): return np.array(-np.inf)

        ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
                + (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) \
                * np.log(1 - y)

        return ll

if __name__ == "__main__":

    import patsy
    dat = pd.read_table('gasoline.txt')
    Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
    # using other precison params with
    m = Beta.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat,
            Z=Z, link_phi=sm.families.links.identity())
    print m.fit().summary()

    fex = pd.read_csv('foodexpenditure.csv')
    m = Beta.from_formula(' I(food/income) ~ income + persons', fex)
    print m.fit().summary()
    #print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()

    dev = pd.read_csv('methylation-test.csv')
    Z = patsy.dmatrix('~ age', dev, return_type='dataframe')
    m = Beta.from_formula('methylation ~ gender + CpG', dev,
            Z=Z,
            link_phi=sm.families.links.identity())
    print m.fit().summary()

## ex.R
library(betareg)

data("GasolineYield", package = "betareg")
data("FoodExpenditure", package = "betareg")

#fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
#print(summary(fe_beta)$coefficients)


#m = betareg(yield ~ batch + temp, data = GasolineYield)
#print(summary(m))


#gy2 <- betareg(yield ~ batch + temp | temp, data = GasolineYield, link.phi="identity")
#print(summary(gy2))


meth = read.csv('methylation-test.csv')
m =  betareg(methylation ~ gender + CpG | age, meth)
print(summary(m))


## foodexpenditure.csv

          
            food
            income
            persons

            
              15.998
              62.476
              1

            
              16.652
              82.304
              5

            
              21.741
              74.679
              3

            
              7.431
              39.151
              3

            
              10.481
              64.724
              5

            
              13.548
              36.786
              3

            
              23.256
              83.052
              4

            
              17.976
              86.935
              1

            
              14.161
              88.233
              2

            
              8.825
              38.695
              2

            
              14.184
              73.831
              7

            
              19.604
              77.122
              3

            
              13.728
              45.519
              2

            
              21.141
              82.251
              2

            
              17.446
              59.862
              3

            
              9.629
              26.563
              3

            
              14.005
              61.818
              2

            
              9.16
              29.682
              1

            
              18.831
              50.825
              5

            
              7.641
              71.062
              4

            
              13.882
              41.99
              4

            
              9.67
              37.324
              3

            
              21.604
              86.352
              5

            
              10.866
              45.506
              2

            
              28.98
              69.929
              6

            
              10.882
              61.041
              2

            
              18.561
              82.469
              1

            
              11.629
              44.208
              2

            
              18.067
              49.467
              5

            
              14.539
              25.905
              5

            
              19.192
              79.178
              5

            
              25.918
              75.811
              3

            
              28.833
              82.718
              6

            
              15.869
              48.311
              4

            
              14.91
              42.494
              5

            
              9.55
              40.573
              4

            
              23.066
              44.872
              6

            
              14.751
              27.167
              7

## gasoline.txt
iyield	gravity	pressure	temp10	temp	batch
0.122	50.8	8.6	190	205	1
0.223	50.8	8.6	190	275	1
0.347	50.8	8.6	190	345	1
0.457	50.8	8.6	190	407	1
0.08	40.8	3.5	210	218	2
0.131	40.8	3.5	210	273	2
0.266	40.8	3.5	210	347	2
0.074	40	6.1	217	212	3
0.182	40	6.1	217	272	3
0.304	40	6.1	217	340	3
0.069	38.4	6.1	220	235	4
0.152	38.4	6.1	220	300	4
0.26	38.4	6.1	220	365	4
0.336	38.4	6.1	220	410	4
0.144	40.3	4.8	231	307	5
0.268	40.3	4.8	231	367	5
0.349	40.3	4.8	231	395	5
0.1	32.2	5.2	236	267	6
0.248	32.2	5.2	236	360	6
0.317	32.2	5.2	236	402	6
0.028	41.3	1.8	267	235	7
0.064	41.3	1.8	267	275	7
0.161	41.3	1.8	267	358	7
0.278	41.3	1.8	267	416	7
0.05	38.1	1.2	274	285	8
0.176	38.1	1.2	274	365	8
0.321	38.1	1.2	274	444	8
0.14	32.2	2.4	284	351	9
0.232	32.2	2.4	284	424	9
0.085	31.8	0.2	316	365	10
0.147	31.8	0.2	316	379	10
0.18	31.8	0.2	316	428	10

## methylation-test.csv

          
            Unnamed: 0
            gender
            age
            Basename
            ID
            id
            plate
            CpG
            methylation

            
              0
              0
              F
              58.231
              6042324088_R04C01
              age58.231_F
              id_0
              6042324088
              CpG_0
              0.815

            
              1
              1
              M
              52.632
              6042324088_R06C01
              age52.632_M
              id_1
              6042324088
              CpG_0
              0.803

            
              2
              2
              M
              64.679
              6042324088_R01C01
              age64.679_M
              id_2
              6042324088
              CpG_0
              0.803

            
              3
              3
              F
              55.299
              6042324088_R04C02
              age55.299_F
              id_3
              6042324088
              CpG_0
              0.808

            
              4
              4
              M
              56.019
              6042324088_R02C01
              age56.019_M
              id_4
              6042324088
              CpG_0
              0.8550000000000001

            
              5
              5
              M
              62.021
              6042324088_R01C02
              age62.021_M
              id_5
              6042324088
              CpG_0
              0.8129999999999998

            
              6
              6
              F
              52.298
              6042324088_R06C02
              age52.298_F
              id_6
              6042324088
              CpG_0
              0.816

            
              7
              7
              F
              39.71
              6042324088_R03C01
              age39.71_F
              id_7
              6042324088
              CpG_0
              0.827

            
              8
              8
              F
              57.492
              6042324088_R05C02
              age57.492_F
              id_8
              6042324088
              CpG_0
              0.829

            
              9
              9
              F
              57.623999999999995
              6042324088_R05C01
              age57.624_F
              id_9
              6042324088
              CpG_0
              0.7760000000000001

            
              10
              10
              F
              40.486999999999995
              6042324088_R03C02
              age40.487_F
              id_10
              6042324088
              CpG_0
              0.7859999999999999

            
              11
              11
              M
              53.662
              6042324088_R02C02
              age53.662_M
              id_11
              6042324088
              CpG_0
              0.822

            
              12
              12
              F
              58.231
              6042324088_R04C01
              age58.231_F
              id_0
              6042324088
              CpG_1
              0.891

            
              13
              13
              M
              52.632
              6042324088_R06C01
              age52.632_M
              id_1
              6042324088
              CpG_1
              0.894

            
              14
              14
              M
              64.679
              6042324088_R01C01
              age64.679_M
              id_2
              6042324088
              CpG_1
              0.894

            
              15
              15
              F
              55.299
              6042324088_R04C02
              age55.299_F
              id_3
              6042324088
              CpG_1
              0.869

            
              16
              16
              M
              56.019
              6042324088_R02C01
              age56.019_M
              id_4
              6042324088
              CpG_1
              0.914

            
              17
              17
              M
              62.021
              6042324088_R01C02
              age62.021_M
              id_5
              6042324088
              CpG_1
              0.889

            
              18
              18
              F
              52.298
              6042324088_R06C02
              age52.298_F
              id_6
              6042324088
              CpG_1
              0.8850000000000001

            
              19
              19
              F
              39.71
              6042324088_R03C01
              age39.71_F
              id_7
              6042324088
              CpG_1
              0.898

            
              20
              20
              F
              57.492
              6042324088_R05C02
              age57.492_F
              id_8
              6042324088
              CpG_1
              0.896

            
              21
              21
              F
              57.623999999999995
              6042324088_R05C01
              age57.624_F
              id_9
              6042324088
              CpG_1
              0.86

            
              22
              22
              F
              40.486999999999995
              6042324088_R03C02
              age40.487_F
              id_10
              6042324088
              CpG_1
              0.887

            
              23
              23
              M
              53.662
              6042324088_R02C02
              age53.662_M
              id_11
              6042324088
              CpG_1
              0.8800000000000001

            
              24
              24
              F
              58.231
              6042324088_R04C01
              age58.231_F
              id_0
              6042324088
              CpG_2
              0.936

            
              25
              25
              M
              52.632
              6042324088_R06C01
              age52.632_M
              id_1
              6042324088
              CpG_2
              0.9129999999999999

            
              26
              26
              M
              64.679
              6042324088_R01C01
              age64.679_M
              id_2
              6042324088
              CpG_2
              0.9000000000000001

            
              27
              27
              F
              55.299
              6042324088_R04C02
              age55.299_F
              id_3
              6042324088
              CpG_2
              0.9119999999999999

            
              28
              28
              M
              56.019
              6042324088_R02C01
              age56.019_M
              id_4
              6042324088
              CpG_2
              0.9349999999999999

            
              29
              29
              M
              62.021
              6042324088_R01C02
              age62.021_M
              id_5
              6042324088
              CpG_2
              0.9280000000000002

            
              30
              30
              F
              52.298
              6042324088_R06C02
              age52.298_F
              id_6
              6042324088
              CpG_2
              0.9150000000000001

            
              31
              31
              F
              39.71
              6042324088_R03C01
              age39.71_F
              id_7
              6042324088
              CpG_2
              0.9160000000000001

            
              32
              32
              F
              57.492
              6042324088_R05C02
              age57.492_F
              id_8
              6042324088
              CpG_2
              0.929

            
              33
              33
              F
              57.623999999999995
              6042324088_R05C01
              age57.624_F
              id_9
              6042324088
              CpG_2
              0.92

            
              34
              34
              F
              40.486999999999995
              6042324088_R03C02
              age40.487_F
              id_10
              6042324088
              CpG_2
              0.9160000000000001

            
              35
              35
              M
              53.662
              6042324088_R02C02
              age53.662_M
              id_11
              6042324088
              CpG_2
              0.926

## test_beta.py

# in R:
import io
import statsmodels.api as sm
import pandas as pd
import patsy
from betareg import Beta
import numpy as np

# betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
_income_estimates_mean = u"""\
varname        Estimate  StdError   zvalue     Pr(>|z|)
(Intercept) -0.62254806 0.223853539 -2.781051 5.418326e-03
income      -0.01229884 0.003035585 -4.051556 5.087819e-05
persons      0.11846210 0.035340667  3.352005 8.022853e-04"""

_income_estimates_precision = u"""\
varname  Estimate StdError  zvalue     Pr(>|z|)
(phi) 35.60975   8.079598 4.407366 1.046351e-05
"""

_methylation_estimates_mean = u"""\
varname      Estimate StdError zvalue Pr(>|z|)
(Intercept)  1.44224    0.03401  42.404   2e-16
genderM      0.06986    0.04359   1.603    0.109
CpGCpG_1     0.60735    0.04834  12.563   2e-16
CpGCpG_2     0.97355    0.05311  18.331   2e-16"""

_methylation_estimates_precision = u"""\
varname Estimate StdError zvalue Pr(>|z|)
(Intercept)  8.22829    1.79098   4.594 4.34e-06
age         -0.03471    0.03276  -1.059    0.289"""


expected_income_mean = pd.read_table(io.StringIO(_income_estimates_mean), sep="\s+")
expected_income_precision = pd.read_table(io.StringIO(_income_estimates_precision), sep="\s+")

expected_methylation_mean = pd.read_table(io.StringIO(_methylation_estimates_mean), sep="\s+")
expected_methylation_precision = pd.read_table(io.StringIO(_methylation_estimates_precision), sep="\s+")

income = pd.read_csv('foodexpenditure.csv')
methylation = pd.read_csv('methylation-test.csv')

def check_same(a, b, eps, name):
    assert np.allclose(a, b, rtol=0.01, atol=eps), \
            ("different from expected", name, list(a), list(b))


def test_income_coefficients():
    model = "I(food/income) ~ income + persons"

    mod = Beta.from_formula(model, income)
    rslt = mod.fit()
    yield check_same, rslt.params[:-1], expected_income_mean['Estimate'], 1e-3, "estimates"
    yield check_same, rslt.tvalues[:-1], expected_income_mean['zvalue'], 0.1, "z-scores"
    yield check_same, rslt.pvalues[:-1], expected_income_mean['Pr(>|z|)'], 1e-3, "p-values"


def test_income_precision():
    model = "I(food/income) ~ income + persons"

    mod = Beta.from_formula(model, income)
    rslt = mod.fit()
    # note that we have to exp the phi results for now.
    yield check_same, np.exp(rslt.params[-1:]), expected_income_precision['Estimate'], 1e-3, "estimate"
    #yield check_same, rslt.tvalues[-1:], expected_income_precision['zvalue'], 0.1, "z-score"
    yield check_same, rslt.pvalues[-1:], expected_income_precision['Pr(>|z|)'], 1e-3, "p-values"


def test_methylation_coefficients():
    model = "methylation ~ gender + CpG"
    Z = patsy.dmatrix("~ age", methylation)

    mod = Beta.from_formula(model, methylation, Z=Z, link_phi=sm.families.links.identity())
    rslt = mod.fit()
    yield check_same, rslt.params[:-2], expected_methylation_mean['Estimate'], 1e-2, "estimates"
    yield check_same, rslt.tvalues[:-2], expected_methylation_mean['zvalue'], 0.1, "z-scores"
    yield check_same, rslt.pvalues[:-2], expected_methylation_mean['Pr(>|z|)'], 1e-2, "p-values"

def test_methylation_precision():
    model = "methylation ~ gender + CpG"
    Z = patsy.dmatrix("~ age", methylation)

    mod = Beta.from_formula(model, methylation, Z=Z, link_phi=sm.families.links.identity())
    rslt = mod.fit()
    #yield check_same, sm.families.links.logit()(rslt.params[-2:]), expected_methylation_precision['Estimate'], 1e-3, "estimate"
    #yield check_same, rslt.tvalues[-2:], expected_methylation_precision['zvalue'], 0.1, "z-score"
	# -- coding: utf-8 --

	u"""
	Beta regression for modeling rates and proportions.

	References
	----------
	Grün, Bettina, Ioannis Kosmidis, and Achim Zeileis. Extended beta regression
	in R: Shaken, stirred, mixed, and partitioned. No. 2011-22. Working Papers in
	Economics and Statistics, 2011.

	Smithson, Michael, and Jay Verkuilen. "A better lemon squeezer?
	Maximum-likelihood regression with beta-distributed dependent variables."
	Psychological methods 11.1 (2006): 54.
	"""
	import numpy as np
	import pandas as pd
	import statsmodels.api as sm

	from scipy.special import gammaln as lgamma

	from statsmodels.base.model import GenericLikelihoodModel
	from statsmodels.genmod.families import Binomial

	# this is only need while #2024 is open.
	class Logit(sm.families.links.Logit):

	"""Logit tranform that won't overflow with large numbers."""

	def inverse(self, z):
	return 1 / (1. + np.exp(-z))

	_init_example = """

	Beta regression with default of logit-link for exog and log-link
	for precision.

	>>> mod = Beta(endog, exog)
	>>> rslt = mod.fit()
	>>> print rslt.summary()

	We can also specify a formula and a specific structure and use the
	identity-link for phi.

	>>> from sm.families.links import identity
	>>> Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
	>>> mod = Beta.from_formula('iyield ~ C(batch, Treatment(10)) + temp',
	... dat, Z=Z, link_phi=identity())

	In the case of proportion-data, we may think that the precision depends on
	the number of measurements. E.g for sequence data, on the number of
	sequence reads covering a site:

	>>> Z = patsy.dmatrix('~ coverage', df)
	>>> mod = Beta.from_formula('methylation ~ disease + age + gender + coverage', df, Z)
	>>> rslt = mod.fit()

	"""

	class Beta(GenericLikelihoodModel):

	"""Beta Regression.

	This implementation uses `phi` as a precision parameter equal to
	`a + b` from the Beta parameters.
	"""

	def __init__(self, endog, exog, Z=None, link=Logit(),
	link_phi=sm.families.links.Log(), **kwds):
	"""
	Parameters
	----------
	endog : array-like
	1d array of endogenous values (i.e. responses, outcomes,
	dependent variables, or 'Y' values).
	exog : array-like
	2d array of exogeneous values (i.e. covariates, predictors,
	independent variables, regressors, or 'X' values). A nobs x k
	array where `nobs` is the number of observations and `k` is
	the number of regressors. An intercept is not included by
	default and should be added by the user. See
	`statsmodels.tools.add_constant`.
	Z : array-like
	2d array of variables for the precision phi.
	link : link
	Any link in sm.families.links for `exog`
	link_phi : link
	Any link in sm.families.links for `Z`

	Examples
	--------
	{example}

	See Also
	--------
	:ref:`links`

	""".format(example=_init_example)
	assert np.all((0 < endog) & (endog < 1))
	if Z is None:
	extra_names = ['phi']
	Z = np.ones((len(endog), 1), dtype='f')
	else:
	extra_names = ['precision-%s' % zc for zc in \
	(Z.columns if hasattr(Z, 'columns') else range(1, Z.shape[1] + 1))]
	kwds['extra_params_names'] = extra_names

	super(Beta, self).__init__(endog, exog, **kwds)
	self.link = link
	self.link_phi = link_phi

	self.Z = Z
	assert len(self.Z) == len(self.endog)

	def nloglikeobs(self, params):
	"""
	Negative log-likelihood.

	Parameters
	----------

	params : np.ndarray
	Parameter estimates
	"""
	return -self._ll_br(self.endog, self.exog, self.Z, params)

	def fit(self, start_params=None, maxiter=100000, maxfun=5000, disp=False,
	method='bfgs', **kwds):
	"""
	Fit the model.

	Parameters
	----------
	start_params : array-like
	A vector of starting values for the regression
	coefficients. If None, a default is chosen.
	maxiter : integer
	The maximum number of iterations
	disp : bool
	Show convergence stats.
	method : str
	The optimization method to use.
	"""

	if start_params is None:
	start_params = sm.GLM(self.endog, self.exog, family=Binomial()
	).fit(disp=False).params
	start_params = np.append(start_params, [0.5] * self.Z.shape[1])

	return super(Beta, self).fit(start_params=start_params,
	maxiter=maxiter, maxfun=maxfun,
	method=method, disp=disp, **kwds)

	def _ll_br(self, y, X, Z, params):
	nz = self.Z.shape[1]

	Xparams = params[:-nz]
	Zparams = params[-nz:]

	mu = self.link.inverse(np.dot(X, Xparams))
	phi = self.link_phi.inverse(np.dot(Z, Zparams))
	# TODO: derive a and b and constrain to > 0?

	if np.any(phi <= np.finfo(float).eps): return np.array(-np.inf)

	ll = lgamma(phi) - lgamma(mu * phi) - lgamma((1 - mu) * phi) \
	+ (mu * phi - 1) * np.log(y) + (((1 - mu) * phi) - 1) \
	* np.log(1 - y)

	return ll

	if __name__ == "__main__":

	import patsy
	dat = pd.read_table('gasoline.txt')
	Z = patsy.dmatrix('~ temp', dat, return_type='dataframe')
	# using other precison params with
	m = Beta.from_formula('iyield ~ C(batch, Treatment(10)) + temp', dat,
	Z=Z, link_phi=sm.families.links.identity())
	print m.fit().summary()

	fex = pd.read_csv('foodexpenditure.csv')
	m = Beta.from_formula(' I(food/income) ~ income + persons', fex)
	print m.fit().summary()
	#print GLM.from_formula('iyield ~ C(batch) + temp', dat, family=Binomial()).fit().summary()

	dev = pd.read_csv('methylation-test.csv')
	Z = patsy.dmatrix('~ age', dev, return_type='dataframe')
	m = Beta.from_formula('methylation ~ gender + CpG', dev,
	Z=Z,
	link_phi=sm.families.links.identity())
	print m.fit().summary()
	library(betareg)

	data("GasolineYield", package = "betareg")
	data("FoodExpenditure", package = "betareg")

	#fe_beta = betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
	#print(summary(fe_beta)$coefficients)


	#m = betareg(yield ~ batch + temp, data = GasolineYield)
	#print(summary(m))



	#gy2 <- betareg(yield ~ batch + temp \| temp, data = GasolineYield, link.phi="identity")
	#print(summary(gy2))


	meth = read.csv('methylation-test.csv')
	m = betareg(methylation ~ gender + CpG \| age, meth)
	print(summary(m))
food	income	persons
15.998	62.476	1
16.652	82.304	5
21.741	74.679	3
7.431	39.151	3
10.481	64.724	5
13.548	36.786	3
23.256	83.052	4
17.976	86.935	1
14.161	88.233	2
8.825	38.695	2
14.184	73.831	7
19.604	77.122	3
13.728	45.519	2
21.141	82.251	2
17.446	59.862	3
9.629	26.563	3
14.005	61.818	2
9.16	29.682	1
18.831	50.825	5
7.641	71.062	4
13.882	41.99	4
9.67	37.324	3
21.604	86.352	5
10.866	45.506	2
28.98	69.929	6
10.882	61.041	2
18.561	82.469	1
11.629	44.208	2
18.067	49.467	5
14.539	25.905	5
19.192	79.178	5
25.918	75.811	3
28.833	82.718	6
15.869	48.311	4
14.91	42.494	5
9.55	40.573	4
23.066	44.872	6
14.751	27.167	7
	iyield gravity pressure temp10 temp batch
	0.122 50.8 8.6 190 205 1
	0.223 50.8 8.6 190 275 1
	0.347 50.8 8.6 190 345 1
	0.457 50.8 8.6 190 407 1
	0.08 40.8 3.5 210 218 2
	0.131 40.8 3.5 210 273 2
	0.266 40.8 3.5 210 347 2
	0.074 40 6.1 217 212 3
	0.182 40 6.1 217 272 3
	0.304 40 6.1 217 340 3
	0.069 38.4 6.1 220 235 4
	0.152 38.4 6.1 220 300 4
	0.26 38.4 6.1 220 365 4
	0.336 38.4 6.1 220 410 4
	0.144 40.3 4.8 231 307 5
	0.268 40.3 4.8 231 367 5
	0.349 40.3 4.8 231 395 5
	0.1 32.2 5.2 236 267 6
	0.248 32.2 5.2 236 360 6
	0.317 32.2 5.2 236 402 6
	0.028 41.3 1.8 267 235 7
	0.064 41.3 1.8 267 275 7
	0.161 41.3 1.8 267 358 7
	0.278 41.3 1.8 267 416 7
	0.05 38.1 1.2 274 285 8
	0.176 38.1 1.2 274 365 8
	0.321 38.1 1.2 274 444 8
	0.14 32.2 2.4 284 351 9
	0.232 32.2 2.4 284 424 9
	0.085 31.8 0.2 316 365 10
	0.147 31.8 0.2 316 379 10
	0.18 31.8 0.2 316 428 10
	Unnamed: 0	gender	age	Basename	ID	id	plate	CpG	methylation
0	0	F	58.231	6042324088_R04C01	age58.231_F	id_0	6042324088	CpG_0	0.815
1	1	M	52.632	6042324088_R06C01	age52.632_M	id_1	6042324088	CpG_0	0.803
2	2	M	64.679	6042324088_R01C01	age64.679_M	id_2	6042324088	CpG_0	0.803
3	3	F	55.299	6042324088_R04C02	age55.299_F	id_3	6042324088	CpG_0	0.808
4	4	M	56.019	6042324088_R02C01	age56.019_M	id_4	6042324088	CpG_0	0.8550000000000001
5	5	M	62.021	6042324088_R01C02	age62.021_M	id_5	6042324088	CpG_0	0.8129999999999998
6	6	F	52.298	6042324088_R06C02	age52.298_F	id_6	6042324088	CpG_0	0.816
7	7	F	39.71	6042324088_R03C01	age39.71_F	id_7	6042324088	CpG_0	0.827
8	8	F	57.492	6042324088_R05C02	age57.492_F	id_8	6042324088	CpG_0	0.829
9	9	F	57.623999999999995	6042324088_R05C01	age57.624_F	id_9	6042324088	CpG_0	0.7760000000000001
10	10	F	40.486999999999995	6042324088_R03C02	age40.487_F	id_10	6042324088	CpG_0	0.7859999999999999
11	11	M	53.662	6042324088_R02C02	age53.662_M	id_11	6042324088	CpG_0	0.822
12	12	F	58.231	6042324088_R04C01	age58.231_F	id_0	6042324088	CpG_1	0.891
13	13	M	52.632	6042324088_R06C01	age52.632_M	id_1	6042324088	CpG_1	0.894
14	14	M	64.679	6042324088_R01C01	age64.679_M	id_2	6042324088	CpG_1	0.894
15	15	F	55.299	6042324088_R04C02	age55.299_F	id_3	6042324088	CpG_1	0.869
16	16	M	56.019	6042324088_R02C01	age56.019_M	id_4	6042324088	CpG_1	0.914
17	17	M	62.021	6042324088_R01C02	age62.021_M	id_5	6042324088	CpG_1	0.889
18	18	F	52.298	6042324088_R06C02	age52.298_F	id_6	6042324088	CpG_1	0.8850000000000001
19	19	F	39.71	6042324088_R03C01	age39.71_F	id_7	6042324088	CpG_1	0.898
20	20	F	57.492	6042324088_R05C02	age57.492_F	id_8	6042324088	CpG_1	0.896
21	21	F	57.623999999999995	6042324088_R05C01	age57.624_F	id_9	6042324088	CpG_1	0.86
22	22	F	40.486999999999995	6042324088_R03C02	age40.487_F	id_10	6042324088	CpG_1	0.887
23	23	M	53.662	6042324088_R02C02	age53.662_M	id_11	6042324088	CpG_1	0.8800000000000001
24	24	F	58.231	6042324088_R04C01	age58.231_F	id_0	6042324088	CpG_2	0.936
25	25	M	52.632	6042324088_R06C01	age52.632_M	id_1	6042324088	CpG_2	0.9129999999999999
26	26	M	64.679	6042324088_R01C01	age64.679_M	id_2	6042324088	CpG_2	0.9000000000000001
27	27	F	55.299	6042324088_R04C02	age55.299_F	id_3	6042324088	CpG_2	0.9119999999999999
28	28	M	56.019	6042324088_R02C01	age56.019_M	id_4	6042324088	CpG_2	0.9349999999999999
29	29	M	62.021	6042324088_R01C02	age62.021_M	id_5	6042324088	CpG_2	0.9280000000000002
30	30	F	52.298	6042324088_R06C02	age52.298_F	id_6	6042324088	CpG_2	0.9150000000000001
31	31	F	39.71	6042324088_R03C01	age39.71_F	id_7	6042324088	CpG_2	0.9160000000000001
32	32	F	57.492	6042324088_R05C02	age57.492_F	id_8	6042324088	CpG_2	0.929
33	33	F	57.623999999999995	6042324088_R05C01	age57.624_F	id_9	6042324088	CpG_2	0.92
34	34	F	40.486999999999995	6042324088_R03C02	age40.487_F	id_10	6042324088	CpG_2	0.9160000000000001
35	35	M	53.662	6042324088_R02C02	age53.662_M	id_11	6042324088	CpG_2	0.926

	# in R:
	import io
	import statsmodels.api as sm
	import pandas as pd
	import patsy
	from betareg import Beta
	import numpy as np

	# betareg(I(food/income) ~ income + persons, data = FoodExpenditure)
	_income_estimates_mean = u"""\
	varname Estimate StdError zvalue Pr(>\|z\|)
	(Intercept) -0.62254806 0.223853539 -2.781051 5.418326e-03
	income -0.01229884 0.003035585 -4.051556 5.087819e-05
	persons 0.11846210 0.035340667 3.352005 8.022853e-04"""

	_income_estimates_precision = u"""\
	varname Estimate StdError zvalue Pr(>\|z\|)
	(phi) 35.60975 8.079598 4.407366 1.046351e-05
	"""

	_methylation_estimates_mean = u"""\
	varname Estimate StdError zvalue Pr(>\|z\|)
	(Intercept) 1.44224 0.03401 42.404 2e-16
	genderM 0.06986 0.04359 1.603 0.109
	CpGCpG_1 0.60735 0.04834 12.563 2e-16
	CpGCpG_2 0.97355 0.05311 18.331 2e-16"""

	_methylation_estimates_precision = u"""\
	varname Estimate StdError zvalue Pr(>\|z\|)
	(Intercept) 8.22829 1.79098 4.594 4.34e-06
	age -0.03471 0.03276 -1.059 0.289"""


	expected_income_mean = pd.read_table(io.StringIO(_income_estimates_mean), sep="\s+")
	expected_income_precision = pd.read_table(io.StringIO(_income_estimates_precision), sep="\s+")

	expected_methylation_mean = pd.read_table(io.StringIO(_methylation_estimates_mean), sep="\s+")
	expected_methylation_precision = pd.read_table(io.StringIO(_methylation_estimates_precision), sep="\s+")

	income = pd.read_csv('foodexpenditure.csv')
	methylation = pd.read_csv('methylation-test.csv')

	def check_same(a, b, eps, name):
	assert np.allclose(a, b, rtol=0.01, atol=eps), \
	("different from expected", name, list(a), list(b))


	def test_income_coefficients():
	model = "I(food/income) ~ income + persons"

	mod = Beta.from_formula(model, income)
	rslt = mod.fit()
	yield check_same, rslt.params[:-1], expected_income_mean['Estimate'], 1e-3, "estimates"
	yield check_same, rslt.tvalues[:-1], expected_income_mean['zvalue'], 0.1, "z-scores"
	yield check_same, rslt.pvalues[:-1], expected_income_mean['Pr(>\|z\|)'], 1e-3, "p-values"


	def test_income_precision():
	model = "I(food/income) ~ income + persons"

	mod = Beta.from_formula(model, income)
	rslt = mod.fit()
	# note that we have to exp the phi results for now.
	yield check_same, np.exp(rslt.params[-1:]), expected_income_precision['Estimate'], 1e-3, "estimate"
	#yield check_same, rslt.tvalues[-1:], expected_income_precision['zvalue'], 0.1, "z-score"
	yield check_same, rslt.pvalues[-1:], expected_income_precision['Pr(>\|z\|)'], 1e-3, "p-values"


	def test_methylation_coefficients():
	model = "methylation ~ gender + CpG"
	Z = patsy.dmatrix("~ age", methylation)

	mod = Beta.from_formula(model, methylation, Z=Z, link_phi=sm.families.links.identity())
	rslt = mod.fit()
	yield check_same, rslt.params[:-2], expected_methylation_mean['Estimate'], 1e-2, "estimates"
	yield check_same, rslt.tvalues[:-2], expected_methylation_mean['zvalue'], 0.1, "z-scores"
	yield check_same, rslt.pvalues[:-2], expected_methylation_mean['Pr(>\|z\|)'], 1e-2, "p-values"

	def test_methylation_precision():
	model = "methylation ~ gender + CpG"
	Z = patsy.dmatrix("~ age", methylation)

	mod = Beta.from_formula(model, methylation, Z=Z, link_phi=sm.families.links.identity())
	rslt = mod.fit()
	#yield check_same, sm.families.links.logit()(rslt.params[-2:]), expected_methylation_precision['Estimate'], 1e-3, "estimate"
	#yield check_same, rslt.tvalues[-2:], expected_methylation_precision['zvalue'], 0.1, "z-score"