Last active
March 22, 2017 14:03
-
-
Save christophergandrud/42bccce985640c5a43948523f1ae46ad to your computer and use it in GitHub Desktop.
parse.formula function from Zelig 3.5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
parse.formula<-function( formula, model,data=NULL){ | |
if(class(formula)[[1]]=="multiple") | |
return(formula) | |
nrUserOpt<-nrUserReq<-nrUserFixed<-nrUserSubreq<-0 | |
userOpt<-userReq<-userFixed<-userSubreq<-list() | |
fc <- paste("describe.", model, sep = "") | |
if (!exists(fc)) | |
stop("describe.",model," does not exsist") | |
modelReq<-do.call(fc,list()) | |
modelReq <-modelReq$parameters | |
checkNrReq<-function(modelNumEqn,nrUserReq,modelParsReq){ | |
if(length(modelNumEqn)==1){ | |
if(nrUserReq != modelNumEqn) | |
stop("The parameter \"",modelParsReq,"\" requires ",modelNumEqn, " equation(s). | |
You have provided ", nrUserReq, " See model doc. for more details") | |
}else{ | |
if(!(betweenf(nrUserReq,modelNumEqn))) | |
stop("The parameter \"",modelParsReq,"\" requires between ",modelNumEqn[[1]]," | |
and ",modelNumEqn[[2]], " equation(s). You have provided ", nrUserReq, " See model doc. for more details") | |
} | |
} | |
checkNrOpt<-function(modelNumEqn,nrUserOpt,modelParsOpt,userOpt){ | |
if(!(betweenf(nrUserOpt,modelNumEqn))) | |
if(nrUserOpt < modelNumEqn[[1]]){ | |
if(modelNumEqn[[1]]==1) | |
userOpt[[modelParsOpt]]<- as.formula("~1") | |
else | |
for(i in (nrUserOpt+1):modelNumEqn[[1]]){ | |
userOpt[[i]]<-as.formula("~1") | |
names(userOpt)[[i]]<-paste(modelParsOpt,i,sep="") | |
} | |
}else | |
stop("The parameter \"",modelParsOpt,"\" requires between ",modelNumEqn[[1]]," and ",modelNumEqn[[2]], " equation(s). You have provided ", nrUserOpt, " See model doc. for more details") | |
return(userOpt) | |
} | |
betweenf<-function(a,range){ | |
if (is.finite(range[[2]])) | |
return(a >= range[[1]] && a<=range[[2]]) | |
else | |
return(a>=range[[1]]) | |
} | |
"%w/o%" <- function(x,y) x[!x %in% y] | |
matchPars<-function(parName,userNames){ | |
res<-c() | |
for(i in 1:length(userNames)){ | |
a<-substr(userNames[[i]],nchar(parName)+1,nchar(userNames[[i]])) | |
b<-substr(userNames[[i]],1,nchar(parName)) | |
if(b==parName && (!is.na(suppressWarnings(as.numeric(a))) || userNames[[i]]==parName)) | |
res<-c(res,userNames[[i]]) | |
} | |
return (res) | |
} | |
fMode<-function(b){ | |
if(b$depVar == TRUE && b$expVar == TRUE) return (1) | |
if(b$depVar == FALSE && b$expVar == TRUE) return (2) | |
if(b$depVar == FALSE && b$expVar == FALSE) return (3) | |
if(b$depVar == TRUE && b$expVar == FALSE) return (4) | |
stop("some error occurred ... please contact the Zelig team") | |
} | |
parsType<-lapply(modelReq,fMode) | |
modelParsReq<-names(parsType[parsType==1]) | |
modelParsOpt<-names(parsType[parsType==2]) | |
modelParsFixed<-names(parsType[parsType==3]) | |
modelParsSubreq<-names(parsType[parsType==4]) | |
modelNrParsReq<-length(modelParsReq) | |
modelNrParsOpt<-length(modelParsOpt) | |
modelNrParsFixed<-length(modelParsFixed) | |
modelNrParsSubreq<-length(modelParsSubreq) | |
userNrLevels<-0 | |
dataNrLevels<-0 | |
userLevels<-c() | |
if(class(formula)[[1]]=="formula") | |
formula<-list(formula) | |
nreqns <-length(formula) | |
if(is.null(names(formula))){ | |
if(modelNrParsReq >1) | |
stop("You should name the equations. The model requires more than 1 systematic component. Please see model documentation for more details") | |
for (i in 1:nreqns){ | |
eqni<-formula[[i]] | |
if (length(eqni)==3){ | |
rootNames<-modelParsReq | |
lhs<-eqni[[2]] | |
rhs<-deparse(eqni[[3]],width.cutoff=500) | |
if(length(lhs)>1 && (lhs[[1]]=="cbind" || lhs[[1]]=="as.factor" || lhs[[1]]=="id")){ | |
if( lhs[[1]]=="cbind"){ | |
#rhs=deparse(rhs) | |
g<- as.list(lhs)[-1] | |
for (j in 1:length(g)){ | |
e<-paste(g[[j]],"~",sep="") | |
if(rhs!="1"){ | |
nrUserReq=nrUserReq+1 | |
userReq[[nrUserReq]]<-as.formula(paste(e,rhs,sep="")) | |
}else{ | |
nrUserSubreq=nrUserSubreq+1 | |
userSubreq[[nrUserSubreq]]<-as.formula(paste(e,rhs,sep="")) | |
} | |
} | |
}else{ | |
if(is.null(data)) | |
stop("Data argument is required when you use as.factor() or id() as a dependent variable\n") | |
if(lhs[[1]]=="as.factor"){ | |
varname<-as.character(lhs[[2]]) | |
userLevels<-levels(as.factor(data[[varname]]))[-1] | |
userNrLevels<-length(userLevels) | |
for (j in 1:userNrLevels){ | |
e<-paste("id(",lhs[[2]],",\"",userLevels[[j]],"\")","~",sep="") | |
if(rhs!="1"){ | |
nrUserReq=nrUserReq+1 | |
userReq[[nrUserReq]]<-as.formula(paste(e,rhs,sep="")) | |
}else{ | |
nrUserSubreq=nrUserSubreq+1 | |
userSubreq[[nrUserSubreq]]<-as.formula(paste(e,rhs,sep="")) | |
} | |
} | |
}else{ | |
varname<-as.character(lhs[[2]]) | |
userLevels<-c(userLevels,lhs[[3]]) | |
userNrLevels<-length(userLevels) | |
levels<-levels(data[[varname]]) | |
lhs<-deparse(lhs) | |
# rhs<-deparse(rhs) | |
e<-paste(lhs,"~",sep="") | |
if(rhs !="1"){ | |
nrUserReq=nrUserReq+1 | |
userReq[[nrUserReq]]<-as.formula(paste(e,rhs,sep="")) | |
}else{ | |
nrUserSubreq<-nrUserSubreq+1 | |
userSubreq[[nrUserSubreq]]<-as.formula(paste(e,rhs,sep="")) | |
} | |
} | |
} | |
}else{ | |
lhs<-deparse(lhs) | |
# rhs<-deparse(rhs) | |
e<-paste(lhs,"~",sep="") | |
if(rhs !="1"){ | |
nrUserReq=nrUserReq+1 | |
userReq[[nrUserReq]]<-as.formula(paste(e,rhs,sep="")) | |
}else{ | |
nrUserSubreq<-nrUserSubreq+1 | |
userSubreq[[nrUserSubreq]]<-as.formula(paste(e,rhs,sep="")) | |
} | |
} | |
}else{ | |
rhs<-deparse(eqni[[2]]) | |
if(rhs !="1"){ | |
nrUserOpt=nrUserOpt+1 | |
userOpt[[nrUserOpt]]<-as.formula(paste("~",rhs,sep="")) | |
}else{ | |
nrUserFixed=nrUserFixed+1 | |
userFixed[[nrUserFixed]]<-as.formula(paste("~",rhs,sep="")) | |
} | |
} | |
} | |
if (modelNrParsOpt==0){ | |
if (nrUserOpt !=0){ | |
stop("the equation(s) ",userOpt," does not match model requirements!")} | |
}else{ | |
modelNumEqn<-modelReq[[modelParsOpt]]$equations | |
userOpt<-checkNrOpt(modelNumEqn,nrUserOpt,modelParsOpt,userOpt) | |
if(length(userOpt)==1) | |
names(userOpt)<-modelParsOpt | |
else | |
names(userOpt)<-paste(modelParsOpt,1:length(userOpt),sep="") | |
} | |
if(length(modelParsFixed)>0){ | |
modelNumFixedEqns<-modelReq[[modelParsFixed]]$equations | |
for(i in 1:modelNumFixedEqns) | |
userFixed[[i]]<-as.formula("~1") | |
if(modelNumFixedEqns==1) | |
names(userFixed)<-modelParsFixed | |
else | |
names(userFixed)<-paste(modelParsFixed,1:modelNumFixedEqns,sep="") | |
} | |
if (modelNrParsReq==0){ | |
if (nrUserReq !=0){ | |
stop("the equation(s) ",userReq," does not match model requirements!")} | |
}else{ | |
modelNumEqn<-modelReq[[modelParsReq]]$equations | |
checkNrReq(modelNumEqn,nrUserReq,modelParsReq) | |
if(userNrLevels>0){ | |
if(userNrLevels !=nrUserReq) | |
stop("The number of equation for the systematic component should be equal to the number of levels -1\n") | |
names(userReq)<-userLevels | |
}else{ | |
if(nrUserReq==1) | |
names(userReq)<-modelParsReq | |
else | |
names(userReq)<-paste(modelParsReq,1:length(userReq),sep="") | |
} | |
} | |
if (modelNrParsSubreq==0){ | |
if (nrUserSubreq !=0){ | |
stop("the equation(s) ",userSubreq," does not match model requirements!")} | |
}else{ | |
modelNumEqn<-modelReq[[modelParsSubreq]]$equations | |
checkNrReq(modelNumEqn,nrUserSubreq,modelParsSubreq) | |
if(nrUserSubreq==1) | |
names(userSubreq)<-modelParsSubreq | |
else | |
names(userSubreq)<-paste(modelParsSubreq,1:length(userSubreq),sep="") | |
} | |
result<-c(userReq,userOpt,userFixed,userSubreq) | |
}else{ ##user provides names for formulas | |
modelPars<-names(modelReq) | |
parsS<-names(sort(sapply(modelPars,nchar),decreasing=TRUE)) | |
userNames<-names(formula) | |
userEqnNamesByPars<-list() | |
tmpUserNames<-userNames | |
for (i in 1:length(parsS)){ | |
userEqnNamesByPars[[parsS[[i]]]]<-matchPars(parsS[[i]],tmpUserNames) | |
tmpUserNames<-"%w/o%"(tmpUserNames,userEqnNamesByPars[[parsS[[i]]]]) | |
} | |
tmp<-"%w/o%"(userNames,unlist(userEqnNamesByPars)) | |
if (length(tmp)>0) | |
stop("Ambigous equation name ","\"",tmp,"\"") | |
res<-list() | |
userPars<-names(userEqnNamesByPars) | |
for (i in 1:length(modelPars)){ | |
modelPar<-modelPars[[i]] | |
userNumEqn<-length(userEqnNamesByPars[[modelPar]]) | |
modelNumEqn<-modelReq[[modelPar]]$equations | |
mode<-fMode(modelReq[[modelPar]]) | |
tmplst<-formula[userEqnNamesByPars[[modelPar]]] | |
if(modelNumEqn[[1]]==1 && modelNumEqn[[2]]==1 ) | |
tmpNames<-modelPar | |
else | |
tmpNames<-paste(modelPar,1:userNumEqn,sep="") | |
if(mode==1){ | |
whiche<-which(lapply(formula[(userEqnNamesByPars[[modelPar]])],length)!=3) | |
if(length(whiche)!=0) | |
stop("The equation ",formula[[names(whiche)]]," is not conform model requirements or its name is ambigous . DepVar/ExpVar is missing.\n") | |
checkNrReq(modelNumEqn,userNumEqn,modelPar) | |
whiche<-which((names(tmplst) %in% tmpNames)==FALSE) | |
if(length(whiche)!=0){ | |
warning("The name \"",names(tmplst)[whiche],"\" is ambigous. The equations of the paramter \"",modelPar,"\" are renamed\n") | |
names(tmplst)<-tmpNames | |
} | |
}else{ | |
if(mode==2){ | |
whiche<-which(lapply(formula[(userEqnNamesByPars[[modelPar]])],length)!=2) | |
if(length(whiche)!=0) | |
stop("The equation ",formula[names(whiche)]," is not conform model requirements or its name is ambigous A .\n") | |
whiche<-which((names(tmplst) %in% tmpNames)==FALSE) | |
if(length(whiche)!=0){ | |
warning("The name \"",names(tmplst)[whiche],"\" is ambigous. The equations of the paramter \"",modelPar,"\" are renamed\n") | |
names(tmplst)<-tmpNames | |
} | |
tmplst<- checkNrOpt(modelNumEqn,userNumEqn,modelPar,tmplst) | |
}else{ | |
if (mode==3){ | |
whiche<-which(tmplst !="~1") | |
if(length(whiche)>0) | |
warning("You cannot specify a formula for the parameter \"",modelPar,"\" . All your equation for this parameter are set to their default value.For example your equation:\n",deparse(formula[names(whiche)]),"\n") | |
if(userNumEqn !=modelNumEqn) | |
warning("The parameter \"",modelPar,"\" requires ",modelNumEqn, "equation(s). You are providing ",userNumEqn, " equation(s) for this parameter. This problem is fixed. All the equations for this parameter are set to the default value \n") | |
tmplst<-list() | |
if(modelNumEqn==1) | |
tmpname<-modelPar | |
else | |
tmpname<-paste(modelPar,1:modelNumEqn,sep="") | |
for(i in 1:modelNumEqn) | |
tmplst[[tmpname[[i]]]]<-as.formula("~1") | |
}else{ | |
if(mode==4) | |
{ | |
whiche<-which(lapply(formula[(userEqnNamesByPars[[modelPar]])],length)!=3) | |
whicha<-which(lapply(formula[(userEqnNamesByPars[[modelPar]])],FUN=function(a){if (a[[3]]=="1") return (TRUE) else return(FALSE)})==FALSE) | |
if(length(whiche)!=0 ) | |
stop("The equation ",formula[names(whiche)]," is not conform model requirements or its name is ambigous . DepVar/ExpVar is missing.\n") | |
else{ | |
if (length(whicha)!=0) | |
stop("The equation ",formula[names(whicha)]," is not conform model requirements or its name is ambigous . Its right hand side shoule be \"1\".\n") | |
} | |
checkNrReq(modelNumEqn, userNumEqn, modelPar) | |
whiche<-which((names(tmplst) %in% tmpNames)==FALSE) | |
if(length(whiche)!=0){ | |
warning("The name \"",names(tmplst)[whiche],"\" is ambigous. The equations of the paramter \"",modelPar,"\" are renamed\n") | |
names(tmplst)<-tmpNames | |
} | |
} | |
} | |
} | |
} | |
res[[modelPar]]<-tmplst | |
} | |
result<-c() | |
for(i in 1:length(res)) | |
result<-c(result,res[[i]]) | |
} | |
class(result)<-c("multiple","list") | |
return(result) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\name{parse.formula} | |
\alias{parse.formula} | |
\title{Parsing user-input formulas into multiple syntax} | |
\description{Parse the input formula (or list of formulas) into the | |
standard format described below. Since labels for this format will vary | |
by model, \code{parse.formula} will evaluate a function \code{describe.model}, | |
where \code{model} is given as an input to \code{parse.formula}. | |
If the \code{describe.model} function has more than one parameter for | |
which \code{ExpVar = TRUE} and \code{DepVar = TRUE}, then the | |
user-specified equations must have labels to match those parameters, | |
else \code{parse.formula} should return an error. In addition, if the | |
formula entries are not unambiguous, then \code{parse.formula} returns an error. | |
} | |
\usage{ | |
parse.formula(formula, model, data = NULL) | |
} | |
\arguments{ | |
\item{formula}{either a single formula or a list of \code{formula} objects} | |
\item{model}{a character string specifying the name of the model} | |
\item{data}{an optional data frame for models that require a factor response variable} | |
} | |
\value{The output is a list of formula objects with class | |
\code{c("multiple", "list")}. Let's say that the name of the model is | |
\code{"bivariate.probit"}, and the corresponding describe function is | |
\code{describe.bivariate.probit}, which identifies \code{mu1} and | |
\code{mu2} as systematic components, and an ancillary parameter \code{rho}, which | |
may be parameterized, but is estimated as a scalar by default. | |
} | |
\details{Acceptable user inputs are as follows: | |
\tabular{lll}{ | |
\tab User Input \tab Output from \code{parse.formula}\cr | |
\tab \tab \cr | |
Same covariates, \tab cbind(y1, y2) ~ x1 + x2 * x3 \tab list(mu1 = y1 ~ x1 + x2 * x3,\cr | |
separate effects \tab \tab mu2 = y2 ~ x1 + x2 * x3,\cr | |
\tab \tab rho = ~ 1)\cr | |
\tab \tab \cr | |
With \code{rho} as a \tab list(cbind(y1, y2) ~ x1 + x2, \tab list(mu1 = y1 ~ x1 + x2,\cr | |
systematic equation \tab rho = ~ x4 + x5) \tab mu2 = y2 ~ x1 + x2,\cr | |
\tab \tab rho = ~ x4 + x5)\cr | |
\tab \tab \cr | |
With constraints \tab list(mu1 = y1 ~ x1 + tag(x2, "x2"), \tab list(mu1 = y1 ~ x1 + tag(x2, "x2"),\cr | |
(same variable) \tab mu2 = y2 ~ x3 + tag(x2, "x2")) \tab mu2 = y2 ~ x3 + tag(x2, "x2"),\cr | |
\tab \tab rho = ~ 1)\cr | |
\tab \tab \cr | |
With constraints \tab list(mu1 = y1 ~ x1 + tag(x2, "z1"), \tab list(mu1 = y1 ~ x1 + tag(x2, "z1"),\cr | |
(different variables) \tab mu2 = y2 ~ x3 + tag(x4, "z1")) \tab mu2 = y2 ~ x3 + tag(x4, "z1"),\cr | |
\tab \tab rho = ~ 1)\cr | |
}} | |
\examples{ | |
\dontrun{ | |
data(sanction) | |
formulae <- list(cbind(import, export) ~ coop + cost + target) | |
fml <- parse.formula(formulae, model = "bivariate.probit") | |
D <- model.frame(fml, data = sanction) | |
}} | |
\seealso{ | |
\code{\link{parse.par}}, \code{\link{model.frame.multiple}}, | |
\code{\link{model.matrix.multiple}}, and the full Zelig manual at | |
\url{http://gking.harvard.edu/zelig}. | |
} | |
\author{ | |
Kosuke Imai <\email{kimai@princeton.edu}>; Gary King | |
<\email{king@harvard.edu}>; Olivia Lau <\email{olau@fas.harvard.edu}>; Ferdinand Alimadhi | |
<\email{falimadhi@iq.harvard.edu}> | |
} | |
\keyword{utilities} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment