Last active
January 3, 2016 13:29
-
-
Save RanaivosonHerimanitra/8470213 to your computer and use it in GitHub Desktop.
built with 'data.table' package, this function aims to determine the number of sample to be surveyed in each strata using Optimal Stratified Random Sampling ( à la Neyman). Inputs include the raw dataset that contains information about strata, the name of the strata variable in this dataset and another dataset containing standard deviation of a …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(data.table) | |
alloc_opti_data.table=function(n=1000 | |
,dataset=ese_op2013_reste | |
,strate="strate2013" | |
,sd=ecart_type ) | |
{ | |
#count obs per strata to form Nh: | |
dataset=data.table(dataset) | |
output=dataset[,.N,by=strate] | |
ecart_type=data.table(sd) | |
#set the keys and merge with standard deviation: | |
setkey(output,strate) | |
setkey(ecart_type,"strata") | |
output =output[ecart_type] | |
output=data.frame(output) | |
names(output)[2]="Nh" | |
output=data.table(output) | |
#weighted average with sd: | |
output[,SPE:=Nh*as.numeric(as.character(sd))] | |
#initial nh: | |
output[,nh:= (n*SPE)/sum(SPE)] | |
#store in a data.table: | |
output[,nh:=round(nh)] | |
output[,poids:=ifelse(nh!=0,Nh/nh,0)] | |
setkey(output,"poids") | |
output=output[!J(0)] | |
output[,reponderation:=nh>=Nh] | |
#recompute for those where nh>Nh <==> reponderation==TRUE: | |
#exhaustive survey: | |
output[,nh:=ifelse(reponderation==TRUE,Nh,nh)] | |
#recompute weight: | |
output[,poids:=ifelse(nh==Nh,1,Nh/nh)] | |
#re do all the stuff above and update while nh>Nh: | |
while ( length( which(output$reponderation == T) )!=0 ) { | |
setkey(output,"reponderation") | |
#recompute Nh,n,SPE, etc. | |
#remaining number of sample | |
n=n-data.frame(output[J(TRUE),sum(Nh)])[,2] | |
#nh=n*SPE/sum(SPE) | |
output[J(FALSE),nh:=n*SPE/sum(SPE)] | |
#update output: | |
output[J(FALSE),poids:=ifelse( round(nh)!=0,Nh/nh,NA)] | |
#initialize: | |
#ens$reponderation=FALSE | |
output$reponderation=FALSE | |
output[,reponderation:=nh>Nh] | |
#exhaustive survey: | |
output[,nh:=ifelse(reponderation==TRUE,Nh,nh)] | |
#recompute weight: | |
output[,poids:=ifelse(nh==Nh,1,Nh/nh)] | |
} | |
#return datatable with all info: | |
output=output[which(nh!=0),] | |
return ( output ) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment