Instantly share code, notes, and snippets.

Embed
What would you like to do?
esperantujo
name ujo pop uea lernu www pas edu nat
AFG afghanistan 0 27.7 1 48 - 0 0 0
ALB albanien 0 2.8 14 64 0.6 1 5 33
DZA algeriet 1 39.2 6 503 0.15 1 8 0
AND andorra - 0.078 0 167 - 0 8 0
AGO angola 1 21.5 2 42 0.19 0 0 -
ARG argentino 27 41.5 29 1965 0.60 16 61 140
ARM armenien 0 3.0 12 92 - 0 1 27
AUS australio 23 23.1 40 2568 0.83 16 52 130
AUT austria 4 8.6 44 780 0.81 8 23 79
AZE azerbadj 1 9.8 1 112 - 0 2 0
BHS bahamas 1 0.378 1 15 - 0 0 0
BGD bangladesh 0 156.6 2 49 0.39 0 1 0
BLR belarus 5 9.5 12 425 0.54 5 6 0
BEL belgien 16 11.3 143 1695 0.82 37 106 360
BLZ belize 0 0.380 1 14 - 0 0 0
BEN benin 0 10.6 14 69 - 6 6 500
BTN bhutan 0 0.8 0 8 - 0 0 0
BOL bolivia 1 11.0 2 330 - 0 3 0
BIH bosnien 1 3.8 13 175 0.72 3 7 300
BWA botswana 0 2.2 0 8 - 0 0 0
BRA brasilien 299 200.4 382 16845 0.60 137 577 385
BRN brunei 0 0.417 0 13 - 0 0 0
BGR bulgarien 7 7.2 45 622 0.53 11 10 140
BFA burkina 1 19 0 15 - 0 0 0
BDI burundi 0 10.1 15 41 - 3 9 95
KHM cambodja 0 15.6 2 26 - 1 1 -
CMR cameroun 0 22.7 4 35 - 0 1 -
CAN canada 27 35.2 67 3982 0.86 25 63 115
CAF centralafr 0 5.0 0 26 - 2 0 0
CHL chile 7 17.6 10 1541 0.67 4 11 -
COL colombia 13 47.12 24 3307 0.52 13 42 -
COG congoBraz 0 4.7 3 13 - 0 0 0
CUB cuba 2 11.3 35 152 0.26 10 13 350
CYP cypern 0 0.8 2 64 - 0 2 -
DNK danmark 9 5.7 86 995 0.95 16 22 140
DJI djibouti 0 0.9 0 8 - 0 0 0
DOM domrep 2 10.4 3 551 0.52 1 0 0
EGY egypt 1 82.1 1 364 0.5 1 0 0
ECU equador 2 15.7 4 642 0.4 1 6 0
GNQ equatorialguinea 0 1.2 0 5 - 0 0 0
ERI eritrea 0 5.3 0 7 - 0 0 0
EST estland 2 1.3 17 151 0.8 2 2 100
TLS etimor 1 1.2 4 11 - 0 0 0
ETH etiopien 0 101.9 1 43 - 0 0 0
FJI fiji - 0.9 0 8 - 0 0 0
PHL filip 12 98.4 6 584 0.44 1 3 0
FIN finland 6 5.5 97 1105 0.92 12 35 300
FRA france 86 66.4 734 7061 0.89 234 637 650
GAB gabon 0 1.8 0 19 - 0 0 0
GMB gambia 0 1.9 0 7 - 0 0 0
GEO georgien 0 3.7 3 71 - 2 2 0
GHA ghana 0 28.3 5 60 - 3 2 -
GRC greece 1 10.8 11 383 0.6 2 5 65
GRL greenland 0 0.056 0 7 - 0 0 0
GTM guatem 3 15.5 2 438 0.20 1 3 -
GIN guinea 0 12.9 0 10 - 0 0 0
GNB guineabissau 0 1.5 0 7 - 0 0 0
GUY guyana 0 0.746 0 3 - 0 0 0
HTI haiti 1 11.1 0 48 - 0 0 0
NLD holland 25 17 205 2182 0.94 32 106 235
HND honduras 2 8.1 2 199 0.18 0 1 0
HKG hongkong - 7.4 2 1048 - 1 3 -
ISL iceland 1 0.323 14 92 0.97 1 2 50
IND indien 3 1252 14 1371 0.35 4 7 150
IDN indon 8 249.9 9 1136 0.35 0 15 0
IRQ irak 2 36.8 0 53 - 0 0 0
IRN iran 57 77.5 60 1781 0.31 17 39 40
IRL irland 5 4.6 10 627 0.78 2 2 63
ISR israel 6 8.1 64 1812 0.71 3 15 160
ITA italien 29 59.8 171 2997 0.58 27 151 910
CIV ivory 0 22.7 1 53 - 0 0 100
JAM jamaica 0 2.7 0 27 - 0 0 0
JPN japan 10 127.3 378 1399 0.9 11 62 1298
JOR jordan 1 9.8 0 68 - 0 0 0
KAZ kasak 2 17.0 8 286 0.54 0 10 0
CPV kapverde - 0.531 0 23 - 0 0 0
KEN kenya 0 44.3 3 65 0.39 1 0 -
CHN kina 17 1357 135 8273 0.52 38 91 1144
KGZ kirigistan 1 6.1 1 37 - 0 2 0
COD KongoDR 2 67.5 16 118 0.022 3 18 456
KOR korea 4 50.2 87 1520 0.85 11 44 250
CRI kostariko 4 4.9 8 636 0.46 3 7 34
HRV kroatien 4 4.2 32 409 0.67 3 16 316
KWT kuwait 0 4.2 0 46 - 0 0 0
LAO laos 0 6.5 0 17 - 0 0 0
LSO lesotho 0 1.9 0 1 - 0 0 0
LVA letland 1 2.0 18 160 0.75 1 5 102
LBN libanon 1 6.0 1 88 - 0 0 0
LBR liberia 0 4.1 0 8 - 0 0 0
LBY libya 0 6.4 0 21 - 0 0 0
LIE liechtenstein - 0.037 0 8 - 0 0 0
LTU litauen 8 3.0 43 5127 0.68 13 32 960
LUX luxemborg 2 0.57 17 86 0.94 4 5 104
MDG madagaskar 1 22.9 6 66 0.61 3 4 70
MKD makedonien 1 2.1 4 48 - 2 2 45
MWI malawi 0 16.8 2 7 - 0 3 0
MYS malaysia 2 31.9 0 662 - 1 5 0
MLD maldives - 0.344 0 4 - 0 0 0
MLI mali 0 18.3 1 21 - 0 1 -
MTA malta - 0.425 3 28 0.69 0 1 3
MAR maroko 1 33.0 5 1096 0.56 0 4 0
MRT mauretania 0 3.7 0 16 - 0 0 0
MAU mauritius - 1.3 0 23 - 0 0 0
MEX mexico 40 122.3 26 5162 0.51 19 67 145
MDA moldova 0 3.6 1 102 - 1 3 0
MNG mongoliet 2 2.8 19 58 0.16 3 2 50
MNE montenegro 0 0.621 1 11 - 0 0 -
MOZ mozambique 0 26.4 0 14 - 0 0 0
MMR myanmar 1 54.6 1 25 - 0 1 0
NAM namibia 0 2.3 0 21 - 0 0 0
NPL nepal 7 31.0 26 115 - 2 4 147
NCL newkaledonia 0 0.268 2 46 - 1 6 -
NZL newzealand 8 4.5 21 577 0.83 5 6 46
NIC nicaragua 1 6.1 19 295 0.16 1 8 -
NER niger 0 20.7 3 25 - 1 1 -
NGA nigeria 1 173.6 10 77 0.46 2 2 142
NOR norge 7 5.2 36 826 0.95 14 14 144
OMN oman 0 4.6 0 7 - 0 0 0
PAK pakistano 5 182.1 40 195 0.18 29 2 450
PAN panamo 0 3.9 0 214 0.43 0 1 0
PNG papua 0 8.2 0 0 - 0 0 0
PRY paraguay 0 6.8 0 147 0.37 0 1 0
PER peru 6 30.4 9 1815 0.39 2 13 -
POL polen 48 38.5 162 4556 0.76 62 197 620
PRT portugal 5 10.3 11 1025 - 2 17 95
PRI puertorico 3 3.5 1 288 - 0 2 0
QAT qatar 0 2.6 0 37 - 0 1 0
ROU romania 5 19.8 35 422 0.5 3 17 51
RUS rusland 43 144.0 107 7343 0.71 76 193 148
RWA rwanda 0 11.6 4 21 - 0 0 0
SLB salomon 0 0.642 0 3 - 0 0 0
SLV salvadoro 1 6.3 1 218 0.23 3 2 0
SAU saudi 0 28.8 0 163 0.61 0 0 0
CHE schweiz 9 8.1 85 935 0.87 29 44 170
SEN senegal 0 14.8 4 129 - 6 12 -
SRB serbien 5 7.1 28 234 0.72 5 15 -
SLE sierraleone 0 7.1 0 10 - 0 0 0
SVK slovakia 2 5.4 30 1418 0.78 5 28 80
SVN slovenien 0 2.1 14 233 - 1 18 106
SOM somalia 0 11.1 0 3 - 0 0 0
ESP spanien 60 46.4 131 5876 0.84 47 207 285
LKA srilanka 0 21.2 3 31 - 0 0 -
SSD ssudan 0 12.1 0 1 - 0 0 0
ZAF sudafriko 10 53.0 5 288 0.49 6 2 54
SDN sudan 0 41.2 0 35 - 0 1 0
SUR surinam 0 0.541 0 7 - 0 0 0
SWE sverige 19 9.9 103 1609 0.95 30 46 275
SWZ swaziland 0 1.3 1 6 - 1 1 0
SYR syria 1 18.6 0 82 - 0 1 0
TJK tadjstikistan 1 8.6 3 14 - 1 3 25
TWN taiwan 5 23.5 10 1771 0.8 3 10 -
TZA tanzania 1 55.2 9 23 - 1 1 -
TCD tchad 0 14.5 3 2 - 0 0 0
THA thailand 3 67.0 5 277 0.29 3 3 0
CZE tjekkiet 11 10.5 86 1724 0.74 23 64 105
TGO togo 0 7.1 28 81 - 6 14 300
TTO trinidad 0 1.4 0 28 - 0 0 0
TUN tunesien 0 11.2 5 241 - 0 0 0
TKM turkmenistan 0 4.8 0 5 - 0 0 0
TUR tyrkiet 13 74.9 14 1193 0.46 0 14 0
DEU tyskland 76 81.5 480 6684 0.88 109 226 1086
ARE uae 2 9.9 1 173 - 2 2 0
UGA uganda 0 36.9 6 18 - 0 1 -
GBR uk 55 65.1 119 5484 0.82 28 90 385
UKR ukraine 13 42.9 47 2042 0.62 27 38 160
HUN ungarn 39 9.8 80 7950 0.73 35 234 120
URY uruguay 2 3.4 13 278 0.58 2 4 30
USA usa 210 318.9 245 30152 0.88 76 354 700
UZB usbekistan 0 31.6 2 72 - 1 1 0
VUT vanuatu 0 0.277 0 2 - 1 0 0
VEN venezuela 16 30.4 7 1156 0.55 4 16 32
VNM vietnam 5 89.7 63 1287 0.44 0 30 200
YEM yemen 0 27.5 0 28 - 0 0 0
ZMB zambia 0 15.9 0 15 - 1 2 0
ZWE zimbabwe 0 14.2 1 30 - 1 1 0
#loading script for maximizing any function
source("simple_maximize.R")
#reads in data
tr=read.table("esperantujodirectory2.txt", header=T, row.names=1)
#removes first column with redundant information
weird_names=tr$name
tr=tr[,-1]
#transforms everything to numbers. This will throw warnings about missing values.
tr=as.data.frame(apply(tr,c(1,2),as.numeric))
#rearranges two columns
ad=colnames(tr)
n=which(ad=="www")
ad[n]=ad[1]
ad[1]="www"
tr=tr[,ad]
#calulating the alphas,
a_js=apply(tr[,3:ncol(tr)],2, mean, na.rm=T)
a_js=a_js/sum(a_js)
#deprecated part with prior on relative frequency ----------------
total_number=233286
overall_p=total_number/(7*10^9)
#kappa=1000
prior=function(p){
return(dexp(p, rate=1/overall_p, log=T))
}
prior2=function(p){
return(dbeta(p, shape1=1, shape2=(1-overall_p)/overall_p, log=T))
}
#back to non deprecated part -----------------
# Likelihood, that is P(x,p | kappa)
#(p can go on both sides of the conditionality if one assumes a uniform prior).
likelihood_of_row=function(x,p, k=kappa){
#calculating actual number of inhabitants in country
po=x[2]*10^6
#the 'actual' data columns
rest=x[3:length(x)]
res=0
for(i in 1:length(rest)){
if(!is.na(rest[i])){#if the value is not missing
res=res+log(dnbinom(rest[i], size=k[i], mu=a_js[i]*p*po))
}
else{
#print(paste(rest[i],"was NA"))
}
}
#print(res)
#print(prior2(p))
#res=res+prior2(p)
return(res)
}
#function that finds the maximum of the likelihood for fixed value of kappa.
ff=function(x,k=kappa){
return(simple_maximize(function(p){likelihood_of_row(as.numeric(x),p,k)}, 0.0001,0,1))
}
#function that maximizes the posterior with the constraint kappa_i=k
post_all_ks_equal=function(k){
res=sum(apply(tr,1,ff, k=rep(k, length(a_js)))[2,])
print(paste0("P(p^,",k,")=",res))
return(res)
}
#function that maximizes the posterior with kappa_i=k[i]
post_all_ks_different=function(k){
if(sum(k<=0)>0){
return(Inf)
}
res=sum(apply(tr,1,ff, k=k)[2,])
pri=sum(log(dgamma(x = k, shape=2, rate=2)))
res=res+pri
r=paste0(round(k,4),sep=",", collapse="")
print(paste0("P(p^,",r,")=",round(res,6), "[",round(pri,4),"]"))
return(-res)
}
#function that maximizes the posterior with kappa_1,kappa_3,kappa_4,kappa_5=k[1]
#and kappa_2=k[2] and kappa_6)=k[6]
post_some_ks_different=function(k){
if(sum(k<=0)>0){
return(Inf)
}
k=k[c(1,2,1,1,1,3)]
res=sum(apply(tr,1,ff, k=k)[2,])
pri=sum(log(dgamma(x = k, shape=2, rate=2)))
res=res+pri
r=paste0(round(k,4),sep=",", collapse="")
print(paste0("P(p^,",r,")=",round(res,6), "[",round(pri,4),"]"))
return(-res)
}
#loads in data from datamaps.co to make it look extra nice.
country_names=read.table("world-data.csv", sep=',', header=T, row.names = 1)
extra_country_names=data.frame(name=c("Andorra",
"Hong Kong",
"Cape Verde",
"Liechtenstein",
"Maldives",
"Malta",
"Maurtitius"))
rownames(extra_country_names)=c("AND","HKG", "CPV","LIE","MLD","MTA","MAU")
#this finds the maximum/mode of the posterior under the constraint kappa_i=kappa
optimize(post_all_ks_equal, interval=c(0.1, 10), maximum = T)
#this will not converge (in standard nelder-mead time) but would otherwise find the mode mode of the posterior without constraint.
all_equal_kappa_start=optim(rep(1.67,length(a_js)), fn=post_all_ks_different)
#this will converge, and finds the posterior under the constraint kappa_1=kappa_3=kappa_4=kappa_5
diff_kappa_start=optim(rgamma(3,shape=2, rate=2), fn=post_some_ks_different)
#this is posterior mode with the constraint kappa_1=kappa_3=kappa_4=kappa_5 (result of previous command)
khatbackup=c(3.3144869, 2.6375309, 3.3144869, 3.3144869, 3.3144869, 0.2140745)
#unpacking kappa from the mode found under the constraint kappa_1=kappa_3=kappa_4=kappa_5
khat=diff_kappa_start$par[c(1,2,1,1,1,3)]
#finding the ps that maximize
bd=apply(tr,1,ff, k=khat)
#messy handling of the dataframes to get a rank based matrix tr2.
bd=t(bd)
tr2=cbind(t(apply(tr,1,function(x){x/x[2]})),bd)
tr2=as.data.frame(tr2)
tr2$name=""
rn=rownames(tr2)
for(i in 1:nrow(tr2)){
if(rn[i] %in% rownames(country_names)){
tr2$name[i]=as.character(country_names[rn[i],1])
}
else if(rn[i] %in% rownames(extra_country_names)){
tr2$name[i]=as.character(extra_country_names[rn[i],1])
}
}
#more messy handling
tmp_cols=colnames(tr2)
tmp_cols[(length(tmp_cols)-2):(length(tmp_cols)-1)]=c("rate","posterior")
colnames(tr2) <- tmp_cols
res=tr2[order(bd[,1]),]
res2=res
res2[,3:8]=apply(-res[,3:8],2,rank, ties.method="average")
res2$pop=tr$pop[order(bd[,1])]
res2$rank=nrow(res2):1
#scaling from relative frequency to interpretable numbers
ns=c(123, 604, 992, 8397,209)
ps=res2[c("NZL","LTU", "RUS","HUN","EST"), "rate"]
Ns=res2[c("NZL","LTU", "RUS","HUN","EST"), "pop"]
ps=ps[-4]
ns=ns[-4]
Ns=Ns[-4]
mean(ns/(ps*Ns))*ps*Ns
scal=mean(ns/(ps*Ns))
res2$no_eos=scal*res2$pop*res2$rate
res2$rel_eos=scal*res2$rate
#making file to upload to datamaps.co
uploadable_csv=sqrt(res2[,c("rel_eos")])
colnames(uploadable_csv)=c()
write.table(uploadable_csv, file="uploadable.csv", sep=",",
row.names = rownames(res2), quote=F, col.names = F)
write.table(uploadable_csv, file="uploadable2.csv", sep=",",
row.names = res2$name, quote=F, col.names = F)
#(in hindsight there is probably not a good reason to have two tables made)
#making table in html
library(xtable)
d_to_print=data.frame("Rank"=1:nrow(res2),
"Frequency"=rev(res2$rel_eos),
"Total"=rev(res2$no_eos),
"Proportion"=rev(res2$no_eos)/sum(res2$no_eos))
rownames(d_to_print) <- rev(res2$name)
xt=xtable(d_to_print, digits = c(0,0,1,1,4))
print(xt, type = 'html')
#making another table in html
d_to_print2=tr
rownames(d_to_print2) <- tr2$name
d_to_print2=d_to_print2[,-1]
xt=xtable(d_to_print2[c(1:10, nrow(d_to_print2)),], digits = c(0,2,0,0,0,0,0,0))
print(xt, type = 'html')
#maximizes function 'func' from starting value 'start_val' and lower and upper limits 'lower' and 'upper'
#you can only be frustrated about optim so many times before you make your own bloody function.
simple_maximize=function(func, start_val, lower=0, upper=1){
step_size=(upper-lower)/10
mins=0
best_pos=start_val
best_val=func(start_val)
while(mins<50){
left_pos=max(lower, best_pos-step_size)
right_pos=min(upper, best_pos+step_size)
#print(c(left_pos,right_pos))
left_val=func(left_pos)
right_val=func(right_pos)
#print(paste(left_val,best_val,right_val))
stuck_count=0
#print(paste(mins,step_size,left_pos,right_pos,left_val,right_val,best_val,best_pos))
while((left_val>best_val | right_val>best_val) &stuck_count<20){
#print("changed_best_pos")
i=which.max(c(left_val,right_val))
best_pos=c(left_pos,right_pos)[i]
best_val=max(left_val,right_val)
left_pos=max(lower, best_pos-step_size)
right_pos=min(upper, best_pos+step_size)
#print(c(left_pos,right_pos))
left_val=func(left_pos)
right_val=func(right_pos)
stuck_count=stuck_count+1
}
step_size=step_size/2
mins=mins+1
}
return(c(best_pos,best_val))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment