esperantujo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name ujo pop uea lernu www pas edu nat | |
AFG afghanistan 0 27.7 1 48 - 0 0 0 | |
ALB albanien 0 2.8 14 64 0.6 1 5 33 | |
DZA algeriet 1 39.2 6 503 0.15 1 8 0 | |
AND andorra - 0.078 0 167 - 0 8 0 | |
AGO angola 1 21.5 2 42 0.19 0 0 - | |
ARG argentino 27 41.5 29 1965 0.60 16 61 140 | |
ARM armenien 0 3.0 12 92 - 0 1 27 | |
AUS australio 23 23.1 40 2568 0.83 16 52 130 | |
AUT austria 4 8.6 44 780 0.81 8 23 79 | |
AZE azerbadj 1 9.8 1 112 - 0 2 0 | |
BHS bahamas 1 0.378 1 15 - 0 0 0 | |
BGD bangladesh 0 156.6 2 49 0.39 0 1 0 | |
BLR belarus 5 9.5 12 425 0.54 5 6 0 | |
BEL belgien 16 11.3 143 1695 0.82 37 106 360 | |
BLZ belize 0 0.380 1 14 - 0 0 0 | |
BEN benin 0 10.6 14 69 - 6 6 500 | |
BTN bhutan 0 0.8 0 8 - 0 0 0 | |
BOL bolivia 1 11.0 2 330 - 0 3 0 | |
BIH bosnien 1 3.8 13 175 0.72 3 7 300 | |
BWA botswana 0 2.2 0 8 - 0 0 0 | |
BRA brasilien 299 200.4 382 16845 0.60 137 577 385 | |
BRN brunei 0 0.417 0 13 - 0 0 0 | |
BGR bulgarien 7 7.2 45 622 0.53 11 10 140 | |
BFA burkina 1 19 0 15 - 0 0 0 | |
BDI burundi 0 10.1 15 41 - 3 9 95 | |
KHM cambodja 0 15.6 2 26 - 1 1 - | |
CMR cameroun 0 22.7 4 35 - 0 1 - | |
CAN canada 27 35.2 67 3982 0.86 25 63 115 | |
CAF centralafr 0 5.0 0 26 - 2 0 0 | |
CHL chile 7 17.6 10 1541 0.67 4 11 - | |
COL colombia 13 47.12 24 3307 0.52 13 42 - | |
COG congoBraz 0 4.7 3 13 - 0 0 0 | |
CUB cuba 2 11.3 35 152 0.26 10 13 350 | |
CYP cypern 0 0.8 2 64 - 0 2 - | |
DNK danmark 9 5.7 86 995 0.95 16 22 140 | |
DJI djibouti 0 0.9 0 8 - 0 0 0 | |
DOM domrep 2 10.4 3 551 0.52 1 0 0 | |
EGY egypt 1 82.1 1 364 0.5 1 0 0 | |
ECU equador 2 15.7 4 642 0.4 1 6 0 | |
GNQ equatorialguinea 0 1.2 0 5 - 0 0 0 | |
ERI eritrea 0 5.3 0 7 - 0 0 0 | |
EST estland 2 1.3 17 151 0.8 2 2 100 | |
TLS etimor 1 1.2 4 11 - 0 0 0 | |
ETH etiopien 0 101.9 1 43 - 0 0 0 | |
FJI fiji - 0.9 0 8 - 0 0 0 | |
PHL filip 12 98.4 6 584 0.44 1 3 0 | |
FIN finland 6 5.5 97 1105 0.92 12 35 300 | |
FRA france 86 66.4 734 7061 0.89 234 637 650 | |
GAB gabon 0 1.8 0 19 - 0 0 0 | |
GMB gambia 0 1.9 0 7 - 0 0 0 | |
GEO georgien 0 3.7 3 71 - 2 2 0 | |
GHA ghana 0 28.3 5 60 - 3 2 - | |
GRC greece 1 10.8 11 383 0.6 2 5 65 | |
GRL greenland 0 0.056 0 7 - 0 0 0 | |
GTM guatem 3 15.5 2 438 0.20 1 3 - | |
GIN guinea 0 12.9 0 10 - 0 0 0 | |
GNB guineabissau 0 1.5 0 7 - 0 0 0 | |
GUY guyana 0 0.746 0 3 - 0 0 0 | |
HTI haiti 1 11.1 0 48 - 0 0 0 | |
NLD holland 25 17 205 2182 0.94 32 106 235 | |
HND honduras 2 8.1 2 199 0.18 0 1 0 | |
HKG hongkong - 7.4 2 1048 - 1 3 - | |
ISL iceland 1 0.323 14 92 0.97 1 2 50 | |
IND indien 3 1252 14 1371 0.35 4 7 150 | |
IDN indon 8 249.9 9 1136 0.35 0 15 0 | |
IRQ irak 2 36.8 0 53 - 0 0 0 | |
IRN iran 57 77.5 60 1781 0.31 17 39 40 | |
IRL irland 5 4.6 10 627 0.78 2 2 63 | |
ISR israel 6 8.1 64 1812 0.71 3 15 160 | |
ITA italien 29 59.8 171 2997 0.58 27 151 910 | |
CIV ivory 0 22.7 1 53 - 0 0 100 | |
JAM jamaica 0 2.7 0 27 - 0 0 0 | |
JPN japan 10 127.3 378 1399 0.9 11 62 1298 | |
JOR jordan 1 9.8 0 68 - 0 0 0 | |
KAZ kasak 2 17.0 8 286 0.54 0 10 0 | |
CPV kapverde - 0.531 0 23 - 0 0 0 | |
KEN kenya 0 44.3 3 65 0.39 1 0 - | |
CHN kina 17 1357 135 8273 0.52 38 91 1144 | |
KGZ kirigistan 1 6.1 1 37 - 0 2 0 | |
COD KongoDR 2 67.5 16 118 0.022 3 18 456 | |
KOR korea 4 50.2 87 1520 0.85 11 44 250 | |
CRI kostariko 4 4.9 8 636 0.46 3 7 34 | |
HRV kroatien 4 4.2 32 409 0.67 3 16 316 | |
KWT kuwait 0 4.2 0 46 - 0 0 0 | |
LAO laos 0 6.5 0 17 - 0 0 0 | |
LSO lesotho 0 1.9 0 1 - 0 0 0 | |
LVA letland 1 2.0 18 160 0.75 1 5 102 | |
LBN libanon 1 6.0 1 88 - 0 0 0 | |
LBR liberia 0 4.1 0 8 - 0 0 0 | |
LBY libya 0 6.4 0 21 - 0 0 0 | |
LIE liechtenstein - 0.037 0 8 - 0 0 0 | |
LTU litauen 8 3.0 43 5127 0.68 13 32 960 | |
LUX luxemborg 2 0.57 17 86 0.94 4 5 104 | |
MDG madagaskar 1 22.9 6 66 0.61 3 4 70 | |
MKD makedonien 1 2.1 4 48 - 2 2 45 | |
MWI malawi 0 16.8 2 7 - 0 3 0 | |
MYS malaysia 2 31.9 0 662 - 1 5 0 | |
MLD maldives - 0.344 0 4 - 0 0 0 | |
MLI mali 0 18.3 1 21 - 0 1 - | |
MTA malta - 0.425 3 28 0.69 0 1 3 | |
MAR maroko 1 33.0 5 1096 0.56 0 4 0 | |
MRT mauretania 0 3.7 0 16 - 0 0 0 | |
MAU mauritius - 1.3 0 23 - 0 0 0 | |
MEX mexico 40 122.3 26 5162 0.51 19 67 145 | |
MDA moldova 0 3.6 1 102 - 1 3 0 | |
MNG mongoliet 2 2.8 19 58 0.16 3 2 50 | |
MNE montenegro 0 0.621 1 11 - 0 0 - | |
MOZ mozambique 0 26.4 0 14 - 0 0 0 | |
MMR myanmar 1 54.6 1 25 - 0 1 0 | |
NAM namibia 0 2.3 0 21 - 0 0 0 | |
NPL nepal 7 31.0 26 115 - 2 4 147 | |
NCL newkaledonia 0 0.268 2 46 - 1 6 - | |
NZL newzealand 8 4.5 21 577 0.83 5 6 46 | |
NIC nicaragua 1 6.1 19 295 0.16 1 8 - | |
NER niger 0 20.7 3 25 - 1 1 - | |
NGA nigeria 1 173.6 10 77 0.46 2 2 142 | |
NOR norge 7 5.2 36 826 0.95 14 14 144 | |
OMN oman 0 4.6 0 7 - 0 0 0 | |
PAK pakistano 5 182.1 40 195 0.18 29 2 450 | |
PAN panamo 0 3.9 0 214 0.43 0 1 0 | |
PNG papua 0 8.2 0 0 - 0 0 0 | |
PRY paraguay 0 6.8 0 147 0.37 0 1 0 | |
PER peru 6 30.4 9 1815 0.39 2 13 - | |
POL polen 48 38.5 162 4556 0.76 62 197 620 | |
PRT portugal 5 10.3 11 1025 - 2 17 95 | |
PRI puertorico 3 3.5 1 288 - 0 2 0 | |
QAT qatar 0 2.6 0 37 - 0 1 0 | |
ROU romania 5 19.8 35 422 0.5 3 17 51 | |
RUS rusland 43 144.0 107 7343 0.71 76 193 148 | |
RWA rwanda 0 11.6 4 21 - 0 0 0 | |
SLB salomon 0 0.642 0 3 - 0 0 0 | |
SLV salvadoro 1 6.3 1 218 0.23 3 2 0 | |
SAU saudi 0 28.8 0 163 0.61 0 0 0 | |
CHE schweiz 9 8.1 85 935 0.87 29 44 170 | |
SEN senegal 0 14.8 4 129 - 6 12 - | |
SRB serbien 5 7.1 28 234 0.72 5 15 - | |
SLE sierraleone 0 7.1 0 10 - 0 0 0 | |
SVK slovakia 2 5.4 30 1418 0.78 5 28 80 | |
SVN slovenien 0 2.1 14 233 - 1 18 106 | |
SOM somalia 0 11.1 0 3 - 0 0 0 | |
ESP spanien 60 46.4 131 5876 0.84 47 207 285 | |
LKA srilanka 0 21.2 3 31 - 0 0 - | |
SSD ssudan 0 12.1 0 1 - 0 0 0 | |
ZAF sudafriko 10 53.0 5 288 0.49 6 2 54 | |
SDN sudan 0 41.2 0 35 - 0 1 0 | |
SUR surinam 0 0.541 0 7 - 0 0 0 | |
SWE sverige 19 9.9 103 1609 0.95 30 46 275 | |
SWZ swaziland 0 1.3 1 6 - 1 1 0 | |
SYR syria 1 18.6 0 82 - 0 1 0 | |
TJK tadjstikistan 1 8.6 3 14 - 1 3 25 | |
TWN taiwan 5 23.5 10 1771 0.8 3 10 - | |
TZA tanzania 1 55.2 9 23 - 1 1 - | |
TCD tchad 0 14.5 3 2 - 0 0 0 | |
THA thailand 3 67.0 5 277 0.29 3 3 0 | |
CZE tjekkiet 11 10.5 86 1724 0.74 23 64 105 | |
TGO togo 0 7.1 28 81 - 6 14 300 | |
TTO trinidad 0 1.4 0 28 - 0 0 0 | |
TUN tunesien 0 11.2 5 241 - 0 0 0 | |
TKM turkmenistan 0 4.8 0 5 - 0 0 0 | |
TUR tyrkiet 13 74.9 14 1193 0.46 0 14 0 | |
DEU tyskland 76 81.5 480 6684 0.88 109 226 1086 | |
ARE uae 2 9.9 1 173 - 2 2 0 | |
UGA uganda 0 36.9 6 18 - 0 1 - | |
GBR uk 55 65.1 119 5484 0.82 28 90 385 | |
UKR ukraine 13 42.9 47 2042 0.62 27 38 160 | |
HUN ungarn 39 9.8 80 7950 0.73 35 234 120 | |
URY uruguay 2 3.4 13 278 0.58 2 4 30 | |
USA usa 210 318.9 245 30152 0.88 76 354 700 | |
UZB usbekistan 0 31.6 2 72 - 1 1 0 | |
VUT vanuatu 0 0.277 0 2 - 1 0 0 | |
VEN venezuela 16 30.4 7 1156 0.55 4 16 32 | |
VNM vietnam 5 89.7 63 1287 0.44 0 30 200 | |
YEM yemen 0 27.5 0 28 - 0 0 0 | |
ZMB zambia 0 15.9 0 15 - 1 2 0 | |
ZWE zimbabwe 0 14.2 1 30 - 1 1 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#loading script for maximizing any function | |
source("simple_maximize.R") | |
#reads in data | |
tr=read.table("esperantujodirectory2.txt", header=T, row.names=1) | |
#removes first column with redundant information | |
weird_names=tr$name | |
tr=tr[,-1] | |
#transforms everything to numbers. This will throw warnings about missing values. | |
tr=as.data.frame(apply(tr,c(1,2),as.numeric)) | |
#rearranges two columns | |
ad=colnames(tr) | |
n=which(ad=="www") | |
ad[n]=ad[1] | |
ad[1]="www" | |
tr=tr[,ad] | |
#calulating the alphas, | |
a_js=apply(tr[,3:ncol(tr)],2, mean, na.rm=T) | |
a_js=a_js/sum(a_js) | |
#deprecated part with prior on relative frequency ---------------- | |
total_number=233286 | |
overall_p=total_number/(7*10^9) | |
#kappa=1000 | |
prior=function(p){ | |
return(dexp(p, rate=1/overall_p, log=T)) | |
} | |
prior2=function(p){ | |
return(dbeta(p, shape1=1, shape2=(1-overall_p)/overall_p, log=T)) | |
} | |
#back to non deprecated part ----------------- | |
# Likelihood, that is P(x,p | kappa) | |
#(p can go on both sides of the conditionality if one assumes a uniform prior). | |
likelihood_of_row=function(x,p, k=kappa){ | |
#calculating actual number of inhabitants in country | |
po=x[2]*10^6 | |
#the 'actual' data columns | |
rest=x[3:length(x)] | |
res=0 | |
for(i in 1:length(rest)){ | |
if(!is.na(rest[i])){#if the value is not missing | |
res=res+log(dnbinom(rest[i], size=k[i], mu=a_js[i]*p*po)) | |
} | |
else{ | |
#print(paste(rest[i],"was NA")) | |
} | |
} | |
#print(res) | |
#print(prior2(p)) | |
#res=res+prior2(p) | |
return(res) | |
} | |
#function that finds the maximum of the likelihood for fixed value of kappa. | |
ff=function(x,k=kappa){ | |
return(simple_maximize(function(p){likelihood_of_row(as.numeric(x),p,k)}, 0.0001,0,1)) | |
} | |
#function that maximizes the posterior with the constraint kappa_i=k | |
post_all_ks_equal=function(k){ | |
res=sum(apply(tr,1,ff, k=rep(k, length(a_js)))[2,]) | |
print(paste0("P(p^,",k,")=",res)) | |
return(res) | |
} | |
#function that maximizes the posterior with kappa_i=k[i] | |
post_all_ks_different=function(k){ | |
if(sum(k<=0)>0){ | |
return(Inf) | |
} | |
res=sum(apply(tr,1,ff, k=k)[2,]) | |
pri=sum(log(dgamma(x = k, shape=2, rate=2))) | |
res=res+pri | |
r=paste0(round(k,4),sep=",", collapse="") | |
print(paste0("P(p^,",r,")=",round(res,6), "[",round(pri,4),"]")) | |
return(-res) | |
} | |
#function that maximizes the posterior with kappa_1,kappa_3,kappa_4,kappa_5=k[1] | |
#and kappa_2=k[2] and kappa_6)=k[6] | |
post_some_ks_different=function(k){ | |
if(sum(k<=0)>0){ | |
return(Inf) | |
} | |
k=k[c(1,2,1,1,1,3)] | |
res=sum(apply(tr,1,ff, k=k)[2,]) | |
pri=sum(log(dgamma(x = k, shape=2, rate=2))) | |
res=res+pri | |
r=paste0(round(k,4),sep=",", collapse="") | |
print(paste0("P(p^,",r,")=",round(res,6), "[",round(pri,4),"]")) | |
return(-res) | |
} | |
#loads in data from datamaps.co to make it look extra nice. | |
country_names=read.table("world-data.csv", sep=',', header=T, row.names = 1) | |
extra_country_names=data.frame(name=c("Andorra", | |
"Hong Kong", | |
"Cape Verde", | |
"Liechtenstein", | |
"Maldives", | |
"Malta", | |
"Maurtitius")) | |
rownames(extra_country_names)=c("AND","HKG", "CPV","LIE","MLD","MTA","MAU") | |
#this finds the maximum/mode of the posterior under the constraint kappa_i=kappa | |
optimize(post_all_ks_equal, interval=c(0.1, 10), maximum = T) | |
#this will not converge (in standard nelder-mead time) but would otherwise find the mode mode of the posterior without constraint. | |
all_equal_kappa_start=optim(rep(1.67,length(a_js)), fn=post_all_ks_different) | |
#this will converge, and finds the posterior under the constraint kappa_1=kappa_3=kappa_4=kappa_5 | |
diff_kappa_start=optim(rgamma(3,shape=2, rate=2), fn=post_some_ks_different) | |
#this is posterior mode with the constraint kappa_1=kappa_3=kappa_4=kappa_5 (result of previous command) | |
khatbackup=c(3.3144869, 2.6375309, 3.3144869, 3.3144869, 3.3144869, 0.2140745) | |
#unpacking kappa from the mode found under the constraint kappa_1=kappa_3=kappa_4=kappa_5 | |
khat=diff_kappa_start$par[c(1,2,1,1,1,3)] | |
#finding the ps that maximize | |
bd=apply(tr,1,ff, k=khat) | |
#messy handling of the dataframes to get a rank based matrix tr2. | |
bd=t(bd) | |
tr2=cbind(t(apply(tr,1,function(x){x/x[2]})),bd) | |
tr2=as.data.frame(tr2) | |
tr2$name="" | |
rn=rownames(tr2) | |
for(i in 1:nrow(tr2)){ | |
if(rn[i] %in% rownames(country_names)){ | |
tr2$name[i]=as.character(country_names[rn[i],1]) | |
} | |
else if(rn[i] %in% rownames(extra_country_names)){ | |
tr2$name[i]=as.character(extra_country_names[rn[i],1]) | |
} | |
} | |
#more messy handling | |
tmp_cols=colnames(tr2) | |
tmp_cols[(length(tmp_cols)-2):(length(tmp_cols)-1)]=c("rate","posterior") | |
colnames(tr2) <- tmp_cols | |
res=tr2[order(bd[,1]),] | |
res2=res | |
res2[,3:8]=apply(-res[,3:8],2,rank, ties.method="average") | |
res2$pop=tr$pop[order(bd[,1])] | |
res2$rank=nrow(res2):1 | |
#scaling from relative frequency to interpretable numbers | |
ns=c(123, 604, 992, 8397,209) | |
ps=res2[c("NZL","LTU", "RUS","HUN","EST"), "rate"] | |
Ns=res2[c("NZL","LTU", "RUS","HUN","EST"), "pop"] | |
ps=ps[-4] | |
ns=ns[-4] | |
Ns=Ns[-4] | |
mean(ns/(ps*Ns))*ps*Ns | |
scal=mean(ns/(ps*Ns)) | |
res2$no_eos=scal*res2$pop*res2$rate | |
res2$rel_eos=scal*res2$rate | |
#making file to upload to datamaps.co | |
uploadable_csv=sqrt(res2[,c("rel_eos")]) | |
colnames(uploadable_csv)=c() | |
write.table(uploadable_csv, file="uploadable.csv", sep=",", | |
row.names = rownames(res2), quote=F, col.names = F) | |
write.table(uploadable_csv, file="uploadable2.csv", sep=",", | |
row.names = res2$name, quote=F, col.names = F) | |
#(in hindsight there is probably not a good reason to have two tables made) | |
#making table in html | |
library(xtable) | |
d_to_print=data.frame("Rank"=1:nrow(res2), | |
"Frequency"=rev(res2$rel_eos), | |
"Total"=rev(res2$no_eos), | |
"Proportion"=rev(res2$no_eos)/sum(res2$no_eos)) | |
rownames(d_to_print) <- rev(res2$name) | |
xt=xtable(d_to_print, digits = c(0,0,1,1,4)) | |
print(xt, type = 'html') | |
#making another table in html | |
d_to_print2=tr | |
rownames(d_to_print2) <- tr2$name | |
d_to_print2=d_to_print2[,-1] | |
xt=xtable(d_to_print2[c(1:10, nrow(d_to_print2)),], digits = c(0,2,0,0,0,0,0,0)) | |
print(xt, type = 'html') | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#maximizes function 'func' from starting value 'start_val' and lower and upper limits 'lower' and 'upper' | |
#you can only be frustrated about optim so many times before you make your own bloody function. | |
simple_maximize=function(func, start_val, lower=0, upper=1){ | |
step_size=(upper-lower)/10 | |
mins=0 | |
best_pos=start_val | |
best_val=func(start_val) | |
while(mins<50){ | |
left_pos=max(lower, best_pos-step_size) | |
right_pos=min(upper, best_pos+step_size) | |
#print(c(left_pos,right_pos)) | |
left_val=func(left_pos) | |
right_val=func(right_pos) | |
#print(paste(left_val,best_val,right_val)) | |
stuck_count=0 | |
#print(paste(mins,step_size,left_pos,right_pos,left_val,right_val,best_val,best_pos)) | |
while((left_val>best_val | right_val>best_val) &stuck_count<20){ | |
#print("changed_best_pos") | |
i=which.max(c(left_val,right_val)) | |
best_pos=c(left_pos,right_pos)[i] | |
best_val=max(left_val,right_val) | |
left_pos=max(lower, best_pos-step_size) | |
right_pos=min(upper, best_pos+step_size) | |
#print(c(left_pos,right_pos)) | |
left_val=func(left_pos) | |
right_val=func(right_pos) | |
stuck_count=stuck_count+1 | |
} | |
step_size=step_size/2 | |
mins=mins+1 | |
} | |
return(c(best_pos,best_val)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment