This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the parsed language table. Strings are kept as character so the
# language names can be split with strsplit() later in the script.
ad <- read.csv('parsed_data.txt', header = TRUE, stringsAsFactors = FALSE)

# Missing cells count as zero. The replacement is done column by column;
# pushing the whole data frame through apply() would first coerce it to a
# character matrix, which runs every numeric cell through format().
ad[is.na(ad)] <- 0

# Everything after the first two columns is forced to numeric. Negative
# indexing is a no-op on a table with two or fewer columns.
ad[-(1:2)] <- lapply(ad[-(1:2)], as.numeric)

# The spreadsheet viewer is only opened in an interactive session.
if (interactive()) {
  View(ad)
}

colnames(ad)[1] <- 'Rank'
# Quick look at the speaker counts of the first ten rows.
barplot(height = ad$Speakers[1:10], names.arg = ad$Language[1:10])

# Learners = Advanced + Intermediate + Beginner, floored at 1 so that the
# division below never divides by zero.
ad$Learners <- rowSums(ad[, c("Advanced", "Intermediate", "Beginner")])
ad$Learners <- pmax(ad$Learners, 1)

# Share of learners at each level; the row sums are printed as a check.
normed_d <- ad[, c("Advanced", "Intermediate", "Beginner")] / ad$Learners
rowSums(normed_d)

# PCA on the shares multiplied back by the learner totals, with scaling.
pca_object <- prcomp(normed_d * ad$Learners, scale. = TRUE)

# Score each language by projecting its level shares onto the second
# rotation column, with the sign flipped.
ad$average_learner_level <- -(as.matrix(normed_d) %*% pca_object$rotation[, 2])

pca_object
pca_object$x[, 3]
# Alternative scoring kept from the original, disabled:
# ad$average_learner_level=-pca_object$x[,2]/ad$Learners

ad$log_learners <- log(ad$Learners)

# Only the first 63 rows are carried into the plots.
bd <- ad[seq_len(63), ]
plot(bd$average_learner_level, bd$log_learners)
library(ggplot2)
library(ggimage)

# Language-to-flag lookup table; its `flag` column holds the PNG base names.
flags <- read.csv(file = 'language_to_flag.txt', header = TRUE)

# Path of each flag image, matched to the rows of `bd` by position.
bd[['flags']] <- paste0('flags_big/', flags$flag, '.png')[seq_len(63)]
# Remove parenthesised qualifiers from a language label.
#
# The label is split on single spaces and every word that begins with "("
# is dropped; the remaining words are joined back with single spaces.
#
# s: a character string (only the first element is used).
# Returns a single character string, or NA_character_ when `s` is NA.
#
# NOTE(review): only words that themselves start with "(" are removed, so a
# multi-word qualifier such as "X (a b)" comes back as "X b)". Left as is
# because row names used later in the script depend on the current output.
parse_language <- function(s) {
  words <- strsplit(s, ' ')[[1]]
  if (anyNA(words)) {
    return(NA_character_)
  }
  kept <- words[substr(words, 1, 1) != '(']
  # Joining with `collapse` avoids growing a string in a loop and the fixed
  # 1000-character cut-off that came with trimming the leading separator.
  paste(kept, collapse = ' ')
}
# Shift the plotted position of one language by small offsets.
#
# df:       data frame with columns 'average_learner_level' and
#           'log_learners', indexed by row name (or row number).
# language: row name(s) (or row index) of the entry to move.
# amount:   numeric offsets; amount[i] is applied along axis[i].
# axis:     for each offset, 'level' (average_learner_level) or 'learner'
#           (log_learners). Any other value is ignored.
#
# Returns the modified data frame. If `language` is a name that is not a
# row of `df`, a warning is raised and `df` is returned unchanged, rather
# than a new all-NA row being appended by the assignment.
adjust <- function(df, language, amount = c(0, 0), axis = c('level', 'learner')) {
  stopifnot(length(amount) <= length(axis))

  if (is.character(language)) {
    unknown <- setdiff(language, rownames(df))
    if (length(unknown) > 0) {
      warning('adjust(): no row named ', paste(unknown, collapse = ', '),
              '; data frame left unchanged', call. = FALSE)
      return(df)
    }
  }

  target_column <- c(level = 'average_learner_level', learner = 'log_learners')

  # seq_along() keeps the loop from running at all when `amount` is empty.
  for (i in seq_along(amount)) {
    column <- unname(target_column[axis[i]])
    if (!is.na(column)) {
      df[language, column] <- df[language, column] + amount[i]
    }
  }
  df
}
# Display names: the Language column with parenthesised words removed.
languages <- sapply(bd$Language, parse_language)
languages[58] <- 'Norwegian Bokmal'

# Plotting table, one row per language, indexed by display name.
df <- bd[, c("average_learner_level", "log_learners", 'flags')]
rownames(df) <- languages

# Hand-tuned offsets, c(level shift, learner shift), applied one language at
# a time in the order listed.
df <- local({
  nudges <- list(
    'French'                 = c(-0.02, 0.01),
    'Spanish'                = c(0.015, 0),
    'Ukrainian'              = c(0, -0.06),
    'Greek'                  = c(-0.01, -0.01),
    'Turkish'                = c(0.02, 0),
    'Indonesian'             = c(-0.02, 0),
    'Ido'                    = c(0, 0.08),
    'American Sign Language' = c(0.015, 0),
    'Romanian'               = c(0, -0.05),
    'Vietnamese'             = c(0.02, 0),
    'Norwegian Bokmal'       = c(0, -0.18),
    'Klingon'                = c(0, 0.08),
    'Galician'               = c(-0.02, 0),
    'Lithuanian'             = c(0, -0.08),
    'Cantonese Chinese'      = c(-0.005, -0.08),
    'Basque'                 = c(-0.02, 0.05),
    'Afrikaans'              = c(0.015, -0.22)
  )
  shifted <- df
  for (lang in names(nudges)) {
    shifted <- adjust(shifted, lang, amount = nudges[[lang]])
  }
  shifted
})
library(png)
library(grid)

# Put a decoded PNG on an opaque canvas with a black border.
#
# png_mat: pixel data as returned by readPNG(): a height x width matrix for
#          a single channel, or a height x width x channels array.
# border:  border width in pixels on every side.
#
# Returns a (height + 2*border) x (width + 2*border) x 4 array whose alpha
# channel is 1 everywhere. A single-channel image is copied into all three
# colour channels. Pixels whose alpha is exactly 0 are painted white.
# Two-channel (grey + alpha) input is handled the same way instead of
# failing on a request for channels 1:3.
frame_flag <- function(png_mat, border = 2) {
  d <- dim(png_mat)
  height <- d[1]
  width <- d[2]
  rows <- border + seq_len(height)
  cols <- border + seq_len(width)

  framed <- array(0, dim = c(height + 2 * border, width + 2 * border, 4))
  framed[, , 4] <- 1

  if (length(d) == 2) {
    # Single channel: array() recycles the matrix into three identical layers.
    colour <- array(png_mat, dim = c(height, width, 3))
  } else {
    n_channels <- d[3]
    colour_idx <- if (n_channels >= 3) 1:3 else c(1, 1, 1)
    colour <- png_mat[, , colour_idx, drop = FALSE]
    if (n_channels %in% c(2, 4)) {
      # The last channel is alpha; whiten fully transparent pixels in all
      # three colour layers at once.
      transparent <- png_mat[, , n_channels] == 0
      colour[rep(transparent, times = 3)] <- 1
    }
  }

  framed[rows, cols, 1:3] <- colour
  framed
}

# One framed image per plotted language, in the row order of `bd`.
imgs <- vector('list', 63)
for (i in seq_len(63)) {
  print(i)  # progress indicator
  # bd$flags holds paths relative to the working directory.
  imgs[[i]] <- frame_flag(readPNG(bd$flags[i]))
}
# Span of the ability score after the manual nudges.
range(df$average_learner_level)

# Second rotation column of the PCA, then its first three entries with the
# sign flipped.
# NOTE(review): the names look like Esperanto for high / middle / basic,
# matching the Advanced / Intermediate / Beginner column order - confirm.
pca_object$rotation[, 2]
Alta <- -pca_object$rotation[1, 2]
Meza <- -pca_object$rotation[2, 2]
Baza <- -pca_object$rotation[3, 2]

# Scalar solves, printed for reference:
#   (Baza - Meza) * w = min(score) - Meza
#   (Baza - Alta) * w = max(score) - Alta
solve(Baza - Meza, min(df$average_learner_level) - Meza)
solve(Baza - Alta, max(df$average_learner_level) - Alta)
# Scatter plot of languages drawn as flag images.
#
# points:     data frame with columns average_learner_level and log_learners.
# flag_imgs:  list of raster arrays, one per row of `points`, same order.
# x_title, y_title, main_title: axis titles and plot title.
#
# Returns the ggplot object. The x axis has ticks only at the two ends of
# the observed score range; the y axis is log(learners), labelled in powers
# of ten and limited to 10..10000.
# NOTE(review): the x tick labels '-0.61' and '0.27' are fixed strings, not
# derived from the data - confirm they still match if the data change.
flag_scatter <- function(points, flag_imgs, x_title, y_title, main_title) {
  flag_layers <- Map(
    function(xx, yy, id) {
      g <- rasterGrob(flag_imgs[[id]], interpolate = FALSE)
      g$name <- id
      annotation_custom(g,
                        xmin = xx - 0.022, xmax = xx + 0.022,
                        ymin = yy - 0.11, ymax = yy + 0.11)
    },
    points$average_learner_level,
    points$log_learners,
    seq_len(nrow(points))
  )

  ggplot(points, aes(average_learner_level, log_learners)) +
    flag_layers +
    geom_blank() +
    theme_light() +
    theme(axis.text = element_text(size = 16),
          plot.title = element_text(hjust = 0.5, size = 20, face = 'bold'),
          axis.title = element_text(size = 16)) +
    scale_x_continuous(x_title,
                       breaks = range(points$average_learner_level),
                       minor_breaks = NULL,
                       labels = c('-0.61', '0.27')) +
    scale_y_continuous(y_title,
                       breaks = log(10) * 1:4,
                       labels = c('10', '100', '1000', '10000'),
                       limits = c(log(10), log(10) * 4)) +
    ggtitle(main_title)
}

# English version: display, then save that exact object.
main_plot_en <- flag_scatter(df, imgs,
                             'Average Self-Assessed Ability',
                             'Number of Learners',
                             'Users of Amikumu')
main_plot_en
ggsave(filename = 'main_plot.png', plot = main_plot_en, width = 10, height = 10)

# Esperanto version.
main_plot_eo <- flag_scatter(df, imgs,
                             'Averaga Memtaksita Lingva Nivelo',
                             'Nombro de Lernantoj',
                             'Uzantoj de Amikumu')
main_plot_eo
ggsave(filename = 'main_plot_eo.png', plot = main_plot_eo, width = 10, height = 10)
# Legend layout: 63 entries in two columns. The first 32 sit at x = 1 with
# y running 32 down to 1; the remaining 31 sit at x = 1.4 with y running 32
# down to 2. The Esperanto names are taken by position, like the flag paths.
df2 <- data.frame(
  id = 1:63,
  y = c(32:1, (63 - 32 + 1):2),
  x = c(rep(1, 32), rep(1.4, 63 - 32)),
  language = languages,
  languages_eo = flags$Language_eo[1:63]
)

# Flag-plus-label legend.
#
# layout:    data frame with columns id, x, y and the label column named in
#            `mapping`.
# flag_imgs: list of raster arrays indexed by layout$id.
# mapping:   aes() mapping giving x, y and label.
#
# Returns the ggplot object: each flag is centred on (x, y) and its text
# label is left-aligned, nudged 0.07 to the right.
flag_legend <- function(layout, flag_imgs, mapping) {
  flag_layers <- Map(
    function(xx, yy, i) {
      g <- rasterGrob(flag_imgs[[i]], interpolate = FALSE)
      g$name <- i
      annotation_custom(g,
                        xmin = xx - 1, xmax = xx + 1,
                        ymin = yy - 0.45, ymax = yy + 0.45)
    },
    layout$x,
    layout$y,
    layout$id
  )

  ggplot(layout, mapping) +
    flag_layers +
    geom_blank() +
    theme_void() +
    geom_text(hjust = 0, nudge_x = 0.07) +
    scale_x_continuous(limits = c(1, 1.85))
}

# English legend: display, then save that exact object.
legend_en <- flag_legend(df2, imgs, aes(x, y, label = language))
legend_en
ggsave(filename = 'legend.png', plot = legend_en, height = 10, width = 4.5)

# Esperanto legend.
legend_eo <- flag_legend(df2, imgs, aes(x, y, label = languages_eo))
legend_eo
ggsave(filename = 'legend_eo.png', plot = legend_eo, height = 10, width = 4.5)
# Exploratory checks.

# Disabled: this passed the function `aes` itself as the mapping, which
# ggplot() rejects with an error and which halts a sourced run.
# ggplot(df, aes)

# Range of the learner counts in powers of ten.
range(bd$log_learners / log(10))

# Small test of ggimage on rows 2 to 5.
ggplot(data = bd[2:5, ], aes(x = average_learner_level, y = log_learners)) +
  geom_image(aes(image = flags))

# Linear fit of log learner count on the ability score.
summary(lm(log_learners ~ average_learner_level, data = bd))

bd[, c("Language", "average_learner_level")]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment