Skip to content

Instantly share code, notes, and snippets.

@svendvn
Created March 20, 2018 09:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save svendvn/4dcf664e937ae07857582298ff2bc83c to your computer and use it in GitHub Desktop.
Save svendvn/4dcf664e937ae07857582298ff2bc83c to your computer and use it in GitHub Desktop.
# ---- Load the survey table, clean it, and derive the two plotted scores ----

# Read the parsed table; the first line of the file holds the column names.
ad <- read.csv('parsed_data.txt', header = TRUE)

# Replace every missing cell with 0. as.matrix() coerces a table that mixes
# text and numbers to character, so columns 3..ncol are converted back to
# numeric right afterwards; the first two columns stay as text.
cells <- as.matrix(ad)
cells[is.na(cells)] <- 0
ad <- as.data.frame(cells, stringsAsFactors = FALSE)
ad[3:ncol(ad)] <- lapply(ad[3:ncol(ad)], as.numeric)

# The spreadsheet viewer only makes sense in an interactive session.
if (interactive()) {
  View(ad)
}
colnames(ad)[1] <- 'Rank'

# Quick look at the Speakers column for the first ten rows.
barplot(height = ad$Speakers[1:10], names.arg = ad$Language[1:10])

# Learners per row = Advanced + Intermediate + Beginner, floored at 1 so the
# division and the log() below never see a zero.
level_cols <- c("Advanced", "Intermediate", "Beginner")
ad$Learners <- pmax(rowSums(ad[, level_cols]), 1)

# Share of learners at each level, per row.
normed_d <- ad[, level_cols] / ad$Learners
# Sanity check (printed): row totals of the shares.
rowSums(normed_d)

# PCA on the learner counts (shares multiplied back by the row total), with
# each column scaled to unit variance.
pca_object <- prcomp(normed_d * ad$Learners, scale. = TRUE)

# Score each row by projecting its level shares on the negated second
# principal-component loadings. The result is an n x 1 matrix column.
ad$average_learner_level <- -(as.matrix(normed_d) %*% pca_object$rotation[, 2])
pca_object
pca_object$x[, 3]
#ad$average_learner_level=-pca_object$x[,2]/ad$Learners
ad$log_learners <- log(ad$Learners)

# Only the first 63 rows are plotted.
bd <- ad[1:63, ]
plot(bd$average_learner_level, bd$log_learners)

# Flag image path for each plotted row, taken from the `flag` column of the
# lookup file (first 63 entries, in file order).
flags <- read.csv(file = 'language_to_flag.txt', header = TRUE)
bd$flags <- paste0('flags_big/', flags$flag, '.png')[1:63]
library(ggplot2)
library(ggimage)
# Strip parenthesised qualifiers from a language label.
#
# The label is split on single spaces and every token that starts with "("
# is dropped; the remaining tokens are joined back with single spaces.
#
# s: a single label (character or factor); only the first element is used.
# Returns a length-one character vector, or NA_character_ when `s` is NA.
#
# NOTE: filtering is per token, so only the first word of a multi-word
# parenthetical is removed ("A (b c)" becomes "A c)").
parse_language <- function(s) {
  # as.character() lets factor columns (read.csv on R < 4.0) work too.
  s <- as.character(s)[1]
  if (is.na(s)) {
    return(NA_character_)
  }
  tokens <- strsplit(s, ' ', fixed = TRUE)[[1]]
  # Joining with collapse avoids both the leading separator and the old
  # hard-coded 1000-character cut-off.
  paste(tokens[!startsWith(tokens, '(')], collapse = ' ')
}
# Shift the plotted position of one language by small manual offsets.
#
# df:       data frame whose row names are language names and which has the
#           columns 'average_learner_level' and 'log_learners'.
# language: row name(s) to move.
# amount:   numeric offsets; amount[i] is applied along axis[i].
# axis:     for each offset, 'level' (column 'average_learner_level') or
#           'learner' (column 'log_learners'). Other values are ignored.
#
# Returns the modified copy of `df`. Stops when a character `language` is not
# a row name of `df` (assigning to a missing row name would otherwise append
# a new, mostly-NA row) or when there are more offsets than axes.
adjust <- function(df, language, amount = c(0, 0), axis = c('level', 'learner')) {
  if (is.character(language)) {
    unknown <- setdiff(language, rownames(df))
    if (length(unknown) > 0) {
      stop('unknown language(s): ', paste(unknown, collapse = ', '),
           call. = FALSE)
    }
  }
  if (length(amount) > length(axis)) {
    stop('`amount` has more entries than `axis`', call. = FALSE)
  }
  columns <- c(level = 'average_learner_level', learner = 'log_learners')
  # seq_along() makes an empty `amount` a no-op; 1:length(amount) would
  # iterate over c(1, 0) and add NA to the frame.
  for (i in seq_along(amount)) {
    column <- unname(columns[axis[i]])
    if (is.na(column)) {
      next
    }
    df[language, column] <- df[language, column] + amount[i]
  }
  df
}
# Display name for each plotted row: the Language column with parenthesised
# tokens removed. Entry 58 is overwritten by hand.
languages <- vapply(bd$Language, parse_language, character(1))
languages[58] <- 'Norwegian Bokmal'

# Plotting frame: the two scores plus the flag path, keyed by display name.
df <- bd[, c("average_learner_level", "log_learners", 'flags')]
rownames(df) <- languages

# Hand-tuned offsets, c(level shift, log-learner shift), applied in order.
# Presumably these keep neighbouring flags from overlapping in the scatter
# plot - confirm against the rendered figure.
nudges <- list(
  'French' = c(-0.02, 0.01),
  'Spanish' = c(0.015, 0),
  'Ukrainian' = c(0, -0.06),
  'Greek' = c(-0.01, -0.01),
  'Turkish' = c(0.02, 0),
  'Indonesian' = c(-0.02, 0),
  'Ido' = c(0, 0.08),
  'American Sign Language' = c(0.015, 0),
  'Romanian' = c(0, -0.05),
  'Vietnamese' = c(0.02, 0),
  'Norwegian Bokmal' = c(0, -0.18),
  'Klingon' = c(0, 0.08),
  'Galician' = c(-0.02, 0),
  'Lithuanian' = c(0, -0.08),
  'Cantonese Chinese' = c(-0.005, -0.08),
  'Basque' = c(-0.02, 0.05),
  'Afrikaans' = c(0.015, -0.22)
)
for (nudged in names(nudges)) {
  df <- adjust(df, nudged, amount = nudges[[nudged]])
}
library(png)
library(grid)
# Put a decoded PNG on an opaque RGBA canvas with a black border.
#
# png_mat: array as returned by readPNG(): a rows x cols matrix (gray), or a
#          rows x cols x k array with k = 2 (gray + alpha), 3 (RGB) or
#          4 (RGBA).
# border:  border thickness in pixels on every side.
#
# Returns a (rows + 2 * border) x (cols + 2 * border) x 4 array. Alpha is 1
# everywhere, the border is black, and pixels whose source alpha is exactly 0
# are painted white. Partial transparency is ignored.
frame_flag <- function(png_mat, border = 2) {
  d <- dim(png_mat)
  n_chan <- if (length(d) > 2) d[3] else 1

  if (n_chan >= 3) {
    rgb <- png_mat[, , 1:3, drop = FALSE]
  } else {
    # Gray images: replicate the single intensity plane into R, G and B.
    gray <- if (length(d) > 2) png_mat[, , 1] else png_mat
    rgb <- array(gray, dim = c(d[1], d[2], 3))
  }

  # An alpha plane is the last channel of 2- and 4-channel images.
  if (n_chan == 2 || n_chan == 4) {
    transparent <- png_mat[, , n_chan] == 0
    # The mask is repeated once per colour plane (arrays are column-major).
    rgb[rep(transparent, 3)] <- 1
  }

  frame <- array(0, dim = c(d[1] + 2 * border, d[2] + 2 * border, 4))
  frame[, , 4] <- 1
  frame[border + seq_len(d[1]), border + seq_len(d[2]), 1:3] <- rgb
  frame
}

# One framed image per plotted language, in the row order of `bd`.
imgs <- lapply(bd$flags, function(path) {
  frame_flag(readPNG(file.path(getwd(), path)))
})
# Printed diagnostics for reading the ability axis.

# Smallest and largest ability score after the manual nudges.
range(df$average_learner_level)

# Second principal-component loadings, one per level column.
pc2_loadings <- pca_object$rotation[, 2]
pc2_loadings

# Negated loadings of the Advanced, Intermediate and Beginner columns, i.e.
# the score of a language whose learners are all at that one level.
Alta <- -pc2_loadings[[1]]
Meza <- -pc2_loadings[[2]]
Baza <- -pc2_loadings[[3]]

# Solve (Baza - Meza) * x = min - Meza, and the analogous equation between
# Baza and Alta for the maximum: where each extreme sits relative to the
# pure-level scores.
solve(Baza - Meza, min(df$average_learner_level) - Meza)
solve(Baza - Alta, max(df$average_learner_level) - Alta)
# ---- Figures: flag scatter plot and flag legend, in English and Esperanto ----

# One annotation_custom() layer per flag, centred on (x[i], y[i]).
#
# images:      list of raster arrays, indexed by `ids`.
# x, y:        centre coordinates in data units.
# ids:         index into `images` for each point; also used as grob name.
# half_width,
# half_height: half extents of the drawn image in data units.
#
# Returns a list of layers that can be added to a ggplot with `+`.
flag_layers <- function(images, x, y, ids, half_width, half_height) {
  mapply(
    function(xx, yy, id) {
      g <- rasterGrob(images[[id]], interpolate = FALSE)
      g$name <- id
      annotation_custom(g,
                        xmin = xx - half_width, xmax = xx + half_width,
                        ymin = yy - half_height, ymax = yy + half_height)
    },
    x, y, ids,
    # Always hand ggplot a plain list, never a simplified array.
    SIMPLIFY = FALSE
  )
}

# Scatter of flags: ability score on x, log learner count on y.
# Only the axis titles and the plot title differ between language versions.
flag_scatter <- function(x_title, y_title, title) {
  ggplot(df, aes(average_learner_level, log_learners)) +
    flag_layers(imgs, df$average_learner_level, df$log_learners,
                seq_len(nrow(df)), half_width = 0.022, half_height = 0.11) +
    geom_blank() +
    theme_light() +
    theme(axis.text = element_text(size = 16),
          plot.title = element_text(hjust = 0.5, size = 20, face = 'bold'),
          axis.title = element_text(size = 16)) +
    # NOTE(review): the two tick labels are hard-coded while the breaks come
    # from the data; presumably they were the rounded range when the figure
    # was made - confirm if the input data changes.
    scale_x_continuous(x_title, breaks = range(df$average_learner_level),
                       minor_breaks = NULL, labels = c('-0.61', '0.27')) +
    # y is log(learners), so powers of ten sit at multiples of log(10).
    scale_y_continuous(y_title, breaks = log(10) * 1:4,
                       labels = c('10', '100', '1000', '10000'),
                       limits = log(10) * c(1, 4)) +
    ggtitle(title)
}

# Two-column legend: each flag followed by the label chosen in `mapping`.
flag_legend <- function(mapping) {
  ggplot(df2, mapping) +
    flag_layers(imgs, df2$x, df2$y, df2$id,
                half_width = 1, half_height = 0.45) +
    geom_blank() +
    theme_void() +
    geom_text(hjust = 0, nudge_x = 0.07) +
    scale_x_continuous(limits = c(1, 1.85))
}

# Draw the plot on the current device and write it to `filename`. The plot is
# passed to ggsave() explicitly so the file never depends on whatever plot
# happened to be built last.
show_and_save <- function(p, filename, width, height) {
  print(p)
  ggsave(filename = filename, plot = p, width = width, height = height)
}

show_and_save(
  flag_scatter('Average Self-Assessed Ability', 'Number of Learners',
               'Users of Amikumu'),
  filename = 'main_plot.png', width = 10, height = 10
)
show_and_save(
  flag_scatter('Averaga Memtaksita Lingva Nivelo', 'Nombro de Lernantoj',
               'Uzantoj de Amikumu'),
  filename = 'main_plot_eo.png', width = 10, height = 10
)

# Legend layout: 63 entries in two columns. The first 32 run from y = 32 down
# to 1 at x = 1, the remaining 31 from y = 32 down to 2 at x = 1.4.
df2 <- data.frame(id = 1:63,
                  y = c(32:1, (63 - 32 + 1):2),
                  x = c(rep(1, 32), rep(1.4, 63 - 32)),
                  language = languages,
                  languages_eo = flags$Language_eo)

show_and_save(flag_legend(aes(x, y, label = language)),
              filename = 'legend.png', width = 4.5, height = 10)
show_and_save(flag_legend(aes(x, y, label = languages_eo)),
              filename = 'legend_eo.png', width = 4.5, height = 10)
# ---- Ad-hoc checks (printed, nothing is saved) ----
# A stray `ggplot(df, aes)` call used to sit here. It passed the `aes`
# function itself as the mapping, which ggplot() rejects with an error, so a
# source() of this file stopped before reaching the lines below.

# Range of the learner counts in powers of ten.
range(bd$log_learners/log(10))

# Small trial of ggimage::geom_image on rows 2-5, using the flag file paths.
ggplot(data=bd[2:5,], aes(x=average_learner_level,y=log_learners))+geom_image(aes(image=flags))

# Linear fit of log learner count on ability score (un-nudged values in bd).
summary(lm(log_learners ~ average_learner_level, data=bd))

# Ability score per language, for inspection.
bd[,c("Language","average_learner_level")]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment