Svend Vendelbo Nielsen svendvn

## mountains2.txt
"V1","V2","V3","V4"
"Everest",8849,688.582,"Indian Ocean"
"Kanchenjunga",8586,670,"Indian Ocean"
"Chomolhari",7090,614,"Indian Ocean"
"Gangkhar Puensum",7570,605,"Indian Ocean"
"Chimborazo",6263.47,175,"Pacific"
"Mishahuanga",4118,91,"Pacific"
"Huascaran",6746,97.7,"Pacific"
"Coropuna",6405,112,"Pacific"
"El Misti",5822,103,"Pacific"

## esperantujodirectory2.txt
name ujo pop uea lernu www pas edu nat
AFG afghanistan 0 27.7 1 48 - 0 0 0
ALB albanien 0 2.8 14 64 0.6 1 5 33
DZA algeriet 1 39.2 6 503 0.15 1 8 0
AND andorra - 0.078 0 167 - 0 8 0
AGO angola 1 21.5 2 42 0.19 0 0 -
ARG argentino 27 41.5 29 1965 0.60 16 61 140
ARM armenien 0 3.0 12 92 - 0 1 27
AUS australio 23 23.1 40 2568 0.83 16 52 130
AUT austria 4 8.6 44 780 0.81 8 23 79

## calculate_LDN_and_MDS.Rmd
---
title: "Linguistic differences"
output: html_notebook
---

Read in all the data. In the working directory, I have put several files from the ASJP database. Any subset of files will do.

```{r}
# Collect the CSV files in the working directory. `pattern` is a regular
# expression, so the dot is escaped and the match is anchored to the end of
# the name; a bare ".csv" would also match names such as "xcsv" or "a.csv.bak".
lf <- list.files(pattern = "\\.csv$")

## Linguistic Diversity.txt
CODE	Country	Count	Percent	Established	Immigrant	Total	Mean	Median	Index	Coverage
AFG	Afghanistan	42	0.59	41	1	22,964,800	560,117	8,000	0.790	98%
ALB	Albania	12	0.17	8	4	2,801,786	280,179	4,220	0.503	83%
DZA	Algeria	21	0.30	18	3	33,135,600	1,743,979	40,000	0.360	90%
-	American Samoa	7	0.10	2	5	55,910	9,318	25,800	0.210	86%
AND	Andorra	5	0.07	4	1	74,270	14,854	19,650	0.671	100%
AGO	Angola	40	0.56	40	0	23,511,670	602,863	43,900	0.748	98%
-	Anguilla	2	0.03	2	0	12,450	6,225	6,225	0.141	100%
-	Antigua and Barbuda	5	0.07	2	3	135,000	33,750	66,500	0.515	80%
ARG	Argentina	38	0.54	24	14	44,146,270	1,337,766	8,410	0.165	87%

## babynames.R
# Read the raw table without a header row (columns get default V1, V2, ...
# names). `FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned.
a <- read.table('usa_big.txt', header = FALSE)
# Drop the final character of every element of `s` and convert what is left
# to numeric (presumably to strip a trailing unit such as "%" -- confirm
# against the input file).
#
# s: character vector; factors and other atomic vectors are coerced to
#    character first.
# Returns a numeric vector of the same length; elements that do not parse as
# numbers become NA (with the usual coercion warning).
remove_and_make_numeric <- function(s) {
  # nchar() raises an error on factors, which read.table() produces under
  # stringsAsFactors = TRUE, so normalise to character up front.
  s <- as.character(s)
  as.numeric(substr(s, start = 1, stop = nchar(s) - 1))
}

summarize=function(x){
  y=x/100
  within=sum(y)
  res=0
  for(i in 1:length(y)){

## amikumu_plot.R
# Load the parsed table, plot the first ten rows and compute, per row, the
# share of learners at each proficiency level.
ad <- read.csv('parsed_data.txt', header = TRUE, stringsAsFactors = FALSE)

# Replace missing entries with 0 directly on the data frame. Going through
# apply() would convert the frame to a character matrix first, and that
# conversion format()s numeric columns (7 significant digits by default),
# silently losing precision.
ad[is.na(ad)] <- 0

# Force every column from the third onwards to numeric; the first two columns
# keep the types read.csv() gave them.
value_cols <- 3:ncol(ad)
ad[value_cols] <- lapply(ad[value_cols], as.numeric)

# The spreadsheet-style viewer is only useful in an interactive session.
if (interactive()) {
  View(ad)
}

colnames(ad)[1] <- 'Rank'

# Bar chart of the first ten rows; head() avoids NA padding when the table
# has fewer than ten rows.
top_rows <- head(ad, 10)
barplot(height = top_rows$Speakers, names.arg = top_rows$Language)

# Learners per row, floored at 1 so the normalisation below never divides
# by zero.
level_cols <- c("Advanced", "Intermediate", "Beginner")
ad$Learners <- pmax(rowSums(ad[, level_cols]), 1)
normed_d <- ad[, level_cols] / ad$Learners

# Print the row totals as a sanity check: assuming non-negative integer
# counts, each is 1 for rows with learners and 0 for rows without.
rowSums(normed_d)

## all_pairwise_distances.csv
language1,language2,Distance
AFAR,AFAR,0
AFAR,AFRIKAANS,0.967213114754098
AFAR,ALBANIAN,0.918269230769231
AFAR,AMHARIC,0.873170731707317
AFAR,AMOY_MINNAN_CHINESE,0.942982456140351
AFAR,AZERBAIJANI_NORTH,0.9375
AFAR,B41_SHIRA,0.919540229885057
AFAR,BAMBARA,0.92948717948718
AFAR,BANGANDU,0.923076923076923
	"V1","V2","V3","V4"
	"Everest",8849,688.582,"Indian Ocean"
	"Kanchenjunga",8586,670,"Indian Ocean"
	"Chomolhari",7090,614,"Indian Ocean"
	"Gangkhar Puensum",7570,605,"Indian Ocean"
	"Chimborazo",6263.47,175,"Pacific"
	"Mishahuanga",4118,91,"Pacific"
	"Huascaran",6746,97.7,"Pacific"
	"Coropuna",6405,112,"Pacific"
	"El Misti",5822,103,"Pacific"
	name ujo pop uea lernu www pas edu nat
	AFG afghanistan 0 27.7 1 48 - 0 0 0
	ALB albanien 0 2.8 14 64 0.6 1 5 33
	DZA algeriet 1 39.2 6 503 0.15 1 8 0
	AND andorra - 0.078 0 167 - 0 8 0
	AGO angola 1 21.5 2 42 0.19 0 0 -
	ARG argentino 27 41.5 29 1965 0.60 16 61 140
	ARM armenien 0 3.0 12 92 - 0 1 27
	AUS australio 23 23.1 40 2568 0.83 16 52 130
	AUT austria 4 8.6 44 780 0.81 8 23 79
	---
	title: "Linguistic differences"
	output: html_notebook
	---

	Read in all the data. In the working directory, I have put several files from the ASJP database. Any subset of files will do.

	```{r}
	# Collect the CSV files in the working directory. `pattern` is a regular
	# expression, so the dot is escaped and the match is anchored to the end of
	# the name; a bare ".csv" would also match names such as "xcsv" or "a.csv.bak".
	lf <- list.files(pattern = "\\.csv$")
	CODE Country Count Percent Established Immigrant Total Mean Median Index Coverage
	AFG Afghanistan 42 0.59 41 1 22,964,800 560,117 8,000 0.790 98%
	ALB Albania 12 0.17 8 4 2,801,786 280,179 4,220 0.503 83%
	DZA Algeria 21 0.30 18 3 33,135,600 1,743,979 40,000 0.360 90%
	- American Samoa 7 0.10 2 5 55,910 9,318 25,800 0.210 86%
	AND Andorra 5 0.07 4 1 74,270 14,854 19,650 0.671 100%
	AGO Angola 40 0.56 40 0 23,511,670 602,863 43,900 0.748 98%
	- Anguilla 2 0.03 2 0 12,450 6,225 6,225 0.141 100%
	- Antigua and Barbuda 5 0.07 2 3 135,000 33,750 66,500 0.515 80%
	ARG Argentina 38 0.54 24 14 44,146,270 1,337,766 8,410 0.165 87%
	# Read the raw table without a header row (columns get default V1, V2, ...
	# names). `FALSE` is spelled out because `F` is an ordinary variable that can
	# be reassigned.
	a <- read.table('usa_big.txt', header = FALSE)
	# Drop the final character of every element of `s` and convert what is left
	# to numeric (presumably to strip a trailing unit such as "%" -- confirm
	# against the input file).
	#
	# s: character vector; factors and other atomic vectors are coerced to
	#    character first.
	# Returns a numeric vector of the same length; elements that do not parse as
	# numbers become NA (with the usual coercion warning).
	remove_and_make_numeric <- function(s) {
	  # nchar() raises an error on factors, which read.table() produces under
	  # stringsAsFactors = TRUE, so normalise to character up front.
	  s <- as.character(s)
	  as.numeric(substr(s, start = 1, stop = nchar(s) - 1))
	}

	summarize=function(x){
	y=x/100
	within=sum(y)
	res=0
	for(i in 1:length(y)){
	# Load the parsed table, plot the first ten rows and compute, per row, the
	# share of learners at each proficiency level.
	ad <- read.csv('parsed_data.txt', header = TRUE, stringsAsFactors = FALSE)
	# Replace missing entries with 0 directly on the data frame. Going through
	# apply() would convert the frame to a character matrix first, and that
	# conversion format()s numeric columns (7 significant digits by default),
	# silently losing precision.
	ad[is.na(ad)] <- 0
	# Force every column from the third onwards to numeric; the first two columns
	# keep the types read.csv() gave them.
	value_cols <- 3:ncol(ad)
	ad[value_cols] <- lapply(ad[value_cols], as.numeric)
	# The spreadsheet-style viewer is only useful in an interactive session.
	if (interactive()) {
	  View(ad)
	}
	colnames(ad)[1] <- 'Rank'
	# Bar chart of the first ten rows; head() avoids NA padding when the table
	# has fewer than ten rows.
	top_rows <- head(ad, 10)
	barplot(height = top_rows$Speakers, names.arg = top_rows$Language)
	# Learners per row, floored at 1 so the normalisation below never divides
	# by zero.
	level_cols <- c("Advanced", "Intermediate", "Beginner")
	ad$Learners <- pmax(rowSums(ad[, level_cols]), 1)
	normed_d <- ad[, level_cols] / ad$Learners
	# Print the row totals as a sanity check: assuming non-negative integer
	# counts, each is 1 for rows with learners and 0 for rows without.
	rowSums(normed_d)
	language1,language2,Distance
	AFAR,AFAR,0
	AFAR,AFRIKAANS,0.967213114754098
	AFAR,ALBANIAN,0.918269230769231
	AFAR,AMHARIC,0.873170731707317
	AFAR,AMOY_MINNAN_CHINESE,0.942982456140351
	AFAR,AZERBAIJANI_NORTH,0.9375
	AFAR,B41_SHIRA,0.919540229885057
	AFAR,BAMBARA,0.92948717948718
	AFAR,BANGANDU,0.923076923076923