# vanatteveldt/capital.R

## capital.R

# The script relies on readr (read_csv), dplyr (joins), tidyr (gather,
# separate, spread) and the %>% pipe; attach them so it runs in a fresh session.
library(tidyverse)

# Get Piketty's data on public and private capital accumulation.
base <- "https://raw.githubusercontent.com/ccs-amsterdam/r-course-material/master/data"
private <- read_csv(paste(base, "private_capital.csv", sep = "/"))
public <- read_csv(paste(base, "public_capital.csv", sep = "/"))

# Print both data sets to inspect them.
private
public

# Goal: the correlation between private and public capital.
# For a single country this is easy: join the two data sets on Year, letting
# the suffixes tell the private and public country columns apart.

d <- private %>%
  inner_join(public, by = "Year", suffix = c("_private", "_public"))
cor.test(d$U.S._private, d$U.S._public)

# The merged data, however, is awkward for computing the overall (pooled)
# correlation. One way to do it: gather all country columns into a single
# "country_type" column, separate that into country and type, and spread the
# type back out into one column per type.

d2 <- d %>%
  gather(key = "country_type", value = "capital", -Year) %>%
  separate(country_type, into = c("country", "type"), sep = "_") %>%
  spread(key = "type", value = "capital")
cor.test(d2$private, d2$public)

# The approach above first had to combine country and type into one column and
# then separate and spread it again. That is more cumbersome than needed: it is
# a lot easier if we 'tidy' up each data set first, so that each one has a
# Year, a Country and a single value column.

priv <- private %>% gather(-Year, key = "Country", value = "Private")
pub <- public %>% gather(-Year, key = "Country", value = "Public")

# Now we can just join and test. The join keys are spelled out explicitly so
# the join does not depend on (or print a message about) guessing the columns
# the two data sets have in common.
# NOTE(review): the variable name `c` masks base::c(); it is kept unchanged in
# case other code refers to it.
c <- full_join(priv, pub, by = c("Year", "Country"))
cor.test(c$Private, c$Public)

	# Fetch Piketty's data on public and private capital accumulation.
	base <- "https://raw.githubusercontent.com/ccs-amsterdam/r-course-material/master/data"
	private <- read_csv(paste0(base, "/", "private_capital.csv"))
	public <- read_csv(paste0(base, "/", "public_capital.csv"))

	# Show both data sets.
	private
	public

	# Goal: the correlation between private and public capital.
	# For a single country this is easy: join the two data sets on Year, letting
	# the suffixes tell the private and public country columns apart.

	d <- private %>%
	  inner_join(public, by = "Year", suffix = c("_private", "_public"))
	cor.test(d$U.S._private, d$U.S._public)

	# The merged data, however, is awkward for computing the overall (pooled)
	# correlation. One way to do it: gather all country columns into a single
	# "country_type" column, separate that into country and type, and spread the
	# type back out into one column per type.

	d2 <- d %>%
	  gather(key = "country_type", value = "capital", -Year) %>%
	  separate(country_type, into = c("country", "type"), sep = "_") %>%
	  spread(key = "type", value = "capital")
	cor.test(d2$private, d2$public)

	# The approach above first had to combine country and type into one column and
	# then separate and spread it again. That is more cumbersome than needed: it is
	# a lot easier if we 'tidy' up each data set first, so that each one has a
	# Year, a Country and a single value column.

	priv <- private %>% gather(-Year, key = "Country", value = "Private")
	pub <- public %>% gather(-Year, key = "Country", value = "Public")

	# Now we can just join and test. The join keys are spelled out explicitly so
	# the join does not depend on (or print a message about) guessing the columns
	# the two data sets have in common.
	# NOTE(review): the variable name `c` masks base::c(); it is kept unchanged in
	# case other code refers to it.
	c <- full_join(priv, pub, by = c("Year", "Country"))
	cor.test(c$Private, c$Public)