cavedave/percentiles.r

## readme.txt
Inspired by this tweet
https://twitter.com/KBAndersen/status/1198653456581562368

data at https://federalreserve.gov/releases/z1/dataviz/dfa/distribute/table/#quarter:119;series:Net%20worth;demographic:generation;population:all;units:shares

US Birthrates per year https://www.infoplease.com/us/births/live-births-and-birth-rates-year
https://www.businessinsider.com/having-babies-is-back-2014-8?r=US&IR=T
Were about 4 million for baby boomers and millenials and 3.5 million of GenX. 2.5 million silent generation

Back of the envelope

77 million boomers
55 million genx
62 million millenials
Baby Boomer=born 1946-1964, Gen X=born 1965-1980, and Millennial=born 1981-1996

current % of population https://www.statista.com/statistics/296974/us-population-share-by-generation/


## percentiles.r
data from https://www.federalreserve.gov/releases/z1/dataviz/dfa/distribute/table/#range:1989.3,2019.2;quarter:119;series:Net%20worth;demographic:networth;population:all;units:shares

```{r}

wealth<-read_csv("dfa-networth-shares.csv")
head(wealth)
```

```{r}

#1989:Q3
wealth = wealth %>%
  mutate(time = gsub(":Q3", ".5", Date), # a with acute
time = gsub(":Q2", ".25", time), # a with acute
    time = gsub(":Q1", "", time), # a with acute
    time = gsub(":Q4", ".75", time) # a with acute
  )

head(wealth)


```


```{r}
wealth$time <- as.numeric(as.character(wealth$time))
head(wealth)
```

```{r}
library(ggplot2)
```

```{r}
names(wealth)[names(wealth) == "Category"] <- "Percentile"
```


```{r}
# stacked area
p<-ggplot(data=wealth, aes(x=time, y=`Net worth`, fill=Percentile)) +#
  geom_area(position="fill", aes(x=time, y=`Net worth`, fill=fill.order))+#
 scale_y_continuous(sec.axis = dup_axis())
```

```{r}
p
```

```{r}
head(wealth)
```

```{r}

#1989:Q3
wealth = wealth %>%
    mutate(Percentile = gsub("Top1", "Top 1%", Percentile), # a with acute
  )

```


```{r}
wealth = wealth %>%
    mutate(Percentile = gsub("Next9", "Next 9%", Percentile), # a with acute
  )
```

```{r}
wealth = wealth %>%
    mutate(Percentile = gsub("Next40", "Next 40%", Percentile), # a with acute
  )
```


```{r}
wealth = wealth %>%
    mutate(Percentile = gsub("Bottom50", "Bottom 50%", Percentile), # a with acute
  )
```


```{r}
fill.order <- factor(wealth$Percentile, levels = c('Top 1%', 'Next 9%', 'Next 40%', 'Bottom 50%'))

```

```{r}
fill <- c("#A891C7","#5EB7E3","#ECC045","#DB5355" )
p2 <- p + scale_fill_manual(values=fill)
p2
```

```{r}
p2<-p2 + theme(panel.background = element_rect(fill = "white", colour = "grey50"),plot.title = element_text(hjust = 0.5))
#p2<-p2+ labs(title = "Fraction of all US Wealth Owned by Each Generation")+xlab("Year") + ylab("Wealth")
p2<-p2+ ggtitle("US Wealth Owned by Income Percentile")+xlab("Year") + ylab("Share of Wealth")
p2
```

```{r}
p2<-p2 + annotate("text", x = 2015, y = -.02, label = "Data: federalreserve.gov", size =3)
p2<-p2 + annotate("text", x = 1992, y = -.02, label = "@iamreddave", size =2.9)
p2
```

```{r}
ggsave("wealthPer.png",height=7, width=12)#
```

## wealth.r
#libraries
library(lubridate)
library(ggplot2)

#load the data

wealth<-read_csv("dfa-generation-shares.csv")
head(wealth)
#Date Category Net worth Assets Real estate Consumer durables
#1989:Q3	Silent	78.7	73.7	67.1	56.9

#2002:Q4	Millennial	net worth went negative -0.1
#which messes up the graph so ill se it to 0
wealth$`Net worth`[wealth$`Net worth` < 0] <- 0

#what is called Silent in the data inlcudes Earlier
wealth = wealth %>%
    mutate(Category = gsub("Silent", "Silent+Earlier", Category),
  )

#turn date in 1989:Q3 format into a number
wealth = wealth %>%
  mutate(time = gsub(":Q3", ".5", Date), # a with acute
time = gsub(":Q2", ".25", time), # a with acute
    time = gsub(":Q1", "", time), # a with acute
    time = gsub(":Q4", ".75", time) # a with acute
  )
#and cast to a number
wealth$time <- as.numeric(as.character(wealth$time))

#Change the name for the legend
names(wealth)[names(wealth) == "Category"] <- "Generation"

#Change the order of the generations not to be alphabetical
fill.order <- factor(wealth$Generation, levels = c('Silent+Earlier', 'BabyBoom', 'GenX', 'Millennial'))

#make a picture
#geom_area is the stacked bar chart
p<-ggplot(data=wealth, aes(x=time, y=`Net worth`, fill=Generation)) +
  geom_area(position="fill", aes(x=time, y=`Net worth`, fill=fill.order))


#Generation colours I got from  https://www.pinterest.ie/pin/312507661629055455/?lp=true
fill <- c("#A891C7","#5EB7E3","#ECC045","#DB5355" )

p2<-p2 + theme(panel.background = element_rect(fill = "white", colour = "grey50"),plot.title = element_text(hjust = 0.5))
p2<-p2+ labs(title = "Fraction of all US Wealth Owned by Each Generation")+xlab("Year") + ylab("Wealth")


p2 <- p + scale_fill_manual(values=fill)
#annotate with points when average age of the generation is 35 to allow more direct comparisons.
p2<-p2 +
  annotate(geom = "point", x = 1990, y = .10, colour = "black", size = 2)

p2<-p2 +
  annotate(geom = "point", x = 2008, y = .045, colour = "black", size = 2)

p2<-p2 +
  annotate(geom = "point", x = 2023, y = .027, colour = "black", size =2)

p2<-p2 + annotate("text", x = 2023, y = .13, label = "Generation\n Average\nAge 35", size =4)
p2<-p2 + annotate("text", x = 2015, y = -.02, label = "Data: federalreserve.gov", size =3)

#Move the legend into the graph to save space
p2<-p2 + theme(
    legend.position = c(.95, .95),
    legend.justification = c("right", "top"),
    legend.box.just = "right",
    #legend.margin = margin(6, 6, 6, 6)
    )
p2

ggsave("wealth3.png",height=7, width=12)
	Inspired by this tweet
	https://twitter.com/KBAndersen/status/1198653456581562368

	data at https://federalreserve.gov/releases/z1/dataviz/dfa/distribute/table/#quarter:119;series:Net%20worth;demographic:generation;population:all;units:shares

	US Birthrates per year https://www.infoplease.com/us/births/live-births-and-birth-rates-year
	https://www.businessinsider.com/having-babies-is-back-2014-8?r=US&IR=T
	Were about 4 million for baby boomers and millenials and 3.5 million of GenX. 2.5 million silent generation

	Back of the envelope

	77 million boomers
	55 million genx
	62 million millenials
	Baby Boomer=born 1946-1964, Gen X=born 1965-1980, and Millennial=born 1981-1996

	current % of population https://www.statista.com/statistics/296974/us-population-share-by-generation/
	data from https://www.federalreserve.gov/releases/z1/dataviz/dfa/distribute/table/#range:1989.3,2019.2;quarter:119;series:Net%20worth;demographic:networth;population:all;units:shares

	```{r}

	wealth<-read_csv("dfa-networth-shares.csv")
	head(wealth)
	```

	```{r}

	#1989:Q3
	wealth = wealth %>%
	mutate(time = gsub(":Q3", ".5", Date), # a with acute
	time = gsub(":Q2", ".25", time), # a with acute
	time = gsub(":Q1", "", time), # a with acute
	time = gsub(":Q4", ".75", time) # a with acute
	)

	head(wealth)


	```


	```{r}
	wealth$time <- as.numeric(as.character(wealth$time))
	head(wealth)
	```

	```{r}
	library(ggplot2)
	```

	```{r}
	names(wealth)[names(wealth) == "Category"] <- "Percentile"
	```


	```{r}
	# stacked area
	p<-ggplot(data=wealth, aes(x=time, y=`Net worth`, fill=Percentile)) +#
	geom_area(position="fill", aes(x=time, y=`Net worth`, fill=fill.order))+#
	scale_y_continuous(sec.axis = dup_axis())
	```

	```{r}
	p
	```

	```{r}
	head(wealth)
	```

	```{r}

	#1989:Q3
	wealth = wealth %>%
	mutate(Percentile = gsub("Top1", "Top 1%", Percentile), # a with acute
	)

	```


	```{r}
	wealth = wealth %>%
	mutate(Percentile = gsub("Next9", "Next 9%", Percentile), # a with acute
	)
	```

	```{r}
	wealth = wealth %>%
	mutate(Percentile = gsub("Next40", "Next 40%", Percentile), # a with acute
	)
	```


	```{r}
	wealth = wealth %>%
	mutate(Percentile = gsub("Bottom50", "Bottom 50%", Percentile), # a with acute
	)
	```


	```{r}
	fill.order <- factor(wealth$Percentile, levels = c('Top 1%', 'Next 9%', 'Next 40%', 'Bottom 50%'))

	```

	```{r}
	fill <- c("#A891C7","#5EB7E3","#ECC045","#DB5355" )
	p2 <- p + scale_fill_manual(values=fill)
	p2
	```

	```{r}
	p2<-p2 + theme(panel.background = element_rect(fill = "white", colour = "grey50"),plot.title = element_text(hjust = 0.5))
	#p2<-p2+ labs(title = "Fraction of all US Wealth Owned by Each Generation")+xlab("Year") + ylab("Wealth")
	p2<-p2+ ggtitle("US Wealth Owned by Income Percentile")+xlab("Year") + ylab("Share of Wealth")
	p2
	```

	```{r}
	p2<-p2 + annotate("text", x = 2015, y = -.02, label = "Data: federalreserve.gov", size =3)
	p2<-p2 + annotate("text", x = 1992, y = -.02, label = "@iamreddave", size =2.9)
	p2
	```

	```{r}
	ggsave("wealthPer.png",height=7, width=12)#
	```
	#libraries
	library(lubridate)
	library(ggplot2)

	#load the data

	wealth<-read_csv("dfa-generation-shares.csv")
	head(wealth)
	#Date Category Net worth Assets Real estate Consumer durables
	#1989:Q3 Silent 78.7 73.7 67.1 56.9

	#2002:Q4 Millennial net worth went negative -0.1
	#which messes up the graph so ill se it to 0
	wealth$`Net worth`[wealth$`Net worth` < 0] <- 0

	#what is called Silent in the data inlcudes Earlier
	wealth = wealth %>%
	mutate(Category = gsub("Silent", "Silent+Earlier", Category),
	)

	#turn date in 1989:Q3 format into a number
	wealth = wealth %>%
	mutate(time = gsub(":Q3", ".5", Date), # a with acute
	time = gsub(":Q2", ".25", time), # a with acute
	time = gsub(":Q1", "", time), # a with acute
	time = gsub(":Q4", ".75", time) # a with acute
	)
	#and cast to a number
	wealth$time <- as.numeric(as.character(wealth$time))

	#Change the name for the legend
	names(wealth)[names(wealth) == "Category"] <- "Generation"

	#Change the order of the generations not to be alphabetical
	fill.order <- factor(wealth$Generation, levels = c('Silent+Earlier', 'BabyBoom', 'GenX', 'Millennial'))

	#make a picture
	#geom_area is the stacked bar chart
	p<-ggplot(data=wealth, aes(x=time, y=`Net worth`, fill=Generation)) +
	geom_area(position="fill", aes(x=time, y=`Net worth`, fill=fill.order))


	#Generation colours I got from https://www.pinterest.ie/pin/312507661629055455/?lp=true
	fill <- c("#A891C7","#5EB7E3","#ECC045","#DB5355" )

	p2<-p2 + theme(panel.background = element_rect(fill = "white", colour = "grey50"),plot.title = element_text(hjust = 0.5))
	p2<-p2+ labs(title = "Fraction of all US Wealth Owned by Each Generation")+xlab("Year") + ylab("Wealth")


	p2 <- p + scale_fill_manual(values=fill)
	#annotate with points when average age of the generation is 35 to allow more direct comparisons.
	p2<-p2 +
	annotate(geom = "point", x = 1990, y = .10, colour = "black", size = 2)

	p2<-p2 +
	annotate(geom = "point", x = 2008, y = .045, colour = "black", size = 2)

	p2<-p2 +
	annotate(geom = "point", x = 2023, y = .027, colour = "black", size =2)

	p2<-p2 + annotate("text", x = 2023, y = .13, label = "Generation\n Average\nAge 35", size =4)
	p2<-p2 + annotate("text", x = 2015, y = -.02, label = "Data: federalreserve.gov", size =3)

	#Move the legend into the graph to save space
	p2<-p2 + theme(
	legend.position = c(.95, .95),
	legend.justification = c("right", "top"),
	legend.box.just = "right",
	#legend.margin = margin(6, 6, 6, 6)
	)
	p2

	ggsave("wealth3.png",height=7, width=12)