RohanAlexander/miscRBitsAndPieces.R

## miscRBitsAndPieces.R
# uncount() can be useful for expanding counts back into one row per
# observation, for instance when making histograms.
test <- tibble(
  word = rep(c("Rohan", "Monica", "Bayes"), times = c(2, 6, 4)),
  date = c(
    1998, 1999,
    rep(c(1998, 1999), each = 3),
    rep(1998, times = 3), 1999
  )
)
# Tally each (date, word) pair, keep the pairs seen more than twice, then
# expand the tallies back into one row per occurrence.
test %>%
  count(date, word) %>%
  filter(n > 2) %>%
  uncount(n)


# case_when can be good for making decades, with an optional summarise by
# those decades at the end.
# NOTE(review): assumes `date` is a Date column -- confirm against the data.
# The cut-offs must be real dates: an unquoted 1909-12-31 is arithmetic
# (1909 - 12 - 31 = 1866), not a date. Each bound is the first day of the
# next decade so that 31 December stays in its own decade.
# Dates before 1900 still fall into "1900s", and anything from 2010 onwards
# into "2010s", as in the original catch-all layout.
data <- data %>%
  mutate(decade = case_when(
    # Keep missing dates missing instead of letting them hit the catch-all.
    is.na(date) ~ NA_character_,
    date < as.Date("1910-01-01") ~ "1900s",
    date < as.Date("1920-01-01") ~ "1910s",
    date < as.Date("1930-01-01") ~ "1920s",
    date < as.Date("1940-01-01") ~ "1930s",
    date < as.Date("1950-01-01") ~ "1940s",
    date < as.Date("1960-01-01") ~ "1950s",
    date < as.Date("1970-01-01") ~ "1960s",
    date < as.Date("1980-01-01") ~ "1970s",
    date < as.Date("1990-01-01") ~ "1980s",
    date < as.Date("2000-01-01") ~ "1990s",
    date < as.Date("2010-01-01") ~ "2000s",
    TRUE ~ "2010s"
  )) %>%
  # Average gamma within each decade/topic combination.
  group_by(decade, topic) %>%
  summarise(ave_gamma = mean(gamma))


# Supply col_types to avoid the column-specification message (described as a
# warning in the original note) with read_csv/read_csv2. This is meant to be
# passed as an argument, e.g. read_csv("file.csv", col_types = cols()).
# NOTE(review): as written below it is a standalone assignment to a variable
# named `col_types`, not a function argument.
col_types = cols()


# Read a bunch of files and combine them into a single data frame,
# from @WeAreRLadies.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the name; a bare ".csv" would also match names such
# as "xcsv_notes.txt".
f <- list.files(
  "my_folder",
  pattern = "\\.csv$",
  full.names = TRUE
)
# `readr::read_csv` needs the double colon: a single `:` is the sequence
# operator and fails at run time. `.id = "id"` adds a column identifying
# which element of `f` each row came from.
d <- purrr::map_df(f, readr::read_csv, .id = "id")


# Reading a Stata .dta file with haven
my_data <- haven::read_dta(file = "stata_data.dta")
	# uncount() can be useful for expanding counts back into one row per
	# observation, for instance when making histograms.
	test <- tibble(
	  word = rep(c("Rohan", "Monica", "Bayes"), times = c(2, 6, 4)),
	  date = c(
	    1998, 1999,
	    rep(c(1998, 1999), each = 3),
	    rep(1998, times = 3), 1999
	  )
	)
	# Tally each (date, word) pair, keep the pairs seen more than twice, then
	# expand the tallies back into one row per occurrence.
	test %>%
	  count(date, word) %>%
	  filter(n > 2) %>%
	  uncount(n)


	# case_when can be good for making decades, with an optional summarise by
	# those decades at the end.
	# NOTE(review): assumes `date` is a Date column -- confirm against the data.
	# The cut-offs must be real dates: an unquoted 1909-12-31 is arithmetic
	# (1909 - 12 - 31 = 1866), not a date. Each bound is the first day of the
	# next decade so that 31 December stays in its own decade.
	# Dates before 1900 still fall into "1900s", and anything from 2010 onwards
	# into "2010s", as in the original catch-all layout.
	data <- data %>%
	  mutate(decade = case_when(
	    # Keep missing dates missing instead of letting them hit the catch-all.
	    is.na(date) ~ NA_character_,
	    date < as.Date("1910-01-01") ~ "1900s",
	    date < as.Date("1920-01-01") ~ "1910s",
	    date < as.Date("1930-01-01") ~ "1920s",
	    date < as.Date("1940-01-01") ~ "1930s",
	    date < as.Date("1950-01-01") ~ "1940s",
	    date < as.Date("1960-01-01") ~ "1950s",
	    date < as.Date("1970-01-01") ~ "1960s",
	    date < as.Date("1980-01-01") ~ "1970s",
	    date < as.Date("1990-01-01") ~ "1980s",
	    date < as.Date("2000-01-01") ~ "1990s",
	    date < as.Date("2010-01-01") ~ "2000s",
	    TRUE ~ "2010s"
	  )) %>%
	  # Average gamma within each decade/topic combination.
	  group_by(decade, topic) %>%
	  summarise(ave_gamma = mean(gamma))


	# Supply col_types to avoid the column-specification message (described as a
	# warning in the original note) with read_csv/read_csv2. This is meant to be
	# passed as an argument, e.g. read_csv("file.csv", col_types = cols()).
	# NOTE(review): as written below it is a standalone assignment to a variable
	# named `col_types`, not a function argument.
	col_types = cols()


	# Read a bunch of files and combine them into a single data frame,
	# from @WeAreRLadies.
	# `pattern` is a regular expression, so the dot is escaped and the match is
	# anchored to the end of the name; a bare ".csv" would also match names such
	# as "xcsv_notes.txt".
	f <- list.files(
	  "my_folder",
	  pattern = "\\.csv$",
	  full.names = TRUE
	)
	# `readr::read_csv` needs the double colon: a single `:` is the sequence
	# operator and fails at run time. `.id = "id"` adds a column identifying
	# which element of `f` each row came from.
	d <- purrr::map_df(f, readr::read_csv, .id = "id")


	# Reading a Stata .dta file with haven
	my_data <- haven::read_dta(file = "stata_data.dta")