James Clawson jmclawson

## get_education_data.R
load_data <- function(){
  if (!file.exists("data/scorecard.rds")) {
    "https://ed-public-download.app.cloud.gov/downloads/College_Scorecard_Raw_Data_06102024.zip" |>
      download_once() |>
      utils::unzip(files = c("Most-Recent-Cohorts-Institution.csv", "CollegeScorecardDataDictionary.xlsx"),
                   exdir = "data")

    scorecard <- readr::read_csv("data/Most-Recent-Cohorts-Institution.csv")

    dictionary <- "data/CollegeScorecardDataDictionary.xlsx" |>

## dashboard-r.qmd
---
title: "Development Indicators by Continent"
author: "Gapminder Analytics Group"
format: dashboard
---

# Charts

```{r}
#| label: setup

## linear-models-slides.qmd
---
title: "Understanding and using linear models in R"
subtitle: "A 10-slide guide to the dark side"
format:
  revealjs:
    slide-number: true
    embed-resources: true
    warning: false
    message: false
    theme: dark

## corpus_micusp.R
# helper function get_if_needed for downloading online documents exactly once: https://gist.github.com/jmclawson/65899e2de6bfee692b08141a98422240
source("https://gist.githubusercontent.com/jmclawson/65899e2de6bfee692b08141a98422240/raw/7c5590377332e427691f2331b69abd58be2141ec/get_if_needed.R")

get_micusp_metadata <- function(micusp_dir = "micusp"){
  get_if_needed("https://elicorpora.info/browse?mode=download&start=1&sort=dept&direction=desc",
                filename = "micusp_metadata.csv",
                destdir = micusp_dir)

  readr::read_csv("micusp/micusp_metadata.csv", show_col_types = FALSE) |>
    janitor::clean_names()

## move_state.R
# This process is adapted from https://sesync-ci.github.io/blog/transform-Alaska-Hawaii.html
# Here, it's offered as a function to simplify trial-and-error.

move_state <- function(
    df, # spatial dataframe
    choice, # value in "state" column
    rotation, # eg -39 * pi/180
    right, # amount to move eastward; use negative for left/west
    up# amount to move northward; use negative for down/south
  ){

## collapse_rows.R
collapse_rows <- function(df_g, col, lookleft = TRUE){
  col_num <- grep(deparse(substitute(col)), colnames(df_g$`_data`))

  collapse_style <- css(visibility = "hidden",
                        border_top = "0px")

  test_rows <- function(x) ifelse(is.na(x == lag(x)), FALSE, x == lag(x))

  if(col_num > 1 & lookleft) {
    col_left <- as.name(colnames(df_g$`_data`)[col_num - 1])

## special_imports.rmd
---
title: "Special cases of data importing in R"
---

The typical methods of importing data make it straightforward to read a single CSV file, but data often arrives in some other form. Two common scenarios are a folder of many smaller CSV files and data prepared for use with SAS.

## Importing many CSV files

It is common to read in multiple CSV files and combine the resulting data frames into one. Beyond a handful of files, the process should be automated rather than repeated by hand.

## sf_example.rmd
---
title: "Mapping with R"
output:
  html_document:
    df_print: paged
    toc: true
    toc_float: true
date: "2023-06-30"
---

## haven_example.rmd
---
title: "Importing SAS data into R"
---

## Get the data

```{r}
# Just download it once
if(!file.exists("medical.sas7bdat")) {
    download.file("http://www.principlesofeconometrics.com/sas/medical.sas7bdat",

## topic_model.R
library(wordcloud)
library(topicmodels)
library(plotly)

# Moves a table of texts through the necessary
# steps of preparation before building a topic
# model. The function applies these steps:
#  1. identifies text divisions by the `doc_id`
#     column
#  2. divides each of the texts into same-sized
	load_data <- function(){
	if (!file.exists("data/scorecard.rds")) {
	"https://ed-public-download.app.cloud.gov/downloads/College_Scorecard_Raw_Data_06102024.zip" \|>
	download_once() \|>
	utils::unzip(files = c("Most-Recent-Cohorts-Institution.csv", "CollegeScorecardDataDictionary.xlsx"),
	exdir = "data")

	scorecard <- readr::read_csv("data/Most-Recent-Cohorts-Institution.csv")

	dictionary <- "data/CollegeScorecardDataDictionary.xlsx" \|>
	---
	title: "Development Indicators by Continent"
	author: "Gapminder Analytics Group"
	format: dashboard
	---

	# Charts

	```{r}
	#| label: setup
	---
	title: "Understanding and using linear models in R"
	subtitle: "A 10-slide guide to the dark side"
	format:
	revealjs:
	slide-number: true
	embed-resources: true
	warning: false
	message: false
	theme: dark
	# helper function get_if_needed for downloading online documents exactly once: https://gist.github.com/jmclawson/65899e2de6bfee692b08141a98422240
	source("https://gist.githubusercontent.com/jmclawson/65899e2de6bfee692b08141a98422240/raw/7c5590377332e427691f2331b69abd58be2141ec/get_if_needed.R")

	get_micusp_metadata <- function(micusp_dir = "micusp"){
	get_if_needed("https://elicorpora.info/browse?mode=download&start=1&sort=dept&direction=desc",
	filename = "micusp_metadata.csv",
	destdir = micusp_dir)

	readr::read_csv("micusp/micusp_metadata.csv", show_col_types = FALSE) \|>
	janitor::clean_names()
	# This process is adapted from https://sesync-ci.github.io/blog/transform-Alaska-Hawaii.html
	# Here, it's offered as a function to simplify trial-and-error.

	move_state <- function(
	df, # spatial dataframe
	choice, # value in "state" column
	rotation, # eg -39 * pi/180
	right, # amount to move eastward; use negative for left/west
	up# amount to move northward; use negative for down/south
	){
	collapse_rows <- function(df_g, col, lookleft = TRUE){
	col_num <- grep(deparse(substitute(col)), colnames(df_g$`_data`))

	collapse_style <- css(visibility = "hidden",
	border_top = "0px")

	test_rows <- function(x) ifelse(is.na(x == lag(x)), FALSE, x == lag(x))

	if(col_num > 1 & lookleft) {
	col_left <- as.name(colnames(df_g$`_data`)[col_num - 1])
	---
	title: "Special cases of data importing in R"
	---

	The typical methods of importing data make it straightforward to read a single CSV file, but data often arrives in some other form. Two common scenarios are a folder of many smaller CSV files and data prepared for use with SAS.

	## Importing many CSV files

	It is common to read in multiple CSV files and combine the resulting data frames into one. Beyond a handful of files, the process should be automated rather than repeated by hand.
	---
	title: "Mapping with R"
	output:
	html_document:
	df_print: paged
	toc: true
	toc_float: true
	date: "2023-06-30"
	---
	---
	title: "Importing SAS data into R"
	---

	## Get the data

	```{r}
	# Just download it once
	if(!file.exists("medical.sas7bdat")) {
	download.file("http://www.principlesofeconometrics.com/sas/medical.sas7bdat",
	library(wordcloud)
	library(topicmodels)
	library(plotly)

	# Moves a table of texts through the necessary
	# steps of preparation before building a topic
	# model. The function applies these steps:
	# 1. identifies text divisions by the `doc_id`
	# column
	# 2. divides each of the texts into same-sized