Steve Haroz steveharoz

## generate graph analyze.R
library(tidyverse)
library(lmerTest)

# subject count
COUNT = 5

set.seed(8675309)

# generate a unique intercept per subject
data = tibble(

## image.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                steveharoz
                / image.md
            
            
              Last active
              May 18, 2023 11:42
            
              
                perceived correlation of rank
              
          
## extract citation numbers.R
# Extract all citation numbers such as [1] from a PDF's text
# It also includes cases for multiples [1, 3] and ranges [1-5]
# It tries to exclude confidence intervals by skipping
#
# written by Steve Haroz with help from ChatGPT
# MIT license

library(tidyverse)
library(pdftools)

## multiple comparison simulation.R
# number of simulated experiments
COUNT = 100000

# Share of one-sample t-tests on pure noise (20 standard-normal draws each)
# that come out below p = 0.05, i.e. the false positive rate of a single test.
mean(
  replicate(
    n = COUNT,
    expr = t.test(x = rnorm(n = 20))[["p.value"]] < 0.05
  )
)

#> 0.05073
# 5% false positive rate

## hierarchical pie.R
library(tidyverse)

# number of cars to simulate
COUNT = 40

# Group sizes: half Petrol, a quarter Hybrid, an eighth Pure Electric, and
# Diesel takes whatever is left. rep() silently truncates a fractional `times`,
# so taking Diesel as the remainder keeps the sizes summing to COUNT even when
# COUNT is not a multiple of 8 (otherwise the columns would differ in length).
group_sizes = c(
  "Petrol" = COUNT %/% 2,
  "Hybrid" = COUNT %/% 4,
  "Pure Electric" = COUNT %/% 8
)
group_sizes["Diesel"] = COUNT - sum(group_sizes)

# one row per car: a random name, a random value, and its fuel group
data = tibble(
  # three random letters: one uppercase followed by two lowercase
  car = paste0(
    sample(LETTERS, COUNT, TRUE),
    sample(letters, COUNT, TRUE),
    sample(letters, COUNT, TRUE)
  ),
  # normal draws with mean 3 (default sd of 1)
  value = rnorm(COUNT, 3),
  # groups are laid out in contiguous runs, in the order listed above
  group = rep(names(group_sizes), times = group_sizes)
)

## endpoint.R
# Stat that reduces each group to its endpoints: the rows with the smallest
# and the largest x.
StatEndpoint <- ggproto("StatEndpoint", Stat,
  # data: the rows of a single group; scales is not used here.
  # Returns at most two rows of `data`.
  compute_group = function(data, scales) {
    row_count = nrow(data)
    # nothing to pick from; indexing row 1 of an empty frame would invent an NA row
    if (row_count == 0) {
      return(data)
    }
    # sort by x so indexing is meaningful
    data = arrange(data, x)
    # grab only the first and last row; unique() keeps a single-row group
    # from being emitted twice
    data[unique(c(1, row_count)), , drop = FALSE]
  },

  required_aes = c("x", "y")
)

## painbow_data.csv
x,y,value
1,205,0.3125
1,204,0.3125
1,203,0.3125
1,202,0.3125
1,201,0.3125
1,200,0.3125
1,199,0.3125
1,198,0.3125
1,197,0.3125

## readme.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                steveharoz
                / readme.md
            
            
              Last active
              November 5, 2021 09:16
            
              
                XKCD colormap
              
          
    painbow moved to GitHub: https://github.com/steveharoz/painbow


## pie chart - subcategories.R
library(tidyverse)

set.seed(999)

data = tibble(
  name = c("A1", "A2", "A3", "A4", "B1", "B2", "B3", "B4", "C1", "C2"),
  value = rnorm(10, 10, sd = 3),
  color = c(
    hcl(220, seq(60, 30, -10), seq(50, 80, 10)),
    hcl(0, seq(60, 30, -10), seq(50, 80, 10)),

## Texas congressional district simulation.R
library(tidyverse)

# arbitrary number
district_count = 38

# population from Stephanie's figure
# https://twitter.com/evergreendata/status/1450862060972216320
population = c(
  rep("White", 40),
  rep("Latino", 39),
	library(tidyverse)
	library(lmerTest)

	# subject count
	COUNT = 5

	set.seed(8675309)

	# generate a unique intercept per subject
	data = tibble(
	# Extract all citation numbers such as [1] from a PDF's text
	# It also includes cases for multiples [1, 3] and ranges [1-5]
	# It tries to exclude confidence intervals by skipping
	#
	# written by Steve Haroz with help from ChatGPT
	# MIT license

	library(tidyverse)
	library(pdftools)
	# number of simulated experiments
	COUNT = 100000

	# Share of one-sample t-tests on pure noise (20 standard-normal draws each)
	# that come out below p = 0.05, i.e. the false positive rate of a single test.
	mean(
	  replicate(
	    n = COUNT,
	    expr = t.test(x = rnorm(n = 20))[["p.value"]] < 0.05
	  )
	)

	#> 0.05073
	# 5% false positive rate
	library(tidyverse)

	# number of cars to simulate
	COUNT = 40

	# Group sizes: half Petrol, a quarter Hybrid, an eighth Pure Electric, and
	# Diesel takes whatever is left. rep() silently truncates a fractional `times`,
	# so taking Diesel as the remainder keeps the sizes summing to COUNT even when
	# COUNT is not a multiple of 8 (otherwise the columns would differ in length).
	group_sizes = c(
	  "Petrol" = COUNT %/% 2,
	  "Hybrid" = COUNT %/% 4,
	  "Pure Electric" = COUNT %/% 8
	)
	group_sizes["Diesel"] = COUNT - sum(group_sizes)

	# one row per car: a random name, a random value, and its fuel group
	data = tibble(
	  # three random letters: one uppercase followed by two lowercase
	  car = paste0(
	    sample(LETTERS, COUNT, TRUE),
	    sample(letters, COUNT, TRUE),
	    sample(letters, COUNT, TRUE)
	  ),
	  # normal draws with mean 3 (default sd of 1)
	  value = rnorm(COUNT, 3),
	  # groups are laid out in contiguous runs, in the order listed above
	  group = rep(names(group_sizes), times = group_sizes)
	)
	# Stat that reduces each group to its endpoints: the rows with the smallest
	# and the largest x.
	StatEndpoint <- ggproto("StatEndpoint", Stat,
	  # data: the rows of a single group; scales is not used here.
	  # Returns at most two rows of `data`.
	  compute_group = function(data, scales) {
	    row_count = nrow(data)
	    # nothing to pick from; indexing row 1 of an empty frame would invent an NA row
	    if (row_count == 0) {
	      return(data)
	    }
	    # sort by x so indexing is meaningful
	    data = arrange(data, x)
	    # grab only the first and last row; unique() keeps a single-row group
	    # from being emitted twice
	    data[unique(c(1, row_count)), , drop = FALSE]
	  },

	  required_aes = c("x", "y")
	)
	x,y,value
	1,205,0.3125
	1,204,0.3125
	1,203,0.3125
	1,202,0.3125
	1,201,0.3125
	1,200,0.3125
	1,199,0.3125
	1,198,0.3125
	1,197,0.3125
	library(tidyverse)

	set.seed(999)

	data = tibble(
	name = c("A1", "A2", "A3", "A4", "B1", "B2", "B3", "B4", "C1", "C2"),
	value = rnorm(10, 10, sd = 3),
	color = c(
	hcl(220, seq(60, 30, -10), seq(50, 80, 10)),
	hcl(0, seq(60, 30, -10), seq(50, 80, 10)),
	library(tidyverse)

	# arbitrary number
	district_count = 38

	# population from Stephanie's figure
	# https://twitter.com/evergreendata/status/1450862060972216320
	population = c(
	rep("White", 40),
	rep("Latino", 39),