Michael Frank mcfrank

## scale.R
library(xkcd)
library(tidyverse)

# Build a long-format table of `upper` and `lower` bound values for
# ages 1 through 20: one row per age x bound, with the value stored in
# `vocabulary`. The upper bound gains an extra term once age exceeds 5.
d <- tibble(age = 1:20) |>
  mutate(
    upper = 1e6 * age * 12 +
      ifelse(age > 5, 2.5e5 * 52 * (age - 5), 0)
  ) |>
  mutate(lower = 1e5 * age * 12) |>
  pivot_longer(
    cols = upper:lower,
    names_to = "bound",
    values_to = "vocabulary"
  )

pdf("~/Projects/AI commentaries/scale.pdf", width = 5, height = 4)
ggplot(d, aes(x = age, y = vocabulary, col = bound)) +

## gamlss_demo.R
library(wordbankr)
library(langcog)
library(tidyverse)
library(brms)
library(forcats)
library(survey)
library(gamlss)
# Make theme_mikabr() the default ggplot2 theme for every plot that follows,
# and keep the theme's text font family in `font`
# (NOTE(review): presumably reused for text/label layers later — confirm).
theme_set(theme_mikabr())
font <- theme_mikabr()$text$family

## habituators.R
# relies on zoo package

# function to find the baseline value
baseline_looking <- function (lts) {
  # select lts > 12s
  lts_12 <- lts[lts > 12 & !is.na(lts)]

  # sum the first three
  baseline <- sum(lts_12[1:3], na.rm=TRUE)


## kl_sim.R
library(tidyverse)
library(assertthat)

# Return `d` with its `participant_id` column replaced by the result of
# shuffle() applied to that column; all other columns are left untouched.
#
# d: a data frame (or list) containing a `participant_id` element.
#
# NOTE(review): shuffle() is not defined in this snippet or exported by the
# packages loaded above — presumably a helper or mosaic::shuffle; confirm.
shuffle_data <- function(d) {
  reordered_ids <- shuffle(d$participant_id)
  d$participant_id <- reordered_ids
  d
}

get_knower_level <- function (q, r, technique = "perfect") {

## pubmed.py
## this little script shows off the use of the PubMed API through Biopython
## requires installing Biopython (using pip)
## also requires installing the DTD files for each of the Entrez API calls,
## but the instructions for this are given when you run the script

## useful list of Entrez databases that can be queried through API
# pmc_pubmed	PubMed citations for these articles
# pmc_refs_pubmed	PubMed article citing PMC article
# pmc_pmc_cites	   PMC articles that given PMC article cites
# pmc_pmc_citedby	PMC article citing given PMC article

## bayesian_survival.R
# approach from
# https://www.medrxiv.org/content/10.1101/2022.11.02.22281762v1.full.pdf

library(tidyverse)
library(survival)
library(ggsurvfit)
#devtools::install_github("maxlinde/baymedr")
library(baymedr)
# library(BayesSurvival)

## sample_rmd.Rmd
---
title: "mtcars example markdown"
author: "Mike Frank"
date: "2023-03-21"
output:
  html_document:
    toc: true
    toc_float: true
---

## contrast_demo.R
library(tidyverse)
library(lme4)

# Load the Stiller scales data from the experimentology repository and add:
#   age_group    - age binned at the integer breaks 2..5 (lowest edge included)
#   condition_f  - factor that is "Experimental" for the "Label" condition
#                  and "Control" otherwise
#   age_centered - age minus the sample mean age
sgf <- read_csv("https://raw.githubusercontent.com/langcog/experimentology/main/data/tidyverse/stiller_scales_data.csv") |>
  mutate(
    age_group = cut(age, breaks = 2:5, include.lowest = TRUE),
    condition_f = factor(
      ifelse(condition == "Label", "Experimental", "Control")
    ),
    age_centered = age - mean(age)
  )

mod1 <- glmer(correct ~ age * condition + (1|subid) + (1|item),

## MB1_mixedmodel_es.R
# starts at line 716 of paper.Rmd

# Prepare the trial-level data (NOTE(review): presumably for the lmer fit
# that follows in paper.Rmd — confirm):
#   * drop training trials
#   * log-transform looking time
#   * mean-center age in months (scale() with scale = FALSE only centers)
#   * shift trial number by 8.5
#   * build an item id from stimulus number and trial type
#   * keep only rows whose log looking time is finite (drops NA, NaN, +/-Inf)
d_lmer_scale <- d %>%
  filter(trial_type != "train") %>%
  mutate(log_lt = log(looking_time)) %>%
  mutate(
    age_mo = scale(age_mo, scale = FALSE),
    trial_num = trial_num - 8.5,
    item = paste0(stimulus_num, trial_type)
  ) %>%
  filter(is.finite(log_lt))

## gam_cdi.R
library(tidyverse)
library(gamlss)

# Read the sample data from an Excel workbook; the path is relative to the
# current working directory.
d <- readxl::read_excel("sample_data.xlsx")

# model
# Largest value in the `Vocabulary production` column.
# NOTE(review): presumably the denominator for the 0-1 rescaling mentioned
# below — confirm. Returns NA if the column contains any NA (no na.rm).
max_vocab <- max(d$`Vocabulary production`)

# transformation to 0-1 for beta model
# note that beta data cannot be exactly 0 or 1; it may be necessary to add/subtract .001 for data including 0s and 1s
	library(xkcd)
	library(tidyverse)

	# Build a long-format table of `upper` and `lower` bound values for
	# ages 1 through 20: one row per age x bound, with the value stored in
	# `vocabulary`. The upper bound gains an extra term once age exceeds 5.
	# (The markdown-escaped `\|>` was not valid R; restored to the native pipe.)
	d <- tibble(age = 1:20) |>
	  mutate(
	    upper = 1e6 * age * 12 +
	      ifelse(age > 5, 2.5e5 * 52 * (age - 5), 0),
	    lower = 1e5 * age * 12
	  ) |>
	  pivot_longer(upper:lower, names_to = "bound", values_to = "vocabulary")

	pdf("~/Projects/AI commentaries/scale.pdf", width = 5, height = 4)
	ggplot(d, aes(x = age, y = vocabulary, col = bound)) +
	library(wordbankr)
	library(langcog)
	library(tidyverse)
	library(brms)
	library(forcats)
	library(survey)
	library(gamlss)
	# Make theme_mikabr() the default ggplot2 theme for every plot that follows,
	# and keep the theme's text font family in `font`
	# (NOTE(review): presumably reused for text/label layers later — confirm).
	theme_set(theme_mikabr())
	font <- theme_mikabr()$text$family
	# relies on zoo package

	# function to find the baseline value
	baseline_looking <- function (lts) {
	# select lts > 12s
	lts_12 <- lts[lts > 12 & !is.na(lts)]

	# sum the first three
	baseline <- sum(lts_12[1:3], na.rm=TRUE)
	library(tidyverse)
	library(assertthat)

	# Return `d` with its `participant_id` column replaced by the result of
	# shuffle() applied to that column; all other columns are left untouched.
	#
	# d: a data frame (or list) containing a `participant_id` element.
	#
	# NOTE(review): shuffle() is not defined in this snippet or exported by the
	# packages loaded above — presumably a helper or mosaic::shuffle; confirm.
	shuffle_data <- function(d) {
	  reordered_ids <- shuffle(d$participant_id)
	  d$participant_id <- reordered_ids
	  d
	}

	get_knower_level <- function (q, r, technique = "perfect") {
	## this little script shows off the use of the PubMed API through Biopython
	## requires installing Biopython (using pip)
	## also requires installing the DTD files for each of the Entrez API calls,
	## but the instructions for this are given when you run the script

	## useful list of Entrez databases that can be queried through API
	# pmc_pubmed PubMed citations for these articles
	# pmc_refs_pubmed PubMed article citing PMC article
	# pmc_pmc_cites PMC articles that given PMC article cites
	# pmc_pmc_citedby PMC article citing given PMC article
	# approach from
	# https://www.medrxiv.org/content/10.1101/2022.11.02.22281762v1.full.pdf

	library(tidyverse)
	library(survival)
	library(ggsurvfit)
	#devtools::install_github("maxlinde/baymedr")
	library(baymedr)
	# library(BayesSurvival)
	---
	title: "mtcars example markdown"
	author: "Mike Frank"
	date: "2023-03-21"
	output:
	html_document:
	toc: true
	toc_float: true
	---
	library(tidyverse)
	library(lme4)

	# Load the Stiller scales data from the experimentology repository and add:
	#   age_group    - age binned at the integer breaks 2..5 (lowest edge included)
	#   condition_f  - factor that is "Experimental" for the "Label" condition
	#                  and "Control" otherwise
	#   age_centered - age minus the sample mean age
	# (The markdown-escaped `\|>` was not valid R; restored to the native pipe.)
	sgf <- read_csv("https://raw.githubusercontent.com/langcog/experimentology/main/data/tidyverse/stiller_scales_data.csv") |>
	  mutate(
	    age_group = cut(age, 2:5, include.lowest = TRUE),
	    condition_f = factor(
	      ifelse(condition == "Label", "Experimental", "Control")
	    ),
	    age_centered = age - mean(age)
	  )

	mod1 <- glmer(correct ~ age * condition + (1\|subid) + (1\|item),
	# starts at line 716 of paper.Rmd

	# Prepare the trial-level data (NOTE(review): presumably for the lmer fit
	# that follows in paper.Rmd — confirm):
	#   * drop training trials
	#   * log-transform looking time
	#   * mean-center age in months (scale() with scale = FALSE only centers)
	#   * shift trial number by 8.5
	#   * build an item id from stimulus number and trial type
	#   * keep only rows whose log looking time is finite (drops NA, NaN, +/-Inf)
	d_lmer_scale <- d %>%
	  filter(trial_type != "train") %>%
	  mutate(log_lt = log(looking_time)) %>%
	  mutate(
	    age_mo = scale(age_mo, scale = FALSE),
	    trial_num = trial_num - 8.5,
	    item = paste0(stimulus_num, trial_type)
	  ) %>%
	  filter(is.finite(log_lt))
	library(tidyverse)
	library(gamlss)

	# Read the sample data from an Excel workbook; the path is relative to the
	# current working directory.
	d <- readxl::read_excel("sample_data.xlsx")

	# model
	# Largest value in the `Vocabulary production` column.
	# NOTE(review): presumably the denominator for the 0-1 rescaling mentioned
	# below — confirm. Returns NA if the column contains any NA (no na.rm).
	max_vocab <- max(d$`Vocabulary production`)

	# transformation to 0-1 for beta model
	# note that beta data cannot be exactly 0 or 1; it may be necessary to add/subtract .001 for data including 0s and 1s