Marcus Codrescu MCodrescu

## create_data.R
# Number of synthetic records to generate.
n <- 1000

# First and last names are both sampled from wakefield::name(); the results
# are coerced to plain character vectors.
first_name_list <- as.character(wakefield::name(n = n))
last_name_list <- as.character(wakefield::name(n = n))

# Ages as a plain numeric vector and employment values as a character vector,
# each of length n.
age <- as.numeric(wakefield::age(n = n))
employment <- as.character(wakefield::employment(n = n))

# Add misspellings
first_name_list_misspelled <- sapply(
    strsplit(first_name_list, ""),
    function(x) {

## sentiment_analysis.R
# Download Data Zip
# The archive is a large binary file, so (a) raise R's download timeout, whose
# 60-second default is often too short for large files, and (b) request a
# binary transfer explicitly instead of relying on extension-based detection.
local({
    # Never lower a timeout the user already raised; restore it afterwards.
    old_opts <- options(timeout = max(300, getOption("timeout")))
    on.exit(options(old_opts), add = TRUE)
    download.file(
        url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
        destfile = "data.tar.gz",
        mode = "wb"
    )
})

# Unzip
# Extracts the downloaded archive into the current working directory.
untar("data.tar.gz")

# Get Files in Directories

## mnist_classification.R
# Read train.csv and test.csv from the working directory.
# col_types = "cf" is readr's compact column specification
# ("c" = character, "f" = factor), assigned to columns by position.
# NOTE(review): only two column types are listed — confirm that this matches
# the column layout of both files.
training_images <- readr::read_csv(file = "train.csv", col_types = "cf")

testing_images <- readr::read_csv(file = "test.csv", col_types = "cf")

## polars-in-the-artic
# Large Example Dataset (2GB of Memory)
example_data <-
    wakefield::r_data_frame(
        n = 15000000,
        wakefield::name(replace = TRUE),
        wakefield::sex_inclusive(),
        wakefield::marital(),
        wakefield::dob(),
        wakefield::education(),
        wakefield::employment(),

## ComparingRAndPython.qmd
---
title: "Cleaning messy data with pandas and the tidyverse"
format: html
---

```{python}
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
	n <- 1000
	first_name_list <- as.character(wakefield::name(n))
	last_name_list <- as.character(wakefield::name(n))
	age <- as.numeric(wakefield::age(n))
	employment <- as.character(wakefield::employment(n))

	# Add misspellings
	first_name_list_misspelled <- sapply(
	strsplit(first_name_list, ""),
	function(x) {
	# Download Data Zip
	download.file(
	url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
	destfile = "data.tar.gz"
	)

	# Unzip
	untar("data.tar.gz")

	# Get Files in Directories
	training_images <- readr::read_csv(
	"train.csv",
	col_types = "cf"
	)

	testing_images <- readr::read_csv(
	"test.csv",
	col_types = "cf"
	)
	# Large Example Dataset (2GB of Memory)
	example_data <-
	wakefield::r_data_frame(
	n = 15000000,
	wakefield::name(replace = TRUE),
	wakefield::sex_inclusive(),
	wakefield::marital(),
	wakefield::dob(),
	wakefield::education(),
	wakefield::employment(),
	---
	title: "Cleaning messy data with pandas and the tidyverse"
	format: html
	---

	```{python}
	import pandas as pd
	import numpy as np
	import seaborn as sns
	import matplotlib.pyplot as plt