Skip to content

Instantly share code, notes, and snippets.

# Number of synthetic records to generate
n <- 1000
# Draw n values from wakefield's name generator and keep them as a plain
# character vector
first_name_list <- as.character(wakefield::name(n))
# Last names are drawn from the same wakefield::name() generator as the
# first names
last_name_list <- as.character(wakefield::name(n))
# Values from wakefield::age(), coerced to a numeric vector
age <- as.numeric(wakefield::age(n))
# Values from wakefield::employment(), coerced to a character vector
employment <- as.character(wakefield::employment(n))
# Add misspellings
first_name_list_misspelled <- sapply(
strsplit(first_name_list, ""),
function(x) {
# Download the data archive (a gzip-compressed tarball, not a zip file).
# R's default download timeout is 60 seconds, which the download.file()
# documentation notes is often insufficient for large files (50MB or more),
# so raise it before starting the transfer. max() keeps any larger value the
# user has already configured.
options(timeout = max(300, getOption("timeout")))
download.file(
  url = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
  destfile = "data.tar.gz"
)
# Extract the downloaded tarball into the current working directory
untar("data.tar.gz")
# Get Files in Directories
# Load the training and testing tables from CSV files in the working
# directory. The compact column specification "cf" asks readr to parse the
# first column as character and the second as a factor.
training_images <- readr::read_csv(file = "train.csv", col_types = "cf")
testing_images <- readr::read_csv(file = "test.csv", col_types = "cf")
# Large Example Dataset (2GB of Memory)
example_data <-
wakefield::r_data_frame(
n = 15000000,
wakefield::name(replace = TRUE),
wakefield::sex_inclusive(),
wakefield::marital(),
wakefield::dob(),
wakefield::education(),
wakefield::employment(),
@MCodrescu
MCodrescu / ComparingRAndPython.qmd
Created February 2, 2023 16:41
Cleaning messy data with pandas and the tidyverse
---
title: "Cleaning messy data with pandas and the tidyverse"
format: html
---
```{python}
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt