Skip to content

Instantly share code, notes, and snippets.

@MCodrescu
Created May 16, 2023 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MCodrescu/45e1adf1ca19863d566a0e0fcead5820 to your computer and use it in GitHub Desktop.
Save MCodrescu/45e1adf1ca19863d566a0e0fcead5820 to your computer and use it in GitHub Desktop.
# Large Example Dataset (2GB of Memory)
# Simulate 15 million rows with wakefield; each generator call below
# contributes one column to the resulting data frame.
example_data <- wakefield::r_data_frame(
  n = 1.5e7,
  wakefield::name(replace = TRUE),
  wakefield::sex_inclusive(),
  wakefield::marital(),
  wakefield::dob(),
  wakefield::education(),
  wakefield::employment(),
  wakefield::car(),
  wakefield::political(),
  wakefield::sat(),
  wakefield::income(),
  wakefield::state()
)
# Parquet File (200 MB of Space)
# Write the simulated data to disk, then drop the in-memory copy so the
# following steps work from the file instead.
rio::export(x = example_data, file = "example_data.parquet")
rm(list = "example_data")
# Scan File
# Rebind `example_data` to a scan of the Parquet file; the steps below call
# `$collect()` whenever they need actual rows.
example_data <- polars::scan_parquet("example_data.parquet")
# Preview
# Collect only the first five rows, convert them to a data frame, and open
# them in the data viewer under the title "Data Preview".
View(
  example_data$head(5)$collect()$to_data_frame(),
  "Data Preview"
)
# Get Row Count
# Count rows inside the query instead of collecting the whole table first:
# `collect()$height` would pull every row and column into memory only to
# read one number. Selecting the length of the first column returns a
# one-row result, which is then unwrapped to a plain R scalar.
example_data$
  select(polars::pl$first()$len())$
  collect()$
  to_data_frame()[[1]]
# Split the Cars Column into Make and Model
# `Car` is cast to a string column once and reused for both derived columns.
car_text <- polars::pl$col("Car")$cast(polars::pl$dtypes$Utf8)

result <- example_data$
  with_columns(
    # Make: the first space-separated token of the car name.
    car_text$
      str$split(" ")$
      arr$first()$
      alias("Make"),
    # Model: everything after the first token. Taking only the last token
    # would truncate multi-word models (e.g. "Mazda RX4 Wag" -> "Wag") and
    # would repeat the make for single-word names. With this pattern a
    # single-word name yields an empty Model.
    car_text$
      str$replace("^\\S+\\s*", "")$
      alias("Model")
  )
# Preview
# Collect the first five rows of `result` and open them in the data viewer
# under the title "Preview Result".
View(
  result$head(5)$collect()$to_data_frame(),
  "Preview Result"
)
# Mean Income by State
# Group by `State`, average `Income` within each group, then collect the
# aggregated result into a regular data frame.
mean_income_by_state <- example_data$
  groupby("State")$
  agg(polars::pl$col("Income")$mean())$
  collect()$
  to_data_frame()
# Median SAT by Political Alignment
# Group by `Political`, take the median `SAT` within each group, then
# collect the aggregated result into a regular data frame.
median_sat_by_poltics <- example_data$
  groupby("Political")$
  agg(polars::pl$col("SAT")$median())$
  collect()$
  to_data_frame()
# Marital Status by Gender
# Count rows for every (Sex, Marital) combination. The result is sorted by
# both grouping keys: sorting by `Sex` alone leaves the order of the
# `Marital` rows within each sex unspecified, so the output could differ
# between runs.
merital_status_by_gender <- example_data$
  groupby("Sex", "Marital")$
  agg(polars::pl$first()$len()$alias("Count"))$
  sort("Sex", "Marital")$
  collect()$
  to_data_frame()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment