Skip to content

Instantly share code, notes, and snippets.

@graebnerc
Created May 11, 2023 13:59
Show Gist options
  • Save graebnerc/e4fc9795d4b57f3de2bacd120e563af8 to your computer and use it in GitHub Desktop.
Save graebnerc/e4fc9795d4b57f3de2bacd120e563af8 to your computer and use it in GitHub Desktop.
#S12 - Recap
This gist contains the script developed during the recap session.
here::i_am("R/recap.R")
library(dplyr)
library(data.table)
library(here)
library(tidyr)
library(ggplot2)
# How to deal with large files - some hints-----------
# Path to the raw CSV file, built with `here()`.
gdp_file <- here("data/raw/API_NY.GDP.PCAP.PP.KD_DS2_en_csv_v2_5359165.csv")

# First attempt: import the file with fread()'s default settings.
gdp_data_raw <- data.table::fread(file = gdp_file)

# Use overview functions:
# (all of them inspect `gdp_data_raw`, i.e. the object created just above)
head(gdp_data_raw, n = 2) # First two rows
names(gdp_data_raw) # Column names
str(gdp_data_raw) # Structure and column types
dplyr::glimpse(gdp_data_raw) # Compact, column-wise overview

# Check the unique values of the columns:
# (columns with a single unique value are candidates for removal)
unique(gdp_data_raw$`Indicator Name`)
unique(gdp_data_raw$V67)
# The overview above suggests refining the import call:
#  - `header = TRUE` ensures the column names are correct;
#  - converting to a tibble facilitates printing;
#  - redundant columns are removed right away.
gdp_data_raw <- data.table::fread(file = gdp_file, header = TRUE) %>%
  tibble::as_tibble() %>%
  dplyr::select(
    -c("Country Name", "Indicator Name", "Indicator Code", "V67")
  )
# Reshape to long format: every column except the country code is moved
# into a `year` / `GDP_percapita` pair of columns.
gdp_data_tidy <- tidyr::pivot_longer(
  data = gdp_data_raw,
  cols = -"Country Code",
  names_to = "year",
  values_to = "GDP_percapita"
)
# Country codes--------------
library(countrycode)

# Add a `countryname` column in a single mutate() call:
#  1. translate the iso3c codes into country names;
#  2. apply manual corrections where needed, e.g. label the code "WLD"
#     as "World".
gdp_data_countrynames <- dplyr::mutate(
  gdp_data_tidy,
  countryname = countrycode::countrycode(
    sourcevar = `Country Code`,
    origin = "iso3c",
    destination = "country.name"
  ),
  countryname = dplyr::if_else(
    `Country Code` == "WLD", "World", countryname
  )
)

head(gdp_data_countrynames)
# Check potential duplicates!
# Scatter plot---------------
wine_data <- DataScienceExercises::wine2dine

# Alcohol on the x-axis, residual sugar on the y-axis, points coloured by
# `kind`, drawn with the black-and-white theme.
ggplot2::ggplot(
  wine_data,
  ggplot2::aes(x = alcohol, y = `residual sugar`, color = kind)
) +
  ggplot2::geom_point() +
  ggplot2::theme_bw()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment