Last active
June 1, 2022 09:40
-
-
Save jrosell/8fa719ac8bf04daa281bffdf6c89ef99 to your computer and use it in GitHub Desktop.
Importación de datos tabulares: resolver problemas de importación usando type_convert, preservar datos con write_feather para facilitar su intercambio, obtener datos de internet con download.file y unzip, u obtenerlos de una base de datos con DBI.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Setup ----
# Install the packages this script checks for when they are missing.
# requireNamespace() only tests availability and returns FALSE when a package
# is absent; attaching is left to the library() calls below, which stop with an
# error if a package still cannot be loaded. RSQLite is only reached through
# `RSQLite::`, so it needs to be installed but not attached.
# NOTE(review): hms, haven, readxl and DBI are attached without an install
# check -- presumably they arrive as dependencies of the packages above; confirm.
for (pkg in c("tidyverse", "feather", "RSQLite")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(tidyverse)
library(hms)
library(haven)
library(feather)
library(readxl)
library(DBI)
# Parsing vectors ----
# Let readr guess the type of a character vector and inspect the result.
c("2010-10-10", "2021-10-10") %>%
  parse_guess() %>%
  str()

# Parse a vector containing a value ("N") that is not expected to parse as a
# logical, then list whatever parsing problems were recorded on the result.
x <- c("TRUE", "FALSE", "N") %>% parse_logical()
problems(x)

# Same input, but with "" and "N" declared as missing-value markers.
parse_logical(c("TRUE", "FALSE", "N"), na = c("", "N")) %>%
  str()
# Parsing numbers ----
# A plain decimal string.
"3333.99" %>% parse_double()

# A number embedded in text, using "," as decimal mark and "." as grouping
# mark.
parse_number(
  "Precio: 3.333,99€",
  locale = locale(grouping_mark = ".", decimal_mark = ",")
)
# Parsing character strings ----
# x1 is written with a literal "ñ"; the original author notes the guessed
# encoding as UTF-8.
x1 <- "El niño come galletas"
x1 %>%
  charToRaw() %>%
  guess_encoding() # UTF-8
# Parse once with the default locale and once declaring the input as Latin1.
parse_character(x1)
parse_character(
  x1,
  locale = locale(encoding = "Latin1")
)

# x2 encodes the "ñ" as the single byte 0xF1; the original author notes the
# guessed encoding as ISO-8859-1.
x2 <- "El ni\xf1o come galletas"
x2 %>%
  charToRaw() %>%
  guess_encoding() # ISO-8859-1
# Same two parses as above, for comparison.
parse_character(x2)
parse_character(
  x2,
  locale = locale(encoding = "Latin1")
)
# Parsing dates and times ----
# A date parsed with the default format.
parse_date("2020-01-13")

# Custom German-language month and day names (full and abbreviated), passed to
# the locale so that "%B" can match "Jänner".
mon <- c(
  "Jänner", "Februar", "März", "April", "Mai", "Juni",
  "Juli", "August", "September", "Oktober", "November", "Dezember"
)
monab <- c(
  "Jän", "Feb", "Mrz", "Apr", "Mai", "Jun",
  "Jul", "Aug", "Sep", "Okt", "Nov", "Dez"
)
day <- c(
  "Sonntag", "Montag", "Dienstag", "Mittwoch",
  "Donnerstag", "Freitag", "Samstag"
)
dayab <- c("So", "Mo", "Di", "Mi", "Do", "Fr", "Sa")
deAT <- date_names(mon = mon, mon_ab = monab, day = day, day_ab = dayab)
parse_date(
  "13 Jänner 2020",
  format = "%d %B %Y",
  locale = locale(date_names = deAT)
)

# Times in 24-hour and am/pm notation.
parse_time("19:30")
parse_time("7:30 pm")

# Date-times: each input is parsed first with the default format and then with
# an explicit format string.
parse_datetime("2020-01-13T19:30:30Z")
parse_datetime(
  "2020-01-13T19:30:30Z",
  format = "%Y-%m-%dT%H:%M:%SZ"
)
parse_datetime("2020-01-13T20:30:30+01:00")
parse_datetime(
  "2020-01-13T20:30:30+01:00",
  format = "%Y-%m-%dT%H:%M:%S%z"
)
# Finding problems ----
# Read readr's bundled example file with guessed column types, then list any
# parsing problems recorded on the result.
challenge <- "challenge.csv" %>%
  readr_example() %>%
  read_csv()
challenge %>% problems()
# Solving problems ----
# Read every column as character first, then let type_convert() re-guess the
# column types from the values.
challenge2 <- readr_example("challenge.csv") %>%
  read_csv(col_types = cols(.default = col_character()))
challenge2 %>% type_convert()
# Problem solved ----
# Read the example file with explicit column types, using readr's compact
# abbreviations: "d" = col_double(), "D" = col_date() (instead of
# col_logical() for y).
challenge <- readr_example("challenge.csv") %>%
  read_csv(col_types = cols(x = "d", y = "D"))

# Stop with an error if any parsing problems remain.
stop_for_problems(challenge)
# Saving the data ----
# Re-read the example file with explicit column types (y as a date instead of
# col_logical()).
challenge <- readr_example("challenge.csv") %>%
  read_csv(
    col_types = cols(
      x = col_double(),
      y = col_date()
    )
  )

# Round trip through CSV in the working directory.
write_csv(challenge, "challenge.csv")
"challenge.csv" %>% read_csv()

# Round trip through the feather format.
write_feather(challenge, "challenge.feather")
"challenge.feather" %>% read_feather()
# Read an .xls workbook from the Internet ----
delitos_url <- "https://mossos.gencat.cat/.content/home/01_els_mossos_desquadra/indicadors_i_qualitat/Dades_obertes/fitxers/Fets_2020.xls"
delitos_path <- "~/R/data/Fets_2020.xls"

# Make sure the destination directory exists; download.file() does not create
# it.
dir.create(dirname(delitos_path), recursive = TRUE, showWarnings = FALSE)

# mode = "wb" forces a binary transfer. Without it, Windows writes in text
# mode for extensions such as .xls and corrupts the workbook.
download.file(delitos_url, delitos_path, mode = "wb")

# Skip the first three rows of the sheet before reading the table.
delitos <- read_excel(delitos_path, skip = 3)
glimpse(delitos)
# Read .sav microdata from a .zip archive ----
# Study code used to build the archive name, the .sav member name and the URL.
codigo <- 3283
zip_file <- paste0("~/R/data/cis/MD", codigo, ".zip")
sav_file <- paste0(codigo, ".sav")
url <- paste0(
  "http://datos.cis.es/webFtp/redireccionador.jsp?dwld=/Microdatos/MD",
  codigo,
  ".zip&Email=domain@domain.com&Terminos=1"
)

# Make sure the destination directory exists; download.file() does not create
# it.
dir.create(dirname(zip_file), recursive = TRUE, showWarnings = FALSE)

# The URL ends in a query string rather than ".zip", so the binary mode must
# be requested explicitly; otherwise the archive is corrupted on Windows.
download.file(url, zip_file, mode = "wb")

# Extract only the .sav member into the session's temporary directory.
sav_filepath <- unzip(zipfile = zip_file, files = sav_file, exdir = tempdir())
if (length(sav_filepath) != 1L) {
  stop("Could not extract ", sav_file, " from ", zip_file, call. = FALSE)
}

ceo <- read_sav(sav_filepath)
glimpse(ceo)
# Working with SQLite ----
# Open a connection using the ":memory:" database name.
con <- dbConnect(drv = RSQLite::SQLite(), dbname = ":memory:")

# Copy the built-in mtcars data frame into a table, then list the tables and
# that table's fields.
dbWriteTable(conn = con, name = "mtcars", value = mtcars)
dbListTables(conn = con)
dbListFields(conn = con, name = "mtcars")

# Send a query, fetch its rows, and release the result set.
res <- dbSendQuery(
  conn = con,
  statement = "SELECT * FROM mtcars WHERE cyl = 4"
)
dbFetch(res = res)
dbClearResult(res = res)

# Close the connection.
dbDisconnect(conn = con)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment