Skip to content

Instantly share code, notes, and snippets.

View Keiku's full-sized avatar
🐢
Slowly but surely.

Keiichi Kuroyanagi Keiku

🐢
Slowly but surely.
View GitHub Profile
@Keiku
Keiku / tidyr_reshape.r
Last active February 22, 2017 02:21
Reshaping with tidyr
library("dplyr")
library("tidyr")
library("data.table")
# Six-row example table: IDs 1-3 repeated twice, with BMI, sbp and nendo each
# alternating between two values.
# NOTE(review): `nendo` presumably means fiscal year (2008/2009) -- confirm.
# `tibble()` is used instead of `data_frame()`, which the tibble package has
# deprecated; dplyr re-exports `tibble()`, so no extra library() call is needed.
smp <- tibble(
  ID = rep(1:3, times = 2),
  BMI = rep(c(21, 26), times = 3),
  sbp = rep(c(150, 120), times = 3),
  nendo = rep(2008:2009, times = 3)
)
@Keiku
Keiku / tidy_quantile.r
Created January 6, 2017 04:59
Calculate percentiles.
library(dplyr)
library(broom)
# Quartiles of mpg per cylinder count: group mtcars by cyl, then for each
# group compute the 0/25/50/75/100% quantiles of mpg.
# Inside do(), `.` is the data for the current group. t() turns the named
# quantile vector into a one-row matrix so tidy() can return one row per group.
mtcars %>%
group_by(cyl) %>%
do(tidy(t(quantile(.$mpg, probs = seq(0, 1, 0.25)))))
# Source: local data frame [3 x 6]
# Groups: cyl [3]
#
# cyl X0. X25. X50. X75. X100.
@Keiku
Keiku / freq.r
Created January 11, 2017 05:32
Calculate frequency.
freq <- function(df, ...){
df %<>%
group_by_(...) %>%
summarise(count = n()) %>%
arrange_(.dots = ...) %>%
ungroup() %>%
mutate(
cum_count = cumsum(count),
percent = count / sum(count),
cum_percent = cumsum(percent)
@Keiku
Keiku / serialization_benchmark.r
Created January 12, 2017 08:17
Serialization benchmark.
library(readr)
library(data.table)
library(feather)
# In-memory size of the data frame being benchmarked.
# NOTE(review): `df` is not defined in this excerpt -- confirm where it is built.
object.size(df)
# 1654613472 bytes
# Time writing `df` to CSV with write_csv().
system.time(write_csv(df, "df_write_csv.csv"))
# user system elapsed
# 160.540 29.079 200.667
# Time writing the same `df` to CSV with fwrite().
system.time(fwrite(df, "df_fwrite.csv"))
@Keiku
Keiku / mlr_iris_example.r
Created January 19, 2017 02:55
iris example with mlr.
library(mlr)
# Fix the RNG seed and generator kind so the run is reproducible.
set.seed(123, kind = "L'Ecuyer")

# Classification task on iris predicting Species; both names are bound to
# the same task object.
classif.task <- makeClassifTask(
  id = "iris-example",
  data = iris,
  target = "Species"
)
iris.task <- classif.task

# Resampling description: "CV" with 10 iterations.
resamp <- makeResampleDesc("CV", iters = 10L)

# Learner created from the "classif.rpart" identifier.
lrn <- makeLearner("classif.rpart")

# Grid-search tuning control with default settings.
control.grid <- makeTuneControlGrid()
@Keiku
Keiku / calc_elapsed_months.r
Created January 19, 2017 10:57
Calculate elapsed months.
library(dplyr)
library(lubridate)
# Example input: two ids, each with three year-month strings in "YYYYMM" form.
# `tibble()` is used instead of `data_frame()`, which the tibble package has
# deprecated; dplyr re-exports `tibble()`, so no extra library() call is needed.
df <- tibble(
  id = c(1, 1, 1, 2, 2, 2),
  ym = c("201512", "201601", "201603", "201512", "201602", "201603")
)
elapsed_months <- function(end, start) {
12 * (year(end) - year(start)) + (month(end) - month(start))
@Keiku
Keiku / misc.r
Last active March 9, 2017 05:00
Misc functions.
# Session-wide display settings: a large scipen penalty to discourage
# scientific notation, and dplyr's print width / max rows set to Inf.
options(
  scipen = 100,
  dplyr.width = Inf,
  dplyr.print_max = Inf
)

# Negation of `%in%`: TRUE where a left-hand element is absent from the
# right-hand side.
`%nin%` <- Negate(`%in%`)

# Elements of `x` that also occur in `y`.
keep_vecs <- function(x, y) {
  x[x %in% y]
}

# Elements of `x` that do not occur in `y`.
drop_vecs <- function(x, y) {
  x[!(x %in% y)]
}

# Select the columns of `.data` given by `x` (passed to select_() as .dots).
keep_vars <- function(.data, x) {
  dplyr::select_(.data, .dots = x)
}

# Drop the columns of `.data` whose names are listed in `x`.
drop_vars <- function(.data, x) {
  dplyr::select(.data, -one_of(x))
}

# Fold intersect() over all arguments, left to right.
intersect_all <- function(...) {
  sets <- list(...)
  Reduce(intersect, sets)
}

# Fold union() over all arguments, left to right.
union_all <- function(...) {
  sets <- list(...)
  Reduce(union, sets)
}
@Keiku
Keiku / impute.r
Last active January 26, 2017 07:34
Impute a variable that contains NA values.
library(dplyr)
# One-column example with a missing value in the middle.
# `tibble()` is used instead of `data_frame()`, which the tibble package has
# deprecated; dplyr re-exports `tibble()`, so no extra library() call is needed.
data <- tibble(var = c(0, NA, 2))

# Three equivalent ways to replace the NA with 1 (each yields var = 0, 1, 2):
# 1. coalesce(): take the first non-missing value of `var` and the fallback 1.
data %>% mutate(var = coalesce(var, 1))
# 2. replace(): overwrite the positions where `var` is NA.
data %>% mutate(var = replace(var, which(is.na(var)), 1))
# 3. if_else(): choose 1 where `var` is NA, otherwise keep `var`.
data %>% mutate(var = if_else(is.na(var), 1, var))
# A tibble: 3 × 1
# var
# <dbl>
# 1 0
@Keiku
Keiku / count_missing_values.r
Created January 26, 2017 10:53
count missing values of all columns in DataFrame.
library(mice)
library(purrr)
# For every column of airquality, count the NA entries; map_df() collects the
# per-column counts into a single data frame.
map_df(
  airquality,
  function(column) {
    sum(is.na(column))
  }
)
# A tibble: 1 × 6
# Ozone Solar.R Wind Temp Month Day
# <int> <int> <int> <int> <int> <int>
# 1 37 7 0 0 0 0
@Keiku
Keiku / convert_number_strings_to_numbers.py
Last active January 8, 2023 20:45
Convert number strings with commas in pandas DataFrame to float.
import pandas as pd
import locale
from locale import atof
# Sample frame whose cells are all strings; some use a comma as the
# thousands separator. Built column-by-column.
df = pd.DataFrame({
    'col1': ['1,200', '7,000', '5'],
    'col2': ['4,200', '-0.03', '0'],
})
# col1 col2
# 0 1,200 4,200
# 1 7,000 -0.03
# 2 5 0