Last active
May 16, 2018 14:45
-
-
Save stijnvanhoey/6c9bef0be4a57f45257131518c120661 to your computer and use it in GitHub Desktop.
Sequence in string to sequence string + separate rows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyr) | |
library(dplyr) | |
library(purrr) | |
#' Expand colon-separated ranges inside comma-separated number strings
#'
#' Every `start:end` range found in a string is replaced by the comma-separated
#' run of integers it stands for; all other text is left untouched.
#'
#' @param input Character vector. Each element is a comma-separated list of
#'   numbers in which some entries may be `start:end` ranges, e.g.
#'   `"1,2,3:5,6:10,11"`.
#'
#' @return Character vector of the same length as `input`, with every range
#'   written out in full. Elements without a range are returned unchanged.
#'
#' @examples
#' colon_to_seq("1:3,4,5,6")
#' colon_to_seq("5:8,4,5,6")
#' colon_to_seq("1,2,3:5,6:10,11")
#' colon_to_seq(c("1:3", "4,5", "6:8,9"))
colon_to_seq <- function(input) {
  # Nothing to expand; also avoids indexing into an empty match list.
  if (length(input) == 0L) {
    return(input)
  }

  range_locs <- gregexpr("\\d+:\\d+", input)

  # Turn a single "start:end" token into "start,start+1,...,end".
  expand_range <- function(range_text) {
    bounds <- as.numeric(strsplit(range_text, ":", fixed = TRUE)[[1]])
    paste(seq(bounds[1], bounds[2]), collapse = ",")
  }

  # One character vector of replacements per input element, so each element
  # gets its own expansions. Elements without a range yield character(0).
  replacements <- lapply(
    regmatches(input, range_locs),
    function(ranges) {
      vapply(ranges, expand_range, character(1), USE.NAMES = FALSE)
    }
  )

  regmatches(input, range_locs) <- replacements
  input
}
# TESTING THE FUNCTIONALITY ----
# Input strings paired with the fully expanded string expected for each.
input_1 <- "1:3,4,5,6"
output_1 <- "1,2,3,4,5,6"
input_2 <- "5:8,4,5,6"
output_2 <- "5,6,7,8,4,5,6"
input_3 <- "4,5,6"
output_3 <- "4,5,6"
input_4 <- "6:10"
output_4 <- "6,7,8,9,10"
input_5 <- "1,2,3:5,6:10,11"
output_5 <- "1,2,3,4,5,6,7,8,9,10,11"

# stopifnot() aborts on the first mismatch, so a regression cannot pass
# unnoticed as a printed FALSE.
stopifnot(
  identical(colon_to_seq(input_1), output_1),
  identical(colon_to_seq(input_2), output_2),
  identical(colon_to_seq(input_3), output_3),
  identical(colon_to_seq(input_4), output_4),
  identical(colon_to_seq(input_5), output_5)
)
# Extending towards a data frame ----
my_df <- data.frame(
  ID = c(input_1, input_5, input_3),
  val = rep("b", 3),
  stringsAsFactors = FALSE
)

# Expected state once the ranges are expanded, before splitting into rows.
output_df <- data.frame(
  ID = c(output_1, output_5, output_3),
  val = rep("b", 3),
  stringsAsFactors = FALSE
)

# rowwise() hands colon_to_seq() one ID string at a time.
expanded_df <- my_df %>%
  rowwise() %>%
  mutate(ID = colon_to_seq(.data$ID)) %>%
  ungroup()

# Compare as plain data frames so the tibble class added by dplyr is ignored.
stopifnot(isTRUE(all.equal(as.data.frame(expanded_df), output_df)))

# One row per individual number, with the other columns repeated.
expanded_df %>%
  separate_rows(ID, sep = ",")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment