Skip to content

Instantly share code, notes, and snippets.

@stijnvanhoey
Last active May 16, 2018 14:45
Show Gist options
  • Save stijnvanhoey/6c9bef0be4a57f45257131518c120661 to your computer and use it in GitHub Desktop.
Save stijnvanhoey/6c9bef0be4a57f45257131518c120661 to your computer and use it in GitHub Desktop.
Sequence in string to sequence string + separate rows
library(tidyr)
library(dplyr)
library(purrr)
#' Expand colon-separated ranges inside comma-separated number strings
#'
#' Every `start:end` range found in a string (e.g. `"3:5"`) is replaced by
#' the comma-separated sequence it denotes (`"3,4,5"`). All other characters
#' of the string are left untouched. A range whose start exceeds its end
#' counts downwards, as `seq()` does.
#'
#' @param input character vector whose elements are comma-separated numbers,
#'   optionally containing `:` based ranges in between. Each element is
#'   processed independently.
#'
#' @return character vector of the same length as `input`, with every range
#'   written out as comma-separated numbers.
#'
#' @examples
#' colon_to_seq("1:3,4,5,6")
#' colon_to_seq("5:8,4,5,6")
#' colon_to_seq("1,2,3:5,6:10,11")
#' colon_to_seq(c("1:3", "4,5", "6:8,10"))
colon_to_seq <- function(input) {
  # Positions of all "<digits>:<digits>" ranges, one list entry per element
  seq_locs <- gregexpr("\\d+:\\d+", input)

  # Turn a single "start:end" text into "start,...,end"
  expand_range <- function(text) {
    bounds <- as.numeric(strsplit(text, ":", fixed = TRUE)[[1]])
    paste(seq(bounds[1], bounds[2]), collapse = ",")
  }

  # Build one replacement vector per input element so that each element gets
  # its own expansions (elements without a range get a zero-length vector).
  replacements <- lapply(
    regmatches(input, seq_locs),
    function(ranges) {
      vapply(ranges, expand_range, character(1), USE.NAMES = FALSE)
    }
  )

  regmatches(input, seq_locs) <- replacements
  input
}
# TESTING THE FUNCTIONALITY
# Each input_N / output_N pair is a test string and its expected expansion.
input_1 <- "1:3,4,5,6"
output_1 <- "1,2,3,4,5,6"
input_2 <- "5:8,4,5,6"
output_2 <- "5,6,7,8,4,5,6"
input_3 <- "4,5,6"
output_3 <- "4,5,6"
input_4 <- "6:10"
output_4 <- "6,7,8,9,10"
input_5 <- "1,2,3:5,6:10,11"
output_5 <- "1,2,3,4,5,6,7,8,9,10,11"

# `assertthat::are_equal()` only returns TRUE/FALSE without stopping, so a
# mismatch never failed the script; `stopifnot()` halts on the first failing
# case. Case 5 (several ranges in one string) is now checked as well.
stopifnot(
  identical(colon_to_seq(input_1), output_1),
  identical(colon_to_seq(input_2), output_2),
  identical(colon_to_seq(input_3), output_3),
  identical(colon_to_seq(input_4), output_4),
  identical(colon_to_seq(input_5), output_5)
)
# Extending towards a data frame
# Input: one row per ID string, some of which contain ranges.
my_df <- data.frame(
  ID = c(input_1, input_5, input_3),
  val = rep("b", 3),
  stringsAsFactors = FALSE
)
# Expected state once the ranges are expanded, before splitting into rows.
output_df <- data.frame(
  ID = c(output_1, output_5, output_3),
  val = rep("b", 3),
  stringsAsFactors = FALSE
)

# rowwise() hands colon_to_seq() one ID string at a time.
expanded_df <- my_df %>%
  rowwise() %>%
  mutate(ID = colon_to_seq(.data$ID)) %>%
  ungroup()

# The expected data frame used to be built but never compared; check it here.
stopifnot(
  identical(expanded_df$ID, output_df$ID),
  identical(expanded_df$val, output_df$val)
)

# Final result: one row per individual number in the expanded ID strings.
expanded_df %>%
  separate_rows(ID, sep = ",")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment