dnanto/ft.R

## ft.R
#' Read a five-column, tab-separated feature table.
#'
#' The expected columns are, in order: start, end, feature name, qualifier
#' key and qualifier value. Lines whose first field begins with ">" are
#' treated as headers that open a new group of features.
#' NOTE(review): the layout matches NCBI's five-column feature table format;
#' confirm against the files actually passed in.
#' NOTE(review): a second definition of `read_ft` appears later in this file
#' and replaces this one when the file is sourced; keep them in sync or
#' remove one.
#'
#' @param file Input passed unchanged to `read_tsv()`.
#' @return A named list with one element per header line, named by the first
#'   field of that header (including the leading ">"). Each element is a list
#'   of two tibbles:
#'   * `locus`: `id`, `name`, `start`, `end`, `p5`, `p3` -- one row per
#'     coordinate line; `p5`/`p3` are `TRUE` when the start carried a "<" or
#'     the end carried a ">" marker.
#'   * `qual`: `id`, `key`, `val` -- one row per qualifier line; `val` is `NA`
#'     for qualifiers given without a value.
#'   `id` is a running feature counter that links `qual` rows to `locus` rows.
read_ft <- function(file) {
  # Rows may carry fewer than five fields; the warnings readr raises for such
  # ragged rows are presumably why only the read itself is silenced.
  raw <- suppressWarnings(
    read_tsv(
      file,
      col_names = c("start", "end", "name", "key", "val"),
      col_types = cols(.default = "c")
    )
  )

  raw %>%
    mutate(
      # header lines (first field starts with ">") identify the accession
      acc = ifelse(!is.na(start) & startsWith(start, ">"), start, NA),
      # every row that names a feature starts a new feature id
      id = cumsum(!is.na(name))
    ) %>%
    # propagate accession and feature name onto continuation/qualifier rows
    fill(acc, name, .direction = "down") %>%
    # header rows have served their purpose; drop them
    filter(!(!is.na(start) & startsWith(start, ">"))) %>%
    mutate(
      # record the partial markers before stripping them from the coordinates
      p5 = startsWith(start, "<"),
      p3 = startsWith(end, ">"),
      start = as.integer(str_remove(start, "<")),
      end = as.integer(str_remove(end, ">"))
    ) %>%
    split(.$acc) %>%
    lapply(function(ele) {
      list(
        # coordinate rows: same row filter as before (name/start/end present),
        # but the partial flags are now kept instead of being discarded
        locus = ele %>%
          filter(!is.na(name) & !is.na(start) & !is.na(end)) %>%
          select(id, name, start, end, p5, p3),
        # qualifier rows: keyed on `key` only, so qualifiers without a value
        # are retained with `val = NA` rather than silently dropped
        qual = ele %>%
          filter(!is.na(key)) %>%
          select(id, key, val)
      )
    })
}
	#' Read a five-column, tab-separated feature table.
	#'
	#' The expected columns are, in order: start, end, feature name, qualifier
	#' key and qualifier value. Lines whose first field begins with ">" are
	#' treated as headers that open a new group of features.
	#' NOTE(review): the layout matches NCBI's five-column feature table format;
	#' confirm against the files actually passed in.
	#' NOTE(review): `read_ft` is also defined earlier in this file; this later
	#' definition is the one in effect after the file is sourced. Keep them in
	#' sync or remove one.
	#'
	#' @param file Input passed unchanged to `read_tsv()`.
	#' @return A named list with one element per header line, named by the first
	#'   field of that header (including the leading ">"). Each element is a list
	#'   of two tibbles:
	#'   * `locus`: `id`, `name`, `start`, `end`, `p5`, `p3` -- one row per
	#'     coordinate line; `p5`/`p3` are `TRUE` when the start carried a "<" or
	#'     the end carried a ">" marker.
	#'   * `qual`: `id`, `key`, `val` -- one row per qualifier line; `val` is `NA`
	#'     for qualifiers given without a value.
	#'   `id` is a running feature counter that links `qual` rows to `locus` rows.
	read_ft <- function(file) {
	  # Rows may carry fewer than five fields; the warnings readr raises for such
	  # ragged rows are presumably why only the read itself is silenced.
	  raw <- suppressWarnings(
	    read_tsv(
	      file,
	      col_names = c("start", "end", "name", "key", "val"),
	      col_types = cols(.default = "c")
	    )
	  )

	  raw %>%
	    mutate(
	      # header lines (first field starts with ">") identify the accession
	      acc = ifelse(!is.na(start) & startsWith(start, ">"), start, NA),
	      # every row that names a feature starts a new feature id
	      id = cumsum(!is.na(name))
	    ) %>%
	    # propagate accession and feature name onto continuation/qualifier rows
	    fill(acc, name, .direction = "down") %>%
	    # header rows have served their purpose; drop them
	    filter(!(!is.na(start) & startsWith(start, ">"))) %>%
	    mutate(
	      # record the partial markers before stripping them from the coordinates
	      p5 = startsWith(start, "<"),
	      p3 = startsWith(end, ">"),
	      start = as.integer(str_remove(start, "<")),
	      end = as.integer(str_remove(end, ">"))
	    ) %>%
	    split(.$acc) %>%
	    lapply(function(ele) {
	      list(
	        # coordinate rows: same row filter as before (name/start/end present),
	        # but the partial flags are now kept instead of being discarded
	        locus = ele %>%
	          filter(!is.na(name) & !is.na(start) & !is.na(end)) %>%
	          select(id, name, start, end, p5, p3),
	        # qualifier rows: keyed on `key` only, so qualifiers without a value
	        # are retained with `val = NA` rather than silently dropped
	        qual = ele %>%
	          filter(!is.na(key)) %>%
	          select(id, key, val)
	      )
	    })
	}