IdoBar/cn.mops.R

## cn.mops.R
#!/usr/bin/env Rscript
#PBS -V

# This script will call copy number variants using cn.mops
# usage: Rscript cn.mops.R --help

library(optparse)

option_list = list(
  make_option(c("-i", "--input_dir"), type="character", default=NULL,
              help="input folder containing bams files", metavar="character"),
    make_option(c("-o", "--out_dir"), type="character", default="output",
              help="output folder name [default= %default]", metavar="character"),
    make_option(c("-t", "--threads"), type="character", default=as.integer(Sys.getenv("NCPUS")),
              help="number of parallel threads to use [default= %default]", metavar="integer")
)

opt_parser = OptionParser(option_list=option_list)
opt = parse_args(opt_parser)

if (is.null(opt$input_dir)){
  print_help(opt_parser)
  stop("At least one argument must be supplied (input dir).\n", call.=FALSE)
}

library(cn.mops)
library(tidyverse)

if (!dir.exists(opt$out_dir)) dir.create(opt$out_dir)
bams <- list.files(opt$input_dir, ".+.bam$", full.names = TRUE)
bam_names <- sub(".bam", "",  basename(bams))
bamDataRanges <- getReadCountsFromBAM(bams, bam_names, parallel=as.integer(opt$threads)) # extract read counts from BAMs
resHaplo <- haplocn.mops(bamDataRanges, parallel=as.integer(opt$threads)) # identify CN regions
if (length(resHaplo@cnvs)==0) stop("No CNV regions detected, exiting.\n", call.=FALSE) # exit if no regions idetified
results_haplo <- calcIntegerCopyNumbers(resHaplo) # convert to integer CN

# plot outputs for each sample
iwalk(bam_names, .f = ~{
  png(file.path(opt$out_dir, glue::glue("cn_mops_{.x}_haplo.png")), width=1800, height=1200)
		segplot(results_haplo, sampleIdx=.y)
	dev.off()
})

# Haploid Results
results_haplo_df <- as_tibble(cnvr(results_haplo)) %>%
  # set_names(sub("^X", "", names(.))) %>%
  rename(chrom=seqnames) %>%
				   mutate(name="", score=0, across(!1:5, .fns = ~as.integer(sub("CN", "", .x)))) %>%
  select(chrom, start, end,  name, score, strand, width, everything()) %>%
				   arrange(chrom) %>%
  set_names(c("chrom", "start", "end", "name", "score", "strand", "width",bam_names))

# save to file
write_tsv(results_haplo_df, file="output/cn_mops_haplo_df.bed")
	#!/usr/bin/env Rscript
	#PBS -V

	# This script will call copy number variants using cn.mops
	# usage: Rscript cn.mops.R --help

	library(optparse)

	option_list = list(
	make_option(c("-i", "--input_dir"), type="character", default=NULL,
	help="input folder containing bams files", metavar="character"),
	make_option(c("-o", "--out_dir"), type="character", default="output",
	help="output folder name [default= %default]", metavar="character"),
	make_option(c("-t", "--threads"), type="character", default=as.integer(Sys.getenv("NCPUS")),
	help="number of parallel threads to use [default= %default]", metavar="integer")
	)

	opt_parser = OptionParser(option_list=option_list)
	opt = parse_args(opt_parser)

	if (is.null(opt$input_dir)){
	print_help(opt_parser)
	stop("At least one argument must be supplied (input dir).\n", call.=FALSE)
	}

	library(cn.mops)
	library(tidyverse)

	if (!dir.exists(opt$out_dir)) dir.create(opt$out_dir)
	bams <- list.files(opt$input_dir, ".+.bam$", full.names = TRUE)
	bam_names <- sub(".bam", "", basename(bams))
	bamDataRanges <- getReadCountsFromBAM(bams, bam_names, parallel=as.integer(opt$threads)) # extract read counts from BAMs
	resHaplo <- haplocn.mops(bamDataRanges, parallel=as.integer(opt$threads)) # identify CN regions
	if (length(resHaplo@cnvs)==0) stop("No CNV regions detected, exiting.\n", call.=FALSE) # exit if no regions idetified
	results_haplo <- calcIntegerCopyNumbers(resHaplo) # convert to integer CN

	# plot outputs for each sample
	iwalk(bam_names, .f = ~{
	png(file.path(opt$out_dir, glue::glue("cn_mops_{.x}_haplo.png")), width=1800, height=1200)
	segplot(results_haplo, sampleIdx=.y)
	dev.off()
	})

	# Haploid Results
	results_haplo_df <- as_tibble(cnvr(results_haplo)) %>%
	# set_names(sub("^X", "", names(.))) %>%
	rename(chrom=seqnames) %>%
	mutate(name="", score=0, across(!1:5, .fns = ~as.integer(sub("CN", "", .x)))) %>%
	select(chrom, start, end, name, score, strand, width, everything()) %>%
	arrange(chrom) %>%
	set_names(c("chrom", "start", "end", "name", "score", "strand", "width",bam_names))

	# save to file
	write_tsv(results_haplo_df, file="output/cn_mops_haplo_df.bed")