Skip to content

Instantly share code, notes, and snippets.

@holgerbrandl
Last active October 26, 2018 09:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save holgerbrandl/3d88c11519340ce915f964d0845d4c91 to your computer and use it in GitHub Desktop.
Save holgerbrandl/3d88c11519340ce915f964d0845d4c91 to your computer and use it in GitHub Desktop.
Rewrites a big-wig file while adding a 'chr' prefix to the chromosome names. Requires `kscript`.Usage `add_chr_prefix_to_bw.kts chrom_sizes.fai in.bw out.bw`
#!/usr/bin/env kscript
// kshell_kts.sh /Users/brandl/.kscript/kscript_tmp_project__adjust_bigwig_ids.kts_1540471113973/src/adjust_bigwig_ids.kts
@file:DependsOn("org.jetbrains.bio:big:0.8.3")
@file:DependsOn("com.xenomachina:kotlin-argparser:2.0.7")
import com.xenomachina.argparser.ArgParser
import com.xenomachina.argparser.DefaultHelpFormatter
import com.xenomachina.argparser.mainBody
import org.apache.log4j.ConsoleAppender
import org.apache.log4j.Level
import org.apache.log4j.Logger
import org.apache.log4j.SimpleLayout
import org.jetbrains.bio.big.BedGraphSection
import org.jetbrains.bio.big.BigWigFile
import org.jetbrains.bio.big.FixedStepSection
import org.jetbrains.bio.big.VariableStepSection
import java.io.File
import java.nio.file.Files
import java.nio.file.Paths
// For details https://github.com/JetBrains-Research/big and in particular https://github.com/JetBrains-Research/big/issues/40
class MyArgs(parser: ArgParser) {
val chromSizes by parser.positional("chrom_sizes") { File(this) }
val bwInput by parser.positional("bw_input") { File(this) }
val bwOutput by parser.positional("bw_output") { File(this) }
}
// see https://github.com/xenomachina/kotlin-argparser/issues/65
val parsedArgs = mainBody { ArgParser(args, helpFormatter = DefaultHelpFormatter("")).parseInto(::MyArgs) }
//TODO also support species genome build names (mm10, hg19) instead along chrom_size file arguments
// E.g. /Volumes/plantx/planmine_v3/dd_Smes_v2/mine_export/dd_Smes_v2.pcf.contigs.fasta.fai /Users/brandl/Desktop/dd_Smes_v2.bw /Users/brandl/Desktop/dd_Smes_v2_new.bw
// E.g. download from http://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.chrom.sizes
val chrSizesPath = Paths.get(parsedArgs.chromSizes.toURI())
val inputPath = Paths.get(parsedArgs.bwInput.toURI())
val outputPath = Paths.get(parsedArgs.bwOutput.toURI())
val LOG = Logger.getLogger("big_wig_id_converter")
// Optional logger
Logger.getRootLogger().apply {
addAppender(ConsoleAppender(SimpleLayout()))
level = Level.INFO
}
LOG.info("Loading chromosome sizes..")
val chromSizes = Files.readAllLines(chrSizesPath)
.asSequence()
.map { line ->
val chunks = line.split("\t", limit = 10)
// require(chunks.size == 2)
chunks[0] to chunks[1].toInt()
}.toList()
val chromosomes = chromSizes.asSequence().map { it.first }
LOG.info("Opening BigFile...")
BigWigFile.read(inputPath).use { bwInput ->
val compression = bwInput.compression
LOG.info("Loading Wig sections...")
val wigSections = bwInput.chromosomes
.valueCollection()
.flatMap { chr ->
require(chr in chromosomes) {
"Chromosome length isn't specified for chromosome: $chr"
}
bwInput.query(chr)
}
LOG.info(" Done")
LOG.info("Converting Wig sections...")
val updatedWigSections = wigSections.map { section ->
val fixedChrName = "chr${section.chrom}"
val e = when (section) {
is FixedStepSection -> section.copy(chrom = fixedChrName)
is VariableStepSection -> section.copy(chrom = fixedChrName)
is BedGraphSection -> section.copy(chrom = fixedChrName)
else -> error("Unknown section type: ${section.javaClass}")
}
e
}
LOG.info(" Done")
LOG.info("Writing Wig sections...")
BigWigFile.write(updatedWigSections, chromSizes, outputPath, compression = compression)
LOG.info(" Done")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment