Skip to content

Instantly share code, notes, and snippets.

@johandahlberg
Created September 11, 2012 12:46
Show Gist options
  • Save johandahlberg/3698172 to your computer and use it in GitHub Desktop.
Save johandahlberg/3698172 to your computer and use it in GitHub Desktop.
A Picard class for replacing read group sample names according to a translation table in a whitespace-separated file.
package net.sf.picard.sam;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.io.IoUtil;
import net.sf.picard.util.Log;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileHeader.SortOrder;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
/**
 * Command line program that copies a SAM/BAM file while replacing the sample name of
 * each read group in the header according to a translation file.
 *
 * <p>The translation file is whitespace separated: the first column holds a current
 * sample name and the second column the name that replaces it. Read groups whose sample
 * name has no entry in the translation file keep their current name. The alignment
 * records themselves are written to the output as read.
 */
public class ReplaceSampleNameInReadGroup extends CommandLineProgram {

    @Usage(programVersion="1.0")
    public String USAGE = "Replaces the read group sample names as defined in the input whitespace separated translation file.";

    @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input file (bam or sam).")
    public File INPUT = null;

    @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="Output file (bam or sam).")
    public File OUTPUT = null;

    @Option(shortName=StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, optional=true,
            doc="Optional sort order to output in. If not supplied OUTPUT is in the same order as INPUT.")
    public SortOrder SORT_ORDER;

    @Option(shortName="TR",doc="Whitespace separated translation file with current names in first column and the new names in the second column.")
    public File TRANSLATION_FILE;

    private final Log log = Log.getInstance(ReplaceSampleNameInReadGroup.class);

    /** Required main method implementation. */
    public static void main(final String[] argv) {
        new ReplaceSampleNameInReadGroup().instanceMainWithExit(argv);
    }

    /**
     * Reads INPUT, rewrites the read group sample names in a copy of its header and
     * writes the new header plus all records to OUTPUT.
     *
     * @return 0 when the copy completed
     * @throws IllegalStateException if the translation file cannot be read
     * @throws IllegalArgumentException if the translation file contains a malformed line
     */
    protected int doWork() {
        IoUtil.assertFileIsReadable(INPUT);
        IoUtil.assertFileIsReadable(TRANSLATION_FILE);
        IoUtil.assertFileIsWritable(OUTPUT);

        // Load the translation table before touching the SAM files so that a broken
        // table aborts the run without producing any output.
        final Map<String, String> translationTable = initializeTranslationTable(TRANSLATION_FILE);

        final SAMFileReader in = new SAMFileReader(INPUT);
        try {
            // Create the new header; the input header is left untouched.
            final SAMFileHeader inHeader = in.getFileHeader();
            final SAMFileHeader outHeader = inHeader.clone();

            final List<SAMReadGroupRecord> readGroups = outHeader.getReadGroups();
            for (final SAMReadGroupRecord readGroup : readGroups) {
                final String oldSample = readGroup.getSample();
                final String newSample = translationTable.get(oldSample);
                if (newSample == null) {
                    // Without this guard the sample name would be overwritten with null.
                    log.warn("No translation found for sample name '" + oldSample
                            + "'; keeping the current name.");
                    continue;
                }
                readGroup.setSample(newSample);
            }
            outHeader.setReadGroups(readGroups);

            if (SORT_ORDER != null) {
                outHeader.setSortOrder(SORT_ORDER);
            }

            // The records are only "presorted" when the output keeps the input's sort order.
            final SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(
                    outHeader,
                    outHeader.getSortOrder() == inHeader.getSortOrder(),
                    OUTPUT);
            try {
                for (final SAMRecord read : in) {
                    outWriter.addAlignment(read);
                }
            } finally {
                outWriter.close();
            }
        } finally {
            in.close();
        }
        return 0;
    }

    /**
     * Parses the translation file into a map from current sample name to new sample name.
     *
     * <p>Each non-blank line is split on runs of whitespace; the first column is the key
     * and the second column the value. Additional columns are ignored and blank lines are
     * skipped. When a key occurs more than once the last occurrence wins.
     *
     * @param transFile the whitespace separated translation file
     * @return map of current sample name to replacement sample name
     * @throws IllegalArgumentException if a non-blank line has fewer than two columns
     * @throws IllegalStateException if the file cannot be opened or read
     */
    private Map<String, String> initializeTranslationTable(File transFile) {
        final Map<String, String> translations = new HashMap<String, String>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(transFile)));
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                // Trim first so leading whitespace does not produce an empty first column.
                final String trimmed = line.trim();
                if (trimmed.length() == 0) {
                    continue;
                }
                final String[] columns = trimmed.split("\\s+");
                if (columns.length < 2) {
                    throw new IllegalArgumentException("Malformed line " + lineNumber + " in translation file "
                            + transFile + ": expected two whitespace separated columns but found '" + line + "'");
                }
                translations.put(columns[0], columns[1]);
            }
        } catch (IOException e) {
            // Fail loudly: continuing with a partial table would corrupt the output header.
            throw new IllegalStateException("Could not read translation file " + transFile + ": " + e.getMessage(), e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // The table has already been read (or a failure is propagating);
                    // a close failure adds nothing actionable.
                }
            }
        }
        return translations;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment