Skip to content

Instantly share code, notes, and snippets.

@geocarvalho
Created May 23, 2017 12:33
Show Gist options
  • Save geocarvalho/023f87998165f1320e13f6ff0ddb118a to your computer and use it in GitHub Desktop.
Save geocarvalho/023f87998165f1320e13f6ff0ddb118a to your computer and use it in GitHub Desktop.
Format a sample sheet for use with bcl2fastq
from Bio.Seq import Seq
import pandas as pd
import csv
import sys
import os
def reverse_complement(sequence):
    """Return the reverse complement of ``sequence`` as a plain string.

    The argument is converted with ``str()`` before being wrapped in a
    Biopython ``Seq``, and the ``Seq`` result is converted back to ``str``.
    """
    return str(Seq(str(sequence)).reverse_complement())
def main(header_rows=20):
    """Write a copy of a sample sheet with ``index2`` reverse-complemented.

    The input path is read from ``sys.argv[1]``. The first ``header_rows``
    lines of the input are copied to the output through the ``csv`` module;
    everything after them is parsed as a table with pandas, the ``index2``
    column is passed through ``reverse_complement``, and the table is
    appended to the output with ``Sample_ID`` as its leading column.

    The output is created in the same directory as the input and is named
    ``<name>-new.csv``, where ``<name>`` is the input file name up to its
    first ``.``.

    Args:
        header_rows: Number of leading lines that precede the table's column
            header. Defaults to 20, the value this script always used.

    Raises:
        SystemExit: If no input path was given on the command line.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: {} SAMPLE_SHEET.csv".format(
            os.path.basename(sys.argv[0])))

    path, arq = os.path.split(os.path.abspath(sys.argv[1]))
    sample_sheet = os.path.join(path, arq)
    new_arq = os.path.join(path, arq.split(".")[0] + "-new.csv")

    # Organize the DataFrame with the reverse complement.
    sample_df = pd.read_csv(sample_sheet, skiprows=header_rows)
    # na_action="ignore" leaves missing index2 cells empty; without it a NaN
    # would be stringified to "nan" and turned into a bogus index sequence.
    sample_df["index2"] = sample_df["index2"].map(
        reverse_complement, na_action="ignore")

    # newline="" is what both the csv module and pandas expect for file
    # objects; it prevents newline translation from doubling "\r" on Windows.
    with open(sample_sheet, newline="") as master, \
            open(new_arq, "w", newline="") as matched:
        cr = csv.reader(master)
        # Use os.linesep so the copied header rows end the same way as the
        # rows pandas writes below (csv.writer would default to "\r\n").
        cw = csv.writer(matched, lineterminator=os.linesep)
        for _ in range(header_rows):
            cw.writerow(next(cr))
        sample_df.set_index("Sample_ID").to_csv(matched)
# Run the conversion only when this file is executed as a script, so that
# importing it (e.g. to reuse reverse_complement) has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment