Skip to content

Instantly share code, notes, and snippets.

@geocarvalho
Created May 23, 2017 12:32
Show Gist options
  • Save geocarvalho/e6fcaa8b0194e4e14aca533ba4e76879 to your computer and use it in GitHub Desktop.
Save geocarvalho/e6fcaa8b0194e4e14aca533ba4e76879 to your computer and use it in GitHub Desktop.
Script that organizes FASTQ files into one directory per exam and builds the tab-separated input file for the QIAGEN site, taking the bioinfo worklist as its argument
import os
import sys
import pandas as pd
# Column layout of the header-less bioinfo worklist CSV.
WORKLIST_COLUMNS = ['pool', 'analysis_type', 'exame', 'control', 'gender']
# Worklist gender codes -> labels written to the QIAGEN input file.
GENDER_LABELS = {'F': 'Female', 'M': 'Male', '-': 'None'}
# Columns (and their order) written to each qiagen-<exam>.txt file.
OUTPUT_COLUMNS = ['read file 1', 'read file 2', 'sample name', 'gender']
FASTQ_SUFFIX = "fastq.gz"


def load_worklist(worklist_path):
    """Read the bioinfo worklist into a DataFrame indexed by pool.

    The pool and exam columns are read as strings so that they can be
    compared with file-name prefixes and used as folder names even when
    they look numeric.
    """
    worklist_df = pd.read_csv(
        worklist_path, header=None, names=WORKLIST_COLUMNS,
        dtype={'pool': str, 'exame': str})
    worklist_df.set_index(['pool'], inplace=True)
    worklist_df['sample name'] = worklist_df.index
    worklist_df['gender'] = worklist_df['gender'].replace(GENDER_LABELS)
    return worklist_df


def find_fastq_pairs(pools, search_root="."):
    """Map each pool to its ``(R1 path, R2 path)`` found under *search_root*.

    The pool is the part of the file name before the first underscore.
    Files whose pool is not in *pools* are ignored, and pools for which
    only one of the two reads was found are left out of the result.
    """
    found = {}
    for root, _dirs, files in os.walk(search_root):
        for fastq_name in files:
            if not fastq_name.endswith(FASTQ_SUFFIX):
                continue
            pool_name = fastq_name.split("_")[0]
            if pool_name not in pools:
                continue
            # Look for the read tag after the pool prefix only, so a pool
            # whose own name contains "R1" is not mistaken for read 1.
            stem = fastq_name.split(".")[0]
            slot = 0 if "R1" in stem[len(pool_name):] else 1
            reads = found.setdefault(pool_name, [None, None])
            reads[slot] = os.path.join(root, fastq_name)
    return {pool: tuple(reads) for pool, reads in found.items()
            if all(reads)}


def organise_by_exam(worklist_df, fastq_pairs):
    """Move paired FASTQs into per-exam folders and write the QIAGEN files.

    For every exam that has at least one pool with both reads, a folder
    named after the exam is created in the working directory, the FASTQs
    are moved into it and ``qiagen-<exam>.txt`` (tab separated) is written
    there.
    """
    has_pair = worklist_df.index.isin(list(fastq_pairs))
    complete_df = worklist_df[has_pair].fillna("None")
    complete_df['read file 1'] = [
        os.path.basename(fastq_pairs[pool][0]) for pool in complete_df.index]
    complete_df['read file 2'] = [
        os.path.basename(fastq_pairs[pool][1]) for pool in complete_df.index]

    for exame in complete_df['exame'].unique():
        exam_df = complete_df[complete_df['exame'] == exame]
        # Create folder with exam name (relative to the working directory,
        # which is also where the files are moved to).
        if not os.path.isdir(exame):
            os.makedirs(exame)
        # Move all FASTQ files to exam folder
        for pool in exam_df.index.unique():
            for source in fastq_pairs[pool]:
                target = os.path.join(exame, os.path.basename(source))
                # Already in place (e.g. on a re-run): nothing to move.
                if os.path.abspath(source) != os.path.abspath(target):
                    os.rename(source, target)
        # Create txt file inside exam folder
        exam_df[OUTPUT_COLUMNS].to_csv(
            os.path.join(exame, 'qiagen-%s.txt' % exame),
            index=False, sep='\t')


def main(argv=None):
    """Organise the FASTQs under the working directory by exam.

    *argv* is the argument list without the program name; it defaults to
    ``sys.argv[1:]`` and must hold exactly one item, the worklist path.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        sys.exit("usage: %s WORKLIST_CSV" % os.path.basename(sys.argv[0]))
    worklist_df = load_worklist(args[0])
    # Iterate through the folder tree and find the FASTQs of known pools
    fastq_pairs = find_fastq_pairs(set(worklist_df.index))
    organise_by_exam(worklist_df, fastq_pairs)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment