Skip to content

Instantly share code, notes, and snippets.

@ipurusho
Last active May 13, 2016 21:14
Show Gist options
  • Save ipurusho/35191ec6962b8fc9203042678a2b5561 to your computer and use it in GitHub Desktop.
Save ipurusho/35191ec6962b8fc9203042678a2b5561 to your computer and use it in GitHub Desktop.
import sys
import os
import re
import glob
#sys.argv[1] = action
#sys.argv[2] = top level folder
#sys.argv[3] = read
def subdirectories(args):
    """Return the names of the immediate subdirectories of ``args``.

    Only the entry names are returned (not full paths), in the order
    ``os.listdir`` reports them. Plain files are skipped.
    """
    found = []
    for entry in os.listdir(args):
        candidate = os.path.join(args, entry)
        if os.path.isdir(candidate):
            found.append(entry)
    return found
def findKey(input_dict, value):  # util for dict
    """Return the set of keys in ``input_dict`` whose value equals ``value``.

    An empty set is returned when no entry matches.
    """
    matching = set()
    for key in input_dict:
        if input_dict[key] == value:
            matching.add(key)
    return matching
def rchop(thestring, ending):  # util for regex
    """Return ``thestring`` with a trailing ``ending`` removed, if present.

    The string is returned unchanged when it does not end with ``ending``
    or when ``ending`` is empty.
    """
    # Guard against an empty ending: every string "ends with" "", and
    # thestring[:-0] is thestring[:0], which would wipe the whole string.
    if ending and thestring.endswith(ending):
        return thestring[:-len(ending)]
    return thestring
def newName(oldName):  # util for regex
    """Extract the bare sample name from a file name.

    The name must start with one uppercase letter followed by one or more
    underscores; that prefix is dropped. From the remainder, everything up
    to (but excluding) the last ``_b`` or ``_c`` marker is returned.

    Raises:
        ValueError: if ``oldName`` lacks the leading prefix or the
            ``_b``/``_c`` marker (previously an opaque AttributeError
            on ``None``).
    """
    pool_prefix = re.match(r'[A-Z]_+', oldName)
    if pool_prefix is None:
        raise ValueError(
            "file name %r does not start with an uppercase letter "
            "followed by '_'" % (oldName,))

    remainder = oldName[pool_prefix.end():]
    # Greedy '.+' makes the group capture the LAST '_b'/'_c' in the name.
    sample_only = re.match(r'.+(_[bc])', remainder)
    if sample_only is None:
        raise ValueError(
            "file name %r has no '_b' or '_c' marker after its prefix"
            % (oldName,))

    # The marker is the tail of the match, so cutting at its start is the
    # same as chopping the marker off the matched text.
    return remainder[:sample_only.start(1)]
def sampleDict(path):  # files in dir are made into dictionary, value is R1 or R2
    """Map each ``.gz`` file in ``path`` to the read tag found in its name.

    Args:
        path: directory to scan (not recursive).

    Returns:
        dict of file name (no directory part) -> ``"R1"`` or ``"R2"``,
        taken from the first occurrence of the tag in the name. Files
        without a ``.gz`` extension or without a tag are left out.
    """
    # Was "R[1|2]": inside a character class '|' is a literal, so names
    # containing "R|" were accepted as well.
    read_tag = re.compile(r"R[12]")
    samples = {}
    for filename in os.listdir(path):
        if os.path.splitext(filename)[1] != ".gz":
            continue
        found = read_tag.search(filename)
        if found is not None:
            samples[filename] = found.group(0)
    return samples
def merge(samples):  # merges files
    """Build the ``cat`` command that concatenates one read's files.

    Args:
        samples: dict of file name -> read tag, as built by ``sampleDict``.

    Returns:
        Shell command string: the files tagged ``sys.argv[3]``, sorted by
        name, concatenated into
        ``<sample>_<sys.argv[3]>_<sys.argv[1]>.fastq.gz``.

    Raises:
        ValueError: if no file carries the requested read tag.
    """
    read = sys.argv[3]
    inputs = sorted(findKey(samples, read))
    if not inputs:
        # An argument-less "cat > out" would block reading stdin.
        raise ValueError("no files tagged %r to merge" % (read,))

    # Name the output after the first input. The old samples.keys()[1]
    # failed on Python 3 (dict views are not indexable) and on
    # directories with a single file, and picked an arbitrary file.
    target = newName(inputs[0]) + "_" + read + "_" + sys.argv[1] + ".fastq.gz"
    # NOTE(review): file names are inserted unquoted; names containing
    # spaces or shell metacharacters will break the command.
    return "cat " + " ".join(inputs) + " > " + target
def rename(samples):
    """Build the ``mv`` command that renames one read's file.

    Args:
        samples: dict of file name -> read tag, as built by ``sampleDict``.

    Returns:
        Shell command string moving the files tagged ``sys.argv[3]``
        (sorted by name) to
        ``<sample>_<sys.argv[3]>_<sys.argv[1]>.fastq.gz``.

    Raises:
        ValueError: if no file carries the requested read tag.
    """
    read = sys.argv[3]
    sources = sorted(findKey(samples, read))
    if not sources:
        raise ValueError("no files tagged %r to rename" % (read,))

    # Name the target after the first source. The old samples.keys()[1]
    # failed on Python 3 (dict views are not indexable) and on
    # directories with a single file, and picked an arbitrary file.
    target = newName(sources[0]) + "_" + read + "_" + sys.argv[1] + ".fastq.gz"
    # NOTE(review): with more than one source, mv only succeeds when the
    # last operand is a directory, so this expects one file per read.
    # File names are inserted unquoted.
    return "mv " + " ".join(sources) + " " + target
def main():
    """Run the requested action on every subdirectory of the top folder.

    Command line: ``<action> <top level folder> <read>`` where action is
    ``merge`` or ``rename``. For each immediate subdirectory, the command
    built by the action is executed through the shell.
    """
    # Local import keeps this block self-contained within the file.
    import subprocess

    usage = "usage: %s <merge|rename> <top level folder> <read>" % (sys.argv[0],)
    if len(sys.argv) < 4:
        sys.exit(usage)

    builders = {"merge": merge, "rename": rename}
    build_command = builders.get(sys.argv[1])
    if build_command is None:
        sys.exit("unknown action %r\n%s" % (sys.argv[1], usage))

    top = sys.argv[2]
    for subdir in subdirectories(top):
        # os.path.join instead of top + subdir: works with or without a
        # trailing separator on the top-level folder.
        sample_dir = os.path.join(top, subdir)
        command = build_command(sampleDict(sample_dir))
        # The command names files without a directory part, so it must
        # run inside the sample directory to find them.
        subprocess.call(command, shell=True, cwd=sample_dir)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment