Skip to content

Instantly share code, notes, and snippets.

@jeremiaheb
Last active October 9, 2015 05:58
Show Gist options
  • Save jeremiaheb/3450010 to your computer and use it in GitHub Desktop.
Save jeremiaheb/3450010 to your computer and use it in GitHub Desktop.
RVC Quality Control Methods
class newOne:
    """Placeholder class that keeps hold of the value it was built with."""

    def __init__(self, new_one):
        # NOTE(review): the original stub had no body; storing the argument
        # follows the pattern used by dataEntryFile -- confirm intent.
        self.new_one = new_one
class dataEntryFile:
    """Wrapper around the path of a '~'-delimited text file."""

    def __init__(self, entryFile):
        # Path (or anything else ``open`` accepts) of the file to read.
        self.entryFile = entryFile

    def readFileToList(self):
        """Return the file's rows as a list of field lists.

        Every line has its surrounding whitespace stripped and is then split
        on '~'.  The file is opened in a ``with`` block so the handle is
        closed as soon as reading finishes, even if an error occurs.
        """
        with open(self.entryFile) as handle:
            return [line.strip().split('~') for line in handle]
class QualityControl:
    """Cross-check the MSN values of the sample, species and substrate lists.

    Each constructor argument is a list of rows (presumably the output of
    ``dataEntryFile.readFileToList`` -- confirm against the caller); the first
    field of every row is used as that row's MSN.  Every check reports by
    printing to standard output and returns ``None``.
    """

    def __init__(self, sampleList, speciesList, substrateList):
        self.sampleList = sampleList
        self.speciesList = speciesList
        self.substrateList = substrateList
        # The first field of each row is the MSN used to match rows across lists.
        self.msnSamples = [row[0] for row in sampleList]
        self.msnSpecies = [row[0] for row in speciesList]
        self.msnSubstrate = [row[0] for row in substrateList]

    def _missing_from(self, candidates, reference):
        """Return the items of *candidates* that are absent from *reference*.

        Order and repeats of *candidates* are preserved.  The reference is
        turned into a set once so each membership test is O(1).
        """
        known = set(reference)
        return [msn for msn in candidates if msn not in known]

    def _unique(self, items):
        """Return *items* without repeats, keeping first-seen order."""
        seen = set()
        ordered = []
        for item in items:
            if item not in seen:
                seen.add(item)
                ordered.append(item)
        return ordered

    def _report(self, missing, label):
        """Print the size of *missing* followed by one line per entry."""
        print("(" + str(len(missing)) + ")" + " missing from " + label + " files")
        for msn in missing:
            print(msn + " not found in " + label + " list")

    def chkSamSubCnt(self):
        """Compare the number of sample rows with the number of substrate rows.

        When the row counts differ, also print how many MSNs are missing on
        each side and which ones.  The counts are computed before they are
        printed (the original printed them first, so they always read 0).

        NOTE(review): the per-MSN comparison only runs when the row counts
        differ, which is how the flattened original was read; chkSamSubCnt2
        performs the comparison unconditionally.
        """
        if len(self.sampleList) == len(self.substrateList):
            print(" # of samples equals # of substrates")
        else:
            print("samples and substrates mismatch")
            missingSubstrate = self._missing_from(self.msnSamples, self.msnSubstrate)
            missingSample = self._missing_from(self.msnSubstrate, self.msnSamples)
            print(str(len(missingSubstrate)) + " missing from substrate files")
            print(str(len(missingSample)) + " missing from sample files")
            for msn in missingSubstrate:
                print(msn + " not found in Substrate list")
            for msn in missingSample:
                print(msn + " not found in Samples list")
        print("Check Complete")

    def chkSamSubCnt2(self):
        """Print the sample MSNs absent from substrates, then the reverse."""
        self._report(
            self._missing_from(self.msnSamples, self.msnSubstrate), "substrate")
        self._report(
            self._missing_from(self.msnSubstrate, self.msnSamples), "sample")
        print("Check Complete")

    def chkSamSpe(self):
        """Print the sample MSNs absent from species, then the reverse.

        Species MSNs missing from the samples are reported once each even if
        they occur on several species rows, in first-seen order.
        """
        self._report(
            self._missing_from(self.msnSamples, self.msnSpecies), "species")
        self._report(
            self._unique(self._missing_from(self.msnSpecies, self.msnSamples)),
            "sample")
        print("Check Complete")

    def duplicates(self):
        """Print the MSNs that occur more than once in the sample list."""
        counts = {}
        for msn in self.msnSamples:
            counts[msn] = counts.get(msn, 0) + 1
        print("List of Duplicate MSN in Samples file")
        # Each duplicated MSN is listed once, in the order it first appears.
        print(self._unique([msn for msn in self.msnSamples if counts[msn] > 1]))
Do you see this now in the edit? I changed it.
@tdouce
Copy link

tdouce commented Aug 24, 2012

def missing_sample_files(self):
    """Return the sample MSNs that do not appear among the species MSNs.

    Reads ``self.msnSamples`` and ``self.msnSpecies``.  Order and repeats of
    ``self.msnSamples`` are preserved in the result.

    NOTE(review): despite the name, the loop collects samples that are
    missing from the species list -- confirm the intended direction.
    """
    # Build the lookup once so each membership test is O(1).
    species = set(self.msnSpecies)
    return [msn for msn in self.msnSamples if msn not in species]

def chkSamSpe(self):
    """Print the MSNs missing between the sample and species lists.

    The species report uses the list returned by
    ``self.missing_sample_files()``; the sample report lists every species
    MSN that is absent from ``self.msnSamples``, once each, in sorted order
    so the output is deterministic.
    """
    # The helper's result feeds the species report.  The original stored it
    # in the sample list, leaving the species report permanently empty.
    missingSpeciesFiles = self.missing_sample_files()

    print("(" + str(len(missingSpeciesFiles)) + ")" + " missing from species files")
    for item in missingSpeciesFiles:
        print(item + " not found in species list")

    samples = set(self.msnSamples)
    missingSampleFiles = sorted(
        set(msn for msn in self.msnSpecies if msn not in samples))
    print("(" + str(len(missingSampleFiles)) + ")" + " missing from sample files")
    for item in missingSampleFiles:
        print(item + " not found in sample list")
    print("Check Complete")

@tdouce
Copy link

tdouce commented Aug 24, 2012

def missing_samples(self):
    """Return the sample MSNs that do not appear among the species MSNs.

    Reads ``self.msnSamples`` and ``self.msnSpecies``.  Order and repeats of
    ``self.msnSamples`` are preserved in the result.
    """
    # Build the lookup once so each membership test is O(1).
    species = set(self.msnSpecies)
    return [msn for msn in self.msnSamples if msn not in species]

def report_missing(self, missing_sample_list):
    """Print how many distinct MSNs are in the list, then one line for each.

    Repeated entries are collapsed before counting, so the printed count
    always matches the number of lines that follow; entries are printed in
    sorted order so the output is deterministic.
    """
    unique_missing = sorted(set(missing_sample_list))

    print("(" + str(len(unique_missing)) + ")" + " missing from species files")

    for item in unique_missing:
        print(item + " not found in sample list")

def check_sample_species(self):
    """Report the MSN mismatches between samples and species in both directions.

    First passes the result of ``self.missing_samples()`` to
    ``self.report_missing``, then passes the species MSNs that are absent
    from ``self.msnSamples``, and finally prints "Check Complete".
    """
    missing_species_files = self.missing_samples()
    self.report_missing(missing_species_files)

    # The reverse direction needs its own list; the original called
    # missing_samples() a second time and so reported the same MSNs twice.
    samples = set(self.msnSamples)
    missing_sample_files = [
        msn for msn in self.msnSpecies if msn not in samples]
    # NOTE(review): report_missing takes only the list, so both reports are
    # printed with whatever label text it uses -- confirm that is acceptable.
    self.report_missing(missing_sample_files)

    print("Check Complete")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment