Skip to content

Instantly share code, notes, and snippets.

@rjpbonnal
Created November 30, 2017 17:43
Show Gist options
  • Save rjpbonnal/cdc441c1d698620596cef3dbde0fc982 to your computer and use it in GitHub Desktop.
Save rjpbonnal/cdc441c1d698620596cef3dbde0fc982 to your computer and use it in GitHub Desktop.
# http://gdac.broadinstitute.org/runs/sampleReports/latest/READ_Replicate_Samples.html
# Keep a single sample per patient among the samples of one sample type.
#
# The columns of `tabDF` are first restricted to those whose names are
# returned by `TCGAquery_SampleTypes(colnames(tabDF), typesample)`. When a
# patient occurs more than once in the `patient` annotation of the remaining
# columns, exactly one of its barcodes is kept:
#   1. barcodes whose 5th "-"-separated field ends in "H" are preferred;
#   2. otherwise barcodes whose 5th field ends in "R";
#   3. otherwise all of the patient's barcodes stay candidates (with a warning).
# Among the remaining candidates the last barcode in sort order is kept.
#
# Arguments:
#   tabDF      Object supporting `tabDF[, cols]`, `colnames(tabDF)`,
#              `$patient` and `$barcode` (presumably a SummarizedExperiment
#              whose column names are the barcodes -- TODO confirm with caller).
#   typesample Sample type(s), forwarded to `TCGAquery_SampleTypes()`.
#
# Returns:
#   `tabDF` restricted to `typesample` columns, one column per patient.
#   Columns of non-replicated patients come first, followed by the barcode
#   chosen for each replicated patient.
TCGAanalyze_RemoveReplicateSamples <- function(tabDF, typesample) {
  tabDF.typesample <- tabDF[, TCGAquery_SampleTypes(colnames(tabDF), typesample = typesample)]

  patients <- tabDF.typesample$patient
  is_repeat <- duplicated(patients)
  if (!any(is_repeat)) {
    return(tabDF.typesample)
  }

  # unique(): a patient with three or more replicates is flagged by
  # duplicated() several times; resolving it once avoids selecting the same
  # barcode (and hence the same column) repeatedly.
  replicated_patients <- unique(patients[is_repeat])
  is_single_sample <- !(patients %in% replicated_patients)

  # Last character of the 5th "-"-separated barcode field, NA when the
  # barcode has fewer than five fields.
  analyte_code <- function(barcodes) {
    vapply(
      stringi::stri_split(str = barcodes, regex = "-"),
      function(fields) stringi::stri_sub(fields[5], -1),
      character(1)
    )
  }

  # Choose the barcode to keep for one replicated patient.
  pick_barcode <- function(patient) {
    candidates <- as.character(
      tabDF.typesample[, patients %in% patient]$barcode
    )
    analytes <- analyte_code(candidates)

    # %in% never yields NA, so malformed barcodes cannot break the `if`.
    is_h <- analytes %in% "H"
    is_r <- analytes %in% "R"
    if (any(is_h)) {
      candidates <- candidates[is_h]
    } else if (any(is_r)) {
      candidates <- candidates[is_r]
    } else {
      warning("Removing duplicates, there is no H or R in the duplicated barcodes.")
    }

    kept <- sort(x = candidates, decreasing = TRUE)[1]
    # Progress information goes to stderr so callers can suppress it.
    message("Keeping barcode ", kept, " for replicated patient ", patient)
    kept
  }

  barcodes_solved_duplication <- vapply(
    replicated_patients,
    pick_barcode,
    character(1),
    USE.NAMES = FALSE
  )

  single_barcodes <- as.character(
    tabDF.typesample[, is_single_sample]$barcode
  )
  tabDF.typesample[, c(single_barcodes, barcodes_solved_duplication)]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment