Skip to content

Instantly share code, notes, and snippets.

@gaiusjaugustus
Created June 13, 2016 17:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gaiusjaugustus/c63a826bc6935b505059f2e2b0a4e437 to your computer and use it in GitHub Desktop.
Save gaiusjaugustus/c63a826bc6935b505059f2e2b0a4e437 to your computer and use it in GitHub Desktop.
# Split data set in (tumor, normal) pair
dsT <- sapply(strsplit(Tumorsamples, split = ".", fixed=TRUE),"[[", 1)
dsN <- sapply(strsplit(Normalsamples, split = ".", fixed=TRUE),"[[", 1)
# Load all samples (in case more samples were downloaded)
cdf <- AffymetrixCdfFile$byChipType(chipType, tags="Full")
print("print(cdf)")
print(cdf)
for (i in 1:length(dsT)){
csR <- AffymetrixCelSet$byName(dataSet, cdf=cdf)
TumorNormalpair <- c(T=dsT[i], N=dsN[i])
print(TumorNormalpair)
csR <- csR[indexOf(csR, TumorNormalpair)]
print("print(csR)")
print(csR)
############
print("\n\nAllele-specific copy-number estimates\nHere we will use an allele-specific version of CRMA v2 to estimate total CNs (TCNs) and allele B fractions (BAFs) from the two CEL files. ASCRMA v2 will take care of signal normalization etc:")
#################
####source('http://callr.org/install#HenrikBengtsson/sfit')
res <- doASCRMAv2(csR, verbose=verbose)
print("head(res)")
head(res)
######################
# Extracting PSCN estimates for the tumor-normal pair
#####################
#Next we need to extract TCNs and BAFs for the tumor and the matched normal:
print("\n\nExtracting PSCN estimates for the tumor-normal pair\nNext we need to extract TCNs and BAFs for the tumor and the matched normal:")
print("\nExtract (total,beta) estimates for the tumor-normal pair")
data <- extractPSCNArray(res$total)
dimnames(data)[[3]] <- names(TumorNormalpair)
print("str(data)")
str(data)
print("\n\nActually, at this stage the TCNs have not been standardized toward a reference. Here we will calculate TCNs as C = 2*T/N where T and N are the total 'CN' signals for the tumor and the matched normal, respectively.")
print("\nTotal CNs for the tumor relative to the matched normal")
CT <- 2 * (data[,"total","T"] / data[,"total","N"])
print("\nAllele B fractions for the tumor")
betaT <- data[,"fracB","T"]
print("\nAllele B fractions for the normal")
betaN <- data[,"fracB","N"]
print("\nWhat is remaining is to get the genomic locations for these data points:")
print("\nGet (chromosome, position) annotation data")
ugp <- getAromaUgpFile(res$total)
chromosome <- ugp[,1,drop=TRUE]
x <- ugp[,2,drop=TRUE]
print("\nWe now have all the PSCN data we need:")
print("\nSetup data structure for Paired PSCBS")
df <- data.frame(chromosome=chromosome, x=x, CT=CT, betaT=betaT, betaN=betaN)
print("head(df)")
head(df)
#########################################
print("\n\nParent-specific copy-number segmentation")
#########################################
print("\nWe next use Paired PSCBS to segment the above PSCN estimates:")
library(PSCBS)
print("\nDrop TCN Outliers")
data <- dropSegmentationOutliers(df)
print("\n Identify large gaps")
gaps <- findLargeGaps(data, minLength = 1e+06)
gaps
knownsegments <- gapsToSegments(gaps)
knownsegments
fit <- segmentByPairedPSCBS(data, knownSegments = knownsegments,
preserveScale = FALSE, seed = 33333, verbose = verbose,
avgDH="median")
print("str(fit)")
str(fit)
print("Prune Segments by HClust \n This should clean things up.")
fitP <-pruneByHClust(fit, h=0.5, verbose = -10, hclustMethod = "complete")
str(fitP)
print("Add additional columns about ROH, LOH, etc")
# ROH = Runs of homozygosity (in the normal)
fitP <- callROH(fitP, verbose = -10)
# AB = Segments in Allelic Balance
fitP <- callAB(fitP, verbose = -10)
# LOH = Segments in LOH
fitP <- callLOH(fitP, verbose = -10)
# NTCN = Segments with Neutral Total Copy Number
fitP <- callNTCN(fitP, verbose = verbose)
#To access the table of identified segments, use the getSegments() method, which returns a data.frame (so it can easily be written to file):
segs <- getSegments(fitP)
pairName <- paste(TumorNormalpair, collapse="vs")
path <- paste0("PSCBSdata/",dataSet,",",chipType)
print(pairName)
####fwrite(segs, paste0("PSCBSdata/",dataSet,",",chipType,"/",pairName,".txt"), quote=FALSE, col.names = TRUE, sep="\t")
pathname <- writeSegments(fitP, name=pairName, simplify=TRUE ,path = path)
print("We can plot the TCN, the decrease-of-heterozygosity (DH), and the minor-major CN estimates as follows:")
pairName <- paste0("TCGA_Blacks_GW6Full_smoothed/",paste(TumorNormalpair, collapse="vs"))
chrTag <- sprintf("Chr%s", seqToHumanReadable(getChromosomes(fitP)))
toPNG(pairName, tags=c(chrTag, "PairedPSCBS"), width=4000, aspectRatio=0.6, {
plotTracks(fitP)
})
# Figure. Paired PSCBS segmentation results for the GSM318736 & GSM318737 tumor-normal pair.
}
#####################
######################################
# Error after callNTCN
> fitP <- callNTCN(fitP, verbose = verbose)
[2016-06-11 09:54:29] Exception: Less that two modes were found in the empirical density of C1 after removing 7 modes that are too weak (density < 0.2): 1
at #30. estimateKappaByC1Density.PairedPSCBS(this, ...)
- estimateKappaByC1Density.PairedPSCBS() is in environment 'PSCBS'
at #29. estimateKappaByC1Density(this, ...)
- estimateKappaByC1Density() is in environment 'PSCBS'
at #28. estimateKappa.PairedPSCBS(fit)
- estimateKappa.PairedPSCBS() is in environment 'PSCBS'
at #27. estimateKappa(fit)
- estimateKappa() is in environment 'PSCBS'
at #26. getVector.Arguments(static, x, ..., .name = .name)
- getVector.Arguments() is in environment 'R.utils'
at #25. getVector(static, x, ..., .name = .name)
- getVector() is in environment 'R.utils'
at #24. getNumerics.Arguments(static, ..., asMode = "double", disallow = disallow)
- getNumerics.Arguments() is in environment 'R.utils'
at #23. getNumerics(static, ..., asMode = "double", disallow = disallow)
- getNumerics() is in environment 'R.utils'
at #22. getDoubles.Arguments(static, ..., length = length)
- getDoubles.Arguments() is in environment 'R.utils'
at #21. getDoubles(static, ..., length = length)
- getDoubles() is in environment 'R.utils'
at #20. getDouble.Arguments(static, ...)
- getDouble.Arguments() is in environment 'R.utils'
at #19. getDouble(static, ...)
- getDouble() is in environment 'R.utils'
- originating from '<text>'
at #18. Arguments$getDouble(kappa, range = c(0, 1), disallow = disallow)
- Arguments$getDouble() is local of the calling function
at #17. estimateDeltaCN.PairedPSCBS(fit)
- estimateDeltaCN.PairedPSCBS() is in environment 'PSCBS'
at #16. estimateDeltaCN(fit)
- estimateDeltaCN() is in environment 'PSCBS'
at #15. getVector.Arguments(static, x, ..., .name = .name)
- getVector.Arguments() is in environment 'R.utils'
at #14. getVector(static, x, ..., .name = .name)
- getVector() is in environment 'R.utils'
at #13. getNumerics.Arguments(static, ..., asMode = "double", disallow = disallow)
- getNumerics.Arguments() is in environment 'R.utils'
at #12. getNumerics(static, ..., asMode = "double", disallow = disallow)
- getNumerics() is in environment 'R.utils'
at #11. getDoubles.Arguments(static, ..., length = length)
- getDoubles.Arguments() is in environment 'R.utils'
at #10. getDoubles(static, ..., length = length)
- getDoubles() is in environment 'R.utils'
at #09. getDouble.Arguments(static, ...)
- getDouble.Arguments() is in environment 'R.utils'
at #08. getDouble(static, ...)
- getDouble() is in environment 'R.utils'
- originating from '<text>'
at #07. Arguments$getDouble(delta, range = c(0, Inf), disallow = disallow)
- Arguments$getDouble() is local of the calling function
at #06. callCopyNeutralByTCNofAB.PairedPSCBS(fit, ..., force = force)
- callCopyNeutralByTCNofAB.PairedPSCBS() is in environment 'PSCBS'
at #05. callCopyNeutralByTCNofAB(fit, ..., force = force)
- callCopyNeutralByTCNofAB() is in environment 'PSCBS'
at #04. callCopyNeutral.PairedPSCBS(...)
- callCopyNeutral.PairedPSCBS() is in environment 'PSCBS'
at #03. callCopyNeutral(...)
- callCopyNeutral() is in environment 'PSCBS'
at #02. callNTCN.PairedPSCBS(fitP, verbose = verbose)
- callNTCN.PairedPSCBS() is in environment 'PSCBS'
at #01. callNTCN(fitP, verbose = verbose)
- callNTCN() is in environment 'PSCBS'
Show Traceback
Rerun with Debug
Error: Less that two modes were found in the empirical density of C1 after removing 7 modes that are too weak (density < 0.2): 1
##############
##########################
# Traceback of error
Error: Less that two modes were found in the empirical density of C1 after removing 7 modes that are too weak (density < 0.2): 1
35 stop(cond)
34 throw.Exception(Exception(...))
33 throw(Exception(...))
32 throw.default(sprintf("Less that two modes were found in the empirical density of C1 after removing %d modes that are too weak (density < %g): %d",
nModes - nrow(fit), minDensity, nrow(fit)))
31 throw(sprintf("Less that two modes were found in the empirical density of C1 after removing %d modes that are too weak (density < %g): %d",
nModes - nrow(fit), minDensity, nrow(fit)))
30 estimateKappaByC1Density.PairedPSCBS(this, ...)
29 estimateKappaByC1Density(this, ...)
28 estimateKappa.PairedPSCBS(fit)
27 estimateKappa(fit)
26 getVector.Arguments(static, x, ..., .name = .name)
25 getVector(static, x, ..., .name = .name)
24 getNumerics.Arguments(static, ..., asMode = "double", disallow = disallow)
23 getNumerics(static, ..., asMode = "double", disallow = disallow)
22 getDoubles.Arguments(static, ..., length = length)
21 getDoubles(static, ..., length = length)
20 getDouble.Arguments(static, ...)
19 getDouble(static, ...) at <text>#1
18 Arguments$getDouble(kappa, range = c(0, 1), disallow = disallow)
17 estimateDeltaCN.PairedPSCBS(fit)
16 estimateDeltaCN(fit)
15 getVector.Arguments(static, x, ..., .name = .name)
14 getVector(static, x, ..., .name = .name)
13 getNumerics.Arguments(static, ..., asMode = "double", disallow = disallow)
12 getNumerics(static, ..., asMode = "double", disallow = disallow)
11 getDoubles.Arguments(static, ..., length = length)
10 getDoubles(static, ..., length = length)
9 getDouble.Arguments(static, ...)
8 getDouble(static, ...) at <text>#1
7 Arguments$getDouble(delta, range = c(0, Inf), disallow = disallow)
6 callCopyNeutralByTCNofAB.PairedPSCBS(fit, ..., force = force)
5 callCopyNeutralByTCNofAB(fit, ..., force = force)
4 callCopyNeutral.PairedPSCBS(...)
3 callCopyNeutral(...)
2 callNTCN.PairedPSCBS(fitP, verbose = verbose)
1 callNTCN(fitP, verbose = verbose)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment