Created
November 27, 2019 10:01
-
-
Save mwalzer/6bd5c25d607762d86485f7e1a1a9d5d7 to your computer and use it in GitHub Desktop.
proteomics LFQ with nextflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env nextflow
/* | |
======================================================================================== | |
Epiphany OpenMS workflow for protein inference | |
======================================================================================== | |
@#### Authors | |
Mathias Walzer <walzer@ebi.ac.uk> | |
---------------------------------------------------------------------------------------- | |
---------------------------------------------------------------------------------------- | |
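Pipeline overview:
- 1: Download the RAW files of a PRIDE experiment from an FTP URL (downloadFiles)
- 2: Sanitise the RAW file names (sanitiseFilename)
- 3: Convert the RAW data into mzML files (generateMZML)
- 4: Peak picking (PeakPicker)
- 5: Database search with Comet (CometAdapter)
- 6: Merge, index and post-process the identifications (IDMerger, PeptideIndexer, PSMFeatureExtractor, PercolatorAdapter)
- 7: Protein inference (Epifany)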
---------------------------------------------------------------------------------------- | |
*/ | |
// Protein sequence database handed to CometAdapter (-database) and PeptideIndexer (-fasta).
params.fasta = "/hps/nobackup2/proteomics/OPRA/Human-ReferenceProteome-Canonical-Isoform-73911-plusdecoy-trypsin.fasta"

// Run-specific parameters. They are referenced by the processes below and are expected
// to be supplied on the command line (--pxd, --ftp, --result_folder). Declaring them with
// a null default makes them known parameters; the example values are kept for reference.
params.pxd = null            // e.g. "PXD003133"
params.ftp = null            // e.g. "ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2015/11/PXD003133"
params.result_folder = null  // e.g. "/hps/nobackup2/proteomics/OPRA/PXD003133"
/*
 * Write the command-line usage text of the pipeline to the Nextflow log.
 *
 * Lists every parameter the script reads: --ftp (downloadFiles), --pxd (IDMerger output
 * name), --result_folder (Epifany publishDir), --fasta and --help.
 */
def helpMessage() {
    log.info"""
    =========================================
    Usage:

    The typical command for running the pipeline is as follows:

    nextflow run main.nf -c config.nf -profile local --ftp <url> --pxd <accession> --result_folder <dir>

    Mandatory arguments:
      --ftp             The project ftp folder in PRIDE (e.g. ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2015/11/PXD003133)
      --pxd             The project accession, used to name the merged identification file (e.g. PXD003133)
      --result_folder   The folder the final Epifany result is published to (e.g. /hps/nobackup2/proteomics/OPRA/PXD003133)

    Other arguments:
      --fasta           The protein sequence database used by CometAdapter and PeptideIndexer
      --help            Show this message and exit
    """.stripIndent()
}
/*
 * SET UP CONFIGURATION VARIABLES
 */

// Show help message and stop when --help is given.
params.help = false
if (params.help) {
    helpMessage()
    exit 0
}

// Fail early when a mandatory parameter is missing. Without this check an unset value
// is interpolated as "null" into the wget call (ftp), the merged file name (pxd) and
// the publish directory (result_folder).
def missingParams = ['ftp', 'pxd', 'result_folder'].findAll { !params[it] }
if (missingParams) {
    helpMessage()
    exit 1, "Missing mandatory parameter(s): " + missingParams.collect { "--" + it }.join(', ')
}
/*
 * Download raw files from FTP
 *
 * Recursively fetches every "*.raw" file below params.ftp into the task work
 * directory and emits the downloaded files on the rawFiles channel.
 */
process downloadFiles {
    container '/hps/nobackup2/proteomics/ubuntu-wget.simg'
    // The memory request grows with every retry attempt.
    memory { 4.GB * task.attempt }
    errorStrategy 'retry'

    output:
    file '*.raw' into rawFiles

    script:
    // The URL is quoted so that the shell neither splits nor glob-expands it.
    """
    wget -v -r -nd -A "*.raw" --no-host-directories --cut-dirs=1 "${params.ftp}"
    """
}
/*
 * Sanitise filename for each raw file
 *
 * Runs an inline Python script per RAW file: spaces become underscores and every
 * character other than word characters, '-' and '.' is removed from the file name.
 * The resulting "*.raw" file is emitted on the rawFilesClean channel.
 */
process sanitiseFilename {
    errorStrategy 'retry'

    input:
    file rawFile from rawFiles.flatten()

    output:
    file "*.raw" into rawFilesClean

    script:
    """
    #!/usr/bin/env python
    import os
    import re


    # Taken from https://github.com/django/django/blob/master/django/utils/text.py
    def get_valid_filename(s):
        s = str(s).strip().replace(\" \", \"_\")
        return re.sub(r\"(?u)[^-\\w.]\", \"\", s)


    ofp = \"${rawFile}\"
    fn = os.path.basename(ofp)
    dn = os.path.dirname(ofp)
    new_name = get_valid_filename(fn)
    nfp = os.path.join(dn, new_name)

    if nfp == ofp:
        # The name is already clean: expose the file under a new name via a symlink,
        # so that a file other than the staged input matches the output pattern.
        os.symlink(ofp, \"${rawFile.baseName}_.raw\")
    else:
        os.rename(ofp, nfp)
    """
}
/*
 * Generate the mzML for each raw file
 *
 * Calls ThermoRawFileParser.sh once per sanitised RAW file and emits the
 * produced "*.mzML" files on the spectraFiles channel.
 */
process generateMZML {
    errorStrategy 'retry'
    // The memory request grows with every retry attempt.
    memory { 4.GB * task.attempt }
    container '/hps/nobackup2/proteomics/biocontainers-thermorawfileparser-1.1.8.simg'
    // Disabled: publishing of the converted files.
    // publishDir "${params.result_folder}", mode: 'copy', overwrite: true

    input:
    file rawFile from rawFilesClean.flatten()

    output:
    file '*.mzML' into spectraFiles

    script:
    """
    ThermoRawFileParser.sh -i=$rawFile -f=2 -o=./
    """
}
/*
 * Run PeakPickerHiRes on every mzML file.
 *
 * The result is written to "<input base name>_pp.mzML" and emitted on the
 * spectraPicked channel.
 */
process PeakPicker {
    errorStrategy 'retry'
    // Disabled alternatives for handling failures, kept for reference:
    // errorStrategy { assert task.errorMessage ==~ 'Centroided data provided but profile spectra expected' ? 'ignore' : 'retry' } //errorReport instead of errorMessage???
    // errorStrategy 'ignore'

    // The memory request grows with every retry attempt.
    memory { 4.GB * task.attempt }
    container '/hps/nobackup2/proteomics/openms_V2.4.0_proteomic_lfq_2.simg'
    // Disabled: publishing of the picked spectra.
    // publishDir "${params.result_folder}", mode: 'copy', overwrite: true

    input:
    file mzML_file from spectraFiles.flatten()

    output:
    file "*.mzML" into spectraPicked

    script:
    """
    PeakPickerHiRes -in $mzML_file -out ${mzML_file.baseName}_pp.mzML
    """
}
/*
 * Run CometAdapter on every peak-picked mzML file against params.fasta.
 *
 * The search settings are fixed in the command line below; the result is written
 * to "<input base name>_comet.idXML" and emitted on the cometIDs channel.
 */
process CometAdapter {
    errorStrategy 'retry'
    // The memory request grows with every retry attempt.
    memory { 4.GB * task.attempt }
    container '/hps/nobackup2/proteomics/openms_V2.4.0_proteomic_lfq_2.simg'
    // Disabled: publishing of the search results.
    // publishDir "${params.result_folder}", mode: 'copy', overwrite: true

    input:
    file mzML_file from spectraPicked.flatten()

    output:
    file "*.idXML" into cometIDs

    script:
    """
    CometAdapter -in $mzML_file -database ${params.fasta} -allowed_missed_cleavages 1 -fixed_modifications "Carbamidomethyl (C)" -variable_modifications "Oxidation (M)" -enzyme Trypsin -precursor_mass_tolerance 20.0 -precursor_error_units ppm -out ${mzML_file.baseName}_comet.idXML
    """
}
/*
 * Merge all Comet idXML files into a single file.
 *
 * collect() gathers every item of the cometIDs channel, so this process runs once
 * on the complete list. The merged file is named "<params.pxd>.idXML" and is
 * emitted on the mergedIDs channel.
 */
process IDMerger {
    errorStrategy 'retry'
    // The memory request grows with every retry attempt.
    memory { 8.GB * task.attempt }
    container '/hps/nobackup2/proteomics/openms_V2.4.0_proteomic_lfq_2.simg'
    // Disabled: publishing of the merged identifications.
    // publishDir "${params.result_folder}", mode: 'copy', overwrite: true

    input:
    file cometIDslist from cometIDs.collect()

    output:
    file "*.idXML" into mergedIDs

    script:
    """
    IDMerger -out ${params.pxd}.idXML -annotate_file_origin -merge_proteins_add_PSMs -in $cometIDslist
    """
}
/*
 * Run PeptideIndexer on the merged idXML file against params.fasta.
 *
 * The result is written to "<input base name>_pi.idXML" and emitted on the
 * piIDs channel.
 */
process PeptideIndexer {
    errorStrategy 'retry'
    // The memory request grows with every retry attempt.
    memory { 8.GB * task.attempt }
    container '/hps/nobackup2/proteomics/openms_V2.4.0_proteomic_lfq_2.simg'
    // Disabled: publishing of the indexed identifications.
    // publishDir "${params.result_folder}", mode: 'copy', overwrite: true

    input:
    file mergedID from mergedIDs

    output:
    file "*.idXML" into piIDs

    script:
    """
    PeptideIndexer -out ${mergedID.baseName}_pi.idXML -in $mergedID -enzyme:name Trypsin -enzyme:specificity full -fasta ${params.fasta}
    """
}
/*
 * Run PSMFeatureExtractor on the indexed idXML file.
 *
 * The result is written to "<input base name>_fe.idXML" and emitted on the
 * psmFs channel, which feeds PercolatorAdapter.
 */
process PSMFeatureExtractor {
    errorStrategy 'retry'
    // The memory request grows with every retry attempt.
    memory { 8.GB * task.attempt }
    container '/hps/nobackup2/proteomics/openms_V2.4.0_proteomic_lfq_2.simg'
    // Disabled: publishing of the feature-annotated identifications.
    // publishDir "${params.result_folder}", mode: 'copy', overwrite: true

    input:
    file piID from piIDs

    output:
    file "*.idXML" into psmFs

    script:
    """
    PSMFeatureExtractor -out ${piID.baseName}_fe.idXML -in $piID
    """
}
/*
 * Run PercolatorAdapter on the idXML file produced by PSMFeatureExtractor.
 *
 * The result is emitted on the percoIDs channel, which feeds Epifany.
 *
 * NOTE(review): the output suffix "_fe" is the same one PSMFeatureExtractor uses,
 * so the file name ends in "_fe_fe.idXML". This looks like a copy-paste leftover;
 * confirm before renaming, because the final published file name derives from it.
 */
process PercolatorAdapter {
    errorStrategy 'retry'
    // The memory request grows with every retry attempt.
    memory { 8.GB * task.attempt }
    container '/hps/nobackup2/proteomics/openms_V2.4.0_proteomic_lfq_2.simg'
    // Disabled: publishing of the Percolator results.
    // publishDir "${params.result_folder}", mode: 'copy', overwrite: true

    input:
    file psmF from psmFs

    output:
    file "*.idXML" into percoIDs

    script:
    """
    PercolatorAdapter -out ${psmF.baseName}_fe.idXML -in $psmF -enzyme trypsin -score_type pep
    """
}
/*
 * Run Epifany on the Percolator output.
 *
 * This is the only process with an active publishDir: the resulting
 * "<input base name>_epi.idXML" is copied to params.result_folder (overwriting
 * existing files) and is also emitted on the inferProt channel.
 */
process Epifany {
    errorStrategy 'retry'
    // The memory request grows with every retry attempt.
    memory { 8.GB * task.attempt }
    container '/hps/nobackup2/proteomics/openms_V2.4.0_proteomic_lfq_2.simg'
    publishDir "${params.result_folder}", mode: 'copy', overwrite: true

    input:
    file percoID from percoIDs

    output:
    file "*.idXML" into inferProt

    script:
    """
    Epifany -out ${percoID.baseName}_epi.idXML -in $percoID -protein_fdr true
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Execution profiles, selected with "-profile <name>[,<name>...]".
profiles {

    // Run on the local machine with Singularity containers.
    local {
        docker.enabled = false
        singularity.enabled = true
        singularity.autoMounts = true
        singularity.runOptions = '-B /hps/nobackup/proteomics:/hps/nobackup/proteomics:rw -B /hps/nobackup2/proteomics:/hps/nobackup2/proteomics:rw'
        process {
            // Processes labelled 'big_mem' run one task at a time.
            withLabel: big_mem {
                maxForks = 1
            }
        }
    }

    // Switch on the execution trace report. The setting has to be written with its
    // scope ("trace.enabled"): a bare "enabled = true" inside this profile would only
    // define an unrelated top-level key.
    trace {
        trace.enabled = true
    }

    // Submit tasks to LSF with Singularity containers; failed tasks are retried up to 5 times.
    cluster {
        docker.enabled = false
        singularity.enabled = true
        singularity.autoMounts = true
        singularity.runOptions = '-B /hps/nobackup/proteomics:/hps/nobackup/proteomics:rw -B /hps/nobackup2/proteomics:/hps/nobackup2/proteomics:rw'
        process {
            executor = 'lsf'
            maxRetries = 5
        }
    }
}

// Settings applied regardless of the selected profile.
singularity.enabled = true
singularity.cacheDir = "$baseDir/image"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment