Skip to content

Instantly share code, notes, and snippets.

@pcantalupo
pcantalupo / score_table_functions.R
Last active March 15, 2023 14:35
single cell cluster score scores aggregate
# Aggregate a score table from single cell level to the cluster level
# Params
# scores - data.frame or matrix where rows are cells and columns are celltypes.
# Rownames or colnames are not required
# clusters - vector of same length as number of rows in 'scores'.
# The values specify the group for each row (i.e. c(3, 1, 0, 1, 2, 0, 3, ...))
# Return - A matrix with the mean scores. Columns are in the same order.
# Rows are sorted based on the typeof(clusters) (integer -> integer sorted, character -> character sorted).
# Rownames are set to the cluster value
scores_clusterlevel = function(scores, clusters) {
@pcantalupo
pcantalupo / color.R
Last active January 29, 2023 12:19
visualize display hex codes in R to see the color
# Preview hex color codes by drawing them as labelled swatches.
# The 'show_col()' approach comes from
# https://datacornering.com/how-to-create-and-preview-hex-color-code-sequence-in-r/
pacman::p_load("viridis", "RColorBrewer", "scales")

# Viridis "magma" palette: take 12 colors and lay them out 4 per row
colors <- viridis::magma(n = 12)
scales::show_col(colours = colors, ncol = 4, cex_label = 1)
process cutadapt {
tag "${sampleid}"
container 'quay.io/biocontainers/cutadapt:1.18--py36_0'
publishDir "$params.outdir/cutadapt", mode: 'copy'
input:
tuple sampleid, path(read1), path(read2) from ch_samples_cutadapt
output:
tuple sampleid, path("${sampleid}_R1.cutadapt.fastq.gz"), path("${sampleid}_R2.cutadapt.fastq.gz") into ( ch_cutadapt_fastqc, ch_cutadapt_star, ch_cutadapt_for_sortmerna )
# Command-line interface for this script, built with optparse.
library(optparse)

##################### Options ########################
# Supported flags (all use action = "store", i.e. they take a value):
#   --project     character  default ""    Project
#   --mc          integer    default 3     Min cells
#   --resolution  numeric    default 0.8   Resolution value
#   --usehvg      logical    default TRUE  Use HVG?
opts <- list(
  make_option("--project", action = "store", default = "", type = "character",
              help = "Project (default: none)"),
  make_option("--mc", action = "store", default = 3, type = "integer",
              help = "Min cells (default: 3)"),
  make_option("--resolution", action = "store", default = 0.8, type = "numeric",
              help = "Resolution value (default: 0.8)"),
  # Help text previously lacked its closing parenthesis.
  make_option("--usehvg", action = "store", default = TRUE, type = "logical",
              help = "Use HVG? (default: TRUE)")
)

# NOTE: `opts` is deliberately reused. Above it holds the option
# specifications; from here on it holds the result of parse_args().
opts <- parse_args(OptionParser(option_list = opts))
@pcantalupo
pcantalupo / gist:990add5d05938316a791573b5b8ea64d
Created February 2, 2021 15:33
Copy all the .command.sh scripts used in an nf-core pipeline run to the current directory
$ cut -f 2,4 ../execution_trace.txt | grep -v ^hash | perl -pe 's/ \(\S+\)$/.sh/' | tr : _ | while IFS=$'\t' read -r -a myarray; do dir=${myarray[0]}; process=${myarray[1]}; cp -v ../../../work/"$dir"*/.command.sh $process; done
‘../../../work/fd/f407a60eb791dbe1b0cd5a2f4d0da2/.command.sh’ -> ‘RNASEQ_INPUT_CHECK_SAMPLESHEET_CHECK.sh’
‘../../../work/19/a3f9ef62792935a4e8f3452bedb915/.command.sh’ -> ‘RNASEQ_CAT_FASTQ.sh’
‘../../../work/9d/f6079c32bdaa9471dcfc52e826b0e0/.command.sh’ -> ‘RNASEQ_PREPARE_GENOME_GTF2BED.sh’
‘../../../work/a1/96ca0e62620ea4e682fdd8096ee46a/.command.sh’ -> ‘RNASEQ_PREPARE_GENOME_GET_CHROM_SIZES.sh’
‘../../../work/76/5335dbfa3241ec59fafc25d9abc7de/.command.sh’ -> ‘RNASEQ_PREPARE_GENOME_GTF_GENE_FILTER.sh’
‘../../../work/e2/523c975ead28b5181984ff7d64e5f3/.command.sh’ -> ‘RNASEQ_PREPARE_GENOME_RSEM_PREPAREREFERENCE_TRANSCRIPTS.sh’
‘../../../work/c7/c8ffe7adc5ad790aa2fe6608b114eb/.command.sh’ -> ‘RNASEQ_FASTQC_UMITOOLS_TRIMGALORE_FASTQC.sh’
‘../../../work/89/7b23e5e3ef8e6cf43a67fb7480277c
For this sequence, Guppy mean_qscore_template = 14.09
@5df6a215-bf48-4c72-a961-b483907a6e65
GTGTACTTCGTTCAGTTACGTATTGCTGGTAGCTTTTATCCGTTGAGCGACGGCCATTCCACAATGTACCGCCGGATCACTAAGTCCTGCTTTCCGCACCTGCTCGACTTGTAAGTCTTGCAGTCAACCCTTTTACCTTTGTGCTCTGCATATGGTTTCTGACCATATTGAGTGTAGCTTTGAACGCCTCCGTTACTCTTTAGGAAGCGACCGCCCCAGTCAAACTACCCACCACGCACTCTCCTTCCCAGATAAGGGGAACGGGTTAGAAAATCAATTTAGCAAGGGTGGTATTTCAAGGTTGACTCCACTAGAACTAGCGTCCCAGCTTCAAAGTCTCCCACTGTCTACACATGCTAAACCAATTTTCAATACGAAGTTATAGTAAAGCTCCACGGGGGCTTTTCGTCTTGATACGATAACCAGCGTTTTCACTGGTACCATAATTTCACCGAGTCCAATGTTGAGACAGTAGGGGAGATCATTGCGCCTTTCGTCCCAGCAATATCAGCACCAACAGAAAAGCAATACGTAACT
+
$%%*(9:>5<<B2.=BA02&+))):?AAA@;3BFFB@BLEA8*:=?AB?@C&)02+4837,-333***/77/;=@AC<89,76;;:7.,::=A28+867)*.35<=?;>BADF>;9?ABA<9?=<+,:6$&.336:::=B8C?BIFBB<@E@@<=@B-4/4/0A@IBFE339@B=3,5569==>?7;=CJA=@@;?GDBB:)42)')+7::?9?>?8588:CDB>9*AA?;;6-3$#$$(&,,27;:=:55?;849899<>>+.995::<9D=@CKHLI<A8ALJ>9:B<=<DDFGA*,-:@@<<997=@@<;@EI?@=AAHJH@FGBBB959<22;:D::0*)()3'*78*BDEFJDHA46A>AEGII=A?8.7;A56DEBA:3CFGG>F
@seq1
AGTCGAAAGT
+
HHHHHIIIII
@seq2
GGGTACGTAC
+
!(*+,-./AB
@seq3
TTATTAGCGA
$ cat samplesheet.csv
group,replicate,barcode,input_file,genome,transcriptome
cdcp6,1,1,,/bgfs/uchandran/projects/duprex_nanopore_covid/refs/GCA_009937905.1_ASM993790v1_genomic.fa,
$ cat results/pipeline_info/pipeline_report.txt
----------------------------------------------------
,--./,-.
$ ls -R fast5_* fastq*
fast5_fail:
FAO43207_fail_e9e6d183_0.fast5 FAO43207_fail_e9e6d183_20.fast5 FAO43207_fail_e9e6d183_31.fast5 FAO43207_fail_e9e6d183_42.fast5
FAO43207_fail_e9e6d183_10.fast5 FAO43207_fail_e9e6d183_21.fast5 FAO43207_fail_e9e6d183_32.fast5 FAO43207_fail_e9e6d183_43.fast5
FAO43207_fail_e9e6d183_11.fast5 FAO43207_fail_e9e6d183_22.fast5 FAO43207_fail_e9e6d183_33.fast5 FAO43207_fail_e9e6d183_44.fast5
FAO43207_fail_e9e6d183_12.fast5 FAO43207_fail_e9e6d183_23.fast5 FAO43207_fail_e9e6d183_34.fast5 FAO43207_fail_e9e6d183_45.fast5
FAO43207_fail_e9e6d183_13.fast5 FAO43207_fail_e9e6d183_24.fast5 FAO43207_fail_e9e6d183_35.fast5 FAO43207_fail_e9e6d183_4.fast5
FAO43207_fail_e9e6d183_14.fast5 FAO43207_fail_e9e6d183_25.fast5 FAO43207_fail_e9e6d183_36.fast5 FAO43207_fail_e9e6d183_5.fast5
FAO43207_fail_e9e6d183_15.fast5 FAO43207_fail_e9e6d183_26.fast5 FAO43207_fail_e9e6d183_37.fast5 FAO43207_fail_e9e6d183_6.fast5
FAO43207_fail_e9e6d183_16.fast5 FAO43207_fail_e9e6d183_27.fast5 FAO43207_
$ nextflow run nf-core/rnaseq -c myconfig.config --pseudo_aligner salmon -r dev --reads 'reads/*_tu_KAM56A3_S17.{1,2}.fq.gz' --reverseStranded --fasta GRCh38.d1.vd1.fa --gtf gencode.v22.annotation.gtf --skipBiotypeQC --removeRiboRNA -resume
N E X T F L O W ~ version 20.04.1
Launching `nf-core/rnaseq` [distracted_wiles] - revision: e14e0d4912 [dev]
WARN: It appears you have never run this project before -- Option `-resume` is ignored
Extracting transcript fastas from genome fasta + gtf/gff
WARN: The `into` operator should be used to connect two or more target channels -- consider to replace it with `.set { gtfFile }`
----------------------------------------------------
,--./,-.
___ __ __ __ ___ /,-._.--~'
|\ | |__ __ / ` / \ |__) |__ } {