hiraksarkar/chit_sheet.md

## chit_sheet.md

      
    Raw
  

              chit_sheet.md
            
          
    PATH that worked on newton

export PATH="/home/linuxbrew/.linuxbrew/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/usr/lib/jvm/java-8-oracle/bin:/usr/lib/jvm/java-8-oracle/db/bin:/usr/lib/jvm/java-8-oracle/jre/bin:/home/linuxbrew/.linuxbrew/bin/:/home/linuxbrew/.linuxbrew/bin/"

hello
Transferring files from newton

shuf -n 1000 /bio2/home/mferdman/paired.bulk-only > oct_28/file_list\

Prepend directory

sed -i -e 's/^/\/bio1\/download\//' data/srr.1K.hg

Append scp address

sed -e 's/$/ moamin@seawulfmin:\/gpfs\/scratch\/moamin\/hirak_sra_contamination\/reads\//' -i data/srr.1K.hg


sed -i -e 's/^/scp -r /' /mnt/scratch4/meta_genome/seawulf_stuff/oct_28/file_list\
sed -e 's/$/ hsarkar@seawulf:\/gpfs\/scratch\/hsarkar\/mappings-matter\/data\/fastq_files\//' -i seqc_A_BGI.list.loc
Touch all files under the current directory

find  -type f  -exec touch {} + 
Bioinformatics

DESeq2

> for (meth in methods){
+    files <- file.path('/mnt/scratch1/hirak/mappings-matter/data/seqc_analysis', samples$experiment,meth, "quant.sf")
+    names(files) <- samples$experiment
+    txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
+    write.table(txi$counts, file=file.path("/mnt/scratch1/hirak/mappings-matter/data/seqc_analysis", paste0(meth,".tsv")), sep="\t",  quote = FALSE)
+ }

Grep transcript sequence

sed -n -e '/ENST00000425657.1/,/^>/ p' /mnt/scratch6/avi/data/cgat/references/txome/hg_transcriptome.fasta  | sed -e '1d;$d' > ENST00000425657.1.seq

Transcript type

awk -F "\t" '$3 == "gene" { print $9 }' <gtf_file> |  awk -F "; " '{print $1,$5}' | tr -d "\"" | awk -F " " '{print $2,$4}'
Gene type

awk -F "\t" '$3 == "gene" { print $9 }' <gtf_file> |  awk -F "; " '{print $1,$2}' | tr -d "\"" | awk -F " " '{print $2,$4}'
Add unique header number to fasta

awk '/>/{print $0(++i)}!/>/' file
Take the 2nd line of every 4-line record (e.g. the sequence line of a FASTQ file)

awk '{if(NR%4==2) print $0}'
Search one file for the fixed strings listed in another file

 grep --no-group-separator -F -f to_search.lst all_coverage/all.1.coverage > all_coverage/to_search.coverage
Split files

awk 'NR%20==1 { file = FILENAME "_" sprintf("%04d", NR+19) } { print > file }'

OR

split -a 3 -l 5000 -d filtered_sampled_100.fa experiment/seq_
 
split -l4 --numeric-suffixes=1 --suffix-length=3  ../sample_feb_26.list ""

for f in [0-9]* ; do mv "$f" "$((10#${f#x}+1)).lst" ; done

Salmon/Minnow + newton specific

path that works without pain

sftp set up

{
    "host": "newton",
    "port": 1350,
    "username": "hirak",
    "password": null,
    "protocol": "sftp",
    "agent": "$SSH_AUTH_SOCK",
    "privateKeyPath": "/Users/hirak/.ssh/id_rsa",
    "passphrase": null,
    "passive": false,
    "interactiveAuth": false,
    "remotePath": "/home/hirak/Projects/minnow/cpp",
    "uploadOnSave": true,
    "syncMode": "update",
    "ignore": [
        "**/.vscode/**",
        "**/.github/**",
        "**/.git/**",
        "**/.DS_Store",
        "**/.drone/**",
        "**/bin/**",
        "**/build/**",
        "**/doc/**",
        "**/tests/**",
        "**/docker/**"
    ]


}
Mappings-matter

/home/rob/dream-smc-rna/


bash extensive_bam_search.sh /mnt/scratch4/meta_genome/seawulf_stuff/oct_23/bam_extraction_dir.1

Length of reads
for file in `cat srr.1K.hg`; do echo `zcat $file/*_1.fastq.gz |  head -2 | tail -1 | wc -m`; done > srr.1K.hg.length

paste srr.1K.hg srr.1K.hg.length | awk '{n=split($1,a,"/"); print a[n]"\t"$2}'

paste srr.1K.hg srr.1K.hg.length | awk '{if($2 >= 75) {print $1}}' > srr.1K.filtered.hg

Bed operations

Sort and merge exon intervals
cat gencode.v29.chr_patch_hapl_scaff.annotation.gtf | awk 'BEGIN{OFS="\t";} $3=="exon" {print $1,$4-1,$5,$12}' | tr -d "\"" | tr -d ";" | ~/bedtools2/bin/sortBed | ~/bedtools2/bin/mergeBed -i - -c 4 -o collapse > gencode_exon_merged.bed
Seawulf related search

Take hold of a long queue

qsub -I -q long -l nodes=1:ppn=28,walltime=48:00:00
SLURM

srun -J PUG-1 -N 1 -p gpu-long --ntasks-per-node=28 --pty bash
module load shared
module load slurm/17.11.12

conda activate torch-base
module load cuda100/toolkit/10.0

Find incomplete runs
find quants -mindepth 2 -maxdepth 2 -type d '!' -exec test -e "{}/quant.sf" ';' -print


CV
2762