Skip to content

Instantly share code, notes, and snippets.

View slavailn's full-sized avatar

slava ilnytskyy slavailn

View GitHub Profile
@slavailn
slavailn / GEOquery_download.R
Last active April 22, 2024 17:54
Download GEO datasets with GEOquery
library(GEOquery)
# Download existing RA datasets
# Study 1
# Teixeira VH, Olaso R, Martin-Magniette ML, Lasbleiz S et al. Transcriptome
# analysis describing new immunity and defense genes in peripheral blood
# mononuclear cells of rheumatoid arthritis patients.
# PLoS One 2009 Aug 27;4(8):e6803. PMID: 19710928
# Reference: GSE15573
@slavailn
slavailn / get_snps_by_rsids.sh
Last active March 27, 2024 21:35
Retrieve SNP sites from VCF files based on rs ids
# Get SNP records based on rs ids from VCF file
# Kevin Blighe recipe from https://www.biostars.org/p/373852/
# Create a file with snp ids of interest, one id per line
# We can give the newly created file any name, for example snp.txt
bcftools view --include ID==@snp.txt target.bcf
@slavailn
slavailn / fetch_fastq.sh
Last active March 7, 2024 20:29
Download raw fastq files from SRA with sra tools
# Taken from https://www.biostars.org/p/111040/
# Examine and save metadata
esearch -db sra -query PRJNA484081 | efetch -format runinfo > bioproj.csv
# The first column of the comma-separated runinfo file contains the run ids
cat bioproj.csv | cut -d ',' -f 1 | head
# Download first 4 files as an example
cat bioproj.csv | cut -d ',' -f 1 | grep 'SRR' # Check if we are selecting the right files
@slavailn
slavailn / merge_lanes.sh
Created November 22, 2023 16:55
Merge fastq files that belong to the same sample, but were generated from different lanes
#! /bin/bash
# Taken from https://github.com/stephenturner/mergelanes/issues/1
# Exercise caution, does not work accurately in every case:
# Not working accurately for sample IDs like "A11_Barcodexxx_S11_L001_R1_001".
# It also concatenates all L001 files belonging to sample ID A11, even when they have different barcodes
ls *R1* | cut -d _ -f 1 | sort | uniq \
| while read id; do \
cat $id*R1*.fastq.gz > $id.R1.fastq.gz;
@slavailn
slavailn / biomaRt_paralogs,R
Created May 17, 2021 05:00
Get paralogs for the list of genes using biomaRt
# Get paralogs for the list of genes
library(biomaRt)
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
gene_id <- c("TPM1", "BOD1", "ADAP1")
results <- getBM(attributes = c("ensembl_gene_id",
"external_gene_name",
"hsapiens_paralog_ensembl_gene",
"hsapiens_paralog_associated_gene_name"),
filters = "external_gene_name",
library(GenomicFeatures)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(BSgenome.Mmusculus.UCSC.mm10)
library(Biostrings)
# Fetch promoter sequences to detect enriched transcription factor
# motifs in the promoters of differentially expressed genes
setwd("/path/to/dir")
list.files()
@slavailn
slavailn / annotate_MACS2_peaks.R
Created November 18, 2017 00:12
Annotate MACS2 peaks using ChIPpeakAnno
library("ChIPpeakAnno")
library("GenomicRanges")
library("org.At.tair.db")
library("TxDb.Athaliana.BioMart.plantsmart28")
library("biomaRt")
# Annotate genomic intervals in bed format using ChIPpeakAnno
# This script was designed for Arabidopsis, but can be easily changed for
# any other organism available through biomaRt
@slavailn
slavailn / entrez_to_KEGG.R
Created August 25, 2023 20:16
Download KEGG pathways with related entrez ids
library(KEGGREST)
library(org.Rn.eg.db)
# Download entrez ids and corresponding KEGG pathways followed
# by creation of a table where one column is entrez id and another
# column is a comma separated list of KEGG pathways
# Download pathway to entrez id relationship
rno_path_eg <- keggLink("pathway", "rno")
names(rno_path_eg) <- gsub("rno:", "", names(rno_path_eg))
@slavailn
slavailn / topGO_nonModel_organism.R
Created November 1, 2017 20:57
Run topGO on non-model organism
library(dplyr)
library(ggplot2)
library(reshape2)
library(topGO)
library(plyr)
# Read in a table with gene ids, for example an expression table
coreGenes <- read.table(<some_table_gene_ids>)
# Get gene mappings to GO terms
@slavailn
slavailn / Two_factor_anova_DESeq2.R
Created February 21, 2023 20:51
Two-factor analysis with DESeq2
library(RColorBrewer)
library(ggplot2)
library(ggrepel)
setwd("<path/to/dir>")
# Load DESeq2 object
load("expression_data/DESeqOBJ.RData")
dds