This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(GEOquery) | |
# Download existing RA datasets | |
# Study 1 | |
# Teixeira VH, Olaso R, Martin-Magniette ML, Lasbleiz S et al. Transcriptome | |
# analysis describing new immunity and defense genes in peripheral blood | |
# mononuclear cells of rheumatoid arthritis patients. | |
# PLoS One 2009 Aug 27;4(8):e6803. PMID: 19710928 | |
# Reference: GSE15573 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get SNP records based on rs ids from VCF file | |
# Kevin Blighe recipe from https://www.biostars.org/p/373852/ | |
# Create a file with snp ids of interest, one id per line | |
# We can give the newly created file any name, for example snp.txt | |
bcftools view --include ID==@snp.txt target.bcf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Taken from https://www.biostars.org/p/111040/ | |
# Examine and save metadata | |
esearch -db sra -query PRJNA484081 | efetch -format runinfo > bioproj.csv | |
# The first column of the comma-separated runinfo file contains the run IDs | |
cat bioproj.csv | cut -d ',' -f 1 | head | |
# Download first 4 files as an example | |
cat bioproj.csv | cut -d ',' -f 1 | grep 'SRR' # Check if we are selecting the right files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash | |
# Taken from https://github.com/stephenturner/mergelanes/issues/1 | |
# Exercise caution: this does not work correctly in every case. | |
# It fails for sample IDs like "A11_Barcodexxx_S11_L001_R1_001": because only | |
# the first "_"-separated field is used as the ID, all R1 files of sample A11 | |
# are concatenated together, including those with different barcodes. | |
ls *R1* | cut -d _ -f 1 | sort | uniq \ | |
| while read id; do \ | |
cat $id*R1*.fastq.gz > $id.R1.fastq.gz; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get paralogs for the list of genes | |
library(biomaRt) | |
human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl") | |
gene_id <- c("TPM1", "BOD1", "ADAP1") | |
results <- getBM(attributes = c("ensembl_gene_id", | |
"external_gene_name", | |
"hsapiens_paralog_ensembl_gene", | |
"hsapiens_paralog_associated_gene_name"), | |
filters = "external_gene_name", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(GenomicFeatures) | |
library(TxDb.Mmusculus.UCSC.mm10.knownGene) | |
library(BSgenome.Mmusculus.UCSC.mm10) | |
library(Biostrings) | |
# Fetch promoter sequences to detect enriched transcription factor | |
# motifs in the promoters of differentially expressed genes | |
setwd("/path/to/dir") | |
list.files() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("ChIPpeakAnno") | |
library("GenomicRanges") | |
library("org.At.tair.db") | |
library("TxDb.Athaliana.BioMart.plantsmart28") | |
library("biomaRt") | |
# Annotate genomic intervals in bed format using ChIPpeakAnno | |
# This script was designed for Arabidopsis, but can be easily changed for | |
# any other organism available through biomaRt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(KEGGREST) | |
library(org.Rn.eg.db) | |
# Download Entrez IDs and their corresponding KEGG pathways, then | |
# create a table in which one column is the Entrez ID and the other | |
# column is a comma-separated list of KEGG pathways | |
# Download pathway to entrez id relationship | |
rno_path_eg <- keggLink("pathway", "rno") | |
names(rno_path_eg) <- gsub("rno:", "", names(rno_path_eg)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(ggplot2) | |
library(reshape2) | |
library(topGO) | |
library(plyr) | |
# Read in some table with gene ids, for example expressions | |
coreGenes <- read.table(<some_table_gene_ids>) | |
# Get gene mappings to GO terms |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RColorBrewer) | |
library(ggplot2) | |
library(ggrepel) | |
setwd("<path/to/dir>") | |
# Load DESeq2 object | |
load("expression_data/DESeqOBJ.RData") | |
dds |
NewerOlder