Skip to content

Instantly share code, notes, and snippets.

View bonifazi's full-sized avatar

Renzo bonifazi

View GitHub Profile
@bonifazi
bonifazi / grep.txt
Last active February 6, 2023 20:36
grep commands
# Case-insensitive recursive search for a string, restricted to files with the
# given names; matching lines are printed with their line numbers.
grep --recursive --line-number --ignore-case \
     --include 'file1.txt' --include 'file2.txt' \
     TITLE directory1 directory2
@bonifazi
bonifazi / Template_Rscript.R
Last active March 31, 2023 05:25
Rscript for command line execution and --flagged arguments checks
#!/usr/bin/env Rscript
#
# Purpose: what this script does.
# Author: Renzo Bonifazi - dd-mm-yyyy
# Usage:
# # Rscript --vanilla myscript.R --file my_path/myfile.csv --output myoutput.csv
#
# Example:
# # Rscript --vanilla Make_pedigree.R --file Renzo/Documents/file.csv --output Output.csv
#
@bonifazi
bonifazi / init_project.sh
Last active February 13, 2023 22:23
Initiate a project structure
#!/usr/bin/env bash
# Initiate a project directory structure.
# Every entry is created with `mkdir -p`, so missing parents are created on the
# way and re-running the script on an existing tree is harmless.
project_dirs=(
  Backups                  # to store any backup
  Data/Raw_data            # raw data
  Data/Raw_data/geno       # genotypes are in a sub-dir (add more if needed)
  Data/Proc_data           # processed, i.e. cleaned data at different steps. Use horizontal dirs for middle steps.
  Masters                  # Main wrappers to run the pipeline (usually bash)
  Programs/Scripts         # Collection of scripts (any language)
  Programs/Scripts/R_func  # Collection of R functions (which can be re-used from other projects)
  Programs/Software        # Binaries and executables with their licences
)
for project_dir in "${project_dirs[@]}"; do
  mkdir -p "$project_dir"
done
@bonifazi
bonifazi / checkpkgs.R
Last active April 26, 2023 17:18
Check and load R packages
# different (similar) ways to check and load pkgs in R
# method 1
#' Check that packages are installed and attach them
#'
#' Iterates over `pkg_list`; each package is attached to the search path.
#' Execution stops at the first package that cannot be loaded, with a message
#' showing the exact `install.packages()` call to run.
#'
#' @param pkg_list Character vector of package names. An empty vector is a
#'   no-op.
#' @return `NULL`, invisibly. Called for its side effect of attaching packages.
check_pkgs <- function(pkg_list) {
  for (pkg in pkg_list) {
    # requireNamespace() reports availability as TRUE/FALSE without emitting
    # the extra warning that require() produces for a missing package.
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(
        "Please install the ", pkg,
        " package, using 'install.packages(\"", pkg, "\")'",
        call. = FALSE
      )
    }
    library(pkg, character.only = TRUE)
  }
  invisible(NULL)
}
# Example call: check and load the packages needed by the script.
check_pkgs(pkg_list = c("assertthat", "ggplot2"))
@bonifazi
bonifazi / nested_list_to_dataframe.R
Last active May 10, 2023 12:21
Convert nested list to dataframe quickly
pacman::p_load(Hmisc, rrapply)
# Labels iterated over by the loops: two scenarios and four groups.
scenarios <- paste0("scenario", 1:2)
groups <- paste0("grp", 1:4)
# Empty result containers: one list collects the statistics, the other the
# plots.
ALL_plots <- list()
ALL_stats <- list()
for(scenario in scenarios){
@bonifazi
bonifazi / Plink2PCA.R
Last active May 19, 2023 08:47
PCA for large datasets with Plink2 --pca approx
# First compute PCA files using Plink2 from the command line
# Refer to https://www.cog-genomics.org/plink/2.0/strat#pca for how PCA approximation is done
# As suggested in the link above, --maf 0.01 is applied to exclude variants with a minor allele frequency below 0.01
# Plink2 command line is:
# plink2 --cow --bfile <mybfile> --keep-allele-order --maf 0.01 --pca approx
######### plot in R ################
# Output: a 4-page PDF file: pages 1 to 3 are pairwise scatterplots of PCx vs PCy (for PCs 1 to 3), and page 4 is a 3D scatterplot of the first 3 PCs.
# Load the data-handling and plotting packages via pacman::p_load.
pacman::p_load(dplyr, data.table, ggplot2, scatterplot3d)
# Read the eigenvector table from "plink2.eigenvec" (relative path, so it is
# resolved against the current working directory).
# NOTE(review): presumably this is the file written by the plink2 --pca command
# shown above; confirm the output prefix matches.
pca_res <- fread(file = "plink2.eigenvec")
@bonifazi
bonifazi / count_animal_genotypes.awk
Created June 29, 2023 14:54
Count 0,1,2,5 per animal genotypes
awk 'BEGIN {
OFS = ","
print "ID", 0, 1, 2, 5
}
{
for (i = 2; i <= NF; i++) {
count[$1,$i]++
}
vals[$1] = $1
}
@bonifazi
bonifazi / .bashrc
Last active May 5, 2024 16:54
~/.bashrc aliases
# ll: long listing including hidden entries, with human-readable sizes, colour,
# type indicator suffixes, directories listed first and ISO-style timestamps.
alias ll='ls -lah --color=auto --classify --group-directories-first --time-style=long-iso'
# Disabled variant of ll: same listing, sorted by time with the order reversed.
# alias ll='ls -lah --color=auto --classify --group-directories-first --time-style=long-iso --sort=time -r'
# diff-s: side-by-side diff that ignores whitespace, hides common lines and
# reports when the two files are identical.
alias diff-s='diff -s -wy --suppress-common-lines'
# squeue_comment: squeue with a custom fixed-width column layout.
# NOTE(review): the trailing %k field is presumably the job comment; confirm
# against the squeue manual.
alias squeue_comment="squeue --format='%18i %9P %8j %8u %2t %10M %6D %R %k'"