Ming Tang crazyhottommy

## simplified_dendrogram_to_circlepack.R
library(tidyverse)
library(ggraph)
library(dendextend)
library(igraph)

cut_df <- function(dendrogram, height, c){
    #Function to cut a dendrogram
    cd <- cutree(dendrogram, h = height) %>% as.data.frame()
    cd$ID <- row.names(cd)
    cd <- cd %>% as_tibble()

## geom_flat_violin.R
# somewhat hackish solution to:
# https://twitter.com/EamonCaddigan/status/646759751242620928
# based mostly on copy/pasting from ggplot2 geom_violin source:
# https://github.com/hadley/ggplot2/blob/master/R/geom-violin.r

library(ggplot2)
library(dplyr)


"%||%" <- function(a, b) {

## geom_flat_violin.R
# somewhat hackish solution to:
# https://twitter.com/EamonCaddigan/status/646759751242620928
# based mostly on copy/pasting from ggplot2 geom_violin source:
# https://github.com/hadley/ggplot2/blob/master/R/geom-violin.r

library(ggplot2)
library(dplyr)


"%||%" <- function(a, b) {

## ebi_url_from_srr.py
#API created by @apfejes (Anthony Fejes) on top of my half-cooked script
#python ebi_url_from_srr.py --file srr_list.txt | xargs -I {} wget {}

import argparse
def prepareURL(srr_name, prefix="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/"):
    dir_1=srr_name[:6]
    dir_2=""
    url=""
    num_digits=sum(s.isdigit() for s in srr_name)
    if(num_digits == 6):

## umap.R
#install UMAP from https://github.com/lmcinnes/umap
#install.packages("rPython")

umap <- function(x,n_neighbors=10,min_dist=0.1,metric="euclidean"){
  x <- as.matrix(x)
  colnames(x) <- NULL
  rPython::python.exec( c( "def umap(data,n,mdist,metric):",
                  "\timport umap" ,
                  "\timport numpy",
                  "\tembedding = umap.UMAP(n_neighbors=n,min_dist=mdist,metric=metric).fit_transform(data)",

## mp_primer_v2.sh
#!/bin/bash

printf "\n *** BIS BATCH PRIMER version 2.0 ***"
printf "\n\n !!! 'Primer3 & fastx-toolkit' must be installed on the system.\n\n !!! Edit parameters (e.g. sizes, Tm, and etc) before start\n\n "
printf "\n\n  Usage : \n    ./mp_primer.sh FASTA PARAMETER \n\n"
printf "   >>>  input FASTA = "$1
printf " \n   >>>  parameters  = "$2
printf "\n\n\n  ()()() Running... \n\n"

if [ -f $1 -a -f $2 ]; then

## get_all_clin.R
# Download the indexed clinical data for every TCGA project listed by GDC,
# bind the per-project tables into one, and write the result to
# "all_clin_indexed.csv" in the working directory.
library(TCGAbiolinks)
library(data.table)
clinical <- TCGAbiolinks:::getGDCprojects()$project_id %>%
            # keep only the project ids matching "TCGA"; TRUE is spelled out
            # because `T` is an ordinary variable that can be reassigned
            regexPipes::grep("TCGA", value = TRUE) %>%
            sort %>%
            # apply GDCquery_clinic over margin 1 (one call per project id)
            plyr::alply(1, GDCquery_clinic, .progress = "text") %>%
            rbindlist
# The output file is passed positionally: readr deprecated the `path`
# argument name in favour of `file`, and the positional form works with both.
readr::write_csv(clinical, "all_clin_indexed.csv")

## maf_legacy.R
# Build a GDC query for the TCGA-COAD project: open-access
# "Simple somatic mutation" files in the "Simple nucleotide variation"
# category, with legacy = TRUE.
# NOTE(review): GDCquery/getResults are presumably from TCGAbiolinks; no
# library() call is visible in this snippet — confirm the package is attached.
query.maf.hg19 <- GDCquery(project = "TCGA-COAD",
                           data.category = "Simple nucleotide variation",
                           data.type = "Simple somatic mutation",
                           access = "open",
                           legacy = TRUE)
# Check which MAF files are available: show the creation time and file name
# of every result returned by the query.
knitr::kable(getResults(query.maf.hg19)[,c("created_datetime","file_name")])

query.maf.hg19 <- GDCquery(project = "TCGA-COAD",
                           data.category = "Simple nucleotide variation",

## bkup_dotfiles_configs.md

      
              1 file
            
          
              1 fork
            
          
              0 comments
            
          
              3 stars
            
          
                crazyhottommy
                / bkup_dotfiles_configs.md
            
            
              Last active
              November 27, 2022 07:31
                — forked from sbamin/bkup_dotfiles_configs.md
            
              
                How to rsync dot files and directories of remote server
              
          
    backup dotfiles


The following will copy all dot files and dot directories (~/.*), including their contents, that sit directly underneath the home directory.
To avoid copying caches and other local configs (e.g., those of web browsers, Java apps, etc.), first check directory sizes across the entire home directory $HOME/ with a disk-usage tool, using ncdu $HOME or a similar tool.
Exclude all those large directories using rsync --exclude=.local --exclude=.cache format
Avoid rsyncing passwords, ssh keys, .bash_history, etc. if you are uploading to GitHub or a similar public service.
rsync home dotfiles and configs as follows:

# in your local machine


## msigdf_clusterprofiler.R
## devtools::install_github("stephenturner/msigdf")
library(msigdf)
library(dplyr)
library(clusterProfiler)

# Gene set / Entrez id pairs for the "c2" collection, as a plain data.frame.
# The dplyr verbs are namespace-qualified so that packages attached later
# which also export filter()/select() cannot mask them.
c2 <- msigdf.human %>%
    dplyr::filter(collection == "c2") %>%
    dplyr::select(geneset, entrez) %>%
    as.data.frame()

# The geneList example dataset lives in the DOSE package. Naming the package
# makes data() find it even when DOSE is only imported by clusterProfiler
# and not attached to the search path.
data(geneList, package = "DOSE")
# Take the names of the first 100 entries of geneList.
de <- names(geneList)[1:100]
	library(tidyverse)
	library(ggraph)
	library(dendextend)
	library(igraph)

	cut_df <- function(dendrogram, height, c){
	#Function to cut a dendrogram
	cd <- cutree(dendrogram, h = height) %>% as.data.frame()
	cd$ID <- row.names(cd)
	cd <- cd %>% as_tibble()
	# somewhat hackish solution to:
	# https://twitter.com/EamonCaddigan/status/646759751242620928
	# based mostly on copy/pasting from ggplot2 geom_violin source:
	# https://github.com/hadley/ggplot2/blob/master/R/geom-violin.r

	library(ggplot2)
	library(dplyr)


	"%\|\|%" <- function(a, b) {
	#API created by @apfejes (Anthony Fejes) on top of my half-cooked script
	#python ebi_url_from_srr.py --file srr_list.txt \| xargs -I {} wget {}

	import argparse
	def prepareURL(srr_name, prefix="ftp://ftp.sra.ebi.ac.uk/vol1/fastq/"):
	dir_1=srr_name[:6]
	dir_2=""
	url=""
	num_digits=sum(s.isdigit() for s in srr_name)
	if(num_digits == 6):
	#install UMAP from https://github.com/lmcinnes/umap
	#install.packages("rPython")

	umap <- function(x,n_neighbors=10,min_dist=0.1,metric="euclidean"){
	x <- as.matrix(x)
	colnames(x) <- NULL
	rPython::python.exec( c( "def umap(data,n,mdist,metric):",
	"\timport umap" ,
	"\timport numpy",
	"\tembedding = umap.UMAP(n_neighbors=n,min_dist=mdist,metric=metric).fit_transform(data)",
	#!/bin/bash

	printf "\n * BIS BATCH PRIMER version 2.0 *"
	printf "\n\n !!! 'Primer3 & fastx-toolkit' must be installed on the system.\n\n !!! Edit parameters (e.g. sizes, Tm, and etc) before start\n\n "
	printf "\n\n Usage : \n ./mp_primer.sh FASTA PARAMETER \n\n"
	printf " >>> input FASTA = "$1
	printf " \n >>> parameters = "$2
	printf "\n\n\n ()()() Running... \n\n"

	if [ -f $1 -a -f $2 ]; then
	# Download the indexed clinical data for every TCGA project listed by GDC,
	# bind the per-project tables into one, and write the result to
	# "all_clin_indexed.csv" in the working directory.
	library(TCGAbiolinks)
	library(data.table)
	clinical <- TCGAbiolinks:::getGDCprojects()$project_id %>%
	  # keep only the project ids matching "TCGA"; TRUE is spelled out
	  # because `T` is an ordinary variable that can be reassigned
	  regexPipes::grep("TCGA", value = TRUE) %>%
	  sort %>%
	  # apply GDCquery_clinic over margin 1 (one call per project id)
	  plyr::alply(1, GDCquery_clinic, .progress = "text") %>%
	  rbindlist
	# The output file is passed positionally: readr deprecated the `path`
	# argument name in favour of `file`, and the positional form works with both.
	readr::write_csv(clinical, "all_clin_indexed.csv")
	# Build a GDC query for the TCGA-COAD project: open-access
	# "Simple somatic mutation" files in the "Simple nucleotide variation"
	# category, with legacy = TRUE.
	# NOTE(review): GDCquery/getResults are presumably from TCGAbiolinks; no
	# library() call is visible in this snippet — confirm the package is attached.
	query.maf.hg19 <- GDCquery(project = "TCGA-COAD",
	data.category = "Simple nucleotide variation",
	data.type = "Simple somatic mutation",
	access = "open",
	legacy = TRUE)
	# Check which MAF files are available: show the creation time and file name
	# of every result returned by the query.
	knitr::kable(getResults(query.maf.hg19)[,c("created_datetime","file_name")])

	query.maf.hg19 <- GDCquery(project = "TCGA-COAD",
	data.category = "Simple nucleotide variation",
	## devtools::install_github("stephenturner/msigdf")
	library(msigdf)
	library(dplyr)
	library(clusterProfiler)

	# Gene set / Entrez id pairs for the "c2" collection, as a plain data.frame.
	# The dplyr verbs are namespace-qualified so that packages attached later
	# which also export filter()/select() cannot mask them.
	c2 <- msigdf.human %>%
	  dplyr::filter(collection == "c2") %>%
	  dplyr::select(geneset, entrez) %>%
	  as.data.frame()

	# The geneList example dataset lives in the DOSE package. Naming the package
	# makes data() find it even when DOSE is only imported by clusterProfiler
	# and not attached to the search path.
	data(geneList, package = "DOSE")
	# Take the names of the first 100 entries of geneList.
	de <- names(geneList)[1:100]