Skip to content

Instantly share code, notes, and snippets.

View clairemcwhite's full-sized avatar

Claire D. McWhite clairemcwhite

View GitHub Profile
@clairemcwhite
clairemcwhite / setup_google_beta_lifesciences.sh
Last active February 5, 2021 22:55
Notes on setting up and using the google cloud lifesciences pipelines
# Replace PROJECT_ID with your billable project ID (format: "word-word-number")
# Keeping the -life-sciences suffix on the bucket name may be important
export BUCKET=gs://PROJECT_ID-life-sciences
# Make a bucket (storage folder) for input and output data
# See intro to installing gsutil in this gist https://gist.github.com/clairemcwhite/ca33c9b7385ee4e2b64641353399f3ba
gsutil mb ${BUCKET}
# First do the example script
# These are extra instructions for accessing and querying the Hail tables from the Pan-UK Biobank
# https://pan.ukbb.broadinstitute.org/docs/hail-format/index.html
#
# Open a google cloud account, set up billing, get project id (word-word-number)
# Create a virtual machine
# Select Ubuntu 16.04
# Allow access to all Cloud APIs (unclear whether this is needed)
# Hail needs Python 3.7
read_fasta <- function(fasta_filename, annot = FALSE){
fasta <- seqinr::read.fasta(fasta_filename, as.string = TRUE)
# Convert seqinr SeqFastadna object to data.frame
fasta_df <- fasta %>%
sapply(function(x){x[1:length(x)]}) %>%
as.data.frame %>%
broom::fix_data_frame(newcol = "ID", newnames = "Sequence")
if(annot == TRUE){
@clairemcwhite
clairemcwhite / consistent_fontsize.R
Last active January 19, 2021 01:04
Consistent exported font sizes
# Only needed if using theme_cowplot. Otherwise, use any theme.
library(cowplot)
theme_cowplot_consistent_text <- function (font_size = 8) {
theme_cowplot() %+replace%
theme(strip.text = element_text(size = font_size),
axis.text = element_text(colour = "black", size = font_size),
plot.title = element_text(size = font_size),
@clairemcwhite
clairemcwhite / clustered_heatmap.R
Last active January 6, 2020 19:44
Clustered ggplot2 heatmap
library(tidyverse)
# Derive a row ordering for `df` from a hierarchical clustering of its rows.
#
# Arguments:
#   df                Data whose rows are clustered; rows are compared by
#                     correlating them with one another (via cor(t(df))).
#   distmethod        Passed to cor() as `method`.
#   hclustmethod      Passed to hclust() as `method`.
#   output_ordername  Name given to the position column of the result.
#
# Returns a two-column data.frame: "ID" holds the row labels in dendrogram
# order, and the column named by `output_ordername` holds each label's
# position in that order. Positions are taken from the data.frame row names,
# so they are character strings ("1", "2", ...), not integers.
get_order <- function(df, distmethod = "pearson", hclustmethod = "average", output_ordername = "order") {
  # Correlation between rows, turned into a dissimilarity (1 - r)
  row_cor <- cor(t(df), method = distmethod)
  row_dist <- as.dist(1 - row_cor)

  tree <- hclust(row_dist, method = hclustmethod)

  # Labels rearranged into the left-to-right leaf order of the dendrogram
  clustered <- data.frame(tree$labels[tree$order])
  clustered$ordering <- rownames(clustered)
  names(clustered) <- c("ID", output_ordername)

  return(clustered)
}
@clairemcwhite
clairemcwhite / random_walk_animation.R
Last active January 9, 2023 23:08
Script to animate the path of a random walk across a graph - A walker starts at a node and takes random steps through the graph, tending to get "stuck" in dense subgraphs
library(tidyverse) # For everything
library(gganimate) # For animating graphs
library(ggraph) # For plotting graphs
library(magick) # For exporting gif
library(tidygraph) # For manipulating graphs
library(colorspace) # For sequential_hcl
library(igraph) # For random walk and as_ids
@clairemcwhite
clairemcwhite / simplified_dendrogram_to_circlepack.R
Last active March 18, 2023 16:53
Demo of going from an igraph dendrogram to a simplified view as a ggraph circlepack as in https://twitter.com/clairemcwhite/status/1079895521446293505
library(tidygraph)
library(tidyverse)
library(ggraph)
library(dendextend)
library(igraph)
palette_OkabeIto <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7", "#999999")
cut_df <- function(dendrogram, height, c){