Skip to content

Instantly share code, notes, and snippets.

View andreagrioni's full-sized avatar

Andrea Grioni andreagrioni

View GitHub Profile
@andreagrioni
andreagrioni / read_h5.py
Created May 14, 2021 21:48
read h5 file
import h5py
import pandas as pd
# Path to the HDF5 file to load.
filepath = "h5_path"

# Open the file read-only inside a context manager so the handle is always
# released, even if a dataset is missing or the conversion fails.
with h5py.File(filepath, "r") as f:
    # `[()]` reads the whole dataset into memory; this must happen before
    # the file is closed. Each array is then wrapped in a pandas DataFrame.
    dset_x_train = pd.DataFrame(f["x_train"][()])
    dset_y_train = pd.DataFrame(f["y_train"][()])
@andreagrioni
andreagrioni / write_h5.py
Created May 14, 2021 21:56
write pandas dataframes as h5
import h5py
# Create the HDF5 file and store each split as a gzip-compressed dataset.
# The context manager closes the file when the block exits.
# NOTE(review): PROCESSPATH, X_train, y_train and X_test are defined outside
# this snippet; PROCESSPATH is presumably a pathlib.Path -- confirm.
with h5py.File(PROCESSPATH.joinpath("final_df.h5"), 'w') as hf:
    # chunks=True lets h5py pick the chunk shape for each dataset.
    dset_x_train = hf.create_dataset(
        'x_train', data=X_train, shape=X_train.shape, compression='gzip', chunks=True)
    dset_y_train = hf.create_dataset(
        'y_train', data=y_train, shape=y_train.shape, compression='gzip', chunks=True)
    dset_x_test = hf.create_dataset(
        'x_test', data=X_test, shape=X_test.shape, compression='gzip', chunks=True)
# Strip the header from an .adat file: delete everything from line 1 through
# the first line matching TABLE_BEGIN (that line included), so that the
# remaining table is compatible as input to SODA.
sed '1,/TABLE_BEGIN/d' file_input.adat > file_output.adat
# Comprehension: map every action to whether it is one of the accepted ones.
actions = [1, 2, 3]
accepted = [1, 2]
# `action in accepted` already evaluates to a bool, so no conditional
# expression is needed.
eval_args = {action: action in accepted for action in actions}
# time it
import timeit
def ciao():
    """Return the sum of the integers 0 through 999."""
    # sum() consumes the range directly; no intermediate list is required.
    return sum(range(1000))
@andreagrioni
andreagrioni / python.ipynb
Created November 22, 2021 08:55
Python.ipynb
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#' Generate volcano plots with EnhancedVolcano
#' @param table a tibble in the format of limma::topTable
#' @param coef the coefficient to filter on
#' @param img_dir directory to save the image to
#' @param fdr_threshold the maximum FDR allowed to color targets
#' @param save logical (default TRUE)
make_volcano <- function(table, coef, img_dir, fdr_threshold=0.1, save=TRUE) {
# filter for coefficient
table %>%
dplyr::filter(coeff == coef) -> tmp
@andreagrioni
andreagrioni / pivot_topTable.R
Created November 26, 2021 10:02
Pivot summary top table to wider format
# topTable summary from limma
table_iv %>%
# select target coefficients
dplyr::select(dplyr::all_of( c("Var1", targets_coef))) %>%
# pivot coefficient to long format
tidyr::pivot_longer(!Var1) %>%
# separate coefficients into new cols
tidyr::separate(name, into=c("ARMCD", "day")) %>%
# pivot wider along days/timepoint
tidyr::pivot_wider(names_from=day, values_from=value) %>%
# Create a scree plot with the PCAtools package.
# NOTE(review): `p`, `data_dir` and `bmd_tag` are defined outside this
# snippet; `p` is presumably the object returned by PCAtools::pca() -- confirm.
## Find the elbow point of the per-component variance, used as the
## optimum number of PCs to retain.
elbow <- PCAtools::findElbowPoint(p$variance)
## Build the output path: <data_dir>/img/<bmd_tag>.screeplot.png
image_filepath <- fs::path(data_dir, glue::glue("img/{bmd_tag}.screeplot.png"))
## Open a 4 x 4 inch, 300 dpi PNG graphics device at that path.
png(image_filepath, width=4, height=4, units="in", res=300)
## create image
PCAtools::screeplot(
p,
# Build the wide table that feeds the PCAtools PCA object:
# keep only the SeqId, q_norm and SampleId columns, spread SeqId into one
# column per id holding the q_norm values, then drop duplicated rows.
metrics_df <- measuraments_annotated %>%
  dplyr::select(dplyr::all_of(c("SeqId", "q_norm", "SampleId"))) %>%
  tidyr::pivot_wider(names_from = SeqId, values_from = q_norm) %>%
  dplyr::distinct()
# retrieve measurement and transpose to matrix
metrics_df %>%
@andreagrioni
andreagrioni / render_multiple_figures.Rmd
Created December 2, 2021 00:46
Add multiple images programmatically within an Rmd file.
```{r, results = 'asis'}
# List the figure files found in the target folder.
plots <- list.files(folder_path)
# Emit one Markdown image tag per figure. The chunk is declared with
# results = 'asis', so the text printed by cat() is treated as Markdown.
# NOTE(review): image paths are built under "plot" rather than `folder_path`;
# confirm both refer to the same directory.
for (i in plots) {
  filename <- file.path("plot", i)
  # sep = "" keeps stray spaces out of the link target; the trailing blank
  # line puts every image in its own paragraph instead of one run-on line.
  cat("![text](", filename, ")\n\n", sep = "")
}
```