Andrea Grioni andreagrioni

## read_h5.py
import h5py
import pandas as pd

# Path to the HDF5 file to read.
filepath = "h5_path"

# Open the file read-only. The context manager closes the handle even if one
# of the reads fails, instead of leaving it open for the life of the process.
with h5py.File(filepath, 'r') as f:
    # `[()]` reads the whole dataset into memory, so the DataFrames remain
    # usable after the file has been closed.
    dset_x_train = pd.DataFrame(f['x_train'][()])
    dset_y_train = pd.DataFrame(f['y_train'][()])

## write_h5.py
import h5py

# Write the train/test arrays to "final_df.h5" under PROCESSPATH.
# Each array becomes one gzip-compressed, chunked dataset; the file is closed
# automatically when the `with` block exits.
with h5py.File(PROCESSPATH.joinpath("final_df.h5"), 'w') as hf:
    dset_x_train = hf.create_dataset(
        'x_train',
        data=X_train,
        shape=X_train.shape,
        compression='gzip',
        chunks=True,
    )
    dset_y_train = hf.create_dataset(
        'y_train',
        data=y_train,
        shape=y_train.shape,
        compression='gzip',
        chunks=True,
    )
    dset_x_test = hf.create_dataset(
        'x_test',
        data=X_test,
        shape=X_test.shape,
        compression='gzip',
        chunks=True,
    )

## clean_adat.sh
# Delete every line from the start of the file through the first line that
# matches TABLE_BEGIN, keeping only what follows it.
# This strips the headers from an ADAT file so that the result is
# compatible as input to SODA.
# NOTE: with a `1,/re/` range the pattern is searched from line 2 onward, so
# a TABLE_BEGIN marker on the very first line would not end the range.
sed -e '1,/TABLE_BEGIN/d' file_input.adat > file_output.adat

## utils_python
# Dict comprehension: map each action to whether it is in the accepted list.
actions = [1, 2, 3]
accepted = [1, 2]
# `action in accepted` is already a bool, so no `True if ... else False`.
eval_args = {action: action in accepted for action in actions}

# time I
import timeit
def ciao():
  """Return the sum of the integers 0 through 999."""
  # sum() consumes the range directly; no intermediate list is needed.
  return sum(range(1000))

## python.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                andreagrioni
                / python.ipynb
            
            
              Created
              November 22, 2021 08:55
            
              
                Python.ipynb
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## make_volcano.R
#' Generate volcano plots with EnhancedVolcano
#' @param table a tibble in the format of limma::topTable
#' @param coef the coefficient to filter on
#' @param img_dir directory where the image is saved
#' @param fdr_threshold the maximum FDR allowed for a target to be colored
#' @param save logical (default TRUE)
make_volcano <- function(table, coef, img_dir, fdr_threshold=0.1, save=TRUE) {
  # filter for coefficient
  table %>%
    dplyr::filter(coeff == coef) -> tmp

## pivot_topTable.R
# topTable summary from limma
table_iv %>%
  # select target coefficients
  dplyr::select(dplyr::all_of( c("Var1", targets_coef))) %>%
  # pivot coefficient to long format
  tidyr::pivot_longer(!Var1) %>%
  # separate coefficients into new cols
  tidyr::separate(name, into=c("ARMCD", "day")) %>%
  # pivot wider along days/timepoint
  tidyr::pivot_wider(names_from=day, values_from=value) %>%

## screeplot.R
# create screeplot with PCAtools package
## find optimum number of PCs to retain
elbow <- PCAtools::findElbowPoint(p$variance)
## set image path
image_filepath <- fs::path(data_dir, glue::glue("img/{bmd_tag}.screeplot.png"))
## open image file
png(image_filepath, width=4, height=4, units="in", res=300)
## create image
PCAtools::screeplot(
  p,

## pca_object.R
# Prepare the wide measurement table for the PCAtools PCA object.
# Keep SeqId, q_norm and SampleId, spread SeqId into one column per id
# (filled with q_norm), then drop duplicated rows.
metrics_df <- measuraments_annotated %>%
  dplyr::select(dplyr::all_of(c("SeqId", "q_norm", "SampleId"))) %>%
  tidyr::pivot_wider(names_from = SeqId, values_from = q_norm) %>%
  dplyr::distinct()
# retrieve measurement and transpose to matrix
metrics_df %>%

## render_multiple_figures.Rmd
```{r, results = 'asis'}
# List the figure files found in `folder_path`.
plots <- list.files(folder_path)
# Emit one Markdown image link per figure. Links point into `folder_path`
# itself so they always match the files that were listed, the link target is
# written without padding spaces, and each link is followed by a blank line so
# every image renders as its own paragraph. Nothing is printed when the
# folder is empty.
cat(sprintf("![text](%s)\n\n", file.path(folder_path, plots)), sep = "")
```
	import h5py
	import pandas as pd

	# Path to the HDF5 file to read.
	filepath = "h5_path"

	# Open the file read-only. The context manager closes the handle even if
	# one of the reads fails, instead of leaving it open indefinitely.
	with h5py.File(filepath, 'r') as f:
	    # `[()]` reads the whole dataset into memory, so the DataFrames remain
	    # usable after the file has been closed.
	    dset_x_train = pd.DataFrame(f['x_train'][()])
	    dset_y_train = pd.DataFrame(f['y_train'][()])
	import h5py

	# Write the train/test arrays to "final_df.h5" under PROCESSPATH.
	# Each array becomes one gzip-compressed, chunked dataset. The body of the
	# `with` statement must be indented, otherwise Python rejects the block.
	with h5py.File(PROCESSPATH.joinpath("final_df.h5"), 'w') as hf:
	    dset_x_train = hf.create_dataset(
	        'x_train', data=X_train, shape=X_train.shape,
	        compression='gzip', chunks=True)
	    dset_y_train = hf.create_dataset(
	        'y_train', data=y_train, shape=y_train.shape,
	        compression='gzip', chunks=True)
	    dset_x_test = hf.create_dataset(
	        'x_test', data=X_test, shape=X_test.shape,
	        compression='gzip', chunks=True)
	# Delete every line from the start of the file through the first line that
	# matches TABLE_BEGIN, keeping only what follows it.
	# This strips the headers from an ADAT file so that the result is
	# compatible as input to SODA.
	# NOTE: with a `1,/re/` range the pattern is searched from line 2 onward, so
	# a TABLE_BEGIN marker on the very first line would not end the range.
	sed -e '1,/TABLE_BEGIN/d' file_input.adat > file_output.adat
	# Dict comprehension: map each action to whether it is in the accepted list.
	actions = [1, 2, 3]
	accepted = [1, 2]
	# `action in accepted` is already a bool, so no `True if ... else False`.
	eval_args = {action: action in accepted for action in actions}

	# time I
	import timeit
	def ciao():
	  """Return the sum of the integers 0 through 999."""
	  # The body must be indented under `def`; sum() consumes the range
	  # directly, so no intermediate list is needed.
	  return sum(range(1000))
	#' Generate volcano plots with EnhancedVolcano
	#' @param table a tibble in the format of limma::topTable
	#' @param coef the coefficient to filter on
	#' @param img_dir directory where the image is saved
	#' @param fdr_threshold the maximum FDR allowed for a target to be colored
	#' @param save logical (default TRUE)
	make_volcano <- function(table, coef, img_dir, fdr_threshold=0.1, save=TRUE) {
	# filter for coefficient
	table %>%
	dplyr::filter(coeff == coef) -> tmp
	# topTable summary from limma
	table_iv %>%
	# select target coefficients
	dplyr::select(dplyr::all_of( c("Var1", targets_coef))) %>%
	# pivot coefficient to long format
	tidyr::pivot_longer(!Var1) %>%
	# separate coefficients into new cols
	tidyr::separate(name, into=c("ARMCD", "day")) %>%
	# pivot wider along days/timepoint
	tidyr::pivot_wider(names_from=day, values_from=value) %>%
	# create screeplot with PCAtools package
	## find optimum number of PCs to retain
	elbow <- PCAtools::findElbowPoint(p$variance)
	## set image path
	image_filepath <- fs::path(data_dir, glue::glue("img/{bmd_tag}.screeplot.png"))
	## open image file
	png(image_filepath, width=4, height=4, units="in", res=300)
	## create image
	PCAtools::screeplot(
	p,
	# Prepare the wide measurement table for the PCAtools PCA object.
	# Keep SeqId, q_norm and SampleId, spread SeqId into one column per id
	# (filled with q_norm), then drop duplicated rows.
	metrics_df <- measuraments_annotated %>%
	  dplyr::select(dplyr::all_of(c("SeqId", "q_norm", "SampleId"))) %>%
	  tidyr::pivot_wider(names_from = SeqId, values_from = q_norm) %>%
	  dplyr::distinct()
	# retrieve measurement and transpose to matrix
	metrics_df %>%
	```{r, results = 'asis'}
	# List the figure files found in `folder_path`.
	plots <- list.files(folder_path)
	# Emit one Markdown image link per figure. Links point into `folder_path`
	# itself so they always match the files that were listed, the link target
	# is written without padding spaces, and each link is followed by a blank
	# line so every image renders as its own paragraph. Nothing is printed when
	# the folder is empty.
	cat(sprintf("![text](%s)\n\n", file.path(folder_path, plots)), sep = "")
	```