Michael Kuhn mkuhn

## guides.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mkuhn
                / guides.md
            
            
              Created
              June 30, 2021 08:24
            
              
                Hollow circles for size legend
              
          
    library(tidyverse)

set.seed(23)

d <- tibble(x = rnorm(10), y = rnorm(10), a = rep(c("x", "y"),5), b = rep(1:5, 2))

d %>% 
  ggplot(aes(x, y, color = a, size = b)) + geom_point() +
 guides(colour = guide_legend(order = 1), 

  
## pdz
Scribble:

B7Z2Y1_HUMAN
ARHG7_HUMAN
ARHG8_HUMAN
KCNA5_HUMAN
VWCE_HUMAN
DNML1_HUMAN
PKP4_HUMAN
NXPE2_HUMAN

## inplace_comparison.R
library(Rcpp)

`%count<%` <- cppFunction('
size_t count_less(NumericVector x, NumericVector y) {

  const size_t nx = x.size();
  const size_t ny = y.size();

  if (nx > 1 & ny > 1) stop("Only one parameter can be a vector!");

## gist:b1b6ad8d37c63b1e929d
> library(purrr)
> ll <- list(list(a=1:3, b=4), list(a=5:7, b=8))
> ll %>% map(lift_dl(c)) %>% map_call(rbind)
     a1 a2 a3 b
[1,]  1  2  3 4
[2,]  5  6  7 8
> Reduce(rbind, ll)
     a         b
init Integer,3 4
     Integer,3 8

## initial_param_vs_runtime.R
library(purrr)

# Second column of the elapsed-time table reported for `fit`.
# NOTE(review): presumably the sampling-phase time per chain -- confirm against
# the package that provides get_elapsed_time().
runtimes <- get_elapsed_time(fit)[, 2]

# NOTE(review): presumably the initial parameter values of `fit`, one list
# entry per chain -- confirm against the package providing get_inits().
inits <- get_inits(fit)

## traditional conversion to a matrix
# m.init <- do.call(rbind, lapply(inits, function(l) do.call(c, l)))

## using purrr

## fitting_with_uncertainty.Rmd
---
title: "Fitting with uncertainty"
author: "Michael Kuhn"
date: "24 Aug 2015"
output: html_document
---

In this toy example, we assume that we've independently measured values $x$ and $y$ and want to find a linear relationship between them, accounting for measurement uncertainty. Each $x$ and $y$ value is assigned a different uncertainty, and the challenge is to take this information into account. A standard linear model will treat all points equally.

When we treat each measurement as a multivariate normal distribution, we can find the point along the proposed fitted line that maximizes its probability density function (i.e. the point of maximum likelihood). Thus, given a slope and an intercept, we virtually move each measurement to its most likely point along the line and use the likelihood at that point.

## code.R
library(dplyr)

# Compare single-column extraction on a base data.frame and on a tibble.
a <- data.frame(foo = 1:10, bar = "bar")
# as_tibble() replaces tbl_df(), which has been deprecated since dplyr 1.0.0.
b <- as_tibble(a)

# `[` with a single column drops a data.frame to a plain vector ...
a[, 1]
# ... whereas a tibble never drops and stays a one-column tibble.
b[, 1]

# paste0() therefore works element-wise on the vector taken from `a` ...
paste0(a[, 1], "!")
# ... but receives a one-column tibble (a list) for `b`, so the result is not
# the element-wise character vector; use b[[1]] to extract the column itself.
paste0(b[, 1], "!")

## gist:9ad4da2e039e5c6741e2
library(ggplot2)
library(gtable)

# Example data ----
# Fix the RNG state so the random columns below are reproducible.
set.seed(42)

dataset_names <- c("Human", "Mouse", "Fly", "Worm")

# One row per dataset; factor levels follow the order of `dataset_names`
# instead of the default alphabetical order.
datasets <- data.frame(
  name = factor(dataset_names, levels = dataset_names),
  parity = factor(c(0, 0, 1, 0)),
  v50 = runif(4, max = 0.5),
  y = 1:4
)

# All 4 x 4 ordered pairs of datasets, each with a random value in [0, 0.5].
data <- data.frame(
  dataset1 = rep(datasets$name, times = 4),
  dataset2 = rep(datasets$name, each = 4),
  z = runif(16, min = 0, max = 0.5)
)

# Two grey shades.
pal <- c("#dddddd", "#aaaaaa")

## InChi.js
// International Chemical Identifier (InChI) regex, by lo sauer - lsauer.com
// Example input: the InChI string of morphine.
var x="InChI=1S/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13-,16-,17-/m0/s1"

// Strict variant: restricts the body to a character subset for organic compounds.
// Pattern: optional "InChI=" prefix, one character other than "J", then at least
// six characters from the allowed set; anchored at both ends, flags i and g.
// match() returns null when nothing matches, so reading .length would fail;
// `!!` coerces the result to a boolean instead (equivalent to Boolean(..)).
!!x.trim().match(/^((InChI=)?[^J][0-9BCOHNSOPrIFla+\-\(\)\\\/,pqbtmsih]{6,})$/ig)
>true
// Generic variant: accepts any digit or letter plus + - ( ) \ / , in the body
// and yields the raw match() result (not coerced to a boolean).
x.trim().match(/^((InChI=)?[^J][0-9a-z+\-\(\)\\\/,]+)$/ig)

## plot with densities.R
library(ggplot2)
library(gridExtra)

# Convert the cylinder count to an ordered factor.
mtcars$cyl <- ordered(mtcars$cyl)

# Scatter plot of hp against mpg, coloured by cylinder count.
p <- ggplot(mtcars, aes(mpg, hp, colour = cyl)) +
  geom_point()

# Same scatter plot with the legend hidden.
p1 <- p + theme(legend.position = "none")

# Density of mpg per cylinder group, coloured by group and with no fill.
p2 <- ggplot(mtcars, aes(x = mpg, group = cyl, colour = cyl)) +
  stat_density(fill = NA, position = "dodge")
	Scribble:

	B7Z2Y1_HUMAN
	ARHG7_HUMAN
	ARHG8_HUMAN
	KCNA5_HUMAN
	VWCE_HUMAN
	DNML1_HUMAN
	PKP4_HUMAN
	NXPE2_HUMAN
	library(Rcpp)

	`%count<%` <- cppFunction('
	size_t count_less(NumericVector x, NumericVector y) {

	const size_t nx = x.size();
	const size_t ny = y.size();

	if (nx > 1 & ny > 1) stop("Only one parameter can be a vector!");
	> library(purrr)
	> ll <- list(list(a=1:3, b=4), list(a=5:7, b=8))
	> ll %>% map(lift_dl(c)) %>% map_call(rbind)
	a1 a2 a3 b
	[1,] 1 2 3 4
	[2,] 5 6 7 8
	> Reduce(rbind, ll)
	a b
	init Integer,3 4
	Integer,3 8
	library(purrr)

	# Second column of the elapsed-time table reported for `fit`.
	# NOTE(review): presumably the sampling-phase time per chain -- confirm
	# against the package that provides get_elapsed_time().
	runtimes <- get_elapsed_time(fit)[, 2]

	# NOTE(review): presumably the initial parameter values of `fit`, one list
	# entry per chain -- confirm against the package providing get_inits().
	inits <- get_inits(fit)

	## traditional conversion to a matrix
	# m.init <- do.call(rbind, lapply(inits, function(l) do.call(c, l)))

	## using purrr
	---
	title: "Fitting with uncertainty"
	author: "Michael Kuhn"
	date: "24 Aug 2015"
	output: html_document
	---

	In this toy example, we assume that we've independently measured values $x$ and $y$ and want to find a linear relationship between them, accounting for measurement uncertainty. Each $x$ and $y$ value is assigned a different uncertainty, and the challenge is to take this information into account. A standard linear model will treat all points equally.

	When we treat each measurement as a multivariate normal distribution, we can find the point along the proposed fitted line that maximizes its probability density function (i.e. the point of maximum likelihood). Thus, given a slope and an intercept, we virtually move each measurement to its most likely point along the line and use the likelihood at that point.
	library(dplyr)

	# Compare single-column extraction on a base data.frame and on a tibble.
	a <- data.frame(foo = 1:10, bar = "bar")
	# as_tibble() replaces tbl_df(), which has been deprecated since dplyr 1.0.0.
	b <- as_tibble(a)

	# `[` with a single column drops a data.frame to a plain vector ...
	a[, 1]
	# ... whereas a tibble never drops and stays a one-column tibble.
	b[, 1]

	# paste0() therefore works element-wise on the vector taken from `a` ...
	paste0(a[, 1], "!")
	# ... but receives a one-column tibble (a list) for `b`, so the result is
	# not the element-wise character vector; use b[[1]] to extract the column.
	paste0(b[, 1], "!")
	library(ggplot2)
	library(gtable)

	# Example data ----
	# Fix the RNG state so the random columns below are reproducible.
	set.seed(42)

	dataset_names <- c("Human", "Mouse", "Fly", "Worm")

	# One row per dataset; factor levels follow the order of `dataset_names`
	# instead of the default alphabetical order.
	datasets <- data.frame(
	  name = factor(dataset_names, levels = dataset_names),
	  parity = factor(c(0, 0, 1, 0)),
	  v50 = runif(4, max = 0.5),
	  y = 1:4
	)

	# All 4 x 4 ordered pairs of datasets, each with a random value in [0, 0.5].
	data <- data.frame(
	  dataset1 = rep(datasets$name, times = 4),
	  dataset2 = rep(datasets$name, each = 4),
	  z = runif(16, min = 0, max = 0.5)
	)

	# Two grey shades.
	pal <- c("#dddddd", "#aaaaaa")
	// International Chemical Identifier (InChI) regex, by lo sauer - lsauer.com
	// Example input: the InChI string of morphine.
	var x="InChI=1S/C17H19NO3/c1-18-7-6-17-10-3-5-13(20)16(17)21-15-12(19)4-2-9(14(15)17)8-11(10)18/h2-5,10-11,13,16,19-20H,6-8H2,1H3/t10-,11+,13-,16-,17-/m0/s1"

	// Strict variant: restricts the body to a character subset for organic compounds.
	// Pattern: optional "InChI=" prefix, one character other than "J", then at least
	// six characters from the allowed set; anchored at both ends, flags i and g.
	// match() returns null when nothing matches, so reading .length would fail;
	// `!!` coerces the result to a boolean instead (equivalent to Boolean(..)).
	!!x.trim().match(/^((InChI=)?[^J][0-9BCOHNSOPrIFla+\-\(\)\\\/,pqbtmsih]{6,})$/ig)
	>true
	// Generic variant: accepts any digit or letter plus + - ( ) \ / , in the body
	// and yields the raw match() result (not coerced to a boolean).
	x.trim().match(/^((InChI=)?[^J][0-9a-z+\-\(\)\\\/,]+)$/ig)
	library(ggplot2)
	library(gridExtra)

	# Convert the cylinder count to an ordered factor.
	mtcars$cyl <- ordered(mtcars$cyl)

	# Scatter plot of hp against mpg, coloured by cylinder count.
	p <- ggplot(mtcars, aes(mpg, hp, colour = cyl)) +
	  geom_point()

	# Same scatter plot with the legend hidden.
	p1 <- p + theme(legend.position = "none")

	# Density of mpg per cylinder group, coloured by group and with no fill.
	p2 <- ggplot(mtcars, aes(x = mpg, group = cyl, colour = cyl)) +
	  stat_density(fill = NA, position = "dodge")