Nicolas Coutin npjc

## README.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                npjc
                / README.md
            
            
              Created
              May 29, 2014 23:24
            
              
                eg_gist2
              
          
              We couldn’t find that file to show.
              
            
## gist:2942000b736827557781

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                npjc
                / gist:2942000b736827557781
            
            
              Created
              August 31, 2014 06:18
            
          
  title
  author
  date
  output
  
  
  2 - normalizing counts
  Nicolas Coutin
  August 29, 2014
  html_document
  
  
Generate windowed reference files.

Run bash scripts


## gist:89094d38d2b58c96dd6d
With a combined 3-column BED table (chr, start, end) plus a 4th `id` column:
```r
library(data.table)

# Three intervals on Chr1, each tagged with an id letter.
y <- data.table(
  chr   = c("Chr1", "Chr1", "Chr1"),
  start = c(1, 15, 1),
  end   = c(4, 18, 55),
  id    = letters[1:3]
)
# Key on (chr, start, end) before the overlap join below.
setkey(y, chr, start, end)

# Overlap join of the table against itself.
dt <- foverlaps(y, y, type = "any")
```

## one_two_threes
library(httr)
library(dplyr)
url <- "https://www.github.com"

# Candidate names: for every length k in `allowed_n_letters`, each k-letter
# combination drawn from `letters`, collapsed into one string.
# NOTE(review): combn() yields each set of distinct letters once, in
# alphabetical order ("ab" but never "ba" or "aa"), and no digits, so this is
# not literally every alphanumeric name. Confirm that is intended.
allowed_n_letters <- 1:3 # just change this to check others...
possible_paths <- unlist(
  lapply(allowed_n_letters, function(k) {
    combos <- combn(letters, k)
    apply(combos, 2, paste0, collapse = "")
  })
)

## script.R

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                npjc
                / script.R
            
            
              Last active
              August 29, 2015 14:22
            
              
                split a file into chunks based on a column and write each chunk to a file.
              
          
    synopsis

I have a large file (too big to read into memory) and I want to generate subsets of it. The subsets are defined by a column in the file: one file is split into n files, where n is the number of unique values in that column.
Usage:

library(data.table)
# Split small.tsv into one output file per distinct value in column 4.
# NOTE(review): split_file() is defined outside this snippet; chunk_size and
# skip are assumed to be rows read per pass and leading lines skipped. Confirm.
path <- "small.tsv"
split_file(path , col_idx = 4, chunk_size = 1e7, skip = 0)

  
## gist:1673ccdc124fecb2649a
# pace to secs...minute:seconds character string(s) to seconds
#
# ms: character vector of paces written as "minutes:seconds", e.g. "4:45".
# Returns a numeric vector with one element per element of `ms`, holding
# minutes * 60 + seconds. A pace with no ":seconds" part yields NA.
# Each element is parsed on its own, so pts(c("4:45", "5:00")) gives
# c(285, 300) instead of silently using only the first pace.
pts <- function(ms = "4:45") {
  vapply(
    strsplit(ms, ":", fixed = TRUE),
    function(fields) {
      tmp <- as.integer(fields)
      tmp[1] * 60 + tmp[2]
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

# seconds to total time in hour:minute:seconds
ttl_time <- function(s) {
  hrs <- floor(s / 3600)
  leftovers <- s %% 3600

## cachenev.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                npjc
                / cachenev.md
            
            
              Created
              August 17, 2015 21:37
            
          
use a cache environment in aaa_cache.R:

bg_ws for biogrid webservice.
use a function such as bg_ws_build() which assigns useful variables

url
uris (service endpoints)
access key
parameters


in zzz.R:


## number2words.r
# John Fox's numbers2words found here:
# http://tolstoy.newcastle.edu.au/R/help/05/04/2715.html
numbers2words <- function(x){
  # helper
  helper <- function(x){

    digits <- rev(strsplit(as.character(x), "")[[1]])
    nDigits <- length(digits)
    if (nDigits == 1) as.vector(ones[digits])
    else if (nDigits == 2)

## pkg-at-a-glance.r
#' the name of the package we want to check
pkg <- "tidyr"
library(pkg, character.only = TRUE)
#' do the work
pacman::p_load(readr, dplyr, tidyr, ggplot2)
#' null-default operator: `a` unless it is NULL, otherwise `b`
`%||%` <- function(a, b) if (!is.null(a)) a else b
#' exported names of `pkg`, reduced to those whose value is a function.
#' Each name is resolved in the package's own namespace with
#' getExportedValue() rather than get(), which searches the global environment
#' and the whole search path and can pick up a same-named object from
#' somewhere else.
ui <- getNamespaceExports(pkg)
only_funs <- vapply(
  ui,
  function(x) is.function(getExportedValue(pkg, x)),
  logical(1),
  USE.NAMES = FALSE
)
ui <- ui[only_funs]
l <- lapply(ui, function(f) {

## partition-view.r
pacman::p_load(readr, dplyr, tidyr, ggplot2)
library(partitions)
# Flatten parts(n) into one vector and label each run of n consecutive values
# with a running index.
# NOTE(review): presumably parts(n) holds one partition per column with n rows
# each, so `part` is the partition index. Confirm against the partitions docs.
n <- 5
vals <- as.vector(parts(n))
len <- length(vals) / n
part <- rep(seq_len(len), each = n)

# NOTE(review): data_frame() is deprecated in tibble in favour of tibble();
# kept here so the result is unchanged.
d <- data_frame(vals, part)
d <- d %>%
  group_by(part) %>%
	With a combined 3-column BED table (chr, start, end) plus a 4th `id` column:
	```r
	library(data.table)

	# Three intervals on Chr1, each tagged with an id letter.
	y <- data.table(
	  chr   = c("Chr1", "Chr1", "Chr1"),
	  start = c(1, 15, 1),
	  end   = c(4, 18, 55),
	  id    = letters[1:3]
	)
	# Key on (chr, start, end) before the overlap join below.
	setkey(y, chr, start, end)

	# Overlap join of the table against itself.
	dt <- foverlaps(y, y, type = "any")
	```
	library(httr)
	library(dplyr)
	url <- "https://www.github.com"

	# Candidate names: for every length k in `allowed_n_letters`, each k-letter
	# combination drawn from `letters`, collapsed into one string.
	# NOTE(review): combn() yields each set of distinct letters once, in
	# alphabetical order ("ab" but never "ba" or "aa"), and no digits, so this is
	# not literally every alphanumeric name. Confirm that is intended.
	allowed_n_letters <- 1:3 # just change this to check others...
	possible_paths <- unlist(
	  lapply(allowed_n_letters, function(k) {
	    combos <- combn(letters, k)
	    apply(combos, 2, paste0, collapse = "")
	  })
	)
	# pace to secs...minute:seconds character string(s) to seconds
	#
	# ms: character vector of paces written as "minutes:seconds", e.g. "4:45".
	# Returns a numeric vector with one element per element of `ms`, holding
	# minutes * 60 + seconds. A pace with no ":seconds" part yields NA.
	# Each element is parsed on its own, so pts(c("4:45", "5:00")) gives
	# c(285, 300) instead of silently using only the first pace.
	pts <- function(ms = "4:45") {
	  vapply(
	    strsplit(ms, ":", fixed = TRUE),
	    function(fields) {
	      tmp <- as.integer(fields)
	      tmp[1] * 60 + tmp[2]
	    },
	    numeric(1),
	    USE.NAMES = FALSE
	  )
	}

	# seconds to total time in hour:minute:seconds
	ttl_time <- function(s) {
	hrs <- floor(s / 3600)
	leftovers <- s %% 3600
	# John Fox's numbers2words found here:
	# http://tolstoy.newcastle.edu.au/R/help/05/04/2715.html
	numbers2words <- function(x){
	# helper
	helper <- function(x){

	digits <- rev(strsplit(as.character(x), "")[[1]])
	nDigits <- length(digits)
	if (nDigits == 1) as.vector(ones[digits])
	else if (nDigits == 2)
	#' the name of the package we want to check
	pkg <- "tidyr"
	library(pkg, character.only = TRUE)
	#' do the work
	pacman::p_load(readr, dplyr, tidyr, ggplot2)
	#' null-default operator: `a` unless it is NULL, otherwise `b`
	#' (the operator name is written without the stray backslashes that had
	#' been inserted before each `|`)
	`%||%` <- function(a, b) if (!is.null(a)) a else b
	#' exported names of `pkg`, reduced to those whose value is a function.
	#' Each name is resolved in the package's own namespace with
	#' getExportedValue() rather than get(), which searches the global environment
	#' and the whole search path and can pick up a same-named object from
	#' somewhere else.
	ui <- getNamespaceExports(pkg)
	only_funs <- vapply(
	  ui,
	  function(x) is.function(getExportedValue(pkg, x)),
	  logical(1),
	  USE.NAMES = FALSE
	)
	ui <- ui[only_funs]
	l <- lapply(ui, function(f) {
	pacman::p_load(readr, dplyr, tidyr, ggplot2)
	library(partitions)
	# Flatten parts(n) into one vector and label each run of n consecutive values
	# with a running index.
	# NOTE(review): presumably parts(n) holds one partition per column with n rows
	# each, so `part` is the partition index. Confirm against the partitions docs.
	n <- 5
	vals <- as.vector(parts(n))
	len <- length(vals) / n
	part <- rep(seq_len(len), each = n)

	# NOTE(review): data_frame() is deprecated in tibble in favour of tibble();
	# kept here so the result is unchanged.
	d <- data_frame(vals, part)
	d <- d %>%
	group_by(part) %>%