Devin Pastoor dpastoor

  • A2-Ai
  • Rockville, MD
@dpastoor
dpastoor / packages
Last active August 29, 2015 13:56
package install list for new R install
packages <- c(
"bear",
"data.table",
"deSolve",
"devtools",
"dplyr",
"formatR",
"ggplot2",
"gridExtra",
"installr",
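A minimal sketch of how a package vector like the (truncated) one above is typically used to bootstrap a fresh R install: install only the packages that are missing, then load everything. The short `pkgs` vector here is a stand-in for the full list, not the original code.

```r
# Hypothetical usage sketch (stand-in for the full list above):
# install anything not already present, then load each package.
pkgs <- c("data.table", "dplyr", "ggplot2")
missing <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing) > 0) install.packages(missing)
invisible(lapply(pkgs, library, character.only = TRUE))
```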
@dpastoor
dpastoor / package.R
Created February 21, 2014 03:04 — forked from jbryer/package.R
#' Simplified loading and installing of packages
#'
#' This is a wrapper to \code{\link{require}} and \code{\link{install.packages}}.
#' Specifically, this will first try to load the package(s) and if not found
#' it will install then load the packages. Additionally, if the
#' \code{update=TRUE} parameter is specified it will check the currently
#' installed package version with what is available on CRAN (or mirror) and
#' install the newer version.
#'
#' @param pkgs a character vector with the names of the packages to load.
@dpastoor
dpastoor / ggkm.R
Created April 2, 2014 23:42 — forked from araastat/ggkm.R
#' Create a Kaplan-Meier plot using ggplot2
#'
#' @param sfit a \code{\link[survival]{survfit}} object
#' @param returns logical: if \code{TRUE}, return a ggplot object
#' @param xlabs x-axis label
#' @param ylabs y-axis label
#' @param ystratalabs the strata labels. \code{Default = levels(summary(sfit)$strata)}
#' @param ystrataname the legend name. Default = "Strata"
#' @param timeby numeric: control the granularity along the time-axis
#' @param main plot title
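A hypothetical call, assuming a `ggkm()` function with the signature implied by the documented parameters (the `lung` dataset ships with the survival package):

```r
# Hypothetical usage, assuming ggkm() is defined as documented above.
library(survival)
fit <- survfit(Surv(time, status) ~ sex, data = lung)
ggkm(fit, timeby = 100, main = "Survival by sex", returns = TRUE)
```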
@dpastoor
dpastoor / server.r
Created April 23, 2014 14:28 — forked from sckott/server.r
library("shiny")
library("plotly")
library("ggplot2")
shinyServer(function(input, output) {
output$text <- renderText({
ggiris <- qplot(Petal.Width, Sepal.Length, data=iris, color=Species)
py <- plotly("RgraphingAPI", "ektgzomjbx")
res <- py$ggplotly(ggiris)
iframe <- paste("<iframe height=\"600\" id=\"igraph\" scrolling=\"no\" seamless=\"seamless\" src=\"",
{
"metadata": {
"name": "",
"signature": "sha256:9f8cc010920e3ba427bf8782337a847d8f5129fe6b3a868aad7eeaa6fb3187c2"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
@dpastoor
dpastoor / gist:73b1fbeebc0baf8fde80
Created June 15, 2014 18:23
R link scraping example
#' Get URLs to blog post full text for all posts
#' by scraping them out of each page of the
#' main blog aggregator
# from: https://github.com/benmarwick/dayofarchaeology/blob/master/001_scrape_for_links_to_fulltext.r
library(RCurl)
library(XML)
n <- 100 # determined by inspecting the first page
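A hedged guess at how a scraper like this typically continues after the preview cuts off. This is not the original code from benmarwick's repo; the page URL pattern and the XPath are illustrative assumptions.

```r
# Illustrative sketch only (hypothetical URL pattern and XPath):
# fetch each page of the aggregator and pull out post links.
urls <- paste0("http://example-blog-aggregator.com/page/", seq_len(n))
links <- unlist(lapply(urls, function(u) {
  doc <- htmlParse(getURL(u))          # RCurl fetch, XML parse
  xpathSApply(doc, "//h2/a/@href")     # assumed link location
}))
```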
@dpastoor
dpastoor / gist:29b76bdde130a8a5b8ba
Created June 22, 2014 14:20
save multiple to xlsx
save.xlsx <- function (file, ...)
{
require(xlsx, quietly = TRUE)
objects <- list(...)
fargs <- as.list(match.call(expand.dots = TRUE))
objnames <- as.character(fargs)[-c(1, 2)]
nobjects <- length(objects)
for (i in 1:nobjects) {
if (i == 1)
write.xlsx(objects[[i]], file, sheetName = objnames[i])
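The truncated loop above presumably continues by appending one sheet per remaining object; a sketch of the full pattern, assuming the xlsx package's `write.xlsx()` with its `append` argument (the first object creates the workbook, later ones are appended):

```r
# Sketch of the remainder of the loop (assumed, not the original code):
# the first object creates the file, each subsequent object is
# appended to the same workbook as its own named sheet.
for (i in 1:nobjects) {
  if (i == 1)
    write.xlsx(objects[[i]], file, sheetName = objnames[i])
  else
    write.xlsx(objects[[i]], file, sheetName = objnames[i], append = TRUE)
}
```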
import os
import random
import string
import tempfile
import subprocess
def random_id(length=8):
return ''.join(random.sample(string.ascii_letters + string.digits, length))
TEMPLATE_SERIAL = """
@dpastoor
dpastoor / check
Created August 5, 2014 01:35
sse_covariance_checks
library(ggplot2)
library(stringr)
### functions -------------
cov_convergence <- function(wd) {
setwd(wd)
dir_list <- list.dirs(recursive=FALSE)
results <- lapply(str_replace_all(dir_list, pattern="./", ""), function(x) {
num <- str_replace_all(x, pattern="sse_", "")
@dpastoor
dpastoor / group_effect.md
Last active August 29, 2015 14:10 — forked from arunsrinivasan/group_effect.md
an update on keyed performance for dt and dplyr benchmarks

Update: The timings are now updated with runs from R v3.1.0.

A small note on this tweet from @KevinUshey and this tweet from @ChengHLee:

The number of rows, while important, is only one of the factors that influence the time taken to perform a join. From my benchmarking experience, the two features I have found to influence join speed much more, especially in hash-table-based approaches (e.g., dplyr), are:

  • The number of unique groups.
  • The number of columns the join is performed on - note that this is related to the previous point, since in most cases more join columns mean more unique groups.

That is, these features influence join speed even when the number of rows stays the same.
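The point above can be illustrated with a small, hypothetical benchmark: two keyed data.tables with the same number of rows but different numbers of unique groups. The sizes and column names here are made up for illustration.

```r
# Illustrative sketch: identical row counts, different group counts.
library(data.table)
n <- 1e6
few  <- data.table(id = sample(1e2, n, TRUE), x = rnorm(n), key = "id")
many <- data.table(id = sample(1e5, n, TRUE), x = rnorm(n), key = "id")
lk_few  <- data.table(id = unique(few$id),  y = 1, key = "id")
lk_many <- data.table(id = unique(many$id), y = 1, key = "id")
system.time(few[lk_few])    # join over ~100 unique groups
system.time(many[lk_many])  # join over ~100,000 unique groups
```

Comparing the two timings isolates the effect of group cardinality, since both joins touch the same number of rows.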