Mike Lin mlin

## node_cluster_child_process.js
/*

$ uname -a
Linux coderuler 3.2.0-24-generic #39-Ubuntu SMP Mon May 21 16:52:17 UTC 2012 x86_64 x86_64 x86_64 GNU/Linux
$ node -v
v0.6.18
$ echo "console.log('foobar'); process.exit(0);" > foo.js
$ node node_cluster_child_process.js
child stderr >

## netclient_https_threads.ml
(*
ocamlfind ocamlopt -o netclient_https_threads -thread -linkpkg -package threads,netclient,ssl,equeue-ssl netclient_https_threads.ml

http://docs.camlcity.org/docs/godipkg/3.12/godi-ocamlnet/doc/godi-ocamlnet/html/Https_client.html
*)

open Printf
module HTTP = Http_client
module HTTPS = Https_client
;;

## gist:5e82ef4fa7031258492f
* index (vg_index:main) (failed) job-Bk01J1Q0QpJ6F3BZv5ZyjBBq
  mlin 2015-11-08 19:44:06 (runtime 12:57:05)
2015-11-09 11:33:40 index ALERT This job has been restarted due to UnresponsiveWorker from running this job
2015-11-09 11:33:49 index INFO Logging initialized (priority)
2015-11-09 11:33:50 index INFO Downloading bundled file gcc4.9_runtime_debs.tar
2015-11-09 11:33:51 index STDOUT >>> Unpacking gcc4.9_runtime_debs.tar to /
2015-11-09 11:33:51 index INFO Downloading bundled file vg_bundle.tar.gz
2015-11-09 11:33:52 index STDOUT >>> Unpacking vg_bundle.tar.gz to /
2015-11-09 11:33:53 index INFO Installing apt packages libjansson4, dx-toolkit
2015-11-09 11:33:57 index INFO Setting SSH public key

## commonNames_assemblies.semijson
["Human","hg19"],
["Chimp","panTro4"],
["Gorilla","gorGor3"],
["Orangutan","ponAbe2"],
["Gibbon","nomLeu3"],
["Rhesus","rheMac3"],
["Crab_eating_macaque","macFas5"],
["Baboon","papHam1"],
["Green_monkey","chlSab1"],
["Marmoset","calJac3"],

## dx-yml-build
#!/usr/bin/env python
#
# dx-yml-build: transcodes dxapp.yml to dxapp.json and then runs `dx build`
# with command-line arguments passed through.
#
# Requires PyYAML (apt-get install python-yaml OR pip install pyyaml)

import os, sys
import yaml, json

## keybase.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mlin
                / keybase.md
            
            
              Last active
              May 22, 2018 21:34
            
              
                keybase.md
              
          
### Keybase proof

I hereby claim:

  * I am mlin on github.
  * I am mlin (https://keybase.io/mlin) on keybase.
  * I have a public key ASCO-NadYMiwqGxb9_4cD-VFjMbVqrk7ors-n9seZl_A5wo

To claim this, I am signing this object:

  
## dxWDL_ci_init.py
#!/usr/bin/env python
#
# Initializes a git repository with some suggested best practices for DNAnexus
# WDL workflow development & continuous integration. Run
#    dxWDL_ci_init.py hello_world
# to create a subdirectory with that name, initialized as a local git repo,
# with the following which you can then customize:
#
# hello_world.wdl
#   Trivial WDL workflow template.

## Dockerfile
# Dockerfile for PhyloCSF using OpenBLAS to take advantage of vector
# instructions (AVX). On supported processors, this can significantly speed up
# PhyloCSF (25-50%) compared to the default GSL BLAS.
# Example usage:
#   docker build -t mlin:PhyloCSF https://gist.githubusercontent.com/mlin/5ea4ca5d2a2a198e5659/raw/Dockerfile
#   docker run -v /path/to/host/data:/data mlin:PhyloCSF 29mammals /data/input.fa
# PhyloCSF homepage: https://github.com/mlin/PhyloCSF/wiki
FROM ubuntu:trusty
MAINTAINER Mike Lin <mlin@mlin.net>
RUN apt-get update

## swarmsub.py
#!/usr/bin/env python3

import sys
import time
import docker
import multiprocessing
from argparse import ArgumentParser, REMAINDER

def swarmsub(image, command=None, cpu=1, mounts=None):
    client = docker.from_env()

## split_vcf_for_spark.wdl
version 1.0

task split_vcf_for_spark {
    # Quickly split a large .vcf.gz file into a specified number of compressed partitions.
    #
    # Motivation: calling SparkContext.textFile on a single large vcf.gz can be painfully slow,
    # because it's decompressed and parsed in ~1 thread. Use this to first split it up (with a
    # faster multithreaded pipeline); then tell Spark to parallel load the data using textFile on a
    # glob pattern.
    #
	/*

	$ uname -a
	Linux coderuler 3.2.0-24-generic #39-Ubuntu SMP Mon May 21 16:52:17 UTC 2012 x86_64 x86_64 x86_64 GNU/Linux
	$ node -v
	v0.6.18
	$ echo "console.log('foobar'); process.exit(0);" > foo.js
	$ node node_cluster_child_process.js
	child stderr >
	(*
	ocamlfind ocamlopt -o netclient_https_threads -thread -linkpkg -package threads,netclient,ssl,equeue-ssl netclient_https_threads.ml

	http://docs.camlcity.org/docs/godipkg/3.12/godi-ocamlnet/doc/godi-ocamlnet/html/Https_client.html
	*)

	open Printf
	module HTTP = Http_client
	module HTTPS = Https_client
	;;
	* index (vg_index:main) (failed) job-Bk01J1Q0QpJ6F3BZv5ZyjBBq
	mlin 2015-11-08 19:44:06 (runtime 12:57:05)
	2015-11-09 11:33:40 index ALERT This job has been restarted due to UnresponsiveWorker from running this job
	2015-11-09 11:33:49 index INFO Logging initialized (priority)
	2015-11-09 11:33:50 index INFO Downloading bundled file gcc4.9_runtime_debs.tar
	2015-11-09 11:33:51 index STDOUT >>> Unpacking gcc4.9_runtime_debs.tar to /
	2015-11-09 11:33:51 index INFO Downloading bundled file vg_bundle.tar.gz
	2015-11-09 11:33:52 index STDOUT >>> Unpacking vg_bundle.tar.gz to /
	2015-11-09 11:33:53 index INFO Installing apt packages libjansson4, dx-toolkit
	2015-11-09 11:33:57 index INFO Setting SSH public key
	["Human","hg19"],
	["Chimp","panTro4"],
	["Gorilla","gorGor3"],
	["Orangutan","ponAbe2"],
	["Gibbon","nomLeu3"],
	["Rhesus","rheMac3"],
	["Crab_eating_macaque","macFas5"],
	["Baboon","papHam1"],
	["Green_monkey","chlSab1"],
	["Marmoset","calJac3"],
	#!/usr/bin/env python
	#
	# dx-yml-build: transcodes dxapp.yml to dxapp.json and then runs `dx build`
	# with command-line arguments passed through.
	#
	# Requires PyYAML (apt-get install python-yaml OR pip install pyyaml)

	import os, sys
	import yaml, json
	#!/usr/bin/env python
	#
	# Initializes a git repository with some suggested best practices for DNAnexus
	# WDL workflow development & continuous integration. Run
	#    dxWDL_ci_init.py hello_world
	# to create a subdirectory with that name, initialized as a local git repo,
	# with the following which you can then customize:
	#
	# hello_world.wdl
	#   Trivial WDL workflow template.
	# Dockerfile for PhyloCSF using OpenBLAS to take advantage of vector
	# instructions (AVX). On supported processors, this can significantly speed up
	# PhyloCSF (25-50%) compared to the default GSL BLAS.
	# Example usage:
	#   docker build -t mlin:PhyloCSF https://gist.githubusercontent.com/mlin/5ea4ca5d2a2a198e5659/raw/Dockerfile
	#   docker run -v /path/to/host/data:/data mlin:PhyloCSF 29mammals /data/input.fa
	# PhyloCSF homepage: https://github.com/mlin/PhyloCSF/wiki
	FROM ubuntu:trusty
	MAINTAINER Mike Lin <mlin@mlin.net>
	RUN apt-get update
	#!/usr/bin/env python3

	import sys
	import time
	import docker
	import multiprocessing
	from argparse import ArgumentParser, REMAINDER

	def swarmsub(image, command=None, cpu=1, mounts=None):
	client = docker.from_env()
	version 1.0

	task split_vcf_for_spark {
	# Quickly split a large .vcf.gz file into a specified number of compressed partitions.
	#
	# Motivation: calling SparkContext.textFile on a single large vcf.gz can be painfully slow,
	# because it's decompressed and parsed in ~1 thread. Use this to first split it up (with a
	# faster multithreaded pipeline); then tell Spark to parallel load the data using textFile on a
	# glob pattern.
	#