Nick Harding hardingnj

## Dockerfile
# Image with the MySQL client and server installed from the Ubuntu archive.
# NOTE(review): the base tag is unpinned while the apt source below is
# hard-coded to "precise" - confirm the intended Ubuntu release.
FROM ubuntu

# Divert /sbin/initctl and replace it with a symlink to /bin/true (a no-op).
# Presumably this lets service start-up attempts made during package
# installation succeed trivially inside the build - TODO confirm.
RUN dpkg-divert --local --rename --add /sbin/initctl
RUN ln -s /bin/true /sbin/initctl

# Replace the apt sources with the precise "main" and "universe" components.
RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list

# Refresh the package index, upgrade and install in ONE layer, so a cached
# `apt-get update` layer can never be combined with a newer install step.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get -y install mysql-client mysql-server

## mysql2sqlite.sh
#!/bin/sh

# Converts a mysqldump file into a SQLite 3-compatible file. It also extracts the MySQL `KEY xxxxx` definitions from the
# CREATE block and creates them in separate commands _after_ all the INSERTs.

# Awk is chosen because it's fast and portable. You can use gawk, original awk or even the lightning fast mawk.
# The mysqldump file is traversed only once.

# Usage: $ ./mysql2sqlite mysqldump-opts db-name | sqlite3 database.sqlite
# Example: $ ./mysql2sqlite --no-data -u root -pMySecretPassWord myDbase | sqlite3 database.sqlite

## apply.function.colwise
apply.function.colwise <- function(FUNC, x, x.columns = rownames(x), y.columns = colnames(x), ignore.diag = identical(x.columns, y.columns), ...) {

    checks <- c(is.numeric, is.character);

    # passing in a factor results in unexpected results, due to implicit numeric recasting
    stopifnot(
        any(sapply(checks, function(FUN) FUN(y.columns))),
        any(sapply(checks, function(FUN) FUN(x.columns)))
        );

## maskfasta
#!/bin/bash
# Mask regions of a FASTA file according to read coverage in a BAM file.
#
# Pipeline:
#   1. `bedtools genomecov -bga` writes per-interval depth as BedGraph,
#      including zero-coverage intervals.
#   2. awk keeps the intervals whose 4th column (depth) is greater than 0.
#   3. `bedtools maskfasta` masks those intervals in $FASTA and writes $OUT.
# NOTE(review): as written this masks the *covered* intervals; confirm that
# masking uncovered ones ($4 == 0) was not the intent.

# Make the exit status reflect a failure in any pipeline stage, and stop on
# unset variables.
set -euo pipefail

BAM=this.bam
FASTA=that.fa
OUT=theother.fa

# Expansions are quoted so paths containing spaces or glob characters survive.
bedtools genomecov -ibam "$BAM" -bga \
    | awk '$4>0' \
    | bedtools maskfasta -fi "$FASTA" -bed - -fo "$OUT"

## speedtest.R
method.list <- list(
    slow = function(iter) {
        var <- NULL;
        for (i in 1:iter) {
            var <- c(
                var,
                sqrt(i)
                );
            }
        var

## hdf5_compression_test.py
#! /usr/bin/python
#
# HDF5 compression test: opens one output file per compression filter under
# comparison (the file names indicate GZIP and LZF).
# NOTE(review): the dataset-writing code is not visible in this excerpt.
#
import h5py
import numpy as np
#
# Create the two output files; mode 'w' is passed to h5py.File for both.
file_gzip = h5py.File('gzip.h5','w')
file_lzf  = h5py.File('lzf.h5' ,'w')

## hdf5_compression_test.R
#! /usr/bin/R
library(rhdf5)

# Succeeds:
# Read everything under the root group ("/") of gzip.h5 and print a summary.
dat_gzip <- tryCatch(
  {
    dat_gzip <- h5read('gzip.h5', "/")
    print(summary(dat_gzip))
    # Return the data explicitly: print() returns the summary, which would
    # otherwise become the value assigned to dat_gzip.
    dat_gzip
  },
  # Re-raise the condition unchanged so a failed read is not swallowed.
  error = function(e) { stop(e) }
  )

# Fails:

## rhdf5_example.py
#! /usr/bin/python
#
# This example creates and writes a GZIP-compressed dataset.
#
import h5py
import numpy as np
import random
import string
#
# Row count (one million); presumably the size of the generated dataset -
# the code that uses it is not visible in this excerpt.
nrow = 1000000

## rhdf5_test.R
#! /usr/bin/R
library(rhdf5)

print(h5ls('gzip.h5'));

test_limits <- seq(1e3, 1e6, 1e3)

for(limit in test_limits) {
  print(limit)
  dat_gzip <- h5read('gzip.h5', "DS1/", index = list(1:20,1:limit));

## shapeit_bug_report
#! /bin/bash
# Part of a shapeit bug-report reproduction: runs shapeit on the example VCF
# after decompressing it, then re-compresses the same file with gzip.

# Download and unpack the shapeit example data.
curl -O https://mathgen.stats.ox.ac.uk/genetics_software/shapeit/files/example.tar.gz
tar -xvzf example.tar.gz
FILE=example/GLs.vcf

# Decompress the shipped VCF in place and run shapeit on the plain-text file.
# Expansions are quoted so a path with spaces or glob characters is not split.
gunzip "${FILE}.gz"
shapeit --input-vcf "$FILE" --output-max unzipped.haps unzipped.sample

# Re-compress to a new file; -c writes to stdout, leaving $FILE untouched.
gzip -c "${FILE}" > "${FILE}.gzip.vcf.gz"
	# Image with the MySQL client and server installed from the Ubuntu archive.
	# NOTE(review): the base tag is unpinned while the apt source below is
	# hard-coded to "precise" - confirm the intended Ubuntu release.
	FROM ubuntu

	# Divert /sbin/initctl and replace it with a symlink to /bin/true (a no-op).
	# Presumably this lets service start-up attempts made during package
	# installation succeed trivially inside the build - TODO confirm.
	RUN dpkg-divert --local --rename --add /sbin/initctl
	RUN ln -s /bin/true /sbin/initctl

	# Replace the apt sources with the precise "main" and "universe" components.
	RUN echo "deb http://archive.ubuntu.com/ubuntu precise main universe" > /etc/apt/sources.list

	# Refresh the package index, upgrade and install in ONE layer, so a cached
	# `apt-get update` layer can never be combined with a newer install step.
	RUN apt-get update && \
	    apt-get upgrade -y && \
	    apt-get -y install mysql-client mysql-server
	#!/bin/sh

	# Converts a mysqldump file into a SQLite 3-compatible file. It also extracts the MySQL `KEY xxxxx` definitions from the
	# CREATE block and creates them in separate commands _after_ all the INSERTs.

	# Awk is chosen because it's fast and portable. You can use gawk, original awk or even the lightning fast mawk.
	# The mysqldump file is traversed only once.

	# Usage: $ ./mysql2sqlite mysqldump-opts db-name | sqlite3 database.sqlite
	# Example: $ ./mysql2sqlite --no-data -u root -pMySecretPassWord myDbase | sqlite3 database.sqlite
	apply.function.colwise <- function(FUNC, x, x.columns = rownames(x), y.columns = colnames(x), ignore.diag = identical(x.columns, y.columns), ...) {

	checks <- c(is.numeric, is.character);

	# passing in a factor results in unexpected results, due to implicit numeric recasting
	stopifnot(
	any(sapply(checks, function(FUN) FUN(y.columns))),
	any(sapply(checks, function(FUN) FUN(x.columns)))
	);
	#!/bin/bash
	# Mask regions of a FASTA file according to read coverage in a BAM file.
	#
	# Pipeline:
	#   1. `bedtools genomecov -bga` writes per-interval depth as BedGraph,
	#      including zero-coverage intervals.
	#   2. awk keeps the intervals whose 4th column (depth) is greater than 0.
	#   3. `bedtools maskfasta` masks those intervals in $FASTA and writes $OUT.
	# NOTE(review): as written this masks the *covered* intervals; confirm that
	# masking uncovered ones ($4 == 0) was not the intent.

	# Make the exit status reflect a failure in any pipeline stage, and stop on
	# unset variables.
	set -euo pipefail

	BAM=this.bam
	FASTA=that.fa
	OUT=theother.fa

	# The stages are joined with real pipes: a backslash-escaped `\|` would be
	# passed to bedtools as a literal "|" argument instead of building a pipeline.
	bedtools genomecov -ibam "$BAM" -bga \
	    | awk '$4>0' \
	    | bedtools maskfasta -fi "$FASTA" -bed - -fo "$OUT"
	method.list <- list(
	slow = function(iter) {
	var <- NULL;
	for (i in 1:iter) {
	var <- c(
	var,
	sqrt(i)
	);
	}
	var
	#! /usr/bin/python
	#
	# HDF5 compression test: opens one output file per compression filter under
	# comparison (the file names indicate GZIP and LZF).
	# NOTE(review): the dataset-writing code is not visible in this excerpt.
	#
	import h5py
	import numpy as np
	#
	# Create the two output files; mode 'w' is passed to h5py.File for both.
	file_gzip = h5py.File('gzip.h5','w')
	file_lzf = h5py.File('lzf.h5' ,'w')
	#! /usr/bin/R
	library(rhdf5)

	# Succeeds:
	# Read everything under the root group ("/") of gzip.h5 and print a summary.
	dat_gzip <- tryCatch(
	  {
	    dat_gzip <- h5read('gzip.h5', "/")
	    print(summary(dat_gzip))
	    # Return the data explicitly: print() returns the summary, which would
	    # otherwise become the value assigned to dat_gzip.
	    dat_gzip
	  },
	  # Re-raise the condition unchanged so a failed read is not swallowed.
	  error = function(e) { stop(e) }
	)

	# Fails:
	#! /usr/bin/R
	library(rhdf5)

	print(h5ls('gzip.h5'));

	test_limits <- seq(1e3, 1e6, 1e3)

	for(limit in test_limits) {
	print(limit)
	dat_gzip <- h5read('gzip.h5', "DS1/", index = list(1:20,1:limit));
	#! /bin/bash
	# Part of a shapeit bug-report reproduction: runs shapeit on the example VCF
	# after decompressing it, then re-compresses the same file with gzip.

	# Download and unpack the shapeit example data.
	curl -O https://mathgen.stats.ox.ac.uk/genetics_software/shapeit/files/example.tar.gz
	tar -xvzf example.tar.gz
	FILE=example/GLs.vcf

	# Decompress the shipped VCF in place and run shapeit on the plain-text file.
	# Expansions are quoted so a path with spaces or glob characters is not split.
	gunzip "${FILE}.gz"
	shapeit --input-vcf "$FILE" --output-max unzipped.haps unzipped.sample

	# Re-compress to a new file; -c writes to stdout, leaving $FILE untouched.
	gzip -c "${FILE}" > "${FILE}.gzip.vcf.gz"