Daniel E Cook danielecook

## lafitness.py
from pyquery import PyQuery as pq
from icalendar import Calendar, Event
import datetime
from datetime import date, timedelta
from dateutil.relativedelta import *
from dateutil.parser import *
from pprint import pprint as pp

# Numeric id of the LA Fitness club whose class schedule is requested.
clubid = 722
# Printer-friendly class-schedule page, with the club id filled into the query string.
url = "https://www.lafitness.com/Pages/ClassSchedulePrintVersion.aspx?clubid=%s" % (clubid,)

## R_helper_functions.R
# I am trying to make R a little easier by adding a few helper functions. Most of these mimic functionality seen in Stata.

# This function attempts to mimic the order command in Stata.
# Usage:
# df <- corder(df,<list of columns>)

# Order variables in a data frame.

corder <- function(df,...) {
  cols <-as.vector(eval(substitute((alist(...)))),mode="character")

## orthologs.sh
# Build orthologs.txt: a tab-separated table, with a header row, pairing the
# human and C. elegans rows of HomoloGene that share the same value in column 1.

# Fetch the current HomoloGene dump into the working directory.
wget 'ftp://ftp.ncbi.nih.gov/pub/HomoloGene/current/homologene.data'

# Keep the rows that contain the id 9606 (human.txt) or 6239 (celegans.txt)
# delimited by tabs, sort them, and reduce them to columns 1, 3 and 4.
# $'\t' passes real tab characters to grep: grep's regex syntax has no "\t"
# escape (GNU grep matches a literal 't'), so the earlier "\t9606\t" pattern
# did not anchor the id between tabs. -F is enough because the pattern is a
# fixed string.
grep -F $'\t9606\t' homologene.data | sort | cut -f 1,3,4 > human.txt
grep -F $'\t6239\t' homologene.data | sort | cut -f 1,3,4 > celegans.txt

# Join the two tables on column 1, drop that column, sort the pairs, and write
# them below the header. printf emits the header directly, so the joined rows
# are passed through untouched rather than re-interpreted by `echo -e`.
{
    printf 'Human_Entrez\tHuman_Symbol\tElegans_Entrez\tElegans_Symbol\n'
    join -1 1 -2 1 -t $'\t' human.txt celegans.txt | cut -f 2,3,4,5 | sort
} > orthologs.txt

# Remove the intermediate files and the downloaded dump.
rm human.txt celegans.txt homologene.data

## Check_Fastqs.py
#!/usr/bin/python
import re
from itertools import groupby as g
import subprocess
import sys
from collections import OrderedDict

def most_common(L):
    """Return the element that occurs most often in ``L``.

    When several elements share the highest count, the one whose first
    occurrence comes earliest in ``L`` is returned.

    Args:
        L: A non-empty list of mutually orderable items (it is sorted
            internally and queried with ``L.index``).

    Returns:
        The most frequent element of ``L``.

    Raises:
        ValueError: If ``L`` is empty (raised by ``max``).
    """
    def rank(group):
        # Explicit unpacking: tuple parameters such as ``lambda (x, v): ...``
        # are a syntax error on Python 3 (PEP 3113).
        value, run = group
        # Higher count wins; the negated first index breaks ties in favour
        # of the element seen first in L.
        return (len(list(run)), -L.index(value))

    return max(g(sorted(L)), key=rank)[0]

## plot_runkeeper.R
# Special thanks for insights from flowingdata.com regarding this.

library(plotKML)
library(plyr)
library(dplyr)
library(fpc)

num_locations <- 5

# Usage: Place this script in the directory containing your runkeeper data. You can run from terminal using 'Rscript map_runkeeper.R', or

## SRX_SRA_download.sh
# Run fastq-dump on every run accession found for an SRA query.
#
# Usage: SRX_fetch_fastq <query>
#   <query>  term passed to `esearch -db sra -query` (the function name
#            suggests an SRX accession -- NOTE(review): confirm intended input).
#
# The accessions produced by the esearch | efetch | xtract pipeline are stored
# in the global variable `sra_set`, echoed, and then handed one at a time to
# `fastq-dump`.
# Returns 1 (after printing a usage line to stderr) when no query is given.
function SRX_fetch_fastq() {
    if [ -z "$1" ]; then
        echo "Usage: SRX_fetch_fastq <query>" >&2
        return 1
    fi
    # Quote the query so a term containing spaces or glob characters reaches
    # esearch as a single argument.
    sra_set=$(esearch -db sra -query "$1" | efetch -format docsum | xtract -element Run@acc)
    echo "Downloading Run $1:"
    # Left unquoted on purpose: prints all accessions on one line.
    echo ${sra_set}
    echo "-------"
    # Unquoted expansion is intentional: split the accession list into words.
    for SRA in $sra_set; do
        echo "Downloading $SRA"
        fastq-dump "$SRA"
    done
}

## worm_tracker.R
library(stringr)
library(dplyr)
# The concatenated worm_track data can be generated with the following shell
# loop. It is kept as comments because R has no triple-quoted strings: a
# """ ... """ block (and the "\/" escape inside it) does not parse as R.
#
# for folder in `ls -d *\/`; do
#     for file in `ls $folder/worm*`; do
#         cat $file | awk -v file=$file '{print file","$1}' >> worm_track_all.txt
#     done;
# done;

## LCR_region.sh
#!/bin/bash
# Build two BED files for ce10: the rmsk rows tagged 'Low_complexity'
# (LCR_ce10_rmsk.bed) and the complement of those regions
# (LCR_complement_ce10.bed).

# Download the ce10 rmsk table dump from UCSC.
wget 'http://hgdownload.soe.ucsc.edu/goldenPath/ce10/database/rmsk.txt.gz' -O LCR_rmsk.txt.gz
# Decompress to stdout, keep the lines containing 'Low_complexity', and write
# columns 6-8 as the BED file.
# NOTE(review): presumably columns 6-8 are chromosome, start and end -- confirm
# against the rmsk table schema.
gunzip -kfc LCR_rmsk.txt.gz | grep 'Low_complexity' | cut -f 6,7,8 > LCR_ce10_rmsk.bed
rm LCR_rmsk.txt.gz


# Generate the set of complementary regions (i.e. NOT low complexity)
# Download C. elegans chromosome information
# NOTE(review): without -N/--skip-column-names, mysql's batch output begins
# with a column-name row, which then ends up in ce10.genome -- confirm that
# bedtools tolerates it.
mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e "select chrom, size from ce10.chromInfo"  > ce10.genome
# NOTE(review): only the output of `bedtools complement` is sorted here; the
# -i input is used in whatever order the rmsk dump provides -- confirm that
# this satisfies bedtools' sorted-input requirement.
bedtools complement -i LCR_ce10_rmsk.bed  -g ce10.genome | sort -k 1,1 -k2,2n > LCR_complement_ce10.bed

## bcftools wrapper.py
import os, subprocess, uuid, re
import vcf.filters


class bcf(file):
    """Wrapper object built around a vcf/bcf file.

    NOTE(review): the base class ``file`` is the Python 2 builtin file type;
    it is not defined on Python 3, so this class statement only works on
    Python 2 -- confirm the target interpreter.
    """

    def __init__(self, file):
        """Record the file and start with an empty list of operations.

        Args:
            file: Stored unchanged on ``self.file`` (presumably the path of
                the vcf/bcf to operate on -- TODO confirm). The name shadows
                the ``file`` builtin inside this method.
        """
        # Start by storing basic information about the vcf/bcf
        self.file = file
        # NOTE(review): looks like a list of pending operations; nothing in
        # view appends to it -- confirm.
        self.ops = []

## .bash_profile
# Announce that this profile was sourced.
echo "Sync Profile Loaded"

# Prompt: current working directory followed by a burger.
export PS1="\w 🍔  "
# Re-read this profile in the current shell.
alias refresh="source ~/.bash_profile"
# Compact, coloured, graph-style git history.
# An alias name cannot contain a space and the value must be wrapped in plain
# ASCII quotes, so an `alias git log=…` line written with typographic quotes
# cannot produce a working alias; the alias is therefore named `gitlog`.
alias gitlog="git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit"

# Search /usr/local/bin before the rest of PATH.
export PATH=/usr/local/bin:$PATH

# Get working directory of frontmost finder window.
cdf() {
	from pyquery import PyQuery as pq
	from icalendar import Calendar, Event
	import datetime
	from datetime import date, timedelta
	from dateutil.relativedelta import *
	from dateutil.parser import *
	from pprint import pprint as pp

	clubid = 722
	url = "https://www.lafitness.com/Pages/ClassSchedulePrintVersion.aspx?clubid=%s" % clubid
	# I am trying to make R a little easier by adding a few helper functions. Most of these mimic functionality seen in Stata.

	# This function attempts to mimic the order command in Stata;
	# Usage:
	# df <- corder(df,<list of columns>)

	# Order variables in a data frame.

	corder <- function(df,...) {
	cols <-as.vector(eval(substitute((alist(...)))),mode="character")
	wget 'ftp://ftp.ncbi.nih.gov/pub/HomoloGene/current/homologene.data'
	egrep "\t9606\t" homologene.data \| sort \| cut -f 1,3,4 > human.txt
	egrep "\t6239\t" homologene.data \| sort \| cut -f 1,3,4 > celegans.txt
	join -1 1 -2 1 -t $'\t' human.txt celegans.txt \| cut -f 2,3,4,5 \| sort \| echo -e "Human_Entrez\tHuman_Symbol\tElegans_Entrez\tElegans_Symbol\n$(cat -)" > orthologs.txt
	rm human.txt celegans.txt homologene.data
	#!/usr/bin/python
	import re
	from itertools import groupby as g
	import subprocess
	import sys
	from collections import OrderedDict

	def most_common(L):
	return max(g(sorted(L)), key=lambda(x, v):(len(list(v)),-L.index(x)))[0]
	# Special thanks for insights from flowingdata.com regarding this.

	library(plotKML)
	library(plyr)
	library(dplyr)
	library(fpc)

	num_locations <- 5

	# Usage: Place this script in the directory containing your runkeeper data. You can run from terminal using 'Rscript map_runkeeper.R', or
	function SRX_fetch_fastq() {
	sra_set=`esearch -db sra -query $1 \| efetch -format docsum \| xtract -element Run@acc`
	echo "Downloading Run $1:"
	echo ${sra_set}
	echo "-------"
	for SRA in $sra_set; do
	echo "Downloading $SRA"
	fastq-dump $SRA
	done;
	}
	library(stringr)
	library(dplyr)
	"""
	# Generate concatenated worm_track data using the following
	for folder in `ls -d *\/`; do
	for file in `ls $folder/worm*`; do
	cat $file \| awk -v file=$file '{print file","$1}' >> worm_track_all.txt
	done;
	done;
	"""
	#!/bin/bash
	wget 'http://hgdownload.soe.ucsc.edu/goldenPath/ce10/database/rmsk.txt.gz' -O LCR_rmsk.txt.gz
	gunzip -kfc LCR_rmsk.txt.gz \| grep 'Low_complexity' \| cut -f 6,7,8 > LCR_ce10_rmsk.bed
	rm LCR_rmsk.txt.gz


	# Generate the set of regions complementary (e.g. NOT low complexity)
	# Download c. elegans chromosome information
	mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e "select chrom, size from ce10.chromInfo" > ce10.genome
	bedtools complement -i LCR_ce10_rmsk.bed -g ce10.genome \| sort -k 1,1 -k2,2n > LCR_complement_ce10.bed
	import os, subprocess, uuid, re
	import vcf.filters



	class bcf(file):
	def __init__(self, file):
	# Start by storing basic information about the vcf/bcf
	self.file = file
	self.ops = []
	echo "Sync Profile Loaded"

	export PS1="\w 🍔 "
	alias refresh="source ~/.bash_profile"
	alias git log=“git log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit”

	export PATH=/usr/local/bin:$PATH

	# Get working directory of frontmost finder window.
	cdf() {