Jamie Heather JamieHeather

## download-human-ig-stitchr-files.py
#!/usr/bin/python3
# -*- coding: utf-8 -*-

"""
download-human-ig-stitchr-files.py
An example script to download human IG germline genes into a format suitable for use with stitchr
"""


import os

## download-proteome-fastas.py
"""
download-proteome-fastas.py

Used to download whole proteomes from UniProt.

First you need to go to http://www.uniprot.org/proteomes/ and download lists of proteome accessions
    - Can be compressed or uncompressed, as many files as desired
    - Run this script in the same directory
    - Files should be named in format [unique-identifier].tsv(.gz - if compressed)
        - 'tsv' HAS to be present, separated from the identifier by a single period

## tidy-up-dated-files.sh
# Goes through directory of appropriately named files and decants into labeled year-month gzipped tarball archives
# Files MUST be dated in ISO format in filename, i.e. YYYY-MM-DD

# Currently set up to only work from 2010-2019; add extra loops for decades(/centuries etc) as needed

# First get rid of pesky spaces in names (which mess up the xargs command)
find -name "* *" -type f | rename 's/ /_/g'

# Then loop through desired files and sort into appropriate tarballs
for y in {1..9}

## DecombinatorNucleotideCDR3.py
# See https://github.com/innate2adaptive/Decombinator
# Change line 300 (in CDR3translator.py version '3.1') from:
return aa[start_cdr3:end_cdr3]

# …to:
return nt[start_cdr3*3:3*end_cdr3]

## exploring_vdjdb.sh
# Download repo, build database then navigate to it
brew install groovy # OSX groovy installation, replace depending on your setup
git clone https://github.com/antigenomics/vdjdb-db.git
cd vdjdb-db/src/
groovy -cp . BuildDatabase.groovy
cd ../database/

# See fields
head -1 vdjdb_full.txt

## get_hg19_sequence.py
"""
get_hg19_sequence.py
Jamie Heather, February 2017
For use on Python 2.7, requires urllib2 module
"""

import urllib2

def get_hg19_seq(chrm, seq_from, seq_to):
    """
	#!/usr/bin/python3
	# -*- coding: utf-8 -*-

	"""
	download-human-ig-stitchr-files.py
	An example script to download human IG germline genes into a format suitable for use with stitchr
	"""


	import os
	"""
	download-proteome-fastas.py

	Used to download whole proteomes from UniProt.

	First you need to go to http://www.uniprot.org/proteomes/ and download lists of proteome accessions
	- Can be compressed or uncompressed, as many files as desired
	- Run this script in the same directory
	- Files should be named in format [unique-identifier].tsv(.gz - if compressed)
	- 'tsv' HAS to be present, separated from the identifier by a single period
	# Goes through directory of appropriately named files and decants into labeled year-month gzipped tarball archives
	# Files MUST be dated in ISO format in filename, i.e. YYYY-MM-DD

	# Currently set up to only work from 2010-2019; add extra loops for decades(/centuries etc) as needed

	# First get rid of pesky spaces in names (which mess up the xargs command)
	find -name "* *" -type f | rename 's/ /_/g'

	# Then loop through desired files and sort into appropriate tarballs
	for y in {1..9}
	# See https://github.com/innate2adaptive/Decombinator
	# Change line 300 (in CDR3translator.py version '3.1') from:
	return aa[start_cdr3:end_cdr3]

	# …to:
	return nt[start_cdr3*3:3*end_cdr3]
	# Download repo, build database then navigate to it
	brew install groovy # OSX groovy installation, replace depending on your setup
	git clone https://github.com/antigenomics/vdjdb-db.git
	cd vdjdb-db/src/
	groovy -cp . BuildDatabase.groovy
	cd ../database/

	# See fields
	head -1 vdjdb_full.txt
	"""
	get_hg19_sequence.py
	Jamie Heather, February 2017
	For use on Python 2.7, requires urllib2 module
	"""

	import urllib2

	def get_hg19_seq(chrm, seq_from, seq_to):
	"""