Simon Cockell sjcockell

## get_local_covid_data.R
library(tidyverse)
library(zoo)

# Download the latest UK coronavirus case counts, keep the upper-tier local
# authority rows, add a 7-day rolling mean of daily lab-confirmed cases
# computed separately for each area, then keep Gateshead and Newcastle.
read_csv('https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv') %>%
  dplyr::filter(`Area type` == 'Upper tier local authority') %>%
  dplyr::arrange(desc(`Area name`)) %>%
  # Group on the column itself: group_by(desc(`Area name`)) computes a new
  # grouping variable and leaves it behind as a stray column after ungroup().
  dplyr::group_by(`Area name`) %>%
  # rollmean() follows row order within each group.
  # NOTE(review): assumes each area's rows already arrive in date order - confirm.
  dplyr::mutate(`Cases - 7 day rolling average` = zoo::rollmean(`Daily lab-confirmed cases`, k = 7, fill = NA)) %>%
  dplyr::ungroup() %>%
  dplyr::filter(`Area name` %in% c('Gateshead', 'Newcastle upon Tyne')) %>%

## get_sra_data.sh
#!/bin/bash

# Collect the first comma-separated field from every line of SraRunTable.txt
# except line 1 (`tail -n +2` starts at line 2 - presumably skipping a header row).
VAR=$(tail -n +2 SraRunTable.txt | cut -d ',' -f 1)

# This is a loop for downloading the data

for i in ${VAR}
  do
    if [ -f ${i}.fastq.gz ]
      then

## build_alignments.py
import get_sequences
import uniprot_mapping
import urllib2
import shlex, subprocess

def main(file):
    """Read `file` and split its contents into groups.

    The whole file is read into memory, split on the '"' character, and the
    resulting pieces are passed to organise_groups().
    """
    with open(file) as f:
        data = f.read()
    groups = data.split('"') #file has protein name per line with " delineating groups
    groups = organise_groups(groups)

## get_sequence_from_name.py
import uniprot_mapping
import urllib2

def main(file):
    """Look up a UniProt mapping for each name listed in `file`.

    Lines beginning with '"' are skipped. Every other line is stripped of
    trailing whitespace and sent to uniprot_mapping.uniprot_mapping() with
    'ACC+ID' as the first type argument and 'ACC' as the second; the reply is
    handed to parse_return_string().
    """
    fh = open(file, 'r')
    # NOTE(review): no close() for fh is visible in this function - confirm.
    for line in fh.readlines():
        if not line.startswith('"'): #ignore comment lines
            name = line.rstrip()
            # NOTE(review): `id` shadows the Python builtin of the same name.
            id = uniprot_mapping.uniprot_mapping('ACC+ID', 'ACC', name)
            mapped = parse_return_string(id)

## get_cath_domains.py
import urllib
import os, os.path
from optparse import OptionParser

def main(superfamily):
    """Retrieve the structure files for every domain in a CATH superfamily.

    The superfamily's domains are looked up with get_domain_list() and then
    passed, together with the superfamily itself, to get_domain_structures().
    """
    domains = get_domain_list(superfamily)
    get_domain_structures(domains, superfamily)

## signalp.py
import os
from optparse import OptionParser

def main(file, path):
    """runs SignalP for every sequence file in a directory

    `file` is the directory to scan (it is passed to os.listdir()); `path` is
    forwarded as the first argument to signalp().
    """
    for filename in os.listdir(file):
        #checks other than the file suffix would be more sophisticated
        if filename.endswith('.fa') or filename.endswith('.fasta'):
            # 'gram+' is passed as signalp()'s second argument -
            # presumably the organism type; confirm against signalp().
            sig = signalp(path, 'gram+', os.path.join(file, filename))
            length = len(sig)

## uniprot_mapping.py
import urllib
import urllib2

def uniprot_mapping(fromtype, totype, identifier):
    """Takes an identifier, and types of identifier
    (to and from), and calls the UniProt mapping service"""
    base = 'http://www.uniprot.org'
    tool = 'mapping'
    params = {'from':fromtype,
                'to':totype,

## posterous.py
def python_gist():
  """Does Posterous really support Gist drop-ins?

  Prints a fixed test message to standard output and returns None.
  """
  # Call form of print: valid on Python 3 and, with a single string
  # argument, prints the same line on Python 2.
  print("Testing, testing 1,2,3...")
	library(tidyverse)
	library(zoo)

	# Download the latest UK coronavirus case counts, keep the upper-tier local
	# authority rows, add a 7-day rolling mean of daily lab-confirmed cases
	# computed separately for each area, then keep Gateshead and Newcastle.
	read_csv('https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv') %>%
	dplyr::filter(`Area type` == 'Upper tier local authority') %>%
	dplyr::arrange(desc(`Area name`)) %>%
	# Group on the column itself: group_by(desc(`Area name`)) computes a new
	# grouping variable and leaves it behind as a stray column after ungroup().
	dplyr::group_by(`Area name`) %>%
	# rollmean() follows row order within each group.
	# NOTE(review): assumes each area's rows already arrive in date order - confirm.
	dplyr::mutate(`Cases - 7 day rolling average` = zoo::rollmean(`Daily lab-confirmed cases`, k = 7, fill = NA)) %>%
	dplyr::ungroup() %>%
	dplyr::filter(`Area name` %in% c('Gateshead', 'Newcastle upon Tyne')) %>%
	#!/bin/bash

	# Collect the first comma-separated field from every line of SraRunTable.txt
	# except line 1 (`tail -n +2` starts at line 2 - presumably skipping a header row).
	# The pipe must be unescaped: `\|` would hand a literal "|" to tail as a
	# file name instead of piping its output into cut.
	VAR=$(tail -n +2 SraRunTable.txt | cut -d ',' -f 1)

	# This is a loop for downloading the data

	for i in ${VAR}
	do
	if [ -f ${i}.fastq.gz ]
	then
	import get_sequences
	import uniprot_mapping
	import urllib2
	import shlex, subprocess

	def main(file):
	with open(file) as f:
	data = f.read()
	groups = data.split('"') #file has protein name per line with " delineating groups
	groups = organise_groups(groups)
	import urllib
	import os, os.path
	from optparse import OptionParser

	def main(superfamily):
	#fetch the list of domains in the superfamily from the CathDomainList
	dom_lst = get_domain_list(superfamily)
	#for each domain, retrieve the PDB file from CATH
	get_domain_structures(dom_lst, superfamily)
	import os
	from optparse import OptionParser

	def main(file, path):
	"""runs SignalP for every sequence file in a directory"""
	for filename in os.listdir(file):
	#other checks other than filer suffix would be more sophisticated
	if filename.endswith('.fa') or filename.endswith('.fasta'):
	sig = signalp(path, 'gram+', os.path.join(file, filename))
	length = len(sig)
	import urllib
	import urllib2

	def uniprot_mapping(fromtype, totype, identifier):
	"""Takes an identifier, and types of identifier
	(to and from), and calls the UniProt mapping service"""
	base = 'http://www.uniprot.org'
	tool = 'mapping'
	params = {'from':fromtype,
	'to':totype,
	def python_gist():
	  """Does Posterous really support Gist drop-ins?

	  Prints a fixed test message to standard output and returns None.
	  """
	  # Call form of print: valid on Python 3 and, with a single string
	  # argument, prints the same line on Python 2.
	  print("Testing, testing 1,2,3...")