pythseq pythseq

## download_kegg.sh
# Download the complete KEGG organism table.
# -f makes curl fail on an HTTP error instead of saving the error page as if
# it were data; -sS keeps the progress meter off but still reports errors.
curl -fsS "http://rest.kegg.jp/list/organism" > organisms-all.txt

# Keep only the rows whose second field is one of the organism codes of interest
awk '$2~/^(hsa|mmu|rno|cfa|bta|gga|xla|xtr|dre|dme|cel|ath|ehi|tgo|eco|sau|mtu|mav|cje|ccol)$/' organisms-all.txt > organisms-of-interest.txt

# Extract the first tab-separated column (the accession code) of each kept row
cut -f1 organisms-of-interest.txt > organisms-of-interest-codes.txt

# Make a directory to hold all the downloaded KGML files

## job_array_demo.sh
#!/bin/bash
# Illustrating the use of job arrays in SLURM
# use the command: sbatch --array=1-100 job_array_demo.sh

# Partition for the job:
#SBATCH -p main

# Multithreaded (SMP) job: must run on one node
#SBATCH --nodes=1

## job_array_demo.sh
#!/bin/bash
# Illustrating the use of job arrays in SLURM
# use the command: sbatch --array=1-100 job_array_demo.sh

# Partition for the job:
#SBATCH -p main

# Multithreaded (SMP) job: must run on one node
#SBATCH --nodes=1

## parse_uniref_xml.py
#!/usr/bin/python

import os
import sys
import xml
import gzip
import json
import time
from collections import defaultdict
import pandas as pd

## gimp.md

      
              2 files
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                pythseq
                / gimp.md
            
            
              Created
              May 14, 2018 09:30
                — forked from lindenb/gimp.md
            
              
                gimp 2.6 procedures . xslt gimp xml procedures gimp scheme 
              
          
    Gimp Procedures

(script-fu-round-corners run-mode image drawable value toggle value value value toggle toggle)

.
Parameter(s)


## to_jupyter.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                pythseq
                / to_jupyter.md
            
            
              Created
              October 3, 2018 16:56
                — forked from lexnederbragt/to_jupyter.md
            
              
                Tools to generate Jupyter Notebooks from plain (markup) text files
              
          
    Tools to generate Jupyter Notebooks from plain (markup) text files:

(Originally posted on Twitter; this version incorporates the responses.)
Notedown

Markdown to Jupyter Notebook, and back https://github.com/aaren/notedown
Jupinx

reStructuredText (rst) to Jupyter Notebook: https://github.com/QuantEcon/sphinxcontrib-jupyter; see also https://medium.com/quantecon-blog/introducing-jupinx-60ba9fc12f4f

  
## SRA_Runs_to_BioSample.R
library(rentrez)
library(assertthat)
library(readr)
search_ind <- function(term){
  # Query the "sra" Entrez database for `term` and return the matching ID.
  # Example from the original notes: searching ERR457868 returns 1011219.
  # The assertion stops execution unless exactly one ID comes back.
  search <- entrez_search(db = "sra", term = term)
  results <- search$ids
  assert_that(length(results) == 1)
  return(results)
}

## genbank_to_tbl.py
# requires biopython
# run like:
#   genbank_to_tbl.py "my organism name" "my strain ID" "ncbi project id" < my_sequence.gbk
#   writes seq.fsa, seq.tbl as output

import sys
from copy import copy
from Bio import SeqIO

def find_gene_entry(features, locus_tag):

## dust_python.py
from collections import deque
import itertools

# Build two lookup tables over all 64 DNA triplets:
#   triplet_index:   triplet string -> integer index
#   inverse_triplet: integer index  -> triplet string
# Triplets are numbered in itertools.product order over A, T, G, C,
# so 'AAA' is 0, 'AAT' is 1, ... and 'CCC' is 63.
i = 0
triplet_index = {}
inverse_triplet = {}
for x in itertools.product(['A','T','G','C'], repeat=3):
    triplet = ''.join(x)
    triplet_index[triplet] = i
    inverse_triplet[i] = triplet
    # Advance the counter so each triplet gets its own index; without this
    # every triplet maps to 0 and inverse_triplet keeps a single entry.
    i += 1

## simple_for_loop_for_mapping.sh
#!/bin/bash

# A simple loop to serially map all samples.
# referenced from within http://merenlab.org/tutorials/assembly_and_mapping/

# how many threads should each mapping task use?
NUM_THREADS=4

for sample in `awk '{print $1}' samples.txt`
do
	# Get a list of all organisms
	curl -s "http://rest.kegg.jp/list/organism" > organisms-all.txt

	# Get just a few of interest
	cat organisms-all.txt \| awk '$2~/^(hsa\|mmu\|rno\|cfa\|bta\|gga\|xla\|xtr\|dre\|dme\|cel\|ath\|ehi\|tgo\|eco\|sau\|mtu\|mav\|cje\|ccol)$/' > organisms-of-interest.txt

	# Get the accession codes for each
	cut -f1 organisms-of-interest.txt > organisms-of-interest-codes.txt

	# Make a directory to put all the kgml files downloaded
	#!/bin/bash
	# Illustrating the use of job arrays in SLURM
	# use the command: sbatch --array=1-100 job_array_demo.sh

	# Partition for the job:
	#SBATCH -p main

	# Multithreaded (SMP) job: must run on one node
	#SBATCH --nodes=1
	#!/usr/bin/python

	import os
	import sys
	import xml
	import gzip
	import json
	import time
	from collections import defaultdict
	import pandas as pd
	library(rentrez)
	library(assertthat)
	library(readr)
	search_ind <- function(term){
	# get the IDs for a run ID
	# ERR457868 searched, returns 1011219
	results <- entrez_search(db="sra", term=term)$ids
	assert_that(length(results) == 1)
	results
	}
	# requires biopython
	# run like:
	# genbank_to_tbl.py "my organism name" "my strain ID" "ncbi project id" < my_sequence.gbk
	# writes seq.fsa, seq.tbl as output

	import sys
	from copy import copy
	from Bio import SeqIO

	def find_gene_entry(features, locus_tag):
	from collections import deque
	import itertools

	# Make dictionary of triplets
	i = 0
	triplet_index = {}
	inverse_triplet = {}
	for x in list(itertools.product(['A','T','G','C'], repeat=3)):
	triplet_index[''.join(x)] = i
	inverse_triplet[i] = ''.join(x)
	#!/bin/bash

	# A simple loop to serially map all samples.
	# referenced from within http://merenlab.org/tutorials/assembly_and_mapping/

	# how many threads should each mapping task use?
	NUM_THREADS=4

	for sample in `awk '{print $1}' samples.txt`
	do