Thiago Britto Borges tbrittoborges

## pandas_reverse_complement.py
# Complement table, built once at import time instead of on every call.
# It covers upper- and lower-case bases plus the two-way IUPAC ambiguity codes
# (R<->Y, K<->M, B<->V, D<->H). Characters that are not listed (N, S, W, gaps,
# ...) are passed through unchanged by str.translate.
_COMPLEMENT_TABLE = str.maketrans(
    "ACGTRYKMBDHVacgtrykmbdhv",
    "TGCAYRMKVHDBtgcayrmkvhdb",
)


def reverse_complement(sequence):
    """Return the reverse complement of a DNA sequence.

    Every base is replaced by its complement and the result is reversed.
    Letter case is preserved, so ``"aagc"`` becomes ``"gctt"``.

    :param sequence: DNA sequence
    :type sequence: str
    :return: the reverse-complemented sequence (``""`` for empty input)
    :rtype: str
    """
    return sequence.translate(_COMPLEMENT_TABLE)[::-1]

def apply_rc(row):
    """Reverse-complement ``row['seq']`` when ``row['strand']`` is ``'-'``.

    The row is updated in place and the same object is returned; rows with
    any other strand value are returned untouched. Presumably meant as a
    row-wise callback for ``DataFrame.apply(..., axis=1)`` -- confirm
    against the caller.

    :param row: mapping-like record with ``'strand'`` and ``'seq'`` entries
    :return: the same ``row`` object
    """
    if row['strand'] != '-':
        return row

    row['seq'] = reverse_complement(row['seq'])
    return row


## gist:f3a58425f5f5d5fbab747af5dc364d83
# Run this in your bash command line.
# List all r3 packages installed with conda (first column = package name):
conda list | grep r3 | awk '{print $1}'

# Remove all r3 packages, one at a time:
for i in $(conda list | grep r3 | awk '{print $1}'); do conda remove -y "$i"; done

# Finally, remove R:
conda remove r-essentials

## example_flowdiagram.tex
\documentclass{article}
\usepackage{tikz}
\usepackage{array}
\usepackage{siunitx}

\usetikzlibrary{shapes.geometric, shapes.misc, arrows, fit, calc}

% \addvmargin{<length>}
% Places an empty node fitted around the current bounding box, with a vertical
% inner separation of #1 and no horizontal inner separation. It relies on the
% `fit` TikZ library and, because it expands to \node, has to be used inside a
% tikzpicture.
\newcommand\addvmargin[1]{
  \node[fit=(current bounding box),inner ysep=#1,inner xsep=0]{};
}

## unlistfy.py
# Example of how to "listify" a column of strings: split on '/'.
df['new'] = df['new'].str.split('/')

# Spread the lists over one column per element. The original row labels are
# passed explicitly: without `index=` the new frame is renumbered 0..n-1 after
# dropna(), and the join below attaches values to the wrong rows whenever `df`
# contains missing values or does not use a default 0..n-1 index.
non_null = df['new'].dropna()
temp = pd.DataFrame(non_null.tolist(), index=non_null.index)

# Long form: one row per list element. Dropping the element-position level
# leaves an index that is coherent with the original dataframe.
temp = temp.stack()
temp.index = temp.index.droplevel(1)
temp.name = 'new_colum'  # name of the new column in the original dataframe

df = df.join(temp)

## pd_latex_table.py
def better_table(table, caption, name):
    """Prepare the LaTeX that opens and closes a floating table.

    The visible body only builds two strings: ``start`` (begin-table line
    with ``[!htb]`` placement, a siunitx setup rounding to 2 places, the
    caption, the label and centering) and ``end`` (the end-table line).

    NOTE(review): in the lines shown here ``table`` is never used and
    nothing is returned; the snippet looks cut off -- TODO confirm against
    the full source.

    :param table: unused in the visible body; presumably the table to
        render -- TODO confirm
    :param caption: text inserted as the table caption
    :param name: inserted after the ``table:`` prefix of the label
    """

    # Doubled braces are literal braces for str.format; the two bare {} slots
    # receive `caption` and `name`, in that order.
    start = r"""
\begin{{table}}[!htb]
\sisetup{{round-mode=places, round-precision=2}}
\caption{{{}}}\label{{table:{}}}
\centering
""".format(caption, name)

    # Not passed through str.format, hence the single braces.
    end = r"\end{table}"

## bioinfo_bits.py
#
import operator
import random

sequence = "ACGACTGATCGATCGATCGATGCATCGATCGACGAT"

# Draw 30 distinct positions of the sequence, in random order.
# `range` replaces the Python-2-only `xrange` and works on Python 2 and 3.
random_positions = random.sample(range(len(sequence)), 30)

# itemgetter(*positions) builds a callable that returns the items found at all
# of those positions as a single tuple.
get_positions = operator.itemgetter(*random_positions)
get_positions(sequence)
# Example output of one run (the positions are random, so it differs per run):
# ('T', 'C', 'G', 'C', 'A', 'C', 'C', 'T', 'A', 'T', 'G', 'T', 'A', 'T', 'C', 'C', 'T', 'T', 'A', 'G', 'T', 'A', 'A', 'A', 'C', 'G', 'G', 'C', 'G', 'A')

from itertools import groupby

## fetch_uniprot_gff.py
from urlparse import parse_qs

import pandas as pd

def _fetch_uniprot_gff(identifier):
    """
    Retrieve UniProt data from the GFF file

    :param identifier: UniProt accession identifier
    :type identifier: str

## pandas_Series_to_fasta.py
# Print the selected sequences one per line, each preceded by an empty '>'
# header line. `df.query(...)` is a placeholder for the row selection.
# The call form of print works on Python 2 and Python 3 (the statement form is
# a SyntaxError on Python 3); str.join accepts any iterable of strings, so the
# column does not need to be copied into a list first.
print('>\n' + '\n> \n'.join(df.query(...).sequence_column))

## automated_github_issue.py
import requests

# Example issue payload; the trailing comments give the expected value types.
data = {
    "title": "Found a bug",                     # str
    "body": "I'm having a problem with this.",  # str
    "assignee": None,                           # username str or None
    "milestone": 1,                             # int
    "labels": ['label1'],                       # list of str
}

def submit_github_issue(data, token, username, repo):
    headers = {'Content-Type':'application/json',

## pdb_pandas.py
import pandas as pd
# Character ranges of the fixed-width fields, as 0-based half-open
# (start, end) pairs. Positions 11, 20, 27-29 and 66-75 are not read.
colspecs = [
    (0, 6),    # ATOM (record name)
    (6, 11),   # serial
    (12, 16),  # name
    (16, 17),  # altloc
    (17, 20),  # resname
    (21, 22),  # chainid
    (22, 26),  # resseq
    (26, 27),  # icode
    (30, 38),  # x
    (38, 46),  # y
    (46, 54),  # z
    (54, 60),  # occupancy
    (60, 66),  # tempfactor
    (76, 78),  # element
    (78, 80),  # charge
]

# Column labels, in the same order as `colspecs`.
names = [
    'ATOM', 'serial', 'name', 'altloc', 'resname',
    'chainid', 'resseq', 'icode',
    'x', 'y', 'z',
    'occupancy', 'tempfactor', 'element', 'charge',
]

# `pdb_path` is not defined in this snippet; it must be set before this runs.
pdb = pd.read_fwf(pdb_path, colspecs=colspecs, names=names)
	def reverse_complement(sequence):
	    """Return the reverse complement of a DNA sequence.

	    Upper- and lower-case bases and the two-way IUPAC ambiguity codes
	    (R<->Y, K<->M, B<->V, D<->H) are complemented; any other character
	    is kept as is. Letter case is preserved.

	    :param sequence: DNA sequence
	    :type sequence: str
	    :return: the reverse-complemented sequence
	    :rtype: str
	    """
	    tab = str.maketrans("ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb")
	    return sequence.translate(tab)[::-1]

	def apply_rc(row):
	    """Reverse-complement ``row['seq']`` when ``row['strand']`` is ``'-'``.

	    The row is updated in place and the same object is returned; rows
	    with any other strand value are returned untouched.

	    :param row: mapping-like record with ``'strand'`` and ``'seq'`` entries
	    :return: the same ``row`` object
	    """
	    if row['strand'] == '-':
	        row['seq'] = reverse_complement(row['seq'])

	    return row
	# Run this in your bash command line.
	# List all r3 packages installed with conda (first column = package name):
	conda list | grep r3 | awk '{print $1}'

	# Remove all r3 packages, one at a time:
	for i in $(conda list | grep r3 | awk '{print $1}'); do conda remove -y "$i"; done

	# Finally, remove R:
	conda remove r-essentials
	\documentclass{article}
	\usepackage{tikz}
	\usepackage{array}
	\usepackage{siunitx}

	\usetikzlibrary{shapes.geometric, shapes.misc, arrows, fit, calc}

	% \addvmargin{<length>}
	% Places an empty node fitted around the current bounding box, with a
	% vertical inner separation of #1 and no horizontal inner separation.
	% It expands to \node, so it has to be used inside a tikzpicture.
	\newcommand\addvmargin[1]{
	\node[fit=(current bounding box),inner ysep=#1,inner xsep=0]{};
	}
	# Example of how to "listify" a column of strings: split on '/'.
	df['new'] = df['new'].str.split('/')

	# Keep the original row labels: without `index=` the new frame is renumbered
	# 0..n-1 after dropna(), and the join below attaches values to the wrong
	# rows whenever `df` has missing values or a non-default index.
	non_null = df['new'].dropna()
	temp = pd.DataFrame(non_null.tolist(), index=non_null.index)

	# One row per list element; drop the element-position level so the index is
	# coherent with the original dataframe.
	temp = temp.stack()
	temp.index = temp.index.droplevel(1)
	temp.name = 'new_colum'  # name of the new column in the original dataframe

	df = df.join(temp)
	# NOTE(review): whitespace-mangled copy of better_table. The body has lost
	# its indentation, so this does not parse as Python, and every line inside
	# the raw string now starts with a tab. In the lines shown here `table` is
	# never used and nothing is returned -- the snippet looks cut off; TODO
	# confirm against the full source.
	def better_table(table, caption, name):

	# Doubled braces are literal braces for str.format; the two bare {} slots
	# receive `caption` and `name`, in that order.
	start = r"""
	\begin{{table}}[!htb]
	\sisetup{{round-mode=places, round-precision=2}}
	\caption{{{}}}\label{{table:{}}}
	\centering
	""".format(caption, name)

	# Not passed through str.format, hence the single braces.
	end = r"\end{table}"
	#
	import operator
	import random

	sequence = "ACGACTGATCGATCGATCGATGCATCGATCGACGAT"

	# Draw 30 distinct positions of the sequence, in random order.
	# `range` replaces the Python-2-only `xrange` and works on Python 2 and 3.
	random_positions = random.sample(range(len(sequence)), 30)

	# itemgetter(*positions) builds a callable that returns the items found at
	# all of those positions as a single tuple.
	get_positions = operator.itemgetter(*random_positions)
	get_positions(sequence)
	# Example output of one run (the positions are random, so it differs per run):
	# ('T', 'C', 'G', 'C', 'A', 'C', 'C', 'T', 'A', 'T', 'G', 'T', 'A', 'T', 'C', 'C', 'T', 'T', 'A', 'G', 'T', 'A', 'A', 'A', 'C', 'G', 'G', 'C', 'G', 'A')

	from itertools import groupby
	from urlparse import parse_qs

	import pandas as pd

	def _fetch_uniprot_gff(identifier):
	"""
	Retrieve UniProt data from the GFF file

	:param identifier: UniProt accession identifier
	:type identifier: str
	import requests

	# Example issue payload; the trailing comments give the expected value types.
	data = {
	    "title": "Found a bug",                     # str
	    "body": "I'm having a problem with this.",  # str
	    "assignee": None,                           # username str or None
	    "milestone": 1,                             # int
	    "labels": ['label1'],                       # list of str
	}

	def submit_github_issue(data, token, username, repo):
	headers = {'Content-Type':'application/json',
	import pandas as pd
	# Character ranges of the fixed-width fields, as 0-based half-open
	# (start, end) pairs. Positions 11, 20, 27-29 and 66-75 are not read.
	colspecs = [
	    (0, 6),    # ATOM (record name)
	    (6, 11),   # serial
	    (12, 16),  # name
	    (16, 17),  # altloc
	    (17, 20),  # resname
	    (21, 22),  # chainid
	    (22, 26),  # resseq
	    (26, 27),  # icode
	    (30, 38),  # x
	    (38, 46),  # y
	    (46, 54),  # z
	    (54, 60),  # occupancy
	    (60, 66),  # tempfactor
	    (76, 78),  # element
	    (78, 80),  # charge
	]

	# Column labels, in the same order as `colspecs`.
	names = [
	    'ATOM', 'serial', 'name', 'altloc', 'resname',
	    'chainid', 'resseq', 'icode',
	    'x', 'y', 'z',
	    'occupancy', 'tempfactor', 'element', 'charge',
	]

	# `pdb_path` is not defined in this snippet; it must be set before this runs.
	pdb = pd.read_fwf(pdb_path, colspecs=colspecs, names=names)