Skip to content

Instantly share code, notes, and snippets.

View tbrittoborges's full-sized avatar
🎯

Thiago Britto Borges tbrittoborges

🎯
View GitHub Profile
@tbrittoborges
tbrittoborges / pandas_reverse_complement.py
Created December 22, 2017 12:26
Pandas reverse complement
def reverse_complement(sequence):
    """
    Return the reverse complement of a DNA sequence.

    Every base is replaced by its pairing partner (A<->T, C<->G) and the
    result is reversed. Lower-case bases are complemented as well and keep
    their case; any other character (for example ``N`` or ``-``) is left
    unchanged and only moves with the reversal.

    :param sequence: DNA sequence
    :type sequence: str
    :return: reverse-complemented sequence, same length as the input
    :rtype: str
    """
    # A single translation table handles both cases in one C-level pass.
    tab = str.maketrans("ACGTacgt", "TGCAtgca")
    return sequence.translate(tab)[::-1]
def apply_rc(row):
    """
    Reverse-complement the sequence of a row that lies on the minus strand.

    The row is modified in place and also returned, so the function can be
    used as a row-wise callback (e.g. ``df.apply(apply_rc, axis=1)``).

    :param row: mapping-like record with a ``'strand'`` and a ``'seq'`` entry
    :return: the same row; ``row['seq']`` is reverse-complemented only when
        ``row['strand'] == '-'``
    """
    if row['strand'] == '-':
        row['seq'] = reverse_complement(row['seq'])
    return row
@tbrittoborges
tbrittoborges / gist:f3a58425f5f5d5fbab747af5dc364d83
Created November 7, 2017 13:36
remove_r_installed_by_conda.sh
# Run this in your bash command line.
# List all r3 packages installed with conda:
conda list | grep r3 | awk '{print $1}'
# Remove all r3 packages (quoted so a package name is never word-split):
for i in $(conda list | grep r3 | awk '{print $1}'); do conda remove -y "$i"; done
# Finally, remove R:
conda remove r-essentials
@tbrittoborges
tbrittoborges / example_flowdiagram.tex
Last active July 21, 2016 15:04
example of a flow diagram with latex tikzpicture
% Preamble for a flow diagram drawn with a tikzpicture.
\documentclass{article}
\usepackage{tikz}
\usepackage{array}
\usepackage{siunitx}
% TikZ libraries: extra node shapes, arrow tips, the `fit` node option and
% coordinate calculations.
\usetikzlibrary{shapes.geometric, shapes.misc, arrows, fit, calc}
% \addvmargin{<length>}: places an empty node fitted around the current
% bounding box with inner ysep=<length> and inner xsep=0 -- presumably to
% add vertical margin around the picture without widening it.
\newcommand\addvmargin[1]{
\node[fit=(current bounding box),inner ysep=#1,inner xsep=0]{};
}
@tbrittoborges
tbrittoborges / unlistfy.py
Last active May 20, 2016 11:16
Pandas: unlistify one column
df['new'] = df['new'].str.split('/')  # example of how to turn a column of strings into lists
# Drop missing entries, but keep the labels of the rows the lists came from.
listed = df['new'].dropna()
# One row per original row, one column per list element. Passing the original
# index is what keeps the result aligned with `df`; without it the rows are
# renumbered 0..k-1 and the join below attaches values to the wrong rows
# whenever NaN rows were dropped or `df` has a non-default index.
temp = pd.DataFrame(listed.tolist(), index=listed.index)
# Long format, one entry per list element. The explicit dropna() removes the
# padding created for shorter lists on pandas versions where stack() keeps NaN.
temp = temp.stack().dropna()
temp.index = temp.index.droplevel(1)  # keep only the original row label
temp.name = 'new_colum'  # name of the new column in the original dataframe
df = df.join(temp)
@tbrittoborges
tbrittoborges / pd_latex_table.py
Last active February 9, 2018 22:14
Pandas recipe for better latex tables
def better_table(table, caption, name):
start = r"""
\begin{{table}}[!htb]
\sisetup{{round-mode=places, round-precision=2}}
\caption{{{}}}\label{{table:{}}}
\centering
""".format(caption, name)
end = r"\end{table}"
@tbrittoborges
tbrittoborges / bioinfo_bits.py
Created February 22, 2016 12:02
Interesting Python bits for bioinformatics
#
import operator
import random

sequence = "ACGACTGATCGATCGATCGATGCATCGATCGACGAT"
# Draw 30 distinct positions of the sequence. range() works on both Python 2
# and Python 3, whereas xrange() exists only on Python 2.
random_positions = random.sample(range(len(sequence)), 30)
# itemgetter(*positions) builds a callable that fetches all of those
# positions from any sequence in a single call, returning a tuple.
get_positions = operator.itemgetter(*random_positions)
get_positions(sequence)
# Example result (varies between runs because the positions are random):
# ('T', 'C', 'G', 'C', 'A', 'C', 'C', 'T', 'A', 'T', 'G', 'T', 'A', 'T', 'C', 'C', 'T', 'T', 'A', 'G', 'T', 'A', 'A', 'A', 'C', 'G', 'G', 'C', 'G', 'A')
from itertools import groupby
@tbrittoborges
tbrittoborges / fetch_uniprot_gff.py
Last active February 19, 2016 18:32
Load a UniProt GFF directly into a pandas.DataFrame
from urlparse import parse_qs
import pandas as pd
def _fetch_uniprot_gff(identifier):
"""
Retrieve UniProt data from the GFF file
:param identifier: UniProt accession identifier
:type identifier: str
@tbrittoborges
tbrittoborges / pandas_Series_to_fasta.py
Created December 16, 2015 18:59
print a pandas column in the fasta format.
# Print the selected sequences as FASTA records with empty headers.
# `df.query(...)` is a placeholder: replace `...` with the row filter and
# `sequence_column` with the name of the column holding the sequences.
# print() with one parenthesised argument runs on both Python 2 and Python 3.
print('>\n' + '\n> \n'.join(df.query(...).sequence_column))
@tbrittoborges
tbrittoborges / automated_github_issue.py
Last active February 17, 2016 18:03
Automatically submit a GitHub issue
import requests
# Example payload describing the issue to create.
data = {
    "title": "Found a bug",  # str
    "body": "I'm having a problem with this.",  # str
    "assignee": None,  # username str or None
    "milestone": 1,  # int
    "labels": ["label1"],  # list of str
}
def submit_github_issue(data, token, username, repo):
headers = {'Content-Type':'application/json',
@tbrittoborges
tbrittoborges / pdb_pandas.py
Last active March 20, 2024 09:17
guide to read .pdb files with pandas
import pandas as pd
# Fixed-width layout of a coordinate line, kept as (column name, span) pairs
# so each name sits next to its span. Spans are 0-based and half-open, as
# pandas.read_fwf expects. NOTE(review): the spans look like the PDB
# ATOM/HETATM record layout -- confirm against the format specification.
_PDB_FIELDS = (
    ('ATOM', (0, 6)),
    ('serial', (6, 11)),
    ('name', (12, 16)),
    ('altloc', (16, 17)),
    ('resname', (17, 20)),
    ('chainid', (21, 22)),
    ('resseq', (22, 26)),
    ('icode', (26, 27)),
    ('x', (30, 38)),
    ('y', (38, 46)),
    ('z', (46, 54)),
    ('occupancy', (54, 60)),
    ('tempfactor', (60, 66)),
    ('element', (76, 78)),
    ('charge', (78, 80)),
)
names = [field for field, _ in _PDB_FIELDS]
colspecs = [span for _, span in _PDB_FIELDS]
# `pdb_path` must already hold the path of the .pdb file to read.
pdb = pd.read_fwf(pdb_path, names=names, colspecs=colspecs)