This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pick 30 distinct random positions from a sequence and fetch the bases
# found at those positions.
import operator
import random

sequence = "ACGACTGATCGATCGATCGATGCATCGATCGACGAT"
# random.sample draws without replacement, so every position is unique.
random_positions = random.sample(range(len(sequence)), 30)
# itemgetter(*indexes) builds a callable that returns a tuple holding the
# items found at those indexes, in the order the indexes were given.
get_positions = operator.itemgetter(*random_positions)
get_positions(sequence)
# Example output (differs between runs because the positions are random):
# ('T', 'C', 'G', 'C', 'A', 'C', 'C', 'T', 'A', 'T', 'G', 'T', 'A', 'T', 'C', 'C', 'T', 'T', 'A', 'G', 'T', 'A', 'A', 'A', 'C', 'G', 'G', 'C', 'G', 'A')
from itertools import groupby |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def better_table(table, caption, name): | |
start = r""" | |
\begin{{table}}[!htb] | |
\sisetup{{round-mode=places, round-precision=2}} | |
\caption{{{}}}\label{{table:{}}} | |
\centering | |
""".format(caption, name) | |
end = r"\end{table}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split a '/'-delimited string column into one row per item.
df['new'] = df['new'].str.split('/')  # example of how to turn a column of strings into lists
non_null = df['new'].dropna()
# Keep the original row labels. Without index=..., the helper frame is
# renumbered 0..n-1 and the join below attaches items to the wrong rows
# whenever df contains NaN rows or does not use a default RangeIndex.
temp = pd.DataFrame(non_null.tolist(), index=non_null.index)
# One entry per (row label, item position); dropna() removes the padding
# added for rows that have fewer items than the longest list.
temp = temp.stack().dropna()
temp.index = temp.index.droplevel(1)  # the index needs to be coherent with the original dataframe
temp.name = 'new_colum'  # name of the new column in the original dataframe
df = df.join(temp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\documentclass{article} | |
\usepackage{tikz} | |
\usepackage{array} | |
\usepackage{siunitx} | |
\usetikzlibrary{shapes.geometric, shapes.misc, arrows, fit, calc} | |
% \addvmargin{<length>}: draws an empty node fitted around the current
% bounding box, with inner ysep set to the argument and inner xsep set to 0.
% Uses the `fit` key, provided by the TikZ `fit` library loaded above.
\newcommand\addvmargin[1]{ | |
\node[fit=(current bounding box),inner ysep=#1,inner xsep=0]{}; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run this in your bash command line.
# List all r3 packages installed with conda:
conda list | grep r3 | awk '{print $1}'
# Remove all r3 packages (quote the name so it is passed as a single argument):
for i in $(conda list | grep r3 | awk '{print $1}'); do conda remove -y "$i"; done
# Finally, remove R:
conda remove r-essentials
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def reverse_complement(sequence: str) -> str:
    """Return the reverse complement of a DNA sequence.

    Each of A, C, G, T is swapped for its complementary base and the result
    is reversed. Lower-case bases (e.g. soft-masked regions) are complemented
    too and keep their case. Any other character, such as ``N``, is left
    unchanged.

    :param str sequence: DNA sequence to convert
    :return: the reverse-complemented sequence
    """
    tab = str.maketrans("ACGTacgt", "TGCAtgca")
    # translate() complements every base in one pass; [::-1] reverses it.
    return sequence.translate(tab)[::-1]
def apply_rc(row):
    """Return ``row`` with ``seq`` reverse-complemented on the minus strand.

    Rows whose ``strand`` is not ``'-'`` are returned unchanged. For
    minus-strand rows a copy is modified and returned, so the object handed
    in by the caller is never mutated.

    :param row: mapping-like record with ``strand`` and ``seq`` entries that
        also provides ``copy()``
    :return: the original row, or a modified copy for minus-strand rows
    """
    if row['strand'] == '-':
        # Work on a copy: pandas does not support functions passed to
        # DataFrame.apply mutating the object they receive.
        row = row.copy()
        row['seq'] = reverse_complement(row['seq'])
    return row
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def junction_type2(row): | |
"""Junction type classification""" | |
# if there are no exons supported by reliable junctions | |
# return an iterable with empty strings | |
if row['exons_w_junct_sup'] is None: | |
return ['', ''] | |
type_ = [] | |
# each row is a junction | |
j_start, j_end, exons, strand = row.loc[[ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def analyse(genome, set_a, match_a='any', region_a='any', | |
set_b=None, match_b='any', region_b='any', | |
combine='or', genes=None, window_a=-1, window_b=-1, | |
datadir=None): | |
# It takes the name of the genome assembly to use, and at least a list of set A regulator names. | |
# A simple analysis run with a custom regulator would be: | |
from dorina.run import analyse | |
results = analyse('hg19', ['/path/to/custom/regulator.bed', 'PARCLIP_PUM2_hg19']) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the read count, mean read length and read-length standard deviation
# for each gzipped FASTQ file raw_reads39.fq.gz .. raw_reads50.fq.gz.
for f in raw_reads{39..50}.fq.gz
do
    echo "$f "
    # NR%4==2 selects line 2 of every 4-line group, i.e. the sequence line of
    # each FASTQ record.
    gzip -cd "$f" | awk 'BEGIN { t=0.0; sq=0.0; n=0 }
        NR%4==2 { n++; L=length($0); t+=L; sq+=L*L }
        END {
            # An empty file would otherwise divide by zero.
            if (n == 0) { printf("total %d avg=%f stddev=%f\n", 0, 0, 0); exit }
            m = t/n
            v = sq/n - m*m       # population variance: E[L^2] - E[L]^2
            if (v < 0) v = 0     # guard against floating-point rounding
            # The standard deviation is the square root of the variance.
            printf("total %d avg=%f stddev=%f\n", n, m, sqrt(v))
        }'
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_fasta_from_str(fasta): | |
""" | |
:param str fasta: multiple sequences in fasta string | |
""" | |
from itertools import groupby | |
def is_header(line): | |
return line.startswith(">") |