Skip to content

Instantly share code, notes, and snippets.

View kaspermunch's full-sized avatar

Kasper Munch kaspermunch

View GitHub Profile
@kaspermunch
kaspermunch / modpath.py
Last active March 19, 2020 20:53
Simple manipulation of paths
import os, re
def modpath(p, parent=None, base=None, suffix=None):
par, name = os.path.split(p)
name_no_suffix, suf = os.path.splitext(name)
if type(suffix) is str:
suf = suffix
if parent is not None:
par = parent
if base is not None:
@kaspermunch
kaspermunch / reused_gwf_templates.py
Last active March 20, 2020 14:42
Manual naming of reused GWF templates
from gwf import Workflow, AnonymousTarget
def mask_template(path):
    """Return an AnonymousTarget that runs ``./some_command`` on ``path``.

    The target declares ``path`` as its only input and ``path + '.masked'``
    as its only output, and passes an empty options dict.
    """
    masked_path = path + '.masked'
    command = """./some_command {}""".format(path)
    return AnonymousTarget(
        inputs={'path': path},
        outputs={'path': masked_path},
        options={},
        spec=command,
    )
def step_one_template(path):
@kaspermunch
kaspermunch / reused_gwf_templates_with_groups.py
Last active March 20, 2020 14:42
Naming of reused GWF templates using groups
from gwf import Workflow, AnonymousTarget
from groups import Group # separate Gist
def mask_template(path):
    """Return an AnonymousTarget that runs ``./some_command`` on ``path``.

    The target declares ``path`` as its only input and ``path + '.masked'``
    as its only output, and passes an empty options dict.
    """
    masked_path = path + '.masked'
    command = """./some_command {}""".format(path)
    return AnonymousTarget(
        inputs={'path': path},
        outputs={'path': masked_path},
        options={},
        spec=command,
    )
@kaspermunch
kaspermunch / workflows_per_population.py
Last active March 20, 2020 14:44
Separate GWF workflows for each population
from gwf import Workflow, AnonymousTarget
def mask_template(path):
    """Return an AnonymousTarget that runs ``./some_command`` on ``path``.

    The target declares ``path`` as its only input and ``path + '.masked'``
    as its only output, and passes an empty options dict.
    """
    masked_path = path + '.masked'
    command = """./some_command {}""".format(path)
    return AnonymousTarget(
        inputs={'path': path},
        outputs={'path': masked_path},
        options={},
        spec=command,
    )
def step_one_template(path):
@kaspermunch
kaspermunch / workflows_per_analysis.py
Last active March 20, 2020 14:44
Separate GWF workflows for two similar analyses
from gwf import Workflow, AnonymousTarget
def mask_template(path):
    """Return an AnonymousTarget that runs ``./some_command`` on ``path``.

    The target declares ``path`` as its only input and ``path + '.masked'``
    as its only output, and passes an empty options dict.
    """
    masked_path = path + '.masked'
    command = """./some_command {}""".format(path)
    return AnonymousTarget(
        inputs={'path': path},
        outputs={'path': masked_path},
        options={},
        spec=command,
    )
def step_one_template(path):
def optimize_dataframe(df):
converted_df = pandas.DataFrame()
floats_optim = (df
.select_dtypes(include=['float'])
.apply(pandas.to_numeric,downcast='float')
)
converted_df[floats_optim.columns] = floats_optim
@kaspermunch
kaspermunch / end_of_bashrc.sh
Created April 10, 2020 10:20
Add this to end of .bashrc to allow activation of environments using `conda activate` inside bash scripts on the cluster.
# `export -f` marks shell *functions* (not variables) for export, so bash
# processes started from this shell inherit them. Exporting `conda` and the
# helper functions listed below is what lets `conda activate` be called from
# inside bash scripts.
# NOTE(review): presumably these functions are defined earlier in .bashrc by
# conda's shell initialisation; confirm that block runs before these lines.
export -f conda
export -f __conda_activate
export -f __conda_reactivate
export -f __conda_hashr
export -f __add_sys_prefix_to_path
@kaspermunch
kaspermunch / test_interval_overlap.py
Last active April 11, 2020 12:23
Test if two half-open intervals overlap using two comparisons instead of eight.
# Eight-comparison form: report an overlap when either endpoint of one
# interval lies within the other interval.
# NOTE(review): the second and fourth tests compare an *end* coordinate using
# `start <= end < other_end`, so intervals that merely touch, e.g. [0, 5) and
# [5, 10), evaluate to True here but to False in the forms below. For such
# touching intervals the forms are not strictly equivalent; the two-comparison
# versions are the ones consistent with half-open semantics.
overlap = (start1 <= start2 < end1 or
start1 <= end2 < end1 or
start2 <= start1 < end2 or
start2 <= end1 < end2)
# intended to be the same as: the intervals overlap unless one of them ends
# at or before the point where the other starts
overlap = not (end1 <= start2 or end2 <= start1)
# same as (by De Morgan's laws):
overlap = not (end1 <= start2) and not (end2 <= start1)
# same as (negating each comparison: not (a <= b) becomes a > b):
overlap = end1 > start2 and end2 > start1
@kaspermunch
kaspermunch / monospace_tables.py
Created April 22, 2020 06:10
Add this to a cell to get a monospace font for numbers in Jupyter tables
%%html
<style> table { font-variant-numeric: tabular-nums; } </style>
%matplotlib inline
# proxies for internet access on the cluster
%env http_proxy=http://proxy-default:3128
%env https_proxy=http://proxy-default:3128
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import warnings