Skip to content

Instantly share code, notes, and snippets.

@Ken-Kuroki
Ken-Kuroki / notify.py
Last active September 29, 2020 13:25
Slack Notify
from datetime import datetime
import subprocess
import json
import requests
class SlackNotifier:
def __init__(self, slack_url, msg='Done!'):
self.start = datetime.now()
@Ken-Kuroki
Ken-Kuroki / find_nan_inf.py
Created May 10, 2020 04:49
Find rows containing nan or inf
import pandas as pd
import numpy as np
# Example frame: four rows by two columns after the transpose; the first
# column carries one NaN and one inf, the second column is entirely finite.
df = pd.DataFrame([[1., 2., np.nan, np.inf], [1., 2., 3., 4.]]).T
# Select the rows in which at least one value is not finite (NaN or +/-inf).
df[~np.isfinite(df).all(axis=1)]
@Ken-Kuroki
Ken-Kuroki / julia_multithread.jl
Created March 3, 2020 16:39
Julia multithreads
# Print the number of threads available to this Julia session so you can
# check that nthreads() is large enough.
println(Threads.nthreads())
# Return the input multiplied by two.
f(input) = input * 2
function main()
inputs = [i for i in 20:30];
@Ken-Kuroki
Ken-Kuroki / python_r_julia.md
Last active February 10, 2020 18:52
How to exchange data between Python, R, and Julia

Here's how to transfer your data frames between Python, R, and Julia using the Feather format. The article "The Best Format to Save Pandas Data" provides speed comparisons with other formats and concludes that Feather is the best format for storing data in the short term.

Note for pandas users: Feather doesn't support data frames with a custom index. If you store anything in the pandas index, call df.reset_index() and write the data frame it returns to the Feather file.

Python

import pandas as pd
import feather

# read
@Ken-Kuroki
Ken-Kuroki / pooling.py
Last active November 18, 2019 07:50
Python process pooling
from multiprocessing import Pool
def func(a):
    """Square ``a`` and return it together with the original argument.

    The argument is echoed back alongside the result for convenience, so
    each result can be matched to the input that produced it. To pass
    several arguments, pack them into a single data type or use
    ``starmap`` instead.
    """
    return a, a ** 2
# Only start the pool when this file is run as the main program. Under the
# "spawn" start method the worker processes re-import this module, and an
# unguarded Pool(...) at module level would then be executed again in every
# worker instead of just once in the parent.
if __name__ == "__main__":
    with Pool(20) as p:
        # func yields (argument, result) pairs, so the result stream can be
        # collected directly into an {argument: result} mapping.
        output = dict(p.imap(func, range(100)))
@Ken-Kuroki
Ken-Kuroki / speedtest.sh
Created November 15, 2019 06:57
Test file transfer speed
#!/bin/sh
# Rough file-transfer speed test: allocate ten dummy files in the current
# directory, then copy them one by one to a destination directory, printing
# the wall-clock time at which each copy starts.
#
# Usage: speedtest.sh [DESTINATION_DIR]
#   DESTINATION_DIR  where the files are copied to
#                    (default: /foo/bar/destination/)
dest="${1:-/foo/bar/destination/}"

# Create the dummy files (size given to fallocate as "1GB" each).
for i in $(seq 01 10); do
    fallocate -l 1GB "dummy${i}"
done

# Copy sequentially; the gap between consecutive start times is the time
# taken by the previous file's copy.
for i in $(seq 01 10); do
    echo "File${i} start, $(date +%T)"
    cp "dummy${i}" "${dest}"
done
@Ken-Kuroki
Ken-Kuroki / getcolor.py
Created October 10, 2019 02:39
Get hex format colorcode from a matplotlib colormap
def get_colorcode(colormap, value):
"""
Gets hex format colorcode (eg. '#ff0000') from a matplotlib colormap.
Parameters
----------
colormap : matplotlib.colors.Colormap
Matplotlib colormap, a subclass of Colormap
value : float
Value in [0, 1) interval that specifies the color
@Ken-Kuroki
Ken-Kuroki / itol_colorstrip.py
Created October 7, 2019 11:36
Generate iTOL "color strip" annotation file from pandas series
import numpy as np
import pandas as pd
from ete3 import PhyloTree
from itertools import cycle
def generate_colorstrip(labels: pd.Series, save_file: str, palette: list = None) -> None:
# labels must be a pd.Series whose index are leaf names and values are their labels
if palette is None:
palette = ["#4E79A7", "#A0CBE8", "#F28E2B", "#FFBE7D", "#59A14F",
"#8CD17D", "#B6992D", "#F1CE63", "#499894", "#86BDB6",
@Ken-Kuroki
Ken-Kuroki / prime_factorize.py
Created October 6, 2019 13:20
Find prime factors
def prime_factorize(n):
answers = []
while n % 2 == 0:
answers.append(2)
n //= 2
f = 3
while f**2 <= n: # instead of f <= n**0.5
if n % f == 0:
answers.append(f)
n //= f
@Ken-Kuroki
Ken-Kuroki / get_gcd.py
Created October 6, 2019 09:12
Calculate GCD
def get_gcd(a, b):
    """Return the greatest common divisor of ``a`` and ``b``.

    Implements the Euclidean algorithm iteratively, so large inputs cannot
    hit the recursion limit.

    Parameters
    ----------
    a, b : int
        The two numbers; their order and signs do not matter.

    Returns
    -------
    int
        The non-negative greatest common divisor. ``get_gcd(n, 0)`` and
        ``get_gcd(0, n)`` return ``abs(n)``; ``get_gcd(0, 0)`` returns 0.
    """
    # Work on magnitudes: with Python's sign-following modulo, negative
    # operands would otherwise never reach a zero remainder.
    a, b = abs(a), abs(b)
    # No explicit swap is needed: when a < b the first step turns
    # (a, b) into (b, a) because a % b == a.
    while b:
        a, b = b, a % b
    return a