Skip to content

Instantly share code, notes, and snippets.

@seahrh
seahrh / rsync.sh
Last active January 18, 2022 03:02
Prefer rsync over scp for copying files to a remote host. See https://linux.die.net/man/1/rsync
# Long-option spelling of `rsync -vaPz`:
#   --verbose             (-v)
#   --archive             (-a) recursion, and preserve almost everything
#   --partial --progress  (-P is shorthand for this pair)
#   --compress            (-z) compress file data during the transfer

# Copy a single file into ~/user on the remote host.
rsync --verbose --archive --partial --progress --compress file.txt user@server:~/user
# Copy a directory (recursively, because of --archive) into ~/user on the remote host.
rsync --verbose --archive --partial --progress --compress dir user@server:~/user
@seahrh
seahrh / shared_dict.py
Last active August 6, 2021 07:16
multiprocessing: update shared dictionary by multiple processes
import json
import multiprocessing
def dowork(qids, shared_dict, lock):
    """Process every item of ``qids`` while holding ``lock``.

    The loop body is a placeholder: it is where ``shared_dict`` is meant to be
    updated, once per ``qid``.

    Args:
        qids: Iterable of items to process.
        shared_dict: Dictionary shared between the workers; only touched
            while ``lock`` is held.
        lock: Lock guarding ``shared_dict``. It must support the context
            manager protocol (``threading`` and ``multiprocessing`` locks,
            including manager lock proxies, all do).
    """
    # `with` guarantees the lock is released even if the loop raises, so a
    # failing worker cannot leave the other workers blocked forever.
    with lock:
        for qid in qids:
            # update shared_dict
            pass
@seahrh
seahrh / jq.sh
Last active February 8, 2022 09:29
jq examples
# -C forces coloured jq output even when piped; `less -r` renders the colour escapes.

# Show the qas entry whose id equals the given value.
jq -C '.data[].paragraphs[].qas[] | select(.id=="5ad24ce8d7d075001a428c0e")' input/squad20/dev-v2.0.json | less -r
# Print every .data[].title value.
jq -C '.data[].title' input/squad20/dev-v2.0.json | less -r
# Print every qas id.
jq -C '.data[].paragraphs[].qas[].id' input/squad20/dev-v2.0.json | less -r
# Search the qas ids case-insensitively, with line numbers, for 'needle'.
jq -C '.data[].paragraphs[].qas[].id' input/squad20/dev-v2.0.json | grep -ni 'needle'
@seahrh
seahrh / drop_rows.py
Created July 13, 2021 07:42
pandas drop rows
# Remove, in place, the rows whose "title" string has zero length.
has_empty_title = df["title"].str.len() == 0
index = df.loc[has_empty_title].index
df.drop(index=index, inplace=True)

# Then remove, in place, the rows where "is_disamb" equals 1.
is_disamb_row = df["is_disamb"] == 1
index = df.loc[is_disamb_row].index
df.drop(index=index, inplace=True)
@seahrh
seahrh / __init__.py
Created July 12, 2021 06:45
python logging config
import logging
from logging.config import fileConfig
# Logging settings looked up under the ENVIRONMENT key of CONF.
# CONF and ENVIRONMENT are expected to be defined elsewhere in this module.
LOG_INI = CONF[ENVIRONMENT]["LOG_INI"]
LOG_DIR = CONF[ENVIRONMENT]["LOG_DIR"]


def get_logger(name: "str | None" = None) -> logging.Logger:
    """Apply the ini-file logging configuration and return a logger.

    Args:
        name: Name of the logger to return. ``None`` returns the root logger.

    Returns:
        The ``logging.Logger`` registered under ``name``.
    """
    # "logdir" is passed as a default so the ini file can interpolate it.
    # NOTE(review): the configuration is re-applied on every call, and
    # fileConfig's default ``disable_existing_loggers=True`` can disable
    # loggers created earlier unless the ini file names them -- confirm this
    # is intended, or configure once at import time.
    fileConfig(LOG_INI, defaults={"logdir": LOG_DIR})
    return logging.getLogger(name)
@seahrh
seahrh / brands.txt
Created June 12, 2021 01:20
List of e-commerce brands
2xu
3com
3d younique
3m
3m scotchlite
4head
5 star
55 soul
81 customs
81stgeneration
@seahrh
seahrh / safe.py
Created June 4, 2021 06:23
@functools.wraps(f) makes the decorated function picklable; works with multithreading
def safe_func(default_value=None):
    """Build a decorator that replaces raised exceptions with a fallback value.

    Usage::

        @safe_func(default_value=0)
        def parse(text):
            return int(text)

    Args:
        default_value: Value returned by the decorated function whenever the
            wrapped call raises an ``Exception``.

    Returns:
        A decorator. The function it produces forwards all positional and
        keyword arguments to the wrapped function and returns its result, or
        ``default_value`` if the call raised.
    """
    # Imported locally so the decorator does not depend on a module-level import.
    import functools

    def decorate(f):
        # functools.wraps copies __module__, __name__, __qualname__ and
        # __doc__ from ``f`` onto the wrapper, so the decorated function is
        # still identified by its original name.
        @functools.wraps(f)
        def wrapper(*args, **kwds):
            try:
                return f(*args, **kwds)
            except Exception:
                # Deliberately best-effort: ordinary errors become the
                # fallback. KeyboardInterrupt and SystemExit are not
                # ``Exception`` subclasses and still propagate.
                return default_value

        return wrapper

    return decorate
@seahrh
seahrh / pandas_drop_empty_string
Created June 1, 2021 07:28
Pandas drop rows that have empty string
%%time
len1 = len(train)
train["col"] = train["col"].str.strip()
train.drop(train[train["col"].str.len() == 0].index, inplace=True)
len2 = len(train)
print(f"{len1 - len2} rows deleted")
@seahrh
seahrh / timestamp_dir.py
Created May 12, 2021 01:40
Make timestamped directory
import pathlib
from datetime import datetime
# Timestamp with one-second resolution, formatted as YYYYmmdd_HHMMSS.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")

# Directory named after the timestamp, below models/mlp/.
job_dir = f"models/mlp/{ts}"

# Create it together with any missing parents; an existing directory is not an error.
pathlib.Path(job_dir).mkdir(exist_ok=True, parents=True)
@seahrh
seahrh / pandas.py
Created May 12, 2021 01:38
Pandas config
# Option keys are spelled out in full: abbreviated keys are resolved by
# pattern matching and raise if they ever match more than one option.

# Treat +/-inf as missing values. The option was deprecated in pandas 2.1 and
# is absent from later releases; tolerate that (with a warning) so the display
# options below are still applied.
try:
    pd.set_option("mode.use_inf_as_na", True)
except pd.errors.OptionError:
    import warnings

    warnings.warn(
        "pandas option 'mode.use_inf_as_na' is not available; "
        "inf values will not be treated as NA",
        RuntimeWarning,
    )

# Raise the display limits so wide/long frames are shown without truncation.
pd.set_option("display.max_info_columns", 9999)
pd.set_option("display.max_columns", 9999)
pd.set_option("display.max_rows", 9999)
pd.set_option("display.max_colwidth", 9999)