My common snippets

Bash

shebang

#!/usr/bin/env bash
CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

date loop

# requires GNU date (-d)
start='2019-01-01'
end='2019-02-01'

start=$(date -d "$start" +%Y%m%d)
end=$(date -d "$end" +%Y%m%d)

while [[ $start -le $end ]]; do
    echo "$(date -d "$start" +%Y-%m-%d)"
    start=$(date -d "$start + 1 day" +%Y%m%d)
done

linux

# ssh
mkdir .ssh
touch .ssh/authorized_keys
chmod 700 .ssh
chmod 600 .ssh/authorized_keys

# get user's ram usage
function ram {
    # sums %MEM (column 4 of ps aux) per user, sorted ascending
    for _user in $(ps haux | awk '{print $1}' | sort -u); do
        ps haux | awk -v user="${_user}" '$1 ~ user { sum += $4 } END { print user, sum }'
    done | sort -n -k2,2
}

# zsh
emulate sh -c "source ~/.bash_profile"

# find
find . -type f -mmin -10 -not -path "./scheduler/*" -not -path "./dag_processor_manager/*"
find . -type f -mmin -10 -not -path "./scheduler/*" -not -path "./dag_processor_manager/*" -exec grep -i 'error' {} +

# rsync
## trailing slash on the source syncs its contents without creating a nested tests folder at the destination
rsync -avzp --del -e "ssh -p myport" user@hostname:/var/www/tests/ /var/www/tests

git

[alias]
        st = "status -s -b"
        c = "commit"

        # --all
        # --verbose
        br = "branch -a -v"

        co = "checkout"
        cb = "checkout -b"

        # Short hash, relative date and message.
        logd = "log --pretty='%C(yellow)%h %C(cyan)%ar %Creset%s'"

        # --graph:
        #     Draw a text-based graphical representation of the commit history on
        #     the left hand side of the output.
        # --decorate:
        #     Print out the ref names of any commits that are shown. Defaults to
        #     the short format, so the name prefixes refs/heads/, refs/tags/ and
        #     refs/remotes/ will not be printed.
        logline = "log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit"
        hs = "log --pretty='%C(yellow)%h %C(cyan)%cd %Cblue%aN%C(auto)%d %Creset%s' --graph --date=relative --date-order"

Python

conda

conda env list
conda create --name dagster-3.9.11 python=3.9.11

ipython kernel

pip install ipykernel
python -m ipykernel install --user --name python3.9.4-spark-nb --display-name "Python 3.9 (spark-nb)"

chunks

import itertools


def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]


def chunks_iter(iterable, n):
    """Yield successive n-sized chunks from any iterable (no len() required)."""
    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk


def range_generator(upper, batch_size):
    """Return (start, end) index pairs covering range(0, upper) in batch_size steps."""
    return [
        (i, i + batch_size)
        for i in range(0, upper, batch_size)
    ]
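
A quick sanity check of the three helpers above (expected output in the comments):

print(list(chunks([1, 2, 3, 4, 5], 2)))     # [[1, 2], [3, 4], [5]]
print(list(chunks_iter('abcde', 2)))        # [('a', 'b'), ('c', 'd'), ('e',)]
print(range_generator(10, 4))               # [(0, 4), (4, 8), (8, 12)]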

Pandas

import

import os
import sys

import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns

from IPython.display import display, HTML
display(
  HTML("<style>.container {width: 100% ! important;}</style>")
)

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 200)
pd.set_option('display.max_colwidth', 400)

%matplotlib inline
sns.set()

%load_ext autoreload
%autoreload 2

read excel sheets

xls = pd.ExcelFile('/apps/jupyter/users/vietvu/FS/reports/monthly_report/20220215/final/20220211_vnpt_fe_fs_final.xlsx')
print(f"Sheets: {xls.sheet_names}")
df_detail = xls.parse('Sheet1')
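
To pull every sheet in one go, the same xls object can be reused (a small follow-up sketch):

# parse all sheets into a dict of DataFrames keyed by sheet name
dfs = {name: xls.parse(name) for name in xls.sheet_names}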

Oracle

metadata

select * from all_tables;
select * from table_privileges where owner = '' or grantee = '';

PostgreSQL

metadata

-- tables
select * from pg_tables where schemaname like 'data%';

-- create table

spark

spark.sparkContext.getConf().getAll()

from delta.tables import DeltaTable
deltaTable = DeltaTable.forPath(spark, "/data/events/")
deltaTable.delete("date = '2017-01-01'")                # predicate using SQL formatted string

# compact a partition (did not work at the time of testing)
# `table` (Delta table path) and `numFiles` (target file count) are placeholders
partition = 'date = "2021-20-10"'
(spark.read
  .format("delta")
  .load(table)
  .where(partition)
  .repartition(numFiles)
  .write
  .format("delta")
  .mode("overwrite")
  .option("replaceWhere", partition)
  .save(table))

# delta versionAsOf
df = (spark.read
  .format("delta")
  .option("versionAsOf", "5238")
  .load("/path/to/my/table"))

# delta history
fullHistoryDF = deltaTable.history()

# join indicator
from pyspark.sql import DataFrame
from pyspark.sql import functions as F

def merge_indicator(
    df1: DataFrame,
    df2: DataFrame,
    col: str,
    how: str) -> DataFrame:
    """
    Merge dataframes and append a `_merge` column (both, right_only, left_only), like pandas' merge(indicator=True).
    """
    col2 = col + '2'
    df2 = df2.withColumnRenamed(col, col2)
    sdf = df1.join(df2, df1[col] == df2[col2], how)
    out_df = (sdf
        .withColumn('_merge',
            F.when(sdf[col].isNull(), 'right_only')
             .when(sdf[col2].isNull(), 'left_only')
             .otherwise('both')
            )
        .withColumn(col, F.coalesce(sdf[col], sdf[col2]))
        .drop(col2)
        )
    return out_df
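
A minimal usage sketch, assuming an active SparkSession named spark; the two tiny frames below are invented just to show the _merge values:

left = spark.createDataFrame([(1, 'a'), (2, 'b')], ['id', 'val_l'])
right = spark.createDataFrame([(2, 'x'), (3, 'y')], ['id', 'val_r'])

merge_indicator(left, right, 'id', 'outer').show()
# id=1 -> left_only, id=2 -> both, id=3 -> right_only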

Google Colab

# mount data from Google Drive for using in this notebook
from pathlib import Path
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
root_path = Path("/content/gdrive/My Drive/")
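
A quick check that the mount worked, listing the top-level Drive entries (nothing project-specific assumed):

# list what is visible at the Drive root after mounting
for p in sorted(root_path.iterdir()):
    print(p.name)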

Docker

# logs
docker logs --since 20220101 fsprod3.7
docker update --restart no redis

seaborn: multiple lines with value labels

df_data = pd.melt(df_data_orgn, id_vars=['month'], value_vars=['value1', 'value2'])

fig = plt.figure(figsize=(15, 8))
ax = sns.lineplot(data=df_data, x='month', y='value', hue='variable')
palette = ['r','b','g']
for item, color in zip(df_data.groupby('variable'), palette):
    #item[1] is a grouped data frame
    for x,y in item[1][['month','value']].values:
        ax.text(x,y, f'{y:.3f}',color=color)
        
# table color
.style.background_gradient(axis=0, cmap='YlOrRd')  
# gnome dash-to-dock: scroll on the dock to cycle windows
gsettings set org.gnome.shell.extensions.dash-to-dock scroll-action 'cycle-windows'
oh-my-zsh

# install oh-my-zsh, fzf and plugins
## requires: zsh, git, fzf
brew install fzf
sh -c "$(curl -fsSL https://raw.github.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"
git clone https://github.com/zsh-users/zsh-autosuggestions ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-autosuggestions
git clone https://github.com/zsh-users/zsh-syntax-highlighting.git ${ZSH_CUSTOM:-~/.oh-my-zsh/custom}/plugins/zsh-syntax-highlighting
git clone https://github.com/joshskidmore/zsh-fzf-history-search ${ZSH_CUSTOM:=~/.oh-my-zsh/custom}/plugins/zsh-fzf-history-search
plugins=( zsh-autosuggestions zsh-syntax-highlighting zsh-fzf-history-search )

python ssl
# disable certificate verification for the default urllib HTTPS context
import ssl

ssl._create_default_https_context = ssl._create_unverified_context
