#!/usr/bin/env bash
CWD="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# print each date in a range, one day at a time (GNU date)
start='2019-01-01'
end='2019-02-01'
start=$(date -d "$start" +%Y%m%d)
end=$(date -d "$end" +%Y%m%d)
while [[ $start -le $end ]]
do
    echo "$(date -d "$start" +%Y-%m-%d)"
    start=$(date -d "$start + 1 day" +%Y%m%d)
done
# ssh
mkdir -p ~/.ssh
touch ~/.ssh/authorized_keys
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys
# get user's ram usage
function ram {
    # total %MEM per user (4th column of ps), sorted ascending;
    # exact match ($1 == user) avoids partial regex matches like the original $1 ~ user
    for _user in $(ps haux | awk '{print $1}' | sort -u); do
        ps haux | awk -v user="${_user}" '$1 == user { sum += $4 } END { print user, sum }'
    done | sort -n -k2,2
}
# zsh: source a bash config file under sh emulation
emulate sh -c "source ~/.bash_profile"
# find
## files modified in the last 10 minutes, excluding two directories
find . -type f -mmin -10 -not -path "./scheduler/*" -not -path "./dag_processor_manager/*"
## same search, then grep the matches for 'error' (case-insensitive)
find . -type f -mmin -10 -not -path "./scheduler/*" -not -path "./dag_processor_manager/*" -exec grep -i 'error' {} +
# rsync
## trailing slash on the source syncs the directory's contents,
## so no nested tests/ folder is created inside the destination
rsync -avzp --del -e "ssh -p myport" user@hostname:/var/www/tests/ /var/www/tests
[alias]
st = "status -s -b"
c = "commit"
# --all
# --verbose
br = "branch -a -v"
co = "checkout"
cb = "checkout -b"
# Short hash, relative date and message.
logd = "log --pretty='%C(yellow)%h %C(cyan)%ar %Creset%s'"
# --graph:
# Draw a text-based graphical representation of the commit history on
# the left hand side of the output.
# --decorate:
# Print out the ref names of any commits that are shown. Defaults to
# short format, so the ref name prefixes refs/heads/, refs/tags/ and
# refs/remotes/ will not be printed.
logline = "log --graph --pretty=format:'%Cred%h%Creset -%C(yellow)%d%Creset %s %Cgreen(%cr) %C(bold blue)<%an>%Creset' --abbrev-commit"
hs = "log --pretty='%C(yellow)%h %C(cyan)%cd %Cblue%aN%C(auto)%d %Creset%s' --graph --date=relative --date-order"
conda env list
conda create --name dagster-3.9.11 python=3.9.11
# activate the target env first (e.g. `conda activate dagster-3.9.11`) so pip installs into it
pip install ipykernel
python -m ipykernel install --user --name python3.9.4-spark-nb --display-name "Python 3.9 (spark-nb)"
import itertools

def chunks(l, n):
    """Yield successive n-sized chunks from l (requires len(), e.g. a list)."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

def chunks_iter(l, n):
    """Yield successive n-sized tuples from any iterable, including generators."""
    it = iter(l)
    while True:
        chunk = tuple(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk
def range_generator(upper, batch_size):
    """Return (start, end) index pairs covering [0, upper) in batch_size steps.

    Note: the last end may exceed upper; clamp with min(end, upper) if needed.
    """
    return [
        (i, i + batch_size)
        for i in range(0, upper, batch_size)
    ]
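# Quick sanity check of the three helpers above (expected values shown as comments):
list(chunks([1, 2, 3, 4, 5], 2))      # [[1, 2], [3, 4], [5]]
list(chunks_iter(iter(range(5)), 2))  # [(0, 1), (2, 3), (4,)]
range_generator(5, 2)                 # [(0, 2), (2, 4), (4, 6)]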
import os
import sys
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
from IPython.display import display, HTML
display(
HTML("<style>.container {width: 100% ! important;}</style>")
)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 200)
pd.set_option('display.max_colwidth', 400)
%matplotlib inline
sns.set()
%load_ext autoreload
%autoreload 2
xls = pd.ExcelFile('/apps/jupyter/users/vietvu/FS/reports/monthly_report/20220215/final/20220211_vnpt_fe_fs_final.xlsx')
print(f"Sheets: {xls.sheet_names}")
df_detail = xls.parse('Sheet1')
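# Optional convenience (same xls object as above): parse every sheet into a dict of DataFrames
dfs = {name: xls.parse(name) for name in xls.sheet_names}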
-- Oracle: all tables visible to the current user
select * from all_tables;
-- Oracle: object grants, filtered by owner or grantee
select * from table_privileges where owner = '' or grantee = '';
-- Postgres: tables in schemas starting with "data"
select * from pg_tables where schemaname like 'data%';
-- create table
spark.sparkContext.getConf().getAll()
from delta.tables import DeltaTable
deltaTable = DeltaTable.forPath(spark, "/data/events/")
deltaTable.delete("date = '2017-01-01'") # predicate using SQL formatted string
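# delete also accepts a Column predicate, equivalent to the string form above
import pyspark.sql.functions as F
deltaTable.delete(F.col("date") == "2017-01-01")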
# compact a partition by rewriting it with fewer files
# (did not work at the time of testing)
table = "/data/events/"   # example path; reuse the table above
numFiles = 16             # example target file count
partition = "date = '2021-10-20'"
(
    spark.read
    .format("delta")
    .load(table)
    .where(partition)
    .repartition(numFiles)
    .write
    .format("delta")
    .mode("overwrite")
    .option("replaceWhere", partition)
    .save(table)
)
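# Newer Delta Lake releases (delta-spark >= 2.0) expose compaction directly;
# a minimal sketch, assuming the table path and partition variables above:
from delta.tables import DeltaTable
DeltaTable.forPath(spark, table).optimize().where(partition).executeCompaction()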
# delta asVersion (time travel: read the table as of a past version)
df = (
    spark.read
    .format("delta")
    .option("versionAsOf", "5238")
    .load("/path/to/my/table")
)
# delta history
fullHistoryDF = deltaTable.history()
# join indicator
from pyspark.sql import DataFrame
import pyspark.sql.functions as F

def merge_indicator(
        df1: DataFrame,
        df2: DataFrame,
        col: str,
        how: str) -> DataFrame:
    """
    Merge dataframes and append a `_merge` column (both, right_only,
    left_only), like pandas' merge(indicator=True).
    """
    col2 = col + '2'
    df2 = df2.withColumnRenamed(col, col2)
    # join df1 with df2 (the original joined df2 with itself: a bug)
    sdf = df1.join(df2, df1[col] == df2[col2], how)
    out_df = (sdf
              .withColumn('_merge',
                          F.when(sdf[col].isNull(), 'right_only')
                          .when(sdf[col2].isNull(), 'left_only')
                          .otherwise('both'))
              .withColumn(col, F.coalesce(sdf[col], sdf[col2]))
              .drop(col2))
    return out_df
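# Illustrative usage with throwaway frames; a full outer join exercises all three flags:
df_a = spark.createDataFrame([(1,), (2,)], ['id'])
df_b = spark.createDataFrame([(2,), (3,)], ['id'])
merge_indicator(df_a, df_b, 'id', 'full').show()
# id=1 -> left_only, id=2 -> both, id=3 -> right_only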
# mount data from Google Drive for using in this notebook
from pathlib import Path
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
root_path = Path("/content/gdrive/My Drive/")
# logs
## show logs since a date (--since takes timestamps like 2022-01-01 or durations like 24h)
docker logs --since 2022-01-01 fsprod3.7
## stop a container from being auto-restarted
docker update --restart no redis
# disable TLS certificate verification globally (insecure; only for trusted/internal hosts)
import ssl
ssl._create_default_https_context = ssl._create_unverified_context