Skip to content

Instantly share code, notes, and snippets.

View data4sci's full-sized avatar

Robert Samarek data4sci

  • VSB - Technical University of Ostrava
  • Ostrava, Czech Republic
View GitHub Profile
@data4sci
data4sci / pandas_profiling.py
Created June 10, 2021 08:24
pandas-profiling
import pandas as pd
import numpy as np
#from ipywidgets import widgets
from pandas_profiling import ProfileReport
#from pandas_profiling.utils.cache import cache_file
# Build a profiling report for `df` (the DataFrame is not defined in this
# snippet and must already exist in the session).
profile = ProfileReport(
    df,
    title="#### Title ####",
    html={"style": {"full_width": True}},
    sort="None",
)
# Render the HTML report inside an iframe in the notebook.
profile.to_notebook_iframe()
# Alternatively, evaluating the bare `profile` object also works.
profile
@data4sci
data4sci / import.py
Created June 10, 2021 08:23
import lokálního modulu
# `sys` must be imported before its path list can be extended.
import sys

# Put the scripts directory on the module search path so that the imports
# below can be resolved (presumably `etl_utils_package` lives there — confirm).
sys.path.append("/etc/scripts/etl/etl_scripts/")
from etl_utils_package.edison import edison_connect, edison_programy2df
from etl_utils_package.dwh import conn_close
@data4sci
data4sci / fuzzy.py
Created June 10, 2021 08:19
fuzzy matching - fuzzymatcher
import fuzzymatcher
# Column names passed as `left_on` to fuzzymatcher.fuzzy_left_join below.
left_on = [
    "Facility Name",
    "Address",
    "City",
    "State",
]
# Column names passed as `right_on`; presumably paired positionally with
# `left_on` — confirm against the fuzzymatcher documentation.
right_on = [
    "Provider Name",
    "Provider Street Address",
    "Provider City",
    "Provider State",
]
matched_results = fuzzymatcher.fuzzy_left_join(hospital_accounts,
hospital_reimbursement,
left_on,
right_on,
@data4sci
data4sci / applymap.py
Created June 10, 2021 08:18
pandas style.applymap
def color_neprijat_red(val):
    """Return the CSS text-colour declaration for a single cell value.

    Args:
        val: The cell value to inspect.

    Returns:
        ``'color: red'`` when ``val`` equals ``'Nepřijat'``, otherwise
        ``'color: black'``.
    """
    color = "red" if val == "Nepřijat" else "black"
    return f"color: {color}"
# Apply the colouring function element-wise to the Styler of `df`
# (`df` is not defined in this snippet).
# NOTE(review): newer pandas releases rename Styler.applymap to Styler.map —
# confirm against the installed pandas version.
df.style.applymap(color_neprijat_red)
@data4sci
data4sci / comma_sep_list.py
Last active June 10, 2021 08:26
groupby ==>> pd.Series, vícenásobné hodnoty odděleny čárkou
# For each ID_PERSON group, join its CISLO_PRIHLASKY values into a single
# ", "-separated string (str.join assumes the values are strings).
cisla = (
    df.groupby(["ID_PERSON"])["CISLO_PRIHLASKY"]
    .apply(lambda values: ", ".join(values))
)
# General pattern:
df.groupby("content_id")["tag"].apply(lambda group: ",".join(group))
@data4sci
data4sci / scatter_matrix.py
Created June 10, 2021 08:15
scatter matrix
# Draw a matrix of scatter plots for the columns of `df` using pandas.
grr = pd.plotting.scatter_matrix(df, figsize=(15, 15), marker='o')
# or, alternatively, with seaborn
import seaborn as sns
# Pair plot of `piv` using the "reg" plot kind.
sns.pairplot(piv, kind = "reg")
# NOTE(review): `plt` is not imported in this snippet — presumably
# matplotlib.pyplot imported elsewhere; confirm.
plt.show()
@data4sci
data4sci / co_occurence.py
Created June 10, 2021 08:14
co-occurrence matrix #python #pandas
# Co-occurrence matrix: entry (i, j) is the dot product of columns i and j
# of `df` (`df` is not defined in this snippet).
coocc = df.T.dot(df)
# Optionally zero the diagonal (each column's product with itself).
# DataFrame.mask is used instead of np.fill_diagonal(coocc.values, 0) because
# writing through `.values` depends on it being a writable view: it silently
# does nothing when `.values` is a copy and raises when the array is
# read-only (pandas Copy-on-Write).
coocc = coocc.mask(np.eye(len(coocc), dtype=bool), 0)
@data4sci
data4sci / monitor.sh
Created June 10, 2021 08:12
monitor settings #xrandr
# Terminal transcript: lines starting with "$ " are commands, the rest is output.
# Compute a CVT modeline for 3200x1800 at 60 Hz (-r: reduced blanking, -v: verbose).
$ cvt -r -v 3200 1800 60
# 3200x1800 59.94 Hz (CVT 5.76M9-R) hsync: 111.01 kHz; pclk: 373.00 MHz
Modeline "3200x1800R" 373.00 3200 3248 3280 3360 1800 1803 1808 1852 +hsync -vsync
# Define the new mode for xrandr, reusing the Modeline values printed above.
$ xrandr --newmode "3200x1800R" 373.00 3200 3248 3280 3360 1800 1803 1808 1852 +hsync -vsync
# Add the new mode to the list of modes available on output eDP-1.
$ xrandr --addmode eDP-1 3200x1800R
# Disabled: switch output eDP-1 to the new mode.
#$ xrandr --output eDP-1 --mode 3200x1800R
@data4sci
data4sci / sys_info.sh
Created June 10, 2021 08:11
system info
# Print a system summary: -F full output, -x extra detail,
# -z filter out identifying data (see the inxi man page).
inxi -Fxz
@data4sci
data4sci / clone_to_folder.sh
Created June 10, 2021 08:10
clone repo to existing folder
# Attach the existing folder test/ to the remote repository: clone into a
# scratch directory, move only its .git metadata into test/, then delete the
# scratch clone. The steps are chained with && so that a failed clone or move
# never reaches the following commands (in particular `rm -rf temp`, which
# would otherwise delete a pre-existing temp directory).
git clone git@gitlab.vsb.cz:sam029/p000-sam029-test.git temp &&
mv temp/.git test/.git &&
rm -rf temp