Skip to content

Instantly share code, notes, and snippets.

View mark-clements's full-sized avatar

mark mark-clements

  • _
View GitHub Profile
@mark-clements
mark-clements / docker
Last active September 21, 2022 08:56
## docker command shortlist
# copy a file from inside a container to a path on the host
docker cp <containerId>:/file/path/within/container /host/path/target
# stop every running container (the inner command prints their IDs)
docker stop $(docker container ls -q)
# stop, then remove, a single container by name
docker stop containername
docker rm containername
docker system prune -a # delete all stopped containers, unused networks, unused images and build cache
# list all containers, including stopped ones
docker container ps -a
# list networks
docker network ls
# list volumes, names only
docker volume ls -q
######## VIF correlation
from statsmodels.stats.outliers_influence import variance_inflation_factor
# filter of quantitative columns retained thus far
cols = ['age', 'anc_1er_achat', 'anc_1ere_vis','anc_creation',
'anc_der_achat_j', 'anc_der_vis_j', 'ca_t4', 'del_cpt_1er_achat_j',
'mnt_achat_max', 'mnt_achat_min', 'mnt_achat_moy', 'nb_achats_t4',
'nb_cat_vis', 'nb_sec_vis', 'nb_vis_tot', 'nb_visit_apres-midi_14_18',
'nb_visit_matin_6_10', 'nb_visit_midi_10_14', 'nb_visit_soir_18_22',
'membercreationdate','firstattendingdate']
#############################################
# OneHotEncode Qualitative Variables
#############################################
# When incorporating nominal variables, you must first consider
# whether they have 'directionality'. Does converting 'red', 'green', 'blue'
# to 1, 2, 3 indicate that 'blue' is greater than 'red'? If the answer is no,
# you should not simply convert a column via factorize or to type 'category'.
# You should instead use OneHotEncoder or get_dummies, which create
# separate columns.
# simple bar plot
# Bar charts: the groups are typically categorical variables.
# Histograms: the groups are typically intervals of a continuous variable.
ax = r['percentage'].plot(kind="bar")
ax.set(xlabel='Total Purchases by Customer', ylabel='% of Best = True')
# Grid lines for the y-axis only, drawn faintly so the bars stay readable.
plt.gca().yaxis.grid(True, alpha = 0.5)
plt.title('Percentage of total Best==True group by Number of Purchases')
# show() must be called; a bare `plt.show` only references the function
# and never renders the figure when run as a script.
plt.show()
# Barplot of model performance
@mark-clements
mark-clements / lexicon - python (sqlalchemy)
Created June 10, 2020 09:02
lexicon - python (sqlalchemy)
# Import sqlalchemy's create_engine() and inspect() functions
from sqlalchemy import create_engine, inspect
# Create the database engine
engine = create_engine("sqlite:///data.db")
# View the tables in the database.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is available in both old and new releases.
print(inspect(engine).get_table_names())
# Create the database engine
library(kableExtra)
# KableExtra, example of setting conditional colour with base R:
# ifelse() picks "red" where mpg > 20 and "blue" otherwise, one colour per row.
# NOTE(review): this overwrites mtcars$mpg with the output of cell_spec()
# (presumably formatted markup strings rather than numbers), so do it only on
# a copy intended for display — confirm against the kableExtra docs.
mtcars$mpg <- cell_spec(mtcars$mpg, color = ifelse(mtcars$mpg > 20, "red", "blue"))
################################################################################
# print each element of a dt to table
################################################################################
```{r results='asis'}
# get factor levels used in model
model_step_5$xlevels
# fitted values of model
# NOTE(review): if `model` is an lm/glm object its element is named
# `fitted.values`, so `$fitted` would rely on partial matching of `$`;
# fitted(model) is the usual accessor — confirm the model class.
model$fitted
# predict against a new dataset: pass it via 'newdata', otherwise predict()
# falls back to the data the model was fitted on
predict(model, newdata = test, type = "response")
#### Some tabs in rmarkdown : {.tabset .tabset-fade}
##### Tab 1
This section must be one level below the tabset header above, or the tabs will not appear
##### Tab 2
Same rule as above
##############################################################################
# tidyr
##############################################################################
library(tidyr)
#####################################
# gather columns into key-value pairs
#####################################
# get first observation for each Species in iris data -- base R
mini_iris <- iris[c(1, 51, 101), ]
################################################################################
# File includes / common script headers
################################################################################
library(data.table)
# Set the working directory to the location of the current file
# (the path is obtained from the active RStudio document via rstudioapi).
# NOTE(review): setwd() returns the *previous* working directory, so `root`
# holds the directory that was active before this call, not the file's
# directory — confirm this is what is intended.
root <- setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
################################################################################
# Vignette, excerpts
################################################################################