# Clone your fork locally (SSH).
git clone git@github.com:YOUR-USERNAME/YOUR-FORKED-REPO.git
cd into/cloned/fork-repo
# Track the original repository as "upstream" so you can pull its changes.
# NOTE: GitHub shut down the unauthenticated git:// protocol (March 2022) — use https://.
git remote add upstream https://github.com/ORIGINAL-DEV-USERNAME/REPO-YOU-FORKED-FROM.git
git fetch upstream
# From https://stackoverflow.com/questions/23586510/return-multiple-columns-from-pandas-apply
def sizes(s):
    """Format a byte count ``s`` as locale-aware strings in KB, MB and GB.

    Returns a 3-tuple ``('x.x KB', 'x.x MB', 'x.x GB')``.
    """
    # locale.format() was deprecated since 3.7 and removed in 3.12;
    # locale.format_string() is the drop-in replacement.
    return (locale.format_string("%.1f", s / 1024.0, grouping=True) + ' KB',
            locale.format_string("%.1f", s / 1024.0 ** 2, grouping=True) + ' MB',
            locale.format_string("%.1f", s / 1024.0 ** 3, grouping=True) + ' GB')
df_test['size_kb'], df_test['size_mb'], df_test['size_gb'] = zip(*df_test['size'].apply(sizes)) |
# Flatten a 2-level column MultiIndex into single underscore-joined names.
# Inspect the columns first:
pivot.columns
# MultiIndex([('mean', 'is_suitable'),
#             ('size', 'is_suitable')],
#            )
# Join each (level0, level1) tuple with '_':
pivot.columns.map('_'.join)
# Index(['mean_is_suitable', 'size_is_suitable'], dtype='object')
def group_others(serie: pd.Series,
                 min_threshold: int) -> pd.Series:
    """
    This function finds categorical values with little representation
    and group them under the category "OTHERS" to mitigate the curse
    of dimensionality, thus avoiding overfitting
    """
    counts = serie.value_counts()
    # Categories whose frequency falls below the threshold.
    condition = (counts < min_threshold).values
    other_group = list(counts[condition].index)
    # NOTE(review): the original snippet was truncated after computing
    # other_group; the replacement step below follows the docstring's intent.
    return serie.apply(lambda v: 'OTHERS' if v in other_group else v)
import numpy as np
from keras.models import Sequential
# keras.layers.core was removed in modern Keras; the symbols live in keras.layers.
from keras.layers import Activation, Dense
from keras.optimizers import SGD

# XOR truth table: inputs X and expected outputs y.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], "float32")
y = np.array([[0], [1], [1], [0]], "float32")

model = Sequential()
model.add(Dense(2, input_dim=2, activation='sigmoid'))
# NOTE(review): snippet appears truncated — an output Dense(1) layer plus
# compile()/fit() would be required to actually train XOR.
import pylab as plt | |
plt.plot([1,2,3,10], [1,2,3,4]) | |
%matplot plt # Include this in the same cell as the plot |
def diversity_percentage(df, columns):
    """
    This function returns the number of different elements in each column as a percentage of the total elements in the group.
    A low value indicates there are many repeated elements.
    Example 1: a value of 0 indicates all values are the same.
    Example 2: a value of 100 indicates all values are different.
    """
    diversity = dict()
    for col in columns:
        # NOTE(review): the original loop body was truncated; reconstructed
        # from the docstring — unique values as a percentage of row count.
        diversity[col] = 100 * df[col].nunique() / len(df[col])
    return diversity
def plot_nulls(dataframe):
    """Draw a horizontal bar chart of the null percentage of every column.

    Relies on module-level ``plt`` (matplotlib.pyplot) and ``sns`` (seaborn).
    """
    def null_perc(frame):
        # Percentage of null entries per column.
        return 100 * frame.isnull().sum() / len(frame)

    nulls = null_perc(dataframe)
    plt.figure(1, figsize=(5, 20))  # Customize this if needed
    ax = sns.barplot(x=nulls, y=list(range(len(nulls))), orient='h', color="blue")
    # Relabel the y ticks with the column names instead of integer positions.
    _ = plt.yticks(plt.yticks()[0], nulls.index)
    ax.xaxis.set_ticks_position('top')
## VGG16 model for Keras
This is the Keras model of the 16-layer network used by the VGG team in the ILSVRC-2014 competition.
It has been obtained by directly converting the Caffe model provided by the authors.
Details about the network architecture can be found in the following arXiv paper:
Very Deep Convolutional Networks for Large-Scale Image Recognition
K. Simonyan, A. Zisserman
# IPython deprecated Tracer (since 5.1); set_trace is the supported equivalent.
from IPython.core.debugger import set_trace
# Place this call wherever you want to start debugging
set_trace()
""" | |
Some PDB Debuger commands: | |
n(ext) line and run this one | |
c(ontinue) running until next breakpoint |