Ricardo Guerrero Gómez-Olmedo ricgu8086

## apply_return_multiple_columns.py
# From https://stackoverflow.com/questions/23586510/return-multiple-columns-from-pandas-apply

def sizes(s):
    """
    Express a size in bytes as formatted KB, MB and GB strings.

    Each value is rendered with one decimal place and locale-aware digit
    grouping, followed by a space and its unit suffix.

    :param s: numeric size, in bytes.
    :return: a 3-tuple of strings ``(kilobytes, megabytes, gigabytes)``.
    """
    # locale.format() was deprecated in Python 3.7 and removed in 3.12;
    # locale.format_string() is the supported equivalent.
    units = (('KB', 1024.0), ('MB', 1024.0 ** 2), ('GB', 1024.0 ** 3))
    return tuple(
        locale.format_string("%.1f", s / divisor, grouping=True) + ' ' + unit
        for unit, divisor in units
    )
# apply() yields one (KB, MB, GB) tuple per row; zip(*...) transposes those
# rows into three column-wise sequences, one per new column.
kb_column, mb_column, gb_column = zip(*df_test['size'].apply(sizes))
df_test['size_kb'] = kb_column
df_test['size_mb'] = mb_column
df_test['size_gb'] = gb_column

## pandas_combine_multi_index.py
pivot.columns

MultiIndex([('mean', 'is_suitable'),
            ('size', 'is_suitable')],
           )

pivot.columns.map('_'.join)

Index(['mean_is_suitable', 'size_is_suitable'], dtype='object')

## group_others.py
def group_others(serie: pd.Series,
                  min_threshold: int) -> pd.Series:
  """
  This function finds categorical values with little representation
  and group them under the category "OTHERS" to mitigate the curse
  of dimensionality, thus avoiding overfitting
  """

  condition = (serie.value_counts() < min_threshold).values
  other_group = list(serie.value_counts()[condition].index)

## xor_keras.py
import numpy as np
from keras.models import Sequential
from keras.layers.core import Activation, Dense
from keras.optimizers import SGD

# XOR truth table: the four binary input pairs and their expected outputs.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype="float32")
y = np.array([[0], [1], [1], [0]], dtype="float32")

# Start the network with a 2-unit sigmoid layer fed by the two inputs.
model = Sequential()
model.add(Dense(2, input_dim=2, activation="sigmoid"))

## matplotlib_aws_emr.py
import pylab as plt

plt.plot([1,2,3,10], [1,2,3,4])
%matplot plt # Include this in the same cell as the plot

## diversity_percentage.py
def diversity_percentage(df, columns):
    """
    This function returns the number of different elements in each column as a percentage of the total elements in the group.
    A low value indicates there are many repeated elements.
    Example 1: a value of 0 indicates all values are the same.
    Example 2: a value of 100 indicates all values are different.
    """
    diversity = dict()

    for col in columns:

## plot_nulls.py
def plot_nulls(dataframe):
    """
    Draw a horizontal bar chart of the percentage of null values found by
    ``dataframe.isnull()``, one bar per entry of the resulting sums.
    """
    # Share of null entries, expressed as a percentage of the row count.
    null_share = 100 * dataframe.isnull().sum() / len(dataframe)

    plt.figure(1, figsize=(5, 20))  # Customize this if needed
    bar_positions = list(range(len(null_share)))
    axes = sns.barplot(x=null_share, y=bar_positions, orient='h', color="blue")

    # Keep the tick locations chosen by the plot, but label them with the
    # index of the null-percentage series instead of plain integers.
    tick_locations = plt.yticks()[0]
    _ = plt.yticks(tick_locations, null_share.index)
    axes.xaxis.set_ticks_position('top')

## gist:1775ad401ff2b4a97cddb4d40acfdbb8

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ricgu8086
                / gist:1775ad401ff2b4a97cddb4d40acfdbb8
            
            
              Created
              December 1, 2017 20:06
                — forked from CristinaSolana/gist:1885435
            
              
                Keeping a fork up to date
              
          
    1. Clone your fork:

git clone git@github.com:YOUR-USERNAME/YOUR-FORKED-REPO.git

2. Add remote from original repository in your forked repository:

cd into/cloned/fork-repo
git remote add upstream git://github.com/ORIGINAL-DEV-USERNAME/REPO-YOU-FORKED-FROM.git
git fetch upstream


## readme.md

      
              2 files
            
          
              1 fork
            
          
              0 comments
            
          
              0 stars
            
          
                ricgu8086
                / readme.md
            
            
              Created
              October 16, 2016 19:19
                — forked from baraldilorenzo/readme.md
            
              
                VGG-16 pre-trained model for Keras
              
          
    ##VGG16 model for Keras
This is the Keras model of the 16-layer network used by the VGG team in the ILSVRC-2014 competition.
It has been obtained by directly converting the Caffe model provided by the authors.
Details about the network architecture can be found in the following arXiv paper:
Very Deep Convolutional Networks for Large-Scale Image Recognition

K. Simonyan, A. Zisserman

  
## Debug Jupyter Notebook.py
from IPython.core.debugger import set_trace


# Place this call wherever you want to start debugging.
# set_trace() replaces Tracer()(), which has been deprecated since IPython 5.1.
set_trace()

"""
Some PDB Debugger commands:
n(ext) line and run this one
c(ontinue) running until next breakpoint
	# From https://stackoverflow.com/questions/23586510/return-multiple-columns-from-pandas-apply

	def sizes(s):
	return locale.format("%.1f", s / 1024.0, grouping=True) + ' KB', \
	locale.format("%.1f", s / 1024.0 ** 2, grouping=True) + ' MB', \
	locale.format("%.1f", s / 1024.0 ** 3, grouping=True) + ' GB'
	df_test['size_kb'], df_test['size_mb'], df_test['size_gb'] = zip(*df_test['size'].apply(sizes))
	pivot.columns

	MultiIndex([('mean', 'is_suitable'),
	('size', 'is_suitable')],
	)

	pivot.columns.map('_'.join)

	Index(['mean_is_suitable', 'size_is_suitable'], dtype='object')
	def group_others(serie: pd.Series,
	min_threshold: int) -> pd.Series:
	"""
	This function finds categorical values with little representation
	and group them under the category "OTHERS" to mitigate the curse
	of dimensionality, thus avoiding overfitting
	"""

	condition = (serie.value_counts() < min_threshold).values
	other_group = list(serie.value_counts()[condition].index)
	import numpy as np
	from keras.models import Sequential
	from keras.layers.core import Activation, Dense
	from keras.optimizers import SGD

	X = np.array([[0,0],[0,1],[1,0],[1,1]], "float32")
	y = np.array([[0],[1],[1],[0]], "float32")

	model = Sequential()
	model.add(Dense(2, input_dim=2, activation='sigmoid'))
	import pylab as plt

	plt.plot([1,2,3,10], [1,2,3,4])
	%matplot plt # Include this in the same cell as the plot
	def diversity_percentage(df, columns):
	"""
	This function returns the number of different elements in each column as a percentage of the total elements in the group.
	A low value indicates there are many repeated elements.
	Example 1: a value of 0 indicates all values are the same.
	Example 2: a value of 100 indicates all values are different.
	"""
	diversity = dict()

	for col in columns:
	def plot_nulls(dataframe):

	def null_perc(dataframe):
	return 100*dataframe.isnull().sum()/len(dataframe)

	nulls = null_perc(dataframe)
	plt.figure(1, figsize=(5,20)) # Customize this if needed
	ax = sns.barplot(x=nulls, y=list(range(len(nulls))), orient='h', color="blue")
	_ = plt.yticks(plt.yticks()[0], nulls.index)
	ax.xaxis.set_ticks_position('top')
	from IPython.core.debugger import Tracer;


	# Place this call wherever you want to start debugging
	Tracer()()

	"""
	Some PDB Debugger commands:
	n(ext) line and run this one
	c(ontinue) running until next breakpoint