@arivero
Last active March 2, 2024 20:59
Grokking example, on the training dataset
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def create_synthetic_data(input_dim=1024, num_samples=10000):
    # One-hot labels; the input is the label itself, so the task is to learn the identity map.
    num_classes = input_dim
    y = np.random.randint(0, num_classes, size=(num_samples,))
    y = tf.keras.utils.to_categorical(y, num_classes)
    X = y
    return X, y

input_dim = 1024
num_samples = 10000
X, y = create_synthetic_data(input_dim=input_dim, num_samples=num_samples)

# Custom callback for detailed per-step logging
class DetailedLoggingCallback(tf.keras.callbacks.Callback):
    def on_train_begin(self, logs=None):
        self.step_accuracy = []
        self.learning_rate = []

    def on_train_batch_begin(self, batch, logs=None):
        # Reset the metric at the start of each batch
        self.model.reset_metrics()

    def on_train_batch_end(self, batch, logs=None):
        # Log the metric at the end of each batch
        self.step_accuracy.append(logs.get('accuracy'))
        # Note: this records the optimizer iteration count, not the learning rate itself.
        self.learning_rate.append(self.model.optimizer.iterations.numpy())

activators = ['selu', 'linear', 'elu', 'tanh', 'leaky_relu', 'softsign', 'relu6', 'relu', 'gelu', 'swish', 'softplus', 'sigmoid', 'hard_sigmoid', 'exponential']
activators = ['selu', 'tanh', 'linear', 'relu', 'gelu']  # reduced set actually used in the runs below
dtypes = [tf.float32, tf.float16, tf.float64]

def create_model(input_dim, num_layers, activation='relu', dtype=tf.float32):
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(input_dim,), dtype=dtype))
    for _ in range(num_layers):
        model.add(tf.keras.layers.Dense(128, activation=activation, dtype=dtype))
    model.add(tf.keras.layers.Dense(input_dim, activation='softmax', dtype=dtype))  # sigmoid would be for the multilabel case
    return model

#plt.rcParams.update({'axes.facecolor': 'white', 'figure.facecolor': 'white'})
input_dim = 1024
nlayers = 12
for tipo in dtypes:
    for opt in ['RMSprop', 'Adam', 'SGD', 'Adagrad', 'Adadelta', 'Adamax', 'Nadam', 'Ftrl']:
        plt.figure(figsize=(12, 8))
        order = {}
        line = {}
        for act in activators:
            model = create_model(input_dim=input_dim, num_layers=nlayers, activation=act, dtype=tipo)
            model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
            detailed_logging_callback = DetailedLoggingCallback()
            model.fit(X, y, epochs=2*nlayers*nlayers, batch_size=256, verbose=0, callbacks=[detailed_logging_callback])
            smoothed_accuracy = detailed_logging_callback.step_accuracy
            lr = detailed_logging_callback.learning_rate
            plt.scatter(range(len(smoothed_accuracy)), smoothed_accuracy, label=f'{act}', s=1)
        plt.xlabel('Training Step')
        plt.ylabel('Accuracy')
        plt.title(f'Per-Step {tipo.name} {opt} Training Accuracy for {nlayers} Layers and Different Activators')
        plt.legend()
        plt.show()
arivero commented Mar 2, 2024

At least in Keras, gelu and relu show some tendency to exhibit delayed learning, and the peculiar thing is that the delay is exaggerated by the optimiser. Compare the training of a 12-layer fully connected gelu network under the default RMSprop and the default Adam settings; a reproduction sketch follows the images.

[images: per-step training accuracy of the 12-layer gelu network, RMSprop vs Adam]
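A comparison along these lines can be reproduced with the functions defined in the listing above; the snippet below is a minimal sketch under that assumption (same 1024-dimensional one-hot data, 12 layers, 2*12*12 epochs as in the listing), not the exact original run.

# Minimal sketch (not the original run): the same 12-layer gelu network trained
# with the default RMSprop and the default Adam, per-step accuracy overlaid.
# Assumes create_synthetic_data, create_model and DetailedLoggingCallback from the listing above.
X, y = create_synthetic_data(input_dim=1024, num_samples=10000)
plt.figure(figsize=(12, 8))
for opt in ['RMSprop', 'Adam']:
    model = create_model(input_dim=1024, num_layers=12, activation='gelu')
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    cb = DetailedLoggingCallback()
    model.fit(X, y, epochs=2 * 12 * 12, batch_size=256, verbose=0, callbacks=[cb])
    plt.scatter(range(len(cb.step_accuracy)), cb.step_accuracy, label=opt, s=1)
plt.xlabel('Training Step')
plt.ylabel('Accuracy')
plt.title('12-layer gelu: RMSprop vs Adam, per-step training accuracy')
plt.legend()
plt.show()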

arivero commented Mar 2, 2024

The delay disappears, or at least decreases, if we do not use a bias in the embedding layer; a variant without that bias is sketched below.
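One way to try this with the model above is to drop the bias from the first Dense layer, which plays the role of the embedding of the one-hot input. The sketch below follows that reading rather than the author's exact code; create_model_nobias is a hypothetical helper, and the only change is use_bias=False on the first layer.

# Sketch: same architecture, but the first Dense layer (which embeds the one-hot
# input) is built without a bias term; everything else is as in the listing above.
def create_model_nobias(input_dim, num_layers, activation='relu', dtype=tf.float32):
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(input_dim,), dtype=dtype))
    model.add(tf.keras.layers.Dense(128, activation=activation, use_bias=False, dtype=dtype))
    for _ in range(num_layers - 1):
        model.add(tf.keras.layers.Dense(128, activation=activation, dtype=dtype))
    model.add(tf.keras.layers.Dense(input_dim, activation='softmax', dtype=dtype))
    return model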
