Skip to content

Instantly share code, notes, and snippets.

View kashif's full-sized avatar

Kashif Rasul kashif

  • Berlin, Germany
  • 08:23 (UTC +01:00)
  • X @krasul
View GitHub Profile
from collections import defaultdict
import numpy as np
import pandas as pd
from rich.console import Console
from rich.table import Table
import torch
import torch.nn as nn
@kashif
kashif / cem.md
Last active November 7, 2023 12:56
Cross Entropy Method

Cross Entropy Method

How do we solve the policy optimization problem, that is, how do we maximize the total reward obtained under some parametrized policy?

Discounted future reward

To begin with, the total reward for an episode is the sum of all the rewards collected during it. If our environment is stochastic, we can never be sure that we will get the same rewards the next time we perform the same actions, so the further we look into the future, the more the total future reward may diverge. For that reason it is common to use the discounted future reward instead, $R_t = r_t + \text{discount} \cdot r_{t+1} + \text{discount}^2 \cdot r_{t+2} + \dots$, where the parameter `discount` is called the discount factor and lies between 0 and 1.

A good strategy for an agent is to always choose the action that maximizes the (discounted) future reward. In other words, we want to maximize the expected reward per episode.

# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@kashif
kashif / fashion_mnist_cnn.py
Last active October 26, 2022 22:04
Fashion Mnist Benchmark
'''Trains a simple convnet on the Zalando MNIST dataset.
Gets to 81.03% test accuracy after 30 epochs
(there is still a lot of margin for parameter tuning).
3 seconds per epoch on a GeForce GTX 980 GPU with CuDNN 5.
'''
from __future__ import print_function
import numpy as np
from mnist import MNIST
@kashif
kashif / keras_metrics.py
Created November 1, 2021 10:08
pt-keras-metrics
import tensorflow as tf
import torch
from torchmetrics import Metric
def tf2pt(x_tf=None):
if x_tf is None:
return None
@kashif
kashif / cifar10_wide_resnet.py
Last active May 10, 2021 02:44
Keras Wide Residual Networks CIFAR-10
from __future__ import print_function
from keras.datasets import cifar10
from keras.layers import merge, Input
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, AveragePooling2D
from keras.layers.core import Dense, Activation, Flatten, Dropout
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
@kashif
kashif / cifar10_resnet.py
Last active February 3, 2021 09:06
Keras Pre-activation Residual Network for CIFAR-10
from __future__ import print_function
from keras.datasets import cifar10
from keras.layers import merge, Input
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, AveragePooling2D
from keras.layers.core import Dense, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
@kashif
kashif / batch_embedded_GBRT.py
Last active September 7, 2020 14:40
Batch GBRT
from sklearn.datasets import load_boston
from sklearn.linear_model import (LinearRegression, Ridge,
Lasso, RandomizedLasso)
from sklearn.feature_selection import RFE, f_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import numpy as np
#from minepy import MINE
from sklearn.metrics import mean_squared_error
@kashif
kashif / batch_EN.py
Last active September 7, 2020 14:40
Batch ElasticNet
from sklearn.datasets import load_boston
from sklearn.linear_model import (LinearRegression, Ridge, LassoCV, ElasticNetCV,
ElasticNet, Lasso, RandomizedLasso)
from sklearn.feature_selection import RFE, f_regression
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import numpy as np
import pdb
#from minepy import MINE
@kashif
kashif / batch_SGDEN.py
Last active September 7, 2020 14:39
Batch SGD ElasticNet
from sklearn.datasets import load_boston
from sklearn.linear_model import (LinearRegression, Ridge, SGDRegressor,
Lasso, ElasticNetCV)
from sklearn.preprocessing import MinMaxScaler
import numpy as np
#from minepy import MINE
from sklearn.metrics import mean_squared_error