Jaemin Cho j-min

## gensim2projector_tf.py
# requires tensorflow 0.12
# requires gensim 0.13.3+ for the new API model.wv.index2word (otherwise just use model.index2word)

from gensim.models import Word2Vec
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

# load your gensim model
model = Word2Vec.load("YOUR-MODEL")

## bnlstm.py
"""adapted from https://github.com/OlavHN/bnlstm to store separate population statistics per state"""
import tensorflow as tf, numpy as np
RNNCell = tf.nn.rnn_cell.RNNCell

class BNLSTMCell(RNNCell):
    '''Batch normalized LSTM as described in arxiv.org/abs/1603.09025'''
    def __init__(self, num_units, is_training_tensor, max_bn_steps, initial_scale=0.1, activation=tf.tanh, decay=0.95):
        """
        * max bn steps is the maximum number of steps for which to store separate population stats
        """

## labels_1024.tsv
7
2
1
0
4
1
4
9
5
9

## treernn.py
"""
TreeLSTM[1] implementation in Pytorch

Based on dynet benchmarks :
  https://github.com/neulab/dynet-benchmark/blob/master/dynet-py/treenn.py
  https://github.com/neulab/dynet-benchmark/blob/master/chainer/treenn.py
Other References:
  https://github.com/pytorch/examples/tree/master/word_language_model
  https://github.com/pfnet/chainer/blob/29c67fe1f2140fa8637201505b4c5e8556fad809/chainer/functions/activation/slstm.py
  https://github.com/stanfordnlp/treelstm

## RWA.py
# An implementation of "Machine Learning on Sequential Data Using a Recurrent Weighted Average" using pytorch
# https://arxiv.org/pdf/1703.01253.pdf
#
#
# This is an RNN (recurrent neural network) type that uses a weighted average of values seen in the past, rather
# than a separate running state.
#
# Check the test code at the bottom for an example of usage, where you can compare its performance
# against LSTM and GRU on a classification task from the paper. It handily beats both the LSTM and
# GRU :)

## internals.md

      
              1 file
            
          
              11 forks
            
          
              2 comments
            
          
              122 stars
            
          
                killeent
                / internals.md
            
            
              Last active
              February 14, 2023 05:15
            
          
    A Tour of PyTorch Internals (Part I)

The fundamental unit in PyTorch is the Tensor. This post will serve as an overview of how we implement Tensors in PyTorch, such that the user can interact with them from the Python shell. In particular, we want to answer four main questions:

How does PyTorch extend the Python interpreter to define a Tensor type that can be manipulated from Python code?
How does PyTorch wrap the C libraries that actually define the Tensor's properties and methods?
How does PyTorch cwrap work to generate code for Tensor methods?
How does PyTorch's build system take all of these components to compile and generate a workable application?

Extending the Python Interpreter

PyTorch defines a new package torch. In this post we will consider the ._C module. This module is known as an "extension module" - a Python module written in C. Such modules allow us to define new built-in object types (e.g. the Tensor) and to call C/C++ functions.

  
## dropout_bayesian_approximation_tensorflow.py
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.contrib.distributions import Bernoulli


class VariationalDense:
    """Variational Dense Layer Class"""
    def __init__(self, n_in, n_out, model_prob, model_lam):
        self.model_prob = model_prob

## compact_bilinear_pooling.py
# References:
# [1] Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding, Fukui et al., https://arxiv.org/abs/1606.01847
# [2] Compact Bilinear Pooling, Gao et al., https://arxiv.org/abs/1511.06062
# [3] Fast and Scalable Polynomial Kernels via Explicit Feature Maps, Pham and Pagh, https://chbrown.github.io/kdd-2013-usb/kdd/p239.pdf
# [4] Fastfood — Approximating Kernel Expansions in Loglinear Time, Le et al., https://arxiv.org/abs/1408.3060
# [5] Original implementation in Caffe: https://github.com/gy20073/compact_bilinear_pooling

# TODO: migrate to use of new native complex64 types
# TODO: change strided x coo matmul to torch.matmul(): M[sparse_coo] @ M[strided] -> M[strided]

## tied_linear.py
import torch, torch.nn as nn, torch.nn.functional as F
import numpy as np
import torch.optim as optim

# tied autoencoder using off the shelf nn modules
class TiedAutoEncoderOffTheShelf(nn.Module):
    def __init__(self, inp, out, weight):
        super().__init__()
        self.encoder = nn.Linear(inp, out, bias=False)
        self.decoder = nn.Linear(out, inp, bias=False)

## top-k-top-p.py
def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution shape (vocabulary size)
            top_k >0: keep only top k tokens with highest probability (top-k filtering).
            top_p >0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
    """
    assert logits.dim() == 1  # batch size 1 for now - could be updated for more but the code would be less clear
    top_k = min(top_k, logits.size(-1))  # Safety check
	# requires tensorflow 0.12
	# requires gensim 0.13.3+ for the new API model.wv.index2word (otherwise just use model.index2word)

	from gensim.models import Word2Vec
	import tensorflow as tf
	from tensorflow.contrib.tensorboard.plugins import projector

	# loading your gensim
	model = Word2Vec.load("YOUR-MODEL")
	"""adapted from https://github.com/OlavHN/bnlstm to store separate population statistics per state"""
	import tensorflow as tf, numpy as np
	RNNCell = tf.nn.rnn_cell.RNNCell

	class BNLSTMCell(RNNCell):
	'''Batch normalized LSTM as described in arxiv.org/abs/1603.09025'''
	def __init__(self, num_units, is_training_tensor, max_bn_steps, initial_scale=0.1, activation=tf.tanh, decay=0.95):
	"""
	* max bn steps is the maximum number of steps for which to store separate population stats
	"""
	"""
	TreeLSTM[1] implementation in Pytorch

	Based on dynet benchmarks :
	https://github.com/neulab/dynet-benchmark/blob/master/dynet-py/treenn.py
	https://github.com/neulab/dynet-benchmark/blob/master/chainer/treenn.py
	Other References:
	https://github.com/pytorch/examples/tree/master/word_language_model
	https://github.com/pfnet/chainer/blob/29c67fe1f2140fa8637201505b4c5e8556fad809/chainer/functions/activation/slstm.py
	https://github.com/stanfordnlp/treelstm
	# An implementation of "Machine Learning on Sequential Data Using a Recurrent Weighted Average" using pytorch
	# https://arxiv.org/pdf/1703.01253.pdf
	#
	#
	# This is an RNN (recurrent neural network) type that uses a weighted average of values seen in the past, rather
	# than a separate running state.
	#
	# Check the test code at the bottom for an example of usage, where you can compare its performance
	# against LSTM and GRU on a classification task from the paper. It handily beats both the LSTM and
	# GRU :)
	import numpy as np
	import tensorflow as tf
	import matplotlib.pyplot as plt
	from tensorflow.contrib.distributions import Bernoulli


	class VariationalDense:
	"""Variational Dense Layer Class"""
	def __init__(self, n_in, n_out, model_prob, model_lam):
	self.model_prob = model_prob
	# References:
	# [1] Multimodal Compact Bilinear Pooling for Visual Question Answering and Visual Grounding, Fukui et al., https://arxiv.org/abs/1606.01847
	# [2] Compact Bilinear Pooling, Gao et al., https://arxiv.org/abs/1511.06062
	# [3] Fast and Scalable Polynomial Kernels via Explicit Feature Maps, Pham and Pagh, https://chbrown.github.io/kdd-2013-usb/kdd/p239.pdf
	# [4] Fastfood — Approximating Kernel Expansions in Loglinear Time, Le et al., https://arxiv.org/abs/1408.3060
	# [5] Original implementation in Caffe: https://github.com/gy20073/compact_bilinear_pooling

	# TODO: migrate to use of new native complex64 types
	# TODO: change strided x coo matmul to torch.matmul(): M[sparse_coo] @ M[strided] -> M[strided]
	import torch, torch.nn as nn, torch.nn.functional as F
	import numpy as np
	import torch.optim as optim

	# tied autoencoder using off the shelf nn modules
	class TiedAutoEncoderOffTheShelf(nn.Module):
	def __init__(self, inp, out, weight):
	super().__init__()
	self.encoder = nn.Linear(inp, out, bias=False)
	self.decoder = nn.Linear(out, inp, bias=False)
	def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
	""" Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
	Args:
	logits: logits distribution shape (vocabulary size)
	top_k >0: keep only top k tokens with highest probability (top-k filtering).
	top_p >0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
	Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
	"""
	assert logits.dim() == 1 # batch size 1 for now - could be updated for more but the code would be less clear
	top_k = min(top_k, logits.size(-1)) # Safety check