王嘉楠 lan2720

## calculate_trainset_mean_std.py
"""
This script calculates the per-channel image mean and standard
deviation over the training set only. Do not calculate these statistics on
the whole dataset; see http://cs231n.github.io/neural-networks-2/#datapre
"""

import numpy as np
from os import listdir
from os.path import join, isdir
from glob import glob

## spacy_srl.py
# This small script shows how to use AllenNLP Semantic Role Labeling (http://allennlp.org/) with SpaCy 2.0 (http://spacy.io) components and extensions
# Script installs allennlp default model
# Important: Install allennlp from source and replace the spacy requirement with spacy-nightly in the requirements.txt
# Developed for SpaCy 2.0.0a18

from allennlp.commands import DEFAULT_MODELS
from allennlp.common.file_utils import cached_path
from allennlp.service.predictors import SemanticRoleLabelerPredictor
from allennlp.models.archival import load_archive

## pytorch_attention_audio.py
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim import lr_scheduler
import torch.utils.data as data
from torch.nn.utils.rnn import pack_padded_sequence as pack, pad_packed_sequence as unpack
import torchaudio
import torchaudio.transforms as tat

## gist:a835909ffd15b9927820d175a48dee41
import numpy as np

def ApEn(U, m, r):
    """Define windowed-distance helpers over ``U``.

    NOTE(review): presumably this computes approximate entropy, going by the
    name -- confirm against the full source. This excerpt is cut off inside
    ``_phi``: no return statement is visible, and ``N`` is never assigned in
    the visible lines (presumably ``N = len(U)`` -- TODO confirm).

    Args:
        U: Indexable sequence whose items support subtraction and ``abs``.
        m: Not used directly in the visible lines (``_phi`` takes its own
            ``m`` parameter, which shadows this one).
        r: Threshold compared against the distance between two windows.
    """

    def _maxdist(x_i, x_j):
        # Largest absolute element-wise difference between the paired items.
        return max([abs(ua - va) for ua, va in zip(x_i, x_j)])

    def _phi(m):
        # Every length-m window U[i:i+m] for i in range(N - m + 1).
        x = [[U[j] for j in range(i, i + m - 1 + 1)] for i in range(N - m + 1)]
        # For each window, the fraction of windows (itself included) whose
        # _maxdist to it is at most r.
        C = [len([1 for x_j in x if _maxdist(x_i, x_j) <= r]) / (N - m + 1.0) for x_i in x]

## pad_packed_demo.py
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

seqs = [
    'gigantic_string',
    'tiny_str',
    'medium_str',
]

# Index 0 is reserved for the padding token; the remaining entries are the
# distinct characters found across all sequences, in sorted order.
vocab = ['<pad>', *sorted({ch for seq in seqs for ch in seq})]

# make model

## internals.md

      
              1 file
            
          
              11 forks
            
          
              2 comments
            
          
              122 stars
            
          
                killeent
                / internals.md
            
            
              Last active
              February 14, 2023 05:15
            
          
    A Tour of PyTorch Internals (Part I)

The fundamental unit in PyTorch is the Tensor. This post will serve as an overview for how we implement Tensors in PyTorch, such that the user can interact with it from the Python shell. In particular, we want to answer four main questions:

How does PyTorch extend the Python interpreter to define a Tensor type that can be manipulated from Python code?
How does PyTorch wrap the C libraries that actually define the Tensor's properties and methods?
How does PyTorch cwrap work to generate code for Tensor methods?
How does PyTorch's build system take all of these components to compile and generate a workable application?

Extending the Python Interpreter

PyTorch defines a new package torch. In this post we will consider the ._C module. This module is known as an "extension module" - a Python module written in C. Such modules allow us to define new built-in object types (e.g. the Tensor) and to call C/C++ functions.

  
## viz_net_pytorch.py
from graphviz import Digraph
from torch.autograd import Variable
import torch


def make_dot(var, params=None):
    """Validate ``params`` and index its entries by object identity.

    NOTE(review): this excerpt ends right after ``param_map`` is built; the
    rest of the function (presumably the graph construction that uses
    ``var`` and ``param_map``) is not visible here.

    Args:
        var: Not used by the visible portion of the function.
        params: Optional mapping of name -> ``Variable``. When given and
            non-empty, its first value is checked to be a ``Variable``.

    Raises:
        AssertionError: If the first value of ``params`` is not a
            ``Variable``.
    """
    if params is not None:
        # dict.values() returns a non-subscriptable view on Python 3, so
        # materialize it before looking at the first element; an empty
        # mapping simply has nothing to validate.
        values = list(params.values())
        if values:
            assert isinstance(values[0], Variable)
        # Keyed by id() so an object can be mapped back to its name.
        param_map = {id(v): k for k, v in params.items()}

## masked_cross_entropy.py
def _sequence_mask(sequence_length, max_len=None):
    if max_len is None:
        max_len = sequence_length.data.max()
    batch_size = sequence_length.size(0)
    seq_range = torch.range(0, max_len - 1).long()
    seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
    seq_range_expand = Variable(seq_range_expand)
    if sequence_length.is_cuda:
        seq_range_expand = seq_range_expand.cuda()
    seq_length_expand = (sequence_length.unsqueeze(1)

## gensim2projector_tf.py
# requires tensorflow 0.12
# requires gensim 0.13.3+ for the new API model.wv.index2word; with older versions just use model.index2word

from gensim.models import Word2Vec
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

# loading your gensim
model = Word2Vec.load("YOUR-MODEL")

## projector.md

      
              3 files
            
          
              3 forks
            
          
              2 comments
            
          
              9 stars
            
          
                korakot
                /  projector.md
            
            
              Last active
              December 29, 2023 13:15
            
              
                Load data to Embedding Projector
              
          
    Tensorboard Embedding Projector can load external data from gist.
Call them with your own config like this
http://projector.tensorflow.org/?config=https://gist.githubusercontent.com/korakot/c480edd1fcf7e02c49ccddbf5ac43fb9/raw/21bcc93899e0a3db334e4d04b7f6ef7d6c2b09c8/config.json

tensorPath: numerical data without column name, in bytes or tsv format
metadataPath: labels or field columns. If there is only one column, it is assumed to be the label and no header line is needed.

More details from announcement,
how-to,
	"""
	in this script, we calculate the image per channel mean and standard
	deviation in the training set, do not calculate the statistics on the
	whole dataset, as per here http://cs231n.github.io/neural-networks-2/#datapre
	"""

	import numpy as np
	from os import listdir
	from os.path import join, isdir
	from glob import glob
	# This small script shows how to use AllenNLP Semantic Role Labeling (http://allennlp.org/) with SpaCy 2.0 (http://spacy.io) components and extensions
	# Script installs allennlp default model
	# Important: Install allennlp from source and replace the spacy requirement with spacy-nightly in the requirements.txt
	# Developed for SpaCy 2.0.0a18

	from allennlp.commands import DEFAULT_MODELS
	from allennlp.common.file_utils import cached_path
	from allennlp.service.predictors import SemanticRoleLabelerPredictor
	from allennlp.models.archival import load_archive
	import argparse
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from torch.autograd import Variable
	from torch.optim import lr_scheduler
	import torch.utils.data as data
	from torch.nn.utils.rnn import pack_padded_sequence as pack, pad_packed_sequence as unpack
	import torchaudio
	import torchaudio.transforms as tat
	import numpy as np

	def ApEn(U, m, r):

	def _maxdist(x_i, x_j):
	return max([abs(ua - va) for ua, va in zip(x_i, x_j)])

	def _phi(m):
	x = [[U[j] for j in range(i, i + m - 1 + 1)] for i in range(N - m + 1)]
	C = [len([1 for x_j in x if _maxdist(x_i, x_j) <= r]) / (N - m + 1.0) for x_i in x]
	from graphviz import Digraph
	from torch.autograd import Variable
	import torch


	def make_dot(var, params=None):
	if params is not None:
	assert isinstance(params.values()[0], Variable)
	param_map = {id(v): k for k, v in params.items()}
	def _sequence_mask(sequence_length, max_len=None):
	if max_len is None:
	max_len = sequence_length.data.max()
	batch_size = sequence_length.size(0)
	seq_range = torch.range(0, max_len - 1).long()
	seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
	seq_range_expand = Variable(seq_range_expand)
	if sequence_length.is_cuda:
	seq_range_expand = seq_range_expand.cuda()
	seq_length_expand = (sequence_length.unsqueeze(1)
	# required tensorflow 0.12
	# required gensim 0.13.3+ for new api model.wv.index2word or just use model.index2word

	from gensim.models import Word2Vec
	import tensorflow as tf
	from tensorflow.contrib.tensorboard.plugins import projector

	# loading your gensim
	model = Word2Vec.load("YOUR-MODEL")