杨海宏 RamonYeung

## BPE
import re, collections

def get_stats(vocab):
   pairs = collections.defaultdict(int)
   for word, freq in vocab.items():
       symbols = word.split()
       for i in range(len(symbols)-1):
           pairs[symbols[i],symbols[i+1]] += freq
   return pairs

## bad_grad_viz.py
from graphviz import Digraph
import torch
from torch.autograd import Variable, Function

def iter_graph(root, callback):
    queue = [root]
    seen = set()
    while queue:
        fn = queue.pop()
        if fn in seen:

## tmux_cheatsheet.markdown

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                RamonYeung
                / tmux_cheatsheet.markdown
            
            
              Created
              April 6, 2019 09:05
                — forked from henrik/tmux_cheatsheet.markdown
            
              
                tmux cheatsheet
              
          
    tmux cheatsheet

As configured in my dotfiles.
start new:
tmux

start new with session name:

  
## tmux-cheatsheet.markdown

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                RamonYeung
                / tmux-cheatsheet.markdown
            
            
              Created
              April 5, 2019 03:38
                — forked from MohamedAlaa/tmux-cheatsheet.markdown
            
              
                tmux shortcuts & cheatsheet
              
          
    tmux shortcuts & cheatsheet

start new:
tmux

start new with session name:
tmux new -s myname


## pad_packed_demo.py
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

seqs = ['gigantic_string','tiny_str','medium_str']

# make <pad> idx 0
vocab = ['<pad>'] + sorted(set(''.join(seqs)))

# make model

## download_glue_data.py
''' Script for downloading all GLUE data.

Note: for legal reasons, we are unable to host MRPC.
You can either use the version hosted by the SentEval team, which is already tokenized,
or you can download the original data from (https://download.microsoft.com/download/D/4/6/D46FF87A-F6B9-4252-AA8B-3604ED519838/MSRParaphraseCorpus.msi) and extract the data from it manually.
For Windows users, you can run the .msi file. For Mac and Linux users, consider an external library such as 'cabextract' (see below for an example).
You should then rename and place specific files in a folder (see below for an example).

mkdir MRPC
cabextract MSRParaphraseCorpus.msi -d MRPC

## fast_glove.py
# coding: utf-8

import logging
import re
from collections import Counter

import numpy as np
import torch
from sklearn.datasets import fetch_20newsgroups
from torch.autograd import Variable

## rank_metrics.py
"""Information Retrieval metrics

Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np

## pg-pong.py
""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
import numpy as np
import cPickle as pickle
import gym

# hyperparameters
H = 200 # number of hidden layer neurons
batch_size = 10 # every how many episodes to do a param update?
learning_rate = 1e-4
gamma = 0.99 # discount factor for reward

## tree.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                RamonYeung
                / tree.md
            
            
              Created
              December 29, 2016 08:15
                — forked from upsuper/tree.md
            
              
                一行 Python 实现树
              
          
    一行 Python 实现树

使用 Python 内置的 defaultdict，我们可以很容易的定义一个树形数据结构：
def tree(): return defaultdict(tree)
就是这样！
	import re, collections

	def get_stats(vocab):
	pairs = collections.defaultdict(int)
	for word, freq in vocab.items():
	symbols = word.split()
	for i in range(len(symbols)-1):
	pairs[symbols[i],symbols[i+1]] += freq
	return pairs
	from graphviz import Digraph
	import torch
	from torch.autograd import Variable, Function

	def iter_graph(root, callback):
	queue = [root]
	seen = set()
	while queue:
	fn = queue.pop()
	if fn in seen:
	import torch
	import torch.nn as nn
	from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

	seqs = ['gigantic_string','tiny_str','medium_str']

	# make <pad> idx 0
	vocab = ['<pad>'] + sorted(set(''.join(seqs)))

	# make model
	''' Script for downloading all GLUE data.

	Note: for legal reasons, we are unable to host MRPC.
	You can either use the version hosted by the SentEval team, which is already tokenized,
	or you can download the original data from (https://download.microsoft.com/download/D/4/6/D46FF87A-F6B9-4252-AA8B-3604ED519838/MSRParaphraseCorpus.msi) and extract the data from it manually.
	For Windows users, you can run the .msi file. For Mac and Linux users, consider an external library such as 'cabextract' (see below for an example).
	You should then rename and place specific files in a folder (see below for an example).

	mkdir MRPC
	cabextract MSRParaphraseCorpus.msi -d MRPC
	# coding: utf-8

	import logging
	import re
	from collections import Counter

	import numpy as np
	import torch
	from sklearn.datasets import fetch_20newsgroups
	from torch.autograd import Variable
	"""Information Retrieval metrics

	Useful Resources:
	http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
	http://www.nii.ac.jp/TechReports/05-014E.pdf
	http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
	http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
	Learning to Rank for Information Retrieval (Tie-Yan Liu)
	"""
	import numpy as np
	""" Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """
	import numpy as np
	import cPickle as pickle
	import gym

	# hyperparameters
	H = 200 # number of hidden layer neurons
	batch_size = 10 # every how many episodes to do a param update?
	learning_rate = 1e-4
	gamma = 0.99 # discount factor for reward