Skip to content

Instantly share code, notes, and snippets.

@rpryzant
rpryzant / process.sh
Created Apr 6, 2017
Conjoining nmt datasets
View process.sh
#! /usr/bin/env bash
# Processes the v2 subtitles/aspec corpora into one combined corpus
# Requirements
# - Processed subtitlesv2 corpus
# - ASPEC
CORPUS1=$1
CORPUS2=$2
TARGET=$3 # target language = [ja, zh, fr]
LOC="/scr/rpryzant/chinese_english_corpora/"
@rpryzant
rpryzant / imdb.py
Created Aug 22, 2017
Fetch movie information from IMDB using Python
View imdb.py
# working version of https://gist.github.com/jayrambhia/1678382
import urllib2
import json
def search(query):
    """Search the movie database service for ``query``.

    The keyword is URL-encoded before it is placed in the query string, so
    pass the raw search text (e.g. ``"star wars"``), not a pre-encoded one.

    Args:
        query: Free-text search keyword.

    Returns:
        The service's JSON response, decoded with ``json.loads``.

    Raises:
        urllib2.URLError: If the request fails.
        ValueError: If the response body is not valid JSON.
    """
    # Local imports keep this block self-contained; the rest of the file
    # only imports urllib2 and json.
    from contextlib import closing
    try:
        from urllib.parse import quote_plus  # Python 3
    except ImportError:
        from urllib import quote_plus  # Python 2

    # Escape spaces, '&', '#', etc. so the keyword cannot break or alter
    # the query string.
    get_url = ('http://theapache64.xyz:8080/movie_db/search?keyword=%s'
               % quote_plus(query))
    # closing() guarantees the connection is released even if read() raises.
    with closing(urllib2.urlopen(get_url)) as conn:
        response = conn.read().decode('utf-8')
    return json.loads(response)
View attention
"""
Usage (for our feedforward context):
make sure you initialize the layer with
score_fn='bahdanau'
and then when you use the module in your forward()
method, you can feed it a vector of zeros for your query:
query = torch.zeros(rnn_outputs[:, 0, :].shape)
View gist:561cc1b4d372cce7479fd14290eacbc3
def rm_refs(x):
REF_RE = '<ref([-\w=" <>]+)?>.*?<([ ]+)?\/([ ]+)?ref>'
x = re.sub(REF_RE, ' ', x)
# leading </ref>
if '</ref>' in x:
x = re.sub(REF_RE, ' ', '<ref>' + x)
# trailing <ref>
if '<ref' in x:
x = re.sub(REF_RE, ' ', x + '</ref>')
@rpryzant
rpryzant / pc_utils.py
Last active Mar 18, 2020
Util functions for computing and removing principal components
View pc_utils.py
from sklearn.decomposition import TruncatedSVD
def compute_pc(X,npc=1):
"""
Compute the principal components.
X: numpy array [data, features]
npc: num principal components
"""
svd = TruncatedSVD(n_components=npc, n_iter=7, random_state=0)