Skip to content

Instantly share code, notes, and snippets.

@yohokuno
yohokuno / residual_rnn_cell.py
Created January 18, 2017 10:14
Residual RNN cell for TensorFlow 0.10
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import RNNCell
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.util import nest
class ResidualRNNCell(RNNCell):
"""RNN cell composed sequentially of multiple simple cells with residual connection."""
def __init__(self, cells, state_is_tuple=False):
@yohokuno
yohokuno / ptb_word_lm.py
Last active May 3, 2018 07:33
TensorFlow 0.10 implementation of Variational Dropout (paper: https://arxiv.org/abs/1512.05287)
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@yohokuno
yohokuno / ptb_word_lm.py
Created December 22, 2016 14:13
TensorFlow implementation of "A Theoretically Grounded Application of Dropout in Recurrent Neural Networks" by Yarin Gal and Zoubin Ghahramani
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@yohokuno
yohokuno / ptb_word_lm.py
Created December 22, 2016 14:13
TensorFlow implementation of "A Theoretically Grounded Application of Dropout in Recurrent Neural Networks" by Yarin Gal and Zoubin Ghahramani
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@yohokuno
yohokuno / ptb_word_lm.py
Created December 22, 2016 14:13
TensorFlow implementation of "A Theoretically Grounded Application of Dropout in Recurrent Neural Networks" by Yarin Gal and Zoubin Ghahramani
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
@yohokuno
yohokuno / open_subtitles.py
Created August 26, 2016 11:00
Open Subtitle parser for neural conversation model
#!/usr/bin/env python3
import argparse
import xml.etree.ElementTree as ET
import tarfile
import gzip
# Download open subtitles from
# http://opus.lingfil.uu.se/download.php?f=OpenSubtitles2016/ja.tar.gz
def create_pairs(sentences):
@yohokuno
yohokuno / twitter_replies.py
Last active December 15, 2017 05:27
Twitter replies collector from public stream API
#!/usr/bin/env python3
import tweepy
import re
import argparse
import sys
import time
import traceback
# Fill with your Twitter API keys!
consumer_key = ''
@yohokuno
yohokuno / bleu.py
Last active August 17, 2016 10:40
BLEU: automatic evaluation for machine translation
#!/usr/bin/env python3
import argparse
import math
def ngrams(sentence, n):
    """Yield every contiguous window of ``n + 1`` items from *sentence*.

    Note that ``n`` is zero-based: ``n=0`` yields 1-tuples, ``n=1`` yields
    2-tuples, and so on.

    Args:
        sentence: A sliceable sequence with a length (e.g. a list of tokens
            or a string).
        n: Window size minus one.

    Yields:
        Tuples of ``n + 1`` consecutive items, in order of their start
        position. Nothing is yielded when *sentence* has ``n`` or fewer
        items.
    """
    # A window of n + 1 items starting at i must end at or before the last
    # item, so valid start positions are 0 .. len(sentence) - n - 1.
    for i in range(len(sentence) - n):
        yield tuple(sentence[i:i + n + 1])
@yohokuno
yohokuno / eijiro.py
Last active August 26, 2016 11:05
Preprocess English-Japanese parallel corpus in Eijiro
#!/usr/bin/env python3
import argparse
import re
# Eijiro is an English-Japanese dictionary and contains a 600k parallel corpus that can be used for training machine translation models.
# http://www.eijiro.jp/
# This script takes REIJI1441.TXT (converted to UTF-8) and splits it into English and Japanese files.
# It additionally cleans some tags like ■, 【出典】 and 〈俗〉.
# Example command: split_eijiro.py REIJI1441.UTF8.TXT en ja