Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/perl
use warnings;
use strict;
use Carp qw{ confess };
use List::Util qw{ shuffle };
use Test::More tests => 5;
use Benchmark qw{ cmpthese };
# File-scoped lexical array, declared here without an initial value.
# NOTE(review): presumably holds the attribute names exercised by the tests
# and benchmark this script sets up -- confirm against the code that fills it.
my @ATTRS;
@martinpopel
martinpopel / t2t_text2subwords.py
Created February 13, 2018 10:16
A script to compute the number of subwords in a given raw bi-text, useful for estimating the number of training epochs in Tensor2Tensor (T2T).
#!/usr/bin/env python3
from tensor2tensor.data_generators import text_encoder
import tensorflow as tf
import sys
# Command-line flag setup: keep short module-level aliases for TensorFlow's
# flags module and for the FLAGS object it exposes.
# NOTE(review): `tf.flags` is the TF 1.x spelling; under TF 2.x the same API
# is reached as `tf.compat.v1.flags` -- confirm the targeted TF version.
flags = tf.flags
FLAGS = flags.FLAGS
# --vocab: string flag with default None; help text "Path to the subword vocabulary".
flags.DEFINE_string("vocab", None, "Path to the subword vocabulary")