Skip to content

Instantly share code, notes, and snippets.

@kingjr
Created June 8, 2018 21:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save kingjr/97d0f942b8b0b62d8378a1c1af3748dd to your computer and use it in GitHub Desktop.
Save kingjr/97d0f942b8b0b62d8378a1c1af3748dd to your computer and use it in GitHub Desktop.
Find the simplest Google Translate request that generates the maximum number of unique words
from googletrans import Translator
from itertools import product
from pandas import DataFrame, read_csv
import numpy as np
import string
import time
import os
# Brute-force search for the short repeated-word inputs that Google Translate
# turns into the largest number of distinct output words. Results are cached
# in results.csv so that an interrupted run can skip inputs it already tried.

# Get google translator object
translator = Translator()

# Reload earlier results when the cache file exists
if os.path.isfile('results.csv'):
    print('load...')
    results = read_csv('results.csv')
else:
    results = DataFrame(columns=('input', 'output', 'size'))

# Inputs that already have a result; a set makes the per-candidate check O(1)
# instead of scanning the whole column on every iteration.
done = set(results['input'])

# make simple alphabet
az = string.ascii_lowercase[:26]
n_words = 20  # number of times the candidate word is repeated per request
n_letters = 4  # exclusive upper bound on the candidate word length

# Brute-force search
# Start at 1: a zero-letter "word" would produce a whitespace-only request.
# NOTE(review): the upper bound is exclusive, so the longest word tried has
# n_letters - 1 letters -- confirm that this is the intended limit.
for n_letter in range(1, n_letters):
    for chars in product(az, repeat=n_letter):
        input_string = ' '.join([''.join(chars)] * n_words)
        if input_string in done:
            continue

        # Deal with Google anti flood policy: retry until the request succeeds,
        # pausing one second after each ValueError raised by the translator.
        while True:
            try:
                output_string = translator.translate(input_string).text
                break
            except ValueError:
                time.sleep(1)

        # count number of unique word
        size = len(np.unique(output_string.split(' ')))

        # DataFrame.append was removed in pandas 2.0; add the row by label
        # instead. Values are listed in column order: input, output, size.
        results.loc[len(results)] = [input_string, output_string, size]
        done.add(input_string)

        # Show the ten best candidates found so far
        disp = results.sort_values(['size', 'output'], ascending=False)
        print(disp.head(10))

        # save
        # NOTE(review): saving after every translation is assumed here so the
        # resume logic above has something to reload -- the original nesting
        # of this step could not be recovered; confirm.
        results.to_csv('results.csv', header=True, index=False,
                       encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment