Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Find the simplest Google Translate request that generates the maximum number of unique words
from googletrans import Translator
from itertools import product
from pandas import DataFrame, read_csv
import numpy as np
import string
import time
import os
# Brute-force search for the short repeated input that makes Google Translate
# emit the largest number of distinct output words. Results are checkpointed
# to results.csv after every request so an interrupted run can be resumed.

# One translator client, reused for every request.
translator = Translator()

# Resume from a previous run if a results file exists; otherwise start fresh.
if os.path.isfile('results.csv'):
    print('load...')
    results = read_csv('results.csv')
else:
    results = DataFrame(columns=('input', 'output', 'size'))

# Search space: candidate "words" built from the lowercase ASCII alphabet.
az = string.ascii_lowercase  # 26 letters; the original [:26] slice was a no-op
n_words = 20    # copies of the candidate word per request
n_letters = 3   # maximum candidate word length to try

# Brute-force search over every word of length 1..n_letters.
# (Was `range(n_letters)`: that started at length 0 — a degenerate request of
# 20 empty words — and never reached the longest length.)
for n_letter in range(1, n_letters + 1):
    for chars in product(az, repeat=n_letter):
        input_string = ' '.join([''.join(chars)] * n_words)
        # Skip inputs already translated in a previous (resumed) run.
        if input_string in results['input'].values:
            continue
        # Retry until the request succeeds (Google anti-flood policy).
        translated = False
        while not translated:
            try:
                output_string = translator.translate(input_string).text
                translated = True
            except ValueError:
                time.sleep(1)
        # Score = number of unique words in the translation.
        size = len(np.unique(output_string.split(' ')))
        # DataFrame.append() was removed in pandas 2.0; append via .loc
        # instead (index is a default RangeIndex, so len(results) is the
        # next free label).
        results.loc[len(results)] = dict(input=input_string,
                                         output=output_string,
                                         size=size)
        disp = results.sort_values(['size', 'output'], ascending=False)
        print(disp[:10])
        # Checkpoint after every request so the run can be resumed.
        results.to_csv('results.csv', header=True, index=False, encoding='utf-8')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.