Last active
October 13, 2022 19:23
-
-
Save bigEvilBanana/20bc1cbee604fc0ee149e6fe8fe0e347 to your computer and use it in GitHub Desktop.
translator + stem (tor) example, specially for https://t.me/seo_code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The code below is so bad that I am ashamed of it.
# But I was too lazy to refactor it, so I am publishing it as is.
# The main thing is that it shows the principles of working with translator + stem (Tor).
# Please don't use it as is in production.
# Copyright @seo_code (TG: https://t.me/seo_code) | |
# Required packages: | |
# pip install translate aiohttp stem aiohttp-socks | |
import asyncio | |
from textwrap import wrap | |
from stem import process, SocketClosed | |
import aiohttp | |
from aiohttp_socks import ProxyConnector | |
from stem import Signal | |
from stem.control import Controller | |
from translate import Translator | |
from translate.providers import MyMemoryProvider | |
# When True, the module-level translator sends its HTTP requests through the
# local Tor SOCKS proxy, and an IP-limit warning triggers an IP renewal
# instead of an immediate ValueError.
USE_TOR: bool = True
class NoTranslateMatchesError(Exception):
    """Raised when a translation response has neither a translated text nor any match.

    Attributes:
        error: Short description of what went wrong.
        base_text: The source text that could not be translated.
    """

    def __init__(self, error: str, base_text: str):
        # Forward both values to Exception so that ``args`` is populated;
        # without this the exception cannot be copied or pickled, because
        # rebuilding it calls ``cls(*self.args)``.
        super().__init__(error, base_text)
        self.error = error
        self.base_text = base_text

    def __str__(self):
        return f"NoTranslateMatchesError ({self.error}). Base text: {self.base_text}"
class MyMemoryProviderProxy(MyMemoryProvider):
    """Async variant of the MyMemory provider that can send requests through Tor.

    When ``use_tor`` is true, every HTTP request goes through the SOCKS5 proxy
    at ``127.0.0.1:9050``; otherwise a direct connection is used.
    """

    # Proxy mapping kept for backward compatibility; it is not read anywhere
    # in this class (requests use the aiohttp connector built in _make_request).
    proxies = {"http": "socks5://127.0.0.1:9050", "https": "socks5://127.0.0.1:9050"}

    def __init__(self, use_tor: bool = True, **kwargs):
        """Store the Tor switch and pass all other options to the parent provider."""
        self.use_tor = use_tor
        super().__init__(**kwargs)

    @staticmethod
    def renew_ip(control_port: int = 9051):
        """Ask the local Tor process for fresh circuits (and thus a new exit IP).

        Args:
            control_port: Tor *control* port. Port 9050 is the SOCKS port used
                for proxying and does not speak the control protocol, so the
                control connection must target a different port.

        Raises:
            SocketClosed: if the control connection cannot be established or
                is dropped. Other connection-level ``stem.SocketError``s are
                re-raised as ``SocketClosed`` so that existing callers which
                handle that exception (e.g. by restarting Tor) keep working.
        """
        # Local import: only this method needs the base socket error class.
        from stem import SocketError

        try:
            with Controller.from_port(port=control_port) as controller:
                # The control protocol requires authentication before commands.
                controller.authenticate()
                # NEWNYM makes new requests use new circuits; RELOAD would only
                # make Tor re-read its configuration.
                controller.signal(Signal.NEWNYM)
        except SocketClosed:
            raise
        except SocketError as exc:
            raise SocketClosed(f"Tor control port {control_port} is not usable: {exc}") from exc

    async def get_translation(self, text):
        """Return the translation of ``text``.

        Uses ``responseData.translatedText`` when it is non-empty, otherwise
        falls back to the first entry of ``matches``.

        Raises:
            NoTranslateMatchesError: if the response has no translated text and
                no matches.
        """
        data = await self._make_request(text)
        translation = data["responseData"]["translatedText"]
        if translation:
            return translation

        matches = data["matches"]
        if not matches:
            raise NoTranslateMatchesError("Response data doesn't have any translate matches", base_text=text)
        return matches[0]["translation"]

    async def _make_request(self, text):
        """GET the translation endpoint for ``text`` and return the decoded JSON body."""
        params = {"q": text, "langpair": self.languages}
        if self.email:
            params["de"] = self.email

        # connector=None makes aiohttp use its default (direct) connector.
        connector = None
        if self.use_tor:
            connector = ProxyConnector.from_url("socks5://127.0.0.1:9050")

        async with aiohttp.ClientSession(connector=connector) as session:
            async with session.get(self.base_url, params=params, headers=self.headers) as resp:
                return await resp.json()
class TranslatorProxy(Translator):
    """Translator whose provider is always an async ``MyMemoryProviderProxy``."""

    def __init__(self, to_lang, from_lang="en", provider=None, secret_access_key=None, use_tor: bool = True, **kwargs):
        """Build the base translator, then swap in the proxy-aware provider.

        Args:
            to_lang: Target language code.
            from_lang: Source language code.
            provider: Accepted for signature compatibility only; the provider
                is always replaced by ``MyMemoryProviderProxy``.
            secret_access_key: Forwarded to the base class and to the provider.
            use_tor: Whether the provider should route requests through Tor.
            **kwargs: Extra provider options (e.g. ``email``).
        """
        # Pass the caller's from_lang / secret_access_key through instead of
        # hard-coding "en" / None, which silently ignored the arguments.
        # provider=None is kept on purpose: the provider built by the base
        # class is discarded right below.
        super().__init__(to_lang, from_lang=from_lang, provider=None, secret_access_key=secret_access_key, **kwargs)
        self.provider = MyMemoryProviderProxy(
            from_lang=self.from_lang,
            to_lang=self.to_lang,
            secret_access_key=secret_access_key,
            use_tor=use_tor,
            **kwargs,
        )

    async def translate(self, text):
        """Translate ``text`` in chunks of at most 1000 characters.

        Returns the translated chunks joined with single spaces. When source
        and target languages are equal, ``text`` is returned unchanged. A chunk
        whose translation raises ``RuntimeError`` is printed and skipped
        (best effort); any other exception propagates.
        """
        if self.from_lang == self.to_lang:
            return text

        translated_chunks = []
        for chunk in wrap(text, 1000, replace_whitespace=False):
            try:
                translated_chunks.append(await self.provider.get_translation(chunk))
            except RuntimeError as e:
                print(e)
        return " ".join(translated_chunks)
# Shared translator instance used by translate() below: English -> German,
# optionally routed through Tor depending on USE_TOR.
_TRANSLATOR_OPTIONS = {
    "to_lang": "de",
    "from_lang": "en",
    "provider": "mymemory",
    "email": "example@gmail.com",
    "use_tor": USE_TOR,
}
translator = TranslatorProxy(**_TRANSLATOR_OPTIONS)
async def translate(text: str):
    """Translate ``text`` with the shared translator, renewing the IP once on a limit warning.

    If the result contains "MYMEMORY WARNING" and Tor is disabled, a
    ValueError is raised. With Tor enabled, the warning is printed, the IP is
    renewed and the translation is attempted one more time; that second result
    is returned as is. Every exception is printed and then re-raised.
    """
    try:
        result = await translator.translate(text)
        if "MYMEMORY WARNING" not in result:
            return result

        # The service answered with its usage-limit warning instead of a translation.
        if not USE_TOR:
            raise ValueError("MYMEMORY WARNING. IP limitation :(")

        print(result)
        print("========= Renew IP =========")
        MyMemoryProviderProxy.renew_ip()
        return await translator.translate(text)
    except Exception as exc:
        print(exc)
        raise exc
def _launch_tor():
    """Start a Tor process that also opens a cookie-authenticated control port on 9051."""
    # Tor needs a control port for new circuits to be requested over the
    # control protocol; a plain launch does not configure one here.
    return process.launch_tor(args=["--ControlPort", "9051", "--CookieAuthentication", "1"])


async def main():
    """Launch Tor, translate the sample texts 100 times over, and always stop Tor afterwards.

    When a translation raises ``SocketClosed``, the Tor process is restarted
    and the loop continues with the next text (the failed text is not retried).
    """
    texts_to_translate = [
        "Hello, world!",
        "Once upon a time, we knew that our ancestors were on the verge of extinction. "
        "The great explorers and poets of the Old World, from Alexander the Great to Chaucer, "
        "are dead and gone. A good many of our ancient explorers and poets have died in battle—and perhaps "
        "others—against the invaders",
        "My name is Teven and I am a teacher. I know you have a job to do. "
        "My wife knows you know what you are doing. "
        "I know I am not perfect and I do not have enough answers for you. "
        "I have been called 'one man' and I am 'too busy' to change that. I am just working",
        "The upward pressure that housing costs have been putting on inflation can now probably be safely ignored. "
        "If only there was nothing else going on but the rent. "
        "The Labor Department on Thursday reported that consumer prices rose a seasonally adjusted 0.4% "
        "in September from August, putting them 8.2% above their year-earlier level. "
        "Core prices, which exclude food and energy items in an effort to better track inflation’s trend, "
        "rose 0.6% on the month, and 6.6% on the year. Both the headline and core measures were above economists’ "
        "estimates, dashing any remaining hopes that Federal Reserve policy makers might raise rates "
        "by half a percentage point, rather than again by three-quarters of a point, "
        "when they next meet in November."
    ]

    tor_process = _launch_tor()
    try:
        # simulate a lot of texts
        for _ in range(100):
            for text in texts_to_translate:
                try:
                    translated = await translate(text)
                    print(translated)
                except SocketClosed:
                    tor_process.kill()
                    # Wait for the old process to exit so its ports are free
                    # before the replacement tries to bind them.
                    tor_process.wait()
                    tor_process = _launch_tor()
    finally:
        # Never leave a Tor process behind, whatever interrupted the loop.
        tor_process.kill()
# Script entry point: run the demo translation loop on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Make sure that Tor is installed on your machine and that its SOCKS proxy is available on port 9050.
proxies = {"http": "socks5://127.0.0.1:9050", "https": "socks5://127.0.0.1:9050"}