-
-
Save Marocco2/5268909dba2c450c214c240e90defb27 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try: | |
import ujson as json | |
except ImportError: | |
import json | |
import random | |
import requests | |
#import os | |
# JSON-RPC protocol version written into every request envelope.
JSONRPC_VERSION = "2.0"

# Base HTTP headers for the client-state request.  No "user-agent" entry is
# listed here on purpose: getClientState.__init__ stores one in this dict at
# construction time.
HEADERS = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "host": "www.deepl.com",
    "connection": "keep-alive",
    "referer": "https://www.deepl.com/translator",
    "content-type": "application/json",
}
class getClientState():
    """Build and send the JSON-RPC ``getClientState`` request.

    :param proxy: Proxy URL used for both ``http`` and ``https`` traffic.
        An empty string (the default) or a non-string value means no proxy.
    :param ua: Value stored under ``"user-agent"`` in the module-level
        ``HEADERS`` dict.
    """

    def __init__(
            self,
            proxy="",
            ua="Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0"
    ):
        self.method = "getClientState"
        # Only the version marker is sent; a "clientVars" test-group entry
        # is intentionally not part of the parameters.
        self.params = {"v": "20180814"}
        # Random multiple of 10000; dump() counts upwards from here.
        self.id_number = random.randint(100, 9999) * 10000
        self.proxy = proxy
        # NOTE: HEADERS is shared module state, so the most recently
        # constructed instance decides the user-agent for all of them.
        HEADERS["user-agent"] = ua

    def dump(self):
        """Return the next request envelope as a dict.

        Each call increments ``id_number`` before using it.
        """
        self.id_number += 1
        return {
            "id": self.id_number,
            "jsonrpc": JSONRPC_VERSION,
            "method": self.method,
            "params": self.params,
        }

    def dumps(self):
        """Return the next request envelope serialized as UTF-8 JSON bytes."""
        return json.dumps(self.dump()).encode("utf-8")

    def send(self, url):
        """POST the request to ``url`` and return the ``id`` of the response.

        :param url: Endpoint to POST to.
        :returns: The ``id`` field of the decoded response when it carries a
            ``result`` member.
        :raises clientError: When the response has no ``result`` member; the
            error is built from the response's ``error`` member.
        """
        proxies = {}
        if isinstance(self.proxy, str) and self.proxy:
            proxies["http"] = self.proxy
            proxies["https"] = self.proxy
        # The context manager closes the session (and its pooled
        # connections) even when the request or the decoding fails.
        with requests.Session() as session:
            session.proxies = proxies
            reply = session.request(
                "POST", url, data=self.dumps(), headers=HEADERS)
            resp = json.loads(reply.content.decode("utf-8"))
        if "result" in resp:
            return resp["id"]
        raise clientError(resp["error"])
class clientError(Exception):
    """Error reported by the server in reply to a client-state request.

    :param error_obj: Mapping with ``"code"`` and ``"message"`` entries and
        an optional ``"data"`` entry.
    """

    def __init__(self, error_obj):
        self.code = error_obj["code"]
        self.message = error_obj["message"]
        # ``data`` is optional: the attribute only exists when the server
        # supplied it.
        if "data" in error_obj:
            self.data = error_obj["data"]

    def __str__(self):
        """Render the error as ``"<code>: <message>"``."""
        return "{}: {}".format(self.code, self.message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
try: | |
import ujson as json | |
except ImportError: | |
import json | |
import random | |
import requests | |
import os | |
# JSON-RPC protocol version written into every request envelope.
JSONRPC_VERSION = "2.0"

# Base HTTP headers for JSON-RPC calls.  Neither an "Accept-Language" nor a
# "user-agent" entry is listed here; JSONRPCBuilder.__init__ stores the
# user-agent in this dict at construction time.
HEADERS = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "connection": "keep-alive",
    "host": "www2.deepl.com",
    "origin": "https://www.deepl.com",
    "referer": "https://www.deepl.com/",
    "content-type": "application/json",
}
class JSONRPCBuilder():
    """Assemble a JSON-RPC request and POST it to an endpoint.

    :param method: Name of the remote method.
    :param params: Parameters object sent with the call.
    :param id_number: Starting id; it is incremented before every request,
        so the first request carries ``id_number + 1``.
    :param proxy: Proxy URL used for both ``http`` and ``https`` traffic.
        An empty string (the default) or a non-string value means no proxy.
    :param ua: Value stored under ``"user-agent"`` in the module-level
        ``HEADERS`` dict.
    """

    def __init__(
            self,
            method,
            params,
            id_number,
            proxy="",
            ua="Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0"
    ):
        self.method = method
        self.params = params
        self.id_number = id_number
        self.proxy = proxy
        # NOTE: HEADERS is shared module state, so the most recently
        # constructed builder decides the user-agent for all of them.
        HEADERS["user-agent"] = ua

    def dump(self):
        """Return the next request envelope as a dict.

        Each call increments ``id_number`` before using it.
        """
        self.id_number += 1
        return {
            "jsonrpc": JSONRPC_VERSION,
            "method": self.method,
            "params": self.params,
            "id": self.id_number,
        }

    def dumps(self):
        """Return the next request envelope serialized as UTF-8 JSON bytes.

        Nothing is written to stdout: request payloads contain the text
        being translated and library code should not echo it.
        """
        return json.dumps(self.dump()).encode("utf-8")

    def send(self, url):
        """POST the request to ``url`` and return the call's result.

        :param url: Endpoint to POST to.
        :returns: The ``result`` member of the decoded response.
        :raises JSONRPCError: When the response has no ``result`` member;
            the error is built from the response's ``error`` member.
        """
        proxies = {}
        if isinstance(self.proxy, str) and self.proxy:
            proxies["http"] = self.proxy
            proxies["https"] = self.proxy
        # The context manager closes the session (and its pooled
        # connections) even when the request or the decoding fails.
        with requests.Session() as session:
            session.proxies = proxies
            reply = session.request(
                "POST", url, data=self.dumps(), headers=HEADERS)
            resp = json.loads(reply.content.decode("utf-8"))
        if "result" in resp:
            return resp["result"]
        raise JSONRPCError(resp["error"])

    def getId(self):
        """Return the id used by the most recent request."""
        return self.id_number
class JSONRPCError(Exception):
    """Error member of a JSON-RPC response, surfaced as an exception.

    :param error_obj: Mapping with ``"code"`` and ``"message"`` entries and
        an optional ``"data"`` entry.
    """

    def __init__(self, error_obj):
        self.code = error_obj["code"]
        self.message = error_obj["message"]
        # ``data`` is optional: the attribute only exists when the server
        # supplied it.
        if "data" in error_obj:
            self.data = error_obj["data"]

    def __str__(self):
        """Render the error as ``"<code>: <message>"``."""
        return "{}: {}".format(self.code, self.message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.error import URLError | |
import time | |
from .jsonrpc import JSONRPCBuilder | |
from .getclientstate import getClientState | |
import re | |
# Endpoint for the translation / sentence-splitting JSON-RPC calls.
POST_URL = "https://www2.deepl.com/jsonrpc"
# Endpoint for the initial client-state request made by Translator.__init__.
GET_URL = "https://www.deepl.com/PHP/backend/clientState.php?request_type=jsonrpc&il=EN"

# Pseudo language code accepted as a source language only.
AUTO_LANG = "auto"
TARGET_LANGS = ["EN", "DE", "FR", "ES", "IT", "NL", "PL", "RU", "PT"]
SOURCE_LANGS = [*TARGET_LANGS, AUTO_LANG]

# Maximum sentence length accepted by Translator._build_jobs when length
# checking is enabled.
LENGTH_LIMIT = 5000

# Module-wide flag: Translator.split_into_sentences sets it to True once the
# language reported by the server differs from the requested source language.
IsComputed = False
class Translator():
    """Client for the DeepL JSON-RPC translation endpoint.

    Constructing an instance sends a ``getClientState`` request to
    ``GET_URL``; the id it returns seeds the ids of all later calls.

    :param proxy: Proxy URL handed to the request helpers. An empty string
        (the default) means no proxy.
    :param ua: User-agent string handed to the request helpers.
    :param check_length_limit: whether to check strings for length or not.
        Default is ``True``.
    """

    def __init__(
            self,
            proxy="",
            ua="Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0",
            check_length_limit=True):
        self.proxy = proxy
        self.ua = ua
        self.get = getClientState(self.proxy, self.ua)
        self.id_num = self.get.send(GET_URL)
        self.check_length_limit = check_length_limit
        self.user_preferred_langs = []

    def check_lang(self, src_lang, dst_lang):
        """Validate the language pair and store it on the instance.

        Matching is case-insensitive. The target and any explicit source
        code are stored upper-cased; an "auto" source is stored as passed.
        Both attributes are assigned before validation, so they are set
        even when a ``ValueError`` is raised.

        :param src_lang: Source language code, or ``AUTO_LANG``.
        :param dst_lang: Target language code.
        :raises ValueError: If the source is not in ``SOURCE_LANGS`` or the
            target is not in ``TARGET_LANGS``.
        """
        source = src_lang.upper()
        is_auto = source == AUTO_LANG.upper()
        self.src_lang = src_lang if is_auto else source
        self.dst_lang = dst_lang.upper()
        # Real membership test.  Testing against the expression
        # ``SOURCE_LANGS and AUTO_LANG.upper()`` would be a substring check
        # on "AUTO" and reject every explicit language code.
        if source not in {lang.upper() for lang in SOURCE_LANGS}:
            raise ValueError("Input language not supported.")
        if self.dst_lang not in TARGET_LANGS:
            raise ValueError("Output language not supported.")

    def split_into_sentences(self, text):
        """
        Split a string into sentences using the DeepL API.

        Relies on ``src_lang`` / ``dst_lang`` having been stored by
        ``check_lang``. When the language reported by the server differs
        from ``src_lang`` it is remembered as ``src_lang_computed`` and the
        module-wide ``IsComputed`` flag is set.

        :param text: A string to be split.
        :returns: A list of sentences with type string.
        :raises TranslationError: If there was an exception during the
            translation.
        """
        global IsComputed
        if not text:
            return []
        method = "LMT_split_into_sentences"
        params = {
            "texts": [text.strip()],
            "lang": {
                "user_preferred_langs": [],
                "lang_user_selected": self.src_lang,
            },
        }
        if not IsComputed and not self.user_preferred_langs:
            # NOTE(review): this path sends a bare string while every other
            # path sends a list - kept as is, confirm what the server expects.
            params["lang"]["user_preferred_langs"] = self.dst_lang
        else:
            if self.dst_lang not in self.user_preferred_langs:
                self.user_preferred_langs += [self.dst_lang]
            params["lang"]["user_preferred_langs"] = self.user_preferred_langs
        # The request helper increments the id it is given before sending,
        # so pass the pre-increment value.
        self.id_num += 1
        resp = _send_jsonrpc(method, params, self.id_num - 1, self.proxy,
                             self.ua)
        if self.src_lang != resp["lang"]:
            IsComputed = True
            self.src_lang_computed = resp["lang"]
            if self.src_lang_computed not in self.user_preferred_langs:
                self.user_preferred_langs += [self.src_lang_computed]
        return resp["splitted_texts"][0]

    def translate_sentences(self,
                            sentences,
                            src_lang,
                            dst_lang,
                            priority=1,
                            quality=""):
        """
        Translate a list of single sentences or string of sentences into a list
        of translations. If a string was passed, it will be split into a list
        of sentences using the DeepL API first.

        :param sentences: A list of strings or string to be translated.
        :param src_lang: Source language code, or ``AUTO_LANG``.
        :param dst_lang: Target language code.
        :param priority: Value sent as the request's ``priority``.
        :param quality: When non-empty, added to every job as ``quality``.
        :returns: A list of translated strings; an ``EmptyTranslation``
            stands in for a sentence the server returned no beams for.
        :raises ValueError: If a language is not supported.
        :raises LengthLimitExceeded: If the length of a string exceeds the
            length limit of the DeepL API, an exception is raised.
        :raises TranslationError: If there was an exception during the
            translation.
        """
        self.check_lang(src_lang, dst_lang)
        # catch [], empty string and empty list
        if not sentences:
            return []
        if isinstance(sentences, str):
            # Ask the server to split only when the text looks like more
            # than one sentence (sentence punctuation followed by spaces).
            if [sentences] != re.split('(?<=[.!:?]) +', sentences):
                sentences = self.split_into_sentences(sentences)
            else:
                sentences = [sentences]
        jobs = self._build_jobs(sentences, quality)
        # One plus the number of "i" characters across all sentences.
        o = 1 + sum(job["raw_en_sentence"].count("i") for job in jobs)
        method = "LMT_handle_jobs"
        ts = int(time.time() * 10) * 100 + 1000
        params = {
            "jobs": jobs,
            "lang": {},
            "priority": priority,
            # Next multiple of ``o`` strictly greater than ``ts``.
            "timestamp": ts + (o - ts % o),
        }
        # IsComputed is module-wide and may have been set by another
        # instance; only use the computed language if this instance has one.
        computed = getattr(self, "src_lang_computed", None)
        if IsComputed and computed is not None:
            params["lang"]["source_lang_computed"] = computed
            params["lang"]["user_preferred_langs"] = [computed, self.dst_lang]
        else:
            params["lang"]["source_lang_user_selected"] = self.src_lang
            params["lang"]["user_preferred_langs"] = [self.dst_lang]
        params["lang"]["target_lang"] = self.dst_lang
        self.id_num += 1
        resp = _send_jsonrpc(method, params, self.id_num - 1, self.proxy,
                             self.ua)
        return [self._first_beam(obj) for obj in resp["translations"]]

    # NOTE: there is no single-sentence ``translate_sentence`` wrapper; call
    # ``translate_sentences([sentence], src_lang, dst_lang)[0]`` instead.

    @staticmethod
    def _first_beam(translation):
        """Return the first beam's text, or ``EmptyTranslation`` if none."""
        if translation["beams"]:
            return translation["beams"][0]["postprocessed_sentence"]
        return EmptyTranslation()

    def _build_jobs(self, sentences, quality=""):
        """Build one job dict per sentence, with surrounding context.

        Each job carries up to five preceding sentences and at most one
        following sentence as context.

        :param sentences: Sequence of sentence strings.
        :param quality: When non-empty, added to every job as ``quality``.
        :returns: A list of job dicts.
        :raises LengthLimitExceeded: If length checking is enabled and a
            sentence is longer than ``LENGTH_LIMIT``.
        """
        jobs = []
        for index, sentence in enumerate(sentences):
            if self.check_length_limit and len(sentence) > LENGTH_LIMIT:
                raise LengthLimitExceeded()
            job = {
                "kind": "default",
                "raw_en_context_after": list(sentences[index + 1:index + 2]),
                "raw_en_context_before": list(
                    sentences[max(0, index - 5):index]),
                "raw_en_sentence": sentence,
            }
            if quality != "":
                job["quality"] = quality
            jobs.append(job)
        return jobs
def _send_jsonrpc(
        method,
        params,
        id_num,
        proxy="",
        ua="Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0"
):
    """Send one JSON-RPC call to ``POST_URL`` and return its result.

    :param method: Name of the remote method.
    :param params: Parameters object sent with the call.
    :param id_num: Id handed to ``JSONRPCBuilder``.
    :param proxy: Proxy URL handed to ``JSONRPCBuilder``.
    :param ua: User-agent string handed to ``JSONRPCBuilder``.
    :returns: Whatever ``JSONRPCBuilder.send`` returns.
    :raises TranslationError: If a ``URLError`` is raised while sending.
    """
    # NOTE(review): the transport appears to be ``requests``, which reports
    # network failures through its own exception types rather than
    # ``URLError`` - confirm whether this handler can ever fire.
    try:
        builder = JSONRPCBuilder(method, params, id_num, proxy, ua)
        return builder.send(POST_URL)
    except URLError as err:
        raise TranslationError(err.reason)
class LengthLimitExceeded(Exception):
    """Raised when a sentence is longer than ``LENGTH_LIMIT``."""
class TranslationError(Exception):
    """Raised when sending a translation request fails.

    :param reason: Cause of the failure; stored as ``reason``. It may be a
        string or any other object (for instance an exception instance).
    """

    def __init__(self, reason):
        self.reason = reason

    def __repr__(self):
        """Render as ``"TranslationError: <reason>"``."""
        # ``format`` accepts any object; ``str + reason`` would raise
        # TypeError whenever ``reason`` is not itself a string.
        return "TranslationError: {}".format(self.reason)
class EmptyTranslation():
    """Placeholder returned for a sentence that has no translation beams."""

    def __repr__(self):
        """Render as the fixed marker ``"<EmptyTranslation>"``."""
        marker = "<EmptyTranslation>"
        return marker
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment