Skip to content

Instantly share code, notes, and snippets.

@Marocco2
Last active March 29, 2021 18:35
Show Gist options
  • Save Marocco2/5268909dba2c450c214c240e90defb27 to your computer and use it in GitHub Desktop.
Save Marocco2/5268909dba2c450c214c240e90defb27 to your computer and use it in GitHub Desktop.
try:
import ujson as json
except ImportError:
import json
import random
import requests
#import os
# Value placed in the "jsonrpc" field of every request body.
JSONRPC_VERSION = "2.0"

# Base HTTP headers for the client-state request.  No "user-agent" entry is
# listed here: getClientState.__init__ writes one into this dict.
HEADERS = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "host": "www.deepl.com",
    "connection": "keep-alive",
    "referer": "https://www.deepl.com/translator",
    "content-type": "application/json",
}
class getClientState():
    """Build and send the ``getClientState`` JSON-RPC request.

    The request carries a randomly seeded id; ``send`` returns the ``id``
    field of the server's reply.
    """

    def __init__(self, proxy="", ua="Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0"):
        """Prepare the request.

        :param proxy: Proxy URL used for both http and https; an empty
            string (or any non-string value) means no proxy.
        :param ua: Value stored under ``"user-agent"`` in the module-level
            ``HEADERS`` dict.  That dict is shared, so the most recently
            constructed instance decides the user agent that gets sent.
        """
        self.method = "getClientState"
        self.params = {
            "v": "20180814"
        }
        # Random multiple of 10000 in [1_000_000, 99_990_000]; dump() adds 1
        # to it before every request.
        self.id_number = random.randint(100, 9999) * 10000
        self.proxy = proxy
        HEADERS["user-agent"] = ua

    def dump(self):
        """Return the request as a dict, incrementing the request id first."""
        self.id_number += 1
        return {
            "id": self.id_number,
            "jsonrpc": JSONRPC_VERSION,
            "method": self.method,
            "params": self.params
        }

    def dumps(self):
        """Return the request serialized as UTF-8 encoded JSON bytes."""
        return json.dumps(self.dump()).encode("utf-8")

    def send(self, url):
        """POST the request to *url* and return the ``id`` of the reply.

        :param url: Endpoint that receives the JSON-RPC body.
        :returns: The ``"id"`` field of the JSON reply.
        :raises clientError: If the reply has no ``"result"`` member.
        """
        payload = self.dumps()
        # The context manager closes the session (and its pooled
        # connections) even if the request raises.
        with requests.Session() as session:
            session.proxies = {}
            if isinstance(self.proxy, str) and self.proxy:
                session.proxies['http'] = self.proxy
                session.proxies['https'] = self.proxy
            response = session.request("POST", url, data=payload, headers=HEADERS)
            body = response.content
        reply = json.loads(body.decode("utf-8"))
        if "result" in reply:
            return reply["id"]
        raise clientError(reply["error"])
class clientError(Exception):
    """Error member of a failed client-state JSON-RPC reply.

    :param error_obj: Mapping with ``"code"`` and ``"message"`` keys and an
        optional ``"data"`` key.
    :raises KeyError: If ``"code"`` or ``"message"`` is missing.
    """

    def __init__(self, error_obj):
        super().__init__(error_obj)
        self.code = error_obj["code"]
        self.message = error_obj["message"]
        # Always define ``data`` so handlers can read it without risking an
        # AttributeError when the server omitted the optional member.
        self.data = error_obj["data"] if "data" in error_obj else None

    def __str__(self):
        return "{}: {}".format(self.code, self.message)
try:
import ujson as json
except ImportError:
import json
import random
import requests
import os
# Value placed in the "jsonrpc" field of every request body.
JSONRPC_VERSION = "2.0"

# Base HTTP headers for JSON-RPC calls.  No "user-agent" entry is listed
# here: JSONRPCBuilder.__init__ writes one into this dict.
HEADERS = {
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br",
    "connection": "keep-alive",
    "host": "www2.deepl.com",
    "origin": "https://www.deepl.com",
    "referer": "https://www.deepl.com/",
    "content-type": "application/json",
}
class JSONRPCBuilder():
    """Build and send a single JSON-RPC request."""

    def __init__(
            self,
            method,
            params,
            id_number,
            proxy="",
            ua="Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0"
    ):
        """Prepare the request.

        :param method: JSON-RPC method name.
        :param params: Value sent as the ``"params"`` member.
        :param id_number: Base request id; ``dump`` adds 1 before each use.
        :param proxy: Proxy URL used for both http and https; an empty
            string (or any non-string value) means no proxy.
        :param ua: Value stored under ``"user-agent"`` in the module-level
            ``HEADERS`` dict.  That dict is shared, so the most recently
            constructed builder decides the user agent that gets sent.
        """
        self.method = method
        self.params = params
        self.id_number = id_number
        self.proxy = proxy
        HEADERS["user-agent"] = ua

    def dump(self):
        """Return the request as a dict, incrementing the request id first."""
        self.id_number += 1
        return {
            "jsonrpc": JSONRPC_VERSION,
            "method": self.method,
            "params": self.params,
            "id": self.id_number
        }

    def dumps(self):
        """Return the request serialized as UTF-8 encoded JSON bytes."""
        import logging

        data_str = json.dumps(self.dump())
        # Emitted at debug level instead of printed, so library users do not
        # get every request payload written to stdout.
        logging.getLogger(__name__).debug("%s", data_str)
        return data_str.encode("utf-8")

    def send(self, url):
        """POST the request to *url* and return the reply's result.

        :param url: Endpoint that receives the JSON-RPC body.
        :returns: The ``"result"`` member of the JSON reply.
        :raises JSONRPCError: If the reply has no ``"result"`` member.
        """
        payload = self.dumps()
        # The context manager closes the session (and its pooled
        # connections) even if the request raises.
        with requests.Session() as session:
            session.proxies = {}
            if isinstance(self.proxy, str) and self.proxy:
                session.proxies['http'] = self.proxy
                session.proxies['https'] = self.proxy
            response = session.request("POST", url, data=payload, headers=HEADERS)
            body = response.content
        reply = json.loads(body.decode("utf-8"))
        if "result" in reply:
            return reply["result"]
        raise JSONRPCError(reply["error"])

    def getId(self):
        """Return the current request id (the one used by the last dump)."""
        return self.id_number
class JSONRPCError(Exception):
    """Error member of a failed JSON-RPC reply.

    :param error_obj: Mapping with ``"code"`` and ``"message"`` keys and an
        optional ``"data"`` key.
    :raises KeyError: If ``"code"`` or ``"message"`` is missing.
    """

    def __init__(self, error_obj):
        super().__init__(error_obj)
        self.code = error_obj["code"]
        self.message = error_obj["message"]
        # Always define ``data`` so handlers can read it without risking an
        # AttributeError when the server omitted the optional member.
        self.data = error_obj["data"] if "data" in error_obj else None

    def __str__(self):
        return "{}: {}".format(self.code, self.message)
from urllib.error import URLError
import time
from .jsonrpc import JSONRPCBuilder
from .getclientstate import getClientState
import re
# Endpoint that receives the sentence-splitting and translation calls.
POST_URL = "https://www2.deepl.com/jsonrpc"
# Endpoint queried by Translator.__init__ to obtain the starting request id.
GET_URL = "https://www.deepl.com/PHP/backend/clientState.php?request_type=jsonrpc&il=EN"

# Source-language value that asks the service to detect the language.
AUTO_LANG = "auto"
TARGET_LANGS = [
    "EN",
    "DE",
    "FR",
    "ES",
    "IT",
    "NL",
    "PL",
    "RU",
    "PT",
]
SOURCE_LANGS = [*TARGET_LANGS, AUTO_LANG]

# Maximum sentence length accepted by Translator._build_jobs when length
# checking is enabled.
LENGTH_LIMIT = 5000

# Module-wide flag set by Translator.split_into_sentences once the language
# reported by the service differs from the one the caller selected.
IsComputed = False
class Translator():
    """Client for the DeepL web JSON-RPC endpoints.

    Language selection is stored on the instance by ``check_lang`` (called
    from ``translate_sentences``); ``split_into_sentences`` relies on it.
    Whether a detected source language is in effect is tracked in the
    module-level ``IsComputed`` flag, which is shared by all instances.
    """

    # Number of preceding sentences sent as context with each job.
    _MAX_CONTEXT_BEFORE = 5

    def __init__(
            self,
            proxy="",
            ua="Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0",
            check_length_limit=True):
        """Create a translator and fetch the starting request id.

        Constructing an instance performs a network request to ``GET_URL``.

        :param proxy: Proxy URL handed to the request helpers.
        :param ua: User-agent string handed to the request helpers.
        :param check_length_limit: whether to check strings for length or not.
            Default is ``True``.
        """
        self.proxy = proxy
        self.ua = ua
        self.get = getClientState(self.proxy, self.ua)
        self.id_num = self.get.send(GET_URL)
        self.check_length_limit = check_length_limit
        self.user_preferred_langs = []

    def check_lang(self, src_lang, dst_lang):
        """Validate and store the language pair.

        ``src_lang`` is stored as given; ``dst_lang`` is stored upper-cased.

        :param src_lang: Source language code or ``AUTO_LANG``
            (compared case-insensitively against ``SOURCE_LANGS``).
        :param dst_lang: Target language code (case-insensitive).
        :raises ValueError: If either language is not supported.
        """
        self.src_lang = src_lang
        self.dst_lang = dst_lang.upper()
        # Compare against every supported source language.  The previous
        # expression ``(SOURCE_LANGS and AUTO_LANG.upper())`` evaluated to the
        # string "AUTO", turning this into a substring test that rejected
        # every real language code.
        supported_sources = {lang.upper() for lang in SOURCE_LANGS}
        if self.src_lang.upper() not in supported_sources:
            raise ValueError("Input language not supported.")
        if self.dst_lang not in TARGET_LANGS:
            raise ValueError("Output language not supported.")

    def split_into_sentences(self, text):
        """
        Split a string into sentences using the DeepL API.

        Requires ``check_lang`` to have been called, because it reads the
        stored source and target languages.

        :param text: A string to be split.
        :returns: A list of sentences with type string.
        :raises TranslationError: If there was an exception during the
            translation.
        """
        global IsComputed
        if not text:
            return []
        method = "LMT_split_into_sentences"
        params = {
            "texts": [text.strip()],
            "lang": {
                "user_preferred_langs": []
            }
        }
        lang_params = params["lang"]
        lang_params["lang_user_selected"] = self.src_lang
        if not IsComputed and not self.user_preferred_langs:
            # NOTE(review): this sends a bare string where every other code
            # path sends a list -- confirm which form the endpoint expects.
            lang_params["user_preferred_langs"] = self.dst_lang
        else:
            if self.dst_lang not in self.user_preferred_langs:
                self.user_preferred_langs.append(self.dst_lang)
            lang_params["user_preferred_langs"] = self.user_preferred_langs
        # The builder increments the id it is given, so pass the old value
        # while keeping our own counter one ahead.
        self.id_num += 1
        resp = _send_jsonrpc(method, params, self.id_num - 1, self.proxy,
                             self.ua)
        if self.src_lang != resp["lang"]:
            IsComputed = True
            self.src_lang_computed = resp["lang"]
            if self.src_lang_computed not in self.user_preferred_langs:
                self.user_preferred_langs.append(self.src_lang_computed)
        return resp["splitted_texts"][0]

    def translate_sentences(self,
                            sentences,
                            src_lang,
                            dst_lang,
                            priority=1,
                            quality=""):
        """
        Translate a list of single sentences or string of sentences into a list
        of translations. If a string was passed, it will be split into a list
        of sentences using the DeepL API first.

        :param sentences: A list of strings or string to be translated.
        :param src_lang: Source language code or ``AUTO_LANG``.
        :param dst_lang: Target language code.
        :param priority: Value sent as the request's ``"priority"``.
        :param quality: If non-empty, sent as each job's ``"quality"``.
        :returns: A list of translated strings; entries without a
            translation are ``EmptyTranslation`` instances.
        :raises ValueError: If a language is not supported.
        :raises LengthLimitExceeded: If the length of a string exceeds the
            length limit of the DeepL API, an exception is raised.
        :raises TranslationError: If there was an exception during the
            translation.
        """
        self.check_lang(src_lang, dst_lang)
        # catch [], empty string and empty list
        if not sentences:
            return []
        if isinstance(sentences, str):
            # Only ask the service to split when the text appears to hold
            # more than one sentence.
            if [sentences] != re.split('(?<=[.!:?]) +', sentences):
                sentences = self.split_into_sentences(sentences)
            else:
                sentences = [sentences]
        jobs = self._build_jobs(sentences, quality)

        # The timestamp is advanced to the next multiple of
        # (1 + number of "i" characters in all sentences); presumably this
        # mirrors what the web client sends -- TODO confirm.
        divisor = 1 + sum(job["raw_en_sentence"].count("i") for job in jobs)
        ts = int(time.time() * 10) * 100 + 1000
        method = "LMT_handle_jobs"
        params = {
            "jobs": jobs,
            "lang": {},
            "priority": priority,
            "timestamp": ts + (divisor - ts % divisor)
        }
        lang_params = params["lang"]
        # IsComputed is shared by all instances, so it can be set before this
        # instance has detected anything; fall back to the selected language
        # instead of failing with AttributeError in that case.
        computed = getattr(self, "src_lang_computed", None)
        if IsComputed and computed is not None:
            lang_params["source_lang_computed"] = computed
            lang_params["user_preferred_langs"] = [computed, self.dst_lang]
        else:
            lang_params["source_lang_user_selected"] = self.src_lang
            lang_params["user_preferred_langs"] = [self.dst_lang]
        lang_params["target_lang"] = self.dst_lang

        # Same id hand-off as in split_into_sentences.
        self.id_num += 1
        resp = _send_jsonrpc(method, params, self.id_num - 1, self.proxy,
                             self.ua)
        return [
            entry["beams"][0]["postprocessed_sentence"]
            if entry["beams"] else EmptyTranslation()
            for entry in resp["translations"]
        ]

    def _build_jobs(self, sentences, quality=""):
        """Build one job dict per sentence, with neighbouring context.

        Each job carries up to ``_MAX_CONTEXT_BEFORE`` preceding sentences
        and at most one following sentence.

        :param sentences: Sequence of sentence strings.
        :param quality: If non-empty, stored under ``"quality"`` in each job.
        :returns: A list of job dicts.
        :raises LengthLimitExceeded: If length checking is enabled and a
            sentence is longer than ``LENGTH_LIMIT``.
        """
        jobs = []
        for index, sentence in enumerate(sentences):
            if self.check_length_limit and len(sentence) > LENGTH_LIMIT:
                raise LengthLimitExceeded()
            first_before = max(0, index - self._MAX_CONTEXT_BEFORE)
            job = {
                "kind": "default",
                "raw_en_context_after": list(sentences[index + 1:index + 2]),
                "raw_en_context_before": list(sentences[first_before:index]),
                "raw_en_sentence": sentence,
            }
            if quality != "":
                job["quality"] = quality
            jobs.append(job)
        return jobs
def _send_jsonrpc(
        method,
        params,
        id_num,
        proxy="",
        ua="Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0"
):
    """Send one JSON-RPC call to ``POST_URL`` and return its result.

    :param method: JSON-RPC method name.
    :param params: Parameters for the call.
    :param id_num: Request id handed to the builder.
    :param proxy: Proxy URL handed to the builder.
    :param ua: User-agent string handed to the builder.
    :returns: Whatever ``JSONRPCBuilder.send`` returns.
    :raises TranslationError: If a ``URLError`` is raised while sending.
    """
    # NOTE(review): only urllib's URLError is converted here; errors of any
    # other type raised by the builder propagate unchanged -- confirm that
    # this is the intended contract.
    try:
        return JSONRPCBuilder(method, params, id_num, proxy, ua).send(POST_URL)
    except URLError as url_error:
        raise TranslationError(url_error.reason)
class LengthLimitExceeded(Exception):
    """Raised when a sentence is longer than the configured length limit."""
class TranslationError(Exception):
    """Raised when sending a translation request fails.

    :param reason: Cause of the failure; either a message string or another
        exception instance.
    """

    def __init__(self, reason):
        super().__init__(reason)
        self.reason = reason

    def __repr__(self):
        # format() instead of ``+`` so a non-string reason (for example an
        # exception instance) does not raise TypeError.
        return "TranslationError: {}".format(self.reason)
class EmptyTranslation():
    """Placeholder used where a translation result is empty."""

    def __repr__(self):
        marker = "<EmptyTranslation>"
        return marker
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment