Skip to content

Instantly share code, notes, and snippets.

@farzadhallaji
Last active November 12, 2023 23:43
Show Gist options
  • Save farzadhallaji/bbe5ac054e9f2e552d282233fc8b6aab to your computer and use it in GitHub Desktop.
Save farzadhallaji/bbe5ac054e9f2e552d282233fc8b6aab to your computer and use it in GitHub Desktop.
Convert a list of words to an HTML file to memorize them
# -*- coding: utf-8 -*-
"""vocab.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1GNxF9QyJc8hL3KgVKB8t4GPgH8sGNTci
"""
# ! pip install google-translate-for-goldendict
class Token:
    """Compute the ``tk`` query parameter sent with a translate request.

    Background on the algorithm:
    https://www.52pojie.cn/thread-707169-1-1.html
    https://www.jianshu.com/p/af74f0719267
    """

    # Seed used when the caller passes an empty ``tkk``.
    # Other seed values listed in the original source:
    #   422392.71207223, 406644.3293161072, 431767.4042228602
    DEFAULT_TKK = "440498.1287591069"

    def __init__(self, tkk):
        """Remember the ``"<int>.<int>"`` seed string (may be empty)."""
        self.tkk = tkk

    def calculate_token(self, text):
        """Return the token for *text* as the string ``"<a>.<a ^ first_seed>"``.

        Args:
            text: the query; a ``str`` is hashed over its UTF-8 bytes, while
                ``bytes``/``bytearray`` are hashed as given.

        Raises:
            ValueError: if ``self.tkk`` is not two integers joined by one dot.

        Side effect: an empty ``self.tkk`` is replaced by ``DEFAULT_TKK``.
        """
        if self.tkk == "":
            self.tkk = self.DEFAULT_TKK
        first_seed, second_seed = self.tkk.split(".")
        first_seed = int(first_seed)
        second_seed = int(second_seed)

        # ``str.encode`` never raises UnicodeDecodeError on Python 3, so raw
        # bytes are accepted explicitly instead of through an except branch.
        if isinstance(text, (bytes, bytearray)):
            data = bytes(text)
        else:
            data = text.encode("utf-8")

        a = first_seed
        for value in data:
            a += value
            a = self._work_token(a, "+-a^+6")
        a = self._work_token(a, "+-3^+b+-f")
        a ^= second_seed
        if a < 0:
            # Reinterpret a negative value as an unsigned 32-bit integer.
            a = (a & 2147483647) + 2147483648
        # Integer modulo keeps the value exact (the float ``1E6`` did not
        # guarantee that for very large seeds).
        a %= 1000000
        return f"{a}.{a ^ first_seed}"

    @staticmethod
    def _rshift(val, n):
        """Shift *val* right by *n*, treating negatives as unsigned 32-bit."""
        return val >> n if val >= 0 else (val + 0x100000000) >> n

    def _work_token(self, a, seed):
        """Apply the mixing steps encoded in *seed* to *a* and return it.

        *seed* is read in groups of three characters:
        ``[combine][direction][amount]``. ``amount`` is a digit, or a letter
        from ``"a"`` upwards meaning 10, 11, ...; ``direction`` ``"+"`` means
        unsigned right shift, anything else a left shift; ``combine`` ``"+"``
        adds the shifted value and masks to 32 bits, anything else XORs it.
        """
        for i in range(0, len(seed) - 2, 3):
            combine, direction, amount = seed[i], seed[i + 1], seed[i + 2]
            shift = ord(amount) - 87 if amount >= "a" else int(amount)
            shifted = self._rshift(a, shift) if direction == "+" else a << shift
            a = (a + shifted) & 4294967295 if combine == "+" else a ^ shifted
        return a
import requests
import sys
import urllib.parse
import asyncio
from functools import partial
import re
import argparse
class GoogleTranslate(object):
    """Query the ``translate_a/single`` endpoint and format the reply.

    The reply is a nested JSON list. This class reads index 0 (translated
    segments), 1 (translations grouped by part of speech), 2 (a language code,
    presumably the detected source language -- confirm against the API),
    11 (synonyms), 12 (definitions) and 13 (examples).
    """

    def __init__(self, args):
        """Copy the options from *args*.

        *args* must expose the attributes ``host``, ``proxy``, ``synonyms``,
        ``definitions``, ``examples``, ``type`` and ``alternative``.

        Side effect: when ``sys.stdout`` supports ``reconfigure`` its encoding
        is set to the chosen result encoding.
        """
        self.http_host = args.host
        self.http_proxy = args.proxy
        self.synonyms_en = args.synonyms
        self.definitions_en = args.definitions
        self.examples_en = args.examples
        # HTML output is always UTF-8; text output follows the console and
        # falls back to UTF-8 when the stream reports no encoding (None).
        if args.type == 'html':
            self.result_code = 'utf-8'
        else:
            self.result_code = getattr(sys.stdout, 'encoding', None) or 'utf-8'
        if hasattr(sys.stdout, 'reconfigure'):
            sys.stdout.reconfigure(encoding=self.result_code)
        self.alternative_language = args.alternative
        self.result_type = args.type
        self.target_language = ''
        self.query_string = ''
        self.result = ''

    def get_url(self, tl, qry, tk):
        """Build the request URL for target language *tl*, the already
        URL-quoted query *qry* and the token *tk*."""
        url = f'https://{self.http_host}/translate_a/single?client=gtx&sl=auto&tl={tl}&hl=en&dt=at&dt=bd&dt=ex&' \
              f'dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=sos&dt=ss&dt=t&ssel=0&tsel=0&kc=1&tk={tk}&q={qry}'
        return url

    def get_synonym(self, resp):
        """Append the "Translations of ..." section from ``resp[1]``, if any."""
        if resp[1]:
            self.result += '\n=========\n'
            self.result += f'Translations of {self.query_string}\n'
            for x in resp[1]:
                # Only the first character of the part of speech is shown; an
                # empty value no longer raises IndexError.
                pos = x[0][0] if x[0] else ''
                self.result += f'# {pos}.\n'
                for y in x[2]:
                    self.result += f'{y[0]}: {", ".join(y[1])}\n'

    def get_result(self, resp):
        """Append the translated text: all segments of ``resp[0]`` joined,
        followed by a newline."""
        for x in resp[0]:
            self.result += x[0] if x[0] else ''
        self.result += '\n'

    def get_definitions(self, resp):
        """Append the "Definitions of ..." section from ``resp[12]``."""
        self.result += '\n=========\n'
        self.result += f'0_0: Definitions of {self.query_string}\n'
        for x in resp[12]:
            self.result += f'# {x[0] if x[0] else ""}.\n'
            for y in x[1]:
                self.result += f' - {y[0]}\n'
                # The usage example is optional; skip it when absent or empty
                # instead of printing "None".
                if len(y) >= 3 and y[2]:
                    self.result += f' * {y[2]}\n'

    def get_examples(self, resp):
        """Append the "Examples of ..." section from ``resp[13][0]``."""
        self.result += '\n=========\n'
        self.result += f'0_0: Examples of {self.query_string}\n'
        for x in resp[13][0]:
            self.result += f' * {x[0]}\n'

    def get_synonyms_en(self, resp):
        """Append the "Synonyms of ..." section from ``resp[11]``."""
        self.result += '\n=========\n'
        self.result += f'0_0: Synonyms of {self.query_string}\n'
        for x in resp[11]:
            self.result += f'# {x[0]}.\n'
            for y in x[1]:
                self.result += ', '.join(y[0]) + '\n'

    def get_resp(self, url):
        """GET *url* and return the decoded JSON body.

        The configured proxy, when non-blank, is used for both HTTP and HTTPS.
        Exceptions raised by ``requests`` or by JSON decoding propagate to
        the caller.
        """
        proxy = (self.http_proxy or '').strip()
        proxies = {'http': f'http://{proxy}', 'https': f'http://{proxy}'} if proxy else None
        base_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0'}
        # The context manager closes the session's connections afterwards.
        with requests.Session() as session:
            session.headers = base_headers
            return session.get(url, proxies=proxies, timeout=5).json()

    def result_to_html(self):
        """Turn the plain-text ``self.result`` into a small HTML document.

        Lines starting with ``#`` are wrapped in ``<pos><b>``, lines starting
        with ``*`` in ``<example>``, and the ``0_0: ... of`` prefix of section
        headers in ``<gray>``.
        """
        css_text = """\
<style type="text/css">
p {white-space: pre-wrap;}
pos {color: #afb7f3;}
example {color: #008080;}
gray {color: #606060;}
</style>"""
        self.result = re.sub(r'(?m)^(#.*)', r'<pos><b>\1</b></pos>', self.result)
        # NOTE(review): the example lines written by this class start with a
        # space (' * ...'), so this pattern does not match them -- confirm
        # whether the leading space or the pattern is the intended form.
        self.result = re.sub(r'(?m)^([*].*)', r'<example>\1</example>', self.result)
        self.result = re.sub(r'(?m)^(0_0:.*?of)(.*)', r'<gray>\1</gray>\2', self.result)
        # NOTE(review): get_translation writes the header as 'Translate ...'
        # without a '^_^: ' prefix, so this pattern currently never matches.
        match = re.compile(rf"(?m)^({re.escape('^_^')}: Translate)(.*)(To)(.*)")
        self.result = match.sub(r'<gray>\1</gray>\2<gray>\3</gray>\4', self.result)
        self.result = f'<html>\n<head>\n{css_text}\n</head>\n<body>\n<p>{self.result}</p>\n</body>\n</html>'

    async def get_translation(self, target_language, query_string, tkk=''):
        """Translate *query_string* and return the formatted result string.

        Two requests run concurrently in the default executor: one for
        *target_language* and one for the alternative language. When
        ``resp[2]`` equals *target_language* the alternative-language reply is
        shown as well; otherwise only the target-language reply is used.

        Request and parsing failures are not raised: a short error message is
        returned in place of the translation.
        """
        self.result = ''
        self.target_language = target_language
        self.query_string = query_string
        # Reject oversized input before spending time on the token.
        if len(self.query_string) > 5000:
            return '(╯‵□′)╯︵┻━┻: Maximum characters exceeded...'
        tk = Token(tkk).calculate_token(self.query_string)
        parse_query = urllib.parse.quote_plus(self.query_string)
        url = self.get_url(self.target_language, parse_query, tk)
        url_alt = self.get_url(self.alternative_language, parse_query, tk)
        try:
            loop = asyncio.get_running_loop()
            resp = loop.run_in_executor(None, partial(self.get_resp, url))
            resp_alt = loop.run_in_executor(None, partial(self.get_resp, url_alt))
            [resp, resp_alt] = await asyncio.gather(resp, resp_alt)
            if resp[2] == self.target_language:
                self.result += f'Translate {resp[2]} To {self.alternative_language}\n'
                self.get_result(resp)
                self.result += '---------\n'
                self.get_result(resp_alt)
                self.get_synonym(resp_alt)
            else:
                self.result += f'Translate {resp[2]} To {self.target_language}\n{self.query_string}\n---------\n'
                self.get_result(resp)
                self.get_synonym(resp)
            if self.synonyms_en and len(resp) >= 12 and resp[11]:
                self.get_synonyms_en(resp)
            if self.definitions_en and len(resp) >= 13 and resp[12]:
                self.get_definitions(resp)
            if self.examples_en and len(resp) >= 14 and resp[13]:
                self.get_examples(resp)
            if self.result_type == 'html':
                self.result_to_html()
            else:
                self.result = self.result.replace('<b>', '').replace('</b>', '')
            # Drop characters the output encoding cannot represent.
            return self.result.encode(self.result_code, 'ignore').decode(self.result_code)
        except requests.exceptions.ReadTimeout:
            return '╰(‵□′)╯: ReadTimeout...'
        except requests.exceptions.ProxyError:
            return '(╯‵□′)╯︵┻━┻: ProxyError...'
        except Exception as e:
            # Deliberate best effort: the caller gets a message, not a crash.
            return f'Errrrrrrrrror: {e}'
def get_args(argv=None):
    """Parse the command-line options and return the resulting namespace.

    Args:
        argv: optional list of argument strings; ``None`` (the default) makes
            argparse read ``sys.argv[1:]`` as before.

    Returns:
        An ``argparse.Namespace`` with ``target``, ``query``, ``host``,
        ``proxy``, ``alternative``, ``type``, ``tkk``, ``synonyms``,
        ``definitions`` and ``examples``.
    """
    default = '(default: %(default)s)'
    parser = argparse.ArgumentParser()
    # Both positionals are required, so argparse would ignore a ``default=``;
    # none is declared and the help text no longer advertises one.
    parser.add_argument('target', type=str, help='target language, eg: zh-CN')
    parser.add_argument('query', type=str, help='query string')
    parser.add_argument('-s', dest='host', type=str, default='translate.googleapis.com', help=f'host name {default}')
    parser.add_argument('-p', dest='proxy', type=str, default='', help='proxy server (eg: 127.0.0.1:1080)')
    parser.add_argument('-a', dest='alternative', type=str, default='en', help=f'alternative language {default}')
    parser.add_argument('-r', dest='type', type=str, default='html', help=f'result type {default}')
    parser.add_argument('-k', dest='tkk', type=str, default='', help='tkk')
    parser.add_argument('-m', dest='synonyms', action='store_true', help='show synonyms')
    parser.add_argument('-d', dest='definitions', action='store_true', help='show definitions')
    parser.add_argument('-e', dest='examples', action='store_true', help='show examples')
    return parser.parse_args(argv)
from collections import namedtuple

# Built once at import time: create_args runs once per word, and rebuilding
# the tuple class on every call produced a new, distinct type each time.
Args = namedtuple('Args', ['target', 'query', 'host', 'proxy', 'alternative', 'type', 'tkk', 'synonyms', 'definitions', 'examples'])


def create_args(target='en', query='', host='translate.googleapis.com', proxy='', alternative='en', type='html', tkk='', synonyms=False, definitions=False, examples=False):
    """Build an options record with the same fields as the ``get_args`` namespace.

    This lets the translator be driven from code instead of the command line.
    The ``type`` parameter shadows the builtin inside this function; the name
    is kept because callers pass it as a keyword.

    Returns:
        An ``Args`` named tuple holding the given values in declaration order.
    """
    return Args(target, query, host, proxy, alternative, type, tkk, synonyms, definitions, examples)
async def main_async(args=None):
    """Translate ``args.query`` into ``args.target`` and return the result.

    When *args* is missing (or otherwise falsy) the options are read from
    the command line via ``get_args()``.
    """
    if not args:
        args = get_args()
    translator = GoogleTranslate(args)
    return await translator.get_translation(args.target, args.query, tkk=args.tkk)
# import os
# os.listdir()
words = []
with open('./4000-1-2') as f:
words = f.readlines()
words = [word.replace('\n','') for word in words]
words = sorted(list(set(words)))
word_htmls = {}
for word in words:
result = await main_async(create_args(query=word, target='fa'))
word_htmls[word]=result
from html import escape

# Page shell, part 1: everything up to and including the opening tag of the
# "belowLine" container. The word entries are inserted right after this, so
# they are real children of #belowLine in the markup (previously they were
# appended after the closing </html> and followed by stray closing tags).
_PAGE_HEAD = '''<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body {
    background-color: #1e1e1e;
    color: #cfcfcf;
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 0;
    font-size: 16px; /* Default font size for desktop */
}
.word-container {
    border: 1px solid #a33;
    background-color: #2a2a2a;
    margin: 15px;
    padding: 5px;
    cursor: pointer;
    position: relative;
}
.word-title {
    flex-grow: 1;
    cursor: pointer;
}
.mark-read-button {
    background-color: #3a3a3a;
    color: white;
    border: 1px solid #a33;
    border-radius: 5px;
    position: absolute;
    top: 50%;
    right: 10px;
    transform: translateY(-50%);
    padding: 5px 10px;
    cursor: pointer;
}
.content {
    display: none;
    color: #f3f3f3;
    clear: both;
    padding-top: 10px;
}
.separator {
    width: 100%;
    border-top: 2px solid #a33;
    margin: 20px 0;
}
.toggle {
    cursor: pointer;
    color: #a33;
}
.fab {
    padding: 10px 15px;
    color: white;
    border: none;
    border-radius: 5px;
    cursor: pointer;
    position: fixed;
    bottom: 20px;
    z-index: 1000;
}
#collapseAllBtn {
    background-color: #a33;
    right: 20px;
}
#scrollToSeparatorBtn {
    background-color: #3a3a3a;
    right: 140px;
}
#resetBtn {
    background-color: #4a4a4a;
    left: 20px;
}
/* Responsive font size for smaller screens */
@media screen and (max-width: 600px) {
    body {
        font-size: 18px; /* Slightly larger font size for mobile */
    }
    .word-container {
        margin: 10px 5px;
        padding: 3px;
    }
}
</style>
</head>
<body>
<div id="aboveLine">
<!-- Words above the line will go here -->
</div>
<div class="separator"></div>
<div id="belowLine">
<!-- Words start out below the line -->
'''

# Page shell, part 2: closes #belowLine, then the floating buttons and the
# script that moves, expands and persists the word entries.
_PAGE_TAIL = '''</div>
<button id="collapseAllBtn" class="fab" onclick="collapseAll()">Collapse All</button>
<button id="scrollToSeparatorBtn" class="fab" onclick="scrollToSeparator()">Scroll to Separator</button>
<button id="resetBtn" class="fab" onclick="resetLayout()">Reset Changes</button>
<script>
function wordNumber(id) {
    return parseInt(id.replace('word', ''), 10);
}

// Move a word to the other side of the separator, keeping numeric order.
function togglePosition(id) {
    var element = document.getElementById(id);
    // Remember the neighbour in the current container before moving.
    var nextElementToExpand = element.nextElementSibling;
    var aboveLine = document.getElementById('aboveLine');
    var belowLine = document.getElementById('belowLine');
    var targetParent = belowLine.contains(element) ? aboveLine : belowLine;
    var children = Array.from(targetParent.children);
    var index = children.findIndex(function(child) {
        return wordNumber(child.id) > wordNumber(id);
    });
    if (index === -1) {
        targetParent.appendChild(element);
    } else {
        targetParent.insertBefore(element, children[index]);
    }
    // Collapse everything, then open the word that followed the moved one.
    collapseAll();
    if (nextElementToExpand) {
        var contentToExpand = nextElementToExpand.querySelector('.content');
        if (contentToExpand) {
            contentToExpand.style.display = 'block';
            scrollToElement(nextElementToExpand);
        }
    }
    saveState();
}

// Expand or collapse one word; every other word is collapsed.
function toggleContent(id) {
    var target = document.getElementById(id);
    document.querySelectorAll('.word-container .content').forEach(function(content) {
        if (content.parentNode.id !== id) {
            content.style.display = 'none';
        }
    });
    var content = target.querySelector('.content');
    // The stylesheet hides .content by default, so anything other than an
    // explicit 'block' (including no inline style at all) means collapsed.
    if (content.style.display !== 'block') {
        content.style.display = 'block';
        scrollToElement(target);
    } else {
        content.style.display = 'none';
    }
}

function scrollToElement(element) {
    if (element) {
        element.scrollIntoView({
            behavior: 'smooth',
            block: 'start'
        });
    }
}

// One storage key per file, derived from the page path.
function getLocalStorageKey() {
    return 'wordState-' + window.location.pathname;
}

function saveState() {
    var state = [];
    var aboveLine = document.getElementById('aboveLine');
    document.querySelectorAll('.word-container').forEach(function(container) {
        state.push({
            id: container.id,
            isAbove: aboveLine.contains(container),
            isCollapsed: container.querySelector('.content').style.display !== 'block'
        });
    });
    localStorage.setItem(getLocalStorageKey(), JSON.stringify(state));
}

function saveInitialState() {
    var initialState = [];
    document.querySelectorAll('.word-container').forEach(function(container) {
        initialState.push({
            id: container.id,
            isAbove: false,    // all words start below the line
            isCollapsed: true  // all contents start collapsed
        });
    });
    localStorage.setItem(getLocalStorageKey(), JSON.stringify(initialState));
}

function loadState() {
    var state = JSON.parse(localStorage.getItem(getLocalStorageKey()));
    if (!state) {
        // Nothing saved yet: store the initial layout and use it.
        saveInitialState();
        state = JSON.parse(localStorage.getItem(getLocalStorageKey()));
    }
    applyState(state);
}

function applyState(state) {
    var aboveLine = document.getElementById('aboveLine');
    var belowLine = document.getElementById('belowLine');
    state.forEach(function(item) {
        var element = document.getElementById(item.id);
        if (element) {
            (item.isAbove ? aboveLine : belowLine).appendChild(element);
            element.querySelector('.content').style.display = item.isCollapsed ? 'none' : 'block';
        }
    });
}

function collapseAll() {
    document.querySelectorAll('.word-container .content').forEach(function(content) {
        content.style.display = 'none';
    });
}

function scrollToSeparator() {
    document.querySelector('.separator').scrollIntoView({
        behavior: 'smooth'
    });
}

// Forget the saved layout; the reload rebuilds the initial one.
function resetLayout() {
    localStorage.removeItem(getLocalStorageKey());
    window.location.reload();
}

window.onload = function() {
    loadState();
};
</script>
</body>
</html>
'''


def _render_word(index, word, body_html):
    """Return the collapsible entry for one word.

    *word* is plain text and is HTML-escaped. *body_html* is inserted as-is:
    it is the translator output, which is already markup (or a short error
    message when the lookup failed).
    """
    element_id = f'word{index}'
    return f'''
<div id="{element_id}" class="word-container">
<div class="word-title" onclick="toggleContent('{element_id}')"><span class="toggle">></span> {escape(word)}</div>
<button class="mark-read-button" onclick="togglePosition('{element_id}')">mark</button>
<div class="content" onclick="toggleContent('{element_id}')">{body_html}</div>
</div>
'''


# Assemble the page: shell head, one entry per word inside #belowLine, tail.
combined_html = _PAGE_HEAD + ''.join(
    _render_word(index, word, word_html)
    for index, (word, word_html) in enumerate(word_htmls.items(), start=1)
) + _PAGE_TAIL

# Write to a file
with open("combined_words.html", "w", encoding="utf-8") as file:
    file.write(combined_html)
@farzadhallaji
Copy link
Author

pip install google-translate-for-goldendict

@farzadhallaji
Copy link
Author

reference: github repo

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment