Last active
January 6, 2024 14:01
-
-
Save qguv/4ff8ecb09ff63d4cd5dd41d77aac7bb9 to your computer and use it in GitHub Desktop.
Extract the most commonly used stickers from your Telegram chat history.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
''' | |
Extract the most commonly used stickers from your Telegram chat history as: | |
- a JSON obj mapping sticker paths to the number of times sent; or | |
- the above in text; or | |
- a simple webpage showing all the stickers. | |
''' | |
_epilog = '''\ | |
To get your Telegram chat history: | |
0. download telegram desktop and log in | |
1. go to settings -> advanced -> export telegram data | |
2. make sure only the following boxes are ticked: | |
- account information | |
- personal chats | |
- private groups & only my messages | |
- public groups & only my messages | |
- stickers | |
3. under "location and format", select "machine readable json" | |
4. click export | |
''' | |
import argparse | |
import collections | |
import hashlib | |
import http.server | |
import json | |
import os | |
import pathlib | |
import socketserver | |
# Number of bytes read per chunk when hashing a sticker file (64 KiB).
MD5_BUF_SIZE = 64 * 1024
def get_sticker_msgs(tg_export):
    '''
    Yield one {'date', 'file'} dict for every sticker the exporting user sent.

    tg_export is the parsed export. The sender id is built from
    personal_information.user_id, and every chat under chats.list is scanned.
    Only entries whose type is 'message', whose from_id matches that sender id
    and whose media_type is 'sticker' are yielded.
    '''
    own_id = f"user{tg_export['personal_information']['user_id']}"
    for chat in tg_export['chats']['list']:
        for msg in chat['messages']:
            # Check the type first: from_id is only looked up on entries
            # that are real messages.
            if msg['type'] != 'message':
                continue
            if msg['from_id'] != own_id:
                continue
            if msg.get('media_type') != 'sticker':
                continue
            yield {'date': msg['date'], 'file': msg['file']}
def calc_md5(path):
    '''
    Return the hexadecimal MD5 digest of the file at `path`.

    The file is opened in binary mode and consumed MD5_BUF_SIZE bytes at a
    time, so it is never held in memory as a whole.
    '''
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        # iter() keeps calling read() until it returns the empty-bytes sentinel (EOF).
        for piece in iter(lambda: stream.read(MD5_BUF_SIZE), b''):
            digest.update(piece)
    return digest.hexdigest()
class Canon:
    '''
    Map file paths to one canonical path per distinct file content.

    Indexing with a path returns the first path seen whose MD5 digest equals
    that path's digest, so byte-identical files collapse onto one name.
    Digests are cached per path in path_to_md5.
    '''

    def __init__(self):
        # digest -> first path seen with that digest
        self.md5_to_path = {}
        # path -> digest, so each file is hashed at most once
        self.path_to_md5 = {}

    def __getitem__(self, path):
        '''Return the canonical path for `path`, hashing the file on first use.'''
        if path not in self.path_to_md5:
            self.path_to_md5[path] = calc_md5(path)
        digest = self.path_to_md5[path]
        # The first path registered for a digest wins; later duplicates resolve to it.
        return self.md5_to_path.setdefault(digest, path)
def parse_args():
    '''
    Parse the command line and return the resulting argparse namespace.

    The namespace always has `input` (the export file, opened in binary mode)
    and `cmd` ('serve' or 'write'). For 'write' it also has `OUTFILE` (a text
    file object opened for writing, stdout when omitted or given as '-') and
    `format` ('json', 'html' or 'txt').

    When --format is not given for 'write', the format is taken from the
    suffix of OUTFILE's name. The parser exits with a usage error when no
    subcommand is given, when the input is stdin, or when the output format
    can neither be detected nor is one of the supported formats.
    '''
    formats = ('json', 'html', 'txt')

    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=_epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--input', '-i',
        action='store',
        type=argparse.FileType('rb'),
        default='result.json',
        help='the telegram export file (default: %(default)s)',
    )
    subparsers = parser.add_subparsers()

    serve_parser = subparsers.add_parser('serve', help='create a local webserver to browse the results')
    serve_parser.set_defaults(cmd='serve')

    write_parser = subparsers.add_parser('write', help='write results to a file or stdout')
    write_parser.set_defaults(cmd='write')
    # nargs='?' is what makes default='-' take effect; without it the
    # positional is mandatory and the default is never used.
    write_parser.add_argument(
        'OUTFILE',
        nargs='?',
        type=argparse.FileType('w'),
        default='-',
        help='path to a file to write to, or - for stdout',
    )
    write_parser.add_argument(
        '--format',
        action='store',
        choices=formats,
        help='override detected output format, or provide a format when using stdout for output',
    )

    args = parser.parse_args()

    if 'cmd' not in args:
        parser.error('subcommand required')

    if args.input.name == '<stdin>':
        parser.error("can't use stdin as input, because the stickers directory would be unknown")

    # Compare the parsed subcommand, not the literal string 'cmd'.
    if args.cmd == 'write' and not args.format:
        # OUTFILE is an open file object; its .name is the path it was opened
        # from ('<stdout>' for '-', which has no suffix).
        suffix = pathlib.Path(args.OUTFILE.name).suffix
        if not suffix:
            parser.error('cannot determine output filetype; either give OUTFILE a suffix, or use --format')
        detected = suffix[1:].lower()
        if detected not in formats:
            parser.error(f'unsupported output filetype {detected!r}; use --format with one of: ' + ', '.join(formats))
        args.format = detected

    return args
html_start = '''\ | |
<!doctype html> | |
<html> | |
<head> | |
<meta charset="utf-8"> | |
<title>telegram sticker usage</title> | |
<style> | |
* { | |
box-sizing: border-box; | |
} | |
body { | |
background: rgb(63,94,251); | |
background: linear-gradient(90deg, rgba(63,94,251,1) 0%, rgba(252,70,107,1) 100%); | |
text-align: justify; | |
color: white; | |
font-size: 150%; | |
font-family: sans-serif; | |
} | |
h1 { | |
text-align: center; | |
text-shadow: 0 0 5px black; | |
} | |
.sticker { | |
display: inline-block; | |
background-color: #333d; | |
margin: 10px; | |
border: 6px solid black; | |
border-radius: 14px; | |
} | |
.sticker .title { | |
border-radius: 12px 12px 0 0; | |
vertical-align: bottom; | |
padding: 18px 0; | |
background: white; | |
color: #333; | |
text-align: center; | |
} | |
.sticker .content { | |
margin: 10px; | |
} | |
.sticker img { | |
max-width: 300px; | |
max-height: 300px; | |
} | |
.sticker tgs-player { | |
width: 300px; | |
height: 300px; | |
} | |
</style> | |
<script> | |
function summary_clicked(ev) { | |
for (const summary of document.getElementsByTagName('summary')) { | |
if (summary !== ev.target) { | |
summary.parentElement.removeAttribute('open'); | |
} | |
} | |
} | |
function dom_content_loaded() { | |
for (const summary of document.getElementsByTagName('summary')) { | |
summary.addEventListener('click', summary_clicked); | |
} | |
} | |
document.addEventListener('DOMContentLoaded', dom_content_loaded); | |
</script> | |
<script src="https://unpkg.com/@lottiefiles/lottie-player@0.4.0/dist/tgs-player.js"></script> | |
</head> | |
<body> | |
''' | |
html_end = ''' | |
</body> | |
</html> | |
''' | |
def cmd_serve(tg_export, stats, _args):
    '''
    Serve the HTML report over HTTP from the current directory until Ctrl-C.

    The report is written to a temporary index.html in the current directory,
    served by SimpleHTTPRequestHandler on port 8000, and removed again when
    the server stops, including when it fails to start.

    tg_export and stats are passed straight to export_html; _args is unused.
    '''
    PORT = 8000
    Handler = http.server.SimpleHTTPRequestHandler

    try:
        # The page declares charset utf-8 and contains non-ASCII text, so do
        # not rely on the platform's default encoding.
        with open('index.html', 'w', encoding='utf-8') as f:
            export_html(tg_export, stats, f)

        # Bind to loopback only: this serves the whole export directory, and
        # the advertised URL is the loopback address.
        with socketserver.TCPServer(('127.0.0.1', PORT), Handler) as httpd:
            print(f'http://127.0.0.1:{PORT}')
            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                pass
    finally:
        # Clean up even if the port was busy or the server raised.
        try:
            os.remove('index.html')
        except FileNotFoundError:
            pass
def cmd_write(tg_export, stats, args):
    '''
    Write the sticker statistics to the output file in the requested format.

    tg_export: parsed export; only used for the 'html' format.
    stats: Counter mapping sticker path to times sent.
    args: namespace with `format` ('json', 'txt' or 'html') and the
        destination text file object in `OUTFILE` (the name the argument
        parser defines); `output` is accepted as a fallback.

    Raises ValueError for any other format rather than silently writing
    nothing.
    '''
    # The parser stores the positional as OUTFILE; reading args.output alone
    # raised AttributeError.
    out = getattr(args, 'OUTFILE', None)
    if out is None:
        out = args.output

    ranked = stats.most_common()
    if args.format == 'json':
        json.dump(dict(ranked), out, indent=4)
    elif args.format == 'txt':
        for path, freq in ranked:
            print(f'{freq:5} {path}', file=out)
    elif args.format == 'html':
        export_html(tg_export, stats, out)
    else:
        raise ValueError(f'unsupported output format: {args.format!r}')
def main(args):
    '''
    Count sticker usage from the export and run the chosen subcommand.

    args: namespace from parse_args; `input` is the open export file and
        `cmd` selects 'serve' or 'write'.

    Changes the working directory to the export file's directory, because the
    sticker paths are opened as given in the export from that directory.
    Returns whatever the subcommand handler returns.
    '''
    os.chdir(pathlib.Path(args.input.name).parent)

    # Close the export file as soon as it has been parsed.
    with args.input:
        tg_export = json.load(args.input)

    canon = Canon()
    # Count by canonical path so byte-identical sticker files are merged.
    stats = collections.Counter(
        canon[msg['file']] for msg in get_sticker_msgs(tg_export)
    )

    if args.cmd == 'serve':
        return cmd_serve(tg_export, stats, args)
    elif args.cmd == 'write':
        return cmd_write(tg_export, stats, args)
def export_html(tg_export, stats, outpath):
    '''
    Write the sticker usage report as a complete HTML page.

    tg_export: parsed export; only personal_information is read, for the
        page heading.
    stats: Counter mapping sticker path to times sent; stickers are listed
        in most_common() order, in collapsible groups of ten.
    outpath: despite the name, a writable text file object (it is passed to
        print() as file=).

    Paths ending in .tgs are rendered with a <tgs-player> element, everything
    else with <img>.
    '''
    # Local import keeps this function self-contained; the file does not
    # import html at module level.
    from html import escape

    info = tg_export['personal_information']
    # NOTE(review): assumes username may be missing or empty, and that
    # first_name may be present as a fallback - confirm against a real export.
    username = info.get('username') or info.get('first_name') or 'user'

    ranked = stats.most_common()
    total = len(ranked)

    print(html_start, file=outpath)
    print(f"<h1>{escape(str(username))}'s sticker addiction</h1>", file=outpath)

    for i, (path, freq) in enumerate(ranked):
        if i % 10 == 0:
            if i == 0:
                print('<details open>', file=outpath)
            else:
                print('</details><details>', file=outpath)
            # Clamp the upper bound so the last group does not claim
            # rankings that do not exist.
            print(f'<summary>#{i+1}—{min(i + 10, total)}</summary>', file=outpath)

        # Paths come from the export file; escape them before placing them
        # inside an attribute value.
        src = escape(path)
        if path.endswith('.tgs'):
            img = f'<tgs-player hover loop mode="normal" src="{src}"></tgs-player>'
        else:
            img = f'<img src="{src}">'

        print(f'<div class="sticker"><div class="title">#{i+1} - sent {freq} times</div><div class="content">{img}</div></div>', file=outpath)

    # Only close a <details> element if one was opened.
    if ranked:
        print('</details>', file=outpath)
    print(html_end, file=outpath)
# Script entry point: parse the command line, then run the selected subcommand.
if __name__ == '__main__':
    cli_args = parse_args()
    main(cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment