Skip to content

Instantly share code, notes, and snippets.

@qguv
Last active January 6, 2024 14:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save qguv/4ff8ecb09ff63d4cd5dd41d77aac7bb9 to your computer and use it in GitHub Desktop.
Save qguv/4ff8ecb09ff63d4cd5dd41d77aac7bb9 to your computer and use it in GitHub Desktop.
Extract the most commonly used stickers from your Telegram chat history.
#!/usr/bin/env python3
'''
Extract the most commonly used stickers from your Telegram chat history as:
- a JSON obj mapping sticker paths to the number of times sent; or
- the above in text; or
- a simple webpage showing all the stickers.
'''
# Help text passed to argparse as the epilog (printed after the option list);
# it walks the user through producing the Telegram export this script reads.
_epilog = '''\
To get your Telegram chat history:
0. download telegram desktop and log in
1. go to settings -> advanced -> export telegram data
2. make sure only the following boxes are ticked:
- account information
- personal chats
- private groups & only my messages
- public groups & only my messages
- stickers
3. under "location and format", select "machine readable json"
4. click export
'''
import argparse
import collections
import hashlib
import http.server
import json
import os
import pathlib
import socketserver
# Number of bytes calc_md5() reads from a file per chunk while hashing (64 KiB).
MD5_BUF_SIZE = 65536
def get_sticker_msgs(tg_export):
    '''
    Yield one record per sticker sent by the owner of the export.

    tg_export is the parsed export JSON. A message is reported when its type
    is 'message', its from_id matches the exporting user, and its media_type
    is 'sticker'. Each yielded dict carries that message's 'date' and 'file'.
    '''
    uid = tg_export['personal_information']['user_id']
    own_id = f'user{uid}'

    for chat in tg_export['chats']['list']:
        for msg in chat['messages']:
            # guard clauses, checked in the same order as a short-circuit `and`
            if msg['type'] != 'message':
                continue
            if msg['from_id'] != own_id:
                continue
            if msg.get('media_type') != 'sticker':
                continue
            yield {'date': msg['date'], 'file': msg['file']}
def calc_md5(path):
    '''
    Return the hexadecimal MD5 digest of the file at path.

    The file is opened in binary mode and consumed MD5_BUF_SIZE bytes at a
    time, so it is never held in memory as a whole.
    '''
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF
        for block in iter(lambda: stream.read(MD5_BUF_SIZE), b''):
            digest.update(block)
    return digest.hexdigest()
class Canon:
    '''
    Map file paths to one canonical path per distinct file content.

    Indexing with a path returns the first path seen whose MD5 digest matches,
    so byte-identical files stored under different names collapse to one key.
    '''

    def __init__(self):
        # digest cache, so each path is hashed at most once
        self.path_to_md5 = {}
        # first path seen for each digest
        self.md5_to_path = {}

    def __getitem__(self, path):
        '''Return the canonical path for path, hashing the file on first use.'''
        digest = self.path_to_md5.get(path)
        if digest is None:
            digest = calc_md5(path)
            self.path_to_md5[path] = digest
        # setdefault registers path as the canonical one when the digest is new
        return self.md5_to_path.setdefault(digest, path)
def parse_args():
    '''
    Parse the command line and return the resulting argparse namespace.

    The namespace always carries input (the opened export file) and cmd
    ('serve' or 'write'). For 'write' it also carries OUTFILE (the opened
    output file, stdout when omitted or '-') and format, taken from --format
    or, failing that, from OUTFILE's suffix.

    Invalid invocations (no subcommand, stdin as input, undeterminable or
    unsupported output format) exit through parser.error().
    '''
    formats = ('json', 'html', 'txt')

    parser = argparse.ArgumentParser(description=__doc__, epilog=_epilog, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--input', '-i', action='store', type=argparse.FileType('rb'), default='result.json', help='the telegram export file (default: %(default)s)')

    subparsers = parser.add_subparsers()

    serve_parser = subparsers.add_parser('serve', help='create a local webserver to browse the results')
    serve_parser.set_defaults(cmd='serve')

    write_parser = subparsers.add_parser('write', help='write results to a file or stdout')
    write_parser.set_defaults(cmd='write')
    # nargs='?' is what lets the '-' default apply; without it the positional
    # is mandatory and the default is never used. utf-8 matches the charset
    # the generated HTML declares.
    write_parser.add_argument('OUTFILE', nargs='?', type=argparse.FileType('w', encoding='utf-8'), default='-', help='path to a file to write to, or - for stdout')
    write_parser.add_argument('--format', action='store', choices=formats, help='override detected output format, or provide a format when using stdout for output')

    args = parser.parse_args()

    if 'cmd' not in args:
        parser.error('subcommand required')

    if args.input.name == '<stdin>':
        parser.error("can't use stdin as input, because the stickers directory would be unknown")

    if args.cmd == 'write' and not args.format:
        # OUTFILE is an open file object, not a string: inspect its name,
        # which is the path given on the command line ('<stdout>' for '-').
        outname = str(getattr(args.OUTFILE, 'name', ''))
        suffix = pathlib.Path(outname).suffix.lstrip('.').lower()
        if not suffix:
            parser.error('cannot determine output filetype; either give OUTFILE a suffix, or use --format')
        if suffix not in formats:
            parser.error(f"unsupported output filetype '{suffix}'; use --format with one of: {', '.join(formats)}")
        args.format = suffix

    return args
# Opening part of the generated page, printed by export_html() before the
# sticker list: doctype, <head> with inline CSS, a small script that closes
# every other <details> group when one <summary> is clicked, the tgs-player
# script used for .tgs stickers, and the opening <body> tag.
html_start = '''\
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>telegram sticker usage</title>
<style>
* {
box-sizing: border-box;
}
body {
background: rgb(63,94,251);
background: linear-gradient(90deg, rgba(63,94,251,1) 0%, rgba(252,70,107,1) 100%);
text-align: justify;
color: white;
font-size: 150%;
font-family: sans-serif;
}
h1 {
text-align: center;
text-shadow: 0 0 5px black;
}
.sticker {
display: inline-block;
background-color: #333d;
margin: 10px;
border: 6px solid black;
border-radius: 14px;
}
.sticker .title {
border-radius: 12px 12px 0 0;
vertical-align: bottom;
padding: 18px 0;
background: white;
color: #333;
text-align: center;
}
.sticker .content {
margin: 10px;
}
.sticker img {
max-width: 300px;
max-height: 300px;
}
.sticker tgs-player {
width: 300px;
height: 300px;
}
</style>
<script>
function summary_clicked(ev) {
for (const summary of document.getElementsByTagName('summary')) {
if (summary !== ev.target) {
summary.parentElement.removeAttribute('open');
}
}
}
function dom_content_loaded() {
for (const summary of document.getElementsByTagName('summary')) {
summary.addEventListener('click', summary_clicked);
}
}
document.addEventListener('DOMContentLoaded', dom_content_loaded);
</script>
<script src="https://unpkg.com/@lottiefiles/lottie-player@0.4.0/dist/tgs-player.js"></script>
</head>
<body>
'''
# Closing part of the generated page, printed by export_html() after the
# sticker list: closes <body> and <html>.
html_end = '''
</body>
</html>
'''
def cmd_serve(tg_export, stats, _args):
    '''
    Write index.html into the current directory and serve that directory
    over HTTP on port 8000 until interrupted with Ctrl-C.

    tg_export and stats are passed straight to export_html(); _args is
    unused. index.html is removed again when the server stops or fails to
    start. An existing index.html in the current directory is overwritten
    and then deleted.
    '''
    PORT = 8000
    Handler = http.server.SimpleHTTPRequestHandler

    # The page declares charset utf-8 and contains non-ASCII text, so don't
    # depend on the locale's default encoding.
    with open('index.html', 'w', encoding='utf-8') as f:
        export_html(tg_export, stats, f)

    try:
        # Bind to loopback only. main() changes into the directory holding
        # the export file before calling this, so listening on every
        # interface would expose the export to the whole network.
        with socketserver.TCPServer(('127.0.0.1', PORT), Handler) as httpd:
            print(f'http://127.0.0.1:{PORT}')
            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                # Ctrl-C is the intended way to stop the server
                pass
    finally:
        # also runs when the port is already in use, so the generated page
        # is not left behind
        os.remove('index.html')
def cmd_write(tg_export, stats, args):
    '''
    Write the sticker statistics to args.OUTFILE in the format args.format.

    'json' -- an object mapping sticker path to times sent, most used first
    'txt'  -- one "count path" line per sticker, most used first
    'html' -- the page produced by export_html()

    Raises ValueError for any other format instead of silently writing
    nothing.
    '''
    # The parser declares the positional as OUTFILE, so that is the attribute
    # name on the namespace; there is no args.output.
    out = args.OUTFILE

    if args.format == 'json':
        json.dump(dict(stats.most_common()), out, indent=4)
    elif args.format == 'txt':
        for path, freq in stats.most_common():
            print(f'{freq:5} {path}', file=out)
    elif args.format == 'html':
        export_html(tg_export, stats, out)
    else:
        raise ValueError(f'unsupported output format: {args.format!r}')
def main(args):
    '''
    Load the export, count sticker usage, and run the chosen subcommand.

    args is the namespace returned by parse_args(). Returns whatever the
    subcommand handler returns.
    '''
    # Work from the directory containing the export file so that the sticker
    # file paths recorded in it can be opened as they are.
    os.chdir(pathlib.Path(args.input.name).parent)

    # argparse opened the input file; close it as soon as it has been parsed
    with args.input:
        tg_export = json.load(args.input)

    msgs = get_sticker_msgs(tg_export)

    # Canon folds byte-identical sticker files into one key before counting
    canon = Canon()
    stats = collections.Counter(canon[msg['file']] for msg in msgs)

    if args.cmd == 'serve':
        return cmd_serve(tg_export, stats, args)
    elif args.cmd == 'write':
        return cmd_write(tg_export, stats, args)
def export_html(tg_export, stats, outpath):
    '''
    Write a complete HTML page showing every sticker, most used first.

    tg_export -- parsed export; personal_information.username goes in the heading
    stats     -- Counter mapping sticker path to the number of times sent
    outpath   -- despite the name, a writable text file object, not a path

    Stickers are grouped ten at a time into <details> elements, with only the
    first group initially open. Paths ending in .tgs are rendered with
    <tgs-player>, everything else with <img>.
    '''
    # local import: the file's import block does not bring in html
    from html import escape

    ranked = stats.most_common()
    total = len(ranked)

    print(html_start, file=outpath)

    # str() keeps a missing (None) username printable, as plain interpolation would
    username = escape(str(tg_export['personal_information']['username']))
    print(f"<h1>{username}'s sticker addiction</h1>", file=outpath)

    for i, (path, freq) in enumerate(ranked):
        if i % 10 == 0:
            if i == 0:
                print('<details open>', file=outpath)
            else:
                print('</details><details>', file=outpath)
            # cap the upper bound so the last group is not labelled past the
            # final sticker
            print(f'<summary>#{i+1}—{min(i+10, total)}</summary>', file=outpath)

        # file names end up inside a double-quoted attribute, so escape them
        src = escape(path, quote=True)
        if path.endswith('.tgs'):
            img = f'<tgs-player hover loop mode="normal" src="{src}"></tgs-player>'
        else:
            img = f'<img src="{src}">'

        print(f'<div class="sticker"><div class="title">#{i+1} - sent {freq} times</div><div class="content">{img}</div></div>', file=outpath)

    # only close a <details> group if one was opened
    if ranked:
        print('</details>', file=outpath)

    print(html_end, file=outpath)
# Script entry point: parse the command line and run, but only when executed
# directly rather than imported.
if __name__ == '__main__':
    main(parse_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment