Skip to content

Instantly share code, notes, and snippets.

Last active January 6, 2024 14:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save qguv/4ff8ecb09ff63d4cd5dd41d77aac7bb9 to your computer and use it in GitHub Desktop.
Save qguv/4ff8ecb09ff63d4cd5dd41d77aac7bb9 to your computer and use it in GitHub Desktop.
Extract the most commonly used stickers from your Telegram chat history.
#!/usr/bin/env python3
Extract the most commonly used stickers from your Telegram chat history as:
- a JSON obj mapping sticker paths to the number of times sent; or
- the above in text; or
- a simple webpage showing all the stickers.
_epilog = '''\
To get your Telegram chat history:
0. download telegram desktop and log in
1. go to settings -> advanced -> export telegram data
2. make sure only the following boxes are ticked:
- account information
- personal chats
- private groups & only my messages
- public groups & only my messages
- stickers
3. under "location and format", select "machine readable json"
4. click export
import argparse
import collections
import hashlib
import http.server
import json
import os
import pathlib
import socketserver
# 65536 bytes (64 KiB). Presumably the chunk size used when reading files for
# MD5 hashing in calc_md5 -- TODO confirm against that function.
MD5_BUF_SIZE = 65536
def get_sticker_msgs(tg_export):
    """Yield every sticker message sent by the owner of the export.

    Args:
        tg_export: the parsed export; this function reads
            ``personal_information.user_id`` and, for each chat in
            ``chats.list``, its ``messages``.

    Yields:
        ``{'date': ..., 'file': ...}`` for each entry whose ``type`` is
        ``'message'``, whose ``from_id`` is ``user<user_id>`` and whose
        ``media_type`` is ``'sticker'``.
    """
    uid = tg_export['personal_information']['user_id']
    from_id = f'user{uid}'
    for chat in tg_export['chats']['list']:
        for message in chat['messages']:
            # The type check comes first so that entries of other types,
            # which may lack 'from_id', short-circuit before the key lookup.
            if (
                message['type'] == 'message'
                and message['from_id'] == from_id
                and message.get('media_type') == 'sticker'
            ):
                yield {
                    'date': message['date'],
                    'file': message['file'],
                }
def calc_md5(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in chunks of ``MD5_BUF_SIZE`` bytes so that large files
    are never loaded into memory at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(MD5_BUF_SIZE), b''):
            md5.update(chunk)
    return md5.hexdigest()
class Canon:
    """Collapse files with identical content onto one canonical path.

    ``canon[path]`` returns the first path seen whose MD5 digest equals that
    of ``path``; a path with a digest not seen before becomes canonical for
    that digest and is returned as-is.
    """

    def __init__(self):
        # Cache of path -> digest, so each file is hashed at most once.
        self.path_to_md5 = {}
        # digest -> first path seen with that digest (the canonical one).
        self.md5_to_path = {}

    def __getitem__(self, path):
        """Return the canonical path for ``path``, hashing it if needed."""
        try:
            md5 = self.path_to_md5[path]
        except KeyError:
            md5 = self.path_to_md5[path] = calc_md5(path)
        # setdefault registers ``path`` as canonical only on first sight.
        return self.md5_to_path.setdefault(md5, path)
def parse_args(argv=None):
    """Parse and validate the command line.

    Args:
        argv: argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``input`` (open binary file), ``cmd``
        (``'serve'`` or ``'write'``) and, for ``write``, ``OUTFILE`` (open
        text file) and ``format`` (one of ``json``/``html``/``txt``).

    Exits via ``parser.error`` when no subcommand is given, when the input is
    stdin, or when the output format can be neither read from ``--format``
    nor derived from the suffix of OUTFILE.
    """
    formats = ('json', 'html', 'txt')

    parser = argparse.ArgumentParser(
        description=__doc__,
        epilog=_epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--input', '-i', action='store', type=argparse.FileType('rb'), default='result.json',
        help='the telegram export file (default: %(default)s)',
    )
    subparsers = parser.add_subparsers()

    serve_parser = subparsers.add_parser('serve', help='create a local webserver to browse the results')
    serve_parser.set_defaults(cmd='serve')

    write_parser = subparsers.add_parser('write', help='write results to a file or stdout')
    write_parser.set_defaults(cmd='write')
    # nargs='?' makes the declared default ('-', i.e. stdout) reachable.
    # UTF-8 matches the charset the HTML output declares.
    write_parser.add_argument(
        'OUTFILE', nargs='?', type=argparse.FileType('w', encoding='utf-8'), default='-',
        help='path to a file to write to, or - for stdout',
    )
    write_parser.add_argument(
        '--format', action='store', choices=formats,
        help='override detected output format, or provide a format when using stdout for output',
    )

    args = parser.parse_args(argv)

    # 'cmd' is only set through the subparser defaults above.
    if 'cmd' not in args:
        parser.error('subcommand required')

    if args.input.name == '<stdin>':
        parser.error("can't use stdin as input, because the stickers directory would be unknown")

    if args.cmd == 'write' and not args.format:
        # Use only the final path component's suffix, so a dot in a directory
        # name is not mistaken for a file extension. stdout has no suffix.
        suffix = pathlib.Path(str(args.OUTFILE.name)).suffix.lstrip('.').lower()
        if suffix not in formats:
            parser.error('cannot determine output filetype; either give OUTFILE a suffix, or use --format')
        args.format = suffix

    return args
# Static page prologue: styles, the accordion script, and the loader for the
# <tgs-player> custom element used for animated (.tgs) stickers.
# NOTE(review): this copy of the file had lost the <style>/<script> tags, all
# closing braces, the body of summary_clicked and the player script URL; they
# are reconstructed here -- confirm against the upstream gist.
html_start = '''\
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>telegram sticker usage</title>
<style>
    * {
        box-sizing: border-box;
    }
    body {
        background: rgb(63,94,251);
        background: linear-gradient(90deg, rgba(63,94,251,1) 0%, rgba(252,70,107,1) 100%);
        text-align: justify;
        color: white;
        font-size: 150%;
        font-family: sans-serif;
    }
    h1 {
        text-align: center;
        text-shadow: 0 0 5px black;
    }
    .sticker {
        display: inline-block;
        background-color: #333d;
        margin: 10px;
        border: 6px solid black;
        border-radius: 14px;
    }
    .sticker .title {
        border-radius: 12px 12px 0 0;
        vertical-align: bottom;
        padding: 18px 0;
        background: white;
        color: #333;
        text-align: center;
    }
    .sticker .content {
        margin: 10px;
    }
    .sticker img {
        max-width: 300px;
        max-height: 300px;
    }
    .sticker tgs-player {
        width: 300px;
        height: 300px;
    }
</style>
<script>
    function summary_clicked(ev) {
        for (const summary of document.getElementsByTagName('summary')) {
            if (summary !== ev.target) {
                summary.parentElement.removeAttribute('open');
            }
        }
    }
    function dom_content_loaded() {
        for (const summary of document.getElementsByTagName('summary')) {
            summary.addEventListener('click', summary_clicked);
        }
    }
    document.addEventListener('DOMContentLoaded', dom_content_loaded);
</script>
<script src="https://unpkg.com/@lottiefiles/lottie-player@latest/dist/tgs-player.js"></script>
</head>
<body>
'''

# Static page epilogue, printed after the last sticker.
html_end = '''
</body>
</html>
'''
def cmd_serve(tg_export, stats, _args):
    """Write the report to ``index.html`` and serve the current directory.

    The page is written into the current working directory and that directory
    is then served over HTTP on port 8000 until interrupted with Ctrl-C.

    Args:
        tg_export: the parsed export, passed through to ``export_html``.
        stats: sticker usage counter, passed through to ``export_html``.
        _args: unused; present so all subcommands share one signature.
    """
    # The page declares charset=utf-8 and contains non-ASCII text, so the
    # file must not be written in the platform's locale encoding.
    with open('index.html', 'w', encoding='utf-8') as f:
        export_html(tg_export, stats, f)

    PORT = 8000
    Handler = http.server.SimpleHTTPRequestHandler
    with socketserver.TCPServer(("", PORT), Handler) as httpd:
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the intended way to stop the server; exit quietly.
            pass
def cmd_write(tg_export, stats, args):
    """Write the sticker statistics to ``args.OUTFILE`` in ``args.format``.

    Args:
        tg_export: the parsed export; only needed for the ``html`` format.
        stats: ``collections.Counter`` mapping sticker path to times sent.
        args: namespace providing ``format`` (``json``, ``txt`` or ``html``)
            and ``OUTFILE`` (a writable text file object).

    Raises:
        ValueError: if ``args.format`` is not one of the supported formats.
    """
    # The parser stores the output file under the positional's name, OUTFILE.
    out = args.OUTFILE
    if args.format == 'json':
        # dict() preserves the most-common-first order of the pairs.
        json.dump(dict(stats.most_common()), out, indent=4)
    elif args.format == 'txt':
        for path, freq in stats.most_common():
            print(f'{freq:5} {path}', file=out)
    elif args.format == 'html':
        export_html(tg_export, stats, out)
    else:
        raise ValueError(f'unsupported output format: {args.format!r}')
def main(args):
    """Load the export, count sticker usage and run the chosen subcommand.

    Args:
        args: namespace providing ``input`` (open file containing the JSON
            export) and ``cmd`` (``'serve'`` or ``'write'``), plus whatever
            the chosen subcommand reads.

    Returns:
        Whatever the subcommand handler returns; ``None`` for any other
        ``cmd`` value.
    """
    # Close the input as soon as it has been parsed.
    with args.input:
        tg_export = json.load(args.input)

    # Sticker files are opened by the path recorded in the export and the
    # serve command serves the working directory. Both appear to assume the
    # process runs from the export's own directory, so switch to it to make
    # ``-i some/dir/result.json`` work.
    # NOTE(review): confirm that paths in the export are relative to the
    # directory holding the export file.
    input_name = getattr(args.input, 'name', None)
    if isinstance(input_name, str):
        os.chdir(pathlib.Path(input_name).resolve().parent)

    msgs = get_sticker_msgs(tg_export)
    canon = Canon()
    # Count by canonical path so identical files are tallied together.
    stats = collections.Counter(canon[msg['file']] for msg in msgs)

    if args.cmd == 'serve':
        return cmd_serve(tg_export, stats, args)
    elif args.cmd == 'write':
        return cmd_write(tg_export, stats, args)
def export_html(tg_export, stats, outpath):
    """Write the sticker report as an HTML page.

    Stickers are listed most-used first in groups of ten, each group inside
    its own ``<details>`` element; only the first group starts open. Files
    ending in ``.tgs`` are embedded with ``<tgs-player>``, everything else
    with ``<img>``.

    Args:
        tg_export: the parsed export; ``personal_information.username`` is
            used for the page heading.
        stats: ``collections.Counter`` mapping sticker path to times sent.
        outpath: writable text file object that receives the page (despite
            the name, this is a file object, not a path).
    """
    # Function-scope import so the block does not depend on a new file-level
    # import.
    from html import escape

    ranked = stats.most_common()
    total = len(ranked)

    print(html_start, file=outpath)
    # The username and file paths come from the export; escape them so they
    # cannot break out of the markup.
    username = escape(str(tg_export['personal_information']['username']))
    print(f"<h1>{username}'s sticker addiction</h1>", file=outpath)

    for i, (path, freq) in enumerate(ranked):
        if i % 10 == 0:
            if i == 0:
                print('<details open>', file=outpath)
            else:
                print('</details><details>', file=outpath)
            # Clamp the upper bound so the last group does not claim ranks
            # beyond the number of stickers.
            print(f'<summary>#{i+1}—{min(i + 10, total)}</summary>', file=outpath)

        src = escape(path)
        if path.endswith('.tgs'):
            img = f'<tgs-player hover loop mode="normal" src="{src}"></tgs-player>'
        else:
            img = f'<img src="{src}">'
        print(f'<div class="sticker"><div class="title">#{i+1} - sent {freq} times</div><div class="content">{img}</div></div>', file=outpath)

    # Only close a <details> if one was opened.
    if ranked:
        print('</details>', file=outpath)
    print(html_end, file=outpath)
# Script entry point: parse the command line and run the chosen subcommand.
if __name__ == '__main__':
    main(parse_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment