Skip to content

Instantly share code, notes, and snippets.

@iwalfy
Last active June 26, 2023 07:52
Show Gist options
  • Save iwalfy/094b237fbfdc901b8c8c779181707890 to your computer and use it in GitHub Desktop.
Save iwalfy/094b237fbfdc901b8c8c779181707890 to your computer and use it in GitHub Desktop.
Convert Telegram chat export to BAZMAN ready file
#!/usr/bin/env python3
#
# Convert a Telegram chat export to a BAZMAN-ready file
# Version: 1.1
#
# (c) Catware-Foundation, 2022, 2023
# Mikhail Lebedinets, 2022, 2023
#
# 26.06.2023 - Added support for HTML exports
#
import json
import sys
import os
def error_handler(message):
    """Report a fatal error and terminate the script.

    Prints ``Error: <message>`` to standard error and exits with status 1 so
    that shells and calling scripts can detect the failure.

    Args:
        message: Human-readable description of what went wrong.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print("Error: {}".format(message), file=sys.stderr)
    # sys.exit is used instead of the bare ``exit`` helper: that one is
    # injected by the ``site`` module for interactive use and is not
    # guaranteed to exist (e.g. under ``python -S``).
    sys.exit(1)
def tg2bm(export_json_obj):
    """Convert a parsed Telegram JSON chat export into BAZMAN input text.

    Only entries whose ``type`` is ``"message"`` are used. Entries that carry
    a ``forwarded_from`` key, entries without usable text, and messages whose
    text is empty or whitespace-only are skipped. When ``text`` is a list,
    plain-string parts are taken as-is and every other part contributes its
    own ``"text"`` value.

    Progress is reported on standard output while converting.

    Args:
        export_json_obj: Dict decoded from the export file. Must contain a
            ``"messages"`` list; ``"name"`` and ``"id"`` are only used for
            the progress banner.

    Returns:
        A string with every kept message followed by a newline.

    Raises:
        KeyError: If ``export_json_obj`` has no ``"messages"`` key.
    """
    chat_id = export_json_obj.get("id")
    chat_name = export_json_obj.get("name")
    messages = export_json_obj["messages"]
    total = len(messages)
    print("Chat: {} (id: {})".format(chat_name, chat_id))

    kept = []
    # Count from 1 so the progress line ends at "total of total".
    for position, msg in enumerate(messages, start=1):
        print("Working on {} of {}...".format(position, total), end="\r")
        if msg.get("type") != "message":
            continue
        if "forwarded_from" in msg:
            continue

        msg_text = msg.get("text", "")
        if isinstance(msg_text, list):
            # Mixed list: bare strings plus dict parts that hold a "text" key.
            msg_text = "".join(
                part if isinstance(part, str) else part.get("text", "")
                for part in msg_text
            )
        elif not isinstance(msg_text, str):
            # Nothing textual to extract (e.g. a null value).
            continue

        if msg_text.strip():
            kept.append(msg_text)

    print("\nDone!")
    return "".join("{}\n".format(text) for text in kept)
def tg2bm_html(html):
    """Extract message texts from one page of a Telegram HTML export.

    Every ``<div>`` whose class list contains ``body`` is treated as a post.
    Posts that are forwarded, or that contain a forwarded ``<div>``, are
    skipped, as are posts without a ``text`` ``<div>`` or with empty text.

    Requires the third-party ``bs4`` (Beautiful Soup 4) package; if it is
    missing, the script terminates through ``error_handler``.

    Args:
        html: Markup of a single exported page, as a string.

    Returns:
        A string with every kept message text (stripped of surrounding
        whitespace) followed by a newline.
    """
    # Only Beautiful Soup 4 is supported: the legacy ``BeautifulSoup`` (v3)
    # module has no ``find_all`` method and does not run on Python 3.
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        error_handler("No BeautifulSoup installed! Use `pip install bs4` to install it.")

    soup = BeautifulSoup(html, "html.parser")
    texts = []
    for post in soup.find_all("div", attrs={"class": "body"}):
        # Class matching is per token, so a div with class="forwarded body"
        # is returned here too. ``post.find`` only inspects descendants, so
        # the post's own class list has to be checked separately.
        # NOTE(review): Telegram Desktop appears to nest forwarded content in
        # such a div -- confirm against a real export.
        if "forwarded" in (post.get("class") or []):
            continue
        if post.find("div", attrs={"class": "forwarded"}):
            continue

        text_elem = post.find("div", attrs={"class": "text"})
        if text_elem is None:
            continue

        text = text_elem.get_text().strip()
        # Skip empty texts so the output has no blank lines, matching the
        # JSON converter's handling of whitespace-only messages.
        if text:
            texts.append(text)
    return "".join(text + "\n" for text in texts)
def main():
    """Command-line entry point: parse arguments, convert, write the result.

    Usage:
        ./tg2bm.py <export file> <output file>
        ./tg2bm.py --html <export directory> <output file>

    In JSON mode the export file is parsed and passed to ``tg2bm``. In HTML
    mode every ``*.html`` file in the export directory is read in sorted name
    order and passed to ``tg2bm_html``, and the results are concatenated.

    All files are read and written as UTF-8. Invalid arguments exit with
    status 1 after printing the usage line; other failures are reported
    through ``error_handler``.
    """
    args = sys.argv
    if len(args) < 3:
        print("Usage: ./tg2bm.py [--html] <export file> <output file>")
        sys.exit(1)

    # Bind the flag on both paths; leaving it unset in JSON mode would raise
    # UnboundLocalError at the first ``if ishtml`` check below.
    ishtml = args[1] == "--html"
    if ishtml:
        if len(args) < 4:
            print("Usage: ./tg2bm.py --html <export directory> <output file>")
            sys.exit(1)
        export_file, output_file = args[2], args[3]
    else:
        export_file, output_file = args[1], args[2]

    if not os.path.exists(export_file):
        error_handler("{} not exist!".format(export_file))
    if ishtml:
        if not os.path.isdir(export_file):
            error_handler("{} is not a directory!".format(export_file))
    elif os.path.isdir(export_file):
        error_handler("{} is a directory!".format(export_file))
    if os.path.isdir(output_file):
        error_handler("{} is a directory!".format(output_file))

    if ishtml:
        print("Loading export files...")
        pages = []
        # os.listdir order is arbitrary; sort so the output is reproducible.
        for file_name in sorted(os.listdir(export_file)):
            if not file_name.endswith(".html"):
                continue
            print(f"Reading file {file_name}...")
            page_path = os.path.join(export_file, file_name)
            try:
                with open(page_path, "r", encoding="utf-8") as f:
                    page = f.read()
            except (OSError, UnicodeError):
                error_handler("Failed to read export file!")
            pages.append(tg2bm_html(page))
        result = "".join(pages)
    else:
        try:
            with open(export_file, "r", encoding="utf-8") as f:
                export_json = f.read()
        except (OSError, UnicodeError):
            error_handler("Failed to read export file!")
        try:
            export_json_obj = json.loads(export_json)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            error_handler("Failed to parse export file!")
        result = tg2bm(export_json_obj)

    try:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(result)
    except (OSError, UnicodeError):
        error_handler("Failed to save result!")
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment