iwalfy/tg2bm.py

## tg2bm.py
#!/usr/bin/env python3
#
# Convert a Telegram chat export to a BAZMAN-ready file
# Version: 1.1
#
# (c) Catware-Foundation, 2022, 2023
#     Mikhail Lebedinets, 2022, 2023
#
# 26.06.2023 - Added support for HTML exports
#

import json
import sys
import os

def error_handler(message):
  """Report a fatal error and terminate the script.

  Prints "Error: <message>" to standard error and exits with status 1 so
  that shells and calling scripts can detect the failure.

  Args:
    message: Human-readable description of what went wrong.

  Raises:
    SystemExit: Always, with exit code 1.
  """
  print("Error: {}".format(message), file=sys.stderr)
  # sys.exit() rather than the site-provided exit(): the latter is meant for
  # the interactive shell and, called without arguments, reports success.
  sys.exit(1)

def tg2bm(export_json_obj):
  """Convert a parsed Telegram JSON chat export to BAZMAN text.

  Every entry of type "message" that is not forwarded and whose text is not
  blank contributes its text followed by a newline, in export order.
  Progress is printed to stdout while the messages are processed.

  Args:
    export_json_obj: Parsed export (dict) with the "id", "name" and
      "messages" keys. Each message is a dict; its "text" is either a
      string or a list mixing plain strings with dicts that carry their
      own "text" key.

  Returns:
    The collected message texts, each terminated by a newline.

  Raises:
    KeyError: If "id", "name" or "messages" is missing from the export.
  """
  chat_id = export_json_obj["id"]
  name = export_json_obj["name"]
  messages = export_json_obj["messages"]

  total = len(messages)

  print("Chat: {} (id: {})".format(name, chat_id))

  chunks = []

  # Count from 1 so the final progress line reads "N of N", not "N-1 of N".
  for index, msg in enumerate(messages, start=1):
    print("Working on {} of {}...".format(index, total), end="\r")

    # Only entries of type "message" are converted.
    if msg.get("type") != "message":
      continue

    # Forwarded messages are skipped.
    if "forwarded_from" in msg:
      continue

    # A missing or null "text" is treated as an empty message.
    msg_text = msg.get("text") or ""

    if isinstance(msg_text, list):
      # Flatten the mixed list of plain strings and text-carrying dicts.
      to_append = "".join(
        part if isinstance(part, str) else part.get("text", "")
        for part in msg_text
      )
    else:
      to_append = msg_text

    if to_append.strip():
      chunks.append("{}\n".format(to_append))

  print("\nDone!")
  return "".join(chunks)

def tg2bm_html(html):
  """Extract message texts from one Telegram HTML export page.

  Looks at every <div> carrying the "body" class, skips the ones that
  belong to forwarded messages, and collects the stripped text of the first
  nested <div class="text">.

  Args:
    html: Markup of a single exported page, as a string.

  Returns:
    The non-empty message texts, each terminated by a newline.
  """
  # bs4 is imported lazily so that JSON-only users do not need it. The legacy
  # "BeautifulSoup" (v3) module is no longer tried: it targets Python 2 and
  # does not provide the find_all()/"html.parser" API used below.
  try:
    from bs4 import BeautifulSoup
  except ImportError:
    error_handler("No BeautifulSoup installed! Use `pip install bs4` to install it.")

  parser = BeautifulSoup(html, "html.parser")
  posts = parser.find_all("div", attrs={"class": "body"})

  lines = []

  for post in posts:
    # bs4 matches "class" against each class token, so a nested
    # <div class="forwarded body"> wrapper is returned as a post of its own.
    # Skip it, as well as any body that contains a forwarded part.
    if "forwarded" in (post.get("class") or []):
      continue
    if post.find("div", attrs={"class": "forwarded"}):
      continue

    text_elem = post.find("div", attrs={"class": "text"})
    if text_elem is None:
      continue

    text = text_elem.get_text().strip()
    if text:  # same rule as the JSON path: blank messages are dropped
      lines.append(text + "\n")

  return "".join(lines)

def main():
  """Command-line entry point.

  Usage:
    ./tg2bm.py <export file> <output file>
    ./tg2bm.py --html <export directory> <output file>

  In JSON mode the export file is parsed and converted with tg2bm(). In
  HTML mode every "*.html" file of the export directory is converted with
  tg2bm_html() and the results are concatenated. The converted text is then
  written to the output file.

  Raises:
    SystemExit: On wrong usage or on any error reported via error_handler().
  """
  if len(sys.argv) < 3:
    print("Usage: ./tg2bm.py [--html] <export file> <output file>")
    sys.exit(1)

  # Defined on both paths: it used to be assigned only for --html, which made
  # plain JSON conversion crash with UnboundLocalError further down.
  ishtml = sys.argv[1] == "--html"

  if ishtml:
    if len(sys.argv) < 4:
      print("Usage: ./tg2bm.py --html <export directory> <output file>")
      sys.exit(1)
    export_file, output_file = sys.argv[2], sys.argv[3]
  else:
    export_file, output_file = sys.argv[1], sys.argv[2]

  if not os.path.exists(export_file):
    error_handler("{} not exist!".format(export_file))

  if ishtml:
    if not os.path.isdir(export_file):
      error_handler("{} is not a directory!".format(export_file))
  elif os.path.isdir(export_file):
    error_handler("{} is a directory!".format(export_file))

  if os.path.isdir(output_file):
    error_handler("{} is a directory!".format(output_file))

  # Files are opened as UTF-8 explicitly so the result does not depend on the
  # platform's default encoding.
  if ishtml:
    print("Loading export files...")

    # os.listdir() returns names in arbitrary order. Sorting by
    # (length, name) reads pages named like "x.html", "x2.html", ...,
    # "x10.html" in numeric order.
    pages = sorted(
      (name for name in os.listdir(export_file) if name.endswith(".html")),
      key=lambda name: (len(name), name),
    )

    parts = []
    for page in pages:
      print(f"Reading file {page}...")
      try:
        with open(os.path.join(export_file, page), "r", encoding="utf-8") as f:
          page_html = f.read()
      except (OSError, UnicodeDecodeError):
        error_handler("Failed to read export file!")
      parts.append(tg2bm_html(page_html))

    result = "".join(parts)
  else:
    try:
      with open(export_file, "r", encoding="utf-8") as f:
        export_json = f.read()
    except (OSError, UnicodeDecodeError):
      error_handler("Failed to read export file!")

    try:
      export_json_obj = json.loads(export_json)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
      error_handler("Failed to parse export file!")

    result = tg2bm(export_json_obj)

  try:
    with open(output_file, "w", encoding="utf-8") as f:
      f.write(result)
  except OSError:
    error_handler("Failed to save result!")

# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
  main()
	#!/usr/bin/env python3
	#
	# Convert Telegram chat export to BAZMAN ready file
	# Version: 1.1
	#
	# (c) Catware-Foundation, 2022, 2023
	# Mikhail Lebedinets, 2022, 2023
	#
	# 26.06.2023 - Added support for HTML exports
	#

	import json
	import sys
	import os

	def error_handler(message):
	  """Report a fatal error and terminate the script.

	  Prints "Error: <message>" to standard error and exits with status 1 so
	  that shells and calling scripts can detect the failure.

	  Args:
	    message: Human-readable description of what went wrong.

	  Raises:
	    SystemExit: Always, with exit code 1.
	  """
	  print("Error: {}".format(message), file=sys.stderr)
	  # sys.exit() rather than the site-provided exit(): the latter is meant
	  # for the interactive shell and, called without arguments, reports
	  # success.
	  sys.exit(1)

	def tg2bm(export_json_obj):
	  """Convert a parsed Telegram JSON chat export to BAZMAN text.

	  Every entry of type "message" that is not forwarded and whose text is
	  not blank contributes its text followed by a newline, in export order.
	  Progress is printed to stdout while the messages are processed.

	  Args:
	    export_json_obj: Parsed export (dict) with the "id", "name" and
	      "messages" keys. Each message is a dict; its "text" is either a
	      string or a list mixing plain strings with dicts that carry their
	      own "text" key.

	  Returns:
	    The collected message texts, each terminated by a newline.

	  Raises:
	    KeyError: If "id", "name" or "messages" is missing from the export.
	  """
	  chat_id = export_json_obj["id"]
	  name = export_json_obj["name"]
	  messages = export_json_obj["messages"]

	  total = len(messages)

	  print("Chat: {} (id: {})".format(name, chat_id))

	  chunks = []

	  # Count from 1 so the final progress line reads "N of N".
	  for index, msg in enumerate(messages, start=1):
	    print("Working on {} of {}...".format(index, total), end="\r")

	    # Only entries of type "message" are converted.
	    if msg.get("type") != "message":
	      continue

	    # Forwarded messages are skipped.
	    if "forwarded_from" in msg:
	      continue

	    # A missing or null "text" is treated as an empty message.
	    msg_text = msg.get("text") or ""

	    if isinstance(msg_text, list):
	      # Flatten the mixed list of plain strings and text-carrying dicts.
	      to_append = "".join(
	        part if isinstance(part, str) else part.get("text", "")
	        for part in msg_text
	      )
	    else:
	      to_append = msg_text

	    if to_append.strip():
	      chunks.append("{}\n".format(to_append))

	  print("\nDone!")
	  return "".join(chunks)

	def tg2bm_html(html):
	  """Extract message texts from one Telegram HTML export page.

	  Looks at every <div> carrying the "body" class, skips the ones that
	  belong to forwarded messages, and collects the stripped text of the
	  first nested <div class="text">.

	  Args:
	    html: Markup of a single exported page, as a string.

	  Returns:
	    The non-empty message texts, each terminated by a newline.
	  """
	  # bs4 is imported lazily so that JSON-only users do not need it. The
	  # legacy "BeautifulSoup" (v3) module is no longer tried: it targets
	  # Python 2 and does not provide the find_all()/"html.parser" API used
	  # below.
	  try:
	    from bs4 import BeautifulSoup
	  except ImportError:
	    error_handler("No BeautifulSoup installed! Use `pip install bs4` to install it.")

	  parser = BeautifulSoup(html, "html.parser")
	  posts = parser.find_all("div", attrs={"class": "body"})

	  lines = []

	  for post in posts:
	    # bs4 matches "class" against each class token, so a nested
	    # <div class="forwarded body"> wrapper is returned as a post of its
	    # own. Skip it, as well as any body that contains a forwarded part.
	    if "forwarded" in (post.get("class") or []):
	      continue
	    if post.find("div", attrs={"class": "forwarded"}):
	      continue

	    text_elem = post.find("div", attrs={"class": "text"})
	    if text_elem is None:
	      continue

	    text = text_elem.get_text().strip()
	    if text:  # blank messages are dropped
	      lines.append(text + "\n")

	  return "".join(lines)

	def main():
	  """Command-line entry point.

	  Usage:
	    ./tg2bm.py <export file> <output file>
	    ./tg2bm.py --html <export directory> <output file>

	  In JSON mode the export file is parsed and converted with tg2bm(). In
	  HTML mode every "*.html" file of the export directory is converted with
	  tg2bm_html() and the results are concatenated. The converted text is
	  then written to the output file.

	  Raises:
	    SystemExit: On wrong usage or on any error reported via
	      error_handler().
	  """
	  if len(sys.argv) < 3:
	    print("Usage: ./tg2bm.py [--html] <export file> <output file>")
	    sys.exit(1)

	  # Defined on both paths: it used to be assigned only for --html, which
	  # made plain JSON conversion crash with UnboundLocalError further down.
	  ishtml = sys.argv[1] == "--html"

	  if ishtml:
	    if len(sys.argv) < 4:
	      print("Usage: ./tg2bm.py --html <export directory> <output file>")
	      sys.exit(1)
	    export_file, output_file = sys.argv[2], sys.argv[3]
	  else:
	    export_file, output_file = sys.argv[1], sys.argv[2]

	  if not os.path.exists(export_file):
	    error_handler("{} not exist!".format(export_file))

	  if ishtml:
	    if not os.path.isdir(export_file):
	      error_handler("{} is not a directory!".format(export_file))
	  elif os.path.isdir(export_file):
	    error_handler("{} is a directory!".format(export_file))

	  if os.path.isdir(output_file):
	    error_handler("{} is a directory!".format(output_file))

	  # Files are opened as UTF-8 explicitly so the result does not depend on
	  # the platform's default encoding.
	  if ishtml:
	    print("Loading export files...")

	    # os.listdir() returns names in arbitrary order. Sorting by
	    # (length, name) reads pages named like "x.html", "x2.html", ...,
	    # "x10.html" in numeric order.
	    pages = sorted(
	      (name for name in os.listdir(export_file) if name.endswith(".html")),
	      key=lambda name: (len(name), name),
	    )

	    parts = []
	    for page in pages:
	      print(f"Reading file {page}...")
	      try:
	        with open(os.path.join(export_file, page), "r", encoding="utf-8") as f:
	          page_html = f.read()
	      except (OSError, UnicodeDecodeError):
	        error_handler("Failed to read export file!")
	      parts.append(tg2bm_html(page_html))

	    result = "".join(parts)
	  else:
	    try:
	      with open(export_file, "r", encoding="utf-8") as f:
	        export_json = f.read()
	    except (OSError, UnicodeDecodeError):
	      error_handler("Failed to read export file!")

	    try:
	      export_json_obj = json.loads(export_json)
	    except ValueError:  # json.JSONDecodeError is a ValueError subclass
	      error_handler("Failed to parse export file!")

	    result = tg2bm(export_json_obj)

	  try:
	    with open(output_file, "w", encoding="utf-8") as f:
	      f.write(result)
	  except OSError:
	    error_handler("Failed to save result!")

	# Run the converter only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	  main()