Skip to content

Instantly share code, notes, and snippets.

@muety
Last active September 16, 2021 13:27
Show Gist options
  • Save muety/04bb3572edeb08f9b4b0a23d5b8ba690 to your computer and use it in GitHub Desktop.
Save muety/04bb3572edeb08f9b4b0a23d5b8ba690 to your computer and use it in GitHub Desktop.
A script to help you migrate your whole WhatsApp chat history with a person to Telegram
#!/bin/python
import os
import re
import shutil
# A script to help you migrate your whole WhatsApp chat history with a person to Telegram
# Instructions
# 1. Install the "Backup WhatsApp Chats" extension in Chrome
# 2. Buy a license
# 3. Open WhatsApp web and select the chat to export
# 4. Open the plugin, choose export type 'HTML', choose to download unresolved media
# 5. Download and extract the zip file to data/exported
# 6. Open the plugin again and choose export type 'Text'
# 7. Download the text file to data/exported as well
# 8. Edit CHAT_NAME and SELF_NAME constants below
# 9. Create output directory at data/converted
# 10. Run 'python convert.py'
# 11. Copy the contents of data/converted to your phone
# 12. Open your favorite file browser, select all files and share them to Telegram
# -> Telegram should now ask you where to import the messages
# 13. Select the target chat and hit import
# Issues
# - For some reason, the current day is skipped during export
# - For some reason, copying the output files to the phone via USB will cause media files to not be imported. Instead, I uploaded them to Nextcloud, then synced them on the phone using the Nextcloud app and then shared them to Telegram using a file manager. The trick is probably to somehow get this (https://github.com/DrKLO/Telegram/blob/368822d20f879f5ca851e4cbf13506eda4e48bfc/TMessagesProj/src/main/java/org/telegram/ui/LaunchActivity.java#L1391) method to return null, so that the shared media documents are added to documentsUrisArray instead of documentsPathsArray.
# Relevant code in Telegram for Android
# - https://github.com/DrKLO/Telegram/blob/368822d20f879f5ca851e4cbf13506eda4e48bfc/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java#L789
# - https://github.com/DrKLO/Telegram/blob/368822d20f879f5ca851e4cbf13506eda4e48bfc/TMessagesProj/src/main/java/org/telegram/ui/LaunchActivity.java#L1365
# - https://github.com/DrKLO/Telegram/blob/368822d20f879f5ca851e4cbf13506eda4e48bfc/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java#L5826
# Constants
CHAT_NAME = 'John Doe' # change this
SELF_NAME = 'Jane Doe' # change this
# Directory holding the extracted export (chat text file plus media files).
INPUT_DIR = './data/exported'
# Directory that receives the converted chat file and the copied media.
OUTPUT_DIR = './data/converted'
OUTPUT_FILE = f'WhatsApp Chat mit {CHAT_NAME}.txt'
# Raw strings so that \d and \. reach the regex engine as written instead of
# being treated as (invalid) string escape sequences.
# Group 1 is the date (YYYY/MM/DD), group 2 the time (HH:MM:SS).
DATE_REGEX = r'(\d{4}/\d{2}/\d{2}), (\d{2}:\d{2}:\d{2})'
# The names are escaped so that characters such as '(', '+' or '.' in a
# contact name are matched literally rather than interpreted as regex syntax.
LINE_REGEX = rf'^{DATE_REGEX} - ({re.escape(CHAT_NAME)}|{re.escape(SELF_NAME)}): .+'
# Matched against file names with re.IGNORECASE by find_media().
MEDIA_FILE_REGEX = r'.+\.(jpg|jpeg|png|mp4|oga|webp)$'
# Placeholder found in the text export where a media file was sent.
MEDIA_INDICATOR = '<Media omitted>'
# Text appended after the media file name in the converted output.
MEDIA_TARGET_SUFFIX = '(Datei angehängt)'
# Methods
def read_messages():
    """Read the exported chat text file and group its lines into messages.

    The file is expected at ``{INPUT_DIR}/{CHAT_NAME}.txt``. A line matching
    LINE_REGEX starts a new message. A line that does not begin with a date
    is treated as a continuation of the previous message and appended to it.
    Lines that begin with a date but do not match LINE_REGEX are dropped, as
    are continuation lines that appear before the first message.

    Returns:
        A list of message strings, each keeping its original line breaks.

    Raises:
        OSError: if the export file cannot be opened.
    """
    lines = []
    file_name = f'{INPUT_DIR}/{CHAT_NAME}.txt'
    print(f'reading {file_name}')
    # Decode explicitly instead of relying on the platform default, so emoji
    # and umlauts survive on every OS. 'utf-8-sig' also tolerates a leading
    # byte-order mark, which would otherwise keep the first line from
    # matching LINE_REGEX.
    # NOTE(review): assumes the export is UTF-8 encoded - confirm.
    with open(file_name, 'r', encoding='utf-8-sig') as f:
        for line in f:
            if re.match(LINE_REGEX, line):
                lines.append(line)
            elif lines and not re.match(DATE_REGEX, line):
                # Continuation of a multi-line message.
                lines[-1] += line
    print(f'read {len(lines)} messages')
    return lines
def find_media():
    """Return the names of the media files in INPUT_DIR, sorted by name.

    A file counts as media when its name matches MEDIA_FILE_REGEX
    (case-insensitively). os.listdir() returns entries in arbitrary order;
    sorting makes the result deterministic, so callers that pick the first
    file for a given timestamp get the same one on every platform.

    Returns:
        A sorted list of file names (not full paths).
    """
    return sorted(
        name for name in os.listdir(INPUT_DIR)
        if re.match(MEDIA_FILE_REGEX, name, re.IGNORECASE)
    )
def extract_datetime(message):
    """Split the first timestamp found in *message* into its components.

    Returns:
        A tuple ``(date_components, time_components)``: the date split on
        '/' and the time split on ':', both as lists of strings.
    """
    found = re.search(DATE_REGEX, message)
    date_text = found.group(1)
    time_text = found.group(2)
    return date_text.split('/'), time_text.split(':')
def replace_media_refs(messages):
    """Replace media placeholders with the names of the matching media files.

    For every message containing MEDIA_INDICATOR, a prefix is built from the
    message timestamp (date parts joined by '_', then '_', then the time
    parts concatenated). The first not-yet-used media file whose name starts
    with that prefix replaces the placeholder, followed by
    MEDIA_TARGET_SUFFIX. Messages without a matching file are left unchanged
    and a warning is printed.

    Returns:
        A new list of messages; the input list is not modified.
    """
    result = list(messages)
    available = find_media()
    print(f'found {len(available)} media files')
    for index, message in enumerate(result):
        if MEDIA_INDICATOR not in message:
            continue
        date_parts, time_parts = extract_datetime(message)
        media_prefix = f'{"_".join(date_parts)}_{"".join(time_parts)}'
        chosen = next((name for name in available if name.startswith(media_prefix)), None)
        if chosen is None:
            print(f'warning: did not find matching media file for message at {media_prefix}')
            continue
        result[index] = message.replace(MEDIA_INDICATOR, f'{chosen} {MEDIA_TARGET_SUFFIX}')
        # Each media file is used for at most one message.
        available.remove(chosen)
    return result
def write_messages(messages):
    """Write all messages to ``{OUTPUT_DIR}/{OUTPUT_FILE}``.

    Messages are written unchanged and in order; each is expected to carry
    its own trailing line break. The output directory is created if it does
    not exist yet.

    Raises:
        OSError: if the directory or file cannot be created or written.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    out_file_name = f'{OUTPUT_DIR}/{OUTPUT_FILE}'
    print(f'saving messages to {out_file_name}')
    # Encode explicitly: the text contains non-ASCII characters (e.g. the 'ä'
    # in MEDIA_TARGET_SUFFIX, emoji in messages) that a platform-default
    # encoding may not be able to represent.
    with open(out_file_name, 'w', encoding='utf-8') as f:
        # Timestamps are written as exported. A disabled earlier variant
        # rewrote them to 'DD.MM.YY, HH:MM' before writing.
        f.writelines(messages)
def copy_media():
    """Copy every media file found in INPUT_DIR into OUTPUT_DIR.

    Files keep their names. shutil.copy2 is used for the copy.
    """
    for media_name in find_media():
        source_path = f'{INPUT_DIR}/{media_name}'
        target_path = f'{OUTPUT_DIR}/{media_name}'
        shutil.copy2(source_path, target_path)
def main():
    """Run the conversion: read, resolve media references, write, copy media."""
    print('reading messages')
    loaded = read_messages()
    print('replacing media references')
    converted = replace_media_refs(loaded)
    print('saving output')
    write_messages(converted)
    print('copying media')
    copy_media()
    print('done')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment