Last active
September 16, 2021 13:27
-
-
Save muety/04bb3572edeb08f9b4b0a23d5b8ba690 to your computer and use it in GitHub Desktop.
A script to help you migrate your whole WhatsApp chat history with a person to Telegram
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python | |
import os | |
import re | |
import shutil | |
# A script to help you migrate your whole WhatsApp chat history with a person to Telegram | |
# Instructions | |
# 1. Install the "Backup WhatsApp Chats" extension for Chrome
# 2. Buy a license | |
# 3. Open WhatsApp web and select the chat to export | |
# 4. Open the plugin, choose export type 'HTML', choose to download unresolved media | |
# 5. Download and extract the zip file to data/exported | |
# 6. Open the plugin again and choose export type 'Text' | |
# 7. Download the text file to data/exported as well | |
# 8. Edit CHAT_NAME and SELF_NAME constants below | |
# 9. Create output directory at data/converted | |
# 10. Run 'python convert.py' | |
# 11. Copy the contents of data/converted to your phone | |
# 12. Open your favorite file browser, select all files and share them to Telegram | |
# -> Telegram should now ask you where to import the messages | |
# 13. Select the target chat and hit import | |
# Issues | |
# - For some reason, the current day is skipped during export | |
# - For some reason, copying the output files to the phone via USB will cause media files to not be imported. Instead, I uploaded them to Nextcloud, then synced them on the phone using the Nextcloud app and then shared them to Telegram using a file manager. The trick is probably to somehow get this (https://github.com/DrKLO/Telegram/blob/368822d20f879f5ca851e4cbf13506eda4e48bfc/TMessagesProj/src/main/java/org/telegram/ui/LaunchActivity.java#L1391) method to return null, so that the shared media documents are added to documentsUrisArray instead of documentsPathsArray. | |
# Relevant code in Telegram for Android | |
# - https://github.com/DrKLO/Telegram/blob/368822d20f879f5ca851e4cbf13506eda4e48bfc/TMessagesProj/src/main/java/org/telegram/messenger/MessagesController.java#L789 | |
# - https://github.com/DrKLO/Telegram/blob/368822d20f879f5ca851e4cbf13506eda4e48bfc/TMessagesProj/src/main/java/org/telegram/ui/LaunchActivity.java#L1365 | |
# - https://github.com/DrKLO/Telegram/blob/368822d20f879f5ca851e4cbf13506eda4e48bfc/TMessagesProj/src/main/java/org/telegram/messenger/SendMessagesHelper.java#L5826 | |
# Constants
CHAT_NAME = 'John Doe'  # change this (sender name of the other person; also the export's file name)
SELF_NAME = 'Jane Doe'  # change this (your own sender name in the export)
INPUT_DIR = './data/exported'
OUTPUT_DIR = './data/converted'
# NOTE(review): German for "WhatsApp chat with <name>"; presumably the file
# name Telegram's importer recognizes -- confirm before changing.
OUTPUT_FILE = f'WhatsApp Chat mit {CHAT_NAME}.txt'
# Timestamp of an exported line: group 1 is the date (YYYY/MM/DD),
# group 2 is the time (HH:MM:SS). Raw string so the backslashes reach the
# regex engine without relying on invalid string escape sequences.
DATE_REGEX = r'(\d{4}/\d{2}/\d{2}), (\d{2}:\d{2}:\d{2})'
# First line of a message sent by either participant. The names are escaped
# so that regex metacharacters in a name (e.g. '+', '(', '.') match literally.
LINE_REGEX = f'^{DATE_REGEX} - ({re.escape(CHAT_NAME)}|{re.escape(SELF_NAME)}): .+'
# File names treated as media (matched case-insensitively by the caller).
MEDIA_FILE_REGEX = r'.+\.(jpg|jpeg|png|mp4|oga|webp)$'
# Placeholder found in the text export where a media file was sent.
MEDIA_INDICATOR = '<Media omitted>'
# German for "(file attached)"; appended after the media file name in the output.
MEDIA_TARGET_SUFFIX = '(Datei angehängt)'
# Methods | |
def read_messages():
    """Read the exported chat text file and group its lines into messages.

    The file is read from ``{INPUT_DIR}/{CHAT_NAME}.txt``. A line matching
    LINE_REGEX starts a new message. A line that does not begin with a
    timestamp is appended to the previous message (multi-line messages).
    Lines that begin with a timestamp but do not match LINE_REGEX are
    dropped.

    Returns:
        A list of strings, one per message, each keeping its line breaks.
    """
    lines = []
    file_name = f'{INPUT_DIR}/{CHAT_NAME}.txt'
    print(f'reading {file_name}')
    # Decode explicitly instead of using the platform's default codec.
    # 'utf-8-sig' also strips a leading byte order mark, which would
    # otherwise keep the first line from matching LINE_REGEX.
    # NOTE(review): assumes the export is UTF-8 encoded -- confirm.
    with open(file_name, 'r', encoding='utf-8-sig') as f:
        for line in f:
            if re.match(LINE_REGEX, line):
                lines.append(line)
            elif lines and not re.match(DATE_REGEX, line):
                # Continuation of the most recent message.
                lines[-1] += line
    print(f'read {len(lines)} messages')
    return lines
def find_media():
    """Return the names of all media files in INPUT_DIR, sorted by name.

    A directory entry counts as media when its name matches MEDIA_FILE_REGEX
    (case-insensitively) and it is a regular file. The result is sorted
    because os.listdir() returns entries in arbitrary order; callers that
    take the first match for a timestamp then behave the same on every run.

    Returns:
        A new list of file names (without the directory part).
    """
    return sorted(
        name
        for name in os.listdir(INPUT_DIR)
        if re.match(MEDIA_FILE_REGEX, name, re.IGNORECASE)
        # Skip directories that merely look like media files by name.
        and os.path.isfile(os.path.join(INPUT_DIR, name))
    )
def extract_datetime(message):
    """Split the first timestamp found in *message* into its parts.

    Returns:
        A pair ``(date_components, time_components)``: the date group of
        DATE_REGEX split on '/' and the time group split on ':', both as
        lists of strings.

    There is no handling for a message without a timestamp: re.search()
    then returns None and the group access fails with AttributeError.
    """
    found = re.search(DATE_REGEX, message)
    date_text, time_text = found.group(1, 2)
    return date_text.split('/'), time_text.split(':')
def replace_media_refs(messages):
    """Replace media placeholders with the names of the matching media files.

    For every message containing MEDIA_INDICATOR, the message timestamp is
    turned into a file name prefix (date components joined by '_', followed
    by '_' and the concatenated time components). The first still-unused
    media file whose name starts with that prefix replaces the placeholder,
    followed by MEDIA_TARGET_SUFFIX. Each media file is used at most once.
    Messages without a matching file are left unchanged and a warning is
    printed.

    Args:
        messages: list of message strings; it is not modified.

    Returns:
        A new list with the same messages, placeholders replaced where a
        media file was found.
    """
    converted = list(messages)
    unused_media = find_media()
    print(f'found {len(unused_media)} media files')
    for index, text in enumerate(converted):
        if MEDIA_INDICATOR not in text:
            continue
        date_parts, time_parts = extract_datetime(text)
        media_prefix = '_'.join(date_parts) + '_' + ''.join(time_parts)
        # First remaining file carrying this timestamp, or None.
        chosen = next(
            (name for name in unused_media if name.startswith(media_prefix)),
            None,
        )
        if chosen is None:
            print(f'warning: did not find matching media file for message at {media_prefix}')
            continue
        converted[index] = text.replace(MEDIA_INDICATOR, f'{chosen} {MEDIA_TARGET_SUFFIX}')
        # Consume the file so a later message with the same timestamp gets the next one.
        unused_media.remove(chosen)
    return converted
def write_messages(messages):
    """Write all messages to ``{OUTPUT_DIR}/{OUTPUT_FILE}``.

    Args:
        messages: iterable of message strings. Each one is written verbatim,
            so it must already contain its own line break(s).
    """
    # Create the output directory on demand so a missing folder does not
    # abort the run with FileNotFoundError.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    out_file_name = f'{OUTPUT_DIR}/{OUTPUT_FILE}'
    print(f'saving messages to {out_file_name}')
    # Encode explicitly as UTF-8 rather than with the platform's default
    # codec, which may be unable to represent characters in the messages.
    with open(out_file_name, 'w', encoding='utf-8') as f:
        for m in messages:
            # Disabled: rewrite the timestamp as 'DD.MM.YY, HH:MM'.
            # dc, tc = extract_datetime(m)
            # m = re.sub(DATE_REGEX, f'{dc[2]}.{dc[1]}.{dc[0][2:]}, {":".join(tc[:2])}', m) if dc and tc else m
            f.write(m)
def copy_media():
    """Copy every media file found by find_media() from INPUT_DIR to OUTPUT_DIR.

    Files keep their names; shutil.copy2 is used for the copy.
    """
    media_names = find_media()
    for name in media_names:
        source_path = f'{INPUT_DIR}/{name}'
        target_path = f'{OUTPUT_DIR}/{name}'
        shutil.copy2(source_path, target_path)
def main():
    """Run the conversion: read messages, link media, save text, copy media."""
    print('reading messages')
    raw_messages = read_messages()

    print('replacing media references')
    converted_messages = replace_media_refs(raw_messages)

    print('saving output')
    write_messages(converted_messages)

    print('copying media')
    copy_media()

    print('done')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment