Last active
October 14, 2024 21:11
-
-
Save hay/7f5124f9992038d6c1ed00e1ed52772f to your computer and use it in GitHub Desktop.
Convert JSONL files from https://github.com/tvdstaaij/telegram-history-dump to CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Converts a JSONL file generated with telegram-history-dump (1) to CSV
# Usage: python telegram-csv.py <path to json file> <path to output csv file>
# Example: python telegram-csv.py Bob.json Bob.csv
# 1: https://github.com/tvdstaaij/telegram-history-dump
from datetime import datetime | |
import unicodecsv as csv | |
import json, sys | |
def get_isodate(msg):
    """Return the message's ``date`` field as an ISO 8601 string.

    ``msg`` is a mapping (one decoded JSONL record).  Its ``"date"`` value is
    passed to ``datetime.fromtimestamp``, so it is expected to be a POSIX
    timestamp; the result is a naive datetime in the machine's local timezone.

    Returns the literal string ``"unknown"`` when the key is missing or its
    value is falsy (``None``, ``0``, ...).
    """
    date = msg.get("date", None)
    if not date:
        return "unknown"
    return datetime.fromtimestamp(date).isoformat()
def main():
    """Convert the JSONL file named in ``sys.argv[1]`` to CSV at ``sys.argv[2]``.

    Writes a header row (``from``, ``to``, ``date``, ``text``) followed by one
    row per non-blank input line.  Missing fields fall back to ``"unknown"``
    (names, date) or ``"no text"`` (message body).

    Exits via ``sys.exit`` with a message unless exactly two command-line
    arguments are supplied.
    """
    if len(sys.argv) != 3:
        sys.exit("No json and/or csv file given")

    # Local import: io.open accepts ``encoding`` on both Python 2 and 3.
    import io

    jsonpath = sys.argv[1]
    csvpath = sys.argv[2]

    # NOTE(review): input is assumed to be UTF-8 encoded JSON -- confirm
    # against the dump tool's output.
    # The CSV is opened in binary mode because the unicodecsv writer emits
    # encoded bytes rather than text.
    # ``with`` guarantees both files are closed even if a row fails.
    with io.open(jsonpath, "r", encoding="utf-8") as jsonfile, \
            open(csvpath, "wb") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(["from", "to", "date", "text"])

        for item in jsonfile:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not item.strip():
                continue

            msg = json.loads(item)

            # Records lacking a sender/recipient get the same "unknown"
            # fallback as records lacking a print_name.
            sender = msg.get("from") or {}
            recipient = msg.get("to") or {}

            csvwriter.writerow([
                sender.get("print_name", "unknown"),
                recipient.get("print_name", "unknown"),
                get_isodate(msg),
                msg.get("text", "no text"),
            ])
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
I added ID fields, but it is all messed up, why?
csvwriter.writerow(["from", "id from", "to", "id to", "date", "text"])
for item in jsonfile:
msg = json.loads(item)
csvwriter.writerow([
msg["from"].get("username", "unknown"),
msg["from"].get("peer_id", "unknown"),
msg["to"].get("username", "unknown"),
msg["to"].get("peer_id", "unknown"),
get_isodate(msg),
msg.get("text", "no text")
])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to replace \n with " " in text messages in your script?