Convert JSONL files from https://github.com/tvdstaaij/telegram-history-dump to CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Converts a JSONL file generated with telegram-history-dump (1) to CSV
# Usage: python telegram-csv.py <path to json file> <path to output csv file>
# Example: python telegram-csv.py Bob.json Bob.csv
# 1: https://github.com/tvdstaaij/telegram-history-dump
import json
import sys
from datetime import datetime

import unicodecsv as csv
def get_isodate(msg):
    """Return the message's "date" field as an ISO 8601 string.

    Args:
        msg: A decoded message dict. Its optional "date" entry is passed to
            ``datetime.fromtimestamp``, i.e. it is treated as a Unix
            timestamp.

    Returns:
        The timestamp rendered with ``datetime.isoformat()`` in the local
        timezone (naive datetime, no UTC offset), or the string "unknown"
        when "date" is missing or falsy (``None``, ``0``).
    """
    date = msg.get("date", None)
    if not date:
        return "unknown"
    return datetime.fromtimestamp(date).isoformat()
def main():
    """Convert a JSONL message dump to a CSV file.

    Reads the input path from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``. Each non-blank input line is parsed as one JSON
    message and written as a row with the columns from, to, date, text.

    Exits via ``sys.exit`` with an error message unless exactly two
    arguments are supplied.
    """
    if len(sys.argv) != 3:
        sys.exit("No json and/or csv file given")

    jsonpath = sys.argv[1]
    csvpath = sys.argv[2]

    # unicodecsv encodes every row to bytes itself, so the output file has to
    # be opened in binary mode. The with-statement closes both files even if
    # a line fails to parse.
    with open(jsonpath, "r") as jsonfile, open(csvpath, "wb") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(["from", "to", "date", "text"])
        for item in jsonfile:
            # Skip blank lines (e.g. a trailing newline) instead of letting
            # json.loads raise on them.
            if not item.strip():
                continue
            msg = json.loads(item)
            # Fall back to an empty dict so a message without a "from"/"to"
            # object yields "unknown" rather than aborting the whole export.
            sender = msg.get("from") or {}
            recipient = msg.get("to") or {}
            csvwriter.writerow([
                sender.get("print_name", "unknown"),
                recipient.get("print_name", "unknown"),
                get_isodate(msg),
                msg.get("text", "no text"),
            ])
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
I added ID fields, but the output is all messed up. Why?
# Variant of the export loop that writes the "username" and "peer_id" of both
# the sender and the recipient instead of their "print_name".
csvwriter.writerow(["from", "id from", "to", "id to", "date", "text"])
for item in jsonfile:
    msg = json.loads(item)
    csvwriter.writerow([
        msg["from"].get("username", "unknown"),
        msg["from"].get("peer_id", "unknown"),
        msg["to"].get("username", "unknown"),
        msg["to"].get("peer_id", "unknown"),
        get_isodate(msg),
        msg.get("text", "no text")
    ])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How can I replace \n with " " in the text messages in your script?