Created
December 5, 2020 10:13
-
-
Save ilimugur/a5c440721fdb4126ec7bf2fe78592a95 to your computer and use it in GitHub Desktop.
Short and crude script to help migrate Facebook Notes (which are being sunset) to WordPress. Download the data for your page/profile from Facebook, find the notes.json file included in it, update the parameters in the script (hint: search for TODO), and run it. Then import the CSV output into your WordPress website/blog. (Disclaimer: Does NOT handle styling or cover ph…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
from datetime import datetime | |
import json | |
def processInput(filePath):
    """Load the exported notes JSON file and return its list of notes.

    Args:
        filePath: Path to the ``notes.json`` file to read.

    Returns:
        The value stored under the top-level ``'notes'`` key of the document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no top-level ``'notes'`` key.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding (e.g. cp1252 on Windows).
    with open(filePath, 'r', encoding='utf-8') as f:
        deserializedJSON = json.load(f)
    return deserializedJSON['notes']
def getDiagnostics(notes):
    """Print summary statistics about the notes that are being migrated.

    Reports the total number of notes, how they break down by empty/nonempty
    title versus empty/nonempty text, the longest and shortest nonempty text
    lengths, and how they break down by presence of a cover photo versus
    empty/nonempty title.

    Args:
        notes: Sized iterable of 2-item pairs whose second item is a note
            dict. The first item of each pair is ignored. Each note dict must
            have ``'title'`` and ``'text'`` entries supporting ``len()``; a
            cover photo is counted as present only when the note has a
            ``'cover_photo'`` entry that itself contains ``'uri'``.

    Returns:
        None. All results are written to standard output.
    """
    print("Found " + str(len(notes)) + " notes.")

    bothStates = (False, True)
    # Tallies keyed by (hasTitle, hasText) and by (hasCoverPhoto, hasTitle).
    titleTextCounts = {(t, x): 0 for t in bothStates for x in bothStates}
    coverTitleCounts = {(c, t): 0 for c in bothStates for t in bothStates}

    maxTextLength = 0
    # None means "no nonempty text seen yet". A numeric sentinel would be
    # reported as the minimum whenever every text is longer than the sentinel.
    minTextLength = None

    for _, note in notes:
        hasTitle = len(note['title']) > 0
        textLength = len(note['text'])
        hasText = textLength > 0
        hasCoverPhoto = 'cover_photo' in note and 'uri' in note['cover_photo']

        titleTextCounts[(hasTitle, hasText)] += 1
        coverTitleCounts[(hasCoverPhoto, hasTitle)] += 1

        maxTextLength = max(maxTextLength, textLength)
        # Empty texts are excluded from the "shortest" statistic.
        if hasText and (minTextLength is None or textLength < minTextLength):
            minTextLength = textLength

    shortestText = "N/A" if minTextLength is None else str(minTextLength)

    print("Found " + str(titleTextCounts[(False, False)]) + " notes with empty title AND text.")
    print("Found " + str(titleTextCounts[(False, True)]) + " notes with empty title BUT nonempty text.")
    print("Found " + str(titleTextCounts[(True, False)]) + " notes with nonempty title BUT empty text.")
    print("Found " + str(titleTextCounts[(True, True)]) + " notes with nonempty title AND nonempty text.")
    print("Longest text length is: " + str(maxTextLength))
    print("Shortest text length is: " + shortestText)
    print("Found " + str(coverTitleCounts[(False, False)]) + " notes with empty cover photo AND title.")
    print("Found " + str(coverTitleCounts[(False, True)]) + " notes with empty cover photo BUT nonempty title.")
    print("Found " + str(coverTitleCounts[(True, False)]) + " notes with nonempty cover photo BUT empty title.")
    print("Found " + str(coverTitleCounts[(True, True)]) + " notes with nonempty cover photo AND nonempty title.")
def _repairMojibake(value):
    """Re-encode *value* as Latin-1 and decode it as UTF-8; return it unchanged if that fails."""
    try:
        return value.encode('latin1').decode('utf8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # The string is not UTF-8-read-as-Latin-1 (presumably already correct
        # text), so keep it as is instead of aborting the whole migration.
        return value


def parseInput(notesInput, authorUsername, postType, postStatus):
    """Convert raw note dicts into CSV-ready rows sorted by creation time.

    Also prints diagnostics about the input notes via ``getDiagnostics``.

    Args:
        notesInput: Iterable of note dicts, each with ``'created_timestamp'``
            (a POSIX timestamp), ``'title'`` and ``'text'`` entries.
        authorUsername: Value written to every row's ``'post_author'`` field.
        postType: Value written to every row's ``'post_type'`` field.
        postStatus: Value written to every row's ``'post_status'`` field.

    Returns:
        A list of ``(datetime, row)`` tuples in ascending datetime order, where
        ``row`` is a dict with the keys ``post_author``, ``post_date``,
        ``post_type``, ``post_status``, ``post_title`` and ``post_content``.
        Notes with equal timestamps keep their input order.

    Raises:
        KeyError: If a note lacks one of the required entries.
    """
    notes = []
    notesInputForDiagnostic = []
    for noteInput in notesInput:
        timestamp = noteInput['created_timestamp']
        title = noteInput['title']
        text = noteInput['text']

        # Datetime processing and formatting
        timestampDatetime = datetime.fromtimestamp(timestamp)
        # WARNING: Timestamp provided by Facebook pages' timestamp data
        # does not seem to be in GMT/UTC. It may be a localized timestamp
        # based on the local timezone of the page/profile account.
        # This part needs updating if your local timezone differs from
        # the timezone of the system to which you'll feed this data.
        postDateString = timestampDatetime.strftime('%Y-%m-%d %H:%M:%S')

        # Title and text processing: reverse the Latin-1/UTF-8 mix-up.
        titleString = _repairMojibake(title)
        contentString = _repairMojibake(text)

        noteCSVData = {'post_author': authorUsername, 'post_date': postDateString,
                       'post_type': postType, 'post_status': postStatus,
                       'post_title': titleString, 'post_content': contentString}
        notes.append((timestampDatetime, noteCSVData))
        notesInputForDiagnostic.append((timestampDatetime, noteInput))

    # Sort on the datetime only. Comparing whole tuples would fall through to
    # comparing the dicts when two timestamps are equal, which raises TypeError.
    byTimestamp = lambda noteInfo: noteInfo[0]
    notes.sort(key=byTimestamp)
    notesInputForDiagnostic.sort(key=byTimestamp)

    getDiagnostics(notesInputForDiagnostic)
    return notes
def migrateToCSV(notes, outputFilePath):
    """Write the converted notes to a CSV file.

    Args:
        notes: Iterable of 2-item sequences whose second item is a row dict
            with the keys ``post_author``, ``post_date``, ``post_type``,
            ``post_status``, ``post_title`` and ``post_content``. The first
            item is ignored.
        outputFilePath: Path of the CSV file to create; an existing file is
            overwritten.

    Raises:
        OSError: If the output file cannot be opened for writing.
        ValueError: If a row dict contains a key that is not a known field.
    """
    fieldnames = ['post_author', 'post_date', 'post_type', 'post_status', 'post_title', 'post_content']
    # newline='' is required by the csv module: without it, row terminators
    # become '\r\r\n' on Windows and newlines embedded in quoted fields may be
    # translated. UTF-8 is set explicitly because titles and contents can hold
    # non-ASCII text that a platform default encoding may be unable to encode.
    with open(outputFilePath, mode='w', newline='', encoding='utf-8') as outputFile:
        dictWriter = csv.DictWriter(outputFile,
                                    fieldnames=fieldnames,
                                    delimiter=',',
                                    quotechar='"',
                                    quoting=csv.QUOTE_NONNUMERIC)
        dictWriter.writeheader()
        dictWriter.writerows(row for _, row in notes)
# Script configuration: adjust the values marked TODO before running.
notesFilePath = './notes.json' # TODO: Change it to the path of your notes.json file
authorUsername = 'WPAuthorUsername' # TODO: Change it to Wordpress username for the author of the posts
postType = 'post'
postStatus = 'publish'
outputFilePath = 'outputFacebook.csv' # TODO: Change it to the path of your desired output file location

# Run the migration only when executed as a script, so that importing this
# module (e.g. to reuse its functions) does not read or write any files.
if __name__ == '__main__':
    notesInput = processInput(notesFilePath)
    notes = parseInput(notesInput, authorUsername, postType, postStatus)
    migrateToCSV(notes, outputFilePath)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment