Skip to content

Instantly share code, notes, and snippets.

@ilimugur
Created December 5, 2020 10:13
Show Gist options
  • Save ilimugur/a5c440721fdb4126ec7bf2fe78592a95 to your computer and use it in GitHub Desktop.
Save ilimugur/a5c440721fdb4126ec7bf2fe78592a95 to your computer and use it in GitHub Desktop.
Short & crude script to help migrate Facebook Notes (which are being sunset) to WordPress. Download the data for your page/profile from Facebook, find the notes.json file included in it, update the parameters in the script (hint: search for TODO) and run it. Then just import the CSV output into your WordPress website/blog. (Disclaimer: Does NOT handle styling or cover photos…
import csv
from datetime import datetime
import json
def processInput(filePath):
    """Load a notes JSON file and return the value under its 'notes' key.

    Args:
        filePath: Path to the JSON file to read (the ``notes.json`` file
            from the Facebook data download).

    Returns:
        Whatever is stored under the top-level ``'notes'`` key of the
        parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the document has no top-level ``'notes'`` key.
    """
    # JSON text is UTF-8 by specification. Being explicit keeps the result
    # independent of the platform's default locale encoding.
    with open(filePath, 'r', encoding='utf-8') as f:
        deserializedJSON = json.load(f)
    return deserializedJSON['notes']
def getDiagnostics(notes):
    """Print summary statistics about the given notes to stdout.

    Reports the number of notes, how empty/nonempty titles combine with
    empty/nonempty text and with the presence of a cover photo, and the
    longest and shortest (nonempty) text lengths.

    Args:
        notes: Sequence of two-element pairs whose second element is a note
            dict with ``'title'`` and ``'text'`` entries and, optionally, a
            ``'cover_photo'`` dict. The first element of each pair is
            ignored.

    Returns:
        None. All results are printed.

    Raises:
        KeyError: If a note lacks a ``'title'`` or ``'text'`` entry.
    """
    print("Found " + str(len(notes)) + " notes.")

    # Counters keyed by (hasTitle, hasText) and by (hasCoverPhoto, hasTitle).
    flags = (False, True)
    titleTextCounts = {(t, x): 0 for t in flags for x in flags}
    coverTitleCounts = {(c, t): 0 for c in flags for t in flags}

    maxTextLength = 0
    # None until a note with nonempty text is seen. A numeric sentinel would
    # be misreported as the minimum when every text is longer than it, or
    # when no note has any text at all.
    minTextLength = None

    for _, note in notes:
        hasTitle = len(note['title']) > 0
        textLength = len(note['text'])
        hasText = textLength > 0
        # A cover photo only counts as present when it carries a 'uri'.
        hasCoverPhoto = 'cover_photo' in note and 'uri' in note['cover_photo']

        titleTextCounts[(hasTitle, hasText)] += 1
        coverTitleCounts[(hasCoverPhoto, hasTitle)] += 1

        maxTextLength = max(maxTextLength, textLength)
        if hasText:
            if minTextLength is None:
                minTextLength = textLength
            else:
                minTextLength = min(minTextLength, textLength)

    shortestText = "N/A" if minTextLength is None else str(minTextLength)

    print("Found " + str(titleTextCounts[(False, False)]) + " notes with empty title AND text.")
    print("Found " + str(titleTextCounts[(False, True)]) + " notes with empty title BUT nonempty text.")
    print("Found " + str(titleTextCounts[(True, False)]) + " notes with nonempty title BUT empty text.")
    print("Found " + str(titleTextCounts[(True, True)]) + " notes with nonempty title AND nonempty text.")
    print("Longest text length is: " + str(maxTextLength))
    print("Shortest text length is: " + shortestText)
    print("Found " + str(coverTitleCounts[(False, False)]) + " notes with empty cover photo AND title.")
    print("Found " + str(coverTitleCounts[(False, True)]) + " notes with empty cover photo BUT nonempty title.")
    print("Found " + str(coverTitleCounts[(True, False)]) + " notes with nonempty cover photo BUT empty title.")
    print("Found " + str(coverTitleCounts[(True, True)]) + " notes with nonempty cover photo AND nonempty title.")
def _repairMojibake(value):
    """Re-decode Latin-1 code points as UTF-8 bytes; return value as-is on failure."""
    try:
        return value.encode('latin1').decode('utf8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # The string is not UTF-8-read-as-Latin-1 (e.g. it is already
        # correctly decoded), so leave it untouched instead of crashing.
        return value


def parseInput(notesInput, authorUsername, postType, postStatus):
    """Convert raw note dicts into CSV row dicts, ordered by creation time.

    Also prints diagnostics about the raw notes via ``getDiagnostics``.

    Args:
        notesInput: Iterable of note dicts, each with
            ``'created_timestamp'``, ``'title'`` and ``'text'`` entries.
        authorUsername: Value stored in every row's ``'post_author'``.
        postType: Value stored in every row's ``'post_type'``.
        postStatus: Value stored in every row's ``'post_status'``.

    Returns:
        A list of ``(datetime, row)`` tuples sorted by the datetime, where
        ``row`` is a dict with the keys ``'post_author'``, ``'post_date'``,
        ``'post_type'``, ``'post_status'``, ``'post_title'`` and
        ``'post_content'``. Notes with equal timestamps keep their input
        order.

    Raises:
        KeyError: If a note lacks one of the required entries.
    """
    notes = []
    notesInputForDiagnostic = []
    for noteInput in notesInput:
        timestamp = noteInput['created_timestamp']
        title = noteInput['title']
        text = noteInput['text']

        # Datetime processing and formatting
        timestampDatetime = datetime.fromtimestamp(timestamp)
        # WARNING: Timestamp provided by Facebook pages' timestamp data
        # does not seem to be in GMT/UTC. It may be a localized timestamp
        # based on the local timezone of the page/profile account.
        # This part needs updating if your local timezone differs from
        # the timezone of the system to which you'll feed this data.
        postDateString = timestampDatetime.strftime('%Y-%m-%d %H:%M:%S')

        noteCSVData = {'post_author': authorUsername, 'post_date': postDateString,
                       'post_type': postType, 'post_status': postStatus,
                       'post_title': _repairMojibake(title),
                       'post_content': _repairMojibake(text)}
        notes.append((timestampDatetime, noteCSVData))
        notesInputForDiagnostic.append((timestampDatetime, noteInput))

    # Sort on the datetime only. A plain tuple sort would fall back to
    # comparing the dicts when two timestamps are equal, and dicts do not
    # support ordering (TypeError).
    notes.sort(key=(lambda noteInfo: noteInfo[0]))
    notesInputForDiagnostic.sort(key=(lambda noteInfo: noteInfo[0]))
    getDiagnostics(notesInputForDiagnostic)
    return notes
def migrateToCSV(notes, outputFilePath):
    """Write the notes to a CSV file with one header row and one row per note.

    Every non-numeric field (including the header) is quoted.

    Args:
        notes: Iterable of two-element pairs whose second element is a dict
            with the keys ``'post_author'``, ``'post_date'``,
            ``'post_type'``, ``'post_status'``, ``'post_title'`` and
            ``'post_content'``. The first element is ignored.
        outputFilePath: Path of the CSV file to create or overwrite.

    Returns:
        None.

    Raises:
        OSError: If the output file cannot be opened for writing.
        ValueError: If a row dict contains a key not in the field list.
    """
    fieldnames = ['post_author', 'post_date', 'post_type', 'post_status', 'post_title', 'post_content']
    # newline='' hands line-ending control to the csv module; without it,
    # platforms that translate '\n' emit '\r\r\n' and readers see blank rows.
    # UTF-8 is explicit so non-ASCII titles/content do not depend on the
    # platform's locale encoding.
    with open(outputFilePath, mode='w', newline='', encoding='utf-8') as outputFile:
        dictWriter = csv.DictWriter(outputFile,
                                    fieldnames=fieldnames,
                                    delimiter=',',
                                    quotechar='"',
                                    quoting=csv.QUOTE_NONNUMERIC)
        dictWriter.writeheader()
        dictWriter.writerows(note[1] for note in notes)
# --- Script configuration -------------------------------------------------
# Input JSON file; its top-level 'notes' entry is what gets migrated.
notesFilePath = './notes.json' # TODO: Change it to the path of your notes.json file
# Written verbatim into the 'post_author' column of every CSV row.
authorUsername = 'WPAuthorUsername' # TODO: Change it to Wordpress username for the author of the posts
# Written verbatim into the 'post_type' and 'post_status' columns of every row.
postType = 'post'
postStatus = 'publish'
# Destination CSV file; it is opened in write mode, so existing content is replaced.
outputFilePath = 'outputFacebook.csv' # TODO: Change it to the path of your desired output file location
# --- Pipeline: load the notes, convert them to CSV rows, write the CSV ----
# NOTE: these statements run at module level, i.e. also when the file is imported.
notesInput = processInput(notesFilePath)
notes = parseInput(notesInput, authorUsername, postType, postStatus)
migrateToCSV(notes, outputFilePath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment