Created
May 4, 2019 11:36
-
-
Save haseebelahi/0ef3a52b89b6890e66290d006c94ac10 to your computer and use it in GitHub Desktop.
Facebook Data JSON to CSVs - Simple Python code that converts data downloaded from Facebook as JSON into CSV files for easier use in data exploration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import json

# Convert a Facebook JSON data export into three flat CSV files:
#   <output_dir>/posts.csv      -> timestamp,text
#   <output_dir>/comments.csv   -> timestamp,text
#   <output_dir>/locations.csv  -> timestamp,lat,long
# Text is flattened to a single comma-free line, so rows are written without
# any CSV quoting.

# Suffixes of the export folders to read; each one is appended to the folder
# prefix given to main().
data_folders = ['2010-2013', '2014-2016', '2017-2017', '2018-2018', '2019-2019']

# Header row shared by posts.csv and comments.csv.
header = "timestamp,text"

# Errors raised when an entry lacks an expected key or holds a value of an
# unexpected type. Such entries are skipped; anything else propagates.
_MALFORMED = (KeyError, TypeError, AttributeError)


def _log(message):
    """Print a progress line, escaping characters the console cannot encode."""
    try:
        print(message)
    except UnicodeEncodeError:
        # A limited console encoding must not abort the conversion.
        print(message.encode('ascii', 'backslashreplace').decode('ascii'))


def _load_json(path):
    """Return the parsed JSON document at *path*, or None if it is unusable.

    A missing, unreadable or invalid file is reported and skipped so that one
    incomplete export folder does not stop the remaining folders.
    """
    try:
        with open(path, encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as err:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        _log("skipping " + path + ": " + str(err))
        return None


def _clean_text(text):
    """Collapse *text* onto one line and replace commas with spaces."""
    return text.replace('\n', ' ').replace('\r', ' ').replace(',', ' ')


def _post_text(item):
    """Return the text of one item from a post's 'data' list."""
    return item['post']


def _comment_text(item):
    """Return the text of one item from a comment's 'data' list."""
    return item['comment']['comment']


def _iter_text_rows(entries, extract):
    """Yield (timestamp, cleaned_text) for every usable item in *entries*.

    Each entry is expected to carry a 'timestamp' and a 'data' list; *extract*
    pulls the raw text out of one 'data' item. Entries or items that do not
    have that shape are skipped.
    """
    if not isinstance(entries, list):
        return
    for entry in entries:
        try:
            items = entry['data']
            timestamp = entry['timestamp']
        except _MALFORMED:
            continue
        if not isinstance(items, list):
            continue
        for item in items:
            try:
                text = _clean_text(extract(item))
            except _MALFORMED:
                continue
            yield timestamp, text


def main(base_prefix='facebook-haseebelaahi-', output_dir='csvs', folders=None):
    """Convert the export folders into posts, comments and locations CSVs.

    Args:
        base_prefix: Path prefix placed in front of each folder suffix.
        output_dir: Directory receiving the CSV files; created if missing.
        folders: Folder suffixes to process; defaults to ``data_folders``.

    Returns:
        A tuple ``(posts, comments, locations)`` with the number of data rows
        written to each CSV file.
    """
    folders = data_folders if folders is None else folders
    os.makedirs(output_dir, exist_ok=True)

    # Counters start at 1 because they label the progress lines.
    post_count = 1
    comment_count = 1
    location_count = 0

    # Context managers close (and flush) the outputs even if a folder fails.
    # backslashreplace keeps a row whose text holds unencodable code points.
    with open(os.path.join(output_dir, 'posts.csv'), 'w',
              encoding='utf-8', errors='backslashreplace') as posts_output, \
            open(os.path.join(output_dir, 'comments.csv'), 'w',
                 encoding='utf-8', errors='backslashreplace') as comments_output, \
            open(os.path.join(output_dir, 'locations.csv'), 'w',
                 encoding='utf-8', errors='backslashreplace') as location_output:
        posts_output.write(header + "\n")
        comments_output.write(header + "\n")
        location_output.write("timestamp,lat,long" + "\n")

        for folder in folders:
            root = base_prefix + folder

            posts = _load_json(root + '/posts/your_posts_1.json')
            for timestamp, text in _iter_text_rows(posts, _post_text):
                # Posts containing a link are left out.
                if 'http' in text:
                    continue
                posts_output.write(str(timestamp) + "," + text + "\n")
                _log("post# " + str(post_count) + "," + text)
                post_count += 1

            _log("******************************** COMMENTS ********************************")
            comments = _load_json(root + '/comments/comments.json')
            comment_entries = comments.get('comments') if isinstance(comments, dict) else None
            for timestamp, text in _iter_text_rows(comment_entries, _comment_text):
                # Comments containing a link, or at most one character long,
                # are left out.
                if 'http' in text or len(text) <= 1:
                    continue
                comments_output.write(str(timestamp) + "," + text + "\n")
                _log("comment# " + str(comment_count) + "," + text)
                comment_count += 1

            # A folder without location history is simply skipped.
            history = _load_json(root + '/location/location_history.json')
            points = history.get('location_history') if isinstance(history, dict) else None
            if not isinstance(points, list):
                continue
            for point in points:
                try:
                    coordinate = point['coordinate']
                    timestamp = point['creation_timestamp']
                    latitude = str(coordinate['latitude'])
                    longitude = str(coordinate['longitude'])
                except _MALFORMED:
                    continue
                location_output.write(str(timestamp) + "," + latitude + "," + longitude + "\n")
                _log(latitude + "," + longitude)
                location_count += 1

    return post_count - 1, comment_count - 1, location_count


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment