Skip to content

Instantly share code, notes, and snippets.

@astariul
Created March 15, 2019 07:48
Show Gist options
  • Save astariul/fb60873c4d3630f1e3e9b873276ff945 to your computer and use it in GitHub Desktop.
Save astariul/fb60873c4d3630f1e3e9b873276ff945 to your computer and use it in GitHub Desktop.
import json
# Browse the TIFU dataset: a text file holding one JSON post object per line.
#
# NOTE(review): the original script's indentation was lost. The display of a
# post and the input() pause are assumed to run once per post, inside the
# reading loop -- confirm against the author's intent.

SEPARATOR = '================='


def has_tldr(post):
    """Return True when *post* has both the 'tldr' and 'tldr_tokenized' keys."""
    return 'tldr' in post and 'tldr_tokenized' in post


def format_post(post):
    """Return the items to print for *post*, in display order.

    The body ('selftext_without_tldr') and the 'trimmed_title' are each
    preceded by a separator line and are required: a missing key raises
    KeyError. The 'tldr' is appended only when it is present and can be
    encoded; a missing key or a non-string value such as None is skipped.

    Text fields are UTF-8 encoded to bytes, exactly as the original script
    printed them. Other fields the original had commented out: 'selftext',
    'title' and 'id'.
    """
    items = [
        SEPARATOR,
        post['selftext_without_tldr'].encode('utf-8'),
        SEPARATOR,
        post['trimmed_title'].encode('utf-8'),
    ]
    try:
        items.append(post['tldr'].encode('utf-8'))
    except (KeyError, AttributeError):
        # Best effort: the summary is optional, so show the post without it.
        pass
    return items


if __name__ == "__main__":
    posts = []
    i = 1  # 1-based progress counter shown before each post
    j = 0  # number of posts carrying both 'tldr' and 'tldr_tokenized'
    with open('tifu_all_tokenized_and_filtered.json', 'r',
              encoding='utf-8') as fp:
        for line in fp:
            if not line.strip():
                # A blank line is not a JSON document; skip it instead of
                # crashing in json.loads.
                continue
            print("{} / 79,949".format(i))
            i += 1
            post = json.loads(line)  # parse once and reuse the result
            if has_tldr(post):
                j += 1
            posts.append(post)
            for item in format_post(post):
                print(item)
            input()  # wait for Enter before moving to the next post
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment