Skip to content

Instantly share code, notes, and snippets.

@jgabriellima
Forked from spicyramen/to_pickle.py
Created March 19, 2018 15:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jgabriellima/25756844b39e29808cbb80a0ce36dff8 to your computer and use it in GitHub Desktop.
Save jgabriellima/25756844b39e29808cbb80a0ce36dff8 to your computer and use it in GitHub Desktop.
from nltk.tokenize import word_tokenize
import pickle
import pprint
import json
"""
(heads, descs, keywords) = ([headline], [description], )
"""
# Input path handed to get_json_data(), which reads it one JSON object per line.
DATA_FILE = 'data/signalmedia-1m.jsonl.test'
# Path the extracted tuple is pickled to, and then read back from, by this script.
PICKLE_FILE = 'data/tokens.pkl'
def write_to_pickle(filename, data):
    """Serialize *data* to *filename* with pickle.

    Args:
        filename: Path of the file to create or overwrite.
        data: Any picklable object.

    Raises:
        OSError: If the file cannot be opened for writing.
        pickle.PicklingError: If *data* cannot be pickled.
    """
    # Binary mode is required: pickle emits bytes, not text.
    with open(filename, 'wb') as f:
        pickle.dump(data, f)
def read_from_pickle(filename):
    """Load and return the object pickled in *filename*.

    Args:
        filename: Path of a file containing a single pickled object.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the contents are not a valid pickle.
    """
    # Must be opened in binary mode: pickle.load() reads bytes, and a
    # text-mode handle either fails outright or corrupts the stream.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        return pickle.load(f)
def get_json_data(filename):
    """Collect titles and contents from a file holding one JSON object per line.

    Args:
        filename: Path of the line-delimited JSON file to read.

    Returns:
        A 3-tuple ``(heads, descs, None)`` where ``heads`` is the list of
        each record's ``"title"`` value and ``descs`` the list of each
        record's ``"content"`` value, both in file order. The third slot
        is always ``None``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``"title"`` or ``"content"`` key.
    """
    HEADER = "title"
    DESCRIPTION = "content"
    heads = []
    descs = []
    with open(filename, 'r') as json_data:
        for line in json_data:
            # Whitespace-only lines (e.g. a trailing blank line) carry no
            # record; skip them instead of failing in json.loads().
            if not line.strip():
                continue
            # Decode each line once and reuse the result for both fields.
            record = json.loads(line)
            heads.append(record[HEADER])
            descs.append(record[DESCRIPTION])
    return heads, descs, None
# Script driver: extract (heads, descs, None) from the JSON-lines input,
# persist it as a pickle, then reload that pickle and pretty-print it so the
# round trip can be inspected.
data = get_json_data(filename=DATA_FILE)
write_to_pickle(filename=PICKLE_FILE, data=data)
data = read_from_pickle(filename=PICKLE_FILE)
pprint.pprint(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment