Last active
June 27, 2018 18:33
-
-
Save ravishchawla/0f2ed1a3ac85bc052f3cc1ad32c0951e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Read reviews from a JSON Lines file (one JSON object per line) into a list,
keeping at most num_total positive (> 3 stars) and num_total negative
(< 3 stars) reviews. Reviews with exactly 3 stars are skipped.
'''
import json

lines = []
num_pos = 0
num_neg = 0
num_total = 75000  # maximum number of reviews kept per class

# JSON text is UTF-8 by specification, so do not rely on the locale default.
with open('data/review.json', 'r', encoding='utf-8') as f:
    for line in f:
        # Both classes are full once 2 * num_total reviews are collected.
        if len(lines) >= num_total * 2:
            break

        # Parse each record once and reuse the parsed object below.
        json_info = json.loads(line)

        if json_info['stars'] > 3:
            # '>=' (not '>') so that exactly num_total positives are kept;
            # '>' would let one extra review through and unbalance the sample.
            if num_pos >= num_total:
                continue
            num_pos += 1
        elif json_info['stars'] < 3:
            if num_neg >= num_total:
                continue
            num_neg += 1
        else:
            # Exactly 3 stars: neither positive nor negative, so skip it.
            continue

        lines.append(json_info)

'''
Separate line data into reviews and labels
'''
reviews = [line['text'] for line in lines]
stars = [line['stars'] for line in lines]
# Only ratings above or below 3 were kept, so '0' here means fewer than 3 stars.
labels = ['1' if star > 3 else '0' for star in stars]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment