Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import gzip
import sys
import logging
# Marker that introduces the review body inside a record. Everything between
# this marker and the end of the line is treated as one review text.
REVIEW_MARKER = 'review/text'
INPUT_PATH = '/mnt/data/rajiv/AmazonProductReviewDataset/all.txt.gz'
OUTPUT_PATH = '/home/rajivratn/laiba/AmazonReviews/Amazon_Reviews_Seperated.txt'


def iter_review_texts(lines):
    """Yield the text that follows each ``review/text`` marker, up to end of line.

    Args:
        lines: Iterable of text lines (an open text file works); a trailing
            newline on each line is optional.

    Yields:
        One string per marker occurrence: everything after the marker up to
        the next marker on the same line or the end of the line, with the
        newline removed.

    NOTE: the separator that follows the marker is kept, so a line such as
    ``review/text: great`` yields ``": great"``. This matches the output
    layout the script has always produced.
    """
    for line in lines:
        # The previous implementation split on '\\n' (a literal backslash
        # followed by "n"), which never occurs in text-mode input, so each
        # entry swallowed the rest of its record. Working per line cuts at
        # the real newline instead.
        pieces = line.rstrip('\n').split(REVIEW_MARKER)
        # pieces[0] is whatever preceded the first marker; skip it.
        yield from pieces[1:]


def main():
    """Extract every review text from the gzipped dataset into a text file.

    Reads ``INPUT_PATH`` line by line (so the whole decompressed dataset is
    never held in memory at once), collects the review texts, then writes
    them to ``OUTPUT_PATH``. The output starts with a single space and each
    review text is preceded by a newline.
    """
    print("BEGIN NOTEBOOK")
    print("OPENING GZIP FILE")
    with gzip.open(INPUT_PATH, 'rt') as source:
        review_texts = list(iter_review_texts(source))
    print("FILE READ")

    # The output is opened only after the input was read completely, so a
    # failed read never truncates an existing output file.
    with open(OUTPUT_PATH, 'w') as sink:
        sink.write(" ")
        sink.writelines('\n' + text for text in review_texts)
    print("FILE WRITTEN")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.