def create_lexicon():
    lexicon = []  # create an empty list
    with open('pos.txt', 'r') as f:  # we are already in the folder where the file is stored; 'r' because we only read it
        lines = f.readlines()  # read all the lines
        for line in lines:
            line = line.lower()  # convert the line to lowercase
            line = word_tokenize(line)  # split the sentence into words
            lexicon += line  # add each word to the lexicon
    # repeat the same process with the negative examples
    with open('neg.txt', 'r') as f:
        lines = f.readlines()
        for line in lines:
            line = line.lower()
            line = word_tokenize(line)
            lexicon += line
    return lexicon
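# Not in the original snippet: collections.Counter and WordNetLemmatizer are imported further
# down in this gist but never used here. A minimal sketch, with illustrative frequency
# thresholds, of the usual follow-up step that lemmatizes the lexicon and keeps only
# mid-frequency words so the feature vectors stay small:
def prune_lexicon(lexicon, min_count=50, max_count=1000):  # hypothetical helper, thresholds are examples
    lexicon = [lemmatizer.lemmatize(w) for w in lexicon]  # reduce each word to its lemma
    word_counts = Counter(lexicon)                        # how often each word appears
    return [w for w in word_counts if min_count < word_counts[w] < max_count]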
# install the google-drive-ocamlfuse FUSE client used to mount Google Drive in Colab
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# authenticate the Colab user and fetch application-default credentials
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# print the authorization URL, paste the verification code at the prompt, then finish the auth
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
# mount Google Drive at ./drive
!mkdir -p drive
!google-drive-ocamlfuse drive
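# Optional sanity check that the mount worked; on current Colab, google.colab.drive.mount
# is the simpler built-in alternative to the ocamlfuse recipe above (shown commented out,
# it was not part of the original snippet):
# from google.colab import drive
# drive.mount('/content/drive')
!ls drive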
import os
os.chdir('drive/google_colab/sentiment classification on 10k samples')  # move into the project folder on the mounted drive
import tensorflow as tf
import numpy as np
import nltk
nltk.download('punkt')    # tokenizer models needed by word_tokenize
nltk.download('wordnet')  # WordNet data needed by WordNetLemmatizer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import random
import pickle
from collections import Counter
lemmatizer = WordNetLemmatizer()  # shorthand for ease of writing
hm_lines = 100000  # maximum number of lines to process from each file
def feature_vectors(input_file_name, lexicon, classification):  # classification is [1, 0] for positive and [0, 1] for negative
    # i.e. a one-hot label: index 0 marks positives, index 1 marks negatives
    featureset = []  # create an empty list
    with open(input_file_name, 'r') as f:
        contents = f.readlines()
        for l in contents[:hm_lines]:  # number of lines we need to process
            current_words = word_tokenize(l.lower())  # lowercase the sentence and split it into words
            current_words = [lemmatizer.lemmatize(i) for i in current_words]
            features = np.zeros(len(lexicon))  # bag-of-words counts over the lexicon
            for word in current_words:
                if word in lexicon:
                    features[lexicon.index(word)] += 1
            featureset.append([list(features), classification])
    return featureset
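# For intuition, a tiny hypothetical example of what one featureset entry looks like
# (toy lexicon and sentence, not from the original data):
# with lexicon = ['good', 'bad', 'movie'], the positive line "good good movie" becomes
# [[2.0, 0.0, 1.0], [1, 0]]  -> word counts over the lexicon, then the one-hot label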
def create_feature_sets_and_labels(test_size=0.1):  # split the data into training and testing sets
    # test_size is the fraction of the data reserved for testing
    lexicon = create_lexicon()
    features = []
    features += feature_vectors('pos.txt', lexicon, [1, 0])
    features += feature_vectors('neg.txt', lexicon, [0, 1])
    random.shuffle(features)  # shuffle so positive and negative examples are mixed
    features = np.array(features)
    testing_size = int(test_size * len(features))  # testing size is 10% of the total data
    # everything up to the last `testing_size` rows is training data, the rest is test data
    train_x = list(features[:, 0][:-testing_size])
    train_y = list(features[:, 1][:-testing_size])
    test_x = list(features[:, 0][-testing_size:])
    test_y = list(features[:, 1][-testing_size:])
    return train_x, train_y, test_x, test_y
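# pickle is imported above but unused in this snippet; a minimal usage sketch (the pickle
# file name is illustrative) that builds the datasets once and caches them to disk:
train_x, train_y, test_x, test_y = create_feature_sets_and_labels()
with open('sentiment_set.pickle', 'wb') as f:
    pickle.dump([train_x, train_y, test_x, test_y], f)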
# extract features from each photo in the directory
from os import listdir
from PIL import Image
import torch
from torchvision import transforms

def extract_features(directory):
    # load the model
    model = Encoder()
    # model.to(device)
    model.eval()
    # extract features from each photo
    features = dict()
    for i, name in enumerate(listdir(directory)):
        # load an image from file and turn it into a batched tensor (no resizing/normalisation shown here)
        image = Image.open(os.path.join(directory, name)).convert('RGB')
        image = transforms.ToTensor()(image).unsqueeze(0)
        with torch.no_grad():
            features[name.split('.')[0]] = model(image)  # key by image id (file name without extension)
    return features
import torch.nn as nn
from torchvision import models

class Encoder(nn.Module):
    """
    Encodes the input image to a vector.
    """
    def __init__(self):
        super(Encoder, self).__init__()
        vgg = models.vgg16(pretrained=True)
        # drop VGG16's final classification layer so the output is a 4096-d feature vector
        vgg.classifier = nn.Sequential(*list(vgg.classifier.children())[:-1])
        self.model = vgg

    def forward(self, x):
        return self.model(x)
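# A minimal usage sketch; the directory name is illustrative, not from the original snippet:
# photo_features = extract_features('images/')
# print(len(photo_features), 'photos encoded, each to a vector of shape',
#       next(iter(photo_features.values())).shape)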