This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the candidate links, one per line, from the file at linksPath.
with open(linksPath, 'r') as links_file:
    WebScrap.urls = links_file.readlines()

# Accepts http/https URLs whose host is either a dotted domain name or four
# dot-separated groups of 1-3 digits, optionally followed by a path or query.
urlFilter = re.compile(
    r'^(?:http)s?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain name
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ip
    r'(?:/?|[/?]\S+)$',
    re.IGNORECASE)

# Keep only the whitespace-stripped entries that match the URL pattern.
valid_urls = []
for url in WebScrap.urls:
    candidate = url.strip()
    if urlFilter.match(candidate):
        valid_urls.append(candidate)
WebScrap.urls = valid_urls
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
content = [] | |
for url in WebScrap.urls: | |
try: | |
hdr = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'} | |
req = urllib2.Request(url, headers=hdr) | |
page = urllib2.urlopen(req) | |
soup = BeautifulSoup(page.read(), "lxml") | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
from bs4 import BeautifulSoup | |
import re | |
import traceback | |
import httplib | |
class WebScrap: | |
urls = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the TextBlob package | |
from textblob import TextBlob | |
# Sentiment smoke test: score one clearly positive sentence.
test_blob = TextBlob("This is an awesome day!")
# Read the sentiment once, then echo it to stdout.
positive_sentiment = test_blob.sentiment
print(positive_sentiment)
# Output recorded by the original author:
# Sentiment(polarity=1.0, subjectivity=1.0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import the TextBlob package | |
from textblob import TextBlob | |
# Score one clearly negative sentence.
negative_blob = TextBlob("This is an awful day!")
# Read the sentiment once, then echo it to stdout.
negative_sentiment = negative_blob.sentiment
print(negative_sentiment)
# Output recorded by the original author:
# Sentiment(polarity=-1.0, subjectivity=1.0)
# A neutral text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Score a sentence that is worded to be neither positive nor negative.
neutral_blob = TextBlob("My day is neither positive nor negative")
# Read the sentiment once, then echo it to stdout.
neutral_sentiment = neutral_blob.sentiment
print(neutral_sentiment)
# Output recorded by the original author:
# Sentiment(polarity=-0.03636363636363636, subjectivity=0.4727272727272727)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import numpy as np | |
from os import listdir | |
from os.path import isfile, join | |
import os | |
# Function to unpickle the dataset | |
def unpickle_all_data(directory): | |
# Initialize the variables |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.utils import np_utils | |
# Locations of the pickled label (-y) and sample (-x) files; each entry is
# appended directly to input_path, so input_path must supply any separator.
files = [
    'training/train-y',
    'training/train-x',
    'validation/test-y',
    'validation/test-x',
]

# Load the training labels from the first entry.
# NOTE(review): pickle.load can execute code embedded in the file; confirm
# these files only ever come from a trusted source.
train_labels_file = input_path + files[0]
with open(train_labels_file, 'rb') as lbpath:
    y_train = pickle.load(lbpath, encoding='bytes')
# Load training samples | |
with open(input_path+files[1], 'rb') as imgpath: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import keras | |
from keras.models import Sequential | |
from keras.utils import np_utils | |
from keras.preprocessing.image import ImageDataGenerator | |
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization | |
from keras.layers import Conv2D, MaxPooling2D | |
from keras.datasets import cifar10 | |
from keras import regularizers | |
from keras.callbacks import LearningRateScheduler | |
import numpy as np |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#data augmentation | |
datagen = ImageDataGenerator( | |
featurewise_center=False, # set input mean to 0 over the dataset | |
samplewise_center=False, # set each sample mean to 0 | |
featurewise_std_normalization=False, # divide inputs by std of the dataset | |
samplewise_std_normalization=False, # divide each input by its std | |
zca_whitening=False, # apply ZCA whitening | |
rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) | |
width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) | |
height_shift_range=0.1, # randomly shift images vertically (fraction of total height) |
Older | Newer