# Weather-condition classifier: matches timestamped images against the
# GMTT-Tanger station conditions (written against Keras 1.x-era APIs, June 2016).
import cv2
import os
import numpy as np
import pandas as pd
from collections import Counter
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.utils.visualize_util import plot
# images paired with their capture dates
dates_images = []
# prepare the conditions data
tangier = pd.read_csv('/home/akram/Dropbox/DataSets/airports_weather/GMTT-Tanger.csv')
tangier = tangier[['Conditions', 'DateUTC']]
tangier['Conditions'] = tangier['Conditions'].fillna(method='ffill')
tangier['DateUTC'] = pd.to_datetime(tangier.DateUTC, format='%Y-%m-%d %H:%M:%S')
# clean conditions: mark unusable labels with a sentinel, then collapse the
# raw station labels into coarser classes
tangier.loc[tangier['Conditions'] == 'nan', 'Conditions'] = '-9999'
tangier.loc[tangier['Conditions'] == 'Unknown', 'Conditions'] = '-9999'
tangier.loc[tangier['Conditions'].isin(['Scattered Clouds', 'Partly Cloudy', 'Clear']), 'Conditions'] = 'Clear'
tangier.loc[tangier['Conditions'].isin(['Mostly Cloudy', 'Overcast', 'Funnel Cloud']), 'Conditions'] = 'Cloudy'
tangier.loc[tangier['Conditions'].isin(['Mist', 'Light Mist']), 'Conditions'] = 'Mist'
tangier.loc[tangier['Conditions'].isin(['Light Freezing Rain', 'Light Rain', 'Unknown Precipitation']), 'Conditions'] = 'Light Rain'
tangier.loc[tangier['Conditions'].isin(['Rain Showers', 'Heavy Rain', 'Light Rain Showers', 'Heavy Rain Showers', 'Freezing Rain']), 'Conditions'] = 'Rain'
tangier.loc[tangier['Conditions'].isin(['Heavy Thunderstorm', 'Heavy Thunderstorms with Hail', 'Thunderstorm', 'Light Thunderstorm', 'Thunderstorms and Rain', 'Light Thunderstorms and Rain', 'Heavy Thunderstorms and Rain', 'Thunderstorms with Hail']), 'Conditions'] = 'Thunderstorm'
tangier.loc[tangier['Conditions'].isin(['Light Hail', 'Heavy Small Hail', 'Heavy Hail Showers', 'Light Thunderstorms with Hail', 'Light Small Hail Showers', 'Light Hail Showers', 'Hail Showers', 'Hail', 'Small Hail']), 'Conditions'] = 'Hail'
tangier.loc[tangier['Conditions'].isin(['Fog', 'Light Fog', 'Partial Fog', 'Patches of Fog', 'Heavy Fog', 'Shallow Fog', 'Light Freezing Fog']), 'Conditions'] = 'Fog'
tangier.loc[tangier['Conditions'].isin(['Low Drifting Widespread Dust', 'Widespread Dust', 'Dust Whirls']), 'Conditions'] = 'Dust'
tangier.loc[tangier['Conditions'].isin(['Blowing Sand', 'Sand', 'Light Sand', 'Sandstorm', 'Light Sandstorm', 'Low Drifting Sand']), 'Conditions'] = 'Sand'
tangier.loc[tangier['Conditions'].isin(['Light Snow', 'Snow Grains', 'Light Snow Grains', 'Snow', 'Low Drifting Snow', 'Light Snow Showers', 'Heavy Snow', 'Light Thunderstorms and Snow']), 'Conditions'] = 'Snow'
tangier.loc[tangier['Conditions'].isin(['Ice Pellets', 'Ice Crystals', 'Light Ice Pellets']), 'Conditions'] = 'Ice'
tangier.loc[tangier['Conditions'].isin(['Light Drizzle', 'Drizzle', 'Light Freezing Drizzle', 'Heavy Drizzle', 'Heavy Freezing Drizzle']), 'Conditions'] = 'Drizzle'
tangier.loc[tangier['Conditions'].isin(['Light Haze', 'Heavy Haze']), 'Conditions'] = 'Haze'
# forward-fill the '-9999' sentinels with the last valid condition
tangier['Conditions'] = tangier['Conditions'].replace(to_replace='-9999', method='ffill')
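# How the sentinel forward-fill above behaves (toy example, not from the
# dataset): pd.Series(['Clear', '-9999', 'Rain']).replace(to_replace='-9999',
# method='ffill') gives ['Clear', 'Clear', 'Rain'] -- each sentinel takes the
# last valid label seen before it.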
# encode conditions as integers
encoder = preprocessing.LabelEncoder()
encoder.fit(tangier.Conditions)
# transform conditions into numbers
tangier['E_Conditions'] = encoder.transform(tangier.Conditions.values)
del tangier['Conditions']
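# Sanity check (added for illustration, not part of the original pipeline):
# print the learned mapping from encoded integer to condition name.
print(dict(enumerate(encoder.classes_)))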
# get dates and images; filenames are expected to look like 'YYYY-MM-DD-HH.jpeg'
# (all images are assumed to share the same dimensions)
j = 0
for i in os.listdir(os.getcwd()):
    # only look at the first 100 images
    if j > 99:
        break
    if i.endswith(".jpeg"):
        j = j + 1
        img = cv2.imread(i, 0)  # 0 -> read as grayscale
        # cv2.imread returns None (it does not raise) on unreadable files
        if img is None:
            print('BAD IMAGE')
            continue
        dates_images.append([i[:-5], img])  # strip the '.jpeg' extension
        del img
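# Filename-parsing example (the 'YYYY-MM-DD-HH' stem convention is an
# assumption recovered from the split below): '2016-06-30-13' splits into
# ['2016', '06', '30', '13'], i.e. year, month, day, hour in UTC.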
# the lists we actually train on
dates = []
X = []
y = []
# label each image with the dominant condition recorded during its hour
for date_image in dates_images:
    # the filename stem 'YYYY-MM-DD-HH' carries the capture time
    parts = date_image[0].split("-")
    year, month, day, hour = int(parts[0]), int(parts[1]), int(parts[2]), int(parts[3])
    # all station rows logged during that hour
    solution = tangier.loc[(tangier.DateUTC.dt.year == year) &
                           (tangier.DateUTC.dt.month == month) &
                           (tangier.DateUTC.dt.day == day) &
                           (tangier.DateUTC.dt.hour == hour)]
    if solution.empty:
        continue
    dates.append(date_image[0])
    X.append(date_image[1])
    # majority vote over the (possibly multiple) readings in that hour
    y.append(Counter(list(solution.E_Conditions)).most_common(1)[0][0])
    del solution
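# Majority-vote illustration (toy values, for clarity): an hour with encoded
# readings [3, 3, 7] gives Counter([3, 3, 7]).most_common(1) == [(3, 2)],
# so [0][0] selects 3, the most frequent condition in that hour.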
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_test, y_train, y_test = np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test)
# get some useful numbers
n_train, height, width = X_train.shape
n_test, _, _ = X_test.shape
# reshape into (samples, channels, height, width) tensors
X_train = X_train.reshape(n_train, 1, height, width).astype('float32')
X_test = X_test.reshape(n_test, 1, height, width).astype('float32')
# normalize from [0, 255] to [0, 1]
X_train /= 255
X_test /= 255
# number of condition classes left after the cleaning above
# (derived from the encoder rather than hard-coded, so the one-hot width
# always matches the labels actually present)
n_classes = len(encoder.classes_)
y_train = to_categorical(y_train, n_classes)
y_test = to_categorical(y_test, n_classes)
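# One-hot illustration (toy values): with 5 classes, to_categorical([2], 5)
# yields [[0., 0., 1., 0., 0.]] -- a single row with a 1 at index 2.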
# build the model
model = Sequential()
# number of convolutional filters
n_filters = 32
# convolution filter size
# i.e. we will use an n_conv x n_conv filter
n_conv = 3
# pooling window size
# i.e. we will use an n_pool x n_pool pooling window
n_pool = 2
# adding the layers
model.add(Convolution2D(
    n_filters, n_conv, n_conv,
    # apply the filter to only full parts of the image
    # (i.e. do not "spill over" the border)
    # this is called a narrow convolution
    border_mode='valid',
    # single-channel (grayscale) images in channels-first order,
    # so the input shape is (1, height, width)
    input_shape=(1, height, width)
))
model.add(Activation('relu'))
model.add(Convolution2D(n_filters, n_conv, n_conv))
model.add(Activation('relu'))
# then we apply pooling to summarize the features
# extracted thus far
model.add(MaxPooling2D(pool_size=(n_pool, n_pool)))
model.add(Dropout(0.25))
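# Shape bookkeeping (a derivation added for clarity; values follow from the
# layers above): each 'valid' 3x3 convolution shrinks both spatial dims by 2,
# so after the two conv layers the feature maps are (height - 4) x (width - 4);
# 2x2 max pooling then halves that, so Flatten below will see
# 32 * ((height - 4) // 2) * ((width - 4) // 2) features per image.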
# flatten the data for the 1D layers
model.add(Flatten())
# Dense(n_outputs)
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# the softmax output layer gives us a probability for each class
model.add(Dense(n_classes))
model.add(Activation('softmax'))
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
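# Print a layer-by-layer overview with parameter counts (an optional sanity
# check added here; not in the original gist).
model.summary()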
# save a diagram of the architecture
plot(model, to_file='/home/akram/Convolutional_model.png', show_shapes=True)
# how many examples to look at during each training iteration
batch_size = 128
# how many times to run through the full set of examples
n_epochs = 25
# the training may be slow depending on your computer
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=n_epochs, validation_data=(X_test, y_test))
loss, accuracy = model.evaluate(X_test, y_test)
print('loss:', loss)
print('accuracy:', accuracy)