Skip to content

Instantly share code, notes, and snippets.

@Akramz
Created June 30, 2016 13:09
Show Gist options
  • Save Akramz/1b16a0bcf998dc7d14e7ae83b606c332 to your computer and use it in GitHub Desktop.
Save Akramz/1b16a0bcf998dc7d14e7ae83b606c332 to your computer and use it in GitHub Desktop.
import cv2
import os
import numpy as np
import pandas as pd
from sys import exit
from collections import Counter
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.utils.visualize_util import plot
# Containers used further down the script.
dates_images = []  # [timestamp string, grayscale image] pairs, one per loaded file
conditions = []  # not populated in this section; kept so the name stays defined

# Load the weather observations and keep only the two columns used below.
# .copy() detaches the two-column frame from the full CSV frame so later
# column assignments never write to a view of it.
tangier = pd.read_csv('/home/akram/Dropbox/DataSets/airports_weather/GMTT-Tanger.csv')
tangier = tangier[['Conditions', 'DateUTC']].copy()
tangier['DateUTC'] = pd.to_datetime(tangier.DateUTC, format='%Y-%m-%d %H:%M:%S')

# Coarse weather classes -> the raw condition strings folded into each one.
# Raw strings that appear in no list (e.g. 'Rain', 'Haze') are kept unchanged.
_CONDITION_GROUPS = {
    'Clear': ['Scattered Clouds', 'Partly Cloudy', 'Clear'],
    'Cloudy': ['Mostly Cloudy', 'Overcast', 'Funnel Cloud'],
    'Mist': ['Mist', 'Light Mist'],
    'Light Rain': ['Light Freezing Rain', 'Light Rain', 'Unknown Precipitation'],
    'Rain': ['Rain Showers', 'Heavy Rain', 'Light Rain Showers', 'Heavy Rain Showers', 'Freezing Rain'],
    'Thunderstorm': ['Heavy Thunderstorm', 'Heavy Thunderstorms with Hail', 'Thunderstorm', 'Light Thunderstorm', 'Thunderstorms and Rain', 'Light Thunderstorms and Rain', 'Heavy Thunderstorms and Rain', 'Thunderstorms with Hail'],
    'Hail': ['Light Hail', 'Heavy Small Hail', 'Heavy Hail Showers', 'Light Thunderstorms with Hail', 'Light Small Hail Showers', 'Light Hail Showers', 'Hail Showers', 'Hail', 'Small Hail'],
    'Fog': ['Fog', 'Light Fog', 'Partial Fog', 'Patches of Fog', 'Heavy Fog', 'Shallow Fog', 'Light Freezing Fog'],
    'Dust': ['Low Drifting Widespread Dust', 'Widespread Dust', 'Dust Whirls'],
    'Sand': ['Blowing Sand', 'Sand', 'Light Sand', 'Sandstorm', 'Light Sandstorm', 'Low Drifting Sand'],
    'Snow': ['Light Snow', 'Snow Grains', 'Light Snow Grains', 'Snow', 'Low Drifting Snow', 'Light Snow Showers', 'Heavy Snow', 'Light Thunderstorms and Snow'],
    'Ice': ['Ice Pellets', 'Ice Crystals', 'Light Ice Pellets'],
    'Drizzle': ['Light Drizzle', 'Drizzle', 'Light Freezing Drizzle', 'Heavy Drizzle', 'Heavy Freezing Drizzle'],
    'Haze': ['Light Haze', 'Heavy Haze'],
}
# Inverted view of the table above: raw condition string -> coarse class.
_RAW_TO_GROUP = {
    raw_name: group
    for group, raw_names in _CONDITION_GROUPS.items()
    for raw_name in raw_names
}

# Treat 'nan' / 'Unknown' like missing values and carry the last known
# condition forward over every gap, then collapse to the coarse classes.
_raw_conditions = tangier['Conditions']
_unknown = _raw_conditions.isin(['nan', 'Unknown'])
_labels = _raw_conditions.mask(_unknown).ffill().map(
    lambda name: _RAW_TO_GROUP.get(name, name)
)

# Rows before the first known condition have nothing to carry forward; drop
# them instead of letting a placeholder value become a class of its own.
_has_label = _labels.notnull()
tangier = tangier.loc[_has_label, ['DateUTC']].copy()

# Encode the class names as integers; `encoder` stays fitted for later use.
encoder = preprocessing.LabelEncoder()
tangier['E_Conditions'] = encoder.fit_transform(_labels[_has_label].values)
# Load the grayscale images found among the first 100 entries of the current
# working directory. Each image is stored with its file name minus the
# ".jpeg" suffix, which the matching loop later parses as a timestamp.
for filename in os.listdir(os.getcwd())[:100]:
    if not filename.endswith(".jpeg"):
        continue
    try:
        # Flag 0 asks OpenCV for a single-channel (grayscale) image.
        img = cv2.imread(filename, 0)
    except cv2.error:
        img = None
    # cv2.imread signals an unreadable file by returning None rather than
    # raising, so the failure has to be checked for explicitly.
    if img is None:
        print('BAD IMAGE')
        continue
    dates_images.append([filename[:-5], img])
    del img
# Parallel output lists: position k of each one describes the same image.
dates = []  # timestamp strings of the images that found a label
X = []      # the image arrays
y = []      # the encoded condition chosen for each image

# Pair every loaded image with the weather rows recorded in the same hour.
for stamp, image in dates_images:
    # The stamp starts with "<year>-<month>-<day>-<hour>".
    fields = stamp.split("-")
    wanted_year, wanted_month, wanted_day, wanted_hour = [int(fields[k]) for k in range(4)]

    when = tangier.DateUTC.dt
    same_hour = (
        (when.year == wanted_year)
        & (when.month == wanted_month)
        & (when.day == wanted_day)
        & (when.hour == wanted_hour)
    )
    matches = tangier.loc[same_hour]

    # Images without any observation in their hour are left out.
    if not matches.empty:
        dates.append(stamp)
        X.append(image)
        # Several observations may fall in one hour: keep the most frequent.
        y.append(Counter(list(matches.E_Conditions)).most_common(1)[0][0])
        del matches
# Hold out 20% of the labelled images for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_test, y_train, y_test = np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test)

# The three-way unpack below needs a 3-D array, i.e. it only works when every
# image has the same height and width.
n_train, height, width = X_train.shape
n_test, _, _ = X_test.shape

# Insert the single grayscale channel axis: (samples, 1, height, width).
X_train = X_train.reshape(n_train, 1, height, width).astype('float32')
X_test = X_test.reshape(n_test, 1, height, width).astype('float32')

# Scale pixel values from [0, 255] to [0, 1].
X_train /= 255
X_test /= 255

# One output per class known to the fitted label encoder. Deriving the count
# keeps the one-hot width in sync with the labels instead of relying on a
# hard-coded number that breaks as soon as the data has more classes.
n_classes = len(encoder.classes_)
y_train = to_categorical(y_train, n_classes)
y_test = to_categorical(y_test, n_classes)
# Hyper-parameters of the convolutional part of the network.
n_filters = 32  # number of filters in each convolution layer
n_conv = 3      # convolutions use an n_conv x n_conv kernel
n_pool = 2      # pooling uses an n_pool x n_pool window

# The network, listed in the order the layers are applied.
_stack = [
    # 'valid' keeps the kernel entirely inside the image (no padding).
    # The input is one grayscale channel of height x width pixels.
    Convolution2D(
        n_filters, n_conv, n_conv,
        border_mode='valid',
        input_shape=(1, height, width)
    ),
    Activation('relu'),
    Convolution2D(n_filters, n_conv, n_conv),
    Activation('relu'),
    # Summarize the extracted features, then regularize.
    MaxPooling2D(pool_size=(n_pool, n_pool)),
    Dropout(0.25),
    # Flatten the feature maps for the fully connected layers.
    Flatten(),
    Dense(32),
    Activation('relu'),
    Dropout(0.5),
    # The softmax output gives one probability per class.
    Dense(n_classes),
    Activation('softmax'),
]

model = Sequential()
for _layer in _stack:
    model.add(_layer)

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
# When True the script stops right after writing the architecture diagram
# (exit status 15), which is what it has always done. Set it to False to let
# the training and evaluation code below actually run.
PLOT_ONLY = True

# Write a diagram of the network, including layer shapes, to disk.
plot(model, to_file='/home/akram/Convolutional_model.png', show_shapes=True)
if PLOT_ONLY:
    exit(15)

# Number of examples per gradient update.
batch_size = 128
# Number of passes over the full training set.
n_epochs = 25

# Training may be slow depending on the machine; the held-out split is also
# used as validation data during fitting.
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=n_epochs, validation_data=(X_test, y_test))

# evaluate() returns the loss followed by the compiled metrics (accuracy).
loss, accuracy = model.evaluate(X_test, y_test)
# Format into a single string so the output reads the same whether print is a
# statement or a function (a two-argument print(...) shows a tuple otherwise).
print('loss: {}'.format(loss))
print('accuracy: {}'.format(accuracy))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment