# Akramz/clean_process_images.py

## clean_process_images.py
import cv2
import os
import numpy as np
import pandas as pd
from sys import exit
from collections import Counter
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.utils.visualize_util import plot

# get images and dates: [timestamp-string, grayscale image] pairs read from disk
dates_images = []

# conditions (kept as a module-level name; nothing visible here fills it)
conditions = []

# Placeholder written over unusable labels until they can be forward-filled.
MISSING_CONDITION = '-9999'

# Coarse weather class -> raw labels folded into it.  Every raw label appears
# in exactly one list and no class name is remapped by another entry, so the
# table can be applied in a single pass.  Labels that are not listed here are
# kept unchanged.
CONDITION_GROUPS = {
    MISSING_CONDITION: ['nan', 'Unknown'],
    'Clear': ['Scattered Clouds', 'Partly Cloudy', 'Clear'],
    'Cloudy': ['Mostly Cloudy', 'Overcast', 'Funnel Cloud'],
    'Mist': ['Mist', 'Light Mist'],
    'Light Rain': ['Light Freezing Rain', 'Light Rain', 'Unknown Precipitation'],
    'Rain': ['Rain Showers', 'Heavy Rain', 'Light Rain Showers', 'Heavy Rain Showers', 'Freezing Rain'],
    'Thunderstorm': ['Heavy Thunderstorm', 'Heavy Thunderstorms with Hail', 'Thunderstorm', 'Light Thunderstorm', 'Thunderstorms and Rain', 'Light Thunderstorms and Rain', 'Heavy Thunderstorms and Rain', 'Thunderstorms with Hail'],
    'Hail': ['Light Hail', 'Heavy Small Hail', 'Heavy Hail Showers', 'Light Thunderstorms with Hail', 'Light Small Hail Showers', 'Light Hail Showers', 'Hail Showers', 'Hail', 'Small Hail'],
    'Fog': ['Fog', 'Light Fog', 'Partial Fog', 'Patches of Fog', 'Heavy Fog', 'Shallow Fog', 'Light Freezing Fog'],
    'Dust': ['Low Drifting Widespread Dust', 'Widespread Dust', 'Dust Whirls'],
    'Sand': ['Blowing Sand', 'Sand', 'Light Sand', 'Sandstorm', 'Light Sandstorm', 'Low Drifting Sand'],
    'Snow': ['Light Snow', 'Snow Grains', 'Light Snow Grains', 'Snow', 'Low Drifting Snow', 'Light Snow Showers', 'Heavy Snow', 'Light Thunderstorms and Snow'],
    'Ice': ['Ice Pellets', 'Ice Crystals', 'Light Ice Pellets'],
    'Drizzle': ['Light Drizzle', 'Drizzle', 'Light Freezing Drizzle', 'Heavy Drizzle', 'Heavy Freezing Drizzle'],
    'Haze': ['Light Haze', 'Heavy Haze'],
}

# Flat raw-label -> class lookup derived from the table above.
RAW_TO_GROUP = {
    raw: group
    for group, raws in CONDITION_GROUPS.items()
    for raw in raws
}

# prepare the conditions data
# .copy() yields an independent frame, so the column assignments below do not
# write into a slice of the full CSV frame (pandas SettingWithCopyWarning).
tangier = pd.read_csv('/home/akram/Dropbox/DataSets/airports_weather/GMTT-Tanger.csv')
tangier = tangier[['Conditions', 'DateUTC']].copy()
tangier['DateUTC'] = pd.to_datetime(tangier.DateUTC, format='%Y-%m-%d %H:%M:%S')

# clean conditions
# 1. a missing reading inherits the label of the previous observation
labels = tangier['Conditions'].ffill()
# 2. collapse the raw labels into the coarse classes
labels = labels.map(lambda raw: RAW_TO_GROUP.get(raw, raw))
# 3. an unusable label inherits the previous usable one; rows at the very
#    start, which have nothing to inherit, hold the placeholder string so the
#    column contains strings only when it reaches the encoder
labels = labels.where(labels != MISSING_CONDITION).ffill().fillna(MISSING_CONDITION)
tangier['Conditions'] = labels

# encode conditions: each class name becomes an integer id in 'E_Conditions'
encoder = preprocessing.LabelEncoder()
tangier['E_Conditions'] = encoder.fit_transform(tangier['Conditions'].values)
del tangier['Conditions']

# get dates and images
# NOTE: the counter advances for every directory entry, not only for '.jpeg'
# files, so at most the first 100 entries of the working directory are
# inspected.
j = 0
for i in os.listdir(os.getcwd()):
	if j > 99:
		break
	j = j + 1
	if not i.endswith(".jpeg"):
		continue
	try:
		# flag 0 -> load as a single-channel grayscale image
		img = cv2.imread(i, 0)
	except Exception:
		img = None
	# cv2.imread signals an unreadable file by returning None rather than by
	# raising, so the result has to be checked explicitly
	if img is None:
		print('BAD IMAGE')
		continue
	# drop the 5-character '.jpeg' suffix; the remainder is the timestamp key
	dates_images.append([i[:-5], img])

# the real lists we need
dates = []
X = []
y = []

# The calendar fields of the observations are the same for every image, so
# they are extracted once here instead of once per image inside the loop.
obs_year = tangier.DateUTC.dt.year
obs_month = tangier.DateUTC.dt.month
obs_day = tangier.DateUTC.dt.day
obs_hour = tangier.DateUTC.dt.hour

# fill conditions
for date_image in dates_images:
	stamp, image = date_image

	# an image that could not be decoded is stored as None; it cannot be
	# stacked into the training array, so leave it out
	if image is None:
		continue

	# the file name is expected to start with year-month-day-hour
	try:
		year, month, day, hour = [int(part) for part in stamp.split("-")[:4]]
	except ValueError:
		print('BAD FILENAME: ' + stamp)
		continue

	# all observations recorded within the same hour as the image
	in_hour = (obs_year == year) & (obs_month == month) & (obs_day == day) & (obs_hour == hour)
	solution = tangier.loc[in_hour]
	if solution.empty:
		continue

	dates.append(stamp)
	X.append(image)
	# label the image with the most frequent condition of that hour
	y.append(Counter(list(solution.E_Conditions)).most_common(1)[0][0])

def _as_network_input(images, rows, cols):
	"""Return *images* as a float32 (samples, 1, rows, cols) array divided by 255."""
	batch = images.reshape(images.shape[0], 1, rows, cols).astype('float32')
	batch /= 255
	return batch


# Hold out 20% of the samples for evaluation, then turn each of the four
# parts from a Python list into a NumPy array.
X_train, X_test, y_train, y_test = map(np.array, train_test_split(X, y, test_size=0.2))

# Sample counts and frame size; X_test is reshaped with the frame size
# measured on X_train.
n_train, height, width = X_train.shape
n_test, _, _ = X_test.shape

# Add the singleton channel axis, convert to float32 and divide by 255.
X_train = _as_network_input(X_train, height, width)
X_test = _as_network_input(X_test, height, width)

# 17 types of conditions
n_classes = 17

# One-hot encode the integer labels.
y_train = to_categorical(y_train, n_classes)
y_test = to_categorical(y_test, n_classes)

# Architecture hyper-parameters.
n_filters = 32  # number of filters in each convolution layer
n_conv = 3      # convolution filters are n_conv x n_conv
n_pool = 2      # pooling windows are n_pool x n_pool

# to the model
model = Sequential()

# Layers in stacking order.
stack = [
    # 'valid' border mode: the filter is applied only where it fits entirely
    # inside the image (a narrow convolution).  The input is one grayscale
    # channel of height x width pixels.
    Convolution2D(n_filters, n_conv, n_conv, border_mode='valid', input_shape=(1, height, width)),
    Activation('relu'),
    Convolution2D(n_filters, n_conv, n_conv),
    Activation('relu'),
    # pooling summarizes the features extracted so far
    MaxPooling2D(pool_size=(n_pool, n_pool)),
    Dropout(0.25),
    # flatten the feature maps for the 1D layers
    Flatten(),
    Dense(32),
    Activation('relu'),
    Dropout(0.5),
    # the softmax output layer gives a probability for each class
    Dense(n_classes),
    Activation('softmax'),
]
for layer in stack:
	model.add(layer)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Write a diagram of the network, including layer shapes, to disk.
plot(model, to_file='/home/akram/Convolutional_model.png', show_shapes=True)

# NOTE(review): this ends the process with status 15 right after the model
# diagram is written, so the training and evaluation below never run.
exit(15)

# Training hyper-parameters.
n_epochs = 25     # passes over the full training set
batch_size = 128  # samples per training iteration

# the training may be slow depending on your computer
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    nb_epoch=n_epochs,
    validation_data=(X_test, y_test)
)

# Score the trained model on the held-out split.
scores = model.evaluate(X_test, y_test)
loss, accuracy = scores
print('loss:', loss)
print('accuracy:', accuracy)
	import cv2
	import os
	import numpy as np
	import pandas as pd
	from sys import exit
	from collections import Counter
	from sklearn import preprocessing
	from sklearn.cross_validation import train_test_split
	from keras.utils.np_utils import to_categorical
	from keras.models import Sequential
	from keras.layers import Activation
	from keras.layers.convolutional import Convolution2D, MaxPooling2D
	from keras.layers import Dropout, Flatten, Dense
	from keras.utils.visualize_util import plot

	# get images and dates: [timestamp-string, grayscale image] pairs read from disk
	dates_images = []

	# conditions (kept as a name; nothing visible here fills it)
	conditions = []

	# Placeholder written over unusable labels until they can be forward-filled.
	MISSING_CONDITION = '-9999'

	# Coarse weather class -> raw labels folded into it.  Every raw label appears
	# in exactly one list and no class name is remapped by another entry, so the
	# table can be applied in a single pass.  Labels that are not listed here are
	# kept unchanged.
	CONDITION_GROUPS = {
	    MISSING_CONDITION: ['nan', 'Unknown'],
	    'Clear': ['Scattered Clouds', 'Partly Cloudy', 'Clear'],
	    'Cloudy': ['Mostly Cloudy', 'Overcast', 'Funnel Cloud'],
	    'Mist': ['Mist', 'Light Mist'],
	    'Light Rain': ['Light Freezing Rain', 'Light Rain', 'Unknown Precipitation'],
	    'Rain': ['Rain Showers', 'Heavy Rain', 'Light Rain Showers', 'Heavy Rain Showers', 'Freezing Rain'],
	    'Thunderstorm': ['Heavy Thunderstorm', 'Heavy Thunderstorms with Hail', 'Thunderstorm', 'Light Thunderstorm', 'Thunderstorms and Rain', 'Light Thunderstorms and Rain', 'Heavy Thunderstorms and Rain', 'Thunderstorms with Hail'],
	    'Hail': ['Light Hail', 'Heavy Small Hail', 'Heavy Hail Showers', 'Light Thunderstorms with Hail', 'Light Small Hail Showers', 'Light Hail Showers', 'Hail Showers', 'Hail', 'Small Hail'],
	    'Fog': ['Fog', 'Light Fog', 'Partial Fog', 'Patches of Fog', 'Heavy Fog', 'Shallow Fog', 'Light Freezing Fog'],
	    'Dust': ['Low Drifting Widespread Dust', 'Widespread Dust', 'Dust Whirls'],
	    'Sand': ['Blowing Sand', 'Sand', 'Light Sand', 'Sandstorm', 'Light Sandstorm', 'Low Drifting Sand'],
	    'Snow': ['Light Snow', 'Snow Grains', 'Light Snow Grains', 'Snow', 'Low Drifting Snow', 'Light Snow Showers', 'Heavy Snow', 'Light Thunderstorms and Snow'],
	    'Ice': ['Ice Pellets', 'Ice Crystals', 'Light Ice Pellets'],
	    'Drizzle': ['Light Drizzle', 'Drizzle', 'Light Freezing Drizzle', 'Heavy Drizzle', 'Heavy Freezing Drizzle'],
	    'Haze': ['Light Haze', 'Heavy Haze'],
	}

	# Flat raw-label -> class lookup derived from the table above.
	RAW_TO_GROUP = {
	    raw: group
	    for group, raws in CONDITION_GROUPS.items()
	    for raw in raws
	}

	# prepare the conditions data
	# .copy() yields an independent frame, so the column assignments below do not
	# write into a slice of the full CSV frame (pandas SettingWithCopyWarning).
	tangier = pd.read_csv('/home/akram/Dropbox/DataSets/airports_weather/GMTT-Tanger.csv')
	tangier = tangier[['Conditions', 'DateUTC']].copy()
	tangier['DateUTC'] = pd.to_datetime(tangier.DateUTC, format='%Y-%m-%d %H:%M:%S')

	# clean conditions
	# 1. a missing reading inherits the label of the previous observation
	labels = tangier['Conditions'].ffill()
	# 2. collapse the raw labels into the coarse classes
	labels = labels.map(lambda raw: RAW_TO_GROUP.get(raw, raw))
	# 3. an unusable label inherits the previous usable one; rows at the very
	#    start, which have nothing to inherit, hold the placeholder string so the
	#    column contains strings only when it reaches the encoder
	labels = labels.where(labels != MISSING_CONDITION).ffill().fillna(MISSING_CONDITION)
	tangier['Conditions'] = labels

	# encode conditions: each class name becomes an integer id in 'E_Conditions'
	encoder = preprocessing.LabelEncoder()
	tangier['E_Conditions'] = encoder.fit_transform(tangier['Conditions'].values)
	del tangier['Conditions']

	# get dates and images
	# NOTE: the counter advances for every directory entry, not only for '.jpeg'
	# files, so at most the first 100 entries of the working directory are
	# inspected.
	j = 0
	for i in os.listdir(os.getcwd()):
		if j > 99:
			break
		j = j + 1
		if not i.endswith(".jpeg"):
			continue
		try:
			# flag 0 -> load as a single-channel grayscale image
			img = cv2.imread(i, 0)
		except Exception:
			img = None
		# cv2.imread signals an unreadable file by returning None rather than by
		# raising, so the result has to be checked explicitly
		if img is None:
			print('BAD IMAGE')
			continue
		# drop the 5-character '.jpeg' suffix; the remainder is the timestamp key
		dates_images.append([i[:-5], img])

	# the real lists we need
	dates = []
	X = []
	y = []

	# The calendar fields of the observations are the same for every image, so
	# they are extracted once here instead of once per image inside the loop.
	obs_year = tangier.DateUTC.dt.year
	obs_month = tangier.DateUTC.dt.month
	obs_day = tangier.DateUTC.dt.day
	obs_hour = tangier.DateUTC.dt.hour

	# fill conditions
	for date_image in dates_images:
		stamp, image = date_image

		# an image that could not be decoded is stored as None; it cannot be
		# stacked into the training array, so leave it out
		if image is None:
			continue

		# the file name is expected to start with year-month-day-hour
		try:
			year, month, day, hour = [int(part) for part in stamp.split("-")[:4]]
		except ValueError:
			print('BAD FILENAME: ' + stamp)
			continue

		# all observations recorded within the same hour as the image
		in_hour = (obs_year == year) & (obs_month == month) & (obs_day == day) & (obs_hour == hour)
		solution = tangier.loc[in_hour]
		if solution.empty:
			continue

		dates.append(stamp)
		X.append(image)
		# label the image with the most frequent condition of that hour
		y.append(Counter(list(solution.E_Conditions)).most_common(1)[0][0])

	def _as_network_input(images, rows, cols):
		"""Return *images* as a float32 (samples, 1, rows, cols) array divided by 255."""
		batch = images.reshape(images.shape[0], 1, rows, cols).astype('float32')
		batch /= 255
		return batch


	# Hold out 20% of the samples for evaluation, then turn each of the four
	# parts from a Python list into a NumPy array.
	X_train, X_test, y_train, y_test = map(np.array, train_test_split(X, y, test_size=0.2))

	# Sample counts and frame size; X_test is reshaped with the frame size
	# measured on X_train.
	n_train, height, width = X_train.shape
	n_test, _, _ = X_test.shape

	# Add the singleton channel axis, convert to float32 and divide by 255.
	X_train = _as_network_input(X_train, height, width)
	X_test = _as_network_input(X_test, height, width)

	# 17 types of conditions
	n_classes = 17

	# One-hot encode the integer labels.
	y_train = to_categorical(y_train, n_classes)
	y_test = to_categorical(y_test, n_classes)

	# Architecture hyper-parameters.
	n_filters = 32  # number of filters in each convolution layer
	n_conv = 3      # convolution filters are n_conv x n_conv
	n_pool = 2      # pooling windows are n_pool x n_pool

	# to the model
	model = Sequential()

	# Layers in stacking order.
	stack = [
	    # 'valid' border mode: the filter is applied only where it fits entirely
	    # inside the image (a narrow convolution).  The input is one grayscale
	    # channel of height x width pixels.
	    Convolution2D(n_filters, n_conv, n_conv, border_mode='valid', input_shape=(1, height, width)),
	    Activation('relu'),
	    Convolution2D(n_filters, n_conv, n_conv),
	    Activation('relu'),
	    # pooling summarizes the features extracted so far
	    MaxPooling2D(pool_size=(n_pool, n_pool)),
	    Dropout(0.25),
	    # flatten the feature maps for the 1D layers
	    Flatten(),
	    Dense(32),
	    Activation('relu'),
	    Dropout(0.5),
	    # the softmax output layer gives a probability for each class
	    Dense(n_classes),
	    Activation('softmax'),
	]
	for layer in stack:
		model.add(layer)

	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

	# Write a diagram of the network, including layer shapes, to disk.
	plot(model, to_file='/home/akram/Convolutional_model.png', show_shapes=True)

	# NOTE(review): this ends the process with status 15 right after the model
	# diagram is written, so the training and evaluation below never run.
	exit(15)

	# Training hyper-parameters.
	n_epochs = 25     # passes over the full training set
	batch_size = 128  # samples per training iteration

	# the training may be slow depending on your computer
	model.fit(
	    X_train,
	    y_train,
	    batch_size=batch_size,
	    nb_epoch=n_epochs,
	    validation_data=(X_test, y_test)
	)

	# Score the trained model on the held-out split.
	scores = model.evaluate(X_test, y_test)
	loss, accuracy = scores
	print('loss:', loss)
	print('accuracy:', accuracy)