Skip to content

Instantly share code, notes, and snippets.

View FavioVazquez's full-sized avatar
💻
Changing the world :)

Favio André Vázquez FavioVazquez

💻
Changing the world :)
View GitHub Profile
# Copy three example photos (two from daisy/, one from tulips/) into the
# sample folder using IPython shell escapes.
# NOTE(review): assumes flower_photos/sample/ already exists -- confirm.
!cp flower_photos/daisy/100080576_f52e8ee070_n.jpg flower_photos/sample/
!cp flower_photos/daisy/10140303196_b88d3d6cec.jpg flower_photos/sample/
!cp flower_photos/tulips/100930342_92e8746431_n.jpg flower_photos/sample/
import IPython.display as dp
# collect all .png files in ssample dir
fs = !ls flower_photos/sample/*.jpg
# create list of image objects
images = []
for ea in fs:
images.append(dp.Image(filename=ea, format='png'))
from sparkdl import readImages

# Folder that holds the hand-picked sample photos.
sample_dir = "flower_photos/sample/"

# Load every image in that folder into a DataFrame via sparkdl's reader.
image_df = readImages(sample_dir)
from pyspark.ml.image import ImageSchema
from pyspark.sql.functions import lit
from sparkdl.image import imageIO

# Load both classes and attach the numeric label: tulips -> 1, daisies -> 0.
# The two folders are read through two different readers (Spark's
# ImageSchema and sparkdl's PIL-based custom reader).
tulips_df = ImageSchema.readImages("flower_photos/tulips").withColumn("label", lit(1))
daisy_df = imageIO.readImagesWithCustomFn(
    "flower_photos/daisy", decode_f=imageIO.PIL_decode
).withColumn("label", lit(0))

# Relative weights for the (train, test, discarded) parts of each class.
# To use more images, raise the first two weights and lower the third, but
# keep exactly three entries: the result is unpacked into three names below.
split_weights = [0.1, 0.05, 0.85]
tulips_train, tulips_test, _ = tulips_df.randomSplit(split_weights)
daisy_train, daisy_test, _ = daisy_df.randomSplit(split_weights)

# `union` replaces the deprecated `unionAll` alias; both append rows by
# column position and keep duplicates.
train_df = tulips_train.union(daisy_train)
test_df = tulips_test.union(daisy_test)
from pyspark.ml import Pipeline
from pyspark.ml.classification import LogisticRegression
from sparkdl import DeepImageFeaturizer

# Stage 1: read the "image" column and write a "features" column using the
# InceptionV3 model.
featurizer = DeepImageFeaturizer(
    inputCol="image",
    outputCol="features",
    modelName="InceptionV3",
)

# Stage 2: logistic regression trained against the "label" column.
lr = LogisticRegression(
    labelCol="label",
    maxIter=10,
    regParam=0.05,
    elasticNetParam=0.3,
)

# Chain both stages and fit them on the training rows.
p = Pipeline(stages=[featurizer, lr])
p_model = p.fit(train_df)
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# Run the fitted pipeline over the held-out rows.
tested_df = p_model.transform(test_df)

# Compare the "prediction" column with "label" using the accuracy metric.
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
predictions_and_labels = tested_df.select("prediction", "label")
accuracy = evaluator.evaluate(predictions_and_labels)
print("Test set accuracy = " + str(accuracy))
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import expr
from pyspark.sql.functions import *
from pyspark.sql.types import *
def _p1(v):
return float(v.array[1])y
# Wrap _p1 as a Spark UDF that yields a double column.
take_one = udf(f=_p1, returnType=DoubleType())

# Add column "p": the value _p1 extracts from each row's "probability".
df = tested_df.withColumn("p", take_one(tested_df["probability"]))
from sparkdl import DeepImagePredictor

# Read images using Spark
image_df = ImageSchema.readImages("flower_photos/sample/")

# Predictor over the "image" column using the InceptionV3 model; output goes
# to "predicted_labels" with decoded predictions and topK=10.
predictor = DeepImagePredictor(
    inputCol="image",
    outputCol="predicted_labels",
    modelName="InceptionV3",
    decodePredictions=True,
    topK=10,
)
predictions_df = predictor.transform(image_df)
# Run the fitted pipeline over the sample images.
df = p_model.transform(image_df)

# take_one extracts entry 1 of "probability"; its complement is shown
# under the name "p_daisy".
# 100930342_92e8746431_n.jpg not a daisy
p_daisy = (1 - take_one(df["probability"])).alias("p_daisy")
df.select("image.origin", p_daisy).show(truncate=False)
+---------------------------------------------------+--------------------+
|origin |p_daisy |
+---------------------------------------------------+--------------------+
|.../100930342_92e8746431_n.jpg |0.016760347798379538|
|.../10140303196_b88d3d6cec.jpg |0.9704259547739851 |
|.../100080576_f52e8ee070_n.jpg |0.9705190124824862 |
+---------------------------------------------------+--------------------+
from keras.applications import InceptionV3

# Target file for the serialized model (saves to the local filesystem).
model_path = 'model-full.h5'

# Build InceptionV3 with the "imagenet" weights and write it to disk.
model = InceptionV3(weights="imagenet")
model.save(model_path)