This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob

import IPython.display as dp

# Collect all .jpg files in the sample dir (sorted, matching `ls` ordering).
# glob is used instead of the IPython-only `!ls` shell escape so the snippet
# also runs as plain Python and yields an empty list when nothing matches.
fs = sorted(glob.glob("flower_photos/sample/*.jpg"))

# Create the list of image objects. No explicit format is passed: the files
# are JPEGs, so forcing format='png' would mislabel them; IPython infers the
# format from the file extension.
images = [dp.Image(filename=ea) for ea in fs]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sparkdl import readImages

# Read images using Spark
image_df = readImages("flower_photos/sample/")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.image import ImageSchema
from pyspark.sql.functions import lit
from sparkdl.image import imageIO

# Load both classes and attach a numeric label: tulips = 1, daisy = 0.
tulips_df = ImageSchema.readImages("flower_photos/tulips").withColumn("label", lit(1))
daisy_df = imageIO.readImagesWithCustomFn(
    "flower_photos/daisy", decode_f=imageIO.PIL_decode
).withColumn("label", lit(0))

# Split each class into train / test / unused with weights 0.1 / 0.05 / 0.85.
# Use larger training sets (e.g. [0.6, 0.4]) for getting more images.
tulips_train, tulips_test, _ = tulips_df.randomSplit([0.1, 0.05, 0.85])
daisy_train, daisy_test, _ = daisy_df.randomSplit([0.1, 0.05, 0.85])

# DataFrame.unionAll has been deprecated since Spark 2.0; union() is the
# replacement and likewise concatenates rows by column position.
train_df = tulips_train.union(daisy_train)
test_df = tulips_test.union(daisy_test)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

# NOTE(review): p_model is not defined in any snippet shown here; presumably
# it is the fitted pipeline model from a training step -- confirm.
tested_df = p_model.transform(test_df)

# Score the "prediction" column against "label" using accuracy.
evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
print("Test set accuracy = " + str(evaluator.evaluate(tested_df.select("prediction", "label"))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import expr
from pyspark.sql.functions import *
from pyspark.sql.types import *
def _p1(v):
    """Return element 1 of ``v.array`` as a Python float.

    ``v`` only needs an ``array`` attribute that supports integer indexing
    (presumably a pyspark.ml vector -- confirm against the caller).
    Raises IndexError if ``v.array`` has fewer than two elements.
    """
    return float(v.array[1])
# Wrap _p1 as a Spark UDF that returns a double, then apply it to the
# "probability" column and store the result in a new column "p".
take_one = udf(_p1, DoubleType())
df = tested_df.withColumn("p", take_one(tested_df.probability))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.ml.image import ImageSchema
from sparkdl import DeepImagePredictor

# Read images using Spark
image_df = ImageSchema.readImages("flower_photos/sample/")

# Apply InceptionV3 to the "image" column, writing the decoded top-10
# predictions to "predicted_labels".
predictor = DeepImagePredictor(
    inputCol="image",
    outputCol="predicted_labels",
    modelName="InceptionV3",
    decodePredictions=True,
    topK=10,
)
predictions_df = predictor.transform(image_df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
df = p_model.transform(image_df)

# take_one extracts probability[1]; one minus that is reported as "p_daisy".
# 100930342_92e8746431_n.jpg not a daisy
df.select("image.origin", (1 - take_one(df.probability)).alias("p_daisy")).show(truncate=False)
+---------------------------------------------------+--------------------+
|origin |p_daisy |
+---------------------------------------------------+--------------------+
|.../100930342_92e8746431_n.jpg |0.016760347798379538|
|.../10140303196_b88d3d6cec.jpg |0.9704259547739851 |
|.../100080576_f52e8ee070_n.jpg |0.9705190124824862 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.applications import InceptionV3

# Build InceptionV3 with ImageNet weights and write it out as one HDF5 file.
model = InceptionV3(weights="imagenet")
model.save('model-full.h5')  # saves to the local filesystem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.image import img_to_array, load_img
import numpy as np
from pyspark.sql.types import StringType
from sparkdl import KerasImageFileTransformer
def loadAndPreprocessKerasInceptionV3(uri):
    """Load the image at ``uri`` and prepare it as an InceptionV3 input batch.

    The image is loaded at 299x299, converted to an array, given a leading
    axis of size 1, and passed through ``preprocess_input``.

    Returns the preprocessed array.
    """
    # this is a typical way to load and prep images in keras
    image = img_to_array(load_img(uri, target_size=(299, 299)))  # image dimensions for InceptionV3
    image = np.expand_dims(image, axis=0)
    # The original snippet was cut off here and returned None; apply the
    # InceptionV3 preprocessing (imported above) and hand the result back.
    return preprocess_input(image)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.applications import InceptionV3
from sparkdl.udf.keras_image_model import registerKerasImageUDF

# Register an ImageNet-weighted InceptionV3 under the UDF name "inceptionV3_udf".
registerKerasImageUDF("inceptionV3_udf", InceptionV3(weights="imagenet"))