Skip to content

Instantly share code, notes, and snippets.

@ryanrhymes
Last active February 24, 2018 20:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ryanrhymes/01214acfcb21ca777ed743ee32146772 to your computer and use it in GitHub Desktop.
Save ryanrhymes/01214acfcb21ca777ed743ee32146772 to your computer and use it in GitHub Desktop.
Zoo - Google InceptionV3

InceptionV3

InceptionV3 is one of Google’s latest efforts in image recognition. This is a standard task in computer vision, where models try to classify entire images into 1000 classes, like "Zebra", "Dalmatian", and "Dishwasher". Compared with previous DNN models, InceptionV3 has one of the most complex network architectures among computer vision models. The original paper of this network is here.

Usage

This gist implements an InceptionV3 service in Owl, and provides simple interfaces to use. Here is an example:

#zoo "9428a62a31dbea75511882ab8218076f"

let img = "/path/to/your/image.png";;
let labels = InceptionV3.infer img;;
let labels_json   = InceptionV3.to_json ~top:5 labels;;
let labels_tuples = InceptionV3.to_tuples labels;;

The infer function takes an image path as input. The image can be in any popular format: jpeg, png, etc. This gist contains an example image for you to use, but feel free to use your own.

The output of this function is a 1x1000 vector. The user can further get human-readable classification results by passing this vector to to_json or to_tuples. The output of the former is the top-N inference result as a json string, and that of the latter is a list, each element in the form of [class: string; probability: float]. The probability is in the range [0, 1].

The top parameter specifies how many of the top-N results are shown. It defaults to 5.

Prerequisite

This application relies on the ImageMagick tool to perform image format conversion and resizing. Please make sure it is installed. For example, on Ubuntu or Debian, you can use the command:

sudo apt-get install imagemagick
01214acfcb21ca777ed743ee32146772
#!/usr/bin/env owl
open Owl
open Owl_types
open Algodiff.S
open Neural
open Neural.S
open Neural.S.Graph
#zoo "51eaf74c65fa14c8c466ecfab2351bbd" (* Imagenet_cls*)
#zoo "86a1748bbc898f2e42538839edba00e1" (* ImageUtils *)
(* Layout flag: channels-last, the same convention as the Keras Conv layer.
   NOTE(review): nothing in the visible code reads this flag; conv2d_bn
   hard-codes ~axis:3 instead -- confirm before changing it. *)
let channel_last = true
(* Whether the final Dense layer is included; if false, no final Dense layer.
   NOTE(review): make_network as shown always appends the 1000-way linear
   layer and does not read this flag -- confirm before relying on it. *)
let include_top = true
(* Side length in pixels of the square input image. With include_top = true
   it has to be exactly 299. *)
let img_size = 299
(* Identifier of this zoo gist; extend_dir uses it to build the fallback path
   under $HOME/.owl/zoo. *)
let gist_id = "9428a62a31dbea75511882ab8218076f"
(* Resolve [filename] to a usable path. When [filename] already exists as
   given it is returned unchanged; otherwise the path of the same name inside
   this gist's folder under $HOME/.owl/zoo is returned (without checking that
   it exists). Raises Not_found if the fallback is needed and HOME is unset. *)
let extend_dir filename =
  match Sys.file_exists filename with
  | true -> filename
  | false ->
    let zoo_root = Sys.getenv "HOME" ^ "/.owl/zoo/" in
    zoo_root ^ gist_id ^ "/" ^ filename
(* Path of the serialised network that infer loads; resolved once, when this
   script is evaluated. *)
let weight_file = "inception_owl.network" |> extend_dir
(* Basic building unit of the network: a 2D convolution, then a normalisation
   layer (built with ~training:false over axis 3), then a ReLU activation.
   [padding] defaults to SAME; [kernel] and [stride] are passed to conv2d
   unchanged and [nn] is the node the unit is attached to. *)
let conv2d_bn ?(padding=SAME) kernel stride nn =
  let convolved = conv2d ~padding kernel stride nn in
  let normalised = normalisation ~training:false ~axis:3 convolved in
  activation Activation.Relu normalised
(* Inception block with four parallel towers attached to [nn], joined along
   axis 3: a 1x1 convolution, a 1x1 -> 5x5 tower, a 1x1 -> 3x3 -> 3x3 tower
   and an average-pool -> 1x1 tower.
   [in_shape] is used as the third entry of every kernel applied directly to
   [nn] (presumably the input channel count -- confirm against conv2d);
   [bp_size] is the last kernel entry of the pooling tower. *)
let mix_typ1 in_shape bp_size nn =
  let tower_1x1 = conv2d_bn [|1;1;in_shape;64|] [|1;1|] nn in
  let tower_5x5 =
    let reduced = conv2d_bn [|1;1;in_shape;48|] [|1;1|] nn in
    conv2d_bn [|5;5;48;64|] [|1;1|] reduced
  in
  let tower_3x3dbl =
    let reduced = conv2d_bn [|1;1;in_shape;64|] [|1;1|] nn in
    let middle = conv2d_bn [|3;3;64;96|] [|1;1|] reduced in
    conv2d_bn [|3;3;96;96|] [|1;1|] middle
  in
  let tower_pool =
    let pooled = avg_pool2d [|3;3|] [|1;1|] nn in
    conv2d_bn [|1;1;in_shape; bp_size |] [|1;1|] pooled
  in
  concatenate 3 [|tower_1x1; tower_5x5; tower_3x3dbl; tower_pool|]
(* Stride-2 block with three towers attached to [nn], joined along axis 3:
   a 3x3 stride-2 VALID convolution, a 1x1 -> 3x3 -> 3x3 (stride 2, VALID)
   tower, and a 3x3 stride-2 VALID max pooling. Kernels applied directly to
   [nn] have 288 as their third entry. *)
let mix_typ3 nn =
  let tower_3x3 = conv2d_bn ~padding:VALID [|3;3;288;384|] [|2;2|] nn in
  let tower_3x3dbl =
    let reduced = conv2d_bn [|1;1;288;64|] [|1;1|] nn in
    let middle = conv2d_bn [|3;3;64;96|] [|1;1|] reduced in
    conv2d_bn ~padding:VALID [|3;3;96;96|] [|2;2|] middle
  in
  let tower_pool = max_pool2d ~padding:VALID [|3;3|] [|2;2|] nn in
  concatenate 3 [|tower_3x3; tower_3x3dbl; tower_pool|]
(* Block built from factorised 7x7 convolutions (1x7 and 7x1 kernels), with
   four towers attached to [nn] and joined along axis 3: a 1x1 convolution,
   a 1x1 -> 1x7 -> 7x1 tower, a 1x1 -> 7x1 -> 1x7 -> 7x1 -> 1x7 tower and an
   average-pool -> 1x1 tower. [size] is the intermediate kernel entry used
   inside the two 7x7 towers; each tower ends in a kernel whose last entry
   is 192. *)
let mix_typ4 size nn =
  (* Starting from nn, apply conv2d_bn with stride 1x1 once per kernel, in
     list order. *)
  let tower kernels =
    List.fold_left (fun acc kernel -> conv2d_bn kernel [|1;1|] acc) nn kernels
  in
  let tower_1x1 = tower [ [|1;1;768;192|] ] in
  let tower_7x7 =
    tower [ [|1;1;768;size|]; [|1;7;size;size|]; [|7;1;size;192|] ]
  in
  let tower_7x7dbl =
    tower
      [ [|1;1;768;size|]
      ; [|7;1;size;size|]
      ; [|1;7;size;size|]
      ; [|7;1;size;size|]
      ; [|1;7;size;192|]
      ]
  in
  let tower_pool =
    (* padding = SAME *)
    let pooled = avg_pool2d [|3;3|] [|1;1|] nn in
    conv2d_bn [|1;1; 768; 192|] [|1;1|] pooled
  in
  concatenate 3 [|tower_1x1; tower_7x7; tower_7x7dbl; tower_pool|]
(* Stride-2 block with three towers attached to [nn], joined along axis 3:
   a 1x1 -> 3x3 (stride 2, VALID) tower, a 1x1 -> 1x7 -> 7x1 -> 3x3
   (stride 2, VALID) tower, and a 3x3 stride-2 VALID max pooling. Kernels
   applied directly to [nn] have 768 as their third entry. *)
let mix_typ8 nn =
  let tower_3x3 =
    let reduced = conv2d_bn [|1;1;768;192|] [|1;1|] nn in
    conv2d_bn ~padding:VALID [|3;3;192;320|] [|2;2|] reduced
  in
  let tower_7x7x3 =
    let t = conv2d_bn [|1;1;768;192|] [|1;1|] nn in
    let t = conv2d_bn [|1;7;192;192|] [|1;1|] t in
    let t = conv2d_bn [|7;1;192;192|] [|1;1|] t in
    conv2d_bn ~padding:VALID [|3;3;192;192|] [|2;2|] t
  in
  let tower_pool = max_pool2d ~padding:VALID [|3;3|] [|2;2|] nn in
  concatenate 3 [|tower_3x3; tower_7x7x3; tower_pool|]
(* Block with four towers attached to [nn], joined along axis 3: a 1x1
   convolution, a 1x1 tower that forks into parallel 1x3 and 3x1
   convolutions, a 1x1 -> 3x3 tower that forks the same way, and an
   average-pool -> 1x1 tower. [input] is the third entry of every kernel
   applied directly to [nn] (presumably the input channel count -- confirm
   against conv2d). *)
let mix_typ9 input nn =
  (* conv2d_bn with stride 1x1. *)
  let unit_conv kernel x = conv2d_bn kernel [|1;1|] x in
  (* Feed x to a 1x3 and a 3x1 convolution and join both results on axis 3. *)
  let fork x =
    let wide = unit_conv [|1;3;384;384|] x in
    let tall = unit_conv [|3;1;384;384|] x in
    concatenate 3 [| wide; tall |]
  in
  let tower_1x1 = unit_conv [|1;1;input;320|] nn in
  let tower_3x3 = fork (unit_conv [|1;1;input;384|] nn) in
  let tower_3x3dbl =
    nn
    |> unit_conv [|1;1;input;448|]
    |> unit_conv [|3;3;448;384|]
    |> fork
  in
  let tower_pool =
    nn
    |> avg_pool2d [|3;3|] [|1;1|]
    |> unit_conv [|1;1;input;192|]
  in
  concatenate 3 [|tower_1x1; tower_3x3; tower_3x3dbl; tower_pool|]
(* Assemble the whole InceptionV3 graph for an input of shape
   [|img_size; img_size; 3|] and return the result of get_network.
   The graph is a convolution/pooling stem, then the mix_typ* blocks in
   order, then global average pooling and a 1000-output linear layer with
   Softmax activation. *)
let make_network img_size =
  (* Plain convolutions and max pooling that precede the mixed blocks. *)
  let stem x =
    x
    |> conv2d_bn ~padding:VALID [|3;3;3;32|] [|2;2|]
    |> conv2d_bn ~padding:VALID [|3;3;32;32|] [|1;1|]
    |> conv2d_bn [|3;3;32;64|] [|1;1|]
    |> max_pool2d ~padding:VALID [|3;3|] [|2;2|]
    |> conv2d_bn ~padding:VALID [|1;1;64;80|] [|1;1|]
    |> conv2d_bn ~padding:VALID [|3;3;80;192|] [|1;1|]
    |> max_pool2d ~padding:VALID [|3;3|] [|2;2|]
  in
  (* The eleven mixed blocks, in network order. *)
  let mixed_blocks x =
    x
    |> mix_typ1 192 32
    |> mix_typ1 256 64
    |> mix_typ1 288 64
    |> mix_typ3
    |> mix_typ4 128
    |> mix_typ4 160
    |> mix_typ4 160
    |> mix_typ4 192
    |> mix_typ8
    |> mix_typ9 1280
    |> mix_typ9 2048
  in
  (* Classification head. *)
  let head x =
    x
    |> global_avg_pool2d
    |> linear ~act_typ:Activation.Softmax 1000
  in
  input [|img_size;img_size;3|]
  |> stem
  |> mixed_blocks
  |> head
  |> get_network
(* [infer img] runs the network stored in weight_file on the image at path
   [img].
   input: path of the input image; output: 1x1000 ndarray.

   The image is first converted to a temporary img_size x img_size PPM file
   with the ImageMagick convert command, which must be installed. The
   temporary file is removed again whether or not the conversion and loading
   succeed.

   Both paths are shell-quoted, so names containing spaces or shell
   metacharacters are passed to convert literally. As a consequence shell
   expansions such as a leading ~ are no longer applied to [img].

   Raises Failure if the convert command exits with a non-zero status. *)
let infer img =
  let nn = Graph.load weight_file in
  let prefix = Filename.remove_extension (Filename.basename img) in
  let tmp_img = Filename.temp_file prefix ".ppm" in
  (* Best-effort cleanup: a failure to delete the temporary file must not
     mask the inference result or the original error. *)
  let remove_tmp () = try Sys.remove tmp_img with Sys_error _ -> () in
  let load_input () =
    let cmd =
      Printf.sprintf "convert -resize %dx%d\\! %s %s"
        img_size img_size (Filename.quote img) (Filename.quote tmp_img)
    in
    let status = Sys.command cmd in
    (* Fail here with the offending command rather than later with an
       obscure error while parsing an empty PPM file. *)
    if status <> 0 then
      failwith
        (Printf.sprintf "infer: command exited with status %d: %s" status cmd);
    ImageUtils.(load_ppm tmp_img |> extend_dim |> normalise)
  in
  let img_ppm =
    match load_input () with
    | data -> remove_tmp (); data
    | exception e -> remove_tmp (); raise e
  in
  Graph.model nn img_ppm
(* Convert an inference result into its top-N entries.
 * input: 1x1000 ndarray; output: top-N inference result list, each element
 * in the form of [class: string; probability: float]. [top] is N and
 * defaults to 5. The work is delegated to Imagenet_cls.to_tuples. *)
let to_tuples ?(top=5) label =
  label |> Imagenet_cls.to_tuples ~top
(* Convert an inference result into a json string.
 * input: 1x1000 ndarray; output: top-N inference result as a json string.
 * [top] is N and defaults to 5. The work is delegated to
 * Imagenet_cls.to_json. *)
let to_json ?(top=5) label =
  label |> Imagenet_cls.to_json ~top
(* Script entry point: classify the bundled example image and print the
   default top-5 result as json on standard output. Runs whenever this file
   is evaluated. *)
let () =
  let example = extend_dir "panda.png" in
  let labels = infer example in
  let json = to_json labels in
  Printf.printf "%s\n" json
This file has been truncated, but you can view the full file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment