Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@jzstark
Last active May 4, 2019 18:10
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jzstark/9428a62a31dbea75511882ab8218076f to your computer and use it in GitHub Desktop.
Save jzstark/9428a62a31dbea75511882ab8218076f to your computer and use it in GitHub Desktop.
An implementation of InceptionV3 network with Owl

InceptionV3

InceptionV3 is one of Google’s latest efforts in image recognition. This is a standard task in computer vision, where models try to classify entire images into 1000 classes, like "Zebra", "Dalmatian", and "Dishwasher". Compared with previous DNN models, InceptionV3 has one of the most complex network architectures among computer vision models. The original paper of this network is here.

Usage

This gist implements an InceptionV3 service in Owl, and provides simple interfaces to use. Here is an example:

#zoo "9428a62a31dbea75511882ab8218076f"

let img = "/path/to/your/image.png";;
let labels = InceptionV3.infer img;;
let labels_json   = InceptionV3.to_json ~top:5 labels;;
let labels_tuples = InceptionV3.to_tuples labels;;

The infer function takes an image path as input. The image can be in any popular format: jpeg, png, etc. This gist contains an example image for you to use, but feel free to use your own.

The output of this function is a 1x1000 vector. To get human-readable classification results, pass this vector to to_json or to_tuples. The former returns the top-N inference results as a JSON string; the latter returns a list whose elements have the form [class: string; probability: float]. Each probability is in the range [0, 1].

The top parameter specifies how many of the top-N results are shown. It defaults to 5.

Prerequisite

This application relies on the ImageMagick tool for image format conversion and resizing. Please make sure it is installed. For example, on Ubuntu or Debian, you can use the command:

sudo apt-get install imagemagick
open InceptionV3
(* Entry point: classify the image named by the first command-line argument
   and write the JSON result next to it, as the image path with its extension
   replaced by ".json".

   When no image argument is given, print a usage line on stderr and exit
   with status 2 instead of failing with an out-of-bounds array access. *)
let () =
  if Array.length Sys.argv < 2 then begin
    prerr_endline ("Usage: " ^ Sys.argv.(0) ^ " <image>");
    exit 2
  end else begin
    let image = Sys.argv.(1) in
    let json = infer image |> to_json in
    let name = Filename.remove_extension image ^ ".json" in
    Owl_io.write_file name json
  end
; Builds the single `classify` program, linked against owl and owl-base.
(executable
 (name classify)
 (libraries owl owl-base))
#!/usr/bin/env owl
open Owl
open Owl_types
open Neural.S
open Neural.S.Graph
#zoo "51eaf74c65fa14c8c466ecfab2351bbd" (* Imagenet_cls*)
#zoo "86a1748bbc898f2e42538839edba00e1" (* ImageUtils *)
(* Layout flag: channels come last, the same as in the Keras Conv layer.
   NOTE(review): no definition visible in this file reads this value. *)
let channel_last = true
(* If false, the network has no final Dense layer.
   NOTE(review): make_network as shown always appends the final linear layer
   and does not consult this flag -- confirm whether it is used elsewhere. *)
let include_top = true
(* Side length of the square input image; include_top = true means this has
   to be exactly 299. *)
let img_size = 299
(* Path of the weight file that infer passes to Graph.load_weights, resolved
   through Owl_zoo_path.extend_zoo_path. *)
let weight_file = Owl_zoo_path.extend_zoo_path "inception_owl.weight"
(* [conv2d_bn ?padding kernel stride nn] appends three layers to [nn]:
   a 2D convolution with the given [kernel] and [stride] ([padding] defaults
   to SAME), a normalisation over axis 3 with training disabled, and a ReLU
   activation. Returns the activation node. *)
let conv2d_bn ?(padding = SAME) kernel stride nn =
  let convolved = conv2d ~padding kernel stride nn in
  let normalised = normalisation ~training:false ~axis:3 convolved in
  activation Activation.Relu normalised
(* [mix_typ1 in_shape bp_size nn] builds a four-branch block on top of [nn]
   and concatenates the branches along axis 3:
   - a 1x1 convolution (64 filters);
   - a 1x1 (48) followed by a 5x5 (64) convolution;
   - a 1x1 (64) followed by two 3x3 (96) convolutions;
   - a 3x3 average pool followed by a 1x1 convolution with [bp_size] filters.
   [in_shape] is the number of input channels of [nn].

   NOTE(review): layers are created strictly in the order listed above;
   keep that order in case weight loading depends on it -- confirm. *)
let mix_typ1 in_shape bp_size nn =
  (* Stride-1 conv+BN+ReLU; a fresh stride array is built for every layer. *)
  let unit_conv kernel node = conv2d_bn kernel [|1;1|] node in
  (* Stack stride-1 convolutions on [nn], first kernel applied first. *)
  let stack kernels =
    List.fold_left (fun node kernel -> unit_conv kernel node) nn kernels
  in
  let branch1x1 = unit_conv [|1;1;in_shape;64|] nn in
  let branch5x5 = stack [ [|1;1;in_shape;48|]; [|5;5;48;64|] ] in
  let branch3x3dbl =
    stack [ [|1;1;in_shape;64|]; [|3;3;64;96|]; [|3;3;96;96|] ]
  in
  let pooled = avg_pool2d [|3;3|] [|1;1|] nn in
  let branch_pool = unit_conv [|1;1;in_shape;bp_size|] pooled in
  concatenate 3 [|branch1x1; branch5x5; branch3x3dbl; branch_pool|]
(* [mix_typ3 nn] builds a three-branch block whose branches all end with a
   stride-2, VALID-padded layer, and concatenates them along axis 3:
   - a 3x3 stride-2 convolution (384 filters);
   - a 1x1 (64), a 3x3 (96) and a 3x3 stride-2 (96) convolution;
   - a 3x3 stride-2 max pool.
   The kernels expect [nn] to carry 288 channels. *)
let mix_typ3 nn =
  (* Stride-2, VALID-padded conv+BN+ReLU. *)
  let strided kernel node = conv2d_bn ~padding:VALID kernel [|2;2|] node in
  let branch3x3 = strided [|3;3;288;384|] nn in
  let branch3x3dbl =
    let squeezed = conv2d_bn [|1;1;288;64|] [|1;1|] nn in
    let widened = conv2d_bn [|3;3;64;96|] [|1;1|] squeezed in
    strided [|3;3;96;96|] widened
  in
  let branch_pool = max_pool2d ~padding:VALID [|3;3|] [|2;2|] nn in
  concatenate 3 [|branch3x3; branch3x3dbl; branch_pool|]
(* [mix_typ4 size nn] builds a four-branch block using factorised 1x7 / 7x1
   convolutions and concatenates the branches along axis 3:
   - a 1x1 convolution (192 filters);
   - 1x1 -> 1x7 -> 7x1, ending with 192 filters;
   - 1x1 -> 7x1 -> 1x7 -> 7x1 -> 1x7, ending with 192 filters;
   - a 3x3 average pool (default padding) followed by a 1x1 convolution (192).
   [size] is the filter count of the intermediate layers; the kernels expect
   [nn] to carry 768 channels.

   NOTE(review): layers are created strictly in the order listed above;
   keep that order in case weight loading depends on it -- confirm. *)
let mix_typ4 size nn =
  (* Stride-1 conv+BN+ReLU; a fresh stride array is built for every layer. *)
  let unit_conv kernel node = conv2d_bn kernel [|1;1|] node in
  (* Stack stride-1 convolutions on [nn], first kernel applied first. *)
  let stack kernels =
    List.fold_left (fun node kernel -> unit_conv kernel node) nn kernels
  in
  let branch1x1 = unit_conv [|1;1;768;192|] nn in
  let branch7x7 =
    stack [ [|1;1;768;size|]; [|1;7;size;size|]; [|7;1;size;192|] ]
  in
  let branch7x7dbl =
    stack
      [ [|1;1;768;size|];
        [|7;1;size;size|];
        [|1;7;size;size|];
        [|7;1;size;size|];
        [|1;7;size;192|] ]
  in
  let pooled = avg_pool2d [|3;3|] [|1;1|] nn in (* padding = SAME *)
  let branch_pool = unit_conv [|1;1;768;192|] pooled in
  concatenate 3 [|branch1x1; branch7x7; branch7x7dbl; branch_pool|]
(* [mix_typ8 nn] builds a three-branch block whose branches all end with a
   stride-2, VALID-padded layer, and concatenates them along axis 3:
   - 1x1 (192 filters) -> 3x3 stride-2 (320);
   - 1x1 (192) -> 1x7 (192) -> 7x1 (192) -> 3x3 stride-2 (192);
   - a 3x3 stride-2 max pool.
   The kernels expect [nn] to carry 768 channels. *)
let mix_typ8 nn =
  (* Stride-1 conv+BN+ReLU with the default padding. *)
  let unit_conv kernel node = conv2d_bn kernel [|1;1|] node in
  (* Stride-2, VALID-padded conv+BN+ReLU. *)
  let strided kernel node = conv2d_bn ~padding:VALID kernel [|2;2|] node in
  let branch3x3 =
    let squeezed = unit_conv [|1;1;768;192|] nn in
    strided [|3;3;192;320|] squeezed
  in
  let branch7x7x3 =
    let squeezed = unit_conv [|1;1;768;192|] nn in
    let wide = unit_conv [|1;7;192;192|] squeezed in
    let tall = unit_conv [|7;1;192;192|] wide in
    strided [|3;3;192;192|] tall
  in
  let branch_pool = max_pool2d ~padding:VALID [|3;3|] [|2;2|] nn in
  concatenate 3 [|branch3x3; branch7x7x3; branch_pool|]
(* [mix_typ9 input nn] builds a four-branch block and concatenates the
   branches along axis 3. [input] is the number of channels carried by [nn].
   - a 1x1 convolution (320 filters);
   - a 1x1 convolution (384) that forks into parallel 1x3 and 3x1
     convolutions, which are concatenated;
   - a 1x1 (448) and a 3x3 (384) convolution, followed by the same fork;
   - a 3x3 average pool followed by a 1x1 convolution (192).

   NOTE(review): layers are created strictly in the order listed above;
   keep that order in case weight loading depends on it -- confirm. *)
let mix_typ9 input nn =
  (* Stride-1 conv+BN+ReLU; a fresh stride array is built for every layer. *)
  let unit_conv kernel node = conv2d_bn kernel [|1;1|] node in
  (* Apply a 1x3 and then a 3x1 convolution to the same 384-channel stem and
     join the two results along axis 3. *)
  let fork stem =
    let across = unit_conv [|1;3;384;384|] stem in
    let down = unit_conv [|3;1;384;384|] stem in
    concatenate 3 [|across; down|]
  in
  let branch1x1 = unit_conv [|1;1;input;320|] nn in
  let branch3x3 = fork (unit_conv [|1;1;input;384|] nn) in
  let branch3x3dbl =
    let squeezed = unit_conv [|1;1;input;448|] nn in
    fork (unit_conv [|3;3;448;384|] squeezed)
  in
  let pooled = avg_pool2d [|3;3|] [|1;1|] nn in
  let branch_pool = unit_conv [|1;1;input;192|] pooled in
  concatenate 3 [|branch1x1; branch3x3; branch3x3dbl; branch_pool|]
(* [make_network img_size] assembles the whole graph for an
   [img_size] x [img_size] x 3 input and returns it via [get_network].

   Stages, in construction order: a stem of convolutions and max pools,
   three mix_typ1 blocks, one mix_typ3 block, four mix_typ4 blocks, one
   mix_typ8 block, two mix_typ9 blocks, a global average pool, and a
   1000-unit linear layer with a Softmax activation.

   NOTE(review): the stage order is significant for the architecture and
   possibly for weight loading -- do not reorder without confirming. *)
let make_network img_size =
  let stem =
    input [|img_size; img_size; 3|]
    |> conv2d_bn ~padding:VALID [|3;3;3;32|] [|2;2|]
    |> conv2d_bn ~padding:VALID [|3;3;32;32|] [|1;1|]
    |> conv2d_bn [|3;3;32;64|] [|1;1|]
    |> max_pool2d ~padding:VALID [|3;3|] [|2;2|]
    |> conv2d_bn ~padding:VALID [|1;1;64;80|] [|1;1|]
    |> conv2d_bn ~padding:VALID [|3;3;80;192|] [|1;1|]
    |> max_pool2d ~padding:VALID [|3;3|] [|2;2|]
  in
  (* (input channels, pool-branch filters) for each mix_typ1 block. *)
  let after_typ1 =
    List.fold_left
      (fun node (in_shape, bp_size) -> mix_typ1 in_shape bp_size node)
      stem
      [ (192, 32); (256, 64); (288, 64) ]
  in
  let after_typ3 = mix_typ3 after_typ1 in
  (* Intermediate filter count for each mix_typ4 block. *)
  let after_typ4 =
    List.fold_left
      (fun node size -> mix_typ4 size node)
      after_typ3
      [ 128; 160; 160; 192 ]
  in
  let after_typ8 = mix_typ8 after_typ4 in
  let after_typ9 = after_typ8 |> mix_typ9 1280 |> mix_typ9 2048 in
  let pooled = global_avg_pool2d after_typ9 in
  let classifier = linear ~act_typ:Activation.(Softmax 1) 1000 pooled in
  get_network classifier
(* [infer img] classifies the image file at path [img].

   input: name of input image; output: 1x1000 ndarray.

   The network is built and its weights are loaded from [weight_file]; the
   image is then converted with ImageMagick's `convert` into a temporary
   299x299 PPM file, which is loaded, extended and normalised through
   ImageUtils before being fed to the model.

   Both paths are shell-quoted, so image names containing spaces or shell
   metacharacters are handled safely. The temporary file is removed whether
   or not inference succeeds.

   @raise Failure if the `convert` command exits with a non-zero status. *)
let infer img =
  let nn = make_network 299 in
  Graph.load_weights nn weight_file;
  let prefix = Filename.remove_extension (Filename.basename img) in
  let tmp_img = Filename.temp_file prefix ".ppm" in
  (* Best-effort cleanup: a failure to delete must not mask the real result. *)
  let remove_tmp () = try Sys.remove tmp_img with Sys_error _ -> () in
  let run () =
    let cmd =
      "convert -resize 299x299\\! "
      ^ Filename.quote img ^ " " ^ Filename.quote tmp_img
    in
    let status = Sys.command cmd in
    if status <> 0 then
      failwith
        (Printf.sprintf "InceptionV3.infer: `%s` exited with status %d"
           cmd status);
    let img_ppm = ImageUtils.(load_ppm tmp_img |> extend_dim |> normalise) in
    Graph.model nn img_ppm
  in
  match run () with
  | result -> remove_tmp (); result
  | exception e -> remove_tmp (); raise e
(* [to_tuples ?top label] turns a 1x1000 ndarray into the top-N inference
   result list, each element in the form of
   [class: string; probability: float]. [top] defaults to 5.
   The work is delegated to Imagenet_cls.to_tuples. *)
let to_tuples ?(top = 5) label =
  let ranked = Imagenet_cls.to_tuples ~top label in
  ranked
(* [to_json ?top label] turns a 1x1000 ndarray into the top-N inference
   result as a json string. [top] defaults to 5.
   The work is delegated to Imagenet_cls.to_json. *)
let to_json ?(top = 5) label =
  let rendered = Imagenet_cls.to_json ~top label in
  rendered
(* [test ()] runs inference on the "panda.png" file resolved through the Owl
   zoo path and prints the JSON result (default top-N) followed by a newline
   on standard output. *)
let test () =
  let example = Owl_zoo_path.extend_zoo_path "panda.png" in
  let labels = infer example in
  let json = to_json labels in
  Printf.printf "%s\n" json
This file has been truncated, but you can view the full file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment