Skip to content

Instantly share code, notes, and snippets.

@jzstark jzstark/#readme.md
Last active Nov 7, 2018

Embed
What would you like to do?
InceptionV3 with Owl's CGraph module

InceptionV3-CG

InceptionV3 is one of Google’s latest efforts in image recognition. This is a standard task in computer vision, where models try to classify entire images into 1000 classes, like "Zebra", "Dalmatian", and "Dishwasher". Compared with previous DNN models, InceptionV3 has one of the most complex network architectures among computer vision models. The Computation Graph (CG) module is used to reduce memory usage. The original paper of this network is here.

Usage

This gist implements an InceptionV3 service in Owl, and provides simple interfaces to use. Here is an example:

#zoo "519ed7298d7ab8b5a4bdeb9195e5d696"

let img = "/path/to/your/image.png";;
let labels = InceptionV3_CG.infer img;;
let labels_json   = InceptionV3_CG.to_json ~top:5 labels;;
let labels_tuples = InceptionV3_CG.to_tuples labels;;

The infer function takes an image path as input. The image can be in any popular format: jpeg, png, etc. This gist contains an example image for you to use, but feel free to use your own.

The output of this function is a 1x1000 vector. To get human-readable classification results, pass this vector to to_json or to_tuples. The former returns the top-N inference results as a JSON string; the latter returns a list in which each element has the form [class: string; probability: float]. Each probability is in the range [0, 1].

The top parameter specifies how many of the highest-ranked results are returned. It defaults to 5.

Prerequisite

This application relies on the tool ImageMagick for image format conversion and resizing. Please make sure it is installed. For example, on Ubuntu or Debian, you can use the command:

sudo apt-get install imagemagick
#!/usr/bin/env owl
open Owl
open Owl_types
open Algodiff.S
module CPU_Engine = Owl_computation_cpu_engine.Make (Dense.Ndarray.S)
module CGCompiler = Owl_neural_compiler.Make (CPU_Engine)
open CGCompiler.Neural
open CGCompiler.Neural.Graph
open CGCompiler.Neural.Algodiff
#zoo "51eaf74c65fa14c8c466ecfab2351bbd" (* Imagenet_cls*)
#zoo "86a1748bbc898f2e42538839edba00e1" (* ImageUtils *)
(* [pack x] wraps [x] for the compiled model: [CGCompiler.Engine.pack_arr]
   is applied first, then [Algodiff.pack_arr] on its result. *)
let pack x = Algodiff.pack_arr (CGCompiler.Engine.pack_arr x)

(* [unpack x] undoes [pack]: [Algodiff.unpack_arr] is applied first, then
   [CGCompiler.Engine.unpack_arr] on its result. *)
let unpack x = CGCompiler.Engine.unpack_arr (Algodiff.unpack_arr x)
(* Channel-last data layout, the same convention as the Keras Conv layer.
   NOTE(review): no definition in the visible part of this file reads this
   flag. *)
let channel_last = true
(* If false, the network is meant to have no final Dense layer.
   NOTE(review): make_network below does not consult this flag and always
   appends the final [linear 1000] layer -- confirm whether that is
   intended. *)
let include_top = true
(* Side length of the square input image. With include_top = true it has to
   be exactly 299. *)
let img_size = 299
(* Location of the weight file, obtained by passing the file name
   through [Owl_zoo_path.extend_zoo_path]. *)
let weight_file = Owl_zoo_path.extend_zoo_path "inception_owl_cg.weight"
(* [conv2d_bn ?padding kernel stride nn] appends three layers to [nn], in
   this order: a [conv2d] layer with the given [padding], [kernel] and
   [stride]; a [normalisation] layer with [~training:false] on axis 3; and a
   ReLU [activation]. [padding] defaults to [SAME]. Returns the last node. *)
let conv2d_bn ?(padding = SAME) kernel stride nn =
  let convolved = conv2d ~padding kernel stride nn in
  let normalised = normalisation ~training:false ~axis:3 convolved in
  activation Activation.Relu normalised
(* [mix_typ1 in_shape bp_size nn] builds a four-branch module on top of [nn]
   and joins the branches with [concatenate 3]:
   - a single 1x1 convolution (64 filters);
   - a 1x1 convolution (48) followed by a 5x5 convolution (64);
   - a 1x1 convolution (64) followed by two 3x3 convolutions (96, 96);
   - a 3x3 average pooling followed by a 1x1 convolution ([bp_size]).
   [in_shape] is used as the third entry of every kernel applied directly
   to [nn] (presumably the number of input channels -- TODO confirm).
   NOTE(review): the layers are created in the same order as before;
   keep that order unless node creation order is known to be irrelevant. *)
let mix_typ1 in_shape bp_size nn =
  (* every convolution in this module uses stride 1x1 and default padding *)
  let conv_s1 kernel x = conv2d_bn kernel [|1; 1|] x in
  let tower_1x1 = conv_s1 [|1; 1; in_shape; 64|] nn in
  let tower_5x5 =
    let squeezed = conv_s1 [|1; 1; in_shape; 48|] nn in
    conv_s1 [|5; 5; 48; 64|] squeezed
  in
  let tower_3x3dbl =
    let squeezed = conv_s1 [|1; 1; in_shape; 64|] nn in
    let widened = conv_s1 [|3; 3; 64; 96|] squeezed in
    conv_s1 [|3; 3; 96; 96|] widened
  in
  let tower_pool =
    let pooled = avg_pool2d [|3; 3|] [|1; 1|] nn in
    conv_s1 [|1; 1; in_shape; bp_size|] pooled
  in
  concatenate 3 [|tower_1x1; tower_5x5; tower_3x3dbl; tower_pool|]
(* [mix_typ3 nn] builds a three-branch module on top of [nn] in which every
   branch ends with a stride-2x2, VALID-padded layer, and joins the branches
   with [concatenate 3]:
   - a 3x3 convolution (288 -> 384);
   - a 1x1 convolution (288 -> 64), a 3x3 convolution (64 -> 96), then a
     strided 3x3 convolution (96 -> 96);
   - a 3x3 max pooling.
   The kernels hard-code 288 as their third entry for layers fed by [nn]. *)
let mix_typ3 nn =
  let reduce_3x3 = conv2d_bn ~padding:VALID [|3; 3; 288; 384|] [|2; 2|] nn in
  let reduce_3x3dbl =
    let squeezed = conv2d_bn [|1; 1; 288; 64|] [|1; 1|] nn in
    let widened = conv2d_bn [|3; 3; 64; 96|] [|1; 1|] squeezed in
    conv2d_bn ~padding:VALID [|3; 3; 96; 96|] [|2; 2|] widened
  in
  let reduce_pool = max_pool2d ~padding:VALID [|3; 3|] [|2; 2|] nn in
  concatenate 3 [|reduce_3x3; reduce_3x3dbl; reduce_pool|]
(* [mix_typ4 size nn] builds a four-branch module on top of [nn] using
   1x7 / 7x1 convolution pairs, and joins the branches with
   [concatenate 3]:
   - a 1x1 convolution (768 -> 192);
   - 1x1 (768 -> size), 1x7 (size -> size), 7x1 (size -> 192);
   - 1x1 (768 -> size), then 7x1, 1x7, 7x1 (size -> size each), then
     1x7 (size -> 192);
   - a 3x3 average pooling followed by a 1x1 convolution (768 -> 192).
   All convolutions use stride 1x1 and the default padding of [conv2d_bn].
   [size] is the width of the intermediate layers of the two 7x7 branches. *)
let mix_typ4 size nn =
  (* Apply stride-1 [conv2d_bn] layers one after another, in list order. *)
  let conv_chain kernels start =
    List.fold_left (fun acc kernel -> conv2d_bn kernel [|1; 1|] acc) start kernels
  in
  let tower_1x1 = conv_chain [ [|1; 1; 768; 192|] ] nn in
  let tower_7x7 =
    conv_chain
      [ [|1; 1; 768; size|];
        [|1; 7; size; size|];
        [|7; 1; size; 192|] ]
      nn
  in
  let tower_7x7dbl =
    conv_chain
      [ [|1; 1; 768; size|];
        [|7; 1; size; size|];
        [|1; 7; size; size|];
        [|7; 1; size; size|];
        [|1; 7; size; 192|] ]
      nn
  in
  let tower_pool =
    (* pooling keeps its default padding, noted as SAME in the original *)
    let pooled = avg_pool2d [|3; 3|] [|1; 1|] nn in
    conv_chain [ [|1; 1; 768; 192|] ] pooled
  in
  concatenate 3 [|tower_1x1; tower_7x7; tower_7x7dbl; tower_pool|]
(* [mix_typ8 nn] builds a three-branch module on top of [nn] in which every
   branch ends with a stride-2x2, VALID-padded layer, and joins the branches
   with [concatenate 3]:
   - a 1x1 convolution (768 -> 192), then a strided 3x3 convolution
     (192 -> 320);
   - a 1x1 convolution (768 -> 192), a 1x7 and a 7x1 convolution
     (192 -> 192 each), then a strided 3x3 convolution (192 -> 192);
   - a 3x3 max pooling. *)
let mix_typ8 nn =
  let reduce_3x3 =
    let squeezed = conv2d_bn [|1; 1; 768; 192|] [|1; 1|] nn in
    conv2d_bn ~padding:VALID [|3; 3; 192; 320|] [|2; 2|] squeezed
  in
  let reduce_7x7x3 =
    let squeezed = conv2d_bn [|1; 1; 768; 192|] [|1; 1|] nn in
    let after_1x7 = conv2d_bn [|1; 7; 192; 192|] [|1; 1|] squeezed in
    let after_7x1 = conv2d_bn [|7; 1; 192; 192|] [|1; 1|] after_1x7 in
    conv2d_bn ~padding:VALID [|3; 3; 192; 192|] [|2; 2|] after_7x1
  in
  let reduce_pool = max_pool2d ~padding:VALID [|3; 3|] [|2; 2|] nn in
  concatenate 3 [|reduce_3x3; reduce_7x7x3; reduce_pool|]
(* [mix_typ9 input nn] builds a four-branch module on top of [nn] and joins
   the branches with [concatenate 3]:
   - a 1x1 convolution (input -> 320);
   - a 1x1 convolution (input -> 384) whose output feeds both a 1x3 and a
     3x1 convolution (384 -> 384 each), themselves joined with
     [concatenate 3];
   - a 1x1 convolution (input -> 448) and a 3x3 convolution (448 -> 384)
     whose output is split into 1x3 / 3x1 convolutions in the same way;
   - a 3x3 average pooling followed by a 1x1 convolution (input -> 192).
   All convolutions use stride 1x1 and the default padding of [conv2d_bn].
   [input] is the third entry of every kernel applied directly to [nn]; note
   that inside this function it shadows the [input] layer constructor. *)
let mix_typ9 input nn =
  let conv_s1 kernel x = conv2d_bn kernel [|1; 1|] x in
  (* Feed [stem] to a 1x3 and then a 3x1 convolution and join both results. *)
  let split_and_join stem =
    let split_1x3 = conv_s1 [|1; 3; 384; 384|] stem in
    let split_3x1 = conv_s1 [|3; 1; 384; 384|] stem in
    concatenate 3 [|split_1x3; split_3x1|]
  in
  let tower_1x1 = conv_s1 [|1; 1; input; 320|] nn in
  let tower_3x3 =
    let stem = conv_s1 [|1; 1; input; 384|] nn in
    split_and_join stem
  in
  let tower_3x3dbl =
    let squeezed = conv_s1 [|1; 1; input; 448|] nn in
    let stem = conv_s1 [|3; 3; 448; 384|] squeezed in
    split_and_join stem
  in
  let tower_pool =
    let pooled = avg_pool2d [|3; 3|] [|1; 1|] nn in
    conv_s1 [|1; 1; input; 192|] pooled
  in
  concatenate 3 [|tower_1x1; tower_3x3; tower_3x3dbl; tower_pool|]
(* [make_network img_size] assembles the whole network and returns the
   result of [get_network] on its last node. The graph starts from an
   [input] of shape [|img_size; img_size; 3|] and is built in four stages:
   a stem of plain convolutions and max poolings, the [mix_typ1] /
   [mix_typ3] modules, the [mix_typ4] / [mix_typ8] modules, and finally the
   [mix_typ9] modules followed by [global_avg_pool2d] and a [linear] layer
   of 1000 units with a [Softmax 1] activation. *)
let make_network img_size =
  let stem =
    input [|img_size; img_size; 3|]
    |> conv2d_bn ~padding:VALID [|3; 3; 3; 32|] [|2; 2|]
    |> conv2d_bn ~padding:VALID [|3; 3; 32; 32|] [|1; 1|]
    |> conv2d_bn [|3; 3; 32; 64|] [|1; 1|]
    |> max_pool2d ~padding:VALID [|3; 3|] [|2; 2|]
    |> conv2d_bn ~padding:VALID [|1; 1; 64; 80|] [|1; 1|]
    |> conv2d_bn ~padding:VALID [|3; 3; 80; 192|] [|1; 1|]
    |> max_pool2d ~padding:VALID [|3; 3|] [|2; 2|]
  in
  let stage_typ1 =
    stem
    |> mix_typ1 192 32
    |> mix_typ1 256 64
    |> mix_typ1 288 64
    |> mix_typ3
  in
  let stage_typ4 =
    stage_typ1
    |> mix_typ4 128
    |> mix_typ4 160
    |> mix_typ4 160
    |> mix_typ4 192
    |> mix_typ8
  in
  let classifier =
    stage_typ4
    |> mix_typ9 1280
    |> mix_typ9 2048
    |> global_avg_pool2d
    |> linear 1000 ~act_typ:Activation.(Softmax 1)
  in
  get_network classifier
(* [infer img] runs the network on the image file at path [img].
   input: path of the input image; output: 1x1000 ndarray.

   Steps: build the network with [make_network], initialise it and load
   [weight_file]; convert the image with the ImageMagick [convert] command
   into a temporary .ppm file resized to exactly img_size x img_size; load
   that file with [ImageUtils], and evaluate the compiled model on it.

   Both paths are shell-quoted before being handed to [Sys.command], so
   paths containing spaces or shell metacharacters are handled safely. The
   temporary file is removed whether or not inference succeeds.

   Raises [Failure] if the [convert] command exits with a non-zero status. *)
let infer img =
  let nn = make_network img_size in
  Graph.init nn;
  Graph.load_weights nn weight_file;
  let prefix = Filename.remove_extension (Filename.basename img) in
  let tmp_img = Filename.temp_file prefix ".ppm" in
  (* Best-effort removal: a failure to delete must not mask the real result
     or the real error. *)
  let cleanup () = try Sys.remove tmp_img with Sys_error _ -> () in
  let run () =
    (* The trailing \! (escaped for the shell) is kept from the original
       command line; it follows the WxH geometry passed to -resize. *)
    let cmd =
      Printf.sprintf "convert -resize %dx%d\\! %s %s"
        img_size img_size (Filename.quote img) (Filename.quote tmp_img)
    in
    (match Sys.command cmd with
     | 0 -> ()
     | status ->
       failwith
         (Printf.sprintf "InceptionV3_CG.infer: command `%s` exited with status %d"
            cmd status));
    let img_ppm = ImageUtils.(load_ppm tmp_img |> extend_dim |> normalise) in
    CGCompiler.model nn (pack img_ppm) |> unpack
  in
  match run () with
  | result -> cleanup (); result
  | exception e -> cleanup (); raise e
(* [to_tuples ?top label] forwards to [Imagenet_cls.to_tuples].
   input: 1x1000 ndarray; output: top-N inference result list, each element
   in the form of [class: string; probability: float].
   [top] is the N of top-N and defaults to 5. *)
let to_tuples ?(top = 5) label =
  let ranked = Imagenet_cls.to_tuples ~top label in
  ranked
(* [to_json ?top label] forwards to [Imagenet_cls.to_json].
   input: 1x1000 ndarray; output: top-N inference result as a json string.
   [top] is the N of top-N and defaults to 5. *)
let to_json ?(top = 5) label =
  let json = Imagenet_cls.to_json ~top label in
  json
(* [test ()] runs [infer] on the image named panda.png, located through
   [Owl_zoo_path.extend_zoo_path], and prints the default top-5 result as a
   JSON string followed by a newline on standard output. *)
let test () =
  let sample_path = Owl_zoo_path.extend_zoo_path "panda.png" in
  let prediction = infer sample_path in
  Printf.printf "%s\n" (to_json prediction)
This file has been truncated, but you can view the full file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.