Skip to content

Instantly share code, notes, and snippets.

@yuriploc
Last active September 27, 2023 18:56
Show Gist options
  • Save yuriploc/044213c2a1527ef19cecc5b6ef76a3f3 to your computer and use it in GitHub Desktop.
Save yuriploc/044213c2a1527ef19cecc5b6ef76a3f3 to your computer and use it in GitHub Desktop.
An Elixir Livebook that runs YOLOv8 object detection with Nx and friends. Forked from the talk "Hans Elias B. Josephsen - Object detection in Elixir with Axon @ warsaw.ex #10".

YOLOv8m

# Installs the notebook's dependencies and configures them in one step.
Mix.install(
  [
    {:axon_onnx, ">= 0.0.0"},
    {:exla, ">= 0.0.0"},
    {:evision, ">= 0.0.0"},
    {:image, ">= 0.0.0"},
    {:kino, "~> 0.10"},
    {:kino_vega_lite, "~> 0.1.8"},
    # `override: true` forces this Nx requirement over those of the other dependencies.
    {:nx, "~> 0.6", override: true},
    {:nx_image, ">= 0.0.0"},
    {:ortex, ">= 0.0.0"},
    {:vega_lite, "~> 0.1.8"},
    # Fetched straight from Git; the script below calls `Yolo.NMS.nms/2`, presumably from this package.
    {:yolo, git: "https://github.com/yuriploc/yolov8_elixir.git"}
  ],
  # Make EXLA's host client the default Nx backend.
  config: [nx: [default_backend: {EXLA.Backend, client: :host}]],
  system_env: [
    # NOTE(review): presumably makes evision compile from source instead of
    # using a precompiled binary - confirm against the evision docs.
    {"EVISION_PREFER_PRECOMPILED", false}
  ]
)

defmodule Util do
  @moduledoc """
  Drawing and post-processing helpers for object-detection output.

  A "box" is a list whose first four elements are `[cx, cy, w, h]`: the centre
  of the box followed by its width and height, expressed in the coordinates of
  the image being drawn on. Any further elements are ignored by the drawing
  functions.
  """

  # Outline-only red rectangles.
  @box_style [fill: false, color: :red]

  @doc """
  Draws a red outline for every box in `bboxes` onto `image`.

  Returns the image with all boxes drawn. Raises if a rectangle cannot be drawn.
  """
  def draw_bboxes(bboxes, image) do
    Enum.reduce(bboxes, image, fn box, image -> draw_box(image, box) end)
  end

  @doc """
  Draws every box in `object_boxes` together with its class label onto `image`.

  `object_boxes` is an enumerable of `{boxes, class_name}` tuples. Each box is
  outlined in red and `class_name` is rendered in red at the box's top-left
  corner, clamped to the bounds of `image`.

  Returns the annotated image. Raises if drawing fails.
  """
  def draw_bbox_labels(object_boxes, image) do
    Enum.reduce(object_boxes, image, fn
      # Nothing to draw for this class, so do not render a label for it either.
      {[], _class_name}, image ->
        image

      {boxes, class_name}, image ->
        # The label depends only on the class, so render it once per class
        # instead of once per box.
        {label, _alpha} =
          Image.split_alpha(Image.Text.text!(class_name, text_fill_color: :red))

        Enum.reduce(boxes, image, fn box, image ->
          image
          |> draw_box(box)
          |> draw_label(label, box)
        end)
    end)
  end

  @doc """
  Keeps the predictions whose best class score is above `threshold`.

  `bboxes` is a rank-2 tensor with one prediction per row: the first four
  columns are carried through unchanged and every remaining column is treated
  as a per-class score.

  Returns a list of `[c0, c1, c2, c3, score]` lists, where `score` is the
  highest class score of the row, ordered by descending `score`.
  """
  def filter_predictions(bboxes, threshold \\ 0.5) do
    # Take the sizes from the tensor so any number of predictions/classes works.
    {rows, cols} = Nx.shape(bboxes)

    boxes = Nx.slice(bboxes, [0, 0], [rows, 4])
    probs = Nx.slice(bboxes, [0, 4], [rows, cols - 4])
    max_prob = Nx.reduce_max(probs, axes: [1])
    sorted_idxs = Nx.argsort(max_prob, direction: :desc)

    # Rows are sorted by descending score, so everything after the first row
    # at or below the threshold can be dropped.
    [boxes, Nx.new_axis(max_prob, 1)]
    |> Nx.concatenate(axis: 1)
    |> Nx.take(sorted_idxs)
    |> Nx.to_list()
    |> Enum.take_while(fn [_, _, _, _, prob] -> prob > threshold end)
  end

  @doc """
  Converts `tensor` into an image with `Image.from_nx/1`.

  Raises `MatchError` if the conversion does not return `{:ok, image}`.
  """
  def image_from_tensor(tensor) do
    {:ok, img} = Image.from_nx(tensor)
    img
  end

  # Outlines a single box on the image.
  defp draw_box(image, [_cx, _cy, w, h | _] = box) do
    {left, top} = top_left(box)
    Image.Draw.rect!(image, left, top, round(w), round(h), @box_style)
  end

  # Pastes the pre-rendered label at the box's top-left corner, kept inside the image.
  defp draw_label(image, label, box) do
    {left, top} = top_left(box)

    {:ok, image} =
      Image.Draw.image(
        image,
        label,
        clamp(left, 0, Image.width(image)),
        clamp(top, 0, Image.height(image))
      )

    image
  end

  # Converts a centre-based box into its rounded top-left corner.
  defp top_left([cx, cy, w, h | _]), do: {round(cx - w / 2), round(cy - h / 2)}

  # Restricts `value` to the inclusive range `low..high`.
  defp clamp(value, low, high), do: value |> max(low) |> min(high)
end

# Class labels (80 entries). The script below zips this list positionally with
# the NMS output, so the order matters.
# NOTE(review): presumably entry i names the model's class index i - confirm
# against the labels the ONNX model was trained with.
classes = [
  "person",
  "bicycle",
  "car",
  "motorbike",
  "aeroplane",
  "bus",
  "train",
  "truck",
  "boat",
  "traffic light",
  "fire hydrant",
  "stop sign",
  "parking meter",
  "bench",
  "bird",
  "cat",
  "dog",
  "horse",
  "sheep",
  "cow",
  "elephant",
  "bear",
  "zebra",
  "giraffe",
  "backpack",
  "umbrella",
  "handbag",
  "tie",
  "suitcase",
  "frisbee",
  "skis",
  "snowboard",
  "sports ball",
  "kite",
  "baseball bat",
  "baseball glove",
  "skateboard",
  "surfboard",
  "tennis racket",
  "bottle",
  "wine glass",
  "cup",
  "fork",
  "knife",
  "spoon",
  "bowl",
  "banana",
  "apple",
  "sandwich",
  "orange",
  "broccoli",
  "carrot",
  "hot dog",
  "pizza",
  "donut",
  "cake",
  "chair",
  "sofa",
  "pottedplant",
  "bed",
  "diningtable",
  "toilet",
  "tvmonitor",
  "laptop",
  "mouse",
  "remote",
  "keyboard",
  "cell phone",
  "microwave",
  "oven",
  "toaster",
  "sink",
  "refrigerator",
  "book",
  "clock",
  "vase",
  "scissors",
  "teddy bear",
  "hair drier",
  "toothbrush"
]

Section

# Load the ONNX model and open the input video.
# NOTE: both paths are machine-specific absolute paths.
model_path = "/home/yuri/codes/axon_talk/yolov8m.onnx"
model = Ortex.load(model_path)
video_path = "/home/yuri/codes/video-object-detection/red-shirt.mp4"
video = Evision.VideoCapture.videoCapture(video_path)

# The first frame is consumed and discarded; detection runs on the second
# frame of the video.
_first_frame = Evision.VideoCapture.read(video)

# Fail with a clear message when no frame can be read, instead of crashing
# inside `Evision.resize/2` with an opaque error.
frame =
  case Evision.VideoCapture.read(video) do
    %Evision.Mat{} = mat ->
      Evision.resize(mat, {_width = 640, _height = 640})

    other ->
      raise "could not read a frame from #{video_path}: #{inspect(other)}"
  end

# OpenCV decodes frames in BGR channel order. Reverse the channel axis (the
# last axis of the height x width x channels tensor) so that both the model
# input and the image built from `tensor` are RGB.
tensor =
  frame
  |> Evision.Mat.to_nx()
  |> Nx.backend_transfer({EXLA.Backend, client: :host})
  |> Nx.reverse(axes: [2])

# Channels-first, f32 values scaled to 0..1, with a leading batch axis.
input_tensor =
  tensor
  |> Nx.transpose(axes: [2, 0, 1])
  |> Nx.as_type(:f32)
  |> Nx.divide(255)
  |> Nx.new_axis(0)

{output} = Ortex.run(model, input_tensor)

# Drop the batch axis and put one prediction per row.
result =
  output[0]
  |> Nx.backend_transfer({EXLA.Backend, client: :host})
  |> Nx.transpose(axes: [1, 0])

# Pair each NMS result with its class label and draw boxes plus labels on the frame.
printed =
  result
  |> Yolo.NMS.nms(0.5)
  |> Enum.zip(classes)
  |> Util.draw_bbox_labels(Util.image_from_tensor(tensor))

# printed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment