Ben Vanik benvanik

## methods.mlir
    // Import declarations for three VMVX functions that share one signature:
    // an lhs, an rhs and an out !vm.buffer, each paired with an i64 offset and
    // an (i64, i64) stride tuple, followed by a single (i64, i64) %sizes tuple.
    // None of the declarations lists a result.
    // NOTE(review): the names suggest elementwise 2-D add (f32, i32) and
    // bitwise and (i32) - confirm against the runtime module implementation.
    vm.import @vmvx.add.2d.f32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
    vm.import @vmvx.add.2d.i32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
    vm.import @vmvx.and.2d.i32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
    vm.import @vmvx.div.2d.f32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_st

## unidirectional_lstm.mlir
// -----// IR Dump After TopLevelSCFToCFG //----- //
// Returns a tensor<i1> holding the result of the signed-agnostic "LT"
// comparison %arg0 < %arg1. Arguments %arg2 through %arg13 are accepted but
// never read in this body.
// NOTE(review): presumably the condition region of a while loop, judging by
// the symbol name - confirm against the caller.
func.func private @ForwardLoopCond_gFAnjWGSoLs__.167(
    %arg0: tensor<i64>, %arg1: tensor<i64>,
    %arg2: tensor<40xf32>, %arg3: tensor<i64>,
    %arg4: tensor<74x40xf32>, %arg5: tensor<i64>,
    %arg6: tensor<1x10xf32>, %arg7: tensor<1x10xf32>,
    %arg8: tensor<5x1x64xf32>,
    %arg9: tensor<5x1x1xf32>, %arg10: tensor<5x1x1xf32>,
    %arg11: tensor<5xi64>,
    %arg12: tensor<5x1x10xf32>, %arg13: tensor<5x1x10xf32>
) -> tensor<i1> {
  %is_less = "mhlo.compare"(%arg0, %arg1) {
    comparison_direction = #mhlo<"comparison_direction LT">
  } : (tensor<i64>, tensor<i64>) -> tensor<i1>
  return %is_less : tensor<i1>
}

// -----// IR Dump After MHLOToMHLOPreprocessing //----- //

## launch.json
{
            "name": "(gdb) iree-compile",
            "type": "cppdbg",
            "request": "launch",
            "preLaunchTask": "build-iree-compile",
            "program": "${command:cmake.buildDirectory}/tools/iree-compile",
            "args": [
                // "-iree-vm-bytecode-module-output-format=annotated-mlir-text",
                "-iree-vm-bytecode-source-listing=${workspaceFolder}/../iree-tmp/vm.mlir",
                "-iree-vm-emit-polyglot-zip=true",

## api_interfaces_cc.h
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,

## 0-static-input.mlir
// Simple dispatch of static shapes.
func @staticShapeDispatch(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
  %x = constant 100 : index
  %y = constant 50 : index
  // %x, %y here are the workgroup counts along a 2D grid to dispatch; backends turn them into 3D XYZ.
  %0 = flow.dispatch.workgroups[%x, %y](%arg0) : (tensor<8x4xf32>) -> (tensor<4x8xf32>) = (
    // I/O are modeled in the region as ref arguments that have some special ops available.
    %arg : !flow.dispatch.input<8x4xf32>, %ret : !flow.dispatch.output<4x8xf32>
  ) {
    // Loads a tensor from an input; can be tiled with offsets/sizes/strides.

## dispatch_workgroups.mlir
// RUN: iree-opt -allow-unregistered-dialect -split-input-file %s | iree-opt -allow-unregistered-dialect -split-input-file | IreeFileCheck %s

func @workgroups(%arg0 : tensor<?x4xf32>, %arg1 : index) -> tensor<4x?xf32> {
  %x = constant 100 : index
  %y = constant 50 : index
  %0 = flow.dispatch.workgroups[%x, %y](%arg0, %arg1) : (tensor<?x4xf32>, index) -> (tensor<4x?xf32>) =
  (%arg0_capture : !flow.dispatch.input<?x4xf32>, %arg1_capture : index, %ret0 : !flow.dispatch.output<4x?xf32>) {

    // Query symbolic workgroup info:

## add.mlir

//===----------------------------------------------------------------------===//
// b = add(a, a)
//===----------------------------------------------------------------------===//

// NOTE: this represents what our tflite import flow should produce; the _
// prefixed functions are all synthesized by us. We use the VM dialect in here
// now because std has no list and other stuff. In a real flow we may have an
// iree_tflite dialect that has pseudo ops for these things that then plug into
// the VM conversion interface, or maybe we just emit them as-is at input

## iree-wasm.md

      
1 file · 0 forks · 1 comment · 0 stars
benvanik / iree-wasm.md
Last active November 20, 2020 08:05
IREE WebAssembly executable kernel manifesto
              
          
    This is the shape of what we are talking about:
// Let's run a tile worth of work within a larger grid dispatch. That grid is
// defined *by us in the compiler* - it could, for example be a grid of 1x1x1
// such that this function is called once. Or, if there was benefit, you could
// make it go wider (like how ruy fans out work to a threadpool). And you can
// emit the code to choose the grid size/shape at runtime based on anything you
// want. That's what IREE gives you today. This here is your executable kernel
// equivalent to CUDA kernel or compute shader.

  
## accelerator-devkit-proto-0-baseboard-bottom-pos.csv

          
Ref,Val,Package,PosX,PosY,Rot,Side
C3,1uF/16V,C_0402_1005Metric,-46.872500,-5.840000,180.000000,bottom
C5,0.01uF,C_0201_0603Metric,-20.400000,-15.455000,90.000000,bottom
C6,0.1uF,C_0201_0603Metric,-20.500000,-13.850000,-90.000000,bottom
C7,0.01uF,C_0201_0603Metric,-17.700000,-15.050000,0.000000,bottom
C8,0.1uF,C_0201_0603Metric,-19.305000,-15.050000,180.000000,bottom
C11,0.01uF,C_0201_0603Metric,-20.100000,-18.750000,90.000000,bottom
C12,0.1uF,C_0201_0603Metric,-20.900000,-18.750000,90.000000,bottom
C13,0.01uF,C_0201_0603Metric,-18.500000,-19.600000,90.000000,bottom
C14,0.1uF,C_0201_0603Metric,-22.500000,-15.000000,180.000000,bottom

## unidirectional_lstm vulkan error
          %25 = xla_hlo.minimum %23, %24 : tensor<1x10xf32>
          %26 = "xla_hlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
          %27 = xla_hlo.maximum %25, %26 : tensor<1x10xf32>
          %28 = "xla_hlo.slice"(%8) {limit_indices = dense<[1, 40]> : tensor<2xi64>, start_indices = dense<[0, 30]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
          %29 = xla_hlo.multiply %6, %28 : tensor<1x10xf32>
          %30 = "xla_hlo.tanh"(%29) : (tensor<1x10xf32>) -> tensor<1x10xf32>
          %31 = xla_hlo.multiply %6, %30 : tensor<1x10xf32>
          %32 = xla_hlo.add %6, %31 : tensor<1x10xf32>
          %33 = "xla_hlo.tanh"(%27) : (tensor<1x10xf32>) -> tensor<1x10xf32>
          %34 = xla_hlo.multiply %32, %33 : tensor<1x10xf32>
	// Import declarations for three VMVX functions that share one signature:
	// an lhs, an rhs and an out !vm.buffer, each paired with an i64 offset and
	// an (i64, i64) stride tuple, followed by a single (i64, i64) %sizes tuple.
	// None of the declarations lists a result.
	// NOTE(review): the names suggest elementwise 2-D add (f32, i32) and
	// bitwise and (i32) - confirm against the runtime module implementation.
	vm.import @vmvx.add.2d.f32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
	vm.import @vmvx.add.2d.i32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
	vm.import @vmvx.and.2d.i32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
	vm.import @vmvx.div.2d.f32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_st
	// -----// IR Dump After TopLevelSCFToCFG //----- //
	// Returns a tensor<i1> holding the result of the "LT" comparison
	// %arg0 < %arg1. Arguments %arg2 through %arg13 are accepted but never
	// read in this body.
	// NOTE(review): presumably the condition region of a while loop, judging by
	// the symbol name - confirm against the caller.
	func.func private @ForwardLoopCond_gFAnjWGSoLs__.167(
	    %arg0: tensor<i64>, %arg1: tensor<i64>,
	    %arg2: tensor<40xf32>, %arg3: tensor<i64>,
	    %arg4: tensor<74x40xf32>, %arg5: tensor<i64>,
	    %arg6: tensor<1x10xf32>, %arg7: tensor<1x10xf32>,
	    %arg8: tensor<5x1x64xf32>,
	    %arg9: tensor<5x1x1xf32>, %arg10: tensor<5x1x1xf32>,
	    %arg11: tensor<5xi64>,
	    %arg12: tensor<5x1x10xf32>, %arg13: tensor<5x1x10xf32>
	) -> tensor<i1> {
	  %is_less = "mhlo.compare"(%arg0, %arg1) {
	    comparison_direction = #mhlo<"comparison_direction LT">
	  } : (tensor<i64>, tensor<i64>) -> tensor<i1>
	  return %is_less : tensor<i1>
	}

	// -----// IR Dump After MHLOToMHLOPreprocessing //----- //
	{
	"name": "(gdb) iree-compile",
	"type": "cppdbg",
	"request": "launch",
	"preLaunchTask": "build-iree-compile",
	"program": "${command:cmake.buildDirectory}/tools/iree-compile",
	"args": [
	// "-iree-vm-bytecode-module-output-format=annotated-mlir-text",
	"-iree-vm-bytecode-source-listing=${workspaceFolder}/../iree-tmp/vm.mlir",
	"-iree-vm-emit-polyglot-zip=true",
	// Copyright 2020 Google LLC
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// Simple dispatch of static shapes.
	func @staticShapeDispatch(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
	%x = constant 100 : index
	%y = constant 50 : index
	// %x, %y here are the workgroup counts along a 2D grid to dispatch; backends turn them into 3D XYZ.
	%0 = flow.dispatch.workgroups[%x, %y](%arg0) : (tensor<8x4xf32>) -> (tensor<4x8xf32>) = (
	// I/O are modeled in the region as ref arguments that have some special ops available.
	%arg : !flow.dispatch.input<8x4xf32>, %ret : !flow.dispatch.output<4x8xf32>
	) {
	// Loads a tensor from an input; can be tiled with offsets/sizes/strides.
	// RUN: iree-opt -allow-unregistered-dialect -split-input-file %s | iree-opt -allow-unregistered-dialect -split-input-file | IreeFileCheck %s

	func @workgroups(%arg0 : tensor<?x4xf32>, %arg1 : index) -> tensor<4x?xf32> {
	%x = constant 100 : index
	%y = constant 50 : index
	%0 = flow.dispatch.workgroups[%x, %y](%arg0, %arg1) : (tensor<?x4xf32>, index) -> (tensor<4x?xf32>) =
	(%arg0_capture : !flow.dispatch.input<?x4xf32>, %arg1_capture : index, %ret0 : !flow.dispatch.output<4x?xf32>) {

	// Query symbolic workgroup info:

	//===----------------------------------------------------------------------===//
	// b = add(a, a)
	//===----------------------------------------------------------------------===//

	// NOTE: this represents what our tflite import flow should produce; the _
	// prefixed functions are all synthesized by us. We use the VM dialect in here
	// now because std has no list and other stuff. In a real flow we may have an
	// iree_tflite dialect that has pseudo ops for these things that then plug into
	// the VM conversion interface, or maybe we just emit them as-is at input
Ref	Val	Package	PosX	PosY	Rot	Side
C3	1uF/16V	C_0402_1005Metric	-46.872500	-5.840000	180.000000	bottom
C5	0.01uF	C_0201_0603Metric	-20.400000	-15.455000	90.000000	bottom
C6	0.1uF	C_0201_0603Metric	-20.500000	-13.850000	-90.000000	bottom
C7	0.01uF	C_0201_0603Metric	-17.700000	-15.050000	0.000000	bottom
C8	0.1uF	C_0201_0603Metric	-19.305000	-15.050000	180.000000	bottom
C11	0.01uF	C_0201_0603Metric	-20.100000	-18.750000	90.000000	bottom
C12	0.1uF	C_0201_0603Metric	-20.900000	-18.750000	90.000000	bottom
C13	0.01uF	C_0201_0603Metric	-18.500000	-19.600000	90.000000	bottom
C14	0.1uF	C_0201_0603Metric	-22.500000	-15.000000	180.000000	bottom
	%25 = xla_hlo.minimum %23, %24 : tensor<1x10xf32>
	%26 = "xla_hlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
	%27 = xla_hlo.maximum %25, %26 : tensor<1x10xf32>
	%28 = "xla_hlo.slice"(%8) {limit_indices = dense<[1, 40]> : tensor<2xi64>, start_indices = dense<[0, 30]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
	%29 = xla_hlo.multiply %6, %28 : tensor<1x10xf32>
	%30 = "xla_hlo.tanh"(%29) : (tensor<1x10xf32>) -> tensor<1x10xf32>
	%31 = xla_hlo.multiply %6, %30 : tensor<1x10xf32>
	%32 = xla_hlo.add %6, %31 : tensor<1x10xf32>
	%33 = "xla_hlo.tanh"(%27) : (tensor<1x10xf32>) -> tensor<1x10xf32>
	%34 = xla_hlo.multiply %32, %33 : tensor<1x10xf32>