Ben Vanik benvanik

## gist:488d35a4b47db979536d
/* Shader microcode listing: two vertex fetches fill r1 and r0, then r0 is written to o0 and r1 to oPos. */
/* NOTE(review): looks like a vertex shader (it uses vfetch and writes oPos) - confirm. */
/* The number inside each leading comment is the listing's own index for that line. */
/*    0.0 */       exec
/* Destination r1 uses the swizzle xyz1; NOTE(review): presumably w is forced to constant 1 - confirm. */
/*    3   */          vfetch_full r1.xyz1, r0.x, vf95, DataFormat=FMT_32_32_32_FLOAT, Stride=7, Signed=true, NumFormat=integer, PrefetchCount=7
/* NOTE(review): presumably reuses the source and fetch constant of the preceding vfetch_full at Offset=3 - confirm. */
/*    4   */          vfetch_mini r0, Offset=3, DataFormat=FMT_32_32_32_32_FLOAT, Signed=true, NumFormat=integer
/*    0.1 */       alloc interpolators
/*    1.0 */       exec
/* max of a register with itself yields that register, so this is effectively a copy r0 -> o0. */
/*    5   */          max o0, r0, r0
/*    1.1 */       alloc position
/*    2.0 */       exec
/* Same self-max copy idiom: r1 -> oPos. */
/*    6   */          max oPos, r1, r1
/* NOTE(review): "exece" is presumably the program-terminating form of exec - confirm. */
/*    2.1 */       exece

## gist:ac2ce3fde86114d88d60d57898353f36

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                benvanik
                / gist:ac2ce3fde86114d88d60d57898353f36
            
            
              Last active
              December 27, 2016 17:27
            
              
                Factorio mods
              
          
    Quality of life:

https://mods.factorio.com/mods/Choumiko/RailTanker
https://mods.factorio.com/mods/Narc/EvoGUI
https://mods.factorio.com/mods/Narc/YARM
https://mods.factorio.com/mods/Choumiko/Foreman
https://mods.factorio.com/mods/trold/Bottleneck
https://mods.factorio.com/mods/GotLag/Flow%20Control
https://mods.factorio.com/mods/Supercheese/Squeak%20Through
https://mods.factorio.com/mods/Neomore/Advanced-Electric
https://mods.factorio.com/mods/binbinhfr/WaterWell


## BUILD
# Package-wide default: targets here are visible to every other package
# unless a target sets its own `visibility`.
package(default_visibility = ["//visibility:public"])

# C++ library target "a": built from a.cc, with a.h as its declared header.
cc_library(
    name = "a",
    srcs = ["a.cc"],
    hdrs = ["a.h"],
)

cc_library(
    name = "b",

## SIMD.td
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,

## endtoend_hal_ops.mlir
*** IR Dump After Canonicalizer ***


module @hal_usage {
  // Elementwise chain over one 4-element f32 tensor; the function carries the
  // iree.module.export attribute.
  //   result = ((arg0 + arg0) - arg0) * arg0
  func @hloElementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> attributes {iree.module.export} {
    // %0 = arg0 + arg0
    %0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
    // %1 = %0 - arg0
    %1 = xla_hlo.sub %0, %arg0 : tensor<4xf32>
    // %2 = %1 * arg0
    %2 = xla_hlo.mul %1, %arg0 : tensor<4xf32>
    return %2 : tensor<4xf32>
  }

## spirv_executable.mlir
hal.executable @simpleMath_ex_dispatch_0 {
  hal.interface @legacy_io {
    hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
    hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
  }
  hal.executable.entry_point @simpleMath_rgn_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (tensor<4xf32>) -> tensor<4xf32>, workgroup_size = dense<[32, 1, 1]> : vector<3xi32>}
  hal.executable.binary attributes {data = dense<"0x0800000053505645A4FFFFFFA00600007000000004000000EEFFFFFF04000000010000000C000000000006000800040006000000040000000200000034000000100000000C001400040008000C0010000C000000010000000700000001000000200000000C0010000000040008000C000C0000000700000001000000200000008A010000030223070000010016000000390000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000E00030000000000010000000F000C00050000000F00000073696D706C654D6174685F72676E5F64697370617463685F

## HALDeviceSwitch.td
def HAL_DeviceSwitchOp : HAL_Op<"device.switch"> {
  let summary = [{runtime device switch pseudo op}];
  let description = [{
    Switches between multiple regions based on the runtime device type.
    The provided regions are pattern-matched against the runtime backend of the
    given device and executed only when the device matches.

    As the patterns can match on wildcards this enables conditions that have
    similar bodies to be folded. The patterns themselves are only matched once
    at startup and then the results are cached; the runtime overhead is

## 0-input.mlir
// Module with a single function carrying iree.module.export: the elementwise
// product of two 4-element f32 tensors.
module {
  func @simple_mul(%lhs: tensor<4xf32>, %rhs: tensor<4xf32>) -> tensor<4xf32> attributes {iree.module.export} {
    // Generic-form spelling of the multiply op; the "mul.1" name attribute is
    // carried over unchanged.
    %product = "xla_hlo.multiply"(%lhs, %rhs) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
    return %product : tensor<4xf32>
  }
}

## 0-input.mlir
module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, producer = 370 : i32}} {
  flow.variable @"__iree_flow___sm_node186__m.layer-2.kernel" dense<1.200000e+00> : tensor<7x7x3x64xf32> attributes {sym_visibility = "private"}
  flow.variable @"__iree_flow___sm_node187__m.layer-2.bias" dense<0.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
  flow.variable @"__iree_flow___sm_node193__m.layer-3.gamma" dense<1.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
  flow.variable @"__iree_flow___sm_node194__m.layer-3.beta" dense<0.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
  flow.variable @"__iree_flow___sm_node195__m.layer-3.moving_mean" dense<0.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
  flow.variable @"__iree_flow___sm_node196__m.layer-3.moving_variance" dense<1.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
  flow.variable @"__iree_flow___sm_node213__m.layer-7.kernel" dense<1.2

## unidirectional_lstm vulkan error
          %25 = xla_hlo.minimum %23, %24 : tensor<1x10xf32>
          %26 = "xla_hlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
          %27 = xla_hlo.maximum %25, %26 : tensor<1x10xf32>
          %28 = "xla_hlo.slice"(%8) {limit_indices = dense<[1, 40]> : tensor<2xi64>, start_indices = dense<[0, 30]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
          %29 = xla_hlo.multiply %6, %28 : tensor<1x10xf32>
          %30 = "xla_hlo.tanh"(%29) : (tensor<1x10xf32>) -> tensor<1x10xf32>
          %31 = xla_hlo.multiply %6, %30 : tensor<1x10xf32>
          %32 = xla_hlo.add %6, %31 : tensor<1x10xf32>
          %33 = "xla_hlo.tanh"(%27) : (tensor<1x10xf32>) -> tensor<1x10xf32>
          %34 = xla_hlo.multiply %32, %33 : tensor<1x10xf32>
	/* Shader microcode listing: two vertex fetches fill r1 and r0, then r0 is written to o0 and r1 to oPos. */
	/* NOTE(review): looks like a vertex shader (it uses vfetch and writes oPos) - confirm. */
	/* 0.0 */ exec
	/* Destination r1 uses the swizzle xyz1; NOTE(review): presumably w is forced to constant 1 - confirm. */
	/* 3 */ vfetch_full r1.xyz1, r0.x, vf95, DataFormat=FMT_32_32_32_FLOAT, Stride=7, Signed=true, NumFormat=integer, PrefetchCount=7
	/* NOTE(review): presumably reuses the source and fetch constant of the preceding vfetch_full at Offset=3 - confirm. */
	/* 4 */ vfetch_mini r0, Offset=3, DataFormat=FMT_32_32_32_32_FLOAT, Signed=true, NumFormat=integer
	/* 0.1 */ alloc interpolators
	/* 1.0 */ exec
	/* max of a register with itself yields that register, so this is effectively a copy r0 -> o0. */
	/* 5 */ max o0, r0, r0
	/* 1.1 */ alloc position
	/* 2.0 */ exec
	/* Same self-max copy idiom: r1 -> oPos. */
	/* 6 */ max oPos, r1, r1
	/* NOTE(review): "exece" is presumably the program-terminating form of exec - confirm. */
	/* 2.1 */ exece
	# Package-wide default: targets here are visible to every other package
	# unless a target sets its own `visibility`.
	package(default_visibility = ["//visibility:public"])

	# C++ library target "a": built from a.cc, with a.h as its declared header.
	cc_library(
	name = "a",
	srcs = ["a.cc"],
	hdrs = ["a.h"],
	)

	cc_library(
	name = "b",
	// Copyright 2019 Google LLC
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	*** IR Dump After Canonicalizer ***


	module @hal_usage {
	// Elementwise chain over one 4-element f32 tensor; the function carries the
	// iree.module.export attribute.
	//   result = ((arg0 + arg0) - arg0) * arg0
	func @hloElementwiseOps(%arg0: tensor<4xf32>) -> tensor<4xf32> attributes {iree.module.export} {
	// %0 = arg0 + arg0
	%0 = xla_hlo.add %arg0, %arg0 : tensor<4xf32>
	// %1 = %0 - arg0
	%1 = xla_hlo.sub %0, %arg0 : tensor<4xf32>
	// %2 = %1 * arg0
	%2 = xla_hlo.mul %1, %arg0 : tensor<4xf32>
	return %2 : tensor<4xf32>
	}
	hal.executable @simpleMath_ex_dispatch_0 {
	hal.interface @legacy_io {
	hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
	hal.interface.binding @ret0, set=0, binding=1, type="StorageBuffer", access="Write|Discard"
	}
	hal.executable.entry_point @simpleMath_rgn_dispatch_0 attributes {interface = @legacy_io, ordinal = 0 : i32, signature = (tensor<4xf32>) -> tensor<4xf32>, workgroup_size = dense<[32, 1, 1]> : vector<3xi32>}
	hal.executable.binary attributes {data = dense<"0x0800000053505645A4FFFFFFA00600007000000004000000EEFFFFFF04000000010000000C000000000006000800040006000000040000000200000034000000100000000C001400040008000C0010000C000000010000000700000001000000200000000C0010000000040008000C000C0000000700000001000000200000008A010000030223070000010016000000390000000000000011000200010000000A000B005350565F4B48525F73746F726167655F6275666665725F73746F726167655F636C617373000000000E00030000000000010000000F000C00050000000F00000073696D706C654D6174685F72676E5F64697370617463685F
	def HAL_DeviceSwitchOp : HAL_Op<"device.switch"> {
	let summary = [{runtime device switch pseudo op}];
	let description = [{
	Switches between multiple regions based on the runtime device type.
	The provided regions are pattern-matched against the runtime backend of the
	given device and executed only when the device matches.

	As the patterns can match on wildcards this enables conditions that have
	similar bodies to be folded. The patterns themselves are only matched once
	at startup and then the results are cached; the runtime overhead is
	// Module with a single function carrying iree.module.export: the elementwise
	// product of two 4-element f32 tensors.
	module {
	  func @simple_mul(%lhs: tensor<4xf32>, %rhs: tensor<4xf32>) -> tensor<4xf32> attributes {iree.module.export} {
	    // Generic-form spelling of the multiply op; the "mul.1" name attribute is
	    // carried over unchanged.
	    %product = "xla_hlo.multiply"(%lhs, %rhs) {name = "mul.1"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
	    return %product : tensor<4xf32>
	  }
	}
	module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, producer = 370 : i32}} {
	flow.variable @"__iree_flow___sm_node186__m.layer-2.kernel" dense<1.200000e+00> : tensor<7x7x3x64xf32> attributes {sym_visibility = "private"}
	flow.variable @"__iree_flow___sm_node187__m.layer-2.bias" dense<0.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
	flow.variable @"__iree_flow___sm_node193__m.layer-3.gamma" dense<1.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
	flow.variable @"__iree_flow___sm_node194__m.layer-3.beta" dense<0.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
	flow.variable @"__iree_flow___sm_node195__m.layer-3.moving_mean" dense<0.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
	flow.variable @"__iree_flow___sm_node196__m.layer-3.moving_variance" dense<1.000000e+00> : tensor<64xf32> attributes {sym_visibility = "private"}
	flow.variable @"__iree_flow___sm_node213__m.layer-7.kernel" dense<1.2
	%25 = xla_hlo.minimum %23, %24 : tensor<1x10xf32>
	%26 = "xla_hlo.broadcast_in_dim"(%cst_0) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<f32>) -> tensor<1x10xf32>
	%27 = xla_hlo.maximum %25, %26 : tensor<1x10xf32>
	%28 = "xla_hlo.slice"(%8) {limit_indices = dense<[1, 40]> : tensor<2xi64>, start_indices = dense<[0, 30]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<1x40xf32>) -> tensor<1x10xf32>
	%29 = xla_hlo.multiply %6, %28 : tensor<1x10xf32>
	%30 = "xla_hlo.tanh"(%29) : (tensor<1x10xf32>) -> tensor<1x10xf32>
	%31 = xla_hlo.multiply %6, %30 : tensor<1x10xf32>
	%32 = xla_hlo.add %6, %31 : tensor<1x10xf32>
	%33 = "xla_hlo.tanh"(%27) : (tensor<1x10xf32>) -> tensor<1x10xf32>
	%34 = xla_hlo.multiply %32, %33 : tensor<1x10xf32>