Jakub Kuderski kuhar

## generate_flame_graph.sh
# Write 0 to the kernel's perf_event_paranoid setting before profiling.
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid

# Profile one iree-compile run (ROCm backend, gfx942 chip) under perf with
# call-graph recording (-g) at the requested sampling rate (-F 999).
sudo perf record -F 999 -g -- \
	tools/iree-compile \
	--iree-hal-target-backends=rocm \
	--iree-rocm-target-chip=gfx942 \
	stable_diffusion_xl_base_1_0_64_1024x1024_fp16_unet_linalg_nithin.mlir \
	--iree-preprocessing-transform-spec-filename=config-preprocessing.mlir

# perf ran under sudo, so hand perf.data back to the invoking user
# (presumably it is root-owned at this point).
sudo chown "$USER:$USER" perf.data

# Open the recorded profile in pprof's web UI.
pprof -http : perf.data

## google.config
---
Language:        Cpp
# BasedOnStyle:  Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveAssignments:
  Enabled:         false
  AcrossEmptyLines: false
  AcrossComments:  false

## out.log
// -----// IR Dump After TypePropagation (iree-codegen-type-propagation) //----- //
func.func @_reverse_dim0_dispatch_0() {
  %c0 = arith.constant 0 : index
  %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1xf32>>
  %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
  %2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x1xf32>> -> tensor<2x1xf32>
  %3 = tensor.empty() : tensor<2x1xf32>
  %4 = iree_linalg_ext.reverse dimensions(dense<0> : tensor<1xi64>) ins(%2 : tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) : tensor<2x1xf32>
  flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
  return

## out.log
// -----// IR Dump After TosaToSCF (tosa-to-scf) //----- //
func.func @main() {
  %0 = util.unfoldable_constant dense<1> : tensor<1024x1024xi8>
  %1 = util.unfoldable_constant dense<1> : tensor<1024x1024xi8>
  %c0_i32 = arith.constant 0 : i32
  %c0 = arith.constant 0 : index
  %2 = tensor.empty() : tensor<1024x1024xi32>
  %3 = linalg.fill ins(%c0_i32 : i32) outs(%2 : tensor<1024x1024xi32>) -> tensor<1024x1024xi32>
  %4 = linalg.matmul ins(%0, %1 : tensor<1024x1024xi8>, tensor<1024x1024xi8>) outs(%3 : tensor<1024x1024xi32>) -> tensor<1024x1024xi32>
  check.expect_eq_const(%4, dense<1024> : tensor<1024x1024xi32>) : tensor<1024x1024xi32>

## CMakeLists.txt
# Build configuration for the ean_logic project.
#
# The version range keeps 2.8.4 as the oldest supported CMake while opting in
# to policy behavior up to 3.10. CMake >= 4.0 rejects policy versions older
# than 3.5, so a bare "VERSION 2.8.4" no longer configures there. CMake
# releases before 3.12 ignore the "...3.10" suffix and behave as before.
cmake_minimum_required(VERSION 2.8.4...3.10)
project(ean_logic)

# Append to (rather than replace) any flags supplied by the user:
#   -std=c++1y  select the C++14 draft dialect
#   -g          emit debug information
#   -DNDEBUG=1  define NDEBUG for every translation unit
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y -g -DNDEBUG=1")

# Source files of the project, gathered in SOURCE_FILES; presumably consumed
# by a target definition outside this excerpt.
set(
	SOURCE_FILES example.cpp
	IntervalArithmetic.cpp
	Interval.cpp
	)

## cc_fiexed.cpp
#include <ratio>

template <char... Ts>
struct seq;

template <class Seq>
struct parse_sign
{
	static constexpr auto sign = intmax_t{1};
	using tail = Seq;

## curry.cpp
#include <iostream>
#include <type_traits>
#include <tuple>
using namespace std;


namespace impl
{

	template <typename Functor, typename Tuple, size_t... Index>

## HttpClient.h
#ifndef KL_HTTP_CLIENT_H
#define KL_HTTP_CLIENT_H

#include "cocos2d.h"
#include "cocos-ext.h"
#include "ExtensionMacros.h"
USING_NS_CC;
USING_NS_CC_EXT;

#include "downloader/HttpRequest.h"

## Callback.h
/*
* Callback.h
*
*  Created on: Dec 28, 2013
*      Author: Jakub 'kuhar' Kuderski
*/

#ifndef CALLBACK_H_
#define CALLBACK_H_

## Delegate.h
/*
* Delegate.h
*
*  Created on: Dec 18, 2013
*      Author: Jakub 'kuhar' Kuderski
*/

#ifndef DELEGATE_H
#define DELEGATE_H
	echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid

	sudo perf record -F 999 -g -- tools/iree-compile --iree-hal-target-backends=rocm --iree-rocm-target-chip=gfx942 stable_diffusion_xl_base_1_0_64_1024x1024_fp16_unet_linalg_nithin.mlir --iree-preprocessing-transform-spec-filename=config-preprocessing.mlir
	sudo chown "$USER:$USER" perf.data

	pprof -http : perf.data
	---
	Language: Cpp
	# BasedOnStyle: Google
	AccessModifierOffset: -1
	AlignAfterOpenBracket: Align
	AlignArrayOfStructures: None
	AlignConsecutiveAssignments:
	Enabled: false
	AcrossEmptyLines: false
	AcrossComments: false
	// -----// IR Dump After TypePropagation (iree-codegen-type-propagation) //----- //
	func.func @_reverse_dim0_dispatch_0() {
	%c0 = arith.constant 0 : index
	%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1xf32>>
	%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
	%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x1xf32>> -> tensor<2x1xf32>
	%3 = tensor.empty() : tensor<2x1xf32>
	%4 = iree_linalg_ext.reverse dimensions(dense<0> : tensor<1xi64>) ins(%2 : tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) : tensor<2x1xf32>
	flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
	return
	// -----// IR Dump After TosaToSCF (tosa-to-scf) //----- //
	func.func @main() {
	%0 = util.unfoldable_constant dense<1> : tensor<1024x1024xi8>
	%1 = util.unfoldable_constant dense<1> : tensor<1024x1024xi8>
	%c0_i32 = arith.constant 0 : i32
	%c0 = arith.constant 0 : index
	%2 = tensor.empty() : tensor<1024x1024xi32>
	%3 = linalg.fill ins(%c0_i32 : i32) outs(%2 : tensor<1024x1024xi32>) -> tensor<1024x1024xi32>
	%4 = linalg.matmul ins(%0, %1 : tensor<1024x1024xi8>, tensor<1024x1024xi8>) outs(%3 : tensor<1024x1024xi32>) -> tensor<1024x1024xi32>
	check.expect_eq_const(%4, dense<1024> : tensor<1024x1024xi32>) : tensor<1024x1024xi32>
	cmake_minimum_required(VERSION 2.8.4)
	project(ean_logic)

	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y -g -DNDEBUG=1")

	set(
	SOURCE_FILES example.cpp
	IntervalArithmetic.cpp
	Interval.cpp
	)
	#include <ratio>

	template <char... Ts>
	struct seq;

	template <class Seq>
	struct parse_sign
	{
	static constexpr auto sign = intmax_t{1};
	using tail = Seq;
	#include <iostream>
	#include <type_traits>
	#include <tuple>
	using namespace std;


	namespace impl
	{

	template <typename Functor, typename Tuple, size_t... Index>
	#ifndef KL_HTTP_CLIENT_H
	#define KL_HTTP_CLIENT_H

	#include "cocos2d.h"
	#include "cocos-ext.h"
	#include "ExtensionMacros.h"
	USING_NS_CC;
	USING_NS_CC_EXT;

	#include "downloader/HttpRequest.h"
	/*
	* Callback.h
	*
	* Created on: Dec 28, 2013
	* Author: Jakub 'kuhar' Kuderski
	*/

	#ifndef CALLBACK_H_
	#define CALLBACK_H_
	/*
	* Delegate.h
	*
	* Created on: Dec 18, 2013
	* Author: Jakub 'kuhar' Kuderski
	*/

	#ifndef DELEGATE_H
	#define DELEGATE_H