Skip to content

Instantly share code, notes, and snippets.

View kuhar's full-sized avatar

Jakub Kuderski kuhar

  • AMD (AI Group)
  • Toronto, ON, Canada
  • 05:55 (UTC -04:00)
View GitHub Profile
@kuhar
kuhar / generate_flame_graph.sh
Created March 26, 2024 22:10
Flame graph example
# Flame graph example: sample an iree-compile run with perf, then browse the
# resulting profile in pprof's web UI.

# Write 0 to the kernel's perf_event_paranoid setting. The redirect target is
# root-owned, so the write goes through `sudo tee` rather than a shell redirect.
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid

# Record the compiler invocation: -F 999 sets the sampling frequency and -g
# enables call-graph capture. Everything after `--` is the profiled command.
sudo perf record -F 999 -g -- \
  tools/iree-compile \
  --iree-hal-target-backends=rocm \
  --iree-rocm-target-chip=gfx942 \
  stable_diffusion_xl_base_1_0_64_1024x1024_fp16_unet_linalg_nithin.mlir \
  --iree-preprocessing-transform-spec-filename=config-preprocessing.mlir

# perf ran under sudo, so hand perf.data back to the invoking user before
# opening it without root.
sudo chown "${USER}:${USER}" perf.data

# Serve the recorded profile through pprof's interactive HTTP interface.
pprof -http : perf.data
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveAssignments:
  Enabled: false
  AcrossEmptyLines: false
  AcrossComments: false
// -----// IR Dump After TypePropagation (iree-codegen-type-propagation) //----- //
func.func @_reverse_dim0_dispatch_0() {
%c0 = arith.constant 0 : index
%0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<2x1xf32>>
%1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
%2 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<2x1xf32>> -> tensor<2x1xf32>
%3 = tensor.empty() : tensor<2x1xf32>
%4 = iree_linalg_ext.reverse dimensions(dense<0> : tensor<1xi64>) ins(%2 : tensor<2x1xf32>) outs(%3 : tensor<2x1xf32>) : tensor<2x1xf32>
flow.dispatch.tensor.store %4, %1, offsets = [0, 0], sizes = [2, 1], strides = [1, 1] : tensor<2x1xf32> -> !flow.dispatch.tensor<writeonly:tensor<2x1xf32>>
return
This file has been truncated, but you can view the full file.
// -----// IR Dump After TosaToSCF (tosa-to-scf) //----- //
func.func @main() {
%0 = util.unfoldable_constant dense<1> : tensor<1024x1024xi8>
%1 = util.unfoldable_constant dense<1> : tensor<1024x1024xi8>
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
%2 = tensor.empty() : tensor<1024x1024xi32>
%3 = linalg.fill ins(%c0_i32 : i32) outs(%2 : tensor<1024x1024xi32>) -> tensor<1024x1024xi32>
%4 = linalg.matmul ins(%0, %1 : tensor<1024x1024xi8>, tensor<1024x1024xi8>) outs(%3 : tensor<1024x1024xi32>) -> tensor<1024x1024xi32>
check.expect_eq_const(%4, dense<1024> : tensor<1024x1024xi32>) : tensor<1024x1024xi32>
@kuhar
kuhar / CMakeLists.txt
Last active August 29, 2015 14:18
Interval Arithmetic
cmake_minimum_required(VERSION 2.8.4)
project(ean_logic)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y -g -DNDEBUG=1")
set(
SOURCE_FILES example.cpp
IntervalArithmetic.cpp
Interval.cpp
)
@kuhar
kuhar / cc_fiexed.cpp
Created January 9, 2015 19:27
compile-time fixed-point
#include <ratio>
template <char... Ts>
struct seq;
template <class Seq>
struct parse_sign
{
static constexpr auto sign = intmax_t{1};
using tail = Seq;
@kuhar
kuhar / curry.cpp
Last active August 29, 2015 14:08
Curry
#include <iostream>
#include <type_traits>
#include <tuple>
using namespace std;
namespace impl
{
template <typename Functor, typename Tuple, size_t... Index>
@kuhar
kuhar / HttpClient.h
Created March 11, 2014 11:06
cocos2d-x 2.x HttpClient - checking file size before downloading
#ifndef KL_HTTP_CLIENT_H
#define KL_HTTP_CLIENT_H
#include "cocos2d.h"
#include "cocos-ext.h"
#include "ExtensionMacros.h"
USING_NS_CC;
USING_NS_CC_EXT;
#include "downloader/HttpRequest.h"
@kuhar
kuhar / Callback.h
Last active January 1, 2016 15:19
Cocos2d-x C++11 Callbacks
/*
* Callback.h
*
* Created on: Dec 28, 2013
* Author: Jakub 'kuhar' Kuderski
*/
#ifndef CALLBACK_H_
#define CALLBACK_H_
@kuhar
kuhar / Delegate.h
Last active January 1, 2016 00:49
C++11 delegates for cocos2d-x
/*
* Delegate.h
*
* Created on: Dec 18, 2013
* Author: Jakub 'kuhar' Kuderski
*/
#ifndef DELEGATE_H
#define DELEGATE_H