郑启航 zhen8838

# from tvm.script import ir as I
# from tvm.script import tir as T
# from tvm.script import relax as R
@I.ir_module
class Module:
    @T.prim_func(private=True)
    def cast(var_A: T.handle, var_compute: T.handle):
        T.func_attr({"tir.is_scheduled": 1, "tir.noalias": T.bool(True)})
        n = T.int32()
# from tvm.script import ir as I
# from tvm.script import tir as T
# from tvm.script import relax as R
@I.ir_module
class Module:
    @T.prim_func(private=True)
    def cast(var_A: T.handle, var_compute: T.handle):
        T.func_attr({"tir.noalias": T.bool(True)})
        n = T.int64()
# from tvm.script import ir as I
# from tvm.script import tir as T
# from tvm.script import relax as R
@I.ir_module
class Module:
    @T.prim_func(private=True)
    def extend_te(var_A: T.handle, var_concat_te: T.handle):
        T.func_attr({"tir.noalias": T.bool(True)})
        n = T.int64()
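
The three previews above are truncated TVMScript dumps. For reference, a minimal sketch of how such a dump is produced (an assumption, not from the gists: it needs a recent TVM build, and the static 8-element buffers and cast body stand in for the dynamic n-sized ones above):

# Sketch under assumptions: recent TVM installed; shapes are illustrative.
import tvm
from tvm.script import ir as I
from tvm.script import tir as T

@I.ir_module
class CastModule:
    @T.prim_func
    def cast(A: T.Buffer((8,), "float32"), compute: T.Buffer((8,), "int32")):
        T.func_attr({"tir.noalias": T.bool(True)})
        for i in range(8):
            with T.block("compute"):
                vi = T.axis.spatial(8, i)
                compute[vi] = T.Cast("int32", A[vi])

print(CastModule.script())  # round-trips to the "# from tvm.script import ..." form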
Saved variable root of type int32
Saved variable i of type int32
Saved variable j of type int32
Saved variable c of type int32
Saved variable t0 of type int32
[/Users/lisa/Documents/tiramisu/src/tiramisu_core.cpp:7183 computation]
| Constructing an unscheduled computation.
| Creating computation input
| Constructed iteration domain: {input[i, j, c] : 0<=i<100 and 0<=j<200 and 0<=c<3}
| [/Users/lisa/Documents/tiramisu/src/tiramisu_core.cpp:6972 init_computation]
@zhen8838
zhen8838 / computation.cpp
Created September 17, 2023 14:18
tiramisu internal
Saved variable t4 of type int32
[src/tiramisu_core.cpp:7196 computation]
| Constructing a scheduled computation.
| Creating computation bx
| Constructed iteration domain: {bx[i, j, c] : 0<=i<98 and 0<=j<198 and 0<=c<3}
| [src/tiramisu_core.cpp:6985 init_computation]
| | Constructing the computation: {bx[i, j, c] : 0<=i<98 and 0<=j<198 and 0<=c<3}
| | [src/tiramisu_core.cpp:2794 name_unnamed_iteration_domain_dimensions]
| | | named unnameed iteration domain: { bx[i, j, c] : 0 <= i <= 97 and 0 <= j <= 197 and 0 <= c <= 2 }
| | Constructing the computation name: bx
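
The iteration domains in these Tiramisu logs are ISL sets. A hedged sketch (assuming the islpy bindings, which are not part of the gist) that rebuilds the bx domain from the log and counts its points:

# Sketch under assumptions: islpy installed; the set string is copied from the log.
import islpy as isl

bx = isl.Set("{ bx[i, j, c] : 0 <= i < 98 and 0 <= j < 198 and 0 <= c < 3 }")
print(bx)              # isl normalizes to 0 <= i <= 97 etc., matching the log
print(bx.count_val())  # number of iteration points: 98 * 198 * 3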
@zhen8838
zhen8838 / mesh_backward.py
Created April 18, 2023 05:37
mesh net graphviz
node_dict = {
    "VECTOR_OUT_0": "VECTOR_OUT_0 [label = \"VECTOR_OUT_0\";shape = record;];",
    "CONST1_0": "CONST1_0 [label = \"CONST1\";shape = record;];",
    "CONST1_1": "CONST1_1 [label = \"CONST2\";shape = record;];",
    "CONST2_0": "CONST2_0 [label = \"CONST3\";shape = record;];",
    "CONST2_1": "CONST2_1 [label = \"CONST4\";shape = record;];",
    "ADDSUB0": "ADDSUB0 [label = \"ADDSUB0 | <0> | <1> \";shape = record;];",
    "ADDSUB1": "ADDSUB1 [label = \"ADDSUB1 | <0> | <1> \";shape = record;];",
    "ADDSUB2": "ADDSUB2 [label = \"ADDSUB2 | <0> | <1> \";shape = record;];",
    "ADDSUB3": "ADDSUB3 [label = \"ADDSUB3 | <0> | <1> \";shape = record;];",
@zhen8838
zhen8838 / get_rsp.c
Last active May 30, 2022 16:13
Read the stack pointer on Mac M1 (the compiler is clang 13)
#include <stdio.h>
/* Writes the current stack pointer into *value. The ldr reloads the spilled
 * `value` argument from [sp, #8]; this relies on clang 13's unoptimized (-O0)
 * arm64 frame layout on Apple Silicon. */
void get_rsp(unsigned long *value)
{
  __asm__("ldr x8, [sp, #8]");
  __asm__("mov x9, sp");
  __asm__("str x9, [x8]");
}
int main()
{
  unsigned long sp = 0;  /* hypothetical driver; the gist preview truncates here */
  get_rsp(&sp);
  printf("sp = 0x%lx\n", sp);
  return 0;
}
from torch import nn
import torch

groups = 2  # `groups` splits both the input and the output channels into this
            # many groups; within each group it is an ordinary convolution.
conv2d = nn.Conv2d(in_channels=6, out_channels=10, kernel_size=3, groups=groups, bias=True)
w = conv2d.weight  # [oc, ic/groups, kh, kw]
input = torch.rand(1, 6, 6, 6)
in_shape = input.shape
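
A short verification sketch (not part of the original gist): run each channel group through its own slice of the weights with an ordinary convolution and compare against the grouped op:

# Sketch: grouped conv == per-group ordinary convs; the slice sizes follow from
# in_channels=6, out_channels=10, groups=2 above.
import torch.nn.functional as F

out = conv2d(input)                                # [1, 10, 4, 4]
parts = []
for g in range(groups):
    x_g = input[:, g * 3:(g + 1) * 3]              # 6/2 = 3 input channels per group
    w_g = conv2d.weight[g * 5:(g + 1) * 5]         # 10/2 = 5 output channels per group
    b_g = conv2d.bias[g * 5:(g + 1) * 5]
    parts.append(F.conv2d(x_g, w_g, b_g))
assert torch.allclose(out, torch.cat(parts, dim=1), atol=1e-6)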
@zhen8838
zhen8838 / UnitTestDLLCall.cs
Created April 9, 2022 05:29
GetDynamicDeleType
using Xunit;
using System;
using System.IO;
using Nncase.IR;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Linq;
using System.Linq.Expressions;
using System.Reflection;
using System.Reflection.Emit;
import tensorflow as tf

def focal_sigmoid_cross_entropy_with_logits(labels: tf.Tensor,
                                            logits: tf.Tensor,
                                            gamma: float = 2.0,
                                            alpha: float = 0.25):
    pred_sigmoid = tf.nn.sigmoid(logits)
    # pt is the probability mass on the wrong class, so pt**gamma down-weights
    # easy examples and keeps the loss focused on hard ones.
    pt = (1 - pred_sigmoid) * labels + pred_sigmoid * (1 - labels)
    focal_weight = (alpha * labels + (1 - alpha) * (1 - labels)) * tf.math.pow(pt, gamma)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels, logits) * focal_weight
    return loss
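
A hedged usage example (not from the gist; the labels and logits are toy values):

# Toy batch, purely illustrative.
labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
logits = tf.constant([[2.0, -1.0], [0.5, 0.3]])
print(tf.reduce_mean(focal_sigmoid_cross_entropy_with_logits(labels, logits)))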