
# Ubuntu 22.04 LTS
sudo apt install htop
sudo apt install curl
sudo apt install wget
sudo apt install git
sudo apt install clang-format
sudo apt install zsh
sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "haswell", cpu_features = "-prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-xsaves,-avx512fp16,-usermsr,-sm4,+sse4.1,-avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,-xsavec,-avx10.1-512,-avx512vpopcntdq,+cmov,-avx512vp2intersect,-avx512cd,+movbe,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,-adx,+avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,-clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-gfni,-avxvnniint16,-amx-fp16,+xsaveopt,+rdrnd,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,+cx8,-avx512bw,+sse3,-pku,+fsgsbase,-clzero,-mwaitx,-lwp,+lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,-avx512bitalg,-rdpru,-clwb,+mmx,+sse2,-rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,-waitpkg,-sgx,+fxsr,-avx512dq,-sse4a", d
(shark.venv) ➜ SHARK git:(main) ✗ iree-compile chatglm-6b-int4.mlir \
  --iree-input-type=tm_tensor \
  --iree-vm-bytecode-module-output-format=flatbuffer-binary \
  --iree-hal-target-backends=llvm-cpu \
  --mlir-print-debuginfo \
  --mlir-print-op-on-diagnostic=false \
  --mlir-pass-pipeline-crash-reproducer=/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp/core-reproducer.mlir \
  --iree-llvmcpu-target-cpu-features=host \
  --iree-llvmcpu-target-triple=x86_64-linux-gnu \
  --iree-llvmcpu-enable-ukernels \
  --iree-llvmcpu-stack-allocation-limit=256000 \
  --iree-global-opt-enable-quantized-matmul-reassociation \
  --iree-stream-resource-max-allocation-size=4294967295 \
  --iree-vm-bytecode-module-strip-source-map=true \
  --iree-util-zero-fill-elided-attrs \
  --iree-opt-strip-assertions=false \
  --verify=true \
  --iree-flow-break-dispatch=@forward:9 \
  -o /tmp/chatglm9.vmfb
<eval_with_key>.5:38:41: warning: skipping consteval initializer: unsupported type for current jit configuration: 'tensor<4608x64x64xi4>'
<eval_with_key>.5:173:43: warning: skipping consteval initializer:
(shark.venv) ➜ SHARK git:(main) ✗ python nan/qwen_compile.py
shark_tank local cache is located at /home/chi/.local/shark_tank/ . You may change this by setting the --local_tank_cache= flag
tokenizer_config.json: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 174/174 [00:00<00:00, 641kB/s]
tokenization_qwen.py: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.62k/9.62k [00:00<00:00, 36.3MB/s]
A new version of the following files was downloaded from https://huggingface.co/Qwen/Qwen-7B-Chat:
- tokenization_qwen.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
qwen.tiktoken: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2.56M/2.56M [00:00<00:00, 13.8MB/s]
[DEBUG] generating mlir on device
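
The loader warning above suggests pinning a revision so the remote code files (tokenization_qwen.py) are not silently re-downloaded on later runs. A minimal sketch of that fix; the commit SHA below is a placeholder, not a real revision of Qwen/Qwen-7B-Chat:

from transformers import AutoTokenizer

# Pin the remote-code checkout to one audited commit instead of pulling the
# latest tokenization_qwen.py from the Hub on every fresh cache.
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen-7B-Chat",
    trust_remote_code=True,  # required: Qwen ships a custom tokenizer class
    revision="0123abc",      # hypothetical commit SHA; substitute a real one
)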
(shark.venv) ➜ SHARK git:(main) ✗ python nan/chatglm.py
shark_tank local cache is located at /home/chi/.local/shark_tank/ . You may change this by setting the --local_tank_cache= flag
[DEBUG] generating mlir on device
/nodclouddata/chi/src/SHARK/nan/chatglm.py:103: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
input_ids = torch.tensor(input_ids)
/nodclouddata/chi/src/SHARK/shark.venv/lib/python3.11/site-packages/transformers/utils/generic.py:311: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
torch.utils._pytree._register_pytree_node(
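
The first warning points at line 103 of chatglm.py, where torch.tensor() is called on a value that is already a tensor. A sketch of the fix the warning recommends; the surrounding chatglm.py code is not shown here, so the placeholder token ids are assumed:

import torch

input_ids = torch.tensor([[64790, 64792]])  # placeholder token ids

# torch.tensor(input_ids) re-wraps an existing tensor and triggers the
# UserWarning; an explicit detached copy is the recommended replacement:
input_ids = input_ids.clone().detach()
# or, if gradients are needed on the copy:
# input_ids = input_ids.clone().detach().requires_grad_(True)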
(shark.venv) ➜ SHARK git:(main) ✗ iree-compile chatglm-6b-int4.mlir \
  --iree-input-type=tm_tensor \
  --iree-vm-bytecode-module-output-format=flatbuffer-binary \
  --iree-hal-target-backends=llvm-cpu \
  --mlir-print-debuginfo \
  --mlir-print-op-on-diagnostic=false \
  --mlir-pass-pipeline-crash-reproducer=/nodclouddata/chi/src/SHARK/nan/dispatch/2/tmp/core-reproducer.mlir \
  --iree-llvmcpu-target-cpu-features=host \
  --iree-llvmcpu-target-triple=x86_64-linux-gnu \
  --iree-llvmcpu-enable-ukernels \
  --iree-llvmcpu-stack-allocation-limit=256000 \
  --iree-global-opt-enable-quantized-matmul-reassociation \
  --iree-stream-resource-max-allocation-size=4294967295 \
  --iree-vm-bytecode-module-strip-source-map=true \
  --iree-util-zero-fill-elided-attrs \
  --iree-opt-strip-assertions=false \
  --verify=true \
  --iree-flow-break-dispatch=@forward:9 \
  --iree-flow-trace-dispatch-tensors \
  -mlir-print-ir-after=iree-flow-annotate-dispatches \
  -mlir-elide-elementsattrs-if-larger=4 \
  -o /tmp/chatglm9.vmfb
// -----// IR Dump After AnnotateDispatches (iree-flow-annotate-dispatches) //----- //
hal.executable public @forward_dispatch_9 {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "haswell", cpu_features = "-prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-xsaves,-avx512fp16,-usermsr,-sm4,+sse4.1,-avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,-xsavec,-avx10.1-512,-avx512vpopcntdq,+cmov,-avx512vp2intersect,-avx512cd,+movbe,-avxvnniint8,-avx512er,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,-adx,+avx2,-hreset,-movdiri,-serialize,-vpclmulqdq,-avx512vl,-uintr,-clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-gfni,-avxvnniint16,-amx-fp16,+xsaveopt,+rdrnd,-avx512f,-amx-bf16,-avx512bf16,-avx512vnni,+cx8,-avx512bw,+sse3,-pku,+fsgsbase,-clzero,-mwaitx,-lwp,+lzcnt,-sha,-movdir64b,-wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,-avx512bitalg,-rdpru,-clwb,+mmx,+sse2,-rdseed,-avx512vbmi2,-prefetchi,-rdpid,-fma4,-avx512vbmi,-shstk,-vaes,
(shark.venv) ➜ SHARK git:(main) ✗ iree-run-module --help
# ============================================================================
# 👻 IREE: iree-run-module
# ============================================================================
Runs a function within a compiled IREE module and handles I/O parsing
and optional expected value verification/output processing. Modules
can be provided by file path (`--module=file.vmfb`) or read from stdin
(`--module=-`) and the function to execute matches the original name
provided to the compiler (`--function=foo` for `func.func @foo`).
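
Putting the help text to use: the module compiled above can be executed by pointing --module at the .vmfb file and --function at the exported entry point. A sketch wrapping the CLI from Python; the --input value is a placeholder, since forward's real signature depends on the model:

import subprocess

# Run the forward function of the module compiled earlier. The --input
# shape/dtype is hypothetical and must match the entry point's signature.
subprocess.run([
    "iree-run-module",
    "--module=/tmp/chatglm9.vmfb",
    "--function=forward",
    "--input=1x32xi64=0",  # placeholder: 1x32 int64 tensor splatted with 0
], check=True)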
# AmosLewis / llama_torch2linalg.mlir (created December 5, 2023)
(turbine_venv) ➜ SHARK-Turbine git:(bump-iree) ✗ torch-mlir-opt tests/dynamo/llama_test.mlir -convert-torch-to-linalg
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map3 = affine_map<(d0, d1, d2, d3) -> (0, 0, d2, d3)>
#map4 = affine_map<(d0, d1, d2) -> (d0, d1, 0)>
#map5 = affine_map<(d0, d1, d2) -> (d2)>
#map6 = affine_map<(d0, d1) -> (d0, d1)>
#map7 = affine_map<(d0, d1) -> (d1, d0)>
#map8 = affine_map<(d0, d1, d2, d3) -> (0, d1, 0, d3)>
#map9 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3)>
module {
func.func @main(%arg0: !torch.vtensor<[64],f32>, %arg1: !torch.vtensor<[64],f32>, %arg2: !torch.vtensor<[64],f32>, %arg3: !torch.vtensor<[64],f32>, %arg4: !torch.vtensor<[64],f32>, %arg5: !torch.vtensor<[16,64],f32>, %arg6: !torch.vtensor<[64,64],f32>, %arg7: !torch.vtensor<[64,64],f32>, %arg8: !torch.vtensor<[64,64],f32>, %arg9: !torch.vtensor<[64,64],f32>, %arg10: !torch.vtensor<[256,64],f32>, %arg11: !torch.vtensor<[256,64],f32>, %arg12: !torch.vtensor<[64,256],f32>, %arg13: !torch.vtensor<[64,64],f32>, %arg14: !torch.vtensor<[64,64],f32>, %arg15: !torch.vtensor<[64,64],f32>, %arg16: !torch.vtensor<[64,64],f32>, %arg17: !torch.vtensor<[256,64],f32>, %arg18: !torch.vtensor<[256,64],f32>, %arg19: !torch.vtensor<[64,256],f32>, %arg20: !torch.vtensor<[16,64],f32>, %arg21: !torch.vtensor<[4096,8],complex<f32>>, %arg22: !torch.vtensor<[32,2048,4,16],f32>, %arg23: !torch.vtensor<[32,2048,4,16],f32>, %arg24: !torch.vtensor<[32,2048,4,16],f32>, %arg25: !torch.vtensor<[32,2048,4,16],f32>, %arg26: !torch.v
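
The same torch-to-linalg lowering can also be driven from Python instead of torch-mlir-opt. A sketch, assuming the legacy torch_mlir.compile entry point that torch-mlir shipped around this time; the toy module stands in for the llama test case:

import torch
import torch_mlir

class Toy(torch.nn.Module):
    def forward(self, x):
        return torch.nn.functional.relu(x)

# Lower through the torch dialect down to linalg-on-tensors, the same level
# the torch-mlir-opt invocation above targets with -convert-torch-to-linalg.
module = torch_mlir.compile(
    Toy(),
    torch.ones(64, dtype=torch.float32),  # matches the [64],f32 args above
    output_type=torch_mlir.OutputType.LINALG_ON_TENSORS,
)
print(module)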