@twmht
twmht / train.py
Created November 28, 2023 09:29
training code for the StanfordCars dataset
# inspired from https://www.kaggle.com/code/deepbear/pytorch-car-classifier-90-accuracy
# the dataset is downloaded from https://github.com/pytorch/vision/issues/7545
import torchvision
import torch
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.models as models
import time
import torch.optim as optim
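The preview stops at the imports; a minimal sketch of how the rest of the script might proceed, assuming torchvision's built-in StanfordCars wrapper (the images must be fetched manually per the linked issue) and illustrative hyperparameters:

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
# torchvision >= 0.12 ships a StanfordCars dataset class; download=True is
# broken upstream, so point `root` at a manually downloaded copy.
train_set = torchvision.datasets.StanfordCars(root='./data', split='train', transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = models.resnet34(pretrained=True)
model.fc = torch.nn.Linear(model.fc.in_features, 196)  # StanfordCars has 196 classes
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for epoch in range(10):
    start = time.time()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
    print('epoch %d: %.1fs, last loss %.4f' % (epoch, time.time() - start, loss.item()))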
@twmht
twmht / optimize.py
Last active October 20, 2023 11:28
# from tvm.contrib.torch import optimize_torch
import tvm.tir.tensor_intrin
import contextlib
import tempfile
import tvm
import onnx
from tvm import meta_schedule as ms
from tvm import relay
def get_network(weight, batch_size, layout="NHWC", dtype="float32", use_sparse=False):
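    # The preview cuts off here. A hedged guess at the body, inferred from the
    # imports and from the ConvertLayout / ToMixedPrecision warnings below
    # (input name and shape are illustrative):
    onnx_model = onnx.load(weight)  # `weight` treated as a path to an .onnx file
    shape_dict = {"input": (batch_size, 3, 224, 224)}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
    if layout == "NHWC":
        # ConvertLayout only gets explicit layouts for some ops; the rest
        # trigger the "Desired layout(s) not specified" warnings seen below.
        desired_layouts = {"nn.conv2d": ["NHWC", "default"]}
        with tvm.transform.PassContext(opt_level=3):
            mod = relay.transform.ConvertLayout(desired_layouts)(mod)
    # ToMixedPrecision emits the FTVMMixedPrecisionConversionType warning for
    # ops (e.g. layout_transform) that have no registered conversion rule.
    mod = relay.transform.ToMixedPrecision("float16")(mod)
    return mod, params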
[20:42:10] /home/acer/tvm/src/relay/transforms/convert_layout.cc:99: Warning: Desired layout(s) not specified for op: nn.max_pool2d
[20:42:10] /home/acer/tvm/src/relay/transforms/convert_layout.cc:99: Warning: Desired layout(s) not specified for op: nn.global_avg_pool2d
[20:42:14] /home/acer/tvm/src/relay/transforms/to_mixed_precision.cc:528: Warning: Op "layout_transform" not registered FTVMMixedPrecisionConversionType appears 2 times in graph.
[20:42:20] /home/acer/tvm/src/runtime/contrib/cudnn/conv_forward.cc:135: CUDNN Found 8 fwd algorithms, choosing CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
[20:42:20] /home/acer/tvm/src/runtime/contrib/cudnn/conv_forward.cc:138: 0) CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM - time: 1.27395 ms, Memory: 19071503
[20:42:20] /home/acer/tvm/src/runtime/contrib/cudnn/conv_forward.cc:138: 1) CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM - time: 1.27405 ms, Memory: 19071503
[20:42:20] /home/acer/tvm/src/runtime/contrib/cudnn/conv_forward.cc:138: 2) CUDNN_
[10:15:26] /home/acer/tvm/src/relay/transforms/convert_layout.cc:99: Warning: Desired layout(s) not specified for op: nn.max_pool2d
[10:15:27] /home/acer/tvm/src/relay/transforms/convert_layout.cc:99: Warning: Desired layout(s) not specified for op: nn.global_avg_pool2d
[10:15:35] /home/acer/tvm/src/relay/transforms/to_mixed_precision.cc:491: Warning: Op "layout_transform" not registered FTVMMixedPrecisionConversionType appears 2 times in graph.
2023-08-01 10:15:50 [INFO] Logging directory: /home/acer/test_meta_tensorcore_vgg16/logs
2023-08-01 10:15:51 [INFO] LocalBuilder: max_workers = 6
2023-08-01 10:15:51 [INFO] LocalRunner: max_workers = 1
2023-08-01 10:15:51 [INFO] [task_scheduler.cc:159] Initializing Task #0: "fused_nn_conv2d_add"
Traceback (most recent call last):
  File "test_meta_scheduler.py", line 79, in <module>
    database = ms.relay_integration.tune_relay(
[14:23:22] /home/acer/tvm/src/relay/transforms/convert_layout.cc:99: Warning: Desired layout(s) not specified for op: nn.max_pool2d
[14:23:22] /home/acer/tvm/src/relay/transforms/convert_layout.cc:99: Warning: Desired layout(s) not specified for op: nn.global_avg_pool2d
[14:23:31] /home/acer/tvm/src/relay/transforms/to_mixed_precision.cc:491: Warning: Op "layout_transform" not registered FTVMMixedPrecisionConversionType appears 2 times in graph.
2023-07-28 14:23:49 [INFO] Logging directory: /tmp/tmp_vfiw0j9/logs
2023-07-28 14:23:49 [INFO] LocalBuilder: max_workers = 6
2023-07-28 14:23:49 [INFO] LocalRunner: max_workers = 1
2023-07-28 14:23:49 [INFO] [task_scheduler.cc:159] Initializing Task #0: "fused_nn_conv2d_add"
Traceback (most recent call last):
  File "test_meta_scheduler.py", line 72, in <module>
    database = ms.relay_integration.tune_relay(
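Both tracebacks stop at this tune_relay call; for reference, a minimal meta schedule invocation looks roughly like this (the target string, work_dir, and trial budget are illustrative, not the script's actual values):

target = tvm.target.Target("nvidia/geforce-rtx-3070")
database = ms.relay_integration.tune_relay(
    mod=mod,                    # Relay IRModule from get_network(...)
    params=params,              # weights from get_network(...)
    target=target,
    work_dir="/tmp/tune_logs",  # source of the "Logging directory" lines above
    max_trials_global=2000,
)
# Once tuning finishes, build the module with the collected records:
lib = ms.relay_integration.compile_relay(database, mod, target, params)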
import torch
import torchvision.models as models
import torch.onnx as onnx
# Load the pretrained ResNet-50 model
model = models.resnet50(pretrained=True)
# Set the model to evaluation mode
model.eval()
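The preview ends before the actual export; a minimal continuation, assuming a standard 224x224 dummy input (file name and opset version are illustrative):

# ResNet expects (batch, channels, height, width) input
dummy_input = torch.randn(1, 3, 224, 224)
onnx.export(
    model,
    dummy_input,
    'resnet50.onnx',
    input_names=['input'],
    output_names=['output'],
    opset_version=13,
)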
@twmht
twmht / Dockerfile.forge
Last active March 17, 2023 07:21
Dockerfile for CUDA 11.3 + Ubuntu 20.04
FROM nvidia/cuda:11.3.0-runtime-ubuntu20.04
RUN apt-get update
RUN DEBIAN_FRONTEND=noninteractive apt-get install software-properties-common openssh-server curl sudo git -y
RUN apt-get install build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev -y
RUN add-apt-repository ppa:neovim-ppa/stable && apt-get update
RUN apt-get install neovim -y
RUN apt-get install tmux -y
# RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash && . ~/.nvm/nvm.sh && nvm install node && nvm alias default node
RUN curl -sSL install-node.vercel.app/lts | bash -s -- -y
_base_ = [
    'mmcls::_base_/datasets/cifar10_bs16.py',
    'mmcls::_base_/schedules/cifar10_bs128.py',
    'mmcls::_base_/default_runtime.py'
]
architecture = dict(
    type='mmcls.ImageClassifier',
    backbone=dict(
        type='mmcls.ResNet_CIFAR',
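        # The preview cuts off here; a plausible completion mirroring mmcls's
        # stock resnet18_cifar config (depth, channels, and class count are
        # assumptions for CIFAR-10):
        depth=18,
        num_stages=4,
        out_indices=(3, ),
        style='pytorch'),
    neck=dict(type='mmcls.GlobalAveragePooling'),
    head=dict(
        type='mmcls.LinearClsHead',
        num_classes=10,
        in_channels=512,
        loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0)))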
@twmht
twmht / index.html
Created April 2, 2014 14:27
Simple AJAX with Python
<!DOCTYPE html>
<html>
<head>
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.11.0/jquery.min.js">
</script>
<script>
function ProcessSimpleCgi()
{
    var param1Data = $("#param1").val();
    var param2Data = $("#param2").val();
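The description says "with Python", but the preview only shows the page; the server side would be a small CGI handler. A sketch of what it might look like (the response format and field names are assumptions, using the stdlib cgi module that was standard in 2014):

#!/usr/bin/env python
# Hypothetical CGI endpoint the jQuery call would post param1/param2 to.
import cgi

form = cgi.FieldStorage()
param1 = form.getvalue('param1', '')
param2 = form.getvalue('param2', '')

# A CGI response is a header block, a blank line, then the body.
print('Content-Type: text/plain')
print('')
print('received: ' + param1 + ', ' + param2)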
@twmht
twmht / reproduce1.py
Created November 17, 2021 05:49
reproduce a CuPy runtime error with PyTorch
import torch
import cupy as cp
import numpy as np
def fp16_clamp(x, min=None, max=None):
    if not x.is_cuda and x.dtype == torch.float16:
        # CPU float16 has no clamp implementation, so round-trip through float32
        return x.float().clamp(min, max).half()
    return x.clamp(min, max)
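The preview stops after the helper; a guess at the kind of mixed PyTorch/CuPy usage that follows, since errors like the one this gist reproduces typically surface when both libraries touch the same CUDA context (shapes and values are illustrative):

x = torch.randn(4, 4, dtype=torch.float16, device='cuda')
x = fp16_clamp(x, min=0.0)
# torch -> cupy is zero-copy via __cuda_array_interface__; both libraries
# now operate on the same device memory.
arr = cp.asarray(x)
y = torch.as_tensor(arr, device='cuda')
print(np.allclose(y.float().cpu().numpy(), x.float().cpu().numpy()))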