Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

View Laurawly's full-sized avatar

Leyuan Wang Laurawly

  • Bytedance
  • United States
View GitHub Profile
import os
import numpy as np
import tvm
from tvm import te, auto_scheduler, topi
from tvm.topi.testing import conv2d_nchw_python
from tvm.contrib import cublas
target = tvm.target.Target('cuda')
M = 8192
#from dlib_alignment import dlib_detect_face, face_recover
import torch
from PIL import Image
import torchvision.transforms as transforms
from models.SRGAN_model import SRGANModel
import numpy as np
import argparse
#import utils
import cv2
import random
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
#include <mma.h>
extern "C" __global__ void default_function_kernel0( int* __restrict__ A, int* __restrict__ B, int* __restrict__ compute) {
nvcuda::wmma::fragment<nvcuda::wmma::accumulator, 8, 8, 32, int> compute_wmma_accumulator[2];
__shared__ int A_shared[512];
__shared__ int B_shared[512];
nvcuda::wmma::fragment<nvcuda::wmma::matrix_a, 8, 8, 32, nvcuda::wmma::experimental::precision::s4, nvcuda::wmma::row_major> A_shared_wmma_matrix_a[2];
nvcuda::wmma::fragment<nvcuda::wmma::matrix_b, 8, 8, 32, nvcuda::wmma::experimental::precision::s4, nvcuda::wmma::col_major> B_shared_wmma_matrix_b[1];
for (int i_c_init = 0; i_c_init < 1; ++i_c_init) {
(void)nvcuda::wmma::fill_fragment(compute_wmma_accumulator[i_c_init], 0.000000e+00f);
}
This file has been truncated, but you can view the full file.
t # 0
69244 276143 1 1
v 0 1
v 1 1
v 2 1
v 3 1
v 4 1
v 5 1
v 6 1
v 7 1
t # 0
5 4 1 1
v 0 1
v 1 1
v 2 1
v 3 1
v 4 1
e 0 1 1
e 0 2 1
e 1 3 1
// attr [get_valid_counts_phase_one.v0] storage_scope = "global"
allocate get_valid_counts_phase_one.v0[int32 * 15360]
// attr [get_valid_counts_phase_one.v1] storage_scope = "global"
allocate get_valid_counts_phase_one.v1[int32 * 15360]
// attr [get_valid_counts_phase_two.v1] storage_scope = "global"
allocate get_valid_counts_phase_two.v1[int32 * 496]
// attr [get_valid_counts_phase_two.v0] storage_scope = "global"
allocate get_valid_counts_phase_two.v0[int32 * 15360]
// attr [get_valid_counts_phase_three] storage_scope = "global"
allocate get_valid_counts_phase_three[int32 * 496]
Traceback (most recent call last):
File "demo.py", line 19, in <module>
classifier = gcv.model_zoo.get_model('resnet50_v1d', pretrained=True, ctx=ctx)
File "/home/laurawly/workspace/install/lib/python3.6/site-packages/gluoncv-0.5.0-py3.6.egg/gluoncv/model_zoo/model_zoo.py", line 241, in get_model
File "/home/laurawly/workspace/install/lib/python3.6/site-packages/gluoncv-0.5.0-py3.6.egg/gluoncv/model_zoo/resnetv1b.py", line 629, in resnet50_v1d
File "/home/laurawly/workspace/install/lib/python3.6/site-packages/mxnet/gluon/block.py", line 402, in load_parameters
params[name]._load_init(loaded[name], ctx)
File "/home/laurawly/workspace/install/lib/python3.6/site-packages/mxnet/gluon/parameter.py", line 271, in _load_init
self._init_impl(data, ctx)
File "/home/laurawly/workspace/install/lib/python3.6/site-packages/mxnet/gluon/parameter.py", line 311, in _init_impl
Traceback (most recent call last):
File "topi/tests/python/test_topi_vision.py", line 437, in <module>
test_get_valid_counts()
File "topi/tests/python/test_topi_vision.py", line 84, in test_get_valid_counts
verify_get_valid_counts((1, 15360, 5), 0)
File "topi/tests/python/test_topi_vision.py", line 74, in verify_get_valid_counts
check_device(device)
File "topi/tests/python/test_topi_vision.py", line 70, in check_device
tvm.testing.assert_allclose(tvm_out1.asnumpy(), np_out1, rtol=1e-3)
File "/home/ubuntu/workplace/tvm/python/tvm/_ffi/ndarray.py", line 283, in asnumpy
def test_sort():
dshape = (1, 500)
data = tvm.placeholder(dshape, name="data")
np_data = np.random.rand(dshape[0], dshape[1]).astype(data.dtype)
print(np_data)
np_result = np.argsort(-np_data)
def check_device(device):
ctx = tvm.context(device, 0)
if not ctx.exist:
print("Skip because %s is not enabled" % device)