JackieWu (wkcn)
🐳 Tell Your World 🎵
China
import math
import torch
from torch import nn
from torch.nn import LayerNorm
import numpy as np
from megatron.enums import AttnMaskType
from megatron.model.fused_layer_norm import MixedFusedLayerNorm
from megatron.model.fused_softmax import FusedScaleMaskSoftmax
from megatron.model.utils import attention_mask_func
from megatron.global_vars import _parse_args
from flash_attn.flash_attention import FlashAttention

class Attention(nn.Module):
    use_flash_attn: bool = False

    def __init__(
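The preview above wires FlashAttention into a Megatron-style attention block but is cut off at __init__. For reference, the fused kernel computes the same softmax(QK^T / sqrt(d))V as the plain PyTorch below; this is a minimal comparison sketch, not the gist's implementation, and the (batch, heads, seq, head_dim) layout is an assumption.

import math
import torch

def reference_attention(q, k, v, causal=False):
    # q, k, v: (batch, heads, seq, head_dim) -- assumed layout for this sketch
    scale = 1.0 / math.sqrt(q.size(-1))
    scores = torch.matmul(q, k.transpose(-2, -1)) * scale
    if causal:
        seq = scores.size(-1)
        # mask out positions to the right of the query (upper triangle)
        mask = torch.triu(torch.ones(seq, seq, dtype=torch.bool, device=q.device), diagonal=1)
        scores = scores.masked_fill(mask, float("-inf"))
    probs = torch.softmax(scores, dim=-1)
    return torch.matmul(probs, v)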
@wkcn
wkcn / synset_words.txt
Created January 14, 2023 11:37
ImageNet-1k classification names
n01440764 tench, Tinca tinca
n01443537 goldfish, Carassius auratus
n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
n01491361 tiger shark, Galeocerdo cuvieri
n01494475 hammerhead, hammerhead shark
n01496331 electric ray, crampfish, numbfish, torpedo
n01498041 stingray
n01514668 cock
n01514859 hen
n01518878 ostrich, Struthio camelus
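Each line of synset_words.txt maps a WordNet ID to its comma-separated ImageNet-1k class names. A small parsing sketch (the path and dict layout here are illustrative, not part of the gist):

def load_synset_words(path="synset_words.txt"):
    # returns {wnid: [name, ...]}, e.g. "n01440764" -> ["tench", "Tinca tinca"]
    mapping = {}
    with open(path) as f:
        for line in f:
            wnid, _, names = line.strip().partition(" ")
            mapping[wnid] = [n.strip() for n in names.split(",")]
    return mapping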
@wkcn
wkcn / fp8_gemm.py
Created October 17, 2022 09:50
FP8GEMM
import torch
import transformer_engine.pytorch.cpp_extensions as texcpp
from transformer_engine.pytorch.module import get_workspace
import transformer_engine_extensions as tex
scale = 1.0
meta = tex.FP8TensorMeta()
meta.scale = torch.ones(1, dtype=torch.float32, device="cuda") * scale
meta.scale_inv = torch.ones(1, dtype=torch.float32, device="cuda") / scale
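The scale / scale_inv pair in FP8TensorMeta implements per-tensor scaling: inputs are multiplied by scale before the cast to FP8 and results are multiplied by scale_inv afterwards. A conceptual emulation in plain PyTorch (an illustration of the scaling idea only, not transformer_engine's kernel; the E4M3 maximum of 448 is the assumed format limit):

import torch

E4M3_MAX = 448.0  # largest finite value in the FP8 E4M3 format

def fake_fp8_quant(x, scale):
    # scale up and clamp to the FP8 dynamic range, approximating the cast
    return torch.clamp(x * scale, -E4M3_MAX, E4M3_MAX)

def fake_fp8_dequant(y, scale_inv):
    # undo the scaling after the low-precision GEMM
    return y * scale_inv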
@wkcn
wkcn / measure_fp8_speed.py
Created October 16, 2022 15:42
measure FP8 speed
import torch
import transformer_engine.pytorch as te
from transformer_engine.common.recipe import Format, DelayedScaling
import transformer_engine_extensions as tex
import copy
import math
from typing import Callable, Optional
def speedometer(
    module: torch.nn.Module,
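The speedometer helper is cut off by the preview; the usual pattern for this kind of benchmark is to warm up, then time repeated passes with CUDA events. A generic sketch of that pattern (argument names and iteration counts are made up here, not taken from the gist):

import torch

def time_module(module, inp, warmup=10, iters=100):
    # warm-up so lazy initialization and autotuning do not pollute the timing
    for _ in range(warmup):
        module(inp)
    torch.cuda.synchronize()
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    for _ in range(iters):
        module(inp)
    end.record()
    torch.cuda.synchronize()
    return start.elapsed_time(end) / iters  # mean milliseconds per iteration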
@wkcn
wkcn / op.h
Last active July 11, 2019 08:35
MXNet CPP Op
This file has been truncated.
/*!
* Copyright (c) 2019 by Contributors
* \file op.h
* \brief definition of all the operators
* \author Chuntao Hong, Xin Li
*/
#ifndef MXNET_CPP_OP_H_
#define MXNET_CPP_OP_H_

Type these commands in CMD, in order:

bcdedit /create {0cb3b571-2f2e-4343-a879-d86a476d7215} /d "DebugTool" /application osloader

bcdedit /set {0cb3b571-2f2e-4343-a879-d86a476d7215} path "\EFI\Microsoft\Boot\SecConfig.efi"

bcdedit /set {bootmgr} bootsequence {0cb3b571-2f2e-4343-a879-d86a476d7215}

@wkcn
wkcn / custom_op_design.cpp
Created April 27, 2019 03:40
Custom Operator Design
#include <iostream>
#include <initializer_list>
using namespace std;
typedef union {
  int64_t v_int64;
  double v_float64;
  void* v_handle;
} TValue;
@wkcn
wkcn / del_thread.py
Last active April 20, 2019 01:54
Deleter Thread
import mxnet as mx
from mxnet.base import check_call, _LIB
from multiprocessing.pool import ThreadPool
import time
num_workers = 16
old_deleter = mx.nd.NDArray.__del__
del_pool = ThreadPool(num_workers)
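The preview stops after setting up the thread pool; the idea is to replace NDArray.__del__ so the actual free happens on a worker thread instead of blocking the caller. A rough sketch of how the pieces might be wired together (a guess at the rest of the gist, not its actual code; MXNDArrayFree is the C-API call the stock __del__ makes synchronously):

def _threaded_del(self):
    handle = getattr(self, "handle", None)
    if handle is None:
        return
    self.handle = None  # detach so nothing frees the handle twice
    # push the expensive free onto the pool; __del__ itself returns immediately
    del_pool.apply_async(lambda h=handle: check_call(_LIB.MXNDArrayFree(h)))

mx.nd.NDArray.__del__ = _threaded_del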
@wkcn
wkcn / dataloader.py
Created April 20, 2019 01:31
GluonDataloader
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#