Yi Liu (yiliu30)

  • AI Frameworks Engineer @intel
  • SH
@yiliu30
yiliu30 / basic_strategy.py
Created July 21, 2022 03:56
basic_strategy.py
"""Tests for quantization"""
import numpy as np
import unittest
import shutil
import os
import yaml
import tensorflow as tf
def build_fake_yaml():
    fake_yaml = '''
from typing import List

class Adaptor:
    def __init__(self) -> None:
        pass

    def evaluate(self, model):
        pass
from collections import deque
import time
import os
import subprocess

SLEEP_TIME = 0.5

class Scheduler:
    ...
import torch

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 1, 1)
        self.linear = torch.nn.Linear(224 * 224, 5)

    def forward(self, x):
        # 1x1 conv collapses 3 channels to 1; the flattened 224*224 map feeds the linear head
        x = self.conv(x)
        x = torch.flatten(x, 1)
        return self.linear(x)
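A minimal smoke test for the toy module above (input shape assumed from its layers):

import torch

m = M()
x = torch.randn(1, 3, 224, 224)  # one RGB image
y = m(x)
print(y.shape)  # torch.Size([1, 5])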
# branch yuwenzho/recipe
block_list = self.capability.get('block_info', [])

class BlockFallbackTuningSampler(TuningSampler):
    def __init__(self):
        pass

    def __iter__(self):
        for op_list in block_list:
            # fall back all ops in op_list, block by block
            ...
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Neural Solution: Bring the Optimization Power of Intel® Neural Compressor as a Service

Neural Solution, Intel Neural Compressor, Distributed Tuning, Deep Learning, Quantization

In today's fast-paced world of deep learning, model compression techniques play a crucial role in improving efficiency and reducing computational cost. Intel® Neural Compressor (INC) is a cutting-edge tool that offers a wide range of popular model compression techniques, including quantization, pruning, distillation, and neural architecture search, on mainstream frameworks. It supports a wide range of Intel hardware and has been extensively tested, validating thousands of popular models by leveraging the zero-code optimization solution Neural Coder and automatic accuracy-driven quantization strategies.
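A minimal sketch of the accuracy-driven post-training quantization flow described above, assuming INC 2.x's `fit`/`PostTrainingQuantConfig` API and user-supplied `fp32_model`, `calib_dataloader`, and `eval_fn`:

from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.quantization import fit

# `fit` tries quantization configurations until `eval_fn` reports that the
# accuracy criterion is met, then returns the quantized model.
q_model = fit(
    model=fp32_model,
    conf=PostTrainingQuantConfig(),
    calib_dataloader=calib_dataloader,
    eval_func=eval_fn,
)
q_model.save("./quantized_model")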

In this blog, we are happy to introduce Neural Solution, which brings the optimization power of Intel® Neural Compressor as a service.

@yiliu30
yiliu30 / ds_z3.py
Created January 18, 2024 01:57
DeepSpeed ZeRO-3 key code
import torch
from torch.nn import Module, Parameter
from typing import Deque, Set

module = torch.nn.Linear(3, 3)

class PartitionedParameterCoordinator:
    # Gathers the ZeRO-3 partitioned parameters a submodule needs before it runs.
    def __all_gather_params(self, params: Set[Parameter], forward: bool, quantize: bool = False) -> None:
        ...
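For context, a hedged usage sketch with DeepSpeed's public `deepspeed.zero.GatheredParameters` context manager, which wraps the same gather/re-partition machinery (assumes a ZeRO-3 initialized model and an initialized process group):

import deepspeed
import torch

# Under ZeRO-3 each rank holds only a shard of `module.weight`; the context
# manager all-gathers the full parameter and re-partitions it on exit.
with deepspeed.zero.GatheredParameters(module.weight, modifier_rank=0):
    if torch.distributed.get_rank() == 0:
        torch.nn.init.zeros_(module.weight)  # mutate on rank 0; re-partitioned on exit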
@yiliu30
yiliu30 / note.md
Created January 23, 2024 05:52
Note for auto-completing Typing Hints in VSCode

1. Plugins

2. Settings

Add `.vscode/settings.json` to the current workspace.

{
    "python.languageServer": "Default",
    "python.analysis.inlayHints.callArgumentNames": "off"
}
from transformers import AutoModel, AutoTokenizer
import torch
import copy
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
from torch._export import capture_pre_autograd_graph
model_name_or_path = "bert-base-uncased"
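The gist is truncated here; a hedged sketch of how the standard PT2E x86-inductor flow typically continues from these imports (the example inputs are placeholders):

model = AutoModel.from_pretrained(model_name_or_path).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
example_inputs = (tokenizer("hello world", return_tensors="pt")["input_ids"],)

# Export, attach the x86 inductor quantizer, calibrate, then convert.
exported_model = capture_pre_autograd_graph(model, example_inputs)
quantizer = X86InductorQuantizer()
quantizer.set_global(xiq.get_default_x86_inductor_quantization_config())
prepared_model = prepare_pt2e(exported_model, quantizer)
prepared_model(*example_inputs)  # one calibration pass
quantized_model = convert_pt2e(prepared_model)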