This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
strict digraph { | |
"0 /nncf_model_input_0" [id=0, type=nncf_model_input]; | |
"1 embedding.weight" [id=1, type=nncf_model_const]; | |
"2 ThreeLinearModel/Embedding[embedding]/embedding_0" [id=2, type=embedding]; | |
"3 linear1.weight" [id=3, type=nncf_model_const, style=filled, fillcolor="#dddddd"]; | |
"4 linear1.bias" [id=4, type=nncf_model_const, style=filled, fillcolor="#dddddd"]; | |
"5 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/abs_0" [id=5, type=abs, style=filled, fillcolor="#dddddd"]; | |
"6 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/le_0" [id=6, type=le, style=filled, fillcolor="#dddddd"]; | |
"7 ThreeLinearModel/Linear[linear1]/ActivationsSparsifier/masked_fill_0" [id=7, type=masked_fill, style=filled, fillcolor="#dddddd"]; | |
"8 ThreeLinearModel/Linear[linear1]/linear_0" [id=8, type=linear, style=filled, fillcolor="#dddddd"]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import copy | |
import os | |
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' | |
from functools import partial | |
from pathlib import Path | |
import json | |
from dataclasses import dataclass | |
import typing | |
import torch |
- OV_MODELS or TORCH_MODELS can be either a model ID on the Hugging Face Hub or a path to a local folder.
- For text tasks, a patch is applied to ensure the sequence length is fixed. The patch can be removed if the model accepts inputs of arbitrary shape.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# %% | |
import torch.utils.benchmark as benchmark | |
import torch.nn as nn | |
from dataclasses import dataclass | |
import torch | |
import os | |
os.environ['CUDA_VISIBLE_DEVICES'] = '0' | |
class FeedForward(nn.Module): | |
def __init__(self, input_dim, intermediate_size): |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' | |
from optimum.intel.openvino import OVModelForImageClassification | |
import pandas as pd | |
import numpy as np | |
import datasets | |
import evaluate # Use pip install git+https://github.com/huggingface/evaluate.git | |
from evaluate import evaluator |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' | |
from collections import defaultdict | |
import time | |
import numpy as np | |
import pandas as pd | |
from multiprocessing import Pool |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from contextlib import contextmanager | |
from unittest.mock import patch | |
from optimum.intel.openvino import OVModelForQuestionAnswering | |
import pandas as pd | |
import datasets | |
import evaluate | |
from evaluate import evaluator | |
from transformers import AutoTokenizer, pipeline, AutoModelForQuestionAnswering |
This model applies unstructured magnitude pruning, quantization, and distillation jointly to BERT-base while fine-tuning on the GLUE SST-2 dataset. It achieves the following results on the evaluation set:
- Torch accuracy: 0.9128
- OpenVINO IR accuracy: 0.9128
- Sparsity in transformer block linear layers: 0.80
NewerOlder