This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Tokenizer Performance Comparison Script | |
| A comprehensive tool for evaluating tokenizer efficiency across multiple languages. | |
| Supports HuggingFace datasets, local files, and built-in samples. | |
| Author: https://github.com/ParagEkbote | |
| """ | |
| from typing import Tuple, List, Dict, Optional, Callable, Literal |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from pruna import smash, SmashConfig | |
| # Define per-model Smash + torch.compile configs | |
| models = { | |
| "HuggingFaceTB/SmolLM2-360M": { | |
| "bits": 4, "group_size": 64, | |
| "compiler": "torch_compile", | |
| "torch_compile_mode": "max-autotune", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import torch | |
| import argparse | |
| from datetime import datetime | |
| from tqdm import tqdm | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from pruna import smash, SmashConfig | |
| from pruna.data.pruna_datamodule import PrunaDataModule | |
| from pruna.evaluation.evaluation_agent import EvaluationAgent | |
| from pruna.evaluation.task import Task |