Skip to content

Instantly share code, notes, and snippets.

@Narsil
Last active October 11, 2021 14:30
Show Gist options
  • Save Narsil/de88b2d7c242c29772a61af56a5c8270 to your computer and use it in GitHub Desktop.
Save Narsil/de88b2d7c242c29772a61af56a5c8270 to your computer and use it in GitHub Desktop.
from transformers import pipeline
import random
from torch.utils.data import Dataset
import tqdm
# Build the text-classification pipeline once; every benchmark run below reuses it.
# No model is named here, so the library's default for this task is used.
# device=0 targets the first CUDA device, so a GPU is required
# (per the transformers pipeline docs, device=-1 would select the CPU instead).
pipe = pipeline("text-classification", device=0)
class MyDataset(Dataset):
    """Synthetic map-style dataset of random-length text samples.

    Each sample is the phrase ``"This is a test"`` repeated between 1 and 29
    times (no separator), so inputs vary in length from item to item.
    Samples are drawn at access time: reading the same index twice may
    return strings of different lengths.
    """

    def __len__(self):
        """Return the fixed number of samples exposed by the dataset."""
        return 1000

    def __getitem__(self, i):
        """Return a freshly generated random-length sample for index ``i``.

        Args:
            i: Sample index. Negative indices within ``-len(self)`` are
                accepted, as they were before bounds checking was added.

        Returns:
            ``"This is a test"`` repeated ``random.randrange(1, 30)`` times.

        Raises:
            IndexError: If ``i`` falls outside ``[-len(self), len(self))``.
        """
        size = len(self)
        # Without this check, plain iteration (`for x in dataset`, `list(dataset)`)
        # would never stop: Python's sequence protocol relies on IndexError
        # to signal the end.
        if not -size <= i < size:
            raise IndexError(f"index {i} is out of range for dataset of size {size}")
        return "This is a test" * random.randrange(1, 30)
dataset = MyDataset()


def _stream(label, **pipe_kwargs):
    """Run the whole dataset through the pipeline once, showing a progress bar.

    Args:
        label: Heading printed before the run starts.
        **pipe_kwargs: Extra keyword arguments forwarded to ``pipe`` (for
            example ``batch_size=8``). Pass none to use the pipeline's
            default, unbatched behaviour.
    """
    print("-" * 30)
    print(label)
    # Outputs are discarded: only the throughput reported by tqdm matters here.
    # `total` is passed explicitly on every run so the bar always counts
    # dataset items, whatever the batch size.
    for _ in tqdm.tqdm(pipe(dataset, **pipe_kwargs), total=len(dataset)):
        pass


# Baseline: no batch_size argument at all, so the pipeline default applies.
_stream("Streaming no batching")

# Same workload at increasing batch sizes, to compare throughput.
for batch_size in (8, 64, 256, 512, 1024):
    _stream(f"Streaming batch_size={batch_size}", batch_size=batch_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment