@aditya-malte
aditya-malte / smallberta_pretraining.ipynb
Created February 22, 2020 13:41
smallBERTa_Pretraining.ipynb
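The notebook itself could not be rendered on this page. As a rough indication of its topic, below is a minimal sketch of pretraining a small RoBERTa-style masked language model with the HuggingFace tokenizers and transformers libraries; every file path, directory name (corpus.txt, smallberta), and hyperparameter here is an illustrative assumption, not a value taken from the gist.

import os
from tokenizers import ByteLevelBPETokenizer
from transformers import (DataCollatorForLanguageModeling, LineByLineTextDataset,
                          RobertaConfig, RobertaForMaskedLM, RobertaTokenizerFast,
                          Trainer, TrainingArguments)

# 1. Train a byte-level BPE tokenizer on a raw text corpus (corpus.txt is hypothetical).
os.makedirs("smallberta", exist_ok=True)
tokenizer = ByteLevelBPETokenizer()
tokenizer.train(files=["corpus.txt"], vocab_size=52_000, min_frequency=2,
                special_tokens=["<s>", "<pad>", "</s>", "<unk>", "<mask>"])
tokenizer.save_model("smallberta")

# 2. Build a deliberately small RoBERTa and pretrain it with masked-language modeling.
config = RobertaConfig(vocab_size=52_000, hidden_size=256, num_hidden_layers=4,
                       num_attention_heads=4, intermediate_size=1024)
model = RobertaForMaskedLM(config)
hf_tokenizer = RobertaTokenizerFast.from_pretrained("smallberta", model_max_length=512)
dataset = LineByLineTextDataset(tokenizer=hf_tokenizer, file_path="corpus.txt", block_size=128)
collator = DataCollatorForLanguageModeling(tokenizer=hf_tokenizer, mlm=True, mlm_probability=0.15)
args = TrainingArguments(output_dir="smallberta", num_train_epochs=1, per_device_train_batch_size=16)
trainer = Trainer(model=model, args=args, data_collator=collator, train_dataset=dataset)
trainer.train()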
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class TransformerModel(nn.Module):
    # Encoder-only language model: token embedding -> Transformer encoder -> vocab logits.
    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        super().__init__()
        self.ninp = ninp
        self.encoder = nn.Embedding(ntoken, ninp)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.decoder = nn.Linear(ninp, ntoken)
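    # Minimal forward pass, added here as an assumption so the class is usable;
    # the full PyTorch tutorial version also applies a PositionalEncoding module
    # before the encoder stack, which this sketch omits.
    def forward(self, src, src_mask=None):
        x = self.encoder(src) * math.sqrt(self.ninp)   # scale embeddings by sqrt(d_model)
        output = self.transformer_encoder(x, src_mask)
        return self.decoder(output)                    # logits over the vocabulary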
@thomwolf
thomwolf / loading_wikipedia.py
Last active January 18, 2024 14:04
Load full English Wikipedia dataset in HuggingFace nlp library
import os; import psutil; import timeit
from datasets import load_dataset

# Resident memory (MB) before and after loading the full English Wikipedia split.
mem_before = psutil.Process(os.getpid()).memory_info().rss >> 20
wiki = load_dataset("wikipedia", "20200501.en", split='train')
mem_after = psutil.Process(os.getpid()).memory_info().rss >> 20
print(f"RAM memory used: {(mem_after - mem_before)} MB")

# Time one full batched pass; the loop body (slicing a batch) completes the cut-off snippet.
s = """batch_size = 1000
for i in range(0, len(wiki), batch_size):
    batch = wiki[i:i + batch_size]
"""
elapsed = timeit.timeit(stmt=s, number=1, globals=globals())
print(f"Time to iterate: {elapsed:.1f} sec")
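Why the reported RAM delta stays small even though English Wikipedia is many GB on disk: the datasets library backs each dataset with an on-disk Apache Arrow file that is memory-mapped, so batches are read lazily from disk rather than held in RAM. (This note reflects the library's documented design, not output captured in this gist.)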