#!/bin/bash
# Show username after each process in nvidia-smi
# like:
# ...
# +------------------------------------------------------+
# | Processes:                                GPU Memory |
# |  GPU       PID  Type  Process name             Usage |
# |======================================================|
# |    0    150752    C   python                  830MiB | User: user1
# |    1      2185    C   /usr/bin/python        1090MiB | User: user2
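The script body itself is missing from the snippet. Below is a minimal sketch of the same idea in Python, assuming (as in the header above) that the two leading numeric columns of a process row are the GPU index and the PID; the regex and the /proc-based owner lookup are illustrative, not from the original script.

import os
import pwd
import re
import subprocess

out = subprocess.run(['nvidia-smi'], capture_output=True, text=True).stdout
for line in out.splitlines():
    m = re.match(r'\|\s+\d+\s+(\d+)\s+', line)   # "| GPU  PID ..." rows only
    if m:
        pid = int(m.group(1))
        try:
            # owner of /proc/<pid> is the user running the process
            user = pwd.getpwuid(os.stat('/proc/%d' % pid).st_uid).pw_name
            line += '   User: %s' % user
        except OSError:
            pass   # process exited between nvidia-smi and the lookup
    print(line)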
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

seqs = ['gigantic_string', 'tiny_str', 'medium_str']

# make <pad> idx 0
vocab = ['<pad>'] + sorted(set(''.join(seqs)))

# make model (sizes are illustrative: embedding dim 10, LSTM hidden dim 5)
embed = nn.Embedding(len(vocab), 10)
lstm = nn.LSTM(10, 5)
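A hedged usage continuation, not part of the original snippet (variable names and the CPU-only setup are illustrative): vectorize the strings against the vocab, pad them to equal length, embed, pack, and run the LSTM so it skips the padded positions.

# hypothetical continuation: indices -> padded batch -> packed sequence -> LSTM
vectorized = [torch.tensor([vocab.index(ch) for ch in s]) for s in seqs]
lengths = torch.tensor([len(v) for v in vectorized])
padded = nn.utils.rnn.pad_sequence(vectorized, batch_first=True)  # pads with 0 = <pad>
embedded = embed(padded)                             # (batch, max_len, 10)
packed = pack_padded_sequence(embedded, lengths,
                              batch_first=True, enforce_sorted=False)
output, (h, c) = lstm(packed)                        # padded steps are skipped
unpacked, out_lens = pad_packed_sequence(output, batch_first=True)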
#!/bin/bash
# this script installs GCC 5.4.0
# to use it navigate to your home directory and type:
# sh install-gcc-5.4.0.sh
# download and unpack the GCC 5.4.0 release sources
wget https://github.com/gcc-mirror/gcc/archive/gcc-5_4_0-release.tar.gz
tar xzf gcc-5_4_0-release.tar.gz
# GitHub archive tarballs unpack to <repo>-<tag>, hence the doubled gcc- prefix
cd gcc-gcc-5_4_0-release
# fetch GMP/MPFR/MPC, then build out of tree; the install prefix is one choice
./contrib/download_prerequisites
mkdir build && cd build
../configure --prefix="$HOME/gcc-5.4.0" --enable-languages=c,c++ --disable-multilib
make -j"$(nproc)"
make install
# Prerequisites
# 1. MSVC 2017 C++ Build Tools
# 2. CMake 3.0 or later
# 3. 64-bit Windows
# 4. Anaconda / Miniconda, 64-bit
# Prerequisites for CUDA
# 1. CUDA 8.0 or later
# 2. NVTX (ships with CUDA as the "Visual Studio Integration" component; if it
#    fails to install, extract the CUDA installer exe and run the NVTX installer
#    found under CUDAVisualStudioIntegration)
from collections import defaultdict

# Recursively grow pattern `patt`; mdb holds (sequence index, position) pairs
# marking where the pattern last matched. Relies on globals the snippet leaves
# undefined: db (list of sequences), minsup (min support), results (output).
def frequent_rec(patt, mdb):
    results.append((len(mdb), patt))
    occurs = defaultdict(list)
    for (i, startpos) in mdb:
        seq = db[i]
        for j in range(startpos + 1, len(seq)):
            l = occurs[seq[j]]
            if len(l) == 0 or l[-1][0] != i:
                l.append((i, j))
    for (c, newmdb) in occurs.items():
        if len(newmdb) >= minsup:
            frequent_rec(patt + [c], newmdb)
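A hedged driver sketch; the example database and support threshold are made up. The seed call starts every sequence at position -1 so the first scan begins at index 0.

# hypothetical example inputs for the globals the function expects
db = [[0, 1, 2, 3, 4], [1, 1, 1, 3, 4], [2, 1, 2, 2, 0], [1, 1, 1, 2, 2]]
minsup = 2
results = []
frequent_rec([], [(i, -1) for i in range(len(db))])
print(sorted(results, reverse=True)[:5])  # patterns with highest support first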
import torch
from transformers import BertTokenizer, BertModel, BertForMaskedLM
import logging
logging.basicConfig(level=logging.INFO)  # optional
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()
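A minimal usage sketch, not part of the original snippet: mask one token in a sentence and let the model rank replacements. The sentence is a made-up example, and indexing the output as outputs[0] keeps it compatible with both tuple and ModelOutput return types.

text = "[CLS] the man went to the [MASK] . [SEP]"   # hypothetical input
tokens = tokenizer.tokenize(text)
ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
mask_idx = tokens.index('[MASK]')
with torch.no_grad():
    outputs = model(ids)
logits = outputs[0]                                  # (1, seq_len, vocab_size)
pred_id = logits[0, mask_idx].argmax().item()
print(tokenizer.convert_ids_to_tokens([pred_id]))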
import torch
from transformers import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import numpy as np
from scipy.special import softmax

def model_init(model_string, cuda):
    if model_string.startswith("gpt2"):
        tokenizer = GPT2Tokenizer.from_pretrained(model_string)
        model = GPT2LMHeadModel.from_pretrained(model_string)
    else:
        # otherwise assume an OpenAI GPT checkpoint, matching the imports above
        tokenizer = OpenAIGPTTokenizer.from_pretrained(model_string)
        model = OpenAIGPTLMHeadModel.from_pretrained(model_string)
    model.eval()
    if cuda:
        model.to('cuda')
    return model, tokenizer
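For instance (a hypothetical call, not in the original):

model, tokenizer = model_init('gpt2', cuda=torch.cuda.is_available())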
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
from torch.nn import CrossEntropyLoss
from tqdm import trange

max_length = 24
batch_size = 200
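The snippet breaks off after these constants; below is a hedged sketch of what imports like CrossEntropyLoss and trange are presumably building toward: the average per-token negative log-likelihood of each text under GPT-2. The function name, the texts argument, and the per-example (rather than truly batched) inner loop are illustrative.

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

def mean_nll(texts):
    """Average per-token negative log-likelihood for each text (illustrative)."""
    loss_fct = CrossEntropyLoss()
    nlls = []
    for start in trange(0, len(texts), batch_size):
        for text in texts[start:start + batch_size]:
            ids = torch.tensor([tokenizer.encode(text)[:max_length]])
            with torch.no_grad():
                logits = model(ids)[0]            # (1, seq_len, vocab_size)
            # shift so that positions < n predict token n
            loss = loss_fct(logits[0, :-1, :], ids[0, 1:])
            nlls.append(loss.item())
    return nlls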
Yoav Goldberg, Jan 23, 2021.
The FAccT paper "On the Dangers of Stochastic Parrots: Can Language Models Be Too Big?" by Bender, Gebru, McMillan-Major and Shmitchell has been at the center of a controversy recently. The final version is now out and, owing a lot to this controversy, will undoubtedly be very widely read. I read an earlier draft of the paper, and I think the new, final version is much improved in many ways: kudos to the authors for this upgrade. I also agree with and endorse most of the content. This is important stuff; you should read it.
However, I do find some aspects of the paper (and the resulting discourse around it and around the technology) to be problematic. These weren't clear to me when I initially read the first draft several months ago, but they have become very clear to me now. These points are for the most part