Skip to content

Instantly share code, notes, and snippets.

🎯
Focusing

(Bill) Yuchen Lin yuchenlin

🎯
Focusing
View GitHub Profile
@yuchenlin
yuchenlin / batched_roberta_infer.py
Last active May 23, 2020
A batched version of RoBERTa inference
View batched_roberta_infer.py
# Setup for batched RoBERTa inference with fairseq.
import torch
import numpy as np
from tqdm import tqdm
from fairseq.models.roberta import RobertaModel
from fairseq.data.data_utils import collate_tokens
from torch.utils.data import DataLoader, SequentialSampler
# Download (on first use) and load the RoBERTa-large checkpoint
# fine-tuned on MNLI, via the torch.hub fairseq entry point.
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
# Switch to evaluation mode: disables dropout so inference is deterministic.
roberta.eval()
# Move all model parameters to the default CUDA device
# (assumes a GPU is available — TODO confirm in the target environment).
roberta.cuda()
View clean_conceptnet.py
```
wget https://s3.amazonaws.com/conceptnet/downloads/2017/edges/conceptnet-assertions-5.5.5.csv.gz
gunzip -k conceptnet-assertions-5.5.5.csv.gz
```
import json
def del_pos(s):
"""
Deletes part-of-speech encoding from an entity string, if present.
@yuchenlin
yuchenlin / gpt_sent_prob.py
Last active Apr 24, 2020
Compute sentence probability using GPT-2 with huggingface transformers
View gpt_sent_prob.py
import torch
from transformers import OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import numpy as np
from scipy.special import softmax
def model_init(model_string, cuda):
if model_string.startswith("gpt2"):
tokenizer = GPT2Tokenizer.from_pretrained(model_string)
model = GPT2LMHeadModel.from_pretrained(model_string)
@yuchenlin
yuchenlin / masked_word_prediction_bert.py
Last active Feb 17, 2020
A simple example script for predicting masked words in a sentence using BERT.
View masked_word_prediction_bert.py
# Setup for predicting masked words in a sentence with BERT
# (huggingface transformers).
import torch
from transformers import BertTokenizer, BertModel, BertForMaskedLM
import logging
# Show transformers' download/loading progress; purely informational.
logging.basicConfig(level=logging.INFO)# OPTIONAL
# Load the pretrained uncased BERT tokenizer and the masked-language-model
# head (weights are downloaded on first use and cached locally).
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
# Evaluation mode: disables dropout so predictions are deterministic.
model.eval()
You can’t perform that action at this time.