Skip to content

Instantly share code, notes, and snippets.

View khuangaf's full-sized avatar
:octocat:
Focusing

Kung-Hsiang Steeve Huang khuangaf

:octocat:
Focusing
View GitHub Profile
# Compute average precision@k over all users.
def precision_at_k(predictions, k):
    """Return the average precision@k across users.

    Stub from the original gist: no computation is implemented — the body
    is only this docstring, so the function returns None.

    Args:
        predictions: np.array of user-item predictions (one row per user
            — TODO confirm exact shape against the caller).
        k: int, rank cutoff for the precision computation.

    Returns:
        None — no implementation is present in this snippet.
    """
# Compute average precision@k over all users.
# NOTE(review): this is an exact duplicate of an earlier definition of
# precision_at_k in this file; the later def shadows the earlier one.
def precision_at_k(predictions, k):
    """Return the average precision@k across users.

    Stub from the original gist: no computation is implemented — the body
    is only this docstring, so the function returns None.

    Args:
        predictions: np.array of user-item predictions (one row per user
            — TODO confirm exact shape against the caller).
        k: int, rank cutoff for the precision computation.

    Returns:
        None — no implementation is present in this snippet.
    """
class CustomBERTModel(BertPreTrainedModel):
    """BERT encoder with a linear classification head.

    Subclasses transformers' BertPreTrainedModel so the instance can be
    built via CustomBERTModel.from_pretrained(..., num_class=N), which
    forwards extra kwargs to this __init__.

    NOTE(review): no forward() is visible in this snippet — presumably one
    exists (or must be added) that pools the BERT output and applies
    self.linear; confirm against the full source.
    """

    def __init__(self, config, num_class):
        """Build the backbone encoder and an untrained classification head.

        Args:
            config: transformers BertConfig (provides hidden_size).
            num_class: int, number of output classes for the linear head.
        """
        super(CustomBERTModel, self).__init__(config)
        # Backbone encoder, initialized from config (weights loaded by
        # from_pretrained when used that way).
        self.bert = BertModel(config)
        # Projection from the hidden size to the class logits.
        self.linear = nn.Linear(config.hidden_size, num_class)
# Instantiate the custom model; num_class is forwarded through
# from_pretrained's kwargs into CustomBERTModel.__init__.
model = CustomBERTModel.from_pretrained('bert-base-uncased',num_class=10)
# NOTE(review): this immediately rebinds `model` to a plain BertModel,
# discarding the CustomBERTModel built on the previous line — these look
# like two separate gist examples pasted together; keep only one.
model = BertModel.from_pretrained('bert-base-uncased')
# Build the word -> first-sub-token index map used to align original word
# positions with WordPiece sub-token positions.
all_doc_tokens = ['[SEP]']  # fixed: BERT's special token is '[SEP]', not 'SEP'
orig_to_tok_index = []
for (i, word) in enumerate(words):
    # Record where this word's sub-tokens will start in all_doc_tokens.
    orig_to_tok_index.append(len(all_doc_tokens))
    # fixed: original called tokenizer.tokenize(token), but `token` is
    # undefined — the loop variable is `word`.
    sub_tokens = tokenizer.tokenize(word)
    all_doc_tokens.extend(sub_tokens)
# fixed: original converted `tokens`, a name not defined at this point;
# the list built above is what should be mapped to vocabulary ids.
tokenizer.convert_tokens_to_ids(all_doc_tokens)
# Load the pretrained WordPiece tokenizer (downloads the vocab on first use).
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Split the raw sentence into WordPiece sub-tokens.
tokens = tokenizer.tokenize('Learn Hugging Face Transformers & BERT with PyTorch in 5 Minutes')
# Wrap with BERT's required special tokens: [CLS] ... [SEP].
tokens = ['[CLS]'] + tokens + ['[SEP]']
# Infer the topic distribution of the second corpus document.
# (Indexing a trained LdaModel with a bag-of-words document returns
# (topic_id, probability) pairs.)
lda[common_corpus[1]]
# Example output — was a ''' string in the original that was never closed
# (a SyntaxError that swallowed the rest of the file); kept as comments:
# [(0, 0.014287902),
#  (1, 0.014287437),
#  (2, 0.014287902),
#  (3, 0.014285716),
#  (4, 0.014285716),
#  (5, 0.014285714),
#  ...]
from gensim.test.utils import common_texts
from gensim.corpora.dictionary import Dictionary
from gensim.models import LdaModel

# Build a token-id dictionary and a bag-of-words corpus from the example
# texts bundled with gensim.
common_dictionary = Dictionary(common_texts)
common_corpus = list(map(common_dictionary.doc2bow, common_texts))

# Fit a 10-topic LDA model on that corpus.
lda = LdaModel(common_corpus, num_topics=10)
# NOTE(review): this entire snippet is an exact duplicate of the gensim
# LDA example earlier in the file; the rebinds below shadow the earlier
# common_dictionary / common_corpus / lda names.
from gensim.test.utils import common_texts
from gensim.corpora.dictionary import Dictionary
from gensim.models import LdaModel
# Create a corpus from a list of texts
common_dictionary = Dictionary(common_texts)
common_corpus = [common_dictionary.doc2bow(text) for text in common_texts]
# Train the model on the corpus.
lda = LdaModel(common_corpus, num_topics=10)