import torch
import torch.nn.functional as F
import gensim.downloader as api

# Load a pretrained Word2Vec model
model = api.load('word2vec-google-news-300')

# Split the sentence into words and convert each word to a vector
sentence = "The cat sat on the mat."
words = sentence.lower().split()
print(words)

# Keep only words present in the model's vocabulary ("mat." keeps its
# trailing period after split(), so it may be skipped here)
word_vectors = torch.tensor([model[w] for w in words if w in model])
print(word_vectors)

query = word_vectors[0]  # vector for 'the'
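# In[]
# The snippet above imports torch.nn.functional but stops right after
# defining `query`. A minimal sketch of one plausible continuation,
# scoring every word vector against the query with cosine similarity
# (an assumption; the original code does not show this step):
kept_words = [w for w in words if w in model]
scores = F.cosine_similarity(query.unsqueeze(0), word_vectors, dim=1)
for word, score in zip(kept_words, scores):
    print(f"{word}: {score.item():.3f}")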
# In[]
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')

# In[]
sentences = ["cats and dogs are great pets.",
             "dogs are very loyal animals.",
             "cats are beautiful animals."]

# Tokenize (split each sentence into words)
tokenized_sentences = [word_tokenize(sentence.lower()) for sentence in sentences]

# sg=1 selects the skip-gram architecture (sg=0 would train CBOW instead)
skip_gram_model = Word2Vec(sentences=tokenized_sentences, vector_size=100, window=2, min_count=1, workers=4, sg=1)
print(skip_gram_model)
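# In[]
# A quick way to inspect what the skip-gram model learned, mirroring the
# most_similar() call used for the CBOW model below (the query word
# 'dogs' is just illustrative):
print(skip_gram_model.wv.most_similar('dogs'))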
# In[]
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')

# In[]
sentences = ["cats and dogs are great pets.",
             "dogs are very loyal animals.",
             "cats are beautiful animals."]

# Tokenize (split each sentence into words)
tokenized_sentences = [word_tokenize(sentence.lower()) for sentence in sentences]

# sg=0 selects the CBOW architecture
cbow_model = Word2Vec(sentences=tokenized_sentences, vector_size=100, window=2, min_count=1, workers=4, sg=0)
print(cbow_model)
print(cbow_model.wv.most_similar('dogs'))
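# In[]
# Individual embeddings live on the model's KeyedVectors; a small
# illustration of pulling out one trained vector (its shape matches
# vector_size=100 above):
vector = cbow_model.wv['dogs']
print(vector.shape)  # (100,)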