This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First retained token's vector. NOTE: the corpus was lowercased before lookup,
# so index 0 corresponds to 'the', not 'The' (original comment was misleading).
query = word_vectors[0]  # 'the'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Split the sentence into words and convert each word to an embedding vector.
sentence = "The cat sat on the mat."
words = sentence.lower().split()
print(words)
# Keep only in-vocabulary words; OOV tokens are silently dropped.
# NOTE(review): plain .split() leaves punctuation attached ("mat."), which is
# likely not in the model's vocabulary — consider a real tokenizer. TODO confirm.
word_vectors = torch.tensor([model[w] for w in words if w in model])
print(word_vectors)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch
import torch.nn.functional as F
import gensim.downloader as api
from gensim.models import Word2Vec

# Load a pretrained Word2Vec model (word2vec-google-news-300);
# gensim downloads and caches it on first use.
model = api.load('word2vec-google-news-300')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# In[]
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import nltk

# Fetch the Punkt tokenizer data required by word_tokenize.
nltk.download('punkt')

# In[]
# Toy training corpus: three short sentences.
sentences = [
    "cats and dogs are great pets.",
    "dogs are very loyal animals.",
    "cats are beautiful animals.",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train a skip-gram Word2Vec model on the tokenized corpus.
# BUGFIX: the original passed sg=0, which trains CBOW — contradicting the
# variable name. In gensim, sg=1 selects the skip-gram architecture.
skip_gram_model = Word2Vec(
    sentences=tokenized_sentences,
    vector_size=100,  # embedding dimensionality
    window=2,         # context window size
    min_count=1,      # keep every token (toy corpus is tiny)
    workers=4,
    sg=1,             # 1 = skip-gram, 0 = CBOW
)
print(skip_gram_model)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# In[]
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize
import nltk

# Fetch the Punkt tokenizer data required by word_tokenize.
nltk.download('punkt')

# In[]
# Toy training corpus: three short sentences.
sentences = [
    "cats and dogs are great pets.",
    "dogs are very loyal animals.",
    "cats are beautiful animals.",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Show the words most similar to 'dogs' under the trained CBOW embeddings.
print(cbow_model.wv.most_similar('dogs'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print a summary of the trained model (vocab size, vector size, alpha).
print(cbow_model)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train a CBOW Word2Vec model (sg=0 selects CBOW) on the tokenized corpus.
cbow_params = dict(
    sentences=tokenized_sentences,
    vector_size=100,
    window=2,
    min_count=1,
    workers=4,
    sg=0,
)
cbow_model = Word2Vec(**cbow_params)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Tokenize: lowercase each sentence and split it into word tokens.
tokenized_sentences = [word_tokenize(sentence.lower()) for sentence in sentences]
NewerOlder