Skip to content

Instantly share code, notes, and snippets.

@domen111
Created May 6, 2019 09:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save domen111/64cb6628cd57c2ffa4f91024ba6190ae to your computer and use it in GitHub Desktop.
Save domen111/64cb6628cd57c2ffa4f91024ba6190ae to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from gensim.test.utils import common_texts, get_tmpfile\n",
"from gensim.models import Word2Vec\n",
"\n",
"w2v_model = Word2Vec(common_texts, size=100, window=5, min_count=1)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['human', 'interface', 'computer'],\n",
" ['survey', 'user', 'computer', 'system', 'response', 'time'],\n",
" ['eps', 'user', 'interface', 'system'],\n",
" ['system', 'human', 'system', 'eps'],\n",
" ['user', 'response', 'time'],\n",
" ['trees'],\n",
" ['graph', 'trees'],\n",
" ['graph', 'minors', 'trees'],\n",
" ['graph', 'minors', 'survey']]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_texts"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['system',\n",
" 'user',\n",
" 'trees',\n",
" 'graph',\n",
" 'human',\n",
" 'interface',\n",
" 'computer',\n",
" 'survey',\n",
" 'response',\n",
" 'time',\n",
" 'eps',\n",
" 'minors']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# index to word\n",
"w2v_model.wv.index2word"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"我們需要利用 `index2word` 來算出 `word2index`(例如 `word2index = {word: i for i, word in enumerate(w2v_model.wv.index2word)}`),接著我們就能把 training / testing data 中的每個詞以對應的 index 取代。"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-1.4890118e-03, -4.5906939e-03, 1.7917742e-03, ...,\n",
" 1.1652057e-03, -4.3145278e-03, -1.6234915e-03],\n",
" [ 4.1555176e-03, -1.7337966e-03, -4.5758467e-03, ...,\n",
" -2.7107631e-03, 4.7598602e-03, 4.2530061e-03],\n",
" [ 4.5080658e-05, 1.8863901e-03, -4.7816625e-03, ...,\n",
" -4.4701910e-03, -9.9671488e-05, 2.9279427e-03],\n",
" ...,\n",
" [ 7.8272796e-04, -1.6770075e-03, -4.0623671e-03, ...,\n",
" 4.6074665e-03, -1.1475774e-03, 4.9830242e-03],\n",
" [-1.0417016e-03, 3.9325394e-03, -4.4449898e-03, ...,\n",
" -3.0505881e-03, 3.4645137e-03, -3.1374148e-03],\n",
" [-2.5614763e-03, -8.8947389e-04, -7.1159453e-04, ...,\n",
" 1.5468400e-03, -3.7911797e-03, 2.8648013e-03]], dtype=float32)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# index to vectors\n",
"# this lookup table will later be passed to keras\n",
"w2v_model.wv.vectors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment