Created
October 10, 2019 06:33
-
-
Save tteofili/cd72138c78b576506f33db6d530f2467 to your computer and use it in GitHub Desktop.
PPA-PCA-PPA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 52, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.decomposition import PCA\n", | |
"import copy\n", | |
"import numpy as np\n", | |
"\n", | |
"def ppa(embedding_matrix_orig, n_components = 1):\n", | |
" pca = PCA(n_components=n_components)\n", | |
" embedding_matrix = copy.deepcopy(embedding_matrix_orig)\n", | |
" temp = embedding_matrix - np.average(embedding_matrix, axis=0)\n", | |
" principalComponents = pca.fit_transform(temp)\n", | |
" principalAxes = pca.components_\n", | |
" toSubstract = np.matmul(np.matmul(embedding_matrix, principalAxes.T), principalAxes)\n", | |
" processed = embedding_matrix - toSubstract\n", | |
" return processed\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 73, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=-------------------------------------------------] 2.6% 43.4/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[==------------------------------------------------] 4.5% 75.2/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[===-----------------------------------------------] 6.4% 106.1/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[====----------------------------------------------] 8.8% 145.9/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[======--------------------------------------------] 12.0% 199.6/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[==========----------------------------------------] 20.8% 345.2/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[===========---------------------------------------] 23.8% 395.2/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=============-------------------------------------] 26.7% 444.7/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[==============------------------------------------] 29.9% 496.5/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[===================-------------------------------] 38.9% 646.3/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[======================----------------------------] 44.3% 737.2/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=======================---------------------------] 47.0% 782.0/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=========================-------------------------] 51.9% 862.7/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[===========================-----------------------] 55.2% 918.3/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=============================---------------------] 58.1% 965.5/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[==============================--------------------] 60.7% 1010.0/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[===============================-------------------] 63.8% 1060.8/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=================================-----------------] 67.1% 1115.8/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[==================================----------------] 69.9% 1163.0/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[====================================--------------] 73.1% 1216.2/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[======================================------------] 76.8% 1277.3/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=========================================---------] 82.6% 1373.3/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[===========================================-------] 86.6% 1439.3/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=============================================-----] 90.3% 1501.2/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[==============================================----] 93.4% 1553.8/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[================================================--] 96.6% 1605.5/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[=================================================-] 99.5% 1655.2/1662.8MB downloaded" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"IOPub message rate exceeded.\n", | |
"The notebook server will temporarily stop sending output\n", | |
"to the client in order to avoid crashing it.\n", | |
"To change this limit, set the config variable\n", | |
"`--NotebookApp.iopub_msg_rate_limit`.\n", | |
"\n", | |
"Current values:\n", | |
"NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", | |
"NotebookApp.rate_limit_window=3.0 (secs)\n", | |
"\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"import gensim.downloader as api\n", | |
"model = api.load(\"word2vec-google-news-300\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[('info', 0.7363681793212891),\n", | |
" ('infomation', 0.6800296306610107),\n", | |
" ('infor_mation', 0.6733849048614502),\n", | |
" ('informaiton', 0.6639008522033691),\n", | |
" ('informa_tion', 0.660125732421875),\n", | |
" ('informationon', 0.6339334845542908),\n", | |
" ('informationabout', 0.6320979595184326),\n", | |
" ('Information', 0.6186580657958984),\n", | |
" ('informaion', 0.6093292236328125),\n", | |
" ('details', 0.6063088774681091)]" | |
] | |
}, | |
"execution_count": 74, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.wv.most_similar(\"information\",topn=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n", | |
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `syn0` (Attribute will be removed in 4.0.0, use self.vectors instead).\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
} | |
], | |
"source": [ | |
"weights = model.wv.syn0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dim = 8" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 88, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"reduced = ppa(weights, n_components = dim)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 89, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pca = PCA(n_components = dim)\n", | |
"reduced = reduced - np.mean(reduced)\n", | |
"principalComponents = pca.fit_transform(reduced)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 90, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"reduced = ppa(principalComponents, n_components = dim)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:3: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n", | |
" This is separate from the ipykernel package so we can avoid doing imports until\n", | |
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n", | |
" after removing the cwd from sys.path.\n", | |
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:5: DeprecationWarning: Call to deprecated `syn0` (Attribute will be removed in 4.0.0, use self.vectors instead).\n", | |
" \"\"\"\n" | |
] | |
} | |
], | |
"source": [ | |
"from gensim.models import KeyedVectors\n", | |
"outv = KeyedVectors(dim)\n", | |
"outv.vocab = model.wv.vocab\n", | |
"outv.index2word = model.wv.index2word\n", | |
"outv.syn0 = reduced" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 95, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[('Marais_Erasmus', 0.9967363476753235),\n", | |
" ('receiver_Cortez_Hankton', 0.9956709146499634),\n", | |
" ('Lt._Rahn_Farder', 0.9949665665626526),\n", | |
" ('Ehat', 0.9944389462471008),\n", | |
" ('manager_MV_Sridhar', 0.9940130710601807),\n", | |
" ('retailiation', 0.9931172132492065),\n", | |
" ('WatchGuard_LiveSecurity_R', 0.9930428266525269),\n", | |
" ('Probable_TE', 0.9926935434341431),\n", | |
" ('sister_Joann_Reimel', 0.9926388263702393),\n", | |
" ('spokeswoman_Malorie_Lucich', 0.9926244616508484)]" | |
] | |
}, | |
"execution_count": 95, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"outv.wv.most_similar(\"information\",topn=10)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment