Skip to content

Instantly share code, notes, and snippets.

@oshikiri
Created March 5, 2017 13:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oshikiri/da68f7b9b2aa9c626280176060266e34 to your computer and use it in GitHub Desktop.
Save oshikiri/da68f7b9b2aa9c626280176060266e34 to your computer and use it in GitHub Desktop.
(oscca-)sembei の実行例
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Segmentation-free word embeddings using the sembei package"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:36:57.665730",
"start_time": "2017-03-05T22:36:52.425409"
},
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"matplotlib.style.use('ggplot')\n",
"matplotlib.rc('font', family=['IPAexGothic'])\n",
"%matplotlib inline\n",
"\n",
"import sembei as sb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:36:57.841427",
"start_time": "2017-03-05T22:36:57.668956"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"3952451"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('/path/to/jawiki-latest-pages-articles_text_10M.txt',\n",
" mode='r', encoding='utf-8', errors='ignore') as f:\n",
" corpus_str = ''.join(line.replace('\\n', ' ') for line in f.readlines())\n",
"\n",
"len(corpus_str)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:36:57.846841",
"start_time": "2017-03-05T22:36:57.843084"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'別については、平安時代末期にはすでにいずれもの音となり発音上の区別が無くなっていたことにより、相当な表記の揺れがあり、格助詞の「を」を除き前例による基準を見出すことができなかった。そこで『下官集』では'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"corpus_str[100000:100100]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:37:03.490990",
"start_time": "2017-03-05T22:36:57.848484"
},
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"min count : ('耀', 5) 0\n",
"# of 1-gram : 4295\n",
"Coverage : 0.9993060002514895\n",
"\n",
"min count : ('。奇', 2) 0\n",
"# of 2-gram : 208807\n",
"Coverage : 0.9672782787186989\n",
"\n",
"min count : ('り、厳', 5) 0\n",
"# of 3-gram : 930381\n",
"Coverage : 0.6883371862168564\n",
"\n",
"min count : ('評だった', 5) 0\n",
"# of 4-gram : 1738573\n",
"Coverage : 0.47033549562031257\n",
"\n",
"min count : ('気温が30', 4) 0\n",
"# of 5-gram : 2355014\n",
"Coverage : 0.33733119019059316\n",
"\n",
"min count : ('。 ; ラン', 4) 0\n",
"# of 6-gram : 2793533\n",
"Coverage : 0.22495332642960025\n",
"\n",
"min count : ('。 * 11月2', 10) 0\n",
"# of 8-gram : 3304594\n",
"Coverage : 0.0692668928722962\n",
"\n",
"min count : ('受賞歴 == ', 13) 0\n",
"# of 7-gram : 3095326\n",
"Coverage : 0.08868623545238132\n",
"\n",
"CPU times: user 242 ms, sys: 135 ms, total: 377 ms\n",
"Wall time: 5.62 s\n"
]
}
],
"source": [
"%%time\n",
"n_extract_tuple = [(1, 3000), (2, 100000), (3, 100000), (4, 100000), (5, 100000),\n",
" (6, 70000), (7, 10000), (8, 10000)]\n",
"\n",
"vocabulary_all = sb.utils.ngram.extract_topn_ngram_lossycounting(\n",
" corpus_str, width_ngram=len(n_extract_tuple), n_extract_tuple=n_extract_tuple,\n",
" epsilon=1e-7, support_threshold=1e-7, n_processes=20)\n",
" \n",
"size_vocabulary_all = len(vocabulary_all)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:37:03.765898",
"start_time": "2017-03-05T22:37:03.493328"
},
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2017-03-05 22:37:03.763815\n",
"Parameters :\n",
" size_vocabulary : 493000\n",
" dim : 200\n",
" size_window : 1\n",
" wide_window : False\n",
" n_iter_rsvd : 6\n",
" max_n_ngram : 8\n",
" inc : 1\n",
" \n"
]
}
],
"source": [
"sembei = sb.embed.Sembei(corpus_str, vocabulary=vocabulary_all, dim=200,\n",
" n_iter_rsvd=6, wide_window=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:37:41.157832",
"start_time": "2017-03-05T22:37:03.767677"
},
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# of nonzero : 44028048\n",
"size : 493000x986000\n",
"density : 0.009057442737884131\n",
"\n",
"\n",
"CPU times: user 14 s, sys: 4.55 s, total: 18.5 s\n",
"Wall time: 37.4 s\n"
]
}
],
"source": [
"%%time\n",
"sembei.construct_cooccurrence_matrix(n_cores=4, n_chunk=40, n_chunk_pool=10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:40:52.358084",
"start_time": "2017-03-05T22:37:41.159599"
},
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZ4AAAEeCAYAAAC6zHPXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9Q1HX+B/DnIiwIrkBKAiPqiKayKpwUKuIP0FCbzh+Z\n2DTXjTNijk72Qzrnqkkzu/O6spqr6NSyTsdmwMPGbz80zdADV8Df5mKnxKkr5A80YEFhZdnvH9t+\nWnCB5fN589lleT7+Yfezv168RZ68f3zeH43NZrOBiIhIJX6eLoCIiHoWBg8REamKwUNERKpi8BAR\nkaoYPEREpCoGDxERqYrBQ0REqmLwEBGRqvw9XYAIDQ0N2Lp1K7RaLYYPH46pU6d6uiQiImqD1/Z4\nioqK8O6772LFihUtjhsMBrz88st46aWXsH37dgDA5cuX8eijjyIzMxNnzpyR9XlGo1FxzWTHthSH\nbSkO21IcpW3ptcETGhqKzMxMWK1W6VhVVRVyc3OxZs0abNiwATdv3kRJSQkeeOABDBo0CBcvXkRg\nYKCsz+MPpThsS3HYluKwLcXx2eAZNWoUdDpdi2OnTp3ChAkTEBQUBACYMWMGSkpKAADnz5/Hnj17\nsHjxYrVLJSKiTuhWczxmsxlhYWHS/fDwcNTW1qK2thYFBQVYvny5B6sjIiJ3aLx9d+ply5Zh06ZN\nAIDvv/8e169fxxNPPAHA3t07ePAgYmJicOHCBYSEhMDf3x+ZmZku38toNLboImZkZHT9N0BE5INy\nc3Ol23q9Hnq93u3Xen2PxzkXx40bh/Xr12PevHkICgpCfn4+kpKSkJSU5NZ7uWqcyspKAIBOp4PZ\nbBZXeA/GthSHbSkO21Kc6OhoRX+4e33waDQa6XZYWBjmz5+PtWvXwt/fH6NGjXI7dIiIyDt4/VBb\nV2OPRzy2pThsS3HYluJER0crer3XrmojIiLf1CODx2g0tpgYIyKizsnNzZV9Pg+H2jjUJhzbUhy2\npThsS3E41EZERN0Kg4eIiFTF4CEiIlX1yODh4gIiImW4uEABLi4Qj20pDttSHLalOFxcQERE3QqD\nh4iIVMXgISIiVTF4iIhIVQweIiJSVY8MHi6nJiJShsupFeByavHYluKwLcVhW4rD5dRERNStMHiI\niEhVDB4iIlIVg4eIiFTF4CEiIlX1yODhcmoiImW4nFoBLqcWj20pDttSHLalOFxOTURE3QqDh4iI\nVMXgISIiVTF4iIhIVQweIiJSFYOHiIhUxeAhIiJVdXnwWK1WNDc3d/XHdApPICUiUkbJCaTCgicr\nKwtVVVUAgPLychQVFeHu3buwWq3Yu3cvfvjhB1EfpZher0dGRoanyyAi6rYyMjKg1+tlvdZfVBEr\nV65EWFgYAGDfvn2wWq3Ytm0bkpKSMG3aNJSVlWHMmDGiPo6IiLopYcEzZMiQFreTk5MREBCAgoIC\nfPLJJy0eJyKinkvRXm3Hjh3DuHHj4OfXcsTOZrPh2LFjGDNmDIKCghQX2ZW4V5t4bEtx2JbisC3F\n8ehebW+99Rb++9//3nNco9HgoYce8vrQISIi9SleXGC1WkXUQUREPYTiOZ6ioiLk5eWhuroa/fv3\nx/jx4zFt2jT4+wubPiIiIh+iuMeTn5+PiIgIJCYmQqvVYvv27fjTn/6EiooKEfUREZGPUdwtWbRo\nEebMmSPdb2xsxLfffosNGzbgr3/9K/r27av0I4iIyIco6vEEBQUhMjKyxbHAwEDMmTMHK1aswM6d\nOxUVp6aCgl6eLoGIqEdQFDx6vR5nzpxx+VhcXJzXLjxwtWUOg4eIyH0e2zLniSeewKFDh3DgwAGX\nj3vrAgNumUNEpIzHtswZNGgQXnjhBbz33nv4/vvvMW3aNMTGxiIoKAhnz55FbW2tkrdXxcaNOgDA\nO+8EwmKx3544sRHJyRZPlkVE5LMU7Vzg8PPP
P+Pzzz/HsWPHpJ2oR4wYgRdffNHrFxc4di54//1+\nWLnypoer8Q08Q1wctqU4bEtxlO5cIGQsLCoqCllZWbh9+zYqKysREhKCqKgoEW9NREQ+RugkTHBw\nMIYNGybyLVUzebJ3LoQgIvI1vALprxg8RETqYPAQEZGqGDxERKQqBg8REamKwUNERKoSHjyVlZW4\nfPmy6LclIiIfIWs5dX19PdavX4/MzExp+XRdXR3efPNNnD9/HgAwZMgQvPLKK15/AikREalLVo8n\nLy8PJpOpxSagmzdvxvnz5zF79mwsWrQIV69eRU5OjrBCiYjIN8jq8RQXF2P69OkYMWIEAODatWso\nLi7GrFmzsHjxYgDAnTt3UFhYKKxQkYxGI4xGo8uNQg0GLfdpIyLqQG5uLvR6vayNQmUFT01NTYvr\n8BQUFMDPzw/z58+XjoWHh3vtvkjtNdaRI4EMHiKiDijZ4V/WUNuAAQOkuZzq6mrs3bsXiYmJCAsL\nk55TXl6Ofv36yS6MiIh8k6weT3p6OrZu3QqTyYSamho0NDRg4cKF0uNlZWU4fPgw5s6dK6zQrmQw\naHH8uBYWiw7vvKOTjvPyCERE4skKnpkzZ8JqtaKwsBBRUVFYuHAhBg8eLD1eVlaG+++/H/PmzRNW\naFdKTrZg5kyLNDSYleWdQ4RERL5A9u7UjzzyCB555BGXj82aNQtTp05FUFCQ7MI8jYsMiIi6hqIT\nSK1WK4xGI7777jvcuHFDOl5eXo7evXsrLs4TJk5sBGBfZEBEROLJDp4zZ85g5cqVeP3117FlyxaY\nTCYA9tB56aWXsHnzZmFFqsm5l2MwaD1YCRGRb5IVPOXl5XjzzTfRq1evFkuoAWDo0KHIzMzEgQMH\ncPDgQRE1qsZg0GLjRh02brQvMnDcZgAREYkja45n586d0Ol02LBhAywWC7744osWjz/88MM4efIk\n9u/fj2nTpomoUxXJyZZ75nW40ICISCxZwfPjjz9i5syZ6NOnD27duuXyOXq9Hrm5uYqK8wSDQYsj\nRwJhMGhRVPTbPA+XVhMRiSEreJqamjpcsVZbWyurIE9z9HomTtTiyBELezxERILJmuMZOHAgTp48\n2ebjdXV1OHjwIIYOHSq7ME/jIgMioq4hK3hmz56NH3/8Edu2bYPFYv8FrdFoAACnTp3Cq6++iurq\nasyePVtcpR7Qemm1waBlCBERKSRrqG3KlCm4ePEivv76a+zbtw+A/bII9fX1aGy0/7KeO3cukpKS\nxFXqAa3ndBwBxLkeIiL5ZO9c8Mc//hEJCQnYu3cvLly4gOrqagQHByMuLg6zZ89GfHy8yDpV51hk\nAEDav81g0CImxtrey4iIqAMam81m83QRnlRZWQkA0Ol0Li/jYDBosWZNX4SG2qRVbhMmNCIw0IZn\nnqlj78eFttqSOo9tKQ7bUpzo6GhFr1e0ZU5PkJxswezZjcjLu4lVq8xYtcqMvLybSEy8i+RkC+d8\niIg6SdZQW2lpqdvPjYuLk/MRXsWxyMAV53kfbixKRNQxWcGzbt06t5+bk5Mj5yO8iiNM+vZthtEY\nIG2pA9iH4kymXkhOtvDqpUREbpAVPAsWLJCWTztrbm5GUVERKisrkZ6ejtjYWMUFdgWj0Qij0djp\nS7cuXVoPwB42EybYh9js8z6N2LhRB5Opl/Q4A4iIfFlubi70ej30en2nXysreNr7hb1o0SLs2rUL\nu3fvxqxZs+S8fZeT21gOyckW5ObaV7eNGmWRFh04vppMvWA0NmDp0nophBhGRORLOvuHu7MuWVzw\n2GOPYcyYMdixY0dXvL1XyMi4jawsM2bPbpQWHKxaZcZ771UjOdmC2lp70zrmgBz7vxER9XRdtqpt\n5MiRnVqE0N04ei+OhQeOXQ0c8z+O247hN6DlDgjOX4mIehLZJ5B2pKyszOU8kK9xBFBysgVGYwNq\na/2k4bea
Gg3OndPCaAxAaWkA4uLuAkCLxQiO13Iojoh6ClnBc+jQoTYfq6+vxw8//IATJ05g6tSp\nsgvrThyB4Vh8AACzZzciK8uM558PQ0yMFX37NqOoKBCzZjXAZOol9XYcK+EcXxlAROTrZAVPdnZ2\nh89JSEjA4sWL5bx9tzdxYqPUm4mJsSIrywyTKQxAI0ymXigqCsTGjfbFCM69IID7wRGR75MVPMuX\nL3d5XKPRQKfTITo6GpGRkYoK686cQ8MxB5SRcRtHjgRK1/eJibGipkaD0lKt1BsC7AHkGIpz9IrY\nEyIiXyIreLrT5aw9xXnux/kr8FsvCLAPyU2c2IiNG+3Hd+4MhvN5QTEx1nuG4lq/HxFRd8K92lTU\neiWc42tyskXa9XrChMZ7ej/OjhwJdLk6jivkiKi76LJVbdQ2V72gjIzbSE62wGQKQ02NvWdTVBSI\noiJ7sFRU+EtB5Agp54UJjvdzPmG19TEiIm/QYfC4s5CgLRqNps35ILJrHUIZGbcRE2OfC9q40b4f\n3MSJjVizpq+0MKGoyN4bqq31u2cDU1dh5GqYjmFERJ7SYfC0t3TaHQyeznEVBo5LMziWZzt6PTt3\nBuL558NQUWH/ZzQYtC7DyKGjMCIiUkOHwfPBBx+oUQc5aT0X5Hw7JsYqLU5wfG0vjHJyerscpnPg\nMB0Rqa3D4ImIiFCjDnLB+Re+qzBycBVG7Q3TdWbOiD0jIhKNiwu6GVfnCLV3oTrnYTo5c0YO7BkR\nkSiKgufnn39GRUUF6uvrWxxvbGxEdXU1SktL8dprryn5CGqHq9VxrsKodZi4O2fU1jBdRz2j4OBe\nSEjouu+biLo3WcHT0NCA7OxsFBcXt/mc4OBgDB8+XHZhJI+rMOrsnJHSnpFWaw8eXouIiFyRFTw7\nduzA8ePHsWTJEkRFReGNN97A8uXLMWzYMJSVlSEvLw/x8fHIzMwUXS8p4O6ckfNz5PSMhg797VpE\n7pxnREQ9i6zgKSkpQVpaGtLT0/HLL78AsPdwBg4ciIEDByIxMRGrV69GREQE5s6dK7RgEsvdYbrO\n9ozKy/u16Bm13oWbc0ZEPZesLXPq6uoQFRUFAAgMtP8Cqa2tlR7X6XRITU3Fd999J6BEUovzMF3r\nIbuJExvb7B05ekZ5eTexcOFtTJrUhJgYK0pLA6QL4+3dG3TPhfEcnK/S6uqKra62BuIWQUTdl6we\nT0REBMrLywHYezrBwcEoLS3FjBkzpOcEBATg1q1bYqokj+tMzyg21oqVK6ul+45duG02G86d0yI/\nPxBVVfYAau+k1/Z6Se3txsBhPCLvJit4kpOTsWvXLiQkJCAlJQVxcXEoLi7GsWPH8OCDD+Lq1avY\nv39/j740gi/raAFDcLA9VFrvwu0YnnN3zqj1tYpccRVG7g7jcTiPyDNkBc/8+fNx+vRplJWVISUl\nBfPmzcOJEyfw1ltvQavVwmKx/2fOysoSWix5v+RkC3S6QJjNLXtGjjBwaGvOyBFGe/YEttlL6mg3\nBmft9ZYYUESeISt4AgIC8Nprr6Gurg4AMHz4cLz00kvYvXs3bt26haioKMyZMwcjR44UWix1L+6e\nZ+SsvV5Se4sZHMG0bVuwW8N4Du4M53UmoFofI6J7yQqe3NxcpKWloX///tKxsWPHYuzYscIKI9/S\n3oXx2pozat1Lcn6v1rsxuDuM5wimvXuDUFoa4NZwHtD5gOIcFFHbZAVPXl4edu3ahTFjxiA1NRVJ\nSUnw9+fuOyRPe3NGgHu7MThrb+m3I5hiYqyw2WxtDufJDajW3OlBOd9ufayg4LeTcVu3C1F3JSst\nsrKycPjwYZw4cQJnzpxBnz59MHnyZKSmpmLw4MGia6Qeqr1eUnu7MbQXSoB7w3mdDSjnIb7OzkE5\nvh9XweQIHiXDfRz2I28jK3iSkpKQlJSExsZGHD16FAaDAfv378eePXswdO
hQpKWlYdKkSQgODhZd\nb5vq6+uRk5ODqqoqrF69WrXPJc9yFUbu9pbaG84D3A8opXNQev3dTn3PnR3ukxNaRF1J0fhYYGAg\nUlJSkJKSgtu3b6OkpAQlJSXYtm0btm3bhvHjx+OZZ56R/f5FRUU4cuQILly40OJKqAaDAV999RVs\nNhvi4uLw1FNPoXfv3njyySexdetWJd8S+Zj2ekuiAsrVZ3Y0B3X2rP2/XmlpAEpLA1wGk+P2xx8P\n6NSCCVe6ehEFe1rUGcImZoKDgxEZGYmoqChcunQJVVVVOHfunKL3DA0NRWZmJl588UXpWFVVFXJz\nc/G3v/0NQUFBeO+991BSUoKkpCQEBQUp/Taoh3FnOK8zAdX6mCvOc1Dt9Zyyssx4//1+WLnyplsL\nJlyFltw5KiW9KjlzWVwZ2LMoDp6LFy+isLAQBoMBN2/ehL+/Px588EEsXboU8fHxit571KhR9xw7\ndeoUJkyYIIXMjBkzcPDgQSQlJSn6LKL2dDaglMxBudLRggmRc1Rye1WuiAgw9rp8j6zguXbtGgoL\nC3H48GFUVFQAAAYPHozf//73mDx5Mvr06SO0SGdmsxlhYWHS/fDw8Bb7xBF5E3d7UM63nY9Nnmy9\n55i7OjtHJbdX5W5Pq71FFu0R1etytUKQvS/PkBU8zz77LAAgJCQE6enpSE1NxdChQ4UW1pbQ0FBc\nv35dul9dXY3Q0FC3Xms0GmE0GqX7GRkZ0Ons/wG1Wq10m5RhW7pv5kzHrcBfbwe2ODZ9ei9YLLp7\njul0Okyfbv9l77jd3rGCAvttx7+NVquVnuc4FhvbC7GxwMsvWxEb24iXX7bgr3+1P89x251jy5YF\nYvBgG/z8rDh7NgBmcy+YTPb9iLdvD8GNG37S17aO7dwZApPJD1evBuHSJQ0GD7ZBp0O79bd17Phx\nLWbOtIfH5Mk6HD+u/bXtLdJj7h4rKLC37+TJVhQU9JK+dnTM1eOOPyq6q9zcXOm2Xq+HXq93+7Wy\ngmf06NFIS0tDUlISAgIC5LxFp9hsNun2uHHjsH79esybNw9BQUHIz89vMcy2YsWKNt/HVeOYzfa/\nCHU6nXSblGFbiuOqLRMSALMZ0lVeHbfbO3b7thZmswWJib99tT/P9TGLxf65Fovu12Nmt49FRQEr\nV9qPpafDrWHBtlYGlpfbUFQUiMOHgX37/H5dbOEv9bAct10d+7//85N6XRYLcPWqTfb35Dh24IDu\n138D+23H146OuXr89u1GYQs51KbT6ZCRkSH79bKC59VXX5X9gXJoNBrpdlhYGObPn4+1a9fC398f\no0aN4vwOUQe6ehGFiNWArettb2WgvPmtAOzbN8CtoUK552V1htzdMLpiHsxxTC3dYruBTZs2tbjv\nWMJNRF2js4solM5ldXZlYEdczW85VgjK7X11dF6Wq9DasiUEZrNfi8eV7obhisjVh2oEVLcIHiLq\nnkQEmKd6Xc51KOl9iVhpqFZPzN2AUqpHBo9jkYGSMUoiUoeoXperFYJq9L6cyd0NQ3RPTMQy+tzc\n3E4vKnDokcEjt7GIqPtxhM/kyVaYzZ7rfbV+3Bt6YnKW0RsMWhw5AvUXF1RVVbW4JAIRUU+gJLTa\nelzEQg4l5JycDEQr+kxZwbNq1So8+eSTmDVrlqIPJyLqqeTuhtEV82BqkxU8er0eZWVlomshIiIZ\nRK8+7OqA0ticz850U3V1NTZu3IgHHngACxYsUPXyB6JVVlYC4EmPIrEtxWFbisO2FCc62gNDbWfP\nnsXDDz+Mo0ePYuXKlYiPj0dkZCT8/Pzuee7jjz+uqMCuwFVtRETKKFnVJqvHs2jRIrefm5OT09m3\nVxV7POKxLcVhW4rDthTHIz2etWvXKvpQIiLquWQFT1xcnOg6iIioh7h3UoaIiKgLKdq54Oeff0ZF\nRQXq6+tbHG9sbER1dTVKS0vx2muvKf
kIIiLyMbKCp6GhAdnZ2SguLm7zOcHBwRg+fLjswroSV7UR\nESmj+l5tO3bswPHjx7FkyRJERUXhjTfewPLlyzFs2DCUlZUhLy8P8fHxyMzMlPP2XY57tRERKaP6\nXm0lJSVIS0tDeno6fvnlFwD2Hs7AgQMxcOBAJCYmYvXq1YiIiMDcuXNlF0dERL5H1uKCuro6REVF\nAQACA+07rNbW1kqP63Q6pKam4rvvvhNQIhER+RJZwRMREYHy8nIA9p5OcHAwSktLWzwnICAAt27d\nUl4hERH5FFnBk5ycjMLCQhQWFgKwn9dTXFyMY8eOAQCuXr2K/fv3IzIyUlylRETkE2TN8cyfPx+n\nT59GWVkZUlJSMG/ePJw4cQJvvfUWtFotLBb7bqdZWVlCiyUiou5P1l5tAHD37l3U1dUhPDwcAHDm\nzBns3r0bt27dQlRUFObMmYORI0cKLbYrcK828diW4rAtxWFbiuORvdoA+xyOI3QAYOzYsRg7dqyi\nYtTC83iIiJRRfXdqX8Iej3hsS3HYluKwLcVR2uPhXm1ERKSqDofasrOzZb+5RqPB8uXLZb+eiIh8\nT4fBc+jQIUUfwOAhIiJnHQbPBx98oEYdRETUQ3QYPBEREWrUQUREPQQXFxARkapkncfTel+29vAy\n2URE5ExW8Kxbt87t5+bk5Mj5iC7FE0iJiJRR/UJwCxYsgEajued4c3MzioqKUFlZifT0dMTGxsp5\n+y7HC8ERESmj+oXg2vvARYsWYdeuXdi9ezdmzZoluzAiIvJNXbK44LHHHsOYMWOwY8eOrnh7IiLq\nxrpsVdvIkSM7tQiBiIh6hi4LnrKyMpfzQERE1LPJmuNpbxud+vp6/PDDDzhx4gSmTp0quzAiIvJN\nsoLHnY1DExISsHjxYjlvT0REPkxW8LS18adGo4FOp0N0dDQiIyMVFUZERL5JVvBMmzZNcBlERNRT\nyFpccP78edTU1HT4PJvNhtLSUpw4cQJWq1XORxERkY+R1eP5y1/+gqeeegrDhw/HuXPnEBQUhISE\nBISFhbV43rvvvovi4mIAwIgRI7BmzRr4+8v6SKG4ZQ4RkTKqb5kTGxuLPXv24OOPP4bNZgMABAcH\n47nnnkNCQgIAoKKiAsXFxfjd736HcePG4ZNPPsHevXvx6KOPyvlIobhlDhGRMkr+cJc11DZhwgRc\nuXIFCxcuxNtvv43nn38eWq0WH330EZqamgAAJpMJgH0LnfT0dCQmJsJgMMgulIiIfIOs4Dlw4AAm\nTZqEBQsWICYmBhMnTsTixYtRXV2N//3vfwCAuro6AEBUVBQAey+poqJCUNlERNRdyQqeiooKDB06\ntMWxIUOGAAAsFgsASIsJtFotACAkJETqDRERUc8lK3j69u2Lixcvtjjm6OmEh4cDAKqqqgAAZrMZ\nAFBbWwudTie3TiIi8hGy53gKCwuRl5cHk8mEo0eP4l//+hf8/Pzw5Zdf4siRI8jPz0f//v1RVFSE\n5uZmHD9+HAMHDhRdPxERdTMam2NZWic0Njbi73//O86ePSsdu++++/Dcc8/hnXfeQU1NDYKCgrBm\nzRq8/vrr0Gg0uHPnDpYvX+51J59WVlYCAHQ6ndQ7I2XYluKwLcVhW4oTHR2t6PWygsfhzJkzuHTp\nEkJCQjB+/HiEhISgtrYWRqMRw4YNQ0REBAwGA7744guMGDECS5Ys8bodqxk84rEtxWFbisO2FMej\nweMLGDzisS3FYVuKw7YUR2nwdNn1eIiIiFxRtH/NlStXcOXKFVgsFrTVceI1eYiIyJms4KmursbG\njRtx/vz5Dp/L4CEiImeygmfr1q04f/484uPjMXbsWPTp00d0XURE5KNkBc/p06cxfvx4rFq1SnQ9\nquDu1EREyqi+O7VGo8EDDzwg56VegbtTExEpo/ru1CNHjnRrfoeIiKg1WcHz5JNP4tSpU9izZ4/o\neo
iIyMfJGmorKytDSkoKPvvsMxw8eBCJiYnw83OdYY8//riiAomIyLfICp5NmzZJty9evHjPTtXO\nGDxERORMVvCsXbtWdB1ERNRDyAqeuLg40XUQEVEPwb3aiIhIVW71eNatWweNRoMXXngBOp0O2dnZ\nbr25RqPB8uXLFRVIRES+xa3gKS0tBQDcvXsXAHDo0CG3P4DBQ0REztwKHsdigr59+wIAPvjgg66r\niIiIfJpbwdN6MUFERESXFENERL5P+OKCyspKXL58WfTbEhGRj5C1nLq+vh7r169HZmYmhg0bBgCo\nq6vDm2++Ke3hNmTIELzyyivS8BwREREgs8eTl5cHk8kEq9UqHdu8eTPOnz+P2bNnY9GiRbh69Spy\ncnKEFUpERL5BVo+nuLgY06dPx4gRIwAA165dQ3FxMWbNmoXFixcDAO7cuYPCwkJhhRIRkW+Q1eOp\nqalBZGSkdL+goAB+fn6YP3++dCw8PBxms1l5hURE5FNkBc+AAQOkuZzq6mrs3bsXiYmJCAsLk55T\nXl6Ofv36iamSiIh8hqyhtvT0dGzduhUmkwk1NTVoaGjAwoULpcfLyspw+PBhzJ07V1ihIvHS10RE\nyqh+6euZM2fCarWisLAQUVFRWLhwIQYPHiw9XlZWhvvvvx/z5s2T8/Zdjpe+JiJSRskf7hqbzWYT\nWIvkzp076N27d1e8tVCVlZUAAJ1OxzkpQdiW4rAtxWFbihMdHa3o9V22O3V3CB0iIlIfL4tARESq\nYvAQEZGqGDxERKQqBg8REamKwUNERKpi8BARkaoYPEREpCoGDxERqYrBQ0REqmLwEBGRqhg8RESk\nKgYPERGpisFDRESqYvAQEZGqGDxERKQqBg8REamKwUNERKpi8BARkaoYPEREpCoGDxERqYrBQ0RE\nqmLwEBGRqhg8RESkKgYPERGpisFDRESq8vd0ASI0NTVhy5Yt6NWrF6ZMmYKRI0d6uiQiImqD1wZP\nUVERjhw5ggsXLiA7O1s6bjAY8NVXX8FmsyEuLg5PPfUU/vOf/yA1NRUjR45EdnY2g4eIyIt57VBb\naGgoMjMzYbVapWNVVVXIzc3FmjVrsGHDBty8eRPFxcW4fPkyhg4dCgDw8/Pab4mIiODFwTNq1Cjo\ndLoWx06dOoUJEyYgKCgIADBjxgwcPXoUzc3N0Gg0niiTiIg6yWuDxxWz2YywsDDpfnh4OGpqajBo\n0CD89NNPsNlsaG5u9mCFRETUEa+d43ElNDQU169fl+5XV1cjLCwMU6ZMwaeffor8/HykpaW1+Xqj\n0Qij0ShAZMzuAAAF8ElEQVTdz8jIQHR0tHS/dQ+L5GNbisO2FIdtKU5ubq50W6/XQ6/Xu/9im5db\nunSpdPuXX36xrVq1ynbnzh2bzWazvf/++7bi4mIhn5OTkyPkfYhtKRLbUhy2pThK29LrezzOczdh\nYWGYP38+1q5dC39/f4waNQpJSUkerI6IiDrL64Nn06ZNLe6npKQgJSXFQ9UQEZFS3WpxQVfq1Pgk\ntYttKQ7bUhy2pThK21Jjs9lsgmohIiLqEHs8RESkKgYPERGpyusXF6jB1f5v5L7s7GxUVlYiICAA\nAPDoo4+if//++PTTT9HU1IS+ffvimWeeQXBwsIcr9T6u9iS8ePEiPvvss3va7vbt2/joo49QXV2N\n5uZmLF26FEOGDPHsN+BFXLXlwYMHsXv3bunE89GjR2PBggVsyw4YDAZ888036NWrF8LDw7FixQpU\nVlaK+7kUsaa7O7tx44btueeek84Nevfdd4WdG9RTrFu3znb37t0Wx55//nnbpUuXbDabzfbtt9/a\nPvnkE0+U5vVKS0tttbW1tqefflo61lbb/fOf/7Tt2bPHZrPZbJcuXbKtXr1a/YK9mKu2zMnJsZ0+\nffqe57It22Y2m21//vOfpf/T27dvt3399ddCfy57/FCbq/3fSkpK
PFxV91JfX4/Nmzdj7dq12Lp1\nKy5duoQ+ffpg0KBBAIC0tDScPHnSw1V6p9Z7ElZWVrbZdidPnsT06dMBAIMGDULv3r1b7OTR07na\n3/HGjRs4fPgw1q1bh7fffhs3btwAwLZsT58+fbB+/Xr4+9sHxKxWK7RardCfyx4/1OZq/7fa2loP\nVtT9xMbG4vHHH8d9992Hf//73/jss89atKm/vz/30HNTXV1dm21ntVql4UzAfkJ1TU0N7r//ftXr\n7C5iYmIwbNgwxMXFobS0FP/4xz+wfv16tmUH/P39cffuXezYsQNNTU2IiYkR+nPZ44PH1f5voaGh\nHqyo+3n66ael2xMmTIDRaGwR3k1NTdJfT9S+0NBQ1NTUSPed2y4wMLDFff6sdmzOnDnS7bi4OKnH\nw7Zs361bt7Bp0yY88sgjiI+Px7Vr14T+XPb4obZx48bh6NGjaGhoAADk5+fjoYce8nBV3YfFYkFO\nTg6ampoA2IcuY2Nj0dDQAJPJBAA4dOgQEhISPFmm17P9ejrdgAED0NjYiCtXrgBo2Xbjxo1Dfn4+\nAODKlStoaGjgX+gu2JxOTdy9ezdu3rwJAPjpp5/Qr18/AGzL9ty9excffvghli1bhvj4eADify55\nAimAwsJCfPnll9L+b3/4wx88XVK38s033+DgwYMICQlBeHg4nn76aVy9ehUff/wx/Pz80KdPH65q\n68CyZcuk7aEuXbqELVu23NN29fX1+PDDD1FbWwuNRoPMzEwMHjzYw5V7H+e2PHv2LD7//HMEBATA\n398fS5cuRWRkJNuyHSdOnMCWLVsQGRkpHRs9ejQefPBBYT+XDB4iIlJVjx9qIyIidTF4iIhIVQwe\nIiJSFYOHiIhUxeAhIiJVMXiIiEhVDB4iIlIVg4eIiFTF4CHyUnV1dXj22Wfx1VdfeboUIqEYPERe\nymKxoHfv3tzKhXwOt8whIiJVscdDRESqYvAQEZGqeHUuIi9jMpnw7bffoqqqClOmTEFycrKnSyIS\nij0eIi/S1NSEPXv2YMmSJRg7diy++OILT5dEJByDh8iLlJSUIDk5GRqNBsePH8fAgQM9XRKRcAwe\nIi8yYsQIjB49GteuXcPZs2eRmprq6ZKIhGPwEHmRfv36AQAOHDiAiIgIjB071sMVEYnH4CHyMs3N\nzTh06BDS0tIAANevX/dwRURiMXiIvMypU6dQW1uL1NRUWK1W7Nu3z9MlEQnF4CHyMpWVlRg0aBDC\nw8Oxf/9+TJkyxdMlEQnF4CHyMpMmTUJwcDC2bduG4OBgDBo0yNMlEQnFvdqIiEhV7PEQEZGqGDxE\nRKQqBg8REamKwUNERKpi8BARkaoYPEREpCoGDxERqYrBQ0REqmLwEBGRqhg8RESkqv8HcjWhpLxb\n+0oAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7f8508aeccf8>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7min 19s, sys: 32.2 s, total: 7min 51s\n",
"Wall time: 3min 11s\n"
]
}
],
"source": [
"%%time\n",
"sembei.compute()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:40:54.526262",
"start_time": "2017-03-05T22:40:52.359718"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>190</th>\n",
" <th>191</th>\n",
" <th>192</th>\n",
" <th>193</th>\n",
" <th>194</th>\n",
" <th>195</th>\n",
" <th>196</th>\n",
" <th>197</th>\n",
" <th>198</th>\n",
" <th>199</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>バチョ</th>\n",
" <td>0.077998</td>\n",
" <td>-0.037611</td>\n",
" <td>-0.020097</td>\n",
" <td>-0.066258</td>\n",
" <td>0.010417</td>\n",
" <td>-0.140067</td>\n",
" <td>-0.106383</td>\n",
" <td>0.088268</td>\n",
" <td>-0.054278</td>\n",
" <td>-0.086803</td>\n",
" <td>...</td>\n",
" <td>0.020286</td>\n",
" <td>-0.104530</td>\n",
" <td>0.010540</td>\n",
" <td>-0.021246</td>\n",
" <td>0.008079</td>\n",
" <td>0.038633</td>\n",
" <td>-0.044461</td>\n",
" <td>0.046050</td>\n",
" <td>-0.068043</td>\n",
" <td>-0.162837</td>\n",
" </tr>\n",
" <tr>\n",
" <th>語)</th>\n",
" <td>0.056905</td>\n",
" <td>-0.000030</td>\n",
" <td>0.062438</td>\n",
" <td>-0.011356</td>\n",
" <td>0.052454</td>\n",
" <td>-0.033687</td>\n",
" <td>0.019676</td>\n",
" <td>-0.020970</td>\n",
" <td>0.055687</td>\n",
" <td>-0.087037</td>\n",
" <td>...</td>\n",
" <td>0.127746</td>\n",
" <td>-0.132135</td>\n",
" <td>0.087781</td>\n",
" <td>-0.057754</td>\n",
" <td>0.124884</td>\n",
" <td>0.074581</td>\n",
" <td>0.011535</td>\n",
" <td>-0.043010</td>\n",
" <td>-0.049325</td>\n",
" <td>0.105588</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0-02</th>\n",
" <td>0.037308</td>\n",
" <td>0.145455</td>\n",
" <td>-0.045877</td>\n",
" <td>0.030019</td>\n",
" <td>-0.032574</td>\n",
" <td>0.047172</td>\n",
" <td>0.013472</td>\n",
" <td>0.036119</td>\n",
" <td>0.029550</td>\n",
" <td>0.032121</td>\n",
" <td>...</td>\n",
" <td>0.080449</td>\n",
" <td>0.019541</td>\n",
" <td>0.141052</td>\n",
" <td>-0.087551</td>\n",
" <td>0.016398</td>\n",
" <td>-0.012597</td>\n",
" <td>0.074907</td>\n",
" <td>0.028787</td>\n",
" <td>0.077165</td>\n",
" <td>-0.015193</td>\n",
" </tr>\n",
" <tr>\n",
" <th>lu-r</th>\n",
" <td>0.014693</td>\n",
" <td>0.018605</td>\n",
" <td>0.031527</td>\n",
" <td>-0.042050</td>\n",
" <td>0.134051</td>\n",
" <td>0.138170</td>\n",
" <td>-0.100017</td>\n",
" <td>0.001874</td>\n",
" <td>0.014674</td>\n",
" <td>0.020066</td>\n",
" <td>...</td>\n",
" <td>-0.053971</td>\n",
" <td>0.064431</td>\n",
" <td>-0.076410</td>\n",
" <td>-0.051773</td>\n",
" <td>0.129474</td>\n",
" <td>0.167587</td>\n",
" <td>-0.199108</td>\n",
" <td>-0.071694</td>\n",
" <td>-0.073959</td>\n",
" <td>-0.077074</td>\n",
" </tr>\n",
" <tr>\n",
" <th>* ×</th>\n",
" <td>0.103788</td>\n",
" <td>0.006394</td>\n",
" <td>0.128155</td>\n",
" <td>-0.055478</td>\n",
" <td>-0.057030</td>\n",
" <td>-0.013949</td>\n",
" <td>-0.009306</td>\n",
" <td>0.048166</td>\n",
" <td>-0.165246</td>\n",
" <td>0.062002</td>\n",
" <td>...</td>\n",
" <td>0.031081</td>\n",
" <td>0.026098</td>\n",
" <td>0.059643</td>\n",
" <td>-0.044120</td>\n",
" <td>0.010884</td>\n",
" <td>0.020322</td>\n",
" <td>0.037533</td>\n",
" <td>0.014124</td>\n",
" <td>-0.088021</td>\n",
" <td>-0.008631</td>\n",
" </tr>\n",
" <tr>\n",
" <th>放送(A</th>\n",
" <td>0.033215</td>\n",
" <td>0.004244</td>\n",
" <td>0.020444</td>\n",
" <td>-0.012796</td>\n",
" <td>0.030776</td>\n",
" <td>0.006534</td>\n",
" <td>-0.063223</td>\n",
" <td>-0.009870</td>\n",
" <td>-0.015151</td>\n",
" <td>-0.019767</td>\n",
" <td>...</td>\n",
" <td>-0.001466</td>\n",
" <td>-0.028490</td>\n",
" <td>0.042410</td>\n",
" <td>-0.029716</td>\n",
" <td>-0.039965</td>\n",
" <td>0.058726</td>\n",
" <td>-0.016588</td>\n",
" <td>0.000103</td>\n",
" <td>0.004351</td>\n",
" <td>0.068114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>南條</th>\n",
" <td>0.092183</td>\n",
" <td>0.025092</td>\n",
" <td>-0.040878</td>\n",
" <td>-0.210191</td>\n",
" <td>-0.075419</td>\n",
" <td>0.045938</td>\n",
" <td>-0.019855</td>\n",
" <td>0.007060</td>\n",
" <td>-0.114902</td>\n",
" <td>0.094371</td>\n",
" <td>...</td>\n",
" <td>-0.053765</td>\n",
" <td>0.010106</td>\n",
" <td>0.100130</td>\n",
" <td>0.042323</td>\n",
" <td>0.100870</td>\n",
" <td>-0.067852</td>\n",
" <td>0.063837</td>\n",
" <td>-0.159801</td>\n",
" <td>0.059474</td>\n",
" <td>-0.039502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>、ソクラテス</th>\n",
" <td>0.098753</td>\n",
" <td>-0.069497</td>\n",
" <td>-0.128208</td>\n",
" <td>-0.042553</td>\n",
" <td>-0.016165</td>\n",
" <td>0.023803</td>\n",
" <td>0.017016</td>\n",
" <td>-0.036826</td>\n",
" <td>-0.002750</td>\n",
" <td>0.006691</td>\n",
" <td>...</td>\n",
" <td>0.066177</td>\n",
" <td>0.111697</td>\n",
" <td>0.058886</td>\n",
" <td>-0.041210</td>\n",
" <td>0.048598</td>\n",
" <td>0.025722</td>\n",
" <td>0.046111</td>\n",
" <td>0.028791</td>\n",
" <td>-0.068115</td>\n",
" <td>0.025351</td>\n",
" </tr>\n",
" <tr>\n",
" <th>*1983年</th>\n",
" <td>0.074382</td>\n",
" <td>0.064198</td>\n",
" <td>0.026473</td>\n",
" <td>-0.097615</td>\n",
" <td>0.036517</td>\n",
" <td>-0.008862</td>\n",
" <td>0.090672</td>\n",
" <td>-0.014979</td>\n",
" <td>-0.077349</td>\n",
" <td>0.021358</td>\n",
" <td>...</td>\n",
" <td>-0.129474</td>\n",
" <td>0.024275</td>\n",
" <td>0.068709</td>\n",
" <td>-0.052284</td>\n",
" <td>0.059597</td>\n",
" <td>0.026180</td>\n",
" <td>0.068189</td>\n",
" <td>-0.032604</td>\n",
" <td>0.016550</td>\n",
" <td>0.003851</td>\n",
" </tr>\n",
" <tr>\n",
" <th>現状</th>\n",
" <td>0.127993</td>\n",
" <td>-0.071221</td>\n",
" <td>-0.120712</td>\n",
" <td>-0.151789</td>\n",
" <td>0.030281</td>\n",
" <td>0.012364</td>\n",
" <td>0.144940</td>\n",
" <td>-0.043020</td>\n",
" <td>0.138425</td>\n",
" <td>0.050534</td>\n",
" <td>...</td>\n",
" <td>-0.089055</td>\n",
" <td>-0.077025</td>\n",
" <td>0.059590</td>\n",
" <td>0.060203</td>\n",
" <td>-0.063410</td>\n",
" <td>0.031642</td>\n",
" <td>-0.124048</td>\n",
" <td>0.054329</td>\n",
" <td>0.080577</td>\n",
" <td>-0.162585</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 200 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"バチョ 0.077998 -0.037611 -0.020097 -0.066258 0.010417 -0.140067 -0.106383 \n",
"語) 0.056905 -0.000030 0.062438 -0.011356 0.052454 -0.033687 0.019676 \n",
"0-02 0.037308 0.145455 -0.045877 0.030019 -0.032574 0.047172 0.013472 \n",
"lu-r 0.014693 0.018605 0.031527 -0.042050 0.134051 0.138170 -0.100017 \n",
"* × 0.103788 0.006394 0.128155 -0.055478 -0.057030 -0.013949 -0.009306 \n",
"放送(A 0.033215 0.004244 0.020444 -0.012796 0.030776 0.006534 -0.063223 \n",
"南條 0.092183 0.025092 -0.040878 -0.210191 -0.075419 0.045938 -0.019855 \n",
"、ソクラテス 0.098753 -0.069497 -0.128208 -0.042553 -0.016165 0.023803 0.017016 \n",
"*1983年 0.074382 0.064198 0.026473 -0.097615 0.036517 -0.008862 0.090672 \n",
"現状 0.127993 -0.071221 -0.120712 -0.151789 0.030281 0.012364 0.144940 \n",
"\n",
" 7 8 9 ... 190 191 192 \\\n",
"バチョ 0.088268 -0.054278 -0.086803 ... 0.020286 -0.104530 0.010540 \n",
"語) -0.020970 0.055687 -0.087037 ... 0.127746 -0.132135 0.087781 \n",
"0-02 0.036119 0.029550 0.032121 ... 0.080449 0.019541 0.141052 \n",
"lu-r 0.001874 0.014674 0.020066 ... -0.053971 0.064431 -0.076410 \n",
"* × 0.048166 -0.165246 0.062002 ... 0.031081 0.026098 0.059643 \n",
"放送(A -0.009870 -0.015151 -0.019767 ... -0.001466 -0.028490 0.042410 \n",
"南條 0.007060 -0.114902 0.094371 ... -0.053765 0.010106 0.100130 \n",
"、ソクラテス -0.036826 -0.002750 0.006691 ... 0.066177 0.111697 0.058886 \n",
"*1983年 -0.014979 -0.077349 0.021358 ... -0.129474 0.024275 0.068709 \n",
"現状 -0.043020 0.138425 0.050534 ... -0.089055 -0.077025 0.059590 \n",
"\n",
" 193 194 195 196 197 198 199 \n",
"バチョ -0.021246 0.008079 0.038633 -0.044461 0.046050 -0.068043 -0.162837 \n",
"語) -0.057754 0.124884 0.074581 0.011535 -0.043010 -0.049325 0.105588 \n",
"0-02 -0.087551 0.016398 -0.012597 0.074907 0.028787 0.077165 -0.015193 \n",
"lu-r -0.051773 0.129474 0.167587 -0.199108 -0.071694 -0.073959 -0.077074 \n",
"* × -0.044120 0.010884 0.020322 0.037533 0.014124 -0.088021 -0.008631 \n",
"放送(A -0.029716 -0.039965 0.058726 -0.016588 0.000103 0.004351 0.068114 \n",
"南條 0.042323 0.100870 -0.067852 0.063837 -0.159801 0.059474 -0.039502 \n",
"、ソクラテス -0.041210 0.048598 0.025722 0.046111 0.028791 -0.068115 0.025351 \n",
"*1983年 -0.052284 0.059597 0.026180 0.068189 -0.032604 0.016550 0.003851 \n",
"現状 0.060203 -0.063410 0.031642 -0.124048 0.054329 0.080577 -0.162585 \n",
"\n",
"[10 rows x 200 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vectors = sembei.get_vectors(gamma=1e-6)\n",
"vectors.sample(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2017-03-05T22:40:57.705396",
"start_time": "2017-03-05T22:40:54.527765"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>プログラム</th>\n",
" <th>中国</th>\n",
" <th>倒す</th>\n",
" <th>数学</th>\n",
" <th>江戸時代</th>\n",
" <th>生成</th>\n",
" <th>確率</th>\n",
" <th>鉄腕アトム</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>プロセス</td>\n",
" <td>ドイツ</td>\n",
" <td>施す</td>\n",
" <td>言語学</td>\n",
" <td>戦国時代</td>\n",
" <td>実行</td>\n",
" <td>規範</td>\n",
" <td>火の鳥</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>アプリケーション</td>\n",
" <td>朝鮮</td>\n",
" <td>離す</td>\n",
" <td>哲学</td>\n",
" <td>1950年代</td>\n",
" <td>否定</td>\n",
" <td>慣習</td>\n",
" <td>新宝島</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>システム</td>\n",
" <td>スペイン</td>\n",
" <td>示す</td>\n",
" <td>科学</td>\n",
" <td>1960年代</td>\n",
" <td>拡張</td>\n",
" <td>形状</td>\n",
" <td>陽だまりの樹</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>プロセッサ</td>\n",
" <td>イタリア</td>\n",
" <td>計る</td>\n",
" <td>政治学</td>\n",
" <td>19世紀</td>\n",
" <td>理解</td>\n",
" <td>階層構造</td>\n",
" <td>ひぃ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ハードウェア</td>\n",
" <td>フランス</td>\n",
" <td>押す</td>\n",
" <td>物理学</td>\n",
" <td>鎌倉時代</td>\n",
" <td>観察</td>\n",
" <td>感情</td>\n",
" <td>フクちゃん</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>ソフトウェア</td>\n",
" <td>ヨーロッパ</td>\n",
" <td>向上させる</td>\n",
" <td>医学</td>\n",
" <td>17世紀</td>\n",
" <td>フェアに説明</td>\n",
" <td>儀式</td>\n",
" <td>バンパイヤ</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>データ</td>\n",
" <td>アメリカ</td>\n",
" <td>踏む</td>\n",
" <td>地理学</td>\n",
" <td>2000年代</td>\n",
" <td>定式化</td>\n",
" <td>個性</td>\n",
" <td>ウイングマン</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>端末</td>\n",
" <td>広東</td>\n",
" <td>貼る</td>\n",
" <td>生物学</td>\n",
" <td>平安時代</td>\n",
" <td>決定</td>\n",
" <td>感覚</td>\n",
" <td>リボンの騎士</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>プログ</td>\n",
" <td>琉球</td>\n",
" <td>検証する</td>\n",
" <td>宗教</td>\n",
" <td>室町時代</td>\n",
" <td>阻止</td>\n",
" <td>文字列</td>\n",
" <td>ふたりの誓い</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>コンテンツ</td>\n",
" <td>欧</td>\n",
" <td>拡張する</td>\n",
" <td>心理学</td>\n",
" <td>1970年代</td>\n",
" <td>優先</td>\n",
" <td>語尾</td>\n",
" <td>饗宴</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" プログラム 中国 倒す 数学 江戸時代 生成 確率 鉄腕アトム\n",
"0 プロセス ドイツ 施す 言語学 戦国時代 実行 規範 火の鳥\n",
"1 アプリケーション 朝鮮 離す 哲学 1950年代 否定 慣習 新宝島\n",
"2 システム スペイン 示す 科学 1960年代 拡張 形状 陽だまりの樹\n",
"3 プロセッサ イタリア 計る 政治学 19世紀 理解 階層構造 ひぃ\n",
"4 ハードウェア フランス 押す 物理学 鎌倉時代 観察 感情 フクちゃん\n",
"5 ソフトウェア ヨーロッパ 向上させる 医学 17世紀 フェアに説明 儀式 バンパイヤ\n",
"6 データ アメリカ 踏む 地理学 2000年代 定式化 個性 ウイングマン\n",
"7 端末 広東 貼る 生物学 平安時代 決定 感覚 リボンの騎士\n",
"8 プログ 琉球 検証する 宗教 室町時代 阻止 文字列 ふたりの誓い\n",
"9 コンテンツ 欧 拡張する 心理学 1970年代 優先 語尾 饗宴"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_list = '鉄腕アトム 生成 確率 プログラム 倒す 数学 江戸時代 中国'.split(' ')\n",
"sb.utils.show.get_topn_df(sembei, query_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [Root]",
"language": "python",
"name": "Python [Root]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
},
"nav_menu": {},
"toc": {
"navigate_menu": true,
"number_sections": false,
"sideBar": true,
"threshold": 6,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment