Skip to content

Instantly share code, notes, and snippets.

@takezoe
Last active May 4, 2018 00:52
Show Gist options
  • Save takezoe/c574410f38fdb11d0495a5577c42166a to your computer and use it in GitHub Desktop.
Save takezoe/c574410f38fdb11d0495a5577c42166a to your computer and use it in GitHub Desktop.
scikit-learnでGitHubのイシューのラベル判定をやってみた
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.naive_bayes import BernoulliNB\n",
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Load the exported issues; lines=True because the export holds one JSON record per line.\n",
"ISSUES_EXPORT_URL = \"https://github.com/takezoe/github-issues-exporter/raw/master/export.json\"\n",
"df = pd.read_json(ISSUES_EXPORT_URL, lines=True)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>body</th>\n",
" <th>isPullRequest</th>\n",
" <th>labels</th>\n",
" <th>title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>This PR add show SMTP error message as follows...</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>Show SMTP Error message in testing email settings</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>In 4.24.0, branch protection setting has probl...</td>\n",
" <td>True</td>\n",
" <td>[bug]</td>\n",
" <td>Fix branch protection problem</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>This bug introduced by #1962. It changed [diff...</td>\n",
" <td>True</td>\n",
" <td>[bug]</td>\n",
" <td>Fix editor preview bug</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>In #1880 I figured out this problem. #1880 is ...</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Issue/PullRequest hooks doesn't called when is...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>This PR fixes #1880.\\r\\n\\r\\nBut, it doesn't ca...</td>\n",
" <td>True</td>\n",
" <td>[bug]</td>\n",
" <td>call issue closed webhook when pushed commit c...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>In [git-flow](https://github.com/nvie/gitflow)...</td>\n",
" <td>True</td>\n",
" <td>[feature]</td>\n",
" <td>close and mark as merged PR by pushed commits</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>PR message shows \"into user:branch from user :...</td>\n",
" <td>True</td>\n",
" <td>[bug]</td>\n",
" <td>don't separate user:branch in PR message</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>### Before submitting a pull-request to GitBuc...</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>show tags on commits page</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Hi.\\r\\n\\r\\nI try to trigger a jenkins pipeline...</td>\n",
" <td>False</td>\n",
" <td>[question]</td>\n",
" <td>Trigger a Jenkins Pipeline after a Push</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>GitBucket 4.23.1.\\r\\n\\r\\nCreate a repo, then c...</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Internal server error when you try to download...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>### Before submitting a pull-request to GitBuc...</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>Update with propper mobile/tablet scalling</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Discussed in #1265.\\r\\n\\r\\n![20180423-00088](h...</td>\n",
" <td>True</td>\n",
" <td>[feature]</td>\n",
" <td>add tag on commit.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>Sort usernames for useful order.\\r\\n\\r\\n![2018...</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>sort username for issue comments</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>In the conversation tab of pull requests, do g...</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>Improve pull request comments presentation</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>and some enhancements about internal processin...</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>Apply ApiAuthenticationFilter to /api/* to cov...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>This PR fix #1956 \\r\\n### Before submitting a ...</td>\n",
" <td>True</td>\n",
" <td>[bug]</td>\n",
" <td>fix #1956</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>### Before submitting an issue to GitBucket I ...</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Issue closed by commit comment on any branch (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>This PR adds dummy `id` field for user/reposit...</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>Improve webhook payload compatibility for disc...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td></td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>Keep wrap mode of Ace editor using localStorage</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>Closes #1924</td>\n",
" <td>True</td>\n",
" <td>[bug]</td>\n",
" <td>Disable removed user's repositories</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>See #1826 to know details of the issue.</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>Disallow users and repositories which have dif...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>I am trying GitBucket and AWS CodeBuild for co...</td>\n",
" <td>False</td>\n",
" <td>[improvement]</td>\n",
" <td>AWS CodeBuild requires a very long webhook URL...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>## Issue\\r\\n**Impacted version**: 4.20.0\\r\\n\\r...</td>\n",
" <td>False</td>\n",
" <td>[question]</td>\n",
" <td>ERRORclass java.net.SocketException Socket is ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>## Issue\\r\\n**Impacted version**: all\\r\\n\\r\\n*...</td>\n",
" <td>False</td>\n",
" <td>[LDAP/SSO, feature]</td>\n",
" <td>Request: Support for LDAP group sync</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>## Issue\\r\\n**Impacted version**: 4.23.0\\r\\n\\r...</td>\n",
" <td>False</td>\n",
" <td>[improvement]</td>\n",
" <td>Javascript slowdowns for large pull requests</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>## Issue\\r\\n**Impacted version**: 4.23.0 (poss...</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Contents API seems unavailable in Version 4.23.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>### Before submitting a pull-request to GitBuc...</td>\n",
" <td>True</td>\n",
" <td>[improvement]</td>\n",
" <td>Add X-Forwarded-Proto support for baseurl parsing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>I am using GitBucket behind a SSL load balance...</td>\n",
" <td>False</td>\n",
" <td>[improvement]</td>\n",
" <td>OIDC redirect URI does not respect x-forwarded...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>Caused by changing tail slash handling in GitB...</td>\n",
" <td>True</td>\n",
" <td>[bug]</td>\n",
" <td>Fix a bug that fails to get the list of files ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>This PR adds extra mail address support. If me...</td>\n",
" <td>True</td>\n",
" <td>[feature]</td>\n",
" <td>Add extra mail address support.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1962</th>\n",
" <td>Do you have any plan to add \"plugin\" mechanism...</td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Add third-party plugin support</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1963</th>\n",
" <td>GitBucket always add README.md after repositor...</td>\n",
" <td>False</td>\n",
" <td>[improvement]</td>\n",
" <td>Make it possible to create empty repository</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1965</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[improvement, pending]</td>\n",
" <td>Issue label search should be AND condition</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1966</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Add user icon setting or Gravatar support</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1967</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Display assigned user on issue list</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1968</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Display issues for users on the dashborad</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1969</th>\n",
" <td>Highlight milestone due date if due date is ne...</td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Alert by milestone due date</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1970</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[improvement]</td>\n",
" <td>Show GitBucket version on the header</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1971</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Add branch tab to repository viewer</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1972</th>\n",
" <td>It is inconvenient for issue management becaus...</td>\n",
" <td>False</td>\n",
" <td>[improvement]</td>\n",
" <td>Admin user cannot add itself to Collaborators</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1973</th>\n",
" <td>Usually English is enough for issue label name...</td>\n",
" <td>False</td>\n",
" <td>[improvement]</td>\n",
" <td>Multi-byte label support</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1974</th>\n",
" <td>Git 1.7 is OK. But 1.8 is maybe not available....</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Git 1.8 not available</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1975</th>\n",
" <td>ユーザ情報の登録/変更時、すでに存在するメールアドレスを設定すると 500 が返されます。\\...</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Return 500 when registering/updating a user wh...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1976</th>\n",
" <td>When I add collaborators, 1st user can be adde...</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Two or more users cannot add as Collaborators</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1977</th>\n",
" <td>Wiki link `[[label|page]]` does not work well ...</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Wiki link does not work</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1978</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Can't delete repository by foreign key constra...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1979</th>\n",
" <td>When I create a new wiki page, gitbucket show ...</td>\n",
" <td>False</td>\n",
" <td>[bug]</td>\n",
" <td>Redirects to wrong url when create a new wiki ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1980</th>\n",
" <td>Add some features like below to the commit pag...</td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Improvement for the commit page</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1981</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>File editing in the repository viewer</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1982</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>File attachment in Issues</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1983</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Batch updating for issues</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1984</th>\n",
" <td>Administrator can toggle mail notification at ...</td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Mail notification</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1985</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[duplicate, feature]</td>\n",
" <td>Comment for commit and diff</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1986</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Team management (like Organization in Github)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1987</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature, plugin]</td>\n",
" <td>Star</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1988</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature, plugin]</td>\n",
" <td>Statistics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1989</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Network graph</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1990</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>User (and repository) activity timeline</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1991</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Repository search</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1992</th>\n",
" <td></td>\n",
" <td>False</td>\n",
" <td>[feature]</td>\n",
" <td>Fork and pull request</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1382 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" body isPullRequest \\\n",
"0 This PR add show SMTP error message as follows... True \n",
"3 In 4.24.0, branch protection setting has probl... True \n",
"6 This bug introduced by #1962. It changed [diff... True \n",
"9 In #1880 I figured out this problem. #1880 is ... False \n",
"10 This PR fixes #1880.\\r\\n\\r\\nBut, it doesn't ca... True \n",
"11 In [git-flow](https://github.com/nvie/gitflow)... True \n",
"12 PR message shows \"into user:branch from user :... True \n",
"14 ### Before submitting a pull-request to GitBuc... True \n",
"16 Hi.\\r\\n\\r\\nI try to trigger a jenkins pipeline... False \n",
"18 GitBucket 4.23.1.\\r\\n\\r\\nCreate a repo, then c... False \n",
"19 ### Before submitting a pull-request to GitBuc... True \n",
"27 Discussed in #1265.\\r\\n\\r\\n![20180423-00088](h... True \n",
"30 Sort usernames for useful order.\\r\\n\\r\\n![2018... True \n",
"33 In the conversation tab of pull requests, do g... True \n",
"35 and some enhancements about internal processin... True \n",
"37 This PR fix #1956 \\r\\n### Before submitting a ... True \n",
"38 ### Before submitting an issue to GitBucket I ... False \n",
"39 This PR adds dummy `id` field for user/reposit... True \n",
"40 True \n",
"41 Closes #1924 True \n",
"42 See #1826 to know details of the issue. True \n",
"43 I am trying GitBucket and AWS CodeBuild for co... False \n",
"44 ## Issue\\r\\n**Impacted version**: 4.20.0\\r\\n\\r... False \n",
"47 ## Issue\\r\\n**Impacted version**: all\\r\\n\\r\\n*... False \n",
"49 ## Issue\\r\\n**Impacted version**: 4.23.0\\r\\n\\r... False \n",
"51 ## Issue\\r\\n**Impacted version**: 4.23.0 (poss... False \n",
"52 ### Before submitting a pull-request to GitBuc... True \n",
"53 I am using GitBucket behind a SSL load balance... False \n",
"54 Caused by changing tail slash handling in GitB... True \n",
"55 This PR adds extra mail address support. If me... True \n",
"... ... ... \n",
"1962 Do you have any plan to add \"plugin\" mechanism... False \n",
"1963 GitBucket always add README.md after repositor... False \n",
"1965 False \n",
"1966 False \n",
"1967 False \n",
"1968 False \n",
"1969 Highlight milestone due date if due date is ne... False \n",
"1970 False \n",
"1971 False \n",
"1972 It is inconvenient for issue management becaus... False \n",
"1973 Usually English is enough for issue label name... False \n",
"1974 Git 1.7 is OK. But 1.8 is maybe not available.... False \n",
"1975 ユーザ情報の登録/変更時、すでに存在するメールアドレスを設定すると 500 が返されます。\\... False \n",
"1976 When I add collaborators, 1st user can be adde... False \n",
"1977 Wiki link `[[label|page]]` does not work well ... False \n",
"1978 False \n",
"1979 When I create a new wiki page, gitbucket show ... False \n",
"1980 Add some features like below to the commit pag... False \n",
"1981 False \n",
"1982 False \n",
"1983 False \n",
"1984 Administrator can toggle mail notification at ... False \n",
"1985 False \n",
"1986 False \n",
"1987 False \n",
"1988 False \n",
"1989 False \n",
"1990 False \n",
"1991 False \n",
"1992 False \n",
"\n",
" labels \\\n",
"0 [improvement] \n",
"3 [bug] \n",
"6 [bug] \n",
"9 [bug] \n",
"10 [bug] \n",
"11 [feature] \n",
"12 [bug] \n",
"14 [improvement] \n",
"16 [question] \n",
"18 [bug] \n",
"19 [improvement] \n",
"27 [feature] \n",
"30 [improvement] \n",
"33 [improvement] \n",
"35 [improvement] \n",
"37 [bug] \n",
"38 [bug] \n",
"39 [improvement] \n",
"40 [improvement] \n",
"41 [bug] \n",
"42 [improvement] \n",
"43 [improvement] \n",
"44 [question] \n",
"47 [LDAP/SSO, feature] \n",
"49 [improvement] \n",
"51 [bug] \n",
"52 [improvement] \n",
"53 [improvement] \n",
"54 [bug] \n",
"55 [feature] \n",
"... ... \n",
"1962 [feature] \n",
"1963 [improvement] \n",
"1965 [improvement, pending] \n",
"1966 [feature] \n",
"1967 [feature] \n",
"1968 [feature] \n",
"1969 [feature] \n",
"1970 [improvement] \n",
"1971 [feature] \n",
"1972 [improvement] \n",
"1973 [improvement] \n",
"1974 [bug] \n",
"1975 [bug] \n",
"1976 [bug] \n",
"1977 [bug] \n",
"1978 [bug] \n",
"1979 [bug] \n",
"1980 [feature] \n",
"1981 [feature] \n",
"1982 [feature] \n",
"1983 [feature] \n",
"1984 [feature] \n",
"1985 [duplicate, feature] \n",
"1986 [feature] \n",
"1987 [feature, plugin] \n",
"1988 [feature, plugin] \n",
"1989 [feature] \n",
"1990 [feature] \n",
"1991 [feature] \n",
"1992 [feature] \n",
"\n",
" title \n",
"0 Show SMTP Error message in testing email settings \n",
"3 Fix branch protection problem \n",
"6 Fix editor preview bug \n",
"9 Issue/PullRequest hooks doesn't called when is... \n",
"10 call issue closed webhook when pushed commit c... \n",
"11 close and mark as merged PR by pushed commits \n",
"12 don't separate user:branch in PR message \n",
"14 show tags on commits page \n",
"16 Trigger a Jenkins Pipeline after a Push \n",
"18 Internal server error when you try to download... \n",
"19 Update with propper mobile/tablet scalling \n",
"27 add tag on commit. \n",
"30 sort username for issue comments \n",
"33 Improve pull request comments presentation \n",
"35 Apply ApiAuthenticationFilter to /api/* to cov... \n",
"37 fix #1956 \n",
"38 Issue closed by commit comment on any branch (... \n",
"39 Improve webhook payload compatibility for disc... \n",
"40 Keep wrap mode of Ace editor using localStorage \n",
"41 Disable removed user's repositories \n",
"42 Disallow users and repositories which have dif... \n",
"43 AWS CodeBuild requires a very long webhook URL... \n",
"44 ERRORclass java.net.SocketException Socket is ... \n",
"47 Request: Support for LDAP group sync \n",
"49 Javascript slowdowns for large pull requests \n",
"51 Contents API seems unavailable in Version 4.23.0 \n",
"52 Add X-Forwarded-Proto support for baseurl parsing \n",
"53 OIDC redirect URI does not respect x-forwarded... \n",
"54 Fix a bug that fails to get the list of files ... \n",
"55 Add extra mail address support. \n",
"... ... \n",
"1962 Add third-party plugin support \n",
"1963 Make it possible to create empty repository \n",
"1965 Issue label search should be AND condition \n",
"1966 Add user icon setting or Gravatar support \n",
"1967 Display assigned user on issue list \n",
"1968 Display issues for users on the dashborad \n",
"1969 Alert by milestone due date \n",
"1970 Show GitBucket version on the header \n",
"1971 Add branch tab to repository viewer \n",
"1972 Admin user cannot add itself to Collaborators \n",
"1973 Multi-byte label support \n",
"1974 Git 1.8 not available \n",
"1975 Return 500 when registering/updating a user wh... \n",
"1976 Two or more users cannot add as Collaborators \n",
"1977 Wiki link does not work \n",
"1978 Can't delete repository by foreign key constra... \n",
"1979 Redirects to wrong url when create a new wiki ... \n",
"1980 Improvement for the commit page \n",
"1981 File editing in the repository viewer \n",
"1982 File attachment in Issues \n",
"1983 Batch updating for issues \n",
"1984 Mail notification \n",
"1985 Comment for commit and diff \n",
"1986 Team management (like Organization in Github) \n",
"1987 Star \n",
"1988 Statistics \n",
"1989 Network graph \n",
"1990 User (and repository) activity timeline \n",
"1991 Repository search \n",
"1992 Fork and pull request \n",
"\n",
"[1382 rows x 4 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the issues that carry at least one of the labels we classify on.\n",
"# The mask is called `has_target_label` (not `filter`) so the Python builtin is not shadowed.\n",
"TARGET_LABELS = (\"bug\", \"question\", \"improvement\", \"feature\")\n",
"has_target_label = df[\"labels\"].map(lambda x: any(label in x for label in TARGET_LABELS))\n",
"df2 = df[has_target_label]\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Collapse the labels into three classes: bug, question, request.\n",
"def to_class(issue_labels):\n",
"    # \"bug\" takes precedence over \"question\"; anything else becomes \"request\".\n",
"    for name in (\"bug\", \"question\"):\n",
"        if name in issue_labels:\n",
"            return name\n",
"    return \"request\"\n",
"\n",
"labels = df2[\"labels\"].map(to_class)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9844"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Vectorize the text obtained by joining each issue's title and body.\n",
"text = df2[\"title\"] + \" \" + df2[\"body\"].map(str)\n",
"count = CountVectorizer()\n",
"vector = count.fit_transform(text)\n",
"# Keep the learned vocabulary and show how many distinct tokens it holds.\n",
"vocabulary = count.vocabulary_\n",
"len(vocabulary)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Split into training and test data, holding out 10% for testing.\n",
"# A fixed random_state makes the split, and therefore the accuracy reported below,\n",
"# reproducible across Restart & Run All.\n",
"train_vector, test_vector, train_label, test_label = train_test_split(\n",
"    vector, labels, test_size=0.1, random_state=42\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Train a Bernoulli naive Bayes classifier on the training split.\n",
"model = BernoulliNB().fit(train_vector, train_label)\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train accuracy: 0.681\n",
"Test accuracy: 0.626\n"
]
}
],
"source": [
"# Check accuracy on the training and the held-out test split.\n",
"train_accuracy = model.score(train_vector, train_label)\n",
"test_accuracy = model.score(test_vector, test_label)\n",
"print('Train accuracy: {:.3f}'.format(train_accuracy))\n",
"print('Test accuracy: {:.3f}'.format(test_accuracy))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['request'], dtype='<U8')"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Try the trained model on a single issue title.\n",
"# Reuse the vectorizer fitted earlier and call transform() only: inference should not refit,\n",
"# and rebinding `count` here would replace the fitted vectorizer for any cell run afterwards.\n",
"test = pd.Series([\"Empty URL markdown link causes java.lang.NullPointerException\"])\n",
"vect = count.transform(test)\n",
"model.predict(vect)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment