Skip to content

Instantly share code, notes, and snippets.

@cytms
Created April 1, 2014 08:36
Show Gist options
  • Save cytms/9910250 to your computer and use it in GitHub Desktop.
Save cytms/9910250 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"\u98df\u8b5c\u81ea\u52d5\u5206\u985e (Gaussian NB)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%pylab inline --no-import-all\n",
"\n",
"# NOTE(review): the star import is kept because it was already here; prefer\n",
"# explicit imports (numpy as np, matplotlib.pyplot as plt) if nothing needs it.\n",
"from pylab import *\n",
"from sklearn.naive_bayes import GaussianNB\n",
"import MySQLdb\n",
"\n",
"# NOTE(review): connects as root without a password; move the credentials to\n",
"# environment variables or a MySQL option file before sharing this notebook.\n",
"db = MySQLdb.connect(host=\"localhost\", user=\"root\", db=\"cookpad_production\")\n",
"recipe_cur = db.cursor()\n",
"\n",
"# Feature vectors are indexed directly by ingredient id, so their length must\n",
"# be MAX(id) + 1. COUNT(id) undersizes the vector whenever some id >= COUNT\n",
"# (e.g. 1-based ids, or gaps left by deleted rows) -- the vector stays sparse.\n",
"# COALESCE covers an empty ingredients table, where MAX(id) is NULL.\n",
"ingredient_cur = db.cursor()\n",
"ingredient_cur.execute(\"SELECT COALESCE(MAX(id), 0) FROM ingredients\")\n",
"feature_dimension = int(ingredient_cur.fetchone()[0]) + 1\n",
"\n",
"# Class labels: one label per line of src/regions.\n",
"# classifier maps label -> list of recipe ids (filled in by the training cell);\n",
"# class_title keeps the labels in file order.\n",
"classifier = {}\n",
"class_title = []\n",
"with open('src/regions', 'r') as f:\n",
"    for line in f:\n",
"        string = line.strip()\n",
"        # A blank line would create the label '' which matches every list name\n",
"        # (str.find('') == 0); a repeated label would be matched twice per row.\n",
"        if not string or string in classifier:\n",
"            continue\n",
"        classifier[string] = []\n",
"        class_title.append(string)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n",
"\u897f\u5f0f\u6599\u7406"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\u65e5\u5f0f\u6599\u7406"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\u5357\u6d0b\u6599\u7406"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\u97d3\u5f0f\u6599\u7406"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\u4e2d\u5f0f\u6599\u7406"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Training Phase"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Training data collection: a recipe is assigned to every class whose label\n",
"# occurs in the name of a list that contains the recipe.\n",
"list_cur = db.cursor()\n",
"list_cur.execute(\"SELECT list_recipe_ships.recipe_id, lists.name FROM list_recipe_ships INNER JOIN lists ON list_recipe_ships.list_id=lists.id\")\n",
"list_list = list_cur.fetchall()\n",
"\n",
"# Reset the per-class recipe lists so re-running this cell does not append\n",
"# the same recipes a second time.\n",
"for t in class_title:\n",
"    classifier[t] = []\n",
"\n",
"for item in list_list:\n",
"    # item is (recipe_id, list_name); a NULL name cannot match any label.\n",
"    if item[1] is None:\n",
"        continue\n",
"    for t in class_title:\n",
"        if item[1].find(t) != -1:\n",
"            classifier[t].append(item[0])\n",
"\n",
"# Cap on training recipes per class. The previous counter loop\n",
"# (`thres <= 100`, starting at 0) admitted 101 recipes; that cap is preserved.\n",
"MAX_RECIPES_PER_CLASS = 101\n",
"\n",
"# Build one binary ingredient vector per recipe (X) with its class label (y).\n",
"X = []\n",
"y = []\n",
"gnb = GaussianNB()\n",
"for key in classifier:\n",
"    for e in classifier[key][:MAX_RECIPES_PER_CLASS]:\n",
"        # Parameterized query instead of string formatting.\n",
"        recipe_cur.execute(\"SELECT ingredient_id FROM relationships WHERE recipe_id = %s\", (int(e),))\n",
"        ingredient_list = recipe_cur.fetchall()\n",
"        x = [0] * feature_dimension\n",
"        for list_item in ingredient_list:\n",
"            try:\n",
"                x[list_item[0]] = 1\n",
"            except (IndexError, TypeError):\n",
"                # Ingredient id beyond the feature vector, or a NULL id:\n",
"                # skip it, but let any other error surface.\n",
"                continue\n",
"        X.append(x)\n",
"        y.append(key)\n",
"    # Progress marker: one line per finished class.\n",
"    print(key)\n",
"\n",
"gnb.fit(X, y)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Testing Phase"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fetch up to 100 published recipes (id, name) to classify.\n",
"testing_cur = db.cursor()\n",
"testing_cur.execute(\"SELECT id, name FROM recipes WHERE state='published' LIMIT 0, 100\")\n",
"testing_datum = testing_cur.fetchall()\n",
"\n",
"# Encode every recipe as a binary ingredient vector of length feature_dimension.\n",
"TEST = []\n",
"for item in testing_datum:\n",
"    e = item[0]\n",
"    # Parameterized query instead of string formatting.\n",
"    recipe_cur.execute(\"SELECT ingredient_id FROM relationships WHERE recipe_id = %s\", (int(e),))\n",
"    ingredient_list = recipe_cur.fetchall()\n",
"    testing = [0] * feature_dimension\n",
"    for list_item in ingredient_list:\n",
"        try:\n",
"            testing[list_item[0]] = 1\n",
"        except (IndexError, TypeError):\n",
"            # Ingredient id beyond the feature vector, or a NULL id: skip it\n",
"            # rather than abort the whole cell; the training cell also skips\n",
"            # ids it cannot index.\n",
"            continue\n",
"    TEST.append(testing)\n",
"\n",
"y_pred = gnb.predict(TEST)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Print each test recipe's name followed by its predicted class label.\n",
"for position, recipe in enumerate(testing_datum):\n",
"    print(\"%s: %s\" % (recipe[1], y_pred[position]))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\u4e09\u676f\u4e2d\u5377: \u5357\u6d0b\u6599\u7406\n",
"\u7fa9\u5f0f\u98a8\u5473\u5df4\u85a9\u7c73\u514b\u7092\u8766: \u65e5\u5f0f\u6599\u7406\n",
"\u4e00\u500b\u4eba\u7684\u7c21\u55ae\u6599\u7406\u2605\u9bae\u852c\u8d77\u53f8\u6b50\u59c6\u86cb\u6372\u2605: \u65e5\u5f0f\u6599\u7406\n",
"\u529b\u529b\u7d20\u98df-\u9ebb\u6cb9\u674f\u9b91\u83c7\u86cb\u7092\u98ef: \u4e2d\u5f0f\u6599\u7406\n",
"\u6dbc\u62cc\u5c0f\u9ec3\u74dc: \u4e2d\u5f0f\u6599\u7406\n",
"\u2764\u751c\u871c\u6696\u5fc3\u7d05\u8c46\u6e6f\u2764\u7d05\u8c46\u6e6fX3\u7a2e\u5403\u6cd5: \u5357\u6d0b\u6599\u7406\n",
"\u53e4\u65e9\u5473\u9ebb\u6cb9\u86cb\u9eb5\u7dda: \u4e2d\u5f0f\u6599\u7406\n",
"\u6696\u6696\u91d1\u8272\u725b\u5c3e\u6e6f: \u897f\u5f0f\u6599\u7406\n",
"\u4f86\u7897\u6696\u547c\u547c\u7684\u767d\u83dc\u96de\u6e6f\u5427\uff01: \u5357\u6d0b\u6599\u7406\n",
"\u6eff\u53e3\u5976\u6cb9\u9999\uff1a\u7dad\u4e5f\u7d0d\u9165\u9905: \u897f\u5f0f\u6599\u7406\n",
"\u4e00\u500b\u4eba\u7684\u7c21\u55ae\u6599\u7406\u2605\u91d1\u6c99\u8c46\u8150\u2605: \u65e5\u5f0f\u6599\u7406\n",
"\u91d1\u9ec3\u8a98\u4eba\u7684\u67f3\u6a59\u53ef\u53ef\u8584\u5854 : \u897f\u5f0f\u6599\u7406\n",
"\u4e00\u500b\u4eba\u7684\u7c21\u55ae\u6599\u7406\u2605\u6ed1\u86cb\u725b\u8089\u2605: \u65e5\u5f0f\u6599\u7406\n",
"\u6436\u773c\u5bb6\u5e38\u83dc\uff1a\u828b\u982d\u71d2\u96de: \u5357\u6d0b\u6599\u7406\n",
"\u4f4e\u5361\u7248-\u5976\u6cb9\u71c9\u96de\u8089\u9bae\u852c: \u897f\u5f0f\u6599\u7406\n",
"\u9e79\u8c6c\u8089\u7fa9\u5927\u5229\u9eb5: \u65e5\u5f0f\u6599\u7406\n",
"\u9999\u8349\u7c7d\u676f\u5b50\u86cb\u7cd5\uff0b\u963f\u83ef\u7530\u5de7\u514b\u529b\u5976\u6cb9\u971c\u2764: \u897f\u5f0f\u6599\u7406\n",
"\u81ea\u5bb6\u5496\u5561\u9928\uff1a\u8513\u8d8a\u8393\u53f8\u5eb7: \u65e5\u5f0f\u6599\u7406\n",
"\u5341\u6996\u5065\u5eb7\u4fbf\u7576: \u4e2d\u5f0f\u6599\u7406\n",
"\u9c39\u9b5a\u6dbc\u9eb5\u5957\u9910\uff0d Polydice \u611b\u6599\u7406\u5206\u4eab\u9910\u7b2c\u4e00\u56de: \u65e5\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u9752\u6c5f\u83dc\u7092\u98ef: \u4e2d\u5f0f\u6599\u7406\n",
"\u3010\u7f8e\u570b Choice\u3011\u539a\u5207 \u53bb\u9aa8\u5ae9\u80a9\u725b\u5c0f\u6392: \u65e5\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u96d9\u86cb\u7092\u98ef: \u65e5\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u525d\u76ae\u8fa3\u6912\u9183\u5c0f\u9ec3\u74dc: \u4e2d\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u9ed1\u6912\u96de\u6392: \u5357\u6d0b\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u8d77\u53f8\u539a\u86cb\u71d2: \u65e5\u5f0f\u6599\u7406\n",
"\u82e6\u6200: \u65e5\u5f0f\u6599\u7406\n",
"Tenz\u5bb6\u4e2d\u79cb\u50b3\u7d71\u7f8e\u98df\uff1a\u58fd\u559c\u71d2: \u65e5\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u8336\u85b0\u86cb: \u97d3\u5f0f\u6599\u7406\n",
"[\u4e00\u934b\u6599\u7406] \u7d05\u9152\u71d2\u96de: \u5357\u6d0b\u6599\u7406\n",
"\u9999\u6fc3\u53ef\u53e3\u7684\u65e5\u5f0f\u5496\u54e9\u96de: \u65e5\u5f0f\u6599\u7406\n",
"\u84b8\u6c34\u86cb: \u65e5\u5f0f\u6599\u7406\n",
"\u4e0b\u9152\u826f\u4f34-\u8150\u4e73\u70b8\u96de\u584a\uff01: \u4e2d\u5f0f\u6599\u7406\n",
"\u9eef\u7136\u92b7\u9b42\u5496\u55b1\u98ef: \u65e5\u5f0f\u6599\u7406\n",
"[\u71b1\u7092\u5fc5\u9ede] \u7cd6\u918b\u9b5a\u7247: \u65e5\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u8518\u9b1a\u70cf\u9aa8\u96de\u6e6f: \u4e2d\u5f0f\u6599\u7406\n",
"\u96fb\u934b\u6c34\u9903: \u65e5\u5f0f\u6599\u7406\n",
"\u71b1\u7092\u7d05\u866b: \u5357\u6d0b\u6599\u7406\n",
"\u81d8\u5473\u98ef: \u5357\u6d0b\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u8766\u4ec1\u86cb\u7092\u98ef: \u65e5\u5f0f\u6599\u7406\n",
"\u300c\u7530\u5b63\u77f3\u71d2\u300d\u71b1\u60c5\u2668\u5b5c\u7136\u5f69\u6912\u897f\u82b9\u725b ~ \u3295: \u65e5\u5f0f\u6599\u7406\n",
"\u597d\u857e\u7d72\u4e0a\u83dc--\u8001\u9955\u6700\u611b:\u53e4\u65e9\u5473\u767d\u9be7\u7c73\u7c89\u6e6f: \u4e2d\u5f0f\u6599\u7406\n",
"[\u5730\u7344\u6599\u7406]\u611b\u5fc3\u773c\u7403\u4fbf\u7576: \u65e5\u5f0f\u6599\u7406\n",
"\u99ac\u9234\u85af\u901a\u5fc3\u7c89\u6c99\u62c9: \u65e5\u5f0f\u6599\u7406\n",
"\u6cb9\u7206\u849c\u9999\u8766: \u65e5\u5f0f\u6599\u7406\n",
"\u610f\u5916\u914d\u51fa\u597d\u6ecb\u5473\uff0d\u674f\u9b91\u83c7\u9ebb\u6cb9\u9eb5\u7dda\uff082\u4eba\u4efd\uff09: \u4e2d\u5f0f\u6599\u7406\n",
"\u9069\u5408\u53f0\u7063\u4eba\u7626\u8eab\u7528\u7684\u53f0\u5f0f\u751f\u83dc\u6c99\u62c9: \u65e5\u5f0f\u6599\u7406\n",
"\u9999\u83c7\u7af9\u7b4d\u8089\u5305: \u4e2d\u5f0f\u6599\u7406\n",
"\u2764\u611b\u5fc3\u86cb\u70e4\u571f\u53f8\u2764 \u737b\u4e0a\u6211\u7684\u5c0f\u5c0f\u5fc3\u610f: \u97d3\u5f0f\u6599\u7406\n",
"\u5728\u5bb6\u4e5f\u53ef\u4ee5\u81ea\u5df1\u505a\u7684\u6708\u4eae\u8766\u9905: \u5357\u6d0b\u6599\u7406\n",
"\u91ac\u71d2\u82e6\u74dc: \u4e2d\u5f0f\u6599\u7406\n",
"\u6e1b\u80a5\u9910-\u8c46\u6f3f\u96de\u8089\u934b: \u897f\u5f0f\u6599\u7406\n",
"[\u9b06\u9905\u7c89\u98df\u8b5c]\u5c0f\u70e4\u7bb1\u5c0f\u5e78\u798f---\u8089\u6842\u860b\u679c\u9b06\u7cd5: \u897f\u5f0f\u6599\u7406\n",
"\u67f3\u6a59\u7126\u7cd6\u91c0\u8349\u8393: \u897f\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u9e7d\u9165\u8766: \u5357\u6d0b\u6599\u7406\n",
"\u2665\u6211\u7684\u624b\u4f5c\u6599\u7406\u2665\u8fa3\u91ac\u9999\u6839\u571f\u8c46\u7d72: \u897f\u5f0f\u6599\u7406\n",
"[\u4e00\u4eba\u7368\u4eab] \u86cb\u9903\u6fc3\u6e6f: \u897f\u5f0f\u6599\u7406\n",
"\u82f1\u5f0f\u9b06\u9905 scone: \u897f\u5f0f\u6599\u7406\n",
"\u597d\u857e\u7d72\u4e0a\u83dc--\u8d85\u7c21\u55ae\u89aa\u5b50\u5de7\u514b\u529b\u676f\u5b50\u86cb\u7cd5: \u897f\u5f0f\u6599\u7406\n",
"\u2665\u6211\u7684\u624b\u4f5c\u6599\u7406\u2665\u6912\u9ebb\u725b\u8089: \u5357\u6d0b\u6599\u7406\n",
"\u54c1\u975a\u70d8\u7119-\u6ab8\u6aac\u676f\u5b50\u5c0f\u86cb\u7cd5: \u897f\u5f0f\u6599\u7406\n",
"\u6e05\u723d\u7684\u51ac\u74dc\u6392\u9aa8\u6e6f: \u4e2d\u5f0f\u6599\u7406\n",
"\u4e00\u9ede\u90fd\u4e0d\u82e6\u7684\u5208\u83dc\u96de\u6e6f\uff01: \u5357\u6d0b\u6599\u7406\n",
"\u81ea\u88fd\u751c\u96de\u91ac-\u6708\u4eae\u8766\u9905\u6cbe\u91ac: \u5357\u6d0b\u6599\u7406\n",
"\u9752\u860b\u679c\u6d77\u5e36\u82bd\u8f15\u98df\u6c99\u62c9: \u65e5\u5f0f\u6599\u7406\n",
"\u529b\u529b\u96a8\u610f\u716e-\u9bae\u8766\u7c89\u7d72\u7172: \u4e2d\u5f0f\u6599\u7406\n",
"\u4e00\u500b\u4eba\u7684\u7c21\u55ae\u6599\u7406\u2605\u5976\u6cb9\u6ab8\u9999\u87f9\u8089\u7fa9\u5927\u5229\u9eb5\u2605: \u897f\u5f0f\u6599\u7406\n",
"Costco\u8f15\u9b06\u716e\uff0d\u6cf0\u5f0f\u91ac\u8896\u73cd\u83c7: \u65e5\u5f0f\u6599\u7406\n",
"\u2665\u6211\u7684\u624b\u4f5c\u6599\u7406\u2665\u677e\u5742\u56db\u5b63\u7092\u98ef: \u65e5\u5f0f\u6599\u7406\n",
"\u756a\u8304\u852c\u83dc\u725b\u8089\u6e6f: \u5357\u6d0b\u6599\u7406\n",
"\u3010\u6731\u8a18\u98df\u8b5c\u3011\u9999\u8178\u6f22\u5821: \u897f\u5f0f\u6599\u7406\n",
"\u6d77\u9bae\u714e\u9905: \u97d3\u5f0f\u6599\u7406\n",
"\u60c5\u4eba\u679c: \u65e5\u5f0f\u6599\u7406\n",
"\u2665\u6211\u7684\u624b\u4f5c\u6599\u7406\u2665\u5bae\u4fdd\u76ae\u86cb: \u4e2d\u5f0f\u6599\u7406\n",
"\u2665\u6211\u7684\u624b\u4f5c\u6599\u7406\u2665 \u788e\u7c73\u96de\u4e01: \u4e2d\u5f0f\u6599\u7406\n",
"\u5357\u74dc\u57f9\u6839\u7fa9\u5927\u5229\u9eb5: \u897f\u5f0f\u6599\u7406\n",
"\u8001\u5abd\u7684\u767d\u83dc\u9b6f: \u4e2d\u5f0f\u6599\u7406\n",
"[\u7559\u5b78\u751f\u4e94\u5206\u9418\u6599\u7406] \u8543\u8304\u83e0\u83dc\u788e\u8089\u7092\u9eb5: \u5357\u6d0b\u6599\u7406\n",
"\u963f\u5b24\u7684\u5730\u74dc\u6e6f\u5713: \u4e2d\u5f0f\u6599\u7406\n",
"\u597d\u857e\u7d72\u4e0a\u83dc--\u9ad8\u9e97\u83dc\u6c34\u9903: \u4e2d\u5f0f\u6599\u7406\n",
"\u5821\u96de\u6599\u7406: \u5357\u6d0b\u6599\u7406\n",
"[\u7559\u5b78\u751f\u4e94\u5206\u9418\u6599\u7406] \u4e2d\u5f0f\u9999\u83c7\u83e0\u83dc\u8c46\u8150\u7092\u610f\u5927\u5229\u9903: \u65e5\u5f0f\u6599\u7406\n",
"[\u63d0\u59c6\u58eb\u98df\u8b5c] \u590f\u65e5\u98a8\u6cf0\u5f0f\u6912\u9ebb\u96de - \u9178\u751c\u958b\u80c3\u6cf0\u5f0f\u6599\u7406: \u5357\u6d0b\u6599\u7406\n",
"\u767d\u9152\u6de1\u83dc\u751c\u6912\u57f9\u6839\u5929\u4f7f\u9eb5: \u897f\u5f0f\u6599\u7406\n",
"\u7c21\u6613\u70b8\u91ac: \u4e2d\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u5305\u9903\u5b50: \u4e2d\u5f0f\u6599\u7406\n",
"\u54c1\u975a\u4e0a\u83dc-\u714e\u9903: \u4e2d\u5f0f\u6599\u7406\n",
"\u963f\u5b24\u311f\u6ef7\u8089\u98ef: \u65e5\u5f0f\u6599\u7406\n",
"\u96fb\u934b\u81d8\u8178\u98ef: \u5357\u6d0b\u6599\u7406\n",
"\u9178\u83dc\u7092\u9eb5\u8178: \u65e5\u5f0f\u6599\u7406\n",
"[\u7559\u5b78\u751f\u4e94\u5206\u9418\u6599\u7406] \u852c\u83dc\u7d5e\u8089\u7fa9\u5927\u5229\u9eb5: \u97d3\u5f0f\u6599\u7406\n",
"\u6696\u4e0a\u5fc3\u982d\uff0e\u84b8\u71d2\u9152\u8766: \u65e5\u5f0f\u6599\u7406\n",
"\u529b\u529b\u96a8\u610f\u716e-\u5ae9\u7092\u9ec3\u74dc\u96de\u7403: \u5357\u6d0b\u6599\u7406\n",
"\u5927\u4eba\u5473\u7684\u3010\u674f\u4ec1\u5e03\u6717\u5c3c Almond Brownie\u3011: \u897f\u5f0f\u6599\u7406\n",
"\u7f85\u52d2\u4e73\u916a\u9eb5\u5305: \u897f\u5f0f\u6599\u7406\n",
"\u9e79\u6e6f\u5713: \u4e2d\u5f0f\u6599\u7406\n",
"[\u7559\u5b78\u751f\u4e94\u5206\u9418\u6599\u7406] \u6ce1\u83dc\u7092\u5e74\u7cd5\uff01: \u97d3\u5f0f\u6599\u7406\n",
"\u529b\u529b\u751c\u9ede-\u53ef\u980c\u5e03\u4e01: \u65e5\u5f0f\u6599\u7406\n",
"\u2665\u6211\u7684\u624b\u4f5c\u6599\u7406\u2665 \u8591\u6bcd\u9d28\u98ef: \u5357\u6d0b\u6599\u7406\n",
"\u2665\u6211\u7684\u624b\u4f5c\u6599\u7406\u2665 \u871c\u9165\u96de\u6392: \u65e5\u5f0f\u6599\u7406\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment