Skip to content

Instantly share code, notes, and snippets.

@psinger
Created March 16, 2015 14:31
Show Gist options
  • Save psinger/c1bfedc870900d5d2053 to your computer and use it in GitHub Desktop.
Save psinger/c1bfedc870900d5d2053 to your computer and use it in GitHub Desktop.
{
"metadata": {
"css": [
""
],
"name": "",
"signature": "sha256:2a33067410fdd0356cedb2c0893414e80734c219ba9dd2bba82e2ad1b379606b"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import xml.etree.ElementTree as ET\n",
"\n",
"# Aspect categories kept for classification, mapped to integer class ids.\n",
"# Sentences whose first category is anything else are skipped.\n",
"CATEGORY_IDS = {'service': 0, 'food': 1, 'ambience': 2, 'price': 3}\n",
"\n",
"tree = ET.parse('Restaurants_Train.xml')\n",
"root = tree.getroot()\n",
"\n",
"data = []    # text of every kept sentence\n",
"labels = []  # class id of the first aspect category of each kept sentence\n",
"\n",
"for sentence in root:\n",
"    text_node = sentence.find('text')\n",
"    categories = sentence.find('aspectCategories')\n",
"    # A sample needs both a text node and at least one category entry.\n",
"    # len() is used because an Element without children is falsy.\n",
"    if text_node is None or categories is None or len(categories) == 0:\n",
"        continue\n",
"    # Only the first listed category decides the label.\n",
"    label = CATEGORY_IDS.get(categories[0].get('category'))\n",
"    if label is None:\n",
"        continue\n",
"    # Append text and label together so the two lists cannot get out of step.\n",
"    data.append(text_node.text)\n",
"    labels.append(label)\n",
"\n",
"print(len(data))\n",
"print(len(labels))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1962\n",
"1962\n"
]
}
],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"from sklearn import svm\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"\n",
"try:\n",
"    from sklearn.model_selection import cross_val_score\n",
"except ImportError:\n",
"    # Older scikit-learn releases only provide the cross_validation module.\n",
"    from sklearn.cross_validation import cross_val_score\n",
"\n",
"labels = np.array(labels)\n",
"\n",
"# NOTE: the vectorizer is fitted on every sentence before the folds are\n",
"# split, so each fold's IDF weights also reflect its held-out sentences.\n",
"vec = TfidfVectorizer()\n",
"X = vec.fit_transform(data)\n",
"\n",
"clf = svm.SVC(kernel='linear', C=1)\n",
"# labels holds four classes, so the averaging mode must be spelled out;\n",
"# the bare 'f1' scorer is only defined for binary targets.\n",
"scores = cross_val_score(clf, X, labels, cv=10, scoring='f1_weighted')\n",
"print(scores.mean())"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0.786095714575\n"
]
}
],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment