Skip to content

Instantly share code, notes, and snippets.

@observerss
Created September 3, 2014 04:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save observerss/1ab3ea87b3d5d69ce08e to your computer and use it in GitHub Desktop.
Save observerss/1ab3ea87b3d5d69ce08e to your computer and use it in GitHub Desktop.
Optimizing Python
{
"metadata": {
"name": "",
"signature": "sha256:f7fc0e66200bb5a9cae8e5e59abfaa99691caa881252e9266f11e7cffc7dafe6"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"\u4f18\u5316Python\u6570\u5b66\u8ba1\u7b97"
]
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"1. \u77e2\u91cf\u8fd0\u7b97\u4e0e\u5757\u8fd0\u7b97"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u4ec0\u4e48\u662f\u77e2\u91cf\u8fd0\u7b97\uff1f\n",
"\n",
"\u5bf9\u77e2\u91cf(\u6570\u7ec4)\u4e2d\u7684\u6bcf\u4e2a\u5143\u7d20\u8fdb\u884c\u540c\u4e00\u79cd\u64cd\u4f5c\n",
"\n",
"e.g. [1, 2, 3, 4] * 3 = [3, 6, 9, 12]\n",
"\n",
"### \u4ec0\u4e48\u662f\u5757\u8fd0\u7b97?\n",
"\n",
"\u5bf9\u5757(\u77e9\u9635)\u4e2d\u7684\u6bcf\u4e2a\u5143\u7d20\u8fdb\u884c\u540c\u4e00\u79cd\u64cd\u4f5c\n",
"\n",
"e.g. [[1, 2], [3, 4]] - 1 = [[0, 1], [2, 3]]\n",
"\n",
"### \u77e2\u91cf\u8fd0\u7b97\u4e0e\u5faa\u73af\u7684\u533a\u522b?\n",
"\n",
"`a = np.array([1, 2, 3, 4])`\n",
"\n",
"`a * 3` \u548c `[x * 3 for x in a]` \u5728\u6570\u503c\u4e0a\u662f\u7b49\u4ef7\u7684\n",
"\n",
"\u4f46\u662f, \u7b2c\u4e00\u4e2a\u64cd\u4f5c\u662fnumpy\u5185\u90e8\u51fd\u6570\uff0c\u53ea\u5728\u8f93\u5165\u8f93\u51fa\u65f6\u4e0ePython\u5bf9\u8c61\u8fdb\u884c\u4e86\u8f6c\u6362\uff0c\u5185\u90e8\u90fd\u662fC\u8fd0\u7b97\uff0c\u800c\u7b2c\u4e8c\u79cd\u65b9\u6cd5\uff0c\u6bcf\u6b21\u4ece\u6570\u7ec4\u4e2d\u53d6\u5bf9\u8c61\u548c\u8fd0\u7b97\u90fd\u662fPython\u64cd\u4f5c\u3002\n",
"\n",
"\u4e8e\u662f\u4f18\u5316\u7684\u6700\u91cd\u8981\u7684\u4e00\u4e2a\u6982\u5ff5\u5c31\u662f\uff0c**\u80fd\u7528\u77e2\u91cf\uff0c\u5757\u8fd0\u7b97\u7684\u7edd\u4e0d\u7528\u5faa\u73af**"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"\n",
"N, F = 1000, 20\n",
"\n",
"A = np.random.randn(N, F)\n",
"\n",
"def sig(x, mean, std):\n",
" return (x-mean)/std\n",
"\n",
"def sig_loop():\n",
"    \"\"\"Standardize A column by column, scoring one element at a time.\n",
"\n",
"    Deliberately the slow baseline: every element goes through its own\n",
"    Python-level call to sig(). Returns a list with one list of scores for\n",
"    each of the F columns.\n",
"    \"\"\"\n",
"    scored_columns = []\n",
"    for col in range(F):\n",
"        column = A[:, col]\n",
"        mu, sigma = np.mean(column), np.std(column)\n",
"        scored_columns.append([sig(value, mu, sigma) for value in column])\n",
"    return scored_columns\n",
"\n",
"def sig_vec():\n",
"    \"\"\"Standardize A column by column with one vectorized sig() call per column.\n",
"\n",
"    Returns a list holding one array of scores for each of the F columns.\n",
"    \"\"\"\n",
"    scored_columns = []\n",
"    for col in range(F):\n",
"        column = A[:, col]\n",
"        scored_columns.append(sig(column, np.mean(column), np.std(column)))\n",
"    return scored_columns\n",
"\n",
"def sig_mat():\n",
"    \"\"\"Standardize every column of A in a single whole-matrix expression.\"\"\"\n",
"    column_means = np.mean(A, axis=0)\n",
"    column_stds = np.std(A, axis=0)\n",
"    return sig(A, column_means, column_stds)\n",
"\n",
"%timeit sig_loop()\n",
"%timeit sig_vec()\n",
"%timeit sig_mat()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"100 loops, best of 3: 17.8 ms per loop\n",
"100 loops, best of 3: 2.02 ms per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"1000 loops, best of 3: 294 \u00b5s per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. \u54ea\u4e9b\u64cd\u4f5c\u652f\u6301\u77e2\u91cf/\u5757\u8fd0\u7b97\n",
"\n",
"\u57fa\u672c\u4e0a\u5168\u90e8\n",
"\n",
"```python\n",
"# \u53d6\u77e9\u9635\u7684\u6bcf\u5217\u7684\u5747\u503c\u4e0e\u6807\u51c6\u5dee\n",
"A.mean(axis=0)\n",
"A.std(axis=0)\n",
"\n",
"# \u53d6\u77e9\u9635\u6bcf\u5217\u6700\u5927\u7684\u5143\u7d20\n",
"A.max(axis=0)\n",
"\n",
"# \u53d6\u77e9\u9635\u6bcf\u5217\u6700\u5927\u7684\u5143\u7d20\uff0c\u5ffd\u7565nan\u503c\n",
"np.nanmax(A, axis=0)\n",
"\n",
"# \u53d6array\u7684\u524d50\u4e2a\u6700\u5c0f\u5143\u7d20\u7684\u4e0b\u6807\n",
"a.argsort()[:50]\n",
"\n",
"# \u5982\u679c\u4e0d\u5728\u610f\u987a\u5e8f\uff0c\u8fd8\u6709\u66f4\u5feb\u7684\u65b9\u6cd5\n",
"a.argpartition(50)[:50]\n",
"\n",
"# \u6bd4\u8f83\u77e9\u9635\u4efb\u610f\u4e24\u884c\u4e4b\u95f4\u7684\u8ddd\u79bb\n",
"scipy.spatial.distance.pdist(A)\n",
"\n",
"# \u4e0a\u4e09\u89d2\u77e9\u9635\u4e0e\u65b9\u9635\u4e4b\u95f4\u7684\u5750\u6807\u53d8\u6362\n",
"np.triu_indices(N, k)\n",
"```\n",
"\n",
"\u6240\u4ee5**\u5728\u5199\u4efb\u4f55\u5faa\u73af\u4e4b\u524d\uff0c\u5148\u770b\u770bnumpy/scipy\u6709\u6ca1\u6709\u5bf9\u5e94\u7684\u77e2\u91cf/\u5757\u8fd0\u7b97**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Mongodb\u7684\u4f18\u5316\n",
"\n",
"### Mongoengine\u7684\u95ee\u9898\n",
"\n",
"Mongoengine\u662f\u4e00\u4e2a\u975e\u5e38\u6162\u7684ORM\u6846\u67b6\uff0c\u5b83\u8fd0\u7528\u4e86\u5927\u91cfmetaprogramming\u6280\u5de7\uff0c\u5728\u529f\u80fd\u5f3a\u5927\u7684\u540c\u65f6\u6027\u80fd\u975e\u5e38\u5751\u7239"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from mongoengine import connect, Document, ListField\n",
"\n",
"class Test(Document):\n",
" g = ListField()\n",
" \n",
"t = Test(g=['3']*20)\n",
"\n",
"class Test2:\n",
" g = ['3']*20\n",
" \n",
"t2 = Test2()\n",
"\n",
"%timeit t.g\n",
"%timeit t2.g"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"10000 loops, best of 3: 164 \u00b5s per loop\n",
"10000000 loops, best of 3: 50.8 ns per loop"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\u5982\u679c\u4e00\u5b9a\u8981\u7528mongoengine\u7684\u8bdd\uff0c\u81f3\u5c11\u5f97\u8fd9\u6837"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%timeit t._data['g']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"10000000 loops, best of 3: 85.8 ns per loop\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### \u6279\u91cf\u63d2\u5165\n",
"\n",
"\u4ece2.6\u7248\u672c\u5f00\u59cb\uff0cmongodb\u5df2\u7ecf\u652f\u6301\u6279\u91cf\u63d2\u5165\u4e00\u5806\u65e0\u5e8fdocument\u6765\u51cf\u5c11\u5f80\u8fd4\u901a\u4fe1\uff0c\u52a0\u901f\u6027\u80fd\n",
"\n",
"\u5728pymongo, insert\u65b9\u6cd5\u53ef\u4ee5\u652f\u6301\u4e00\u4e2aiterable\u7684\u4e1c\u4e1c\n",
"\n",
"`pymongo.collection.Collection.insert([doc1, doc2, ...])`\n",
"\n",
"\u5728mongoengine, Document\u4e5f\u652f\u6301\u4e00\u4e2aclass\u7ea7\u522b\u7684\u63d2\u5165\n",
"\n",
"`mongoengine.Document.objects.insert([doc1, doc2, ...])`"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pymongo\n",
"from mongoengine import connect, Document, ListField\n",
"connect('test')\n",
"db = pymongo.MongoClient().test\n",
"\n",
"class Test(Document):\n",
" g = ListField()\n",
"\n",
"docs1 = [Test(g=['3']*20) for _ in range(10000)]\n",
"docs2 = [{'g':['3']*20} for _ in range(10000)]\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"db.drop_collection('test')\n",
"%time Test.objects.insert(docs1)\n",
"\n",
"db.drop_collection('test')\n",
"%time x=[doc.save() for doc in docs1]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 2.5 s, sys: 24.5 ms, total: 2.53 s\n",
"Wall time: 2.7 s\n",
"CPU times: user 3.83 s, sys: 218 ms, total: 4.05 s"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Wall time: 4.92 s\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"db.drop_collection('test')\n",
"%time db.test.insert(docs2)\n",
"\n",
"db.drop_collection('test')\n",
"%time x=[db.test.insert(doc) for doc in docs2]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 301 ms, sys: 3.25 ms, total: 304 ms\n",
"Wall time: 414 ms\n",
"CPU times: user 1.82 s, sys: 209 ms, total: 2.03 s"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Wall time: 2.88 s\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
}
],
"metadata": {}
}
]
}
@observerss
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment