Skip to content

Instantly share code, notes, and snippets.

@cfriedline
Created June 4, 2014 06:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cfriedline/57c9f35b31225ced7931 to your computer and use it in GitHub Desktop.
Save cfriedline/57c9f35b31225ced7931 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "",
"signature": "sha256:24625e0326d219f8b77f0317e421df165d0adfb7925fac44e4f9cc5c35a28bd6"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"os.environ['R_HOME'] = '/home/cfriedline/lib64/R'\n",
"import rpy2.robjects\n",
"import random\n",
"import string\n",
"import tempfile\n",
"import dendropy\n",
"from multiprocessing import Pool\n",
"from IPython.parallel import Client"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rc = Client(profile='sge')\n",
"dview = rc[:]\n",
"lview = rc.load_balanced_view()\n",
"len(rc)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": [
"10"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with dview.sync_imports():\n",
" import os\n",
" import rpy2\n",
" import rpy2.robjects\n",
" import random\n",
" import string\n",
" import tempfile\n",
" import dendropy\n",
" import socket\n",
" from multiprocessing import Pool\n",
" \n",
"def setup_cluster_engines():\n",
" os.environ['R_HOME'] = '/home/cfriedline/lib64/R'\n",
" r = rpy2.robjects.r\n",
" ape = r('library(ape)')\n",
" return socket.gethostname(), os.getpid(), os.environ['R_HOME'], rpy2.__version__\n",
"dview['setup_cluster_engines'] = setup_cluster_engines\n",
"dview.apply(setup_cluster_engines).get()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"importing os on engine(s)\n",
"importing rpy2 on engine(s)\n",
"importing rpy2.robjects on engine(s)\n",
"importing random on engine(s)\n",
"importing string on engine(s)"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"importing tempfile on engine(s)\n",
"importing dendropy on engine(s)\n",
"importing socket on engine(s)\n",
"importing Pool from multiprocessing on engine(s)"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"[('godel97', 26811, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 26870, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 26819, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 26866, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 26873, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 17510, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 17464, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 17512, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 17511, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 17507, '/home/cfriedline/lib64/R', '2.4.0')]"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r = rpy2.robjects.r\n",
"ape = r('library(ape)')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def create_tree(num_tips, type):\n",
" \"\"\"\n",
" creates the taxa tree in R\n",
" @param num_tips: number of taxa to create\n",
" @param type: type for naming (e.g., 'taxa')\n",
" @return: a dendropy Tree\n",
" @rtype: dendropy.Tree\n",
" \"\"\"\n",
" r = rpy2.robjects.r\n",
" rpy2.robjects.globalenv['numtips'] = num_tips\n",
" rpy2.robjects.globalenv['treetype'] = type\n",
" name = _get_random_string(20)\n",
" if type == \"T\":\n",
" r(\"%s = rtree(numtips, rooted=T, tip.label=paste(treetype, seq(1:(numtips)), sep=''))\" % name)\n",
" else:\n",
" r(\"%s = rtree(numtips, rooted=F, tip.label=paste(treetype, seq(1:(numtips)), sep=''))\" % name)\n",
" tree = r[name]\n",
" return ape_to_dendropy(tree)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def ape_to_dendropy(phylo):\n",
" \"\"\"\n",
" converts an ape tree to dendropy tree\n",
" @param phylo: ape instance from rpy2\n",
" @return: a dendropy tree\n",
" @rtype: dendropy.Tree\n",
" \"\"\"\n",
" f = tempfile.NamedTemporaryFile()\n",
" rpy2.robjects.r['write.nexus'](phylo, file=f.name)\n",
" tree = dendropy.Tree.get_from_path(f.name, \"nexus\")\n",
" f.close()\n",
" return tree"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def _get_random_string(length):\n",
" \"\"\"\n",
" gets a random string of letters/numbers, ensuring that it does not start with a\n",
" number\n",
" @param length: length of the string\n",
" @return: the random string\n",
" @rtype: string\n",
" \"\"\"\n",
" s = ''.join(random.choice(string.letters + string.digits) for i in xrange(length))\n",
" if not s[0] in string.letters:\n",
" return _get_random_string(length)\n",
" return s"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tree = create_tree(100, \"T\")\n",
"tree.as_newick_string()[0:80]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"'(((T6:0.3194573249,T88:0.7137461954):0.426507879,T42:0.09966852632):0.2807099563'"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def create_tree_mp(num_taxa):\n",
" t = create_tree(num_taxa, \"T\")\n",
" return t\n",
"\n",
"def get_taxa_trees(num_trees, num_taxa):\n",
" jobs_mp = []\n",
" jobs_ip = []\n",
" jobs = []\n",
" res = []\n",
" pool = Pool(num_trees)\n",
" for i in xrange(num_trees):\n",
" jobs_mp.append(pool.apply_async(create_tree_mp, (num_taxa,)))\n",
" jobs.append(create_tree_mp(num_taxa))\n",
" jobs_ip.append(lview.apply_async(create_tree_mp, num_taxa))\n",
" pool.close()\n",
" pool.join()\n",
" res.append(jobs)\n",
" res.append([x.get() for x in jobs_mp])\n",
" res.append([x.get() for x in jobs_ip])\n",
" return res"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dview['create_tree'] = create_tree\n",
"dview['_get_random_string'] = _get_random_string\n",
"dview['ape_to_dendropy'] = ape_to_dendropy\n",
"dview['create_tree_mp'] = create_tree_mp\n",
"dview['get_taxa_trees'] = get_taxa_trees"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"trees = get_taxa_trees(5, 10)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for t in trees[0]:\n",
" print t.as_newick_string()[0:80]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n",
"((T7:0.5806423163,(T5:0.007633442292,T2:0.2177157113):0.09601430735):0.305033218\n",
"((((((T6:0.2959137154,T7:0.4578026251):0.8627177938,(T8:0.08809919422,T3:0.10175\n",
"(T7:0.2577645453,((T10:0.1396945077,T4:0.6536828352):0.7271465769,(T2:0.26139505\n",
"((T10:0.01782502211,T5:0.9478464343):0.4384911573,((T1:0.201613334,T9:0.52947913\n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for t in trees[1]:\n",
" print t.as_newick_string()[0:80]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n",
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n",
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n",
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n",
"(((T2:0.7014786459,(((T10:0.1535879213,(T6:0.9837731475,T8:0.2049651504):0.79080\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for t in trees[2]:\n",
" print t.as_newick_string()[0:80]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(((T1:0.1959578383,(T4:0.4092330183,T5:0.1314637179):0.6633579263):0.07092879713\n",
"((((T10:0.5492198297,(T9:0.8054336836,T8:0.9056934002):0.6771697227):0.465427928\n",
"(((T9:0.06959938002,((T10:0.05167591712,T1:0.7737517059):0.6082972996,T7:0.63657\n",
"(T6:0.5223825315,((((T10:0.3305313124,T3:0.1639568484):0.5333597104,(T1:0.155836\n",
"((T4:0.9040003123,(T1:0.7009222899,(T9:0.5908546504,T7:0.3244189187):0.872680337\n"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment