Skip to content

Instantly share code, notes, and snippets.

@cfriedline
Created June 4, 2014 07:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cfriedline/0095ad55d645a7202cc6 to your computer and use it in GitHub Desktop.
Save cfriedline/0095ad55d645a7202cc6 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "",
"signature": "sha256:5507a20bf985d7ff9c8b454bf414997a33b671d82b7ceaaf24f89a61d1397ce0"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"os.environ['R_HOME'] = '/home/cfriedline/lib64/R'\n",
"import rpy2.robjects\n",
"import random\n",
"import string\n",
"import tempfile\n",
"import dendropy\n",
"import scipy\n",
"from multiprocessing import Pool\n",
"from IPython.parallel import Client"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rc = Client(profile='sge')\n",
"dview = rc[:]\n",
"lview = rc.load_balanced_view()\n",
"len(rc)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": [
"10"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with dview.sync_imports():\n",
" import os\n",
" import rpy2\n",
" import rpy2.robjects\n",
" import random\n",
" import string\n",
" import tempfile\n",
" import dendropy\n",
" import socket\n",
" import scipy\n",
" from multiprocessing import Pool\n",
" \n",
"def setup_cluster_engines():\n",
" os.environ['R_HOME'] = '/home/cfriedline/lib64/R'\n",
" r = rpy2.robjects.r\n",
" ape = r('library(ape)')\n",
" return socket.gethostname(), os.getpid(), os.environ['R_HOME'], rpy2.__version__\n",
"dview['setup_cluster_engines'] = setup_cluster_engines\n",
"dview.apply(setup_cluster_engines).get()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"importing os on engine(s)\n",
"importing rpy2 on engine(s)\n",
"importing rpy2.robjects on engine(s)\n",
"importing random on engine(s)\n",
"importing string on engine(s)\n",
"importing tempfile on engine(s)"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"importing dendropy on engine(s)\n",
"importing socket on engine(s)\n",
"importing scipy on engine(s)\n",
"importing Pool from multiprocessing on engine(s)\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"[('godel199', 17947, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 28753, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 28756, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 28781, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 28779, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel97', 28752, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 18050, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 18095, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 18083, '/home/cfriedline/lib64/R', '2.4.0'),\n",
" ('godel199', 18077, '/home/cfriedline/lib64/R', '2.4.0')]"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"r = rpy2.robjects.r\n",
"ape = r('library(ape)')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def create_tree(num_tips, type):\n",
" \"\"\"\n",
" creates the taxa tree in R\n",
" @param num_tips: number of taxa to create\n",
" @param type: type for naming (e.g., 'taxa')\n",
" @return: a dendropy Tree\n",
" @rtype: dendropy.Tree\n",
" \"\"\"\n",
" r = rpy2.robjects.r\n",
" rpy2.robjects.globalenv['numtips'] = num_tips\n",
" rpy2.robjects.globalenv['treetype'] = type\n",
" name = _get_random_string(20)\n",
" if type == \"T\":\n",
" r(\"%s = rtree(numtips, rooted=T, tip.label=paste(treetype, seq(1:(numtips)), sep=''))\" % name)\n",
" else:\n",
" r(\"%s = rtree(numtips, rooted=F, tip.label=paste(treetype, seq(1:(numtips)), sep=''))\" % name)\n",
" tree = r[name]\n",
" return ape_to_dendropy(tree)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def ape_to_dendropy(phylo):\n",
" \"\"\"\n",
" converts an ape tree to dendropy tree\n",
" @param phylo: ape instance from rpy2\n",
" @return: a dendropy tree\n",
" @rtype: dendropy.Tree\n",
" \"\"\"\n",
" f = tempfile.NamedTemporaryFile()\n",
" rpy2.robjects.r['write.nexus'](phylo, file=f.name)\n",
" tree = dendropy.Tree.get_from_path(f.name, \"nexus\")\n",
" f.close()\n",
" return tree"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def _get_random_string(length):\n",
" \"\"\"\n",
" gets a random string of letters/numbers, ensuring that it does not start with a\n",
" number\n",
" @param length: length of the string\n",
" @return: the random string\n",
" @rtype: string\n",
" \"\"\"\n",
" choices = \"%s%s\" % (string.letters,string.digits)\n",
" s = ''.join(scipy.random.choice(list(choices),10))\n",
" if s[0] not in string.letters:\n",
" return _get_random_string(length)\n",
" return s"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tree = create_tree(100, \"T\")\n",
"tree.as_newick_string()[0:80]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"'((((T88:0.2252453833,T52:0.4084565411):0.2996090709,((((T68:0.1808569834,T67:0.2'"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def reseed(args):\n",
" scipy.random.seed()\n",
" return os.getpid()\n",
"\n",
"def pool_reseed(pool, jobs):\n",
" res = pool.map(reseed, range(jobs))\n",
" if len(set(res)) != jobs:\n",
" return pool_reseed(pool, jobs)\n",
" return True, res"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def create_tree_mp(num_taxa):\n",
" t = create_tree(num_taxa, \"T\")\n",
" return t\n",
"\n",
"def get_taxa_trees(num_trees, num_taxa):\n",
" jobs_mp = []\n",
" jobs_ip = []\n",
" jobs = []\n",
" res = []\n",
" pool = Pool(num_trees)\n",
" print pool_reseed(pool, num_trees)\n",
" for i in xrange(num_trees):\n",
" jobs_mp.append(pool.apply_async(create_tree_mp, (num_taxa,)))\n",
" jobs.append(create_tree_mp(num_taxa))\n",
" jobs_ip.append(lview.apply_async(create_tree_mp, num_taxa))\n",
" pool.close()\n",
" pool.join()\n",
" res.append(jobs)\n",
" res.append([x.get() for x in jobs_mp])\n",
" res.append([x.get() for x in jobs_ip])\n",
" return res"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dview['create_tree'] = create_tree\n",
"dview['_get_random_string'] = _get_random_string\n",
"dview['ape_to_dendropy'] = ape_to_dendropy\n",
"dview['create_tree_mp'] = create_tree_mp\n",
"dview['get_taxa_trees'] = get_taxa_trees"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# jobs = 10\n",
"# pool = Pool(jobs)\n",
"# pool_reseed(pool, jobs)\n",
"# if pool_reseed:\n",
"# for i in xrange(10):\n",
"# print pool.apply_async(_get_random_string, (20,)).get()\n",
"# pool.close()\n",
"# pool.join()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"trees = get_taxa_trees(5, 10)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(True, [3499, 3500, 3501, 3503, 3502])\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for t in trees[0]:\n",
" print t.as_newick_string()[0:80]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n",
"(((T7:0.1649202022,(((T6:0.432419851,T1:0.670651369):0.9676914262,(T9:0.25170810\n",
"(((T9:0.1202223536,((T10:0.1974260821,T2:0.9239173683):0.3418610748,T8:0.3907029\n",
"(((T6:0.3341352192,T8:0.7529280938):0.5023088937,(((T1:0.2147031985,T10:0.728587\n",
"(T5:0.870002911,(((T10:0.2627973619,T7:0.5444249122):0.1685904923,T3:0.799313113\n"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for t in trees[1]:\n",
" print t.as_newick_string()[0:80]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n",
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n",
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n",
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n",
"((T4:0.2106896029,T7:0.03095384012):0.4023617527,((T2:0.7520255479,T8:0.68991846\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for t in trees[2]:\n",
" print t.as_newick_string()[0:80]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(((T10:0.5095300437,T2:0.8708823652):0.1151310115,T8:0.6142042589):0.9769569514,\n",
"(((T5:0.1514588273,((T1:0.4826061821,T7:0.1636368397):0.9322419968,(T6:0.8749330\n",
"((T10:0.9323699088,(T1:0.2327637093,(((T2:0.8742392994,T4:0.8461208662):0.681684\n",
"(((T4:0.946286211,T8:0.6548262595):0.8693261528,(T5:0.911844848,T1:0.6759150678)\n",
"((((T2:0.7475027808,T10:0.4808094592):0.7282650121,(T9:0.5261673003,T5:0.9762509\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment