crowding/lazy.frame.R

## readme.html
<!DOCTYPE html>
<!-- saved from url=(0014)about:internet -->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>

<title>Lazy data frame</title>

<!-- Styles for R syntax highlighter -->
<style type="text/css">
   pre .operator,
   pre .paren {
     color: rgb(104, 118, 135)
   }

   pre .literal {
     color: rgb(88, 72, 246)
   }

   pre .number {
     color: rgb(0, 0, 205);
   }

   pre .comment {
     color: rgb(76, 136, 107);
   }

   pre .keyword {
     color: rgb(0, 0, 255);
   }

   pre .identifier {
     color: rgb(0, 0, 0);
   }

   pre .string {
     color: rgb(3, 106, 7);
   }
</style>

<!-- R syntax highlighter -->
<script type="text/javascript">
var hljs=new function(){function m(p){return p.replace(/&/gm,"&amp;").replace(/</gm,"&lt;")}function f(r,q,p){return RegExp(q,"m"+(r.cI?"i":"")+(p?"g":""))}function b(r){for(var p=0;p<r.childNodes.length;p++){var q=r.childNodes[p];if(q.nodeName=="CODE"){return q}if(!(q.nodeType==3&&q.nodeValue.match(/\s+/))){break}}}function h(t,s){var p="";for(var r=0;r<t.childNodes.length;r++){if(t.childNodes[r].nodeType==3){var q=t.childNodes[r].nodeValue;if(s){q=q.replace(/\n/g,"")}p+=q}else{if(t.childNodes[r].nodeName=="BR"){p+="\n"}else{p+=h(t.childNodes[r])}}}if(/MSIE [678]/.test(navigator.userAgent)){p=p.replace(/\r/g,"\n")}return p}function a(s){var r=s.className.split(/\s+/);r=r.concat(s.parentNode.className.split(/\s+/));for(var q=0;q<r.length;q++){var p=r[q].replace(/^language-/,"");if(e[p]){return p}}}function c(q){var p=[];(function(s,t){for(var r=0;r<s.childNodes.length;r++){if(s.childNodes[r].nodeType==3){t+=s.childNodes[r].nodeValue.length}else{if(s.childNodes[r].nodeName=="BR"){t+=1}else{if(s.childNodes[r].nodeType==1){p.push({event:"start",offset:t,node:s.childNodes[r]});t=arguments.callee(s.childNodes[r],t);p.push({event:"stop",offset:t,node:s.childNodes[r]})}}}}return t})(q,0);return p}function k(y,w,x){var q=0;var z="";var s=[];function u(){if(y.length&&w.length){if(y[0].offset!=w[0].offset){return(y[0].offset<w[0].offset)?y:w}else{return w[0].event=="start"?y:w}}else{return y.length?y:w}}function t(D){var A="<"+D.nodeName.toLowerCase();for(var B=0;B<D.attributes.length;B++){var C=D.attributes[B];A+=" "+C.nodeName.toLowerCase();if(C.value!==undefined&&C.value!==false&&C.value!==null){A+='="'+m(C.value)+'"'}}return A+">"}while(y.length||w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("</"+p.nodeName.toLowerCase()+">")}while(p!=v.node);s.splice(r,1);while(r<s.length){z+=t(s[r]);r++}}}}return z+m(x.substr(q))}function j(){function q(x,y,v){if(x.compiled){return}var u;var s=[];if(x.k){x.lR=f(y,x.l||hljs.IR,true);for(var w in x.k){if(!x.k.hasOwnProperty(w)){continue}if(x.k[w] instanceof Object){u=x.k[w]}else{u=x.k;w="keyword"}for(var r in u){if(!u.hasOwnProperty(r)){continue}x.k[r]=[w,u[r]];s.push(r)}}}if(!v){if(x.bWK){x.b="\\b("+s.join("|")+")\\s"}x.bR=f(y,x.b?x.b:"\\B|\\b");if(!x.e&&!x.eW){x.e="\\B|\\b"}if(x.e){x.eR=f(y,x.e)}}if(x.i){x.iR=f(y,x.i)}if(x.r===undefined){x.r=1}if(!x.c){x.c=[]}x.compiled=true;for(var t=0;t<x.c.length;t++){if(x.c[t]=="self"){x.c[t]=x}q(x.c[t],y,false)}if(x.starts){q(x.starts,y,false)}}for(var p in e){if(!e.hasOwnProperty(p)){continue}q(e[p].dM,e[p],true)}}function d(B,C){if(!j.called){j();j.called=true}function q(r,M){for(var L=0;L<M.c.length;L++){if((M.c[L].bR.exec(r)||[null])[0]==r){return M.c[L]}}}function v(L,r){if(D[L].e&&D[L].eR.test(r)){return 1}if(D[L].eW){var M=v(L-1,r);return M?M+1:0}return 0}function w(r,L){return L.i&&L.iR.test(r)}function K(N,O){var M=[];for(var L=0;L<N.c.length;L++){M.push(N.c[L].b)}var r=D.length-1;do{if(D[r].e){M.push(D[r].e)}r--}while(D[r+1].eW);if(N.i){M.push(N.i)}return f(O,M.join("|"),true)}function p(M,L){var N=D[D.length-1];if(!N.t){N.t=K(N,E)}N.t.lastIndex=L;var r=N.t.exec(M);return r?[M.substr(L,r.index-L),r[0],false]:[M.substr(L),"",true]}function z(N,r){var L=E.cI?r[0].toLowerCase():r[0];var M=N.k[L];if(M&&M instanceof Array){return M}return false}function F(L,P){L=m(L);if(!P.k){return L}var r="";var O=0;P.lR.lastIndex=0;var M=P.lR.exec(L);while(M){r+=L.substr(O,M.index-O);var N=z(P,M);if(N){x+=N[1];r+='<span class="'+N[0]+'">'+M[0]+"</span>"}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'<span class="'+M.cN+'">':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.length-1,M);if(L){var O=R.cN?"</span>":"";if(R.rE){y+=J(R.buffer+N,R)+O}else{if(R.eE){y+=J(R.buffer+N,R)+O+m(M)}else{y+=J(R.buffer+N+M,R)+O}}while(L>1){O=D[D.length-2].cN?"</span>":"";y+=O;L--;D.length--}var r=D[D.length-1];D.length--;D[D.length-1].buffer="";if(r.starts){I(r.starts,"")}return R.rE}if(w(M,R)){throw"Illegal"}}var E=e[B];var D=[E.dM];var A=0;var x=0;var y="";try{var s,u=0;E.dM.buffer="";do{s=p(C,u);var t=G(s[0],s[1],s[2]);u+=s[0].length;if(!t){u+=s[1].length}}while(!s[2]);if(D.length>1){throw"Illegal"}return{r:A,keyword_count:x,value:y}}catch(H){if(H=="Illegal"){return{r:0,keyword_count:0,value:m(C)}}else{throw H}}}function g(t){var p={keyword_count:0,r:0,value:m(t)};var r=p;for(var q in e){if(!e.hasOwnProperty(q)){continue}var s=d(q,t);s.language=q;if(s.keyword_count+s.r>r.keyword_count+r.r){r=s}if(s.keyword_count+s.r>p.keyword_count+p.r){r=p;p=s}}if(r.language){p.second_best=r}return p}function i(r,q,p){if(q){r=r.replace(/^((<[^>]+>|\t)+)/gm,function(t,w,v,u){return w.replace(/\t/g,q)})}if(p){r=r.replace(/\n/g,"<br>")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s|^)(language-)?"+v+"(\\s|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML="<pre><code>"+y.value+"</code></pre>";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var r=document.getElementsByTagName("pre");for(var p=0;p<r.length;p++){var q=b(r[p]);if(q){n(q,hljs.tabReplace)}}}function l(){if(window.addEventListener){window.addEventListener("DOMContentLoaded",o,false);window.addEventListener("load",o,false)}else{if(window.attachEvent){window.attachEvent("onload",o)}else{window.onload=o}}}var e={};this.LANGUAGES=e;this.highlight=d;this.highlightAuto=g;this.fixMarkup=i;this.highlightBlock=n;this.initHighlighting=o;this.initHighlightingOnLoad=l;this.IR="[a-zA-Z][a-zA-Z0-9_]*";this.UIR="[a-zA-Z_][a-zA-Z0-9_]*";this.NR="\\b\\d+(\\.\\d+)?";this.CNR="\\b(0[xX][a-fA-F0-9]+|(\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)";this.BNR="\\b(0b[01]+)";this.RSR="!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~";this.ER="(?![\\s\\S])";this.BE={b:"\\\\.",r:0};this.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[this.BE],r:0};this.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[this.BE],r:0};this.CLCM={cN:"comment",b:"//",e:"$"};this.CBLCLM={cN:"comment",b:"/\\*",e:"\\*/"};this.HCM={cN:"comment",b:"#",e:"$"};this.NM={cN:"number",b:this.NR,r:0};this.CNM={cN:"number",b:this.CNR,r:0};this.BNM={cN:"number",b:this.BNR,r:0};this.inherit=function(r,s){var p={};for(var q in r){p[q]=r[q]}if(s){for(var q in s){p[q]=s[q]}}return p}}();hljs.LANGUAGES.cpp=function(){var a={keyword:{"false":1,"int":1,"float":1,"while":1,"private":1,"char":1,"catch":1,"export":1,virtual:1,operator:2,sizeof:2,dynamic_cast:2,typedef:2,const_cast:2,"const":1,struct:1,"for":1,static_cast:2,union:1,namespace:1,unsigned:1,"long":1,"throw":1,"volatile":2,"static":1,"protected":1,bool:1,template:1,mutable:1,"if":1,"public":1,friend:2,"do":1,"return":1,"goto":1,auto:1,"void":2,"enum":1,"else":1,"break":1,"new":1,extern:1,using:1,"true":1,"class":1,asm:1,"case":1,typeid:1,"short":1,reinterpret_cast:2,"default":1,"double":1,register:1,explicit:1,signed:1,typename:1,"try":1,"this":1,"switch":1,"continue":1,wchar_t:1,inline:1,"delete":1,alignof:1,char16_t:1,char32_t:1,constexpr:1,decltype:1,noexcept:1,nullptr:1,static_assert:1,thread_local:1,restrict:1,_Bool:1,complex:1},built_in:{std:1,string:1,cin:1,cout:1,cerr:1,clog:1,stringstream:1,istringstream:1,ostringstream:1,auto_ptr:1,deque:1,list:1,queue:1,stack:1,vector:1,map:1,set:1,bitset:1,multiset:1,multimap:1,unordered_set:1,unordered_map:1,unordered_multiset:1,unordered_multimap:1,array:1,shared_ptr:1}};return{dM:{k:a,i:"</",c:[hljs.CLCM,hljs.CBLCLM,hljs.QSM,{cN:"string",b:"'\\\\?.",e:"'",i:"."},{cN:"number",b:"\\b(\\d+(\\.\\d*)?|\\.\\d+)(u|U|l|L|ul|UL|f|F)"},hljs.CNM,{cN:"preprocessor",b:"#",e:"$"},{cN:"stl_container",b:"\\b(deque|list|queue|stack|vector|map|set|bitset|multiset|multimap|unordered_map|unordered_set|unordered_multiset|unordered_multimap|array)\\s*<",e:">",k:a,r:10,c:["self"]}]}}}();hljs.LANGUAGES.r={dM:{c:[hljs.HCM,{cN:"number",b:"\\b0[xX][0-9a-fA-F]+[Li]?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+(?:[eE][+\\-]?\\d*)?L\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+\\.(?!\\d)(?:i\\b)?",e:hljs.IMMEDIATE_RE,r:1},{cN:"number",b:"\\b\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\.\\d+(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"keyword",b:"(?:tryCatch|library|setGeneric|setGroupGeneric)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\.",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\d+(?![\\w.])",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\b(?:function)",e:hljs.IMMEDIATE_RE,r:2},{cN:"keyword",b:"(?:if|in|break|next|repeat|else|for|return|switch|while|try|stop|warning|require|attach|detach|source|setMethod|setClass)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"literal",b:"(?:NA|NA_integer_|NA_real_|NA_character_|NA_complex_)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"literal",b:"(?:NULL|TRUE|FALSE|T|F|Inf|NaN)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"identifier",b:"[a-zA-Z.][a-zA-Z0-9._]*\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"<\\-(?!\\s*\\d)",e:hljs.IMMEDIATE_RE,r:2},{cN:"operator",b:"\\->|<\\-",e:hljs.IMMEDIATE_RE,r:1},{cN:"operator",b:"%%|~",e:hljs.IMMEDIATE_RE},{cN:"operator",b:">=|<=|==|!=|\\|\\||&&|=|\\+|\\-|\\*|/|\\^|>|<|!|&|\\||\\$|:",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"%",e:"%",i:"\\n",r:1},{cN:"identifier",b:"`",e:"`",r:0},{cN:"string",b:'"',e:'"',c:[hljs.BE],r:0},{cN:"string",b:"'",e:"'",c:[hljs.BE],r:0},{cN:"paren",b:"[[({\\])}]",e:hljs.IMMEDIATE_RE,r:0}]}};
hljs.initHighlightingOnLoad();
</script>

<!-- MathJax scripts -->
<script type="text/javascript" src="https://c328740.ssl.cf1.rackcdn.com/mathjax/2.0-latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
</script>


<style type="text/css">
body, td {
   font-family: sans-serif;
   background-color: white;
   font-size: 12px;
   margin: 8px;
}

tt, code, pre {
   font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace;
}

h1 {
   font-size:2.2em;
}

h2 {
   font-size:1.8em;
}

h3 {
   font-size:1.4em;
}

h4 {
   font-size:1.0em;
}

h5 {
   font-size:0.9em;
}

h6 {
   font-size:0.8em;
}

a:visited {
   color: rgb(50%, 0%, 50%);
}

pre {
   margin-top: 0;
   max-width: 95%;
   border: 1px solid #ccc;
   white-space: pre-wrap;
}

pre code {
   display: block; padding: 0.5em;
}

code.r, code.cpp {
   background-color: #F8F8F8;
}

table, td, th {
  border: none;
}

blockquote {
   color:#666666;
   margin:0;
   padding-left: 1em;
   border-left: 0.5em #EEE solid;
}

hr {
   height: 0px;
   border-bottom: none;
   border-top-width: thin;
   border-top-style: dotted;
   border-top-color: #999999;
}

@media print {
   * {
      background: transparent !important;
      color: black !important;
      filter:none !important;
      -ms-filter: none !important;
   }

   body {
      font-size:12pt;
      max-width:100%;
   }

   a, a:visited {
      text-decoration: underline;
   }

   hr {
      visibility: hidden;
      page-break-before: always;
   }

   pre, blockquote {
      padding-right: 1em;
      page-break-inside: avoid;
   }

   tr, img {
      page-break-inside: avoid;
   }

   img {
      max-width: 100% !important;
   }

   @page :left {
      margin: 15mm 20mm 15mm 10mm;
   }

   @page :right {
      margin: 15mm 10mm 15mm 20mm;
   }

   p, h2, h3 {
      orphans: 3; widows: 3;
   }

   h2, h3 {
      page-break-after: avoid;
   }
}
</style>


</head>

<body>
<h3>Lazy data frame</h3>

<pre><code class="r">opts_chunk$set(tidy = FALSE)
library(vadr)
library(plyr)
library(microbenchmark)
source(&quot;lazy.frame.R&quot;)
</code></pre>

<p>The insights exploited here are that the fastest way to get an environment with vars <code>a</code>, <code>b</code>, <code>c</code>, is:</p>

<pre><code class="r">lazy &lt;- function(a,b,c) (environment())
explicit &lt;- function(...)list2env(list(...))
microbenchmark(
    lazy = lazy(1,2,3),
    explicit = explicit(a=1, b=2, c=3))
</code></pre>

<pre><code>## Unit: microseconds
##      expr   min    lq median    uq  max neval
##      lazy 1.179 1.450  1.681 1.841 2288   100
##  explicit 8.698 9.162  9.312 9.643 2779   100
</code></pre>

<p>And given such environment and row indices, the fastest way to make a subset is:</p>

<pre><code class="r">lazy_subset &lt;- function(e, ix) lazy(e$a[ix], e$b[ix], e$c[ix])
explicit_subset &lt;- function(e, ix) as.environment(lapply(as.list(e), `[`, ix))
e &lt;- lazy(1:10, letters[1:10], as.list(1:10))
microbenchmark(
    lazy = lazy_subset(e, 1:5),
    explicit = explicit_subset(e, 1:5))
</code></pre>

<pre><code>## Unit: microseconds
##      expr    min     lq median     uq  max neval
##      lazy  1.739  2.048  2.532  2.737 5064   100
##  explicit 26.882 27.511 27.817 28.136 4072   100
</code></pre>

<p>(Fast because no subsets are actually computed, only promises to compute a subset in the future, only for those columns that are demanded)</p>

<h3>Setup:</h3>

<pre><code class="r">bbI &lt;- idata.frame(baseball)
bbL &lt;- lazy.frame(baseball)
nr &lt;- nrow(baseball)
nc &lt;- ncol(baseball)
colnames &lt;- colnames(baseball)
</code></pre>

<h3>Row subset + eval:</h3>

<pre><code class="r">microbenchmark(
    df = with(baseball[sample(nr, 1), ], rbi/ab),
    idf = with(bbI[sample(nr, 1), ], rbi/ab),
    lazy = with(bbL[sample(nr, 1), ], rbi/ab))
</code></pre>

<pre><code>## Unit: microseconds
##  expr    min      lq  median      uq    max neval
##    df  491.0  539.01  600.63   645.8   1884   100
##   idf 8144.7 8580.31 8921.75 11015.9  66710   100
##  lazy   64.7   70.22   87.07   118.6 245123   100
</code></pre>

<h3>Column subset (same column):</h3>

<pre><code class="r">microbenchmark(
    df = baseball[&quot;rbi&quot;]$rbi[1],
    idf = bbI[&quot;rbi&quot;]$rbi[1],
    lazy = bbL[&quot;rbi&quot;]$rbi[1])
</code></pre>

<pre><code>## Unit: microseconds
##  expr     min     lq median     uq   max neval
##    df  195.22  220.2  265.4  318.0  3177   100
##   idf 5167.25 5284.1 5424.2 5706.5  9494   100
##  lazy   97.84  106.4  155.9  171.4 52320   100
</code></pre>

<h3>Column subset (random column):</h3>

<pre><code class="r">microbenchmark(
    df = baseball[sample(colnames, 1)][1,1],
    idf = bbI[sample(colnames, 1)][1,1],
    lazy = bbL[sample(colnames, 1)][1,1])
</code></pre>

<pre><code>## Unit: microseconds
##  expr    min     lq median     uq   max neval
##    df  282.1  332.7  379.7  554.3  4840   100
##   idf 1214.6 1244.4 1278.3 1439.9 10663   100
##  lazy  122.0  141.2  178.8  222.9  7846   100
</code></pre>

<h3>Column subset (random set of columns):</h3>

<p>(this forces a lot of macro expansion, but is an unusual use pattern)</p>

<pre><code class="r">microbenchmark(
    df = baseball[runif(colnames) &gt; 0.5][1,1],
    idf = bbI[runif(colnames) &gt; 0.5][1,1],
    lazy = bbL[runif(colnames) &gt; 0.5][1,1])
</code></pre>

<pre><code>## Unit: microseconds
##  expr    min   lq median    uq   max neval
##    df  867.9 1679   2203  4912 56785   100
##   idf 1256.5 1365   1449  1535  5371   100
##  lazy 5958.3 8442   9050 10192 61848   100
</code></pre>

<h3>Random access single element:</h3>

<pre><code class="r">microbenchmark(
    df = baseball[sample(nr, 1), sample(colnames, 1)],
    idf = bbI[sample(nr, 1), sample(colnames, 1)],
    lazy = bbL[sample(nr, 1), sample(colnames, 1)])
</code></pre>

<pre><code>## Unit: microseconds
##  expr    min     lq median     uq     max neval
##    df  84.25  94.11  97.80 105.43 3751.77   100
##   idf 287.36 300.44 308.29 325.38 4002.25   100
##  lazy  35.37  37.35  38.98  43.22   89.33   100
</code></pre>

<h3>Group-by and eval:</h3>

<pre><code class="r">microbenchmark(times=1,
               df = dlply(baseball, &quot;id&quot;, with, mean(rbi)/mean(ab)),
               idf = dlply(bbI, &quot;id&quot;, with, mean(rbi)/mean(ab)),
               lazy = dlply(bbL, &quot;id&quot;, with, mean(rbi)/mean(ab)))
</code></pre>

<pre><code>## Unit: milliseconds
##  expr     min      lq  median      uq     max neval
##    df   470.8   470.8   470.8   470.8   470.8     1
##   idf 11745.4 11745.4 11745.4 11745.4 11745.4     1
##  lazy   181.5   181.5   181.5   181.5   181.5     1
</code></pre>

</body>

</html>

## readme.md

      
    Raw
  

              readme.md
            
          
    Lazy data frame

opts_chunk$set(tidy = FALSE)
library(vadr)
library(plyr)
library(microbenchmark)
source("lazy.frame.R")
The insights exploited here are that the fastest way to get an environment with vars a, b, c, is:
lazy <- function(a,b,c) (environment())
explicit <- function(...)list2env(list(...))
microbenchmark(
    lazy = lazy(1,2,3),
    explicit = explicit(a=1, b=2, c=3))
## Unit: microseconds
##      expr   min    lq median    uq  max neval
##      lazy 1.179 1.450  1.681 1.841 2288   100
##  explicit 8.698 9.162  9.312 9.643 2779   100

And given such environment and row indices, the fastest way to make a subset is:
lazy_subset <- function(e, ix) lazy(e$a[ix], e$b[ix], e$c[ix])
explicit_subset <- function(e, ix) as.environment(lapply(as.list(e), `[`, ix))
e <- lazy(1:10, letters[1:10], as.list(1:10))
microbenchmark(
    lazy = lazy_subset(e, 1:5),
    explicit = explicit_subset(e, 1:5))
## Unit: microseconds
##      expr    min     lq median     uq  max neval
##      lazy  1.739  2.048  2.532  2.737 5064   100
##  explicit 26.882 27.511 27.817 28.136 4072   100

(Fast because no subsets are actually computed, only promises to compute a subset in the future, only for those columns that are demanded)
Setup:

bbI <- idata.frame(baseball)
bbL <- lazy.frame(baseball)
nr <- nrow(baseball)
nc <- ncol(baseball)
colnames <- colnames(baseball)
Row subset + eval:

microbenchmark(
    df = with(baseball[sample(nr, 1), ], rbi/ab),
    idf = with(bbI[sample(nr, 1), ], rbi/ab),
    lazy = with(bbL[sample(nr, 1), ], rbi/ab))
## Unit: microseconds
##  expr    min      lq  median      uq    max neval
##    df  491.0  539.01  600.63   645.8   1884   100
##   idf 8144.7 8580.31 8921.75 11015.9  66710   100
##  lazy   64.7   70.22   87.07   118.6 245123   100

Column subset (same column):

microbenchmark(
    df = baseball["rbi"]$rbi[1],
    idf = bbI["rbi"]$rbi[1],
    lazy = bbL["rbi"]$rbi[1])
## Unit: microseconds
##  expr     min     lq median     uq   max neval
##    df  195.22  220.2  265.4  318.0  3177   100
##   idf 5167.25 5284.1 5424.2 5706.5  9494   100
##  lazy   97.84  106.4  155.9  171.4 52320   100

Column subset (random column):

microbenchmark(
    df = baseball[sample(colnames, 1)][1,1],
    idf = bbI[sample(colnames, 1)][1,1],
    lazy = bbL[sample(colnames, 1)][1,1])
## Unit: microseconds
##  expr    min     lq median     uq   max neval
##    df  282.1  332.7  379.7  554.3  4840   100
##   idf 1214.6 1244.4 1278.3 1439.9 10663   100
##  lazy  122.0  141.2  178.8  222.9  7846   100

Column subset (random set of columns):

(this forces a lot of macro expansion, but is an unusual use pattern)
microbenchmark(
    df = baseball[runif(colnames) > 0.5][1,1],
    idf = bbI[runif(colnames) > 0.5][1,1],
    lazy = bbL[runif(colnames) > 0.5][1,1])
## Unit: microseconds
##  expr    min   lq median    uq   max neval
##    df  867.9 1679   2203  4912 56785   100
##   idf 1256.5 1365   1449  1535  5371   100
##  lazy 5958.3 8442   9050 10192 61848   100

Random access single element:

microbenchmark(
    df = baseball[sample(nr, 1), sample(colnames, 1)],
    idf = bbI[sample(nr, 1), sample(colnames, 1)],
    lazy = bbL[sample(nr, 1), sample(colnames, 1)])
## Unit: microseconds
##  expr    min     lq median     uq     max neval
##    df  84.25  94.11  97.80 105.43 3751.77   100
##   idf 287.36 300.44 308.29 325.38 4002.25   100
##  lazy  35.37  37.35  38.98  43.22   89.33   100

Group-by and eval:

microbenchmark(times=1,
               df = dlply(baseball, "id", with, mean(rbi)/mean(ab)),
               idf = dlply(bbI, "id", with, mean(rbi)/mean(ab)),
               lazy = dlply(bbL, "id", with, mean(rbi)/mean(ab)))
## Unit: milliseconds
##  expr     min      lq  median      uq     max neval
##    df   470.8   470.8   470.8   470.8   470.8     1
##   idf 11745.4 11745.4 11745.4 11745.4 11745.4     1
##  lazy   181.5   181.5   181.5   181.5   181.5     1


## readme.Rmd
### Lazy data frame

```{r}
opts_chunk$set(tidy=FALSE)
library(vadr)
library(plyr)
library(microbenchmark)
source("lazy.frame.R")
```

The insights exploited here are that the fastest way to get an environment with vars `a`, `b`, `c`, is:

```{r}
lazy <- function(a,b,c) (environment())
explicit <- function(...)list2env(list(...))
microbenchmark(
    lazy = lazy(1,2,3),
    explicit = explicit(a=1, b=2, c=3))
```

And given such environment and row indices, the fastest way to make a subset is:

```{r}
lazy_subset <- function(e, ix) lazy(e$a[ix], e$b[ix], e$c[ix])
explicit_subset <- function(e, ix) as.environment(lapply(as.list(e), `[`, ix))
e <- lazy(1:10, letters[1:10], as.list(1:10))
microbenchmark(
    lazy = lazy_subset(e, 1:5),
    explicit = explicit_subset(e, 1:5))
```

(Fast because no subsets are actually computed, only promises to compute a subset in the future, only for those columns that are demanded)

### Setup:
```{r}
bbI <- idata.frame(baseball)
bbL <- lazy.frame(baseball)
nr <- nrow(baseball)
nc <- ncol(baseball)
colnames <- colnames(baseball)
```

### Row subset + eval:
```{r}
microbenchmark(
    df = with(baseball[sample(nr, 1), ], rbi/ab),
    idf = with(bbI[sample(nr, 1), ], rbi/ab),
    lazy = with(bbL[sample(nr, 1), ], rbi/ab))
```

### Column subset (same column):
```{r}
microbenchmark(
    df = baseball["rbi"]$rbi[1],
    idf = bbI["rbi"]$rbi[1],
    lazy = bbL["rbi"]$rbi[1])
```

### Column subset (random column):
```{r}
microbenchmark(
    df = baseball[sample(colnames, 1)][1,1],
    idf = bbI[sample(colnames, 1)][1,1],
    lazy = bbL[sample(colnames, 1)][1,1])
```

### Column subset (random set of columns):
(this forces a lot of macro expansion, but is an unusual use pattern)
```{r}
microbenchmark(
    df = baseball[runif(colnames) > 0.5][1,1],
    idf = bbI[runif(colnames) > 0.5][1,1],
    lazy = bbL[runif(colnames) > 0.5][1,1])
```

### Random access single element:
```{r}
microbenchmark(
    df = baseball[sample(nr, 1), sample(colnames, 1)],
    idf = bbI[sample(nr, 1), sample(colnames, 1)],
    lazy = bbL[sample(nr, 1), sample(colnames, 1)])
```

### Group-by and eval:
```{r}
microbenchmark(times=1,
               df = dlply(baseball, "id", with, mean(rbi)/mean(ab)),
               idf = dlply(bbI, "id", with, mean(rbi)/mean(ab)),
               lazy = dlply(bbL, "id", with, mean(rbi)/mean(ab)))
```

## lazy.frame.R
# lazy data frame
#
# like idata.frame, but exploits lazy evaluation + macro code generation
# instead of active bindings
library(vadr)

lazy.frame <- function(df, enclos=parent.frame(), ...) UseMethod("lazy.frame")

lazy.frame.lazy.frame <- function(df, ...) df

lazy.frame.environment <- function(df, ..., col.order=ls(df)) {
  #not kosher, environment attrs get set by reference.
  attr(df, "accessory") <- accessory_env(df, col.order)
  class(df) <- union("lazy.frame", class(df))
  df
}

lazy.frame.data.frame <- function(df, enclos=parent.frame()) {
  for (n in names(df))
      if (is.array(df[[n]])) stop("Array column '", n, "' not supported")

  col.order <- structure(names(df), names=names(df))
  e <- list2env(df, parent=enclos)
  attr(e, "accessory") <- accessory_env(e, col.order)
  class(e) <- c("lazy.frame", "environment")
  e
}

lazy.frame.default <- function(df, enclos=parent.frame())
    lazy.frame(as.data.frame(df), enclos)

accessory_env <- function(
    #Holds reference to e, and subsetting methods.
    #these are lazy, created on demand.
    #also note accessory env always inherits from package, so that
    #data env is allowed to inherit from wherever
    e,
    col.order,
    #use do.call because this lets "macro" memoize on the col names
    row_subset = do.call(row_subsetter, as.list(col.order)),
    col_subset = do.call(col_subsetter, as.list(col.order)),
    dim = c(length(e[[col.order[[1]]]]), length(col.order))) {
  environment()
}

#Row subset function constructor.
#Arguments are colnames, result is accessor function
row_subsetter <- macro(function(...) {
  col.order <- structure(c(...), names=c(...))
  #macro looks up entire closure
  qe(function(e, rows) {
    #create new env with promises to subset each column
    new.env <- (function(`.(col.order)`=..(missing_value(length(col.order))))
                environment())(
                    ..(qqply(e$`.(col)`[rows])(col=col.order)))
    parent.env(new.env) <- parent.env(e)
    attr(new.env, "accessory") <- accessory_env(
        new.env,
        col.order,
        #column names do not change, so can reuse existing accessors
        attr(e, "accessory")$row_subset,
        attr(e , "accessory")$col_subset)
    class(new.env) <- c("lazy.frame", "environment")
    new.env
  })
})

#Column subset function constructor.
#arguments are colnames, result is accessor function
col_subsetter <- macro(function(...) {
  col.order <- structure(c(...), names=c(...))
  args <- qqply(`.(col)`=e$`.(col)`)(col=col.order)
  col_subset_inner(col.order, args)
})

col_subset_inner <- function(col.order, args) {
  function(e, cols) {
    new.cols <- col.order[cols]
    new.env <- do.call(env.list, args[new.cols])
    parent.env(new.env) <- parent.env(e)
    attr(new.env, "accessory") <- accessory_env(
        new.env, new.cols,
        col_subset = col_subset_inner(new.cols, args[new.cols]))
    class(new.env) <- c("lazy.frame", "environment")
    new.env
  }
}

env.list <- macro(function(...) {
  args <- list(...)
  #print(names(args)) #verify caching works
  qq((function(`.(names(args))`=..(missing_value(length(args)))) environment())(
      ..(args)))
})

`[.lazy.frame` <- function(x, i, j, drop=TRUE) {
  if (nargs() == 2) {
    j <- i
    i <- missing_value()
    drop <- FALSE
  }
  if (!missing(j)) {
    if (length(j) == 1 && drop && !is.logical(j)) {
      if (missing(i)) i <- TRUE
      return(x[[j]][i])
    } else {
      x <- attr(x, "accessory")$col_subset(x, j)
    }
  }
  if (!missing(i)) {
    x <- attr(x, "accessory")$row_subset(x, i)
  }
  x
}

`[[.lazy.frame` <- function(x, i) {
  get(attr(x, "accessory")$col.order[[i]], x)
}

dim.lazy.frame <- function(df) {
  attr(df, "accessory")$dim
}

names.lazy.frame <- function(df) {
  attr(df, "accessory")$col.order
}

as.list.lazy.frame <- function(df) mget(attr(df, "accessory")$col.order, df)

as.data.frame.lazy.frame <- function(df) plyr::quickdf(as.list(df))

############################################################

## slowplyr.R

#plyr slow example
n<-100000
grp1 <- sample(1:750, n, replace=T)
grp2 <- sample(1:750, n, replace=T)
d <- data.frame(x=rnorm(n), y=rnorm(n), grp1=grp1, grp2=grp2)

summarise2 <- function (.data, ...) {
  if (is.list(.data))
      env <- list2env(.data, parent = parent.frame())
  else
      env <- .data

  cols <- eval(substitute(alist(...)))
  for (col in names(cols)) {
    env[[col]] <- eval(cols[[col]], env)
  }
  quickdf(mget(names(cols), env))
}

if(FALSE) {
system.time(ddply(d, .(grp1, grp2), summarise, avx = mean(x), avy=mean(y)))
##                     with plyr 1.8
##    user  system elapsed
## 231.115 105.899 370.422
##                     with patched rbind.fill
##   user  system elapsed
## 43.972   0.442  46.048

##                     adding faster summarise
system.time(ddply(d, .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
##    user  system elapsed
##  44.889   0.456  47.178

##                     adding idata.frame
system.time(ddply(idata.frame(d), .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
## ##    user  system elapsed
## ## 50.608   0.464  53.708

##                     with lazy.frame
system.time(ddply(lazy.frame(d), .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
##   user  system elapsed
## 43.972   0.442  46.048

## with dlply
system.time(dlply(d, .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
##    user  system elapsed
##  30.765   0.976  36.123
system.time(dlply(lazy.frame(d), .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
##    user  system elapsed
##  25.540   0.845  30.686

summarizer <- macro(function(...) {
  transforms <- list(...)
  qq( function(data) {
    if(!is.environment(data))
        data <- list2env(data, parent=parent.env(environment()))
    f <- function() {
      ..(qqply( `.(target)` <- .(expr) )(target=names(transforms),
                                         expr=transforms))
      environment()
    }
    environment(f) <- data
    quickdf(as.list(f()))
  })
})

summarizer2 <- macro(function(...) {
  transforms <- list(...)
  qq( function(data) {
    if(!is.environment(data))
        data <- list2env(data, parent=parent.env(environment()))
    f <- function() {
      ..(qqply( `.(target)` <- .(expr) )(target=names(transforms),
                                         expr=transforms))
      environment()
    }
    environment(f) <- data
    out <- f()
    parent.env(out) <- parent.env(environment())
    lazy.frame(out, col.order=.(names(transforms)))
  })
})

##dlply with summarizer macro

system.time(dlply(d, .(grp1, grp2), summarizer(avx = mean(x), avy=mean(y)),
                  .progress="text"))
## and lazy frame
system.time(dlply(lazy.frame(d), .(grp1, grp2), summarizer(avx = mean(x), avy=mean(y)),
                  .progress="text"))
##
system.time(dlply(lazy.frame(d), .(grp1, grp2), summarizer2(avx = mean(x), avy=mean(y)),
                  .progress="text"))
}
	<!DOCTYPE html>
	<!-- saved from url=(0014)about:internet -->
	<html>
	<head>
	<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>

	<title>Lazy data frame</title>

	<!-- Styles for R syntax highlighter -->
	<style type="text/css">
	pre .operator,
	pre .paren {
	color: rgb(104, 118, 135)
	}

	pre .literal {
	color: rgb(88, 72, 246)
	}

	pre .number {
	color: rgb(0, 0, 205);
	}

	pre .comment {
	color: rgb(76, 136, 107);
	}

	pre .keyword {
	color: rgb(0, 0, 255);
	}

	pre .identifier {
	color: rgb(0, 0, 0);
	}

	pre .string {
	color: rgb(3, 106, 7);
	}
	</style>

	<!-- R syntax highlighter -->
	<script type="text/javascript">
	var hljs=new function(){function m(p){return p.replace(/&/gm,"&").replace(/</gm,"<")}function f(r,q,p){return RegExp(q,"m"+(r.cI?"i":"")+(p?"g":""))}function b(r){for(var p=0;p<r.childNodes.length;p++){var q=r.childNodes[p];if(q.nodeName=="CODE"){return q}if(!(q.nodeType==3&&q.nodeValue.match(/\s+/))){break}}}function h(t,s){var p="";for(var r=0;r<t.childNodes.length;r++){if(t.childNodes[r].nodeType==3){var q=t.childNodes[r].nodeValue;if(s){q=q.replace(/\n/g,"")}p+=q}else{if(t.childNodes[r].nodeName=="BR"){p+="\n"}else{p+=h(t.childNodes[r])}}}if(/MSIE [678]/.test(navigator.userAgent)){p=p.replace(/\r/g,"\n")}return p}function a(s){var r=s.className.split(/\s+/);r=r.concat(s.parentNode.className.split(/\s+/));for(var q=0;q<r.length;q++){var p=r[q].replace(/^language-/,"");if(e[p]){return p}}}function c(q){var p=[];(function(s,t){for(var r=0;r<s.childNodes.length;r++){if(s.childNodes[r].nodeType==3){t+=s.childNodes[r].nodeValue.length}else{if(s.childNodes[r].nodeName=="BR"){t+=1}else{if(s.childNodes[r].nodeType==1){p.push({event:"start",offset:t,node:s.childNodes[r]});t=arguments.callee(s.childNodes[r],t);p.push({event:"stop",offset:t,node:s.childNodes[r]})}}}}return t})(q,0);return p}function k(y,w,x){var q=0;var z="";var s=[];function u(){if(y.length&&w.length){if(y[0].offset!=w[0].offset){return(y[0].offset<w[0].offset)?y:w}else{return w[0].event=="start"?y:w}}else{return y.length?y:w}}function t(D){var A="<"+D.nodeName.toLowerCase();for(var B=0;B<D.attributes.length;B++){var C=D.attributes[B];A+=" "+C.nodeName.toLowerCase();if(C.value!==undefined&&C.value!==false&&C.value!==null){A+='="'+m(C.value)+'"'}}return A+">"}while(y.length\|\|w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("</"+p.nodeName.toLowerCase()+">")}while(p!=v.node);s.splice(r,1);while(r<s.length){z+=t(s[r]);r++}}}}return z+m(x.substr(q))}function j(){function q(x,y,v){if(x.compiled){return}var u;var s=[];if(x.k){x.lR=f(y,x.l\|\|hljs.IR,true);for(var w in x.k){if(!x.k.hasOwnProperty(w)){continue}if(x.k[w] instanceof Object){u=x.k[w]}else{u=x.k;w="keyword"}for(var r in u){if(!u.hasOwnProperty(r)){continue}x.k[r]=[w,u[r]];s.push(r)}}}if(!v){if(x.bWK){x.b="\\b("+s.join("\|")+")\\s"}x.bR=f(y,x.b?x.b:"\\B\|\\b");if(!x.e&&!x.eW){x.e="\\B\|\\b"}if(x.e){x.eR=f(y,x.e)}}if(x.i){x.iR=f(y,x.i)}if(x.r===undefined){x.r=1}if(!x.c){x.c=[]}x.compiled=true;for(var t=0;t<x.c.length;t++){if(x.c[t]=="self"){x.c[t]=x}q(x.c[t],y,false)}if(x.starts){q(x.starts,y,false)}}for(var p in e){if(!e.hasOwnProperty(p)){continue}q(e[p].dM,e[p],true)}}function d(B,C){if(!j.called){j();j.called=true}function q(r,M){for(var L=0;L<M.c.length;L++){if((M.c[L].bR.exec(r)\|\|[null])[0]==r){return M.c[L]}}}function v(L,r){if(D[L].e&&D[L].eR.test(r)){return 1}if(D[L].eW){var M=v(L-1,r);return M?M+1:0}return 0}function w(r,L){return L.i&&L.iR.test(r)}function K(N,O){var M=[];for(var L=0;L<N.c.length;L++){M.push(N.c[L].b)}var r=D.length-1;do{if(D[r].e){M.push(D[r].e)}r--}while(D[r+1].eW);if(N.i){M.push(N.i)}return f(O,M.join("\|"),true)}function p(M,L){var N=D[D.length-1];if(!N.t){N.t=K(N,E)}N.t.lastIndex=L;var r=N.t.exec(M);return r?[M.substr(L,r.index-L),r[0],false]:[M.substr(L),"",true]}function z(N,r){var L=E.cI?r[0].toLowerCase():r[0];var M=N.k[L];if(M&&M instanceof Array){return M}return false}function F(L,P){L=m(L);if(!P.k){return L}var r="";var O=0;P.lR.lastIndex=0;var M=P.lR.exec(L);while(M){r+=L.substr(O,M.index-O);var N=z(P,M);if(N){x+=N[1];r+='<span class="'+N[0]+'">'+M[0]+"</span>"}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'<span class="'+M.cN+'">':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.length-1,M);if(L){var O=R.cN?"</span>":"";if(R.rE){y+=J(R.buffer+N,R)+O}else{if(R.eE){y+=J(R.buffer+N,R)+O+m(M)}else{y+=J(R.buffer+N+M,R)+O}}while(L>1){O=D[D.length-2].cN?"</span>":"";y+=O;L--;D.length--}var r=D[D.length-1];D.length--;D[D.length-1].buffer="";if(r.starts){I(r.starts,"")}return R.rE}if(w(M,R)){throw"Illegal"}}var E=e[B];var D=[E.dM];var A=0;var x=0;var y="";try{var s,u=0;E.dM.buffer="";do{s=p(C,u);var t=G(s[0],s[1],s[2]);u+=s[0].length;if(!t){u+=s[1].length}}while(!s[2]);if(D.length>1){throw"Illegal"}return{r:A,keyword_count:x,value:y}}catch(H){if(H=="Illegal"){return{r:0,keyword_count:0,value:m(C)}}else{throw H}}}function g(t){var p={keyword_count:0,r:0,value:m(t)};var r=p;for(var q in e){if(!e.hasOwnProperty(q)){continue}var s=d(q,t);s.language=q;if(s.keyword_count+s.r>r.keyword_count+r.r){r=s}if(s.keyword_count+s.r>p.keyword_count+p.r){r=p;p=s}}if(r.language){p.second_best=r}return p}function i(r,q,p){if(q){r=r.replace(/^((<[^>]+>\|\t)+)/gm,function(t,w,v,u){return w.replace(/\t/g,q)})}if(p){r=r.replace(/\n/g,"<br>")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s\|^)(language-)?"+v+"(\\s\|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML="<pre><code>"+y.value+"</code></pre>";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var r=document.getElementsByTagName("pre");for(var p=0;p<r.length;p++){var q=b(r[p]);if(q){n(q,hljs.tabReplace)}}}function l(){if(window.addEventListener){window.addEventListener("DOMContentLoaded",o,false);window.addEventListener("load",o,false)}else{if(window.attachEvent){window.attachEvent("onload",o)}else{window.onload=o}}}var e={};this.LANGUAGES=e;this.highlight=d;this.highlightAuto=g;this.fixMarkup=i;this.highlightBlock=n;this.initHighlighting=o;this.initHighlightingOnLoad=l;this.IR="[a-zA-Z][a-zA-Z0-9_]";this.UIR="[a-zA-Z_][a-zA-Z0-9_]";this.NR="\\b\\d+(\\.\\d+)?";this.CNR="\\b(0[xX][a-fA-F0-9]+\|(\\d+(\\.\\d)?\|\\.\\d+)([eE][-+]?\\d+)?)";this.BNR="\\b(0b[01]+)";this.RSR="!\|!=\|!==\|%\|%=\|&\|&&\|&=\|\\\|\\=\|\\+\|\\+=\|,\|\\.\|-\|-=\|/\|/=\|:\|;\|<\|<<\|<<=\|<=\|=\|==\|===\|>\|>=\|>>\|>>=\|>>>\|>>>=\|\\?\|\\[\|\\{\|\\(\|\\^\|\\^=\|\\\|\|\\\|=\|\\\|\\\|\|~";this.ER="(?![\\s\\S])";this.BE={b:"\\\\.",r:0};this.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[this.BE],r:0};this.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[this.BE],r:0};this.CLCM={cN:"comment",b:"//",e:"$"};this.CBLCLM={cN:"comment",b:"/\\",e:"\\/"};this.HCM={cN:"comment",b:"#",e:"$"};this.NM={cN:"number",b:this.NR,r:0};this.CNM={cN:"number",b:this.CNR,r:0};this.BNM={cN:"number",b:this.BNR,r:0};this.inherit=function(r,s){var p={};for(var q in r){p[q]=r[q]}if(s){for(var q in s){p[q]=s[q]}}return p}}();hljs.LANGUAGES.cpp=function(){var a={keyword:{"false":1,"int":1,"float":1,"while":1,"private":1,"char":1,"catch":1,"export":1,virtual:1,operator:2,sizeof:2,dynamic_cast:2,typedef:2,const_cast:2,"const":1,struct:1,"for":1,static_cast:2,union:1,namespace:1,unsigned:1,"long":1,"throw":1,"volatile":2,"static":1,"protected":1,bool:1,template:1,mutable:1,"if":1,"public":1,friend:2,"do":1,"return":1,"goto":1,auto:1,"void":2,"enum":1,"else":1,"break":1,"new":1,extern:1,using:1,"true":1,"class":1,asm:1,"case":1,typeid:1,"short":1,reinterpret_cast:2,"default":1,"double":1,register:1,explicit:1,signed:1,typename:1,"try":1,"this":1,"switch":1,"continue":1,wchar_t:1,inline:1,"delete":1,alignof:1,char16_t:1,char32_t:1,constexpr:1,decltype:1,noexcept:1,nullptr:1,static_assert:1,thread_local:1,restrict:1,_Bool:1,complex:1},built_in:{std:1,string:1,cin:1,cout:1,cerr:1,clog:1,stringstream:1,istringstream:1,ostringstream:1,auto_ptr:1,deque:1,list:1,queue:1,stack:1,vector:1,map:1,set:1,bitset:1,multiset:1,multimap:1,unordered_set:1,unordered_map:1,unordered_multiset:1,unordered_multimap:1,array:1,shared_ptr:1}};return{dM:{k:a,i:"</",c:[hljs.CLCM,hljs.CBLCLM,hljs.QSM,{cN:"string",b:"'\\\\?.",e:"'",i:"."},{cN:"number",b:"\\b(\\d+(\\.\\d)?\|\\.\\d+)(u\|U\|l\|L\|ul\|UL\|f\|F)"},hljs.CNM,{cN:"preprocessor",b:"#",e:"$"},{cN:"stl_container",b:"\\b(deque\|list\|queue\|stack\|vector\|map\|set\|bitset\|multiset\|multimap\|unordered_map\|unordered_set\|unordered_multiset\|unordered_multimap\|array)\\s<",e:">",k:a,r:10,c:["self"]}]}}}();hljs.LANGUAGES.r={dM:{c:[hljs.HCM,{cN:"number",b:"\\b0[xX][0-9a-fA-F]+[Li]?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+(?:[eE][+\\-]?\\d)?L\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+\\.(?!\\d)(?:i\\b)?",e:hljs.IMMEDIATE_RE,r:1},{cN:"number",b:"\\b\\d+(?:\\.\\d)?(?:[eE][+\\-]?\\d)?i?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\.\\d+(?:[eE][+\\-]?\\d)?i?\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"keyword",b:"(?:tryCatch\|library\|setGeneric\|setGroupGeneric)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\.",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\d+(?![\\w.])",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\b(?:function)",e:hljs.IMMEDIATE_RE,r:2},{cN:"keyword",b:"(?:if\|in\|break\|next\|repeat\|else\|for\|return\|switch\|while\|try\|stop\|warning\|require\|attach\|detach\|source\|setMethod\|setClass)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"literal",b:"(?:NA\|NA_integer_\|NA_real_\|NA_character_\|NA_complex_)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"literal",b:"(?:NULL\|TRUE\|FALSE\|T\|F\|Inf\|NaN)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"identifier",b:"[a-zA-Z.][a-zA-Z0-9._]\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"<\\-(?!\\s\\d)",e:hljs.IMMEDIATE_RE,r:2},{cN:"operator",b:"\\->\|<\\-",e:hljs.IMMEDIATE_RE,r:1},{cN:"operator",b:"%%\|~",e:hljs.IMMEDIATE_RE},{cN:"operator",b:">=\|<=\|==\|!=\|\\\|\\\|\|&&\|=\|\\+\|\\-\|\\\|/\|\\^\|>\|<\|!\|&\|\\\|\|\\$\|:",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"%",e:"%",i:"\\n",r:1},{cN:"identifier",b:"`",e:"`",r:0},{cN:"string",b:'"',e:'"',c:[hljs.BE],r:0},{cN:"string",b:"'",e:"'",c:[hljs.BE],r:0},{cN:"paren",b:"[[({\\])}]",e:hljs.IMMEDIATE_RE,r:0}]}};
	hljs.initHighlightingOnLoad();
	</script>

	<!-- MathJax scripts -->
	<script type="text/javascript" src="https://c328740.ssl.cf1.rackcdn.com/mathjax/2.0-latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
	</script>


	<style type="text/css">
	body, td {
	font-family: sans-serif;
	background-color: white;
	font-size: 12px;
	margin: 8px;
	}

	tt, code, pre {
	font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace;
	}

	h1 {
	font-size:2.2em;
	}

	h2 {
	font-size:1.8em;
	}

	h3 {
	font-size:1.4em;
	}

	h4 {
	font-size:1.0em;
	}

	h5 {
	font-size:0.9em;
	}

	h6 {
	font-size:0.8em;
	}

	a:visited {
	color: rgb(50%, 0%, 50%);
	}

	pre {
	margin-top: 0;
	max-width: 95%;
	border: 1px solid #ccc;
	white-space: pre-wrap;
	}

	pre code {
	display: block; padding: 0.5em;
	}

	code.r, code.cpp {
	background-color: #F8F8F8;
	}

	table, td, th {
	border: none;
	}

	blockquote {
	color:#666666;
	margin:0;
	padding-left: 1em;
	border-left: 0.5em #EEE solid;
	}

	hr {
	height: 0px;
	border-bottom: none;
	border-top-width: thin;
	border-top-style: dotted;
	border-top-color: #999999;
	}

	@media print {
	* {
	background: transparent !important;
	color: black !important;
	filter:none !important;
	-ms-filter: none !important;
	}

	body {
	font-size:12pt;
	max-width:100%;
	}

	a, a:visited {
	text-decoration: underline;
	}

	hr {
	visibility: hidden;
	page-break-before: always;
	}

	pre, blockquote {
	padding-right: 1em;
	page-break-inside: avoid;
	}

	tr, img {
	page-break-inside: avoid;
	}

	img {
	max-width: 100% !important;
	}

	@page :left {
	margin: 15mm 20mm 15mm 10mm;
	}

	@page :right {
	margin: 15mm 10mm 15mm 20mm;
	}

	p, h2, h3 {
	orphans: 3; widows: 3;
	}

	h2, h3 {
	page-break-after: avoid;
	}
	}
	</style>



	</head>

	<body>
	<h3>Lazy data frame</h3>

	<pre><code class="r">opts_chunk$set(tidy = FALSE)
	library(vadr)
	library(plyr)
	library(microbenchmark)
	source("lazy.frame.R")
	</code></pre>

	<p>The insights exploited here are that the fastest way to get an environment with vars <code>a</code>, <code>b</code>, <code>c</code>, is:</p>

	<pre><code class="r">lazy <- function(a,b,c) (environment())
	explicit <- function(...)list2env(list(...))
	microbenchmark(
	lazy = lazy(1,2,3),
	explicit = explicit(a=1, b=2, c=3))
	</code></pre>

	<pre><code>## Unit: microseconds
	## expr min lq median uq max neval
	## lazy 1.179 1.450 1.681 1.841 2288 100
	## explicit 8.698 9.162 9.312 9.643 2779 100
	</code></pre>

	<p>And given such environment and row indices, the fastest way to make a subset is:</p>

	<pre><code class="r">lazy_subset <- function(e, ix) lazy(e$a[ix], e$b[ix], e$c[ix])
	explicit_subset <- function(e, ix) as.environment(lapply(as.list(e), `[`, ix))
	e <- lazy(1:10, letters[1:10], as.list(1:10))
	microbenchmark(
	lazy = lazy_subset(e, 1:5),
	explicit = explicit_subset(e, 1:5))
	</code></pre>

	<pre><code>## Unit: microseconds
	## expr min lq median uq max neval
	## lazy 1.739 2.048 2.532 2.737 5064 100
	## explicit 26.882 27.511 27.817 28.136 4072 100
	</code></pre>

	<p>(Fast because no subsets are actually computed, only promises to compute a subset in the future, only for those columns that are demanded)</p>

	<h3>Setup:</h3>

	<pre><code class="r">bbI <- idata.frame(baseball)
	bbL <- lazy.frame(baseball)
	nr <- nrow(baseball)
	nc <- ncol(baseball)
	colnames <- colnames(baseball)
	</code></pre>

	<h3>Row subset + eval:</h3>

	<pre><code class="r">microbenchmark(
	df = with(baseball[sample(nr, 1), ], rbi/ab),
	idf = with(bbI[sample(nr, 1), ], rbi/ab),
	lazy = with(bbL[sample(nr, 1), ], rbi/ab))
	</code></pre>

	<pre><code>## Unit: microseconds
	## expr min lq median uq max neval
	## df 491.0 539.01 600.63 645.8 1884 100
	## idf 8144.7 8580.31 8921.75 11015.9 66710 100
	## lazy 64.7 70.22 87.07 118.6 245123 100
	</code></pre>

	<h3>Column subset (same column):</h3>

	<pre><code class="r">microbenchmark(
	df = baseball["rbi"]$rbi[1],
	idf = bbI["rbi"]$rbi[1],
	lazy = bbL["rbi"]$rbi[1])
	</code></pre>

	<pre><code>## Unit: microseconds
	## expr min lq median uq max neval
	## df 195.22 220.2 265.4 318.0 3177 100
	## idf 5167.25 5284.1 5424.2 5706.5 9494 100
	## lazy 97.84 106.4 155.9 171.4 52320 100
	</code></pre>

	<h3>Column subset (random column):</h3>

	<pre><code class="r">microbenchmark(
	df = baseball[sample(colnames, 1)][1,1],
	idf = bbI[sample(colnames, 1)][1,1],
	lazy = bbL[sample(colnames, 1)][1,1])
	</code></pre>

	<pre><code>## Unit: microseconds
	## expr min lq median uq max neval
	## df 282.1 332.7 379.7 554.3 4840 100
	## idf 1214.6 1244.4 1278.3 1439.9 10663 100
	## lazy 122.0 141.2 178.8 222.9 7846 100
	</code></pre>

	<h3>Column subset (random set of columns):</h3>

	<p>(this forces a lot of macro expansion, but is an unusual use pattern)</p>

	<pre><code class="r">microbenchmark(
	df = baseball[runif(colnames) > 0.5][1,1],
	idf = bbI[runif(colnames) > 0.5][1,1],
	lazy = bbL[runif(colnames) > 0.5][1,1])
	</code></pre>

	<pre><code>## Unit: microseconds
	## expr min lq median uq max neval
	## df 867.9 1679 2203 4912 56785 100
	## idf 1256.5 1365 1449 1535 5371 100
	## lazy 5958.3 8442 9050 10192 61848 100
	</code></pre>

	<h3>Random access single element:</h3>

	<pre><code class="r">microbenchmark(
	df = baseball[sample(nr, 1), sample(colnames, 1)],
	idf = bbI[sample(nr, 1), sample(colnames, 1)],
	lazy = bbL[sample(nr, 1), sample(colnames, 1)])
	</code></pre>

	<pre><code>## Unit: microseconds
	## expr min lq median uq max neval
	## df 84.25 94.11 97.80 105.43 3751.77 100
	## idf 287.36 300.44 308.29 325.38 4002.25 100
	## lazy 35.37 37.35 38.98 43.22 89.33 100
	</code></pre>

	<h3>Group-by and eval:</h3>

	<pre><code class="r">microbenchmark(times=1,
	df = dlply(baseball, "id", with, mean(rbi)/mean(ab)),
	idf = dlply(bbI, "id", with, mean(rbi)/mean(ab)),
	lazy = dlply(bbL, "id", with, mean(rbi)/mean(ab)))
	</code></pre>

	<pre><code>## Unit: milliseconds
	## expr min lq median uq max neval
	## df 470.8 470.8 470.8 470.8 470.8 1
	## idf 11745.4 11745.4 11745.4 11745.4 11745.4 1
	## lazy 181.5 181.5 181.5 181.5 181.5 1
	</code></pre>

	</body>

	</html>
	### Lazy data frame

	```{r}
	opts_chunk$set(tidy=FALSE)
	library(vadr)
	library(plyr)
	library(microbenchmark)
	source("lazy.frame.R")
	```

	The insights exploited here are that the fastest way to get an environment with vars `a`, `b`, `c`, is:

	```{r}
	lazy <- function(a,b,c) (environment())
	explicit <- function(...)list2env(list(...))
	microbenchmark(
	lazy = lazy(1,2,3),
	explicit = explicit(a=1, b=2, c=3))
	```

	And given such environment and row indices, the fastest way to make a subset is:

	```{r}
	lazy_subset <- function(e, ix) lazy(e$a[ix], e$b[ix], e$c[ix])
	explicit_subset <- function(e, ix) as.environment(lapply(as.list(e), `[`, ix))
	e <- lazy(1:10, letters[1:10], as.list(1:10))
	microbenchmark(
	lazy = lazy_subset(e, 1:5),
	explicit = explicit_subset(e, 1:5))
	```

	(Fast because no subsets are actually computed, only promises to compute a subset in the future, only for those columns that are demanded)

	### Setup:
	```{r}
	bbI <- idata.frame(baseball)
	bbL <- lazy.frame(baseball)
	nr <- nrow(baseball)
	nc <- ncol(baseball)
	colnames <- colnames(baseball)
	```

	### Row subset + eval:
	```{r}
	microbenchmark(
	df = with(baseball[sample(nr, 1), ], rbi/ab),
	idf = with(bbI[sample(nr, 1), ], rbi/ab),
	lazy = with(bbL[sample(nr, 1), ], rbi/ab))
	```

	### Column subset (same column):
	```{r}
	microbenchmark(
	df = baseball["rbi"]$rbi[1],
	idf = bbI["rbi"]$rbi[1],
	lazy = bbL["rbi"]$rbi[1])
	```

	### Column subset (random column):
	```{r}
	microbenchmark(
	df = baseball[sample(colnames, 1)][1,1],
	idf = bbI[sample(colnames, 1)][1,1],
	lazy = bbL[sample(colnames, 1)][1,1])
	```

	### Column subset (random set of columns):
	(this forces a lot of macro expansion, but is an unusual use pattern)
	```{r}
	microbenchmark(
	df = baseball[runif(colnames) > 0.5][1,1],
	idf = bbI[runif(colnames) > 0.5][1,1],
	lazy = bbL[runif(colnames) > 0.5][1,1])
	```

	### Random access single element:
	```{r}
	microbenchmark(
	df = baseball[sample(nr, 1), sample(colnames, 1)],
	idf = bbI[sample(nr, 1), sample(colnames, 1)],
	lazy = bbL[sample(nr, 1), sample(colnames, 1)])
	```

	### Group-by and eval:
	```{r}
	microbenchmark(times=1,
	df = dlply(baseball, "id", with, mean(rbi)/mean(ab)),
	idf = dlply(bbI, "id", with, mean(rbi)/mean(ab)),
	lazy = dlply(bbL, "id", with, mean(rbi)/mean(ab)))
	```
	# lazy data frame
	#
	# like idata.frame, but exploits lazy evaluation + macro code generation
	# instead of active bindings
	library(vadr)

	lazy.frame <- function(df, enclos=parent.frame(), ...) UseMethod("lazy.frame")

	lazy.frame.lazy.frame <- function(df, ...) df

	lazy.frame.environment <- function(df, ..., col.order=ls(df)) {
	#not kosher, environment attrs get set by reference.
	attr(df, "accessory") <- accessory_env(df, col.order)
	class(df) <- union("lazy.frame", class(df))
	df
	}

	lazy.frame.data.frame <- function(df, enclos=parent.frame()) {
	for (n in names(df))
	if (is.array(df[[n]])) stop("Array column '", n, "' not supported")

	col.order <- structure(names(df), names=names(df))
	e <- list2env(df, parent=enclos)
	attr(e, "accessory") <- accessory_env(e, col.order)
	class(e) <- c("lazy.frame", "environment")
	e
	}

	lazy.frame.default <- function(df, enclos=parent.frame())
	lazy.frame(as.data.frame(df), enclos)

	accessory_env <- function(
	#Holds reference to e, and subsetting methods.
	#these are lazy, created on demand.
	#also note accessory env always inherits from package, so that
	#data env is allowed to inherit from wherever
	e,
	col.order,
	#use do.call because this lets "macro" memoize on the col names
	row_subset = do.call(row_subsetter, as.list(col.order)),
	col_subset = do.call(col_subsetter, as.list(col.order)),
	dim = c(length(e[[col.order[[1]]]]), length(col.order))) {
	environment()
	}

	#Row subset function constructor.
	#Arguments are colnames, result is accessor function
	row_subsetter <- macro(function(...) {
	col.order <- structure(c(...), names=c(...))
	#macro looks up entire closure
	qe(function(e, rows) {
	#create new env with promises to subset each column
	new.env <- (function(`.(col.order)`=..(missing_value(length(col.order))))
	environment())(
	..(qqply(e$`.(col)`[rows])(col=col.order)))
	parent.env(new.env) <- parent.env(e)
	attr(new.env, "accessory") <- accessory_env(
	new.env,
	col.order,
	#column names do not change, so can reuse existing accessors
	attr(e, "accessory")$row_subset,
	attr(e , "accessory")$col_subset)
	class(new.env) <- c("lazy.frame", "environment")
	new.env
	})
	})

	#Column subset function constructor.
	#arguments are colnames, result is accessor function
	col_subsetter <- macro(function(...) {
	col.order <- structure(c(...), names=c(...))
	args <- qqply(`.(col)`=e$`.(col)`)(col=col.order)
	col_subset_inner(col.order, args)
	})

	col_subset_inner <- function(col.order, args) {
	function(e, cols) {
	new.cols <- col.order[cols]
	new.env <- do.call(env.list, args[new.cols])
	parent.env(new.env) <- parent.env(e)
	attr(new.env, "accessory") <- accessory_env(
	new.env, new.cols,
	col_subset = col_subset_inner(new.cols, args[new.cols]))
	class(new.env) <- c("lazy.frame", "environment")
	new.env
	}
	}

	env.list <- macro(function(...) {
	args <- list(...)
	#print(names(args)) #verify caching works
	qq((function(`.(names(args))`=..(missing_value(length(args)))) environment())(
	..(args)))
	})

	`[.lazy.frame` <- function(x, i, j, drop=TRUE) {
	if (nargs() == 2) {
	j <- i
	i <- missing_value()
	drop <- FALSE
	}
	if (!missing(j)) {
	if (length(j) == 1 && drop && !is.logical(j)) {
	if (missing(i)) i <- TRUE
	return(x[[j]][i])
	} else {
	x <- attr(x, "accessory")$col_subset(x, j)
	}
	}
	if (!missing(i)) {
	x <- attr(x, "accessory")$row_subset(x, i)
	}
	x
	}

	`[[.lazy.frame` <- function(x, i) {
	get(attr(x, "accessory")$col.order[[i]], x)
	}

	dim.lazy.frame <- function(df) {
	attr(df, "accessory")$dim
	}

	names.lazy.frame <- function(df) {
	attr(df, "accessory")$col.order
	}

	as.list.lazy.frame <- function(df) mget(attr(df, "accessory")$col.order, df)

	as.data.frame.lazy.frame <- function(df) plyr::quickdf(as.list(df))

	############################################################

	#plyr slow example
	n<-100000
	grp1 <- sample(1:750, n, replace=T)
	grp2 <- sample(1:750, n, replace=T)
	d <- data.frame(x=rnorm(n), y=rnorm(n), grp1=grp1, grp2=grp2)

	summarise2 <- function (.data, ...) {
	if (is.list(.data))
	env <- list2env(.data, parent = parent.frame())
	else
	env <- .data

	cols <- eval(substitute(alist(...)))
	for (col in names(cols)) {
	env[[col]] <- eval(cols[[col]], env)
	}
	quickdf(mget(names(cols), env))
	}

	if(FALSE) {
	system.time(ddply(d, .(grp1, grp2), summarise, avx = mean(x), avy=mean(y)))
	## with plyr 1.8
	## user system elapsed
	## 231.115 105.899 370.422
	## with patched rbind.fill
	## user system elapsed
	## 43.972 0.442 46.048

	## adding faster summarise
	system.time(ddply(d, .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
	## user system elapsed
	## 44.889 0.456 47.178

	## adding idata.frame
	system.time(ddply(idata.frame(d), .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
	## ## user system elapsed
	## ## 50.608 0.464 53.708

	## with lazy.frame
	system.time(ddply(lazy.frame(d), .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
	## user system elapsed
	## 43.972 0.442 46.048

	## with dlply
	system.time(dlply(d, .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
	## user system elapsed
	## 30.765 0.976 36.123
	system.time(dlply(lazy.frame(d), .(grp1, grp2), summarise2, avx = mean(x), avy=mean(y)))
	## user system elapsed
	## 25.540 0.845 30.686

	summarizer <- macro(function(...) {
	transforms <- list(...)
	qq( function(data) {
	if(!is.environment(data))
	data <- list2env(data, parent=parent.env(environment()))
	f <- function() {
	..(qqply( `.(target)` <- .(expr) )(target=names(transforms),
	expr=transforms))
	environment()
	}
	environment(f) <- data
	quickdf(as.list(f()))
	})
	})

	summarizer2 <- macro(function(...) {
	transforms <- list(...)
	qq( function(data) {
	if(!is.environment(data))
	data <- list2env(data, parent=parent.env(environment()))
	f <- function() {
	..(qqply( `.(target)` <- .(expr) )(target=names(transforms),
	expr=transforms))
	environment()
	}
	environment(f) <- data
	out <- f()
	parent.env(out) <- parent.env(environment())
	lazy.frame(out, col.order=.(names(transforms)))
	})
	})

	##dlply with summarizer macro

	system.time(dlply(d, .(grp1, grp2), summarizer(avx = mean(x), avy=mean(y)),
	.progress="text"))
	## and lazy frame
	system.time(dlply(lazy.frame(d), .(grp1, grp2), summarizer(avx = mean(x), avy=mean(y)),
	.progress="text"))
	##
	system.time(dlply(lazy.frame(d), .(grp1, grp2), summarizer2(avx = mean(x), avy=mean(y)),
	.progress="text"))
	}