HeronStatsPlugins: r-learning.html

File r-learning.html, 43.7 KB (added by dconnolly, 5 years ago)

notes on R and the R conference from a programmer's perspective (generated HTML)

Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3<!-- saved from url=(0014)about:internet -->
4<html>
5<head>
6<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
7
8<title>R from a Programmer&#39;s Point of View</title>
9
10<base target="_blank"/>
11
12<style type="text/css">
13body, td { /* Page-wide defaults: sans-serif 12px text on a white background with an 8px margin. */
14   font-family: sans-serif;
15   background-color: white;
16   font-size: 12px;
17   margin: 8px;
18}
19/* Monospace font stack shared by inline code and preformatted blocks. */
20tt, code, pre {
21   font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace;
22}
23/* Heading sizes step down from 2.2em (h1) to 0.8em (h6). */
24h1 {
25   font-size:2.2em;
26}
27
28h2 {
29   font-size:1.8em;
30}
31
32h3 {
33   font-size:1.4em;
34}
35
36h4 {
37   font-size:1.0em;
38}
39
40h5 {
41   font-size:0.9em;
42}
43
44h6 {
45   font-size:0.8em;
46}
47/* Visited links use an explicit colour: equal red and blue at 50%, no green. */
48a:visited {
49   color: rgb(50%, 0%, 50%);
50}
51/* Code blocks: no top margin, at most 95% of the container width, thin light-grey border. */
52pre {   
53   margin-top: 0;
54   max-width: 95%;
55   border: 1px solid #ccc;
56}
57/* A code element nested in a pre is rendered as a padded block. */
58pre code {
59   display: block; padding: 0.5em;
60}
61/* Light grey background for code.r; the body markup puts class="r" on R input chunks and leaves output chunks unclassed. */
62code.r {
63   background-color: #F8F8F8;
64}
65/* Tables and their cells are drawn without borders. */
66table, td, th {
67  border: none;
68}
69/* Block quotes: grey text, no outer margin, and a wide light-grey bar along the left edge. */
70blockquote {
71   color:#666666;
72   margin:0;
73   padding-left: 1em;
74   border-left: 0.5em #EEE solid;
75}
76/* Horizontal rules: a zero-height element whose only visible part is a thin dotted grey top border. */
77hr {
78   height: 0px;
79   border-bottom: none;
80   border-top-width: thin;
81   border-top-style: dotted;
82   border-top-color: #999999;
83}
84
85@media print { /* Overrides that apply only when the page is printed. */
86   * { /* Every element: black text on a transparent background, filters switched off; !important beats the screen rules. */
87      background: transparent !important;
88      color: black !important;
89      filter:none !important;
90      -ms-filter: none !important;
91   }
92/* Print body text at 12pt and let it span the full page width. */
93   body {
94      font-size:12pt;
95      max-width:100%;
96   }
97       /* Links, visited or not, are underlined on paper. */
98   a, a:visited {
99      text-decoration: underline;
100   }
101/* A horizontal rule is hidden and forces a page break before it. */
102   hr {
103      visibility: hidden;
104      page-break-before: always;
105   }
106/* Ask the printer not to split code blocks or quotes across pages. */
107   pre, blockquote {
108      padding-right: 1em;
109      page-break-inside: avoid;
110   }
111/* Same for table rows and images. */
112   tr, img {
113      page-break-inside: avoid;
114   }
115/* Images never exceed the width of their container. */
116   img {
117      max-width: 100% !important;
118   }
119/* Mirrored page margins (top right bottom left): left-hand pages get 20mm on the right and 10mm on the left. */
120   @page :left {
121      margin: 15mm 20mm 15mm 10mm;
122   }
123     /* Right-hand pages swap the side margins: 10mm on the right and 20mm on the left. */
124   @page :right {
125      margin: 15mm 10mm 15mm 20mm;
126   }
127/* Require at least three lines of a paragraph or heading at the bottom and top of a page. */
128   p, h2, h3 {
129      orphans: 3; widows: 3;
130   }
131/* Avoid a page break directly after an h2 or h3. */
132   h2, h3 {
133      page-break-after: avoid;
134   }
135}
136
137</style>
138
139<!-- Styles for R syntax highlighter -->
140<style type="text/css">
141   pre .operator, /* Token colours inside pre blocks; each class name matches a cN value that the highlighter script writes onto its span elements. */
142   pre .paren {
143     color: rgb(104, 118, 135)
144   }
145/* Literals (the R grammar uses this class for NA, NULL, TRUE, FALSE, Inf, NaN and similar). */
146   pre .literal {
147     color: rgb(88, 72, 246)
148   }
149/* Numeric constants. */
150   pre .number {
151     color: rgb(0, 0, 205);
152   }
153/* Comments (the R grammar maps lines starting at # to this class). */
154   pre .comment {
155     color: rgb(76, 136, 107);
156   }
157/* Keywords are pure blue. */
158   pre .keyword {
159     color: rgb(0, 0, 255);
160   }
161/* Identifiers are plain black. */
162   pre .identifier {
163     color: rgb(0, 0, 0);
164   }
165/* String literals. */
166   pre .string {
167     color: rgb(3, 106, 7);
168   }
169</style>
170
171<!-- R syntax highlighter -->
172<script type="text/javascript">
/*
 * Minified syntax highlighter for the R code chunks on this page, exposed as the global hljs.
 *
 * - hljs.LANGUAGES.r is the only grammar registered in this script. The cN value of each rule
 *   becomes the class attribute of the span elements written into the highlighted markup.
 * - hljs.initHighlightingOnLoad() registers a run-once handler on DOMContentLoaded and load
 *   (falling back to attachEvent or window.onload). The handler visits every pre element,
 *   picks its leading code child, and highlights it only when a class name on that element
 *   or on its parent (optionally prefixed with language-) names a registered grammar.
 * - The R grammar reads hljs.IMMEDIATE_RE for its end patterns, but this script never assigns
 *   that property (the object defines ER instead). Those end values are therefore undefined
 *   and the grammar compile step substitutes its default end pattern.
 *
 * NOTE(review): this looks like vendored third-party code (presumably highlight.js); confirm,
 * and regenerate it from upstream rather than editing it by hand.
 * NOTE(review): the single statement is wrapped across several physical lines here, once
 * directly after a return keyword. That looks like a wrapping artifact of this page dump;
 * verify against the original attachment before running this copy.
 */
173var hljs=new function(){function m(p){return p.replace(/&/gm,"&amp;").replace(/</gm,"&lt;")}function f(r,q,p){return RegExp(q,"m"+(r.cI?"i":"")+(p?"g":""))}function b(r){for(var p=0;p<r.childNodes.length;p++){var q=r.childNodes[p];if(q.nodeName=="CODE"){return q}if(!(q.nodeType==3&&q.nodeValue.match(/\s+/))){break}}}function h(t,s){var p="";for(var r=0;r<t.childNodes.length;r++){if(t.childNodes[r].nodeType==3){var q=t.childNodes[r].nodeValue;if(s){q=q.replace(/\n/g,"")}p+=q}else{if(t.childNodes[r].nodeName=="BR"){p+="\n"}else{p+=h(t.childNodes[r])}}}if(/MSIE [678]/.test(navigator.userAgent)){p=p.replace(/\r/g,"\n")}return p}function a(s){var r=s.className.split(/\s+/);r=r.concat(s.parentNode.className.split(/\s+/));for(var q=0;q<r.length;q++){var p=r[q].replace(/^language-/,"");if(e[p]){return p}}}function c(q){var p=[];(function(s,t){for(var r=0;r<s.childNodes.length;r++){if(s.childNodes[r].nodeType==3){t+=s.childNodes[r].nodeValue.length}else{if(s.childNodes[r].nodeName=="BR"){t+=1}else{if(s.childNodes[r].nodeType==1){p.push({event:"start",offset:t,node:s.childNodes[r]});t=arguments.callee(s.childNodes[r],t);p.push({event:"stop",offset:t,node:s.childNodes[r]})}}}}return t})(q,0);return p}function k(y,w,x){var q=0;var z="";var s=[];function u(){if(y.length&&w.length){if(y[0].offset!=w[0].offset){return(y[0].offset<w[0].offset)?y:w}else{return w[0].event=="start"?y:w}}else{return y.length?y:w}}function t(D){var A="<"+D.nodeName.toLowerCase();for(var B=0;B<D.attributes.length;B++){var C=D.attributes[B];A+=" "+C.nodeName.toLowerCase();if(C.value!==undefined&&C.value!==false&&C.value!==null){A+='="'+m(C.value)+'"'}}return A+">"}while(y.length||w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("</"+p.nodeName.toLowerCase()+">")}while(p!=v.node);s.splice(r,1);while(r<s.length){z+=t(s[r]);r++}}}}return z+m(x.substr(q))}function 
j(){function q(x,y,v){if(x.compiled){return}var u;var s=[];if(x.k){x.lR=f(y,x.l||hljs.IR,true);for(var w in x.k){if(!x.k.hasOwnProperty(w)){continue}if(x.k[w] instanceof Object){u=x.k[w]}else{u=x.k;w="keyword"}for(var r in u){if(!u.hasOwnProperty(r)){continue}x.k[r]=[w,u[r]];s.push(r)}}}if(!v){if(x.bWK){x.b="\\b("+s.join("|")+")\\s"}x.bR=f(y,x.b?x.b:"\\B|\\b");if(!x.e&&!x.eW){x.e="\\B|\\b"}if(x.e){x.eR=f(y,x.e)}}if(x.i){x.iR=f(y,x.i)}if(x.r===undefined){x.r=1}if(!x.c){x.c=[]}x.compiled=true;for(var t=0;t<x.c.length;t++){if(x.c[t]=="self"){x.c[t]=x}q(x.c[t],y,false)}if(x.starts){q(x.starts,y,false)}}for(var p in e){if(!e.hasOwnProperty(p)){continue}q(e[p].dM,e[p],true)}}function d(B,C){if(!j.called){j();j.called=true}function q(r,M){for(var L=0;L<M.c.length;L++){if((M.c[L].bR.exec(r)||[null])[0]==r){return M.c[L]}}}function v(L,r){if(D[L].e&&D[L].eR.test(r)){return 1}if(D[L].eW){var M=v(L-1,r);return M?M+1:0}return 0}function w(r,L){return L.i&&L.iR.test(r)}function K(N,O){var M=[];for(var L=0;L<N.c.length;L++){M.push(N.c[L].b)}var r=D.length-1;do{if(D[r].e){M.push(D[r].e)}r--}while(D[r+1].eW);if(N.i){M.push(N.i)}return f(O,M.join("|"),true)}function p(M,L){var N=D[D.length-1];if(!N.t){N.t=K(N,E)}N.t.lastIndex=L;var r=N.t.exec(M);return r?[M.substr(L,r.index-L),r[0],false]:[M.substr(L),"",true]}function z(N,r){var L=E.cI?r[0].toLowerCase():r[0];var M=N.k[L];if(M&&M instanceof Array){return M}return false}function F(L,P){L=m(L);if(!P.k){return L}var r="";var O=0;P.lR.lastIndex=0;var M=P.lR.exec(L);while(M){r+=L.substr(O,M.index-O);var N=z(P,M);if(N){x+=N[1];r+='<span class="'+N[0]+'">'+M[0]+"</span>"}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'<span class="'+M.cN+'">':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var 
R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.length-1,M);if(L){var O=R.cN?"</span>":"";if(R.rE){y+=J(R.buffer+N,R)+O}else{if(R.eE){y+=J(R.buffer+N,R)+O+m(M)}else{y+=J(R.buffer+N+M,R)+O}}while(L>1){O=D[D.length-2].cN?"</span>":"";y+=O;L--;D.length--}var r=D[D.length-1];D.length--;D[D.length-1].buffer="";if(r.starts){I(r.starts,"")}return R.rE}if(w(M,R)){throw"Illegal"}}var E=e[B];var D=[E.dM];var A=0;var x=0;var y="";try{var s,u=0;E.dM.buffer="";do{s=p(C,u);var t=G(s[0],s[1],s[2]);u+=s[0].length;if(!t){u+=s[1].length}}while(!s[2]);if(D.length>1){throw"Illegal"}return{r:A,keyword_count:x,value:y}}catch(H){if(H=="Illegal"){return{r:0,keyword_count:0,value:m(C)}}else{throw H}}}function g(t){var p={keyword_count:0,r:0,value:m(t)};var r=p;for(var q in e){if(!e.hasOwnProperty(q)){continue}var s=d(q,t);s.language=q;if(s.keyword_count+s.r>r.keyword_count+r.r){r=s}if(s.keyword_count+s.r>p.keyword_count+p.r){r=p;p=s}}if(r.language){p.second_best=r}return p}function i(r,q,p){if(q){r=r.replace(/^((<[^>]+>|\t)+)/gm,function(t,w,v,u){return w.replace(/\t/g,q)})}if(p){r=r.replace(/\n/g,"<br>")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s|^)(language-)?"+v+"(\\s|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML="<pre><code>"+y.value+"</code></pre>";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var 
r=document.getElementsByTagName("pre");for(var p=0;p<r.length;p++){var q=b(r[p]);if(q){n(q,hljs.tabReplace)}}}function l(){if(window.addEventListener){window.addEventListener("DOMContentLoaded",o,false);window.addEventListener("load",o,false)}else{if(window.attachEvent){window.attachEvent("onload",o)}else{window.onload=o}}}var e={};this.LANGUAGES=e;this.highlight=d;this.highlightAuto=g;this.fixMarkup=i;this.highlightBlock=n;this.initHighlighting=o;this.initHighlightingOnLoad=l;this.IR="[a-zA-Z][a-zA-Z0-9_]*";this.UIR="[a-zA-Z_][a-zA-Z0-9_]*";this.NR="\\b\\d+(\\.\\d+)?";this.CNR="\\b(0[xX][a-fA-F0-9]+|(\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)";this.BNR="\\b(0b[01]+)";this.RSR="!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~";this.ER="(?![\\s\\S])";this.BE={b:"\\\\.",r:0};this.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[this.BE],r:0};this.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[this.BE],r:0};this.CLCM={cN:"comment",b:"//",e:"$"};this.CBLCLM={cN:"comment",b:"/\\*",e:"\\*/"};this.HCM={cN:"comment",b:"#",e:"$"};this.NM={cN:"number",b:this.NR,r:0};this.CNM={cN:"number",b:this.CNR,r:0};this.BNM={cN:"number",b:this.BNR,r:0};this.inherit=function(r,s){var p={};for(var q in r){p[q]=r[q]}if(s){for(var q in s){p[q]=s[q]}}return 
p}}();hljs.LANGUAGES.r={dM:{c:[hljs.HCM,{cN:"number",b:"\\b0[xX][0-9a-fA-F]+[Li]?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+(?:[eE][+\\-]?\\d*)?L\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+\\.(?!\\d)(?:i\\b)?",e:hljs.IMMEDIATE_RE,r:1},{cN:"number",b:"\\b\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\.\\d+(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"keyword",b:"(?:tryCatch|library|setGeneric|setGroupGeneric)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\.",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\d+(?![\\w.])",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\b(?:function)",e:hljs.IMMEDIATE_RE,r:2},{cN:"keyword",b:"(?:if|in|break|next|repeat|else|for|return|switch|while|try|stop|warning|require|attach|detach|source|setMethod|setClass)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"literal",b:"(?:NA|NA_integer_|NA_real_|NA_character_|NA_complex_)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"literal",b:"(?:NULL|TRUE|FALSE|T|F|Inf|NaN)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"identifier",b:"[a-zA-Z.][a-zA-Z0-9._]*\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"<\\-(?!\\s*\\d)",e:hljs.IMMEDIATE_RE,r:2},{cN:"operator",b:"\\->|<\\-",e:hljs.IMMEDIATE_RE,r:1},{cN:"operator",b:"%%|~",e:hljs.IMMEDIATE_RE},{cN:"operator",b:">=|<=|==|!=|\\|\\||&&|=|\\+|\\-|\\*|/|\\^|>|<|!|&|\\||\\$|:",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"%",e:"%",i:"\\n",r:1},{cN:"identifier",b:"`",e:"`",r:0},{cN:"string",b:'"',e:'"',c:[hljs.BE],r:0},{cN:"string",b:"'",e:"'",c:[hljs.BE],r:0},{cN:"paren",b:"[[({\\])}]",e:hljs.IMMEDIATE_RE,r:0}]}};
174hljs.initHighlightingOnLoad(); /* Schedule the highlighting pass for when the document has loaded. */
175</script>
176
177
178
179
180</head>
181
182<body>
183<p>up to: <a href="http://informatics.kumc.edu/work/wiki/HeronStatsPlugins">HeronStatsPlugins</a></p>
184
185<h1>R from a Programmer&#39;s Point of View</h1>
186
187<p>by Dan Connolly</p>
188
189<p>These are some notes on R from <a href="http://biostat.mc.vanderbilt.edu/wiki/Main/UseR-2012">The 8th International R User Conference</a> in Nashville, June 2012.</p>
190
191<p>In a lot of ways, R is a little like JavaScript: Scheme with C-like syntax. It&#39;s a dynamic language, much like Python or Ruby, with a large standard library for math and statistics:</p>
192
193<pre><code class="r">1 + 1
194</code></pre>
195
196<pre><code>## [1] 2
197</code></pre>
198
199<pre><code class="r">sin(pi/2)
200</code></pre>
201
202<pre><code>## [1] 1
203</code></pre>
204
205<h2>Vectors Everywhere</h2>
206
207<p>The first thing to get used to is: there are no scalars. The most primitive datatype is vector:</p>
208
209<pre><code class="r">sizes &lt;- c(2, 4, 6, 8)
210sizes * 3
211</code></pre>
212
213<pre><code>## [1]  6 12 18 24
214</code></pre>
215
216<pre><code class="r">sin(pi/sizes)
217</code></pre>
218
219<pre><code>## [1] 1.0000 0.7071 0.5000 0.3827
220</code></pre>
221
222<pre><code class="r">sizes + 1:4
223</code></pre>
224
225<pre><code>## [1]  3  6  9 12
226</code></pre>
227
228<pre><code class="r">sizes + 0:1
229</code></pre>
230
231<pre><code>## [1] 2 5 6 9
232</code></pre>
233
234<pre><code class="r">set.seed(1234)
235round(runif(3, min = 0, max = 5))
236</code></pre>
237
238<pre><code>## [1] 1 3 3
239</code></pre>
240
241<p>c() is for concatenate. 1:3 is short for seq(from=1, to=3). Note that adding a short vector (0:1) to a long vector recycles the short one: it is repeated until it matches the length of the long one.</p>
242
243<h2>Copy on write, no sharing</h2>
244
245<p>Assignment in R is more like PHP than Python, in that vectors get copied:</p>
246
247<pre><code class="r">x &lt;- c(1, 2, 3)
248y &lt;- x
249x[2] = 9
250x
251</code></pre>
252
253<pre><code>## [1] 1 9 3
254</code></pre>
255
256<pre><code class="r">y
257</code></pre>
258
259<pre><code>## [1] 1 2 3
260</code></pre>
261
262<p>Indexing starts at 1, as opposed to 0 as in C etc.</p>
263
264<h2>Assignment, or &ldquo;gets&rdquo;</h2>
265
266<p>R has a slightly novel approach to the = vs == syntax:</p>
267
268<pre><code class="r">a &lt;- 2
269a * a
270</code></pre>
271
272<pre><code>## [1] 4
273</code></pre>
274
275<p>Using = in place of &lt;- reportedly works <em>almost</em> everywhere, but it&#39;s frowned upon.</p>
276
277<p>But that&#39;s a minor issue.</p>
278
279<h2>Argument evaluation, delayed</h2>
280
281<p>The stuff that turns your head sideways is lazy/delayed evaluation of arguments:</p>
282
283<pre><code class="r">ignore.first &lt;- function(a, b) {
284    b * 2
285}
286ignore.first(1/0, 3)
287</code></pre>
288
289<pre><code>## [1] 6
290</code></pre>
291
292<p>Argument evaluation is not only delayed; the so-called promises (the R manual calls them promises, but since they can&#39;t be broken, that&#39;s something of a misnomer) also carry the unevaluated expression itself:</p>
293
294<pre><code class="r">expression.parts &lt;- function(e) {
295    substitute(e)
296}
297expression.parts(x + y * z)
298</code></pre>
299
300<pre><code>## x + y * z
301</code></pre>
302
303<p>That looks pretty wonky from the perspective of most general-purpose computing languages, but it makes a lot of sense when doing statistical modelling and plotting, as we&#39;ll see below.</p>
304
305<h2>Working with data in dataframes</h2>
306
307<p>Modelling typically starts with some data. We&#39;ll synthesize it here, using the workhorse dataframe (much like a database table):</p>
308
309<pre><code class="r">speeds &lt;- runif(10, min = 25, max = 50)
310erf &lt;- function(x) 2 * pnorm(x * sqrt(2)) - 1
311stopping &lt;- data.frame(speed = speeds, distance = (speeds^2 + erf(speeds)))
312</code></pre>
313
314<h2>Formulas in Interactive Plotting</h2>
315
316<p>Interactive visualization through plotting is one use of unevaluated arguments. We can plot stopping distance as a function of speed.</p>
317
318<pre><code class="r">plot(distance ~ speed, stopping)
319</code></pre>
320
321<p><img src="" alt="plot of chunk unnamed-chunk-8"/> </p>
322
323<p>The parabola becomes more clear if we zoom out to include the origin:</p>
324
325<pre><code class="r">plot(distance ~ speed, stopping, xlim = c(0, max(stopping$speed)),
326    ylim = c(0, max(stopping$distance)))
327</code></pre>
328
329<p><img src="" alt="plot of chunk unnamed-chunk-9"/> </p>
330
331<h2>Formulas in linear models</h2>
332
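333<p>Another use of unevaluated formulas is linear models; I don&#39;t yet understand them, but I gather they&#39;re a mainstay of analysis with R. Beware that inside a formula ^ is a formula operator rather than arithmetic: speed^2 is treated as plain speed (hence the coefficient labelled speed below), and I(speed^2) is needed to fit the squared term:</p>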
334
335<pre><code class="r">m &lt;- lm(distance ~ 0 + speed^2, stopping)
336coef(m)
337</code></pre>
338
339<pre><code>## speed
340## 39.51
341</code></pre>
342
343<h2>The R Learning Cliff</h2>
344
345<p>“I’m going to assume you know what a generalized linear model is,” said Bill Venables in the R short course. Nowhere in the R world is there a definition of basic concepts such as linear model or standard deviation. The help for sd says:</p>
346
347<blockquote>
348<p>This function computes the standard deviation of the values in x.</p>
349</blockquote>
350
351<p>Gee, thanks. The reference to the var function looked promising, but nope. They just bottom out with</p>
352
353<blockquote>
354<h2>References</h2>
355
356<p>Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) The New S Language. Wadsworth &amp; Brooks/Cole.</p>
357</blockquote>
358
359<p>In fairness, I suppose the python docs for sort() don&#39;t spell out how to sort items in a list.</p>
360
361<p>So then it’s off to Wikipedia’s statistics materials; but I haven’t found a good connection between what I know (set theory, discrete mathematics, real analysis, topology) and the foundations of statistics. I understand some examples and special cases, but not many general definitions.</p>
362
363<h2>R help is &ldquo;obtuse&rdquo;</h2>
364
365<p>In the &ldquo;crash course&rdquo; tutorial, I learned I&#39;m not the only one who doesn&#39;t find R&#39;s help very helpful: “frequently the complaint about R help is that the help is obtuse. Far too subtle for mere mortals. To get help with xyz, search for R xyz example”</p>
366
367<h2>R Development Tools</h2>
368
369<p>Bill Venables uses ESS, the R mode for Emacs, but says these days he’d recommend RStudio.</p>
370
371<p>I used Emacs + ESS when developing rgate. Since I&#39;d rather not infect the next generation with the emacs virus, I installed R for Eclipse in preparation for the conference. But I don&#39;t think I&#39;ve used R for Eclipse since.</p>
372
373<p>The talk by the RStudio guys (JJ Allaire, who developed ColdFusion etc.) was pretty cool. I&#39;m using RStudio and the Markdown integration to draft this little article. <a href="http://yihui.name/knitr">knitr</a> is cool, but this &ldquo;literate programming&rdquo; style seems somewhat inside-out, to me. I prefer to generate documentation from the normal source code (.R, .py, .C), a la doxygen, doctest, sphinx. But I&#39;m giving it a try.</p>
374
375<h2>doctest for R? Almost&hellip;</h2>
376
377<p>In the crash course (slide: &ldquo;Unit tests in R&rdquo;) I learned about a convention for mixing runnable examples with package documentation (*.Rd), much like python&#39;s doctest. Yay!</p>
378
379<p>But&hellip; the conventions don&#39;t include checking that the output of the examples matches any expected results. Sigh. So close.</p>
380
381<p>I suppose one could add something to make the examples fail if they don&#39;t produce the expected results. That&#39;s possibly useful, but not nearly as useful as an established community norm for doing so.</p>
382
383<p>Wickham’s devtools package looks interesting. (Wickham is clearly a leading light&hellip; his ggplot2 was used everywhere.)</p>
384
385<h2>R performance, profiling</h2>
386
387<p>Performance was a theme of the conference (as well as reproducible research, which is another article altogether).</p>
388
389<p>Norm Matloff gave a great invited talk (<a href="http://heather.cs.ucdavis.edu/UseR2012.pdf">slides</a>) on <strong>Parallel R, Revisited</strong>, summarizing major hardware trends, e.g.</p>
390
391<ul>
392<li>NVIDIA currently dominant in the GPU world. Intel likely to enter the market, with the obvious fragmentation risk.</li>
393<li>Knights Ferry “next year” for several years</li>
394<li>CUDA currently dominant</li>
395<li>OpenCL stalled?</li>
396<li>OpenACC - a la openmp</li>
397</ul>
398
399<p>Then he showed his &ldquo;software alchemy&rdquo; technique, which achieves super-linear speedup in some applications that seemed common/important.</p>
400
401<p>Another talk by <strong>Justin Talbot</strong> went into more detail. He reminded us that clock speed topped out in ~2003 at ~3 GHz, but 2 to 4 cores is consumer technology today and he expects 8 to 16 cores on laptops in ~5 years. Memory isn’t getting much faster either. Since we have more cores, we’re memory-starved. Have to do more with registers and caches.</p>
402
403<p>He noted 3 distinct performance areas in R: scalar (interpreter), matrix, and vector. The conventional wisdom is that vector operations in R are as fast as C code, but he found that they were only as fast as <em>poorly written</em> C code (too many copies); 7% as fast as hand-tuned C code. So he was able to get 60x speed-up through a combination of better C code and multi-core use.</p>
404
405<p>Tim Hesterberg from Google gave an invited talk including speeding up dataframes&hellip; reducing the number of copies from 8 to 3 in some common operations. He mentioned a few ways to find out what to optimize: tracemem, Rprofmem, <code>--enable-memory-profiling</code></p>
406
407</body>
408
409</html>
410