HeronStatsPlugins: rgate_api_tour.html

File rgate_api_tour.html, 36.1 KB (added by dconnolly, 5 years ago)

rgate API tour: use - rather than | in concept codes to avoid issues with markdown tables

Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
2"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
3<!-- saved from url=(0014)about:internet -->
4<html>
5<head>
6<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
7
8<title>A tour of the rgate API</title>
9
10<base target="_blank"/>
11
12<style type="text/css">
13body, td { /* base text style, applied to the page body and to table cells alike */
14   font-family: sans-serif;
15   background-color: white;
16   font-size: 12px;
17   margin: 8px;
18}
19
20tt, code, pre { /* monospace stack for inline code and code blocks */
21   font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace;
22}
23
24h1 { /* heading sizes below are in em, i.e. relative to the inherited font size (12px from body) */
25   font-size:2.2em;
26}
27
28h2 {
29   font-size:1.8em;
30}
31
32h3 {
33   font-size:1.4em;
34}
35
36h4 {
37   font-size:1.0em;
38}
39
40h5 {
41   font-size:0.9em;
42}
43
44h6 {
45   font-size:0.8em;
46}
47
48a:visited { /* visited links are drawn in purple */
49   color: rgb(50%, 0%, 50%);
50}
51
52pre {   /* code and output blocks: thin grey frame, capped at 95% of the container width */
53   margin-top: 0;
54   max-width: 95%;
55   border: 1px solid #ccc;
56}
57
58pre code { /* block display so the padding surrounds the whole listing, not each inline line box */
59   display: block; padding: 0.5em;
60}
61
62code.r { /* only code elements with class "r" get the light grey background; plain code elements do not */
63   background-color: #F8F8F8;
64}
65
66table, td, th { /* tables are rendered without any borders */
67  border: none;
68}
69
70blockquote { /* grey text with a thick light-grey bar on the left edge */
71   color:#666666;
72   margin:0;
73   padding-left: 1em;
74   border-left: 0.5em #EEE solid;
75}
76
77hr { /* zero-height rule: only the thin dotted grey top border is visible */
78   height: 0px;
79   border-bottom: none;
80   border-top-width: thin;
81   border-top-style: dotted;
82   border-top-color: #999999;
83}
84
85@media print { /* overrides that apply only when the page is printed */
86   * { /* force black text on a transparent background and disable filters on every element */
87      background: transparent !important;
88      color: black !important;
89      filter:none !important;
90      -ms-filter: none !important;
91   }
92
93   body { /* switch to a point-based font size and use the full page width */
94      font-size:12pt;
95      max-width:100%;
96   }
97       
98   a, a:visited { /* colour is forced to black above, so links are marked by underlining */
99      text-decoration: underline;
100   }
101
102   hr { /* a horizontal rule becomes an invisible forced page break */
103      visibility: hidden;
104      page-break-before: always;
105   }
106
107   pre, blockquote { /* avoid splitting a code block or quotation across pages */
108      padding-right: 1em;
109      page-break-inside: avoid;
110   }
111
112   tr, img { /* avoid splitting a table row or an image across pages */
113      page-break-inside: avoid;
114   }
115
116   img { /* never let an image overflow the printable width */
117      max-width: 100% !important;
118   }
119
120   @page :left { /* margin order is top right bottom left: left-hand pages get the wider 20mm margin on the right */
121      margin: 15mm 20mm 15mm 10mm;
122   }
123     
124   @page :right { /* mirrored: right-hand pages get the wider 20mm margin on the left, so the wide margin is always on the inner edge */
125      margin: 15mm 10mm 15mm 20mm;
126   }
127
128   p, h2, h3 { /* require at least 3 lines at the bottom (orphans) and top (widows) of a page when a block is split */
129      orphans: 3; widows: 3;
130   }
131
132   h2, h3 { /* avoid a page break directly after a section heading */
133      page-break-after: avoid;
134   }
135}
136
137</style>
138
139<!-- Styles for R syntax highlighter -->
140<style type="text/css">
141   pre .operator,
142   pre .paren { /* class names here match the cN values of the highlighter's language definitions, which wrap tokens in span elements with that class */
143     color: rgb(104, 118, 135) /* grey-blue for operators and brackets */
144   }
145
146   pre .literal { /* literal tokens such as NA, NULL, TRUE, FALSE in the r definition */
147     color: rgb(88, 72, 246)
148   }
149
150   pre .number { /* numeric constants */
151     color: rgb(0, 0, 205);
152   }
153
154   pre .comment { /* comment tokens */
155     color: rgb(76, 136, 107);
156   }
157
158   pre .keyword { /* keywords are pure blue */
159     color: rgb(0, 0, 255);
160   }
161
162   pre .identifier { /* identifiers stay black */
163     color: rgb(0, 0, 0);
164   }
165
166   pre .string { /* quoted strings are dark green */
167     color: rgb(3, 106, 7);
168   }
169</style>
170
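171<!-- Syntax highlighter: minified hljs core plus its cpp and r language definitions; it highlights the code inside pre blocks once the page has loaded -->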
172<script type="text/javascript">
173var hljs=new function(){function m(p){return p.replace(/&/gm,"&amp;").replace(/</gm,"&lt;")}function f(r,q,p){return RegExp(q,"m"+(r.cI?"i":"")+(p?"g":""))}function b(r){for(var p=0;p<r.childNodes.length;p++){var q=r.childNodes[p];if(q.nodeName=="CODE"){return q}if(!(q.nodeType==3&&q.nodeValue.match(/\s+/))){break}}}function h(t,s){var p="";for(var r=0;r<t.childNodes.length;r++){if(t.childNodes[r].nodeType==3){var q=t.childNodes[r].nodeValue;if(s){q=q.replace(/\n/g,"")}p+=q}else{if(t.childNodes[r].nodeName=="BR"){p+="\n"}else{p+=h(t.childNodes[r])}}}if(/MSIE [678]/.test(navigator.userAgent)){p=p.replace(/\r/g,"\n")}return p}function a(s){var r=s.className.split(/\s+/);r=r.concat(s.parentNode.className.split(/\s+/));for(var q=0;q<r.length;q++){var p=r[q].replace(/^language-/,"");if(e[p]){return p}}}function c(q){var p=[];(function(s,t){for(var r=0;r<s.childNodes.length;r++){if(s.childNodes[r].nodeType==3){t+=s.childNodes[r].nodeValue.length}else{if(s.childNodes[r].nodeName=="BR"){t+=1}else{if(s.childNodes[r].nodeType==1){p.push({event:"start",offset:t,node:s.childNodes[r]});t=arguments.callee(s.childNodes[r],t);p.push({event:"stop",offset:t,node:s.childNodes[r]})}}}}return t})(q,0);return p}function k(y,w,x){var q=0;var z="";var s=[];function u(){if(y.length&&w.length){if(y[0].offset!=w[0].offset){return(y[0].offset<w[0].offset)?y:w}else{return w[0].event=="start"?y:w}}else{return y.length?y:w}}function t(D){var A="<"+D.nodeName.toLowerCase();for(var B=0;B<D.attributes.length;B++){var C=D.attributes[B];A+=" "+C.nodeName.toLowerCase();if(C.value!==undefined&&C.value!==false&&C.value!==null){A+='="'+m(C.value)+'"'}}return A+">"}while(y.length||w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("</"+p.nodeName.toLowerCase()+">")}while(p!=v.node);s.splice(r,1);while(r<s.length){z+=t(s[r]);r++}}}}return z+m(x.substr(q))}function 
j(){function q(x,y,v){if(x.compiled){return}var u;var s=[];if(x.k){x.lR=f(y,x.l||hljs.IR,true);for(var w in x.k){if(!x.k.hasOwnProperty(w)){continue}if(x.k[w] instanceof Object){u=x.k[w]}else{u=x.k;w="keyword"}for(var r in u){if(!u.hasOwnProperty(r)){continue}x.k[r]=[w,u[r]];s.push(r)}}}if(!v){if(x.bWK){x.b="\\b("+s.join("|")+")\\s"}x.bR=f(y,x.b?x.b:"\\B|\\b");if(!x.e&&!x.eW){x.e="\\B|\\b"}if(x.e){x.eR=f(y,x.e)}}if(x.i){x.iR=f(y,x.i)}if(x.r===undefined){x.r=1}if(!x.c){x.c=[]}x.compiled=true;for(var t=0;t<x.c.length;t++){if(x.c[t]=="self"){x.c[t]=x}q(x.c[t],y,false)}if(x.starts){q(x.starts,y,false)}}for(var p in e){if(!e.hasOwnProperty(p)){continue}q(e[p].dM,e[p],true)}}function d(B,C){if(!j.called){j();j.called=true}function q(r,M){for(var L=0;L<M.c.length;L++){if((M.c[L].bR.exec(r)||[null])[0]==r){return M.c[L]}}}function v(L,r){if(D[L].e&&D[L].eR.test(r)){return 1}if(D[L].eW){var M=v(L-1,r);return M?M+1:0}return 0}function w(r,L){return L.i&&L.iR.test(r)}function K(N,O){var M=[];for(var L=0;L<N.c.length;L++){M.push(N.c[L].b)}var r=D.length-1;do{if(D[r].e){M.push(D[r].e)}r--}while(D[r+1].eW);if(N.i){M.push(N.i)}return f(O,M.join("|"),true)}function p(M,L){var N=D[D.length-1];if(!N.t){N.t=K(N,E)}N.t.lastIndex=L;var r=N.t.exec(M);return r?[M.substr(L,r.index-L),r[0],false]:[M.substr(L),"",true]}function z(N,r){var L=E.cI?r[0].toLowerCase():r[0];var M=N.k[L];if(M&&M instanceof Array){return M}return false}function F(L,P){L=m(L);if(!P.k){return L}var r="";var O=0;P.lR.lastIndex=0;var M=P.lR.exec(L);while(M){r+=L.substr(O,M.index-O);var N=z(P,M);if(N){x+=N[1];r+='<span class="'+N[0]+'">'+M[0]+"</span>"}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'<span class="'+M.cN+'">':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var 
R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.length-1,M);if(L){var O=R.cN?"</span>":"";if(R.rE){y+=J(R.buffer+N,R)+O}else{if(R.eE){y+=J(R.buffer+N,R)+O+m(M)}else{y+=J(R.buffer+N+M,R)+O}}while(L>1){O=D[D.length-2].cN?"</span>":"";y+=O;L--;D.length--}var r=D[D.length-1];D.length--;D[D.length-1].buffer="";if(r.starts){I(r.starts,"")}return R.rE}if(w(M,R)){throw"Illegal"}}var E=e[B];var D=[E.dM];var A=0;var x=0;var y="";try{var s,u=0;E.dM.buffer="";do{s=p(C,u);var t=G(s[0],s[1],s[2]);u+=s[0].length;if(!t){u+=s[1].length}}while(!s[2]);if(D.length>1){throw"Illegal"}return{r:A,keyword_count:x,value:y}}catch(H){if(H=="Illegal"){return{r:0,keyword_count:0,value:m(C)}}else{throw H}}}function g(t){var p={keyword_count:0,r:0,value:m(t)};var r=p;for(var q in e){if(!e.hasOwnProperty(q)){continue}var s=d(q,t);s.language=q;if(s.keyword_count+s.r>r.keyword_count+r.r){r=s}if(s.keyword_count+s.r>p.keyword_count+p.r){r=p;p=s}}if(r.language){p.second_best=r}return p}function i(r,q,p){if(q){r=r.replace(/^((<[^>]+>|\t)+)/gm,function(t,w,v,u){return w.replace(/\t/g,q)})}if(p){r=r.replace(/\n/g,"<br>")}return r}function n(t,w,r){var x=h(t,r);var v=a(t);var y,s;if(v){y=d(v,x)}else{return}var q=c(t);if(q.length){s=document.createElement("pre");s.innerHTML=y.value;y.value=k(q,c(s),x)}y.value=i(y.value,w,r);var u=t.className;if(!u.match("(\\s|^)(language-)?"+v+"(\\s|$)")){u=u?(u+" "+v):v}if(/MSIE [678]/.test(navigator.userAgent)&&t.tagName=="CODE"&&t.parentNode.tagName=="PRE"){s=t.parentNode;var p=document.createElement("div");p.innerHTML="<pre><code>"+y.value+"</code></pre>";t=p.firstChild.firstChild;p.firstChild.cN=s.cN;s.parentNode.replaceChild(p.firstChild,s)}else{t.innerHTML=y.value}t.className=u;t.result={language:v,kw:y.keyword_count,re:y.r};if(y.second_best){t.second_best={language:y.second_best.language,kw:y.second_best.keyword_count,re:y.second_best.r}}}function o(){if(o.called){return}o.called=true;var 
r=document.getElementsByTagName("pre");for(var p=0;p<r.length;p++){var q=b(r[p]);if(q){n(q,hljs.tabReplace)}}}function l(){if(window.addEventListener){window.addEventListener("DOMContentLoaded",o,false);window.addEventListener("load",o,false)}else{if(window.attachEvent){window.attachEvent("onload",o)}else{window.onload=o}}}var e={};this.LANGUAGES=e;this.highlight=d;this.highlightAuto=g;this.fixMarkup=i;this.highlightBlock=n;this.initHighlighting=o;this.initHighlightingOnLoad=l;this.IR="[a-zA-Z][a-zA-Z0-9_]*";this.UIR="[a-zA-Z_][a-zA-Z0-9_]*";this.NR="\\b\\d+(\\.\\d+)?";this.CNR="\\b(0[xX][a-fA-F0-9]+|(\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)";this.BNR="\\b(0b[01]+)";this.RSR="!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|\\.|-|-=|/|/=|:|;|<|<<|<<=|<=|=|==|===|>|>=|>>|>>=|>>>|>>>=|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~";this.ER="(?![\\s\\S])";this.BE={b:"\\\\.",r:0};this.ASM={cN:"string",b:"'",e:"'",i:"\\n",c:[this.BE],r:0};this.QSM={cN:"string",b:'"',e:'"',i:"\\n",c:[this.BE],r:0};this.CLCM={cN:"comment",b:"//",e:"$"};this.CBLCLM={cN:"comment",b:"/\\*",e:"\\*/"};this.HCM={cN:"comment",b:"#",e:"$"};this.NM={cN:"number",b:this.NR,r:0};this.CNM={cN:"number",b:this.CNR,r:0};this.BNM={cN:"number",b:this.BNR,r:0};this.inherit=function(r,s){var p={};for(var q in r){p[q]=r[q]}if(s){for(var q in s){p[q]=s[q]}}return p}}();hljs.LANGUAGES.cpp=function(){var 
a={keyword:{"false":1,"int":1,"float":1,"while":1,"private":1,"char":1,"catch":1,"export":1,virtual:1,operator:2,sizeof:2,dynamic_cast:2,typedef:2,const_cast:2,"const":1,struct:1,"for":1,static_cast:2,union:1,namespace:1,unsigned:1,"long":1,"throw":1,"volatile":2,"static":1,"protected":1,bool:1,template:1,mutable:1,"if":1,"public":1,friend:2,"do":1,"return":1,"goto":1,auto:1,"void":2,"enum":1,"else":1,"break":1,"new":1,extern:1,using:1,"true":1,"class":1,asm:1,"case":1,typeid:1,"short":1,reinterpret_cast:2,"default":1,"double":1,register:1,explicit:1,signed:1,typename:1,"try":1,"this":1,"switch":1,"continue":1,wchar_t:1,inline:1,"delete":1,alignof:1,char16_t:1,char32_t:1,constexpr:1,decltype:1,noexcept:1,nullptr:1,static_assert:1,thread_local:1,restrict:1,_Bool:1,complex:1},built_in:{std:1,string:1,cin:1,cout:1,cerr:1,clog:1,stringstream:1,istringstream:1,ostringstream:1,auto_ptr:1,deque:1,list:1,queue:1,stack:1,vector:1,map:1,set:1,bitset:1,multiset:1,multimap:1,unordered_set:1,unordered_map:1,unordered_multiset:1,unordered_multimap:1,array:1,shared_ptr:1}};return{dM:{k:a,i:"</",c:[hljs.CLCM,hljs.CBLCLM,hljs.QSM,{cN:"string",b:"'\\\\?.",e:"'",i:"."},{cN:"number",b:"\\b(\\d+(\\.\\d*)?|\\.\\d+)(u|U|l|L|ul|UL|f|F)"},hljs.CNM,{cN:"preprocessor",b:"#",e:"$"},{cN:"stl_container",b:"\\b(deque|list|queue|stack|vector|map|set|bitset|multiset|multimap|unordered_map|unordered_set|unordered_multiset|unordered_multimap|array)\\s*<",e:">",k:a,r:10,c:["self"]}]}}}();hljs.LANGUAGES.r={dM:{c:[hljs.HCM,{cN:"number",b:"\\b0[xX][0-9a-fA-F]+[Li]?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+(?:[eE][+\\-]?\\d*)?L\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\b\\d+\\.(?!\\d)(?:i\\b)?",e:hljs.IMMEDIATE_RE,r:1},{cN:"number",b:"\\b\\d+(?:\\.\\d*)?(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"number",b:"\\.\\d+(?:[eE][+\\-]?\\d*)?i?\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"keyword",b:"(?:tryCatch|library|setGeneric|setGroupGeneric)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\
\.\\.\\.",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\.\\.\\d+(?![\\w.])",e:hljs.IMMEDIATE_RE,r:10},{cN:"keyword",b:"\\b(?:function)",e:hljs.IMMEDIATE_RE,r:2},{cN:"keyword",b:"(?:if|in|break|next|repeat|else|for|return|switch|while|try|stop|warning|require|attach|detach|source|setMethod|setClass)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"literal",b:"(?:NA|NA_integer_|NA_real_|NA_character_|NA_complex_)\\b",e:hljs.IMMEDIATE_RE,r:10},{cN:"literal",b:"(?:NULL|TRUE|FALSE|T|F|Inf|NaN)\\b",e:hljs.IMMEDIATE_RE,r:1},{cN:"identifier",b:"[a-zA-Z.][a-zA-Z0-9._]*\\b",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"<\\-(?!\\s*\\d)",e:hljs.IMMEDIATE_RE,r:2},{cN:"operator",b:"\\->|<\\-",e:hljs.IMMEDIATE_RE,r:1},{cN:"operator",b:"%%|~",e:hljs.IMMEDIATE_RE},{cN:"operator",b:">=|<=|==|!=|\\|\\||&&|=|\\+|\\-|\\*|/|\\^|>|<|!|&|\\||\\$|:",e:hljs.IMMEDIATE_RE,r:0},{cN:"operator",b:"%",e:"%",i:"\\n",r:1},{cN:"identifier",b:"`",e:"`",r:0},{cN:"string",b:'"',e:'"',c:[hljs.BE],r:0},{cN:"string",b:"'",e:"'",c:[hljs.BE],r:0},{cN:"paren",b:"[[({\\])}]",e:hljs.IMMEDIATE_RE,r:0}]}};
174hljs.initHighlightingOnLoad();
175</script>
176
177
178
179
180</head>
181
182<body>
183<h1>A tour of the rgate API</h1>
184
185<p>by Dan Connolly, <a href="http://informatics.kumc.edu/">KUMC Informatics</a></p>
186
187<p><em>For context, see <a href="http://informatics.kumc.edu/work/wiki/HeronStatsPlugins">HeronStatsPlugins</a>.</em></p>
188
189<p>The <code>rgate</code> back-end integrates R scripts such as <code>km_analysis.R</code> with i2b2 plug-ins. The <code>km_analysis_test.R</code> script simulates the rgate calling environment:</p>
190
191<pre><code class="r">source(&quot;km_analysis.R&quot;)
192source(&quot;km_analysis_test.R&quot;)
193</code></pre>
194
195<h2>Querying Patient Sets</h2>
196
197<p>The core of the interface is an R object that provides access to data about a patient set. Suppose we have a patient set with 30 patients. Since one patient may have multiple cases or no cases in the tumor registry, let&#39;s suppose these patients have a total of 35 cancer cases.</p>
198
199<pre><code class="r">pset &lt;- mock.patients(n.patient = 30, n.case = 35)
200class(pset)
201</code></pre>
202
203<pre><code>## [1] &quot;patients&quot;
204</code></pre>
205
206<pre><code class="r">pset$id
207</code></pre>
208
209<pre><code>## [1] 123
210</code></pre>
211
212<h3>The I2B2 Star Schema</h3>
213
214<p>I2B2 integrates all data about patients into an <code>observation_fact</code> table. Each observation has a <code>concept_cd</code> that is related to any number of <code>concept_path</code>s. For example, the code <code>SEER_SITE:32010</code> is related to</p>
215
216<ul>
217<li><code>\\i2b2\i2b2\naaccr\SEER Site\Endocrine System\Thyroid\</code></li>
218<li><code>\\i2b2\i2b2\naaccr\SEER Site\Endocrine System\</code></li>
219<li><code>\\i2b2\i2b2\naaccr\SEER Site\</code></li>
220<li>etc.</li>
221</ul>
222
223<p>The <code>observations</code> method on a patient set takes a vector of concept paths and returns the relevant facts as an R dataframe:</p>
224
225<pre><code class="r">survival.paths &lt;- mock.paths()
226
227obs.db &lt;- observations(pset, c(survival.paths$event))
228markdown.df(obs.db)
229</code></pre>
230
231<table><thead>
232<tr>
233<th>PATIENT_NUM</th>
234<th>START_DATE</th>
235<th>CONCEPT_CD</th>
236<th>NAME_CHAR</th>
237<th>PANEL</th>
238</tr>
239</thead><tbody>
240<tr>
241<td>22</td>
242<td>1980-04-21 11:41:31</td>
243<td>MOCK-VITAL:y</td>
244<td>Deceased</td>
245<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1760 Vital Status\0 Dead (CoC)\</td>
246</tr>
247<tr>
248<td>26</td>
249<td>1980-04-02 21:59:54</td>
250<td>MOCK-VITAL:y</td>
251<td>Deceased</td>
252<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1760 Vital Status\0 Dead (CoC)\</td>
253</tr>
254<tr>
255<td>18</td>
256<td>1980-02-04 22:45:44</td>
257<td>MOCK-VITAL:y</td>
258<td>Deceased</td>
259<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1760 Vital Status\0 Dead (CoC)\</td>
260</tr>
261<tr>
262<td>11</td>
263<td>1981-02-20 22:41:56</td>
264<td>MOCK-VITAL:y</td>
265<td>Deceased</td>
266<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1760 Vital Status\0 Dead (CoC)\</td>
267</tr>
268<tr>
269<td>15</td>
270<td>1980-06-03 01:13:07</td>
271<td>MOCK-VITAL:y</td>
272<td>Deceased</td>
273<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1760 Vital Status\0 Dead (CoC)\</td>
274</tr>
275</tbody></table>
276
277<h2>I2B2 Data Set for Survival Analysis</h2>
278
279<p>Those <code>survival.paths</code> are a mock-up of the paths we need for survival analysis:</p>
280
281<pre><code class="r">survival.paths
282</code></pre>
283
284<pre><code>## $start
285## [1] &quot;\\i2b2\\naaccr\\S:1 Cancer Identification\\0390 Date of Diagnosis\\&quot;
286##
287## $end
288## [1] &quot;\\i2b2\\naaccr\\S:4 Follow-up/Recurrence/Death\\1750 Date of Last Contact\\&quot;
289##
290## $event
291## [1] &quot;\\i2b2\\naaccr\\S:4 Follow-up/Recurrence/Death\\1760 Vital Status\\0 Dead (CoC)\\&quot;
292##
293## $stratum
294## [1] &quot;\\i2b2\\naaccr\\S:1 Cancer Identification\\0440 Grade\\&quot;
295##
296</code></pre>
297
298<p>To plot a survival curve, we need, for each patient/subject:</p>
299
300<ul>
301<li>a time duration (computed from <code>$start</code> and <code>$end</code> observations)</li>
302<li>a status (computed from <code>$event</code> observations)</li>
303<li>a stratum (computed from <code>$stratum</code> observations)</li>
304</ul>
305
306<p>So the data set from the database looks like this:</p>
307
308<pre><code class="r">obs.db &lt;- observations(pset, unlist(survival.paths))
309markdown.df(head(obs.db, 20))
310</code></pre>
311
312<table><thead>
313<tr>
314<th>PATIENT_NUM</th>
315<th>START_DATE</th>
316<th>CONCEPT_CD</th>
317<th>NAME_CHAR</th>
318<th>PANEL</th>
319</tr>
320</thead><tbody>
321<tr>
322<td>3</td>
323<td>1982-07-17 19:46:08</td>
324<td>MOCK-GRADE:2</td>
325<td>2</td>
326<td>\i2b2\naaccr\S:1 Cancer Identification\0440 Grade\</td>
327</tr>
328<tr>
329<td>26</td>
330<td>1981-10-16 11:43:38</td>
331<td>MOCK-GRADE:3</td>
332<td>3</td>
333<td>\i2b2\naaccr\S:1 Cancer Identification\0440 Grade\</td>
334</tr>
335<tr>
336<td>15</td>
337<td>1982-06-06 18:20:56</td>
338<td>MOCK-GRADE:4</td>
339<td>4</td>
340<td>\i2b2\naaccr\S:1 Cancer Identification\0440 Grade\</td>
341</tr>
342<tr>
343<td>17</td>
344<td>1983-09-09 07:34:35</td>
345<td>MOCK:end</td>
346<td>Last Contact</td>
347<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1750 Date of Last Contact\</td>
348</tr>
349<tr>
350<td>26</td>
351<td>1981-10-16 11:43:38</td>
352<td>MOCK:start</td>
353<td>Diagnosed</td>
354<td>\i2b2\naaccr\S:1 Cancer Identification\0390 Date of Diagnosis\</td>
355</tr>
356<tr>
357<td>3</td>
358<td>1985-06-30 01:09:07</td>
359<td>MOCK:end</td>
360<td>Last Contact</td>
361<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1750 Date of Last Contact\</td>
362</tr>
363<tr>
364<td>20</td>
365<td>1985-06-26 13:54:18</td>
366<td>MOCK:end</td>
367<td>Last Contact</td>
368<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1750 Date of Last Contact\</td>
369</tr>
370<tr>
371<td>6</td>
372<td>1982-05-04 07:15:49</td>
373<td>MOCK-GRADE:3</td>
374<td>3</td>
375<td>\i2b2\naaccr\S:1 Cancer Identification\0440 Grade\</td>
376</tr>
377<tr>
378<td>23</td>
379<td>1980-04-21 11:41:31</td>
380<td>MOCK:start</td>
381<td>Diagnosed</td>
382<td>\i2b2\naaccr\S:1 Cancer Identification\0390 Date of Diagnosis\</td>
383</tr>
384<tr>
385<td>23</td>
386<td>1980-04-21 11:41:31</td>
387<td>MOCK-GRADE:4</td>
388<td>4</td>
389<td>\i2b2\naaccr\S:1 Cancer Identification\0440 Grade\</td>
390</tr>
391<tr>
392<td>21</td>
393<td>1982-10-23 18:22:51</td>
394<td>MOCK:start</td>
395<td>Diagnosed</td>
396<td>\i2b2\naaccr\S:1 Cancer Identification\0390 Date of Diagnosis\</td>
397</tr>
398<tr>
399<td>19</td>
400<td>1980-04-02 21:59:54</td>
401<td>MOCK:start</td>
402<td>Diagnosed</td>
403<td>\i2b2\naaccr\S:1 Cancer Identification\0390 Date of Diagnosis\</td>
404</tr>
405<tr>
406<td>4</td>
407<td>1983-06-18 22:04:55</td>
408<td>MOCK:end</td>
409<td>Last Contact</td>
410<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1750 Date of Last Contact\</td>
411</tr>
412<tr>
413<td>16</td>
414<td>1981-06-28 04:33:57</td>
415<td>MOCK-GRADE:2</td>
416<td>2</td>
417<td>\i2b2\naaccr\S:1 Cancer Identification\0440 Grade\</td>
418</tr>
419<tr>
420<td>22</td>
421<td>1980-02-04 22:45:44</td>
422<td>MOCK:start</td>
423<td>Diagnosed</td>
424<td>\i2b2\naaccr\S:1 Cancer Identification\0390 Date of Diagnosis\</td>
425</tr>
426<tr>
427<td>21</td>
428<td>1984-05-07 18:40:37</td>
429<td>MOCK:end</td>
430<td>Last Contact</td>
431<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1750 Date of Last Contact\</td>
432</tr>
433<tr>
434<td>18</td>
435<td>1981-08-12 01:27:31</td>
436<td>MOCK-GRADE:4</td>
437<td>4</td>
438<td>\i2b2\naaccr\S:1 Cancer Identification\0440 Grade\</td>
439</tr>
440<tr>
441<td>22</td>
442<td>1980-04-21 11:41:31</td>
443<td>MOCK-VITAL:y</td>
444<td>Deceased</td>
445<td>\i2b2\naaccr\S:4 Follow-up/Recurrence/Death\1760 Vital Status\0 Dead (CoC)\</td>
446</tr>
447<tr>
448<td>8</td>
449<td>1982-03-31 17:53:58</td>
450<td>MOCK:start</td>
451<td>Diagnosed</td>
452<td>\i2b2\naaccr\S:1 Cancer Identification\0390 Date of Diagnosis\</td>
453</tr>
454<tr>
455<td>9</td>
456<td>1980-06-03 01:13:07</td>
457<td>MOCK-GRADE:3</td>
458<td>3</td>
459<td>\i2b2\naaccr\S:1 Cancer Identification\0440 Grade\</td>
460</tr>
461</tbody></table>
462
463<p>It starts to make sense as a whole if we sort it and prune dates and paths:</p>
464
465<pre><code class="r">obs.display &lt;- function(obs) {  # tidy an observations data frame for display
466    sorted &lt;- obs[order(obs$PATIENT_NUM, obs$PANEL, obs$CONCEPT_CD), ]  # order rows by patient, then path, then concept code
467    fix.types &lt;- transform(sorted, START_DATE = substr(START_DATE, 1, 10), PANEL = as.character(PANEL))  # keep only the first 10 characters of START_DATE (the date part); PANEL as a string
468    transform(fix.types, PANEL = paste(&quot;...&quot;, substr(PANEL, nchar(PANEL) - 10 +
469        1, nchar(PANEL))))  # shorten PANEL to an ellipsis plus its last 10 characters
470}
471markdown.df(head(obs.display(obs.db), 30))
472</code></pre>
473
474<table><thead>
475<tr>
476<th>PATIENT_NUM</th>
477<th>START_DATE</th>
478<th>CONCEPT_CD</th>
479<th>NAME_CHAR</th>
480<th>PANEL</th>
481</tr>
482</thead><tbody>
483<tr>
484<td>3</td>
485<td>1982-07-17</td>
486<td>MOCK:start</td>
487<td>Diagnosed</td>
488<td>&hellip; Diagnosis\</td>
489</tr>
490<tr>
491<td>3</td>
492<td>1981-09-02</td>
493<td>MOCK:start</td>
494<td>Diagnosed</td>
495<td>&hellip; Diagnosis\</td>
496</tr>
497<tr>
498<td>3</td>
499<td>1982-07-17</td>
500<td>MOCK-GRADE:2</td>
501<td>2</td>
502<td>&hellip; 440 Grade\</td>
503</tr>
504<tr>
505<td>3</td>
506<td>1981-09-02</td>
507<td>MOCK-GRADE:4</td>
508<td>4</td>
509<td>&hellip; 440 Grade\</td>
510</tr>
511<tr>
512<td>3</td>
513<td>1985-06-30</td>
514<td>MOCK:end</td>
515<td>Last Contact</td>
516<td>&hellip; t Contact\</td>
517</tr>
518<tr>
519<td>3</td>
520<td>1984-02-10</td>
521<td>MOCK:end</td>
522<td>Last Contact</td>
523<td>&hellip; t Contact\</td>
524</tr>
525<tr>
526<td>4</td>
527<td>1981-12-07</td>
528<td>MOCK:start</td>
529<td>Diagnosed</td>
530<td>&hellip; Diagnosis\</td>
531</tr>
532<tr>
533<td>4</td>
534<td>1981-02-20</td>
535<td>MOCK:start</td>
536<td>Diagnosed</td>
537<td>&hellip; Diagnosis\</td>
538</tr>
539<tr>
540<td>4</td>
541<td>1981-12-07</td>
542<td>MOCK-GRADE:1</td>
543<td>1</td>
544<td>&hellip; 440 Grade\</td>
545</tr>
546<tr>
547<td>4</td>
548<td>1981-02-20</td>
549<td>MOCK-GRADE:4</td>
550<td>4</td>
551<td>&hellip; 440 Grade\</td>
552</tr>
553<tr>
554<td>4</td>
555<td>1983-06-18</td>
556<td>MOCK:end</td>
557<td>Last Contact</td>
558<td>&hellip; t Contact\</td>
559</tr>
560<tr>
561<td>4</td>
562<td>1984-01-04</td>
563<td>MOCK:end</td>
564<td>Last Contact</td>
565<td>&hellip; t Contact\</td>
566</tr>
567<tr>
568<td>5</td>
569<td>1982-07-01</td>
570<td>MOCK:start</td>
571<td>Diagnosed</td>
572<td>&hellip; Diagnosis\</td>
573</tr>
574<tr>
575<td>5</td>
576<td>1982-07-01</td>
577<td>MOCK-GRADE:4</td>
578<td>4</td>
579<td>&hellip; 440 Grade\</td>
580</tr>
581<tr>
582<td>5</td>
583<td>1985-05-16</td>
584<td>MOCK:end</td>
585<td>Last Contact</td>
586<td>&hellip; t Contact\</td>
587</tr>
588<tr>
589<td>6</td>
590<td>1982-05-04</td>
591<td>MOCK:start</td>
592<td>Diagnosed</td>
593<td>&hellip; Diagnosis\</td>
594</tr>
595<tr>
596<td>6</td>
597<td>1982-05-04</td>
598<td>MOCK-GRADE:3</td>
599<td>3</td>
600<td>&hellip; 440 Grade\</td>
601</tr>
602<tr>
603<td>6</td>
604<td>1984-10-01</td>
605<td>MOCK:end</td>
606<td>Last Contact</td>
607<td>&hellip; t Contact\</td>
608</tr>
609<tr>
610<td>8</td>
611<td>1982-03-31</td>
612<td>MOCK:start</td>
613<td>Diagnosed</td>
614<td>&hellip; Diagnosis\</td>
615</tr>
616<tr>
617<td>8</td>
618<td>1982-03-31</td>
619<td>MOCK-GRADE:1</td>
620<td>1</td>
621<td>&hellip; 440 Grade\</td>
622</tr>
623<tr>
624<td>8</td>
625<td>1985-11-23</td>
626<td>MOCK:end</td>
627<td>Last Contact</td>
628<td>&hellip; t Contact\</td>
629</tr>
630<tr>
631<td>9</td>
632<td>1981-06-20</td>
633<td>MOCK:start</td>
634<td>Diagnosed</td>
635<td>&hellip; Diagnosis\</td>
636</tr>
637<tr>
638<td>9</td>
639<td>1980-06-03</td>
640<td>MOCK:start</td>
641<td>Diagnosed</td>
642<td>&hellip; Diagnosis\</td>
643</tr>
644<tr>
645<td>9</td>
646<td>1981-06-12</td>
647<td>MOCK:start</td>
648<td>Diagnosed</td>
649<td>&hellip; Diagnosis\</td>
650</tr>
651<tr>
652<td>9</td>
653<td>1980-11-06</td>
654<td>MOCK:start</td>
655<td>Diagnosed</td>
656<td>&hellip; Diagnosis\</td>
657</tr>
658<tr>
659<td>9</td>
660<td>1980-11-06</td>
661<td>MOCK-GRADE:2</td>
662<td>2</td>
663<td>&hellip; 440 Grade\</td>
664</tr>
665<tr>
666<td>9</td>
667<td>1980-06-03</td>
668<td>MOCK-GRADE:3</td>
669<td>3</td>
670<td>&hellip; 440 Grade\</td>
671</tr>
672<tr>
673<td>9</td>
674<td>1981-06-12</td>
675<td>MOCK-GRADE:3</td>
676<td>3</td>
677<td>&hellip; 440 Grade\</td>
678</tr>
679<tr>
680<td>9</td>
681<td>1981-06-20</td>
682<td>MOCK-GRADE:4</td>
683<td>4</td>
684<td>&hellip; 440 Grade\</td>
685</tr>
686<tr>
687<td>9</td>
688<td>1985-01-11</td>
689<td>MOCK:end</td>
690<td>Last Contact</td>
691<td>&hellip; t Contact\</td>
692</tr>
693</tbody></table>
694
695<h2>Pivoting Entity-Attribute-Value (EAV) Data</h2>
696
697<p>This entity-attribute-value (<a href="http://en.wikipedia.org/wiki/Entity%E2%80%93attribute%E2%80%93value_model">EAV</a>) structure is a bit awkward to deal with, so our <code>km_analysis.R</code> script includes some routines to pivot columns:</p>
698
699<pre><code class="r">obs.t0 &lt;- obs.pivot.date(obs.db, survival.paths$start, &quot;t0&quot;, first.t = T)
700head(obs.t0)
701</code></pre>
702
703<pre><code>##    patient                  t0
704## 5       26 1981-10-16 11:43:38
705## 9       23 1980-04-21 11:41:31
706## 12      19 1980-04-02 21:59:54
707## 15      22 1980-02-04 22:45:44
708## 19       8 1982-03-31 17:53:58
709## 23      15 1980-02-16 12:18:43
710</code></pre>
711
712<pre><code class="r">obs.tend &lt;- obs.pivot.date(obs.db, survival.paths$end, &quot;tend&quot;, first.t = T)
713obs.t &lt;- data.frame(patient = obs.t0$patient, t = difftime(obs.tend$t, obs.t0$t))
714head(obs.t)
715</code></pre>
716
717<pre><code>##   patient           t
718## 1      26  692.8 days
719## 2      23 1892.1 days
720## 3      19 1172.0 days
721## 4      22 1531.1 days
722## 5       8 1010.1 days
723## 6      15 1454.8 days
724</code></pre>
725
726<pre><code class="r">obs.outcome &lt;- obs.pivot.logical(obs.db, survival.paths$event, &quot;outcome&quot;, first.t = T)
727head(obs.outcome)
728</code></pre>
729
730<pre><code>##    patient outcome
731## 18      22    TRUE
732## 36      26    TRUE
733## 57      18    TRUE
734## 91      11    TRUE
735## 95      15    TRUE
736</code></pre>
737
738<pre><code class="r">obs.stratum &lt;- obs.pivot.name(obs.db, survival.paths$stratum, &quot;stratum&quot;, first.t = T)
739head(obs.stratum)
740</code></pre>
741
742<pre><code>##    patient stratum
743## 2       26       3
744## 8        6       3
745## 10      23       4
746## 14      16       2
747## 17      18       4
748## 20       9       3
749</code></pre>
750
751<p>Note the use of <code>first.t=T</code> to select the earliest observation in the case of multiple cases for a patient.</p>
752
753<h2>A Survival Data Set</h2>
754
755<p>We can merge all the observations into one <code>data.frame</code>;<br/>
756note the use of <code>all.x=TRUE</code> a la an SQL left join:</p>
757
758<pre><code class="r">data &lt;- merge(merge(obs.t, obs.outcome, all.x = TRUE), obs.stratum, all.x = TRUE)
759data$outcome &lt;- ifelse(is.na(data$outcome), F, data$outcome)
760data$t &lt;- as.numeric(data$t)/365
761data
762</code></pre>
763
764<pre><code>##    patient      t outcome stratum
765## 1        3 3.0824   FALSE       4
766## 2        4 4.4925   FALSE       4
767## 3        5 0.6104   FALSE       4
768## 4        6 0.6618   FALSE       3
769## 5        8 2.7675   FALSE       1
770## 6        9 4.3296   FALSE       3
771## 7       10 1.1639   FALSE       3
772## 8       11 2.7948    TRUE       1
773## 9       12 2.5302   FALSE       4
774## 10      13 2.0049   FALSE       1
775## 11      14 1.8095   FALSE       2
776## 12      15 3.9857    TRUE       3
777## 13      16 1.7948   FALSE       2
778## 14      17 4.6085   FALSE       2
779## 15      18 2.3304    TRUE       4
780## 16      19 3.2109   FALSE       3
781## 17      20 5.3016   FALSE       2
782## 18      21 2.5090   FALSE       2
783## 19      22 4.1948    TRUE       3
784## 20      23 5.1837   FALSE       4
785## 21      26 1.8982    TRUE       3
786## 22      27 2.5882   FALSE       3
787## 23      28 5.0354   FALSE       2
788</code></pre>
789
790<h3>Survival Plot</h3>
791
792<p>The <a href="http://cran.r-project.org/web/packages/survival//index.html">R survival package</a> provides plotting support:</p>
793
794<pre><code class="r">library(survival)
795fit &lt;- survfit(Surv(data$t, data$outcome) ~ data$stratum, data)
796labels &lt;- sort(unique(data$stratum))
797colors &lt;- rainbow(length(labels))
798plot(fit, col = colors)
799</code></pre>
800
801<p><img src="" alt="plot of chunk survival_plot"/> </p>
802
803</body>
804
805</html>
806