Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_labs_transform.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 12.2 KB
Line 
1/* EPIC to i2b2 Labs ETL: transform
2
3 * Copyright (c) 2013 University of Kansas Medical Center
4 * part of the HERON* open source codebase; see NOTICE file for license details.
5 * http://informatics.kumc.edu/work/wiki/HERON
6
7* Load the epic clinical lab details into i2b2.
8*/
9
10
11/*****************
12 * Labs
13 *
14 * TODO: parse >60 into nval_num=60, valueflag_cd=gt
15 *       likewise <0.2, < 4
16 *****************/
17
18/* Check that we're connected to an Epic CLARITY database.
19   */
20select pat_id from CLARITY.patient where 1 = 0;
21
22/*
23select count(distinct to_number(cop.order_type_c))
24from CLARITY.order_results cor
25  join CLARITY.order_proc cop on cor.order_proc_id = cop.order_proc_id;
26*/
27
28
29/* ===================
30 * Create Labs Summary
31 * ===================
32 * This is used multiple times to process certain lab values and identify
33 * the last lab value for a patient.
34 */
35whenever sqlerror continue;
36drop table clarity_lab_summary_tbl;
37whenever sqlerror exit;
38
39create table clarity_lab_summary_tbl as
40with lab_values as
41(select
42     cop.pat_enc_csn_id,
43     cor.pat_id,
44     cor.component_id,
45     (case when cor.result_time is null then cpe.contact_date
46           else cor.result_time
47     end) as result_time,
48     cor.order_proc_id,
49     cor.line,
50     cor.ord_num_value,
51     cor.ord_value,
52     cor.reference_unit,
53     cop.order_type_c,
54     reference_low,
55     reference_high
56
57from clarity.order_results cor
58  join clarity.order_proc cop on cor.order_proc_id = cop.order_proc_id
59  join clarity.pat_enc cpe on cpe.pat_enc_csn_id = cop.pat_enc_csn_id
60where cor.result_status_c = 3
61    -- select result_status_c from clarity.zc_result_status where abbr='Final'
62  and cop.order_type_c in (7, 61)
63    -- select order_type_c from clarity.zc_order_type where abbr in ('Lab', 'CONVERSION')
64  and cor.component_id in (select distinct componentid from component_whitelist)
65    --select components from the labs whitelist in the curated data
66  and cor.ord_num_value is not null -- filtering final might imply this.
67),
68
69cardio_values as
70(select
71     cop.pat_enc_csn_id,
72     cor.pat_id,
73     cor.component_id,
74     (case when cor.result_time is null then cpe.contact_date
75           else cor.result_time
76     end) as result_time,
77     cor.order_proc_id,
78     cor.line,
79     cor.ord_num_value,
80     cor.ord_value,
81     cor.reference_unit,
82     cop.order_type_c,
83     reference_low,
84     reference_high
85
86from clarity.order_results cor
87  join clarity.order_proc cop on cor.order_proc_id = cop.order_proc_id
88  join clarity.pat_enc cpe on cpe.pat_enc_csn_id = cop.pat_enc_csn_id
89where cop.order_type_c in (1785,29,5,1783,1781)
90   /*1785-Device Check; 29-Echo; 5-Imaging; 1783-Nuclear Cardiology; 1781-CV Peripheral Vascular*/
91  and cor.component_id in (select distinct componentid from component_whitelist)
92    --select components from the labs whitelist in the curated data
93  and cor.ord_num_value is not null -- filtering final might imply this.
94    /*Cardio orders in the order_results are the procedures that are performed
95    and are not the lab tests like order_type_c 7 and 61.
96    Therefore, result_status_c=3 will not work for Cardio order results
97    as there is only result and currently the result_status_c for these
98    tests not currently populated in epic.*/
99--select count(*) from clarity_lab_summary_tbl;
100),
101
102regex_const as
103(select '^\d+(\.\d{1,2})?$' numeric_w_d, '%-%' has_hyphen
104 -- '^\d+(\.\d{1,2})?$' looks for only numerics/decimals.
105 from dual
106),
107
108all_labs as
109(select * from lab_values
110 union all select * from cardio_values
111),
112
113/* Validate reference_low,reference_high values;Check for numeric/decimals and
114no '-' in reference_low and reference_high; */
115validate_low_high_ord as
116(select lv.*,
117        case when lv.reference_low is not null
118             and regexp_like(lv.reference_low, const.numeric_w_d)
119             and (lv.reference_low not like const.has_hyphen)
120        then 1 else 0 end valid_low,
121        case when
122             lv.reference_high is not null
123             and regexp_like(lv.reference_high, const.numeric_w_d)
124             and (lv.reference_high not like const.has_hyphen)
125        then 1 else 0 end valid_high,
126        case when lv.ord_num_value <> 9999999
127             and lv.ord_value is not null
128        then 1 else 0 end valid_ord
129 from (all_labs) lv,
130       regex_const const
131)
132
133select pat_enc_csn_id,
134       pat_id,
135       component_id,
136       result_time,
137       order_proc_id,
138       line,
139       ord_num_value,
140       ord_value,
141       reference_unit,
142       order_type_c,
143       reference_low,
144       reference_high,
145       case when valid_ord = 1 and valid_low = 1 and valid_high = 1
146             and (cast(ord_value as number)
147                  between cast(reference_low as number)
148                     and cast(reference_high as number))
149            then '@' --Normal
150            when valid_ord = 1 and valid_low = 1
151             and cast(ord_value as number) < cast(reference_low as number)
152            then 'L' --Low
153            when valid_ord = 1 and valid_high = 1
154             and cast(ord_value as number) > cast(reference_high as number)
155            then 'H' --High
156            else ''
157       end as result_flag
158from validate_low_high_ord;
159/*According to i2b2 design document, for numeric results(VALTYPE_CD="N" or
160ord_num_value<>9999999) @,H,L are used to flag the outlying or abnormal values;
161@=Normal,H=High and L=Low */
162
163/* ===========================
164 * Extract relation lab values
165 * ===========================
166 * Create a summary table of all of the unique lab values having the curated lab
167 * regex table ("LAB_VALUE_REGEX") applied.
168 *
169 * Example
170 * -------
171 *
172 * ord_value      '>(\d*\.)?\d+'            extracted_value    vf_cd
173 * '>60'     ------------------------>            60            'G'
174 *
175 *            (\d*\.)?\d+-(\d*\.)?\d+
176 * '2-5'     ------------------------>            2             'GE'
177 *  ...                                           5             'LE'
178 */
179whenever sqlerror continue;
180drop table lab_regex_summary_tbl;
181whenever sqlerror exit;
182
183create table lab_regex_summary_tbl as
184
185with distinct_ord_value as
186(select distinct ord_value from clarity_lab_summary_tbl
187 where ord_num_value = 9999999
188   -- SPEED HACK: prefilters assuming that all of the values we care about either
189   -- start with '<' or '>', contain a '-', or end with '+'
190   and (ord_value like '<%'
191     or ord_value like '>%'
192     or ord_value like '%-%'
193     or ord_value like '%+'
194   )
195),
196
197reg_exp_collection as
198(select '(\d*\.)?\d+' as dec_num from dual
199),
200
201labs_value_regex_subst as
202(select regexp_replace(regex, '\[:dec_num:\]', dec_num) as regex, vf_cd, num_occ
203 from lab_value_regex, reg_exp_collection
204)
205
206select
207       dov.ord_value
208     , lab_regex.regex
209     , lab_regex.vf_cd
210     , to_number
211       (case when (lab_regex.num_occ = 1) then regexp_substr(dov.ord_value, rec.dec_num)
212             when (lab_regex.num_occ = 2) then regexp_substr(dov.ord_value, rec.dec_num, 1, 2)
213             else 1/0||''
214        end) as extracted_value
215     , num_occ as unique_id
216
217from reg_exp_collection rec, distinct_ord_value dov
218join labs_value_regex_subst lab_regex on regexp_like(dov.ord_value, '^'||lab_regex.regex||'$');
219
220whenever sqlerror continue;
221drop index lab_regex_summary_tbl_idx;
222whenever sqlerror exit;
223
224create index lab_regex_summary_tbl_idx on lab_regex_summary_tbl(ord_value);
225
226
227
228/* ====================================
229 * Create the observation_fact_lab view
230 * ====================================
231 */
232
233create or replace view observation_fact_lab as
234with
235
236normal_lab_values as
237(
238   select
239      cls.pat_enc_csn_id
240    , cls.pat_id
241    , cls.component_id
242    , cls.result_time
243    , cls.order_proc_id
244    , cls.order_type_c
245    , cls.line
246    , (case when cls.ord_num_value = 9999999 then 'T'
247            else 'N'
248       end) as valtype_cd
249    , (case when cls.ord_num_value = 9999999 then cls.ord_value
250            else 'E' -- i2b2 doc says 'EQ', but code uses 'E'. todo: report to devs
251       end) as tval_char
252    , (case when cls.ord_num_value = 9999999 then null
253            else cls.ord_num_value
254       end) as nval_num
255    , result_flag as valueflag_cd
256    , cls.reference_unit
257    , '@' as modifier_cd
258    , 1 * 10 as unique_lab_value
259   from clarity_lab_summary_tbl cls
260),
261
262extracted_lab_values as
263(select
264    /*+ LEADING(lrs, cls)
265        INDEX(lrs, lab_regex_summary_tbl_idx)*/
266      cls.pat_enc_csn_id
267    , cls.pat_id
268    , cls.component_id
269    , cls.result_time
270    , cls.order_proc_id
271    , cls.order_type_c
272    , cls.line
273    , 'N' as valtype_cd
274    , lrs.vf_cd as tval_char
275    , lrs.extracted_value as nval_num
276    , result_flag as valueflag_cd
277    , cls.reference_unit
278    , '@' as modifier_cd
279    , 2 * 10 + to_number(lrs.unique_id) as unique_lab_value
280 from clarity_lab_summary_tbl cls
281 join lab_regex_summary_tbl lrs on cls.ord_value = lrs.ord_value
282 where ord_num_value = 9999999
283),
284
285processed_lab_values as
286(
287   select * from normal_lab_values union all
288   select * from extracted_lab_values
289),
290
291last_lab_values as
292(
293   select distinct
294          pat_enc_csn_id
295        , pat_id
296        , component_id
297        , result_time
298        , order_proc_id
299        , order_type_c
300        , line
301        , valtype_cd
302        , tval_char
303        , nval_num
304        , valueflag_cd
305        , reference_unit
306        , 'Labs|Aggregate:Last' as modifier_cd
307        , unique_lab_value
308   from
309   (
310     -- http://stackoverflow.com/questions/121387/fetch-the-row-which-has-the-max-value-for-a-column
311     select plv.*, max(plv.result_time) over (partition by plv.pat_id,
312                                                           component_id,
313                                                           reference_unit) as max_result_time
314     from processed_lab_values plv
315   )
316   where result_time = max_result_time
317),
318
319median_lab_value as
320(
321   select distinct
322          pat_enc_csn_id
323        , pat_id
324        , component_id
325        , result_time
326        , order_proc_id
327        , order_type_c
328        , line
329        , valtype_cd
330        , tval_char
331        , nval_num
332        , valueflag_cd
333        , reference_unit
334        , 'Labs|Aggregate:Median' as modifier_cd
335        , unique_lab_value
336   from
337   (
338     -- http://stackoverflow.com/questions/121387/fetch-the-row-which-has-the-max-value-for-a-column
339     select nlv.*,
340            median(nlv.nval_num) over (partition by nlv.pat_id,
341                                                    component_id,
342                                                    reference_unit) as median_nval_num,
343            max(nlv.result_time) over (partition by nlv.pat_id,
344                                                    component_id,
345                                                    reference_unit,
346                                                    nlv.nval_num) as max_result_time
347     from normal_lab_values nlv
348   )
349   where nval_num = median_nval_num
350     and result_time = max_result_time
351),
352
353all_lab_vals as
354(
355   select * from normal_lab_values
356   union all select * from extracted_lab_values
357   union all select * from last_lab_values
358   union all select * from median_lab_value
359)
360
361select
362     to_char(alv.pat_enc_csn_id) encounter_ide,
363     mod(alv.pat_enc_csn_id, &&heron_etl_chunks)+1 as part,
364     alv.pat_id patient_ide,
365     'KUH|COMPONENT_ID:' || alv.component_id concept_cd,
366     '@' provider_id,
367     alv.result_time  start_date,
368     alv.modifier_cd,
369     /* use alv key to ensure uniqueness of observation_fact key
370      * key is ORDER_PROC_ID, LINE, UNIQUE_LAB_VALUE
371      * Max (line) was measured at 508, so 10000 should be enough.
372      *
373      * UNIQUE_LAB_VALUE differentiates between the same fact has its
374      * value represented differently (e.g. '50-60' is represented as 3 facts:
375      * Type      Value                        UNIQUE_LAB_VALUE   
376      * string -> '50-60'                            10 
377      * number -> 50 with a tval_char of 'GE'        21
378      * number -> 60 with a tval_char of 'LE'        22
379      */
380     alv.order_proc_id * 1000000 + alv.line * 100 + unique_lab_value instance_num,
381     alv.order_type_c,
382     alv.valtype_cd,
383     alv.tval_char,
384     alv.nval_num,
385     alv.valueflag_cd,
386     alv.reference_unit units_cd,
387     alv.result_time end_date, -- TODO: better end date?
388     null location_cd,
389     to_number(null) confidence_num,
390     to_date(null) update_date, -- TODO: better update_date?
391     alv.component_id -- for batching
392from all_lab_vals alv;
Note: See TracBrowser for help on using the repository browser.