Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/bsr_i2b2_transform.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 4.2 KB
Line 
1/* bsr to 12b2 ETL part I: transform 
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7This is the first step to include another data source in the existing build
8In this code we are bulding the following views
9bsr_patient_dimension, bsr_visit_dimension, observation_fact_bsr
10Next step is to build bsr_dimension_load.sql.
11
12*/
13
14-- Test that we're in the KUMC sid.
15select MRN from BSR.bsr_table where 1=0;
16
17whenever sqlerror continue; -- in case the indexes already exist
18create index BSR.bsr_table_mrn
19  on BSR.bsr_table (MRN);
20whenever sqlerror exit;
21
22/*******************
23 Patient_dimension
24 *******************/
25
26/* Table ddl ensures that it does not  have null MRNs */
27
28create or replace view bsr_patient_dimension
29as select distinct  ltrim(kp.MRN, '0') as MRN
30  , mod(ora_hash(MRN), &&heron_etl_chunks)+1 as part
31from BSR.bsr_table kp
32where MRN is not null;
33
34
35/*****************
36  bsr_visit_dimension
37 ******************/
38
39/* We assume sample_id is unique. We once had to clean up the data
40to ensure this:
41
42truncate table bsr_distinct;
43drop table bsr_distinct;
44
45create global temporary table bsr_distinct
46on commit preserve rows
47as select distinct * from bsr.bsr_table;
48
49select tot, cleaned, tot-cleaned from (
50select
51 (select count(*) from bsr.bsr_table) as tot,
52 (select count(*) from bsr_distinct) as cleaned
53 from dual);
54
55truncate table bsr.bsr_table;
56
57insert into bsr.bsr_table
58select * from bsr_distinct;
59*/
60
61/** What to do with missing dates?
62
63For now, test that there are few of them and skip them.
64
65cf http://informatics.kumc.edu/work/ticket/907#comment:7
66*/
67select missing_dates / total,
68case when missing_dates / total < 0.001 then 1 else 1/0
69       end as few_missing_bsr_dates
70       from (
71select
72 (select count(*)
73  from bsr.bsr_table b
74  where date_collected is null) missing_dates,
75 (select count(*)
76  from bsr.bsr_table b) total
77from dual
78);
79
80/* Handle N/A or null SURGPATH */
81
82---Sometimes the surgpath are duplicate
83
84create or replace view bsr_visit as
85select case when SURGPATH is null
86                 or SURGPATH in ('N/A', 'NA', 'n/a')
87            then to_char(date_collected, 'yyyy-mm-dd') || ',' || MRN
88            else surgpath || ',' || MRN end as visit
89    , b.*
90from bsr.bsr_table b
91where date_collected is not null;
92
93----
94           
95
96/* The visit determines the MRN */
97/* todo: figure out how to use having instead of rownum. */
98select case when qty = 1 then 1 else 1/0 end as one_mrn_per_surgpath from (
99 select * from (
100  select count(distinct MRN) qty, visit
101  from bsr_visit
102  group by visit
103  order by 1 desc
104 ) where rownum < 2
105);
106
107
108/* We use SURGPATH to collect samples into visits/encounters.
109 * Note that these encounters may span dates, hence the min/max below.
110 *
111 * In a few odd cases, the same SURGPATH is used for different patients.
112 * And sometimes there is no surgpath at all
113 */
114create or replace view bsr_visit_dimension as
115select visit ENCOUNTER_IDE
116     , 'A' as encounter_ide_status
117     , ltrim(MRN, '0') PATIENT_IDE
118     , min(date_collected) START_DATE
119     , max(date_collected) end_date
120     , 1 as part
121from (select visit
122              , MRN
123              , date_collected
124  from bsr_visit)
125  group by visit, 'A', MRN;
126
127/*********************
128  observation_fact_bsr
129 *********************/
130
131
132create or replace view observation_fact_bsr as
133select vd.encounter_ide
134     , ltrim(bt.MRN, '0') as patient_ide
135     , 'KUMC|BSR|SAMPLE_DESC:'||bt.sample_desc concept_cd
136     , '@' provider_id
137     , vd.start_date
138     , '@' modifier_cd
139     , bt.sample_id instance_num
140     , 'T' valtype_cd
141     , '@' tval_char
142     , to_number(null) nval_num
143     , null valueflag_cd
144     , null units_cd
145     , vd.end_date
146     , null location_cd
147     , to_number(null) confidence_num
148     , to_date(null) update_date
149     , mod(ora_hash(bt.MRN), &&heron_etl_chunks)+1 as part
150from bsr_visit bt
151join bsr_visit_dimension vd
152  on vd.encounter_ide = bt.visit
153;
154
155/* Make sure these join conditions are true in at least some cases.
156   (regression test) */
157select case qty when 0 then 1/0 else 1 end as some_bsr_facts
158from (
159select count(*) qty
160    from observation_fact_bsr
161);
Note: See TracBrowser for help on using the repository browser.