Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/idx_i2b2_transform.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 13.2 KB
Line 
1/* IDX to 12b2 ETL part I: transform
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7*/
8
9-- Test that we're in the IDXP sid.
10select HOSPITAL_MEDICAL_RECORD from "&&kupi".idx_table where 1=0;
11
12whenever sqlerror continue; -- in case the indexes already exist
13create index "&&kupi".idx_table_mrn
14  on "&&kupi".idx_table (HOSPITAL_MEDICAL_RECORD);
15create index "&&kupi".idx_table_bill
16  on "&&kupi".idx_table (bill_inv_number);
17whenever sqlerror exit;
18
19/* During the HERON Sappa release (#2099), we found that in one case there were
20two MRNs for the same bill_inv_number.  In order to continue with ETL this time,
21I created ticket #2116 to investigate further but for now just exclude those
22MRNs.  The test below makes sure that there is only one case where the
23bill_inv_number maps to multiple (only 2) MRNs.
24*/
25whenever sqlerror continue;
26  drop table idx_dup_mrns;
27whenever sqlerror exit;
28
29
30create table idx_dup_mrns as
31with
32bill_inv_dup as (
33  select qty, bill_inv_number from (
34    select count(distinct HOSPITAL_MEDICAL_RECORD) qty, bill_inv_number
35    from "&&kupi".idx_table
36    group by bill_inv_number
37    )
38  where qty > 1
39  )
40select distinct idx.hospital_medical_record mrn, bd.bill_inv_number
41from bill_inv_dup bd
42join "&&kupi".idx_table idx on idx.bill_inv_number = bd.bill_inv_number
43;
44commit;
45
46select case when count(*) > 0  then 1/0 else 1 end as mostly_unique_mrn_per_bill_inv from (
47  select * from idx_dup_mrns
48  )
49;
50
51
52/*******************
53 * Patient_dimension
54 */
55
56/* TODO: handle IDX data with null MRN.
57   For now, just be sure it's in the noise.
58 */
59
60
61select case when with_mrn / tot > 0.99 then 1 else 1/0 end as test_result from (
62select
63(select count(*)
64from "&&kupi".idx_table) as tot,
65(select count(*)
66from "&&kupi".idx_table
67where HOSPITAL_MEDICAL_RECORD is not null) as with_mrn
68from dual
69);
70
71create or replace view idx_patient_dimension
72as select distinct kp.HOSPITAL_MEDICAL_RECORD as MRN
73  , mod(ora_hash(HOSPITAL_MEDICAL_RECORD), &&heron_etl_chunks)+1 as part
74from "&&kupi".idx_table kp
75where HOSPITAL_MEDICAL_RECORD is not null
76and kp.hospital_medical_record not in
77  (select mrn from idx_dup_mrns);
78
79
80/*****************
81 * idx_visit_dimension
82 */
83
84
85create or replace view idx_visit_dimension as
86select BILL_INV_NUMBER ENCOUNTER_IDE
87     , 'A' as encounter_ide_status
88     , MRN PATIENT_IDE
89     , min(to_date(service_date, 'mm/dd/yyyy')) START_DATE
90     , max(to_date(service_date, 'mm/dd/yyyy')) end_date
91     , mod(BILL_INV_NUMBER, &&heron_etl_chunks)+1 as part
92from (select distinct
93                BILL_INV_NUMBER
94              , HOSPITAL_MEDICAL_RECORD MRN
95              , service_date
96      from "&&kupi".idx_table where hospital_medical_record not in (
97        select mrn from idx_dup_mrns
98        )
99      )
100group by BILL_INV_NUMBER, 'A', MRN;
101
102
103/***************
104 */
105create or replace view provider_dimension as
106select
107    substr(division_name, 1, 5)||'-'||ora_hash(service_provider) provider_id
108  , division_name || '\' || service_provider provider_path
109  , service_provider name_char
110from (select distinct service_provider, division_name from "&&kupi".idx_table);
111
112
113/* Demographic observation facts: punt to Epic, for now. (#406) */
114
115
116/*********
117 * Hictr Flag
118 */
119
120/* Presuming bill_inv_number increases with time, this picks out the
121   most recent visit.*/
122create or replace view pick_visit as
123select max(vd.encounter_ide) encounter_ide, vd.patient_ide
124from idx_visit_dimension vd
125group by vd.patient_ide;
126
127
128create or replace view observation_fact_hictr as
129select vd.encounter_ide
130     , kit.MRN as patient_ide
131     , 'HICTR_PARTICIPANT:yes' concept_cd
132     , '@' provider_id
133     , vd.start_date
134     , '@' modifier_cd
135     , 1  instance_num
136     , '@' valtype_cd
137     , '@' tval_char
138     , to_number(null) nval_num
139     , null valueflag_cd
140     , null units_cd
141     , vd.end_date
142     , null location_cd -- TODO: use division_name?
143     , to_number(null) confidence_num
144     , to_date(null) update_date
145     , mod(ora_hash(kit.MRN), &&heron_etl_chunks)+1 as part
146from ((select distinct kit.hospital_medical_record as MRN
147  from "&&kupi".idx_table kit
148  where kit.hipaa_date_opt_in is not null)
149  minus
150  (select distinct kit.hospital_medical_record as MRN
151  from "&&kupi".idx_table kit
152  where kit.hipaa_date_opt_in is not null
153  and to_date(kit.hipaa_date_opt_in, 'mm/dd/yyyy') <= to_date(kit.hipaa_date_revoked, 'mm/dd/yyyy'))) kit
154join pick_visit
155  on pick_visit.patient_ide = kit.mrn
156join idx_visit_dimension vd
157  on vd.encounter_ide = pick_visit.encounter_ide
158;
159
160/* We have at least 1.5% of all IDX in Frontiers. */
161select case when div > .015 then 1 else 1/0 end as test_result from (
162  with
163    hictr as ( select count(*) hictr_num from observation_fact_hictr hictr ),
164    tot as ( select count(*) tot_num from ( select distinct(hospital_medical_record) from "&&kupi".idx_table ) )
165  select hictr_num / tot_num as div
166  from hictr, tot
167);
168
169/************
170 * #2896 : add email adress as a fact in Heron
171 * per ticket:2896#comment:29, striking the obsolete code per #2116
172 */
173create or replace view observation_fact_email as
174select
175    bill_inv_number encounter_ide
176  , HOSPITAL_MEDICAL_RECORD patient_ide
177  , schemes.demo_idx_email_scheme concept_cd
178  , '@' provider_id
179  , to_date(service_date, 'mm/dd/yyyy') start_date
180  , '@' modifier_cd
181  , rownum  instance_num -- data seems to include duplicate records
182  , '@' valtype_cd
183  , '@' tval_char
184  , to_number(null) nval_num
185  , '@' valueflag_cd
186  , null units_cd
187  , to_date(service_date, 'mm/dd/yyyy') end_date
188  , '@' location_cd
189  , to_number(null) confidence_num
190  , to_date(null) update_date
191  , mod(bill_inv_number, &&heron_etl_chunks)+1 as part
192from "&&kupi".idx_table
193cross join scheme_labels@deid schemes
194where patient_email is not null
195;
196
197
198/**************
199 * Diagnoses
200 */
201create or replace view observation_fact_dx as
202with
203dx_mods as (
204select 'DiagObs:Primary' modifier_cd from dual
205union all
206select 'DiagObs:Clinic' from dual),
207dx_pivot as (
208select distinct *
209from ( (
210  select diagnosis_number_1 diagnosis
211       , 1 instance_num
212       , bill_inv_number, hospital_medical_record
213       , service_date
214       , division_name
215       , service_provider
216      from "&&kupi".idx_table
217) union (
218  select diagnosis_number_2 diagnosis
219       , 2 instance_num
220       , bill_inv_number, hospital_medical_record
221       , service_date
222       , division_name
223       , service_provider
224      from "&&kupi".idx_table
225) union (
226  select diagnosis_number_3 diagnosis
227       , 3 instance_num
228       , bill_inv_number, hospital_medical_record
229       , service_date
230       , division_name
231       , service_provider
232      from "&&kupi".idx_table
233) union (
234  select diagnosis_number_4 diagnosis
235       , 4 instance_num
236       , bill_inv_number, hospital_medical_record
237       , service_date
238       , division_name
239       , service_provider
240      from "&&kupi".idx_table
241) ) where diagnosis is not null
242),
243dx_per_mod as (
244 select * from dx_pivot, dx_mods
245 where dx_mods.modifier_cd = 'DiagObs:Clinic'
246 or (dx_mods.modifier_cd = 'DiagObs:Primary'
247     and instance_num = 1)
248)
249
250select
251    dx.bill_inv_number encounter_ide
252  , dx.hospital_medical_record patient_ide
253  , 'ICD9:' ||
254     -- There is no '486.' so change it to '486'
255     case
256       when dx.diagnosis like '%.' then substr(dx.diagnosis, 1, length(dx.diagnosis)-1)
257       else dx.diagnosis
258     end concept_cd
259  , substr(division_name, 1, 5)||'-'||ora_hash(service_provider) provider_id
260  , to_date(service_date, 'mm/dd/yyyy') start_date
261  , modifier_cd
262  , (bill_inv_number * 10) + instance_num as instance_num
263  , '@' valtype_cd
264  , '@' tval_char
265  , to_number(null) nval_num
266  , null valueflag_cd
267  , null units_cd
268  , to_date(service_date, 'mm/dd/yyyy') end_date
269  , null location_cd -- TODO: use division_name?
270  , to_number(null) confidence_num
271  , to_date(null) update_date
272  , mod(ora_hash(dx.bill_inv_number), &&heron_etl_chunks)+1 as part
273from dx_per_mod dx
274where hospital_medical_record not in (
275  select mrn from idx_dup_mrns
276  )
277;
278
279
280/************
281 * Procedures
282 */
283create or replace view observation_fact_procedure as
284select
285    bill_inv_number encounter_ide
286  , HOSPITAL_MEDICAL_RECORD patient_ide
287  , 'CPT:' || proc_code_number concept_cd
288  , '@' provider_id
289  , to_date(service_date, 'mm/dd/yyyy') start_date
290  , '@' modifier_cd
291  , rownum  instance_num -- data seems to include duplicate records
292  , '@' valtype_cd
293  , '@' tval_char
294  , to_number(null) nval_num
295  , '@' valueflag_cd
296  , null units_cd
297  , to_date(service_date, 'mm/dd/yyyy') end_date
298  , '@' location_cd
299  , to_number(null) confidence_num
300  , to_date(null) update_date
301  , mod(bill_inv_number, &&heron_etl_chunks)+1 as part
302from "&&kupi".idx_table
303where hospital_medical_record not in (
304  select mrn from idx_dup_mrns
305  );
306
307
308/************
309* Area facts (Areas w/in Clinical Service Lines)
310 */
311 -- multiple procedures to an area can occur during the same encounter/visit
312create or replace view observation_fact_area as
313select distinct
314    bill_inv_number encounter_ide
315  , HOSPITAL_MEDICAL_RECORD patient_ide
316  , 'KUMC|VISITDETAIL|OPSERVICES:'||division_code||'|OPAREAS:' || billing_area_code concept_cd
317  , '@' provider_id
318  , to_date(service_date, 'mm/dd/yyyy') start_date
319  , '@' modifier_cd
320  , bill_inv_number instance_num 
321  , '@' valtype_cd
322  , '@' tval_char
323  , to_number(null) nval_num
324  , '@' valueflag_cd
325  , null units_cd
326  , to_date(service_date, 'mm/dd/yyyy') end_date
327  , '@' location_cd
328  , to_number(null) confidence_num
329  , to_date(null) update_date
330  , mod(bill_inv_number, &&heron_etl_chunks)+1 as part
331from "&&kupi".idx_table
332where hospital_medical_record not in (
333  select mrn from idx_dup_mrns
334  );
335
336
337/************
338* Place of Service
339 */
340create or replace view observation_fact_pos as
341select distinct
342    bill_inv_number encounter_ide
343  , HOSPITAL_MEDICAL_RECORD patient_ide
344  ,'KUMC|VISITDETAIL|POS(IDX):'||hcfavalue concept_cd
345  , '@' provider_id
346  , to_date(service_date, 'mm/dd/yyyy') start_date
347  , '@' modifier_cd
348  , bill_inv_number instance_num 
349  , '@' valtype_cd
350  , '@' tval_char
351  , to_number(null) nval_num
352  , '@' valueflag_cd
353  , null units_cd
354  , to_date(service_date, 'mm/dd/yyyy') end_date
355  , '@' location_cd
356  , to_number(null) confidence_num
357  , to_date(null) update_date
358  , mod(bill_inv_number, &&heron_etl_chunks)+1 as part
359from "&&kupi".idx_table
360where hospital_medical_record not in (
361  select mrn from idx_dup_mrns
362  );
363
364
365/***********
366 * Billing and Service Provider
367 */
368create or replace view provider_dimension_idx as
369  -- handle providers whose names are spelled different ways
370select provider_id, min(name_char) name_char, min(npi) npi from (
371select kit.service_provider_code provider_id, kit.service_npi_number npi, kit.service_provider name_char from kupi.idx_table kit
372union all
373select kit.billing_provider_code, kit.billing_npi_number, kit.billing_provider from kupi.idx_table kit
374union all
375select kit.ccp_code, kit.ccp_npi_number, kit.ccp_name from kupi.idx_table kit
376)
377where provider_id is not null
378  and name_char is not null
379group by provider_id
380-- TODO: provider_path from division, billing_area
381-- use the path where the provider occurs most often? or combine division code, billing area code into provider_id?
382;
383-- eyeball it:
384-- select * from provider_dimension_idx;
385
386create or replace view observation_fact_idx_provider
387as
388  select distinct bill_inv_number encounter_ide, hospital_medical_record patient_ide
389  , concept_cd
390  , provider_code provider_id
391  ,  to_date(service_date, 'mm/dd/yyyy') start_date
392    /* It seems worthwhile using modifier_cd to record the source of this info,
393    though Diag... is a bit of a misnomer. YAGNI? */
394  , 'DiagObs:Clinic' modifier_cd
395  , rownum instance_num -- handle the case of different providers in the same bill_inv_number visit
396  , 'Ti' valtype_cd -- _i for identified info
397  -- provide provider name as an expedient to avoid adding provider_dimension to databuilder; cf #3696
398  , provider_name tval_char
399  , to_number(null) nval_num, '@' valueflag_cd, null units_cd
400  , to_date(service_date, 'mm/dd/yyyy') end_date
401  , '@' location_cd
402  , to_number(null) confidence_num
403  , mod(bill_inv_number, &&heron_etl_chunks) + 1 as part
404  from
405    (
406    /* Use group by to handle multiple procedures by the same Dr. in the same visit/bill,
407     * same provider_code with different spellings of provider_name.
408     */
409    select bill_inv_number, hospital_medical_record, service_date
410         , concept_cd, provider_code, min(provider_name) provider_name
411    from
412      (select kit.bill_inv_number, kit.hospital_medical_record, service_date
413      , 'PROVIDER|SERVICE:' concept_cd, kit.service_provider_code provider_code, kit.service_provider provider_name
414      from kupi.idx_table kit
415      union all
416      select kit.bill_inv_number, kit.hospital_medical_record, service_date
417      , 'PROVIDER|BILLING:' concept_cd, kit.billing_provider_code provider_code, kit.billing_provider provider_name
418      from kupi.idx_table kit
419      union all
420      select kit.bill_inv_number, kit.hospital_medical_record, service_date
421      , 'PROVIDER|CCP:' concept_cd, kit.ccp_code provider_code, kit.ccp_name provider_name
422      from kupi.idx_table kit
423      ) where provider_code is not null
424      group by bill_inv_number, hospital_medical_record, service_date, concept_cd, provider_code
425    ) ;
426
427-- select * from observation_fact_idx_provider;
428
Note: See TracBrowser for help on using the repository browser.