Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_dimensions_load.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 18.8 KB
Line 
1/* epic_dimensions_load.sql: Load i2b2 dimensions from Epic Clarity.
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7For an overview, see http://informatics.kumc.edu/work/wiki/HeronLoad
8
9We assume the following permissions:
10 grant create view to &me;
11 grant select any table to &me;
12and perhaps some others (e.g. insert).
13*/
14
15/* Check that we're connected to the identified repository
16   and the datamart tables have been created a la
17   edu.harvard.i2b2.data/Release_1-4/NewInstall/Demodata/scripts/crc_create_datamart_oracle.sql
18   */
19select * from NightHeronData.observation_fact where 1 = 0;
20
21/* Check that the uploader service tables have been created a la
22   i2b2/edu.harvard.i2b2.data/Release_1-4/NewInstall/Demodata/scripts/crc_create_uploader_oracle.sql
23*/
24select * from NightHeronData.upload_status where 1 = 0;
25
26/* Check that the views from epic_i2b2_transform.sql have
27   been created.*/
28select * from hospital_visit_dimension where 1=0;   
29
30/* epic_audit_info looks up the source_cd for this ETL job. */
31create or replace view epic_audit_info as
32select * from BlueHeronData.source_master@deid
33where source_cd like 'Epic@%';
34
35/* Alternate source of encounter IDs. */
36create or replace view epic_hsp_audit_info as
37select
38  substr(source_cd, 1, instr(source_cd, '@') - 1) ||
39  '+hsp_account_id' ||
40  substr(source_cd, instr(source_cd, '@')) source_cd
41from epic_audit_info;
42
43/* Encounter IDs based on PAT_ID, day */
44create or replace view epic_pat_day_audit_info as
45select
46  substr(source_cd, 1, instr(source_cd, '@') - 1) ||
47  '+pat_id_day' ||
48  substr(source_cd, instr(source_cd, '@')) source_cd
49from epic_audit_info;
50
51create or replace view sms_audit_info as
52select * from BlueHeronData.source_master@deid
53where source_cd like 'SMS@%';
54
55
56/* Drop indexes for performance and to save rollback.
57
58Note: Since ETL uses joins on this table, we'll re-create PK after.
59*/
60whenever sqlerror continue;
61drop index NightHeronData.PM_UPLOADID_IDX;
62drop index NightHeronData.PM_PATNUM_IDX;
63drop index NightHeronData.PM_ENCPNUM_IDX;
64alter table NightHeronData.patient_mapping
65  disable constraint patient_mapping_pk;
66drop index NightHeronData.patient_mapping_pk;
67whenever sqlerror exit;
68
69/* First add MRNs to the patient_mapping since there are some cases where
70   an MRN maps to multiple pat_ids (ref: #2385) */
71insert into NightHeronData.patient_mapping
72  (patient_num, patient_ide,
73   patient_ide_status, project_id, patient_ide_source,
74   import_date, upload_id, download_date, sourcesystem_cd )
75(select NightHeronData.SQ_UP_PATDIM_PATIENTNUM.nextval, ep.MRN,
76        /* project_id is used for the Identity Management (IM) cell which we
77           currently don't use.  Refer to CRC and IM documentation from
78           i2b2.org.
79         */
80        ep.status, '@', sms.source_cd,
81        sysdate, up.upload_id, :download_date, up.source_cd
82  from (
83        /* Since there is a possible one-to-many relationship between
84           "MRN"s and "pat_id"s and we only want to add each MRN to the mapping
85           table once, it is necessary to deterministically filter the
86           patient_dimension view such that a single MRN is associated with
87           only 1 part and 1 patient status (hence the partition by MRN on each)
88         */
89        select distinct MRN
90             , max(part) over (partition by MRN) as part
91             , max(status) over (partition by MRN) as status
92        from patient_dimension) ep
93     , NightHeronData.upload_status up
94     , sms_audit_info sms
95  where ep.MRN is not null
96    and up.upload_id = :upload_id
97    and ep.part = :part);
98commit;
99
100/* map pat_id's too */
101insert into NightHeronData.patient_mapping
102  (patient_num,
103   patient_ide,
104   patient_ide_status,
105   project_id,
106   patient_ide_source,
107   import_date, upload_id, download_date, sourcesystem_cd )
108(select coalesce(pmap.patient_num,
109                 NightHeronData.SQ_UP_PATDIM_PATIENTNUM.nextval),
110        pd.patient_ide,
111        pd.status,
112        '@',
113        up.source_cd,
114        sysdate, up.upload_id, :download_date, up.source_cd
115       from patient_dimension pd
116       left join NightHeronData.patient_mapping pmap
117              on pmap.patient_ide = pd.MRN
118       , NightHeronData.upload_status up 
119       where up.upload_id = :upload_id
120         and pd.part = :part
121      );
122commit;
123
124
125create unique index NightHeronData.patient_mapping_pk
126 on NightHeronData.patient_mapping
127 (PATIENT_IDE,PATIENT_IDE_SOURCE,PROJECT_ID)
128  nologging;
129
130alter table NightHeronData.patient_mapping
131  enable constraint patient_mapping_pk;
132commit; -- Make intermediate results available for review. and flush undo.
133
134
135-- make a view of just these mappings
136create or replace view mrn_patient_mappings as
137select patient_ide as MRN, patient_num, patient_ide_source
138from NightHeronData.patient_mapping pmap
139join sms_audit_info sms
140  on pmap.patient_ide_source = sms.source_cd
141where pmap.patient_ide_status = 'A';
142explain plan for
143select * from mrn_patient_mappings;
144SELECT PLAN_TABLE_OUTPUT FROM TABLE(DBMS_XPLAN.DISPLAY());
145
146/*
147truncate table NightHeronData.encounter_mapping;
148drop sequence NightHeronData.SQ_UP_ENCDIM_ENCOUNTERNUM;
149create sequence NightHeronData.SQ_UP_ENCDIM_ENCOUNTERNUM;
150*/
151
152whenever sqlerror continue;
153drop index NightHeronData.EM_UPLOADID_IDX;
154drop index NightHeronData.EM_IDX_ENCPATH;
155drop index NightHeronData.EM_ENCNUM_IDX;
156alter table NightHeronData.ENCOUNTER_MAPPING
157  disable constraint ENCOUNTER_MAPPING_PK;
158drop index NightHeronData.ENCOUNTER_MAPPING_PK;
159whenever sqlerror exit;
160
161
162/*
163 * Encounter mappings for MPI_ID
164 */
165insert into NightHeronData.encounter_mapping
166  (encounter_num, encounter_ide,
167   encounter_ide_status, encounter_ide_source, project_id,
168   patient_ide, patient_ide_source,
169   import_date, upload_id, download_date, sourcesystem_cd
170  )
171(select NightHeronData.SQ_UP_ENCDIM_ENCOUNTERNUM.nextval,
172        vd.mpi_encounter_id encounter_ide, 'A', sms_audit_info.source_cd,
173         '@', vd.patient_mrn patient_ide, sms_audit_info.source_cd,
174        sysdate, up.upload_id, :download_date,up.source_cd
175 from
176 (select distinct
177        v.mpi_encounter_id,
178        v.patient_mrn,
179        v.part
180  from unique_hospital_visit_view v
181  where v.mpi_encounter_id is not null) vd,
182  NightHeronData.upload_status up,
183  sms_audit_info,  -- we presume MPI_ID is issues by SMS
184  epic_audit_info
185 where up.upload_id = :upload_id
186  and vd.part = :part);
187
188/*
189 * Encounter mappings for HSP_ACCOUNT_ID
190 */
191insert into NightHeronData.encounter_mapping
192  (encounter_num, encounter_ide,
193   encounter_ide_status, encounter_ide_source, project_id,
194   patient_ide, patient_ide_source,
195   import_date, upload_id, download_date, sourcesystem_cd
196  )
197(select case -- is there a corresponding MPI_ID for this HSP_ACCOUNT_ID?
198           when emap_mpi.encounter_num is not null
199           -- yes; use the encounter_num allocate above
200           then emap_mpi.encounter_num
201           -- no; make a new one.
202           else NightHeronData.SQ_UP_ENCDIM_ENCOUNTERNUM.nextval
203        end encounter_num,
204        v.hsp_account_id encounter_ide, 'A', epic_hsp_audit_info.source_cd,
205        '@', v.pat_id patient_ide, epic_audit_info.source_cd,
206        sysdate, up.upload_id, :download_date,up.source_cd
207 from unique_hospital_visit_view v
208 left join (
209   select encounter_ide, encounter_num
210   from NightHeronData.encounter_mapping
211   where encounter_ide_source = (select source_cd from sms_audit_info)
212   ) emap_mpi
213   on emap_mpi.encounter_ide = v.mpi_encounter_id,
214 NightHeronData.upload_status up, epic_hsp_audit_info, epic_audit_info
215 where up.upload_id = :upload_id
216  and v.part = :part);
217
218
219/*
220 * Encounter mappings for patient day.
221 */
222insert into NightHeronData.encounter_mapping
223  (encounter_num, encounter_ide,
224   encounter_ide_status, encounter_ide_source, project_id,
225   patient_ide, patient_ide_source,
226   import_date, upload_id, download_date, sourcesystem_cd )
227(select case -- is there a HSP_ACCOUNT_ID for this pat_enc_csn_id?
228           when v.encounter_num is not null
229           -- yes; use the encounter_num allocate above
230           then v.encounter_num
231           -- no; make a new one.
232           else NightHeronData.SQ_UP_ENCDIM_ENCOUNTERNUM.nextval
233        end encounter_num,
234        v.encounter_ide encounter_ide, 'A', epic_pat_day_audit_info.source_cd,
235        '@', v.pat_id patient_ide, epic_audit_info.source_cd,
236        sysdate, up.upload_id, :download_date,up.source_cd
237 from (
238 /* We'd like to just select distinct patient, day from patient_day_visit,
239  * but we need to trace back to any existing encounter mappings above,
240  * and the cardinalities are sort of tricky.
241  * So we join on pat_enc_csn_id to hospital_visit_dimension
242  * and then on HSP_ACCOUNT_ID to existing encounter_mapping(s);
243  * Then we use group by to sort of simulate distinct patient, day
244  * while keeping a relevant encounter_num, if there is one.
245  */
246 select v.encounter_ide, v.pat_id, v.part, min(emap_hsp.encounter_num) encounter_num
247 from patient_day_visit v
248  left join hospital_visit_dimension hv
249    on hv.pat_enc_csn_id = v.pat_enc_csn_id
250  left join (
251   select encounter_ide, encounter_num
252   from NightHeronData.encounter_mapping
253   where encounter_ide_source = (select source_cd from epic_hsp_audit_info)
254   ) emap_hsp
255   on emap_hsp.encounter_ide = hv.HSP_ACCOUNT_ID
256   group by v.encounter_ide, v.pat_id, v.part) v,
257 NightHeronData.upload_status up, epic_audit_info, epic_pat_day_audit_info
258 where up.upload_id = :upload_id
259  and v.part = :part);
260
261
262
263/*
264 * Encounter mappings for PAT_ENC_CSN_ID
265 */
266insert into NightHeronData.encounter_mapping
267  (encounter_num, encounter_ide,
268   encounter_ide_status, encounter_ide_source, project_id,
269   patient_ide, patient_ide_source,
270   import_date, upload_id, download_date, sourcesystem_cd )
271(select case -- Is there a patient day mapping for this PAT_ENC_CSN_ID?
272           when emap_pday.encounter_num is not null
273           -- yes; use the encounter_num allocate above
274           then emap_pday.encounter_num
275           -- no; something has gone horribly wrong.
276           else 1/0
277        end encounter_num,
278        v.pat_enc_csn_id encounter_ide, 'A', epic_audit_info.source_cd,
279        '@', v.pat_id patient_ide, epic_audit_info.source_cd,
280        sysdate, up.upload_id, :download_date,up.source_cd
281 from clarity.pat_enc v
282 left join (
283   select encounter_ide, encounter_num
284   from NightHeronData.encounter_mapping
285   where encounter_ide_source = (select source_cd from epic_pat_day_audit_info)
286   ) emap_pday
287   on emap_pday.encounter_ide = TO_CHAR(v.contact_date,'YYYYMMDD') || v.pat_id,
288 NightHeronData.upload_status up, epic_audit_info
289 where up.upload_id = :upload_id
290 and v.pat_id is not null -- this case shows up in our test data, at least
291 );
292
293
294/*
295 * Encounter mappings for fabricated visits.
296 */
297insert into NightHeronData.encounter_mapping
298  (encounter_num, encounter_ide,
299   encounter_ide_status, encounter_ide_source, project_id,
300   patient_ide, patient_ide_source,
301   import_date, upload_id, download_date, sourcesystem_cd )
302(select NightHeronData.SQ_UP_ENCDIM_ENCOUNTERNUM.nextval encounter_num,
303        v.encounter_ide encounter_ide, 'A', epic_audit_info.source_cd,
304        '@', v.patient_ide, epic_audit_info.source_cd,
305        sysdate, up.upload_id, :download_date,up.source_cd
306 from fab_reg_visit v,
307 NightHeronData.upload_status up, epic_audit_info
308 where up.upload_id = :upload_id
309  and v.part = :part);
310
311
312
313create unique index NightHeronData.ENCOUNTER_MAPPING_PK
314  on NightHeronData.ENCOUNTER_MAPPING (ENCOUNTER_IDE,ENCOUNTER_IDE_SOURCE,PROJECT_ID,PATIENT_IDE,PATIENT_IDE_SOURCE)
315  nologging;
316alter table NightHeronData.ENCOUNTER_MAPPING
317  enable constraint ENCOUNTER_MAPPING_PK;
318commit;
319
320
321/*
322For debugging duplicate keys...
323
324create index NightHeronData.ENCOUNTER_MAPPING_DEBUG
325  on NightHeronData.ENCOUNTER_MAPPING (ENCOUNTER_IDE,ENCOUNTER_IDE_SOURCE)
326  nologging;
327
328select count(*), ENCOUNTER_IDE,ENCOUNTER_IDE_SOURCE
329from NightHeronData.ENCOUNTER_MAPPING
330group by ENCOUNTER_IDE,ENCOUNTER_IDE_SOURCE
331having count(*) > 1
332;
333*/
334
335-- 3385 rows inserted (BLD)
336
337/* Never mind the optional patient dimension columns.
338
339See section 3.3 PATIENT_DIMENSION
340of i2b2 Clinical Research Chart (CRC) Design Document
341Document Version: 1.1
342I2b2 Software Release: 1.4
343 */
344whenever sqlerror continue;
345/* Add date shift, ssn and various identified columns.
346Do them in independent statements in case only one is missing.
347 */
348alter table NightHerondata.patient_dimension
349  add (home_phone varchar2(50));
350alter table NightHerondata.patient_dimension
351  add (work_phone varchar2(50)) ;
352alter table NightHerondata.patient_dimension
353  add (email_address varchar2(255)) ;
354alter table NightHerondata.patient_dimension
355  add (state_cd varchar2(254)) ;
356alter table NightHerondata.patient_dimension
357  add (county_cd varchar2(254)) ;
358alter table NightHerondata.patient_dimension
359  add (zip_cd varchar2(60)) ; 
360alter table NightHerondata.patient_dimension
361  add (city varchar2(50)) ;
362alter table NightHerondata.patient_dimension
363  add (add_line_1 varchar2(50)) ;
364alter table NightHerondata.patient_dimension
365  add (add_line_2 varchar2(50)) ;
366alter table NightHerondata.patient_dimension
367  add (pat_name varchar2(300)) ;
368alter table NightHerondata.patient_dimension
369  add (date_shift number) ;
370alter table NightHerondata.patient_dimension
371  add (ssn varchar2(45)) ;
372alter table NightHerondata.patient_dimension
373  add (mrn varchar2(50)) ;
374alter table NightHerondata.patient_dimension
375  add (age_in_years_num_hipaa number) ;
376alter table NightHerondata.patient_dimension
377  add (birth_date_hipaa date) ;
378whenever sqlerror exit;
379
380whenever sqlerror continue;
381alter table NightHerondata.patient_dimension
382  disable constraint PATIENT_DIMENSION_PK;
383drop index NightHerondata.PATIENT_DIMENSION_PK;
384drop index NightHerondata.PD_IDX_DATES;
385drop index NightHerondata.PD_IDX_AllPatientDim;
386drop index NightHerondata.PD_IDX_StateCityZip;
387drop index NightHerondata.PATD_UPLOADID_IDX;
388whenever sqlerror exit;
389
390insert into NightHerondata.patient_dimension (
391  PATIENT_NUM, VITAL_STATUS_CD, BIRTH_DATE, birth_date_hipaa, DEATH_DATE, sex_cd
392, AGE_IN_YEARS_NUM, age_in_years_num_hipaa, language_cd, race_cd, marital_status_cd
393, religion_cd, date_shift, ssn, mrn
394, pat_name,add_line_1,add_line_2,city,county_cd,state_cd
395, zip_cd,home_phone,work_phone,email_address
396, import_date, upload_id, download_date, sourcesystem_cd )
397(SELECT
398   pd.patient_num, pd.VITAL_STATUS_CD, pd.BIRTH_DATE, pd.birth_date_hipaa, pd.DEATH_DATE
399 , pd.sex_cd, pd.age_in_years_num, pd.age_in_years_num_hipaa, pd.language_cd
400 , pd.race_cd, pd.marital_status_cd
401 , pd.religion_cd
402 , round(dbms_random.value((select deid_date_shift_days from hipaa_constants),0))
403 , pd.ssn, pd.mrn
404 , pd.pat_name,pd.add_line_1,pd.add_line_2,pd.city,pd.county_cd,pd.state_cd
405 , pd.zip_cd,pd.home_phone,pd.work_phone,pd.email_address
406 , sysdate, up.upload_id, :download_date, up.source_cd
407 from NightHerondata.upload_status up,
408     /* Epic's patient table potentially has multiple `pat_id`s associated
409      * with a single MRN (which is what the HERON `patient_num` is keyed from).
410      * Thus, it is necessary to choose only `pat_id` (or rather one row) from
411      * patient table for each MRN (`patient_num`).
412      * The row is chosen somewhat arbitrarily prefering the the latest
413      * `pat_id`.
414      */
415    (select pmap.patient_num,
416            epic_pd.*,
417            -- assign a different id for each record with the same patient_num
418            row_number () over (partition by pmap.patient_num
419                                order by epic_pd.patient_ide desc) rn
420     from patient_dimension epic_pd
421     join NightHerondata.patient_mapping pmap
422       on epic_pd.patient_ide = pmap.patient_ide
423      and pmap.patient_ide_status = 'A' -- skip test patients
424      and pmap.patient_ide_source = (select source_cd from epic_audit_info)) pd
425  where up.upload_id = :upload_id
426    and pd.part = :part
427    and rn = 1); -- keep only the first record for each patient_num
428commit;
429
430/*******
431Test that ensures Nightherondata.patient_dimension.email_address has many distinct values per #2896
432*******/
433insert into etl_test_values (test_value, test_domain, test_name, result_id, result_date)
434with test_key as (
435  select 'Demographics' test_domain,
436  'many_emails' test_name from dual
437),
438
439test_values as
440(select count(distinct email_address) as t_values
441 from NightHeronData.patient_dimension
442 where email_address is not null)
443
444select tv.t_values as test_value
445     , tk.test_domain as test_domain
446     , tk.test_name as test_name
447     , sq_result_id.nextval as result_id
448     , sysdate as result_date
449from test_values tv, test_key tk
450;
451 
452create unique index NightHerondata.PATIENT_DIMENSION_PK
453  on NightHerondata.patient_dimension(PATIENT_NUM);
454alter table NightHerondata.patient_dimension
455  enable constraint PATIENT_DIMENSION_PK;
456
457
458whenever sqlerror continue;
459drop INDEX NightHerondata.VD_UPLOADID_IDX;
460drop INDEX NightHerondata.VISITDIM_EN_PN_LP_IO_SD_IDX;
461drop INDEX NightHerondata.VISITDIM_STD_EDD_IDX;
462alter table NightHerondata.visit_dimension
463  disable constraint VISIT_DIMENSION_PK;
464drop index NightHerondata.VISIT_DIMENSION_PK;
465whenever sqlerror exit;
466
467
468/* Build visit dimension from patient-day mappings, which exhaust ENCOUNTER_NUM. */
469insert into NightHerondata.visit_dimension (
470  ENCOUNTER_NUM, PATIENT_NUM,
471  active_status_cd, start_date,
472  import_date, upload_id, download_date, sourcesystem_cd )
473(select
474   pday.encounter_num, pday.patient_num,
475   'A' active_status_cd /*@@move constant to view; document*/,
476   pday.start_date,
477   sysdate, up.upload_id, :download_date, epic_audit_info.source_cd
478  from
479  (
480   /*
481    * Just one start date per encounter_num.
482    */
483   select encounter_num, patient_num, min(start_date) start_date
484   from (
485    select pday.encounter_num, pmap.patient_num,
486           to_date(substr(pday.encounter_ide, 1, 8), 'YYYYMMDD') start_date
487    from NightHerondata.encounter_mapping pday
488    join NightHerondata.patient_mapping pmap
489    on pday.patient_ide = pmap.patient_ide
490    and pday.patient_ide_source = pmap.patient_ide_source
491    and pmap.patient_ide_status = 'A' -- skip test patients
492    where pday.encounter_ide_source = (select source_cd from epic_pat_day_audit_info)
493    )
494    group by encounter_num, patient_num) pday,
495    NightHerondata.upload_status up,
496    epic_audit_info
497  where up.upload_id = :upload_id);
498commit;
499
500create unique index NightHerondata.VISIT_DIMENSION_PK
501  on NightHerondata.visit_dimension(ENCOUNTER_NUM,PATIENT_NUM);
502alter table NightHerondata.visit_dimension
503  enable constraint VISIT_DIMENSION_PK;
504
505
506/* Summary stats */
507update NightHeronData.upload_status
508  set loaded_record = (
509  select (select count(*) from NightHerondata.patient_mapping)
510      + (select count(*) from NightHerondata.encounter_mapping)
511      + (select count(*) from NightHerondata.patient_dimension)
512      + (select count(*) from NightHerondata.visit_dimension)
513      + (select count(*) from NightHerondata.provider_dimension)
514  from dual
515  )
516  where upload_id = :upload_id;
517update NightHeronData.upload_status
518  set no_of_record = loaded_record
519  where upload_id = :upload_id;
Note: See TracBrowser for help on using the repository browser.