Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_visit_transform.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 9.9 KB
Line 
1/* EPIC to i2b2 Patient/Visit ETL: transform
2
3Copyright (c) 2013 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7Load the epic patients and visit details into i2b2.
8*/
9
10/* Check that we're connected to an Epic CLARITY database.
11   */
12select pat_id from CLARITY.patient where 1 = 0;
13
14/*
15 * Patient_dimension
16 *
17 * Note we include both the pat_id and the MRN from identity_id.
18 * Both are added to patient_mapping in epic_i2b2_load.sql.
19 */
20
21/*
22#2896 : add email adress as a fact in Heron
23updates are made to patient dimension view, p.email_address is used to indicate the presence of email address
24*/
25
26/* Test data for multiple races (#698)
27
28insert into CLARITY.patient_race (pat_id, line, patient_race_c)
29select cpr.pat_id, 2, 6
30from clarity.patient_race cpr
31where pat_id in ('Z22', 'Z6');
32 */
33
34/* As per HIPAA, the age reported in deidentified data should be capped in
35de-identified data.  Since researchers are likely to be more interested in the
36clinical facts happening near the end of life rather than the beginning, we
37shift the birthdate forward as necessary to limit the age.  birth_date and
38age_in_years_num contain the real birth date and age.  birth_date_hipaa and
39age_in_years_num_hipaa contain the adjusted information.  The _hipaa columns are
40used in the de-identified database.
41*/
42create or replace view patient_birth_vital as
43with
44age_vital_raw as (
45  select p.*,
46         case
47           when pat_status_c = '1' then 'n' -- 1 is alive
48           when pat_status_c = '2' and death_date is not null then 'y' -- 2 is Deceased
49           else '@'
50         end as VITAL_STATUS_CD,
51         case when pat_status_c = 2 and death_date is not null
52            and birth_date < death_date  -- prune occasional nonsense
53         then death_date
54         else sysdate end last_alive_date
55from
56clarity.patient p
57),
58
59age_vital_2 as (
60select months_between(last_alive_date, birth_date) / 12 f_age_in_years_num,
61       r.*
62from age_vital_raw r)
63
64select case when f_age_in_years_num > hcon.deid_age_cap_years
65         then add_months(birth_date, months_between(last_alive_date, birth_date) - (deid_age_cap_years * 12))
66         else birth_date end birth_date_hipaa,
67       case when f_age_in_years_num > hcon.deid_age_cap_years
68         then hcon.deid_age_cap_years else floor(f_age_in_years_num) end age_in_years_num_hipaa,
69       floor(f_age_in_years_num) age_in_years_num, v2.*
70from age_vital_2 v2, hipaa_constants hcon
71;
72
73
74create or replace view patient_dimension as
75(Select
76  p.pat_id PATIENT_IDE,
77  ltrim((select min(identity_id)  -- Sigh. dups in our test data.
78         from CLARITY.identity_id cii
79         where cii.pat_id = p.pat_id
80           and cii.identity_type_id=10) -- KUH MRNs from EAD
81      , '0') as MRN,
82  mod(ora_hash(p.pat_id), &&heron_etl_chunks)+1 as part,
83  p.VITAL_STATUS_CD,
84  p.birth_date,
85  p.birth_date_hipaa,   
86  p.death_date,
87  case when zse.abbr is null then '@' else lower(zse.abbr) end sex_cd,
88  p.age_in_years_num,
89  p.age_in_years_num_hipaa,
90  case when l.name is null then '@' else lower(l.name) end language_cd,
91  case
92     when cpr.patient_race_c is NULL then '@'
93     when czpr.abbr is null then 'other'
94     else lower(czpr.abbr)
95  end  race_cd,
96  case
97     when p.ethnic_group_c is NULL then '@'
98     when eg.abbr is null then 'o' -- other; e.g. 100, 200 in ClarityB
99     else lower(eg.abbr)
100  end  ethnicity_cd,
101  case when ms.abbr is null then '@' else lower(ms.abbr) end marital_status_cd,
102  case when re.name is null then '@' else lower(re.name) end religion_cd,
103  p.pat_first_name first_name, p.pat_middle_name middle_name, p.pat_last_name last_name,
104  p.pat_id, p.epic_pat_id, p.ssn,
105  p.add_line_1, p.add_line_2, p.city, p.zip ZIP_CD,
106  p.reg_date,
107  p.home_phone, p.work_phone, p.email_address,
108  case when p.email_address is null then '@' else 'Email Exists' end email_onfile,
109  p.pat_status,
110  case when p.county_c is NULL then '@' else zcc.name end  county_cd,
111  case when p.state_c is NULL then '@' else zcs.name end  state_cd,
112  p.pat_name,
113  p.update_date,
114  /* 3.8 Patient Mapping in CRC Design says:
115           
116     Patient_ide_status gives the status of the patient number in
117     the source system, for example, if it is Active or Inactive
118     or Deleted or Merged.
119         
120     I'm not certain how Active becomes 'A'; I suppose by inspection
121     of the demo data.
122         
123     We want to add a mapping for each MRN
124         
125     We'll treat epic test patients as deleted.
126   */
127  case when etp.ku_mrn is not null then 'D'
128       else 'A'
129  end as status
130  from patient_birth_vital p
131    left join CLARITY.zc_sex zse on p.sex_c = zse.rcpt_mem_sex_c
132    left join CLARITY.zc_language l on p.language_c = l.language_c
133    left join (
134      /* In case of multiple races, prefer low numers, which represent
135         common races, to higher numbers, e.g. Other */
136      select pat_id, min(patient_race_c) patient_race_c from clarity.patient_race
137      group by pat_id
138    ) cpr on cpr.pat_id = p.pat_id
139    left join CLARITY.zc_patient_race czpr on cpr.patient_race_c = czpr.patient_race_c
140    left join CLARITY.zc_ethnic_group eg on p.ethnic_group_c = eg.ethnic_group_c
141    left join CLARITY.zc_marital_status ms on p.marital_status_c = ms.marital_status_c
142    left join CLARITY.zc_county zcc on p.county_c=zcc.county_c
143    left join CLARITY.zc_state zcs on p.state_c=zcs.state_c
144    left join CLARITY.zc_religion re on p.religion_c = re.religion_c
145    left join epic_test_patients etp on p.pat_id = etp.pat_id
146      -- left outer join CLARITY.zc_patient_status ps on p.pat_status_c = ps.patient_status_c
147)
148;
149explain plan for
150select * from patient_dimension;
151SELECT PLAN_TABLE_OUTPUT FROM TABLE(DBMS_XPLAN.DISPLAY());
152
153/*
154 * visit_dimension
155 */
156 
157-- Make sure everybody has at least one visit by fabricating one per patient
158-- based on reg_date.
159create or replace view fab_reg_visit as
160(select
161  'fabricated_for_' || p.pat_id ENCOUNTER_IDE,
162  null EXTERNAL_VISIT_ID,
163  p.pat_id PATIENT_IDE,
164  p.reg_date START_DATE,
165  p.reg_date END_DATE,
166  null hsp_account_id,
167  null mpi_encounter_id,
168  null patient_mrn,
169  mod(ora_hash(p.pat_id), &&heron_etl_chunks)+1 part
170 from clarity.patient p);
171
172
173create or replace view hospital_visit_dimension_view as
174-- !unique on pat_enc_csn_id only
175(select
176  to_char(hsp.hsp_account_id) as hsp_account_id,
177  hsp.pat_id,
178  hsp.hosp_admsn_time as start_date,
179  hsp.hosp_disch_time as end_date,
180  mpi.mpi_id mpi_encounter_id,
181-- !cci.identity_id seems to be more accurate in returning correct MRNs
182  cii.identity_id as patient_mrn,
183  hsp.pat_enc_csn_id,
184  mod(hsp.pat_enc_csn_id, &&heron_etl_chunks)+1 as PART
185from
186  clarity.pat_enc_hsp hsp
187-- we seem to have a few hsp_account_ids with multiple pat_ids!
188left join (
189     select count(distinct hsp.pat_id), hsp.hsp_account_id
190     from clarity.pat_enc_hsp hsp
191     where hsp.hsp_account_id is not null
192     group by hsp.hsp_account_id
193     having count(distinct hsp.pat_id) > 1
194) nonsense on nonsense.hsp_account_id = hsp.hsp_account_id
195left join clarity.hsp_acct_mpi mpi
196  on mpi.hsp_account_id = hsp.hsp_account_id
197-- !the clarity identity table can be used to get a MRN for a patient
198left join clarity.identity_id cii
199  on hsp.pat_id = cii.pat_id
200  and cii.identity_type_id=10 -- identity_id_type: '10' -> 'MRN'
201where nonsense.hsp_account_id is null
202and cii.identity_id /* aka MRN */ is not null
203);
204
205whenever sqlerror continue;
206truncate table hospital_visit_dimension;
207drop table hospital_visit_dimension;
208whenever sqlerror exit;
209
210-- make the hospital_visit_dimension a table for performance reasons
211-- (the view joins pat_enc_hsp back on itself)
212create table hospital_visit_dimension as
213select * from hospital_visit_dimension_view;
214
215create or replace view unique_hospital_visit_view as
216-- !one per hsp_account_id
217-- !need to filter out duplicate instances of hsp_account_id from the table merge.
218-- !pat_id, mpi_encounter_id, patient_mrn, should all be the same for each
219-- !hsp_account_id, but we want to get the minimum admsn_time and the
220-- !maximum disch_time.
221(select hspvis.*,
222        mod(hspvis.hsp_account_id, &&heron_etl_chunks)+1 as PART from
223 (select hsp_account_id, pat_id, MIN(start_date) start_date,
224         MAX(end_date) end_date,
225         mpi_encounter_id, patient_mrn
226  from hospital_visit_dimension
227  group by hsp_account_id, pat_id, mpi_encounter_id, patient_mrn) hspvis
228);
229
230create or replace view patient_day_visit as
231(select TO_CHAR(contact_date,'YYYYMMDD') || pat_enc.pat_id encounter_ide,
232        trunc(contact_date) start_date,
233        pat_enc.pat_id, pat_enc.pat_enc_csn_id,
234        pat_enc.contact_date as contact_date,
235        mod(ora_hash(pat_enc.pat_id), &&heron_etl_chunks)+1 as PART
236 from clarity.pat_enc
237 where pat_enc.pat_id is not null
238);
239
240whenever sqlerror continue;
241truncate table visit_dimension;
242drop table visit_dimension;
243whenever sqlerror exit;
244
245/*Discharge Disposition Codes for the Visits*/
246
247create or replace view epic_discharge_disposition as
248  select to_char(hsp.pat_enc_csn_id) as encounter_ide    ,
249    hsp.pat_id as patient_ide                            ,
250    ddc.disch_disp_c,
251    'KUMC|DischargeDisposition:' || (case when hsp.disch_disp_c is null
252                                          then '0'
253                                          else to_char(hsp.disch_disp_c)
254                                    end) as concept_cd,
255      case
256      when hsp.hosp_disch_time is null then patenc.contact_date
257      else hsp.hosp_disch_time
258     end start_date,
259    '@' as modifier_cd,
260    hsp.pat_enc_csn_id as instance_num,
261    hsp.hosp_disch_time as end_date,
262    hsp.hosp_disch_time as update_date,
263    mod(hsp.pat_enc_csn_id, &&heron_etl_chunks)+1 as part,
264    fconst.* -- add in default values for provider_id, valtype_cd etc.
265from clarity.pat_enc_hsp hsp
266join clarity.pat_enc patenc  on hsp.pat_enc_csn_id=patenc.pat_enc_csn_id
267left join clarity.zc_disch_disp ddc on hsp.disch_disp_c=ddc.disch_disp_c,
268discrete_fact_constants fconst
269
270where hsp.pat_enc_csn_id is NOT NULL;
Note: See TracBrowser for help on using the repository browser.