Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_demographic_transform.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 11.1 KB
Line 
1/* EPIC to i2b2 Demographic ETL: transform
2
3Copyright (c) 2013 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7Load the epic demographic details into i2b2.
8
9See i2b2_facts_deid.sql re the use of valtype_cd in de-identification.
10
11*/
12
13/* Check that we're connected to an Epic CLARITY database.
14   */
15select pat_id from CLARITY.patient where 1 = 0;
16
17/* Check for curated data */
18select threshold from zips_near_kumc where 1=0;
19
20/*
21 * Demographic observation facts: first we pivot data,
22 * and then observation_fact_demo view itself.
23 */
24 /* #2896 : add email adress as a fact in Heron*/
25create or replace view patient_demographics_pivot as select * from
26( select patient_ide,
27  'DEM|SEX:'|| sex_cd CONCEPT_CD
28, 1 instance_num
29, case when birth_date is null then sysdate else birth_date end start_date
30, '@' deid
31  from patient_dimension)
32 
33union all
34( select patient_ide
35, schemes.demo_epic_email_scheme CONCEPT_CD
36, 1 instance_num
37, case when birth_date is null then sysdate else birth_date end start_date
38, '@' deid
39  from patient_dimension
40  cross join scheme_labels@deid schemes
41  where email_address is not null) 
42
43union all
44( select patient_ide,
45  'DEM|AGE:' || age_in_years_num_hipaa CONCEPT_CD
46, 1 instance_num
47, case when reg_date is null then sysdate else reg_date end start_date
48, '@' deid
49  from patient_dimension
50  where age_in_years_num_hipaa = age_in_years_num )
51
52union all
53( select patient_ide,
54  'DEM|AGE:' || age_in_years_num_hipaa CONCEPT_CD
55, 1 instance_num
56, case when reg_date is null then sysdate else reg_date end start_date
57, '@d' deid
58  from patient_dimension
59  where age_in_years_num_hipaa < age_in_years_num)
60
61union all
62( select patient_ide,
63  'DEM|AGE:' || AGE_IN_YEARS_NUM CONCEPT_CD
64, 1 instance_num
65, case when reg_date is null then sysdate else reg_date end start_date
66, '@i' deid
67  from patient_dimension
68  where age_in_years_num_hipaa < age_in_years_num)
69
70union all
71( select patient_ide,
72  schemes.demo_language_scheme|| language_cd CONCEPT_CD
73, 1 instance_num
74, case when reg_date is null then sysdate else reg_date end start_date
75, '@' deid
76  from patient_dimension
77  cross join scheme_labels@deid schemes)
78union all
79( select patient_ide,
80  'DEM|MARITAL:'|| marital_status_cd CONCEPT_CD
81, 1 instance_num
82, case when reg_date is null then sysdate else reg_date end start_date
83, '@' deid
84  from patient_dimension)
85union all
86
87/* handle multiple races per patient (#867)
88
89select count(*), pat_id
90from clarity.patient_race cpr
91group by pat_id
92having count(*) > 1;
93-- 44
94*/
95
96select p.pat_id patient_ide
97     , schemes.demo_race_scheme|| case
98         when cpr.patient_race_c is NULL then '@'
99         when czpr.abbr is null then 'other'
100         else lower(czpr.abbr)
101        end  CONCEPT_CD
102     , coalesce(cpr.line, 1) instance_num
103       -- handle null reg_date (335 cases with non-null patient_race_c)
104     , case when p.reg_date is null then sysdate else p.reg_date end start_date
105     , '@' deid
106from CLARITY.patient p
107left join clarity.patient_race cpr
108       on cpr.pat_id = p.pat_id
109left join CLARITY.zc_patient_race czpr on cpr.patient_race_c = czpr.patient_race_c
110cross join scheme_labels@deid schemes
111
112union all
113( select patient_ide,
114  schemes.demo_ethnicity_scheme|| ethnicity_cd CONCEPT_CD
115, 1 instance_num
116, case when reg_date is null then sysdate else reg_date end start_date
117, '@' deid
118  from patient_dimension
119  cross join scheme_labels@deid schemes)
120union all
121( select patient_ide,
122  schemes.demo_religion_scheme|| religion_cd CONCEPT_CD
123, 1 instance_num
124, case when reg_date is null then sysdate else reg_date end start_date
125, '@' deid
126  from patient_dimension
127  cross join scheme_labels@deid schemes)
128  ;
129explain plan for
130select * from patient_demographics_pivot;
131SELECT PLAN_TABLE_OUTPUT FROM TABLE(DBMS_XPLAN.DISPLAY());
132
133
134create or replace view patient_dem_geo_pivot as select * from
135( select p.pat_id patient_ide,
136  schemes.demo_state_scheme || zst.abbr CONCEPT_CD
137, case when reg_date is null then sysdate else reg_date end start_date
138, '@' deid
139  from clarity.patient p
140  join clarity.zc_state zst on p.state_c = zst.state_c
141  cross join scheme_labels@deid schemes)
142
143union all
144( select p.pat_id,
145  'DEM|ZIPCODE:' || p.zip CONCEPT_CD
146, case when reg_date is null then sysdate else reg_date end start_date
147, '@i' deid
148  from clarity.patient p where p.zip is not null)
149
150/* 5, 10, etc. mi from KUMC */
151union all
152( select p.pat_id,
153  'DEM|GEO|KUMC:' || z.threshold || 'mi' CONCEPT_CD
154, case when reg_date is null then sysdate else reg_date end start_date
155, '@' deid
156  from clarity.patient p
157  join zips_near_kumc z
158    on substr(p.zip, 1, 5) = z.forepoint)
159
160/* Nearby school districts. */
161union all
162( select p.pat_id,
163  'DEM|SCHOOL_DISTRICT:' || sd.concept_name as concept_cd
164, case when reg_date is null then sysdate else reg_date end start_date
165, '@' deid
166  from clarity.patient p
167  join school_districts sd
168    on substr(p.zip, 1, 5) = sd.zipcode
169)
170;
171
172
173/*
174todo?
175    left outer join CLARITY.zc_country zc on p.country_c = zc.country_c
176    left outer join CLARITY.ZC_STATE zs on p.state_c = zs.state_c
177    left outer join CLARITY.zc_county zcc on p.county_c = zcc.county_c
178*/
179
180
181/* Age at visit
182*/
183create or replace view observation_fact_age_at_visit as
184select to_char(enc.pat_enc_csn_id) encounter_ide,
185  enc.pat_id patient_ide,
186  'DEM|AGEATV:' || (
187    case realm.deid
188      when '@' then enc.delta_yrs
189      when '@i' then enc.delta_yrs
190      else
191        case when enc.delta_yrs > hcon.deid_age_cap_years then hcon.deid_age_cap_years else enc.delta_yrs end
192    end) CONCEPT_CD,
193  '@' PROVIDER_ID,
194  enc.contact_date start_date,
195  '@' modifier_cd,
196  pat_enc_csn_id as instance_num,
197  realm.deid valtype_cd, null tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
198  enc.contact_date end_date,
199  null location_cd,
200  to_number(null) confidence_num,
201  enc.update_date update_date,
202  mod(enc.pat_enc_csn_id, &&heron_etl_chunks) + 1 as part
203from hipaa_constants hcon, (
204 select pe.pat_enc_csn_id, pe.contact_date, pe.update_date,
205        pat.pat_id, pat.birth_date,
206        floor(months_between(pe.contact_date, pat.birth_date) / 12) delta_yrs
207 from clarity.pat_enc pe
208 join clarity.patient pat
209   on pe.pat_id = pat.pat_id
210) enc join
211(select * from (select '@' deid from dual
212 union all
213 select '@i' from dual
214 union all
215 select '@d' from dual), hipaa_constants) realm
216 on ((enc.delta_yrs <= realm.deid_age_cap_years and realm.deid = '@')
217     or enc.delta_yrs > realm.deid_age_cap_years and realm.deid != '@')
218where delta_yrs is not null
219;
220
221/* eyeball it:
222select p.pat_id, p.birth_date, f.start_date, f.concept_cd, p.age_in_years_num
223from observation_fact_age_at_visit f
224join patient_dimension p
225  on f.patient_ide = p.pat_id
226  order by f.start_date;
227*/
228
229
230-- select count(*) from most_recent_visit
231
232create or replace view observation_fact_demo as select
233  fv.encounter_ide,
234  pd.patient_ide,
235  pivot.CONCEPT_CD,
236  '@' PROVIDER_ID,
237  pivot.start_date,
238  '@' modifier_cd,
239  pivot.instance_num,
240  pivot.deid valtype_cd,
241  null tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
242  pivot.start_date end_date,
243  null location_cd,
244  to_number(null) confidence_num,
245  pd.update_date update_date,
246  pd.part
247from patient_dimension pd
248 join patient_demographics_pivot pivot on pd.patient_ide = pivot.patient_ide
249 join fab_reg_visit fv on fv.patient_ide = pivot.patient_ide
250;
251
252
253create or replace view observation_fact_geo as select
254  fv.encounter_ide,
255  pd.patient_ide,
256  pivot.CONCEPT_CD,
257  '@' PROVIDER_ID,
258  pivot.start_date,
259  '@' modifier_cd,
260  1 instance_num,
261  pivot.deid valtype_cd,
262  null tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
263  pivot.start_date end_date,
264  null location_cd,
265  to_number(null) confidence_num,
266  pd.update_date update_date,
267  pd.part
268from patient_dimension pd
269 join patient_dem_geo_pivot pivot on pd.patient_ide = pivot.patient_ide
270 join fab_reg_visit fv on fv.patient_ide = pivot.patient_ide
271;
272explain plan for
273select * from observation_fact_geo;
274SELECT PLAN_TABLE_OUTPUT FROM TABLE(DBMS_XPLAN.DISPLAY());
275
276-- select * from clarity.zc_patient_status;
277
278-- Make sure very few records show dead patients without an actual death date.
279
280/*
281There were 57 records in the production patient_dimension as of 2012.12.12 that
282indicated the patient was deceased but had NULL for the death date.  The number
283was up to 95 in data from 2013.07, but jumped to 546 a month later in data from
2842013.08. 
285
286We contacted the hospital reporting team and found that the sudden increase is a
287known issue and under investigation.
288
289For now, comment out the test below.  See #2289.
290
291We won't load vital facts for patients marked as deceased but without a death
292date.  See code below that creates the observation_fact_vital view.
293
294with
295total as(
296  select count(*) cnt from patient_dimension pd
297  ),
298missing as(
299  select
300    count(*) cnt
301  from
302    patient_dimension pd
303  where
304    pd.death_date is null and pd.VITAL_STATUS_CD = 'y'
305  ),
306percent_missing as(
307  select
308    round(100 * (missing.cnt / total.cnt), 2) prcnt
309  from
310    missing, total
311  )
312select case when prcnt > 0.1 then 1/0 else 1 end unknown_death_date from percent_missing;
313*/
314
315create or replace view observation_fact_vital as select
316  fv.encounter_ide,
317  pd.patient_ide,
318  'DEM|VITAL:' || VITAL_STATUS_CD as concept_cd,
319  '@' PROVIDER_ID,
320  -- Patient is alive or unknown, use sysdate.  Otherwise, use death date.
321  case
322    when pd.VITAL_STATUS_CD in ('n', '@') then sysdate
323    when pd.death_date is not null then pd.death_date
324  end as start_date,
325  '@' modifier_cd, 1 instance_num,
326  '@' valtype_cd, null tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
327  case when pd.death_date is null then sysdate
328  else pd.death_date
329  end as end_date,
330  null location_cd,
331  to_number(null) confidence_num,
332  pd.update_date update_date,
333  pd.part
334from patient_dimension pd
335 join fab_reg_visit fv on fv.patient_ide = pd.patient_ide
336where not (pd.death_date is null and pd.VITAL_STATUS_CD = 'y')
337;
338
339
340/* Here's how we made some death date test data based on columns that we use:
341
342insert into clarity.patient (
343  pat_id
344, pat_status_c, pat_status
345, birth_date, death_date
346, sex_c, language_c, ethnic_group_c, marital_status_c, religion_c
347, pat_first_name, pat_middle_name, pat_last_name
348, epic_pat_id
349, ssn
350, add_line_1, add_line_2, city, zip
351, reg_date
352, home_phone, work_phone, email_address
353, update_date)
354select pat_id
355, 2, 'Deceased'
356, death_date - age_in_days as birth_date
357, death_date
358, 1, null, null, null, null
359, first_name, null, 'BMITEST'
360, pat_id
361, null
362, addr_1, null, 'Nowhereville', '66160'
363, death_date - 3 as reg_date
364, null, null, null
365, sysdate as update_date
366from (select
367    'Z' || round(dbms_random.value*10000000, 0) as pat_id
368  , sysdate - dbms_random.value*1000 as death_date
369  , 365 * (40 + dbms_random.value*40) as age_in_days
370  , dbms_random.string('A', 6+dbms_random.value*5) as first_name
371  , dbms_random.string('A', 6+dbms_random.value*20) as addr_1
372  from (
373    Select Rownum item
374    From dual
375    Connect By Rownum <= 20));
376
377select pat_status_c, pat_status, count(*)
378from clarity.patient p
379group by pat_status_c, pat_status
380order by 2 desc;
381
382select * from clarity.zc_patient_status ps;
383*/
Note: See TracBrowser for help on using the repository browser.