Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_diag_tx.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 9.4 KB
Line 
1/* epic_diag_tx -- transform Epic diagnoses into I2B2 observation_fact shape
2
3Copyright (c) 2015 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7Ref:
8
9  * Clarity Data Dictionary - Epic 2012
10    Catalog (Last updated: 07/14/12)
11    https://galaxy.epic.com/Search/GetFile?url=7900!44!100!1770229
12
13*/
14
15/* Check that we're connected to an Epic CLARITY database with edg. */
16select pat_id from CLARITY.patient where 1 = 0;
17select dx_id from clarity.clarity_edg where 1 = 0;
18
19create or replace view observation_fact_enc_dx as
20with
21
22enc_dx_mods as (
23select 'DiagObs:PAT_ENC_DX' modifier_cd from dual
24union all
25select 'DiagObs:PRIMARY_DX_YN' from dual
26),
27
28enc_dx as (
29select
30  ped.pat_enc_csn_id, ped.pat_id,
31  -- To avoid synonyms, use ICD9 codes directly when applicable.
32  -- c.f.
33  labels.dx_id_scheme || ped.dx_id as concept_cd,
34  ped.contact_date start_date,
35  enc_dx_mods.modifier_cd,
36  -- keyed by pat_enc_csn_id, line
37  -- using ora_hash(), we can put this into the instance_num
38  -- Probability of hash collision would be 10^-6 with 93 of the same diagnoses
39  -- in the same financial encounter on the same date (if the events were independent).
40  ora_hash(to_char(ped.pat_enc_csn_id) || ',' || to_char(ped.line)) instance_num,
41  ped.contact_date end_date,
42  ped.update_date update_date
43from CLARITY.Pat_enc_dx ped
44   , scheme_labels@deid labels
45   , enc_dx_mods
46where
47  enc_dx_mods.modifier_cd = 'DiagObs:PAT_ENC_DX'
48  or (
49    enc_dx_mods.modifier_cd = 'DiagObs:PRIMARY_DX_YN'
50    and ped.primary_dx_yn = 'Y'
51  )
52)
53
54select
55  to_char(dx.pat_enc_csn_id) encounter_ide, dx.pat_id patient_ide,
56  dx.concept_cd,
57  dx.START_DATE,
58  dx.modifier_cd,
59  dx.instance_num,
60  dx.end_date,
61  dx.update_date,
62   mod(dx.pat_enc_csn_id, &&heron_etl_chunks)+1 as part,
63   fcon.*
64from enc_dx dx,
65discrete_fact_constants fcon
66;
67
68
69/** observation_fact_pl_dx
70
71Problem_ept_csn seems to be missing a little over 7% of the time. :-/
72
73with pl_stats as (
74select count(*) qty
75from clarity.problem_list),
76pl_missing_enc as (
77select count(*) qty from clarity.problem_list
78where problem_ept_csn is null)
79
80select pl_stats.qty tot, pl_missing_enc.qty missing
81     , round(pl_missing_enc.qty / pl_stats.qty * 100, 4) pct_missing
82from pl_stats, pl_missing_enc
83;
84*/
85
86
87create or replace view observation_fact_pl_dx as
88
89with
90problem_mods as (
91select null scheme, 'DiagObs:PROBLEM_LIST' modifier_cd from dual
92 union all select 'PROBLEM_STATUS_C:', null from dual
93 union all select null, 'DiagObs:PRINCIPAL_PL_YN' from dual
94 union all select null, 'DiagObs:HOSPITAL_PL_YN' from dual),
95
96pl_dx as (
97select
98  coalesce(to_char(pl.problem_ept_csn), 'fabricated_for_' || pl.pat_id) encounter_ide
99, pl.pat_id
100, labels.dx_id_scheme || pl.dx_id as concept_cd
101, coalesce(pl.noted_date, pl.date_of_entry) start_date
102, coalesce(tm.modifier_cd,
103           tm.scheme ||
104           decode(tm.scheme,
105                  'PROBLEM_STATUS_C:', pl.problem_status_c)) modifier_cd
106, pl.problem_list_id instance_num
107, coalesce(pl.resolved_date, pl.noted_date, pl.date_of_entry) end_date
108, pl.update_date update_date
109from clarity.problem_list pl, problem_mods tm, scheme_labels@deid labels
110where
111  tm.modifier_cd = 'DiagObs:PROBLEM_LIST'
112  or ( tm.scheme = 'PROBLEM_STATUS_C:' and pl.problem_status_c is not null )
113  or (tm.modifier_cd = 'DiagObs:PRINCIPAL_PL_YN' and pl.PRINCIPAL_PL_YN = 'Y')
114  or (tm.modifier_cd = 'DiagObs:HOSPITAL_PL_YN' and pl.HOSPITAL_PL_YN = 'Y')
115)
116
117select
118  dx.encounter_ide, dx.pat_id patient_ide,
119  dx.concept_cd,
120  dx.START_DATE,
121  dx.modifier_cd,
122  dx.instance_num,
123  dx.end_date,
124  dx.update_date,
125   mod(ora_hash(dx.encounter_ide), &&heron_etl_chunks)+1 as part,
126   fcon.*
127from pl_dx dx,
128discrete_fact_constants fcon
129;
130
131
132/* Billing diagnosis - #3082
133See also GPC ticket #90 "Diagnoses Modifiers for data attribution"
134*/
135create or replace view billing_diag_strs as
136select
137  'DX|BILL:ADMIT NONPRIMARY' admit_nonprimary
138  , 'DX|BILL:ADMIT PRIMARY' admit_primary
139  , 'DX|BILL:DC PRIMARY' discharge_primary
140  , 'DX|BILL:DC NONPRIMARY' discharge_nonprimary
141  , 'DX|BILL:POA' discharge_poa -- Present on admission
142  , 'DX|PROF:NONPRIMARY' professional_nonprimary
143  , 'DX|PROF:PRIMARY' professional_primary
144from dual
145;
146
147/* Test that max line is <= 99 since we put line in the last two digits of the
148instance_num for uniqueness (as it appears there may be duplicate non-primary
149diagnosis records).
150*/
151
152insert into etl_test_values (test_value, test_domain, test_name, result_id, result_date)
153with test_key as (
154    select 'Diagnoses' test_domain,
155    'line_not_exceed_max' test_name from dual
156  ),
157max_line as (
158  select max(line) test_value from (
159    select line from clarity.hsp_acct_admit_dx
160    union all
161    select line from clarity.hsp_acct_dx_list
162    )
163  )
164select test_value, test_key.*, sq_result_id.nextval, sysdate
165from test_key, max_line;
166commit;
167
168create or replace view hsp_encounter as
169select distinct
170  /* clarity.pat_enc_hsp sometimes has several contacts on the same day with the
171  same hsp_account_id.  Also, there are often many different discharge and
172  contact times.  Somewhat arbitrarily pick the max of each admission, discharge,
173  contact times per account id.
174  */
175  hsp_account_id, pat_id,
176  max(coalesce(hosp_admsn_time, contact_date)) over (partition by hsp_account_id, pat_id) start_date,
177  max(coalesce(hosp_disch_time, contact_date)) over (partition by hsp_account_id, pat_id) end_date,
178  max(contact_date) over(partition by hsp_account_id, pat_id) update_date
179from clarity.pat_enc_hsp
180;
181
182create or replace view epic_billing_diag_admit as
183select
184  to_char(admit.hsp_account_id) encounter_ide, to_char(henc.pat_id) patient_ide,
185  scheme_labels.dx_id_scheme || admit.admit_dx_id concept_cd,
186  henc.start_date,
187  case when admit.line = 1 then strs.admit_primary else strs.admit_nonprimary end modifier_cd,
188  (admit.hsp_account_id * 100) + admit.line instance_num,
189  henc.end_date,
190  henc.update_date,
191  mod(ora_hash(admit.hsp_account_id), &&heron_etl_chunks) + 1 part,
192  fconst.*
193from 
194  clarity.hsp_acct_admit_dx admit
195join hsp_encounter henc on henc.hsp_account_id = admit.hsp_account_id
196cross join discrete_fact_constants fconst
197cross join scheme_labels@deid
198cross join billing_diag_strs strs
199where admit.admit_dx_id is not null
200;
201
202create or replace view discharge_diag as
203select
204  to_char(discharge.hsp_account_id) encounter_ide, to_char(henc.pat_id) patient_ide,
205  scheme_labels.dx_id_scheme || discharge.dx_id concept_cd,
206  henc.start_date,
207  discharge.line, discharge.final_dx_poa_c,
208  (discharge.hsp_account_id * 100) + discharge.line instance_num,
209  henc.end_date,
210  henc.update_date,
211  mod(ora_hash(discharge.hsp_account_id), &&heron_etl_chunks) + 1 part,
212  fconst.*
213from 
214  clarity.hsp_acct_dx_list discharge
215join hsp_encounter henc on henc.hsp_account_id = discharge.hsp_account_id
216cross join discrete_fact_constants fconst
217cross join scheme_labels@deid
218where discharge.dx_id is not null
219;
220
221/* Primary/non-primary modifiers for discharge diag
222*/
223create or replace view epic_billing_diag_discharge as
224select
225  dd.encounter_ide, dd.patient_ide, dd.concept_cd, dd.start_date,
226  case
227    when dd.line = 1 then strs.discharge_primary
228    else strs.discharge_nonprimary
229  end modifier_cd,
230  dd.instance_num,
231  dd.end_date, dd.update_date, dd.part, dd.provider_id, dd.valtype_cd, dd.tval_char,
232  dd.nval_num, dd.valueflag_cd, dd.units_cd, dd.location_cd, dd.confidence_num
233from discharge_diag dd
234cross join billing_diag_strs strs
235
236union all
237
238/* Discharge diagnosis present on admission
239*/
240select
241  dd.encounter_ide, dd.patient_ide, dd.concept_cd, dd.start_date,
242  strs.discharge_poa modifier_cd, dd.instance_num, dd.end_date, dd.update_date,
243  dd.part, dd.provider_id, dd.valtype_cd, dd.tval_char, dd.nval_num,
244  dd.valueflag_cd, dd.units_cd, dd.location_cd, dd.confidence_num
245from discharge_diag dd
246cross join billing_diag_strs strs
247where dd.final_dx_poa_c = 1 -- "Yes" - see clarity.zc_dx_poa
248;
249
250create or replace view epic_billing_diag_professional as
251with enc_dxp as (
252  select distinct dxprof.pat_enc_csn_id, dxprof.primary_dx_id dx_id, strs.professional_primary modifier_cd
253  from  clarity.arpb_transactions dxprof
254  cross join billing_diag_strs strs
255 
256  union all
257 
258  select distinct npdx.pat_enc_csn_id, npdx.dx_id, strs.professional_nonprimary modifier_cd
259  from (
260    select dxprof.pat_enc_csn_id, dxprof.dx_two_id dx_id from clarity.arpb_transactions dxprof
261    union all
262    select dxprof.pat_enc_csn_id, dxprof.dx_three_id dx_id from clarity.arpb_transactions dxprof
263    union all
264    select dxprof.pat_enc_csn_id, dxprof.dx_four_id dx_id from clarity.arpb_transactions dxprof
265    union all
266    select dxprof.pat_enc_csn_id, dxprof.dx_five_id dx_id from clarity.arpb_transactions dxprof
267    union all
268    select dxprof.pat_enc_csn_id, dxprof.dx_six_id dx_id from clarity.arpb_transactions dxprof
269    ) npdx 
270  cross join billing_diag_strs strs
271  )
272select
273  to_char(dxp.pat_enc_csn_id) encounter_ide, to_char(enc.pat_id) patient_ide,
274  scheme_labels.dx_id_scheme || dxp.dx_id concept_cd,
275  coalesce(enc.hosp_admsn_time, enc.contact_date) start_date,
276  dxp.modifier_cd,
277  (dxp.pat_enc_csn_id) instance_num,
278  coalesce(enc.hosp_dischrg_time, enc.contact_date) end_date,
279  enc.update_date,
280  mod(ora_hash(dxp.pat_enc_csn_id), &&heron_etl_chunks) + 1 part,
281  fconst.*
282from 
283  enc_dxp dxp
284join clarity.pat_enc enc on enc.pat_enc_csn_id = dxp.pat_enc_csn_id
285cross join discrete_fact_constants fconst
286cross join scheme_labels@deid
287where dxp.dx_id is not null and dxp.pat_enc_csn_id is not null
288;
Note: See TracBrowser for help on using the repository browser.