Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/uhc_i2b2_transform.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 18.9 KB
Line 
1/* UHC to 12b2 ETL part I: transform   
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7Information on how UHC data is organized came from documents downloaded from
8https://www.uhc.edu/.  Specifically, "CDBRM_Downloads_File_Specification.xls"
9which was downloaded and provided as an ETL reference by by the KU Hospital
10Organizational Improvement team.
11*/
12
13 -- Test that we're in the KUMC sid.
14 select patientid from uhc.encounter where 1=0;
15 
16 
17 whenever sqlerror continue; -- in case the indexes already exist
18/* do we need to create any index on UHC data?*/
19 create index uhc.encounter_patientid
20   on uhc.encounter (patientid);
21
22create unique index uhc.encounter_recordid
23 on uhc.encounter (recordid);
24
25 whenever sqlerror exit;
26 
27 /*******************
28 Patient_dimension
29  *******************/
30 /* Table ddl ensures that it does not  have null PatientIDs if this is not needed in transform -delete it*/
31 
32 create or replace view patient_dimension
33 as select distinct enc.patientid as patid
34   , mod(ora_hash(enc.patientId), &&heron_etl_chunks)+1 as part
35 from uhc.encounter enc
36 ;
37 
38 create or replace view uhc_visit_dimension as
39select enc.encounterid
40     , 'A' as encounter_ide_status
41     , enc.patientid
42     ,enc.admissiondate START_DATE
43     , enc.dischargedate end_date
44from uhc.encounter enc
45;
46   
47 /* ICD9CM diagnosis code (decimal assumed after third position, except for E codes betwe 4th and 5th) */
48 create or replace view uhc_observation_fact_dx as select
49  en.encounterid, en.patientid,
50  Case
51  when length(diag.icd9cmdiagcode) > 3
52  then
53  Case
54  when  substr(diag.icd9cmdiagcode, 1, 1) = 'E' then
55   'ICD9:' || substr(diag.icd9cmdiagcode, 1, 4) || '.' || substr(diag.icd9cmdiagcode, 5 )
56  else
57   'ICD9:' || substr(diag.icd9cmdiagcode, 1, 3) || '.' || substr(diag.icd9cmdiagcode, 4 )
58  end
59  else
60   'ICD9:' || diag.icd9cmdiagcode
61  end concept_cd,
62 '@' provider_id, -- TODO: better way?
63 en.admissiondate START_DATE,
64 mods.modifier_cd,
65  -- recordid seems to be 1-1 with encounterid, which is already in the key
66  diag.sequencenumber instance_num,
67  '@' valtype_cd, '@' tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
68  en.dischargedate end_date, -- TODO: better way?
69  null location_cd,
70  to_number(null) confidence_num,
71  to_date(null) update_date,
72  mod(en.encounterid, &&heron_etl_chunks)+1 as part
73from UHC.encounter en, UHC.diagnosis diag,
74(select 'DiagObs:UHC_DIAGNOSIS' as modifier_cd from dual
75 union all
76 select 'DiagObs:Primary' as modifier_cd from dual
77 ) mods
78where diag.recordid = en.recordid
79and (
80  mods.modifier_cd = 'DiagObs:UHC_DIAGNOSIS'
81  or
82  (mods.modifier_cd = 'DiagObs:Primary'
83   and diag.sequencenumber = 1)
84  );
85
86-- eyeball it:
87-- select * from uhc_observation_fact_dx order by encounterid, instance_num;
88
89 /*  ICD9CM procedure code (decimal assumed after second position) */
90 create or replace view uhc_observation_fact_proc as select
91  en.encounterid, en.patientid,
92 
93   'ICD9:' || substr(proc.icd9cmproccode, 1, 2) || '.' || substr(proc.icd9cmproccode, 3 ) concept_cd,
94  '@' provider_id, -- TODO: better way?
95 en.admissiondate START_DATE,
96 'DiagObs:UHC_PROCEDURE' modifier_cd,
97 -- assume sequencenumber < 1000 (measured at 63)
98 en.recordid * 1000 + proc.sequencenumber instance_num,
99  '@' valtype_cd, '@' tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
100  en.dischargedate end_date, -- TODO: better way?
101  null location_cd,
102  to_number(null) confidence_num,
103  to_date(null) update_date,
104  mod(en.encounterid, &&heron_etl_chunks)+1 as part
105from UHC.encounter en, UHC.procedure proc
106where proc.recordid =en.recordid
107order by proc.recordid, proc.sequencenumber;
108
109/*  core measures */
110 create or replace view uhc_observation_fact_core as select
111  en.encounterid,
112  en.patientid,
113  'UHC|COREMEASURE|'|| case when core.measureid is null
114                            then core.code else cmc.code end
115                    || ':'  || core.category  as concept_cd,
116  '@' provider_id,
117 en.admissiondate START_DATE,
118 '@' modifier_cd,
119 en.recordid instance_num,
120  '@' valtype_cd, '@' tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
121  en.dischargedate end_date,
122  null location_cd,
123  to_number(null) confidence_num,
124  to_date(null) update_date,
125  mod(en.encounterid, &&heron_etl_chunks)+1 as part
126from UHC.encounter en, UHC.coremeasures core
127left join uhc.core_measurescode cmc on core.measureid=cmc.measureid
128where core.recordid =en.recordid
129order by core.recordid;
130
131/*  Expected LOS */
132 create or replace view uhc_observation_fact_expected as
133 select
134 distinct
135 en.encounterid,
136  en.patientid,
137  'UHC|EXPECTEDLOS:'|| exp.risktypecode as concept_cd ,
138  '@' provider_id,
139 en.admissiondate START_DATE,
140 '@'  modifier_cd,
141 en.recordid instance_num,
142 'N' valtype_cd ,
143 'E' tval_char,
144   exp.expectedlos nval_num,
145  null valueflag_cd,
146  null units_cd,
147  en.dischargedate end_date,
148  null location_cd,
149  to_number(null) confidence_num,
150  to_date(null) update_date,
151  mod(en.encounterid, &&heron_etl_chunks)+1 as part
152from UHC.encounter en, UHC.expectedvalues exp
153where exp.recordid =en.recordid
154;
155
156/* physician specialty and role */
157 create or replace view uhc_observation_fact_spec as
158 select
159 distinct
160  en.encounterid,
161  en.patientid,
162  'UHC|PHYSSPECCODE:'|| spec.otherphysicianspecialty  as concept_cd,
163  '@' provider_id,
164 en.admissiondate START_DATE,
165 '@' modifier_cd,
166 en.recordid instance_num,
167  '@' valtype_cd, '@' tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
168  en.dischargedate end_date,
169  null location_cd,
170  to_number(null) confidence_num,
171  to_date(null) update_date,
172  mod(en.encounterid, &&heron_etl_chunks)+1 as part
173from UHC.encounter en, UHC.otherphysician spec
174where spec.recordid =en.recordid
175UNION all
176select
177distinct
178  en.encounterid,
179  en.patientid,
180  'UHC|PHYSROLE:'|| spec.otherphysicianrole  as concept_cd,
181  '@' provider_id,
182 en.admissiondate START_DATE,
183 '@' modifier_cd,
184 en.recordid instance_num,
185  '@' valtype_cd, '@' tval_char, to_number(null) nval_num, null valueflag_cd, null units_cd,
186  en.dischargedate end_date,
187  null location_cd,
188  to_number(null) confidence_num,
189  to_date(null) update_date,
190  mod(en.encounterid, &&heron_etl_chunks)+1 as part
191from UHC.encounter en, UHC.otherphysician spec
192where spec.recordid =en.recordid
193;
194
195
196 /*********************
197   observation_fact_uhc
198  *********************/
199create or replace view observation_fact_uhc as
200
201with
202en as (
203select gend.code gender_code
204     , race.code race_code
205     , uhcemsdrg.code uhcemsdrg_code
206     , en.*
207from UHC.encounter en
208left outer join UHC.enc_sex gend
209on gend.code = en.sex
210left outer join UHC.enc_race race
211on race.code = to_number(en.race)
212left outer join UHC.enc_uhcmsdrg uhcemsdrg
213on to_number(uhcemsdrg.code) = to_number(en.uhcmsdrg)
214),
215
216uhc_encounter_facts as (
217select en.patientid
218     , en.encounterid
219     , case
220       when a.scheme is null then a.concept_cd
221       else a.scheme ||
222         decode(a.scheme,
223                'UHC|Gender:', to_char(gender_code),
224                'UHC|Race:', to_char(race_code),
225                'UHC|ServiceLine:', en.uhcserviceline,
226                'UHC|BaseMSDRG:', en.basemsdrg,
227                'UHC|DISCHSTCODE:', en.dischargestatuscode,
228                'UHC|ADMSTCODE:', en.admissionstatuscode,
229                'UHC|MSDRGMDC:', to_char(en.msdrgmdc),
230                'UHC|APRPRODLINE:', to_char(en.aprproductline),
231                'UHC|APRDRG:', en.aprdrg,
232                'UHC|PRIMARYPAYER:',en.UHCPRIMARYPAYER,
233                'UHC|SECONDARYPAYER:',en.UHCSECONDARYPAYER,
234                'UHC|ADMDAY:',               
235           -- odd... why do the mapping here rather than in concepts?
236           decode(en.admitdayofweek,
237                  1, 'SUNDAY',
238                  2, 'MONDAY',
239                  3, 'TUESDAY',
240                  4, 'WEDNESDAY',
241                  5, 'THURSDAY',
242                  6, 'FRIDAY',
243                  7, 'SATURDAY'),
244                'UHC|UHCMSDRG:', uhcemsdrg_code,
245                'UHC|APRSOI:', en.aprseverityofillness,
246                'UHC|APRROM:', en.admitaprriskofmortality
247               
248                )
249       end concept_cd
250     , '@' modifier_cd
251     , en.recordid instance_num
252     , a.valtype_cd
253     , case when a.valtype_cd = 'N' then 'E' else '@' end tval_char
254     , case
255       when a.valtype_cd = '@' then to_number(null)
256       when a.concept_cd = 'UHC|LOS:'|| '1' then en.los
257       when a.concept_cd = 'UHC|ICUDAYS:'|| '1' then en.icudays
258       end nval_num
259     , en.admissiondate
260     , en.dischargedate
261     , en.dischargedate end_date
262from en, (
263  select 'UHC|Gender:' as scheme, null as concept_cd, '@' as valtype_cd from dual
264  union all
265  select 'UHC|Race:' as scheme, null, '@' from dual
266  union all
267  select 'UHC|ServiceLine:', null, '@' from dual
268  union all
269  select 'UHC|BaseMSDRG:', null, '@' from dual
270  union all
271  -- od... why use || between two string constants?
272  select null, 'UHC|LOS:'|| '1', 'N' from dual
273  union all
274  select null, 'UHC|ICUDAYS:'|| '1', 'N' from dual
275  union all
276  select 'UHC|DISCHSTCODE:', null, '@' from dual
277  union all
278  select 'UHC|ADMSTCODE:', null, '@' from dual
279  union all
280  select 'UHC|MSDRGMDC:', null, '@' from dual
281  union all
282  select 'UHC|APRPRODLINE:', null, '@' from dual
283  union all
284  select 'UHC|APRDRG:', null, '@' from dual
285  union all
286  select 'UHC|ADMDAY:', null, '@' from dual
287  union all
288  select 'UHC|UHCMSDRG:', null, '@' from dual
289  union all
290  select 'UHC|APRSOI:', null, '@' from dual
291  union all
292  select 'UHC|APRROM:', null, '@' from dual
293  union all
294  select 'UHC|PRIMARYPAYER:', null, '@' from dual
295  union all
296  select 'UHC|SECONDARYPAYER:', null, '@' from dual
297  ) a
298),
299
300
301uhc_procedure_facts as (
302select en.patientid
303     , en.encounterid
304           , 'UHC|CCSICD9CMPROC:'|| proc.ccsicd9cmproccode concept_cd
305     , '@' modifier_cd
306     , en.recordid * 100000 + proc.sequencenumber instance_num
307     , '@' valtype_cd
308     , '@' tval_char
309     , to_number(null) nval_num
310     , en.admissiondate
311     , en.dischargedate
312     , en.dischargedate end_date
313from UHC.encounter en, UHC.procedure proc,
314UHC.proc_ccs_code ccs
315where proc.recordid = en.recordid and
316ccs.code = proc.ccsicd9cmproccode
317),
318
319uhc_all_facts as (
320select * from uhc_encounter_facts
321union all
322select * from uhc_procedure_facts
323)
324
325select uhc_all_facts.patientid
326     , uhc_all_facts.encounterid
327           , uhc_all_facts.concept_cd
328     , '@' provider_id
329     , uhc_all_facts.modifier_cd
330     , uhc_all_facts.instance_num
331     , uhc_all_facts.valtype_cd
332     , uhc_all_facts.tval_char
333     , uhc_all_facts.nval_num
334     , null valueflag_cd
335     , null units_cd
336     , null location_cd 
337     , to_number(null) confidence_num
338     , to_date(null) update_date
339     /* For discharge resulting in death, use the discharge date rather than the
340     admission date (ticket #1695).
341     */
342     , case when uhc_all_facts.concept_cd in ('UHC|DISCHSTCODE:20', 'UHC|DISCHSTCODE:41') then uhc_all_facts.dischargedate
343     else uhc_all_facts.admissiondate end start_date
344     , uhc_all_facts.end_date
345     ,mod(uhc_all_facts.encounterid, &&heron_etl_chunks)+1 as part
346from uhc_all_facts
347--! don't insert facts ICU days when the patient is recorded as having 0 days in the ICU
348where not (uhc_all_facts.concept_cd = 'UHC|ICUDAYS:'|| '1' and
349           uhc_all_facts.nval_num = 0)
350
351-- could perhaps factor out confidence_num etc. from below,
352-- but since the code is working, I'm leaving it as is for now -- Dan C.
353
354UNION all
355select en.patientid
356      ,en.encounterid
357          , 'UHC|READMISSION:'|| '1' concept_cd
358      , '@' provider_id
359      , '@' modifier_cd
360       , en.recordid instance_num
361      , 'N' valtype_cd
362      , 'E' tval_char
363     , readmission.daystoreadmit nval_num
364      , null valueflag_cd
365      , null units_cd
366      , null location_cd 
367      , to_number(null) confidence_num
368      , to_date(null) update_date
369       ,en.admissiondate start_date
370      ,en.dischargedate end_date
371      ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
372from UHC.encounter en, UHC.readmission readmission
373where readmission.recordid = en.recordid
374UNION all
375select en.patientid
376      ,en.encounterid
377          , 'UHC|RISKPOOL:'|| riskpool.riskpoolcode concept_cd
378      , '@' provider_id
379      , '@' modifier_cd
380       , en.recordid instance_num
381      , '@' valtype_cd
382      , '@' tval_char
383     , riskpool.complicationcount nval_num
384      , null valueflag_cd
385      , null units_cd
386      , null location_cd 
387      , to_number(null) confidence_num
388      , to_date(null) update_date
389       ,en.admissiondate start_date
390      ,en.dischargedate end_date
391       ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
392from UHC.encounter en, UHC.riskpool riskpool
393where riskpool.recordid = en.recordid
394union all
395select en.patientid
396      ,en.encounterid
397          , 'UHC|ADMSRC:'|| admsrc.id concept_cd
398      , '@' provider_id
399      , '@' modifier_cd
400      , en.recordid instance_num
401      , '@' valtype_cd
402      , '@' tval_char
403     , to_number(null) nval_num
404      , null valueflag_cd
405      , null units_cd
406      , null location_cd 
407      , to_number(null) confidence_num
408      , to_date(null) update_date
409       ,en.admissiondate start_date
410      ,en.dischargedate end_date
411       ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
412from UHC.encounter en
413join UHC.enc_adm_src_po_origin admsrc
414on en.admsrponumberoforigin = rtrim(admsrc.code)
415WHERE en.admissionstatuscode = 4 --for new borns
416and admsrc.id>9
417union all
418select en.patientid
419      ,en.encounterid
420          , 'UHC|ADMSRC:'|| admsrc.id concept_cd
421      , '@' provider_id
422      , '@' modifier_cd
423      , en.recordid instance_num
424      , '@' valtype_cd
425      , '@' tval_char
426     , to_number(null) nval_num
427      , null valueflag_cd
428      , null units_cd
429      , null location_cd 
430      , to_number(null) confidence_num
431      , to_date(null) update_date
432       ,en.admissiondate start_date
433      ,en.dischargedate end_date
434       ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
435from UHC.encounter en
436join UHC.enc_adm_src_po_origin admsrc
437on en.admsrponumberoforigin = rtrim(admsrc.code)
438WHERE en.admissionstatuscode != 4 --for all others
439and admsrc.id not in(13,14,15,16,23,24)
440UNION all
441select en.patientid
442      ,en.encounterid
443          , 'UHC|CCSICD9DIAG:'|| diag.ccsicd9cmdiagcode concept_cd
444      , '@' provider_id
445      , '@' modifier_cd
446       , en.recordid * 10000 + diag.sequencenumber instance_num
447      , '@' valtype_cd
448      , '@' tval_char
449     , to_number(null) nval_num
450      , null valueflag_cd
451      , null units_cd
452      , null location_cd 
453      , to_number(null) confidence_num
454      , to_date(null) update_date
455       ,en.admissiondate start_date
456      ,en.dischargedate end_date
457       ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
458from UHC.encounter en, UHC.diagnosis diag,UHC.diag_icd9_ccs ccs
459where diag.recordid = en.recordid
460and diag.ccsicd9cmdiagcode = ccs.code
461UNION all
462select en.patientid
463      ,en.encounterid
464          , 'UHC|AHRQQUALITY:'|| ahrequality.ahrqqualitycode || '|'  || ahrequality.numeratordenominatorflag  as concept_cd
465      , '@' provider_id
466      , '@' modifier_cd
467       , en.recordid instance_num
468     , '@' valtype_cd
469      , '@' tval_char
470     , to_number(null) nval_num 
471      , null valueflag_cd
472      , null units_cd
473      , null location_cd 
474      , to_number(null) confidence_num
475      , to_date(null) update_date
476       ,en.admissiondate start_date
477      ,en.dischargedate end_date
478       ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
479from UHC.encounter en, UHC.ahrqquality ahrequality
480where ahrequality.recordid = en.recordid
481UNION all
482select en.patientid
483      ,en.encounterid
484          , 'UHC|AHRQSAFETY:'|| ahrqsafety.ahrqsafetycode || '|'  || ahrqsafety.numeratordenominatorflag  as concept_cd 
485      , '@' provider_id
486      , '@' modifier_cd
487       , en.recordid instance_num
488      , '@' valtype_cd
489      , '@' tval_char
490     , to_number(null) nval_num 
491      , null valueflag_cd
492      , null units_cd
493      , null location_cd 
494      , to_number(null) confidence_num
495      , to_date(null) update_date
496       ,en.admissiondate start_date
497      ,en.dischargedate end_date
498      ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
499from UHC.encounter en, UHC.ahrqsafety ahrqsafety
500where ahrqsafety.recordid = en.recordid
501UNION all
502select en.patientid
503      ,en.encounterid
504          , 'UHC|AHRQPEDIATRIC:'|| ahrqpediatric.ahrqpediatriccode || '|'  || ahrqpediatric.numeratordenominatorflag  as concept_cd
505      , '@' provider_id
506      , '@' modifier_cd
507       , en.recordid instance_num
508      , '@' valtype_cd
509      , '@' tval_char
510      , to_number(null) nval_num 
511      ,null valueflag_cd
512      , null units_cd
513      , null location_cd 
514      , to_number(null) confidence_num
515      , to_date(null) update_date
516       ,en.admissiondate start_date
517      ,en.dischargedate end_date
518       ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
519from UHC.encounter en, UHC.ahrqpediatric ahrqpediatric
520where ahrqpediatric.recordid = en.recordid
521UNION all
522select en.patientid
523      ,en.encounterid
524          , 'UHC|COMORBIDITY:'|| comor.comorbiditycode concept_cd
525      , '@' provider_id
526      , '@' modifier_cd
527       , en.recordid instance_num
528      , '@' valtype_cd
529      , '@' tval_char
530     , to_number(null) nval_num
531      , null valueflag_cd
532      , null units_cd
533      , null location_cd 
534      , to_number(null) confidence_num
535      , to_date(null) update_date
536       ,en.admissiondate start_date
537      ,en.dischargedate end_date
538      ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
539from UHC.encounter en, UHC.comorbidity comor
540where comor.recordid = en.recordid
541UNION all
542select en.patientid
543      ,en.encounterid
544          , 'UHC|COMPLICATION:'|| compl.complicationcode concept_cd
545      , '@' provider_id
546      , '@' modifier_cd
547       , en.recordid instance_num
548      , '@' valtype_cd
549      , '@' tval_char
550     , to_number(null) nval_num
551      , null valueflag_cd
552      , null units_cd
553      , null location_cd 
554      , to_number(null) confidence_num
555      , to_date(null) update_date
556       ,en.admissiondate start_date
557      ,en.dischargedate end_date
558       ,mod(en.encounterid, &&heron_etl_chunks)+1 as part
559from UHC.encounter en, UHC.complication compl
560where compl.recordid = en.recordid
561;
562/* Make sure these join conditions are true in at least some cases.
563   (regression test) */
564select case qty when 0 then 1/0 else 1 end as some_uhc_facts
565from (
566select count(*) qty
567    from observation_fact_uhc
568);
Note: See TracBrowser for help on using the repository browser.