Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/i2b2_facts_deid.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 5.6 KB
Line 
1/** i2b2_facts_deid.sql -- de-identify i2b2 observation facts
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7Note the use of valtype_cd in de-identification:
8 * _i - only for identified DB
9 * _d - only for de-identified DB
10 * _ - for both DBs
11
12per :upload_id , per :part.
13 
14*/
15
16alter session set NLS_DATE_FORMAT = 'YYYY-MM-DD HH24:MI';
17
18/* Check for id repository, uploader service tables, blueheron link */
19select * from NightHeronData.observation_fact where 1 = 0;
20select * from NightHeronData.upload_status where 1 = 0;
21select concept_cd from BlueHeronData.observation_fact@deid where 1 = 0;
22
23
24insert into NightHeronData.observation_fact (
25    encounter_num, sub_encounter, patient_num, concept_cd, provider_id, start_date
26  , modifier_cd, instance_num, valtype_cd, tval_char, nval_num, valueflag_cd, units_cd
27  , end_date, location_cd, confidence_num
28  , update_date, download_date, import_date, sourcesystem_cd, upload_id
29)
30select
31    f.encounter_num, f.sub_encounter, f.patient_num
32  , f.concept_cd
33  , f.provider_id
34  , f.start_date
35  , f.modifier_cd, f.instance_num
36  , substr(f.valtype_cd, 1, 1) valtype_cd
37  , f.tval_char
38  , f.nval_num, f.valueflag_cd, f.units_cd
39  , f.end_date
40  , f.location_cd, f.confidence_num
41  , f.update_date, f.download_date
42  , SYSDATE as import_date
43  , f.sourcesystem_cd, f.upload_id
44from observation_fact_upload f
45where
46  /* 'd' as the second character in the valtype_cd indicates that the row should
47  only be inserted into the de-identified fact table (BlueHeron) not the identified
48  fact table (NightHeron).
49  */
50  (f.valtype_cd not like '_d' or f.valtype_cd is null)
51  and
52  mod(f.encounter_num, &&heron_etl_chunks)+1 = :part
53;
54
55
56/* Create (if not there) the instance num mapping table,
57 * its deid sequence populator, and add an index
58 */
59whenever sqlerror continue;
60
61create sequence nightherondata.instance_num_id
62--! To avoid false positives in epic_verify_deid_med_order.sql resulting
63--! from the instance_num incrementing to valid ranges of PHI based numbers,
64--! we start with the maximum allowable value for instance num (18#'s)
65--! and decrement.
66--! Digits 123456789012345678 <- 18
67  maxvalue 999999999999999999
68start with 999999999999999999 increment by -1;
69
70create table NightHeronData.instance_mapping
71  (id_instance_num integer not null,
72   deid_instance_num integer not null);
73alter table NightHeronData.instance_mapping
74  add constraint instance_mapping_pk primary key(id_instance_num);
75
76whenever sqlerror exit;
77
78
79/* Find the instance nums in the fact table that
80 * are not in the mapping table and add the mapping
81 */
82insert into NightHeronData.instance_mapping (
83  id_instance_num
84, deid_instance_num)
85
86with missing_instance_nums as
87(
88  select distinct f.instance_num
89  from observation_fact_upload f
90  minus
91  select im.id_instance_num
92  from NightHeronData.instance_mapping im
93)
94select instance_num, NightHeronData.instance_num_id.nextval
95from missing_instance_nums
96where &&add_to_instance_mapping;
97
98
99/* Make a de-id upload_status record, copying some fields from id to de-id. */
100delete from BlueHeronData.upload_status@deid
101where upload_id=:upload_id;
102insert into BlueHeronData.upload_status@deid (
103  upload_id
104, upload_label
105, user_id
106, source_cd
107, load_date
108, message
109, transform_name)
110select upload_id
111     , upload_label
112     -- I guess this doesn't work across links;
113     -- it raises ORA-02070
114     -- , (select to_char(username) from user_users) as user_id
115     -- so we'll just assume deid user = ETL user
116     , user_id
117     , source_cd
118     , sysdate as load_date
119     , message
120     , transform_name
121from NightHeronData.upload_status up
122where up.upload_id=:upload_id;
123
124
125/* Shift start/end dates, dates in values */
126insert into BlueHeronData.observation_fact@deid (
127    patient_num, encounter_num, sub_encounter, concept_cd, provider_id, start_date
128  , modifier_cd, instance_num, valtype_cd, tval_char, nval_num, valueflag_cd, units_cd
129  , end_date, location_cd, confidence_num
130  , update_date, download_date, import_date, sourcesystem_cd, upload_id
131)
132select
133    f.patient_num, f.encounter_num, ora_hash(f.sub_encounter)
134  , f.concept_cd
135  , f.provider_id
136  -- date-shift the observation
137  , f.start_date + pdim.date_shift as start_date
138  , f.modifier_cd
139  , &&instance_num --im.deid_instance_num
140  , substr(f.valtype_cd, 1, 1) valtype_cd
141  , case
142      when valtype_cd in ('D', 'Dd')
143      then case
144        when regexp_like(tval_char, '[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]')
145        then to_char(to_date(f.tval_char,'yyyy-mm-dd hh24:mi:ss') + pdim.date_shift,
146                     'yyyy-mm-dd hh24:mi:ss')
147        else null end
148      else f.tval_char
149    end as tval_char
150  , f.nval_num, f.valueflag_cd, f.units_cd
151  , f.end_date + pdim.date_shift as end_date
152  , f.location_cd, f.confidence_num
153  , f.update_date + pdim.date_shift as update_date
154  , f.download_date
155  , SYSDATE as import_date
156  , f.sourcesystem_cd, f.upload_id
157from observation_fact_upload f
158  join NightHeronData.patient_dimension pdim
159    on pdim.patient_num = f.patient_num
160  &&instance_mapping_join
161  --join NightHeronData.instance_mapping im on im.id_instance_num = f.instance_num
162where
163  /* 'i' as the second character in the valtype_cd indicates that the row should
164  only be inserted into the identified fact table (NightHeron) not the de-identified
165  fact table (BlueHeron).
166  */
167  (f.valtype_cd not like '_i' or f.valtype_cd is null)
168  and
169  mod(f.encounter_num, &&heron_etl_chunks)+1 = :part
170;
171
172
173/* Record end date.
174 */
175update BlueHeronData.upload_status@deid
176set end_date = sysdate, load_status='OK'
177where upload_id=:upload_id;
Note: See TracBrowser for help on using the repository browser.