Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_notes_transform.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 10.7 KB
Line 
1/* EPIC to 12b2 ETL: Notes transform
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7For an overview, see http://informatics.kumc.edu/work/wiki/HeronLoad
8
9References Clarity Notes Training Materials: "CLR212 Clarity Data Model - ASAP"
10Available in Galaxy: https://galaxy.epic.com/?#Browse/page=8100!44!230!875196
11
12*/
13
14/* Check that we're connected to an Epic CLARITY database.
15 */
16select pat_id from CLARITY.patient where 1 = 0;
17
18/* Filter clarity.hno_note_text to only the note rows we care about (latest) */
19create or replace view latest_note_text as
20select * from
21 (select note_id, line, note_csn_id, contact_date, contact_date_real
22      , max(contact_date_real) over (partition by note_id, line) last_contact_date_real
23      , max(note_csn_id) over (partition by note_id, line, contact_date_real) last_note_csn_id
24      , note_text
25  from clarity.hno_note_text)
26where contact_date_real = last_contact_date_real
27and note_csn_id = last_note_csn_id;
28
29/* Join all of the notes tables together and further filter notes (remove one
30   that have been deleted) */
31create or replace view notes_and_text as
32select note.ip_note_type_c
33     , note.note_id
34     , note.pat_enc_csn_id
35     , coalesce(last_note.line, 1) line
36     , last_note.note_text
37     , coalesce(note.create_instant_dttm, last_note.contact_date) start_date
38from clarity.hno_info note
39left join latest_note_text last_note
40       on note.note_id = last_note.note_id
41-- only include notes that have not had the deleted status
42where not exists (select * from clarity.note_enc_info ninfo
43                  where note.note_id = ninfo.note_id
44                  and ninfo.note_status_c = 4)
45-- ip_note_type_c is part of the concept_cd so it can't be null
46  and note.ip_note_type_c is not null
47-- try to find a reasonable start date, otherwise ignore the fact
48  and coalesce(note.create_instant_dttm, last_note.contact_date) is not null;
49
50
51create or replace view observation_fact_notes as
52-- Put note text in id notes
53with id_notes as
54(select nat.*, 'Ti' as valtype_cd, coalesce(nat.note_text, '@') as tval_char
55 from notes_and_text nat),
56
57-- Strip note text from deid notes (temporary. eventually join with deid table)
58deid_notes as
59(select nat.*, 'Td' as valtype_cd, '@' as tval_char
60 from notes_and_text nat
61 -- TODO join with DEID-ed notes to and supply deid note as tval_char
62 )
63 
64select to_char(hsp.pat_id) patient_ide
65     , to_char(hsp.pat_enc_csn_id) encounter_ide
66     , schemes.note_type_scheme || notes.ip_note_type_c concept_cd
67     , '@' provider_id
68     , '@' modifier_cd
69     -- `select max(note_id) from clarity.hno_info` -> 99999999 (on prod)
70     -- `select max(line) from clarity.hno_note_text` -> 116 (on prod)
71     /* Decoding `valtype_cd` to differentiate id notes and deid notes in the
72      * instance_num.  Both get inserted into `observation_fact_upload` at the
73      * same time and `valtype_cd` is not part of the primary key.
74      */
75     , (notes.note_id * 10000 + notes.line) * 10
76        + decode(notes.valtype_cd, 'Ti', 1,
77                                   'Td', 2,
78                                         3) instance_num
79     , notes.valtype_cd valtype_cd
80     , notes.tval_char tval_char
81     , to_number(null) nval_num
82     , null valueflag_cd
83     , null units_cd
84     , null location_cd
85     , to_number(null) confidence_num
86     , to_date(null) update_date
87     , notes.start_date start_date
88     , hsp.hosp_disch_time end_date
89     , mod(hsp.pat_enc_csn_id, &&heron_etl_chunks)+1 as part
90 from CLARITY.pat_enc_hsp hsp
91 cross join scheme_labels@DEID schemes
92 join (select * from id_notes union all
93       select * from deid_notes) notes
94   on notes.pat_enc_csn_id = hsp.pat_enc_csn_id;
95
96
97--Load all EPIC concept observations documented in context of a note
98create or replace view observation_fact_notes_txt as
99
100with base_notes as
101(
102  select hsp.pat_id
103       , hsp.pat_enc_csn_id
104       , conc.concept_id
105       , conc.name
106       , conc.data_type_c
107       , csed.hlv_id
108       , ninfo.entry_instant_dttm
109       , csed.cur_value_datetime
110       , csev.smrtdta_elem_value
111  from clarity.smrtdta_elem_data csed
112  join clarity.hno_info note on note.note_id = csed.record_id_numeric
113  join clarity.note_enc_info ninfo on ninfo.note_id = note.note_id
114                                   and to_char(note.create_instant_dttm , 'DD-MON-YY') =
115                                       to_char(ninfo.entry_instant_dttm , 'DD-MON-YY')
116                                 --and note.create_instant_dttm = ninfo.entry_instant_dttm | TODO find out why this is commented out
117                                   and ninfo.note_status_c in (2,7) --signed and finalized
118  join clarity.clarity_concept conc on conc.concept_id = csed.element_id
119                                    and csed.context_name = 'NOTE'
120                                    and substr(conc.concept_id, 1, 3) in ('EPI','MED','KU#','UKP') --filtered concepts
121  join clarity.pat_enc hsp on hsp.pat_enc_csn_id = note.pat_enc_csn_id --includes all ambulatory visits
122  join clarity.smrtdta_elem_value csev on csev.hlv_id = csed.hlv_id
123),
124
125
126-- There are currently 3 basic processing patterns for smart notes: Standard, Number, and Value
127-- These patterns are keyed to magic strings 'Standard', 'Number', and 'Value, and
128-- are used in creation of the temporary views standard_notes, number_notes, value_notes
129type_instruct as
130(
131   select 1 as type, 'Standard' as instruction from dual union all
132   select 2 as type, 'Number' as instruction from dual union all --Process 'Number' Type as a Number
133   select 3 as type, 'Standard' as instruction from dual union all
134   select 4 as type, 'Standard' as instruction from dual union all
135   select 5 as type, 'Value' as instruction from dual union all --Process 'Category' Type as a Value
136   select 6 as type, 'Value' as instruction from dual union all --Process 'Boolean' Type as a Value
137   select 7 as type, 'Standard' as instruction from dual union all
138   select 8 as type, 'Value' as instruction from dual union all --Process 'Element ID' Type as a Value
139   select 9 as type, 'Standard' as instruction from dual union all
140   select 10 as type, 'Standard' as instruction from dual union all
141   select 11 as type, 'Standard' as instruction from dual union all
142   select 12 as type, 'Standard' as instruction from dual
143),
144
145base_notes_with_instructions as
146(
147   select bn.*, ti.instruction
148   from base_notes bn
149   join type_instruct ti on bn.data_type_c = ti.type
150),
151
152num_fill as
153(
154  select '@' as valtype_cd
155       , '@' as tval_char
156       , to_number(null) as nval_num
157  from dual
158),
159
160--Standard notes require no special processing and use the clarity_concept.concept_id as a the concept_cd
161standard_notes as
162(
163   select 'KUMC|SMRT|N:'|| bmwi.concept_id as concept_cd
164        , bmwi.*
165        , nf.*
166   from base_notes_with_instructions  bmwi, num_fill nf
167   where bmwi.instruction = 'Standard'
168),
169
170-- Number notes generate the concept_cd the same as Standard Notes but use the smrtdta_elem_value as their nval_num
171number_notes as
172(
173   select 'KUMC|SMRT|N:'|| bmwi.concept_id as concept_cd
174        , bmwi.*
175        , 'N' as valtype_cd
176        , 'E' as tval_char
177        , (case when regexp_like (smrtdta_elem_value, '^\d*\.?(\d)+$')
178                then to_number(smrtdta_elem_value)
179                else to_number(null)
180          end) as nval_num
181   from base_notes_with_instructions  bmwi, num_fill nf
182   where bmwi.instruction = 'Number'
183),
184
185-- Value notes have a special concept_cd which is concatenation of the concept_cd with smrtdta_elem_value.
186-- Note it is possible that smrtdta_elem_value could be hashed to save space.
187value_notes as
188(
189   select 'KUMC|SMRT|N:'|| bmwi.concept_id||(case
190                                                when trim(smrtdta_elem_value) is not null --make sure the smrtdta_elem_value is not
191                                                                                          --null or an empty string
192                                                                                         
193                                                then '$'||(case when bmwi.data_type_c = 5 --category type facts need to have
194                                                                                          --their smrtdta_elem_value hashed
195                                                                                          --to fit the 50 char space requirement
196                                                                then to_char(ora_hash(smrtdta_elem_value))
197                                                                else smrtdta_elem_value
198                                                           end)
199                                                else ''
200                                                end) as concept_cd
201        , bmwi.*
202        , nf.*
203   from base_notes_with_instructions  bmwi, num_fill nf
204   where bmwi.instruction = 'Value'
205     and not (bmwi.data_type_c = 6 and trim(bmwi.smrtdta_elem_value) not in ('0', '1')) -- only keep boolean facts that are '0' or '1'
206),
207
208
209notes_collection as
210(
211  select * from standard_notes union all
212  select * from number_notes union all
213  select * from value_notes
214)
215
216select distinct
217     to_char(pat_id) patient_ide
218   , to_char(pat_enc_csn_id) encounter_ide
219   , concept_cd
220   , '@' provider_id
221   , '@' modifier_cd
222   , to_char(hlv_id) instance_num
223   , valtype_cd
224   , tval_char
225   , nval_num
226   , null valueflag_cd
227   , null units_cd
228   , null location_cd 
229   , to_number(null) confidence_num
230   , to_date(null) update_date
231   , entry_instant_dttm start_date
232   , cur_value_datetime end_date
233   , mod(hlv_id, &&heron_etl_chunks)+1 as part
234from notes_collection
235;
236
237
238------------------------------------------------
239--==TEST 'smrtdta_elem_value' HASH COLLISION==--
240------------------------------------------------
241create or replace view ccc_collision as
242select distinct
243     ccc.concept_id as concept_id,
244     to_char(ora_hash(csev.smrtdta_elem_value)) as data_value,
245     csev.smrtdta_elem_value as raw_data_value
246 
247 from clarity.clarity_concept ccc
248 join clarity.smrtdta_elem_data csed on ccc.concept_id = csed.element_id
249 join clarity.smrtdta_elem_value csev on csev.hlv_id = csed.hlv_id
250                                   and csev.line = 1
251                                   and trim(csev.smrtdta_elem_value) is not null
252 where ccc.data_type_c = 5
253 order by concept_id;
254
255select 1/0 as collision_detected
256from ccc_collision cc1
257join ccc_collision cc2 on cc1.concept_id = cc2.concept_id          --same concept_id
258                      and cc1.data_value = cc2.data_value          --same data_value (hash of smrtdta_elem_value)
259                      and cc1.raw_data_value != cc2.raw_data_value --different raw_ data_value (raw of smrtdta_elem_value
260;
261
262--600 seconds for test on Clarity1
Note: See TracBrowser for help on using the repository browser.