Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_flowsheets_multiselect.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 6.5 KB
Line 
1/* epic_flowsheets_multiselect.sql -- make summary tables for multiselect
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7see also: epic_flowsheets_transform.sql
8
9 */
10
11/***************************
12 * multiselectflows -- for value_type_name 'Custom List' with multi select 'Y'
13
14select * from flowsheet_measurement where multi_sel_yn = 1 and value_type_name = 'Custom List';
15
16The interesting ones are the ones with multiple values.
17Values are separated by semicolons.
18
19select * from flowsheet_measurement
20where multi_sel_yn = 1
21and value_type_name = 'Custom List'
22and meas_value like '%;%';
23*/
24
25/* First, just grab all the distinct multi-select values.
26 * While we're at it, count them and record the time range of use.
27 */
28
29whenever sqlerror continue;
30truncate table flow_measure_values;
31drop table flow_measure_values;
32whenever sqlerror exit;
33
34create global temporary table flow_measure_values
35on commit preserve rows as
36select count(*) qty
37     , flo_meas_id
38     , meas_value
39     , min(recorded_time) min_t
40     , max(recorded_time) max_t
41from flowsheet_measurement
42where multi_sel_yn = 1
43  and value_type_name = 'Custom List'
44group by flo_meas_id, meas_value;
45-- select count(*) from flow_measure_values;
46-- 248484
47-- select * from flow_measure_values
48
49
50/*
51Next, using REGEXP_SUBSTR's occurrence argument,
52make one row with a;b;c into 3 rows with
53  a;b;c a
54  a;b;c b
55  a;b;c c
56
57First, we need a list of 1...max(choices_in_a_meas_value)
58cf. http://www.orafaq.com/wiki/Oracle_Row_Generator_Techniques and
59http://stackoverflow.com/questions/2847226/sql-to-generate-a-list-of-numbers-from-1-to-100
60*/
61create or replace view enumerate_items as
62with choice_counts as
63   ( -- strip all but ; and add 1 to the length
64     -- e.g. 'aaa;b;ccc' => ';;' => 3
65     -- use nvl for the case of 'aaa' => null => 1
66   select nvl(length(regexp_replace(meas_value, '[^;]+', '')), 0) + 1 choices
67   from flow_measure_values)
68Select Rownum item
69From dual
70Connect By Rownum <= (select max(choices) from choice_counts);
71-- select * from enumerate_items;
72
73
74/* Tweak BLD data to have 'x; X' case.
75select flo_meas_id
76from flowsheet_measurement fm
77where fm.multi_sel_yn = 1;
78
79301310
80
81then dup record, change line and value. */
82
83
84/*
85Keep table flow_measure_choices around for building the concept hierarchy?
86Hmm... as long as there are no hash collisions, we don't need to.
87*/
88
89create or replace view flow_measure_choices as
90select fmvc.*, ora_hash(choice_label) choice_hash from (
91  select fmv.qty
92       , fmv.flo_meas_id
93       , fmv.meas_value
94       , fmv.min_t
95       , fmv.max_t
96       , trim(regexp_substr(fmv.meas_value, '[^;]+', 1, ei.item)) choice_label
97  from flow_measure_values fmv, enumerate_items ei
98) fmvc where fmvc.choice_label is not null;
99-- select * from flow_measure_choices;
100
101
102/* check for collisions: if the first count is > 1, we have a problem.
103
104with hash_check as (
105  select distinct flo_meas_id, choice_label, choice_hash
106  from flow_measure_choices )
107select count(*), flo_meas_id, choice_hash
108from hash_check
109group by flo_meas_id, choice_hash
110order by 1 desc;
111*/
112
113
114/* browse around the results
115select fmc.flo_meas_id
116     , ifgd.flo_meas_name
117     , fmc.choice_label
118     , ifcl.line
119     , ifcl.abbreviation
120     , ifcl.custom_value
121     , sum(qty)
122     , min(min_t)
123     , max(max_t)
124from flow_measure_choices fmc
125  left outer join CLARITY.ip_flo_cust_list ifcl
126    on fmc.flo_meas_id = ifcl.flo_meas_id
127    and (fmc.choice_label) = ifcl.abbreviation
128      or fmc.choice_label) = ifcl.custom_value)
129  join CLARITY.ip_flo_gp_data ifgd
130    on fmc.flo_meas_id = ifgd.flo_meas_id
131group by fmc.flo_meas_id, ifgd.flo_meas_name, fmc.choice_label, ifcl.line, ifcl.abbreviation, ifcl.custom_value
132order by 1, 2;
133
134select sum(qty)
135from flow_measure_choices fmc
136  left outer join CLARITY.ip_flo_cust_list ifcl
137    on fmc.flo_meas_id = ifcl.flo_meas_id
138    and (fmc.choice = ifcl.abbreviation
139      or fmc.choice = ifcl.custom_value)
140where ifcl.line is null;
141--4,540,985 out of 83,051,340; ~5%
142
143 */
144
145
146/*
147 * We assume choices that differ only by
148 * case aren't used in the same measurement.
149
150select count(*), flo_meas_id, meas_value, choice_hash, choice_label
151from flow_measure_choices
152group by flo_meas_id, meas_value, choice_hash, choice_label
153order by 1 desc;
154
155 */
156
157
158
159/* Now join the choices with ip_flo_cust_list.
160 * A temporary table won't suffice here since we're
161 * using the data in other sessions.
162
163we assume none of these:
164*/
165select case count(*) when 0 then 1 else 1/0 end as test_result from (
166select * from CLARITY.ip_flo_cust_list ifcl
167where ifcl.abbreviation is not null and ifcl.custom_value is null
168);
169
170
171truncate table flow_measure_multi;
172insert into flow_measure_multi
173select distinct meas_value
174              , fmc.flo_meas_id
175              , case
176                  when ifcl.line is null then choice_label
177                  else ifcl.custom_value
178                end choice
179              , case
180                  when ifcl.line is null then choice_hash
181                  else null
182                end choice_hash
183              , ifcl.line
184from flow_measure_choices fmc
185 left outer join CLARITY.ip_flo_cust_list ifcl
186  on fmc.flo_meas_id = ifcl.flo_meas_id
187  and (fmc.choice_label = ifcl.abbreviation
188    or fmc.choice_label = ifcl.custom_value);
189-- select * from flow_measure_multi;
190-- select count(*) from flow_measure_multi;
191
192
193/*
194Check uniqueness of key material:
195
196>1 here is bad news: */
197
198select case when qty = 1 then 1 else 0 end as test_result from (
199select count(*) as qty, meas_value, flo_meas_id, line
200from flow_measure_multi
201where line is not null
202group by meas_value, flo_meas_id, line
203order by 1 desc
204) where rownum = 1;
205
206/*earlier problem:
207select *
208from flow_measure_multi
209where flo_meas_id='1982'
210and meas_value='yes;Bed;Y';
211
212*/
213
214
215/*
216Any results from this query show meas_value data with no corresponding
217ip_flo_cust_list record; evidently ip_flo_cust_list gets updated
218without maintatining consistency with old meas_value data.
219
220This query shows the unmatched choice along with *all* of the
221ip_flo_cust_list values for that flo_meas_id.
222
223select ifgd.flo_meas_name
224     , choice
225     , fmm.flo_meas_id
226     , ifcl.abbreviation
227     , ifcl.custom_value
228     , fmm.meas_value
229from flow_measure_multi fmm
230 join CLARITY.ip_flo_gp_data ifgd
231   on fmm.flo_meas_id = ifgd.flo_meas_id
232 join CLARITY.ip_flo_cust_list ifcl
233   on fmm.flo_meas_id = ifcl.flo_meas_id
234where fmm.line is null;
235
236select count(*)
237from flow_measure_multi fmm
238where fmm.line is null;
239-- 60296 out of 946744; about 6%
240*/
Note: See TracBrowser for help on using the repository browser.