Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/flowsheet_concepts_reorg.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 11.2 KB
Line 
1/** flowsheet_concepts_prune.sql -- prune flowsheet concepts by similarity etc.
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7 * See also flowsheet_concepts_prune.sql.
8 */
9
10alter session set NLS_DATE_FORMAT = 'YYYY-MM-DD HH:MI';
11set timing on;
12set echo on;
13select sysdate from dual;
14
15
16/* Enumerate all the un-merged concepts,
17   presuming <35 and non-real-build stuff has already
18   been pruned from flowsheet_pruned_trees
19*/
20create or replace view flowsheets_pruned_root as
21select
22    to_number(null) threshold
23  , 'R' kind
24  , 'FA' c_visualattributes
25  , 1 c_hlevel
26  , to_number(null) tgm_id
27  , null c_basecode
28  , 'Flowsheets (research)' c_name
29  , '\i2b2\Flowsheets Pruned\' c_dimcode
30from dual;
31
32create or replace view flowsheet_thresholds as
33select 0.9 threshold from dual union all
34select 0.5 from dual union all
35select 0.25 from dual union all
36select 0.1 from dual;
37
38create or replace view flowsheets_threshold_buckets as
39select
40    ft.threshold
41  , 'B' kind
42  , 'FA' c_visualattributes
43  , c_hlevel + 1 as c_hlevel
44  , to_number(null) tgm_id
45  , null c_basecode
46  , 'Threshold ' || ft.threshold as c_name
47  , c_dimcode || 'SIM:' || ft.threshold || '\' as c_dimcode
48from flowsheets_pruned_root fpr, flowsheet_thresholds ft;
49
50/* Using separate tables is a little goofy, but oh well... */
51create or replace view group_merges_t as
52select 0.9 threshold,  t.* from group_merges_0_90 t union all
53select 0.5,  t.* from group_merges_0_50 t union all
54select 0.25, t.* from group_merges_0_25 t union all
55select 0.1,  t.* from group_merges_0_10 t;
56
57create or replace view fs_tree_work_t as
58select 0.9 threshold, t.* from fs_tree_work_0_90 t union all
59select 0.5,  t.* from fs_tree_work_0_50 t union all
60select 0.25, t.* from fs_tree_work_0_25 t union all
61select 0.1,  t.* from fs_tree_work_0_10 t;
62
63create or replace view fs_mem_work_t as
64select 0.9 threshold,  t.* from fs_mem_work_0_90 t union all
65select 0.5,  t.* from fs_mem_work_0_50 t union all
66select 0.25, t.* from fs_mem_work_0_25 t union all
67select 0.1,  t.* from fs_mem_work_0_10 t;
68
69
70drop table flowsheet_pruned_trees;
71create table flowsheet_pruned_trees as
72select distinct threshold, kind, tgm_id, c_name
73from fs_tree_work_t
74where kind in ('T', 'G')
75  and pruned=0;
76
77create or replace view flowsheets_template_concepts as
78select
79    ftb.threshold
80  , 'T' kind
81  , 'FA' c_visualattributes
82  , ftb.c_hlevel + 1 as c_hlevel
83  , fpt.tgm_id
84  , null c_basecode
85  , fpt.c_name
86  , c_dimcode || 'T:' || fpt.tgm_id || '\' as c_dimcode
87from flowsheets_threshold_buckets ftb
88     join flowsheet_pruned_trees fpt
89       on fpt.threshold = ftb.threshold
90where fpt.kind = 'T'
91/*order by tgm_id desc, threshold*/;
92
93create or replace view flowsheets_group_concepts as
94select
95    ftc.threshold
96  , 'G' kind
97  , 'FA' c_visualattributes
98  , ftc.c_hlevel + 1 as c_hlevel
99  , fpt.tgm_id
100  , null c_basecode
101  , fpt.c_name
102  , c_dimcode || 'G:' || fpt.tgm_id || '\' as c_dimcode
103from flowsheets_template_concepts ftc
104     join fs_mem_work_t fmw
105       on fmw.whole_kind = 'T'
106         and fmw.threshold = ftc.threshold
107         and fmw.whole_tgm_id = ftc.tgm_id
108     join flowsheet_pruned_trees fpt
109       on fpt.threshold = ftc.threshold
110         and fpt.tgm_id = fmw.part_tgm_id
111where fmw.merged = 0 and fpt.kind = 'G'
112/*order by tgm_id desc, whole_tgm_id desc, threshold*/;
113
114create or replace view flowsheets_measures as
115select distinct
116    'M' kind
117  , ftw.c_visualattributes
118  , ftw.tgm_id
119  , ftw.c_basecode
120  , ftw.c_name
121from flowsheet_tree_work ftw
122where kind='M'
123  and c_basecode not like '%DIASTOLIC';
124
125-- measures directly on templates
126create or replace view flowsheets_tmeasure_concepts as
127select
128    ftc.threshold
129  , fm.kind
130  , fm.c_visualattributes
131  , ftc.c_hlevel + 1 as c_hlevel
132  , fm.tgm_id
133  , fm.c_basecode
134  , fm.c_name
135  , ftc.c_dimcode || 'M:' || fm.tgm_id || '\' as c_dimcode
136from flowsheets_template_concepts ftc
137     join (select distinct whole_tgm_id, part_tgm_id
138           from whole_part
139           where whole_kind = 'T' and part_kind = 'M'
140           and part_c_dimcode not like '%G:%') pw
141       on pw.whole_tgm_id = ftc.tgm_id
142     join flowsheets_measures fm
143       on fm.tgm_id = pw.part_tgm_id;
144
145-- measures in groups
146create or replace view flowsheets_gmeasure_concepts as
147select
148    fgc.threshold
149  , fm.kind
150  , fm.c_visualattributes
151  , fgc.c_hlevel + 1 as c_hlevel
152  , fm.tgm_id
153  , fm.c_basecode
154  , fm.c_name
155  , fgc.c_dimcode || 'M:' || fm.tgm_id || '\' as c_dimcode
156from flowsheets_group_concepts fgc
157     join (select distinct threshold, whole_tgm_id, part_tgm_id
158           from fs_mem_work_t
159           where whole_kind = 'G' and merged=0) fmw
160       on fmw.whole_tgm_id = fgc.tgm_id
161        and fmw.threshold = fgc.threshold
162     join flowsheets_measures fm
163       on fm.tgm_id = fmw.part_tgm_id;
164
165
166
167
168-- choice, blood pressure concepts
169create or replace view flowsheets_mpart_concepts as
170select
171    fmc.threshold
172  , ftw.kind
173  , ftw.c_visualattributes
174  , fmc.c_hlevel + 1 as c_hlevel
175  , ftw.tgm_id
176  , ftw.c_basecode
177  , ftw.c_name
178  , fmc.c_dimcode || 'MX:' || ftw.c_basecode || '\' as c_dimcode
179from (select * from flowsheets_tmeasure_concepts union all
180      select * from flowsheets_gmeasure_concepts) fmc
181     join (select distinct kind, c_visualattributes, tgm_id, c_basecode, c_name
182           from flowsheet_tree_work
183           where pruned = 0 and kind = 'M+' or
184                 kind = 'MS' or
185                 (kind = 'M' and c_basecode like '%DIASTOLIC')) ftw
186       on ftw.tgm_id = fmc.tgm_id;
187
188
189/*
190select * from flowsheets_mpart_concepts
191
192select count(*), threshold from flowsheets_mpart_concepts group by threshold;
193
194-- scratchpad below, slightly different than flowsheet_prune_impact_summary
195select count(*), threshold
196from (select * from flowsheets_tmeasure_concepts union all
197      select * from flowsheets_gmeasure_concepts)
198      group by threshold;
199     
200-- scratchpad below
201select count(*)
202from (select distinct kind, c_visualattributes, tgm_id, c_basecode, c_name
203           from flowsheet_tree_work
204           where kind = 'M+' or
205                 kind = 'MS' or
206                 (kind = 'M' and c_basecode like '%DIASTOLIC'))
207-- 59048
208 */
209 
210drop table flowsheets_concept_hierarchy;
211create table flowsheets_concept_hierarchy as
212select * from (
213  select * from flowsheets_pruned_root union all
214  select * from flowsheets_threshold_buckets union all
215  select * from flowsheets_template_concepts union all
216  select * from flowsheets_group_concepts union all
217  select * from flowsheets_tmeasure_concepts union all
218  select * from flowsheets_gmeasure_concepts union all
219  select * from flowsheets_mpart_concepts
220)
221order by c_dimcode;
222-- select count(*) from flowsheets_concept_hierarchy;
223-- 316615
224
225/*****************
226 * Summarize impact of merging
227 */
228
229create or replace view flowsheet_prune_impact_summary as
230select kinds.label step
231     , orig.qty as "original"
232     , real_build.qty as "Real build"
233     , rem35.qty as "Remove<35"
234     , t9.qty as "G, T >= 0.9"
235     , t5.qty as "G, T >= 0.5"
236     , t25.qty as "G, T >= 0.25"
237     , t1.qty as "G, T >= 0.1"
238from
239 (select 'Total concepts' label, null kind, 1 col from dual union all
240 select 'Templates', 'T', 2 from dual union all
241 select 'Groups', 'G', 3 from dual union all
242 select 'Measures', 'M', 4 from dual union all
243 select 'Measure choices', 'M+', 5 from dual) kinds
244
245join (
246 select kind, count(*) qty
247 from flowsheet_tree_work
248 group by kind
249union all
250 select null, count(*) qty
251 from flowsheet_tree_work) orig
252on orig.kind=kinds.kind
253  or (orig.kind is null and kinds.kind is null)
254
255join (
256 select kind, count(*) qty
257 from flowsheet_tree_work
258 where real_build <> 0
259 group by kind
260union all
261 select null, count(*) qty
262 from flowsheet_tree_work
263  where real_build <> 0) real_build
264on real_build.kind=kinds.kind
265  or (real_build.kind is null and kinds.kind is null)
266
267join (
268 select kind, count(*) qty
269 from flowsheet_tree_work ftw
270 where pruned=0 and real_build <> 0
271 group by kind
272union all
273 select null, count(*) qty
274 from flowsheet_tree_work
275 where pruned=0 and real_build <> 0) rem35
276on rem35.kind=kinds.kind
277  or (rem35.kind is null and kinds.kind is null)
278
279join (
280 select kind, count(*) qty
281 from flowsheets_concept_hierarchy fch
282 where threshold = 0.9
283 group by kind
284union all
285 select null, count(*) qty
286 from flowsheets_concept_hierarchy
287 where threshold = 0.9
288) t9
289on t9.kind=kinds.kind
290  or (t9.kind is null and kinds.kind is null)
291
292join (
293 select kind, count(*) qty
294 from flowsheets_concept_hierarchy fch
295 where threshold = 0.5
296 group by kind
297union all
298 select null, count(*) qty
299 from flowsheets_concept_hierarchy
300 where threshold = 0.5
301) t5
302on t5.kind=kinds.kind
303  or (t5.kind is null and kinds.kind is null)
304
305join (
306 select kind, count(*) qty
307 from flowsheets_concept_hierarchy fch
308 where threshold = 0.25
309 group by kind
310union all
311 select null, count(*) qty
312 from flowsheets_concept_hierarchy
313 where threshold = 0.25
314) t25
315on t25.kind=kinds.kind
316  or (t25.kind is null and kinds.kind is null)
317
318join (
319 select kind, count(*) qty
320 from flowsheets_concept_hierarchy fch
321 where threshold = 0.1
322 group by kind
323union all
324 select null, count(*) qty
325 from flowsheets_concept_hierarchy
326 where threshold = 0.1
327) t1
328on t1.kind=kinds.kind
329  or (t1.kind is null and kinds.kind is null)
330order by col;
331
332select * from flowsheet_prune_impact_summary;
333
334
335/* Why so many tempates at 0.90?
336Ah... real_build.
337Don't forget to re-build both flowsheet_pruned_trees
338and flowsheets_concept_hierarchy
339
340select count(*)
341from fs_tree_work_0_90 fch
342where kind='T';
343
344select *
345from fs_tree_work_0_90 fch
346where kind='T' and tgm_id in
347(select distinct tgm_id
348from flowsheet_tree_work
349where kind='T' and real_build=0);
350
351select *
352from flowsheets_concept_hierarchy fch
353where threshold=0.9 and kind='T' and tgm_id in
354(select distinct tgm_id
355from flowsheet_tree_work
356where kind='T' and real_build=0);
357*/
358
359delete from BlueHeronMetadata.i2b2 where c_fullname like '\i2b2\Flowsheets Pruned\%';
360insert into BlueHeronMetadata.i2b2 (
361  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode,
362  c_visualattributes, /*@@c_metadataxml,*/
363  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype, c_operator,
364  import_date, sourcesystem_cd)
365select c_hlevel
366     , c_dimcode
367     , c_name
368     , c_basecode
369     , c_dimcode
370     , c_visualattributes
371     /*, c_metadataxml*/
372     , i2b2_normal_concept.*
373     , sysdate
374     , aud.source_cd
375from flowsheets_concept_hierarchy, epic_audit_info aud, i2b2_normal_concept;
376-- select * from BlueHeronMetadata.i2b2 where c_fullname like '\i2b2\Flowsheets Pruned\%';
377
378
379
380/* why so many concepts? */
381select whole_kind, threshold, cohort, to_char(avg(siz), '99.99') a, count(*) qty, to_char(avg(siz)*count(*), '999999.99') a_q
382from (
383 select threshold, whole_kind, whole_tgm_id, 'unmerged' cohort, count(*) siz
384 from fs_mem_work_t where merged=0
385 group by threshold, whole_kind, whole_tgm_id, 'unmerged'
386union all
387 select threshold, whole_kind, whole_tgm_id, 'all' cohort, count(*) siz
388 from fs_mem_work_t
389 group by threshold, whole_kind, whole_tgm_id, 'all'
390union all
391 select threshold, whole_kind, whole_tgm_id, 'before' cohort, count(*) siz
392 from fs_mem_work_t where whole_tgm_id < 100000000000
393 group by threshold, whole_kind, whole_tgm_id, 'before'
394) group by whole_kind, threshold, cohort
395order by whole_kind, threshold desc, cohort;
Note: See TracBrowser for help on using the repository browser.