Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/create_concept_shortcuts.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 15.2 KB
Line 
1/* create_concept_shortcuts.sql: creates shortcut concepts form existing concepts
2 *
3 * Copyright (c) 2013 University of Kansas Medical Center
4 * part of the HERON* open source codebase; see NOTICE file for license details.
5 * http://informatics.kumc.edu/work/wiki/HERON
6 *
7 * Shortcut Table Schema
8 * ---------------------
9 * parent - the direct parent (reference "key") of the shortcut concept
10 * key - the key serves two purposes: it is the identifier used to construct
11 *       the hierarchy (along with the "parent") and is the means to identify
12 *       the concept in heron_terms if it is *not* a fabricated concept.
13 * key_type - identifies what column in the concept_dimension the key correlates
14 *            to.  Currently supports: "name" (c_name), "basecode" (c_basecode),
15              and "fullpath" (c_fullpath).
16 * name - the new c_name of the shortcut concept (can be null if you want to
17 *        keep the original c_name).
18 * kind - "Kinds" of shortcut concepts:
19 *       node - an existing concept w/o its sub concepts
20 *       tree - an existing concept w/ ALL of its sub concepts
21 *       fabricated - a brand new hierarchical concept
22 *
23 * Suppose we have a large, unwieldy hierarchy of meds and we
24 * want to make some of them easier to find/use. We want to make Asprin and
25 * Meds2 (a.k.a. Vitamins) easier to use.  So we make a new high-level folder,
26 * Easy-Meds. It has three children, one of which has a further child.
27 *
28 * Shortcut Table Example
29 * ----------------------
30 * |----------------------------------------------------------------|
31 * |   =parent=   |   =key=   | =key_type= |   =name=   |  =kind=   |
32 * |----------------------------------------------------------------|
33 * |     null     |     A     |    name    | Easy Meds  | fabricated|
34 * |----------------------------------------------------------------|
35 * |      A       |     B     |    name    | Pain Meds  | fabricated|
36 * |----------------------------------------------------------------|
37 * |      B       |  Aspirin  |    name    |    null    |   tree    |
38 * |----------------------------------------------------------------|
39 * |      A       |   Meds2   |    name    |  Vitamins  |   tree    |
40 * |----------------------------------------------------------------|
41 * |      A       | MED_ID:12 |  basecode  | Heart Meds |   node    |
42 * |----------------------------------------------------------------|
43 * Generates (in i2b2 land):
44 * |A| Easy Meds
45 *  +---->|B| Pain Meds
46 *  |      +---->|*Aspirin*| Aspirin
47 *  |               +---->|*Aspirin liquid*|
48 *  |               +---->|*Aspirin pill*|
49 *  |               |            +------------->|*Aspirin big pill*|
50 *  |               +---->(etc)
51 *  +---->|*Meds2*| Vitamins
52 *  +---->|*MED_ID:12*| Heart Med
53 *
54 * ("*...*" - Denotes an existing concept in heron terms)
55 *
56 * Substitution Variable List
57 * --------------------------
58 *
59 * The script is parameterized by 4 substitutions:
60 * 1) &&shortcut_table_name - the name of the shortcut origin table
61 *                            (i.e. the name of the curated shortcut table)
62 * 2) &&shortcut_prefix - the prefix path to be attached to the c_fullname of
63 *                        each new shortcut concept.
64 * 3) &&source_cd - the specified origin of the new shortcuts.
65 * 4) &&heron_terms - the table where the original concepts reside.
66 *                    (Also the table where the shortcut terms will be
67 *                     inserted)
68 */
69 
70 
71/*
72create or replace view test_shortcut_table as
73
74select 'Abridged' as key,                   'Cancer Case Identifiers (Abridged)' as name, 'fabricated' as kind, null as parent,                 'name' as key_type from dual union all
75select 'Demographics' as key,               'Demographics' as name,                       'fabricated' as kind, 'Abridged' as parent,           'name' as key_type from dual union all
76select '0160 Race' key,                     null as name,                                 'tree' as kind,       'Demographics' as parent,       'name' as key_type from dual union all
77select '0190 Spanish/Hispanic Origin' key,  null as name,                                 'tree' as kind,       'Demographics' as parent,       'name' as key_type from dual union all
78select '0220 Sex' key,                      null as name,                                 'tree' as kind,       'Demographics' as parent,       'name' as key_type from dual union all
79select '0240 Date of Birth' key,            null as name,                                 'tree' as kind,       'Demographics' as parent,       'name' as key_type from dual union all
80select '0521 Morph' key,                    null as name,                                 'tree' as kind,       'Abridged' as parent,           'name' as key_type from dual union all
81select '0490 Diagnostic Confirmation' key,  null as name,                                 'tree' as kind,       'Abridged' as parent,           'name' as key_type from dual union all
82select '0523 Behavior Code ICD-O-3' key,    null as name,                                 'tree' as kind,       'Abridged' as parent,           'name' as key_type from dual union all
83select '0440 Grade' key,                    null as name,                                 'tree' as kind,       'Abridged' as parent,           'name' as key_type from dual union all
84select '0610 Class of Case' key,            null as name,                                 'node' as kind,       'Abridged' as parent,           'name' as key_type from dual union all
85select 'Analytic' key,                      'Analytic' as name,                           'fabricated' as kind, '0610 Class of Case' as parent, 'name' as key_type from dual union all
86select 'Non-analytic' key,                  'Non-analytic' as name,                       'fabricated' as kind, '0610 Class of Case' as parent, 'name' as key_type from dual union all
87select 'NAACCR|610:20' key,                 null as name,                                 'tree' as kind,       'Analytic' as parent,           'basecode' as key_type from dual union all
88select 'NAACCR|610:32' key,                 null as name,                                 'tree' as kind,       'Non-analytic' as parent,       'basecode' as key_type from dual union all
89select '0390 Date of Diagnosis' key,        null as name,                                 'tree' as kind,       'Abridged' as parent,           'name' as key_type from dual;
90
91undefine shortcut_table_name;
92undefine shortcut_prefix;
93undefine source_cd;
94undefine heron_terms;
95
96create or replace view test_invalid_shortcut_table as
97
98select 'Abridged' as key,                   'Cancer Case Identifiers (Abridged)' as name, 'fabricated' as kind, null as parent,                 'name' as key_type from dual union all
99select 'Demographics' as key,               'Demographics' as name,                       'fabricated' as kind, 'Abridged' as parent,           'name' as key_type from dual union all
100select '0160 Race' key,                     null as name,                                 'tree' as kind,       'Demographics' as parent,       'name' as key_type from dual union all
101select '0190 Spanish/Hispanic Origin' key,  null as name,                                 'tree' as kind,       'Demographics' as parent,       'name' as key_type from dual union all
102select '0220 Sex' key,                      null as name,                                 'tree' as kind,       'Demographics' as parent,       'name' as key_type from dual union all
103select null key,                            null as name,                                 'tree' as kind,       'Demographics' as parent,       null as key_type from dual union all
104select '0521 Morph' key,                    null as name,                                 'tree' as kind,       'Abridged' as parent,           'name' as key_type from dual union all
105select '0490 Diagnostic Confirmation' key,  null as name,                                 'tree' as kind,       'Abridged' as parent,           'name' as key_type from dual union all
106select '0523 Behavior Code ICD-O-3' key,    null as name,                                 'tree' as kind,       'Abridged' as parent,           'namer' as key_type from dual union all
107select '0440 Grade' key,                    null as name,                                 'tree' as kind,       'Abridged' as parent,           'name' as key_type from dual union all
108select '0610 Class of Case' key,            null as name,                                 'noder' as kind,      'Abridged' as parent,           'name' as key_type from dual union all
109select 'Analytic' key,                      'Analytic' as name,                           'fabricated' as kind, '0610 Class of Case' as parent, null as key_type from dual union all
110select 'Non-analytic' key,                  null as name,                                 'fabricated' as kind, '0610 Class of Case' as parent,  null as key_type from dual union all
111select 'NAACCR|610:20' key,                 null as name,                                 'tree' as kind,       'Analytic' as parent,           'basecode' as key_type from dual union all
112select 'NAACCR|610:32' key,                 'Dupe2' as name,                              'tree' as kind,       'Non-analytic' as parent,       'basecode' as key_type from dual union all
113select 'NAACCR|610:32' key,                 'Dupe' as name,                               'tree' as kind,       'Non-analytic' as parent,       'basecode' as key_type from dual union all
114select '0390 Date of Diagnosis' key,        null as name,                                 'tree' as kind,       'Abridged 2' as parent,          'name' as key_type from dual;
115
116*/
117
118-- Insure that heron_terms has been constructed
119select * from blueheronmetadata.heron_terms where 1 = 0;
120
121create or replace view shortcut_table as select * from &&shortcut_table_name;
122
123-- Validate that the curated table does not violate any constraints
124select case when count(*) > 0 then 1/0 else 0 end as invalid_discrete_value
125from
126(
127  select * from(
128  select st.*,
129    case
130      when st.key is null
131        then 'Key cannot be null'
132      when st.key in (select st_cmp.key
133                      from shortcut_table st_cmp
134                      group by key having count(*) > 1)
135        then 'Key is not unique'
136      when st.kind not in ('tree', 'fabricated', 'node')
137        then 'Kind must be "tree", "fabricated" or "node"'
138      when st.kind = 'fabricated' and name is null
139        then 'If "fabricated" then a name must exist'
140      when st.kind != 'fabricated' and st.key_type not in ('name', 'basecode', 'fullname')
141        then 'If not "fabricated" then the key_type must be "name", "basecode", or "fullname"'
142      when not exists (select null from shortcut_table st_cmp where st.parent = st_cmp.key)
143           and st.parent is not null
144        then 'Parent referenced, but the corresponding key does not exist in the curated table'
145      else null
146    end as err_desc
147  from shortcut_table st)
148  where err_desc is not null
149);
150
151
152create or replace view sc_heron_terms_merge as
153
154with sc_fab_fill as
155(select null as c_hlevel,
156       null as c_fullname,
157       null as c_name,
158       'N' as c_synonym_cd,
159       'FA' as c_visual_attributes,
160       null as c_totalnum,
161       null as c_basecode,
162       null as c_metadataxml,
163       'concept_cd' as c_facttablecolumn,
164       'concept_dimension' as c_tablename,
165       'concept_path' as c_columnname,
166       'T' as c_columndatatype,
167       'like' as c_operator,
168       null as c_dimcode,
169       null as c_comment,
170       null as c_tooltip,
171       '@' as m_applied_path,
172       null as update_date,
173       null as download_date,
174       null as import_date,
175       null as sourcesystem_cd, -- fix this... to be the right source system
176       null as valuetype_cd,
177       null as m_exclusion_cd,
178       null as c_path,
179       null as c_symbol
180from dual)
181
182select ht.*,
183      st.parent,
184      st.key,
185       (case when st.name is not null
186        then st.name
187        else ht.c_name
188        end) new_c_name,
189      st.kind,
190      substr(ht.c_fullname,
191             instr(ht.c_fullname, '\', -2) + 1,
192             length(ht.c_fullname) - instr(ht.c_fullname, '\', -2) -1) as path_name
193from shortcut_table st
194join blueheronmetadata.&&heron_terms ht on st.kind != 'fabricated'
195                                           and (ht.c_name like st.key || '%' and
196                                                    st.key_type = 'name'
197                                                or
198                                                ht.c_basecode = st.key and
199                                                    st.key_type = 'basecode'
200                                                or
201                                                ht.c_fullname = st.key and
202                                                    st.key_type = 'fullname'
203                                               
204                                              )
205union all
206select ff.*, st.parent, st.key, st.name, st.kind, st.key from shortcut_table st, sc_fab_fill ff where st.kind = 'fabricated';
207
208-- select * from sc_heron_terms_merge;
209
210--Validate that sc_heron_terms_merge view has only one term for each parent and
211--key identified in the the original shortcut table. 
212--(e.g. pattern didn't find more than one match)
213
214select case when count(*) > 0 then 1/0 else 0 end as concepts_with_same_parent_key
215from
216(select count(*) as entries, shtm.parent, shtm.key
217 from sc_heron_terms_merge shtm
218 group by shtm.parent, shtm.key
219 having count(*) > 1);
220
221
222create or replace view shortcut_concepts as
223
224with test_shortcut_closure as
225(select htm.*,
226        '&&shortcut_prefix' || sys_connect_by_path(htm.path_name, '\')||'\' as new_c_fullpath
227 from sc_heron_terms_merge htm
228 start with parent is null
229 connect by prior key = parent),
230 
231tree_concepts as
232(select ht.*, null as parent, null as key, ht.c_name as new_c_name, null as kind,
233       null as path_name,
234       sc.new_c_fullpath||substr(ht.c_fullname,length(sc.c_fullname)+1) as new_c_fullpath
235 from blueheronmetadata.heron_terms ht
236 join test_shortcut_closure sc on ht.c_fullname like sc.c_fullname || '_%'
237                              and sc.c_fullname is not null
238                              and sc.kind = 'tree'
239)
240
241select * from tree_concepts union all
242select * from test_shortcut_closure;
243
244--select * from shortcut_concepts;
245
246insert into blueheronmetadata.heron_terms (
247  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode, c_visualattributes,
248  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
249  c_operator, m_applied_path,
250  update_date, import_date, sourcesystem_cd
251)
252
253select
254  length(new_c_fullpath) - length(replace(new_c_fullpath, '\', '')) - 2 as c_hlevel,
255  new_c_fullpath as c_fullname,
256  new_c_name as c_name,
257  c_basecode as c_basecode,
258  new_c_fullpath as c_dimcode,
259  c_visualattributes as c_visualattributes,
260  c_synonym_cd as c_synonym_cd,
261  c_facttablecolumn as c_facttablecolumn,
262  c_tablename as c_tablename,
263  c_columnname as c_columnname,
264  c_columndatatype as c_columndatatype,
265  c_operator as c_operator,
266  m_applied_path as m_applied_path,
267  (case when update_date is not null then update_date else sysdate
268   end) as update_date,
269  (case when import_date is not null then import_date else sysdate
270   end) as import_date,
271  (select source_cd
272   from blueherondata.source_master
273   where source_cd like '&&source_cd' || '@%') as sourcesystem_cd
274
275from shortcut_concepts;
Note: See TracBrowser for help on using the repository browser.