Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/concepts_merge.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 16.5 KB
Line 
1/** concepts_merge -- combine concepts from various sources.
2
3Copyright (c) 2012-2014 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7See also epic_concepts_load.sql .
8
9for performance work:
10set timing on;
11set echo on;
12*/
13
14-- Check that we're running in the de-id database.
15select c_table_cd from blueheronmetadata.table_access where 1=0;
16
17-- Check that we've got the 1.6 concepts table
18select m_applied_path from i2b2metadata2.i2b2 where 1=0;
19
20
21whenever sqlerror continue;
22alter table BlueHeronMetadata.heron_terms drop (term_id);
23whenever sqlerror exit;
24truncate table BlueHeronMetadata.heron_terms;
25
26-- Copy concepts from i2b2 sources.
27insert into BlueHeronMETADATA.heron_terms
28select * from I2B2METADATA2.I2B2 m
29  -- we have our own religion/language/race/age codes
30  where c_fullname like '\i2b2\Demographics\%\Not Recorded\%'
31    or (c_fullname not like '\i2b2\Demographics\Religion\_%'
32    and c_fullname not like '\i2b2\Demographics\Language\_%'
33    and c_fullname not like '\i2b2\Demographics\Race\_%'
34    and c_fullname not like '\i2b2\Demographics\Zip codes\%'
35    and c_fullname not like '\i2b2\Demographics\Age\%'
36    -- we get our Diagnoses from the UMLS; but see below re old queries
37    and c_fullname not like '\i2b2\Diagnoses\_%'
38    -- ... and our own lab codes
39    and c_fullname not like '\i2b2\Labtests\_%'
40    -- ... and our own alerts
41    and c_fullname not like '\i2b2\Alerts\_%'   
42    -- ... and our own medications
43    and c_fullname not like '\i2b2\Medications\_%'
44    -- we don't support these yet
45    and c_fullname not like '\i2b2\Provider%'
46    and c_fullname not like '\i2b2\Reports%'
47    and c_fullname not like '\i2b2\Visit Details%'
48    and c_fullname not like '\i2b2\Demographics\Income%'
49    )
50  --Don't merge in any modifiers from the i2b2 data - we'll add what we support.
51  and c_tablename != 'MODIFIER_DIMENSION';
52-- 59,691 rows
53
54
55/** Fill Modifier Dimension from metadata table.
56
57TODO: move this to concepts_activate.sql
58*/
59truncate table BlueHeronData.modifier_dimension;
60
61insert into BlueHeronData.modifier_dimension (
62  modifier_path, modifier_cd, name_char,
63  modifier_blob, update_date, download_date,
64  import_date, sourcesystem_cd, upload_id)
65select c_fullname modifier_path
66     , c_basecode modifier_cd
67     , c_name name_char
68     , null modifier_blob
69     , update_date, download_date, import_date, sourcesystem_cd
70     , null upload_id
71from BlueHeronMetadata.epic_terms
72where c_tablename = 'MODIFIER_DIMENSION';
73
74insert into BlueHeronMetadata.HERON_TERMS
75select * from BlueHeronMetadata.epic_demo_terms;
76
77insert into BlueHeronMetadata.HERON_TERMS
78select * from BlueHeronMetadata.epic_terms;
79-- 982,020 rows inserted.
80
81
82/* This isn't really part of merging concepts, but it's
83not clear where else to put it.
84So while we're mucking about with i2b2 1.6-ism,
85turn off the query result types that don't work.
86*/
87update BlueHeronData.QT_QUERY_RESULT_TYPE qrt
88set qrt.visual_attribute_type_id = 'LH'
89where qrt.description not in ('Patient set', 'Number of patients');
90
91
92delete from BLUEHERONMETADATA.heron_terms
93where c_fullname like '\i2b2\naaccr\%';
94insert into BlueHeronMetadata.HERON_TERMS
95select * from BlueHeronMetadata.NAACCR_ontology;
96-- 16345 rows inserted.
97
98insert into BlueHeronMetadata.HERON_TERMS
99select * from BlueHeronMetadata.uhcterms;
100
101insert into BlueHeronMetadata.HERON_TERMS
102select * from BlueHeronMetadata.rxnorm_terms;
103
104/* #2071 Removing dependencies between concepts_merge.sql and concept_stats.sql
105insert into BlueHeronMetadata.HERON_TERMS
106select * from BlueHeronMetadata.redcap_terms;
107*/
108
109delete from BLUEHERONMETADATA.custom_meta
110where c_fullname = '\i2b2\Demographics\HICTR Participant\';
111
112insert into BLUEHERONMETADATA.custom_meta (
113  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode,
114  c_visualattributes,
115  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
116    c_operator, m_applied_path,
117  update_date, import_date, sourcesystem_cd
118)
119select
120  2, c.concept_path, c.concept_name, c.concept_cd, c.concept_path,
121  'LA',
122  norm.*,
123  sysdate, sysdate, aud.source_cd
124from (
125  select '\i2b2\Demographics\HICTR Participant\' as concept_path
126       , 'HICTR_PARTICIPANT:yes' as concept_cd
127       , 'Frontiers Research Participant Registry' as concept_name
128  from dual) c
129  , BlueHeronMetadata.normal_concept norm
130  , (select * from BlueHeronData.Source_Master
131     where source_cd like 'heron-admin@%') aud;
132
133/* NCDR Concepts*/
134insert into BLUEHERONMETADATA.heron_terms(
135  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode,
136  c_visualattributes,c_metadataxml,
137  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
138  c_operator,  m_applied_path,
139  update_date, import_date, sourcesystem_cd
140)
141select * from ncdr_terms;
142
143/***************
144 * Demographics concepts: SS Death Master File
145 */
146delete from BLUEHERONMETADATA.custom_meta
147where c_fullname = '\i2b2\Demographics\Vital Status\Deceased per SSA\';
148
149insert into BLUEHERONMETADATA.custom_meta (
150  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode,
151  c_visualattributes,
152  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
153  c_operator, m_applied_path,
154  update_date, import_date, sourcesystem_cd
155)
156select
157  3, c.concept_path, c.concept_name, c.concept_cd, c.concept_path,
158  'LA',
159  norm.*,
160  sysdate, sysdate, src.source_cd
161from (
162  select '\i2b2\Demographics\Vital Status\Deceased per SSA\' as concept_path
163       , 'DEM|VITAL|SSA:y' as concept_cd
164       , 'Deceased per SSA' as concept_name
165  from dual) c
166  , BlueHeronMetadata.normal_concept norm
167  , (select * from BlueHeronData.source_master
168where source_cd like 'SSN@%')  src;
169
170-- TODO: consider a separate table for the BSR ontology.
171insert into BlueHeronMetadata.HERON_TERMS
172select * from BlueHeronMetadata.custom_meta;
173
174commit;
175
176
177/** ICD9/10 Diagnosis hierarchy
178
179keep handy:
180delete from BlueHeronMetadata.heron_terms
181where c_fullname like '\i2b2\Diagnoses\A18090800\%';
182
183#1319: Post change the number of leaves have gone down from 14567 to 18
184and number of folders has gone up from 3169 to 17718
185
186For usability, hide we hide the the "DISEASES AND INJURIES" folder - see #1380.
187*/
188
189create or replace view diag_const as
190select '\i2b2\Diagnoses\ICD9\A18090800\A8359006\' diseases_and_injuries, icd9_scheme scheme from scheme_labels;
191
192/* Verify that the folder want to hide is actually in the hierarchy before
193attempting to hide it.  See #2822.
194*/
195select case when count(*) > 0 then 1 else 1/0 end diseases_and_injuries_test from(
196select * from blueheronmetadata.umls_icd9_10 uicd, diag_const dc
197where uicd.c_fullname = dc.diseases_and_injuries
198);
199
200whenever sqlerror continue;
201drop table unique_dx_codes;
202whenever sqlerror exit;
203
204create table unique_dx_codes as
205select distinct icd9_10_code
206from blueheronmetadata.epic_icd9_10 et;
207
208insert into BlueHeronMetadata.heron_terms
209SELECT
210  C_HLEVEL,
211  C_FULLNAME ,
212  C_NAME ,
213  C_SYNONYM_CD ,
214  CASE
215    WHEN c_fullname = dc.diseases_and_injuries THEN 'FH'
216    WHEN ett.icd9_10_code IS NOT NULL THEN 'FA'
217    ELSE uicd.C_VISUALATTRIBUTES
218  END AS C_VISUALATTRIBUTES,
219  C_TOTALNUM ,
220  C_BASECODE ,
221  C_METADATAXML ,
222  C_FACTTABLECOLUMN ,
223  C_TABLENAME ,
224  C_COLUMNNAME ,
225  C_COLUMNDATATYPE ,
226  C_OPERATOR ,
227  C_DIMCODE ,
228  C_COMMENT ,
229  C_TOOLTIP ,
230  M_APPLIED_PATH ,
231  UPDATE_DATE ,
232  DOWNLOAD_DATE ,
233  IMPORT_DATE ,
234  SOURCESYSTEM_CD ,
235  VALUETYPE_CD ,
236  M_EXCLUSION_CD ,
237  C_PATH ,
238  C_SYMBOL
239FROM BlueHeronMetadata.umls_icd9_10 uicd
240cross join diag_const dc
241LEFT JOIN unique_dx_codes ett
242ON dc.scheme || ett.icd9_10_code = uicd.C_BASECODE
243;
244
245-- Testing if folder/leaf assignment is done properly, for #1319
246-- In the earlier version of code the following ICD9 codes were wrongly made folders
247
248with leaf_dx_with_child as (
249select *
250from BlueHeronMetadata.heron_terms umls
251join BlueHeronMetadata.heron_terms epic
252on epic.c_fullname like (umls.c_fullname || '_%')
253where umls.c_basecode in (
254   'ICD9:649.40',
255   'ICD9:E973' 
256   )
257   and umls.c_visualattributes = 'LA'
258)
259select case when count(*) = 0 then 1 else 1/0 end no_child_of_leaf_dx
260from leaf_dx_with_child
261;
262
263
264/**
265To separate diagnoses codes from rest of epic terms #1319
266*/
267insert into BlueHeronMetadata.heron_terms
268SELECT C_HLEVEL ,
269C_FULLNAME,
270C_NAME    ,
271C_SYNONYM_CD,
272C_VISUALATTRIBUTES,
273C_TOTALNUM,
274C_BASECODE,
275C_METADATAXML,
276C_FACTTABLECOLUMN,
277C_TABLENAME,
278C_COLUMNNAME,
279C_COLUMNDATATYPE,
280C_OPERATOR,
281C_DIMCODE,
282C_COMMENT,
283C_TOOLTIP,
284M_APPLIED_PATH,
285UPDATE_DATE,
286DOWNLOAD_DATE,
287IMPORT_DATE,
288SOURCESYSTEM_CD,
289VALUETYPE_CD,
290M_EXCLUSION_CD,
291C_PATH,
292C_SYMBOL
293from BlueHeronMetadata.epic_icd9_10
294;
295
296
297/* To enable queries built against the original pre-UMLS i2b2
298   diagnosis hierarchy to continue to work, load the terms,
299   but leave them (H)idden. */
300INSERT
301INTO BlueHeronMETADATA.heron_terms
302SELECT C_HLEVEL,
303  C_FULLNAME,
304  C_NAME,
305  C_SYNONYM_CD,
306  'FH' C_VISUALATTRIBUTES, -- does Folder vs Leaf matter for Hidden terms?
307  C_TOTALNUM,
308  C_BASECODE,
309  C_METADATAXML,
310  C_FACTTABLECOLUMN,
311  C_TABLENAME,
312  C_COLUMNNAME,
313  C_COLUMNDATATYPE,
314  C_OPERATOR,
315  C_DIMCODE,
316  C_COMMENT,
317  C_TOOLTIP,
318  M_APPLIED_PATH,
319  UPDATE_DATE,
320  DOWNLOAD_DATE,
321  IMPORT_DATE,
322  SOURCESYSTEM_CD,
323  VALUETYPE_CD,
324  M_EXCLUSION_CD,
325  C_PATH,
326  C_SYMBOL
327FROM I2B2METADATA2.I2B2 m
328WHERE c_fullname LIKE '\i2b2\Diagnoses\_%' ;
329
330/* To enable old queries, insert old hidden paths (see #2610).  Create an empty
331table of old terms if it doesn't exist already.
332*/
333whenever sqlerror continue;
334create table i2b2metadata2.old_kuh_dx_terms as
335  select * from blueheronmetadata.heron_terms where 1=0;
336whenever sqlerror exit;
337
338insert
339into blueheronmetadata.heron_terms
340select c_hlevel,
341  c_fullname,
342  c_name,
343  c_synonym_cd,
344  c_visualattributes,
345  c_totalnum,
346  c_basecode,
347  c_metadataxml,
348  c_facttablecolumn,
349  c_tablename,
350  c_columnname,
351  c_columndatatype,
352  c_operator,
353  c_dimcode,
354  c_comment,
355  c_tooltip,
356  m_applied_path,
357  update_date,
358  download_date,
359  import_date,
360  sourcesystem_cd,
361  valuetype_cd,
362  m_exclusion_cd,
363  c_path,
364  c_symbol
365from i2b2metadata2.old_kuh_dx_terms;
366
367
368--Collapse the "DISEASES AND INJURIES" category by dropping the c_hlevel by one.
369--except "DISEASES AND INJURIES" itself.
370update blueheronmetadata.heron_terms ht
371set ht.c_hlevel = ht.c_hlevel - 1
372where ht.c_fullname like '\i2b2\Diagnoses\ICD9\A18090800\A8359006\_%';
373
374--Update the names of the "E" codes and "V" codes.
375update blueheronmetadata.heron_terms ht
376set ht.c_name = 'zz E-codes'
377where ht.c_name = 'SUPPLEMENTARY CLASSIFICATION OF EXTERNAL CAUSES OF INJURY AND POISONING';
378
379update blueheronmetadata.heron_terms ht
380set ht.c_name = 'zz V-codes'
381where ht.c_name = 'SUPPLEMENTARY CLASSIFICATION OF FACTORS INFLUENCING HEALTH STATUS AND CONTACT WITH HEALTH SERVICES';
382
383
384-- Remove any KUH leaf diagnosis terms that we don't have facts for.   
385delete from blueheronmetadata.heron_terms ht where ht.c_basecode in (
386  select
387    dx.c_basecode
388  from blueheronmetadata.unique_epic_dx_concepts dx
389  left join blueheronmetadata.counts_by_concept cbc on cbc.concept_cd = dx.c_basecode
390  where cbc.concept_cd is null
391  )
392and ht.c_tablename != 'MODIFIER_DIMENSION';
393 
394--Hide NCDR concepts which do not have facts
395update blueheronmetadata.heron_terms
396set c_visualattributes=substr(c_visualattributes,1,1)||'H'
397where c_fullname in(
398select distinct ht.c_fullname from  BlueHeronMetadata.heron_terms ht
399  left join blueheronmetadata.counts_by_concept cbc on cbc.concept_cd = ht.c_basecode
400        where 
401    ht.c_fullname like '\i2b2\NCDR%'
402    and ht.c_basecode is not null
403    and cbc.concept_cd is null
404    and ht.c_basecode like 'NCDR%'
405    and c_tablename != 'MODIFIER_DIMENSION'
406    and ht.c_basecode not like '%NCDR|CURRENT_AGE%'
407    and c_visualattributes!='FA'
408    or (c_fullname like '\i2b2\NCDR\Administration\%' or
409        c_fullname like '\i2b2\NCDR\Episode of Care\%')
410    );
411   
412/* Load CPT
413
414To get the development version:
415drop table I2B2METADATA2.CPT;
416create table  I2B2METADATA2.CPT as select * from I2B2METADATA2.CPT@bmidev;
417*/
418-- todo: add m_applied_path etc. columns to I2B2METADATA2.CPT
419
420insert into BLUEHERONMETADATA.HERON_TERMS (
421 C_HLEVEL
422 , C_FULLNAME
423 , C_NAME
424 , C_SYNONYM_CD
425 , C_VISUALATTRIBUTES
426 , C_TOTALNUM
427 , C_BASECODE
428 , C_METADATAXML
429 , C_FACTTABLECOLUMN
430 , C_TABLENAME
431 , C_COLUMNNAME
432 , C_COLUMNDATATYPE
433 , C_OPERATOR
434 , C_DIMCODE
435 , C_COMMENT
436 , C_TOOLTIP
437 , UPDATE_DATE
438 , DOWNLOAD_DATE
439 , IMPORT_DATE
440 , SOURCESYSTEM_CD
441 , VALUETYPE_CD
442 , m_applied_path
443)
444select cpt.*, '@' from I2B2METADATA2.CPT cpt;
445
446
447/**
448 * Inserting lost/hidden parents needed for counts by concepts to function
449 * correctly. Needed by the changes in #2132.
450 *
451 */
452insert into blueheronmetadata.heron_terms (
453  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode,
454  c_visualattributes,
455  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
456  c_operator, m_applied_path,
457  update_date, import_date, sourcesystem_cd
458)
459with lost_hidden_parents as
460(select '\i2b2\Diagnoses\ICD9\A18090800\' as concept_path,
461        'Diagnoses Hidden Root' as concept_name,
462        2 as c_hlevel, -- this is the normal c_hlevel for \x\y\z\
463        (select source_cd from blueherondata.source_master where source_cd like 'UMLS@%') as source_cd
464        from dual union all
465 select '\i2b2\Procedures\PRC\' as concept_path,
466        'Procedures Hidden Root' as concept_name,
467        2 as c_hlevel,
468        (select source_cd from blueherondata.source_master where source_cd like 'Epic@%') as source_cd
469        from dual union all
470 select '\i2b2\Diagnoses\ICD9\zz V-codes\Body mass index (V85)\' as concept_path,
471        'Body mass index (V85)' as concept_name,
472        3 as c_hlevel,
473        (select source_cd from blueherondata.source_master where source_cd like 'UMLS@%') as source_cd
474        from dual
475)
476select
477  lhp.c_hlevel, lhp.concept_path, lhp.concept_name, null as concept_cd, lhp.concept_path,
478  'FH',
479  norm.*,
480  sysdate, sysdate, lhp.source_cd
481from lost_hidden_parents lhp
482   , blueheronmetadata.normal_concept norm;
483
484/* Insert old medication hierarchy (all items are hidden) so that old queries
485still work (GPC ticket 152).
486
487Created like:
488create table i2b2metadata2.old_kuh_medication_tree as (
489  select
490  C_HLEVEL, C_FULLNAME, C_NAME, C_SYNONYM_CD,
491  substr(C_VISUALATTRIBUTES, 1, 1) || 'H ' C_VISUALATTRIBUTES,
492  C_TOTALNUM, C_BASECODE, C_METADATAXML, C_FACTTABLECOLUMN, C_TABLENAME,
493  C_COLUMNNAME, C_COLUMNDATATYPE, C_OPERATOR, C_DIMCODE, C_COMMENT, C_TOOLTIP,
494  M_APPLIED_PATH, UPDATE_DATE, DOWNLOAD_DATE, IMPORT_DATE, SOURCESYSTEM_CD,
495  VALUETYPE_CD, M_EXCLUSION_CD, C_PATH, C_SYMBOL
496  from blueheronmetadata.heron_terms
497  where c_fullname like '\i2b2\Medications\_%'
498  )
499; --73,542 rows
500*/
501insert into BLUEHERONMETADATA.HERON_TERMS (
502  C_HLEVEL, C_FULLNAME, C_NAME, C_SYNONYM_CD, C_VISUALATTRIBUTES, C_TOTALNUM,
503  C_BASECODE, C_METADATAXML, C_FACTTABLECOLUMN, C_TABLENAME, C_COLUMNNAME,
504  C_COLUMNDATATYPE, C_OPERATOR, C_DIMCODE, C_COMMENT, C_TOOLTIP,  M_APPLIED_PATH,
505  UPDATE_DATE, DOWNLOAD_DATE, IMPORT_DATE, SOURCESYSTEM_CD, VALUETYPE_CD,
506  M_EXCLUSION_CD, C_PATH, C_SYMBOL
507)
508select * from i2b2metadata2.old_kuh_medication_tree;
509
510
511/* Insert old Diagnoses hierarchy (all items are hidden) so that old queries
512still work (ticket 3527).
513
514Created like:
515create table i2b2metadata2.old_icd9_diag_tree as (
516  select
517  C_HLEVEL, C_FULLNAME, C_NAME, C_SYNONYM_CD,
518  substr(C_VISUALATTRIBUTES, 1, 1) || 'H ' C_VISUALATTRIBUTES,
519  C_TOTALNUM, C_BASECODE, C_METADATAXML, C_FACTTABLECOLUMN, C_TABLENAME,
520  C_COLUMNNAME, C_COLUMNDATATYPE, C_OPERATOR, C_DIMCODE, C_COMMENT, C_TOOLTIP,
521  M_APPLIED_PATH, UPDATE_DATE, DOWNLOAD_DATE, IMPORT_DATE, SOURCESYSTEM_CD,
522  VALUETYPE_CD, M_EXCLUSION_CD, C_PATH, C_SYMBOL
523  from blueheronmetadata.heron_terms
524  where c_fullname like '\i2b2\Diagnoses\A18090800\%' --Diagnoses Hidden Root
525  )
526;
527*/
528whenever sqlerror continue;
529-- In case the table doesn't exist (meaning, we don't need the old paths)
530create table i2b2metadata2.old_icd9_diag_tree as
531  select * from blueheronmetadata.heron_terms where 1=0;
532whenever sqlerror exit;
533
534insert into BLUEHERONMETADATA.HERON_TERMS (
535  C_HLEVEL, C_FULLNAME, C_NAME, C_SYNONYM_CD, C_VISUALATTRIBUTES, C_TOTALNUM,
536  C_BASECODE, C_METADATAXML, C_FACTTABLECOLUMN, C_TABLENAME, C_COLUMNNAME,
537  C_COLUMNDATATYPE, C_OPERATOR, C_DIMCODE, C_COMMENT, C_TOOLTIP,  M_APPLIED_PATH,
538  UPDATE_DATE, DOWNLOAD_DATE, IMPORT_DATE, SOURCESYSTEM_CD, VALUETYPE_CD,
539  M_EXCLUSION_CD, C_PATH, C_SYMBOL
540)
541select * from i2b2metadata2.old_icd9_diag_tree;
542
543/*
544 Insure that all heron terms have a trailing backslash in the full name and dimcode
545 */
546select case when count(*) > 0 then 1/0 else 1 end as has_trailing_backslash
547--select *
548 from blueheronmetadata.heron_terms ht
549 where substr(ht.c_fullname, length(ht.c_fullname), 1) != '\' or
550      substr(ht.c_dimcode, length(ht.c_dimcode), 1) != '\'
551;
Note: See TracBrowser for help on using the repository browser.