Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/umls_dx_concepts.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 6.0 KB
Line 
1/* umls_dx_concepts -- build Diagnosis hierarchy from UMLS ICD9 data.
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7REF
8UMLS(R) Reference Manual
9Bethesda (MD): National Library of Medicine (US); September 2009-.
10http://www.ncbi.nlm.nih.gov/books/NBK9676/
11
12"Every occurrence of a string in each source vocabulary is assigned a unique atom identifier (AUI)."
13 -- 2.3.3 Atoms and Atom Identifiers
14
15e.g. mrconso is unique on aui.
16
17sometimes handy:
18alter session set current_schema = umls;
19
20select *
21from umls.mrdoc
22where dockey = 'TTY'
23and value in ('PT', 'HT')
24and type='expanded_form';
25
26*/
27
28select patient_num from nightherondata.observation_fact where 1 = 0;
29select aui from umls.mrconso where 1=0;
30select c_basecode from blueheronmetadata.umls_icd9_10@deid where 1 = 0;
31
32/* My first attempt at staging UMLS only included SNOMEDCT;
33   Let's verify that ICD9/ICD10 is loaded. */
34
35
36select case when count(*) > 1 then 1 else 1/0 end as umls_icd9_10_stage_ok from (
37  select * from (
38    -- Run inner query separately for a summary by versioned source abbreviation
39    with umls_concept_stats as (
40    select count(*) qty, con.sab
41    from umls.mrconso con
42    group by con.sab)
43   
44    select cs.*, mrsab.vsab, mrsab.son
45    from umls_concept_stats cs
46    join umls.mrsab on mrsab.rsab = cs.sab
47    order by cs.sab
48    )
49  where (vsab = 'ICD9CM_2012' and qty >= 40000) or (vsab = 'ICD9CM_2013' and qty >= 40000) or (vsab='ICD10CM_2013' and qty >= 40000)
50  )
51;
52
53
54
55/* The mrhier.prt column seems to be analagous to i2b2's concept_path */
56whenever sqlerror continue;
57drop table umls_icd9_terms; -- Code previous to inclusion of ICD10
58drop table umls_icd9_10_terms;
59whenever sqlerror exit;
60
61create table umls_icd9_10_terms as
62with
63
64aud as (
65select *
66from BlueHeronData.source_master@deid
67where source_cd like 'UMLS@%'
68),
69
70diag_root as (
71select 1 c_hlevel, '\i2b2\Diagnoses\' c_fullname
72from dual
73),
74
75icd9_root as (
76select 2 c_hlevel, c_fullname || 'ICD9\' c_fullname, icd9_scheme scheme, 'ICD9CM' umls_sab, 'ICD9' c_name
77from diag_root
78cross join scheme_labels@deid
79),
80
81icd10_root as (
82select 2 c_hlevel, c_fullname || 'ICD10\' c_fullname, icd10_scheme scheme, 'ICD10CM' umls_sab, 'ICD10' c_name
83from diag_root
84cross join scheme_labels@deid
85),
86
87umls_icd9_10 as (
88select hier.ptr
89     , folder.code folder_code, folder.str folder_str
90     , chd.code chd_code, chd.str chd_str, chd.aui
91     , folder.sab
92from umls.mrhier hier
93join umls.mrconso folder on hier.paui = folder.aui
94join umls.mrconso chd on hier.aui = chd.aui
95where folder.sab in (
96  select umls_sab from icd9_root
97  union all
98  select umls_sab from icd10_root
99  )
100  and hier.ptr not like 'A18090800.A8352133%' -- exclude ICD9 procedures
101),
102
103folders as (
104select distinct
105  -- Hide the top level "ICD-10-CM TABULAR LIST of DISEASES and INJURIES" folder
106  case when ptr = 'A20098492' then 'FH' else 'FA' end c_visualattributes
107              , ptr, null path_seg
108              , folder_code code, folder_str str, sab
109from umls_icd9_10
110),
111
112leaves as (
113select 'LA' c_visualattributes
114     , leaf.ptr, leaf.chd_code || '\' path_seg
115     , leaf.chd_code code, leaf.chd_str str, leaf.sab
116from umls_icd9_10 leaf
117-- join with hier again to exclude interior nodes from leaf
118left join umls.mrhier lhier
119       on lhier.paui = leaf.aui
120where lhier.paui is null
121),
122
123roots as (
124select 'FA' c_visualattributes, null code, c_name str, c_hlevel, c_fullname, umls_sab, scheme
125  from (
126    select * from icd9_root
127    union all
128    select * from icd10_root
129    )
130  ),
131 
132ptr_to_path as (
133select t.c_visualattributes, t.code, t.str
134       -- ack: http://www.sqlsnippets.com/en/topic-12818.html
135    ,  roots.c_hlevel
136       + coalesce(length(regexp_replace(t.ptr, '[^\.]', NULL)), 0)
137       + t.offset c_hlevel
138    ,  roots.c_fullname || replace(t.ptr, '.', '\')
139       || '\' || t.path_seg concept_path, roots.scheme
140from roots
141join (
142  select 0 offset, t.* from folders t
143  union all
144  select 1 offset, t.* from leaves t
145  ) t
146on t.sab = roots.umls_sab
147union all
148select c_visualattributes, code, str, c_hlevel, c_fullname, scheme from roots
149)
150
151select c_hlevel
152     , concept_path c_fullname
153     , case when code is not null then code || ' ' || str else str end c_name
154     , case when code is not null then scheme || code else code end c_basecode
155     , concept_path c_dimcode
156      -- TODO: correct FA vs. LA for diagnoses.
157     , c_visualattributes
158     , norm.*
159     , sysdate update_date, sysdate import_date, aud.source_cd sourcesystem_cd
160from ptr_to_path, BlueHeronMetadata.normal_concept@deid norm, aud, scheme_labels@deid
161;
162
163
164with icd9_test_cases as (
165select *
166from umls_icd9_10_terms
167where c_basecode in (
168   'ICD9:426.82', -- Long qt syndrome; genesis of #441
169   'ICD9:153.9', -- a leaf; leaves were missed by an earlier algorithm
170   'ICD9:599.70', -- a code not included in i2b2 demo metadata
171   'ICD9:V58.69' -- wasn't sure if this one was in there at one point.
172)
173)
174select case when (count(*)) = 4 then 1 else 1/0 end idc9_terms_found
175from icd9_test_cases
176;
177
178with meningitis_term as (
179select *
180from umls_icd9_10_terms
181where c_name = 'Meningitis due to ECHO virus'
182and c_basecode = 'ICD9:047.1'
183)
184select case when c_hlevel = 6 then 1 else 1/0 end as icd9_level_check
185from meningitis_term;
186
187
188delete from blueheronmetadata.umls_icd9_10@deid;
189
190insert into blueheronmetadata.umls_icd9_10@deid (
191  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode,
192  c_visualattributes,
193  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
194  c_operator,  m_applied_path,
195  update_date, import_date, sourcesystem_cd
196)
197select * from umls_icd9_10_terms
198;
199
200
201/* Road not taken: find root using relationship table. seems to have roughly the same
202   results, but perhaps it's not a simple or widely used as the mrhier approach.
203select pcon.*, rel.*, chcon.*
204from umls.mrrel rel
205join umls.mrconso pcon on rel.aui2 = pcon.aui -- counter-intuitive, but yup
206join umls.mrconso chcon on rel.aui1 = chcon.aui
207where chcon.sab in ('ICD9CM')
208and rel.rel = 'CHD'
209and pcon.code < '100'
210order by pcon.code;
211*/
Note: See TracBrowser for help on using the repository browser.