Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_dx_concepts.sql @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 10.4 KB
Line 
1/* epic_dx_concepts -- connect clarity DX_ID terms to ICD9 terms
2
3Copyright (c) 2015 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7Ref Epic 2014 Clarity Data Dictionary
8   
9*/
10
11-- Check that we're in the identified DB, connected to clarity and DEID.
12select patient_num from nightherondata.observation_fact where 1 = 0;
13select dx_id from clarity.clarity_edg where 1 = 0;
14select c_basecode from blueheronmetadata.epic_icd9_10@deid where 1 = 0;
15select c_basecode from I2B2METADATA2.I2B2@deid where 1 = 0;
16
17
18create or replace view dx_audit_info as
19-- Note DX concepts actually come from a combination of sources: UMLS and Epic Clarity
20select * from BlueHeronData.source_master@deid
21where source_cd like 'Epic@%'
22;
23
24
25/* Since we can't select from on dblink and insert to another,
26let's use a temp table. */
27whenever sqlerror continue;
28truncate table epic_dx_terms;
29drop table epic_dx_terms;
30whenever sqlerror exit;
31
32/* Parent/child relationships according to clarity_edg joined with the UMLS
33ICD9/ICD10 hierarchy so we can get the ICD parent.  Note that extra fields are
34added in this query for eyeballing.
35*/
36create or replace view icd9_dx_parent_children as
37select
38  edg_child.dx_id chd_dx, edg_child.dx_name chd_name,
39  icd9_child.code chd_icd9,
40  umls_child.c_name chd_umls_name, umls_child.c_fullname chd_umls_path, umls_child.c_hlevel chd_level,
41  edg_parent.dx_id par_dx, edg_parent.dx_name par_name,
42  icd9_parent.code par_icd9,
43  umls_parent.c_name par_umls_name, umls_parent.c_fullname par_umls_path, umls_parent.c_hlevel par_level
44from clarity.clarity_edg edg_child
45cross join scheme_labels@deid
46-- Not all DXs have a parent DX_ID
47left join clarity.clarity_edg edg_parent on edg_parent.dx_id = edg_child.parent_dx_id
48left join clarity.edg_current_icd9 icd9_child on icd9_child.dx_id = edg_child.dx_id
49left join clarity.edg_current_icd9 icd9_parent on icd9_parent.dx_id = edg_parent.dx_id
50left join blueheronmetadata.umls_icd9_10@deid umls_child on
51  (umls_child.c_basecode = scheme_labels.icd9_scheme || icd9_child.code)
52left join blueheronmetadata.umls_icd9_10@deid umls_parent on
53  (umls_parent.c_basecode = scheme_labels.icd9_scheme || icd9_parent.code)
54;
55
56create or replace view icd10_dx_parent_children as
57select
58  edg_child.dx_id chd_dx, edg_child.dx_name chd_name,
59  icd10_child.code chd_icd10,
60  umls_child.c_name chd_umls_name, umls_child.c_fullname chd_umls_path, umls_child.c_hlevel chd_level,
61  edg_parent.dx_id par_dx, edg_parent.dx_name par_name,
62  icd10_parent.code par_icd10,
63  umls_parent.c_name par_umls_name, umls_parent.c_fullname par_umls_path, umls_parent.c_hlevel par_level
64from clarity.clarity_edg edg_child
65cross join scheme_labels@deid
66-- Not all DXs have a parent DX_ID
67left join clarity.clarity_edg edg_parent on edg_parent.dx_id = edg_child.parent_dx_id
68left join clarity.edg_current_icd10 icd10_child on icd10_child.dx_id = edg_child.dx_id
69left join clarity.edg_current_icd10 icd10_parent on icd10_parent.dx_id = edg_parent.dx_id
70left join blueheronmetadata.umls_icd9_10@deid umls_child on
71  (umls_child.c_basecode = scheme_labels.icd10_scheme || icd10_child.code)
72left join blueheronmetadata.umls_icd9_10@deid umls_parent on
73  (umls_parent.c_basecode = scheme_labels.icd10_scheme || icd10_parent.code)
74;
75
76
77/* Note: None of the IM* or M* codes map to UMLS paths (obviously) but the parents
78don't either.
79
80select *
81from dx_parent_children
82where (chd_dx like 'M%' or chd_dx like 'IM%') and
83  (chd_umls_path is not null or par_umls_path is not null);
84*/
85
86/* Besides explicit exceptions noted above, make sure that all the DX_IDs have
87a home in the UMLS ICD9 tree.
88*/
89insert into etl_test_values (test_value, test_domain, test_name, result_id, result_date)
90with test_key as (
91  select 'Diagnoses' test_domain,
92  'dx_ids_without_umls_home' test_name from dual
93  ),
94dx_no_home as (
95  select *
96  from icd9_dx_parent_children dxp
97  where dxp.par_umls_path is null and dxp.chd_umls_path is null
98  and
99    (
100    /* Skipping things like IMOxxx - almost all 'Reserved for non-ICD9 billable
101    problem concepts' - a few other odd strings like 'Present in any copy,
102    complete or'
103    */
104    dxp.chd_icd9 not like 'IM%'
105    /* Skip things like M9000/0 - ICD9-CM I guess?
106    */
107    and not dxp.chd_icd9 like 'M%'
108    /* A few stragglers.
109    */
110    and dxp.chd_dx not in (
111        13597 --Place of occurrence
112      , 19002 --OTHER DIAGNOSIS - PLEASE ANNOTATE.
113      , 19003 --LOS WITHIN PROCEDURE CODE
114      , 1115526 --FAA
115      , 13304 --V COLLISION W OTHER OBJ
116      , 19000 --NEGATIVE HISTORY OF
117      , 19001 --PAST MEDICAL HISTORY OF
118      , 19004 -- ERRONEOUS ENCOUNTER--DISREGARD
119      )
120    )
121  ),
122dx_no_home_count as (
123  select count(*) test_value from dx_no_home
124  )
125select dx_no_home_count.test_value, test_key.*, sq_result_id.nextval, sysdate
126from test_key, dx_no_home_count
127;
128commit;
129
130create global temporary table epic_dx_terms
131on commit preserve rows as
132select
133  c_hlevel, path c_fullname, c_name, c_basecode, path c_dimcode, c_visualattributes,
134  norm.*, sysdate update_date, sysdate import_date, aud.source_cd sourcesystem_cd,
135  icd9_10_code
136from (
137  select * from (
138    select
139      coalesce(dxpc.chd_level, dxpc.par_level) + 1 c_hlevel,
140      coalesce(dxpc.chd_umls_path, dxpc.par_umls_path) || dxpc.chd_dx || '\' path,
141      dxpc.chd_name c_name,
142      scheme_labels.dx_id_scheme || dxpc.chd_dx c_basecode,
143      'LA' as c_visualattributes, coalesce(dxpc.chd_icd9, dxpc.par_icd9) icd9_10_code
144    from icd9_dx_parent_children dxpc
145    cross join scheme_labels@deid
146
147    union all
148
149    /* Clarity specifies parent DX IDs for some DX IDs (in
150    clarity_edg.parent_dx_id). So, if a parent DX_ID maps to multiple ICD10s but
151    the child does not, then we'll get duplicates given the coalesce logic below
152    that picks the child path over the parent path.  Since the child path is
153    what we want if available (as we believe it to be a more specific match), use
154    distinct to avoid duplicates.   
155    */
156    select distinct
157      coalesce(dxpc.chd_level, dxpc.par_level) + 1 c_hlevel,
158      coalesce(dxpc.chd_umls_path, dxpc.par_umls_path) || dxpc.chd_dx || '\' path,
159      dxpc.chd_name c_name,
160      scheme_labels.dx_id_scheme || dxpc.chd_dx c_basecode,
161      'LA' as c_visualattributes, coalesce(dxpc.chd_icd10, dxpc.par_icd10) icd9_10_code
162    from icd10_dx_parent_children dxpc
163    cross join scheme_labels@deid
164    order by c_hlevel
165    )
166  where c_hlevel is not null
167  ), BlueHeronMetadata.normal_concept@deid norm, dx_audit_info aud
168;
169
170create or replace view other_dx_concepts as
171with dxs as (
172  select
173  cbc.concept_cd,
174  substr(cbc.concept_cd, length(scheme_labels.dx_id_scheme) + 1) dx_id,
175  cbc.facts, cbc.patients
176  from blueheronmetadata.counts_by_concept@deid cbc
177  cross join scheme_labels@deid
178  where cbc.concept_cd like scheme_labels.dx_id_scheme || '%'
179  )
180select
181  -- Extra columns left in for eyeballing
182  dxs.*, edg.dx_name, edg.icd9_code, edg.current_icd9_list, edg.current_icd10_list,
183  icd9.code icd9_tbl_code, icd10.code icd10_tbl_code
184from dxs
185left join epic_dx_terms dxt on dxt.c_basecode = dxs.concept_cd
186join clarity.clarity_edg edg on edg.dx_id = dxs.dx_id
187left join clarity.edg_current_icd9 icd9 on icd9.dx_id = edg.dx_id
188left join clarity.edg_current_icd10 icd10 on icd10.dx_id = edg.dx_id
189where dxt.c_basecode is null
190/* Somewhat arbitrary limit to cut down on things in the "other" folder that
191have very few facts.
192*/
193and dxs.facts >= 10
194;
195
196
197insert into epic_dx_terms
198select
199  length(path) - length(replace(path, '\', '')) - 2 c_hlevel,
200  path c_fullname, c_name, c_basecode, path c_dimcode, c_visualattributes,
201  norm.*, sysdate update_date, sysdate import_date, aud.source_cd sourcesystem_cd,
202  icd9_code
203from (
204  with other_strs as (
205    select '\i2b2\Diagnoses\' dx_path,
206    'Other Diagnoses Concepts' c_name
207    from dual
208    ),
209  base as (
210    select other_strs.dx_path || other_strs.c_name || '\' path
211    from other_strs
212    )
213  select
214    base.path || oc.dx_id || '\' path,
215    oc.dx_name c_name, oc.concept_cd c_basecode,
216    'LA' as c_visualattributes, null icd9_code
217  from base, other_strs, other_dx_concepts oc
218  union all
219  select
220    base.path path, other_strs.c_name, null c_basecode, 'FA' as c_visualattributes,
221    null icd9_code
222  from base, other_strs
223  ), BlueHeronMetadata.normal_concept@deid norm, dx_audit_info aud
224;
225
226
227/* Make sure we include at least one inactive DX_ID, which were excluded earlier. */
228
229insert into etl_test_values (test_value, test_domain, test_name, result_id, result_date)
230with test_key as (
231  select 'Diagnoses' test_domain,
232  'at_least_one_inactive_dx_id' test_name from dual
233  ),
234one_inactive_dx as (
235  select count(*) test_value from epic_dx_terms
236  where c_basecode = (select dx_id_scheme from scheme_labels@deid) || 199608
237  )
238select one_inactive_dx.test_value, test_key.*, sq_result_id.nextval, sysdate
239from test_key, one_inactive_dx
240;
241commit;
242
243/* For tickets #1319 and #1613, to properly tag concepts as FA and LA */
244insert into etl_test_values (test_value, test_domain, test_name, result_id, result_date)
245with test_key as (
246  select 'Diagnoses' test_domain,
247  'dx_with_children_are_fa' test_name from dual
248  ),
249dx_children_fa as (
250  select count (distinct c_basecode) test_value from epic_dx_terms where
251  c_basecode in (
252    select scheme_labels.dx_id_scheme || dx_ids.dx_id
253    from (
254      select 1040713 dx_id from dual union all
255      select 1041101 dx_id from dual union all
256      select 1083118 dx_id from dual union all
257      select 209352 dx_id from dual union all
258      select 209354 dx_id from dual
259      ) dx_ids
260    cross join scheme_labels@deid
261    )
262  and c_visualattributes = 'LA'
263  )
264select dx_children_fa.test_value, test_key.*, sq_result_id.nextval, sysdate
265from test_key, dx_children_fa
266;
267commit;
268
269delete from
270BLUEHERONMETADATA.epic_icd9_10@deid;
271
272insert into BLUEHERONMETADATA.epic_icd9_10@deid (
273  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode,
274  c_visualattributes,
275  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
276  c_operator,  m_applied_path,
277  update_date, import_date, sourcesystem_cd, icd9_10_code
278)
279select * from epic_dx_terms
280;
281
282
283truncate table epic_dx_terms;
284drop table epic_dx_terms;
285
286delete from blueheronmetadata.unique_epic_dx_concepts@deid;
287-- dx_id column is the primary key for clarity_edg.
288insert into blueheronmetadata.unique_epic_dx_concepts@deid
289select dx_id_scheme || dx_id from clarity.clarity_edg
290cross join scheme_labels@deid;
Note: See TracBrowser for help on using the repository browser.