source: heron_load/epic_med_mapping.sql @ 0:d42b60edc4bf

Revision 0:d42b60edc4bf, 32.7 KB checked in by Dan Connolly <dconnolly@…>, 3 weeks ago (diff)

Merge with heron-arkansas (#2581, #2592)

Line 
1/** epic_med_mapping -- map Epic meds to standard terminology RxNorm/NDF-RT
2
3Copyright (c) 2012 University of Kansas Medical Center
4part of the HERON* open source codebase; see NOTICE file for license details.
5* http://informatics.kumc.edu/work/wiki/HERON
6
7Hierarchy based on "Drug Products by VA Class".  This was the organization
8suggested by "2010 paper on mashing up RxNorm and NDF-RT hierarchy" found here:
9  http://www.ncbi.nlm.nih.gov/pubmed/21347044
10
11The RxNorm documentation is located here:
12http://www.nlm.nih.gov/research/umls/rxnorm/docs/2012/rxnorm_doco_full_2012-3.html
13
14Our hierarchy:
15
16VA Drug Class (NDF-RT)
17  |- (Possible sub-VA Drug Class (NDF-RT))
18    |- Semantic Clinical Dose Form (SCDF) from RxNorm [Oral product, Pill, injectable, etc]
19      |- Clarity Drug
20     
21The general steps are:
22- Map Clarity drugs to RXCUI (RX Concept Unique Identifier) so we can map to
23synonyms/drug classes/etc in RxNorm.  This is done via NDC and GCN Sequence
24Number.
25  - Map Clarity drugs to Semantic Clinical Dose Form (SCDF)
26  - Map Clarity drugs to children of VA Drug Classes
27  - Map the SCDF to VA Drug Class
28- From the parent/child relationships found above, create the i2b2 compatible
29terms tree. 
30 
31*/
32
33-- Check access: should be running on nheron
34select upload_id from NIGHTHERONDATA.observation_fact where 1=0;
35
36-- Check access to KUMC/rxnorm
37select rxcui from rxnorm.rxnconso where 1=0;
38
39-- Audit constants for stuff from RxNorm
40create or replace view rxnorm_audit_info as
41select * from BlueHeronData.source_master@deid
42where source_cd like 'rxnorm@%';
43
44whenever sqlerror continue;
45drop table rxcui_to_one_string;
46whenever sqlerror exit;
47
48/* Create a table of rxconcepts to strings.  Since an RxCUI may have lots of
49strings, it's nice to be able to pick just one.
50*/
51create table rxcui_to_one_string as
52select rxcui, str from(
53  -- Get the primary terms - use max on aui to just get one.
54  with rxcon_aui_pt as(
55    select rxcui, max(rxaui) aui
56    from rxnorm.rxnconso
57    where tty = 'PT'
58    group by rxcui
59    ),
60  -- Get one string for each rxcui
61  rxcon_aui_all as(
62    select rxcui, max(rxaui) aui
63    from rxnorm.rxnconso
64    group by rxcui
65    ),
66  -- filter to get non-primary terms
67  non_pt_aui as(
68    select all_aui.rxcui, all_aui.aui
69    from rxcon_aui_all all_aui
70    left join rxcon_aui_pt pt on pt.rxcui = all_aui.rxcui
71    where pt.rxcui is null
72    ),
73  picked_auis as(
74    select * from rxcon_aui_pt
75    union
76    select * from non_pt_aui
77    )
78  select con.rxcui, con.str from picked_auis pa
79  join rxnorm.rxnconso con on con.rxaui = pa.aui
80);
81
82
83/* Map our clarity medication id to GCN and then to RxCUI
84*/
85create or replace view clarity_med_id_to_rxcui_gcn as select * from(
86  with med_id_to_gcn as(
87    --Map the clarity medication ID to the clarity GCN
88    --TODO: There are multiple GCNs for a single med id.  What to do?
89    --TODO: Deal with "suppressed" column?
90    select
91      ccm.medication_id, crmg.gcn_seqno, ccm.name
92    from clarity.clarity_medication ccm
93    join clarity.rx_med_gcnseqno crmg on crmg.medication_id = ccm.medication_id
94    ),
95  gcn_to_rxui as(
96    --Map the clarity GCN to the RxNorm GCN/CUI
97    select
98      mtog.medication_id, mtog.gcn_seqno, mtog.name clarity_name, rxc.code,
99      rxc.rxcui, rxc.tty, rxc.str rxname
100    from med_id_to_gcn mtog
101    join rxnorm.rxnconso rxc on rxc.code = lpad(mtog.gcn_seqno, 6, '0')
102    where rxc.sab='NDDF' and rxc.tty != 'IN' --skip ingredients (#1246 comment:13)
103    )
104  select
105    medication_id clarity_med_id, gcn_seqno clarity_gcn, code rxn_gcn, rxcui, tty,
106    clarity_name, rxname
107  from gcn_to_rxui
108  );
109
110/* what term types?
111111652  CDA
112111652  CDC
113111652  CDD
114*/
115/*
116select distinct
117  doc.value, doc.expl
118from clarity_med_id_to_rxcui_gcn ctorx
119join rxnorm.rxndoc doc on doc.value = ctorx.tty
120where doc.dockey = 'TTY';
121
122select count(*), tty from clarity_med_id_to_rxcui_gcn ctorx
123group by tty;
124
125-- up to 2421 rxcuis medid!
126select count(*), rxcui from clarity_med_id_to_rxcui_gcn ctorx
127group by rxcui order by count(*) desc;
128*/
129
130 
131/* Map our clarity medication id to NDC and then to RxCUI
132*/
133create or replace view clarity_med_id_to_rxcui_ndc as select * from(
134  with med_id_to_ndc as(
135    --Map the clarity medication ID to the clarity NDC
136    --TODO: A single medication id maps to multiple NDCs, how to handle?
137    --TODO: Deal with "suppressed" column?
138    --TODO: NDC normalization
139    select
140      ccm.medication_id, cndc.ndc_code, ccm.name clarity_name
141    from clarity.clarity_medication ccm
142    join clarity.clarity_ndc_codes cndc on cndc.medication_id=ccm.medication_id
143    ),
144  ndc_to_rxui as(
145    --Map the clarity NDC to the RxNorm NDC/CUI
146    select
147      mton.medication_id, mton.ndc_code, sat.rxcui, con.str rxname, clarity_name
148    from med_id_to_ndc mton
149    join rxnorm.rxnsat sat on sat.atv = mton.ndc_code
150    join rxnorm.rxnconso con on sat.rxcui = con.rxcui
151    where sat.atn='NDC' --and con.tty != 'IN' --skip ingredients (#1246 comment:13)
152    )
153  select
154    medication_id clarity_med_id, ndc_code, clarity_name, rxcui, rxname
155  from ndc_to_rxui
156  ); 
157
158
159/* Map Clarity medications to RxNorm (Clarity medication_id to rxcui).
160Temporary tables are used instead of views to reduce run time.
161*/
162whenever sqlerror continue;
163drop table clarity_med_id_to_rxcui;
164whenever sqlerror exit;
165
166create table clarity_med_id_to_rxcui as(
167  select distinct
168    clarity_med_id, rxcui, 'GCN' as src
169  from
170    clarity_med_id_to_rxcui_gcn
171
172  union
173
174  select distinct
175    clarity_med_id, rxcui, 'NDC' as src
176  from
177    clarity_med_id_to_rxcui_ndc
178  );
179
180commit;
181
182/* eyeball
183select * from clarity_med_id_to_rxcui; 
184select count(*) from clarity_med_id_to_rxcui; --125176
185*/
186
187/* Incorporate MedEx NLP name-to-RxCui tables.  Note that we sometimes get
188different RxCUIs based on brand name and generic name.  Just use MedEx for meds
189that we can't match with NDC/GCN
190
191TODO: From spot-checking brand vs. generic, they appear to be synonyms. Consider
192verifying that they are in fact the same using a RxNorm/UMLS synonym relationship.
193
194TODO: Automate the creation of the tables in KUMC.  Tables are below - data
195imported by SQL developer
196
197create table rxnorm.clarity_name_to_rxcui_medex (
198  CLARITY_NAME VARCHAR2( 255 BYTE ),
199  UMLSCUI VARCHAR2(16 BYTE),
200  RXCUI VARCHAR2(8 BYTE)
201  );
202
203--generic name
204create table rxnorm.g_clarity_name_to_rxcui_medex as (
205  select * from rxnorm.clarity_name_to_rxcui_medex where 1 = 0
206  );
207
208MedEx Reference:
209http://knowledgemap.mc.vanderbilt.edu/research/content/medex-tool-finding-medication-information
210*/
211
212
213/* MedEx uses the string 'null' to represent null values.  If that string is
214imported directly into Oracle, we get a string 'null' rather than a null value. 
215So, verify here that there aren't any strings 'null'.
216*/
217select case when num = 0 then 1 else 1 / 0 end null_as_string from(
218  select count(*) num from(
219    select
220      clarity_name, rxcui
221    from
222      rxnorm.clarity_name_to_rxcui_medex
223   
224    union
225   
226    select
227      clarity_name, rxcui
228    from
229      rxnorm.g_clarity_name_to_rxcui_medex
230  )
231  where rxcui = 'null'
232);
233
234
235whenever sqlerror continue;
236drop table clarity_med_id_to_rxcui_medex;
237whenever sqlerror exit;
238
239/* Match clarity medication id to RxNorm CUI.  Note that as of now, the MedEx
240output doesn't provide a good way to link back to the source data.  So, match
241on medication name.  This isn't exact since the MedEx output mangles the names
242when it interprets a medication name as being more than one sentence (like
243"A.E.R. WITCH HAZEL 12.5-50 % TP PADM." gets changed to "WITCH HAZEL 12.5-50 %
244TP PADM.". 
245
246TODO: Find a better way.  We're working with the MedEx authors on this now.
247*/
248create table clarity_med_id_to_rxcui_medex as select * from(
249  with ctom as(
250    select distinct
251      medication_id clarity_med_id,
252        case
253          --arbitrarily favor the non-generic name if both exist.
254          when mxrxcui is not null then mxrxcui 
255          else gmxrxcui
256        end rxcui
257    from(
258        select distinct
259          ccm.medication_id, ccm.name, ccm.generic_name, mx.rxcui mxrxcui, gmx.rxcui gmxrxcui
260        from
261          clarity.clarity_medication ccm
262        left join
263          rxnorm.clarity_name_to_rxcui_medex mx
264        on
265          mx.clarity_name = ccm.name
266        left join
267          rxnorm.g_clarity_name_to_rxcui_medex gmx
268        on
269          gmx.clarity_name = ccm.generic_name
270        where ( mx.rxcui is not null or gmx.rxcui is not null )
271      )
272    )
273  select ctom.clarity_med_id, ctom.rxcui
274  from ctom
275  left join clarity_med_id_to_rxcui cmrx on cmrx.clarity_med_id = ctom.clarity_med_id
276  where cmrx.clarity_med_id is null
277  ); 
278
279
280-- Eyeball the results of the MedEx matching
281/*
282select
283  ccm.medication_id clarity_id, mx.rxcui medex_rxcui, ccm.name clarity_name,
284  ccm.generic_name clarity_generic_name, con.str rxnorm_name, con.tty, doc.expl
285from
286  clarity_med_id_to_rxcui_medex mx
287join
288  clarity.clarity_medication ccm
289on
290  mx.clarity_med_id = ccm.medication_id
291join
292  rxnorm.rxnconso con
293on
294  con.rxcui = mx.rxcui
295join
296  rxnorm.rxndoc doc
297on
298  doc.value = con.tty
299where
300  doc.dockey = 'TTY'
301order by
302  ccm.medication_id;
303 
304 
305with tty_counts as(
306  select
307    count(*) cnt, con.tty
308  from
309    clarity_med_id_to_rxcui_medex mx
310  join
311    rxnorm.rxnconso con
312  on
313    con.rxcui = mx.rxcui
314  group by con.tty
315  )
316select tty, cnt, doc.expl
317from tty_counts
318join rxnorm.rxndoc doc
319on doc.value = tty_counts.tty
320where doc.dockey = 'TTY'
321order by cnt desc;
322*/ 
323
324--What percentage of clarity medications to we have an RxCUI for now?
325/*
326with missing_rxcuis as(
327  select count(*) cnt from(
328    select
329      distinct ccm.medication_id clarity_id
330    from
331      clarity.clarity_medication ccm
332    left join
333      clarity_med_id_to_rxcui ctorx
334    on
335      ccm.medication_id = ctorx.clarity_med_id
336    where
337      ctorx.clarity_med_id is null
338    )
339  ),
340all_clarity_ids as(
341  select count(*) cnt from(
342    select distinct medication_id from clarity.clarity_medication ccm
343    )
344  )
345select
346  100 * (missing_rxcuis.cnt / all_clarity_ids.cnt) percent_missing
347from
348  missing_rxcuis, all_clarity_ids;
349--26% missing
350*/
351
352/* Precisely which Clarity medication ids do we not have an RxCUI for?  Could
353do the same with medex.
354*/
355/*
356select
357  clarity_med_id, ccm.name clarity_name,
358  ccm.generic_name generic_clarity_name
359from(
360    select
361      distinct ccm.medication_id clarity_med_id
362    from
363      clarity.clarity_medication ccm
364    left join
365      clarity_med_id_to_rxcui ctorx
366    on
367      ccm.medication_id = ctorx.clarity_med_id
368    where
369      ctorx.clarity_med_id is null
370    )
371join
372  clarity.clarity_medication ccm
373on
374  ccm.medication_id = clarity_med_id;
375*/ 
376
377/* Insert into our med_id to RxCui list.  There shouldn't be any overlap wrt
378clarity medication id since we only insert into the medex table if we couldn't
379find an rxcui using GCN/NDC.
380*/
381insert into clarity_med_id_to_rxcui
382  select m.clarity_med_id, m.rxcui, 'MDX' src
383  from clarity_med_id_to_rxcui_medex m;
384--21,509 rows inserted.
385
386--Re-running coverage test above: down to 10% missing
387
388/* Map medications to SCDF (Semantic Clinical Dose Form). 
389Combined into the parent-child relationship view to create the hierarchy.
390*/
391whenever sqlerror continue;
392drop table cmed_to_scdf;
393whenever sqlerror exit;
394
395--Use the chd_medid as the medication concept and the rxcui for the SCDF concept.
396create table cmed_to_scdf as
397select con.str par, ccm.name chd, con.rxaui par_aui, c2r.clarity_med_id chd_medid,
398con.rxcui par_rxcui, c2r.rxcui chd_rxcui
399from rxnorm.rxnrel rel
400join clarity_med_id_to_rxcui c2r on c2r.rxcui=rel.rxcui2
401join rxnorm.rxnconso con on con.rxcui=rel.rxcui1
402join clarity.clarity_medication ccm on ccm.medication_id=c2r.clarity_med_id
403where rela = 'isa' and ( con.tty = 'SCDF' or con.tty = 'SBDF' );
404
405--How many different places do meds map?
406/*
407with medid_count as(
408  select count(*) cnt, chd_medid clarity_med_id
409  from cmed_to_scdf
410  group by chd_medid
411  )
412select medid_count.cnt, medid_count.clarity_med_id, ccm.name
413from medid_count
414join clarity.clarity_medication ccm
415on ccm.medication_id = medid_count.clarity_med_id
416order by cnt desc;
417*/
418
419
420/* Map SCDF to VA class.  We have the medications mapped to NDF-RT drug classes
421already.  We also have the medications mapped to the SCDF.  So, use those
422relationships to determine the SCDF to VA class relationship.
423*/
424whenever sqlerror continue;
425drop table cmed_to_va_class;
426whenever sqlerror exit;
427
428
429--TODO:  Pick one of the methods?  We eeked out a few more matches when I used both...hmm...
430--TODO:  Learn SQL!  Why do we get less in the total than each of the DISTINCT parts?  Must be dups somehow? I thought distinct would fix that.
431create table cmed_to_va_class as(
432
433select distinct
434  ccm.name chd, c2r.clarity_med_id chd_medid, con.str par, con.rxcui par_rxcui
435from rxnorm.rxnrel rel
436join rxnorm.rxnsat sat_chd on sat_chd.rxaui=rel.rxaui2
437join rxnorm.rxnsat sat_par on sat_par.rxaui=rel.rxaui1
438join clarity_med_id_to_rxcui c2r on c2r.rxcui=sat_chd.rxcui
439join clarity.clarity_medication ccm on c2r.clarity_med_id = ccm.medication_id
440join rxnorm.rxnconso con on con.rxaui = sat_par.rxaui
441where rel.rel = 'CHD' and sat_par.atv = 'VA Class'--32,710
442
443union
444
445select distinct
446  ccm.name chd, ccm.medication_id chd_medid, sat.atv par, con.rxcui par_rxcui
447from clarity_med_id_to_rxcui ctorx
448join clarity.clarity_medication ccm on ccm.medication_id = ctorx.clarity_med_id
449join rxnorm.rxnsat sat on sat.rxcui = ctorx.rxcui 
450left join rxnorm.rxnconso con
451on con.str = sat.atv
452where sat.atn = 'VA_CLASS_NAME' --33,268
453);
454
455
456/*
457select count(*) from cmed_to_va_class; --33,562 total
458
459select count(*) from(
460  select distinct clarity_med_id from clarity_med_id_to_rxcui ctorx
461  );--91,385 meds
462*/ 
463
464/* So, how many of the meds actually map the the VA class? Note: Things are
465mapping to more than one place - distinct cuts the number down significantly.
466*/
467/*
468with meds_mapped_to_va as(
469  select
470    count(*) cnt
471  from(
472    select
473      distinct chd_medid
474    from
475      cmed_to_va_class
476    )
477  ),
478meds_with_rxcuis as(
479  select count(*) cnt from(
480    select
481      distinct clarity_med_id 
482    from
483      clarity_med_id_to_rxcui
484    )
485  )
486select
487  meds_mapped_to_va.cnt mapped_va, meds_with_rxcuis.cnt rxcuis
488from
489  meds_mapped_to_va, meds_with_rxcuis;
490*/
491
492/* Parent/child relationships for SCDF/NDF-RT VA Class
493Combined into the parent-child relationship view to create the hierarchy.
494*/
495whenever sqlerror continue;
496drop table scdf_to_va_class;
497whenever sqlerror exit;
498
499create table scdf_to_va_class as
500select vaclass.par par, scdf.par chd, scdf.par_rxcui chd_rxcui, vaclass.par_rxcui
501from cmed_to_scdf scdf
502join cmed_to_va_class vaclass on vaclass.chd_medid=scdf.chd_medid;
503
504/*
505select * from cmed_to_va_class;
506select count(*) from scdf_to_va_class; --47808
507*/
508
509/* What meds are we missing from the hierarchy?
510*/
511whenever sqlerror continue;
512drop table missing_meds_by_fact_cnt;
513whenever sqlerror exit;
514
515--create index va_med_id on cmed_to_va_class(chd_medid);
516
517/* Order missing meds by fact count. 
518*/
519create table missing_meds_by_fact_cnt as select * from(
520
521with mapped_med_ids as(
522  --Distinct concepts that link to RxNorm
523  select distinct chd_medid medication_id, chd name
524  from cmed_to_va_class 
525  ),
526med_facts as(
527  select distinct to_number(substr(cbc.concept_cd, 19)) medication_id, cbc.facts
528  from blueheronmetadata.counts_by_concept@deid cbc
529  where cbc.concept_cd like 'KUH|MEDICATION_ID:%'
530  )
531select distinct mf.medication_id, com.name, mf.facts cnt
532from med_facts mf
533left join mapped_med_ids mid on mid.medication_id = mf.medication_id
534join clarity.clarity_medication com on com.medication_id = mf.medication_id
535where mid.medication_id is null
536order by mf.facts desc
537
538);
539
540/* Map some common medications based on their name. 
541--eyeball which ones to pick off first based on fact count
542select * from missing_meds_by_fact_cnt order by cnt desc;
543*/
544
545whenever sqlerror continue;
546drop table map_by_name;
547whenever sqlerror exit;
548
549
550create table map_by_name as select distinct * from (
551  with
552  like_names as (
553    /* Eyeballing new matches:
554      - If matching on name alone (both the name of "to be matched" and the name
555        of "already in the hierarchy") "par" should be null.
556      - To specify a parent RXCUI, add it to the "par" column.
557    select '%CEFEPIME%' n, null par from dual
558    union all
559    select 'LIDOCAINE%IJ%' n, 691893 par from dual --[CN204] LOCAL ANESTHETICS,INJECTION
560    */
561    select like_name, parent_rxcui from med_map_like_name@deid
562    ),
563  manual_mappings as (
564   select distinct ms.medication_id, cmed_match.par_rxcui
565    from missing_meds_by_fact_cnt ms
566    join (select *
567          from cmed_to_va_class ctv
568          join like_names ln on upper(ctv.chd) like ln.like_name
569          where (ln.parent_rxcui is null or (ln.parent_rxcui is not null and ln.parent_rxcui = ctv.par_rxcui))) cmed_match
570    on ms.name like cmed_match.like_name
571    )
572  select mis.name chd, mm.medication_id chd_medid, ctov.par, mm.par_rxcui
573  from manual_mappings mm
574  join missing_meds_by_fact_cnt mis on mis.medication_id = mm.medication_id
575  join cmed_to_va_class ctov on ctov.par_rxcui = mm.par_rxcui
576)
577;
578
579/*
580-- eyeball how many more med ids/facts we get when matching by name
581with
582now_mapped_id as (
583  select distinct chd_medid
584  from map_by_name mbn
585  ),
586prev_unknown_cnt as (
587  select count(*) cnt from (select distinct medication_id from missing_meds_by_fact_cnt)
588  ),
589prev_unknown_facts as (
590  select sum(cnt) cnt from missing_meds_by_fact_cnt
591  ),
592now_mapped as (
593  select * from now_mapped_id nmid
594  join missing_meds_by_fact_cnt mm on mm.medication_id = nmid.chd_medid
595  )
596select 'Previously unmapped medication id count: ' title, cnt from prev_unknown_cnt
597union
598select 'Previously unmapped medication id fact count: ' title, cnt from prev_unknown_facts
599union
600select 'Newly mapped medication id count' title, count(*) cnt from now_mapped_id
601union
602select 'Fact count of medication ids newly mapped' title, sum(nm.cnt) cnt from now_mapped nm
603;
604*/
605
606/* Delete medication ids from the missing meds table where we've found a place
607for them.
608
609Before/after eyeballing:
610select count(*) from missing_meds_by_fact_cnt;
611select count(*) from (select distinct chd_medid from map_by_name);
612*/
613
614delete from missing_meds_by_fact_cnt
615where exists (
616  select chd_medid
617  from map_by_name
618  where map_by_name.chd_medid = missing_meds_by_fact_cnt.medication_id
619  )
620;
621
622
623/* Investigate what percentage of facts we can cover with the top x by count.
624Note that the i2b2 default display limit for child items in the ontology appears
625to be 500.
626
627with full_count as(
628  select sum(cnt) sum_cnt
629  from missing_meds_by_fact_cnt
630  ),
631top_count as(
632  select sum(cnt) sum_cnt
633  from missing_meds_by_fact_cnt
634  where rownum <= 500
635  )
636select round( 100 * (tc.sum_cnt / fc.sum_cnt), 2) prcnt
637from top_count tc, full_count fc; --91.93%
638*/
639
640
641--Meds that map to VA class but NOT to an scdf
642create or replace view va_meds_no_scdf as(
643  select distinct va.chd, va.chd_medid, va.par, va.par_rxcui   
644  from cmed_to_va_class va
645  left join cmed_to_scdf sc on sc.chd_medid = va.chd_medid
646  where sc.chd_medid is null
647  );
648 
649 
650-- Special-case VA class
651create or replace view special_va_class_folders as(
652  select 'Other Medication Concepts' class_name from dual
653  );
654 
655/* Special manually mapped meds (top-level)
656Note:  Making this a view doesn't work - possibly due to the fact it would be
657attempting to pull from a remote table and insert into another remote table.
658
659"SQL Error: ORA-02019: connection description for remote database not found"
660*/
661whenever sqlerror continue;
662drop table manual_top_level_meds;
663whenever sqlerror exit;
664
665
666create table manual_top_level_meds as (
667  select cm.medication_id, cm.name
668  from clarity.clarity_medication cm
669  where cm.medication_id in (250702, 210462, 210461)
670  );
671
672
673--Put unmapped medications here:
674create or replace view unmapped_meds as select * from(
675with meds_to_map as(
676  select * from (
677    select medication_id, name
678    from missing_meds_by_fact_cnt
679    --from missing_meds_by_fact_cnt
680    --Number chosen based on i2b2 limit and coverage test above
681    where medication_id not in (select medication_id from manual_top_level_meds)
682    )
683  where rownum <= 500
684  )
685select
686  mm.name chd, mm.medication_id chd_medid, spec.class_name par, null par_rxcui
687from meds_to_map mm, special_va_class_folders spec
688);
689
690
691/* Temp table for class-level parent-child relationships in the NDF-RT hierarchy.
692*/
693whenever sqlerror continue;
694drop table ndfrt_class_rel;
695whenever sqlerror exit;
696
697
698create table ndfrt_class_rel as
699  select
700    substr(con1.str, 0, 64) || ' #' || con1.rxcui i1, rel.rel,
701    substr(con2.str, 0, 64) || ' #' || con2.rxcui i2,
702    sat2.atv i2type, con1.rxcui i1rxcui, con2.rxcui i2rxcui
703  from
704    rxnorm.rxnrel rel
705  join
706    rxnorm.rxnconso con1 on con1.rxaui=rel.rxaui1
707  join
708    rxnorm.rxnconso con2 on con2.rxaui=rel.rxaui2
709  join
710    rxnorm.rxnsat sat1 on sat1.code = con1.scui
711  join
712    rxnorm.rxnsat sat2 on sat2.code = con2.scui
713  where
714    --i1 is a VA class, i2 may not be.  But, keep track of the type of 12.
715    rel.sab = 'NDFRT' and sat1.atv = 'VA Class';
716
717
718-- Use the i2b2 Medication tree
719create or replace view ndfrt_base as(
720  select 'Medications' name 
721  from dual );
722
723
724/* Combine all our relationships in a parent/child view.  Relationships include
725things that aren't classes so we can get classes that don't have anything below
726them in the hierarchy.
727*/
728
729create or replace view ndfrt_par_chd as(
730--Top level concept
731select null as par, b.name chd, 'FA' va, null par_basecode, null chd_basecode from ndfrt_base b
732
733union
734
735--Top level class
736select b.name par, chd, va, null par_basecode, c_basecode chd_basecode from(
737  select item chd, 'FA' va, c_basecode from
738    (with allparentclasses as(
739      select distinct i1 item, i1rxcui rxcui from ndfrt_class_rel r
740      where r.rel = 'CHD'
741      ),
742    allchildclasses as(
743      select distinct i1 item, i2rxcui rxcui from ndfrt_class_rel r
744      where r.rel = 'PAR' and r.i2type = 'VA Class'
745      )
746    select distinct ap.item, 'RXCUI:' || ap.rxcui c_basecode
747    from allparentclasses ap
748    left join allchildclasses ac
749    on ap.item = ac.item
750    where ac.item is null)
751    ), ndfrt_base b
752
753union
754
755--Special Cases
756select b.name par, topspec.name chd, 'LA' va, null par_basecode,
757'KUH|MEDICATION_ID:' || topspec.medication_id chd_basecode
758from ndfrt_base b, manual_top_level_meds topspec
759
760union
761
762-- Previously unmapped meds - the "unmapped" class
763select b.name par, spec.class_name chd, 'FA' va, null par_basecode, null chd_basecode
764from ndfrt_base b, special_va_class_folders spec
765
766union
767
768select um.par,
769  substr(um.chd, 1, 64) || ' #' || um.chd_medid, 'LA' va,
770  null par_basecode,
771  'KUH|MEDICATION_ID:' || um.chd_medid chd_basecode
772from unmapped_meds um
773
774union
775
776--The rest of the class relationships.
777select i2 par, i1 chd, 'FA' va, 'RXCUI:' || i2rxcui par_basecode,
778'RXCUI:' || i1rxcui chd_basecode
779from ndfrt_class_rel
780where i2type = 'VA Class' and rel='PAR'
781
782union
783
784--Add the SCDF as folders
785/*
786TODO: Some SCDF names are really long - can we abbreviate instead of truncate?
787Most appear to have '/' in them - perhaps something like the following to get
788everythign after the last slash?
789
790TODO: Use AUI as the fullname?
791
792select * from(
793  select ltrim(substr(str, -(length(str)-instr(str, '/', -1 ))), ' ') abbv, str, rxcui from(
794    select rxcui, str from rxnorm.rxnconso where tty = 'SCDF' and length(str) > 64
795    )
796  )order by length(str) desc;
797
798WARNING:  The method of truncation here must match for the union of the SDCF
799parent/child rows and the medication parent/child rows.  Otherwise, the strings
800won't match for the full-path conversion done later for inserting into the i2b2-
801compatible ontology.
802
803To avoid duplication after truncating, append the RXCUI to the full-name/concept
804path string.  The #+RXCUI is similar to what's done for medication ids (also
805refer to epic_meds_transform.sql).
806*/
807select substr(par, 0, 64) || ' #' || to_char(par_rxcui) par,
808  substr(chd, 0,64) || ' #' || to_char(chd_rxcui) chd, 'FA' va,
809  'RXCUI:' || par_rxcui par_basecode, 'RXCUI:' || chd_rxcui chd_basecode
810  from scdf_to_va_class
811
812union
813
814/* Get our meds as leaves.  The medication names are truncated the same
815way as before in epic_meds_transform.sql - 15 chars + # + med_id.
816*/
817
818select substr(par, 0, 64) || ' #' || to_char(par_rxcui),
819  substr(chd, 1, 64) || ' #' || chd_medid, 'LA' va,
820  'RXCUI:' || par_rxcui par_basecode,
821  'KUH|MEDICATION_ID:' || chd_medid chd_basecode
822from (
823  select chd, chd_medid, par, par_rxcui from cmed_to_scdf
824  union
825  select chd, chd_medid, par, par_rxcui from va_meds_no_scdf
826  union
827  select chd, chd_medid, par, par_rxcui from map_by_name
828  )
829);
830
831
832/* Now, insert an i2b2 compatible ontology into rxnorm_terms.
833ref: http://www.adp-gmbh.ch/ora/sql/connect_by.html
834ref: http://docs.oracle.com/cd/B19306_01/server.102/b14200/queries003.htm
835*/
836
837-- Delete existing records in the rxnorm_terms table then insert
838delete from blueheronmetadata.rxnorm_terms@deid;
839
840
841/* Exclude our special-case medications from having the modifiers applied.
842Refer to i2b2 ontology design (m_exclusion_cd):
843https://www.i2b2.org/software/files/PDF/current/Ontology_Design.pdf
844
845Note:  This code depends on epic_concepts_load being called first.
846*/
847insert into BLUEHERONMETADATA.rxnorm_terms@deid (
848  c_hlevel, c_fullname, c_name, c_synonym_cd,
849  c_visualattributes, c_basecode, c_facttablecolumn,
850  c_tablename, c_columnname, c_columndatatype, c_operator,
851  c_dimcode, c_tooltip, m_applied_path, update_date,
852  import_date, sourcesystem_cd, m_exclusion_cd )
853select
854c_hlevel, c_fullname, c_name, c_synonym_cd, c_visualattributes, c_basecode,
855c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
856c_operator, c_dimcode, c_tooltip,
857substr(m_applied_path, 1, instr(m_applied_path, '\', -1)) || m.name || '\%' m_applied_path,
858update_date, import_date, sourcesystem_cd, 'X' m_exclusion_cd
859from blueheronmetadata.epic_terms@deid, manual_top_level_meds m
860where c_tablename = 'MODIFIER_DIMENSION'
861and c_fullname like '\Medication\%'
862;
863
864
865insert into BLUEHERONMETADATA.rxnorm_terms@deid (
866  c_hlevel, c_fullname, c_name, c_basecode, c_dimcode,
867  c_visualattributes,c_tooltip,
868  c_synonym_cd, c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
869  c_operator, m_applied_path,
870  update_date, import_date, sourcesystem_cd)
871select terms.c_hlevel, terms.concept_path cp1, terms.c_name, terms.concept_code,
872  terms.concept_path cp2, terms.c_visualattributes, null as c_tooltip,
873  norm.*,
874  sysdate as update_date, sysdate as import_date, aud.source_cd as source_cd
875from   
876  (
877  select
878    level as C_HLEVEL,
879    '\i2b2' || SYS_CONNECT_BY_PATH(chd, '\') || '\' as concept_path,
880
881    case --A bit of a kludge.  Strip off '#' + RXCUI/MEDID that we added to the end.
882      --TODO: consider other options to limit length? ticket:1048#comment:33
883      when instr( chd, '#' ) = 0
884      then chd
885      else  substr( chd, 1, instr( chd, '#', -1 ) - 2 )
886    end C_NAME,   
887   
888    va C_VISUALATTRIBUTES, chd_basecode concept_code,
889    null C_METADATAXML
890    from ndfrt_par_chd
891    start with par is null
892    connect by prior chd = par
893  )terms, BlueHeronMetadata.normal_concept@deid norm, rxnorm_audit_info aud
894  where c_hlevel != 1;
895
896commit;
897
898/* Make sure there are no duplicates in the full paths of the RxNorm terms table.
899Note that a particular term can be excluded from having a modifier attached even
900when that modifier is applied at a higher level in the hierarchy.  To exclude a
901term, insert another modifier row into the terms table with the same full path
902but with applied path set to the term to be excluded and the exclusion code set
903to 'X'.  So, duplicate rows are acceptable if they are used in this way to
904exclude terms from modifiers.
905*/
906select case when count(*) > 0 then 1/0 else 1 end dup_concepts from(
907  select count(*), c_fullname, m_exclusion_cd, m_applied_path
908  from blueheronmetadata.rxnorm_terms@deid
909  group by c_fullname , m_exclusion_cd, m_applied_path
910  having count(*) > 1 order by count(*) desc
911);
912
913
914/* Check our maximum path length - enforced by the terms table anyway
915select case when len > 700 then 1/0 else 1 end rxnorm_path_length from (
916  select max(length(c_fullname)) len from blueheronmetadata.rxnorm_terms@deid
917);
918*/
919
920/* Test to see how much coverage we have.
921
922For some reason, if we use a temp able the test query takes a few seconds.  if
923we include it in the with/as statement it takes MANY minutes.
924*/
925
926
927with med_facts as(
928  --Distinct counts by concept wrt medications
929  select distinct concept_cd, facts
930  from blueheronmetadata.counts_by_concept@deid cbc
931  where cbc.concept_cd like 'KUH|MEDICATION_ID:%'
932  ),
933rxnorm_med_concepts as(
934  --Distinct concepts that link to RxNorm
935  select distinct(c_basecode) concept_cd
936  from BLUEHERONMETADATA.rxnorm_terms@deid
937  where c_visualattributes = 'LA'
938  ),
939joined_rx_med_concepts as(
940  -- Find facts where we don't map to RxNorm
941  select distinct mfct.concept_cd facts_concept_cd, mfct.facts, rxnc.concept_cd rxn_concept_cd
942  from med_facts mfct
943  left join rxnorm_med_concepts rxnc
944  on mfct.concept_cd = rxnc.concept_cd
945  where rxnc.concept_cd is null
946  ),
947total_facts as(
948  -- Total facts summed up
949  select sum(medf.facts) sumfacts
950  from med_facts medf
951  ),
952non_matching_rxfacts as(
953  --Non-Matching facts summed up
954  select sum(j.facts) sumfacts
955  from joined_rx_med_concepts j
956  ),
957report as(
958  select round(100*( 1 - (non_matching_rxfacts.sumfacts / total_facts.sumfacts) ), 2) prcnt_matching_rxnorm,
959    total_facts.sumfacts total_facts, non_matching_rxfacts.sumfacts non_matching_rxnorm_facts
960  from total_facts, non_matching_rxfacts
961  )
962select case when r.prcnt_matching_rxnorm < 99.0 then 1/0 else 1 end rxnorm_fact_coverage
963from report r;
964
965
966/* What are the medications we're missing (ordered by fact count).
967Note:  There may be multiple rows for each concept/count since we may have found
968more than one RxCUI!
969*/
970
971/*
972with med_facts as(
973  --Distinct counts by concept wrt medications
974  select distinct concept_cd, facts
975  from blueheronmetadata.counts_by_concept@deid cbc
976  where cbc.concept_cd like 'KUH|MEDICATION_ID:%'
977  ),
978rxnorm_med_concepts as(
979  --Distinct concepts that link to RxNorm
980  select distinct(c_basecode) concept_cd
981  from BLUEHERONMETADATA.rxnorm_terms@deid 
982  where c_visualattributes = 'LA'
983  ),
984joined_med_concepts as(
985  -- Find facts where we don't map to RxNorm
986  select distinct mfct.concept_cd facts_concept_cd, mfct.facts, rxnc.concept_cd rxn_concept_cd
987  from med_facts mfct
988  left join rxnorm_med_concepts rxnc
989  on mfct.concept_cd = rxnc.concept_cd
990  where rxnc.concept_cd is null
991  ),
992missing_meds as(
993  select
994    j.facts_concept_cd, j.facts, ccm.name clarity_name
995    from joined_med_concepts j
996    join clarity.clarity_medication ccm
997    on ccm.medication_id = substr(j.facts_concept_cd, 19)
998    order by facts desc
999  ),
1000code_match as(
1001  select mm.facts_concept_cd, ctorx.rxcui, mm.facts, mm.clarity_name, ctorx.src, s.str rxnorm_name
1002  from missing_meds mm
1003  left join clarity_med_id_to_rxcui ctorx on substr(mm.facts_concept_cd, 19) = ctorx.clarity_med_id
1004  left join rxcui_to_one_string s on s.rxcui = ctorx.rxcui
1005  )
1006select cm.facts_concept_cd, cm.rxcui, cm.facts, cm.clarity_name, cm.src, cm.rxnorm_name
1007from code_match cm
1008order by cm.facts desc; 
1009*/
1010
1011
1012/* Test to make sure at least one of the medications that didn't get put in the
1013right place is now in the hierarchy (see ticket 1451):
1014KUH|MEDICATION_ID:210319 = "CEFAZOLIN INJ 1GM IVP"
1015*/
1016select case when cnt > 0 then 1/0 else 1 end med_match_by_name_1451 from (
1017  select count(*) cnt from (
1018    select c_name, c_fullname, c_basecode
1019    from blueheronmetadata.rxnorm_terms@deid
1020    where c_basecode = 'KUH|MEDICATION_ID:210319'
1021    )
1022  where c_fullname not like (
1023    select c_fullname || '%' from blueheronmetadata.rxnorm_terms@deid where c_basecode = 'RXCUI:91079'
1024    )
1025  )
1026;
1027
1028
1029--Drop the temporary tables.
1030--TODO: Consider using global temp tables?
1031drop table clarity_med_id_to_rxcui;
1032drop table clarity_med_id_to_rxcui_medex;
1033drop table cmed_to_scdf;
1034drop table cmed_to_va_class;
1035drop table scdf_to_va_class;
1036drop table ndfrt_class_rel;
1037drop table missing_meds_by_fact_cnt;
1038drop table rxcui_to_one_string;
1039drop table manual_top_level_meds;
1040drop table map_by_name;
Note: See TracBrowser for help on using the repository browser.