Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/epic_etl.py @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 21.4 KB
Line 
1'''epic_etl.py -- ETL tasks for Epic/clarity
2--------------------------------------------
3
4Epic Clarity Database and Schema
5********************************
6
7The source is a CLARITY schema per the Epic__ 'Clarity Spring 2009 data model'.
8
9__ http://www.epic.com/
10
11  >>> import heron_build
12  >>> options = _option  # un-hide for testing
13  >>> _, flat = heron_build.flat_options(
14  ...             read_config=heron_build.config_res(options.config))
15  >>> options.update(flat)
16
17The config should supply the oracle sid for the CLARITY schema::
18
19  >>> options.clarity_sid
20  'CLARITY_DB_SID'
21
22The connection from :func:`heron_build.id_db` to the CLARITY sid is
23made via a database link that assumes the same password in both sides.
24
25.. todo:: see if the same-password restriction can be relaxed
26
27Clarity Dump File
28*****************
29
30The i2b2 audit download date is taken from the clarity dump file
31from the start of the HeronLoad__ process:
32
33  >>> _ = options.clarity_dump
34
35For production settings, see `heron-prod1.ini` and `heron-prod2.ini`.
36
37__ http://informatics.kumc.edu/work/wiki/HeronLoad
38
39
40Flowsheet Fact Views
41********************
42
43To specify which flowsheet views to load from, use::
44
45  >>> [v.strip() for v in flowsheet_fact_views.split(',')][:3]
46  ['numerictypeflows', 'weightmeasureflows', 'patweightmeasureflows']
47  >>> len(flowsheet_fact_views.split())
48  14
49
50.. todo:: refactor all loading to use fact_views; move tests around
51
52:copyright: Copyright 2010-2013 University of Kansas Medical Center
53            part of the `HERON open source codebase`__;
54            see NOTICE file for license details.
55
56__ http://informatics.kumc.edu/work/wiki/HERON
57'''
58
59from warnings import warn
60
61from paver.easy import task, needs, might_call, no_help
62
63
64def _paver_import_work_around():
65    import sys
66    import os
67    sys.path.append(os.path.dirname(__file__))
68_paver_import_work_around()
69
70
71from heron_build import (ID_SCHEMA,
72                         run_scripts, run_id_scripts,
73                         load_curated_data)
74from heron_build import single_loader, multi_loader, view_loader, curated_data
75from i2b2_deid import do_deid_dimensions, do_deid_facts
76
77
78epic_view_kwds = dict(server='id_server',
79                      section='id')
80epic_dimension_loader_kwds = dict(
81    script='epic_dimensions_load.sql',
82    label='Epic mappings and dimensions',
83    source_hint='Epic',
84    capture_release_list=['dump'],
85    dump=lambda options: options.path_for.clarity_dump,
86    postprocess=lambda options, job_id:
87                do_deid_dimensions(options, [job_id], 'i2b2_star_truncate.sql')
88)
89epic_facts_loader_kwds = dict(
90    script='epic_facts_load.sql',
91    source_hint='Epic',
92    capture_release_list=['dump', 'variables'],
93    dump=lambda options: options.path_for.clarity_dump,
94    variables=lambda transform_name: dict(epic_fact_view=transform_name),
95    addl_params=dict(pat_source_cd='Epic@kumed.com',
96                     enc_source_cd='Epic@kumed.com'),
97    postprocess=lambda options, job_id:
98                do_deid_facts(options, [job_id], deid_instance_num=True)
99)
100
101
102@task
103@no_help
104@view_loader('epic_view_constants.sql', **epic_view_kwds)
105def make_epic_view_constants(options):
106    pass
107
108
109@task
110@no_help
111@needs('make_epic_visit_views')
112@view_loader('epic_allergy_transform.sql', **epic_view_kwds)
113def make_epic_allergy_views(options):
114    pass
115
116
117@task
118@no_help
119@view_loader('epic_alerts_transform.sql', **epic_view_kwds)
120def make_epic_alert_views(options):
121    pass
122
123
124@task
125@no_help
126@might_call('make_epic_visit_views')
127@needs('dblink_deid_id', 'load_zip_data', 'load_school_districts')
128@view_loader('epic_demographic_transform.sql', **epic_view_kwds)
129def make_epic_demographic_views(options):
130    pass
131
132
133@task
134@no_help
135@needs('make_epic_view_constants', 'make_scheme_labels')
136@view_loader('epic_mychart_transform.sql', **epic_view_kwds)
137def make_epic_mychart_views(options):
138    pass
139
140
141@task
142@no_help
143@needs('make_epic_view_constants', 'make_scheme_labels')
144@view_loader('epic_diag_tx.sql', **epic_view_kwds)
145def make_epic_diag_views(options):
146    pass
147
148
149@task
150@no_help
151@view_loader('epic_enc_vitals_transform.sql', **epic_view_kwds)
152def make_epic_enc_vitals_views(options):
153    pass
154
155
156@task
157@no_help
158@view_loader(('epic_flowsheets_transform.sql', 'flowsheet_concepts.sql'),
159             **epic_view_kwds)
160def make_epic_flowsheet_views(options):
161    pass
162
163
164@task
165@no_help
166@needs('load_labs_regex', 'load_component_whitelist')
167@view_loader('epic_labs_transform.sql', **epic_view_kwds)
168def make_epic_lab_views(options):
169    pass
170
171
172@task
173@no_help
174@needs('make_epic_view_constants', 'make_scheme_labels')
175@view_loader('epic_medical_history.sql', **epic_view_kwds)
176def make_epic_medical_history_views(options):
177    pass
178
179
180@task
181@no_help
182@view_loader('epic_meds_transform.sql', **epic_view_kwds)
183def make_epic_meds_views(options):
184    pass
185
186
187@task
188@no_help
189@needs('make_epic_view_constants', 'load_microlab_component_whitelist')
190@view_loader('epic_microbiology_transform.sql', **epic_view_kwds)
191def make_epic_microbiology_views(options):
192    pass
193
194
195@task
196@no_help
197@view_loader('epic_notes_transform.sql', **epic_view_kwds)
198def make_epic_notes_views(options):
199    pass
200
201
202@task
203@no_help
204@view_loader(('epic_orders_transform.sql', 'epic_order_sets.sql'),
205             **epic_view_kwds)
206def make_epic_order_views(options):
207    pass
208
209
210@task
211@no_help
212@view_loader('epic_services_transform.sql', **epic_view_kwds)
213def make_epic_service_views(options):
214    pass
215
216
217@task
218@no_help
219@needs('make_epic_view_constants')
220@view_loader('epic_social_history.sql', **epic_view_kwds)
221def make_epic_social_history_views(options):
222    pass
223
224
225@task
226@no_help
227@needs('make_epic_view_constants', 'load_epic_test_patients')
228@view_loader('epic_visit_transform.sql', **epic_view_kwds)
229def make_epic_visit_views(options):
230    pass
231
232
233@task
234@no_help
235@needs('make_epic_view_constants')
236@view_loader('epic_patient_adt_transform.sql', **epic_view_kwds)
237def make_epic_adt_views(options):
238    pass
239
240
241@task
242@no_help
243@needs('make_epic_view_constants')
244@view_loader('epic_ed_episode_transform.sql', **epic_view_kwds)
245def make_epic_ed_episode_views(options):
246    pass
247
248
249@task
250@needs('make_epic_allergy_views',
251       'make_epic_alert_views',
252       'make_epic_diag_views',
253       'make_epic_demographic_views',
254       'make_epic_lab_views',
255       'make_epic_medical_history_views',
256       'make_epic_meds_views',
257       'make_epic_microbiology_views',
258       'make_epic_notes_views',
259       'make_epic_order_views',
260       'make_epic_social_history_views',
261       'make_epic_enc_vitals_views',
262       'make_epic_visit_views',
263       'make_epic_mychart_views')
264def make_epic_views():
265    '''Make/Validate all the epic views'''
266    warn('make_epic_views needs refactoring', category=DeprecationWarning)
267
268
269@task
270def drop_all_clarity_views(options,
271                           script=('remove_all_user_views (no file)'),
272                           section='clarity'):
273    '''Drops all the views of the current user in clarity.
274    '''
275    with options.id_server.database(section).transaction(script) as dropper:
276        dropper.execute('select view_name from user_views')
277        views_to_remove = dropper.fetchall()
278        for view_loader in views_to_remove:
279            dropper.execute('drop view_loader %s' % view_loader)
280
281
282@task
283@curated_data('deid_db',
284              'age_terms', 'curated_data/age_terms.csv')
285def load_age_terms(options):
286    '''Load datafile for age terms'''
287
288
289@task
290@curated_data('deid_db',
291              'med_map_like_name', 'curated_data/med_map_like_name.csv')
292def load_med_map_like_name(options):
293    '''
294    Load datafile containing name patterns we use to map medications
295    based on similar names.
296    '''
297
298
299@task
300@curated_data('deid_db',
301              'med_map_manual_curation',
302              'curated_data/med_map_manual_curation.csv')
303def load_med_map_manual_curation(options):
304    '''
305    Load datafile containing manually curated medication concepts.
306    '''
307
308
309@task
310@curated_data('deid_db',
311              'med_map_exclude_modifiers',
312              'curated_data/med_map_exclude_modifiers.csv')
313def load_med_map_exclude_modifiers(options):
314    '''
315    Load datafile containing medication concepts that should not have
316    modifiers applied.
317    '''
318
319
320@task
321@needs('load_med_map_like_name', 'load_med_map_manual_curation',
322       'load_med_map_exclude_modifiers')
323def load_med_map_curated_data(options):
324    '''
325    Load curated data related to medication mapping.
326    '''
327
328
329@task
330@curated_data('deid_db',
331              'organism_name_cat_filter',
332              'curated_data/organism_name_categorization_filter.csv')
333def load_curated_micro_concept_data(options):
334    '''Load datafile for needed for micro-organism categorization '''
335
336
337@task
338@curated_data('deid_db',
339              'LOCAL_LABS', 'curated_data/local-labs.csv')
340def load_local_lab_categories(options):
341    '''Load datafile for local labs'''
342
343
344@task
345@curated_data('deid_db',
346              'gpc_billing_diag_modifiers',
347              'curated_data/gpc_billing_diag_modifiers.csv')
348def load_gpc_billing_diag_modifiers(options):
349    '''Load GPC billing diagnosis modifiers'''
350
351
352@task
353def load_labs_regex(options,
354                    table='lab_value_regex',
355                    csv_file='curated_data/lab_regex.csv'):
356    '''Load datafile for regex reduction of lab value'''
357    # @@TODO: remove explicit call to load_curated_data
358    # and use the "@curated_data" decorator
359    # This will have to be done after the architecture move to single sid.
360    load_curated_data(options.id_server.database('clarity'), table, csv_file)
361
362
363@task
364@curated_data('id_db',
365              'epic_test_patients', 'curated_data/epic_test_patients.csv')
366def load_epic_test_patients(options):
367    '''Load datafile containing epic test patients'''
368
369
370@task
371@curated_data('id_db',
372              'component_whitelist', 'curated_data/componentids_whitelist.csv')
373def load_component_whitelist(options):
374    '''Load datafile for componentids whitelist'''
375
376
377@task
378@curated_data('id_db',
379              'microlab_component_whitelist',
380              'curated_data/microlab_componentids_whitelist.csv')
381def load_microlab_component_whitelist(options):
382    '''Load datafile for componentids whitelist'''
383
384
385@task
386@run_scripts(('i2b2_star_truncate.sql'),
387             capture_release_list=['db'],
388             db=lambda options: options.id_db,
389             variables=dict(star=ID_SCHEMA))
390def truncate_i2b2_star(options):
391    pass
392
393
394@task
395@needs('dblink_deid_id','make_epic_visit_views',
396       'load_epic_test_patients', 'truncate_i2b2_star')
397@might_call('heron_init')
398@single_loader('load_epic_dimensions', **epic_dimension_loader_kwds)
399def load_epic_dimensions(options):
400    '''Load, de-id patient, visit dimensions from Epic.
401    '''
402
403    # Before we start, count how many records we're blowing away.
404    # @@TODO: move this inside epic_dimensions_load.sql
405    # with transaction(options.id_db) as q:
406    #    q.execute('''
407    # select (select count(*) from NightHerondata.patient_mapping)
408    #  + (select count(*) from NightHerondata.encounter_mapping)
409    #  + (select count(*) from NightHerondata.patient_dimension)
410    #  + (select count(*) from NightHerondata.visit_dimension)
411    #  + (select count(*) from NightHerondata.provider_dimension)
412    #  + (select count(*) from NightHerondata.observation_fact) qty from dual
413    #              ''')
414    #    qty_del = q.fetchone()[0]
415
416
417@task
418@needs('make_epic_demographic_views')
419@might_call('load_epic_dimensions')
420@multi_loader([('observation_fact_geo',
421                'Zipcodes and derived geographic observations from Epic'),
422               ('observation_fact_vital',
423                'Epic patient vital status observation facts'),
424               ('observation_fact_demo',
425                'Epic demographic observation facts'),
426               ('observation_fact_age_at_visit',
427                'Age at visit from Epic')],
428              **epic_facts_loader_kwds)
429@run_id_scripts(['epic_dem_check.sql'])
430def load_epic_demographics(options):
431    '''Load, de-id demographic observations from Epic.
432    '''
433
434
435@task
436@needs('make_epic_mychart_views')
437@might_call('load_epic_dimensions')
438@multi_loader([('mychart_ob_facts',
439                'My Chart related observation facts')],
440              **epic_facts_loader_kwds)
441def load_mychart_facts(options):
442    '''Load, de-id My Chart observations from Epic
443    '''
444
445
446@task
447@needs('make_epic_service_views')
448@multi_loader([('observation_fact_services',
449                'Epic clinical services observation facts')],
450              **epic_facts_loader_kwds)
451def load_epic_services(options):
452    '''Load, de-id clinical services observations from Epic.
453    '''
454
455
456@task
457@needs('make_epic_notes_views')
458@multi_loader([('observation_fact_notes', 'Epic Notes observation facts'),
459               ('observation_fact_notes_txt',
460                'Epic Notes Text observation facts')],
461              **epic_facts_loader_kwds)
462def load_epic_notes(options):
463    '''Load, de-id clinical services observations from Epic.
464    '''
465
466
467@task
468@needs('make_epic_meds_views')
469@multi_loader([
470    ('kuh_med_disp_mods', 'Dispensed medication modifiers'),
471    ('kuh_daily_dose_med_mods', 'Medication daily dose modifiers'),
472    ('kuh_med_mar_mods', 'Medication administration record modifiers'),
473    ('kuh_mar_dose_med_mods', 'Medication dose modifiers from the MAR'),
474    ('kuh_historical_other_med_mods',
475     'Historical/other medication modifiers'),
476    ('kuh_prn_med_mods', 'PRN medication modifiers'),
477    ('kuh_inp_outp_med_mods', 'Inpatient/Outpatient medication modifiers')],
478    **epic_facts_loader_kwds)
479def load_epic_med_facts(options):
480    '''Load, de-id medication order, dispense observations/modifiers from Epic.
481    '''
482
483
484@task
485@needs('make_epic_diag_views')
486@might_call('load_epic_dimensions')
487@multi_loader([
488    ('observation_fact_enc_dx', 'Epic encounter diagnosis observations'),
489    ('observation_fact_pl_dx', 'Epic problem list diagnosis observations'),
490    # Professional diganosis uses epic_facts_loader_kwds unlike other
491    # billing diagnosis types.
492    ('epic_billing_diag_professional', 'Epic professional billing diagnosis')],
493    **epic_facts_loader_kwds)
494def load_epic_diagnosis(options):
495    '''Load, de-id diagnosis observations from Epic.
496    '''
497
498
499@task
500@needs('make_epic_diag_views')
501@might_call('load_epic_dimensions')
502@multi_loader([
503    ('epic_billing_diag_admit', 'Epic admit billing diagnosis observations'),
504    ('epic_billing_diag_discharge',
505     'Epic discharge billing diagnosis observations')],
506    **dict(epic_facts_loader_kwds,
507           addl_params=dict(pat_source_cd='Epic@kumed.com',
508                            enc_source_cd='Epic+hsp_account_id@kumed.com')))
509def load_epic_billing_diagnosis(options):
510    '''Load, de-id billing diagnosis observations from Epic.
511    '''
512
513
514@task
515@needs('make_epic_lab_views')
516@multi_loader([('observation_fact_lab', 'Epic Lab observation facts')],
517              **epic_facts_loader_kwds)
518def load_epic_labs(options):
519    '''Load, de-id Lab observations from Epic.
520    '''
521
522
523@task
524@needs('make_epic_social_history_views')
525@multi_loader([
526    ('tobacco_user', 'Epic Tobacco User'),
527    ('smoking_quit_date', 'Epic Smoking Quit Date'),
528    ('smoking_tob_use', 'Epic Smoking Tobacco Use'),
529    ('tobacco_pak_per_day', 'Epic Tobacco Packs Per Day'),
530    ('tobacco_used_years', 'Epic Years of Tobacco Usage'),
531    ('smokeless_tob_use', 'Epic Smokeless Tobacco Use'),
532    ('smokeless_quit_date', 'Epic Smokeless Quit Date'),
533    ('sexually_active', 'Epic Sexually Active')],
534    **epic_facts_loader_kwds)
535def load_epic_social_history(options):
536    '''Load, de-id Social History observations from Epic.
537    '''
538
539
540@task
541@needs('make_epic_microbiology_views')
542@multi_loader([
543    ('microbiology_sensitivity', 'Epic Microbiology Sensitivity'),
544    ('microbiology_negativeresults', 'Epic Microbiology Negative Results'),
545    ('microbiology_positiveresults', 'Epic Micro Positive No Sensitivity')],
546    **epic_facts_loader_kwds)
547def load_epic_microbiology(options):
548    '''Load, de-id microbiology observations from Epic.
549    '''
550
551
552@task
553@needs('make_epic_visit_views')
554@multi_loader([('epic_discharge_disposition', 'Epic Disch Disp Codes')],
555              **epic_facts_loader_kwds)
556def load_epic_disch_disp_codes(options):
557    '''Load, de-id discharge disposition codes from Epic.
558    '''
559
560
561@task
562@needs('make_epic_adt_views')
563@multi_loader([('epic_hospital_adt_obs',
564                'Epic ADT Patient Class observation facts')],
565              **epic_facts_loader_kwds)
566def load_epic_adt_visit_details(options):
567    '''Load de-id [A]dmission [D]ischarge and [T]ransfer visit details
568       from Epic.
569    '''
570
571
572@task
573@needs('make_epic_ed_episode_views')
574@multi_loader([('epic_ed_episode_obs',
575                'Epic Emergency Department (ED) episode flag facts')],
576              **epic_facts_loader_kwds)
577def load_epic_ed_episode(options):
578    '''Load/de-id ED episode facts from Epic.
579    '''
580
581
582@task
583@needs('make_epic_allergy_views')
584@multi_loader([('allergy_reaction', 'Epic Allergy and Reactions')],
585              **epic_facts_loader_kwds)
586def load_epic_allergy(options):
587    '''Load, de-id allergy observations from Epic.
588    '''
589
590
591@task
592@needs('make_epic_alert_views')
593@multi_loader([('observation_fact_alerts', 'Epic Best Practice Alerts')],
594              **epic_facts_loader_kwds)
595def load_epic_alerts(options):
596    '''Load, de-id allergy observations from Epic.
597    '''
598
599
600@task
601@needs('make_epic_order_views')
602@multi_loader([
603    ('observation_fact_orders', 'Epic procedure orders observation facts'),
604    # Order_sets concept load is failing due to 11G.
605    # For the time being we won't load order set
606    # facts.  This will be fixed in #2249
607    #'epic_order_sets':
608    #    'Epic order sets as observation facts',
609    #'epic_order_set_proc_and_meds':
610    #    'Epic order set procedures and medications'
611    #' as observation facts'
612],
613    **epic_facts_loader_kwds)
614def load_epic_orders(options):
615    '''Load, de-id procedure orders observations from Epic.
616    '''
617
618
619@task
620@needs('make_epic_medical_history_views')
621@multi_loader([('medical_hx', 'Epic medical history'),
622               ('surgical_hx', 'Epic surgical history'),
623               ('family_hx', 'Epic family history')],
624              **epic_facts_loader_kwds)
625def load_epic_medical_history(options):
626    '''Load, de-id medical, surgical, and family history observations.
627    '''
628
629
630@task
631@needs('make_epic_enc_vitals_views')
632@multi_loader([('observation_fact_enc_vitals',
633                'Epic encounter vitals observation facts')],
634              **epic_facts_loader_kwds)
635def load_epic_enc_vitals(options):
636    '''Load, de-id encounter related vitals observations from Epic.
637    '''
638
639
640@task
641@needs('make_epic_flowsheet_views')
642@run_scripts('epic_flowsheets_multiselect.sql',
643             capture_release_list=['db'],
644             db=lambda options: options.id_db)
645def summarize_epic_flowsheets(options):
646    '''Summarize Nursing Flowsheet observations from Epic for multiselect.
647    '''
648
649
650flowsheet_fact_views = '''numerictypeflows,
651                          weightmeasureflows,
652                          patweightmeasureflows,
653                          bpsysmeasureflows,
654                          bpdiameasureflows,
655                          heightmeasureflows,
656                          patheightmeasureflows,
657                          datemeasureflows,
658                          timemeasureflows,
659                          cattypeflows,
660                          temperatureflows,
661                          singleselectflows,
662                          multiselectflows,
663                          stringtypeflows
664                          '''
665                    # todo: we're not bothering with concentrationflows etc., are we?
666                    # 'concentrationflows',
667                    # 'doseflows',
668
669
670@task
671@needs('summarize_epic_flowsheets')
672@multi_loader(
673    #for comprehension to generate a dictionary of flowsheet views ->
674    #labels
675    [(flow_view.strip(), 'Epic_Nursing_Observations:' + flow_view.strip())
676     for flow_view in flowsheet_fact_views.split(',')],
677    **epic_facts_loader_kwds
678)
679def load_epic_flowsheets(options):
680    '''Load, de-id Nursing Flowsheet observations from Epic.
681    '''
682
683
684@task
685@needs('dblink_deid_id')
686@run_scripts(('test_medication_modifiers.sql', 'test_uhc.sql'),
687             capture_release_list=['db'],
688             db=lambda options: options.deid_db)
689@might_call('load_epic_med_facts', 'make_epic_views')
690def check_med_modifiers(options):
691    '''Run SQL-level tests for medication modifiers.
692
693    .. note:: not included in normal production ETL
694    '''
695
696
697@task
698@needs('dblink_deid_id',
699       'make_scheme_labels')
700@run_scripts('epic_demo_concepts.sql',
701             capture_release_list=['db'],
702             db=lambda options: options.deid_db)
703def load_epic_demo_concepts(options):
704    '''Load epic demographic concepts into i2b2 de-identified repository.
705    '''
706
707
708@task
709@needs('dblink_deid_id',
710       'load_local_lab_categories',
711       'make_epic_flowsheet_views',
712       'make_epic_microbiology_views',
713       'load_curated_micro_concept_data',
714       'load_enc_type_adt_map',
715       'load_gpc_billing_diag_modifiers',
716       'load_epic_demo_concepts')
717@run_scripts('epic_concepts_load.sql',
718             capture_release_list=['db'],
719             db=lambda options: options.deid_db)
720def load_epic_concepts(options):
721    '''Load concepts (esp from Epic) into i2b2 de-identified repository.
722
723    This actually needs summarize_epic_flowsheets to run first,
724    but we leave the dependency implicit, since it is normally
725    done during heron_load.
726
727    .. todo:: move load_geo_concepts, load_bsr_categories dependencies
728              to merge_concepts or the like
729    '''
730
731
732from paver.easy import options as _option
733
734_option(clarity_sid=None,
735        clarity_concept_port=None,
736        clarity_dump=None)
Note: See TracBrowser for help on using the repository browser.