Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/pavement.py @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 5.4 KB
Line 
1'''pavement.py -- wrapper for HERON extract/transform/load (ETL) tasks
2--------------------------------------------------------------------------
3
4Command line usage follows paver__ task conventions. To find all tasks::
5
6  $ paver help
7
8__ http://www.blueskyonmars.com/projects/paver/
9
10:copyright: Copyright 2010-2013 University of Kansas Medical Center
11            part of the `HERON open source codebase`__;
12            see NOTICE file for license details.
13
14__ http://informatics.kumc.edu/work/wiki/HERON
15'''
16
17from hashlib import md5
18import logging
19
20from paver.easy import task
21from paver.tasks import help
22from paver.path import path
23from paver.tasks import environment
24
25
def _paver_import_workaround():
    '''Make modules that sit next to this file importable.

    Appends the directory containing this file to ``sys.path``.  The
    directory is added only when it is not already listed, so running
    this more than once does not pile up duplicate entries.
    '''
    import os
    import sys

    here = os.path.dirname(__file__)
    if here not in sys.path:
        sys.path.append(here)
_paver_import_workaround()
30
31import db_util
32import epic_etl
33import heron_build
34import heron_create
35import i2b2_datasources
36import i2b2_deid
37import idx_etl
38import kumc_etl
39import qt_patient_update
40from structured_logging import NestedEvents
41
42log = logging.getLogger(__name__)
43
44
def _dependencies_for_pyflakes():
    '''Mention imported names that nothing else in this file refers to.

    Going by the function's name, this exists to keep pyflakes from
    reporting those imports as unused.  The names are only evaluated
    and discarded; nothing is called and nothing is returned.
    '''
    (help,
     epic_etl, heron_create, idx_etl, kumc_etl,
     i2b2_deid, i2b2_datasources,
     qt_patient_update)
54
55
@task
def auto(options, info, dry_run):
    '''Prepare the shared state on ``options`` that other tasks rely on.

    Reads the configuration file named by ``options.config``, configures
    logging from ``options.logging_config``, and then attaches to
    ``options``:

    - every flattened ``[section]`` item (as ``section_item``),
    - ``path_for`` / ``env_for`` attribute-style lookup helpers,
    - ``events``, a ``NestedEvents`` logger,
    - whatever ``heron_build.make_dbs`` returns,
    - ``rng``, ``clock`` and ``getuser``.

    Finally, paver's own log messages are routed through ``events``.

    :param options: paver options; ``config`` must already be set.
    :param info: not used here.
    :param dry_run: when true, passwords and database connections are
                    replaced by dry-run stand-ins (see ``password_cap``
                    and ``dbi_cap``).
    '''
    from datetime import date, datetime
    from getpass import getuser
    from os import environ
    from os.path import expanduser
    from random import Random
    import logging.config
    import time

    def load_config(parser):
        return parser.read(options.config)

    config, flat = heron_build.flat_options(
        expanduser=expanduser, getuser=getuser, read_config=load_config)
    # Expose each item of each [section] as options.section_item.
    options.update(flat)

    logging.config.fileConfig(options.logging_config,
                              defaults={'today': date.today()})

    class PathFor(object):
        # options.path_for.X  ->  path(options['X'])
        def __getattr__(self, name):
            return path(options[name])

    class EnvFor(object):
        # options.env_for.X  ->  value of the environment variable
        # whose name is stored in options['X']
        def __getattr__(self, name):
            return environ[options[name]]

    options.path_for = PathFor()
    options.env_for = EnvFor()

    events = NestedEvents('heron', logging.getLogger, time.time)
    options.events = events

    # Record which configuration was used, identified by its md5 digest.
    config_bytes = path(options.config).bytes()
    checksum = md5(config_bytes).hexdigest()
    events.log(__name__, logging.INFO, '%s %s', checksum, options.config,
               md5sum=checksum, config=options.config)

    def config_hostport(section):
        host = config.get(section, 'host')
        port = config.get(section, 'port')
        return host, int(port)

    dbs = heron_build.make_dbs(
        events,
        get_hostport=config_hostport,
        key=password_cap(dry_run, config, environ),
        ora_connect=dbi_cap(dry_run, events))

    # TODO: use paver_env rather than options for these
    options.update(dbs)

    options.rng = Random()
    # paver special-cases callables stored on options (presumably it
    # calls them on lookup -- confirm), hence the extra lambda layer.
    options.clock = lambda: datetime
    options.getuser = lambda: getuser

    monkeypatch_paver_logging(environment, events)
113
114
def password_cap(dry_run, config, env):
    '''Build a function that looks up database credentials by section.

    :param dry_run: when true, no real password is looked up; the
                    returned function always reports
                    ``'dry_run_password'``.
    :param config: parser-like object offering ``get``, ``has_option``
                   and ``getboolean`` for ``(section, option)``.
    :param env: mapping of environment variables (only ``get`` is used).
    :return: ``key(section) -> (sid, username, password)``.

    Outside of dry-run mode the password is taken from the first of:

    1. the keyring, when the section sets ``use_keyring`` to a true
       value (the keyring service name comes from
       ``db_util.ora_access(sid, host, port)``);
    2. the environment variable named by the ``password_env`` option
       (an empty string when that variable is not set);
    3. the section's ``password`` option.

    The returned function raises ``IOError`` when the keyring is
    consulted and holds no password for the account.
    '''
    if dry_run:
        def dry_key(section):
            return (config.get(section, 'sid'),
                    config.get(section, 'username'),
                    'dry_run_password')
        return dry_key

    def keyring_password(section, sid, username):
        # Imported here so keyring is only needed when a section asks
        # for it.
        import keyring
        prevent_glib_warnings()

        host = config.get(section, 'host')
        port = int(config.get(section, 'port'))
        access = db_util.ora_access(sid, host, port)
        password = keyring.get_password(access, username)
        if not password:
            raise IOError(
                'missing password; try: python setpass.py %s' % access)
        return password

    def key(section):
        sid = config.get(section, 'sid')
        username = config.get(section, 'username')

        use_keyring = (config.has_option(section, 'use_keyring')
                       and config.getboolean(section, 'use_keyring'))
        if use_keyring:
            password = keyring_password(section, sid, username)
        elif config.has_option(section, 'password_env'):
            password = env.get(config.get(section, 'password_env'), '')
        else:
            password = config.get(section, 'password')
        return sid, username, password

    return key
150
151
def dbi_cap(dry_run, events):
    '''Build the Oracle connect function; a dry run avoids the real DB.

    For a dry run ``db_util.DryDBI`` is handed to
    ``db_util.make_ora_connect``; otherwise a function returning the
    ``cx_Oracle`` module is.  The ``cx_Oracle`` import lives inside that
    function, so this function itself never imports it.
    '''
    # TODO: consider renaming cx to dbi
    if dry_run:
        dbi = db_util.DryDBI
    else:
        def real_cx():
            import cx_Oracle
            return cx_Oracle
        dbi = real_cx
    return db_util.make_ora_connect(dbi, events)
162
163
def prevent_glib_warnings():
    '''Set glib's application name to silence "name not set" warnings.

    See #1163.  Does nothing when the ``glib`` module cannot be imported.

    ACK: http://mindbending.org/en/bending-gnome-keyring-with-python-part-2  # noqa
    '''
    try:
        import glib as glib_module
        glib_module.set_application_name('paver')
    except ImportError:
        # glib is optional; without it there is nothing to configure.
        return
173
174
def monkeypatch_paver_logging(environment, events):
    '''Replace ``environment._log`` so its messages go to ``events``.

    The numeric levels 1, 2 and 3 are translated to the logging levels
    DEBUG, INFO and ERROR.  A message starting with ``'---> '`` is
    logged under the name that follows that marker and carries it as a
    ``task`` keyword; any other message is logged under this module's
    name.  A level outside 1-3 raises ``KeyError``.
    '''
    task_marker = '---> '
    logging_level = {1: logging.DEBUG,
                     2: logging.INFO,
                     3: logging.ERROR}

    def _log(paver_level, message, args):
        if message.startswith(task_marker):
            taskname = message[len(task_marker):]
            logname, event = taskname, {'task': taskname}
        else:
            logname, event = __name__, {}
        events.log(logname, logging_level[paver_level],
                   message, *args, **event)

    environment._log = _log
Note: See TracBrowser for help on using the repository browser.