Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/i2b2_datasources.py @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 7.8 KB
Line 
1'''i2b2_datasources -- paver task to configure i2b2 JBoss datasources
2
3:copyright: Copyright 2010-2015 University of Kansas Medical Center
4            part of the `HERON open source codebase`__;
5            see NOTICE file for license details.
6
7__ http://informatics.kumc.edu/work/wiki/HERON
8'''
9import hashlib
10from csv import reader
11from collections import namedtuple
12from functools import partial
13from pkg_resources import resource_stream
14
15from paver.easy import task, options as _option
16from paver.tasks import environment as paver_env
17
18import db_util
19
20XML_HDR = '''<?xml version="1.0" encoding="UTF-8"?>
21<datasources xmlns="http://www.jboss.org/ironjacamar/schema">'''
22
23XML_DATASOURCE = '''
24    <datasource jta="false" jndi-name="java:/%(datasource)s"
25        pool-name="%(datasource)s" enabled="true" use-ccm="false">
26        <connection-url>%(url)s</connection-url>
27        <driver-class>%(driver_class)s</driver-class>
28        <driver>%(driver)s</driver>
29        <security>
30            <user-name>%(username)s</user-name>
31            <password>%(password)s</password>
32        </security>
33        <validation>
34            <validate-on-match>false</validate-on-match>
35            <background-validation>false</background-validation>
36        </validation>
37        <statement>
38            <share-prepared-statements>false</share-prepared-statements>
39        </statement>
40    </datasource>
41'''
42
43XML_FTR = '</datasources>'
44
45
46OracleParams = namedtuple('OracleParams', 'url driver_class driver')
47
48
49def mk_oracle_params(host='server', port=1521, sid='dbsid',
50                     driver_class='oracle.jdbc.OracleDriver',
51                     driver='ojdbc6.jar'):
52    '''
53    >>> mk_oracle_params()
54    ... # doctest: +NORMALIZE_WHITESPACE
55    OracleParams(url='jdbc:oracle:thin:@server:1521:dbsid',
56     driver_class='oracle.jdbc.OracleDriver', driver='ojdbc6.jar')
57    '''
58    url = 'jdbc:oracle:thin:@%(host)s:%(port)s:%(sid)s' % dict(
59        host=host, port=port, sid=sid)
60    return OracleParams(url=url, driver_class=driver_class, driver=driver)
61
62
63DataSource = namedtuple('DataSource', 'datasource username id_deid')
64
65
66def ds_from_csv(rd):
67    ''' Parse datasource CSV file
68    >>> import StringIO
69    >>> sio = StringIO.StringIO()
70    >>> sio.write("""DATASOURCE,DATABASE_USER,ID_DEID
71    ... OntologyBootStrapDS,i2b2hive,ID
72    ... CRCBootStrapDS,i2b2hive,ID""")
73    >>> sio.seek(0)
74    >>> ds_from_csv(sio)
75    ... # doctest: +NORMALIZE_WHITESPACE
76    [DataSource(datasource='OntologyBootStrapDS',
77                username='i2b2hive', id_deid='ID'),
78     DataSource(datasource='CRCBootStrapDS',
79                username='i2b2hive', id_deid='ID')]
80    '''
81    rd.next()  # Skip header
82    return map(DataSource._make, reader(rd))
83
84
85def mk_xml_ds(ds=DataSource('fakeds', 'user', 'id'), password='passwd',
86              op=mk_oracle_params()):
87    '''
88    >>> print mk_xml_ds()
89    ... # doctest: +NORMALIZE_WHITESPACE
90        <datasource jta="false" jndi-name="java:/fakeds"
91            pool-name="fakeds" enabled="true" use-ccm="false">
92            <connection-url>jdbc:oracle:thin:@server:1521:dbsid</connection-url>
93            <driver-class>oracle.jdbc.OracleDriver</driver-class>
94            <driver>ojdbc6.jar</driver>
95            <security>
96                <user-name>user</user-name>
97                <password>passwd</password>
98            </security>
99            <validation>
100                <validate-on-match>false</validate-on-match>
101                <background-validation>false</background-validation>
102            </validation>
103            <statement>
104                <share-prepared-statements>false</share-prepared-statements>
105            </statement>
106        </datasource>
107    '''
108    params = ds._asdict().copy()
109    params.update(op._asdict())
110    params.update(password=password)
111
112    return XML_DATASOURCE % params
113
114
115def mk_pass_dict(dslist, mk_pass=lambda db_user: 'passwd',
116                 usefrom={}):
117    '''
118    >>> common_ds = DataSource('ds_comm', 'usr_comm', 'DEID')
119    >>> dslist = [DataSource(*common_ds), DataSource('ds', 'usr', 'ID')]
120    >>> usefrom = dict([(DataSource(*common_ds), 'common_password')])
121    >>> sorted(mk_pass_dict(dslist=dslist, usefrom=usefrom).items(),
122    ...        key=lambda t: t[0].datasource)
123    ... # doctest: +NORMALIZE_WHITESPACE
124    [(DataSource(datasource='ds', username='usr', id_deid='ID'), 'passwd'),
125     (DataSource(datasource='ds_comm', username='usr_comm',
126                 id_deid='DEID'), 'common_password')]
127    >>> sorted(mk_pass_dict([DataSource('singleds', 'singleusr',
128    ...                                 'DEID')]).items(),
129    ...        key=lambda t: t[0].datasource)
130    ... # doctest: +NORMALIZE_WHITESPACE
131    [(DataSource(datasource='singleds', username='singleusr',
132                 id_deid='DEID'), 'passwd')]
133
134    '''
135    def make_only_new_passes(username, passwords={}):
136        if username in passwords:
137            return passwords[username]
138        passwords[username] = mk_pass(db_user=d.username)
139        return passwords[username]
140
141    return dict([(d, usefrom[d] if d in usefrom
142                  else (make_only_new_passes(username=d.username)))
143                 for d in dslist])
144
145
146def hexdigest(txt):
147    '''mimic i2b2's own hex digest algorithm
148
149    It seems to omit leading 0's.
150
151    >>> hexdigest('test')
152    '98f6bcd4621d373cade4e832627b4f6'
153    '''
154    return ''.join([hex(ord(b))[2:] for b in hashlib.md5(txt).digest()])
155
156
157@task
158def configure_i2b2_datasources(options, deid_ds='blueheron_ds.csv',
159                               id_ds='nightheron_ds.csv',
160                               service_account='OBFSC_SERVICE_ACCOUNT',
161                               deid_obfsc_user='blueherondata',
162                               id_obfsc_user='nightherondata'):
163    '''Write J2EE datasources to connect i2b2 to the id/deid repositories.
164    '''
165    def gen_set_password(dba_account, db_user):
166        pw = db_util.genpw(options.rng)
167
168        with db_util.transaction(dba_account) as admin:
169            admin.execute('alter user %s identified by %s' % (db_user, pw))
170            admin.execute('grant create session to %s' % db_user)
171
172        return pw
173
174    deid_ds = mk_pass_dict(ds_from_csv(resource_stream(__name__, deid_ds)),
175                           mk_pass=partial(
176                               gen_set_password,
177                               dba_account=options.deid_server.account('DBA')))
178
179    id_ds = mk_pass_dict(ds_from_csv(resource_stream(__name__, id_ds)),
180                         mk_pass=partial(
181                             gen_set_password,
182                             dba_account=options.id_server.account('DBA')),
183                         usefrom=deid_ds)
184
185    db_params = dict(
186        DEID=mk_oracle_params(host=options.i2b2_deid_host,
187                              port=options.i2b2_deid_port,
188                              sid=options.i2b2_deid_sid),
189        ID=mk_oracle_params(host=options.i2b2_id_host,
190                            port=options.i2b2_id_port,
191                            sid=options.i2b2_id_sid))
192
193    def get_pass(ds_passes, usr):
194        for ds in ds_passes:
195            if ds.username.lower() == usr.lower():
196                return ds_passes[ds]
197
198    for i2b2, cfg, dbe, obfsc_user in [
199            (deid_ds, options.path_for.i2b2_deid_datasources,
200             options.deid_db, deid_obfsc_user),
201            (id_ds, options.path_for.i2b2_id_datasources,
202             options.id_db, id_obfsc_user)]:
203
204        with cfg.open('w') as fout:
205            fout.write(XML_HDR)
206            for ds in i2b2:
207                fout.write(mk_xml_ds(ds, i2b2[ds], db_params[ds.id_deid]))
208            fout.write(XML_FTR)
209
210        with db_util.transaction(dbe) as admin:
211            admin.execute('''
212                          UPDATE I2B2PM.PM_USER_DATA
213                          SET PASSWORD = :password
214                          WHERE user_id= :user_id''',
215                          dict(password=hexdigest(get_pass(i2b2, obfsc_user)),
216                               user_id=service_account))
Note: See TracBrowser for help on using the repository browser.