Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/log/etl_log_xunit.py @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 6.5 KB
Line 
1r'''Make XUnit testsuite document representing an ETL log.
2
3Recall from `etl_log_review` and `etl_log_outline` that ETL logs are
4in CSV format, with a variable length `detail`::
5
6  >>> TEST_DATA.split('\n')[0].split(',')
7  ... # doctest: +NORMALIZE_WHITESPACE
8  ['time', 'usec', 'level', 'log_path_name', 'message',
9   'dur', 'skip', 'parent', 'seqno', 'detail']
10
11  >>> entries = list(each_entry(StringIO(TEST_DATA)))
12  >>> e_stmt, e_task, e_run = [entries[k] for k in [8, 1, 3]]
13  >>> e_run
14  ... # doctest: +NORMALIZE_WHITESPACE
15  Entry(time='2013-08-26 16:46:00', usec='616', level='DEBUG',
16        log_path_name='heron.curated_data_zips_near_66160_csv',
17        message='run()', dur='', skip='\nREQ', parent='0', seqno='3',
18        detail=['script', 'curated_data_zips_near_66160_csv'])
19
20The `as_test` function represents a log entry as an XUnit `testcase` element::
21
22  >>> elt, skip, err = as_test(e_run)
23  >>> elt.tag
24  'testcase'
25  >>> sorted(elt.attrib.keys())
26  ['classname', 'id', 'name', 'time']
27
28It also lets the caller count skipped tests and errors::
29
30  >>> skip, err
31  (False, False)
32
33XUnit expects Java-esque syntax for class names.  Class name is
34taken from the log path name if its last segment is capitalized::
35
36  >>> e_stmt.log_path_name
37  'heron.curated_data_zips_near_66160_csv.id.Statement'
38  >>> as_test(e_stmt)[0].attrib['classname']
39  'heron.curated_data_zips_near_66160_csv.id.Statement'
40
41Otherwise, an arbitrary class name is added::
42
43  >>> e_task.log_path_name
44  'heron.pavement'
45  >>> as_test(e_task)[0].attrib['classname']
46  'heron.pavement.Event'
47
48If the message has parentheses like a method call, we get the
49method name there; we also use the seqno in an attempt to preserve
50order::
51
52  >>> e_run.seqno, e_run.message
53  ('3', 'run()')
54  >>> as_test(e_run)[0].attrib['name']
55  '_0003_run'
56
57.. note:: trying to fit an ordered, arbitrarily nested structure into
58          the xunit structure is awkward:
59          - xunit output tends to be sorted alphabetically
60            - prepending numbers works, but it's awkward and unstable
61          - xunit tests have a name and a classname, where a classname
62            has an arbitrarily nested package prefix
63
64XUnit expects durations in seconds::
65
66  >>> e_stmt.dur
67  '0:00:00.035444'
68  >>> as_test(e_stmt)[0].attrib['time']
69  '0.035'
70
71'''
72
73from xml.etree import ElementTree as ET
74
75from etl_log_outline import each_entry
76
77import pkg_resources as pkg
78
79
def main(argv, stdout, open_arg):
    '''Render the ETL log named on the command line as an XUnit document.

    :param argv: command-line arguments; `argv[1]` is the log file name.
    :param stdout: destination handed to the suite writer.
    :param open_arg: callable that opens a file name and returns its lines.
    '''
    log_path = argv[1]
    write_suite = save_test_suite(open_arg(log_path), log_path)
    write_suite(stdout)
85
86
def save_test_suite(log_lines, log_fn):
    '''Turn ETL log lines into an XUnit test suite writer.

    :param log_lines: iterable of CSV log lines, parsed with `each_entry`.
    :param log_fn: log file name; used as the name of the test suite.
    :return: whatever `XunitFormatter.save` returns for the collected
             test cases (a callable that writes the suite).
    '''
    tests = []
    failed = 0
    skipped = 0

    for entry in each_entry(log_lines):
        # Leave out DEBUG entries, and rows whose `time` column is the
        # literal 'time' (the CSV header row).
        if entry.level == 'DEBUG' or entry.time == 'time':
            continue

        testcase, was_skipped, had_error = as_test(entry)
        tests.append(testcase)
        if had_error:
            failed += 1
        if was_skipped:
            skipped += 1

    return XunitFormatter.save(
        name=log_fn,
        qty=len(tests),
        failures=failed,
        skip=skipped,
        tests=tests)
99
100
# Contents of the `etl_log_ex.txt` resource that sits alongside this module,
# loaded at import time; the doctests in the module docstring parse it.
TEST_DATA = pkg.resource_string(__name__, 'etl_log_ex.txt')
102
103
def as_test(entry,
            error_type='Exception'):
    '''Represent one ETL log entry as an XUnit `testcase` element.

    :param entry: log entry with `log_path_name`, `message`, `seqno`,
                  `dur`, `skip` and `detail` attributes; `detail` is a
                  flat list of alternating keys and values.
    :param error_type: `type` attribute of the `error` element that is
                       emitted when the detail includes `exc_info`.
    :return: `(testcase, skipped, failed)` where `testcase` is the
             element, `skipped` tells whether the entry was marked
             `OPT`, and `failed` tells whether it carried `exc_info`.
    '''
    # Use the last path segment as the class name only when it is
    # capitalized.  An undotted or dot-terminated path has no usable
    # segment, so it falls back to the arbitrary `Event` class as well.
    package, sep, cls = entry.log_path_name.rpartition('.')
    if not (sep and cls[:1].isupper()):
        package, cls = entry.log_path_name, 'Event'

    message = entry.message
    method = message.split('(')[0] if '(' in message else ''
    seqno = int(entry.seqno)
    # The zero-padded seqno prefix keeps alphabetical order == log order.
    test_name = '_%04d_%s' % (seqno, method)

    detail = dict(zip(entry.detail[::2], entry.detail[1::2]))
    failed = 'exc_info' in detail
    exc_info = detail.get('exc_info')

    # The skip column may carry surrounding whitespace (e.g. '\nREQ');
    # compute the flag once so the element and the returned count agree.
    skipped = entry.skip.strip() == 'OPT'

    t = XunitFormatter.xml_test(
        classname='%s.%s' % (package, cls),
        name=test_name,
        id="_%d" % seqno,
        time=0 if not entry.dur else parse_dur(entry.dur),
        skip=skipped,
        error_type=error_type if failed else None,
        message=message if failed else None,
        error_text=exc_info,
        stdout=detail.get('code', None),
        stderr=exc_info)

    return t, skipped, failed
131
132
def parse_dur(dur):
    '''Convert a duration string to seconds.

    :param dur: duration formatted as `H:MM:SS[.ffffff]`, optionally
                preceded by `N day, ` / `N days, ` (the form that
                `str(timedelta)` takes once a duration reaches 24 hours).
    :return: the duration in seconds, as a float.
    :raises ValueError: if a field is not numeric or a field is missing.

    >>> parse_dur('0:01:30')
    90.0
    >>> parse_dur('1 day, 0:00:01')
    86401.0
    '''
    days = 0.0
    if ',' in dur:
        # 'N day(s), H:MM:SS' -- peel off the day count first.
        day_part, dur = dur.split(',', 1)
        days = float(day_part.split()[0])

    hours, minutes, seconds = [float(part) for part in dur.split(':', 2)]
    return seconds + 60 * (minutes + 60 * (hours + 24 * days))
136
137
class XunitFormatter(object):
    '''Build XUnit `testsuite` and `testcase` elements with ElementTree.'''

    @classmethod
    def save(cls, name, qty, failures, skip, tests):
        '''Wrap test case elements in a `testsuite` element.

        :param name: value of the suite's `name` attribute.
        :param qty: number of tests, for the `tests` attribute.
        :param failures: count for the `failures` attribute.
        :param skip: count for the `skip` attribute.
        :param tests: `testcase` elements to put in the suite.
        :return: the `write` method of an `ElementTree` holding the
                 suite; call it with a destination to serialize.
        '''
        # TODO: errors?
        summary = {'name': name,
                   'tests': str(qty),
                   'failures': str(failures),
                   'skip': str(skip)}
        suite = ET.Element('testsuite', summary)
        suite.text = '\n'
        suite.extend(tests)

        document = ET.ElementTree(suite)
        return document.write

    @classmethod
    def xml_test(cls, classname, name, id, time, skip=False,
                 error_type=None, message=None, error_text=None,
                 stdout=None, stderr=None):
        '''Build a `testcase` element, one child per line of output.

        `time` is a number of seconds; it is rendered with three
        decimal places.  The optional children are added in the order
        `skipped`, `error`, `system-out`, `system-err`, each only when
        the corresponding argument is truthy.

        >>> from sys import stdout
        >>> xml_test = XunitFormatter.xml_test
        >>> def t(*args, **kw):
        ...     ET.ElementTree(xml_test(*args, **kw)).write(stdout)

        >>> t('company', 'AT&T', "_21", 1)
        <testcase classname="company" id="_21" name="AT&amp;T" time="1.000">
        </testcase>

        >>> t('class', 'name', "_22", 1, error_type='Bad!')
        <testcase classname="class" id="_22" name="name" time="1.000">
        <error type="Bad!" />
        </testcase>

        >>> t('class', 'name', "_23", 1, stdout='OHAI')
        <testcase classname="class" id="_23" name="name" time="1.000">
        <system-out>OHAI</system-out>
        </testcase>
        '''
        tc = ET.Element('testcase', {
            'id': id,
            'classname': classname,
            'name': name,
            'time': "%0.3f" % time})
        # Newlines in text/tail put each element on its own line.
        tc.text = tc.tail = '\n'

        def add_child(tag, text=None, attrib=None):
            # Append a newline-terminated child; text only if non-empty.
            node = ET.SubElement(tc, tag, attrib if attrib is not None else {})
            node.tail = '\n'
            if text:
                node.text = text

        if skip:
            add_child('skipped')
        if error_type:
            error_attrs = {'type': error_type}
            if message:
                error_attrs['message'] = message
            add_child('error', error_text, error_attrs)
        if stdout:
            add_child('system-out', stdout)
        if stderr:
            add_child('system-err', stderr)

        return tc
201
202
if __name__ == '__main__':
    def _with_caps():
        '''Collect the script's ambient capabilities and pass them to `main`.

        `main` gets a copy of the command line, standard output, and an
        opener restricted to file names given on the command line.
        '''
        from __builtin__ import open as builtin_open
        from sys import argv as cli_args, stdout as out

        def open_arg(path):
            # Refuse anything that was not named on the command line.
            if path in cli_args:
                return builtin_open(path)
            raise IOError

        main(cli_args[:], out, open_arg)

    _with_caps()
Note: See TracBrowser for help on using the repository browser.