Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/etl_notify.py @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 14.4 KB
Line 
1r'''etl_notify.py -- Used to Monitor paver commands during ETL load.
2
3USAGE::
4  $ python etl_notify.py python paver <command>
5
6:copyright: Copyright 2010-2013 University of Kansas Medical Center
7            part of the `HERON open source codebase`__;
8            see NOTICE file for license details.
9
10__ http://informatics.kumc.edu/work/wiki/HERON
11'''
12
13from ConfigParser import SafeConfigParser
14from email.mime.text import MIMEText
15import threading
16import time
17
18# TODO: get from config file
19WATCHERS = ['ngraham@kumc.edu', 'bhamlin@kumc.edu', 'mhoag@kumc.edu']
20BOT_ADDR = 'heron-admin+etlnotify@kumc.edu'
21
22
23def main(argv_config, calendar, configure_logging, makeFileChecker,
24         makeTailCmd, os_path, paverCmd, raw_input, sendEmail):
25    '''Run paver, monitor log, and notify by email about result.
26
27    TODO: either make tests platform-independent or use posixpath.
28    >>> import os
29
30    Suppose paver fails:
31
32    >>> cr = CmdRunner(MockSproc('paver blah', 'OOPSIE!', time.sleep), 'paver do_stuff') # noqa
33    >>> main(paverCmd=cr, **Mock.caps(os.path))
34    ... # doctest: +ELLIPSIS
35    Log file name is: /THE/LOG/DIR/heron_load-2012-10-19.log
36    MockSMTP:sendmail()
37     From: heron-admin+etlnotify@kumc.edu
38     To: ...
39    Content-Type: text/plain; charset="us-ascii"
40    MIME-Version: 1.0
41    Content-Transfer-Encoding: 7bit
42    Subject: Paver Command Terminated With Errors
43    From: heron-admin+etlnotify@kumc.edu
44    To: ...
45    <BLANKLINE>
46    Paver command terminated:
47    <BLANKLINE>
48    ------
49    stdout
50    ------
51    paver blah
52    <BLANKLINE>
53    ------
54    stderr
55    ------
56    OOPSIE!
57    <BLANKLINE>
58    ---
59    Log
60    ---
61    tail log blah
62    Paver command terminated:
63    <BLANKLINE>
64    ------
65    stdout
66    ------
67    paver blah
68    <BLANKLINE>
69    ------
70    stderr
71    ------
72    OOPSIE!
73    <BLANKLINE>
74    ---
75    Log
76    ---
77    tail log blah
78    Checker thread exiting.
79
80
81    In the normal case, we get...
82    note:  Pass 1 second to the mock process so the thread has time to
83    check the file.
84
85    Log file name is: /THE/LOG/DIR/heron_load-2012-10-19.log
86    MockSMTP:sendmail()
87     From: heron-admin+etlnotify@kumc.edu
88     To: ['ngraham@kumc.edu', 'bhamlin@kumc.edu']
89    Content-Type: text/plain; charset="us-ascii"
90    MIME-Version: 1.0
91    Content-Transfer-Encoding: 7bit
92    Subject: Paver Command Terminated Successfully
93    From: heron-admin+etlnotify@kumc.edu
94    To: ngraham@kumc.edu,bhamlin@kumc.edu
95    <BLANKLINE>
96    Paver command terminated:
97    <BLANKLINE>
98    ------
99    stdout
100    ------
101    paver blah
102    <BLANKLINE>
103    ------
104    stderr
105    ------
106    <BLANKLINE>
107    <BLANKLINE>
108    ---
109    Log
110    ---
111    tail log blah
112    Paver command terminated:
113    <BLANKLINE>
114    ------
115    stdout
116    ------
117    paver blah
118    <BLANKLINE>
119    ------
120    stderr
121    ------
122    <BLANKLINE>
123    <BLANKLINE>
124    ---
125    Log
126    ---
127    tail log blah
128    Checker thread exiting.
129
130
131    What if the user aborts?
132    >>> cr = CmdRunner(MockSproc('paver blah', '', time.sleep), 'paver do_stuff') # noqa
133    >>> main(paverCmd=cr, **Mock.caps(os.path, 'n'))
134    ... # doctest: +ELLIPSIS
135    Log file name is: /THE/LOG/DIR/heron_load-2012-10-19.log
136    Aborting at user request.
137
138    If the log file doesn't increase in size?
139    >>> cr = CmdRunner(MockSproc('paver blah', '', time.sleep, 2), 'paver do_stuff') # noqa
140    >>> main(paverCmd=cr, **Mock.caps(os.path, filefound=True, fincr=0))
141    ... # doctest: +ELLIPSIS
142    Log file name is: /THE/LOG/DIR/heron_load-2012-10-19.log
143    ...
144    Subject: Log Update Warning
145    ...
146
147    If the log file doesn't exist?
148    >>> cr = CmdRunner(MockSproc('paver blah', '', time.sleep, 2), 'paver do_stuff') # noqa
149    >>> main(paverCmd=cr, **Mock.caps(os.path, filefound=False, fincr=0))
150    ... # doctest: +ELLIPSIS
151    Log file name is: /THE/LOG/DIR/heron_load-2012-10-19.log
152    filetocheck is not found!
153    ...
154
155    '''
156    log = configure_logging()
157
158    cp = argv_config()
159    logFileName = etl_log(cp, os_path.join, calendar.today())
160
161    log.info('Log file name is: %s' % logFileName)
162
163    fileChecker = makeFileChecker(log, logFileName)
164    tailCmd = makeTailCmd(logFileName)
165
166    cnfrm = '''paver command:\t\t%s
167log file:\t\t%s
168Confirm the above information.  Continue? (y/n):''' \
169    % (str(paverCmd), str(fileChecker))
170
171    if raw_input(cnfrm).lower() != 'y':
172        log.info('Aborting at user request.')
173        return
174
175    # Kick off our file checker
176    fileChecker.start()
177
178    # Run our paver command
179    stdout, stderr = paverCmd.runCmd()
180
181    logOut, _ = tailCmd.runCmd()
182    ret = '''Paver command terminated:
183
184------
185stdout
186------
187%s
188
189------
190stderr
191------
192%s
193
194---
195Log
196---
197%s''' % (stdout, stderr, logOut)
198
199    # Report on the result
200    title = 'Paver Command Terminated '
201    if len(stderr) != 0:
202        title += 'With Errors'
203    else:
204        title += 'Successfully'
205
206    try_send(log, sendEmail, ret, title)
207
208    log.info(ret)
209
210    fileChecker.stop()
211
212
213class MockSproc(object):
214    PIPE = 'mockpipe'
215
216    class proc(object):
217        def __init__(self, stdout_msg, stderr_msg, sleep, cmdSec):
218            self._stdout_msg = stdout_msg
219            self._stderr_msg = stderr_msg
220            self._sleep = sleep
221            self._cmdSec = cmdSec
222
223        def communicate(self):
224            self._sleep(self._cmdSec)
225            return self._stdout_msg, self._stderr_msg
226
227    def __init__(self, stdout_msg, stderr_msg, sleep, cmdSec=0):
228        self._proc = self.proc(stdout_msg, stderr_msg, sleep, cmdSec)
229
230    def Popen(self, cmd, shell, stdout, stderr):
231        return self._proc
232
233
234class CmdRunner(object):
235    def __init__(self, sproc, cmd):
236        self._sproc = sproc
237        self._cmd = cmd
238
239    def __str__(self):
240        return ' '.join(self._cmd)
241
242    def runCmd(self):
243        proc = self._sproc.Popen(self._cmd, shell=False,
244                                 stdout=self._sproc.PIPE,
245                                 stderr=self._sproc.PIPE)
246        return proc.communicate()
247
248
249class Mock(object):
250    @classmethod
251    def caps(cls, os_path, contPrompt='y', fincr=1, filefound=True):
252        import datetime
253        from sys import stdout
254
255        class mockLog(object):
256            def __init__(self):
257                pass
258
259            def info(self, s):
260                print s
261                stdout.flush()
262
263            def warning(self, s, exc_info=None):
264                print s
265                stdout.flush()
266
267        def mock_config_log():
268            log = mockLog()
269            return log
270
271        def makeTailCmd(fileName):
272            return CmdRunner(MockSproc('tail log blah', '', time.sleep),
273                             'tail stuff')
274        log = mock_config_log()
275
276        emailer = Emailer(MockSMTP(log),
277                          toAddrs=WATCHERS, fromAddr=BOT_ADDR)
278
279        def makeFileChecker(log, fileName):
280            class fileSz(object):
281                def __init__(self, fincr):
282                    self._size = 0
283                    self._fincr = fincr
284
285                def getsize(self, fpath):
286                    self._size += self._fincr
287                    return self._size
288
289            fz = fileSz(fincr)
290
291            return FileChecker(log, 'filetocheck', emailer.sendEmail,
292                               datetime.datetime.now, fz.getsize,
293                               isfile=lambda(path): filefound,
294                               pollSec=0, timeoutSec=1)
295
296        return dict(sendEmail=emailer.sendEmail,
297                    makeFileChecker=makeFileChecker,
298                    makeTailCmd=makeTailCmd,
299                    argv_config=_mock_cp,
300                    raw_input=lambda(prompt): contPrompt,
301                    os_path=os_path, calendar=MockCalendar(),
302                    configure_logging=mock_config_log)
303
304
305class MockCalendar(object):
306    def today(self):
307        import datetime
308        return datetime.date(2012, 10, 19)
309
310
311class FileChecker(threading.Thread):
312    def __init__(self, log, path, sendEmail, nowFunc, fsize, isfile,
313                 timeoutSec=90 * 60, pollSec=30):
314
315        threading.Thread.__init__(self)
316        self._path = path
317        self._sendEmail = sendEmail
318        self._nowFunc = nowFunc
319        self._fsize = fsize
320        self._isfile = isfile
321        self._timeout = timeoutSec
322        self._pollSec = pollSec
323        self._stop = False
324        self._log = log
325
326    def __str__(self):
327        return self._path
328
329    def isfile(self, path):
330        if self._isfile(path):
331            return True
332        self._log.warning('%s is not found!' % path)
333        return False
334
335    def run(self):
336        lastSize = 0
337        lastTimeChanged = self._nowFunc()
338        alertrecent = False
339        lastIter = self._nowFunc()
340        while True:
341            # Check to see if we should stop
342            if self._stop:
343                self._log.info('Checker thread exiting.')
344                return
345
346            # Sleep a short amount of time so if we're told to exit, we
347            # can in a timely manner.  Only check the file on the
348            # interval specified.
349            time.sleep(0.5)
350
351            if (self._nowFunc() - lastIter).seconds >= self._pollSec:
352                size = 0
353                if self.isfile(self._path):
354                    size = self._fsize(self._path)
355
356                if size == lastSize:
357                    delta = self._nowFunc() - lastTimeChanged
358                    c = alertrecent is False\
359                        and (delta.days * 86400) + delta.seconds\
360                        > self._timeout
361                    if c:
362                        msg = 'Log file not updated for %s' % str(delta)
363                        self._log.info(msg)
364                        try_send(self._log, self._sendEmail,
365                                 msg, 'Log Update Warning')
366
367                        alertrecent = True
368                else:
369                    lastTimeChanged = self._nowFunc()
370                    lastSize = size
371                    alertrecent = False
372                lastIter = self._nowFunc()
373
374    def stop(self):
375        self._stop = True
376        self.join()
377
378
379def try_send(log, sendEmail, body, subject):
380    try:
381        sendEmail(body, subject)
382    except Exception as ex:
383        log.warning('Exception sending e-mail!',
384                    exc_info=ex)
385
386
387def _mock_cp(missingHeron=False):
388    cp = SafeConfigParser()
389    if not missingHeron:
390        cp.add_section('handler_detail')
391        cp.set('handler_detail', 'detail_log_dir', '/THE/LOG/DIR/')
392    return cp
393
394
395def etl_log(cp, join, today):
396    '''Build ETL log path from configuration and date
397
398    See also: heron_build.logging_start.
399
400    .. todo:: factor out redundancy.
401
402    >>> join = lambda d, p: d + p
403
404    >>> today = MockCalendar().today()
405
406    >>> etl_log(_mock_cp(), join, today)
407    '/THE/LOG/DIR/heron_load-2012-10-19.log'
408
409    >>> etl_log(_mock_cp(missingHeron=True), join, today)
410    Traceback (most recent call last):
411      ...
412    ValueError: no handler_detail section
413    '''
414    if not cp.has_section('handler_detail'):
415        raise ValueError('no handler_detail section')
416
417    logDir = cp.get('handler_detail', 'detail_log_dir')
418    if not logDir:
419        raise ValueError('no log dir')
420
421    logFileName = 'heron_load-%s.log' % today.strftime('%Y-%m-%d')
422    return join(logDir, logFileName)
423
424
425def get_config(argv):
426    '''
427    >>> get_config(['config=hp2.ini'])
428    ['hp2.ini']
429
430    >>> get_config([])
431    Traceback (most recent call last):
432      ...
433    ValueError: no config files: []
434    '''
435    filenames = [arg.split('=')[1]
436                 for arg in argv
437                 if arg.startswith('config=')]
438    if not filenames:
439        raise ValueError('no config files: %s' % argv)
440    return filenames
441
442
443class MockSMTP(object):
444    def __init__(self, log, raiseOnSend=False):
445        self._log = log
446        self._raiseOnSend = raiseOnSend
447
448    def connect(self):
449        pass
450
451    def sendmail(self, sender, receiver, msg):
452        if self._raiseOnSend:
453            raise IOError
454
455        self._log.info('MockSMTP:sendmail()\n From: %s\n To: %s\n%s'
456                       % (sender, receiver, msg))
457
458    def quit(self):
459        pass
460
461
462class Emailer(object):
463    '''
464    Test exception during e-mail
465    >>> import os
466    >>> log = Mock.caps(os.path)['configure_logging']()
467    >>> emailer = Emailer(MockSMTP(log, raiseOnSend=True), toAddrs=[], fromAddr='') # noqa
468    >>> try_send(log, emailer.sendEmail, body='Nothing', subject='Nothing') # noqa
469    Exception sending e-mail!
470    '''
471    #TODO: Consider using heron-admin@kumc.edu
472    def __init__(self, SMTP, toAddrs=['ngraham@kumc.edu'],
473                 fromAddr='periodicquery'):
474        self._SMTP = SMTP
475        self._toAddrs = toAddrs
476        self._fromAddr = fromAddr
477
478    def sendEmail(self, body, subject):
479        #Ref: http://docs.python.org/library/email-examples.html
480        msg = MIMEText(body)
481        msg['Subject'] = subject
482        msg['From'] = self._fromAddr
483        msg['To'] = ','.join(self._toAddrs)
484
485        #Note: Server defaults to localhost
486        self._SMTP.connect()
487        self._SMTP.sendmail(self._fromAddr, self._toAddrs, msg.as_string())
488        self._SMTP.quit()
489
490
491if __name__ == '__main__':
492    def _initial_caps():
493        '''Trusted code to compute initial capabilities
494        '''
495        from os import path as os_path
496        from smtplib import SMTP
497        from sys import argv
498        from datetime import datetime
499        import logging
500        import subprocess
501
502        def argv_config():
503            '''Capability to load config files specified on command line.
504            '''
505            cp = SafeConfigParser()
506            cp.read(get_config(argv))
507            return cp
508
509        emailer = Emailer(SMTP(), toAddrs=WATCHERS, fromAddr=BOT_ADDR)
510
511        def makeFileChecker(log, fileName):
512            return FileChecker(log, fileName, emailer.sendEmail,
513                               datetime.now, os_path.getsize,
514                               os_path.isfile)
515
516        # Pretty huge capability here...
517        paverCmd = CmdRunner(subprocess, argv[1:])
518
519        def makeTailCmd(fileName):
520            return CmdRunner(subprocess,
521                             ['tail', '-n', '200', fileName])
522
523        def _configure_logging(level=logging.DEBUG):
524            log = logging.getLogger(__name__)
525            logging.basicConfig(level=level)
526            return log
527
528        return dict(argv_config=argv_config, calendar=datetime,
529                    configure_logging=_configure_logging,
530                    makeFileChecker=makeFileChecker, makeTailCmd=makeTailCmd,
531                    os_path=os_path, paverCmd=paverCmd, raw_input=raw_input,
532                    sendEmail=emailer.sendEmail)
533
534    main(**_initial_caps())
Note: See TracBrowser for help on using the repository browser.