Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/compare_execution_plans.py @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 9.5 KB
Line 
1''' compare_execution_plans.py - Compare oracle execution plans
2
3Run with --help for usage.
4'''
5import argparse
6import logging
7import re
8import sys
9
10logging.basicConfig(level=logging.WARNING)
11log = logging.getLogger(__name__)
12
13
14class LogInfo(object):
15    def __init__(self, logFileName):
16        self.logFileName = logFileName
17        self._parseLogFile()
18
19    def getPlanSteps(self):
20        '''Return the plan steps resulting from _parseLogFile
21        '''
22        return self.testPlanSteps
23
24    def __str__(self):
25        s = 'Found %d tests in %s:' % (len(self.testPlanSteps),
26                                       self.logFileName)
27        for t in self.testPlanSteps:
28            s += '\n%s (%d plan(s))' % (t, len(self.testPlanSteps[t]))
29        return s
30
31    def stepsStr(self, testName, planIdx):
32        if testName not in self.testPlanSteps:
33            raise LookupError
34        if planIdx >= len(self.testPlanSteps[testName]):
35            raise IndexError
36
37        s = ''
38        for step in self.testPlanSteps[testName][planIdx]:
39            s += '\n%02d,%s,%s' % (step[0],
40                                   step[1],
41                                   step[2])
42        return s
43
44    def planForTestStr(self, testName):
45        i = 0
46        s = ''
47        for plan in self.testPlanSteps[testName]:
48            s += '\n%s:%s:%d' % (self.logFileName, testName, i)
49            s += self.stepsStr(testName, i)
50            i += 1
51        return s
52
53    def planForAllTestsStr(self):
54        s = ''
55        for t in self.testPlanSteps:
56            s += self.planForTestStr(t)
57        return s
58
59    def buildDeltaTestStr(self, other):
60        '''
61        Build string describing deltas between two LogInfo objects.
62        '''
63        def oneInAnother(a, b):
64            return (a.logFileName, b.logFileName,
65                    [t for t in a.getPlanSteps() if t not in b.getPlanSteps()])
66
67        def deltaStr(delta):
68            s = ''
69            if len(delta[2]) != 0:
70                s = 'Tests in %s but not in %s:' % (delta[0], delta[1])
71                for t in delta[2]:
72                    s += '\n%s' % t
73            return s
74
75        return deltaStr(oneInAnother(self, other))
76        + deltaStr(oneInAnother(other, self))
77
78    def hasTestName(self, testName):
79        return True if testName in self.testPlanSteps else False
80
81    def buildDeltaAllPlanStr(self, other):
82        s = ''
83        for t in self.getPlanSteps():
84            if other.hasTestName(t):
85                s += self.buildDeltaPlanStr(other, t)
86        return s
87
88    def buildDeltaPlanStr(self, other, testName):
89        '''
90        Build string describing the deltas between tests by name.
91        '''
92        myPlans = self.getPlanSteps()[testName]
93        otherPlans = other.getPlanSteps()[testName]
94
95        def names(a, b):
96            return '(%s vs %s)' % (a.logFileName, b.logFileName)
97
98        def fullStr(a, b, tn):
99            return '\n%s\n%s' % (self.planForTestStr(tn),
100                                 other.planForTestStr(tn))
101
102        s = ''
103        if len(myPlans) != len(otherPlans):
104            s = '%s Number of plans for %s don\'t match!' % (names(self,
105                                                                   other),
106                                                             testName)
107        else:
108            i = 0
109            for myPlan, otherPlan in zip(myPlans, otherPlans):
110                if len(myPlan) != len(otherPlan):
111                    s = '%s Number of steps for %s:%s don\'t match!' % (
112                        names(self, other), testName, i)
113                    s += fullStr(self, other, testName)
114                else:
115                    for myStep, otherStep in zip(myPlan, otherPlan):
116                        if myStep != otherStep:
117                            s = '%s test %s plan %d stepid %d doesn\'t match!'\
118                                % (names(self, other), testName, i, myStep[0])
119                            s += '\n%s:\n%s\n\n%s:\n%s\n\n' % \
120                                 (self.logFileName,
121                                  self.stepsStr(testName, i),
122                                  other.logFileName,
123                                  other.stepsStr(testName, i))
124                i += 1
125        return s
126
127    def _parseLogFile(self):
128        '''Creates a dictionary keyed by test name containing a list of lists
129        of tuples for each name.  The outer list is the list of plans for
130        each sub-query in the test.  The inner list is a list of each step
131        inside the plan.  Finally, the inner-most tuple has plan id, operation,
132        name.
133
134        {'query_name':[sub_plan[step(plan id, operation, name)]]
135        '''
136        # note: re.finditer or re.findall chokes with large files.
137
138        f = open(self.logFileName, 'r')
139        regex = re.compile('\*+\s+Starting Test:\s+(?P<name>.+?)\s+\*+|'
140                           '\*+\s+Total run time\s+(?P<runtime>[0-9]+)'
141                           '\s+sec\s+\*+')
142
143        acc = None
144        ln = 0
145        self.testPlanSteps = {}
146        for line in f:
147            m = regex.match(line)
148            if m:
149                if m.group('name') is not None:
150                    #log.info('%d found %s' % (ln, m.group('name')))
151                    currName = m.group('name')
152                    if currName in self.testPlanSteps:
153                        # TODO: Consider better handling multiple occurrences
154                        log.info('%s already found in %s! '
155                                 'Taking last' %
156                                 (currName, self.logFileName))
157                    self.testPlanSteps[currName] = []
158                    acc = ''
159                elif m.group('runtime'):
160                    #log.info('%d end' % ln)
161                    if acc is not None:
162                        for plan in self._parsePlans(acc):
163                            self.testPlanSteps[currName].append(
164                                self._parsePlanSteps(plan))
165                    acc = None
166            elif acc is not None:
167                acc += line
168            ln += 1
169        #log.info('Found %d total tests.' % len(self.testPlanSteps))
170
171    def _parsePlans(self, logData):
172        '''Return a list of Oracle execution plan tables
173        '''
174        return [p[0].strip() for p in re.findall(
175            'Plan hash value: [0-9]+((.|\n|\r\n)+?)(Predicate '
176            'Information \(identified by operation id\)|'
177            '[0-9]{4}-[0-9]{2}-[0-9]{2}.+?):',
178            logData)]
179
180    def _parsePlanSteps(self, plan):
181        '''Return a list of steps from an Oracle execution plan table.
182        Each step is a tuple containing the step id, the operation and
183        the name.
184        '''
185        steps = []
186        stepre = re.compile('\|(\s+)?\*?(\s+)?(?P<id>[0-9]+)'
187                            '(\s+)?\|(?P<operation>.+?)'
188                            '\|(?P<name>.+?)\|.+?'
189                            '\|.+?\|.+?\|.+?\|')
190
191        for line in plan.split('\n'):
192            m = stepre.match(line)
193            if m:
194                steps.append((int(m.group('id').strip()),
195                              m.group('operation').strip(),
196                              m.group('name').strip()))
197
198        return steps
199
200
201def handle_info(args):
202    for a in args:
203        li = LogInfo(a)
204        log.info('\n\n**** %s\n\n' % str(li))
205        log.info(li.planForAllTestsStr())
206    return 0
207
208
209def toLogInfo(fnlist):
210    return tuple(LogInfo(fn) for fn in fnlist)
211
212
213def logSum(lolist):
214    for lo in lolist:
215        log.info('\n\n%s' % str(lo))
216
217
218def handle_compare(args):
219    a, b = toLogInfo(args)
220    logSum([a, b])
221
222    ret = 0
223    diff = a.buildDeltaTestStr(b)
224    if len(diff) != 0:
225        log.warning(diff)
226        ret = 1
227    diff = a.buildDeltaAllPlanStr(b)
228    if len(diff) != 0:
229        log.warning(diff)
230        ret = 1
231    return ret
232
233
234def handle_truth(args):
235    truth, testlog = toLogInfo(args)
236    logSum([truth, testlog])
237    diff = truth.buildDeltaAllPlanStr(testlog)
238    ret = 0
239    if len(diff) != 0:
240        ret = 1
241        log.warning(diff)
242    else:
243        log.info('No differences found.')
244    return ret
245
246
247if __name__ == '__main__':
248    parser = argparse.ArgumentParser(
249        description='Compare oracle execution plans from log output.',
250        epilog='GOOD_LOG_FILE is a log file with excerpts from log files '
251        'which have what are considered "good" plans.  The log file excerpts '
252        'should contain the "**** Starting Test: ..." start delimiter and '
253        'the "**** Total run time ..." ending delimiter from the perforamce '
254        'test script.')
255
256    group = parser.add_mutually_exclusive_group()
257
258    group.add_argument('--compare', metavar=('LOG_FILE_A', 'LOG_FILE_B'),
259                       nargs=2, help='Compare two log files')
260    group.add_argument('--truth', metavar=('TRUTH_LOG_FILE', 'LOG_FILE'),
261                       nargs=2,
262                       help='Compare LOG_FILE given TRUTH_FILE')
263    group.add_argument('--info', metavar='LOG_FILE', nargs='+',
264                       help='Parse and print info on log file(s).'
265                       ' Implies verbose output')
266    parser.add_argument('-v', action='store_true', dest='verbose',
267                        help='Verbose output (INFO level logging)')
268
269    args = parser.parse_args()
270    if args.verbose or args.info is not None:
271        log.setLevel(level=logging.INFO)
272        log.info('Set logging to INFO')
273
274    if args.compare:
275        ret = handle_compare(args.compare)
276    elif args.truth:
277        ret = handle_truth(args.truth)
278    elif args.info:
279        ret = handle_info(args.info)
280    else:
281        parser.print_help()
282        ret = 0
283
284    sys.exit(ret)
Note: See TracBrowser for help on using the repository browser.