Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: heron_load/devdoc/select_parser.py @ 0:42ad7288920a

heron-michigan tip
Last change on this file since 0:42ad7288920a was 0:42ad7288920a, checked in by Matt Hoag <mhoag@…>, 6 years ago

Merge with demo_concepts_3800

File size: 16.0 KB
Line 
1'''select_parser.py -- a simple SQL parser,
2
3taken from SQLite's SELECT statement
4definition at http://www.sqlite.org/lang_select.html
5
6and updated for `Oracle syntax`__
7
8
9__ http://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_10002.htm#SQLRF01702  # noqa
10
11  >>> def t(s):
12  ...     return script.parseString(s, parseAll=True)
13  >>> def t1(s):
14  ...     print t(s).statements[0].dump()
15  >>> def tn(s):
16  ...     return len(t(s).statements)
17
18A script may have multiple statements:
19
20  >>> len(t('select x from dual; select x from dual').statements)
21  2
22
23A select statement may be compound:
24  >>> tn('select x from y minus select c from t')
25  1
26
27
28  >>> t('select * from xyzzy where z > 100').statements[0].where_expr
29  (['Z', '>', '100'], {'column': [('Z', 0)]})
30
31  >>> s = t('select * from xyzzy where z > 100 order by zz').statements[0]
32  >>> s.order_by_terms[0]
33  'ZZ'
34  >>> s.where_expr.column
35  'Z'
36
37  >>> tn('select * from xyzzy')
38  1
39
40  >>> t('select p.* from p').statements[0].query[3][0].from_object
41  (['P'], {'table': [('P', 0)], 'loc': [(16, 0)]})
42
43  >>> tn('/* multi-line comment */ select * from xyzzy')
44  1
45  >>> tn('select * from xyzzy_single_line_comment -- single line comment')
46  1
47
48  >>> tn("update (select x from t1) a1 set x = 2;")
49  1
50
51Expr enhancements:
52  >>> def te(txt):
53  ...     return expr.parseString(txt, parseAll=True)
54
55  >>> len(te("case when x = 1 then 'n' else 'x' end"))
56  1
57  >>> len(te("case x when 1 then 'n' else 'x' end"))
58  1
59
60  >>> len(te('x is not null')[0])
61  3
62  >>> len(te('c = 2 and d_date is null')[0])
63  3
64
65  >>> te('extract(year from last_alive_date)')[0].column
66  'LAST_ALIVE_DATE'
67  >>> te('dbms_xplan.display()')[0].package
68  'DBMS_XPLAN'
69
70  >>> te('count(*)')[0]
71  (['COUNT', '*'], {})
72  >>> len(te('y in (select c from t2)')[0])
73  3
74  >>> len(te('x < (select v from t2 where c2 = 1)')[0])
75  3
76  >>> len(te("z not like 'pat%'")[0])
77  3
78  >>> len(te("t1.c in (1, 2, 3)")[0])
79  3
80
81  >>> te('.1')
82  (['.1'], {})
83
84  >>> te('"HeLLo"')
85  (['HeLLo'], {'column': [('HeLLo', 0)]})
86
87  >>> len(te("not (t1.c is null)"))
88  1
89
90  >>> len(te("""x not in (select c1 from s1.t1)""")[0])
91  3
92
93  >>> len(te("""s1.seq1.nextval"""))
94  1
95
96  >>> len(te("""max(plv.result_time) over (
97  ...           partition by plv.pat_id, component_id, reference_unit)"""))
98  1
99
100  >>> len(te("""median(plv.result_time) over (
101  ...           partition by plv.pat_id, component_id, reference_unit)"""))
102  1
103
104Ugh... let's eradicate use of keywords as names:
105  >>> tn('select c from t count')
106  1
107
108Scientific notation:
109  >>> te("val < 1e12")
110  ([(['VAL', '<', '1e12'], {'column': [('VAL', 0)]})], {})
111
112Qualified Result Columns:
113
114  >>> result_column.parseString('t.c AS a', parseAll=True).column_expr.table
115  'T'
116  >>> result_column.parseString('t.c a', parseAll=True).alias
117  'A'
118  >>> tn('Select p.q pq from dual')
119  1
120
121
122Create:
123  >>> tn("""create or replace view v as (select c from t)""")
124  1
125  >>> tn("""create index i on t (c1, c2)""")
126  1
127  >>> tn("""create table t (x integer, y varchar2(10) not null)""")
128  1
129
130tricky join clauses:
131  >>> tn("""create table t1 as
132  ...  (select distinct x
133  ...   from s1.t2 a1
134  ...   cross join t2 a2
135  ...   left join s2.t3 a3 on a2.k = a3.k
136  ...  );""")
137  1
138
139With: note Oracle's docs on this don't seem to match what their code does.
140  >>> tn('with t as (select x from dual) select * from t')
141  1
142
143  >>> tn('select x from (with q1 as (select x from t2) select * from t)')
144  1
145
146  >>> tn("""create or replace view v as
147  ...       with t as (select x from dual), u as (select x from y)
148  ...       select * from t""")
149  1
150
151  >>> tn('select pat_id from CLARITY.patient where 1 = 0; select x from dual')
152  2
153
154Explain Plan, Alter Session:
155  >>> tn('explain plan for select c from v')
156  1
157  >>> tn('SELECT PLAN_TABLE_OUTPUT FROM TABLE(DBMS_XPLAN.DISPLAY())')
158  1
159
160  >>> tn("alter session set nls_thingy = 'yyy'")
161  1
162
163  >>> tn("alter table t nologging")
164  1
165
166Insert with values::
167
168  >>> tn("""insert into lab_value_regex ("REGEX", "VF_CD", "NUM_OCC")
169  ...       values (:REGEX, :VF_CD, :NUM_OCC)""")
170  1
171
172Macros:
173  >>> tn('select x, &&foo from z')
174  1
175  >>> tn('select x from &&foo')
176  1
177
178Regression test: UNARY + seemed to conflict with BINARY:
179  >>> expr.parseString('1 + 1', parseAll=True)
180  ([(['1', '+', '1'], {})], {})
181  >>> expr.parseString('2 - 3', parseAll=True)
182  ([(['2', '-', '3'], {})], {})
183
184Oops... added as_q on create table as ..., but not on insert.
185  >>> t('insert into DEST (select X from SRC)')[0].as_q[3][0].from_object.table
186  'SRC'
187
188'''
189__copyright__ = "Copyright (c) 2010, Paul McGuire"
190
191from pyparsing import (Suppress, Forward, CaselessKeyword, MatchFirst, Word,
192                       alphas, alphanums, Regex, QuotedString, Combine,
193                       oneOf, hexnums, nums, delimitedList, Optional, Literal,
194                       operatorPrecedence, opAssoc, Group, restOfLine,
195                       ZeroOrMore, OneOrMore, ParserElement, StringEnd,
196                       Dict, Empty)
197
198ParserElement.enablePackrat()
199
200
201class AST:
202    statements = "statements"
203    drop = 'drop'
204    truncate = 'truncate'
205    select_statement = "select_statement"
206    insert = 'insert'
207    update = 'update'
208    delete = 'delete'
209    commit = 'commit'
210    create_index = 'create_index'
211    create_sequence = 'create_sequence'
212    alter_table = 'alter_table'
213    alter_session = 'alter_session'
214    query_name = 'query_name'
215    t_alias = 't_alias'
216    create_statement = "create_statement"
217    explain_statement = "explain_statement"
218    whenever = 'whenever'
219    database = "database"  # umm... schema?
220    dblink = 'dblink'
221    table = "table"
222    from_object = "from_object"
223    to_object = 'to_object'
224
225LPAR, RPAR, COMMA = map(Suppress, "(),")
226select_stmt = Forward()
227
228endLineComment = Literal('--') + restOfLine
229multiLineComment = Regex(r'/\*[\s\S]*?\*/')
230
231# keywords
232(UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, COUNT,
233 NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT,
234 FROM, WHERE, GROUP, BY, HAVING, ORDER, LIMIT, OFFSET, EXPLAIN, PLAN, FOR,
235 CREATE, OR, REPLACE, TABLE, VIEW, WITH, EXTRACT, YEAR, SYSDATE, MINUS,
236 WHENEVER, SQLERROR, CONTINUE, EXIT, DROP, TRUNCATE, INDEX, INSERT, INTO,
237 ALTER, SESSION, SET, NOLOGGING, DISABLE, CONSTRAINT, DELETE, UPDATE,
238 SEQUENCE, NEXTVAL, INTEGER, VALUES, PARTITION, OVER,
239 VARCHAR2, ADD, PRIMARY, KEY, COMMIT, TABLESPACE,
240 CONNECT, PRIOR, START
241 ) = _k1 = map(
242     CaselessKeyword,
243     """
244 UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, COUNT,
245 NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT,
246 FROM, WHERE, GROUP, BY, HAVING, ORDER, LIMIT, OFFSET, EXPLAIN, PLAN, FOR,
247 CREATE, OR, REPLACE, TABLE, VIEW, WITH, EXTRACT, YEAR, SYSDATE, MINUS,
248 WHENEVER, SQLERROR, CONTINUE, EXIT, DROP, TRUNCATE, INDEX, INSERT, INTO,
249 ALTER, SESSION, SET, NOLOGGING, DISABLE, CONSTRAINT, DELETE, UPDATE
250 SEQUENCE, NEXTVAL, INTEGER, VALUES, PARTITION, OVER,
251 VARCHAR2, ADD, PRIMARY, KEY, COMMIT, TABLESPACE,
252 CONNECT, PRIOR, START
253     """.strip().replace(",", "").split())
254(CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN,
255 EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH,
256 MAX, MEDIAN,
257 ESCAPE, CURRENT_TIME,
258 CURRENT_DATE, CURRENT_TIMESTAMP) = _k2 = map(
259     CaselessKeyword,
260     """
261 CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN,
262 EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH,
263 MAX, MEDIAN,
264 ESCAPE, CURRENT_TIME,
265 CURRENT_DATE, CURRENT_TIMESTAMP""".strip().replace(",", "").split())
266keyword = MatchFirst(_k1 + _k2)
267
268identifier = (
269    (~keyword + Word(alphas, alphanums + "_")).setParseAction(
270        lambda s, l, t: [t[0].upper()])
271    | QuotedString('"'))
272collation_name = identifier.copy()
273column_name = identifier.copy()("column")
274column_alias = identifier.copy()
275table_name = identifier.copy()(AST.table)
276t_alias = (identifier.copy()
277           | COUNT("keyword_kludge"))
278index_name = identifier.copy()
279package_name = identifier.copy()("package")
280function_name = Optional(package_name + ".") + identifier.copy()("function")
281parameter_name = identifier.copy()
282database_name = identifier.copy()(AST.database)
283dblink = identifier.copy()(AST.dblink)
284
285# TODO: Remove leftover macro code
286macro = Suppress(Literal("&&")) + Word(alphas, alphanums + "_")("macro")
287
288object_name = Group(Optional(database_name + ".")
289                    + table_name + Optional('@' + dblink)) | macro
290
291
292# expression
293expr = Forward().setName("expression")
294subquery = Forward()
295
296integer = Regex(r"[+-]?\d+")
297numeric_literal = Regex(r"[+-]?(?:\d+(?:(\.\d*)?)|(?:\.\d+))([eE][+-]?\d+)?")
298string_literal = QuotedString("'")
299blob_literal = Combine(oneOf("x X") + "'" + Word(hexnums) + "'")
300literal_value = (numeric_literal | string_literal | blob_literal | NULL
301                 | CURRENT_TIME | CURRENT_DATE | CURRENT_TIMESTAMP | SYSDATE)
302bind_parameter = (
303    Word("?", nums) |
304    Combine(oneOf(": @ $") + parameter_name))
305type_name = oneOf("TEXT REAL INTEGER BLOB NULL")
306
307wildcard = Literal("*")  # ("wildcard")
308
309analytic_function = MAX | MEDIAN
310analytic_clause = PARTITION + BY + delimitedList(expr)
311
312expr_term = Group(
313    CAST + LPAR + expr + AS + type_name + RPAR |
314    EXISTS + LPAR + subquery + RPAR |
315    (CASE + Optional(expr) +
316     OneOrMore(Group(WHEN + expr + THEN + expr)) +
317     Optional(Group(ELSE + expr)) +
318     Suppress(END)) |
319    EXTRACT + LPAR + YEAR + FROM + expr + RPAR |
320    COUNT + LPAR + (Optional(DISTINCT) + expr | wildcard) + RPAR |
321    (analytic_function + LPAR + expr + RPAR
322     + Optional(OVER + LPAR + analytic_clause + RPAR)) |
323    NOT + expr |
324    function_name + LPAR + Optional(delimitedList(expr)) + RPAR |
325    LPAR + delimitedList(literal_value) + RPAR |  # handle IN (...)
326    LPAR + subquery + RPAR |
327    macro |
328    object_name + "." + NEXTVAL |
329    (table_name + "."
330     + (column_name | keyword("keyword_as_name")))  # KLUDGE!
331    ) | column_name | literal_value | bind_parameter
332
333UNARY, BINARY, TERNARY = 1, 2, 3
334expr << operatorPrecedence(
335    expr_term,
336    # UNARY + seems to conflict with BINARY
337    [('||', BINARY, opAssoc.LEFT),
338     (oneOf('* / %'), BINARY, opAssoc.LEFT),
339     (oneOf('+ -'), BINARY, opAssoc.LEFT),
340     (oneOf('<< >> & |'), BINARY, opAssoc.LEFT),
341     (oneOf('< <= > >='), BINARY, opAssoc.LEFT),
342     (oneOf('= == != <>') | Group(IS + NOT) | IS | Group(NOT + IN) | IN
343      | Group(NOT + LIKE) | LIKE,
344      BINARY, opAssoc.LEFT),
345     (AND, BINARY, opAssoc.LEFT),
346     (OR, BINARY, opAssoc.LEFT),
347     ((BETWEEN, AND), TERNARY, opAssoc.LEFT)])
348
349compound_operator = (UNION + Optional(ALL) | INTERSECT | EXCEPT | MINUS)
350
351ordering_term = expr + Optional(
352    COLLATE + collation_name) + Optional(ASC | DESC)
353
354result_column = Group(table_name + "." + wildcard) | (
355    expr("column_expr") + Optional(
356        AS + (column_alias | keyword("keyword_as_name"))("alias")  # KLUDGE!
357        | column_alias)("alias"))
358select_list = wildcard | Group(delimitedList(result_column))
359
360
361def save_loc(s, l, t):
362    if isinstance(t[0], type('')):
363        return
364    t[0]['loc'] = l
365    return t
366
367table_collection_expression = TABLE + LPAR + expr + RPAR  # simplified
368
369query_table_expression = (object_name.setParseAction(save_loc)(AST.from_object)
370                          | LPAR + subquery + RPAR
371                          | table_collection_expression)
372#table_reference = (query_table_expression +
373#                   Optional(Optional(AS) + t_alias("table_alias")))
374table_reference = Group(query_table_expression +
375                        Optional(t_alias(AST.t_alias)))
376
377join_clause_ = OneOrMore(
378    Group(CROSS + JOIN + table_reference)
379    | Group((LEFT + Optional(OUTER) | Optional(INNER)) + JOIN
380            + table_reference + (
381                (ON + expr)
382                | (USING + LPAR + Group(delimitedList(column_name)) + RPAR))))
383
384where_clause = WHERE + expr("where_expr")
385
386group_by_clause = (GROUP + BY + Group(
387    delimitedList(ordering_term)("group_by_terms"))
388    + Optional(HAVING + expr("having_expr")))
389
390hierarchical_query_clause = Group(
391    Optional(START + WITH + expr)
392    + CONNECT + BY + Optional(PRIOR) + expr)
393
394query_block = (
395    SELECT + Optional(DISTINCT | ALL)
396    + select_list +
397    FROM + Group(
398        delimitedList(table_reference + Optional(join_clause_)
399                      | LPAR + table_reference + join_clause_ + RPAR)) +
400    Optional(where_clause) +
401    Optional(hierarchical_query_clause) +
402    Optional(group_by_clause))
403
404order_by_clause = ORDER + BY + Group(
405    delimitedList(ordering_term))("order_by_terms")
406
407subquery_factoring_clause = Group(
408    Suppress(WITH)
409    + Dict(delimitedList(
410        Group(identifier(AST.query_name)
411              + Suppress(AS) + LPAR + subquery + RPAR))))
412
413subquery << (
414    Optional(subquery_factoring_clause)("factors") +
415    ((query_block | LPAR + subquery + RPAR) +
416        Optional(order_by_clause))("query")
417    + ZeroOrMore(compound_operator + Group(subquery)))
418
419select_stmt = Group(subquery)(AST.select_statement)
420
421dml_table_expression_clause = (
422    object_name.setParseAction(save_loc)(AST.to_object)
423    | LPAR + subquery + RPAR
424    | table_collection_expression)
425
426delete_stmt = Group(
427    DELETE + FROM + dml_table_expression_clause + Optional(t_alias)
428    + Optional(where_clause))(AST.delete)
429    # simplified. no returning, error_logging
430
431insert_stmt = Group(
432    INSERT + INTO + dml_table_expression_clause + Optional(t_alias)
433    + Optional(LPAR + delimitedList(column_name) + RPAR)
434    + (Group(VALUES + LPAR + delimitedList(expr) + RPAR)("values") |
435       subquery("as_q")))(AST.insert)  # ... values, returning, error_logging
436
437update_set_clause = (
438    SET + Group(delimitedList(
439        Optional(table_name + '.') + column_name
440        + '=' + expr)))  # ... VALUE, subquery, ...
441
442update_stmt = Group(
443    UPDATE + dml_table_expression_clause + Optional(t_alias)
444    + update_set_clause
445    + Optional(where_clause))(AST.update)
446    # ... hint, ONLY, returning, error_logging
447
448commit_stmt = Group(COMMIT)(AST.commit)
449
450dml_stmt = select_stmt | insert_stmt | update_stmt | delete_stmt | commit_stmt
451
452'''Datatypes__
453__ http://docs.oracle.com/cd/B28359_01/server.111/b28286/sql_elements001.htm#i45441  # noqa
454'''
455datatype = INTEGER | Group(VARCHAR2 + LPAR + integer + RPAR)
456
457relational_properties = Group(delimitedList(
458    column_name + datatype + Optional(Optional(NOT) + NULL)))
459
460# http://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_7002.htm#i2095331  # noqa
461create_stmt = Group(CREATE
462                    + Optional(OR + REPLACE)("replace")
463                    # global temporary
464                    + (TABLE("table") | VIEW("view"))
465                    + object_name(AST.to_object)
466                    + ((Suppress(AS) + (
467                        select_stmt | subquery)("as_q"))
468                       | (LPAR + relational_properties + RPAR))
469                    )(AST.create_statement)
470
471create_sequence_statement = Group(
472    CREATE + SEQUENCE + object_name)(AST.create_sequence)
473
474create_index_stmt = Group(
475    CREATE + INDEX + object_name
476    + ON + object_name
477    + LPAR + delimitedList(column_name) + RPAR
478    + Optional(TABLESPACE + identifier))(AST.create_index)
479
480drop_stmt = Group(DROP + (TABLE | VIEW | INDEX) + object_name)(AST.drop)
481truncate_stmt = Group(TRUNCATE + TABLE + object_name)(AST.truncate)
482
483ddl_stmt = (create_stmt | create_index_stmt | drop_stmt | truncate_stmt
484            | create_sequence_statement)
485
486alter_session_stmt = Group(
487    ALTER + SESSION + SET + identifier + '=' + expr)(AST.alter_session)
488
489alter_table_stmt = Group(
490    ALTER + TABLE + object_name + OneOrMore(
491        NOLOGGING
492        | Group(DISABLE + CONSTRAINT + identifier)
493        | Group(ADD + LPAR + relational_properties + RPAR)
494        | Group(ADD + CONSTRAINT + identifier
495                + PRIMARY + KEY + LPAR + delimitedList(column_name) + RPAR)
496    ))(AST.alter_table)
497
498explain_stmt = Group(EXPLAIN + PLAN + FOR + dml_stmt)(AST.explain_statement)
499
500whenever_stmt = Group(WHENEVER + SQLERROR
501                      + (CONTINUE("continue") | EXIT("exit")))(AST.whenever)
502
503empty_statement = Suppress(Empty())
504
505statement = (dml_stmt | ddl_stmt | explain_stmt | whenever_stmt
506             | alter_session_stmt | alter_table_stmt | empty_statement)
507
508script = delimitedList(statement, delim=';')(AST.statements) + StringEnd()
509script.ignore(endLineComment)
510script.ignore(multiLineComment)
Note: See TracBrowser for help on using the repository browser.