from pyparsing import * LPAR,RPAR,COMMA = map(Suppress,"(),") select_stmt = Forward().setName("select statement") # keywords (UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET) = map(CaselessKeyword, """UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET""".replace(",","").split()) (CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP) = map(CaselessKeyword, """CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP""".replace(",","").split()) keyword = MatchFirst((UNION, ALL, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, NATURAL, INNER, CROSS, LEFT, OUTER, JOIN, AS, INDEXED, NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, LIMIT, OFFSET, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP)) identifier = ~keyword + Word(alphas, alphanums+"_") collation_name = identifier.copy() column_name = identifier.copy() column_alias = identifier.copy() table_name = identifier.copy() table_alias = identifier.copy() index_name = identifier.copy() function_name = identifier.copy() parameter_name = identifier.copy() database_name = identifier.copy() # expression expr = Forward().setName("expression") integer = Regex(r"[+-]?\d+") numeric_literal = Regex(r"\d+(\.\d*)?([eE][+-]?\d+)?") string_literal = QuotedString("'") blob_literal = Combine(oneOf("x X") + "'" + Word(hexnums) + "'") literal_value = ( numeric_literal | string_literal | blob_literal | NULL | CURRENT_TIME | CURRENT_DATE | CURRENT_TIMESTAMP ) bind_parameter = ( Word("?",nums) | Combine(oneOf(": @ $") + parameter_name) ) type_name = oneOf("TEXT REAL INTEGER BLOB NULL") expr_term = ( CAST + LPAR + expr + AS + type_name + RPAR | EXISTS + LPAR + select_stmt + RPAR | function_name + LPAR + Optional(delimitedList(expr)) + RPAR | literal_value | bind_parameter | identifier ) UNARY,BINARY,TERNARY=1,2,3 expr << operatorPrecedence(expr_term, [ (oneOf('- + ~') | NOT, UNARY, opAssoc.LEFT), ('||', BINARY, opAssoc.LEFT), (oneOf('* / %'), BINARY, opAssoc.LEFT), (oneOf('+ -'), BINARY, opAssoc.LEFT), (oneOf('<< >> & |'), BINARY, opAssoc.LEFT), (oneOf('< <= > >='), BINARY, opAssoc.LEFT), (oneOf('= == != <>') | IS | IN | LIKE | GLOB | MATCH | REGEXP, BINARY, opAssoc.LEFT), ('||', BINARY, opAssoc.LEFT), ((BETWEEN,AND), TERNARY, opAssoc.LEFT), ]) compound_operator = (UNION + Optional(ALL) | INTERSECT | EXCEPT) ordering_term = expr + Optional(COLLATE + collation_name) + Optional(ASC | DESC) join_constraint = Optional(ON + expr | USING + LPAR + Group(delimitedList(column_name)) + RPAR) join_op = COMMA | (Optional(NATURAL) + Optional(INNER | CROSS | LEFT + OUTER | LEFT | OUTER) + JOIN) join_source = Forward() single_source = ( (Group(database_name("database") + "." + table_name("table")) | table_name("table")) + Optional(Optional(AS) + table_alias("table_alias")) + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)("index") | (LPAR + select_stmt + RPAR + Optional(Optional(AS) + table_alias)) | (LPAR + join_source + RPAR) ) join_source << single_source + ZeroOrMore(join_op + single_source + join_constraint) result_column = "*" | table_name + "." + "*" | (expr + Optional(Optional(AS) + column_alias)) select_core = (SELECT + Optional(DISTINCT | ALL) + Group(delimitedList(result_column))("columns") + Optional(FROM + join_source) + Optional(WHERE + expr("where_expr")) + Optional((AND + expr("and1_expr"))) + #and put here Optional((AND + expr("and2_expr"))) + #and put here Optional(GROUP + BY + Group(delimitedList(ordering_term)("group_by_terms")) + Optional(HAVING + expr("having_expr")))) select_stmt << (select_core + ZeroOrMore(compound_operator + select_core) + Optional(ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms")) + Optional(LIMIT + (integer + OFFSET + integer | integer + COMMA + integer))) tests = """\ select b,z,a from cuishou where b = 'k' and z > 6 and y = 5 order by b,z """.splitlines() for t in tests: print t try: print select_stmt.parseString(t).dump() #sql_object_parse = select_stmt.parseString(t) #table_name = sql_object_parse.table[0] #print "show create table " + table_name + ";" except ParseException, pe: print pe.msg #print