/* * This file is part of the Yices SMT Solver. * Copyright (C) 2017 SRI International. * * Yices is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Yices is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Yices. If not, see . */ Yices Parser The parser code is in src/yices_parser.c and src/yices_parser.h. It's an encoding of a recursive descent parser. It relies on an automaton defined by types and transition tables contained in 'src/yices_parse_tables.h'. That file is built from 'utils/yices_input_tables.h' and a set of tables generated by 'utils/table_builder.c'. Grammar ------- command ::= ( define-type ) | ( define-type ) | ( define :: ) | ( define :: ) | ( assert ) | ( assert ) | ( exit ) | ( check ) | ( check-assuming ) | ( push ) | ( pop ) | ( reset ) | ( show-model ) | ( eval ) | ( echo ) | ( include ) | ( set-param ) | ( show-param ) | ( show-params ) | ( show-stats ) | ( reset-stats ) | ( set-timeout ) | ( show-timeout ) | ( dump-context ) | ( help ) | ( help ) | ( help ) | ( ef-solve ) | ( export-to-dimacs ) | ( show-implicant ) | ( show-unsat-core ) | ( show-unsat-assumptions ) | ( show-reduced-model ) | EOS immediate-value :: = true | false | rational | symbol typedef ::= | ( scalar ) symbol-list ::= | assumption-list ::= | | | (not ) type ::= int | real | bool | ( bitvector ) | ( tuple ) | ( -> ) expression ::= true | false | | | | | | ( update ( ) ) | ( forall ( ) ) | ( exists ( ) ) | ( lambda ( ) ) | ( let ( ) ) | ( ) function ::= | function-keyword ::= if ite = /= distinct or and not xor <=> => mk-tuple select tuple-update + - * / ^ < <= > >= abs div mod floor ceil divides is_int mk-bv bv-add bv-sub bv-mul bv-neg bv-pow bv-not bv-and bv-or bv-xor bv-shift-left0 bv-shift-left1 bv-shift-right0 bv-shift-right1 bv-ashift-right bv-rotate-left bv-rotate-right bv-extract bv-concat bv-repeat bv-sign-extend bv-zero-extend bv-ge bv-gt bv-le bv-lt bv-sge bv-sgt bv-sle bv-slt bv-shl bv-lshr bv-ashr bv-div bv-rem bv-sdiv bv-srem bv-smod bv-redor bv-redand bv-comp bool-to-bv bit Recursive-descent parser ------------------------ We use four deterministic automata to describe parsing procedures. Edges are labeled by terminals or by a recursive call to a parsing procedure. When several transitions are enabled in the same state, the one listed first has priority (i.e., transitions labeled by a terminal symbol have priority over recursive calls). Example: c9 ) DONE [parse-expression] c4 means if state=c9 then if token=')' then state=DONE else parse-expression(); state=c4 endif parse-command: - initial-state = c0 - transition table c0 ( c1 c1 define-type c2 echo c3 include c3 exit c4 check c4 check-assuming c16 assert c5 define c6 show-model c4 eval c5 push c4 pop c4 reset c4 show-params c4 show-stats c4 reset-stats c4 show-timeout c4 set-param c11 show-param c13 set-timeout c14 help c15 ef-solve c4 export-to-dimacs c3 show-implicant c4 show-unsat-core c4 show-unsat-assumptions c4 show-reduced-model c4 c2 c10 c3 c4 c4 ) DONE c5 [parse-expression] c20 c6 c7 c7 :: c8 c8 [parse-type] c9 c9 ) DONE [parse-expression] c4 c10 ) DONE [parse-typedef] c4 c11 c12 c12 true c4 false c4 c4 c4 c4 c13 c4 c14 c4 c15 c4 c4 ) DONE list of assumptions after '(check-assuming ' c16 ) DONE c16 ( c17 c17 not c18 c18 c19 c19 ) c16 optional label after '(assert [expression] ' c20 ) DONE c4 parse-typedef: share states t2, t3, t4 with parse-type - initial-state = td0 - transition table td0 int DONE real DONE bool DONE DONE ( td1 td1 scalar td2 bit-vector t4 tuple t2 -> t3 td2 td3 td3 td3 ) DONE td4 td5 td5 ) DONE parse-type: - initial state = t0 - transition table t0 int DONE real DONE bool DONE DONE ( t1 t1 bitvector t4 tuple t2 -> t3 t2 [parse-type] t6 t3 [parse-type] t2 t4 t5 t5 ) DONE t6 ) DONE [parse-type] t6 parse-expression: - initial state = e0 - transition table e0 ( e1 true DONE false DONE DONE DONE DONE DONE DONE e1 e2 update e4 forall e10 exists e10 lambda e10 let e15 [parse-expression] e2 e2 [parse-expression] e3 e3 ) DONE [parse-expression] e3 e4 [parse-expression] e5 e5 ( e6 e6 [parse-expression] e7 e7 ) e8 [parse-expression] e7 e8 [parse-expression] e9 e9 ) DONE e10 ( e11 e11 e12 e12 :: e13 e13 [parse-type] e14 e14 ) e8 e12 e15 ( e16 e16 ( e17 e17 e18 e18 [parse-expr] e19 e19 ) e20 e20 ) e8 e18 Implementation: Each recursive procedure can be implemented as state = initial state while (state != DONE) { } Each transition with a terminal token is equivalent to if state= and token= then token=next-token(); state= For recursive calls, we use a stack of states. We push the successor state on the stack, and jump to the initial state of the recursive function. The current token is not consumed. Example: "e2 [parse-expression] e3" is translated to if state=e2 then push e3 on the stack; state=e0 Return from a procedure is executed when state=DONE, and is translated to state=state on top of the stack; pop stack Initially, the stack contains a bottom marker, and token is set by calling next-token() in the lexer, and state is set to the initial state of whatever needs to be parsed (e.g., c0, e0, or t0). Parsing terminates when the bottom marker is popped from the stack (by a return). To avoid reading past the parsed sentence, which is bad in interactive mode, we must not call the next-token() function if state=bottom marker. Optimizations: We can merge equivalent states: c4, t5, e9 (to a new state r0) We can skip intermediate states whose only transition is a recursive call. For example, "e1 e2 [parse-expression] e3" corresponds to executing two transitions: if state=e1 and token= then token=next-token(); state=e2 if state=e2 then push e3 on the stack; state=e0 We can remove e2 and the second transition by rewriting the first one: if state=e1 and token= then token=next-token(); push e3 on the stack; new-state=e0 We can also eliminate the DONE state using the same trick. Full table ---------- The parser is defined by the following table, which defines a move or transition for each pair state/token; 'next' means get next token. The same data is stored in 'utils/yices_input_tables.h' and it's used by 'utils/build_tables.c'. state token action c0 ( next; goto c1 EOS return c1 exit next; goto r0 check next; goto r0 check-assuming next; goto c16 echo next; goto c3 include next; goto c3 assert next; push c20; goto e0 define-type next; goto c2 define next; goto c6 show-model next; goto r0 eval next; push r0; goto e0 push next; goto r0 pop next; goto r0 reset next; goto r0 dump-context next; goto r0 set-param next; goto c11 show-params next; goto r0 show-param next; goto c13 show-stats next; goto r0 reset-stats next; goto r0 show-timeout next; goto r0 set-timeout next; goto c14 help next; goto c15 ef-solve next; goto r0 show-implicant next; goto r0 show-unsat-core next; goto r0 show-unsat-assumptions next; goto r0 show-reduced-model next; goto r0 c2 next; goto c10 c3 next; goto r0 c6 next; goto c7 c7 :: next; push c9; goto t0 c9 ) return _ push r0; goto e0 c10 ) return _ push r0; goto td0 c11 next; goto c12 c12 true next; goto r0 false next; goto r0 next; goto r0 next; goto r0 next; goto r0 --> in (set-param .... ) c13 next; goto r0 c14 next; goto r0 c15 next; goto r0 next; goto r0 ... next; goto r0 --> treat all keywords as symbols here ) return c16 ) return next; goto c16 ( next; goto c17 c17 not next; goto c18 c18 next; goto c19 c19 ) next; goto c16 c20 ) return next; goto r0 td0 int return real return bool return return ( next; goto td1 td1 scalar next; goto td2 bitvector next; goto t4 tuple next; push t6; goto t0 -> next; push t6; push t0; goto t0 td2 next; goto td3 td3 ) return next; t0 int return real return bool return return ( next; goto t1 t1 bitvector next; goto t4 tuple next; push t6; goto t0 -> next; push t6; push t0; goto t0 t4 next; goto r0 t6 ) return _ push t6; goto t0 e0 true return false return return return return return return ( next; goto e1 e1 next; push e3; goto e0 update next; push e5; goto e0 forall next; goto e10 exists next; goto e10 lambda next; goto e10 let next; goto e15 _ push e3; push e0; goto e0 e3 ) return _ push e3; goto e0 e5 ( next; push e7; goto e0 e7 ) next; push r0; goto e0 _ push e7; goto e0 e10 ( next; goto e11 e11 next; goto e12 e12 :: next; push e14; goto t0 e14 ) next; push r0; goto e0 next; goto e12 e15 ( next; goto e16 e16 ( next; goto e17 e17 next; push e19; goto e0 e19 ) next; goto e20 e20 ( next; goto e17 ) next; push r0; goto e0 r0 ) return return means s := pop-state on top of stack if s != bottom next; goto s else exit