Skip to content

Instantly share code, notes, and snippets.

@mzp
Created October 24, 2010 08:35
Show Gist options
  • Save mzp/643321 to your computer and use it in GitHub Desktop.
Save mzp/643321 to your computer and use it in GitHub Desktop.
informal ruby syntax definition from http://wiki.ruby-standard.org/wiki/Lexical_structure
;; Any single source character; the character set is given only by the
;; annotation text, not by a literal pattern.
(define-rule source-character
  (annot "any character in ISO/IEC 646"))

;; End of line: an optional byte 0x0d followed by a mandatory byte 0x0a.
(define-rule line-terminator
  (seq (opt (ascii "0x0d"))
       (ascii "0x0a")))

;; Separator: a literal ";" or a position annotated as a line terminator.
(define-rule separator
  (or (lit ";")
      (annot "''line-terminator'' here")))

;; variable, then "=", then operator-expression; the annotation forbids a
;; line terminator between the variable and the "=".
;; NOTE(review): `variable` and `operator-expression` are not defined among
;; the visible rules -- presumably declared elsewhere.
(define-rule single-variable-assignment-expression
  (seq (seq (seq variable
                 (annot "no ''line-terminator'' here"))
            (lit "="))
       operator-expression))
;; Whitespace: one of the bytes 0x09, 0x0b, 0x0c, 0x0d, 0x20, or a backslash
;; followed by an optional 0x0d and a 0x0a (a backslash-escaped line break).
;; The backslash literal is spelled "\\": a bare "\" escapes the closing
;; quote and leaves the string (and the whole form) unterminated.
(define-rule whitespace
  (or (ascii "0x09")
      (or (ascii "0x0b")
          (or (ascii "0x0c")
              (or (ascii "0x0d")
                  (or (ascii "0x20")
                      (seq (seq (lit "\\")
                                (opt (ascii "0x0d")))
                           (ascii "0x0a"))))))))
;; A comment is either the single-line or the multi-line form.
(define-rule comment
  (or single-line-comment
      multi-line-comment))

;; "#" optionally followed by comment text.
(define-rule single-line-comment
  (seq (lit "#")
       (opt comment-content)))

;; Comment text is simply line content.
(define-rule comment-content
  line-content)

;; One or more source characters.
(define-rule line-content
  (many-1 source-character))

;; Begin line, an optional comment line, then the end line.
;; NOTE(review): `opt` admits at most one body line here -- confirm whether
;; a repetition was intended.
(define-rule multi-line-comment
  (seq (seq multi-line-comment-begin-line
            (opt multi-line-comment-line))
       multi-line-comment-end-line))

;; "=begin" at the beginning of a line, optional trailing text, then a
;; line terminator.
(define-rule multi-line-comment-begin-line
  (seq (seq (seq (annot "beginning of a line")
                 (lit "=begin"))
            (opt rest-of-begin-end-line))
       line-terminator))

;; "=end" at the beginning of a line, optional trailing text, closed by a
;; line terminator or by the end of the program.
(define-rule multi-line-comment-end-line
  (seq (seq (seq (annot "beginning of a line")
                 (lit "=end"))
            (opt rest-of-begin-end-line))
       (or line-terminator
           (annot "end of a program"))))

;; Text after "=begin"/"=end": at least one whitespace, then comment text.
(define-rule rest-of-begin-end-line
  (seq (many-1 whitespace)
       comment-content))

;; A full line: comment text followed by a line terminator.
(define-rule line
  (seq comment-content
       line-terminator))

;; A line inside a multi-line comment.
;; NOTE(review): the trailing `'but not` has no operand, so the excluded
;; alternative is not stated -- it looks lost in conversion; confirm against
;; the grammar this was transcribed from.
(define-rule multi-line-comment-line
  (seq line
       'but not))
;; A token is a reserved word, identifier, punctuator, operator or literal,
;; tried in that order.
(define-rule token
  (or reserved-word
      (or identifier
          (or punctuator
              (or operator
                  literal)))))

;; The 41 reserved words, as a right-nested chain of alternatives in the
;; original order. The closing parentheses are grouped in fives: 40 close
;; the nested (or ...) forms, the last one closes define-rule.
(define-rule reserved-word
  (or (lit "__LINE__") (or (lit "__ENCODING__") (or (lit "__FILE__")
  (or (lit "BEGIN") (or (lit "END")
  (or (lit "alias") (or (lit "and")
  (or (lit "begin") (or (lit "break")
  (or (lit "case") (or (lit "class")
  (or (lit "def") (or (lit "defined?") (or (lit "do")
  (or (lit "else") (or (lit "elsif") (or (lit "end") (or (lit "ensure")
  (or (lit "for") (or (lit "false")
  (or (lit "if") (or (lit "in")
  (or (lit "module")
  (or (lit "next") (or (lit "nil") (or (lit "not")
  (or (lit "or")
  (or (lit "redo") (or (lit "rescue") (or (lit "retry") (or (lit "return")
  (or (lit "self") (or (lit "super")
  (or (lit "then") (or (lit "true")
  (or (lit "undef") (or (lit "unless") (or (lit "until")
  (or (lit "when") (or (lit "while")
      (lit "yield")
  ))))) ))))) ))))) ))))) ))))) ))))) ))))) )))))
  )
;; An identifier is one of six kinds, tried in this order.
(define-rule identifier
  (or local-variable-identifier
      (or global-variable-identifier
          (or class-variable-identifier
              (or instance-variable-identifier
                  (or constant-identifier
                      method-identifier))))))

;; Starts with a lowercase letter or "_", then any identifier characters.
(define-rule local-variable-identifier
  (seq (or lowercase-character
           (lit "_"))
       (many identifier-character)))

;; "$", an identifier start character, then any identifier characters.
(define-rule global-variable-identifier
  (seq (seq (lit "$")
            identifier-start-character)
       (many identifier-character)))

;; "@@", an identifier start character, then any identifier characters.
(define-rule class-variable-identifier
  (seq (seq (lit "@@")
            identifier-start-character)
       (many identifier-character)))

;; "@", an identifier start character, then any identifier characters.
(define-rule instance-variable-identifier
  (seq (seq (lit "@")
            identifier-start-character)
       (many identifier-character)))

;; Starts with an uppercase letter, then any identifier characters.
(define-rule constant-identifier
  (seq uppercase-character
       (many identifier-character)))

;; Method names: the "!"/"?" form, the "=" form, or a plain constant or
;; local-variable identifier.
(define-rule method-identifier
  (or method-only-identifier
      (or assignment-like-method-identifier
          (or constant-identifier
              local-variable-identifier))))

;; A constant or local-variable identifier with a trailing "!" or "?".
(define-rule method-only-identifier
  (seq (or constant-identifier
           local-variable-identifier)
       (or (lit "!")
           (lit "?"))))

;; A constant or local-variable identifier with a trailing "=".
(define-rule assignment-like-method-identifier
  (seq (or constant-identifier
           local-variable-identifier)
       (lit "=")))

;; Characters allowed after the first: letters, decimal digits and "_".
(define-rule identifier-character
  (or lowercase-character
      (or uppercase-character
          (or decimal-digit
              (lit "_")))))

;; Characters allowed first after a sigil: letters and "_" (no digits).
(define-rule identifier-start-character
  (or lowercase-character
      (or uppercase-character
          (lit "_"))))
;; "A".."Z" as a right-nested chain of alternatives.
;; Closers are grouped in fives: 25 close the (or ...) forms, the last one
;; closes define-rule.
(define-rule uppercase-character
  (or (lit "A") (or (lit "B") (or (lit "C") (or (lit "D") (or (lit "E")
  (or (lit "F") (or (lit "G") (or (lit "H") (or (lit "I") (or (lit "J")
  (or (lit "K") (or (lit "L") (or (lit "M") (or (lit "N") (or (lit "O")
  (or (lit "P") (or (lit "Q") (or (lit "R") (or (lit "S") (or (lit "T")
  (or (lit "U") (or (lit "V") (or (lit "W") (or (lit "X") (or (lit "Y")
      (lit "Z")
  ))))) ))))) ))))) ))))) )))))
  )

;; "a".."z", same layout as the uppercase rule (25 + 1 closers).
(define-rule lowercase-character
  (or (lit "a") (or (lit "b") (or (lit "c") (or (lit "d") (or (lit "e")
  (or (lit "f") (or (lit "g") (or (lit "h") (or (lit "i") (or (lit "j")
  (or (lit "k") (or (lit "l") (or (lit "m") (or (lit "n") (or (lit "o")
  (or (lit "p") (or (lit "q") (or (lit "r") (or (lit "s") (or (lit "t")
  (or (lit "u") (or (lit "v") (or (lit "w") (or (lit "x") (or (lit "y")
      (lit "z")
  ))))) ))))) ))))) ))))) )))))
  )

;; "0".."9" (9 nested alternatives + 1 closer for define-rule).
(define-rule decimal-digit
  (or (lit "0") (or (lit "1") (or (lit "2")
  (or (lit "3") (or (lit "4") (or (lit "5")
  (or (lit "6") (or (lit "7") (or (lit "8")
      (lit "9")
  ))))) ))))
  )
;; The 14 punctuators, in the original order, as a right-nested chain of
;; alternatives (13 closers for the (or ...) forms + 1 for define-rule).
(define-rule punctuator
  (or (lit "[") (or (lit "]")
  (or (lit "(") (or (lit ")")
  (or (lit "{") (or (lit "}")
  (or (lit "::") (or (lit ",") (or (lit ";")
  (or (lit "..") (or (lit "...")
  (or (lit "?") (or (lit ":")
      (lit "=>")
  ))))) ))))) )))
  )
;; An operator is either an operator method name or an assignment operator.
(define-rule operator
  (or operator-method-name
      assignment-operator))

;; The 26 operator method names in the original order
;; (25 closers for the (or ...) forms + 1 for define-rule).
(define-rule operator-method-name
  (or (lit "^") (or (lit "&") (or (lit "|")
  (or (lit "<=>") (or (lit "==") (or (lit "===")
  (or (lit "!~") (or (lit "=~")
  (or (lit ">") (or (lit ">=") (or (lit "<") (or (lit "<=")
  (or (lit "<<") (or (lit ">>")
  (or (lit "+") (or (lit "-") (or (lit "*") (or (lit "/") (or (lit "%") (or (lit "**")
  (or (lit "~") (or (lit "+@") (or (lit "-@")
  (or (lit "[]") (or (lit "[]=")
      (lit "`")
  ))))) ))))) ))))) ))))) )))))
  )

;; An assignment operator is an operator name immediately followed by "=".
(define-rule assignment-operator
  (seq assignment-operator-name
       (lit "=")))

;; The 13 operator names that may precede "=" in an assignment operator
;; (12 closers for the (or ...) forms + 1 for define-rule).
(define-rule assignment-operator-name
  (or (lit "+") (or (lit "-") (or (lit "*") (or (lit "**") (or (lit "/")
  (or (lit "^") (or (lit "%")
  (or (lit "<<") (or (lit ">>")
  (or (lit "&") (or (lit "&&") (or (lit "||")
      (lit "|")
  ))))) ))))) ))
  )
;; A literal is numeric, string, array, regular expression or symbol,
;; tried in that order.
(define-rule literal
  (or numeric-literal
      (or string-literal
          (or array-literal
              (or regular-expression-literal
                  symbol)))))

;; A number, with or without a leading sign.
(define-rule numeric-literal
  (or signed-number
      unsigned-number))

;; An unsigned number is an integer or a float.
(define-rule unsigned-number
  (or integer-literal
      float-literal))

;; Integers come in decimal, binary, octal and hexadecimal forms.
(define-rule integer-literal
  (or decimal-integer-literal
      (or binary-integer-literal
          (or octal-integer-literal
              hexadecimal-integer-literal))))

;; Decimal integers: the plain digit form or the prefixed form.
(define-rule decimal-integer-literal
  (or digit-decimal-integer-literal
      prefixed-decimal-integer-literal))
;; Plain decimal integer: either "0", or a non-zero digit followed by any
;; number of further digits, each optionally preceded by a single "_".
;; The digit group is wrapped in `many`: without the repetition the rule
;; matched only "0" or exactly two digits, so neither "5" nor "123" fit.
(define-rule digit-decimal-integer-literal
  (or (lit "0")
      (seq decimal-digit-without-zero
           (many (seq (opt (lit "_"))
                      decimal-digit)))))
;; Prefixed decimal integer: "0", then "d" or "D", then the digit part.
(define-rule prefixed-decimal-integer-literal
  (seq (seq (lit "0")
            (or (lit "d")
                (lit "D")))
       digit-decimal-part))
;; Digit run: one decimal digit followed by any number of further digits,
;; each optionally preceded by a single "_".
;; Two fixes: the first element refers to the `decimal-digit` rule instead
;; of matching the literal text "decimal-digit", and the trailing group is
;; repeated with `many` so runs longer than two digits are accepted.
(define-rule digit-decimal-part
  (seq decimal-digit
       (many (seq (opt (lit "_"))
                  decimal-digit))))
;; Binary integer: "0", "b" or "B", one binary digit, then any number of
;; further binary digits, each optionally preceded by a single "_".
;; The trailing group is wrapped in `many`; without it the rule required
;; exactly two digits after the prefix.
(define-rule binary-integer-literal
  (seq (seq (seq (lit "0")
                 (or (lit "b")
                     (lit "B")))
            binary-digit)
       (many (seq (opt (lit "_"))
                  binary-digit))))
;; Octal integer: "0", an optional "_", "o" or "O", one octal digit, then
;; any number of further octal digits, each optionally preceded by "_".
;; The previous form stopped after the prefix, so it matched only "0_",
;; "0o" or "0O" and never consumed a digit.
;; NOTE(review): the digit tail is reconstructed to mirror the binary and
;; hexadecimal rules, and the prefix is made optional so that forms such as
;; "017" fit -- confirm against the grammar this was transcribed from.
(define-rule octal-integer-literal
  (seq (seq (seq (lit "0")
                 (opt (or (lit "_")
                          (or (lit "o")
                              (lit "O")))))
            octal-digit)
       (many (seq (opt (lit "_"))
                  octal-digit))))
;; Hexadecimal integer: "0", "x" or "X", one hexadecimal digit, then any
;; number of further hexadecimal digits, each optionally preceded by "_".
;; The trailing group is wrapped in `many`; without it the rule required
;; exactly two digits after the prefix.
(define-rule hexadecimal-integer-literal
  (seq (seq (seq (lit "0")
                 (or (lit "x")
                     (lit "X")))
            hexadecimal-digit)
       (many (seq (opt (lit "_"))
                  hexadecimal-digit))))
;; A float is written in plain decimal form or in exponent form.
(define-rule float-literal
  (or decimal-float-literal
      exponent-float-literal))

;; Integer part, ".", then the fractional digit run.
(define-rule decimal-float-literal
  (seq (seq digit-decimal-integer-literal
            (lit "."))
       digit-decimal-part))

;; A base part followed by an exponent part.
(define-rule exponent-float-literal
  (seq base-part
       exponent-part))

;; The base of an exponent float: a decimal float or a plain integer.
(define-rule base-part
  (or decimal-float-literal
      digit-decimal-integer-literal))
;; Exponent: "e" or "E", an optional "+" or "-", then the exponent digits.
;; The previous form ended after a mandatory sign, so it matched "e+" or
;; "E-" but carried no digits and rejected unsigned exponents.
;; NOTE(review): the optional sign and the `digit-decimal-part` tail are
;; reconstructed -- confirm against the grammar this was transcribed from.
(define-rule exponent-part
  (seq (seq (or (lit "e")
                (lit "E"))
            (opt (or (lit "+")
                     (lit "-"))))
       digit-decimal-part))
;; A signed number: "+" or "-" followed by an unsigned number.
(define-rule signed-number
  (seq (or (lit "+")
           (lit "-"))
       unsigned-number))

;; "1".."9" (8 closers for the (or ...) forms + 1 for define-rule).
(define-rule decimal-digit-without-zero
  (or (lit "1") (or (lit "2") (or (lit "3")
  (or (lit "4") (or (lit "5") (or (lit "6")
  (or (lit "7") (or (lit "8")
      (lit "9")
  ))))) )))
  )

;; "0".."7" (7 closers for the (or ...) forms + 1 for define-rule).
(define-rule octal-digit
  (or (lit "0") (or (lit "1") (or (lit "2") (or (lit "3")
  (or (lit "4") (or (lit "5") (or (lit "6")
      (lit "7")
  ))))) ))
  )

;; "0" or "1".
(define-rule binary-digit
  (or (lit "0")
      (lit "1")))

;; A decimal digit, or one of "a".."f" / "A".."F"
;; (12 closers for the (or ...) forms + 1 for define-rule).
(define-rule hexadecimal-digit
  (or decimal-digit
  (or (lit "a") (or (lit "b") (or (lit "c") (or (lit "d") (or (lit "e") (or (lit "f")
  (or (lit "A") (or (lit "B") (or (lit "C") (or (lit "D") (or (lit "E")
      (lit "F")
  ))))) ))))) ))
  )
;; The six string literal forms, tried in this order.
(define-rule string-literal
  (or single-quoted-string
      (or double-quoted-string
          (or quoted-non-expanded-literal-string
              (or quoted-expanded-literal-string
                  (or here-document
                      external-command-execution))))))

;; "'" ... "'" enclosing any number of single-quoted string characters.
(define-rule single-quoted-string
  (seq (seq (lit "'")
            (many single-quoted-string-character))
       (lit "'")))

;; Inside single quotes: a plain character or an escape sequence.
(define-rule single-quoted-string-character
  (or non-escaped-single-quoted-string-character
      single-quoted-escape-sequence))

;; The two shapes of a backslash sequence inside single quotes.
(define-rule single-quoted-escape-sequence
  (or single-escape-character-sequence
      non-escaped-single-quoted-string-character-sequence))
;; In all three rules below the backslash literal is spelled "\\": a bare
;; "\" escapes the closing quote and leaves the string unterminated.

;; Backslash followed by a character that is actually escaped.
(define-rule single-escape-character-sequence
  (seq (lit "\\")
       single-escaped-character))

;; Backslash followed by a character that is not one of the escapable ones.
(define-rule non-escaped-single-quoted-string-character-sequence
  (seq (lit "\\")
       non-escaped-single-quoted-string-character))

;; The characters that can be escaped inside single quotes: "'" and "\".
(define-rule single-escaped-character
  (or (lit "'")
      (lit "\\")))
;; A source character minus some excluded set.
;; NOTE(review): the trailing `'but not` has no operand, so the excluded
;; characters are not stated -- looks lost in conversion; confirm against
;; the grammar this was transcribed from.
(define-rule non-escaped-single-quoted-string-character
  (seq source-character
       'but not))
;; A double-quote character, any number of double-quoted string characters,
;; and a closing double-quote character.
;; The quote literal is spelled "\"": the previous three-quote spelling
;; reads as an empty string followed by the start of a new, unterminated one.
(define-rule double-quoted-string
  (seq (seq (lit "\"")
            (many double-quoted-string-character))
       (lit "\"")))
;; A source character minus some excluded set.
;; NOTE(review): the trailing `'but not` has no operand, so the excluded
;; characters are not stated -- looks lost in conversion; confirm against
;; the grammar this was transcribed from.
(define-rule double-quoted-string-character
  (seq source-character
       'but not))

;; The six kinds of backslash sequence recognised in double-quoted context,
;; tried in this order.
(define-rule double-escape-sequence
  (or simple-escape-sequence
      (or non-escaped-sequence
          (or line-terminator-escape-sequence
              (or octal-escape-sequence
                  (or hex-escape-sequence
                      control-escape-sequence))))))
;; In all three rules below the backslash literal is spelled "\\": a bare
;; "\" escapes the closing quote and leaves the string unterminated.

;; Backslash followed by one of the double-escaped characters.
(define-rule simple-escape-sequence
  (seq (lit "\\")
       double-escaped-character))

;; Backslash followed by a character with no escape meaning.
(define-rule non-escaped-sequence
  (seq (lit "\\")
       non-escaped-double-quoted-string-character))

;; Backslash followed by a line terminator.
(define-rule line-terminator-escape-sequence
  (seq (lit "\\")
       line-terminator))
;; A source character minus some excluded set.
;; NOTE(review): the trailing `'but not` has no operand, so the excluded
;; characters are not stated -- looks lost in conversion; confirm against
;; the grammar this was transcribed from.
(define-rule non-escaped-double-quoted-string-character
  (seq source-character
       'but not))
;; In the rules below the backslash literal is spelled "\\": a bare "\"
;; escapes the closing quote and leaves the string unterminated.

;; Characters that may follow a backslash in a simple escape: "\" itself
;; and n t r f v a e b s
;; (9 closers for the (or ...) forms + 1 for define-rule).
(define-rule double-escaped-character
  (or (lit "\\")
  (or (lit "n") (or (lit "t") (or (lit "r")
  (or (lit "f") (or (lit "v") (or (lit "a")
  (or (lit "e") (or (lit "b")
      (lit "s")
  ))))) ))))
  )

;; Backslash followed by one to three octal digits.
;; The second/third digit group is wrapped in `opt`; without it the rule
;; demanded at least two digits and rejected a one-digit escape.
;; NOTE(review): reconstructed -- confirm against the grammar this was
;; transcribed from.
(define-rule octal-escape-sequence
  (seq (seq (lit "\\")
            octal-digit)
       (opt (seq octal-digit
                 (opt octal-digit)))))

;; Backslash, "x", then one or two hexadecimal digits.
(define-rule hex-escape-sequence
  (seq (seq (seq (lit "\\")
                 (lit "x"))
            hexadecimal-digit)
       (opt hexadecimal-digit)))

;; Backslash, then "C-" or "c", then the controlled character.
(define-rule control-escape-sequence
  (seq (seq (lit "\\")
            (or (seq (lit "C")
                     (lit "-"))
                (lit "c")))
       control-escaped-character))
;; The target of a control escape: another double-quote escape sequence,
;; "?", or a source character minus some excluded set.
;; NOTE(review): the trailing `'but not` has no operand, so the excluded
;; characters are not stated -- looks lost in conversion; confirm against
;; the grammar this was transcribed from.
(define-rule control-escaped-character
  (or double-escape-sequence
      (or (lit "?")
          (seq source-character
               'but not))))

;; Interpolation: "#" followed by a global, class or instance variable
;; identifier, or "#{" compound-statement "}".
;; NOTE(review): `compound-statement` is not defined among the visible
;; rules -- presumably declared elsewhere.
(define-rule interpolated-character-sequence
  (or (seq (lit "#")
           global-variable-identifier)
      (or (seq (lit "#")
               class-variable-identifier)
          (or (seq (lit "#")
                   instance-variable-identifier)
              (seq (seq (seq (lit "#")
                             (lit "{"))
                        compound-statement)
                   (lit "}"))))))

;; "%q", an opening delimiter, any number of non-expanded string pieces,
;; and the matching closing delimiter.
(define-rule quoted-non-expanded-literal-string
  (seq (seq (seq (lit "%q")
                 literal-beginning-delimiter)
            (many non-expanded-literal-string))
       literal-ending-delimiter))

;; One piece of a non-expanded string: a character or a nested delimited
;; string.
(define-rule non-expanded-literal-string
  (or non-expanded-literal-character
      non-expanded-delimited-string))

;; A nested delimiter pair enclosing further non-expanded pieces.
(define-rule non-expanded-delimited-string
  (seq (seq literal-beginning-delimiter
            (many non-expanded-literal-string))
       literal-ending-delimiter))

;; A plain character or an escape sequence in a non-expanded string.
(define-rule non-expanded-literal-character
  (or non-escaped-literal-character
      non-expanded-literal-escape-sequence))

;; A source character minus some excluded set.
;; NOTE(review): operand of `'but not` is missing here as well.
(define-rule non-escaped-literal-character
  (seq source-character
       'but not))

;; The two shapes of a backslash sequence in a non-expanded string.
(define-rule non-expanded-literal-escape-sequence
  (or non-expanded-literal-escape-character-sequence
      non-escaped-non-expanded-literal-character-sequence))
;; In the rules below the backslash literal is spelled "\\": a bare "\"
;; escapes the closing quote and leaves the string unterminated.

;; Backslash followed by a character that is actually escaped.
(define-rule non-expanded-literal-escape-character-sequence
  (seq (lit "\\")
       non-expanded-literal-escaped-character))

;; Escapable characters in a non-expanded string: either delimiter, or "\".
(define-rule non-expanded-literal-escaped-character
  (or literal-beginning-delimiter
      (or literal-ending-delimiter
          (lit "\\"))))

;; Alias for the escapable-character rule above.
(define-rule quoted-literal-escape-character
  non-expanded-literal-escaped-character)

;; Backslash followed by a character that is not escapable.
(define-rule non-escaped-non-expanded-literal-character-sequence
  (seq (lit "\\")
       non-escaped-non-expanded-literal-character))
;; A source character minus some excluded set.
;; NOTE(review): the trailing `'but not` has no operand, so the excluded
;; characters are not stated -- looks lost in conversion; confirm against
;; the grammar this was transcribed from.
(define-rule non-escaped-non-expanded-literal-character
  (seq source-character
       'but not))

;; "%" with an optional "Q", an opening delimiter, any number of expanded
;; string pieces, and the matching closing delimiter.
(define-rule quoted-expanded-literal-string
  (seq (seq (seq (seq (lit "%")
                      (opt (lit "Q")))
                 literal-beginning-delimiter)
            (many expanded-literal-string))
       literal-ending-delimiter))

;; One piece of an expanded string: a character or a nested delimited
;; string.
(define-rule expanded-literal-string
  (or expanded-literal-character
      expanded-delimited-string))

;; A plain character, a double-quote style escape, or an interpolation.
(define-rule expanded-literal-character
  (or non-escaped-literal-character
      (or double-escape-sequence
          interpolated-character-sequence)))

;; A nested delimiter pair enclosing further expanded pieces.
(define-rule expanded-delimited-string
  (seq (seq literal-beginning-delimiter
            (many expanded-literal-string))
       literal-ending-delimiter))

;; The opening delimiter: a source character minus some excluded set.
;; NOTE(review): operand of `'but not` is missing here as well.
(define-rule literal-beginning-delimiter
  (seq source-character
       'but not))

;; Whitespace, line terminators, letters and decimal digits.
(define-rule alpha-numeric-character-or-separator
  (or whitespace
      (or line-terminator
          (or uppercase-character
              (or lowercase-character
                  decimal-digit)))))

;; The closing delimiter is described only by annotation: it depends on
;; which opening delimiter was used.
(define-rule literal-ending-delimiter
  (annot "depending on the ''literal-beginning-delimiter''"))

;; The four opening bracket characters: "(", "{", "<", "[".
(define-rule matching-literal-beginning-delimiter
  (or (lit "(")
      (or (lit "{")
          (or (lit "<")
              (lit "[")))))
;; A here document: start line, body, end line.
(define-rule here-document
  (seq (seq heredoc-start-line
            heredoc-body)
       heredoc-end-line))

;; The signifier followed by the remainder of that line.
(define-rule heredoc-start-line
  (seq heredoc-signifier
       rest-of-line))

;; "<<" followed by the delimiter specifier.
(define-rule heredoc-signifier
  (seq (lit "<<")
       heredoc-delimiter-specifier))

;; Optional line content followed by a line terminator.
(define-rule rest-of-line
  (seq (opt line-content)
       line-terminator))

;; Zero or more body lines.
(define-rule heredoc-body
  (many heredoc-body-line))

;; A line minus some excluded alternative.
;; NOTE(review): the trailing `'but not` has no operand, so the exclusion
;; is not stated -- looks lost in conversion; confirm against the grammar
;; this was transcribed from.
(define-rule heredoc-body-line
  (seq line
       'but not))

;; An optional "-" followed by the delimiter.
(define-rule heredoc-delimiter-specifier
  (seq (opt (lit "-"))
       heredoc-delimiter))

;; The four delimiter forms: bare, single-, double- and command-quoted.
(define-rule heredoc-delimiter
  (or non-quoted-delimiter
      (or single-quoted-delimiter
          (or double-quoted-delimiter
              command-quoted-delimiter))))

;; A bare delimiter is just its identifier.
(define-rule non-quoted-delimiter
  non-quoted-delimiter-identifier)

;; Zero or more identifier characters.
(define-rule non-quoted-delimiter-identifier
  (many identifier-character))

;; "'" ... "'" enclosing any number of delimiter characters.
(define-rule single-quoted-delimiter
  (seq (seq (lit "'")
            (many single-quoted-delimiter-identifier))
       (lit "'")))

;; A source character minus some excluded set.
;; NOTE(review): operand of `'but not` is missing here as well.
(define-rule single-quoted-delimiter-identifier
  (seq source-character
       'but not))
;; A double-quote character, any number of delimiter characters, and a
;; closing double-quote character.
;; The quote literal is spelled "\"": the previous three-quote spelling
;; reads as an empty string followed by the start of a new, unterminated one.
(define-rule double-quoted-delimiter
  (seq (seq (lit "\"")
            (many double-quoted-delimiter-identifier))
       (lit "\"")))
;; A source character minus some excluded set.
;; NOTE(review): the trailing `'but not` has no operand, so the excluded
;; characters are not stated -- looks lost in conversion; confirm against
;; the grammar this was transcribed from.
(define-rule double-quoted-delimiter-identifier
  (seq source-character
       'but not))

;; "`" ... "`" enclosing any number of delimiter characters.
(define-rule command-quoted-delimiter
  (seq (seq (lit "`")
            (many command-quoted-delimiter-identifier))
       (lit "`")))

;; A source character minus some excluded set.
;; NOTE(review): operand of `'but not` is missing here as well.
(define-rule command-quoted-delimiter-identifier
  (seq source-character
       'but not))

;; The closing line of a here document, indented or not.
(define-rule heredoc-end-line
  (or indented-heredoc-end-line
      non-indented-heredoc-end-line))

;; Beginning of a line, any amount of whitespace, the delimiter
;; identifier, then a line terminator.
(define-rule indented-heredoc-end-line
  (seq (seq (seq (annot "beginning of a line")
                 (many whitespace))
            heredoc-delimiter-identifier)
       line-terminator))

;; Beginning of a line, the delimiter identifier, then a line terminator.
(define-rule non-indented-heredoc-end-line
  (seq (seq (annot "beginning of a line")
            heredoc-delimiter-identifier)
       line-terminator))

;; Any of the four delimiter identifier forms.
(define-rule heredoc-delimiter-identifier
  (or non-quoted-delimiter-identifier
      (or single-quoted-delimiter-identifier
          (or double-quoted-delimiter-identifier
              command-quoted-delimiter-identifier))))

;; External command execution: the backquoted form or the %x form.
(define-rule external-command-execution
  (or backquoted-external-command-execution
      quoted-external-command-execution))
;; "`", any number of double-quoted string characters, and a closing "`".
;; The closing backquote is a (lit "`") terminal like the opening one; the
;; previous bare ` token was not a string and was read as a quasiquote
;; prefix applied to the closing parenthesis.
(define-rule backquoted-external-command-execution
  (seq (seq (lit "`")
            (many double-quoted-string-character))
       (lit "`")))
;; "%x", an opening delimiter, any number of expanded string pieces, and
;; the matching closing delimiter.
(define-rule quoted-external-command-execution
  (seq (seq (seq (lit "%x")
                 literal-beginning-delimiter)
            (many expanded-literal-string))
       literal-ending-delimiter))

;; Array literals: the non-expanded or the expanded constructor.
(define-rule array-literal
  (or quoted-non-expanded-array-constructor
      quoted-expanded-array-constructor))

;; "%w", an opening delimiter, the array content, and the closing delimiter.
(define-rule quoted-non-expanded-array-constructor
  (seq (seq (seq (lit "%w")
                 literal-beginning-delimiter)
            non-expanded-array-content)
       literal-ending-delimiter))

;; Optional leading separators, an optional item list, optional trailing
;; separators.
(define-rule non-expanded-array-content
  (seq (seq (opt quoted-array-item-separator-list)
            (opt non-expanded-array-item-list))
       (opt quoted-array-item-separator-list)))
;; One item followed by any number of (separator-list, item) pairs.
;; The pair is wrapped in `many`; without the repetition the rule matched
;; exactly two items, so one-item and three-item arrays did not fit.
(define-rule non-expanded-array-item-list
  (seq non-expanded-array-item
       (many (seq quoted-array-item-separator-list
                  non-expanded-array-item))))
;; One or more item separators.
(define-rule quoted-array-item-separator-list
  (many-1 quoted-array-item-separator))

;; An item separator is whitespace or a line terminator.
(define-rule quoted-array-item-separator
  (or whitespace
      line-terminator))

;; An item is one or more item characters.
(define-rule non-expanded-array-item
  (many-1 non-expanded-array-item-character))

;; An item character is a plain character or an escape sequence.
(define-rule non-expanded-array-item-character
  (or non-escaped-array-item-character
      non-expanded-array-escape-sequence))

;; A plain item character: an array character or a bracket delimiter.
(define-rule non-escaped-array-item-character
  (or non-escaped-array-character
      matching-literal-delimiter))

;; A non-escaped literal character minus some excluded set.
;; NOTE(review): the trailing `'but not` has no operand, so the exclusion
;; is not stated -- looks lost in conversion; confirm against the grammar
;; this was transcribed from.
(define-rule non-escaped-array-character
  (seq non-escaped-literal-character
       'but not))

;; The eight bracket characters, openers first then closers
;; (7 closers for the (or ...) forms + 1 for define-rule).
(define-rule matching-literal-delimiter
  (or (lit "(") (or (lit "{") (or (lit "<") (or (lit "[")
  (or (lit ")") (or (lit "}") (or (lit ">")
      (lit "]")
  ))))) ))
  )

;; A non-expanded escape sequence minus some excluded alternative.
;; NOTE(review): operand of `'but not` is missing here as well.
(define-rule non-expanded-array-escape-sequence
  (seq non-expanded-literal-escape-sequence
       'but not))
;; A backslash followed by an array item separator.
;; The backslash literal is spelled "\\": a bare "\" escapes the closing
;; quote and leaves the string unterminated.
(define-rule escaped-quoted-array-item-separator
  (seq (lit "\\")
       quoted-array-item-separator))
;; "%W", an opening delimiter, the array content, and the closing delimiter.
(define-rule quoted-expanded-array-constructor
  (seq (seq (seq (lit "%W")
                 literal-beginning-delimiter)
            expanded-array-content)
       literal-ending-delimiter))

;; Optional leading separators, an optional item list, optional trailing
;; separators.
(define-rule expanded-array-content
  (seq (seq (opt quoted-array-item-separator-list)
            (opt expanded-array-item-list))
       (opt quoted-array-item-separator-list)))
;; One item followed by any number of (separator-list, item) pairs.
;; Two fixes: the (separator-list, item) pair is grouped in its own
;; two-argument `seq`, matching the binary `seq` used by every other rule
;; in this file, and that pair is wrapped in `many` so that lists of one,
;; two or more items are all accepted instead of exactly two.
(define-rule expanded-array-item-list
  (seq expanded-array-item
       (many (seq quoted-array-item-separator-list
                  expanded-array-item))))
;; An item is one or more item characters.
(define-rule expanded-array-item
  (many-1 expanded-array-item-character))

;; An item character: a plain character, an escape, or an interpolation.
(define-rule expanded-array-item-character
  (or non-escaped-array-item-character
      (or expanded-array-escape-sequence
          interpolated-character-sequence)))

;; A double-quote style escape minus some excluded alternative.
;; NOTE(review): the trailing `'but not` has no operand, so the exclusion
;; is not stated -- looks lost in conversion; confirm against the grammar
;; this was transcribed from.
(define-rule expanded-array-escape-sequence
  (seq double-escape-sequence
       'but not))

;; Regular expression literal: "/" body "/" options, or "%r" with
;; delimiters around expanded string pieces, then options.
(define-rule regular-expression-literal
  (or (seq (seq (seq (lit "/")
                     regular-expression-body)
                (lit "/"))
           (many regular-expression-option))
      (seq (seq (seq (seq (lit "%r")
                          literal-beginning-delimiter)
                     (many expanded-literal-string))
                literal-ending-delimiter)
           (many regular-expression-option))))

;; Zero or more regular expression characters.
(define-rule regular-expression-body
  (many regular-expression-character))

;; A source character minus some excluded set.
;; NOTE(review): operand of `'but not` is missing here as well.
(define-rule regular-expression-character
  (seq source-character
       'but not))

;; Option letters: "i" or "m".
(define-rule regular-expression-option
  (or (lit "i")
      (lit "m")))

;; A symbol is either a symbol literal or a dynamic symbol.
(define-rule symbol
  (or symbol-literal
      dynamic-symbol))

;; ":" followed by a symbol name.
(define-rule symbol-literal
  (seq (lit ":")
       symbol-name))

;; ":" followed by a single- or double-quoted string, or "%s" with
;; delimiters around non-expanded string pieces.
(define-rule dynamic-symbol
  (or (seq (lit ":")
           single-quoted-string)
      (or (seq (lit ":")
               double-quoted-string)
          (seq (seq (seq (lit "%s")
                         literal-beginning-delimiter)
                    (many non-expanded-literal-string))
               literal-ending-delimiter))))

;; What may follow ":" in a symbol literal, tried in this order.
(define-rule symbol-name
  (or method-identifier
      (or operator-method-name
          (or reserved-word
              (or instance-variable-identifier
                  (or global-variable-identifier
                      class-variable-identifier))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment