Skip to content

Instantly share code, notes, and snippets.

@jimsynz
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jimsynz/debd983235429bc5885f to your computer and use it in GitHub Desktop.
Save jimsynz/debd983235429bc5885f to your computer and use it in GitHub Desktop.
# Gems for this project: oedipus_lex generates the scanner, rake drives the
# build, and the rspec-* gems run the specs.
source 'https://rubygems.org'

%w[
  oedipus_lex
  rake
  rspec-core
  rspec-mocks
  rspec-expectations
].each { |gem_name| gem gem_name }
GEM
remote: https://rubygems.org/
specs:
diff-lcs (1.2.5)
oedipus_lex (2.3.1)
rake (10.3.2)
rspec-core (3.0.2)
rspec-support (~> 3.0.0)
rspec-expectations (3.0.2)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.0.0)
rspec-mocks (3.0.2)
rspec-support (~> 3.0.0)
rspec-support (3.0.2)
PLATFORMS
ruby
DEPENDENCIES
oedipus_lex
rake
rspec-core
rspec-expectations
rspec-mocks
require './lexer.rex'
# Hand-written half of the lexer. The scanning rules live in lexer.rex; this
# part adds token enumeration and indentation tracking on top of the generated
# scanner methods (#parse, #next_token).
class Lexer
  # Number of spaces that make up one indentation level. A tab counts as one
  # full level.
  INDENT_WIDTH = 2

  # Serves queued INDENT/OUTDENT tokens before asking the scanner for more
  # input. It is prepended so that it wraps the generated #next_token
  # regardless of which file defines the class first.
  module DentQueue
    # @return [Array, nil] the next queued dent token, else whatever the
    #   wrapped #next_token returns
    def next_token
      pending_dents.shift || super
    end
  end
  prepend DentQueue

  # Current indentation depth, counted in levels (not characters).
  attr_accessor :indent_level

  # @param str [String] source text to tokenize
  def initialize(str)
    super()
    @indent_level = 0
    parse str
  end

  # @return [Enumerator] lazily yields tokens until #next_token returns nil
  def each
    Enumerator.new do |yielder|
      while (token = next_token)
        yielder << token
      end
    end
  end

  # @return [Array<Array>] every remaining token
  def tokens
    each.to_a
  end

  # Rule action for "newline(s) followed by leading whitespace".
  #
  # Compares the new line's depth with #indent_level, records the new depth,
  # and produces one INDENT/OUTDENT token per level of difference. A rule
  # action may hand back only a single token, so the first one is returned
  # and the rest are queued for the following #next_token calls.
  #
  # @param text [String] the matched newline(s) plus leading spaces/tabs
  # @return [Array, nil] the first [:INDENT, ' '] / [:OUTDENT, ' '] token, or
  #   nil when the depth did not change (nil tells the scanner to keep going)
  # @raise [RuntimeError] when the width is not a multiple of INDENT_WIDTH
  def in_or_out_dent(text)
    width = text.delete("\n").gsub("\t", ' ' * INDENT_WIDTH).size
    raise "Invalid indent level of #{width} spaces" unless (width % INDENT_WIDTH).zero?

    depth = width / INDENT_WIDTH
    delta = depth - @indent_level
    # Remember the new depth; otherwise every later line at this depth would
    # be reported as a fresh indent.
    @indent_level = depth
    return if delta.zero?

    kind = delta > 0 ? :INDENT : :OUTDENT
    dents = Array.new(delta.abs) { [kind, ' '] }
    pending_dents.concat(dents.drop(1))
    dents.first
  end

  # Hook invoked by the generated #parse. Deliberately empty: tokens are
  # pulled on demand through #each / #next_token instead.
  def do_parse; end

  private

  # Dent tokens produced by #in_or_out_dent that have not been handed out yet.
  def pending_dents
    @pending_dents ||= []
  end
end
class Lexer
macros
  IDENTIFIER            /[a-zA-Z_][a-zA-Z_0-9]*/
  INT                   /(0|[1-9][0-9]*)/
rules
  # Rules are tried top to bottom at the current scan position and the first
  # match wins, so more specific patterns must stay above more general ones.

  # Quoted strings: the opening quote switches state, the body becomes one
  # STRING token, and the closing quote switches back to the default state.
                        /'/                 :SINGLE_TICK_STRING
  :SINGLE_TICK_STRING   /[^']+/             { [ :STRING, text ] }
  :SINGLE_TICK_STRING   /'/                 nil
                        /"/                 :DOUBLE_TICK_STRING
  :DOUBLE_TICK_STRING   /[^"]+/             { [ :STRING, text ] }
  :DOUBLE_TICK_STRING   /"/                 nil

  # Keyword. The word boundary keeps identifiers that merely start with
  # "def" (default, define, ...) from being split into DEF + IDENTIFIER.
                        /def\b/             { [ :DEF, text ] }

  # Numbers: FLOAT and the 0x/0b forms must precede the plain INT rule.
                        /#{INT}\.[0-9]+/    { [ :FLOAT, text ] }
                        /0x[0-9a-fA-F]+/    { [ :INTEGER, text.to_i(16) ] }
                        /0b[01]+/           { [ :INTEGER, text.to_i(2) ] }
                        /#{INT}/            { [ :INTEGER, text.to_i ] }

  # Comment token; its text includes any whitespace in front of the '#'.
                        /\s*(\#.*)/         { [ :COMMENT, text ] }

  # Names: ":foo" is a SYMBOL, "foo:" a SIGNATURE, bare "foo" an IDENTIFIER.
                        /:#{IDENTIFIER}/    { [ :SYMBOL, text ] }
                        /#{IDENTIFIER}\:/   { [ :SIGNATURE, text ] }
                        /#{IDENTIFIER}/     { [ :IDENTIFIER, text ] }

  # Punctuation and operators ("**" before "*").
                        /\./                { [ :DOT, text ] }
                        /\:/                { [ :COLON, text ] }
                        /\=/                { [ :EQUAL, text ] }
                        /\+/                { [ :PLUS, text ] }
                        /\-/                { [ :MINUS, text ] }
                        /\*\*/              { [ :EXPO, text ] }
                        /\*/                { [ :ASTERISK, text ] }
                        /\//                { [ :FWD_SLASH, text ] }
                        /%/                 { [ :PERCENT, text ] }
                        /\(/                { [ :OPAREN, text ] }
                        /\)/                { [ :CPAREN, text ] }

  # Indentation: newline(s) followed by at least one space or tab.
  # NOTE(review): a newline followed directly by text (column 0) does not
  # match this rule and falls through to the whitespace rule below, so a
  # dedent back to column 0 is never reported to in_or_out_dent.
                        /\n+[\ \t]+/        in_or_out_dent

  # Any other whitespace is skipped.
                        /\s+/
# encoding: UTF-8
#--
# This file is automatically generated. Do not modify it.
# Generated by: oedipus_lex version 2.3.1.
# Source: lexer.rex
#++
# Scanner generated by oedipus_lex from lexer.rex (see the file header, which
# marks this file as generated and not to be edited by hand).
class Lexer
require 'strscan'
# Macros from lexer.rex, interpolated into the rule patterns in #next_token.
IDENTIFIER = /[a-zA-Z_][a-zA-Z_0-9]*/
INT = /(0|[1-9][0-9]*)/
# Raised by #next_token when no rule matches or the state is unknown.
class ScanError < StandardError ; end
# Set to 1 by #parse; nothing in this class advances it afterwards.
attr_accessor :lineno
# Path recorded by #parse_file.
attr_accessor :filename
# Scanner instance (built from #scanner_class) holding the input and position.
attr_accessor :ss
# Current lexer state; #next_token handles nil, :SINGLE_TICK_STRING and
# :DOUBLE_TICK_STRING and raises ScanError for anything else.
attr_accessor :state
# #match is a second name for the scanner accessor.
alias :match :ss
# Capture groups 1..9 of the scanner's last match, with trailing nils removed.
def matches
m = (1..9).map { |i| ss[i] }
m.pop until m[-1] or m.empty?
m
end
# Runs the block attached to a rule and returns its value.
def action
yield
end
# Class used to build the scanner. Only defined here when Lexer does not
# already define its own #scanner_class.
def scanner_class
StringScanner
end unless instance_methods(false).map(&:to_s).include?("scanner_class")
# Starts lexing +str+: builds a fresh scanner, resets lineno to 1, keeps any
# state that is already set, then calls #do_parse and returns its result.
# #do_parse is not defined in this class and must be provided elsewhere.
def parse str
self.ss = scanner_class.new str
self.lineno = 1
self.state ||= nil
do_parse
end
# Records +path+ as the filename, reads the whole file and passes it to #parse.
# NOTE(review): this uses Kernel#open rather than File.open; on Ruby versions
# where Kernel#open treats a leading "|" as a command, +path+ must be trusted.
def parse_file path
self.filename = path
open path do |f|
parse f.read
end
end
# Returns the next token, or nil once the input is exhausted.
#
# The rules for the current state are tried in the order listed, anchored at
# the current scan position; the first pattern that matches wins. A rule that
# produces nil (plain whitespace) is skipped and scanning continues. A quote
# produces [:state, new_state], which switches #state and is also returned to
# the caller like any other token.
#
# Raises ScanError when nothing matches or the state is undefined, and a
# RuntimeError ("bad lexical result") when a rule produces anything other than
# nil or an Array with at least two elements.
def next_token
token = nil
until ss.eos? or token do
token =
case state
# Default state: everything outside of quoted strings.
when nil then
case
when text = ss.scan(/'/) then
[:state, :SINGLE_TICK_STRING]
when text = ss.scan(/"/) then
[:state, :DOUBLE_TICK_STRING]
when text = ss.scan(/def/) then
action { [ :DEF, text ] }
when text = ss.scan(/#{INT}\.[0-9]+/) then
action { [ :FLOAT, text ] }
when text = ss.scan(/0x[0-9a-fA-F]+/) then
action { [ :INTEGER, text.to_i(16) ] }
when text = ss.scan(/0b[01]+/) then
action { [ :INTEGER, text.to_i(2) ] }
when text = ss.scan(/#{INT}/) then
action { [ :INTEGER, text.to_i ] }
when text = ss.scan(/\s*(\#.*)/) then
action { [ :COMMENT, text ] }
when text = ss.scan(/:#{IDENTIFIER}/) then
action { [ :SYMBOL, text ] }
when text = ss.scan(/#{IDENTIFIER}\:/) then
action { [ :SIGNATURE, text ] }
when text = ss.scan(/#{IDENTIFIER}/) then
action { [ :IDENTIFIER, text ] }
when text = ss.scan(/\./) then
action { [ :DOT, text ] }
when text = ss.scan(/\:/) then
action { [ :COLON, text ] }
when text = ss.scan(/\=/) then
action { [ :EQUAL, text ] }
when text = ss.scan(/\+/) then
action { [ :PLUS, text ] }
when text = ss.scan(/\-/) then
action { [ :MINUS, text ] }
when text = ss.scan(/\*\*/) then
action { [ :EXPO, text ] }
when text = ss.scan(/\*/) then
action { [ :ASTERISK, text ] }
when text = ss.scan(/\//) then
action { [ :FWD_SLASH, text ] }
when text = ss.scan(/%/) then
action { [ :PERCENT, text ] }
when text = ss.scan(/\(/) then
action { [ :OPAREN, text ] }
when text = ss.scan(/\)/) then
action { [ :CPAREN, text ] }
# in_or_out_dent is not defined in this class; whatever it returns becomes
# the token and must pass the "bad lexical result" check below.
when text = ss.scan(/\n+[\ \t]+/) then
in_or_out_dent text
when text = ss.scan(/\s+/) then
# do nothing
else
text = ss.string[ss.pos .. -1]
raise ScanError, "can not match (#{state.inspect}): '#{text}'"
end
# Inside '...': everything up to the closing quote is one STRING token.
when :SINGLE_TICK_STRING then
case
when text = ss.scan(/[^']+/) then
action { [ :STRING, text ] }
when text = ss.scan(/'/) then
[:state, nil]
else
text = ss.string[ss.pos .. -1]
raise ScanError, "can not match (#{state.inspect}): '#{text}'"
end
# Inside "...": everything up to the closing quote is one STRING token.
when :DOUBLE_TICK_STRING then
case
when text = ss.scan(/[^"]+/) then
action { [ :STRING, text ] }
when text = ss.scan(/"/) then
[:state, nil]
else
text = ss.string[ss.pos .. -1]
raise ScanError, "can not match (#{state.inspect}): '#{text}'"
end
else
raise ScanError, "undefined state: '#{state}'"
end # token = case state
next unless token # allow functions to trigger redo w/ nil
end # while
raise "bad lexical result: #{token.inspect}" unless
token.nil? || (Array === token && token.size >= 2)
# auto-switch state
self.state = token.last if token && token.first == :state
token
end # def next_token
end # class
require './lexer'
# Specs for Lexer: each example group supplies +source+ and inspects the
# token types/values the lexer produces for it.
RSpec.describe Lexer do
  let(:results) { Lexer.new(source).tokens }
  let(:tokens)  { results.map(&:first) }
  let(:values)  { results.map(&:last) }

  describe 'integers' do
    %w[0 1 123].each do |i|
      describe i do
        let(:source) { i }

        it 'is an INTEGER' do
          expect(tokens).to include :INTEGER
        end
      end
    end
  end

  describe 'indent' do
    # One indent level is two spaces (or one tab), so "indents twice" needs
    # four spaces. The run is built explicitly so its width cannot be lost to
    # whitespace-collapsing editors or renderers.
    two_levels = ' ' * 4

    [ "\n#{two_levels}", "\n\n#{two_levels}", "\n\t\t" ].each do |s|
      # inspect keeps the whitespace-only source readable in spec output.
      describe s.inspect do
        let(:source) { s }

        it 'indents twice' do
          expect(tokens.size).to eq 2
          expect(tokens.first).to eq :INDENT
          expect(tokens.last).to eq :INDENT
        end
      end
    end
  end
end
# Build tasks: regenerate the scanner, then run the specs.

# Pulls in the rake support shipped with oedipus_lex (presumably the rule that
# builds a .rex.rb file from its .rex source; confirm against the gem).
Rake.application.rake_require "oedipus_lex"

# Runs the lexer specs through the rspec command line.
task(:spec) { sh "rspec ./lexer_spec.rb" }

# `rake lexer` depends on the generated scanner file.
task :lexer => 'lexer.rex.rb'

# Plain `rake` rebuilds the scanner first and then runs the specs.
task :default => [:lexer, :spec]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment