peteroupc/abnfreader.rb

## abnfreader.rb
class TokenSymbolParser
  def initialize(token,prod)
    @token=token
    @prod=prod
  end
  def parse(str)
    return _parseOne(@token,str,0,str.length)
  end
  private
  def _parseOne(token,str,index,endIndex)
    minOccurs=[token.minOccurs,0].max
    maxOccurs=token.maxOccurs
    count=0
    while maxOccurs<0 || count<maxOccurs
         idx=_parseOneInst(token,str,index,endIndex)
                           if idx<0
           return -1 if count<minOccurs
           return index
         end
         count+=1
         index=idx
    end
    return index
  end
  def _parseOneInst(token,str,index,endIndex)
    case token.kind
      when :HEX
             return index<endIndex && str[index].ord==token.value ? index+1 : -1
      when :HEXRANGE
                if index<endIndex && str[index].ord>=token.value[0] && str[index].ord<=token.value[1]
            return index+1
          end
          return -1
            when :WORD
             return _parseOne(@prod[token.value],str,index,endIndex)
      when :STRING
             token.value.each_char{|x|
                   return -1 if index>=endIndex
                iord=str[index].ord
          xord=x[0].ord
          index+=1
                if (xord>=0x41 && xord<=0x5a)
                  return -1 if iord!=xord && iord!=xord+0x20
                elsif (xord>=0x61 && xord<=0x7a)
                  return -1 if iord!=xord && iord!=xord-0x20
          else
                  return -1 if iord!=xord
          end
             }
             return index
      when :SEQUENCE
           token.each{|tok|
              idx=_parseOne(tok,str,index,endIndex)
        return -1 if idx<0
        index=idx
           }
           return index
      when :CHOICE
           token.each{|tok|
              idx=_parseOne(tok,str,index,endIndex)
              return idx if idx>=0
           }
           return -1
    end
  end
end

#
# Represents a single node in an ABNF grammar.
#
class TokenSymbol
  include Enumerable
  attr_accessor :kind
  attr_accessor :value
  attr_reader :tokens
  attr_accessor :parent
  attr_accessor :minOccurs
  attr_accessor :name
  attr_accessor :maxOccurs
  def initialize(kind=nil,value=nil)
    @tokens=[]
    @kind=kind
    @value=value
    @minOccurs=1
    @maxOccurs=1
  end
  def each
    @tokens.each {|item| yield item }
  end
  def isDelegate?
    return true if self.kind==:SEQUENCE &&
      self.minOccurs==1 && self.maxOccurs==1 &&
      @tokens.length==1 && @tokens[0].kind==:WORD &&
      @tokens[0].minOccurs==1 && @tokens[0].maxOccurs==1
    return true if self.kind==:WORD &&
      self.minOccurs==1 && self.maxOccurs==1
    return false
  end
  def selfOrLast
    if @kind==:CHOICE
      return @tokens[@tokens.length-1]
    else
      return self
    end
  end
  def length
    return @tokens.length
  end
  def [](idx)
    return @tokens[idx]
  end
  def minOccurs=(val)
    val=-1 if val<=0
    @minOccurs=val
  end
  def maxOccurs=(val)
    val=-1 if val<=0
    @maxOccurs=val
  end
  def setMinMax(mn,mx)
    self.minOccurs=mn
    self.maxOccurs=mx
    return self
  end
  def clear
    @tokens.clear
  end
  def copy
    ret=TokenSymbol.new(@kind,@value)
    ret.minOccurs=@minOccurs
    ret.maxOccurs=@maxOccurs
    for tok in @tokens
     ret.addRaw(tok)
    end
    return ret
  end
  def add(tok)
    sym=TokenSymbol.new(tok[0],tok[1])
    sym.parent=self
    @tokens.push(sym)
    return sym
  end
  def addChild(child)
    child.parent=self
    @tokens.push(child)
    return child
  end
  def addRaw(child)
    @tokens.push(child)
    return child
  end
  def insertChild(child,index)
    child.parent=self
    @tokens.insert(index,child)
    return child
  end
  def inspectSimple
    return "#{kind} #{value||name} #{minOccurs} #{maxOccurs}"
  end
  def inspectDepth(depth)
    ret=""
    for i in 0...depth
      ret+="  "
    end
    ret+="#{kind} #{value||name} #{minOccurs} #{maxOccurs}\n"
    for tok in @tokens
      ret+=(tok ? tok.inspectDepth(depth+1) : ("  "*(depth+1))+"?\n")
    end
    return ret
  end
  def toNamedStr
    ret=to_s
    ret2=ret.gsub(/^\(|\)$/,"")
    ret=ret2 if !ret2.index("(") && !ret2.index(")")
    return ret if !name
    return name+" = "+ret
  end
  def minLength
    return 0 if self.minOccurs<=0
    case @kind
      when :STRING, :HEX, :HEXRANGE
        return self.hexLength*self.minOccurs
      when :SEQUENCE
        if @tokens.length==1
          return @tokens[0].minLength*self.minOccurs
        end
        return self.choiceLength
      when :CHOICE
        if @tokens.length==1
          return @tokens[0].minLength*self.minOccurs
        end
        return self.choiceLength
      when :WORD
        raise "Not supported"
    end
    return 0
  end
  def to_s
    extra=""
    ret=""
    optional=false
    if minOccurs<=0
      if maxOccurs==1
        optional=true
      else
        extra="*"+(maxOccurs<0 ? "" : maxOccurs.to_s)
      end
    elsif maxOccurs<=0
      extra=minOccurs.to_s+"*"
    elsif minOccurs==0 && maxOccurs==1
      optional=true
    elsif minOccurs!=maxOccurs
      extra=minOccurs.to_s+"*"+maxOccurs.to_s
    elsif minOccurs!=1
      extra=minOccurs.to_s
    end
    case @kind
      when :STRING
        if value[/[\r\n]/]
          rr=[]
          for b in value.bytes
           rr.push(sprintf("%%x%02x",b))
          end
          return rr.join(" ")
        else
         data=value.split("\"")
         for i in 0...data.length
          data[i]="\""+data[i]+"\""
         end
         ret=data.join(" %x22 ")
        end
      when :WORD
        ret=@value
      when :HEXRANGE
        ret=sprintf("%%x%02x-%02x",@value[0],@value[1])
      when :HEX
        ret=sprintf("%%x%02x",@value)
      when :SEQUENCE
        ret=((optional || @tokens.length==1) ? "" : "(")+@tokens.each{|t| t.to_s }.join(" ")+
            ((optional || @tokens.length==1) ? "" : ")")
      when :CHOICE
        ret=((optional || @tokens.length==1) ? "" : "(")+@tokens.each{|t| t.to_s }.join(" / ")+
            ((optional || @tokens.length==1) ? "" : ")")
    end
    if optional
      return "["+ret+"]"
    else
      return extra+ret
    end
  end
  private
  def _appendUtf8(str,codepoint)
    return str if codepoint<0
     if codepoint<=0x7F
      str+=codepoint.chr
     elsif codepoint<=0x7FF
      str+=(0xC0|((codepoint>>6)&0x1F)).chr
      str+=(0x80|(codepoint   &0x3F)).chr
     elsif codepoint<=0xFFFF
      str+=(0xE0|((codepoint>>12)&0x0F)).chr
      str+=(0x80|((codepoint>>6)&0x3F)).chr
      str+=(0x80|(codepoint   &0x3F)).chr
     elsif codepoint<=0x10FFFF
      str+=(0xF0|((codepoint>>18)&0x07)).chr
      str+=(0x80|((codepoint>>12)&0x3F)).chr
      str+=(0x80|((codepoint>>6)&0x3F)).chr
      str+=(0x80|(codepoint   &0x3F)).chr
     end
     return str
  end
  public
  # Length of a string token
  def stringLength
    return (self.kind==:STRING) ? self.value.length : 0
  end
  # Minimum length of a hex token
  def hexLength
    return self.value.length if self.kind==:STRING
    return self.minOccurs if (self.kind==:HEX || self.kind==:HEXRANGE)
    return 0
  end
  # Recursively calculates the minimum length of a choice token
  def choiceLength
   if self.kind==:CHOICE
     return self.map{|tok| tok.hexLength()*self.minOccurs }.min
   end
   if self.kind==:SEQUENCE && self.length==2
     h=self[0].hexLength()
     if self[1].kind==:HEX || self[1].kind==:HEXRANGE || self[1].kind==:STRING
       h+=self[1].hexLength()
     end
     if self[1].kind==:CHOICE
       h+=self[1].choiceLength()
     end
     return h*self.minOccurs
    end
   return 0
  end
  def isNonWhitespace?
    if self.kind==:HEX
      return @value!=0x20 && @value!=0x09 &&
         @value!=0x0a && @value!=0x0d
   elsif self.kind==:STRING
      for b in @value.bytes
  return true if b!=0x20 && b!=0x09 &&
            b!=0x0a && b!=0x0d
      end
      return false
   elsif self.kind==:HEXRANGE
      return true if @value[0]>0x20
      chars=[]
      c=@value[0];while c<=[0x21,@value[1]].min
        chars.push(c)
      c+=1;end
      chars.delete(0x20)
      chars.delete(0x09)
      chars.delete(0x0a)
      chars.delete(0x0d)
      return chars.length>0
   end
   return false
  end
  def productionContainsWhitespaceOnly?(prod)
    s=self
    while s.kind==:WORD
     raise "Can't find production '#{s.value}'" if !prod[s.value]
      s=prod[s.value]
    end
    if s.kind==:CHOICE || s.kind==:SEQUENCE
    s.each{|t|
    return false if !t.productionContainsWhitespaceOnly?(prod)
        }
        return true
    end
    return !s.isNonWhitespace?
  end
  def isMatch?(str,productions)
    parser=TokenSymbolParser.new(self,productions)
    return parser.parse(str)==str.length
  end
  #
  # Generates a random UTF-8 string that fits this node's grammar.
  # The 'productions' parameter is a hash of productions (nodes) to be used if
  # this node refers to another ABNF production (node) by name, either
  # directly or indirectly.
  #
  def randomString(productions, stack=nil)
    isws=self.productionContainsWhitespaceOnly?(productions)
    mn=@minOccurs<=0 ? 0 : @minOccurs
    mx=@maxOccurs<0 ?  ((1+rand(12))+(1+rand(12)))/2 : @maxOccurs
    if mn<=0 && mx==1 && rand(100)<(isws ? 90 : 10)
          # Optional production
          return ""
    end
    if mn<=0 && mx > 0
       mn=1
    end
    stack=[] if !stack
    occur=(mx==mn) ? mn : mn+rand(mx-mn+1)
    if isws
      occur/=2
      occur=mn if occur<mn
    end
    str=""
    if mn==0 && isws && rand(100)<80
      occur=0
    end
    if mn==1 && isws && rand(100)<80
      occur=0
    end
    for i in 0...occur
      case @kind
  when :STRING
    for b in self.value.bytes
      if (b>=0x41 && b<=0x5a) || (b>=0x61 && b<=0x7a)
      newletter=rand(2)==0 ? (b|0x20)  : (b&0xDF)
      raise if !((newletter>=0x41 && newletter<=0x5a) ||
            (newletter>=0x61 && newletter<=0x7a))
      str=_appendUtf8(str,newletter);
      else
      str=_appendUtf8(str,b)
    end
    end
  when :WORD
     stack.push(@value)
     pr=productions[@value]
     raise "Can't find production '#{@value}'" if !pr
     str+=pr.randomString(productions, stack)
     stack.pop
  when :HEX
    str=_appendUtf8(str,@value)
  when :HEXRANGE
    includesUpperLowerA=@value[0]<=0x41 && @value[1]>=0x41 &&
                 @value[0]<=0x61 && @value[1]>=0x61
    rnd=0
    while true
      rnd=@value[0]==@value[1] ? @value[0] :
           @value[0]+rand(@value[1]-@value[0]+1)
       if includesUpperLowerA && !(rnd>=0x41 && rnd<=0x5a) &&
                         !(rnd>=0x61 && rnd<=0x7a) &&
            !(rnd>=0x30 && rnd<=0x39) &&
           rand(100)<90
         next
       end
       if rnd<0x20 && rand(100)<90
         next
  end
       break
    end
    str+=_appendUtf8(str,rnd)
  when :SEQUENCE
     self.each{|t|
       str+=t.randomString(productions, stack)
     }
  when :CHOICE
     while true
         t=@tokens[rand(@tokens.length)]
        if t.productionContainsWhitespaceOnly?(productions) &&
          rand(100)<95
          next
       end
       if t.kind==:WORD
          recursing=stack.include?(t.value)
          if recursing && (stack.length>50 || rand(100)<90)
         next
       end
       end
         str+=t.randomString(productions, stack)
       break
     end
     end
   end
   return str
  end
  def allHexChoice
    return false if @kind!=:CHOICE
    for tok in @tokens
      return false if tok.kind!=:HEX && tok.kind!=:HEXRANGE
    end
    return true
  end
  def isAsciiLetter(x)
   return (x>=0x41 && x<=0x5a) || (x>=0x61 && x<=0x7a)
  end
  def allHexChoiceRecursiveAnyOccurs
    return false if @kind!=:CHOICE
    for tok in @tokens
      return false if tok.kind!=:HEX && tok.kind!=:HEXRANGE && !tok.allHexChoiceRecursiveAnyOccurs()
    end
    return true
  end
  def _hexChoiceTokens(hextokens)
    raise "Internal error" if @kind!=:CHOICE
    for tok in @tokens
      raise "Internal error" if tok.minOccurs!=1 || tok.maxOccurs!=1
      if tok.kind==:CHOICE
         tok._hexChoiceTokens(hextokens)
      elsif tok.kind==:HEX || tok.kind==:HEXRANGE
         hextokens.push(tok)
      else
         raise "Internal error"
      end
    end
  end
  def allHexChoiceRecursive
    return false if @kind!=:CHOICE
    for tok in @tokens
      return false if tok.minOccurs!=1 || tok.maxOccurs!=1
      return false if tok.kind!=:HEX && tok.kind!=:HEXRANGE && !tok.allHexChoiceRecursive()
    end
    return true
  end
  # Simplifies an ABNF tree
  def simplify
    return false if @kind==:HEX ||@kind==:HEXRANGE
    changed=true
    iter=0
    while changed
     changed=false
     iter+=1; raise self.inspect if iter>10
     if @kind==:SEQUENCE && @tokens.length>1
       i=0;while i<@tokens.length
         tok=@tokens[i]
         if tok.kind==:SEQUENCE && tok.minOccurs==1 && tok.maxOccurs==1
           newtokens=@tokens[0,i]
           endtokens=@tokens[i+1,@tokens.length]
           self.clear
           for t in newtokens;self.addChild(t);end
           for j in 0...tok.length; self.addChild(tok[j]); end
           for t in endtokens;self.addChild(t);end
           i-=1
           changed=true
         end
         i+=1
       end
     end
     if @kind==:SEQUENCE && @tokens.length==1
      tok=@tokens[0]
      if (tok.kind==:WORD || tok.kind==:STRING || tok.kind==:HEX || tok.kind==:HEXRANGE) &&
              ((tok.minOccurs==1 && tok.maxOccurs==1) || (self.minOccurs==1 && self.maxOccurs==1))
         self.kind=tok.kind
         self.value=tok.value
         if (self.minOccurs==1 && self.maxOccurs==1)
          self.minOccurs=tok.minOccurs
          self.maxOccurs=tok.maxOccurs
         end
         self.name=tok.name
         self.clear
         changed=true
      elsif (tok.kind==:CHOICE || tok.kind==:SEQUENCE) &&
        ((tok.minOccurs==1 && tok.maxOccurs==1) || (self.minOccurs==1 && self.maxOccurs==1))
        self.clear
        for i in 0...tok.length
          self.addChild(tok[i])
        end
        self.kind=tok.kind
        self.value=tok.value
        if (self.minOccurs==1 && self.maxOccurs==1)
         self.minOccurs=tok.minOccurs
         self.maxOccurs=tok.maxOccurs
        end
        self.name=tok.name
         changed=true
      elsif (tok.kind==:CHOICE || tok.kind==:SEQUENCE) &&
        self.minOccurs==0 && self.maxOccurs==1 && tok.minOccurs==1 && tok.maxOccurs==-1
        self.clear
        for i in 0...tok.length
          self.addChild(tok[i])
        end
        self.kind=tok.kind
        self.value=tok.value
        self.minOccurs=0
        self.maxOccurs=-1
        self.name=tok.name
         changed=true
      end
    end
    if @kind==:CHOICE
      done=false
      while !done
      done=true
      # isHexRangeRec = Choice of hex values and/or ranges of hex
      # values, each of which can occur only once
      isHexRangeRec = self.allHexChoiceRecursive()
      if isHexRangeRec
        hextokens=[]
        self._hexChoiceTokens(hextokens)
        @tokens=hextokens
      end
      i=0;while i<@tokens.length
        tok=@tokens[i]
        if !tok
             i+=1; next
        end
        if (tok.kind==:HEX || tok.kind==:HEXRANGE) && !isHexRangeRec
           # Simplify runs of hex/hexrange
           runsize=1
           tokenend=@tokens.length
           j=i+1;while j<@tokens.length
              next if @tokens[j]==nil
              if (@tokens[j].kind!=:HEX && @tokens[j].kind!=:HEXRANGE)
                  tokenend=j;break
              end
              runsize+=1
           j+=1;end
           if runsize>1 && runsize<@tokens.length
              tempchoice=TokenSymbol.new(:CHOICE)
              for k in 0...runsize
                tempchoice.addRaw(@tokens[i+k])
              end
              tempchoice.simplify
              newrunsize=runsize
              if tempchoice.kind==:CHOICE && tempchoice.tokens.length<runsize
                for k in 0...tempchoice.tokens.length
                    @tokens[i+k]=tempchoice[k]
                end
                newrunsize=tempchoice.tokens.length; done=false; changed=true
                for k in (i+newrunsize)...tokenend
                   @tokens[k]=nil
                end
              elsif tempchoice.kind==:HEX || tempchoice.kind==:HEXRANGE
                @tokens[i]=tempchoice
                newrunsize=1; done=false; changed=true
                for k in (i+newrunsize)...tokenend
                   @tokens[k]=nil
                end
              end
              tok=@tokens[i]
           end
           i=tokenend
        else
           i+=1
        end
      end
      @tokens.compact!
      i=0;while i<@tokens.length
        tok=@tokens[i]
        if !isHexRangeRec && !(tok.kind==:HEX || tok.kind==:HEXRANGE)
           # don't combine past non-hex/hexrange
           break
        end
        if (tok.kind==:HEX || tok.kind==:HEXRANGE)
          found=false
          tokMin=(tok.kind==:HEX) ? tok.value : tok.value[0]
          tokMax=(tok.kind==:HEX) ? tok.value : tok.value[1]
          j=i+1;while j<@tokens.length
           tok2=@tokens[j]
           j+=1
           next if !tok2 || tok==tok2
           if !isHexRangeRec && !(tok.kind==:HEX || tok.kind==:HEXRANGE)
               # don't combine past non-hex/hexrange
               break
           end
           if tok2.kind==:HEXRANGE && (
                   (isHexRangeRec && tok2.minOccurs==tok.minOccurs && tok2.maxOccurs==tok.maxOccurs) ||
                   (tok2.minOccurs==1 && tok.minOccurs==1 && tok2.maxOccurs==1 && tok.maxOccurs==1))
             if tok2.value[0]==tokMax+1
               tok2.value[0]=tokMin
               tok2.name=nil
               changed=true
               found=true; break
             elsif tok2.value[1]==tokMin-1
               tok2.value[1]=tokMax
               tok2.name=nil
               changed=true
               found=true; break
             end
           elsif tok2.kind==:HEX && (
                   (isHexRangeRec && tok2.minOccurs==tok.minOccurs &&
                tok2.maxOccurs==tok.maxOccurs) ||
                   (tok2.minOccurs==1 && tok.minOccurs==1 &&
               tok2.maxOccurs==1 && tok.maxOccurs==1))
             if tok2.value==tokMax+1
               tok2.kind=:HEXRANGE
               tok2.value=[tokMin,tok2.value]
               tok2.name=nil
               changed=true
               found=true; break
             elsif tok2.value==tokMin-1
               tok2.kind=:HEXRANGE
               tok2.value=[tok2.value,tokMax]
               changed=true
               tok2.name=nil
               found=true; break
             end
           end
          end
          if found
           @tokens.delete_at(i)
           i-=1
           done=false
          end
        end
        i+=1
      end
      end
    end
    if @kind==:CHOICE && ((self.minOccurs==1 && self.maxOccurs==1) ||
           self.allHexChoiceRecursive())
      done=false
      while !done
      done=true
      i=0;while i<@tokens.length
        tok=@tokens[i]
        if tok.kind==:CHOICE && tok.length>1 && tok.minOccurs==1 &&
            tok.maxOccurs==1
          oldPosition=i
          @tokens.delete_at(oldPosition)
          i-=1
          changed=true
          done=false
          for j in 0...tok.length
            self.insertChild(tok[tok.length-1-j],oldPosition)
          end
        end
        i+=1
      end
      end
    end
    if @kind==:STRING && @value.length==1 && !value[/[A-Za-z]/]
      self.kind=:HEX
      self.value=@value.bytes[0]
      changed=true
    end
    if @kind==:SEQUENCE
    for i in 0...@tokens.length
      tok=@tokens[i]
      nexttok=@tokens[i+1]
      if nexttok && tok && tok.minOccurs==1 && tok.maxOccurs==1 &&
        nexttok.minOccurs==1 && nexttok.maxOccurs==1
        if tok.kind==:STRING && nexttok.kind==:STRING
          tok.value+=nexttok.value
          changed=true
          @tokens[i+1]=nil
          next
        elsif tok.kind==:HEX && nexttok.kind==:HEX &&
            !self.isAsciiLetter(tok.value) &&
            !self.isAsciiLetter(nexttok.value)
          tok.value=tok.value.chr
          tok.value+=nexttok.value.chr
          tok.kind=:STRING
          changed=true
          @tokens[i+1]=nil
          next
        end
      end
    end
    end
    @tokens.compact!
    if (@kind==:CHOICE || @kind==:SEQUENCE) && @tokens.length==1 &&
        @tokens[0].minOccurs==1 &&
        @tokens[0].maxOccurs==1
      self.kind=@tokens[0].kind
      self.value=@tokens[0].value
      @name=@tokens[0].name if !@name || @name.length==0
      @tokens=@tokens[0].tokens
      changed=true
    end
    for tok in @tokens
      if tok.kind!=:HEX && tok.kind!=:HEXRANGE
        oldkind=tok.kind
        oldtok=tok.inspect
        tok.simplify
        if oldkind!=tok.kind
           changed=true
        else
           changed|=(oldtok!=tok.inspect)
        end
      end
    end
    end
    return self
  end
  def inspect
    inspectDepth(0)
  end
end

class ABNF
  attr_reader :indexedTokens
  attr_reader :maxLengthTokens
  def initialize
    @indexedTokens={}
    @maxLengthTokens={}
  end
#
# Parses a string containing ABNF productions.
# Returns a hash whose keys are the names and the
# values are ABNF grammar trees.  There are six kinds of tokens:
#  :SEQUENCE - matches all of the grammar nodes in order
#  :CHOICE - matches one of the grammar nodes, to be
#    checked in order
#  :HEX - matches a single byte or character
#  :HEXRANGE - matches a single byte or character within
#    a range of values
#  :STRING - matches an ASCII case-insensitive string
#  :WORD - refers to another ABNF production
#
def parse(abnf)
 abnflist={}
 for a in splitAbnfLines(abnf)
   parseAbnf(a,abnflist)
 end
 return abnflist
end
private
def isTokenStart(tok)
  return false if !tok
  case tok[0]
    when :WORD,:STRING,:HEX,:HEXRANGE,:PARENOPEN
      return true
    else
      return false
  end
end
def parseAbnf(abnf,abnflist)
  abnftest=[abnf]
tokens=[]
while true
 token=nextToken(abnftest)
 raise "Invalid ABNF: "+abnf if !token
 break if token[0]==:END
 tokens.push(token)
end
index=0
while index<tokens.length
  if tokens[index][0]==:TOKENVALUE
    @indexedTokens[tokens[index][1][0]]=tokens[index][1][1]
    index += 1
  elsif tokens[index][0]==:MAXLENGTH
    @maxLengthTokens[tokens[index][1][0]]=tokens[index][1][1]
    index += 1
  else
    break
  end
end
if tokens.length-index>2
  if tokens[index][0]==:WORD && tokens[index+1][0]==:EQUALS
    name=tokens[index][1]
    root=TokenSymbol.new(:SEQUENCE)
    node=root
    nextMinOccurs=nil
    nextMaxOccurs=nil
    for i in index+2...tokens.length
      tok=tokens[i]
      # Includes internal token :CSSTRING, for case-sensitive strings,
      # converting them to :SEQUENCE tokens
      case tok[0]
        when :WORD, :STRING, :CSSTRING, :HEX, :HEXRANGE
          if nextMinOccurs
            if tok[0]==:STRING && tok[1].length==1 && tok[1][/[A-Za-z]/]
              newtok=node.add([:CHOICE,nil])
              newtok.add([:HEX,tok[1].upcase.bytes[0]])
              newtok.add([:HEX,tok[1].downcase.bytes[0]])
              newtok.setMinMax(nextMinOccurs,nextMaxOccurs)
              nextMinOccurs=nil
            else
newtok=nil
if tok[0]==:CSSTRING
   newtok=node.add([:SEQUENCE,nil])
   for b in tok[1].bytes
     newtok.add([:HEX,b])
   end
   p newtok
else
              newtok=node.add(tok)
end
              newtok.setMinMax(nextMinOccurs,nextMaxOccurs)
              nextMinOccurs=nil
            end
          else
            if tok[0]==:STRING && tok[1].length==1 && tok[1][/[A-Za-z]/]
              newtok=node.add([:CHOICE,nil])
              newtok.add([:HEX,tok[1].upcase.bytes[0]])
              newtok.add([:HEX,tok[1].downcase.bytes[0]])
            else
newtok=nil
if tok[0]==:CSSTRING
   newtok=node.add([:SEQUENCE,nil])
   for b in tok[1].bytes
     newtok.add([:HEX,b])
   end
else
   node.add(tok)
end
            end
          end
        when :SLASH
          if node.kind==:SEQUENCE
            if node.parent && node.parent.kind==:CHOICE
              node=node.parent.addChild(TokenSymbol.new(:SEQUENCE))
            else
             node.kind=:CHOICE
             node2=TokenSymbol.new(:SEQUENCE)
             for i in 0...node.length
               node2.addChild(node[i])
             end
             node.clear
             node.addChild(node2)
             node=node.addChild(TokenSymbol.new(:SEQUENCE))
           end
         else
            node=node.addChild(TokenSymbol.new(:SEQUENCE))
         end
        when :PARENOPEN
          node=node.addChild(TokenSymbol.new(:SEQUENCE))
          if nextMinOccurs
            node.setMinMax(nextMinOccurs,nextMaxOccurs)
            nextMinOccurs=nil
          end
        when :PARENCLOSE, :BRACKETCLOSE
          node=node.parent
          if node && node.kind==:CHOICE
            node=node.parent
          end
        when :BRACKETOPEN
          node=node.addChild(TokenSymbol.new(:SEQUENCE))
          node.minOccurs=0
        when :ATLEASTNUMTIMES
          raise "Invalid ABNF" if !isTokenStart(tokens[i+1])
          nextMinOccurs=tokens[i][1]
          nextMaxOccurs=-1
        when :NUMTIMES
          raise "Invalid ABNF" if !isTokenStart(tokens[i+1])
          nextMinOccurs=tokens[i][1]
          nextMaxOccurs=tokens[i][1]
        when :MINMAXTIMES
          raise "Invalid ABNF" if !isTokenStart(tokens[i+1])
          nextMinOccurs=tokens[i][1][0]
          nextMaxOccurs=tokens[i][1][1]
        when :ANYNUMTIMES
          raise "Invalid ABNF" if !isTokenStart(tokens[i+1])
          nextMinOccurs=-1
          nextMaxOccurs=-1
        when :TOKENVALUE
          @indexedTokens[tokens[i][1][0]]=tokens[i][1][1]
        when :MAXLENGTH
          @maxLengthTokens[tokens[i][1][0]]=tokens[i][1][1]
        else
          raise "Invalid ABNF: "+abnf
      end
    end
    root.simplify
    root.name=name
    if abnflist[name]
      p "Warning: "+name+" defined already"
    end
    abnflist[name]=root
  end
end
end
  def chop(str,y)
    return str[y.length,str.length]
  end
def nextToken(xa)
  x=xa[0]
  x=x.gsub(/\A\s+/,"")
  if x.length==0
    return [:END,nil]
  end
  if x[/\A;(.*)/]
    # NOTE: This is an extension of ABNF, in
    # order to allow numbers to be assigned
    # to ABNF productions
    xa[0]=chop(x,$&)
    cmt=$1
    if cmt[/\A\s*TOKEN\:\s*(\S+)\s+(\d+)/]
      return [:TOKENVALUE,[$1,$2.to_i]]
    end
    # NOTE: This is an extension of ABNF, in
    # order to assign a maximum string length for
    # ABNF productions
    if cmt[/\A\s*MAXLENGTH\:\s*(\S+)\s+(\d+)/]
      return [:MAXLENGTH,[$1,$2.to_i]]
    end
    # comment
    return nextToken(xa)
  end
  if x[/\A\(\s*/]
    xa[0]=chop(x,$&)
    return [:PARENOPEN,nil]
  elsif x[/\A\)\s*/]
    xa[0]=chop(x,$&)
    return [:PARENCLOSE,nil]
  elsif x[/\A\[\s*/]
    xa[0]=chop(x,$&)
    return [:BRACKETOPEN,nil]
  elsif x[/\A\]\s*/]
    xa[0]=chop(x,$&)
    return [:BRACKETCLOSE,nil]
  elsif x[/\A\/\s*/]
    xa[0]=chop(x,$&)
    return [:SLASH,nil]
  elsif x[/\A([0-9]+)\*([0-9]+)/]
    xa[0]=chop(x,$&)
    return [:MINMAXTIMES,[$1.to_i,$2.to_i]]
  elsif x[/\A\*([0-9]+)/]
    xa[0]=chop(x,$&)
    return [:MINMAXTIMES,[0,$1.to_i]]
  elsif x[/\A([0-9]+)\*/]
    xa[0]=chop(x,$&)
    return [:ATLEASTNUMTIMES,$&.to_i]
  elsif x[/\A\*/]
    xa[0]=chop(x,$&)
    return [:ANYNUMTIMES,nil]
  elsif x[/\A<\">/]
    xa[0]=chop(x,$&)
    return [:STRING,"\""]
  elsif x[/\A(?:\%i)?\"([^\"]+)\"/]
    # Case-insensitive string
    xa[0]=chop(x,$&)
    return [:STRING,$1]
  elsif x[/\A\%s\"([^\"]+)\"/]
    # Case-sensitive string
    xa[0]=chop(x,$&)
    return [:CSSTRING,$1]
  elsif x[/\A\%x([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/]
    xa[0]=chop(x,$&)
    return [:HEXRANGE,[$1.to_i(16),$2.to_i(16)]]
  elsif x[/\A\%x([0-9A-Fa-f]+)/]
    xa[0]=chop(x,$&)
    hexval=$1.to_i(16)
    x=xa[0]
    if x[/\A\.([0-9A-Fa-f]+)/]
      str=hexval.chr
      while x[/\A\.([0-9A-Fa-f]+)/]
        xa[0]=chop(x,$&)
        str+=($1.to_i(16)).chr
        x=xa[0]
      end
      return [:CSSTRING,str]
    else
     return [:HEX,hexval]
    end
  elsif x[/\A\%d([0-9]+)-([0-9]+)/]
    xa[0]=chop(x,$&)
    return [:HEXRANGE,[$1.to_i(10),$2.to_i(10)]]
  elsif x[/\A\%d([0-9]+)/]
    xa[0]=chop(x,$&)
    hexval=$1.to_i(10)
    x=xa[0]
    if x[/\A\.([0-9]+)/]
      str=hexval.chr
      while x[/\A\.([0-9]+)/]
        xa[0]=chop(x,$&)
        str+=($1.to_i(10)).chr
        x=xa[0]
      end
      return [:CSSTRING,str]
    else
     return [:HEX,hexval]
    end
  elsif x[/\A\:?\=/]
    xa[0]=chop(x,$&)
    return [:EQUALS,nil]
  elsif x[/\A([0-9]+)/]
    xa[0]=chop(x,$&)
    return [:NUMTIMES,$&.to_i]
  elsif x[/\A([A-Za-z0-9_-]+)/]
    xa[0]=chop(x,$&)
    return [:WORD,$&]
  end
  return nil
end
def splitAbnfLines(x)
  lines=x.split(/\r?\n/)
  ret=[]
  curline=nil
  filebegin=""
  for line in lines
    if line[/^\s*([A-Za-z0-9_-]+)\s*\:?\=/]
      ret.push(curline) if curline && curline.length>0
      curline=filebegin+line+"\r\n"
      filebegin=""
    elsif curline && curline.length>0
      curline+=line+"\r\n"
    else
      filebegin+=line+"\r\n"
    end
  end
  ret.push(curline||filebegin)
  return ret
end
end