Skip to content

Instantly share code, notes, and snippets.

@metade
Created April 1, 2009 07:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save metade/88607 to your computer and use it in GitHub Desktop.
Save metade/88607 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'rubygems'
require 'treetop'
require 'lonclass_grammar'
require 'pp'
module Lonclass
class Line
  # One parsed input line: its line number, the tree of subjects and the
  # free-text message.
  attr_accessor :number, :message, :subjects

  # number   - line number text
  # subjects - object responding to #to_hash (the parser passes a Node)
  # message  - message text
  def initialize(number, subjects, message)
    @number, @subjects, @message = number, subjects, message
  end

  # Returns a plain Hash view of the line; subjects are converted through
  # their own #to_hash.
  def to_hash
    {
      :message  => message,
      :subjects => subjects.to_hash,
      :number   => number
    }
  end
end
class Node
  # Binary node used to build a right-leaning tree of subjects: the first
  # appended object goes to +left+, later ones are pushed down the +right+
  # branch.
  attr_accessor :left, :right

  # Appends +obj+ to the tree.
  #
  # Fills +left+, then +right+. Once both are taken, the current +right+ is
  # either asked to take the object (when it is itself a Node) or is wrapped,
  # together with +obj+, in a new Node.
  #
  # Returns self so that appends can be chained (node << a << b).
  def <<(obj)
    if left.nil?
      self.left = obj
    elsif right.nil?
      self.right = obj
    elsif right.kind_of?(Node)
      right << obj
    else
      pair = Node.new
      pair << right
      pair << obj
      self.right = pair
    end
    self
  end

  # True while nothing has been appended.
  def empty?
    left.nil? && right.nil?
  end

  # Returns a nested Hash with :left and :right keys; children are converted
  # through their own #to_hash, missing children become nil.
  def to_hash
    { :left => (left ? left.to_hash : nil), :right => (right ? right.to_hash : nil) }
  end
end
class Subject
  # A single subject entry: classification number plus optional name, date,
  # location and company.
  attr_accessor :number, :name, :date, :location, :company

  # params - Hash of attribute name => value; each pair is assigned through
  #          the matching writer, so an unknown key raises NoMethodError.
  def initialize(params={})
    params.each do |attribute, value|
      writer = "#{attribute}="
      self.send(writer, value)
    end
  end

  # Returns all five attributes as a Hash keyed by symbol.
  def to_hash
    {
      :company  => company,
      :location => location,
      :date     => date,
      :name     => name,
      :number   => number
    }
  end
end
class Parser
  # Front end over the Treetop-generated LonclassGrammarParser: turns the raw
  # syntax tree into Line / Node / Subject objects.

  # Convenience wrapper: parses +string+ with a fresh parser instance.
  def self.parse(string)
    new.parse(string)
  end

  def initialize
    @grammar = LonclassGrammarParser.new
  end

  # Parses +string+ (one or more input lines).
  #
  # Returns an Array of Line on success. Returns an empty Hash when the
  # grammar rejects the input; callers compare against {} to detect failure,
  # so that value is kept as is. @grammar.terminal_failures can be inspected
  # for diagnostics after a failed parse.
  def parse(string)
    doc = @grammar.parse(string)
    return {} if doc.nil?

    doc.elements.map do |l|
      subjects = build_subjects(l)

      # fixme: could be parsed better
      company = l.company.respond_to?(:name) ? l.company.name.text_value : nil
      unless blank_text?(company)
        # A line-level company is stored on the first subject, creating an
        # otherwise empty one when the line has no subjects at all.
        subjects << Subject.new if subjects.empty?
        subjects.left.company = company
      end

      Line.new(l.number.text_value, subjects, l.message.text_value)
    end
  end

  protected

  # Builds the Node tree holding one Subject per subject node of +line+.
  def build_subjects(line)
    subjects = Node.new
    line.subjects.elements.each do |s|
      name, date, location, company = extract_extensions(s)
      subjects << Subject.new(:number => subject_number(s), :name => name, :date => date,
                              :location => location, :company => company)
    end
    subjects
  end

  # Returns the subject's number text without a leading dot, or nil when the
  # node carries no number (a bare extension).
  def subject_number(node)
    return nil unless node.respond_to?(:number)
    node.number.text_value.sub(/\A\./, '')
  end

  # Returns [name, date, location, company] for a subject node. A node that
  # has no +extensions+ accessor is itself treated as the single extension.
  def extract_extensions(subject)
    name = date = location = company = nil
    if subject.respond_to?(:extensions)
      company = subject.company.name.text_value unless blank_text?(subject.company.text_value)
      extensions = subject.extensions.elements
    else
      extensions = [subject]
    end
    extensions.each do |e|
      if e.respond_to?(:date)
        date = e.date.text_value
      elsif e.respond_to?(:location)
        location = e.location.text_value
      else
        name = e.text_value
      end
    end
    [name, date, location, company]
  end

  # True for nil, empty or whitespace-only text. Uses core String methods
  # only, so no String#blank? extension has to be loaded.
  def blank_text?(text)
    text.nil? || text.strip.empty?
  end
end
end
# Script entry point: parses the converted LONCLASS dump one line at a time,
# echoing each line index and text followed by the parsed structure, and
# stops at the first line the grammar rejects.
if __FILE__ == $0
  # Block form closes the file even when the loop exits early. The file is
  # iterated directly because String is not Enumerable on Ruby 1.9+, so
  # String#each_with_index is unavailable there.
  File.open('data/converted-LONCLASS_con.txt') do |file|
    file.each_with_index do |line, i|
      puts i
      puts line
      details = Lonclass::Parser.parse(line)
      break if details == {}
      pp details.map { |d| d.to_hash }
    end
  end
end
module LonclassGrammar
include Treetop::Runtime
# Name of the start rule: @root when it is set, otherwise :lonclass.
def root
@root || :lonclass
end
# Rule `lonclass`: delegates to `lines`. The result (node or nil) is memoised
# in node_cache[:lonclass] under the index the attempt started at.
def _nt_lonclass
start_index = index
if node_cache[:lonclass].has_key?(index)
cached = node_cache[:lonclass][index]
# A cached match moves the cursor to the end of that match; a cached nil
# leaves the cursor where it is.
@index = cached.interval.end if cached
return cached
end
r0 = _nt_lines
node_cache[:lonclass][start_index] = r0
return r0
end
# Rule `lines`: one or more `line` matches. Returns a SyntaxNode spanning all
# matched lines, or nil (with the cursor restored) when not even one line
# matches. Memoised in node_cache[:lines].
def _nt_lines
start_index = index
if node_cache[:lines].has_key?(index)
cached = node_cache[:lines][index]
@index = cached.interval.end if cached
return cached
end
s0, i0 = [], index
# Collect `line` nodes until one fails to match.
loop do
r1 = _nt_line
if r1
s0 << r1
else
break
end
end
if s0.empty?
self.index = i0
r0 = nil
else
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
end
node_cache[:lines][start_index] = r0
return r0
end
# Accessors mixed into a matched `line` node. The element order is:
# 0 number, 1 '|', 2 optional company, 3 subjects, 4 spaces, 5 '|',
# 6 message, 7 eol.
module Line0
def number
elements[0]
end
def company
elements[2]
end
def subjects
elements[3]
end
def message
elements[6]
end
def eol
elements[7]
end
end
# Rule `line`: number '|' company? subject* ' '+ '|' message eol.
# Each element is only attempted when the previous one matched; the sequence
# succeeds when the last pushed element (s0.last) is non-nil. On failure the
# cursor is restored to where the attempt began. Memoised in node_cache[:line].
def _nt_line
start_index = index
if node_cache[:line].has_key?(index)
cached = node_cache[:line][index]
@index = cached.interval.end if cached
return cached
end
i0, s0 = index, []
r1 = _nt_number
s0 << r1
if r1
if input.index('|', index) == index
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('|')
r2 = nil
end
s0 << r2
if r2
# Optional company: an empty node stands in when none is present.
r4 = _nt_company
if r4
r3 = r4
else
r3 = instantiate_node(SyntaxNode,input, index...index)
end
s0 << r3
if r3
# Zero or more subjects.
s5, i5 = [], index
loop do
r6 = _nt_subject
if r6
s5 << r6
else
break
end
end
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
s0 << r5
if r5
# One or more spaces before the second '|'.
s7, i7 = [], index
loop do
if input.index(' ', index) == index
r8 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure(' ')
r8 = nil
end
if r8
s7 << r8
else
break
end
end
if s7.empty?
self.index = i7
r7 = nil
else
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
end
s0 << r7
if r7
if input.index('|', index) == index
r9 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('|')
r9 = nil
end
s0 << r9
if r9
r10 = _nt_message
s0 << r10
if r10
r11 = _nt_eol
s0 << r11
end
end
end
end
end
end
end
if s0.last
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
r0.extend(Line0)
else
self.index = i0
r0 = nil
end
node_cache[:line][start_index] = r0
return r0
end
# Rule `number` as `[0-9]+`: one or more decimal digits.
# NOTE: this module defines `_nt_number` a second time further down (the
# `subject_class+` version). Ruby keeps the later definition, so this one is
# never the method that runs. Both definitions use node_cache[:number].
def _nt_number
start_index = index
if node_cache[:number].has_key?(index)
cached = node_cache[:number][index]
@index = cached.interval.end if cached
return cached
end
s0, i0 = [], index
loop do
if input.index(Regexp.new('[0-9]'), index) == index
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
r1 = nil
end
if r1
s0 << r1
else
break
end
end
if s0.empty?
self.index = i0
r0 = nil
else
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
end
node_cache[:number][start_index] = r0
return r0
end
# Accessor mixed into a matched `company` node: the text between the brackets.
module Company0
def name
elements[1]
end
end
# Rule `company`: '[' char+ ']'. Returns a node extended with Company0, or
# nil with the cursor restored. Memoised in node_cache[:company].
def _nt_company
start_index = index
if node_cache[:company].has_key?(index)
cached = node_cache[:company][index]
@index = cached.interval.end if cached
return cached
end
i0, s0 = index, []
if input.index('[', index) == index
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('[')
r1 = nil
end
s0 << r1
if r1
# One or more `char` matches form the company name.
s2, i2 = [], index
loop do
r3 = _nt_char
if r3
s2 << r3
else
break
end
end
if s2.empty?
self.index = i2
r2 = nil
else
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
end
s0 << r2
if r2
if input.index(']', index) == index
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure(']')
r4 = nil
end
s0 << r4
end
end
if s0.last
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
r0.extend(Company0)
else
self.index = i0
r0 = nil
end
node_cache[:company][start_index] = r0
return r0
end
# Accessors mixed into a `subject` node matched through the numbered
# alternative. Element order: 0 number+, 1 extension*, 2 optional ':',
# 3 optional company.
module Subject0
def number
elements[0]
end
def extensions
elements[1]
end
def company
elements[3]
end
end
# Rule `subject`: ordered choice between a bare `extension` and the sequence
# number+ extension* ':'? company?. Only the second alternative is extended
# with Subject0; a bare extension is returned as is. Memoised in
# node_cache[:subject].
def _nt_subject
start_index = index
if node_cache[:subject].has_key?(index)
cached = node_cache[:subject][index]
@index = cached.interval.end if cached
return cached
end
i0 = index
# First alternative: a bare extension.
r1 = _nt_extension
if r1
r0 = r1
else
# Second alternative: numbered subject.
i2, s2 = index, []
s3, i3 = [], index
loop do
r4 = _nt_number
if r4
s3 << r4
else
break
end
end
if s3.empty?
self.index = i3
r3 = nil
else
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
end
s2 << r3
if r3
s5, i5 = [], index
loop do
r6 = _nt_extension
if r6
s5 << r6
else
break
end
end
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
s2 << r5
if r5
# Optional ':' separator; an empty node is used when it is absent.
if input.index(':', index) == index
r8 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure(':')
r8 = nil
end
if r8
r7 = r8
else
r7 = instantiate_node(SyntaxNode,input, index...index)
end
s2 << r7
if r7
# Optional trailing company.
r10 = _nt_company
if r10
r9 = r10
else
r9 = instantiate_node(SyntaxNode,input, index...index)
end
s2 << r9
end
end
end
if s2.last
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
r2.extend(Subject0)
else
self.index = i2
r2 = nil
end
if r2
r0 = r2
else
self.index = i0
r0 = nil
end
end
node_cache[:subject][start_index] = r0
return r0
end
# Rule `extension`: ordered choice, tried in this order: location_extension,
# date_extension, name_extension. Returns the first match, or nil with the
# cursor restored. Memoised in node_cache[:extension].
def _nt_extension
start_index = index
if node_cache[:extension].has_key?(index)
cached = node_cache[:extension][index]
@index = cached.interval.end if cached
return cached
end
i0 = index
r1 = _nt_location_extension
if r1
r0 = r1
else
r2 = _nt_date_extension
if r2
r0 = r2
else
r3 = _nt_name_extension
if r3
r0 = r3
else
self.index = i0
r0 = nil
end
end
end
node_cache[:extension][start_index] = r0
return r0
end
# Rule `number` as `subject_class+`: one or more subject_class matches.
# NOTE: this is the second definition of `_nt_number` in this module and,
# being the later one, it is the definition Ruby keeps. Every caller of
# `_nt_number`, including the `line` rule, therefore runs this version.
def _nt_number
start_index = index
if node_cache[:number].has_key?(index)
cached = node_cache[:number][index]
@index = cached.interval.end if cached
return cached
end
s0, i0 = [], index
loop do
r1 = _nt_subject_class
if r1
s0 << r1
else
break
end
end
if s0.empty?
self.index = i0
r0 = nil
else
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
end
node_cache[:number][start_index] = r0
return r0
end
# Marker module mixed into a matched `subject_class` node; adds no accessors.
module SubjectClass0
end
# Rule `subject_class`: ('.' / '-' / '/')* [0-9]+, i.e. any number of
# separator characters followed by at least one digit. Memoised in
# node_cache[:subject_class].
def _nt_subject_class
start_index = index
if node_cache[:subject_class].has_key?(index)
cached = node_cache[:subject_class][index]
@index = cached.interval.end if cached
return cached
end
i0, s0 = index, []
s1, i1 = [], index
# Zero or more separators: '.', '-' or '/' tried in that order.
loop do
i2 = index
if input.index('.', index) == index
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('.')
r3 = nil
end
if r3
r2 = r3
else
if input.index('-', index) == index
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('-')
r4 = nil
end
if r4
r2 = r4
else
if input.index('/', index) == index
r5 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('/')
r5 = nil
end
if r5
r2 = r5
else
self.index = i2
r2 = nil
end
end
end
if r2
s1 << r2
else
break
end
end
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
s0 << r1
if r1
# One or more digits.
s6, i6 = [], index
loop do
if input.index(Regexp.new('[0-9]'), index) == index
r7 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
r7 = nil
end
if r7
s6 << r7
else
break
end
end
if s6.empty?
self.index = i6
r6 = nil
else
r6 = instantiate_node(SyntaxNode,input, i6...index, s6)
end
s0 << r6
end
if s0.last
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
r0.extend(SubjectClass0)
else
self.index = i0
r0 = nil
end
node_cache[:subject_class][start_index] = r0
return r0
end
# Accessor mixed into a matched `date_extension` node: the text between the
# double quotes.
module DateExtension0
def date
elements[1]
end
end
# Rule `date_extension`: '"' [. 0-9]+ '"'. Returns a node extended with
# DateExtension0, or nil with the cursor restored. Memoised in
# node_cache[:date_extension].
def _nt_date_extension
start_index = index
if node_cache[:date_extension].has_key?(index)
cached = node_cache[:date_extension][index]
@index = cached.interval.end if cached
return cached
end
i0, s0 = index, []
if input.index('"', index) == index
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('"')
r1 = nil
end
s0 << r1
if r1
# One or more of: dot, space, digit.
s2, i2 = [], index
loop do
if input.index(Regexp.new('[. 0-9]'), index) == index
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
r3 = nil
end
if r3
s2 << r3
else
break
end
end
if s2.empty?
self.index = i2
r2 = nil
else
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
end
s0 << r2
if r2
if input.index('"', index) == index
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('"')
r4 = nil
end
s0 << r4
end
end
if s0.last
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
r0.extend(DateExtension0)
else
self.index = i0
r0 = nil
end
node_cache[:date_extension][start_index] = r0
return r0
end
# Accessor mixed into a matched `location_extension` node: the subject node
# between the parentheses.
module LocationExtension0
def location
elements[1]
end
end
# Rule `location_extension`: '(' subject ')'. The inner part is parsed with
# the full `subject` rule, so the two rules are mutually recursive. Memoised
# in node_cache[:location_extension].
def _nt_location_extension
start_index = index
if node_cache[:location_extension].has_key?(index)
cached = node_cache[:location_extension][index]
@index = cached.interval.end if cached
return cached
end
i0, s0 = index, []
if input.index('(', index) == index
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure('(')
r1 = nil
end
s0 << r1
if r1
r2 = _nt_subject
s0 << r2
if r2
if input.index(')', index) == index
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure(')')
r3 = nil
end
s0 << r3
end
end
if s0.last
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
r0.extend(LocationExtension0)
else
self.index = i0
r0 = nil
end
node_cache[:location_extension][start_index] = r0
return r0
end
# Marker modules for `name_extension`: NameExtension0 tags the inner
# "space + character" pair, NameExtension1 tags the whole match. Neither adds
# accessors.
module NameExtension0
end
module NameExtension1
end
# Rule `name_extension`: [A-Z] ([,A-Z] / ' ' [,0-9A-Z])+ - an upper-case
# letter followed by one or more of: a comma or upper-case letter, or a space
# that is immediately followed by a comma, digit or upper-case letter.
# A single letter on its own therefore does not match. Memoised in
# node_cache[:name_extension].
def _nt_name_extension
start_index = index
if node_cache[:name_extension].has_key?(index)
cached = node_cache[:name_extension][index]
@index = cached.interval.end if cached
return cached
end
i0, s0 = index, []
if input.index(Regexp.new('[A-Z]'), index) == index
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
r1 = nil
end
s0 << r1
if r1
s2, i2 = [], index
loop do
i3 = index
if input.index(Regexp.new('[,A-Z]'), index) == index
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
r4 = nil
end
if r4
r3 = r4
else
# Alternative: a space followed by [,0-9A-Z]. If the follower is missing,
# the cursor is rewound so the space is not consumed.
i5, s5 = index, []
if input.index(' ', index) == index
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure(' ')
r6 = nil
end
s5 << r6
if r6
if input.index(Regexp.new('[,0-9A-Z]'), index) == index
r7 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
r7 = nil
end
s5 << r7
end
if s5.last
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
r5.extend(NameExtension0)
else
self.index = i5
r5 = nil
end
if r5
r3 = r5
else
self.index = i3
r3 = nil
end
end
if r3
s2 << r3
else
break
end
end
if s2.empty?
self.index = i2
r2 = nil
else
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
end
s0 << r2
end
if s0.last
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
r0.extend(NameExtension1)
else
self.index = i0
r0 = nil
end
node_cache[:name_extension][start_index] = r0
return r0
end
# Rule `message`: one or more `char` matches. Returns a SyntaxNode over the
# matched span, or nil with the cursor restored when no char matches.
# Memoised in node_cache[:message].
def _nt_message
start_index = index
if node_cache[:message].has_key?(index)
cached = node_cache[:message][index]
@index = cached.interval.end if cached
return cached
end
s0, i0 = [], index
loop do
r1 = _nt_char
if r1
s0 << r1
else
break
end
end
if s0.empty?
self.index = i0
r0 = nil
else
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
end
node_cache[:message][start_index] = r0
return r0
end
# Rule `char`: a single character from the class ['A-Z0-9& ()-/].
# Note that `)-/` inside the class is a range, so besides '(' the class
# accepts every character from ')' to '/': ) * + , - . /
# Memoised in node_cache[:char].
def _nt_char
start_index = index
if node_cache[:char].has_key?(index)
cached = node_cache[:char][index]
@index = cached.interval.end if cached
return cached
end
if input.index(Regexp.new('[\'A-Z0-9& ()-/]'), index) == index
r0 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
r0 = nil
end
node_cache[:char][start_index] = r0
return r0
end
# Marker module mixed into the "\r" "\n"? form of a matched `eol` node; adds
# no accessors.
module Eol0
end
# Rule `eol`: ("\r" "\n"?) / "\n" - a carriage return optionally followed by
# a line feed, or a lone line feed. Memoised in node_cache[:eol].
def _nt_eol
start_index = index
if node_cache[:eol].has_key?(index)
cached = node_cache[:eol][index]
@index = cached.interval.end if cached
return cached
end
i0 = index
# First alternative: "\r" with an optional "\n".
i1, s1 = index, []
if input.index("\r", index) == index
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure("\r")
r2 = nil
end
s1 << r2
if r2
if input.index("\n", index) == index
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure("\n")
r4 = nil
end
if r4
r3 = r4
else
r3 = instantiate_node(SyntaxNode,input, index...index)
end
s1 << r3
end
if s1.last
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
r1.extend(Eol0)
else
self.index = i1
r1 = nil
end
if r1
r0 = r1
else
# Second alternative: a lone "\n".
if input.index("\n", index) == index
r5 = instantiate_node(SyntaxNode,input, index...(index + 1))
@index += 1
else
terminal_parse_failure("\n")
r5 = nil
end
if r5
r0 = r5
else
self.index = i0
r0 = nil
end
end
node_cache[:eol][start_index] = r0
return r0
end
end
# Concrete parser class: a Treetop CompiledParser with the LonclassGrammar
# rule methods mixed in.
class LonclassGrammarParser < Treetop::Runtime::CompiledParser
include LonclassGrammar
end
# Treetop grammar for LONCLASS index lines of the form
#
#   <line number>|[<company>]<subjects> |<message><eol>
#
# The compiled parser has to be regenerated after this file is edited.
grammar LonclassGrammar
  rule lonclass
    lines
  end

  rule lines
    line+
  end

  # The leading field is labelled `number` so the node keeps its `number`
  # accessor. Its rule is named `line_number` because two rules both called
  # `number` compile to two `_nt_number` methods, of which only the later
  # one (the subject number) survives.
  rule line
    number:line_number '|' company:company? subjects:subject* ' '+ '|' message eol
  end

  rule line_number
    [0-9]+
  end

  rule company
    '[' name:char+ ']'
  end

  # Either a bare extension, or a classification number with optional
  # extensions, an optional ':' relation marker and an optional company.
  rule subject
    extension / (number:number+ extensions:extension* ':'? company:company?)
  end

  rule extension
    location_extension / date_extension / name_extension
  end

  rule number
    subject_class+
  end

  rule subject_class
    ('.' / '-' / '/')* [0-9]+
  end

  rule date_extension
    '"' date:[. 0-9]+ '"'
  end

  rule location_extension
    '(' location:subject ')'
  end

  rule name_extension
    ([A-Z] ([,A-Z] / ' ' [,0-9A-Z])+)
  end

  rule message
    char+
  end

  # NOTE(review): `)-/` is a character range, so this class also accepts
  # * + , and . in addition to ( ) - / - confirm that this is intended.
  rule char
    ['A-Z0-9& ()-/]
  end

  rule eol
    ("\r" "\n"?) / "\n"
  end
end
#!/usr/bin/env spec
# via "sudo gem install rspec"
require 'lonclass'
# Examples for Lonclass::Parser. Each one feeds a single raw line to the
# parser and compares the Hash form of the result. Descriptions are kept
# unique and match the input actually used, so a failure report identifies
# the case.
describe "Lonclass parser" do
  # Parses +string+ and converts every resulting line to a plain Hash.
  def process(string)
    Lonclass::Parser.parse(string).map { |l| l.to_hash }
  end

  it "should parse a line with a blank company name" do
    str = "80276|[ ] |COMPANIES\n"
    process(str).should == [{
      :subjects=>{:right=>nil, :left=>nil},
      :message=>"COMPANIES",
      :number=>"80276",
    }]
  end

  it "should parse a line with a company name" do
    str = "80277|[A AND W] |A AND W (FAST FOOD COMPANY)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>nil, :name=>nil, :location=>nil, :date=>nil, :company=>"A AND W"},
        :right=>nil,
      },
      :message=>"A AND W (FAST FOOD COMPANY)",
      :number=>"80277",
    }]
  end

  it "should parse a line that has a number in the company name" do
    str = "80342|[AIRE O2] |AIRE O2 (COMPANY)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>nil, :name=>nil, :location=>nil, :date=>nil, :company=>"AIRE O2"},
        :right=>nil,
      },
      :message=>"AIRE O2 (COMPANY)",
      :number=>"80342",
    }]
  end

  it "should parse a line with a company name and a quote in the message" do
    str = "80279|[A J BRETT] |A' J BRETT (ANTIQUE RESTORERS)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>nil, :name=>nil, :location=>nil, :date=>nil, :company=>"A J BRETT"},
        :right=>nil,
      },
      :message=>"A' J BRETT (ANTIQUE RESTORERS)",
      :number=>"80279",
    }]
  end

  it "should parse a line with a company name and a subject number" do
    str = "80285|[ABB TRANSPORTATION].007.004.761 |JOB LOSSES AT ABB TRANSPORTATION\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"007.004.761", :name=>nil, :location=>nil, :date=>nil, :company=>"ABB TRANSPORTATION"},
        :right=>nil
      },
      :message=>"JOB LOSSES AT ABB TRANSPORTATION",
      :number=>"80285",
    }]
  end

  it "should parse a line with a company name and a subject number with a name extension" do
    str = "80309|[AD DAF].008.02LEYLAND DAF |LLEYLAND DAF (COMPANY)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"008.02", :name=>"LEYLAND DAF", :location=>nil, :date=>nil, :company=>"AD DAF"},
        :right=>nil
      },
      :message=>"LLEYLAND DAF (COMPANY)",
      :number=>"80309",
    }]
  end

  it "should parse a line with a company name and a conjoined subject number" do
    str = "80720|[BEDFORD].008.24TRUCK.004.6 |DECLINE OF BEDFORD TRUCKS (COMPANY)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"008.24", :name=>"TRUCK", :location=>nil, :date=>nil, :company=>"BEDFORD"},
        :right => { :number=>"004.6", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
      },
      :message=>"DECLINE OF BEDFORD TRUCKS (COMPANY)",
      :number=>"80720",
    }]
  end

  it "should parse a line with a company name and 2 related subject numbers" do
    str = "80586|[BARLOW CLOWES]658.111:332.6 |BARLOW CLOWES INVESTMENT GROUP\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"658.111", :name=>nil, :location=>nil, :date=>nil, :company=>"BARLOW CLOWES"},
        :right => { :number=>"332.6", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
      },
      :message=>"BARLOW CLOWES INVESTMENT GROUP",
      :number=>"80586",
    }]
  end

  it "should parse a line with a company name and 4 related subject numbers, the last with a location" do
    str = "80316|[ADIDAS]301.161.1:3-058.1:796.007.009.031:796.334.1.007(429) |ADIDAS BOOT MONEY SCANDAL (PAYMENTS TO AMATEUR WELSH RUGBY PLAYERS)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"301.161.1", :name=>nil, :location=>nil, :date=>nil, :company=>"ADIDAS"},
        :right => {
          :left => {:number=>"3-058.1", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
          :right => {
            :left => {:number=>"796.007.009.031", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
            :right => {:number=>"796.334.1.007", :name=>nil, :location=>"429", :date=>nil, :company=>nil},
          }
        }
      },
      :message=>"ADIDAS BOOT MONEY SCANDAL (PAYMENTS TO AMATEUR WELSH RUGBY PLAYERS)",
      :number=>"80316",
    }]
  end

  it "should parse a line with a company name and subject number with a date" do
    str = "80404|[AMERICAN EXPRESS].093\"1991\" |AMERICAN EXPRESS BANK AWARD 1991\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"093", :name=>nil, :location=>nil, :date=>"1991", :company=>"AMERICAN EXPRESS"},
        :right => nil
      },
      :message=>"AMERICAN EXPRESS BANK AWARD 1991",
      :number=>"80404",
    }]
  end

  it "should parse a line with a subject number and no company name" do
    str = "270789|621.452.002.793.2 |JET ENGINE MUFFLERS\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"621.452.002.793.2", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
        :right => nil
      },
      :message=>"JET ENGINE MUFFLERS",
      :number=>"270789",
    }]
  end

  it "should parse a line with a slash in the location extension" do
    str = "270790|321.61(421/425)JAMES II |JAMES II (BRITISH KING)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"321.61", :name=>"JAMES II", :location=>"421/425", :date=>nil, :company=>nil},
        :right => nil
      },
      :message=>"JAMES II (BRITISH KING)",
      :number=>"270790",
    }]
  end

  it "should parse a line with a relation after a name extension" do
    str = "80670|[BBC]654.192.77SUB:654.19:629.195 |BBC SUBSCRIPTION CHANNEL\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"654.192.77", :name=>"SUB", :location=>nil, :date=>nil, :company=>"BBC"},
        :right => {
          :left => {:number=>"654.19", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
          :right => {:number=>"629.195", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
        }
      },
      :message=>"BBC SUBSCRIPTION CHANNEL",
      :number=>"80670",
    }]
  end

  it "should parse a line with a location extension after a name extension" do
    str = "80681|[BBC]654.193ENGLISH(73) |BBC WORLD SERVICE RADIO BROADCAST TO USA\n"
    process(str).should == [{
      :subjects => {
        :left => { :number=>"654.193", :name=>"ENGLISH", :location=>"73", :date=>nil, :company=>"BBC"},
        :right => nil
      },
      :message=>"BBC WORLD SERVICE RADIO BROADCAST TO USA",
      :number=>"80681",
    }]
  end

  it "should parse a line with multiple subject numbers" do
    str = "80809|[BNOC]338.532.31:665.4/.5(261.2) |BNOC CUT PRICE OF NORTH SEA OIL\n"
    process(str).should == [{
      :subjects => {
        :left => { :number=>"338.532.31", :name=>nil, :location=>nil, :date=>nil, :company=>"BNOC"},
        :right => { :number=>"665.4/.5", :name=>nil, :location=>"261.2", :date=>nil, :company=>nil}
      },
      :message=>"BNOC CUT PRICE OF NORTH SEA OIL",
      :number=>"80809",
    }]
  end

  it "should parse a line with a company name followed by a location" do
    str = "80862|[BRENT WALKER](047.1) |BRENT WALKER ANNUAL REPORTS\n"
    process(str).should == [{
      :subjects => {
        :left => { :number=>nil, :name=>nil, :location=>"047.1", :date=>nil, :company=>"BRENT WALKER"},
        :right => nil
      },
      :message=>"BRENT WALKER ANNUAL REPORTS",
      :number=>"80862",
    }]
  end

  # Pending: without a block this example is only reported as not yet
  # implemented. The intended body is kept below for when it is enabled.
  it "should parse a line with a company name followed by a subject number and another company name" # do
  # str = "80923|[BRITISH LEYLAND].008.01[JAGUAR] |JAGUAR CARS LTD (BL SUB GROUP)\n"
  # process(str).should == [{
  # :subjects => {
  # :left => { :number=>nil, :name=>nil, :location=>nil, :date=>nil, :company=>"BRITISH LEYLAND"},
  # :right => { :number=>"008.01", :name=>nil, :location=>nil, :date=>nil, :company=>"JAGUAR"},
  # },
  # :message=>"JAGUAR CARS LTD (BL SUB GROUP)",
  # :number=>"80923",
  # }]
  # end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment