Created
April 1, 2009 07:58
-
-
Save metade/88607 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
data/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'rubygems' | |
require 'treetop' | |
require 'lonclass_grammar' | |
require 'pp' | |
# Data model and parser front-end for LONCLASS catalogue lines, e.g.
#   "80285|[ABB TRANSPORTATION].007.004.761 |JOB LOSSES AT ABB TRANSPORTATION\n"
# The low-level recognition is delegated to the Treetop-generated
# LonclassGrammarParser; this module turns its syntax tree into plain objects.
module Lonclass
  # One catalogue line: its line number, its subjects and its free-text message.
  class Line
    attr_accessor :number, :message, :subjects

    # number   - line number text.
    # subjects - object responding to #to_hash (a Node when built by Parser).
    # message  - message text.
    def initialize(number, subjects, message)
      @number = number
      @subjects = subjects
      @message = message
    end

    # Returns a Hash with :number, :subjects (converted via #to_hash) and :message.
    def to_hash
      { :number => number, :subjects => subjects.to_hash, :message => message }
    end
  end

  # Two-slot container used to chain subjects: the first appended object goes
  # to +left+, the second to +right+, and any further object is pushed down
  # into a nested Node held in +right+.
  class Node
    attr_accessor :left, :right

    # Appends +obj+ to the first free slot, nesting to the right when full.
    def <<(obj)
      if left.nil?
        self.left = obj
      elsif right.nil?
        self.right = obj
      elsif right.kind_of?(Node)
        right << obj
      else
        # Both slots hold leaves: move the current right leaf and the new
        # object into a fresh Node that becomes the right child.
        nested = Node.new
        nested << right
        nested << obj
        self.right = nested
      end
    end

    # True when nothing has been appended yet.
    def empty?
      left.nil? && right.nil?
    end

    # Returns { :left => ..., :right => ... } with each child converted via
    # #to_hash, or nil for an unset child.
    def to_hash
      { :left => (left ? left.to_hash : nil), :right => (right ? right.to_hash : nil) }
    end
  end

  # A single subject entry of a line.
  class Subject
    attr_accessor :number, :name, :date, :location, :company

    # params - Hash of attribute name => value; every key must name one of the
    #          accessors above (an unknown key raises NoMethodError).
    def initialize(params = {})
      params.each { |key, value| send("#{key}=", value) }
    end

    # Returns a Hash of all five attributes.
    def to_hash
      { :number => number,
        :name => name,
        :date => date,
        :location => location,
        :company => company }
    end
  end

  # Converts raw text into Line objects using the generated grammar parser.
  class Parser
    # Convenience wrapper: builds a Parser and parses +string+.
    def self.parse(string)
      new.parse(string)
    end

    def initialize
      @grammar = LonclassGrammarParser.new
    end

    # Parses +string+ (one or more lines).
    #
    # Returns an Array of Line objects, or an empty Hash when the grammar
    # rejects the input. The Hash return is kept because existing callers
    # compare the result with {}; failure details can be read from
    # @grammar.terminal_failures.
    def parse(string)
      doc = @grammar.parse(string)
      return {} if doc.nil?

      doc.elements.map { |line_node| build_line(line_node) }
    end

    protected

    # Builds a Line from one `line` syntax node.
    def build_line(line_node)
      subjects = Node.new
      line_node.subjects.elements.each { |subject_node| subjects << build_subject(subject_node) }

      # fixme: could be parsed better
      # The line-level company (the leading "[...]") is attached to the first
      # subject; a placeholder Subject is created when the line has none.
      company = line_node.company.respond_to?(:name) ? line_node.company.name.text_value : nil
      if company && !company.strip.empty?
        subjects << Subject.new if subjects.empty?
        subjects.left.company = company
      end

      Line.new(line_node.number.text_value, subjects, line_node.message.text_value)
    end

    # Builds a Subject from one `subject` syntax node.
    def build_subject(subject_node)
      number = subject_node.respond_to?(:number) ? subject_node.number.text_value : nil
      number = number.sub(/\A\./, '') if number # drop a single leading "."
      name, date, location, company = extract_extensions(subject_node)
      Subject.new(:number => number, :name => name, :date => date,
                  :location => location, :company => company)
    end

    # Returns [name, date, location, company] for a subject node.
    #
    # A node with an `extensions` accessor is a numbered subject whose
    # extensions (and optional trailing company) are inspected; any other node
    # is treated as a single bare extension.
    def extract_extensions(subject)
      name = date = location = company = nil
      if subject.respond_to?(:extensions)
        company_node = subject.company
        # String#blank? is not core Ruby; an unmatched optional company has
        # empty text, so strip.empty? covers the same case.
        company = company_node.name.text_value unless company_node.text_value.strip.empty?
        extensions = subject.extensions.elements
      else
        extensions = [subject]
      end

      extensions.each do |extension|
        if extension.respond_to?(:date)
          date = extension.date.text_value
        elsif extension.respond_to?(:location)
          location = extension.location.text_value
        else
          name = extension.text_value
        end
      end
      [name, date, location, company]
    end
  end
end
# Script entry point: parse the data file line by line, printing each line's
# index, its raw text and the parsed structure, and stop at the first line the
# grammar rejects. The file path may be given as the first command-line
# argument; it defaults to the bundled data file.
if __FILE__ == $0
  path = ARGV.first || 'data/converted-LONCLASS_con.txt'
  # File.foreach streams the lines and closes the file when iteration ends
  # (String is not Enumerable on Ruby 1.9+, so String#each_with_index is gone).
  File.foreach(path).each_with_index do |line, i|
    puts i
    puts line
    details = Lonclass::Parser.parse(line)
    break if details == {} # Parser.parse returns {} on a parse failure
    pp details.map { |d| d.to_hash }
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module LonclassGrammar | |
include Treetop::Runtime | |
# Name of the rule parsing starts from: @root when set, otherwise :lonclass.
def root | |
@root || :lonclass | |
end | |
# Rule `lonclass`: simply delegates to the `lines` rule.
# The result (node or nil) is memoised in node_cache[:lonclass] keyed by the
# start index; on a cache hit the position is moved to the end of the cached
# node when there is one.
def _nt_lonclass | |
start_index = index | |
if node_cache[:lonclass].has_key?(index) | |
cached = node_cache[:lonclass][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
r0 = _nt_lines | |
node_cache[:lonclass][start_index] = r0 | |
return r0 | |
end | |
# Rule `lines`: one or more `line` nodes.
# Returns a SyntaxNode wrapping the matched lines, or nil (with the position
# restored) when not even one line matches. Memoised in node_cache[:lines].
def _nt_lines | |
start_index = index | |
if node_cache[:lines].has_key?(index) | |
cached = node_cache[:lines][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
s0, i0 = [], index | |
# Collect `line` matches until the rule stops matching.
loop do | |
r1 = _nt_line | |
if r1 | |
s0 << r1 | |
else | |
break | |
end | |
end | |
if s0.empty? | |
self.index = i0 | |
r0 = nil | |
else | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
end | |
node_cache[:lines][start_index] = r0 | |
return r0 | |
end | |
# Named accessors mixed into every `line` node. The indices are positions in
# the sequence matched by _nt_line:
# number(0) '|'(1) company?(2) subject*(3) ' '+(4) '|'(5) message(6) eol(7).
module Line0 | |
def number | |
elements[0] | |
end | |
def company | |
elements[2] | |
end | |
def subjects | |
elements[3] | |
end | |
def message | |
elements[6] | |
end | |
def eol | |
elements[7] | |
end | |
end | |
# Rule `line`: number '|' company? subject* ' '+ '|' message eol.
# Each sequence element is pushed onto s0 (nil on failure) and the next
# element is only attempted when the previous one matched, so `s0.last` being
# non-nil means the whole sequence matched. Returns a SyntaxNode extended
# with Line0, or nil with the position restored. Memoised in node_cache[:line].
def _nt_line | |
start_index = index | |
if node_cache[:line].has_key?(index) | |
cached = node_cache[:line][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0, s0 = index, [] | |
r1 = _nt_number | |
s0 << r1 | |
if r1 | |
if input.index('|', index) == index | |
r2 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('|') | |
r2 = nil | |
end | |
s0 << r2 | |
if r2 | |
# Optional company: an empty node stands in when it is absent.
r4 = _nt_company | |
if r4 | |
r3 = r4 | |
else | |
r3 = instantiate_node(SyntaxNode,input, index...index) | |
end | |
s0 << r3 | |
if r3 | |
# Zero or more subjects.
s5, i5 = [], index | |
loop do | |
r6 = _nt_subject | |
if r6 | |
s5 << r6 | |
else | |
break | |
end | |
end | |
r5 = instantiate_node(SyntaxNode,input, i5...index, s5) | |
s0 << r5 | |
if r5 | |
# One or more spaces before the second '|'.
s7, i7 = [], index | |
loop do | |
if input.index(' ', index) == index | |
r8 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure(' ') | |
r8 = nil | |
end | |
if r8 | |
s7 << r8 | |
else | |
break | |
end | |
end | |
if s7.empty? | |
self.index = i7 | |
r7 = nil | |
else | |
r7 = instantiate_node(SyntaxNode,input, i7...index, s7) | |
end | |
s0 << r7 | |
if r7 | |
if input.index('|', index) == index | |
r9 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('|') | |
r9 = nil | |
end | |
s0 << r9 | |
if r9 | |
r10 = _nt_message | |
s0 << r10 | |
if r10 | |
r11 = _nt_eol | |
s0 << r11 | |
end | |
end | |
end | |
end | |
end | |
end | |
end | |
if s0.last | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
r0.extend(Line0) | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
node_cache[:line][start_index] = r0 | |
return r0 | |
end | |
# Rule `number` (first definition): one or more digits [0-9].
# NOTE(review): this module defines `_nt_number` a second time further down
# (built from `subject_class`); in Ruby the later definition replaces this
# one, so this digits-only version is never the one that runs. Both
# definitions also use the same node_cache[:number] key.
def _nt_number | |
start_index = index | |
if node_cache[:number].has_key?(index) | |
cached = node_cache[:number][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
s0, i0 = [], index | |
loop do | |
if input.index(Regexp.new('[0-9]'), index) == index | |
r1 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
r1 = nil | |
end | |
if r1 | |
s0 << r1 | |
else | |
break | |
end | |
end | |
if s0.empty? | |
self.index = i0 | |
r0 = nil | |
else | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
end | |
node_cache[:number][start_index] = r0 | |
return r0 | |
end | |
# Accessor mixed into `company` nodes: `name` is the text between the
# brackets (element 1 of the sequence '[' char+ ']').
module Company0 | |
def name | |
elements[1] | |
end | |
end | |
# Rule `company`: '[' followed by one or more `char` and a closing ']'.
# Returns a SyntaxNode extended with Company0, or nil with the position
# restored. Memoised in node_cache[:company].
def _nt_company | |
start_index = index | |
if node_cache[:company].has_key?(index) | |
cached = node_cache[:company][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0, s0 = index, [] | |
if input.index('[', index) == index | |
r1 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('[') | |
r1 = nil | |
end | |
s0 << r1 | |
if r1 | |
# One or more `char` nodes form the company name.
s2, i2 = [], index | |
loop do | |
r3 = _nt_char | |
if r3 | |
s2 << r3 | |
else | |
break | |
end | |
end | |
if s2.empty? | |
self.index = i2 | |
r2 = nil | |
else | |
r2 = instantiate_node(SyntaxNode,input, i2...index, s2) | |
end | |
s0 << r2 | |
if r2 | |
if input.index(']', index) == index | |
r4 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure(']') | |
r4 = nil | |
end | |
s0 << r4 | |
end | |
end | |
if s0.last | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
r0.extend(Company0) | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
node_cache[:company][start_index] = r0 | |
return r0 | |
end | |
# Accessors mixed into numbered `subject` nodes. Indices refer to the
# sequence number+(0) extension*(1) ':'?(2) company?(3).
module Subject0 | |
def number | |
elements[0] | |
end | |
def extensions | |
elements[1] | |
end | |
def company | |
elements[3] | |
end | |
end | |
# Rule `subject`: either a bare `extension`, or the sequence
# number+ extension* ':'? company?.
# The bare extension is tried first and returned as-is (without Subject0);
# only the sequence alternative is extended with Subject0. Returns nil with
# the position restored when neither matches. Memoised in
# node_cache[:subject].
def _nt_subject | |
start_index = index | |
if node_cache[:subject].has_key?(index) | |
cached = node_cache[:subject][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0 = index | |
r1 = _nt_extension | |
if r1 | |
r0 = r1 | |
else | |
i2, s2 = index, [] | |
# One or more `number` nodes.
s3, i3 = [], index | |
loop do | |
r4 = _nt_number | |
if r4 | |
s3 << r4 | |
else | |
break | |
end | |
end | |
if s3.empty? | |
self.index = i3 | |
r3 = nil | |
else | |
r3 = instantiate_node(SyntaxNode,input, i3...index, s3) | |
end | |
s2 << r3 | |
if r3 | |
# Zero or more extensions.
s5, i5 = [], index | |
loop do | |
r6 = _nt_extension | |
if r6 | |
s5 << r6 | |
else | |
break | |
end | |
end | |
r5 = instantiate_node(SyntaxNode,input, i5...index, s5) | |
s2 << r5 | |
if r5 | |
# Optional ':'; an empty node stands in when it is absent.
if input.index(':', index) == index | |
r8 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure(':') | |
r8 = nil | |
end | |
if r8 | |
r7 = r8 | |
else | |
r7 = instantiate_node(SyntaxNode,input, index...index) | |
end | |
s2 << r7 | |
if r7 | |
# Optional trailing company; an empty node stands in when absent.
r10 = _nt_company | |
if r10 | |
r9 = r10 | |
else | |
r9 = instantiate_node(SyntaxNode,input, index...index) | |
end | |
s2 << r9 | |
end | |
end | |
end | |
if s2.last | |
r2 = instantiate_node(SyntaxNode,input, i2...index, s2) | |
r2.extend(Subject0) | |
else | |
self.index = i2 | |
r2 = nil | |
end | |
if r2 | |
r0 = r2 | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
end | |
node_cache[:subject][start_index] = r0 | |
return r0 | |
end | |
# Rule `extension`: ordered choice, tried in this order:
# location_extension, then date_extension, then name_extension.
# Returns the first alternative that matches, or nil with the position
# restored. Memoised in node_cache[:extension].
def _nt_extension | |
start_index = index | |
if node_cache[:extension].has_key?(index) | |
cached = node_cache[:extension][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0 = index | |
r1 = _nt_location_extension | |
if r1 | |
r0 = r1 | |
else | |
r2 = _nt_date_extension | |
if r2 | |
r0 = r2 | |
else | |
r3 = _nt_name_extension | |
if r3 | |
r0 = r3 | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
end | |
end | |
node_cache[:extension][start_index] = r0 | |
return r0 | |
end | |
# Rule `number` (second definition): one or more `subject_class` nodes.
# NOTE(review): this is the second `def _nt_number` in the module and
# therefore the one in effect; it is what both `line` and `subject` call.
# Returns nil with the position restored when no subject_class matches.
# Memoised in node_cache[:number].
def _nt_number | |
start_index = index | |
if node_cache[:number].has_key?(index) | |
cached = node_cache[:number][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
s0, i0 = [], index | |
loop do | |
r1 = _nt_subject_class | |
if r1 | |
s0 << r1 | |
else | |
break | |
end | |
end | |
if s0.empty? | |
self.index = i0 | |
r0 = nil | |
else | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
end | |
node_cache[:number][start_index] = r0 | |
return r0 | |
end | |
# Marker module mixed into `subject_class` nodes; it defines no accessors.
module SubjectClass0 | |
end | |
# Rule `subject_class`: zero or more separators ('.', '-' or '/') followed by
# one or more digits [0-9].
# Returns a SyntaxNode extended with SubjectClass0, or nil with the position
# restored when no digit follows. Memoised in node_cache[:subject_class].
def _nt_subject_class | |
start_index = index | |
if node_cache[:subject_class].has_key?(index) | |
cached = node_cache[:subject_class][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0, s0 = index, [] | |
# Zero or more separator characters, tried in the order '.', '-', '/'.
s1, i1 = [], index | |
loop do | |
i2 = index | |
if input.index('.', index) == index | |
r3 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('.') | |
r3 = nil | |
end | |
if r3 | |
r2 = r3 | |
else | |
if input.index('-', index) == index | |
r4 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('-') | |
r4 = nil | |
end | |
if r4 | |
r2 = r4 | |
else | |
if input.index('/', index) == index | |
r5 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('/') | |
r5 = nil | |
end | |
if r5 | |
r2 = r5 | |
else | |
self.index = i2 | |
r2 = nil | |
end | |
end | |
end | |
if r2 | |
s1 << r2 | |
else | |
break | |
end | |
end | |
r1 = instantiate_node(SyntaxNode,input, i1...index, s1) | |
s0 << r1 | |
if r1 | |
# One or more digits.
s6, i6 = [], index | |
loop do | |
if input.index(Regexp.new('[0-9]'), index) == index | |
r7 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
r7 = nil | |
end | |
if r7 | |
s6 << r7 | |
else | |
break | |
end | |
end | |
if s6.empty? | |
self.index = i6 | |
r6 = nil | |
else | |
r6 = instantiate_node(SyntaxNode,input, i6...index, s6) | |
end | |
s0 << r6 | |
end | |
if s0.last | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
r0.extend(SubjectClass0) | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
node_cache[:subject_class][start_index] = r0 | |
return r0 | |
end | |
# Accessor mixed into `date_extension` nodes: `date` is the text between the
# double quotes (element 1 of the sequence).
module DateExtension0 | |
def date | |
elements[1] | |
end | |
end | |
# Rule `date_extension`: '"' followed by one or more characters from
# [. 0-9] (dot, space or digit) and a closing '"'.
# Returns a SyntaxNode extended with DateExtension0, or nil with the position
# restored. Memoised in node_cache[:date_extension].
def _nt_date_extension | |
start_index = index | |
if node_cache[:date_extension].has_key?(index) | |
cached = node_cache[:date_extension][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0, s0 = index, [] | |
if input.index('"', index) == index | |
r1 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('"') | |
r1 = nil | |
end | |
s0 << r1 | |
if r1 | |
s2, i2 = [], index | |
loop do | |
if input.index(Regexp.new('[. 0-9]'), index) == index | |
r3 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
r3 = nil | |
end | |
if r3 | |
s2 << r3 | |
else | |
break | |
end | |
end | |
if s2.empty? | |
self.index = i2 | |
r2 = nil | |
else | |
r2 = instantiate_node(SyntaxNode,input, i2...index, s2) | |
end | |
s0 << r2 | |
if r2 | |
if input.index('"', index) == index | |
r4 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('"') | |
r4 = nil | |
end | |
s0 << r4 | |
end | |
end | |
if s0.last | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
r0.extend(DateExtension0) | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
node_cache[:date_extension][start_index] = r0 | |
return r0 | |
end | |
# Accessor mixed into `location_extension` nodes: `location` is the node
# between the parentheses (element 1 of the sequence).
module LocationExtension0 | |
def location | |
elements[1] | |
end | |
end | |
# Rule `location_extension`: '(' followed by a `subject` and a closing ')'.
# Note the recursion: the content is parsed with the `subject` rule itself.
# Returns a SyntaxNode extended with LocationExtension0, or nil with the
# position restored. Memoised in node_cache[:location_extension].
def _nt_location_extension | |
start_index = index | |
if node_cache[:location_extension].has_key?(index) | |
cached = node_cache[:location_extension][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0, s0 = index, [] | |
if input.index('(', index) == index | |
r1 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure('(') | |
r1 = nil | |
end | |
s0 << r1 | |
if r1 | |
r2 = _nt_subject | |
s0 << r2 | |
if r2 | |
if input.index(')', index) == index | |
r3 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure(')') | |
r3 = nil | |
end | |
s0 << r3 | |
end | |
end | |
if s0.last | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
r0.extend(LocationExtension0) | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
node_cache[:location_extension][start_index] = r0 | |
return r0 | |
end | |
# Marker module mixed into the inner "space + character" pair of a name
# extension; it defines no accessors.
module NameExtension0 | |
end | |
# Marker module mixed into whole `name_extension` nodes; no accessors.
module NameExtension1 | |
end | |
# Rule `name_extension`: an upper-case letter [A-Z] followed by one or more
# of: a character from [,A-Z], or a space immediately followed by a
# character from [,0-9A-Z].
# Because a space is only consumed together with the character after it, a
# trailing space is left unconsumed. At least two characters are required.
# Returns a SyntaxNode extended with NameExtension1, or nil with the position
# restored. Memoised in node_cache[:name_extension].
def _nt_name_extension | |
start_index = index | |
if node_cache[:name_extension].has_key?(index) | |
cached = node_cache[:name_extension][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0, s0 = index, [] | |
if input.index(Regexp.new('[A-Z]'), index) == index | |
r1 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
r1 = nil | |
end | |
s0 << r1 | |
if r1 | |
s2, i2 = [], index | |
loop do | |
i3 = index | |
# First alternative: a single comma or upper-case letter.
if input.index(Regexp.new('[,A-Z]'), index) == index | |
r4 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
r4 = nil | |
end | |
if r4 | |
r3 = r4 | |
else | |
# Second alternative: a space followed by comma, digit or letter.
i5, s5 = index, [] | |
if input.index(' ', index) == index | |
r6 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure(' ') | |
r6 = nil | |
end | |
s5 << r6 | |
if r6 | |
if input.index(Regexp.new('[,0-9A-Z]'), index) == index | |
r7 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
r7 = nil | |
end | |
s5 << r7 | |
end | |
if s5.last | |
r5 = instantiate_node(SyntaxNode,input, i5...index, s5) | |
r5.extend(NameExtension0) | |
else | |
self.index = i5 | |
r5 = nil | |
end | |
if r5 | |
r3 = r5 | |
else | |
self.index = i3 | |
r3 = nil | |
end | |
end | |
if r3 | |
s2 << r3 | |
else | |
break | |
end | |
end | |
if s2.empty? | |
self.index = i2 | |
r2 = nil | |
else | |
r2 = instantiate_node(SyntaxNode,input, i2...index, s2) | |
end | |
s0 << r2 | |
end | |
if s0.last | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
r0.extend(NameExtension1) | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
node_cache[:name_extension][start_index] = r0 | |
return r0 | |
end | |
# Rule `message`: one or more `char` nodes.
# Returns a SyntaxNode wrapping them, or nil with the position restored when
# no char matches. Memoised in node_cache[:message].
def _nt_message | |
start_index = index | |
if node_cache[:message].has_key?(index) | |
cached = node_cache[:message][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
s0, i0 = [], index | |
loop do | |
r1 = _nt_char | |
if r1 | |
s0 << r1 | |
else | |
break | |
end | |
end | |
if s0.empty? | |
self.index = i0 | |
r0 = nil | |
else | |
r0 = instantiate_node(SyntaxNode,input, i0...index, s0) | |
end | |
node_cache[:message][start_index] = r0 | |
return r0 | |
end | |
# Rule `char`: a single character from the class ['A-Z0-9& ()-/].
# NOTE(review): inside the character class, `)-/` is a range from ')' to '/',
# so besides ')', '-' and '/' it also accepts '*', '+', ',' and '.'. This
# may or may not be intended; confirm before tightening the class.
# Returns a one-character SyntaxNode or nil. Memoised in node_cache[:char].
def _nt_char | |
start_index = index | |
if node_cache[:char].has_key?(index) | |
cached = node_cache[:char][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
if input.index(Regexp.new('[\'A-Z0-9& ()-/]'), index) == index | |
r0 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
r0 = nil | |
end | |
node_cache[:char][start_index] = r0 | |
return r0 | |
end | |
# Marker module mixed into the "\r" "\n"? form of an end-of-line node; it
# defines no accessors.
module Eol0 | |
end | |
# Rule `eol`: either "\r" optionally followed by "\n", or a lone "\n".
# The "\r" form is tried first and is extended with Eol0; a lone "\n" is
# returned as a plain SyntaxNode. Returns nil with the position restored when
# neither matches. Memoised in node_cache[:eol].
def _nt_eol | |
start_index = index | |
if node_cache[:eol].has_key?(index) | |
cached = node_cache[:eol][index] | |
@index = cached.interval.end if cached | |
return cached | |
end | |
i0 = index | |
i1, s1 = index, [] | |
if input.index("\r", index) == index | |
r2 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure("\r") | |
r2 = nil | |
end | |
s1 << r2 | |
if r2 | |
# Optional "\n" after "\r"; an empty node stands in when it is absent.
if input.index("\n", index) == index | |
r4 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure("\n") | |
r4 = nil | |
end | |
if r4 | |
r3 = r4 | |
else | |
r3 = instantiate_node(SyntaxNode,input, index...index) | |
end | |
s1 << r3 | |
end | |
if s1.last | |
r1 = instantiate_node(SyntaxNode,input, i1...index, s1) | |
r1.extend(Eol0) | |
else | |
self.index = i1 | |
r1 = nil | |
end | |
if r1 | |
r0 = r1 | |
else | |
if input.index("\n", index) == index | |
r5 = instantiate_node(SyntaxNode,input, index...(index + 1)) | |
@index += 1 | |
else | |
terminal_parse_failure("\n") | |
r5 = nil | |
end | |
if r5 | |
r0 = r5 | |
else | |
self.index = i0 | |
r0 = nil | |
end | |
end | |
node_cache[:eol][start_index] = r0 | |
return r0 | |
end | |
end | |
# Concrete parser class: a Treetop CompiledParser with the LonclassGrammar
# rule methods mixed in. This is the class Lonclass::Parser instantiates.
class LonclassGrammarParser < Treetop::Runtime::CompiledParser | |
include LonclassGrammar | |
end | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Treetop grammar for LONCLASS catalogue lines, e.g.
#   80285|[ABB TRANSPORTATION].007.004.761 |JOB LOSSES AT ABB TRANSPORTATION
grammar LonclassGrammar | |
# Entry rule: the whole input is a list of lines.
rule lonclass | |
lines | |
end | |
rule lines | |
line+ | |
end | |
# <number>|<optional [company]><subjects><spaces>|<message><end of line>
rule line | |
number '|' company:company? subjects:subject* ' '+ '|' message eol | |
end | |
# FIXME(review): `number` is declared twice in this grammar (here and again
# after `extension`). The generated parser defines `_nt_number` for both, and
# the later definition is the one in effect, so this digits-only rule is
# never used. Renaming one of them requires regenerating the parser.
rule number | |
[0-9]+ | |
end | |
rule company | |
'[' name:char+ ']' | |
end | |
# A bare extension, or a numbered subject with optional extensions, an
# optional ':' separator and an optional company.
rule subject | |
extension / (number:number+ extensions:extension* ':'? company:company?) | |
end | |
rule extension | |
location_extension / date_extension / name_extension | |
end | |
# Second declaration of `number` (see FIXME above): this is the effective one.
rule number | |
subject_class+ | |
end | |
# Optional separators followed by digits, e.g. ".007" or "-058" or "/425".
rule subject_class | |
('.' / '-' / '/')* [0-9]+ | |
end | |
rule date_extension | |
'"' date:[. 0-9]+ '"' | |
end | |
# The location is itself parsed with the `subject` rule (recursive).
rule location_extension | |
'(' location:subject ')' | |
end | |
rule name_extension | |
([A-Z] ([,A-Z] / ' ' [,0-9A-Z])+) | |
end | |
rule message | |
char+ | |
end | |
# NOTE(review): `)-/` inside the class is a character range (')' through '/'),
# so '*', '+', ',' and '.' are accepted as well; confirm this is intended.
rule char | |
['A-Z0-9& ()-/] | |
end | |
rule eol | |
("\r" "\n"?) / "\n" | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env spec | |
# via "sudo gem install rspec" | |
require 'lonclass' | |
# Behavioural examples for Lonclass::Parser. Each example feeds one raw
# catalogue line to the parser and compares the hash form of the result.
describe "Lonclass parser" do
  # Parses +string+ and returns the lines converted to plain hashes.
  def process(string)
    Lonclass::Parser.parse(string).map { |l| l.to_hash }
  end

  it "should parse a line with a blank company name" do
    str = "80276|[ ] |COMPANIES\n"
    process(str).should == [{
      :subjects=>{:right=>nil, :left=>nil},
      :message=>"COMPANIES",
      :number=>"80276",
    }]
  end

  it "should parse a line with a company name" do
    str = "80277|[A AND W] |A AND W (FAST FOOD COMPANY)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>nil, :name=>nil, :location=>nil, :date=>nil, :company=>"A AND W"},
        :right=>nil,
      },
      :message=>"A AND W (FAST FOOD COMPANY)",
      :number=>"80277",
    }]
  end

  it "should parse a line that has a number in the company name" do
    str = "80342|[AIRE O2] |AIRE O2 (COMPANY)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>nil, :name=>nil, :location=>nil, :date=>nil, :company=>"AIRE O2"},
        :right=>nil,
      },
      :message=>"AIRE O2 (COMPANY)",
      :number=>"80342",
    }]
  end

  it "should parse a line with a company name and a quote in the message" do
    str = "80279|[A J BRETT] |A' J BRETT (ANTIQUE RESTORERS)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>nil, :name=>nil, :location=>nil, :date=>nil, :company=>"A J BRETT"},
        :right=>nil,
      },
      :message=>"A' J BRETT (ANTIQUE RESTORERS)",
      :number=>"80279",
    }]
  end

  it "should parse a line with a company name and a subject number" do
    str = "80285|[ABB TRANSPORTATION].007.004.761 |JOB LOSSES AT ABB TRANSPORTATION\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"007.004.761", :name=>nil, :location=>nil, :date=>nil, :company=>"ABB TRANSPORTATION"},
        :right=>nil
      },
      :message=>"JOB LOSSES AT ABB TRANSPORTATION",
      :number=>"80285",
    }]
  end

  it "should parse a line with a company name and a subject number with a name extension" do
    str = "80309|[AD DAF].008.02LEYLAND DAF |LLEYLAND DAF (COMPANY)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"008.02", :name=>"LEYLAND DAF", :location=>nil, :date=>nil, :company=>"AD DAF"},
        :right=>nil
      },
      :message=>"LLEYLAND DAF (COMPANY)",
      :number=>"80309",
    }]
  end

  it "should parse a line with a company name and a conjoined subject number" do
    str = "80720|[BEDFORD].008.24TRUCK.004.6 |DECLINE OF BEDFORD TRUCKS (COMPANY)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"008.24", :name=>"TRUCK", :location=>nil, :date=>nil, :company=>"BEDFORD"},
        :right => { :number=>"004.6", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
      },
      :message=>"DECLINE OF BEDFORD TRUCKS (COMPANY)",
      :number=>"80720",
    }]
  end

  it "should parse a line with a company name and 2 related subject numbers" do
    str = "80586|[BARLOW CLOWES]658.111:332.6 |BARLOW CLOWES INVESTMENT GROUP\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"658.111", :name=>nil, :location=>nil, :date=>nil, :company=>"BARLOW CLOWES"},
        :right => { :number=>"332.6", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
      },
      :message=>"BARLOW CLOWES INVESTMENT GROUP",
      :number=>"80586",
    }]
  end

  # Previously shared its description with the example above; this input has
  # four related subject numbers, the last one carrying a location.
  it "should parse a line with a company name and several related subject numbers ending in a location" do
    str = "80316|[ADIDAS]301.161.1:3-058.1:796.007.009.031:796.334.1.007(429) |ADIDAS BOOT MONEY SCANDAL (PAYMENTS TO AMATEUR WELSH RUGBY PLAYERS)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"301.161.1", :name=>nil, :location=>nil, :date=>nil, :company=>"ADIDAS"},
        :right => {
          :left => {:number=>"3-058.1", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
          :right => {
            :left => {:number=>"796.007.009.031", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
            :right => {:number=>"796.334.1.007", :name=>nil, :location=>"429", :date=>nil, :company=>nil},
          }
        }
      },
      :message=>"ADIDAS BOOT MONEY SCANDAL (PAYMENTS TO AMATEUR WELSH RUGBY PLAYERS)",
      :number=>"80316",
    }]
  end

  it "should parse a line with a company name and subject number with a date" do
    str = "80404|[AMERICAN EXPRESS].093\"1991\" |AMERICAN EXPRESS BANK AWARD 1991\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"093", :name=>nil, :location=>nil, :date=>"1991", :company=>"AMERICAN EXPRESS"},
        :right => nil
      },
      :message=>"AMERICAN EXPRESS BANK AWARD 1991",
      :number=>"80404",
    }]
  end

  # Previously described as "company name and subject number with a date",
  # but this input has neither a company nor a date.
  it "should parse a line with only a subject number" do
    str = "270789|621.452.002.793.2 |JET ENGINE MUFFLERS\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"621.452.002.793.2", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
        :right => nil
      },
      :message=>"JET ENGINE MUFFLERS",
      :number=>"270789",
    }]
  end

  it "should parse a line with a slash in the location extension" do
    str = "270790|321.61(421/425)JAMES II |JAMES II (BRITISH KING)\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"321.61", :name=>"JAMES II", :location=>"421/425", :date=>nil, :company=>nil},
        :right => nil
      },
      :message=>"JAMES II (BRITISH KING)",
      :number=>"270790",
    }]
  end

  it "should parse a line with a relation after a name extension" do
    str = "80670|[BBC]654.192.77SUB:654.19:629.195 |BBC SUBSCRIPTION CHANNEL\n"
    process(str).should == [{
      :subjects=> {
        :left => { :number=>"654.192.77", :name=>"SUB", :location=>nil, :date=>nil, :company=>"BBC"},
        :right => {
          :left => {:number=>"654.19", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
          :right => {:number=>"629.195", :name=>nil, :location=>nil, :date=>nil, :company=>nil},
        }
      },
      :message=>"BBC SUBSCRIPTION CHANNEL",
      :number=>"80670",
    }]
  end

  it "should parse a line with a location extension after a name extension" do
    str = "80681|[BBC]654.193ENGLISH(73) |BBC WORLD SERVICE RADIO BROADCAST TO USA\n"
    process(str).should == [{
      :subjects => {
        :left => { :number=>"654.193", :name=>"ENGLISH", :location=>"73", :date=>nil, :company=>"BBC"},
        :right => nil
      },
      :message=>"BBC WORLD SERVICE RADIO BROADCAST TO USA",
      :number=>"80681",
    }]
  end

  it "should parse a line with multiple subject numbers" do
    str = "80809|[BNOC]338.532.31:665.4/.5(261.2) |BNOC CUT PRICE OF NORTH SEA OIL\n"
    process(str).should == [{
      :subjects => {
        :left => { :number=>"338.532.31", :name=>nil, :location=>nil, :date=>nil, :company=>"BNOC"},
        :right => { :number=>"665.4/.5", :name=>nil, :location=>"261.2", :date=>nil, :company=>nil}
      },
      :message=>"BNOC CUT PRICE OF NORTH SEA OIL",
      :number=>"80809",
    }]
  end

  # The input has a company *name* followed by a location; the description
  # used to say "company number".
  it "should parse a line with a company name followed by a location" do
    str = "80862|[BRENT WALKER](047.1) |BRENT WALKER ANNUAL REPORTS\n"
    process(str).should == [{
      :subjects => {
        :left => { :number=>nil, :name=>nil, :location=>"047.1", :date=>nil, :company=>"BRENT WALKER"},
        :right => nil
      },
      :message=>"BRENT WALKER ANNUAL REPORTS",
      :number=>"80862",
    }]
  end

  # Pending: declared without a block so it is reported as not yet
  # implemented. The intended body is kept below for reference.
  it "should parse a line with a company name followed by a subject number and another company name" # do
  # str = "80923|[BRITISH LEYLAND].008.01[JAGUAR] |JAGUAR CARS LTD (BL SUB GROUP)\n"
  # process(str).should == [{
  # :subjects => {
  # :left => { :number=>nil, :name=>nil, :location=>nil, :date=>nil, :company=>"BRITISH LEYLAND"},
  # :right => { :number=>"008.01", :name=>nil, :location=>nil, :date=>nil, :company=>"JAGUAR"},
  # },
  # :message=>"JAGUAR CARS LTD (BL SUB GROUP)",
  # :number=>"80923",
  # }]
  # end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment