Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@carlzulauf
Forked from Grundell/gist:7526597
Last active December 28, 2015 16:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carlzulauf/7526708 to your computer and use it in GitHub Desktop.
Save carlzulauf/7526708 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'net/http'
require 'pp'
require 'benchmark'
# A pleading found in the document. Members, in positional order:
#   no, position, person, party, replik - values taken from the heading line
#   content - text collected after the heading
#   point   - the Point that was current when the heading was found
Pleading = Struct.new(
  :no,
  :position,
  :person,
  :party,
  :replik,
  :content,
  :point
)

# A point of the document: its number and its title.
Point = Struct.new(
  :no,
  :title
)
# Splits a text document into points and pleadings.
#
# The text is scanned line by line:
# * a line matching POINT_REGEX starts a new Point,
# * a line matching PLEADING_REGEX starts a new Pleading attached to the
#   point that is current at that moment,
# * any other line is appended to the content of the latest pleading, as long
#   as that pleading belongs to the current point; otherwise it is dropped.
#
# Usage:
#   parsing = DocumentParsing.new(text)
#   parsing.call
#   parsing.points     # => [Point, ...]
#   parsing.pleadings  # => [Pleading, ...]
class DocumentParsing
  # Heading of a point: "<no> § <title>". Trailing whitespace (including the
  # "\r" of a CRLF line ending) is not part of the title.
  POINT_REGEX = /^(?<no>\d+)\s+§\s+(?<title>.+?)\s*$/

  # Heading of a pleading:
  #   "Anf. <no> [<Position>] <person> (<party>)[ replik]:"
  # * position is an optional single capitalised word; Unicode letter classes
  #   are used so words containing non-ASCII letters (e.g. "å") are accepted.
  # * person must consist of at least two whitespace-separated parts.
  # * The separating whitespace sits outside the position/replik groups so the
  #   captured values carry no leading blank.
  # * "\s*$" tolerates CRLF line endings after the colon.
  PLEADING_REGEX = /^Anf\.\s+(?<no>\d+)(?:\s+(?<position>\p{Lu}\p{Ll}+?))?\s+(?<person>.+\s.+?)\s+\((?<party>.+)\)(?:\s(?<replik>replik))?:\s*$/

  # @return [Array<Point>] points found by the latest #call (empty before it)
  # @return [Array<Pleading>] pleadings found by the latest #call (empty before it)
  attr_reader :points, :pleadings

  # @param text [String] the full document text to parse
  def initialize(text)
    @text = text
    # Start with empty collections so the readers and current_* helpers are
    # usable (and never nil) even before #call has run.
    reset
  end

  # Discards all previously collected points and pleadings.
  def reset
    @points = []
    @pleadings = []
  end

  # @return [Point, nil] the most recently found point, if any
  def current_point
    @points.last
  end

  # @return [Pleading, nil] the most recently found pleading, but only while
  #   it still belongs to the current point
  def current_pleading
    latest = @pleadings.last
    # Identity comparison on purpose: Struct#== compares members, so two
    # headings with the same number and title would otherwise be mistaken for
    # one and the same point.
    latest if latest && latest.point.equal?(current_point)
  end

  # Parses the text from scratch, replacing the results of any earlier run.
  #
  # @return [DocumentParsing] self, so the call can be chained
  def call
    reset
    @text.each_line do |line|
      if (match = POINT_REGEX.match(line))
        @points.push(Point.new(match[:no], match[:title]))
      elsif (match = PLEADING_REGEX.match(line))
        @pleadings.push(build_pleading(match))
      elsif (pleading = current_pleading)
        pleading.content << line
      end
    end
    self
  end

  private

  # Builds a Pleading for the current point from a PLEADING_REGEX match.
  def build_pleading(match)
    Pleading.new(
      match[:no],
      match[:position],
      match[:person],
      match[:party],
      match[:replik],
      "".dup, # mutable buffer even when string literals are frozen
      current_point
    )
  end
end
# Download the document text. force_encoding only relabels the received bytes
# as UTF-8 in place; it does not transcode them.
document_text = Net::HTTP.get("data.riksdagen.se", "/dokument/H00911/text")
document_text.force_encoding("UTF-8")

parser = DocumentParsing.new(document_text)
parser.call

# Print each result list under its banner, points first.
{
  "--POINTS----------------------------------------" => :points,
  "--PLEADINGS-------------------------------------" => :pleadings
}.each do |banner, reader|
  puts banner
  pp parser.public_send(reader)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment