Instantly share code, notes, and snippets.

Embed
What would you like to do?
require 'net/http'
require 'uri'
require 'json'
require 'cgi'
# Returns the longest substring (run of consecutive characters) that occurs
# in both +a+ and +b+.
#
# Dynamic programming over character pairs: for every position where the two
# strings agree, the length of the common run ending there is one more than
# the run ending at the previous position of both strings. Only the previous
# row of run lengths is needed to compute the next one.
#
# When several substrings tie for the longest length, the one that ends
# earliest in +a+ is returned. Returns an empty string when nothing is shared.
#
# @param a [String]
# @param b [String]
# @return [String]
def lcs(a, b)
  b_chars = b.chars
  previous_row = Array.new(b_chars.length, 0)
  best_length = 0
  best_end = -1

  a.each_char.with_index do |a_char, i|
    current_row = Array.new(b_chars.length, 0)

    b_chars.each_with_index do |b_char, j|
      next unless a_char == b_char

      # A match in the first column cannot extend an earlier run; for the
      # first row the previous row is all zeros, which also yields 1.
      run = j.zero? ? 1 : previous_row[j - 1] + 1
      current_row[j] = run
      next unless run > best_length

      best_length = run
      best_end = i
    end

    previous_row = current_row
  end

  best_length.zero? ? '' : a[best_end - best_length + 1, best_length]
end
# Merges overlapping text fragments into +current+.
#
# Every fragment in +lines+ is compared with the text assembled so far via
# +lcs+. When the shared substring is long enough (at least one fifth of the
# fragment's length, integer division), the part of the fragment before the
# overlap is prepended to +current+ and the part after it is appended.
#
# Fragments equal to the text assembled so far, and empty fragments, are
# dropped. Fragments that share nothing with +current+, or whose overlap is
# too short, are handed back unchanged so the caller can retry them once
# +current+ has grown.
#
# @param current [String] the text assembled so far (not mutated)
# @param lines [Array<String>] candidate fragments
# @return [Array<String>] the grown text followed by the unmerged fragments
#   in their original order
def stitch(current, lines)
  unused = []

  lines.each do |line|
    next if line == current || line.empty?

    overlap = lcs(line, current)

    # For fragments shorter than five characters the threshold is zero, so an
    # empty overlap must be rejected explicitly: it means nothing is shared.
    if overlap.empty? || overlap.length < line.length / 5
      unused << line
      next
    end

    # partition cuts at the first literal occurrence only. String#split would
    # treat a ' ' overlap as "split on any whitespace" and would discard the
    # text following a second occurrence of the overlap.
    prefix, _matched, suffix = line.partition(overlap)
    current = prefix + current + suffix
  end

  [current] + unused
end
# Assembles one page by stitching its fragments together until no further
# fragment can be merged.
#
# The first element is the seed text. Each pass hands the seed and the whole
# list to +stitch+, which returns the grown text followed by the fragments it
# could not merge; that list becomes the input of the next pass.
#
# @param stitched [Array<String>] the page's fragments; the first one is
#   used as the starting text
# @return [Array] the list returned by the last +stitch+ pass; the assembled
#   text is its first element (nil when the input was empty)
def page(stitched)
  loop do
    size_before = stitched.length
    stitched = stitch(stitched[0], stitched)

    # stitch removes every fragment it merges, so a pass that leaves the list
    # the same size merged nothing and returned its input unchanged. Further
    # passes would repeat the same work with the same result, so stop here.
    break if stitched.length == size_before
  end

  stitched
end
# Script body: reads the fragment lists from full.json (a JSON object mapping
# a page key to an array of text fragments), assembles every page, and writes
# the pages to pages.txt ordered by the integer value of their key.
fragments = JSON.parse(File.read('full.json'))

@pages = {}
@threads = [] # initialised here but not used by any code visible in this file

fragments.each_pair do |page_key, page_fragments|
  # page returns the assembled text first, followed by unmerged leftovers.
  @pages[page_key] = page(page_fragments)[0]
  # Progress indicator: pages finished so far / total pages.
  puts "#{@pages.length}/#{fragments.length}"
end

File.open('pages.txt', 'w') do |f|
  # Keys are strings in the parsed JSON; Integer() gives a numeric ordering
  # and raises if a key is not a valid integer.
  ordered_pages = @pages.sort_by { |page_key, _text| Integer(page_key) }

  ordered_pages.each do |page_key, text|
    f.puts "Page #{page_key}\n"
    f.puts text
    f.puts "\n"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment