Skip to content

Instantly share code, notes, and snippets.

@justMaku justMaku/stitcher.rb
Created Dec 31, 2018

Embed
What would you like to do?
require 'net/http'
require 'uri'
require 'json'
require 'cgi'
# Longest common substring of +a+ and +b+ (a contiguous run of characters,
# not a subsequence).
#
# A table is filled in which cell [row][col] holds the length of the common
# run ending at a[row] and b[col]. When several runs share the maximum
# length, the one reached first while scanning +a+ left to right (and +b+
# left to right for each position of +a+) is the one returned.
#
# @param a [String] text the result is sliced from
# @param b [String] text to compare against
# @return [String] the longest shared substring, or '' when nothing is shared
def lcs(a, b)
  right_chars = b.chars
  table = Array.new(a.length) { Array.new(right_chars.length, 0) }
  best_len = 0
  best_end = nil

  a.chars.each_with_index do |left, row|
    right_chars.each_with_index do |right, col|
      next unless left == right

      # A match on the first row or column cannot extend an earlier run.
      run = if row.zero? || col.zero?
              1
            else
              table[row - 1][col - 1] + 1
            end
      table[row][col] = run
      next unless run > best_len

      best_len = run
      best_end = row
    end
  end

  best_end ? a[best_end - best_len + 1, best_len] : ''
end
# Grows +current+ by merging in every fragment that overlaps it.
#
# For each fragment, the longest substring shared with the text built so far
# is found with +lcs+. A fragment whose overlap is shorter than a fifth of its
# own length (integer division) is set aside as unused. Otherwise the part of
# the fragment before the overlap is prepended to the text and the part after
# it is appended.
#
# @param current [String] the text to extend
# @param lines [Array<String>] candidate fragments; entries equal to the
#   current text are skipped
# @return [Array<String>] the extended text first, followed by the fragments
#   that could not be merged, in their original order
def stitch(current, lines)
  unused = []
  lines.each do |line|
    next if line == current

    overlap = lcs(line, current)
    # An empty overlap means the two texts share nothing. For fragments shorter
    # than five characters the length threshold below is 0, so without this
    # check they would be treated as matching and torn apart.
    if overlap.empty? || overlap.length < line.length / 5
      # Empty fragments carry no text, so they are dropped rather than kept.
      unused << line unless line.empty?
      next
    end

    # partition does a literal match on the first occurrence and keeps the whole
    # remainder; split would special-case a single-space pattern and discard
    # text after a second occurrence of the overlap.
    head, _shared, tail = line.partition(overlap)
    current = head + current + tail
  end
  [current] + unused
end
# Repeatedly stitches a page's fragments until no more of them can be merged.
#
# Every pass uses the first entry as the text to extend and the whole list as
# the candidates. A pass that hands back as many entries as it was given has
# merged nothing, so its output equals its input and any further pass would
# only repeat it; the loop stops at the first such pass instead of re-running
# the same comparison several more times.
#
# @param stitched [Array<String>] the fragments belonging to one page
# @return [Array] the stitched text first, followed by any fragments that
#   never overlapped it
def page(stitched)
  loop do
    merged = stitch(stitched[0], stitched)
    settled = merged.length == stitched.length
    stitched = merged
    break if settled
  end
  stitched
end
# Stitch the fragments of every page back together.
#
# 'full.json' is parsed as a JSON object; each value is handed to +page+ and
# the first element of the result is stored under the same key.
fragments = JSON.parse(File.read('full.json'))
@pages = {}
@threads = []
fragments.each do |k, pieces|
  @pages[k] = page(pieces).first
  # Progress: pages finished so far out of the total.
  puts "#{@pages.length}/#{fragments.length}"
end

# Write the pages to 'pages.txt' ordered by the integer value of their keys.
File.open('pages.txt', 'w') do |out|
  # Integer() raises for keys that are not integer strings.
  @pages.sort_by { |k, _text| Integer(k) }.each do |k, text|
    out.puts "Page #{k}\n", text, "\n"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.