Skip to content

Instantly share code, notes, and snippets.

@eraserhd
Forked from kognate/gist:994423
Created May 27, 2011 00:50
Show Gist options
  • Save eraserhd/994438 to your computer and use it in GitHub Desktop.
Save eraserhd/994438 to your computer and use it in GitHub Desktop.
tumbledry
require 'yaml'
# Naive substitution compressor: repeatedly finds suffixes of the loaded text
# that occur more than once and replaces every occurrence with a
# single-character token, until no repeated suffix is left.
class Tumbledry
  # Hash mapping each repeated substring found by the most recent #patternize
  # call to the one-character token that stands in for it.
  attr_accessor :patterns

  # Loads the text to compress.
  #
  # @param path [String] file to read; defaults to 'repetative.txt', the
  #   name that was previously hard-coded
  # @raise [SystemCallError] if the file cannot be read
  def initialize(path = 'repetative.txt')
    @data = File.read(path)
    self.patterns = {}
    @numpat = 1
  end

  # Records in #patterns every suffix of the data that is longer than one
  # character and occurs more than once (non-overlapping), assigning each a
  # fresh token, and prints each such suffix.
  #
  # The suffix is matched literally (String#scan with a String pattern), so
  # regexp metacharacters in the data can neither raise nor change what is
  # matched.
  #
  # NOTE: tokens come from Integer#chr on a counter that is never reset, so it
  # raises RangeError once the counter exceeds 255, and a token may coincide
  # with a character already present in the data.
  #
  # Every start offset rescans the whole data, so the cost grows at least
  # quadratically with input length.
  def patternize
    (0...@data.length).each do |start|
      suffix = @data[start..-1]
      next unless suffix.length > 1 && @data.scan(suffix).length > 1

      patterns[suffix] = @numpat.chr
      @numpat += 1
      puts suffix
    end
  end

  # Runs #patternize and substitutes tokens for the patterns it found, over
  # and over until a pass finds nothing, then prints the resulting data.
  def consume
    loop do
      self.patterns = {}
      patternize
      break if patterns.empty?

      # Block form keeps the token literal: a replacement *string* would have
      # backslashes interpreted by gsub!.
      patterns.each { |pattern, token| @data.gsub!(pattern) { token } }
    end
    puts @data
  end
end
# Script entry point: build a Tumbledry with its default input and run the
# compression loop, which prints the result.
compressor = Tumbledry.new
compressor.consume
require 'pp'
# Word-level analysis of an input text: splits it into whitespace-separated
# words, finds the longest run of words that occurs at least twice, and can
# emit a small Ruby program that evaluates to the original input.
class TumbleDRYer
  # The text given to the constructor.
  attr_reader :input

  # @param input [String] text to analyse
  def initialize(input)
    @input = input
  end

  # Returns the single-line pretty-printed form of +s+ (for a String, its
  # quoted and escaped literal).
  #
  # A plain String is used as the output buffer because PP only needs an
  # object that responds to <<; this avoids relying on StringIO, which this
  # file never requires.
  #
  # @param s [Object] value to render
  # @return [String]
  def encode_string(s)
    PP.singleline_pp(s, ''.dup)
  end

  # @return [Array<String>] the input split on runs of whitespace, with
  #   leading and trailing whitespace ignored
  def words
    input.strip.split(/\s+/)
  end

  # Builds every suffix of the word list (each one an Array of words) and
  # returns them sorted, so suffixes sharing a prefix end up adjacent.
  #
  # @return [Array<Array<String>>]
  def suffix_array
    all_words = words
    Array.new(all_words.size) { |start| all_words[start..-1] }.sort
  end

  # Finds the longest word sequence shared by two adjacent suffixes in the
  # sorted suffix array. On ties the first one encountered wins. The two
  # occurrences are not checked for overlap.
  #
  # @return [Array<String>] the repeated words, or [] when nothing repeats
  def longest_substring
    best = []
    suffix_array.each_cons(2) do |left, right|
      limit = [left.size, right.size].min
      shared = 0
      shared += 1 while shared < limit && left[shared] == right[shared]
      best = left[0...shared] if shared > best.size
    end
    best
  end

  # Generates Ruby source defining a Decoder class whose #to_s returns the
  # input, followed by an expression that calls it.
  #
  # @return [String] Ruby source code
  def output
    [
      "class Decoder\n",
      " def to_s\n",
      " #{encode_string(input)}\n",
      " end\n",
      "end\n",
      "Decoder.new.to_s\n"
    ].join
  end
end
# RSpec examples for TumbleDRYer, written with the `should` expectation syntax.
describe TumbleDRYer do
# Default fixture: a CREATE TABLE statement in which the word run
# NOT NULL default '', appears twice (the `name` and `description` columns).
let(:input) { <<-EOS
CREATE TABLE `categories` (
`id` int(11) NOT NULL auto_increment,
`name` varchar(20) NOT NULL default '',
`description` varchar(70) NOT NULL default '',
PRIMARY KEY (`id`)
) TYPE=MyISAM AUTO_INCREMENT=3 ;
EOS
}
# Construction must succeed even for an empty string.
it "should be creatable" do
lambda {TumbleDRYer.new("")}.should_not raise_error
end
# The constructor argument is exposed unchanged through #input.
it "should accept input" do
TumbleDRYer.new("hello, world!").input.should == "hello, world!"
end
describe 'its suffix array' do
# NOTE(review): `words` is defined here but no example in this group reads it.
let(:words) { TumbleDRYer.new(input).words }
subject { TumbleDRYer.new(input).suffix_array }
it "should be an array" do
subject.should be_kind_of(Array)
end
# Each entry of the suffix array is itself a list of words; none may be blank.
it "should not contain any empty strings" do
subject.each do |arr|
arr.each do |w|
w.should_not == ""
end
end
end
end
describe 'its longest substring' do
subject { TumbleDRYer.new(input).longest_substring }
# Expects the four-word run shared by the `name` and `description` lines.
it "should be what we expect" do
subject.should == ["NOT", "NULL", "default", "'',"]
end
end
describe 'its output' do
# Overrides the outer SQL fixture with a short string for the round-trip check.
let(:input){ "Hello, World!" }
subject { TumbleDRYer.new(input).output }
it 'should be a string' do
subject.should be_kind_of(String)
end
# The generated source is evaluated and must yield the original input back.
it 'should produce the original input when run' do
eval(subject).should == input
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment