@ibanez270dx
Created May 19, 2015 01:15
SKHIP work in progress
require 'io/console'
require 'optparse'
require 'ostruct'
require 'fileutils'
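# A quick usage sketch (file names below are only illustrative; HistoryIndex.sk
# normally lives under ~/Library/Safari/):
#
#   ruby skhip.rb --input ~/Library/Safari/HistoryIndex.sk --output history.txt
#   ruby skhip.rb --use-dump HistoryIndex-20150518181500.dump   # reuse an earlier dump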
NAME = "Safari Keyword History Index Parser"
VERSION = "v0.0.1"
TIME = Time.now
def box_me_up(str)
width = (STDOUT.winsize[-1].to_i-2).times.collect{'═'}.join
# width = (str.length+2).times.collect{'═'}.join
str = str.center(STDOUT.winsize[-1]-4,' ')
boxed = "╔#{width}╗\n║ #{str} ║\n╚#{width}╝\n"
end
def show_error_and_exit
puts @option_parser.banner
puts " #{$!}\n use --help for more information\n\n"
exit 1
end
################################################################################
# Command Line Options
################################################################################
# We set default values here.
options = OpenStruct.new
options.name = "HistoryIndex"
options.path = "/Users/#{ENV['USER']}/Library/Safari/"
options.time = "#{TIME.strftime("%Y%m%d%H%M%S")}"
options.line = STDOUT.winsize[-1].times.collect{'─'}.join
options.stdo = true
options.input = "#{options.name}.sk"
options.output = "#{options.name}-#{options.time}.txt"
options.backup = "#{options.name}-#{options.time}.backup"
options.dump = "#{options.name}-#{options.time}.dump"
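# With the defaults above, everything is read from and written to the current
# working directory, e.g. ./HistoryIndex.sk in and HistoryIndex-<timestamp>.txt,
# .backup and .dump out; options.path is defined but not referenced anywhere yet.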
# Start parsing those options
@option_parser = OptionParser.new do |opts|
opts.program_name = NAME
opts.version = VERSION
opts.banner = "#{box_me_up("#{NAME} #{VERSION}")}
Usage: ruby skhip.rb [options]\n\n"
opts.separator " Specific options:"
opts.on "-i", "--input FILE",
"Path to HistoryIndex.sk" \
do |input|
options.input = input
options.name = input[/(?<=\/)[\w]+(?=\.)/]
options.output = "#{options.name}-#{options.time}.txt"
File.open(input)
end
opts.separator ""
opts.on "-o", "--output FILE",
"Relative output location" \
do |output|
options.output = output
end
opts.separator ""
opts.on "-d", "--use-dump [FILE]",
"Skip dumping process by specifying an existing dump file.",
"Leave blank to use default path." \
do |dump|
options.dump = dump if dump # keep the default dump path when FILE is omitted
File.open(dump) if dump
end
opts.separator ""
opts.on_tail("-h", "--help", "What you're looking at :P") do
puts opts
exit
end
opts.on_tail("--version", "Show version") do
puts opts.program_name
puts opts.version
exit
end
end
begin
@option_parser.parse!
raise OptionParser::ParseError.new("arguments provided without switches!") unless ARGV.empty?
rescue OptionParser::ParseError
show_error_and_exit
end
################################################################################
# Setup
################################################################################
# Make a copy of the HistoryIndex.sk file
FileUtils.cp options.input, "#{options.backup}" \
rescue show_error_and_exit
# Dump the backup to text with xxd
`xxd -b -c 10 #{options.backup} >> #{options.dump}` \
rescue show_error_and_exit
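# Each line of the dump looks roughly like
#   <offset>: <ten 8-bit groups>  <ASCII rendering of those ten bytes>
# so split(' ').last below keeps just the trailing ASCII column, where xxd
# renders unprintable bytes as '.', which is where all the dots the parser
# keys off later come from.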
# Read in all the fragments
IO.foreach(options.dump) do |input|
(@acc ||= []) << input.split(' ').last \
rescue show_error_and_exit
end
# Make it one big ass string
dumped = @acc.join
################################################################################
# Parsing
################################################################################
parser = [] # collect regexps and their corresponding replacements
# Tag long runs of dots (256+) as segment boundaries so they can become separator lines later
parser << { regexp: /(?<=[^\.])\.{256,}(?=[^\.|IA])/, replacement: '...[SKHIP-PARSER-SEGMENT]...' }
# Looks like there's some code in there; it's the only place other than the
# URLs that has single dots. Collapse it so it doesn't pollute the URL parsing below.
parser << { regexp: /~(.)+big/, replacement: '*' }
# remove single dots between characters using regex lookarounds
parser << { regexp: /(?<=[^\.])\.{1}(?=[^\.])/, replacement: '' }
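# Illustrative effect of the rules above on a made-up fragment:
#   "h.t.t.p...w.w.w...e.x.a.m.p.l.e...c.o.m..." => "http...www...example...com..."
# Single dots between characters disappear; only the multi-dot runs survive as
# separators for the tokenizer below.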
# Execute!
parser.each do |r|
# puts r.inspect
dumped.gsub!(r[:regexp], r[:replacement])
end
################################################################################
# Tokenize that shit
################################################################################
current_index = 0
last_char = ""
@token = ""
@tokens = []
@rejected = []
dumped.split('').each do |char|
char.strip!
if (char == "." && last_char != ".")
# end of a word, add to array
if @token.length > 1
@tokens << case @token
when "http" then "http://"
else @token
end
else
@rejected << @token
end
@token = ""
end
if char =~ /[\w|\-|\+|&|\=|\?]/
@token << char
elsif char != "."
@rejected << char
end
last_char = char
end
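# For the made-up fragment above, @tokens comes out as
# ["http://", "www", "example", "com"]: a bare "http" token is rewritten to
# "http://", while tokens of one character or less and characters outside the
# class above end up in @rejected.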
################################################################################
# Iterate through tokens to create URLs and newlines
################################################################################
@words = []
last_token = ''
is_url = false
tmp = []
@tokens.each do |token|
if token=~/^http/ && is_url
@words << tmp
tmp = []
elsif token=="SKHIP-PARSER-SEGMENT" && is_url
@words << tmp
tmp = []
is_url = false
elsif token == "http://"
is_url = true
end
if is_url
tmp << token
elsif token=="SKHIP-PARSER-SEGMENT"
@words << options.line
elsif token=~/IADefault/
@words << box_me_up(token)
else
@words << token
end
last_token = token
end
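# Continuing the made-up example, the URL run collects into
# ["http://", "www", "example", "com"], which the print loop below joins back
# into "http://www.example.com"; each SKHIP-PARSER-SEGMENT token has already
# been swapped for a ─ separator line here.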
################################################################################
# Print it out
################################################################################
@words.each do |word|
puts word.is_a?(Array) ? word.join('.').sub('.','') : word
end
puts "\n\nFinished! Parsing took #{(Time.now - TIME).round(2)} seconds\n\n"
require 'io/console'
require 'optparse'
require 'ostruct'
require 'fileutils'
NAME = "Safari Keyword History Index Parser"
VERSION = "v0.0.1"
TIME = Time.now
def box_me_up(str)
width = (STDOUT.winsize[-1].to_i-2).times.collect{'═'}.join
# width = (str.length+2).times.collect{'═'}.join
str = str.center(STDOUT.winsize[-1]-4,' ')
boxed = "╔#{width}╗\n║ #{str} ║\n╚#{width}╝\n"
end
def show_error_and_exit
puts @option_parser.banner
puts " #{$!}\n use --help for more information\n\n"
exit 1
end
################################################################################
# Command Line Options
################################################################################
# We set default values here.
options = OpenStruct.new
options.name = "HistoryIndex"
options.path = "/Users/#{ENV['USER']}/Library/Safari/"
options.time = "#{TIME.strftime("%Y%m%d%H%M%S")}"
options.line = STDOUT.winsize[-1].times.collect{'─'}.join
options.stdo = true
options.input = "#{options.name}.sk"
options.output = "#{options.name}-#{options.time}.txt"
options.backup = "#{options.name}-#{options.time}.backup"
options.dump = "#{options.name}-#{options.time}.dump"
# Start parsing those options
@option_parser = OptionParser.new do |opts|
opts.program_name = NAME
opts.version = VERSION
opts.banner = "#{box_me_up("#{NAME} #{VERSION}")}
Usage: ruby skhip.rb [options]\n\n"
opts.separator " Specific options:"
opts.on "-i", "--input FILE",
"Path to HistoryIndex.sk" \
do |input|
options.input = input
options.name = input[/(?<=\/)[\w]+(?=\.)/]
options.output = "#{options.name}-#{options.time}.txt"
File.open(input)
end
opts.separator ""
opts.on "-o", "--output FILE",
"Relative output location" \
do |output|
options.output = output
end
opts.separator ""
opts.on "-d", "--use-dump [FILE]",
"Skip dumping process by specifying an existing dump file.",
"Leave blank to use default path." \
do |dump|
options.dump = dump if dump # keep the default dump path when FILE is omitted
File.open(dump) if dump
end
opts.separator ""
opts.on_tail("-h", "--help", "What you're looking at :P") do
puts opts
exit
end
opts.on_tail("--version", "Show version") do
puts opts.program_name
puts opts.version
exit
end
end
begin
@option_parser.parse!
raise OptionParser::ParseError.new("arguments provided without switches!") unless ARGV.empty?
rescue OptionParser::ParseError
show_error_and_exit
end
################################################################################
# Setup
################################################################################
# Make a copy of the HistoryIndex.sk file
FileUtils.cp options.input, "#{options.backup}" \
rescue show_error_and_exit
# Dump the backup to text with xxd
`xxd -b -c 10 #{options.backup} >> #{options.dump}` \
rescue show_error_and_exit
# Read in all the fragments
IO.foreach(options.dump) do |input|
(@acc ||= []) << input.split(' ').last \
rescue show_error_and_exit
end
# Make it one big ass string
dumped = @acc.join
################################################################################
# Parsing
################################################################################
parser = [] # collect regexps and their corresponding replacements
# Make a big ol' line-breaky thing if there's a lot of dots
parser << { regexp: /(?<=[^\.])\.{256,}(?=[^\.|IA])/, replacement: '...[SKHIP-PARSER-SEGMENT]...' }
# replace sets of 3 dots with a comma
# parser << { regexp: /(?<=[^\.])\.{3}(?=[^\.])/, replacement: ',' }
# remove single non-word characters (between two dots)
# parser << { regexp: /(?<=\.)[](?=\.)/, replacement: '..' }
# Looks like there's some code in there; it's the only place other than the
# URLs that has single dots. Collapse it so it doesn't pollute the URL parsing below.
parser << { regexp: /~(.)+big/, replacement: '*' }
# remove single dots between characters using regex lookarounds
parser << { regexp: /(?<=[^\.])\.{1}(?=[^\.])/, replacement: '' }
# remove single "stand-alone" characters
# parser << { regexp: /(?:\.{2,}|\n)[^-\.]{1,2}(?=\.{2,}|\n)/, replacement: '' }
# gonna assume that URL's HTTP part needs some slashes
# parser << { regexp: /http\.\.(?=[\w])/, replacement: 'http://' }
# put a newline before each URL
# parser << { regexp: /(\/?\.{1,})(?=https?)/, replacement: " " }
# Make a big o' line breaky thing if there's a lot of dots
# parser << { regexp: /(?<=[^\.])\.{256,}(?=[^\.|IA])/, replacement: options.line }
# clean up the ends of the URLs
# parser << { regexp: /\.{2,}(\w|{|}|\\|\d|;)+\n/, replacement: "\n" }
# replace all dot sequences longer than one with a comma
# parser << { regexp: /\.{2,}/, replacement: "," }
# add a newline after "html"... just makes things easier :P
# parser << { regexp: /(?<=html)()[^\n]/, replacement: "\n" }
# surround the headers with a border
# parser << { regexp: /()(?=IA\w+)/, replacement: options.line }
# parser << { regexp: /(?:IADefault)(?:I\w+|T\w+)(\n)/, replacement: options.line }
# Put line breaks in the remaining word blocks
# parser << { regexp: //, replacement: "\n" }
# Execute!
parser.each do |r|
# puts r.inspect
dumped.gsub!(r[:regexp], r[:replacement])
end
puts dumped
################################################################################
# Tokenize that shit
################################################################################
current_index = 0
last_char = ""
@token = ""
@tokens = []
@rejected = []
dumped.split('').each do |char|
char.strip!
if (char == "." && last_char != ".")
# end of a word, add to array
if @token.length > 1
@tokens << case @token
when "http" then "http://"
else @token
end
else
@rejected << @token
end
@token = ""
end
if char =~ /[\w|\-|\+|&|\=|\?]/
@token << char
elsif char != "."
@rejected << char
end
last_char = char
end
puts @tokens.inspect
# puts @rejected.inspect
################################################################################
# Iterate through tokens to create URLs and newlines
################################################################################
@words = []
last_token = ''
is_url = false
tmp = []
@tokens.each do |token|
if token=~/^http/ && is_url
@words << tmp
tmp = []
elsif token=="SKHIP-PARSER-SEGMENT" && is_url
@words << tmp
tmp = []
is_url = false
elsif token == "http://"
is_url = true
end
if is_url
tmp << token
elsif token=="SKHIP-PARSER-SEGMENT"
@words << options.line
elsif token=~/IADefault/
@words << box_me_up(token)
else
@words << token
end
last_token = token
end
puts @words.inspect
@words.each do |word|
w = word.is_a?(Array) ? word.join('.').sub('.','') : word
puts w
end
################################################################################
# Filter Weird Artifacts
################################################################################
# artifacts = []
# collect regexp's and their corresponding replacements
# artifacts << { regexp: /\.\=\=/, replacement: '' }
# artifacts << { regexp: /http0/, replacement: 'http:' }
# artifacts << /http\n.+\n/
# artifacts << /z\.{+\n/
# artifacts << /E\.F\.\w/
# artifacts << /Bud2/
# artifacts << /.?\.["|-]/
# Execute!
# artifacts.each do |artifact|
# regexp = Regexp.new artifact
# words.match(regexp).to_a.each do |match|
# (@removals || @removals=[]) << match.to_s
# end
# words.gsub! regexp, ''
# end
# Remove double spaces
# words.gsub!(/\n{2,}/,"\n")
#
# artifacts.each do |a|
# dumped.gsub!(r[:regexp], r[:replacement])
# end
# puts words
# puts box_me_up('Artifact Removals:')
# @removals.each { |x| puts x }
# output = File.open("history_index_output.txt", 'w+')
# words.each do |word|
# output.puts word
# end
#
# output.close
puts "\n\nFinished! Parsing took #{(Time.now - TIME).round(2)} seconds\n\n"