Skip to content

Instantly share code, notes, and snippets.

@psobot
Created January 16, 2012 01:43
Show Gist options
  • Save psobot/1618528 to your computer and use it in GitHub Desktop.
Save psobot/1618528 to your computer and use it in GitHub Desktop.
Jobmine MySQL Dumper
# Jobmine Data Parser v0.1
# Peter Sobot, January 15, 2012
#
# Takes in a Jobmine search result dump (i.e. "ps.xls", which is actually HTML)
# and puts it all into a DB for easy self-searching and fun.
#
# Assumed table system:
#
# CREATE TABLE `jobs` (
# `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
# `job_id` int(11) DEFAULT NULL,
# `title` varchar(48) DEFAULT NULL,
# `employer` varchar(48) DEFAULT NULL,
# `unit` varchar(48) DEFAULT NULL,
# `location` varchar(48) DEFAULT NULL,
# `openings` int(11) DEFAULT NULL,
# `deadline` datetime DEFAULT NULL,
# PRIMARY KEY (`id`),
# KEY `employer` (`employer`),
# KEY `location` (`location`)
# ) ENGINE=InnoDB AUTO_INCREMENT=0 DEFAULT CHARSET=utf8;
#
#
#
# The feature name must be lowercase: the gem ships `nokogiri.rb`, so
# `require 'Nokogiri'` only resolves on case-insensitive filesystems.
require 'nokogiri'
require 'active_record'

# Usage: ruby <this script> <path to Jobmine dump>
#
# Parses the HTML file named by the first command-line argument and creates
# one Job record for every <tr> element after the first one. When no filename
# is given, prints a prompt and does nothing else.
if ARGV.first
  ActiveRecord::Base.establish_connection(
    :adapter  => "mysql",
    :database => "jobmine",
    :username => 'root',
    :password => '',
    :host     => 'localhost'
  )

  # ActiveRecord model with no custom behaviour; by ActiveRecord naming
  # convention it is backed by the `jobs` table.
  class Job < ActiveRecord::Base
  end

  # File.read opens, reads and closes the file in one call; the previous
  # File.open(...).read left the handle open until garbage collection.
  data = Nokogiri::HTML File.read(ARGV.first)

  # The first <tr> is skipped -- presumably the header row; confirm against
  # a real dump.
  data.css('tr').drop(1).each do |row|
    # Fields are read from fixed positions in the row's child node list.
    # NOTE(review): the even-only indices suggest that whitespace text nodes
    # sit between the cells in the dump -- confirm before changing them.
    # A row with fewer child nodes than expected raises NoMethodError here.
    cells = row.children

    Job.create :job_id   => cells[0].content,
               :title    => cells[2].content,
               :employer => cells[4].content,
               :unit     => cells[6].content,
               :location => cells[8].content,
               :openings => cells[10].content,
               :deadline => cells[18].content
  end
else
  puts "Hey, filename, please?"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment