Skip to content

Instantly share code, notes, and snippets.

@mjmsmith
Created April 5, 2010 19:14
Show Gist options
  • Save mjmsmith/356737 to your computer and use it in GitHub Desktop.
Save mjmsmith/356737 to your computer and use it in GitHub Desktop.
munge metafilter infodump for easier querying
Download import.rb, user.sql, favorite.sql, and site.sql to a directory.
Download http://stuff.metafilter.com/infodump/infodump-all.zip and unzip to the same directory.
Create a MySQL database.
Edit <USERNAME>, <PASSWORD>, and <DATABASE> in import.rb.
Run import.rb. The MySQL bin directory needs to be on your path.
DROP TABLE IF EXISTS favorite;
CREATE TABLE favorite (
id int(11) NOT NULL AUTO_INCREMENT,
created datetime NOT NULL,
favee_id int(11) NOT NULL,
faver_id int(11) NOT NULL,
post_id int(11) NOT NULL,
comment_id int(11) NOT NULL,
type int(11) NOT NULL,
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
LOAD DATA LOCAL INFILE 'favoritesdata.txt' REPLACE INTO TABLE favorite
LINES TERMINATED BY '\r\n' IGNORE 2 LINES (
id,faver_id,favee_id,type,comment_id,post_id,created
);
DROP TABLE IF EXISTS mefi_favorite;
CREATE TABLE mefi_favorite (
id int(11) NOT NULL AUTO_INCREMENT,
created datetime NOT NULL,
favee_id int(11) NOT NULL,
faver_id int(11) NOT NULL,
post_id int(11) NOT NULL,
comment_id int(11) NOT NULL,
PRIMARY KEY (id),
KEY favee_id (favee_id),
KEY faver_id (faver_id),
KEY post_id (post_id),
KEY comment_id (comment_id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
AS SELECT created,favee_id,faver_id,post_id,comment_id FROM favorite WHERE type IN (1,2);
UPDATE mefi_favorite SET post_id=comment_id, comment_id=0 WHERE post_id=0;
DROP TABLE IF EXISTS askme_favorite;
CREATE TABLE askme_favorite (
id int(11) NOT NULL AUTO_INCREMENT,
created datetime NOT NULL,
favee_id int(11) NOT NULL,
faver_id int(11) NOT NULL,
post_id int(11) NOT NULL,
comment_id int(11) NOT NULL,
PRIMARY KEY (id),
KEY favee_id (favee_id),
KEY faver_id (faver_id),
KEY post_id (post_id),
KEY comment_id (comment_id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
AS SELECT created,favee_id,faver_id,post_id,comment_id FROM favorite WHERE type IN (3,4);
UPDATE askme_favorite SET post_id=comment_id, comment_id=0 WHERE post_id=0;
DROP TABLE IF EXISTS meta_favorite;
CREATE TABLE meta_favorite (
id int(11) NOT NULL AUTO_INCREMENT,
created datetime NOT NULL,
favee_id int(11) NOT NULL,
faver_id int(11) NOT NULL,
post_id int(11) NOT NULL,
comment_id int(11) NOT NULL,
PRIMARY KEY (id),
KEY favee_id (favee_id),
KEY faver_id (faver_id),
KEY post_id (post_id),
KEY comment_id (comment_id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
AS SELECT created,favee_id,faver_id,post_id,comment_id FROM favorite WHERE type IN (5,6);
UPDATE meta_favorite SET post_id=comment_id, comment_id=0 WHERE post_id=0;
DROP TABLE IF EXISTS music_favorite;
CREATE TABLE music_favorite (
id int(11) NOT NULL AUTO_INCREMENT,
created datetime NOT NULL,
favee_id int(11) NOT NULL,
faver_id int(11) NOT NULL,
post_id int(11) NOT NULL,
comment_id int(11) NOT NULL,
PRIMARY KEY (id),
KEY favee_id (favee_id),
KEY faver_id (faver_id),
KEY post_id (post_id),
KEY comment_id (comment_id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
AS SELECT created,favee_id,faver_id,post_id,comment_id FROM favorite WHERE type IN (8,9);
UPDATE music_favorite SET post_id=comment_id, comment_id=0 WHERE post_id=0;
DROP TABLE favorite;
#! /usr/bin/env ruby
def exec_sql filename
`mysql -u<USERNAME> -p<PASSWORD> <DATABASE> < #{filename}`
end
puts 'importing user'
exec_sql 'user.sql'
puts 'importing favorite'
exec_sql 'favorite.sql'
sites = ['mefi', 'askme', 'meta', 'music']
sites.each do |site|
puts "importing #{site}"
sql = File.new('site.sql').read.gsub('${site}', site)
File.open('tmp.sql', 'w') {|f| f.write sql}
exec_sql 'tmp.sql'
end
File.delete('tmp.sql')
DROP TABLE IF EXISTS ${site}_post;
CREATE TABLE ${site}_post (
id int(11) NOT NULL AUTO_INCREMENT,
post_id int(11) NOT NULL,
user_id int(11) NOT NULL,
created datetime NOT NULL,
category int(11) NOT NULL,
comments_count int(11) NOT NULL,
favorites_count int(11) NOT NULL,
deleted int(11) NOT NULL,
reason varchar(255) NOT NULL,
title varchar(255) NOT NULL,
PRIMARY KEY (id),
KEY user_id (user_id),
KEY post_id (post_id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
LOAD DATA LOCAL INFILE 'postdata_${site}.txt' REPLACE INTO TABLE ${site}_post
LINES TERMINATED BY '\r\n' IGNORE 2 LINES (
post_id,user_id,created,category,comments_count,favorites_count,deleted,reason
);
DROP TABLE IF EXISTS title;
CREATE TABLE title (
id int(11) NOT NULL AUTO_INCREMENT,
post_id int(11) NOT NULL,
title varchar(255) NOT NULL,
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
LOAD DATA LOCAL INFILE 'posttitles_${site}.txt' REPLACE INTO TABLE title
LINES TERMINATED BY '\r\n' IGNORE 2 LINES (
post_id,title
);
UPDATE ${site}_post p, title t SET p.title = t.title WHERE p.post_id = t.post_id;
UPDATE ${site}_post SET title = CONCAT('(',post_id,')') WHERE title = '';
UPDATE ${site}_post SET reason = '' WHERE reason = '[NULL]';
DROP TABLE title;
DROP TABLE IF EXISTS ${site}_comment;
CREATE TABLE ${site}_comment (
id int(11) NOT NULL AUTO_INCREMENT,
best int(11) NOT NULL,
comment_id int(11) NOT NULL,
created datetime NOT NULL,
post_id int(11) NOT NULL,
user_id int(11) NOT NULL,
favorites_count int(11) NOT NULL,
length int(11) NOT NULL,
PRIMARY KEY (id),
KEY comment_id (comment_id),
KEY post_id (post_id),
KEY user_id (user_id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
LOAD DATA LOCAL INFILE 'commentdata_${site}.txt' REPLACE INTO TABLE ${site}_comment
LINES TERMINATED BY '\r\n' IGNORE 2 LINES (
comment_id,post_id,user_id,created,favorites_count,best
);
DROP TABLE IF EXISTS length;
CREATE TABLE length (
id int(11) NOT NULL AUTO_INCREMENT,
comment_id int(11) NOT NULL,
length int(11) NOT NULL,
PRIMARY KEY (id)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
LOAD DATA LOCAL INFILE 'commentlength_${site}.txt' REPLACE INTO TABLE length
LINES TERMINATED BY '\r\n' IGNORE 2 LINES (
comment_id,length
);
UPDATE ${site}_comment c, length l SET c.length = l.length WHERE c.comment_id = l.comment_id;
DROP TABLE length;
DROP TABLE IF EXISTS ${site}_tag;
CREATE TABLE ${site}_tag (
id int(11) NOT NULL AUTO_INCREMENT,
post_id int(11) NOT NULL,
created datetime NOT NULL,
name varchar(255) NOT NULL,
PRIMARY KEY (id),
KEY post_id (post_id),
KEY name (name)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
LOAD DATA LOCAL INFILE 'tagdata_${site}.txt' REPLACE INTO TABLE ${site}_tag
LINES TERMINATED BY '\r\n' IGNORE 2 LINES (
id,post_id,created,name
);
DROP TABLE IF EXISTS user;
CREATE TABLE user (
id int(11) NOT NULL AUTO_INCREMENT,
joined datetime NOT NULL,
name varchar(255) NOT NULL,
PRIMARY KEY (id),
KEY name (name)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
LOAD DATA LOCAL INFILE 'usernames.txt' REPLACE INTO TABLE user
LINES TERMINATED BY '\r\n' IGNORE 2 LINES (
id,joined,name
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment