Skip to content

Instantly share code, notes, and snippets.

@erwinvaneyk
Last active August 29, 2015 14:10
Show Gist options
  • Save erwinvaneyk/49df43cf277028430b13 to your computer and use it in GitHub Desktop.
Save erwinvaneyk/49df43cf277028430b13 to your computer and use it in GitHub Desktop.
A SQL-file for creating the table structure and importing the datadump from the StackOverflow-datadump
-- Creates database structure for StackOverflow-dump
-- required:
-- - http://stackoverflow.com/questions/10762239/mysql-enable-load-data-local-infile
DROP DATABASE stackoverflow;
CREATE DATABASE stackoverflow;
USE stackoverflow;
CREATE TABLE Tags (
Id INT NOT NULL PRIMARY KEY,
TagName VARCHAR(30) NULL,
Count INT NULL,
ExcerptPostId INT NULL,
WikiPostId INT NULL
);
CREATE TABLE Badges (
Id INT NOT NULL PRIMARY KEY,
UserId INT NULL,
Name VARCHAR(50) NULL,
`Date` DATETIME NULL
);
CREATE TABLE Comments (
Id INT NOT NULL PRIMARY KEY,
PostId INT NULL,
Score INT NULL,
`Text` Text NULL,
CreationDate DATETIME NULL,
UserDisplayName VARCHAR(50) NULL,
UserId INT NULL
);
CREATE TABLE Votes (
Id INT NOT NULL PRIMARY KEY,
PostId INT NULL,
VoteTypeId INT NULL,
UserId INT NULL,
CreationDate DATETIME NULL,
BountyAmount INT NULL
);
CREATE TABLE Users (
Id INT NOT NULL PRIMARY KEY,
Reputation INT NULL,
CreationDate DATETIME NULL,
DisplayName VARCHAR(40) NULL,
LastAccessDate DATETIME NULL,
WebsiteUrl VARCHAR(200) NULL,
Location VARCHAR(100) NULL,
AboutMe TEXT NULL,
Views INT NULL,
UpVotes INT NULL,
DownVotes INT NULL,
EmailHash VARCHAR(50) NULL,
AccountId INT NOT NULL,
Age INT NULL
);
CREATE TABLE PostHistory (
Id INT NOT NULL PRIMARY KEY,
PostHistoryTypeId INT NOT NULL,
PostId INT NOT NULL,
RevisionGUID VARCHAR(255) NULL,
CreationDate DATETIME NULL,
UserId INT NOT NULL,
UserDisplayName VARCHAR(50) NULL,
Comment TEXT NULL,
`Text` TEXT NULL
);
CREATE TABLE PostLinks (
Id INT NOT NULL PRIMARY KEY,
CreationDate DATETIME NULL,
PostId INT NOT NULL,
RelatedPostId INT NULL,
LinkTypeId INT NULL
);
CREATE TABLE Posts (
Id INT NOT NULL PRIMARY KEY,
PostTypeId INT NOT NULL,
AcceptedAnswerId INT NULL,
ParentID INT NULL,
CreationDate DATETIME NULL,
Score INT NULL,
ViewCount INT NULL,
Body TEXT NOT NULL,
OwnerUserId INT NULL,
OwnerDisplayName VARCHAR(50) NULL,
LastEditorUserId VARCHAR(50) NULL,
LastEditorDisplayName VARCHAR(50) NULL,
LastEditDate DATETIME NULL,
LastActivityDate DATETIME NULL,
Title VARCHAR(255) NOT NULL,
Tags TEXT NULL, -- Not used, replaced by the table PostTags
AnswerCount INT NULL,
FavoriteCount INT NULL,
ClosedDate DATETIME NULL,
CommunityOwnedDate DATETIME NULL
);
CREATE TABLE PostTags (
PostId INT NOT NULL,
TagId INT NOT NULL
);
SELECT '' as 'Created TABLE structure';
LOAD XML INFILE '/tmp/data/Tags.xml'
INTO TABLE Tags;
SELECT '' as 'Imported Tags.xml';
LOAD XML INFILE 'Badges.xml'
INTO TABLE Badges;
SELECT '' as 'Imported Badges.xml';
LOAD XML LOCAL INFILE 'Votes.xml'
INTO TABLE Votes;
SELECT '' as 'Imported Votes.xml';
LOAD XML LOCAL INFILE 'Comments.xml'
INTO TABLE Votes;
SELECT '' as 'Imported Comments.xml';
LOAD XML LOCAL INFILE 'PostHistory.xml'
INTO TABLE Votes;
SELECT '' as 'Imported PostHistory.xml';
LOAD XML LOCAL INFILE 'PostLinks.xml'
INTO TABLE Votes;
SELECT '' as 'Imported PostLinks.xml';
LOAD XML LOCAL INFILE 'Posts.xml'
INTO TABLE Votes;
SELECT '' as 'Imported Posts.xml';
LOAD XML LOCAL INFILE 'Users.xml'
INTO TABLE Votes;
SELECT '' as 'Imported Users.xml';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment