Skip to content

Instantly share code, notes, and snippets.

@ers35
Last active July 13, 2016 20:01
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ers35/3b615a75fa0ed5e6d5cc to your computer and use it in GitHub Desktop.
Save ers35/3b615a75fa0ed5e6d5cc to your computer and use it in GitHub Desktop.
Convert http://redd.it/3bxlg7 to an SQLite database
-- The author disclaims copyright to this source code.
-- Convert http://redd.it/3bxlg7 to an SQLite database
-- Convert the whole dataset at once like so:
-- find reddit_data -type f -exec time lua json2sqlite.lua {} \;
local cjson = require"cjson"
local sqlite = require"lsqlite3"
local db = sqlite.open("reddit_data.db")
db:exec[[
PRAGMA JOURNAL_MODE = OFF;
PRAGMA SYNCHRONOUS = OFF;
PRAGMA temp_store = MEMORY;
PRAGMA mmap_size = 268435456;
CREATE TABLE IF NOT EXISTS comment
(
id TEXT UNIQUE ON CONFLICT REPLACE,
body TEXT,
edited TEXT,
subreddit_id TEXT,
subreddit TEXT,
author_flair_text TEXT,
author TEXT,
ups TEXT,
retrieved_on INTEGER,
created_utc INTEGER,
link_id TEXT,
downs TEXT,
distinguished TEXT,
author_flair_css_class TEXT,
parent_id TEXT,
gilded INTEGER,
score INTEGER,
controversiality INTEGER,
archived TEXT,
name TEXT
);
]]
print(db:errmsg())
local stmt = db:prepare[[
INSERT INTO comment
(
id,
body,
edited,
subreddit_id,
subreddit,
author_flair_text,
author,
ups,
retrieved_on,
created_utc,
link_id,
downs,
distinguished,
author_flair_css_class,
parent_id,
gilded,
score,
controversiality,
archived,
name
)
VALUES
(
:id,
:body,
:edited,
:subreddit_id,
:subreddit,
:author_flair_text,
:author,
:ups,
:retrieved_on,
:created_utc,
:link_id,
:downs,
:distinguished,
:author_flair_css_class,
:parent_id,
:gilded,
:score,
:controversiality,
:archived,
:name
);
]]
print(db:errmsg())
print(arg[1])
db:exec"BEGIN TRANSACTION;"
local fd = io.open(arg[1], "r")
for json in fd:lines() do
local tbl = cjson.decode(json)
for k, v in pairs(tbl) do
if v == cjson.null then
tbl[k] = nil
end
end
stmt:bind_names(tbl)
stmt:step()
stmt:reset()
end
db:exec"COMMIT;"
print(db:errmsg())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment