Last active
December 12, 2018 01:37
-
-
Save keisisqrl/f4914bbb8b611a1e3688acaee732447c to your computer and use it in GitHub Desktop.
load tweets from json in backup into sqlite3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package require sqlite3 | |
sqlite3 db tweets.sqlite3 | |
db eval {DROP TABLE IF EXISTS tweetjson} | |
db eval {create table tweetjson (id INTEGER PRIMARY KEY, tweet TEXT)} | |
foreach srcjs [glob {*.js}] { | |
set inchan [open $srcjs] | |
gets $inchan | |
set json [read $inchan] | |
db eval { | |
INSERT INTO tweetjson SELECT json_extract(value,'$.id'),value FROM json_each(:json) | |
} | |
} | |
# Set up an index and a view for SQLite-compatible timestamp - what twitter ships isn't strictly ISO 8601 | |
# (it is technically "just hanging out") so we need substr(1,19) of it. Fortunately it's all UTC and | |
# SQLite lets us index functions. | |
db eval { | |
CREATE INDEX idx_tweet_ts on tweetjson (substr(json_extract(tweet,'$.created_at'),1,19)) | |
} | |
db eval { | |
CREATE VIEW tweet_timestamp as select id,substr(json_extract(tweet,'$.created_at'),1,19) as timestamp from tweetjson | |
} | |
# Tip: | |
# SELECT * FROM tweetjson,tweet_timestamp USING (id); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment