Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/env bash
# Identifier that the log helper prints at the start of every log line.
script_name="scrape_tweets"
#######################################
# Print one log line to stdout in the form
#   <script_name> (<task_id>) [<timestamp>] - <message>
# Globals:   script_name (read), task_id (read; printed empty when unset)
# Arguments: $1 - message text
# Outputs:   the formatted line on stdout
#######################################
log () {
  # Keep the timestamp local so calling log never clobbers a caller's variable.
  local timestamp
  timestamp=$(date '+%c')
  # The ":-" defaults keep the function usable under "set -u" when a global
  # (notably task_id) has not been set by the caller.
  printf '%s\n' "${script_name:-} (${task_id:-}) [${timestamp}] - ${1:-}"
}
# ---------------------------------------------------------------------------
# Main flow:
#   1. download the home timeline as JSON into data/timeline.<date>.json
#   2. make sure the tweets table exists in tweets.sqlite
#   3. flatten the JSON to CSV with jq
#   4. import the CSV into the tweets table
# ---------------------------------------------------------------------------

# Print an error message to stderr and stop the script with status 1.
die () {
  printf '%s: error: %s\n' "${script_name}" "$1" >&2
  exit 1
}

datestr=$(date '+%Y%m%d_%H%M%S')
filename="timeline.${datestr}.json"

# Every path below is relative to the scraper directory, so a failed pushd
# must stop the run instead of writing into the current directory.
pushd /home/aps/tweet_scraper/ || die 'cannot enter /home/aps/tweet_scraper/'

mkdir -p data || die 'cannot create data directory'

log 'downloading tweets'
/usr/local/bin/twurl "/1.1/statuses/home_timeline.json?count=200" > "data/${filename}" \
  || die "twurl failed while writing data/${filename}"

# "if not exists" lets the script run repeatedly against the same database;
# a plain "create table" errors on every run after the first.
sqlite3 tweets.sqlite 'create table if not exists tweets (id int unique not null, created_at string, user_id int, screen_name string, is_retweet boolean, text string)' \
  || die 'cannot create tweets table'

csvfile=$(mktemp) || die 'mktemp failed'
# Remove the temp file on every exit path, including the die call below.
trap 'rm -f -- "${csvfile}"' EXIT

log 'transforming json -> csv'
# No header row is written: the table already exists at import time, and
# sqlite3's .import then treats every CSV row, including the first, as data.
jq -r '.[] | [.id, .created_at, .user.id, .user.screen_name, .retweeted_status != null, .text] | @csv' "data/${filename}" > "${csvfile}" \
  || die "jq could not convert data/${filename} to csv"

log 'importing into sqlite db'
printf '%s\n' "${filename}"
# The import status is intentionally not fatal: rows whose id is already in
# the table are rejected by the unique constraint on id, which is presumably
# routine when consecutive timeline downloads overlap.
sqlite3 tweets.sqlite -cmd '.mode csv' ".import '${csvfile}' tweets"

rm -f -- "${csvfile}"
trap - EXIT
log 'done importing'
popd
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment