Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/env bash
#
# Fetches the Twitter home timeline with twurl, stores the raw JSON under
# data/, flattens it to CSV with jq and imports the rows into the `tweets`
# table of tweets.sqlite.

# Label that log() puts at the start of every message it prints.
script_name="scrape_tweets"
#######################################
# Print one timestamped log line to stdout.
# Globals:
#   script_name (read) - label printed at the start of the line
#   task_id     (read) - printed inside the parentheses; empty when unset
# Arguments:
#   $1 - message text
# Outputs:
#   "<script_name> (<task_id>) [<date +%c>] - <message>" on stdout
#######################################
log () {
  # Keep the timestamp local so a call never overwrites a variable of the
  # same name belonging to the caller.
  local timestamp
  timestamp=$(date '+%c')
  # The "-" defaults keep the function usable under `set -u`; the output is
  # the same as a plain expansion when the variables are unset.
  printf '%s\n' "${script_name-} (${task_id-}) [${timestamp}] - ${1-}"
}
# Main flow: download the home timeline, convert it to CSV, load it into SQLite.

datestr=$(date '+%Y%m%d_%H%M%S')
filename="timeline.${datestr}.json"

# Every path below is relative to the scraper directory, so stop if it cannot
# be entered rather than writing files into whatever the current directory is.
pushd /home/aps/tweet_scraper/ || exit 1

# The raw download is redirected into data/, so make sure it exists.
mkdir -p data || exit 1

log 'downloading tweets'
/usr/local/bin/twurl "/1.1/statuses/home_timeline.json?count=200" > "data/${filename}" \
  || { log 'download failed' >&2; exit 1; }

# "if not exists" lets repeat runs reuse the table instead of erroring.
# Text columns are declared "text": SQLite assigns NUMERIC affinity to a
# declared type of "string", which coerces numeric-looking values. Databases
# created by earlier runs keep the schema they already have.
sqlite3 tweets.sqlite 'create table if not exists tweets (id int unique not null, created_at text, user_id int, screen_name text, is_retweet boolean, text text)'

csvfile=$(mktemp) || exit 1
# Remove the scratch file on every exit path, including the early exit below.
trap 'rm -f -- "${csvfile}"' EXIT

log 'transforming json -> csv'
# No header line is written: the tweets table already exists when .import
# runs, and in that case sqlite3 loads every line of the file as a data row.
# NOTE(review): jq releases before 1.7 read numbers as IEEE doubles, so 64-bit
# tweet ids may be rounded; consider .id_str / .user.id_str - confirm against
# the jq version in use and the ids already stored.
jq -r '.[] | [.id, .created_at, .user.id, .user.screen_name, .retweeted_status != null, .text] | @csv' \
  "data/${filename}" > "${csvfile}" \
  || { log 'json -> csv conversion failed' >&2; exit 1; }

log 'importing into sqlite db'
printf '%s\n' "${filename}"
# The import status is not checked: rows rejected by the unique constraint on
# id do not stop the script.
sqlite3 tweets.sqlite -cmd '.mode csv' ".import '${csvfile}' tweets"
rm -f -- "${csvfile}"
log 'done importing'
popd
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.