sllvn/scrape_tweets.sh

## scrape_tweets.sh
#!/usr/bin/env bash

# Prefix placed at the start of every log line printed by this script.
script_name='scrape_tweets'

#######################################
# Print one timestamped log line to stdout.
# Globals:
#   script_name (read) - prefix identifying this script
#   task_id     (read) - optional identifier; this script never sets it, so it
#                        is presumably supplied by the caller's environment
#                        (verify). Printed as an empty string when unset.
# Arguments:
#   $1 - message text
# Outputs:
#   "<script_name> (<task_id>) [<timestamp>] - <message>" on stdout
#######################################
log () {
    # Keep the timestamp local so calling log never clobbers a global variable.
    local timestamp
    timestamp=$(date '+%c')
    printf '%s\n' "${script_name} (${task_id:-}) [${timestamp}] - ${1:-}"
}

# ---------------------------------------------------------------------------
# Main flow: download the home timeline as JSON, flatten it to CSV with jq,
# and load the rows into the `tweets` table of tweets.sqlite.
# ---------------------------------------------------------------------------

datestr=$(date '+%Y%m%d_%H%M%S')
filename="timeline.${datestr}.json"

# Every path below is relative to this directory, so stop if it cannot be
# entered instead of writing files into whatever the current directory is.
pushd /home/aps/tweet_scraper/ || {
    log 'cannot enter working directory'
    exit 1
}

# Raw downloads are kept under data/; create the directory on first use.
mkdir -p data || exit 1

log 'downloading tweets'
# NOTE(review): assumes twurl exits non-zero on failure - confirm. An API error
# body that still exits 0 is caught by the jq step below.
if ! /usr/local/bin/twurl "/1.1/statuses/home_timeline.json?count=200" > "data/${filename}"; then
    log 'download failed'
    exit 1
fi

# "if not exists" keeps runs after the first one from failing on this statement.
sqlite3 tweets.sqlite 'create table if not exists tweets (id int unique not null, created_at string, user_id int, screen_name string, is_retweet boolean, text string)'

csvfile=$(mktemp) || exit 1
# Remove the temp file on any exit path, including the early exit below.
trap 'rm -f -- "$csvfile"' EXIT

log 'transforming json -> csv'
# No header line is written: the table already exists at import time, and
# sqlite3's .import then treats every line, including a header, as a data row.
# Column order: id, created_at, user_id, screen_name, is_retweet, text.
if ! jq -r '.[] | [.id, .created_at, .user.id, .user.screen_name, .retweeted_status != null, .text] | @csv' "data/${filename}" > "$csvfile"; then
    log 'json -> csv transform failed'
    exit 1
fi

log 'importing into sqlite db'
printf '%s\n' "$filename"
# Rows whose id is already in the table violate the unique constraint on id
# and are not inserted; the exit status is deliberately not checked here so
# that overlap between consecutive downloads does not abort the run.
sqlite3 tweets.sqlite -cmd '.mode csv' ".import '${csvfile}' tweets"
rm -f -- "$csvfile"
trap - EXIT

log 'done importing'
popd
	#!/usr/bin/env bash

	# Prefix placed at the start of every log line printed by this script.
	script_name='scrape_tweets'

	#######################################
	# Print one timestamped log line to stdout.
	# Globals:
	#   script_name (read) - prefix identifying this script
	#   task_id     (read) - optional identifier; this script never sets it, so
	#                        it is presumably supplied by the caller's
	#                        environment (verify). Printed as empty when unset.
	# Arguments:
	#   $1 - message text
	# Outputs:
	#   "<script_name> (<task_id>) [<timestamp>] - <message>" on stdout
	#######################################
	log () {
		# Keep the timestamp local so calling log never clobbers a global variable.
		local timestamp
		timestamp=$(date '+%c')
		printf '%s\n' "${script_name} (${task_id:-}) [${timestamp}] - ${1:-}"
	}

	# ---------------------------------------------------------------------------
	# Main flow: download the home timeline as JSON, flatten it to CSV with jq,
	# and load the rows into the `tweets` table of tweets.sqlite.
	# ---------------------------------------------------------------------------

	datestr=$(date '+%Y%m%d_%H%M%S')
	filename="timeline.${datestr}.json"

	# Every path below is relative to this directory, so stop if it cannot be
	# entered instead of writing files into whatever the current directory is.
	pushd /home/aps/tweet_scraper/ || {
		log 'cannot enter working directory'
		exit 1
	}

	# Raw downloads are kept under data/; create the directory on first use.
	mkdir -p data || exit 1

	log 'downloading tweets'
	# NOTE(review): assumes twurl exits non-zero on failure - confirm. An API error
	# body that still exits 0 is caught by the jq step below.
	if ! /usr/local/bin/twurl "/1.1/statuses/home_timeline.json?count=200" > "data/${filename}"; then
		log 'download failed'
		exit 1
	fi

	# "if not exists" keeps runs after the first one from failing on this statement.
	sqlite3 tweets.sqlite 'create table if not exists tweets (id int unique not null, created_at string, user_id int, screen_name string, is_retweet boolean, text string)'

	csvfile=$(mktemp) || exit 1
	# Remove the temp file on any exit path, including the early exit below.
	trap 'rm -f -- "$csvfile"' EXIT

	log 'transforming json -> csv'
	# The jq stages are joined with plain "|"; a backslash-escaped pipe is not
	# valid jq syntax. No header line is written: the table already exists at
	# import time, and sqlite3's .import then treats every line as a data row.
	# Column order: id, created_at, user_id, screen_name, is_retweet, text.
	if ! jq -r '.[] | [.id, .created_at, .user.id, .user.screen_name, .retweeted_status != null, .text] | @csv' "data/${filename}" > "$csvfile"; then
		log 'json -> csv transform failed'
		exit 1
	fi

	log 'importing into sqlite db'
	printf '%s\n' "$filename"
	# Rows whose id is already in the table violate the unique constraint on id
	# and are not inserted; the exit status is deliberately not checked here so
	# that overlap between consecutive downloads does not abort the run.
	sqlite3 tweets.sqlite -cmd '.mode csv' ".import '${csvfile}' tweets"
	rm -f -- "$csvfile"
	trap - EXIT

	log 'done importing'
	popd