adewale/gist:43e9be1e145b65774d2a05aa40a92607

## gistfile1.txt
=Convert javascript data into JSON
sed -e 's/window.YTD.tweet.part0 = //' ./data/tweet.js > ./data/tweet.json

=Convert comma-delimited JSON into newline-delimited JSON
cat data/tweet.json | jq -c '.[]' > newline.json

=Extract all the tweets that match the desired format, i.e. every line containing the literal text 'Theory:'
grep -F 'Theory:' newline.json > theories.json

= Extract the text and URL of each matching tweet. Data is lost at this step, either because some tweets contain newlines (verify by comparing the number of tweets in both files) or because some tweets are retweets.
= Note: every `> theories.txt` below overwrites the file, so only the output of the last command run is kept.
jq -r '.[].full_text' theories.json > theories.txt
jq -r '.[].id_str, .[].full_text' theories.json > theories.txt
jq -r '.[].id_str, .[].full_text, "\n"' theories.json
jq -r '"https://twitter.com/ade_oshineye/status/\(.[].id_str)", .[].full_text, "\n"' theories.json > theories.txt
jq -r '.[].full_text, "https://twitter.com/ade_oshineye/status/\(.[].id_str)", "\n"' theories.json > theories.txt
	=Convert JavaScript data into JSON
	sed -e 's/window.YTD.tweet.part0 = //' ./data/tweet.js > ./data/tweet.json

	=Convert comma-delimited JSON into newline-delimited JSON
	cat data/tweet.json | jq -c '.[]' > newline.json

	=Extract all the tweets that match the desired format
	grep 'Theory:' newline.json > theories.json

	= Extract the text and URL of each matching tweet. This is where we lose data because some tweets contain newlines (verify by comparing the number of tweets in both files) or because some tweets are retweets.
	cat theories.json | jq -r '.[].full_text' > theories.txt
	cat theories.json | jq -r '.[].id_str, .[].full_text' > theories.txt
	cat theories.json | jq -r '.[].id_str, .[].full_text, "\n"'
	cat theories.json | jq -r '"https://twitter.com/ade_oshineye/status/\(.[].id_str)", .[].full_text, "\n"' > theories.txt
	cat theories.json | jq -r '.[].full_text, "https://twitter.com/ade_oshineye/status/\(.[].id_str)", "\n"' > theories.txt