duner/README.md

## README.md

      
    Raw
  

              README.md
            
          
    If you download your personal Twitter archive, you don't quite get the data as JSON, but as a series of .js files, one for each month (these are meant to replicate the Twitter API responses for the front-end part of the downloadable archive).
But if you want to use the data in those files (which is far richer than the CSV data) for some analysis or an app, just run this script.
Run `sh ./twitter-archive-to-json.sh` in the directory that contains the `tweets/` folder that comes with the archive download, and you'll get two files:

tweets.json — a JSON list of the objects
tweets_dict.json — a JSON dictionary where each Tweet's key is its id_str

You'll also get a /json-tweets directory which has the individual JSON files for each month of tweets.

  
## twitter-archive-to-json.sh
#!/usr/bin/env bash
#
# twitter-archive-to-json.sh: turn the monthly tweets/*.js files of a
# downloaded Twitter archive into plain JSON.
#
# Usage: twitter-archive-to-json.sh [TWEETS_DIR]
#   TWEETS_DIR  folder holding the monthly .js files (default: tweets)
#
# Files written to the current directory:
#   json-tweets/<month>.json  each .js file minus its first line
#   tweets.json               all monthly arrays spliced into one JSON array
#   tweets_dict.json          tweets.json as an object keyed by id_str (jq)
#
# Only POSIX shell constructs are used, so `sh ./twitter-archive-to-json.sh`
# behaves the same as running the script under bash.

#######################################
# Convert every monthly .js file and build the combined outputs.
# Arguments: $1 - directory with the monthly .js files (default: tweets)
# Outputs:   progress messages on stdout, diagnostics on stderr
# Returns:   0 on success; 1 when the input directory or its .js files are
#            missing, when a file cannot be written, or when jq is absent
#######################################
convert_archive() {
    src_dir=${1:-tweets}
    month_dir=json-tweets

    if [ ! -d "$src_dir" ]; then
        printf 'error: directory "%s" not found\n' "$src_dir" >&2
        return 1
    fi
    # -p so that a second run over existing output is not an error.
    mkdir -p "$month_dir" || return 1

    echo "Processing Tweet.js files..."
    month_count=0
    for js_file in "$src_dir"/*.js; do
        # With no match the glob stays literal; never treat it as a file.
        [ -f "$js_file" ] || continue
        month_name=${js_file##*/}
        month_name=${month_name%.js}
        # Line 1 is dropped (presumably the JavaScript assignment that wraps
        # the data); everything after it is kept as that month's JSON.
        tail -n +2 "$js_file" > "$month_dir/$month_name.json" || return 1
        month_count=$((month_count + 1))
    done
    if [ "$month_count" -eq 0 ]; then
        printf 'error: no .js files in "%s"\n' "$src_dir" >&2
        return 1
    fi

    echo "Creating tweets.json..."
    # Assumes each monthly JSON file opens with a "[ {" line and closes with
    # a "} ]" line. Those two lines are stripped and the remaining object
    # bodies are joined with "}, {" inside a single outer "[ {" ... "} ]".
    # The text is spliced rather than re-serialised, so the tweet data is
    # copied through unchanged.
    {
        printf '[ {\n'
        need_separator=0
        for js_file in "$src_dir"/*.js; do
            [ -f "$js_file" ] || continue
            month_name=${js_file##*/}
            month_name=${month_name%.js}
            if [ "$need_separator" -eq 1 ]; then
                printf '}, {\n'
            fi
            tail -n +2 "$month_dir/$month_name.json" | sed '$d'
            need_separator=1
        done
        printf '} ]\n'
    } > tweets.json || return 1

    # tweets.json is already complete at this point, so a missing jq only
    # costs the dictionary file.
    if ! command -v jq >/dev/null 2>&1; then
        printf 'error: jq is needed to write tweets_dict.json\n' >&2
        return 1
    fi
    jq 'map({"key": .id_str | tostring, "value": .}) | from_entries' \
        tweets.json > tweets_dict.json || return 1
    echo "DONE"
}

convert_archive "$@"
	#!/usr/bin/env bash
	#
	# Convert the monthly tweets/*.js files of a downloaded Twitter archive
	# into plain JSON.
	#
	# Usage: twitter-archive-to-json.sh [TWEETS_DIR]   (default: tweets)
	#
	# Files written to the current directory:
	#   json-tweets/<month>.json  each .js file minus its first line
	#   tweets.json               all monthly arrays spliced into one array
	#   tweets_dict.json          tweets.json as an object keyed by id_str
	#
	# Only POSIX shell constructs are used, so plain `sh` can run it too.

	#######################################
	# Convert every monthly .js file and build the combined outputs.
	# Arguments: $1 - directory with the monthly .js files (default: tweets)
	# Outputs:   progress messages on stdout, diagnostics on stderr
	# Returns:   0 on success; 1 when the input directory or its .js files
	#            are missing, a file cannot be written, or jq is absent
	#######################################
	convert_archive() {
		src_dir=${1:-tweets}
		month_dir=json-tweets

		if [ ! -d "$src_dir" ]; then
			printf 'error: directory "%s" not found\n' "$src_dir" >&2
			return 1
		fi
		# -p so that a second run over existing output is not an error.
		mkdir -p "$month_dir" || return 1

		echo "Processing Tweet.js files..."
		month_count=0
		for js_file in "$src_dir"/*.js; do
			# With no match the glob stays literal; skip it.
			[ -f "$js_file" ] || continue
			month_name=${js_file##*/}
			month_name=${month_name%.js}
			# Line 1 is dropped (presumably the JavaScript assignment
			# wrapping the data); the rest is kept as the month's JSON.
			tail -n +2 "$js_file" > "$month_dir/$month_name.json" || return 1
			month_count=$((month_count + 1))
		done
		if [ "$month_count" -eq 0 ]; then
			printf 'error: no .js files in "%s"\n' "$src_dir" >&2
			return 1
		fi

		echo "Creating tweets.json..."
		# Assumes each monthly JSON file opens with a "[ {" line and closes
		# with a "} ]" line. Both are stripped and the object bodies are
		# joined with "}, {" inside one outer "[ {" ... "} ]".
		{
			printf '[ {\n'
			need_separator=0
			for js_file in "$src_dir"/*.js; do
				[ -f "$js_file" ] || continue
				month_name=${js_file##*/}
				month_name=${month_name%.js}
				if [ "$need_separator" -eq 1 ]; then
					printf '}, {\n'
				fi
				tail -n +2 "$month_dir/$month_name.json" | sed '$d'
				need_separator=1
			done
			printf '} ]\n'
		} > tweets.json || return 1

		# tweets.json is complete by now; jq is only needed for the dict.
		if ! command -v jq >/dev/null 2>&1; then
			printf 'error: jq is needed to write tweets_dict.json\n' >&2
			return 1
		fi
		jq 'map({"key": .id_str | tostring, "value": .}) | from_entries' \
			tweets.json > tweets_dict.json || return 1
		echo "DONE"
	}

	convert_archive "$@"