Created
June 27, 2017 22:07
-
-
Save pbhj/6636d0908d0d11885809a2545b138694 to your computer and use it in GitHub Desktop.
bookmarkive shell script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# bookmarkive: heavily modified version of "http://www.gwern.net/Archiving URLs"
# Takes the last ~1.1 months' worth of Firefox bookmarks (new, or whose
# bookmark was altered) and makes a local archive, using wget, of the pages.
#
# break on errors / unset variables, and fail a pipeline if any stage fails;
# see "help set" for info
set -euo pipefail

# Copy places.sqlite locally so we query a stable snapshot (Firefox may be
# holding the live database open).
# NOTE(review): profile directory is hardcoded; the find variant below is
# more portable across machines:
# cp --force "$(find ~/.mozilla/ -name "places.sqlite")" ./
cp --force ~/.mozilla/firefox/8aq4ajqt/places.sqlite ./
printf "copied places.sqlite \n"

# TODO
# handle cookies too
# cp --force ~/.mozilla/firefox/8aq4ajqt/cookies.sqlite ./
# sqlite3 ./cookies.sqlite "SELECT * FROM moz_cookies;" >> cookies.sqlite.txt

# strftime reference at http://www.sqlite.org/lang_datefunc.html
#
# "moz_historyvisits.visit_date is in (the number of) microseconds since
# January 1, 1970 UTC" hence the 1000000 factor; see
# http://www.forensicswiki.org/wiki/Mozilla_Firefox and http://www.alekz.net/archives/740
# removed "ORDER by lastModified" from end as sorting in filter-urls.sh
sqlite3 places.sqlite "SELECT url FROM moz_places, moz_bookmarks \
  WHERE moz_places.id = moz_bookmarks.fk and lastModified > strftime('%s','now','-1.1 month')*1000000;" \
  | ./filter-urls.sh > ./filtered_places.sqlite
printf "filtered\n"
rm ./places.sqlite
printf "removed \n"

# Split the URL list into 500-line chunks so one wget job runs per chunk.
split -l500 ./filtered_places.sqlite ./split_filtered_places.sqlite--
rm ./filtered_places.sqlite

# wget options
# --tries=3 only retry 3 times, default is 20
cd ./archive/ || exit
for url in ../split_filtered_places.sqlite--*; do
  # One background job per chunk; the chunk file is deleted only when its
  # wget run succeeds, so leftover chunks mark failed downloads.
  wget --tries=3 --continue --page-requisites --timestamping --input-file "$url" \
    && rm -- "$url" &
done
# BUG FIX: the original detached each job with "( ... &)" and fell straight
# through to rdfind and cleanup while downloads were still running — a race
# that could delete chunk files wget was still reading and dedupe a
# half-finished archive. Wait for every background job before post-processing.
wait
# return to start directory
cd ..

# TODO
# auto-delete larger files?
#find ./archive -size +4M -delete

# remove duplicate files and make symlinks
rdfind -makesymlinks true -makeresultsfile true .
# copy results file and add date label
cp results.txt{,$(/bin/date +_%Y%m%d-%s)}
# clean up temp files (chunks left behind by failed wget jobs).
# BUG FIX: we already cd'd back to the start directory, so the chunks are at
# ./split..., not ../split... as the original had; -f keeps the script from
# dying under set -e when every chunk was already removed on success.
rm -f ./split_filtered_places.sqlite--*
# END
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment