Skip to content

Instantly share code, notes, and snippets.

@MrDrMcCoy
Created July 7, 2021 19:05
Show Gist options
  • Save MrDrMcCoy/3a1a714aa12a71eb17c69c00fbe8ed21 to your computer and use it in GitHub Desktop.
Save MrDrMcCoy/3a1a714aa12a71eb17c69c00fbe8ed21 to your computer and use it in GitHub Desktop.
Scrape the-eye.eu using rclone
#!/usr/bin/env bash
#####
# Scrape the-eye.eu using rclone
#####
# Stop the whole script with status 1 when it is interrupted (Ctrl-C) or
# asked to terminate. SIGKILL is intentionally not listed: the kernel never
# delivers it to a handler, so a trap on it can never run.
trap 'exit 1' INT TERM
# Options passed to every `rclone copy` invocation.
# The --filter rules are order-sensitive: rclone evaluates them top to bottom
# and the first rule that matches a path decides whether it is kept ("+") or
# dropped ("-"). Excludes therefore come first, then the allow-list of file
# types, and finally a catch-all exclude.
rclone_opts=(
  ## general behaviour (uncomment --dry-run to preview without downloading)
  # --dry-run
  --fast-list
  --ignore-case
  --log-file="rclone-eye.log"
  --min-size="1"
  --multi-thread-streams="0"
  --progress
  --retries-sleep="2s"
  --retries="3"
  --size-only
  --tpslimit="2"
  --transfers="1"
  --use-mmap
  --user-agent="eye02"
  --verbose
  ## exclude filters
  --filter="- **{astrology,freemason,horoscope,illuminati}**"
  --filter="- **{conspiracy,gutenberg,occult}*/"
  --filter="- **free*energy**"
  --filter="- **index.html*"
  # kamp[fh] matches the correct spelling "kampf" as well as the misspelt
  # "kamph" this rule used to target exclusively.
  --filter="- **mein*kamp[fh]**"
  --filter="- **new*world*order**"
  ## include filters
  # NOTE(review): rclone globs are not regular expressions. "[^.]" looks like
  # a character class matching exactly one character, followed by a separate
  # "*" wildcard, so a rule such as "pdf[^.]*" may require at least one
  # character after "pdf". Confirm with --dry-run before relying on these.
  --filter="+ **.[^.]*{css,htm,mht,svg,wiki,xml}[^.]*"
  --filter="+ **.{7z,bz2,gz,img,iso,rar,t[bgx]z,tar,xz,zip,zipx,zst,z}"
  --filter="+ **.{aif?,flac,mp3,m4a,og[ag],opus,wav,wma}"
  --filter="+ **.{ass,idx,srt,sub}"
  --filter="+ **.{aux,bbl,bib,blg,brf,bst,cls,dtx,fd,lat,tex}[^.]*"
  --filter="+ **.{av,celtx,fcf,story,xav}"
  --filter="+ **.{avi,divx,gifv,mp4,mov,mkv,ogv,webm,wmv}"
  --filter="+ **.{az,chm,djv,eps,epub,ibook,fb2,hlp,lit,mdi,mobi,pdf,pml,[gp]s,tif,xps}[^.]*"
  --filter="+ **.cb[rz]"
  --filter="+ **.{bak,kml,man,notebook,rdf}"
  --filter="+ **.{bbs,csv,faq,md,rst,txt}"
  --filter="+ **.{bmp,gif,jp2,jpeg,jpg,png,webp}"
  --filter="+ **.{db,mdb,mdt,pdb,sql}[^.]*"
  --filter="+ **.{doc,od,page,ppt,rtf,wp,xls}[^.]*"
  --filter="+ **.{bash,bat,js,py,r,rb,pl,sh}"
  --filter="+ **.{ly,mus,mid,msc,mxl,sib}[^.]*"
  --filter="+ **{changelog,install,license,notes,readme}"
  ## exclude everything else
  --filter="- **"
)
# Mirror every "<host> <path>" entry of the here-document below into a local
# directory named "<host><path>", relative to the current directory.
# Everything after the first space on a line becomes the path, so paths may
# themselves contain spaces.
# The list is read on file descriptor 3 instead of stdin so that commands run
# inside the loop cannot consume the remaining entries, and the here-document
# delimiter is quoted so that the entries are taken literally.
while read -r -u 3 host path; do
  # Skip blank lines and commented lines in the below HEREDOC.
  [[ -z ${host} || ${host} == \#* ]] && continue
  printf '%s Syncing %s%s...\n' "$(date)" "${host}" "${path}" \
    | tee -a "rclone-eye.log"
  if ! mkdir -vp -- "${host}${path}"; then
    printf 'Cannot create %s%s, skipping.\n' "${host}" "${path}" >&2
    continue
  fi
  # A failed transfer is reported but does not stop the remaining entries.
  rclone copy "${rclone_opts[@]}" \
    --http-url="https://${host}" ":http:${path}" "${host}${path}" \
    || printf 'rclone failed for %s%s\n' "${host}" "${path}" >&2
done 3<<'EOF'
## Add the hostnames and subpaths here, separated by a space.
## Lines beginning with '#' will be skipped.
## You may want to split large directories into their various subpaths to
## make resuming interrupted transfers easier.
the-eye.eu /hardwaretest/
the-eye.eu /public/Books/
the-eye.eu /public/Papers/
the-eye.eu /public/Psychedelics/
the-eye.eu /public/Psychoactives/
the-eye.eu /public/Site-Dumps/RemoteCPU_Archive/
the-eye.eu /public/Site-Dumps/adambibby.ca/
the-eye.eu /public/Site-Dumps/library.uniteddiversity.coop/
the-eye.eu /public/Site-Dumps/pssurvival.com/
the-eye.eu /public/Site-Dumps/r0bin0705_8fzydz_share.nxtcloud.net/
the-eye.eu /public/Site-Dumps/www.elaulademusica.com/
the-eye.eu /public/Site-Dumps/www.jam-night.com/
the-eye.eu /public/Site-Dumps/www.seabrite.com/
the-eye.eu /public/Strategic Intelligence Network/
the-eye.eu /public/WorldTracker.org/
the-eye.eu /public/murdercube.com/
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment