Last active
June 16, 2017 05:02
-
-
Save mcenirm/7a8470f0e4d99679af3ae9f3b01be77e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Mirror selected folders of a remote site with wget and record every run as
# commits in a git repository (see the steps at the bottom of the script).
#
# Strict mode: abort on any unhandled command failure (-e) and on expansion
# of an unset variable (-u).
set -eu
# Root URL of the site being mirrored.
top='http://ftp.nhc.noaa.gov/atcf/'

# Sub-folders of $top to download. Entries that are commented out are
# skipped; uncomment a name to include it in the mirror.
folders=(
  # adv
  # aid_public
  # btk
  # com
  # dis
  # docs
  # fix
  # fst
  gis
  # gpce
  index
  # lsdiag
  # mar
  # mar-coop
  # pub
  # stext
  # wndprb
)

# Bare repository holding the history, created in the current directory.
gitrepo="${PWD}/atcf.git"
# Work tree: $top with its "scheme://" prefix stripped, resolved against the
# current directory (i.e. ./ftp.nhc.noaa.gov/atcf/).
gitwt="${PWD}/${top#*://}"
# UTC timestamp of this run (ISO 8601); it starts every commit message.
gitts="$(date -u +%FT%TZ)"
# Remote that the history is pushed to.
gitorigin='git@github.com:mcenirm-forks/atcfmirror.git'
# Options shared by every wget invocation in this script. Commented-out
# entries are alternatives kept for reference.
wgetopts=(
  --no-config
  --no-verbose
  # --quiet
  # --spider                # don't download anything
  --timestamping
  --recursive
  --level=inf
  --no-remove-listing
  --no-parent
  # --regex-type=pcre
  # Reject URLs ending in a query of the form "?x=y;z=w" (presumably the
  # column-sort links of generated directory listings -- TODO confirm).
  --reject-regex='\?.=.;.=.$'
  # --reject-regex='\?.=.;.=.$|/atcf/(archive|docs)/'
  --wait=0.1                # wait SECONDS between retrievals
  --quota=20m               # set retrieval quota to NUMBER
  # --limit-rate=RATE       limit download rate to RATE
  # --unlink                remove file before clobber
  # --no-xattr              turn off storage of metadata in extended file attributes
  # Log reasons for URL rejection to a per-run file named after the epoch time.
  "--rejected-log=bmrej-$(date +%s)"
)
# Build the list of start URLs: one "<top><folder>/" per selected folder.
urls=()
for folder in "${folders[@]}" ; do
  urls+=( "${top}${folder}/" )
done
#######################################
# Run git against the mirror's repository and work tree, regardless of the
# current directory.
# Globals:   gitrepo (read) - passed as --git-dir
#            gitwt   (read) - passed as --work-tree
# Arguments: the git sub-command and its arguments, forwarded unchanged
# Returns:   git's exit status
#######################################
_git () {
  git --git-dir="$gitrepo" --work-tree="$gitwt" "$@"
}
#######################################
# Stage and commit everything in the work tree, but only when git reports
# pending changes.
# Globals:   gitts (read) - run timestamp that starts the commit message
# Arguments: words appended to the commit message (e.g. "pre", "post")
# Outputs:   the porcelain status listing on stdout when there are changes
# Returns:   0 when the tree is clean or the commit succeeded; otherwise the
#            status of the git command that failed
#######################################
commit_changes_if_any () {
  # Commit message is the timestamp and all arguments joined by spaces.
  local message=( "$gitts" "$@" )
  local changes
  # Assigned separately from `local` so a failing `git status` is not masked
  # by the (always successful) declaration.
  changes=$(_git status --porcelain) || return
  if [[ -n "$changes" ]] ; then
    printf '%s\n' "$changes"
    _git add . || return
    _git commit -m "${message[*]}"
  fi
}
# First run: create the bare repository and seed it with an empty commit.
if [[ ! -d "$gitrepo" ]] ; then
  git init --bare "$gitrepo"
  # NOTE(review): the commit below runs with --work-tree="$gitwt", which wget
  # has not created yet on a first run; create it up front so git has a
  # directory to operate in.
  mkdir -p -- "$gitwt"
  _git commit --allow-empty -m 'Initial commit'
fi

# Register the remote once; get-url fails when "origin" is not configured,
# and its output/error text is irrelevant here.
if ! _git remote get-url origin >/dev/null 2>&1 ; then
  _git remote add origin "$gitorigin"
fi

# Record whatever changed in the mirror since the previous run before
# downloading anything new.
if [[ -d "$gitwt" ]] ; then
  commit_changes_if_any pre
fi

# Fetch the selected folders, then a second pass from the top that only
# accepts URLs ending in "/".
# Because of `set -e`, a non-zero wget exit status stops the script here,
# before the "post" commit.
wget "${wgetopts[@]}" "${urls[@]}"
wget "${wgetopts[@]}" --accept-regex='/$' "$top"

# List files modified within the last day. `find .` (rather than `find *`)
# also works when the directory is empty or a name starts with "-"; paths
# are printed with a leading "./".
( cd -- "$gitwt" && find . -type f -mtime -1 -ls )

commit_changes_if_any post
_git push -u origin master

# Stop here: the remainder of the file is pasted wget help text, not shell.
exit
GNU Wget 1.19.1, a non-interactive network retriever. | |
Usage: wget [OPTION]... [URL]... | |
Mandatory arguments to long options are mandatory for short options too. | |
Logging and input file: | |
-o, --output-file=FILE log messages to FILE | |
-a, --append-output=FILE append messages to FILE | |
Recursive accept/reject: | |
-A, --accept=LIST comma-separated list of accepted extensions | |
-R, --reject=LIST comma-separated list of rejected extensions | |
--accept-regex=REGEX regex matching accepted URLs | |
--reject-regex=REGEX regex matching rejected URLs | |
-D, --domains=LIST comma-separated list of accepted domains | |
--exclude-domains=LIST comma-separated list of rejected domains | |
--follow-tags=LIST comma-separated list of followed HTML tags | |
--ignore-tags=LIST comma-separated list of ignored HTML tags | |
-H, --span-hosts go to foreign hosts when recursive | |
-L, --relative follow relative links only | |
-I, --include-directories=LIST list of allowed directories | |
--trust-server-names use the name specified by the redirection | |
URL's last component | |
-X, --exclude-directories=LIST list of excluded directories | |
Mail bug reports and suggestions to <bug-wget@gnu.org> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment