garyrh/gist:2a373cc5a097433471fa

## gistfile1.sh
# ia-check URL...
#
# Query https://web.archive.org/web/*/URL and print the first recognised
# status phrase found in the response, for example "Saved 5 times" or
# "1,234 URLs have been captured for this domain".
#
# Arguments: the URL to look up. All arguments are joined via "$*" and every
#            space is sent as %20. A trailing "/" is rewritten to "/*", which
#            turns the request into a wildcard (prefix) lookup.
# Outputs:   exactly one line on stdout; it is empty when none of the known
#            phrases occurs in the first 10000 bytes of the response.
# Returns:   2 (usage message on stderr) when no URL is given, 0 otherwise.
# Requires:  curl, and a grep that supports -P (PCRE), e.g. GNU grep.
function ia-check() {
    if [[ -z "$*" ]]; then
        printf 'usage: ia-check URL\n' >&2
        return 2
    fi

    local target result

    # printf rather than echo: echo would swallow an argument such as "-n".
    target=$(printf '%s\n' "$*" | sed 's# #%20#g;s#/$#/\*#')

    # Only the first 10000 bytes of the response are scanned
    # (the same amount GNU head calls "10KB").
    result=$(curl -s -m60 "https://web.archive.org/web/*/${target}" |
        head -c 10000 |
        grep -m1 -Poi "(Saved <strong>\d+ time(s)?)|((\d+,)*\d+ URLs have been captured for this domain)|(Page cannot be crawled or displayed due to robots\.txt)|(This URL has been excluded from the Wayback Machine)|(Wayback Machine doesn&apos;t have that page archived)|(504 Gateway Time-out)" |
        sed "s#&apos;#'#g;s#<strong>##")

    # grep -o prints one line per match; join them with spaces so the
    # function always emits a single line, without exposing the scraped
    # text to word splitting or globbing.
    printf '%s\n' "${result//$'\n'/ }"
}

## gistfile2.sh
# Examples (each call makes ia-check fetch a page from web.archive.org):
ia-check http://www.archiveteam.org # No trailing slash: save count for the individual page
ia-check http://www.archiveteam.org/ # Trailing slash (sent as "/*"): number of saved pages from the www sub-domain
ia-check www.archiveteam.org/ # Same as above, with the scheme omitted
ia-check en.wikipedia.org/blabbyblahblah/ # Number of saved pages from the path blabbyblahblah

# Note that for domains with lots of captures, ia-check may time out
# or the numbers printed may be completely inaccurate.
# Also, if a website only partially blocks the Wayback Machine, these numbers could be
# very inaccurate. There are probably other corner cases I haven't thought of.
	# ia-check URL...
	#
	# Query https://web.archive.org/web/*/URL and print the first recognised
	# status phrase found in the response, for example "Saved 5 times" or
	# "1,234 URLs have been captured for this domain".
	#
	# Arguments: the URL to look up. All arguments are joined via "$*" and
	#            every space is sent as %20. A trailing "/" is rewritten to
	#            "/*", which turns the request into a wildcard (prefix) lookup.
	# Outputs:   exactly one line on stdout; it is empty when none of the known
	#            phrases occurs in the first 10000 bytes of the response.
	# Returns:   2 (usage message on stderr) when no URL is given, 0 otherwise.
	# Requires:  curl, and a grep that supports -P (PCRE), e.g. GNU grep.
	function ia-check() {
		if [[ -z "$*" ]]; then
			printf 'usage: ia-check URL\n' >&2
			return 2
		fi

		local target result

		# printf rather than echo: echo would swallow an argument such as "-n".
		target=$(printf '%s\n' "$*" | sed 's# #%20#g;s#/$#/\*#')

		# The stages must be joined by real (unescaped) pipes. Only the first
		# 10000 bytes of the response are scanned (GNU head calls this "10KB").
		# The page encodes the apostrophe as &apos;, so the pattern has to
		# match the entity; sed decodes it afterwards.
		result=$(curl -s -m60 "https://web.archive.org/web/*/${target}" |
			head -c 10000 |
			grep -m1 -Poi "(Saved <strong>\d+ time(s)?)|((\d+,)*\d+ URLs have been captured for this domain)|(Page cannot be crawled or displayed due to robots\.txt)|(This URL has been excluded from the Wayback Machine)|(Wayback Machine doesn&apos;t have that page archived)|(504 Gateway Time-out)" |
			sed "s#&apos;#'#g;s#<strong>##")

		# grep -o prints one line per match; join them with spaces so the
		# function always emits a single line, without exposing the scraped
		# text to word splitting or globbing.
		printf '%s\n' "${result//$'\n'/ }"
	}
	# Examples (each call makes ia-check run curl against web.archive.org):
	ia-check http://www.archiveteam.org # No trailing slash: save count for the individual page
	ia-check http://www.archiveteam.org/ # Trailing slash: number of saved pages from the www sub-domain
	ia-check www.archiveteam.org/ # Same as above, with the scheme omitted
	ia-check en.wikipedia.org/blabbyblahblah/ # Number of saved pages from the path blabbyblahblah

	# Note that for domains with lots of captures, ia-check may time out
	# or the numbers printed may be completely inaccurate.
	# Also, if a website only partially blocks the Wayback Machine, these numbers could be
	# very inaccurate. There are probably other corner cases I haven't thought of.