Last active
December 30, 2015 03:08
-
-
Save brain90/7766960 to your computer and use it in GitHub Desktop.
Simple script to download Wikimedia Commons search results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# ---------------------------------------------------------
# WikiCommonsDongdoders
# @author: gibrain.wordpress.com
#
# A simple script to mirror all images (full resolution)
# from a Wikimedia Commons search-results page.
#
# Usage
#   ./WikiCommonsDongdoders <short url>
#
# Example
#   Download all 500 images from http://commons.wikimedia.org/w/index.php?title=
#   Special:Search&limit=500&offset=0&redirs=0&profile=default&search=buitenzorg
#
#   1. Shorten the url, ex: http://bit.ly/1atBcCg
#   2. Create a new directory for the download, ex: mkdir ~/Buitenzorg
#   3. cd ~/Buitenzorg/
#   4. ./WikiCommonsDongdoders http://bit.ly/1atBcCg
#   5. tail unduhan.txt and wget-log to see the download progress.
#
# ---------------------------------------------------------
set -euo pipefail

if [[ $# -lt 1 ]]; then
  printf 'usage: %s <short url>\n' "$(basename -- "$0")" >&2
  exit 2
fi

url=$1

echo "Page parsing.... (Please be patient)"
# Fetch the results page, keep every protocol-relative //upload...jpg URL,
# strip the "thumb/" path component so the full-resolution original is
# addressed instead of a thumbnail, then prepend the http: scheme.
if ! wget --no-proxy -q -O - -- "$url" \
    | grep -o -P '//upload.*?jpg' \
    | sed -e 's/thumb\///g' -e 's/^/http:/' > unduhan.txt \
    || [[ ! -s unduhan.txt ]]; then
  echo "error: could not fetch page or no image URLs found" >&2
  exit 1
fi

echo "Downloading...."
# -b: run in background (progress goes to wget-log), -c: resume partial
# downloads, -i: read the URL list produced above.
wget --no-proxy -bc -i unduhan.txt
# \e[93m / \e[39m: bright-yellow on, default color off.
# NOTE: 'tailf' was removed from util-linux; 'tail -f' is the replacement.
printf '\e[93m# Use cat unduhan.txt to see list of full res image\n# Use tail -f wget-log to see download progress\e[39m\n'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment