Skip to content

Instantly share code, notes, and snippets.

@yunga
Last active March 11, 2019 22:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yunga/4b98e5c2e53dbcbd4c6c to your computer and use it in GitHub Desktop.
Save yunga/4b98e5c2e53dbcbd4c6c to your computer and use it in GitHub Desktop.
Get Google Images search results.
#!/usr/bin/perl
use Mojolicious::Lite;
use Mojo::Util qw(url_escape url_unescape);
use File::Path qw(make_path);
use Getopt::Std;
##### Available colors and sizes on google image search ###
# Maps the value given to -c onto the fragment added to the "tbs" search
# parameter. An empty string means "no color filter".
# "color" is an alias of "full": it is the name the help text advertises.
my %color = (
    full   => "ic:color",
    color  => "ic:color",
    bw     => "ic:gray",
    any    => "",
    black  => "ic:specific,isc:black",
    blue   => "ic:specific,isc:blue",
    brown  => "ic:specific,isc:brown",
    green  => "ic:specific,isc:green",
    grey   => "ic:specific,isc:grey",
    orange => "ic:specific,isc:orange",
    pink   => "ic:specific,isc:pink",
    purple => "ic:specific,isc:purple",
    red    => "ic:specific,isc:red",
    teal   => "ic:specific,isc:teal",
    white  => "ic:specific,isc:white",
    yellow => "ic:specific,isc:yellow",
);
# Maps the named values of -s onto their "tbs" fragment. An empty string
# means "no size filter". Explicit WIDTHxHEIGHT sizes are handled where the
# option is parsed, not here.
my %size = (
    icon   => "isz:i",
    medium => "isz:m",
    large  => "isz:l",
    any    => "",
    qsvga  => "isz:lt,islt:qsvga",
    vga    => "isz:lt,islt:vga",
    svga   => "isz:lt,islt:svga",
    xga    => "isz:lt,islt:xga",
    "2mp"  => "isz:lt,islt:2mp",
    "4mp"  => "isz:lt,islt:4mp",
);
# Screen size: start from a full HD fallback, then prefer what xrandr reports
# for the current display when its output can be parsed.
my %screen = (w => 1920, h => 1080);
if (`xrandr 2>&1` =~ /, current (\d+) x (\d+),/) {
    ($screen{w}, $screen{h}) = ($1, $2);
}
##### Get command line options (%opt) and set Search options (@tbs) ###
# Help, Verbose, Urls-Only, Directory, Number, Parallel, Filetype, Size, Color
my %opt;    # parsed command line switches
my @tbs;    # fragments later joined into the "tbs" search parameter
getopts('hvud:n:p:f:s:c:', \%opt);
# Display help
# Prints the usage text on STDOUT and exits with status -1.
#   $error (optional): message printed on its own line before the usage text.
# The text interpolates $0 and the detected screen size (%screen).
# Never returns.
sub help {
    my ($error) = @_;
    say "\n$error" if defined $error;
    say qq!
USAGE: $0 [options] search terms
Get Google Images search results. Version 20150303
-h Help, displays this message.
-v Verbose output. Actually it's more like debug infos.
-u Urls-only, displays the urls found and exits, don't download.
-d Directory where the images will be saved.
-n Number of urls to retrieve, between 1 and 100, defaults to 16.
-p Number of parallel downloads, defaults to 16.
-f Filetypes separated by commas, for example: png,gif
-s Size, locally defaults to $screen{w}x$screen{h}. Can also be:
icon medium large any
qsvga (>480x300) vga (>640x480) svga (>800x600)
xga (>1024x768) 2mp (>1600x1200) 4mp (>2272x1704)
-c Color selected in:
black blue brown green grey orange
pink purple red teal white yellow
full -> for full color images
bw -> for black and white images
any -> don't search colors (Default)
The script is silent by default. The exit code is the number of images
downloaded / urls printed, -1 for an error or the help message.
EXAMPLES:
$0 milkyway
Save 16 $screen{w}x$screen{h} milkyway images in the current directory.
$0 -n 50 -c bw -d "anime wall" bad apple
Save 50 black&white images about bad apple in the "./anime wall" folder.
$0 -vs 4mp macrophoto insects
Verbosely save 16 images larger than 2272x1704 in the current folder.
$0 -f gif animated
Search animated wallpapers
Have fun :)
Based on the idea of Tyrell Rutledge, perl stuff by Yunga Palatino.
See: http://reddit.com/r/commandline/2vog7b/
All right reversed. Feel free to copy/modify/redistribute/print/eat/sell.
PS: Use at your own risks. Computer may catch fire.
!;
    exit -1;
}
# Show the usage when asked for it, or when the script was started with
# neither options nor search terms.
help() if $opt{h} or !(keys(%opt) + @ARGV);
# Color names
# The name is matched case-insensitively against %color; an empty filter
# (the "any" entry) adds nothing to the search options.
if (defined $opt{c}) {
    $opt{c} = lc $opt{c};
    my $color_filter = $color{$opt{c}};
    help("$0: Unknown color '$opt{c}'") unless defined $color_filter;
    push @tbs, $color_filter if $color_filter;
}
# Size
# Without -s the search asks for images of exactly the screen size.
# Otherwise the value is either a name from %size or an exact WIDTHxHEIGHT.
if (!defined $opt{s}) {
    push @tbs, "isz:ex,iszw:$screen{w},iszh:$screen{h}";    # Default
}
else {
    $opt{s} = lc $opt{s};
    my $size_filter = $size{$opt{s}};
    if (defined $size_filter) {    # Named size option
        push @tbs, $size_filter if $size_filter;
    }
    elsif ($opt{s} =~ /^(\d+)x(\d+)$/) {    # WIDTHxHEIGHT
        push @tbs, "isz:ex,iszw:$1,iszh:$2";
    }
    else {    # Huh?
        help("$0: Unknown size '$opt{s}'");
    }
}
# Filetypes -- xxx: error checking against predefined filetypes?
# Each requested type becomes a "filetype:xxx" term appended to the query.
# Every term, not only the first, is introduced by an encoded space so the
# query string never contains a raw space; empty entries ("png,,gif") are
# dropped.
my $filetypes = "";
if ($opt{f}) {
    my @types = grep { length } split /,/, $opt{f};
    $filetypes = join "", map { "%20filetype:" . url_escape($_) } @types;
}
# Number of image to download (!= to the number of url fetched later)
# Stored as "count - 1" so it can be compared with array indexes.
my $numdown = 16 - 1;
if (defined $opt{n}) {
    # Report a bad value the same way bad colors and sizes are reported,
    # instead of exiting without a word.
    help("$0: Invalid number of urls '$opt{n}', it must be a positive integer")
        unless $opt{n} =~ /\A\d+\z/ and $opt{n} > 0;
    $numdown = $opt{n} - 1;
}
# Number of parallel downloads
# Zero or a non-numeric value would silently download nothing, so reject it.
my $parallel = $opt{p} // 16;
help("$0: Invalid number of parallel downloads '$parallel', it must be a positive integer")
    unless $parallel =~ /\A\d+\z/ and $parallel > 0;
# Directory
# Defaults to the current directory. A missing directory is created together
# with its parents. make_path croaks on failure unless it is given an "error"
# slot, so the failures are collected and reported here with the documented
# -1 exit status.
my $dir = ".";
if (defined $opt{d}) {
    if (!-d $opt{d}) {
        make_path($opt{d}, { error => \my $failures });
        if (@$failures or !-d $opt{d}) {
            my @reasons;
            for my $failure (@$failures) {
                # Each failure is a one-pair hash: path => message. The path
                # is empty for errors that are not tied to a file.
                my ($path, $message) = %$failure;
                push @reasons, $path eq '' ? $message : "$path: $message";
            }
            say "$0: There was a problem creating '$opt{d}' directory.\n"
                . join("\n", @reasons);
            exit -1;
        }
    }
    $dir = $opt{d};
}
##### Search images ###
# One user agent is shared by the search request and the image downloads.
my $ua = Mojo::UserAgent->new(
    max_redirects      => 10,
    inactivity_timeout => 30
);
# Make the search query-ckroll ;)
# The search terms are escaped here; $filetypes and the tbs fragments are
# already url-safe. Without search terms a built-in, pre-escaped query is used.
my $q = "https://www.google.com/search?tbm=isch&"
    . "q=" . ($#ARGV == -1 ? "%52%69%63%6b%20%41%73%74%6c%65%79" : url_escape(join " ", @ARGV))
    . $filetypes
    . "&tbs=" . join(",", @tbs);
# Fetch the page, select all elements of class rg_l and capture the image url -- xxx: it doesn't seem to contain duplicates
# An element whose markup does not match yields an empty string; those are
# filtered out so they are neither counted nor handed to the downloader.
my @imgurl = grep { defined $_ and length $_ } $ua->get($q, {
    "User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 Chrome/24.0.1312.57"
} )->res->dom(".rg_l")->map(sub { m!href="http://www.google.\w+/imgres\?imgurl=(.*?)&! and $1 })->each;
# Print out some data, this should be called debug instead of verbose...
if ($opt{v}) {
    say "Query: $q";
    # Check the number of image found
    my $found = scalar @imgurl;    # real count, not the last index
    if ($found == 0) {
        say "No result found.";
        exit 0;
    }
    elsif ($found == 1) {
        say "1 url found! We've got a GoogleWhack!!!";
    }
    else {
        say "Found $found urls.";
    }
}
# Never ask for more images than there are urls ($numdown is "count - 1").
$numdown = $#imgurl if $#imgurl < $numdown;
# Display the urls and exits
# The exit status is the number of urls printed, as the help text promises.
if (defined $opt{u}) {
    my @selected = @imgurl[0 .. $numdown];    # empty when nothing was found
    say join "\n", @selected if @selected;
    exit scalar @selected;
}
##### Fetch images in non-blocking way ###
my $count  = 0;    # images saved so far
my $active = 0;    # requests started whose callback has not run yet

# Turn a Content-Type header value into a file extension.
# Returns the subtype of an image/* type, or "unknown" when the header is
# missing or is not an image type.
sub _mime_extension {
    my ($content_type) = @_;
    return "unknown" unless defined $content_type;
    return $content_type =~ m!image/(\w+);?\s*!i ? "$1" : "unknown";
}

# Build the local file name (without directory) for a downloaded image.
#   $url   : url object of the request (its path and host are used)
#   $mime  : extension to use when the url path does not provide one
#   $terms : search terms, used when the url path gives no usable name
sub _file_name_for {
    my ($url, $mime, $terms) = @_;
    my $path = "" . $url->path;    # work on a plain string copy
    $path = url_unescape($path) while $path ne url_unescape($path);
    $path =~ tr!a-zA-Z0-9_\-./!!cd;    # we only keep those chars for filenames
    # Squeeze runs of separators into one char. The class above already
    # removed all whitespace, and this also rules out ".." sequences.
    $path =~ s!([_\-./])+!$1!g;
    my $host = $url->host;
    # hostname - file.ext
    return "$host - $1" if $path =~ m!^.*?/?([a-z0-9_.\-]+\.\w+)/?$!i;
    # hostname - file.mimetype
    return "$host - $1.$mime" if $path =~ m!^.*?/?([a-z0-9_\-.]+)/?$!i;
    # hostname - search query.mimetype
    return "$host - $terms.$mime";
}

# Find where a download of $size bytes should be stored.
# Returns ($path, $is_duplicate). While $path names an existing file, a file
# of the same size is taken to be the same image (duplicate); otherwise a
# numeric index is inserted before the extension, or incremented, until the
# name is free.
sub _target_path {
    my ($fullname, $size) = @_;
    my $index = 0;
    while (-f $fullname) {
        # Same server, same name, same size, probably the same file.
        return ($fullname, 1) if -s _ == $size;
        if ($fullname =~ m!\.(\d+)\.(?:\w+)$!) {
            $index = $1 + 1;
            $fullname =~ s!\.(?:\d+)\.(\w+)$!.$index.$1!;
        }
        else {
            $index++;
            $fullname =~ s!\.(\w+)$!.$index.$1!;
        }
    }
    return ($fullname, 0);
}

# One call starts at most one download; its callback starts the next one, so
# calling it $parallel times keeps up to $parallel transfers in flight.
#   $id : number of the worker chain, passed along to the next call.
my $downloader;
$downloader = sub {
    my $id = shift;
    # Check the target first, so no url is consumed once enough is saved.
    return if $count > $numdown;
    # Take the next usable url; an empty entry must not end this chain.
    my $url;
    while (@imgurl) {
        $url = shift @imgurl;
        last if $url;
    }
    return unless $url;
    $url = url_unescape($url) while $url ne url_unescape($url);    # because it's escaped in the google link...
    $active++;
    $ua->get( $url, {
        "User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 Chrome/24.0.1312.57",
        "Referer"    => $url,    # you cannot link to our images, yeah, yeah, yeah xxx: make some white/blacklist of sites?
    } => sub {
        my ($ua, $tx) = @_;
        $active--;
        # Enough images were saved while this one was still in flight.
        return if $count > $numdown;
        my $url = $tx->req->url;
        # error/res are available on every Mojolicious version; the
        # "success" shortcut they replace here is gone from recent releases.
        my $err = $tx->error;
        if (!$err) {
            my $res   = $tx->res;
            my $asset = $res->content->asset;
            my $size  = $asset->size;
            # Try to get extension from mime-type
            my $mime = _mime_extension($res->headers->content_type);
            my $file = _file_name_for($url, $mime, "@ARGV");
            my ($fullname, $duplicate) = _target_path("$dir/$file", $size);
            if ($duplicate) {
                say "Got $url\n Skipped duplicate: $fullname (" . $size . " bytes)" if $opt{v};
            }
            else {
                # Save file
                $count++;
                $asset->move_to("$fullname");
                say "Got $url\n Saved " . $size . " bytes as $fullname" if $opt{v};
            }
        }
        elsif ($opt{v}) {
            say "Error: " . ($err->{code} // "") . " $err->{message} - $url";
        }
        # Start the next download of this chain; this does nothing when the
        # target is reached or no url is left.
        $downloader->($id);
        # Done when enough images are saved, or when nothing is in flight any
        # more. An empty queue alone is not enough: other transfers may still
        # be running.
        if ($count > $numdown or !$active) {
            say "Downloaded $count image", $count > 1 ? "s" : "" if $opt{v};
            Mojo::IOLoop->stop;
        }
    });
};
$downloader->($_) for 1 .. $parallel;
# Only enter the event loop when at least one request was started.
Mojo::IOLoop->start if $active and !Mojo::IOLoop->is_running;
exit $count;
@TheRinger
Copy link

line 33 -> getopts('hvuwd:n:p:f:s:c:', \%opt);
append to desc => -w Write only Urls to data.txt file.
add option ->

Write the URLs to the data.txt file and exit

if (defined $opt{w}) {
say (join "\n", @imgurl[0..$numdown]);
open (MYFILE, '>>data.txt');
print MYFILE (join "\n", @imgurl[0..$numdown]);
close (MYFILE);
exit;
}

need option to specify (output filename)
ability to wrap @imgurl into html link "a href"
Playing with this in ruby/sinatra

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment