Skip to content

Instantly share code, notes, and snippets.

@ckhung
Last active September 10, 2017 05:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ckhung/5a5bf37a3e9ef469310af1b7c41cce16 to your computer and use it in GitHub Desktop.
Save ckhung/5a5bf37a3e9ef469310af1b7c41cce16 to your computer and use it in GitHub Desktop.
generate wget commands to get a small fraction of the images used in imagenet
#!/usr/bin/perl -w
# https://newtoypia.blogspot.tw/2017/08/imagenet.html
# 「按照類別下載 ImageNet 訓練圖片的一小部分 」
# usage: ./imnetget.perl -n 200 wnid-name.txt < fall11_urls.txt > cmd.sh
# where each line of wnid-name.txt begins with a wnid such as
# n02481823
# n02482474
### any lines not beginning with a wnid are ignored as comments
use Getopt::Std;
use strict;
# Generate a shell script of wget commands (on STDOUT) that downloads a
# sample of the images listed on STDIN, restricted to the wnids named in
# the file given as the first argument.
#
# Input on STDIN: one image per line, beginning with an id such as
# "n02481823_1234" followed by the image URL.
# Output: one "wget" line per selected image, with a "sleep" line inserted
# after every batch of downloads.

# Defaults for the command-line switches; getopts() overwrites those given.
my %opts = (
    n => 100,       # look at only every n-th occurrence
    i => 1,         # starting at the i-th occurrence
    T => 5,         # timeout (seconds) for each image
    s => '30/20',   # sleep 30sec after every 20 wget's
);
my $usage = "usage: $0 [-n N] [-i I] [-T SECONDS] [-s DURATION/COUNT] "
          . "wnid-file < urls.txt > cmd.sh\n";
getopts('n:i:T:s:', \%opts) or die $usage;

# Validate up front: both -n and the -s count are used as modulus operands
# below, and a zero there would abort the run halfway with
# "Illegal modulus zero".
for my $flag (qw(n i)) {
    die "-$flag must be a non-negative integer\n$usage"
        unless $opts{$flag} =~ /\A\d+\z/;
}
die "-n must be at least 1\n$usage" unless $opts{n} > 0;

my %sleep;
@sleep{"dura","skip"} = $opts{s} =~ m#(\d+)/(\d+)#;
die "-s expects DURATION/COUNT with COUNT >= 1, e.g. 30/20\n$usage"
    unless defined $sleep{skip} && $sleep{skip} > 0;

# Collect the wanted wnids; lines not starting with a wnid are comments.
die $usage unless defined $ARGV[0];
open my $wnid_fh, '<', $ARGV[0] or die "cannot open $ARGV[0]: $!\n";
my %class;
while (my $line = <$wnid_fh>) {
    my ($wnid) = $line =~ /^\s*(n\d{7,})/;
    next unless $wnid;
    $class{$wnid} = 1;
}
close $wnid_fh;

my %count;                  # images seen so far, per wnid
my $wget_tot_count = 0;     # wget commands emitted so far
while (my $line = <STDIN>) {
    my ($wnid) = $line =~ /^\s*(n\d+)/;
    next unless defined $wnid && exists $class{$wnid};
    my ($url) = $line =~ /(http\S+)/i;
    next unless defined $url;

    # Take the extension from the last segment of the URL path only, so
    # that neither the host name (".com") nor a query string is mistaken
    # for a file extension.
    (my $path = $url) =~ s/[?#].*//s;
    my ($ext) = $path =~ m{\A[^/]*//[^/]*/.*\.(\w+)\z}s;
    $ext = 'jpg' unless $ext;

    # Select occurrences i, i+n, i+2n, ...  Written this way (rather than
    # "count % n == i") so that i >= n still selects something, e.g.
    # "-n 1" with the default "-i 1" selects every image.
    my $seen = ++$count{$wnid};
    next unless $seen >= $opts{i} && ($seen - $opts{i}) % $opts{n} == 0;

    # The URL comes from an external list and ends up inside a
    # single-quoted shell word: close the quote, emit an escaped quote,
    # reopen it.
    (my $quoted = $url) =~ s/'/'\\''/g;
    printf("wget -T %s -t 1 -O %s-%04d.%s '%s'\n",
        $opts{T}, $wnid, $seen, lc $ext, $quoted);

    ++$wget_tot_count;
    print("sleep $sleep{dura}\n") if $wget_tot_count % $sleep{skip} == 0;
}
#!/usr/bin/perl -w
# https://newtoypia.blogspot.tw/2017/08/imagenet.html
# 「按照類別下載 ImageNet 訓練圖片的一小部分 」
# usage: ./wnid2nam.perl ape-wnid.txt < input.txt
# For each wnid, replace all occurrences with its class name
use Getopt::Std;
use strict;
# Filter STDIN to STDOUT, replacing every wnid (a word of the form
# n<digits>) with the class name listed for it in the file given as the
# first argument.
#
# Mapping file: each line starts with a wnid, optionally followed by
# whitespace and a class name; a wnid without a name maps to itself.
# A wnid in the input that is not in the mapping file is replaced by "?".
# Input lines that contain no wnid at all are not printed.

die "usage: $0 wnid-name-file < input.txt\n" unless defined $ARGV[0];
open my $map_fh, '<', $ARGV[0] or die "cannot open $ARGV[0]: $!\n";
my %class;
while (my $line = <$map_fh>) {
    my ($wnid, $name) = $line =~ /^\s*(n\d+)(?:\s+(\S+))?/;
    next unless $wnid;
    $class{$wnid} = $name ? $name : $wnid;
}
close $map_fh;

while (my $line = <STDIN>) {
    # Substitute every wnid on the line, not just the first one found.
    # s///g returns the number of substitutions made, which is false when
    # the line held no wnid.
    my $replaced = $line =~ s/\b(n\d+)\b/exists $class{$1} ? $class{$1} : '?'/ge;
    next unless $replaced;
    print $line;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment