Last active
September 10, 2017 05:57
-
-
Save ckhung/5a5bf37a3e9ef469310af1b7c41cce16 to your computer and use it in GitHub Desktop.
generate wget commands to get a small fraction of the images used in imagenet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w
# https://newtoypia.blogspot.tw/2017/08/imagenet.html
# (blog post title, translated: "Download a small fraction of the ImageNet
# training images, by class")
#
# Reads image-URL lines from STDIN and prints, on STDOUT, a shell script of
# wget commands that fetches every n-th URL of each requested class.
#
# usage: ./imnetget.perl -n 200 wnid-name.txt < fall11_urls.txt > cmd.sh
# where each line of wnid-name.txt begins with a wnid such as
# n02481823
# n02482474
### any lines not beginning with a wnid are ignored as comments
#
# options:
#   -n N    look at only every N-th occurrence of each wnid (default 100)
#   -i I    starting at the I-th occurrence (default 1)
#   -T SEC  wget timeout for each image (default 5)
#   -s D/K  emit "sleep D" after every K wget commands (default 30/20)
use Getopt::Std;
use strict;

my $usage = "usage: $0 [-n N] [-i I] [-T SEC] [-s D/K] wnid-name.txt"
          . " < urls.txt > cmd.sh\n";

my %opts = (
    n => 100,       # look at only every n-th occurrence
    i => 1,         # starting at the i-th occurrence
    T => 5,         # timeout (seconds) for each image
    s => '30/20',   # sleep 30sec after every 20 wget's
);
getopts('n:i:T:s:', \%opts) or die $usage;

# Validate everything that is later used as a modulus or pasted into the
# generated shell script, so bad input fails here with a clear message
# instead of as "Illegal modulus zero" halfway through the output.
die "$0: -n must be a positive integer\n"
    unless $opts{n} =~ /^\d+$/ && $opts{n} > 0;
die "$0: -i must be a non-negative integer\n"
    unless $opts{i} =~ /^\d+$/;
die "$0: -T must be a plain timeout value such as 5\n"
    unless $opts{T} =~ /^[\w.]+$/;
my ($sleep_secs, $sleep_every) = $opts{s} =~ m#(\d+)/(\d+)#;
die "$0: -s must look like SECONDS/COUNT with COUNT > 0, e.g. 30/20\n"
    unless defined $sleep_every && $sleep_every > 0;

# Reduce the starting offset modulo n so that "-i N -n N" (or any -i >= -n)
# still selects images instead of never matching.
my $offset = $opts{i} % $opts{n};

# Collect the set of requested wnids from the file named on the command line.
@ARGV or die $usage;
my $wnid_file = $ARGV[0];
open my $wnid_fh, '<', $wnid_file
    or die "$0: cannot open $wnid_file: $!\n";
my %wanted;
while (my $line = <$wnid_fh>) {
    $wanted{$1} = 1 if $line =~ /^\s*(n\d{7,})/;
}
close $wnid_fh;

my %count;            # per-wnid number of URL lines seen so far
my $wget_total = 0;   # number of wget commands emitted so far
while (my $line = <STDIN>) {
    # Each input line must start with a wnid and contain an http(s) URL.
    next unless $line =~ /^\s*(n\d+)/;
    my $wnid = $1;
    next unless exists $wanted{$wnid};
    next unless $line =~ /(http\S+)/i;
    my $url = $1;

    ++$count{$wnid};
    next unless $count{$wnid} % $opts{n} == $offset;

    printf("wget -T %s -t 1 -O %s-%04d.%s %s\n",
        $opts{T}, $wnid, $count{$wnid},
        url_extension($url), shell_single_quote($url));
    ++$wget_total;
    print("sleep $sleep_secs\n") if $wget_total % $sleep_every == 0;
}

# Return the lower-cased file extension of the last path segment of $url,
# or 'jpg' when that segment has none.  The query string, fragment and host
# name are stripped first so that "http://example.com/image" does not yield
# "com" and "a.jpg?v=1.2" does not yield "2".
sub url_extension {
    my ($url) = @_;
    my $path = $url;
    $path =~ s/[?#].*\z//s;            # drop query string / fragment
    $path =~ s{\A\w+://[^/]*}{};       # drop scheme and host
    return $path =~ /\.(\w+)\z/ ? lc($1) : 'jpg';
}

# Wrap $text in single quotes for /bin/sh, escaping embedded single quotes
# so that a URL from the (untrusted) input cannot terminate the quoting and
# inject commands into the generated script.
sub shell_single_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w
# https://newtoypia.blogspot.tw/2017/08/imagenet.html
# (blog post title, translated: "Download a small fraction of the ImageNet
# training images, by class")
#
# usage: ./wnid2nam.perl ape-wnid.txt < input.txt
# For each wnid, replace all occurrences with its class name
#
# Each line of the mapping file is "<wnid> [<class-name>]"; a wnid without a
# name maps to itself, and lines not starting with a wnid are ignored.
# A wnid in the input that is not in the mapping file is replaced by "?".
use Getopt::Std;
use strict;

@ARGV or die "usage: $0 wnid-name.txt < input.txt\n";
my $map_file = $ARGV[0];
open my $map_fh, '<', $map_file
    or die "$0: cannot open $map_file: $!\n";

# wnid => class name
my %class;
while (my $line = <$map_fh>) {
    next unless $line =~ /^\s*(n\d+)(?:\s+(\S+))?/;
    my ($wnid, $name) = ($1, $2);
    # Test definedness rather than truth so a class literally named "0"
    # is kept instead of being treated as missing.
    $class{$wnid} = defined $name ? $name : $wnid;
}
close $map_fh;

while (my $line = <STDIN>) {
    # NOTE(review): lines containing no wnid are dropped rather than passed
    # through; this keeps the script's existing output -- confirm intended.
    next unless $line =~ /\bn\d+\b/;

    # Translate every wnid on the line through its own lookup, not just
    # the first one found, so lines mentioning several classes are fully
    # converted.
    $line =~ s/\b(n\d+)\b/exists $class{$1} ? $class{$1} : '?'/ge;
    print $line;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment