Last active
October 16, 2016 13:28
-
-
Save innerlee/9b176a52b3330a1340ec94da0cdc721b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Settings shared by every process; `@everywhere` defines them on the master and
# on each worker so that code running remotely can read them.
@everywhere begin
    # Folder that holds the input list files named on the command line.
    image_folder = "/home/gv6/openimages/train_images/"
    # Scratch folder for the temporary base64 payload files.
    tmp_folder = "/media/ramdisk"
    # Root folder of the converted images.
    save_folder = "/home/lizz/openimage"
    # Folder that collects the ".missing" / ".error" marker files.
    missing_folder = joinpath(save_folder, "missing")
    # Edge lengths (in pixels) used to build the resize bounding boxes.
    long, short, longest, shortest = (1024, 512, 8192, 128)
end
# process(line)
#
# Convert one input line into a resized JPEG below `save_folder`.
#
# The first 16 characters of `line` are used as the image name and the last
# whitespace-separated field as the base64 image payload. Lines shorter than 16
# characters are ignored. A line ending in "N/A" produces an empty
# "<name>.missing" marker in `missing_folder`. Images whose output file already
# exists are skipped. Any failure while decoding or converting is recorded as
# "<name>.error" in `missing_folder` (containing `repr` of the exception) and the
# payload file, if it was written, is kept next to it as "<tmp_file>.error".
#
# Always returns `nothing`.
@everywhere function process(line)
    # Image name: the first 16 characters; shorter lines carry no usable record.
    name = line[1:min(end, 16)]
    length(name) != 16 && return nothing

    # "N/A" instead of a payload: leave an empty marker file. `endswith` on the
    # stripped line also copes with extra trailing whitespace.
    if endswith(strip(line), "N/A")
        touch(joinpath(missing_folder, name * ".missing"))
        return nothing
    end

    # Output is sharded as <save_folder>/<char 1>/<chars 2-3>/<name>.jpg.
    p = joinpath(save_folder, name[1:1], name[2:3])
    f = joinpath(p, name * ".jpg")

    # Already converted: nothing to do.
    isfile(f) && return nothing
    ispath(p) || mkpath(p)

    tmp_file = joinpath(tmp_folder, name * ".jpg.b64")
    try
        # ImageMagick reads the payload through its `inline:` data-URI reader.
        write(tmp_file, "data:image/jpeg;base64,", split(line)[end])

        # `identify` prints "<width>/<height>"; parse the two integers explicitly
        # rather than evaluating the program's output as Julia code.
        dims = split(strip(String(read(`identify -quiet -format "%w/%h" inline:$tmp_file`))), '/')
        length(dims) == 2 || error("unexpected identify output")
        ratio = parse(Int, dims[1]) / parse(Int, dims[2])

        # Pick the bounding box from the aspect ratio (width / height):
        # extremely tall, portrait, landscape, extremely wide.
        w, h = if ratio < shortest / long
            (shortest, longest)
        elseif ratio < 1
            (short, long)
        elseif ratio < long / shortest
            (long, short)
        else
            (longest, shortest)
        end

        # The trailing `>` geometry flag only shrinks images larger than the box.
        run(`convert -quiet inline:$tmp_file -resize $(w)x$(h)\> $f`)
        rm(tmp_file)
    catch e
        # Keep the payload for inspection, but only if it was actually written;
        # otherwise the move itself would throw and the marker below would be lost.
        if isfile(tmp_file)
            error_file = tmp_file * ".error"
            # Drop a stale copy first so the move cannot fail on an existing target.
            isfile(error_file) && rm(error_file)
            mv(tmp_file, error_file)
        end
        write(joinpath(missing_folder, name * ".error"), repr(e))
    end
    return nothing
end
# parallel processing
#
# bufan(process, file)
#
# Feed the lines of `file` to `process`, one line per call, spread over the
# available processes. One feeder task is started per target process; each task
# repeatedly reads the next unread line from the shared file handle and waits for
# `process(line)` to finish on its process before reading another. The calling
# process only takes work itself when it is the sole process. Returns once the
# file is exhausted and every feeder task has finished.
function bufan(process, file)
    # Use the actual process ids: they are not guaranteed to be 1:nprocs() once
    # workers have been added or removed.
    pids = procs()
    np = length(pids)
    open(file) do f
        @sync for p in pids
            # Skip the calling process unless there is nobody else to do the work.
            (p != myid() || np == 1) || continue
            @async while !eof(f)
                line = readline(f)
                remotecall_fetch(process, p, line)
            end
        end
    end
end
## main
# At least one input file name (relative to image_folder) must be given.
if isempty(ARGS)
    error("please provide file names")
end
println("will process files: ", join(ARGS, ", "))

# The scratch folder has to exist already; the message shows how to set it up.
if !isdir(tmp_folder)
    error("pls run `sudo mkdir -p $tmp_folder && sudo mount -t tmpfs -o size=2048M tmpfs $tmp_folder`")
end
# The marker folder is created on demand.
if !ispath(missing_folder)
    mkpath(missing_folder)
end

# Process the requested files one after another.
for name in ARGS
    file = joinpath(image_folder, name)
    println(">> processing file ", file)
    bufan(process, file)
    println(">> done file ", file)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment