# Gist innerlee/9b176a52b3330a1340ec94da0cdc721b by @innerlee (last active October 16, 2016 13:28).
# Settings shared by the master and every worker process.
@everywhere begin
    # directory that the file names given on the command line are resolved against
    image_folder = "/home/gv6/openimages/train_images/"
    # scratch directory for the temporary base64 payloads
    tmp_folder = "/media/ramdisk"
    # root directory of the converted images
    save_folder = "/home/lizz/openimage"
    # directory receiving the `.missing` and `.error` marker files
    missing_folder = joinpath(save_folder, "missing")
    # edge lengths used to pick the bounding box passed to `convert -resize`
    long, short, longest, shortest = 1024, 512, 8192, 128
end
# Convert one line of an image dump into a resized JPEG below `save_folder`.
#
# The first 16 characters of `line` are taken as the image name. The last
# whitespace-separated field is treated as base64-encoded JPEG data, unless the
# line ends with "N/A".
#
# Effects:
#   * name shorter than 16 characters: nothing happens
#   * "N/A" line: an empty `<name>.missing` marker is created in `missing_folder`
#   * target `<save_folder>/<c1>/<c2c3>/<name>.jpg` already exists: nothing happens
#   * otherwise the payload is handed to ImageMagick (`identify`, `convert`); on
#     any failure the payload is kept as `<name>.jpg.b64.error` in `tmp_folder`
#     and the exception is written to `<name>.error` in `missing_folder`
#
# Always returns `nothing`.
@everywhere function process(line)
    name = line[1:min(end, 16)]
    length(name) != 16 && return nothing

    # Only the tail of the line is inspected, so the whole line is not copied.
    # `endswith` stays in bounds even when the stripped tail is shorter than "N/A".
    if endswith(strip(line[end-4:end]), "N/A")
        touch(joinpath(missing_folder, name * ".missing"))
        return nothing
    end

    # outputs are sharded by the first three characters of the name
    p = joinpath(save_folder, name[1:1], name[2:3])
    f = joinpath(p, name * ".jpg")
    isfile(f) && return nothing
    ispath(p) || mkpath(p)

    tmp_file = joinpath(tmp_folder, name * ".jpg.b64")
    try
        # the payload is stored as a data URI and read back through `inline:`
        write(tmp_file, "data:image/jpeg;base64,", split(line)[end])

        # `identify` is asked for "<width>/<height>". The two numbers are parsed
        # explicitly instead of evaluating an external program's output as code.
        dims = split(strip(readstring(`identify -quiet -format "%w/%h" inline:$tmp_file`)), '/')
        length(dims) == 2 || error("unexpected identify output: ", join(dims, '/'))
        ratio = parse(Int, dims[1]) / parse(Int, dims[2])

        # choose the bounding box from the aspect ratio (width / height)
        w, h = ratio < shortest / long ? (shortest, longest) :
               ratio < 1               ? (short, long) :
               ratio < long / shortest ? (long, short) :
                                         (longest, shortest)

        # the trailing `>` is part of the resize geometry (escaped for the command literal)
        run(`convert -quiet inline:$tmp_file -resize $(w)x$(h)\> $f`)
        rm(tmp_file)
    catch e
        # The payload may not exist if the very first write failed; moving a
        # missing file would raise again and hide the original error.
        isfile(tmp_file) && mv(tmp_file, tmp_file * ".error", remove_destination=true)
        write(joinpath(missing_folder, name * ".error"), repr(e))
    end
    return nothing
end
# Feed every line of `file` to `process`, spreading the calls over the processes.
#
# One feeder task is started per target process. Each task repeatedly reads the
# next line from the shared file handle and waits for `process(line)` to finish
# on its process before reading another one. The current process is skipped
# unless it is the only process. The call returns once every feeder task has
# finished, and the file is closed afterwards.
#
# Arguments:
#   process - function taking one line; called through `remotecall_fetch`
#   file    - path of the file to read
function bufan(process, file)
    np = nprocs()
    open(file) do f
        @sync begin
            # Iterate over the actual process ids: they are not guaranteed to be
            # the contiguous range 1:np once workers have been removed or re-added.
            for p in procs()
                if p != myid() || np == 1
                    @async begin
                        while !eof(f)
                            line = readline(f)
                            remotecall_fetch(process, p, line)
                        end
                    end
                end
            end
        end
    end
end
## main
# at least one file name is required on the command line
isempty(ARGS) && error("please provide file names")
println("will process files: ", join(ARGS, ", "))

# the scratch directory must already exist; the marker directory is created on demand
if !isdir(tmp_folder)
    error("pls run `sudo mkdir -p $tmp_folder && sudo mount -t tmpfs -o size=2048M tmpfs $tmp_folder`")
end
if !ispath(missing_folder)
    mkpath(missing_folder)
end

# handle the requested files one after another, each resolved against `image_folder`
for arg in ARGS
    file = joinpath(image_folder, arg)
    println(">> processing file ", file)
    bufan(process, file)
    println(">> done file ", file)
end
# End of gist (GitHub page footer omitted).