Requires GNU parallel and aria2c. Both can be installed via Homebrew (brew).
Last active
March 25, 2019 06:11
-
-
Save moskomule/46c926f8474ef5d0fcd2c7a188c4078f to your computer and use it in GitHub Desktop.
webvision
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Fetch all WebVision 2018 archives and file lists into the current directory.
# The zero-padded training shards 00..32 are downloaded four at a time through
# GNU parallel; each aria2c call uses -x5. The small text/info files go through wget.
download() {
    BASE_URL="https://data.vision.ee.ethz.ch/aeirikur/webvision2018"

    # Training shards: `seq -w` zero-pads the index so {} expands to 00, 01, ... 32.
    seq -w 0 32 | parallel -j 4 aria2c -x5 "${BASE_URL}/webvision_train_{}.tar"

    # Validation images and their file list.
    aria2c -x5 "${BASE_URL}/val_images_resized.tar"
    wget "${BASE_URL}/val_filelist.txt"

    # Info archive.
    wget "${BASE_URL}/info.tar"

    # Test images and their file list.
    aria2c -x5 "${BASE_URL}/test_images_resized.tar"
    wget "${BASE_URL}/test_filelist.txt"
}
# Unpack every downloaded tarball in the current directory.
# Training shards are extracted four at a time; the remaining three archives
# are extracted sequentially, in the same order as before.
extract() {
    seq -w 0 32 | parallel -j 4 tar -xvf 'webvision_train_{}.tar'

    local archive
    for archive in val_images_resized info test_images_resized; do
        tar -xvf "${archive}.tar"
    done
}
# Create the output directory skeleton. -p makes this idempotent on re-runs
# and creates test/ implicitly as the parent of test/meta.
mkdir -p train meta test/meta

download
extract

# Rename the extracted validation images and move their file list into meta/.
# (The original line invoked `nv`, which is not a command; `mv` was intended.)
mv val_images_resized val
mv val_filelist.txt meta/val.txt

# Lay the test split out under test/ using the same val/ + meta/val.txt names.
mv test_images_resized test/val
# Append " 0" to every line of the test file list
# (presumably a placeholder label so it matches the "<path> <label>" format).
awk '$0=$0" 0"' test_filelist.txt > test/meta/val.txt

# NOTE(review): nothing in this shell script creates meta/synsets.txt; confirm
# it exists at this point, otherwise this copy fails.
cp meta/synsets.txt test/meta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import shutil | |
from tqdm import tqdm | |
# Dataset directories, all relative to the current working directory.
train, val = Path("train"), Path("val")
info, meta = Path("info"), Path("meta")

# Module-level accumulators: `synsets` collects the synset names read from
# info/synsets.txt, `filelist` collects the "<target> <label>" lines that
# move() records for the training split.
filelist = []
synsets = []
def move(rows: list,
         base: Path,
         append: bool = True):
    """Move every file listed in *rows* into a per-synset folder under *base*.

    Each row is a text line of the form ``<relative/path> <label>``. The file
    is moved to ``base / synsets[int(label)] / <file name>``, where ``synsets``
    is the module-level list of synset names. Missing files are reported on
    stdout and skipped.

    Args:
        rows: Lines of a file list. Blank lines are ignored.
        base: Destination root directory.
        append: When true, the source path is used as given and a
            newline-terminated ``<target> <label>`` entry is added to the
            module-level ``filelist`` for every successful move. When false,
            the source path is resolved relative to the module-level ``val``
            directory and nothing is recorded.
    """
    base = Path(base)
    for row in tqdm(rows):
        fields = row.split()
        if not fields:
            # A blank (e.g. trailing) line would otherwise abort the whole
            # run with a ValueError after files have already been moved.
            continue
        source, label = fields
        name = source.split("/")[-1]
        target = base / synsets[int(label)] / name
        if not append:
            source = val / source
        # Keep the try body down to the one call that can hit a missing file.
        try:
            shutil.move(str(source), str(target))
        except FileNotFoundError:
            print(f"No file {source}")
        else:
            if append:
                filelist.append(f"{target} {label}\n")
# Read the synset table: keep the first whitespace-separated token of each
# line, mirror it into meta/synsets.txt, and remember it in `synsets` so that
# a numeric label can be mapped to a directory name by list index.
with (info / "synsets.txt").open() as f1, \
        (meta / "synsets.txt").open('w') as f2:
    for row in f1:
        k = row.split()[0]
        f2.write(k + "\n")
        synsets.append(k)

# One output directory per synset for each training source and for val.
# parents=True also creates train/google and train/flickr themselves; without
# it mkdir raises FileNotFoundError when those parents do not exist yet.
for s in synsets:
    (train / "google" / s).mkdir(parents=True, exist_ok=True)
    (train / "flickr" / s).mkdir(parents=True, exist_ok=True)
    (val / s).mkdir(parents=True, exist_ok=True)

# Load both training file lists, then move the files; move() records every
# successfully moved training file in `filelist`.
with (info / "train_filelist_google.txt").open() as fg, \
        (info / "train_filelist_flickr.txt").open() as ff:
    rows_google = fg.readlines()
    rows_flickr = ff.readlines()
move(rows_google, train / "google")
move(rows_flickr, train / "flickr")

# Validation files are looked up inside val/ (append=False) and are not
# recorded in `filelist`.
with (meta / "val.txt").open() as f:
    rows = f.readlines()
move(rows, val, False)

# Write out the training file list collected by the move() calls above.
with (meta / "train.txt").open('w') as f:
    f.writelines(filelist)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment