szobov/download_and_bake.sh

## download_and_bake.sh
# Download and unpack modelnet40_ply_hdf5_2048.zip, convert its contents to
# per-model .ply files with split_to_ply.py, then fetch the trainval/test/
# train/val .txt lists into the generated ./dataset directory.

# Abort on the first failing command. Without this, a failed download, unzip,
# conversion or cd would let the remaining steps run in the wrong directory
# and still print "Done!".
set -e

# NOTE(review): --no-check-certificate disables TLS certificate verification
# for this download; confirm it is still needed.
wget https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip --no-check-certificate
unzip modelnet40_ply_hdf5_2048.zip
cd modelnet40_ply_hdf5_2048

# The converter is pinned to a specific gist revision.
wget https://gist.githubusercontent.com/szobov/4fe7e6cba08be7044426bedaae67b206/raw/2f79acb796b3860aef81d9e96521c26593f87607/split_to_ply.py
python3 -m pip install h5py
python3 split_to_ply.py

# split_to_ply.py writes its output under ./dataset.
cd dataset
wget https://github.com/fxia22/pointnet.pytorch/files/3659189/trainval.txt
wget https://github.com/fxia22/pointnet.pytorch/files/3914859/test.txt
wget https://github.com/fxia22/pointnet.pytorch/files/3914860/train.txt
wget https://github.com/fxia22/pointnet.pytorch/files/3914862/val.txt
echo "Done!"

## split_to_ply.py
import pathlib
import json
import h5py


# Header written at the top of every generated .ply file: ASCII PLY with
# float x/y/z vertex properties and zero faces. The vertex count is fixed
# at 2048 here rather than derived from the data being written.
ply_header = "\n".join((
    "ply",
    "format ascii 1.0",
    "comment VCGLIB generated",
    "element vertex 2048",
    "property float x",
    "property float y",
    "property float z",
    "element face 0",
    "property list uchar int vertex_indices",
    "end_header",
))


def main():
    """Split the HDF5 shards in the current directory into ASCII .ply files.

    Each ``*.json`` file in the working directory is paired with the ``.h5``
    file it belongs to. Every shard listed in ``test_files.txt`` and
    ``train_files.txt`` is then opened, and each entry of its ``"data"``
    dataset is written to
    ``./dataset/<parent of listed path>/<test|train>/<listed file name>``
    as ``ply_header`` followed by one ``x y z`` line per point.

    Raises:
        AssertionError: if a JSON file name does not yield exactly four
            ``_``-separated tokens before ``_id``, if the derived ``.h5``
            file does not exist, if a listed shard has no JSON index, or if
            the index length differs from the number of entries in the shard.
    """
    dataset_dir = pathlib.Path("./dataset/")

    # Map each shard file name to the JSON file that lists its contents.
    h5_to_json = {}
    for item in pathlib.Path(".").iterdir():
        if item.suffix != ".json":
            continue
        h5_name_tokens = item.name.split("_id")[0].split("_")
        assert len(h5_name_tokens) == 4, (
            f"unexpected JSON file name: {item.name}")
        # "a_b_c_N" -> "a_b_cN.h5": the last token is glued on without "_".
        h5_cleanup_name = ("_".join(h5_name_tokens[:-1]) +
                           h5_name_tokens[-1] +
                           ".h5")
        assert pathlib.Path(h5_cleanup_name).exists(), (
            f"missing shard for {item.name}: {h5_cleanup_name}")
        h5_to_json[h5_cleanup_name] = item

    for list_name in ("test_files.txt", "train_files.txt"):
        # "test" / "train": becomes a directory level in the output path.
        data_type = list_name.split("_")[0]
        listed = pathlib.Path(list_name).read_text().split()
        for h5 in map(pathlib.Path, listed):
            # Only the file name is used; the shard is opened relative to
            # the current directory regardless of the listed directory part.
            h5_file_name = h5.name
            assert h5_file_name in h5_to_json, (
                f"no JSON index found for {h5_file_name}")
            files_list = [
                pathlib.Path(entry)
                for entry in json.loads(h5_to_json[h5_file_name].read_text())
            ]
            # Open read-only and close the handle deterministically; the
            # original never closed it and relied on the default mode.
            with h5py.File(h5_file_name, "r") as h5_content:
                data = h5_content["data"]
                assert len(data) == len(files_list), (
                    f"{h5_file_name}: {len(data)} entries in 'data' but "
                    f"{len(files_list)} paths in the JSON index")
                for points, file_path in zip(data, files_list):
                    new_path = (dataset_dir / file_path.parent /
                                data_type / file_path.name)
                    new_path.parent.mkdir(parents=True, exist_ok=True)
                    ply_content = "\n".join(
                        f"{p[0]:>7f} {p[1]:>7f} {p[2]:>7f}" for p in points)
                    new_path.write_text(f"{ply_header}\n{ply_content}")


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
	# Download and unpack modelnet40_ply_hdf5_2048.zip, convert its contents to
	# per-model .ply files with split_to_ply.py, then fetch the trainval/test/
	# train/val .txt lists into the generated ./dataset directory.
	# Abort on the first failing command. Without this, a failed download,
	# unzip, conversion or cd would let the remaining steps run in the wrong
	# directory and still print "Done!".
	set -e
	# NOTE(review): --no-check-certificate disables TLS certificate verification
	# for this download; confirm it is still needed.
	wget https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip --no-check-certificate
	unzip modelnet40_ply_hdf5_2048.zip
	cd modelnet40_ply_hdf5_2048
	# The converter is pinned to a specific gist revision.
	wget https://gist.githubusercontent.com/szobov/4fe7e6cba08be7044426bedaae67b206/raw/2f79acb796b3860aef81d9e96521c26593f87607/split_to_ply.py
	python3 -m pip install h5py
	python3 split_to_ply.py
	# split_to_ply.py writes its output under ./dataset.
	cd dataset
	wget https://github.com/fxia22/pointnet.pytorch/files/3659189/trainval.txt
	wget https://github.com/fxia22/pointnet.pytorch/files/3914859/test.txt
	wget https://github.com/fxia22/pointnet.pytorch/files/3914860/train.txt
	wget https://github.com/fxia22/pointnet.pytorch/files/3914862/val.txt
	echo "Done!"
	import pathlib
	import json
	import h5py


	# Header written at the top of every generated .ply file: ASCII PLY with
	# float x/y/z vertex properties and zero faces. The vertex count is fixed
	# at 2048 here rather than derived from the data being written.
	# Built from separate lines so that the indentation of this source text
	# cannot leak into the header (the previous triple-quoted literal had a
	# tab at the start of every continuation line).
	ply_header = "\n".join((
	    "ply",
	    "format ascii 1.0",
	    "comment VCGLIB generated",
	    "element vertex 2048",
	    "property float x",
	    "property float y",
	    "property float z",
	    "element face 0",
	    "property list uchar int vertex_indices",
	    "end_header",
	))


	def main():
	    """Split the HDF5 shards in the current directory into ASCII .ply files.

	    Each ``*.json`` file in the working directory is paired with the ``.h5``
	    file it belongs to. Every shard listed in ``test_files.txt`` and
	    ``train_files.txt`` is then opened, and each entry of its ``"data"``
	    dataset is written to
	    ``./dataset/<parent of listed path>/<test|train>/<listed file name>``
	    as ``ply_header`` followed by one ``x y z`` line per point.

	    Raises:
	        AssertionError: if a JSON file name does not yield exactly four
	            ``_``-separated tokens before ``_id``, if the derived ``.h5``
	            file does not exist, if a listed shard has no JSON index, or if
	            the index length differs from the number of entries in the shard.
	    """
	    dataset_dir = pathlib.Path("./dataset/")

	    # Map each shard file name to the JSON file that lists its contents.
	    h5_to_json = {}
	    for item in pathlib.Path(".").iterdir():
	        if item.suffix != ".json":
	            continue
	        h5_name_tokens = item.name.split("_id")[0].split("_")
	        assert len(h5_name_tokens) == 4, (
	            f"unexpected JSON file name: {item.name}")
	        # "a_b_c_N" -> "a_b_cN.h5": the last token is glued on without "_".
	        h5_cleanup_name = ("_".join(h5_name_tokens[:-1]) +
	                           h5_name_tokens[-1] +
	                           ".h5")
	        assert pathlib.Path(h5_cleanup_name).exists(), (
	            f"missing shard for {item.name}: {h5_cleanup_name}")
	        h5_to_json[h5_cleanup_name] = item

	    for list_name in ("test_files.txt", "train_files.txt"):
	        # "test" / "train": becomes a directory level in the output path.
	        data_type = list_name.split("_")[0]
	        listed = pathlib.Path(list_name).read_text().split()
	        for h5 in map(pathlib.Path, listed):
	            # Only the file name is used; the shard is opened relative to
	            # the current directory regardless of the listed directory part.
	            h5_file_name = h5.name
	            assert h5_file_name in h5_to_json, (
	                f"no JSON index found for {h5_file_name}")
	            files_list = [
	                pathlib.Path(entry)
	                for entry in json.loads(h5_to_json[h5_file_name].read_text())
	            ]
	            # Open read-only and close the handle deterministically; the
	            # original never closed it and relied on the default mode.
	            with h5py.File(h5_file_name, "r") as h5_content:
	                data = h5_content["data"]
	                assert len(data) == len(files_list), (
	                    f"{h5_file_name}: {len(data)} entries in 'data' but "
	                    f"{len(files_list)} paths in the JSON index")
	                for points, file_path in zip(data, files_list):
	                    new_path = (dataset_dir / file_path.parent /
	                                data_type / file_path.name)
	                    new_path.parent.mkdir(parents=True, exist_ok=True)
	                    ply_content = "\n".join(
	                        f"{p[0]:>7f} {p[1]:>7f} {p[2]:>7f}" for p in points)
	                    new_path.write_text(f"{ply_header}\n{ply_content}")


	# Run the conversion only when this file is executed as a script.
	if __name__ == '__main__':
	    main()