Created April 28, 2024 14:29
-
-
Save ricsi98/81138cd51e8fe7e15644805c2371bca0 to your computer and use it in GitHub Desktop.
Create Freebase ID to node index mapping for FB15k-237 dataset.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Create Freebase ID to node index mapping for FB15k-237 dataset.""" | |
import json
from pathlib import Path
from typing import Dict, Iterable, Tuple, Union

NODE_PATH = Path("nodes.json")
EDGE_PATH = Path("edges.json")


def build_id_mappings(
    paths: Iterable[Union[str, Path]],
) -> Tuple[Dict[str, int], Dict[str, int]]:
    """Build entity and relation index mappings from triple files.

    Each file is read line by line; every non-empty line must hold exactly
    three tab-separated fields: ``source``, ``relation``, ``destination``.
    Identifiers receive consecutive integer indices in order of first
    appearance across all files (source before destination on each line).

    Args:
        paths: Files to scan, in the order they should be processed.

    Returns:
        A ``(node_dict, rel_dict)`` pair mapping entity identifiers and
        relation identifiers, respectively, to their integer index.

    Raises:
        ValueError: If a non-empty line does not have exactly three
            tab-separated fields.
    """
    node_dict: Dict[str, int] = {}
    rel_dict: Dict[str, int] = {}
    for path in paths:
        with open(path, "r", encoding="utf-8") as f:
            for lineno, line in enumerate(f, start=1):
                row = line.rstrip("\n")
                # Skip blank lines instead of slicing off the last element of
                # a split: this keeps the final triple even when the file has
                # no trailing newline.
                if not row:
                    continue
                fields = row.split("\t")
                if len(fields) != 3:
                    raise ValueError(
                        f"{path}:{lineno}: expected 3 tab-separated fields, "
                        f"got {len(fields)}"
                    )
                src, rel, dst = fields
                # len() is evaluated before insertion, so a new key gets the
                # next free index and an existing key is left untouched.
                node_dict.setdefault(src, len(node_dict))
                node_dict.setdefault(dst, len(node_dict))
                rel_dict.setdefault(rel, len(rel_dict))
    return node_dict, rel_dict


def main() -> None:
    """Write the FB15k-237 mappings to ``nodes.json`` and ``edges.json``."""
    # Imported here so build_id_mappings stays importable without
    # torch_geometric installed.
    from torch_geometric.datasets import FB15k_237

    # NOTE(review): presumably this makes the raw split files available
    # under "data" -- confirm against the torch_geometric documentation.
    ds = FB15k_237(root="data")
    node_dict, rel_dict = build_id_mappings(ds.raw_paths)

    with open(NODE_PATH, "w", encoding="utf-8") as f:
        json.dump(node_dict, f)
    with open(EDGE_PATH, "w", encoding="utf-8") as f:
        json.dump(rel_dict, f)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.