Skip to content

Instantly share code, notes, and snippets.

@ricsi98
Created April 28, 2024 14:29
Show Gist options
  • Save ricsi98/81138cd51e8fe7e15644805c2371bca0 to your computer and use it in GitHub Desktop.
Save ricsi98/81138cd51e8fe7e15644805c2371bca0 to your computer and use it in GitHub Desktop.
Create Freebase ID to node index mapping for FB15k-237 dataset.
"""Create Freebase ID to node index mapping for FB15k-237 dataset."""
import json
from pathlib import Path
from typing import Dict, Iterable, Tuple

from torch_geometric.datasets import FB15k_237
# Output files, written relative to the current working directory.
NODE_PATH = Path("nodes.json")  # entity ID  -> integer node index
EDGE_PATH = Path("edges.json")  # relation   -> integer relation index


def build_index_maps(paths: Iterable[str]) -> Tuple[Dict[str, int], Dict[str, int]]:
    """Assign consecutive integer indices to entities and relations.

    Each file is read as tab-separated ``src<TAB>rel<TAB>dst`` triples, one
    per line.  Indices are handed out in first-seen order: files are visited
    in the order given, lines top to bottom, and within a line ``src`` is
    registered before ``dst``.  That ordering is what determines the resulting
    numbers, so it must not be changed.
    NOTE(review): presumably this order is meant to reproduce the indices
    torch_geometric assigns internally -- confirm against the library source.

    Args:
        paths: Paths of the triple files, in the order they should be scanned
            (anything ``open()`` accepts).

    Returns:
        A ``(node_dict, rel_dict)`` pair mapping entity IDs and relation
        names to their zero-based indices.

    Raises:
        ValueError: If a non-empty line does not have exactly three
            tab-separated fields.
    """
    node_dict: Dict[str, int] = {}
    rel_dict: Dict[str, int] = {}
    for path in paths:
        with open(path, "r", encoding="utf-8") as f:
            # Stream line by line instead of read().split("\n")[:-1]: the old
            # form silently dropped the final triple whenever the file did
            # not end with a newline.
            for lineno, raw in enumerate(f, start=1):
                line = raw.rstrip("\n")
                if not line:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                fields = line.split("\t")
                if len(fields) != 3:
                    raise ValueError(
                        f"{path}:{lineno}: expected 3 tab-separated fields, "
                        f"got {len(fields)}"
                    )
                src, rel, dst = fields
                # setdefault only inserts when the key is new, so len() is
                # always the next free index at the moment of insertion.
                node_dict.setdefault(src, len(node_dict))
                node_dict.setdefault(dst, len(node_dict))
                rel_dict.setdefault(rel, len(rel_dict))
    return node_dict, rel_dict


def main() -> None:
    """Load FB15k-237 under ``data/`` and dump both mappings as JSON."""
    ds = FB15k_237(root="data")
    node_dict, rel_dict = build_index_maps(ds.raw_paths)
    with open(NODE_PATH, "w", encoding="utf-8") as f:
        json.dump(node_dict, f)
    with open(EDGE_PATH, "w", encoding="utf-8") as f:
        json.dump(rel_dict, f)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment