Skip to content

Instantly share code, notes, and snippets.

@akirayou
Last active March 27, 2022 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akirayou/243fee1ca02bc671e6f19f69af2630b2 to your computer and use it in GitHub Desktop.
Save akirayou/243fee1ca02bc671e6f19f69af2630b2 to your computer and use it in GitHub Desktop.
NBDC版日化辞RDFから欲しいものだけをダンプする時の書き方のテスト。tar.gzの中のファイルは1~222まで分かれているので、その番号ごとに処理すれば大丈夫っぽい?  https://dbarchive.biosciencedbc.jp/jp/nikkaji/desc.html
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 27 11:47:20 2022
@author: youak
"""
from rdflib import Graph
import tarfile
import gzip
# Open the three NikkajiRDF tar archives, keyed by the kind of data each holds.
tf = {
    "main": tarfile.open("NBDC_NikkajiRDF_main.tar.gz", "r"),
    "inchi": tarfile.open("NBDC_NikkajiRDF_InChI.tar.gz", "r"),
    "smiles": tarfile.open("NBDC_NikkajiRDF_SMILES.tar.gz", "r"),
}
# Key view reused wherever the archives are walked side by side.
K = tf.keys()
def tf_elm2graph(tf,e):
    """Parse one gzip-compressed member of a tar archive into an rdflib Graph.

    Args:
        tf: An open ``tarfile.TarFile``.
        e: The member of ``tf`` to read (a ``TarInfo`` or member name).

    Returns:
        A new ``Graph`` holding the triples parsed from the member, which is
        decompressed with gzip and decoded as UTF-8 text.

    Raises:
        ValueError: If ``e`` cannot be extracted as a file
            (``TarFile.extractfile`` returned ``None``).
    """
    member_stream = tf.extractfile(e)
    if member_stream is None:
        # extractfile() yields None for entries that are not files (e.g.
        # directories); fail with a clear message instead of inside gzip.
        raise ValueError(f"{e!r} is not a regular file in the archive")
    # Close both the raw member stream and the gzip text wrapper once parsing
    # is done, so handles do not pile up while walking many members.
    with member_stream, gzip.open(member_stream, mode='rt', encoding="utf-8") as text:
        g = Graph()
        g.parse(text)
    return g
from rdflib.namespace import RDF,RDFS,SKOS,DCTERMS
from rdflib.term import URIRef

# Predicate whose object is printed as the InChI / InChIKey / SMILES value.
_SIO_HAS_VALUE = URIRef("http://semanticscience.org/resource/SIO_000300")


def _print_descriptor(graph, label, tag):
    """Print ``tag`` and the value of each node in ``graph`` labelled ``label``.

    A matching node may not exist, so every match is iterated rather than
    assuming exactly one.  For each match the first ``_SIO_HAS_VALUE`` object
    is printed; ``next()`` raises ``StopIteration`` if the node has none.

    Returns:
        True if at least one value was printed, otherwise False.
    """
    found = False
    for node, _, _ in graph.triples((None, RDFS.label, label)):
        _, _, value = next(graph.triples((node, _SIO_HAS_VALUE, None)))
        print(tag, value)
        found = True
    return found


# Walk the three archives in lockstep: the i-th member of each archive is
# parsed together.  zip() stops at the shortest archive.
# NOTE(review): assumes the archives list corresponding members in the same
# order -- confirm against the actual tar contents.
for es in zip(*[tf[k] for k in K]):
    g = {k: tf_elm2graph(tf[k], member) for k, member in zip(K, es)}
    main_graph = g["main"]
    for s, _, _ in main_graph.triples((None, RDF.type, None)):
        # First dcterms:identifier of the subject; StopIteration if absent.
        _, _, nikkaji_id = next(main_graph.triples((s, DCTERMS.identifier, None)))
        print("Nikkaji_ID", nikkaji_id)
        for _, _, o in main_graph.triples((s, RDFS.label, None)):
            print("Label", o)
        for _, _, o in main_graph.triples((s, SKOS.altLabel, None)):
            print("altLabel", o)
        # Dereferencing the links is a hassle, so the label of each descriptor
        # node is derived from the naming convention "<ID>_<descriptor>".
        _print_descriptor(g["inchi"], nikkaji_id + "_standard_InChI", "InChI")
        _print_descriptor(g["inchi"], nikkaji_id + "_standard_InChIKey", "InChIKey")
        if not _print_descriptor(g["smiles"], nikkaji_id + "_canonical_SMILES", "canonical_SMILES"):
            print("NoSMILES==========================================================================")
        # Blank line between entries.
        print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment