Skip to content

Instantly share code, notes, and snippets.

@takemikami
Last active April 5, 2020 09:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save takemikami/28fa4089b8d3427c677baef489420a68 to your computer and use it in GitHub Desktop.
Save takemikami/28fa4089b8d3427c677baef489420a68 to your computer and use it in GitHub Desktop.
im@sparqlからアイドルの姓名を取り出し、GBoardに単語登録するためのスクリプト
# im@sparql to gboard dic
# im@sparqlからアイドルの姓名を取り出し、GBoardに単語登録するためのスクリプト
#
# 登録手順:
# 1. pip install SPARQLWrapper
# 2. python imasparql2gboard.py | sort | uniq > dictionary.txt
# 3. zip PersonalDictionary.zip dictionary.txt
# 4. Android端末に転送(GoogleDriveとか、OneDriveとかを使えば良い)
# 5. 以降はAndroid端末で操作
# 5.1. 設定→システム→言語と入力→仮想キーボード→GBoard→単語リスト→日本語→メニューからインポート
# 5.2. PersonalDictionary.zipを選んで、登録ボタンを押す
import json, re
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
def imasparql(query, endpoint='https://sparql.crssnky.xyz/spql/imas/query'):
    """Run a SPARQL query and return its result bindings as a DataFrame.

    Args:
        query: SPARQL query text sent to the endpoint.
        endpoint: URL of the SPARQL endpoint to query.

    Returns:
        A pandas DataFrame with one column per variable listed in the
        response's ``head.vars`` (in that order) and one row per binding.
        Each cell holds the binding's ``value`` string; a variable that is
        missing from a binding becomes the empty string.
    """
    sparql = SPARQLWrapper(endpoint)
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    results = sparql.query().convert()

    keys = results['head']['vars']
    rows = [
        {k: binding[k]['value'] if k in binding else "" for k in keys}
        for binding in results['results']['bindings']
    ]
    # Build the frame directly instead of round-tripping through
    # json.dumps/pd.read_json: read_json re-infers dtypes (so string literals
    # could be coerced), literal-JSON input is deprecated in recent pandas,
    # and an empty result would otherwise yield a frame without any columns.
    return pd.DataFrame(rows, columns=keys)
# SPARQL query text: for each subject that has schema.org familyName,
# givenName and name plus the imas-schema familyNameKana, givenNameKana and
# nameKana properties, select the subject and those six values, keeping only
# rows where every value carries the language tag 'ja'.
query = """
PREFIX schema: <http://schema.org/>
PREFIX imas: <https://sparql.crssnky.xyz/imasrdf/URIs/imas-schema.ttl#>
SELECT ?s ?fn ?gn ?nm ?fnk ?gnk ?nmk
WHERE {
?s schema:familyName ?fn; schema:givenName ?gn; schema:name ?nm;
imas:familyNameKana ?fnk; imas:givenNameKana ?gnk; imas:nameKana ?nmk.
FILTER(LANG(?fn) = 'ja' && LANG(?gn) = 'ja' && LANG(?nm) = 'ja'
&& LANG(?fnk) = 'ja' && LANG(?gnk) = 'ja' && LANG(?nmk) = 'ja')
}
"""
def _gboard_lines(readings, words):
    """Yield one tab-separated dictionary row per (reading, word) pair.

    Each row has the form ``reading<TAB>word<TAB>ja-JP``. Pairs whose
    normalized reading equals the word are skipped.
    """
    for reading, word in zip(readings, words):
        # "ゔ" is rewritten to "ぶ" in every reading. Previously the
        # family-name column skipped this step while the given-name and
        # full-name columns applied it.
        # NOTE(review): presumably the Gboard importer does not accept "ゔ"
        # in a reading -- confirm.
        reading = reading.replace("ゔ", "ぶ")
        if reading != word:
            yield "{}\t{}\tja-JP".format(reading, word)


df = imasparql(query)

# Header line printed before the dictionary rows.
print("# Gboard Dictionary version:1")

# (reading column, word column) pairs: family name, given name, full name.
for reading_col, word_col in (("fnk", "fn"), ("gnk", "gn"), ("nmk", "nm")):
    for line in _gboard_lines(df[reading_col].values, df[word_col].values):
        print(line)

# One extra hand-written entry appended after the queried names.
print("{}\t{}\tja-JP".format("もちょ", "(o・∇・o)"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment