Skip to content

Instantly share code, notes, and snippets.

@LinguList

LinguList/README.md

Created Mar 9, 2021
Embed
What would you like to do?

Code accompanying "Mapping Multi-SimLex to Concepticon"

This code accompanies the blog post "Mapping Multi-SimLex to Concepticon" (https://calc.hypotheses.org/2684).

To run the code, it is best to install cldfbench with pip in a fresh virtual environment.

Then download the Multi-SimLex data at: https://multisimlex.com/data/translation.csv and run the first script:

$ python test.py

For the second script, make sure that you have either run cldfbench catconfig or downloaded https://github.com/concepticon/concepticon-data from GitHub, then run:

$ python extract.py
from pyconcepticon import Concepticon
# Export the English and Russian word pairs of the "Vulic-2020-2244" concept
# list, together with their Concepticon mappings and scores, to a TSV file.
concepticon = Concepticon()  # or Concepticon("path/to/concepticon")

# Concepts of the concept list, keyed by their concept number.
cl = {
    concept.number: concept
    for concept in concepticon.conceptlists["Vulic-2020-2244"].concepts.values()
}

# Pair IDs written to the output run from 1 to NUM_PAIRS inclusive.
NUM_PAIRS = 1888

# Column names of the output table, in the order the row values are written.
HEADER = [
    "ID",
    "CONCEPTICON_ID_1",
    "CONCEPTICON_GLOSS_1",
    "CONCEPTICON_ID_2",
    "CONCEPTICON_GLOSS_2",
    "ENGLISH_1",
    "ENGLISH_2",
    "RUSSIAN_1",
    "RUSSIAN_2",
    "ENGLISH_SCORE",
    "RUSSIAN_SCORE",
]

# msl maps every entry of a concept's "simlex_ids" attribute to
# [concepticon_id, concepticon_gloss, english, russian, english_score,
# russian_score]. The lookups further down expect keys of the form
# "<pair>:1" and "<pair>:2" for the two members of a pair.
msl = {}
for concept in cl.values():
    attributes = concept.attributes
    # "links" is kept in the zip (although its values are not used) so that
    # the parallel attribute lists are consumed exactly as before.
    for idx, _link, eng, rus, eng_score, rus_score in zip(
        attributes["simlex_ids"],
        attributes["links"],
        attributes["english_in_source"],
        attributes["russian_in_source"],
        attributes["english_score"],
        attributes["russian_score"],
    ):
        msl[idx] = [
            # Unmapped concepts get empty strings so that "\t".join works.
            concept.concepticon_id or "",
            concept.concepticon_gloss or "",
            eng,
            rus,
            eng_score,
            rus_score,
        ]

# The rows contain Russian text, so the encoding is fixed to UTF-8 instead of
# relying on the platform default (which may be unable to encode Cyrillic).
with open("scores-russian.tsv", "w", encoding="utf-8") as f:
    f.write("\t".join(HEADER) + "\n")
    for i in range(1, NUM_PAIRS + 1):
        cidA, cglA, engA, rusA, eng_scoreA, rus_scoreA = msl[str(i) + ":1"]
        cidB, cglB, engB, rusB, eng_scoreB, rus_scoreB = msl[str(i) + ":2"]
        # Both members of a pair must carry the same scores; only the first
        # member's scores are written to the row.
        assert rus_scoreA == rus_scoreB, "Russian scores differ for pair {0}".format(i)
        assert eng_scoreA == eng_scoreB, "English scores differ for pair {0}".format(i)
        row = [
            str(i),
            cidA,
            cglA,
            cidB,
            cglB,
            engA,
            engB,
            rusA,
            rusB,
            # NOTE(review): assumes the scores are numeric -- confirm
            # against the concept list's attribute types.
            "{0:.2f}".format(eng_scoreA),
            "{0:.2f}".format(rus_scoreA),
        ]
        f.write("\t".join(row) + "\n")
from pyconcepticon import Concepticon
# Export the English and Russian word pairs of the "Vulic-2020-2244" concept
# list, together with their Concepticon mappings and scores, to a TSV file.
concepticon = Concepticon()  # or Concepticon("path/to/concepticon")

# Concepts of the concept list, keyed by their concept number.
cl = {
    concept.number: concept
    for concept in concepticon.conceptlists["Vulic-2020-2244"].concepts.values()
}

# Pair IDs written to the output run from 1 to NUM_PAIRS inclusive.
NUM_PAIRS = 1888

# Column names of the output table, in the order the row values are written.
HEADER = [
    "ID",
    "CONCEPTICON_ID_1",
    "CONCEPTICON_GLOSS_1",
    "CONCEPTICON_ID_2",
    "CONCEPTICON_GLOSS_2",
    "ENGLISH_1",
    "ENGLISH_2",
    "RUSSIAN_1",
    "RUSSIAN_2",
    "ENGLISH_SCORE",
    "RUSSIAN_SCORE",
]

# msl maps every entry of a concept's "simlex_ids" attribute to
# [concepticon_id, concepticon_gloss, english, russian, english_score,
# russian_score]. The lookups further down expect keys of the form
# "<pair>:1" and "<pair>:2" for the two members of a pair.
msl = {}
for concept in cl.values():
    attributes = concept.attributes
    # "links" is kept in the zip (although its values are not used) so that
    # the parallel attribute lists are consumed exactly as before.
    for idx, _link, eng, rus, eng_score, rus_score in zip(
        attributes["simlex_ids"],
        attributes["links"],
        attributes["english_in_source"],
        attributes["russian_in_source"],
        attributes["english_score"],
        attributes["russian_score"],
    ):
        msl[idx] = [
            # Unmapped concepts get empty strings so that "\t".join works.
            concept.concepticon_id or "",
            concept.concepticon_gloss or "",
            eng,
            rus,
            eng_score,
            rus_score,
        ]

# The rows contain Russian text, so the encoding is fixed to UTF-8 instead of
# relying on the platform default (which may be unable to encode Cyrillic).
with open("scores-russian.tsv", "w", encoding="utf-8") as f:
    f.write("\t".join(HEADER) + "\n")
    for i in range(1, NUM_PAIRS + 1):
        cidA, cglA, engA, rusA, eng_scoreA, rus_scoreA = msl[str(i) + ":1"]
        cidB, cglB, engB, rusB, eng_scoreB, rus_scoreB = msl[str(i) + ":2"]
        # Both members of a pair must carry the same scores; only the first
        # member's scores are written to the row.
        assert rus_scoreA == rus_scoreB, "Russian scores differ for pair {0}".format(i)
        assert eng_scoreA == eng_scoreB, "English scores differ for pair {0}".format(i)
        row = [
            str(i),
            cidA,
            cglA,
            cidB,
            cglB,
            engA,
            engB,
            rusA,
            rusB,
            # NOTE(review): assumes the scores are numeric -- confirm
            # against the concept list's attribute types.
            "{0:.2f}".format(eng_scoreA),
            "{0:.2f}".format(rus_scoreA),
        ]
        f.write("\t".join(row) + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment