-
-
Save Destaq/6386b9b217bd8f664cbfdd5a4979d96d to your computer and use it in GitHub Desktop.
Graphically analyzes the most commonly learned language combinations on r/languagelearning based on user flairs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
    "username": "Reddit username",
    "client_id": "Register a developer script on Reddit",
    "client_secret": "See the above",
    "password": "Your Reddit password"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
AD | ca | |
---|---|---|
AE | ar | |
AF | fa | |
AG | en | |
AI | en | |
AL | sq | |
AM | hy | |
AO | pt | |
AQ | en | |
AR | es | |
AS | en | |
AT | de | |
AU | en | |
AW | nl | |
AX | sv | |
AZ | az | |
BA | bs | |
BB | en | |
BD | bn | |
BE | nl | |
BF | fr | |
BG | bg | |
BH | ar | |
BI | fr | |
BJ | fr | |
BL | fr | |
BM | en | |
BN | ms | |
BO | es | |
BQ | nl | |
BR | pt | |
BS | en | |
BT | dz | |
BV | no | |
BW | en | |
BY | be | |
BZ | en | |
CA | en | |
CC | en | |
CD | fr | |
CF | fr | |
CG | fr | |
CH | de | |
CI | fr | |
CK | en | |
CL | es | |
CM | fr | |
CN | zh | |
CO | es | |
CR | es | |
CU | es | |
CV | pt | |
CW | nl | |
CX | en | |
CY | el | |
CZ | cs | |
DE | de | |
DJ | fr | |
DK | da | |
DM | en | |
DO | es | |
DZ | ar | |
EC | es | |
EE | et | |
EG | ar | |
EH | ar | |
ER | ti | |
ES | es | |
ET | am | |
FI | fi | |
FJ | en | |
FK | en | |
FM | en | |
FO | fo | |
FR | fr | |
GA | fr | |
GB | en | |
GD | en | |
GE | ka | |
GF | fr | |
GG | en | |
GH | en | |
GI | en | |
GL | kl | |
GM | en | |
GN | fr | |
GP | fr | |
GQ | es | |
GR | el | |
GS | en | |
GT | es | |
GU | en | |
GW | pt | |
GY | en | |
HK | zh-hant | |
HM | en | |
HN | es | |
HR | hr | |
HT | fr | |
HU | hu | |
ID | id | |
IE | en | |
IL | he | |
IM | en | |
IN | hi | |
IO | en | |
IQ | ar | |
IR | fa | |
IS | is | |
IT | it | |
JE | en | |
JM | en | |
JO | ar | |
JP | ja | |
KE | sw | |
KG | ky | |
KH | km | |
KI | en | |
KM | ar | |
KN | en | |
KP | ko | |
KR | ko | |
KW | ar | |
KY | en | |
KZ | kk | |
LA | lo | |
LB | ar | |
LC | en | |
LI | de | |
LK | si | |
LR | en | |
LS | en | |
LT | lt | |
LU | lb | |
LV | lv | |
LY | ar | |
MA | ar | |
MC | fr | |
MD | ro | |
ME | srp | |
MF | fr | |
MG | mg | |
MH | en | |
MK | mk | |
ML | fr | |
MM | my | |
MN | mn | |
MO | zh-hant | |
MP | en | |
MQ | fr | |
MR | ar | |
MS | en | |
MT | mt | |
MU | mfe | |
MV | dv | |
MW | en | |
MX | es | |
MY | ms | |
MZ | pt | |
NA | en | |
NC | fr | |
NE | fr | |
NF | en | |
NG | en | |
NI | es | |
NL | nl | |
NO | nb | |
NP | ne | |
NR | na | |
NU | niu | |
NZ | mi | |
OM | ar | |
PA | es | |
PE | es | |
PF | fr | |
PG | en | |
PH | en | |
PK | en | |
PL | pl | |
PM | fr | |
PN | en | |
PR | es | |
PS | ar | |
PT | pt | |
PW | en | |
PY | es | |
QA | ar | |
RE | fr | |
RO | ro | |
RS | sr | |
RU | ru | |
RW | rw | |
SA | ar | |
SB | en | |
SC | fr | |
SD | ar | |
SE | sv | |
SG | zh | |
SH | en | |
SI | sl | |
SJ | no | |
SK | sk | |
SL | en | |
SM | it | |
SN | fr | |
SO | so | |
SR | nl | |
ST | pt | |
SS | en | |
SV | es | |
SX | nl | |
SY | ar | |
SZ | en | |
TC | en | |
TD | fr | |
TF | fr | |
TG | fr | |
TH | th | |
TJ | tg | |
TK | tkl | |
TL | pt | |
TM | tk | |
TN | ar | |
TO | en | |
TR | tr | |
TT | en | |
TV | en | |
TW | zh-hant | |
TZ | sw | |
UA | uk | |
UG | en | |
UM | en | |
US | en | |
UY | es | |
UZ | uz | |
VA | it | |
VC | en | |
VE | es | |
VG | en | |
VI | en | |
VN | vi | |
VU | bi | |
WF | fr | |
WS | sm | |
XK | en | |
YE | ar | |
YT | fr | |
ZA | en | |
ZM | en | |
ZW | en |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Converts flag emoji to ascii and back | |
https://github.com/cvzi/flag | |
Based on http://schinckel.net/2015/10/29/unicode-flags-in-python/ | |
Unicode country code emoji flags for Python | |
~~~~~~~~~~~~~~~~ | |
>>> import flag | |
>>> flag.flag("IL") | |
'🇮🇱' | |
>>> flag.flagize("Flag of Israel :IL:") | |
'Flag of Israel 🇮🇱' | |
>>> flag.dflagize("Flag of Israel 🇮🇱") | |
'Flag of Israel :IL:' | |
>>> flag.flagize(":gb-eng: is part of the UK :GB:", subregions=True) | |
'England 🏴 is part of the UK 🇬🇧' | |
>>> flag.dflagize("England 🏴 is part of the UK 🇬🇧", subregions=True) | |
'England :gb-eng: is part of the UK :GB:' | |
""" | |
import sys | |
import warnings | |
import re | |
from typing import List | |
__version__: str = '1.3.1' | |
__author__: str = 'cuzi' | |
__email__: str = 'cuzi@openmail.cc' | |
__source__: str = 'https://github.com/cvzi/flag' | |
__license__: str = """ | |
MIT License | |
Copyright (c) cuzi 2018 | |
Permission is hereby granted, free of charge, to any person obtaining a copy | |
of this software and associated documentation files (the "Software"), to deal | |
in the Software without restriction, including without limitation the rights | |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the Software is | |
furnished to do so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. | |
""" | |
__all__ = [ | |
"flag", | |
"flagize", | |
"dflagize", | |
"flagize_subregional", | |
"dflagize_subregional", | |
"Flag"] | |
# Distance from an ASCII capital letter to its regional indicator symbol:
# "A" (U+0041) maps to "🇦" (U+1F1E6).
OFFSET = ord("🇦") - ord("A")
# Tag characters used by flag emoji tag sequences start at U+E0000.
OFFSET_TAG = 0xE0000
CANCELTAG = "\U000E007F"  # CANCEL TAG, terminates a tag sequence
BLACKFLAG = "\U0001F3F4"  # WAVING BLACK FLAG, base of every tag sequence
# The only characters considered meaningful in a country/subdivision code.
ASCII_LOWER = "abcdefghijklmnopqrstuvwxyz0123456789"
def check_prefix(custom_str: str) -> bool:
    """Check if prefix will safely work with flagize and subregional flags

    Only an empty prefix is considered unsafe.

    :param str custom_str: Custom prefix
    :return: False if the string will safely work with subregional flags
    :rtype: bool
    """
    return not custom_str
def check_suffix(custom_str: str) -> bool:
    """Check if suffix will safely work with flagize and subregional flags

    A suffix is unsafe when it starts with ``-`` or when it is shorter
    than four characters and contains a-z, A-Z or 0-9.

    :param str custom_str: Custom suffix
    :return: False if the string will safely work with subregional flags
    :rtype: bool
    """
    if custom_str.startswith("-"):
        return True
    if len(custom_str) >= 4:
        # Long suffixes cannot be confused with subdivision characters.
        return False
    lowered = custom_str.lower()
    return any(ch in lowered for ch in ASCII_LOWER)
def flag_regional_indicator(code: List[str]) -> str:
    """Two letters are converted to regional indicator symbols

    :param str code: two letter ISO 3166 code
    :return: regional indicator symbols of the country flag
    :rtype: str
    """
    # Shift each ASCII letter into the U+1F1E6..U+1F1FF range.
    symbols = [chr(ord(letter.upper()) + OFFSET) for letter in code]
    return "".join(symbols)
def flag_tag_sequence(code: List[str]) -> str:
    """Three to seven letters/digits are converted to a tag sequence.

    :param str code: regional code from ISO 3166-2.
    :return: The unicode tag sequence of the subregional flag
    :rtype: str
    """
    # Each character becomes its tag counterpart (offset into U+E00xx),
    # wrapped between a waving black flag and a cancel tag.
    tag_chars = [chr(ord(letter.lower()) + OFFSET_TAG) for letter in code]
    return BLACKFLAG + "".join(tag_chars) + CANCELTAG
class Flag:
    """Use this class if you want a different prefix and suffix instead
    of colons. Offers the same methods as the module.
    """

    def __init__(self, prefix_str: str = ":",
                 suffix_str: str = ":", warn: bool = True) -> None:
        """Set a custom prefix and suffix. Instead of ``:XY:`` it will
        use ``{prefix}XY{suffix}``.

        To encode subregional flags, use a suffix that is either longer
        than 4 characters or that does not contain A-Z, a-z, 0-9 and
        does not start with a - (minus).

        :param str prefix_str: The leading symbols
        :param str suffix_str: The trailing symbols
        :param bool warn: If True, warn once when the prefix/suffix is
            unsafe for subregional flags
        """
        self._prefix = prefix_str
        self._prefix_re = re.escape(prefix_str)
        self._prefix_warn = warn and check_prefix(self._prefix)
        self._suffix = suffix_str
        self._suffix_re = re.escape(suffix_str)
        self._suffix_warn = warn and check_suffix(self._suffix)

    @staticmethod
    def flag(countrycode: str) -> str:
        """Encodes a single flag to unicode. Two letters are converted to
        regional indicator symbols
        Three or more letters/digits are converted to tag sequences.
        Dashes, colons and other symbols are removed from input, only a-z, A-Z
        and 0-9 are processed.
        In general a valid flag is either a two letter code from ISO 3166
        (e.g. ``GB``), a code from ISO 3166-2 (e.g. ``GBENG``) or a numeric
        code from ISO 3166-1.
        However, not all codes produce valid unicode, see
        http://unicode.org/reports/tr51/#flag-emoji-tag-sequences for more
        information.
        From ISO 3166-2 only England ``gbeng``, Scotland ``gbsct`` and
        Wales ``gbwls`` are considered RGI (recommended for general
        interchange) by the Unicode Consortium,
        see http://www.unicode.org/Public/emoji/latest/emoji-test.txt

        :param str countrycode: Two letter ISO 3166 code or a regional code
            from ISO 3166-2.
        :return: The unicode representation of the flag
        :rtype: str
        """
        # Delegates to the module-level flag() function.
        return flag(countrycode)

    def flagize(self, text: str, subregions: bool = False) -> str:
        """Encode flags. Replace all two letter codes
        ``{prefix}XX{suffix}`` with unicode flags (emoji flag sequences)

        For this method the suffix should not contain
        A-Z, a-z or 0-9 and not start with a - (minus).

        :param str text: The text
        :param bool subregions: Also replace subregional/subdivision codes
            ``{prefix}xx-xxx{suffix}`` with unicode flags (flag emoji tag
            sequences).
        :return: The text with all occurrences of ``{prefix}XX{suffix}``
            replaced by unicode flags
        :rtype: str
        """
        def flag_repl(matchobj):
            return flag_regional_indicator(matchobj.group(1))

        text = re.sub(self._prefix_re +
                      "([a-zA-Z]{2})" + self._suffix_re, flag_repl, text)

        if subregions:
            text = self.flagize_subregional(text)

        return text

    def dflagize(self, text: str, subregions: bool = False) -> str:
        """Decode flags. Replace all unicode country flags (emoji flag
        sequences) in text with ascii two letter code ``{prefix}XX{suffix}``

        :param str text: The text
        :param bool subregions: Also replace subregional/subdivision flags
            (flag emoji tag sequences) with ``{prefix}xx-xxx{suffix}``
        :return: The text with all unicode flags replaced by ascii
            sequence ``{prefix}XX{suffix}``
        :rtype: str
        """
        def dflag_repl(matchobj):
            # Map the two regional indicator symbols back to ASCII letters.
            letters = "".join(chr(ord(ch) - OFFSET)
                              for ch in matchobj.group(0))
            # BUGFIX: build the replacement by concatenation. The previous
            # implementation pre-baked a %-format pattern from the prefix
            # and suffix ("%s%%c%%c%s" % ...), which produced wrong output
            # or raised whenever the prefix or suffix contained a literal
            # '%' character.
            return self._prefix + letters + self._suffix

        regex = re.compile("([\U0001F1E6-\U0001F1FF]{2})", flags=re.UNICODE)

        text = regex.sub(dflag_repl, text)

        if subregions:
            text = self.dflagize_subregional(text)

        return text

    def flagize_subregional(self, text: str) -> str:
        """Encode subregional/subdivision flags. Replace all regional codes
        ``{prefix}xx-xxx{suffix}`` with unicode flags (flag emoji tag
        sequences)

        For this method the suffix should not contain
        A-Z, a-z or 0-9 and not start with a - (minus).

        :param str text: The text
        :return: The text with all occurrences of ``{prefix}xx-xxx{suffix}``
            replaced by unicode flags
        :rtype: str
        """
        if self._prefix_warn:
            warnings.warn(
                """The empty prefix (%r) is unsafe for subregional flags.
You can use Flag(%r, %r, warn=False) to disable this warning""" %
                (self._prefix, self._prefix, self._suffix), UserWarning)
            self._prefix_warn = False  # warn only once per instance
        elif self._suffix_warn:
            warnings.warn(
                """The suffix (%r) is unsafe for subregional flags
because it is short and contains a-z, 0-9 or starts with -
You can use Flag(%r, %r, warn=False) to disable this warning""" %
                (self._suffix, self._prefix, self._suffix), UserWarning)
            self._suffix_warn = False  # warn only once per instance

        def flag_repl(matchobj):
            return flag_tag_sequence(matchobj.group(1) + matchobj.group(2))

        # Enforces a hyphen after two chars, allows both:
        # - The natural 2-letter unicode_region_subtag and subdivision_suffix
        #   like California USCA ":us-ca:", England GBENG ":gb-eng:"
        # - For sake of completeness: 3-digit unicode_region_subtag like 840
        #   for US formatted as ":84-0:"
        text = re.sub(
            self._prefix_re +
            "([a-zA-Z]{2}|[0-9]{2})-([0-9a-zA-Z]{1,4})" + self._suffix_re,
            flag_repl,
            text)

        return text

    def dflagize_subregional(self, text: str) -> str:
        """Decode subregional/subdivision flags. Replace all unicode regional
        flags (flag emoji tag sequences) in text with their ascii
        code ``{prefix}xx-xxx{suffix}``

        :param str text: The text
        :return: The text with all regional flags replaced by ascii
            sequence ``{prefix}xx-xxx{suffix}``
        :rtype: str
        """
        def dflag_repl(matchobj):
            # First two tag characters are the region, the remainder is the
            # subdivision suffix.
            chars = [chr(ord(ch) - OFFSET_TAG) for ch in matchobj.group(1)]
            return "%s%s%s-%s%s" % (self._prefix,
                                    chars[0],
                                    chars[1],
                                    "".join(chars[2:]),
                                    self._suffix)

        regex = re.compile(
            BLACKFLAG +
            "([\U000E0030-\U000E0039\U000E0061-\U000E007A]{3,6})" +
            CANCELTAG,
            flags=re.UNICODE)

        text = regex.sub(dflag_repl, text)

        return text
def flag(countrycode: str) -> str:
    """Encodes a single flag to unicode. Two letters are converted to regional
    indicator symbols
    Three or more letters/digits are converted to tag sequences.
    Dashes, colons and other symbols are removed from input, only a-z, A-Z and
    0-9 are processed.
    In general a valid flag is either a two letter code from ISO 3166
    (e.g. ``GB``), a code from ISO 3166-2 (e.g. ``GBENG``) or a numeric code
    from ISO 3166-1.
    However, not all codes produce valid unicode, see
    http://unicode.org/reports/tr51/#flag-emoji-tag-sequences for more
    information.
    From ISO 3166-2 only England ``gbeng``, Scotland ``gbsct`` and
    Wales ``gbwls`` are considered RGI (recommended for general interchange)
    by the Unicode Consortium,
    see http://www.unicode.org/Public/emoji/latest/emoji-test.txt

    :param str countrycode: Two letter ISO 3166 code or a regional code
        from ISO 3166-2.
    :return: The unicode representation of the flag
    :rtype: str
    """
    # Strip everything that is not a-z or 0-9 before deciding the format.
    cleaned = "".join(ch for ch in countrycode.lower() if ch in ASCII_LOWER)
    size = len(cleaned)
    if size == 2:
        # Regional indicator symbols
        return flag_regional_indicator(cleaned)
    if 2 < size < 7:
        # Tag sequence
        return flag_tag_sequence(cleaned)
    raise ValueError(
        'invalid countrycode, found %d (%r) in %r.' %
        (size, cleaned, countrycode))
def flagize(text: str, subregions: bool = False) -> str:
    """Encode flags. Replace all two letter codes ``:XX:`` with unicode flags
    (emoji flag sequences)

    Delegates to the module-wide :class:`Flag` instance that uses colons.

    :param str text: The text
    :param bool subregions: Also replace subregional/subdivision codes
        ``:xx-xxx:`` with unicode flags (flag emoji tag sequences).
    :return: The text with all occurrences of ``:XX:`` replaced by unicode
        flags
    :rtype: str
    """
    return standard.flagize(text, subregions=subregions)
def dflagize(text: str, subregions: bool = False) -> str:
    """Decode flags. Replace all unicode country flags (emoji flag sequences)
    in text with ascii two letter code ``:XX:``

    Delegates to the module-wide :class:`Flag` instance that uses colons.

    :param str text: The text
    :param bool subregions: Also replace subregional/subdivision flags
        (flag emoji tag sequences) with ``:xx-xxx:``
    :return: The text with all unicode flags replaced by ascii
        sequence ``:XX:``
    :rtype: str
    """
    return standard.dflagize(text, subregions=subregions)
def flagize_subregional(text: str) -> str:
    """Encode subregional/subdivision flags. Replace all regional codes
    ``:xx-xxx:`` with unicode flags (flag emoji tag sequences)

    Delegates to the module-wide :class:`Flag` instance that uses colons.

    :param str text: The text
    :return: The text with all occurrences of ``:xx-xxx:`` replaced by
        unicode flags
    :rtype: str
    """
    return standard.flagize_subregional(text)
def dflagize_subregional(text: str) -> str:
    """Decode subregional/subdivision flags. Replace all unicode regional
    flags (flag emoji tag sequences) in text with their ascii
    code ``:xx-xxx:``

    Delegates to the module-wide :class:`Flag` instance that uses colons.

    :param str text: The text
    :return: The text with all regional flags replaced by ascii
        sequence ``:xx-xxx:``
    :rtype: str
    """
    return standard.dflagize_subregional(text)


# Module-wide default instance (colon prefix/suffix) that backs the
# convenience functions above.
standard = Flag(":", ":")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import praw
from pmaw import PushshiftAPI
import json

# Reddit credentials live in a local JSON ".env" file (see the example
# config). BUGFIX: use a context manager so the handle is closed.
with open(".env") as env_file:
    secrets = json.load(env_file)

userAgent = "python:subreddit_user_flairs.py:v1.0 (by {})".format(secrets["username"])
reddit = praw.Reddit(user_agent=userAgent, **secrets)
api_praw = PushshiftAPI(praw=reddit)

# Pull r/languagelearning comments in 100 fixed time windows.
# NOTE(review): 1446083839 is presumably the subreddit's early epoch and
# 2_500_00 (= 250000) looks intentional but is easy to misread — confirm.
comments = []
for i in range(100):
    print(i)
    since = 1446083839 + 2_500_000 * i - 16 * 2_500_00
    until = since + 2_500_000
    try:
        comments.extend(api_praw.search_comments(
            subreddit="languagelearning", limit=1_000, mem_safe=False,
            since=since, until=until))
    except Exception as exc:
        # BUGFIX: the bare "except: pass" also swallowed KeyboardInterrupt
        # and hid every failure; keep the best-effort behavior but report it.
        print("window {} failed: {}".format(i, exc))

# Record each author's flair once; count repeat appearances of flaired
# authors (the original script's notion of "useful" comments).
user_flairs = {}
useful_comments = 0
for comment in comments:
    author = str(comment["author"])
    author_flair = comment["author_flair_text"]
    if author_flair and author not in user_flairs:
        user_flairs[author] = author_flair
    elif author_flair:
        useful_comments += 1
print("Number of useful comments: {}".format(useful_comments))
with open("flairs.json", "w+") as f:
    json.dump(user_flairs, f)
# Taken from the Library of Congress
with open("language_codes.json", "r") as f:
    language_codes = json.load(f)

# For unknown reasons this cannot be used as a PyPI import
import custom_flag as flag_module

# BUGFIX: close the file handle (was opened without a context manager).
with open("flag_emojis.txt", "r") as f:
    possible_flags = f.read().split(" ")

with open("country_languages.tsv", "r") as f:
    country_languages = f.read().splitlines()

# country code (lowercased) -> primary language code
code_mapping = {}
for line in country_languages:
    country_code, lang_code = line.split("\t")
    code_mapping[country_code.lower()] = lang_code

# flag emoji -> language name; a flag "fails" when its country has no
# mapping or its language code is unknown.
flag_emoji_dict = {}
count = 0
fail = 0
for flag in possible_flags:
    try:
        # dflagize returns ":XX:"; strip the colons to get the country code.
        country = flag_module.dflagize(flag)[1:-1].lower()
        # .get replaces the original inner try/except; a missing country
        # yields None, which language_codes[...] turns into a KeyError below.
        temp_lang_match = code_mapping.get(country)
        flag_emoji_dict[flag] = language_codes[temp_lang_match]
        count += 1
    except KeyError:
        fail += 1
print(count, "flags found", fail, "flags failed")

# Support people who write the language as a whole
for _, value in language_codes.copy().items():
    language_codes[value] = value

print(len(user_flairs), "users with flairs")  # approximately 20% of comments are usable
import regex | |
import emoji | |
# Adapted from https://stackoverflow.com/a/49242754/12876940
def split_emojis(text):
    """Split text into its emoji graphemes and the remaining plain text.

    Returns a (list_of_emoji, non_emoji_string) tuple.
    """
    emoji_list = []
    plain_parts = []
    # \X matches full grapheme clusters, keeping multi-codepoint emoji intact.
    for grapheme in regex.findall(r'\X', text):
        if grapheme in emoji.EMOJI_DATA:
            emoji_list.append(grapheme)
        else:
            plain_parts.append(grapheme)
    return emoji_list, "".join(plain_parts)
# Link every language to every other language, starting all counts at zero.
language_links = {}
for lang_a in language_codes.values():
    language_links[lang_a] = {
        lang_b: 0 for lang_b in language_codes.values() if lang_b != lang_a
    }

for user, flair in user_flairs.items():
    broken_emoji, remaining_flair = split_emojis(flair)
    # NOTE: a future improvement could be to only allow for one match per pair
    # since some people evidently write Mandarin :flag: etc.
    for i, first_flag in enumerate(broken_emoji):
        for j, second_flag in enumerate(broken_emoji):
            try:
                match1 = flag_emoji_dict[first_flag]
                match2 = flag_emoji_dict[second_flag]
                if i != j and match1 in language_codes and match2 in language_codes:
                    language_links[match1][match2] += 1
                # Both orders are counted because (i, j) and (j, i) each occur.
            except KeyError:
                # Flag emoji with no language mapping are ignored.
                pass
    if remaining_flair:
        # People also write language names directly; split on common separators.
        tokens = regex.split(r"[\s\:\-&,\|/\\]+", remaining_flair)
        true_matches = []
        for token in tokens:
            if token in language_codes:
                true_matches.append(token)
            elif token.lower() in language_codes:
                true_matches.append(token.lower())
        for i in range(len(true_matches)):
            for j in range(len(true_matches)):
                if i == j:
                    continue
                m1 = language_codes[true_matches[i]]
                m2 = language_codes[true_matches[j]]
                if m1 != m2:  # sometimes multiple spanishes etc.
                    # language_codes maps names to themselves, so the double
                    # lookup below is safe.
                    language_links[language_codes[m1]][language_codes[m2]] += 1
# Now for the visualization...
# Dump the output to a JSON file
with open("language_links_finished.json", "w+") as f:
    json.dump(language_links, f)

# get into amCharts format, emitting each unordered language pair once
am_formatted = {
    "data": [],
}
# PERF BUGFIX: the original used a list here, making every membership test
# O(n) and this double loop accidentally O(n^2) over all language pairs;
# a set keeps the same semantics with O(1) lookups.
viewed_links = set()
for link in language_links:
    for link2 in language_links[link]:
        if (link, link2) in viewed_links:
            continue
        if language_links[link][link2] >= 1:  # could be any size here...
            am_formatted["data"].append({
                "from": link,
                "to": link2,
                "value": language_links[link][link2]
            })
            # Mark both orientations so the reverse pair is not re-emitted.
            viewed_links.add((link, link2))
            viewed_links.add((link2, link))
with open("am_formatted.json", "w+") as f:
    json.dump(am_formatted, f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
🇦🇩 🇦🇪 🇦🇫 🇦🇬 🇦🇮 🇦🇱 🇦🇲 🇦🇴 🇦🇶 🇦🇷 🇦🇸 🇦🇹 🇦🇺 🇦🇼 🇦🇽 🇦🇿 🇧🇦 🇧🇧 🇧🇩 🇧🇪 🇧🇫 🇧🇬 🇧🇭 🇧🇮 🇧🇯 🇧🇱 🇧🇲 🇧🇳 🇧🇴 🇧🇶 🇧🇷 🇧🇸 🇧🇹 🇧🇻 🇧🇼 🇧🇾 🇧🇿 🇨🇦 🇨🇨 🇨🇩 🇨🇫 🇨🇬 🇨🇭 🇨🇮 🇨🇰 🇨🇱 🇨🇲 🇨🇳 🇨🇴 🇨🇷 🇨🇺 🇨🇻 🇨🇼 🇨🇽 🇨🇾 🇨🇿 🇩🇪 🇩🇯 🇩🇰 🇩🇲 🇩🇴 🇩🇿 🇪🇨 🇪🇪 🇪🇬 🇪🇭 🇪🇷 🇪🇸 🇪🇹 🇪🇺 🇫🇮 🇫🇯 🇫🇰 🇫🇲 🇫🇴 🇫🇷 🇬🇦 🇬🇧 🇬🇩 🇬🇪 🇬🇫 🇬🇬 🇬🇭 🇬🇮 🇬🇱 🇬🇲 🇬🇳 🇬🇵 🇬🇶 🇬🇷 🇬🇸 🇬🇹 🇬🇺 🇬🇼 🇬🇾 🇭🇰 🇭🇲 🇭🇳 🇭🇷 🇭🇹 🇭🇺 🇮🇩 🇮🇪 🇮🇱 🇮🇲 🇮🇳 🇮🇴 🇮🇶 🇮🇷 🇮🇸 🇮🇹 🇯🇪 🇯🇲 🇯🇴 🇯🇵 🇰🇪 🇰🇬 🇰🇭 🇰🇮 🇰🇲 🇰🇳 🇰🇵 🇰🇷 🇰🇼 🇰🇾 🇰🇿 🇱🇦 🇱🇧 🇱🇨 🇱🇮 🇱🇰 🇱🇷 🇱🇸 🇱🇹 🇱🇺 🇱🇻 🇱🇾 🇲🇦 🇲🇨 🇲🇩 🇲🇪 🇲🇫 🇲🇬 🇲🇭 🇲🇰 🇲🇱 🇲🇲 🇲🇳 🇲🇴 🇲🇵 🇲🇶 🇲🇷 🇲🇸 🇲🇹 🇲🇺 🇲🇻 🇲🇼 🇲🇽 🇲🇾 🇲🇿 🇳🇦 🇳🇨 🇳🇪 🇳🇫 🇳🇬 🇳🇮 🇳🇱 🇳🇴 🇳🇵 🇳🇷 🇳🇺 🇳🇿 🇴🇲 🇵🇦 🇵🇪 🇵🇫 🇵🇬 🇵🇭 🇵🇰 🇵🇱 🇵🇲 🇵🇳 🇵🇷 🇵🇸 🇵🇹 🇵🇼 🇵🇾 🇶🇦 🇷🇪 🇷🇴 🇷🇸 🇷🇺 🇷🇼 🇸🇦 🇸🇧 🇸🇨 🇸🇩 🇸🇪 🇸🇬 🇸🇭 🇸🇮 🇸🇯 🇸🇰 🇸🇱 🇸🇲 🇸🇳 🇸🇴 🇸🇷 🇸🇸 🇸🇹 🇸🇻 🇸🇽 🇸🇾 🇸🇿 🇹🇨 🇹🇩 🇹🇫 🇹🇬 🇹🇭 🇹🇯 🇹🇰 🇹🇱 🇹🇲 🇹🇳 🇹🇴 🇹🇷 🇹🇹 🇹🇻 🇹🇼 🇹🇿 🇺🇦 🇺🇬 🇺🇲 🇺🇸 🇺🇾 🇺🇿 🇻🇦 🇻🇨 🇻🇪 🇻🇬 🇻🇮 🇻🇳 🇻🇺 🇼🇫 🇼🇸 🇾🇪 🇾🇹 🇿🇦 🇿🇲 🇿🇼 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"aar": "Afar", "aa": "Afar", "abk": "Abkhazian", "ab": "Abkhazian", "ace": "Achinese", "ach": "Acoli", "ada": "Adangme", "ady": "Adygei", "afa": "Afro-Asiatic Languages", "afh": "Afrihili", "afr": "Afrikaans", "af": "Afrikaans", "ain": "Ainu", "aka": "Akan", "ak": "Akan", "akk": "Akkadian", "alb": "Albanian", "sqi": "Albanian", "sq": "Albanian", "ale": "Aleut", "alg": "Algonquian Languages", "alt": "Southern Altai", "amh": "Amharic", "am": "Amharic", "ang": "English Old (ca.450-1100)", "anp": "Angika", "apa": "Apache Languages", "ara": "Arabic", "ar": "Arabic", "arc": "Imperial Aramaic (700-300 BCE)", "arg": "Aragonese", "an": "Aragonese", "arm": "Armenian", "hye": "Armenian", "hy": "Armenian", "arn": "Mapuche", "arp": "Arapaho", "art": "Artificial Languages", "arw": "Arawak", "asm": "Assamese", "as": "Assamese", "ast": "Asturian", "ath": "Athapascan Languages", "aus": "Australian Languages", "ava": "Avaric", "av": "Avaric", "ave": "Avestan", "ae": "Avestan", "awa": "Awadhi", "aym": "Aymara", "ay": "Aymara", "aze": "Azerbaijani", "az": "Azerbaijani", "bad": "Banda Languages", "bai": "Bamileke Languages", "bak": "Bashkir", "ba": "Bashkir", "bal": "Baluchi", "bam": "Bambara", "bm": "Bambara", "ban": "Balinese", "baq": "Basque", "eus": "Basque", "eu": "Basque", "bas": "Basa", "bat": "Baltic Languages", "bej": "Bedawiyet", "bel": "Belarusian", "be": "Belarusian", "bem": "Bemba", "ben": "Bengali", "bn": "Bengali", "ber": "Berber Languages", "bho": "Bhojpuri", "bih": "Bihari Languages", "bh": "Bihari Languages", "bik": "Bikol", "bin": "Bini", "bis": "Bislama", "bi": "Bislama", "bla": "Siksika", "bnt": "Bantu (Other)", "bos": "Bosnian", "bs": "Bosnian", "bra": "Braj", "bre": "Breton", "br": "Breton", "btk": "Batak Languages", "bua": "Buriat", "bug": "Buginese", "bul": "Bulgarian", "bg": "Bulgarian", "bur": "Burmese", "mya": "Burmese", "my": "Burmese", "byn": "Bilin", "cad": "Caddo", "cai": "Central American Indian Languages", "car": "Galibi Carib", "cat": "Catalan", 
"ca": "Catalan", "cau": "Caucasian Languages", "ceb": "Cebuano", "cel": "Celtic Languages", "cha": "Chamorro", "ch": "Chamorro", "chb": "Chibcha", "che": "Chechen", "ce": "Chechen", "chg": "Chagatai", "chi": "Chinese", "zho": "Chinese", "zh": "Chinese", "chk": "Chuukese", "chm": "Mari", "chn": "Chinook Jargon", "cho": "Choctaw", "chp": "Chipewyan", "chr": "Cherokee", "chu": "Church Slavic", "cu": "Church Slavic", "chv": "Chuvash", "cv": "Chuvash", "chy": "Cheyenne", "cmc": "Chamic Languages", "cop": "Coptic", "cor": "Cornish", "kw": "Cornish", "cos": "Corsican", "co": "Corsican", "cpe": "Creoles And Pidgins", "cpf": "Creoles And Pidgins", "cpp": "Creoles And Pidgins", "cre": "Cree", "cr": "Cree", "crh": "Crimean Tatar", "crp": "Creoles And Pidgins", "csb": "Kashubian", "cus": "Cushitic Languages", "cz": "Czech", "cze": "Czech", "ces": "Czech", "cs": "Czech", "dak": "Dakota", "dan": "Danish", "da": "Danish", "dar": "Dargwa", "day": "Land Dayak Languages", "del": "Delaware", "den": "Slave (Athapascan)", "dgr": "Dogrib", "din": "Dinka", "div": "Dhivehi", "dv": "Dhivehi", "doi": "Dogri", "dra": "Dravidian Languages", "dsb": "Lower Sorbian", "dua": "Duala", "dum": "Dutch Middle (ca.1050-1350)", "dut": "Dutch", "nld": "Dutch", "nl": "Dutch", "dyu": "Dyula", "dzo": "Dzongkha", "dz": "Dzongkha", "efi": "Efik", "egy": "Egyptian (Ancient)", "eka": "Ekajuk", "elx": "Elamite", "eng": "English", "en": "English", "enm": "English Middle (1100-1500)", "epo": "Esperanto", "eo": "Esperanto", "est": "Estonian", "et": "Estonian", "ewe": "Ewe", "ee": "Ewe", "ewo": "Ewondo", "fan": "Fang", "fao": "Faroese", "fo": "Faroese", "fat": "Fanti", "fij": "Fijian", "fj": "Fijian", "fil": "Filipino", "fin": "Finnish", "fi": "Finnish", "fiu": "Finno-Ugrian Languages", "fon": "Fon", "fre": "French", "fra": "French", "fr": "French", "frm": "French Middle (ca.1400-1600)", "fro": "French Old (842-ca.1400)", "frr": "Northern Frisian", "frs": "Eastern Frisian", "fry": "Western Frisian", "fy": "Western 
Frisian", "ful": "Fulah", "ff": "Fulah", "fur": "Friulian", "gaa": "Ga", "gay": "Gayo", "gba": "Gbaya", "gem": "Germanic Languages", "geo": "Georgian", "kat": "Georgian", "ka": "Georgian", "ger": "German", "deu": "German", "de": "German", "gez": "Geez", "gil": "Gilbertese", "gla": "Gaelic", "gd": "Gaelic", "gle": "Irish", "ga": "Irish", "glg": "Galician", "gl": "Galician", "glv": "Manx", "gv": "Manx", "gmh": "German Middle High (ca.1050-1500)", "goh": "German Old High (ca.750-1050)", "gon": "Gondi", "gor": "Gorontalo", "got": "Gothic", "grb": "Grebo", "grc": "Greek Ancient (to 1453)", "gre": "Greek Modern (1453-)", "ell": "Greek Modern (1453-)", "el": "Greek Modern (1453-)", "grn": "Guarani", "gn": "Guarani", "gsw": "Alemannic", "guj": "Gujarati", "gu": "Gujarati", "gwi": "Gwich'in", "hai": "Haida", "hat": "Haitian", "ht": "Haitian", "hau": "Hausa", "ha": "Hausa", "haw": "Hawaiian", "heb": "Hebrew", "he": "Hebrew", "her": "Herero", "hz": "Herero", "hil": "Hiligaynon", "him": "Himachali Languages", "hin": "Hindi", "hi": "Hindi", "hit": "Hittite", "hmn": "Hmong", "hmo": "Hiri Motu", "ho": "Hiri Motu", "hrv": "Croatian", "hr": "Croatian", "hsb": "Upper Sorbian", "hun": "Hungarian", "hu": "Hungarian", "hup": "Hupa", "iba": "Iban", "ibo": "Igbo", "ig": "Igbo", "ice": "Icelandic", "isl": "Icelandic", "is": "Icelandic", "ido": "Ido", "io": "Ido", "iii": "Nuosu", "ii": "Nuosu", "ijo": "Ijo Languages", "iku": "Inuktitut", "iu": "Inuktitut", "ile": "Interlingue", "ie": "Interlingue", "ilo": "Iloko", "ina": "Interlingua (International Auxiliary Language Association)", "ia": "Interlingua (International Auxiliary Language Association)", "inc": "Indic Languages", "ind": "Indonesian", "id": "Indonesian", "ine": "Indo-European Languages", "inh": "Ingush", "ipk": "Inupiaq", "ik": "Inupiaq", "ira": "Iranian Languages", "iro": "Iroquoian Languages", "ita": "Italian", "it": "Italian", "jav": "Javanese", "jv": "Javanese", "jbo": "Lojban", "jpn": "Japanese", "ja": "Japanese", "jpr": 
"Judeo-Persian", "jrb": "Judeo-Arabic", "kaa": "Kara-Kalpak", "kab": "Kabyle", "kac": "Jingpho", "kal": "Greenlandic", "kl": "Greenlandic", "kam": "Kamba", "kan": "Kannada", "kn": "Kannada", "kar": "Karen Languages", "kas": "Kashmiri", "ks": "Kashmiri", "kau": "Kanuri", "kr": "Kanuri", "kaw": "Kawi", "kaz": "Kazakh", "kk": "Kazakh", "kbd": "Kabardian", "kha": "Khasi", "khi": "Khoisan Languages", "khm": "Central Khmer", "km": "Central Khmer", "kho": "Khotanese", "kik": "Gikuyu", "ki": "Gikuyu", "kin": "Kinyarwanda", "rw": "Kinyarwanda", "kir": "Kirghiz", "ky": "Kirghiz", "kmb": "Kimbundu", "kok": "Konkani", "kom": "Komi", "kv": "Komi", "kon": "Kongo", "kg": "Kongo", "kor": "Korean", "ko": "Korean", "kos": "Kosraean", "kpe": "Kpelle", "krc": "Karachay-Balkar", "krl": "Karelian", "kro": "Kru Languages", "kru": "Kurukh", "kua": "Kuanyama", "kj": "Kuanyama", "kum": "Kumyk", "kur": "Kurdish", "ku": "Kurdish", "kut": "Kutenai", "lad": "Ladino", "lah": "Lahnda", "lam": "Lamba", "lao": "Lao", "lo": "Lao", "lat": "Latin", "la": "Latin", "lav": "Latvian", "lv": "Latvian", "lez": "Lezghian", "lim": "Limburgan", "li": "Limburgan", "lin": "Lingala", "ln": "Lingala", "lit": "Lithuanian", "lt": "Lithuanian", "lol": "Mongo", "loz": "Lozi", "ltz": "Letzeburgesch", "lb": "Letzeburgesch", "lua": "Luba-Lulua", "lub": "Luba-Katanga", "lu": "Luba-Katanga", "lug": "Ganda", "lg": "Ganda", "lui": "Luiseno", "lun": "Lunda", "luo": "Luo (Kenya And Tanzania)", "lus": "Lushai", "mac": "Macedonian", "mkd": "Macedonian", "mk": "Macedonian", "mad": "Madurese", "mag": "Magahi", "mah": "Marshallese", "mh": "Marshallese", "mai": "Maithili", "mak": "Makasar", "mal": "Malayalam", "ml": "Malayalam", "man": "Mandingo", "mao": "Maori", "mri": "Maori", "mi": "Maori", "map": "Austronesian Languages", "mar": "Marathi", "mr": "Marathi", "mas": "Masai", "may": "Malay", "msa": "Malay", "ms": "Malay", "mdf": "Moksha", "mdr": "Mandar", "men": "Mende", "mga": "Irish Middle (900-1200)", "mic": "Mi'kmaq", "min": 
"Minangkabau", "mis": "Uncoded Languages", "mkh": "Mon-Khmer Languages", "mlg": "Malagasy", "mg": "Malagasy", "mlt": "Maltese", "mt": "Maltese", "mnc": "Manchu", "mni": "Manipuri", "mno": "Manobo Languages", "moh": "Mohawk", "mon": "Mongolian", "mn": "Mongolian", "mos": "Mossi", "mul": "Multiple Languages", "mun": "Munda Languages", "mus": "Creek", "mwl": "Mirandese", "mwr": "Marwari", "myn": "Mayan Languages", "myv": "Erzya", "nah": "Nahuatl Languages", "nai": "North American Indian Languages", "nap": "Neapolitan", "nau": "Nauru", "na": "Nauru", "nav": "Navaho", "nv": "Navaho", "nbl": "Ndebele", "nr": "Ndebele", "nde": "Ndebele", "nd": "Ndebele", "ndo": "Ndonga", "ng": "Ndonga", "nds": "Low", "nep": "Nepali", "ne": "Nepali", "new": "Nepal Bhasa", "nia": "Nias", "nic": "Niger-Kordofanian Languages", "niu": "Niuean", "nno": "Norwegian", "nn": "Norwegian", "nob": "Bokm\u00e5l", "nb": "Bokm\u00e5l", "nog": "Nogai", "non": "Norse", "nor": "Norwegian", "no": "Norwegian", "nqo": "N'Ko", "nso": "Northern Sotho", "nub": "Nubian Languages", "nwc": "Classical Nepal Bhasa", "nya": "Chewa", "ny": "Chewa", "nym": "Nyamwezi", "nyn": "Nyankole", "nyo": "Nyoro", "nzi": "Nzima", "oci": "Occitan (post 1500)", "oc": "Occitan (post 1500)", "oji": "Ojibwa", "oj": "Ojibwa", "ori": "Oriya", "or": "Oriya", "orm": "Oromo", "om": "Oromo", "osa": "Osage", "oss": "Ossetian", "os": "Ossetian", "ota": "Turkish Ottoman (1500-1928)", "oto": "Otomian Languages", "paa": "Papuan Languages", "pag": "Pangasinan", "pal": "Pahlavi", "pam": "Kapampangan", "pan": "Panjabi", "pa": "Panjabi", "pap": "Papiamento", "pau": "Palauan", "peo": "Persian Old (ca.600-400 B.C.)", "per": "Persian", "fas": "Persian", "fa": "Persian", "phi": "Philippine Languages", "phn": "Phoenician", "pli": "Pali", "pi": "Pali", "pol": "Polish", "pl": "Polish", "pon": "Pohnpeian", "por": "Portuguese", "pt": "Portuguese", "pra": "Prakrit Languages", "pro": "Proven\u00e7al Old (to 1500)", "pus": "Pashto", "ps": "Pashto", "qaa-qtz": 
"Reserved For Local Use", "que": "Quechua", "qu": "Quechua", "raj": "Rajasthani", "rap": "Rapanui", "rar": "Cook Islands Maori", "roa": "Romance Languages", "roh": "Romansh", "rm": "Romansh", "rom": "Romany", "rum": "Moldavian", "ron": "Moldavian", "ro": "Moldavian", "run": "Rundi", "rn": "Rundi", "rup": "Aromanian", "rus": "Russian", "ru": "Russian", "sad": "Sandawe", "sag": "Sango", "sg": "Sango", "sah": "Yakut", "sai": "South American Indian (Other)", "sal": "Salishan Languages", "sam": "Samaritan Aramaic", "san": "Sanskrit", "sa": "Sanskrit", "sas": "Sasak", "sat": "Santali", "scn": "Sicilian", "sco": "Scots", "sel": "Selkup", "sem": "Semitic Languages", "sga": "Irish Old (to 900)", "sgn": "Sign Languages", "shn": "Shan", "sid": "Sidamo", "sin": "Sinhala", "si": "Sinhala", "sio": "Siouan Languages", "sit": "Sino-Tibetan Languages", "sla": "Slavic Languages", "slo": "Slovak", "slk": "Slovak", "sk": "Slovak", "slv": "Slovenian", "sl": "Slovenian", "sma": "Southern Sami", "sme": "Northern Sami", "se": "Northern Sami", "smi": "Sami Languages", "smj": "Lule Sami", "smn": "Inari Sami", "smo": "Samoan", "sm": "Samoan", "sms": "Skolt Sami", "sna": "Shona", "sn": "Shona", "snd": "Sindhi", "sd": "Sindhi", "snk": "Soninke", "sog": "Sogdian", "som": "Somali", "so": "Somali", "son": "Songhai Languages", "sot": "Sotho", "st": "Sotho", "spa": "Spanish", "es": "Spanish", "esp": "Spanish", "srd": "Sardinian", "sc": "Sardinian", "srn": "Sranan Tongo", "srp": "Serbian", "sr": "Serbian", "srr": "Serer", "ssa": "Nilo-Saharan Languages", "ssw": "Swati", "ss": "Swati", "suk": "Sukuma", "sun": "Sundanese", "su": "Sundanese", "sus": "Susu", "sux": "Sumerian", "swa": "Swahili", "sw": "Swahili", "swe": "Swedish", "sv": "Swedish", "syc": "Classical Syriac", "syr": "Syriac", "tah": "Tahitian", "ty": "Tahitian", "tai": "Tai Languages", "tam": "Tamil", "ta": "Tamil", "tat": "Tatar", "tt": "Tatar", "tel": "Telugu", "te": "Telugu", "tem": "Timne", "ter": "Tereno", "tet": "Tetum", "tgk": 
"Tajik", "tg": "Tajik", "tgl": "Tagalog", "tl": "Tagalog", "tha": "Thai", "th": "Thai", "tib": "Tibetan", "bod": "Tibetan", "bo": "Tibetan", "tig": "Tigre", "tir": "Tigrinya", "ti": "Tigrinya", "tiv": "Tiv", "tkl": "Tokelau", "tlh": "Klingon", "tli": "Tlingit", "tmh": "Tamashek", "tog": "Tonga (Nyasa)", "ton": "Tonga (Tonga Islands)", "to": "Tonga (Tonga Islands)", "tpi": "Tok Pisin", "tsi": "Tsimshian", "tsn": "Tswana", "tn": "Tswana", "tso": "Tsonga", "ts": "Tsonga", "tuk": "Turkmen", "tk": "Turkmen", "tum": "Tumbuka", "tup": "Tupi Languages", "tur": "Turkish", "tr": "Turkish", "tut": "Altaic Languages", "tvl": "Tuvalu", "twi": "Twi", "tw": "Twi", "tyv": "Tuvinian", "udm": "Udmurt", "uga": "Ugaritic", "uig": "Uighur", "ug": "Uighur", "ukr": "Ukrainian", "uk": "Ukrainian", "umb": "Umbundu", "und": "Undetermined", "urd": "Urdu", "ur": "Urdu", "uzb": "Uzbek", "uz": "Uzbek", "vai": "Vai", "ven": "Venda", "ve": "Venda", "vie": "Vietnamese", "vi": "Vietnamese", "vol": "Volap\u00fck", "vo": "Volap\u00fck", "vot": "Votic", "wak": "Wakashan Languages", "wal": "Walamo", "war": "Waray", "was": "Washo", "wel": "Welsh", "cym": "Welsh", "cy": "Welsh", "wen": "Sorbian Languages", "wln": "Walloon", "wa": "Walloon", "wol": "Wolof", "wo": "Wolof", "xal": "Kalmyk", "xho": "Xhosa", "xh": "Xhosa", "yao": "Yao", "yap": "Yapese", "yid": "Yiddish", "yi": "Yiddish", "yor": "Yoruba", "yo": "Yoruba", "ypk": "Yupik Languages", "zap": "Zapotec", "zbl": "Bliss", "zen": "Zenaga", "zgh": "Standard Moroccan Tamazight", "zha": "Chuang", "za": "Chuang", "znd": "Zande Languages", "zul": "Zulu", "zu": "Zulu", "zun": "Zuni", "zxx": "No Linguistic Content", "zza": "Dimili"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<style>
  #chartdiv {
    width: 100%;
    height: 800px;
  }

  body {
    background-color: #000;
  }
</style>
<!-- Resources -->
<script src="https://cdn.amcharts.com/lib/4/core.js"></script>
<script src="https://cdn.amcharts.com/lib/4/charts.js"></script>
<script src="https://cdn.amcharts.com/lib/4/themes/dark.js"></script>
<!-- Chart code -->
<script>
  am4core.ready(function () {
    // Themes begin
    am4core.useTheme(am4themes_dark);
    // Themes end

    // Chord diagram visualizing language-pair co-occurrence in
    // r/languagelearning user flairs. Data comes from am_formatted.json,
    // produced by extractor.py.
    var chart = am4core.create("chartdiv", am4charts.ChordDiagram);

    // colors of main characters
    chart.colors.saturation = 0.85;
    chart.colors.step = 3;
    var colors = {};

    // reminder: must be run with a file server.
    // FIX: `mode` is a fetch() option, not an HTTP header — previously it
    // was nested inside `headers` and silently sent as a bogus header.
    // Also added res.ok check and .catch() so a failed load is reported
    // instead of yielding a blank chart.
    fetch("am_formatted.json", {
      mode: "cors",
      headers: {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
      }
    })
      .then(function (res) {
        if (!res.ok) {
          throw new Error("Failed to load am_formatted.json: HTTP " + res.status);
        }
        return res.json();
      })
      .then(function (json) {
        chart.data = json.data;
      })
      .catch(function (err) {
        console.error("Could not load chart data:", err);
      });

    chart.dataFields.fromName = "from";
    chart.dataFields.toName = "to";
    chart.dataFields.value = "value";
    chart.nodePadding = 1;
    chart.minNodeSize = 0.0025;
    chart.startAngle = 135;
    chart.endAngle = chart.startAngle + 360;
    chart.sortBy = "value";
    chart.fontSize = 14;

    var nodeTemplate = chart.nodes.template;
    nodeTemplate.readerTitle = "Click to show/hide or drag to rearrange";
    nodeTemplate.showSystemTooltip = true;
    nodeTemplate.propertyFields.fill = "color";
    nodeTemplate.tooltipText = "{name} connections: {total}";

    // when rolled over the node, make all the links rolled-over
    nodeTemplate.events.on("over", function (event) {
      var node = event.target;
      node.outgoingDataItems.each(function (dataItem) {
        if (dataItem.toNode) {
          dataItem.link.isHover = true;
          dataItem.toNode.label.isHover = true;
        }
      });
      node.incomingDataItems.each(function (dataItem) {
        if (dataItem.fromNode) {
          dataItem.link.isHover = true;
          dataItem.fromNode.label.isHover = true;
        }
      });
      node.label.isHover = true;
    });

    // when rolled out from the node, make all the links rolled-out
    nodeTemplate.events.on("out", function (event) {
      var node = event.target;
      node.outgoingDataItems.each(function (dataItem) {
        if (dataItem.toNode) {
          dataItem.link.isHover = false;
          dataItem.toNode.label.isHover = false;
        }
      });
      node.incomingDataItems.each(function (dataItem) {
        if (dataItem.fromNode) {
          dataItem.link.isHover = false;
          dataItem.fromNode.label.isHover = false;
        }
      });
      node.label.isHover = false;
    });

    var label = nodeTemplate.label;
    label.relativeRotation = 90;
    label.fillOpacity = 0.4;
    let labelHS = label.states.create("hover");
    labelHS.properties.fillOpacity = 1;
    nodeTemplate.cursorOverStyle = am4core.MouseCursorStyle.pointer;

    // Color each node like the neighbor it shares the most volume with,
    // unless it is itself a "main" node already present in `colors`.
    nodeTemplate.adapter.add("fill", function (fill, target) {
      let node = target;
      let counters = {};
      let mainChar = false;
      node.incomingDataItems.each(function (dataItem) {
        if (colors[dataItem.toName]) {
          mainChar = true;
        }
        // Sum incoming link values per origin node.
        if (isNaN(counters[dataItem.fromName])) {
          counters[dataItem.fromName] = dataItem.value;
        } else {
          counters[dataItem.fromName] += dataItem.value;
        }
      });
      if (mainChar) {
        return fill;
      }
      // FIX: removed unused locals `count` and `color`.
      let biggest = 0;
      let biggestName;
      for (var name in counters) {
        if (counters[name] > biggest) {
          biggestName = name;
          biggest = counters[name];
        }
      }
      if (colors[biggestName]) {
        fill = colors[biggestName];
      }
      return fill;
    });

    // link template
    var linkTemplate = chart.links.template;
    linkTemplate.strokeOpacity = 0;
    linkTemplate.fillOpacity = 0.2;
    linkTemplate.tooltipText = "{fromName} & {toName}: {value.value}";
    // linkTemplate.colorMode = "gradient";
    var hoverState = linkTemplate.states.create("hover");
    hoverState.properties.fillOpacity = 0.7;
    hoverState.properties.strokeOpacity = 0.7;

    // data credit label
    var creditLabel = chart.chartContainer.createChild(am4core.TextLink);
    creditLabel.text = "Compiled by: u/LAcuber";
    creditLabel.url = "https://www.reddit.com/u/LAcuber";
    creditLabel.y = am4core.percent(99);
    creditLabel.x = am4core.percent(99);
    creditLabel.fontSize = 14;
    creditLabel.horizontalCenter = "right";
    creditLabel.verticalCenter = "bottom";

    var title = chart.chartContainer.createChild(am4core.Label);
    title.text = "r/languagelearning visualized";
    title.fontSize = 30;
    title.align = "center";
  }); // end am4core.ready()
</script>
<!-- HTML -->
<div id="chartdiv"></div>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
After registering a Reddit script app, you can run `extractor.py` to gather all the data concerning user flairs. Afterwards, install a local file server with `npm i http-server -g` — due to CORS issues, you cannot view the output immediately as a standalone file. Run `http-server`, visit `localhost:8080`, and open `show.html`. Voila!