Skip to content

Instantly share code, notes, and snippets.

@Destaq
Created January 31, 2023 11:01
Show Gist options
  • Save Destaq/6386b9b217bd8f664cbfdd5a4979d96d to your computer and use it in GitHub Desktop.
Save Destaq/6386b9b217bd8f664cbfdd5a4979d96d to your computer and use it in GitHub Desktop.
Graphically analyzes the most commonly learned language combinations on r/languagelearning based on user flairs
{
"username": "Reddit username",
"client_id": "Register a developer script on Reddit",
"client_secret": "See the above",
"password": "Your Reddit password"
}
AD ca
AE ar
AF fa
AG en
AI en
AL sq
AM hy
AO pt
AQ en
AR es
AS en
AT de
AU en
AW nl
AX sv
AZ az
BA bs
BB en
BD bn
BE nl
BF fr
BG bg
BH ar
BI fr
BJ fr
BL fr
BM en
BN ms
BO es
BQ nl
BR pt
BS en
BT dz
BV no
BW en
BY be
BZ en
CA en
CC en
CD fr
CF fr
CG fr
CH de
CI fr
CK en
CL es
CM fr
CN zh
CO es
CR es
CU es
CV pt
CW nl
CX en
CY el
CZ cs
DE de
DJ fr
DK da
DM en
DO es
DZ ar
EC es
EE et
EG ar
EH ar
ER ti
ES es
ET am
FI fi
FJ en
FK en
FM en
FO fo
FR fr
GA fr
GB en
GD en
GE ka
GF fr
GG en
GH en
GI en
GL kl
GM en
GN fr
GP fr
GQ es
GR el
GS en
GT es
GU en
GW pt
GY en
HK zh-hant
HM en
HN es
HR hr
HT fr
HU hu
ID id
IE en
IL he
IM en
IN hi
IO en
IQ ar
IR fa
IS is
IT it
JE en
JM en
JO ar
JP ja
KE sw
KG ky
KH km
KI en
KM ar
KN en
KP ko
KR ko
KW ar
KY en
KZ kk
LA lo
LB ar
LC en
LI de
LK si
LR en
LS en
LT lt
LU lb
LV lv
LY ar
MA ar
MC fr
MD ro
ME srp
MF fr
MG mg
MH en
MK mk
ML fr
MM my
MN mn
MO zh-hant
MP en
MQ fr
MR ar
MS en
MT mt
MU mfe
MV dv
MW en
MX es
MY ms
MZ pt
NA en
NC fr
NE fr
NF en
NG en
NI es
NL nl
NO nb
NP ne
NR na
NU niu
NZ mi
OM ar
PA es
PE es
PF fr
PG en
PH en
PK en
PL pl
PM fr
PN en
PR es
PS ar
PT pt
PW en
PY es
QA ar
RE fr
RO ro
RS sr
RU ru
RW rw
SA ar
SB en
SC fr
SD ar
SE sv
SG zh
SH en
SI sl
SJ no
SK sk
SL en
SM it
SN fr
SO so
SR nl
ST pt
SS en
SV es
SX nl
SY ar
SZ en
TC en
TD fr
TF fr
TG fr
TH th
TJ tg
TK tkl
TL pt
TM tk
TN ar
TO en
TR tr
TT en
TV en
TW zh-hant
TZ sw
UA uk
UG en
UM en
US en
UY es
UZ uz
VA it
VC en
VE es
VG en
VI en
VN vi
VU bi
WF fr
WS sm
XK en
YE ar
YT fr
ZA en
ZM en
ZW en
# -*- coding: utf-8 -*-
"""
Converts flag emoji to ascii and back
https://github.com/cvzi/flag
Based on http://schinckel.net/2015/10/29/unicode-flags-in-python/
Unicode country code emoji flags for Python
~~~~~~~~~~~~~~~~
>>> import flag
>>> flag.flag("IL")
'🇮🇱'
>>> flag.flagize("Flag of Israel :IL:")
'Flag of Israel 🇮🇱'
>>> flag.dflagize("Flag of Israel 🇮🇱")
'Flag of Israel :IL:'
>>> flag.flagize(":gb-eng: is part of the UK :GB:", subregions=True)
'England 🏴󠁧󠁢󠁥󠁮󠁧󠁿 is part of the UK 🇬🇧'
>>> flag.dflagize("England 🏴󠁧󠁢󠁥󠁮󠁧󠁿 is part of the UK 🇬🇧", subregions=True)
'England :gb-eng: is part of the UK :GB:'
"""
import sys
import warnings
import re
from typing import List
__version__: str = '1.3.1'
__author__: str = 'cuzi'
__email__: str = 'cuzi@openmail.cc'
__source__: str = 'https://github.com/cvzi/flag'
__license__: str = """
MIT License
Copyright (c) cuzi 2018
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
# Public API of this module.
__all__ = [
    "flag",
    "flagize",
    "dflagize",
    "flagize_subregional",
    "dflagize_subregional",
    "Flag"]

# Distance between an ASCII uppercase letter and its regional indicator
# symbol, e.g. "A" -> U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A.
OFFSET = ord("🇦") - ord("A")
# Start of the Unicode TAG characters block (used for tag sequences);
# ord(ascii_char) + OFFSET_TAG yields the corresponding tag character.
OFFSET_TAG = 0xE0000
CANCELTAG = "\U000E007F"  # CANCEL TAG: terminates a flag tag sequence
BLACKFLAG = "\U0001F3F4"  # WAVING BLACK FLAG: base of subregional flags
# Characters that are kept from input codes; everything else is stripped.
ASCII_LOWER = "abcdefghijklmnopqrstuvwxyz0123456789"
def check_prefix(custom_str: str) -> bool:
    """Check if prefix will safely work with flagize and subregional flags

    :param str custom_str: Custom prefix
    :return: False if the string will safely work with subregional flags
    :rtype: bool
    """
    # Only the empty prefix is unsafe.
    return not custom_str
def check_suffix(custom_str: str) -> bool:
    """Check if suffix will safely work with flagize and subregional flags

    :param str custom_str: Custom suffix
    :return: False if the string will safely work with subregional flags
    :rtype: bool
    """
    # A leading minus collides with the "xx-xxx" subregional syntax.
    if custom_str.startswith("-"):
        return True
    # Long suffixes (4+ chars) are always safe.
    if len(custom_str) >= 4:
        return False
    # Short suffixes are unsafe if they contain any a-z/0-9 character.
    lowered = custom_str.lower()
    return any(ch in lowered for ch in ASCII_LOWER)
def flag_regional_indicator(code: List[str]) -> str:
    """Two letters are converted to regional indicator symbols

    :param str code: two letter ISO 3166 code
    :return: regional indicator symbols of the country flag
    :rtype: str
    """
    symbols = []
    for letter in code:
        # Shift each ASCII letter into the regional indicator block.
        symbols.append(chr(ord(letter.upper()) + OFFSET))
    return "".join(symbols)
def flag_tag_sequence(code: List[str]) -> str:
    """Three to seven letters/digits are converted to a tag sequence.

    :param str code: regional code from ISO 3166-2.
    :return: The unicode tag sequence of the subregional flag
    :rtype: str
    """
    # Shift each character into the Unicode TAG block, then wrap the
    # result between the black-flag base and the cancel tag.
    body = "".join(chr(OFFSET_TAG + ord(ch.lower())) for ch in code)
    return "".join([BLACKFLAG, body, CANCELTAG])
class Flag:
    """Use this class if you want a different prefix and suffix instead
    of colons. Offers the same methods as the module.
    """

    def __init__(self, prefix_str: str = ":",
                 suffix_str: str = ":", warn: bool = True) -> None:
        """Set a custom prefix and suffix. Instead of ``:XY:`` it will
        use ``{prefix}XY{suffix}``.
        To encode subregional flags, use a suffix that is either longer
        than 4 characters or that does not contain A-Z, a-z, 0-9 and
        does not start with a - (minus).

        :param str prefix_str: The leading symbols
        :param str suffix_str: The trailing symbols
        :param bool warn: Warn (once) when the prefix/suffix is unsafe
            for subregional flags
        """
        self._prefix = prefix_str
        # re.escape'd copies are used to build the regexes below.
        self._prefix_re = re.escape(prefix_str)
        self._prefix_warn = warn and check_prefix(self._prefix)
        self._suffix = suffix_str
        self._suffix_re = re.escape(suffix_str)
        self._suffix_warn = warn and check_suffix(self._suffix)

    @staticmethod
    def flag(countrycode: str) -> str:
        """Encodes a single flag to unicode. Two letters are converted to
        regional indicator symbols
        Three or more letters/digits are converted to tag sequences.
        Dashes, colons and other symbols are removed from input, only a-z, A-Z
        and 0-9 are processed.
        In general a valid flag is either a two letter code from ISO 3166
        (e.g. ``GB``), a code from ISO 3166-2 (e.g. ``GBENG``) or a numeric
        code from ISO 3166-1.
        However, not all codes produce valid unicode, see
        http://unicode.org/reports/tr51/#flag-emoji-tag-sequences for more
        information.
        From ISO 3166-2 only England ``gbeng``, Scotland ``gbsct`` and
        Wales ``gbwls`` are considered RGI (recommended for general
        interchange) by the Unicode Consortium,
        see http://www.unicode.org/Public/emoji/latest/emoji-test.txt

        :param str countrycode: Two letter ISO 3166 code or a regional code
            from ISO 3166-2.
        :return: The unicode representation of the flag
        :rtype: str
        """
        # Delegate to the module-level function; prefix/suffix play no role.
        return flag(countrycode)

    def flagize(self, text: str, subregions: bool = False) -> str:
        """Encode flags. Replace all two letter codes ``{prefix}XX{suffix}`` with unicode
        flags (emoji flag sequences)
        For this method the suffix should not contain
        A-Z, a-z or 0-9 and not start with a - (minus).

        :param str text: The text
        :param bool subregions: Also replace subregional/subdivision codes
            ``{prefix}xx-xxx{suffix}`` with unicode flags (flag emoji tag sequences).
        :return: The text with all occurrences of ``{prefix}XX{suffix}`` replaced by unicode
            flags
        :rtype: str
        """
        def flag_repl(matchobj):
            # Turn the captured two-letter code into its emoji flag.
            return flag_regional_indicator(matchobj.group(1))

        text = re.sub(self._prefix_re +
                      "([a-zA-Z]{2})" + self._suffix_re, flag_repl, text)
        if subregions:
            text = self.flagize_subregional(text)
        return text

    def dflagize(self, text: str, subregions: bool = False) -> str:
        """Decode flags. Replace all unicode country flags (emoji flag
        sequences) in text with ascii two letter code ``{prefix}XX{suffix}``

        :param str text: The text
        :param bool subregions: Also replace subregional/subdivision flags
            (flag emoji tag sequences) with ``{prefix}xx-xxx{suffix}``
        :return: The text with all unicode flags replaced by ascii
            sequence ``{prefix}XX{suffix}``
        :rtype: str
        """
        # "%%c" escapes to a literal "%c", so e.g. ":" -> ":%c%c:".
        pattern = "%s%%c%%c%s" % (self._prefix, self._suffix)

        def dflag(i):
            # Shift each regional indicator back to its ASCII letter.
            points = tuple(ord(x) - OFFSET for x in i)
            return pattern % points

        def dflag_repl(matchobj):
            return dflag(matchobj.group(0))

        # Any two consecutive regional indicator symbols form one flag.
        regex = re.compile("([\U0001F1E6-\U0001F1FF]{2})", flags=re.UNICODE)
        text = regex.sub(dflag_repl, text)
        if subregions:
            text = self.dflagize_subregional(text)
        return text

    def flagize_subregional(self, text: str) -> str:
        """Encode subregional/subdivision flags. Replace all regional codes
        ``{prefix}xx-xxx{suffix}`` with unicode flags (flag emoji tag sequences)
        For this method the suffix should not contain
        A-Z, a-z or 0-9 and not start with a - (minus).

        :param str text: The text
        :return: The text with all occurrences of ``{prefix}xx-xxx{suffix}`` replaced by
            unicode flags
        :rtype: str
        """
        # Each warning is emitted at most once per instance (flag is reset).
        if self._prefix_warn:
            warnings.warn(
                """The empty prefix (%r) is unsafe for subregional flags.
You can use Flag(%r, %r, warn=False) to disable this warning""" %
                (self._prefix, self._prefix, self._suffix), UserWarning)
            self._prefix_warn = False
        elif self._suffix_warn:
            warnings.warn(
                """The suffix (%r) is unsafe for subregional flags
because it is short and contains a-z, 0-9 or starts with -
You can use Flag(%r, %r, warn=False) to disable this warning""" %
                (self._suffix, self._prefix, self._suffix), UserWarning)
            self._suffix_warn = False

        def flag_repl(matchobj):
            # Join region and subdivision parts into one tag sequence.
            return flag_tag_sequence(matchobj.group(1) + matchobj.group(2))

        # Enforces a hyphen after two chars, allows both:
        # - The natural 2-letter unicode_region_subtag and subdivision_suffix
        #   like California USCA ":us-ca:", England GBENG ":gb-eng:"
        # - For sake of completeness: 3-digit unicode_region_subtag like 840
        #   for US formatted as ":84-0:"
        text = re.sub(
            self._prefix_re +
            "([a-zA-Z]{2}|[0-9]{2})-([0-9a-zA-Z]{1,4})" + self._suffix_re,
            flag_repl,
            text)
        return text

    def dflagize_subregional(self, text: str) -> str:
        """Decode subregional/subdivision flags. Replace all unicode regional
        flags (flag emoji tag sequences) in text with their ascii
        code ``{prefix}xx-xxx{suffix}``

        :param str text: The text
        :return: The text with all regional flags replaced by ascii
            sequence ``{prefix}xx-xxx{suffix}``
        :rtype: str
        """
        def dflag(i):
            # Drop the TAG-block offset to recover ASCII characters.
            points = [ord(x) - OFFSET_TAG for x in i]
            # Everything after the two-char region is the subdivision part.
            subregion = "".join(["%c" % point for point in points[2:]])
            return "%s%c%c-%s%s" % (self._prefix,
                                    points[0],
                                    points[1],
                                    subregion,
                                    self._suffix)

        def dflag_repl(matchobj):
            return dflag(matchobj.group(1))

        # Black flag + 3..6 tag characters (digits/lowercase) + cancel tag.
        regex = re.compile(
            BLACKFLAG +
            "([\U000E0030-\U000E0039\U000E0061-\U000E007A]{3,6})" +
            CANCELTAG,
            flags=re.UNICODE)
        text = regex.sub(dflag_repl, text)
        return text
def flag(countrycode: str) -> str:
    """Encodes a single flag to unicode. Two letters are converted to regional
    indicator symbols
    Three or more letters/digits are converted to tag sequences.
    Dashes, colons and other symbols are removed from input, only a-z, A-Z and
    0-9 are processed.
    In general a valid flag is either a two letter code from ISO 3166
    (e.g. ``GB``), a code from ISO 3166-2 (e.g. ``GBENG``) or a numeric code
    from ISO 3166-1.
    However, not all codes produce valid unicode, see
    http://unicode.org/reports/tr51/#flag-emoji-tag-sequences for more
    information.
    From ISO 3166-2 only England ``gbeng``, Scotland ``gbsct`` and
    Wales ``gbwls`` are considered RGI (recommended for general interchange)
    by the Unicode Consortium,
    see http://www.unicode.org/Public/emoji/latest/emoji-test.txt

    :param str countrycode: Two letter ISO 3166 code or a regional code
        from ISO 3166-2.
    :return: The unicode representation of the flag
    :rtype: str
    """
    # Keep only a-z / 0-9; dashes, colons etc. are silently dropped.
    cleaned = [ch for ch in countrycode.lower() if ch in ASCII_LOWER]
    size = len(cleaned)
    if size == 2:
        # Regional indicator symbols
        return flag_regional_indicator(cleaned)
    if 2 < size < 7:
        # Tag sequence
        return flag_tag_sequence(cleaned)
    found = ''.join(cleaned)
    raise ValueError(
        'invalid countrycode, found %d (%r) in %r.' %
        (size, found, countrycode))
def flagize(text: str, subregions: bool = False) -> str:
    """Encode flags: replace every two letter code ``:XX:`` in *text*
    with the corresponding unicode flag (emoji flag sequence).

    Delegates to the module-level :class:`Flag` instance that uses
    colons as prefix and suffix.

    :param str text: The text
    :param bool subregions: Also replace subregional/subdivision codes
        ``:xx-xxx:`` with unicode flags (flag emoji tag sequences).
    :return: The text with all occurrences of ``:XX:`` replaced by
        unicode flags
    :rtype: str
    """
    return standard.flagize(text, subregions=subregions)
def dflagize(text: str, subregions: bool = False) -> str:
    """Decode flags: replace every unicode country flag (emoji flag
    sequence) in *text* with its ascii two letter code ``:XX:``.

    Delegates to the module-level :class:`Flag` instance that uses
    colons as prefix and suffix.

    :param str text: The text
    :param bool subregions: Also replace subregional/subdivision flags
        (flag emoji tag sequences) with ``:xx-xxx:``
    :return: The text with all unicode flags replaced by ascii
        sequence ``:XX:``
    :rtype: str
    """
    return standard.dflagize(text, subregions=subregions)
def flagize_subregional(text: str) -> str:
    """Encode subregional/subdivision flags: replace every regional code
    ``:xx-xxx:`` in *text* with a unicode flag (flag emoji tag sequence).

    Delegates to the module-level :class:`Flag` instance that uses
    colons as prefix and suffix.

    :param str text: The text
    :return: The text with all occurrences of ``:xx-xxx:`` replaced by
        unicode flags
    :rtype: str
    """
    return standard.flagize_subregional(text)
def dflagize_subregional(text: str) -> str:
    """Decode subregional/subdivision flags: replace every unicode
    regional flag (flag emoji tag sequence) in *text* with its ascii
    code ``:xx-xxx:``.

    Delegates to the module-level :class:`Flag` instance that uses
    colons as prefix and suffix.

    :param str text: The text
    :return: The text with all regional flags replaced by ascii
        sequence ``:xx-xxx:``
    :rtype: str
    """
    return standard.dflagize_subregional(text)
standard = Flag(":", ":")
import praw
from pmaw import PushshiftAPI
import json

# Credentials live outside the repo in a JSON file (see the template above).
# Use a context manager so the file handle is closed deterministically.
with open(".env") as secrets_file:
    secrets = json.load(secrets_file)
userAgent = "python:subreddit_user_flairs.py:v1.0 (by {})".format(secrets["username"])
reddit = praw.Reddit(user_agent=userAgent, **secrets)
api_praw = PushshiftAPI(praw=reddit)

comments = []
# Fetch up to 100 windows of 1,000 comments each from r/languagelearning,
# sliding a ~2.5M-second window forward per iteration (offset 16 windows back).
for i in range(100):
    print(i)
    try:
        comments.extend(api_praw.search_comments(subreddit="languagelearning", limit=1_000, mem_safe=False, since=1446083839 + 2_500_000 * i - 16 * 2_500_00, until=1446083839 + 2_500_000 + 2_500_000 * i - 16 * 2_500_00))
    # Pushshift is flaky: skip failed windows but don't swallow
    # KeyboardInterrupt/SystemExit like the old bare `except:` did.
    except Exception:
        pass
# Map each author to the first flair text observed for them.
user_flairs = {}
comments = list(comments)
useful_comments = 0
for comment in comments:
    author, author_flair = str(comment["author"]), comment["author_flair_text"]
    if author_flair and author not in user_flairs:
        user_flairs[author] = author_flair
    elif author_flair:
        # NOTE(review): this only counts *repeat* flaired comments — the
        # first flaired comment per author takes the branch above and is
        # not counted. Confirm whether "useful comments" was meant to
        # count every flaired comment.
        useful_comments += 1
print("Number of useful comments: {}".format(useful_comments))

# Persist the author -> flair map for inspection / later runs.
with open("flairs.json", "w+") as f:
    json.dump(user_flairs, f)
# Taken from the Library of Congress
with open("language_codes.json", "r") as f:
    language_codes = json.load(f)

# For unknown reasons this cannot be used as a PyPI import
import custom_flag as flag_module

# Space-separated list of flag emoji to look for in flairs.
# Fixed: the original `open(...).read()` leaked the file handle.
with open("flag_emojis.txt", "r") as f:
    possible_flags = f.read().split(" ")

with open("country_languages.tsv", "r") as f:
    country_languages = f.read().splitlines()

# Lower-cased country code -> language code (from the TSV above).
code_mapping = {}
for line in country_languages:
    country_code, lang_code = line.split("\t")
    code_mapping[country_code.lower()] = lang_code
# Map each flag emoji to its language name; count successes and failures.
flag_emoji_dict = {}
count = 0
fail = 0
for flag in possible_flags:
    try:
        # dflagize gives ":XX:"; strip the colons and lower-case it,
        # then chain country code -> language code -> language name.
        # Any unknown step raises KeyError and lands in the fail branch
        # (equivalent to the original nested try with a None sentinel).
        country = flag_module.dflagize(flag)[1:-1].lower()
        flag_emoji_dict[flag] = language_codes[code_mapping[country]]
        count += 1
    except KeyError:
        fail += 1
print(count, "flags found", fail, "flags failed")
# Support people who write the language as a whole
# (make every full language name map to itself, e.g. "French" -> "French").
for full_name in list(language_codes.values()):
    language_codes[full_name] = full_name
print(len(user_flairs), "users with flairs")  # approximately 20% of comments are usable
import regex
import emoji
# Adapted from https://stackoverflow.com/a/49242754/12876940
def split_emojis(text):
    """Split *text* into (list of emoji graphemes, remaining plain text).

    Iterates extended grapheme clusters (regex \\X) so multi-codepoint
    emoji such as flags stay intact.
    """
    emojis = []
    plain_parts = []
    for grapheme in regex.findall(r'\X', text):
        if grapheme in emoji.EMOJI_DATA:
            emojis.append(grapheme)
        else:
            plain_parts.append(grapheme)
    return emojis, "".join(plain_parts)
# Link every language to every other language with a zero counter
# (no self-links: a language has no entry for itself).
language_links = {
    lang: {other: 0 for other in language_codes.values() if other != lang}
    for lang in language_codes.values()
}
for user in user_flairs:
    flair = user_flairs[user]
    # Separate the flag emoji in the flair from the surrounding text.
    broken_emoji, remaining_flair = split_emojis(flair)
    # NOTE: a future improvement could be to only allow for one match per pair
    # since some people evidently write Mandarin :flag: etc.
    # All ordered (i, j) pairs are visited, so each unordered pair bumps
    # both directions of language_links.
    for i in range(len(broken_emoji)):
        for j in range(len(broken_emoji)):
            try:
                match1 = flag_emoji_dict[broken_emoji[i]]
                match2 = flag_emoji_dict[broken_emoji[j]]
                if i != j and match1 in language_codes and match2 in language_codes:
                    language_links[match1][match2] += 1
                # The above is already enough, somehow.
            except KeyError:
                # Unknown flag, or two flags mapping to the same language
                # (language_links has no self-link entry): skip quietly.
                pass
    if remaining_flair:
        # Split the plain text on common separators and keep only tokens
        # recognized as language codes or names.
        broken_flair = regex.split(r"[\s\:\-&,\|/\\]+", remaining_flair)
        true_matches = []
        for element in broken_flair:
            if element in language_codes:
                true_matches.append(element)
            elif element.lower() in language_codes:
                true_matches.append(element.lower())
        for i in range(len(true_matches)):
            for j in range(len(true_matches)):
                if i != j and true_matches[i] in language_codes and true_matches[j] in language_codes:
                    m1 = language_codes[true_matches[i]]
                    m2 = language_codes[true_matches[j]]
                    if m1 != m2:  # sometimes multiple spanishes etc.
                        # m1/m2 are already canonical names; the second
                        # language_codes[...] lookup still works because
                        # names were self-mapped above (name -> name).
                        language_links[
                            language_codes[m1]
                        ][
                            language_codes[m2]
                        ] += 1
                        # language_links[language_codes[true_matches[j]]][language_codes[true_matches[i]]] += 1
# Now for the visualization...
# Dump the output to a JSON file
with open("language_links_finished.json", "w+") as f:
    json.dump(language_links, f)

# get into amCharts format
am_formatted = {
    "data": [],
}
# Use a set (was a list): O(1) membership instead of an O(n) scan per pair.
viewed_links = set()
for link in language_links:
    for link2 in language_links[link]:
        if (link, link2) in viewed_links:
            continue
        if language_links[link][link2] >= 1:  # could be any size here...
            am_formatted["data"].append({
                "from": link,
                "to": link2,
                "value": language_links[link][link2]
            })
            # Mark both directions so each unordered pair is emitted once
            # (the counts are symmetric, so direction does not matter).
            viewed_links.add((link, link2))
            viewed_links.add((link2, link))
with open("am_formatted.json", "w+") as f:
    json.dump(am_formatted, f)
🇦🇩 🇦🇪 🇦🇫 🇦🇬 🇦🇮 🇦🇱 🇦🇲 🇦🇴 🇦🇶 🇦🇷 🇦🇸 🇦🇹 🇦🇺 🇦🇼 🇦🇽 🇦🇿 🇧🇦 🇧🇧 🇧🇩 🇧🇪 🇧🇫 🇧🇬 🇧🇭 🇧🇮 🇧🇯 🇧🇱 🇧🇲 🇧🇳 🇧🇴 🇧🇶 🇧🇷 🇧🇸 🇧🇹 🇧🇻 🇧🇼 🇧🇾 🇧🇿 🇨🇦 🇨🇨 🇨🇩 🇨🇫 🇨🇬 🇨🇭 🇨🇮 🇨🇰 🇨🇱 🇨🇲 🇨🇳 🇨🇴 🇨🇷 🇨🇺 🇨🇻 🇨🇼 🇨🇽 🇨🇾 🇨🇿 🇩🇪 🇩🇯 🇩🇰 🇩🇲 🇩🇴 🇩🇿 🇪🇨 🇪🇪 🇪🇬 🇪🇭 🇪🇷 🇪🇸 🇪🇹 🇪🇺 🇫🇮 🇫🇯 🇫🇰 🇫🇲 🇫🇴 🇫🇷 🇬🇦 🇬🇧 🇬🇩 🇬🇪 🇬🇫 🇬🇬 🇬🇭 🇬🇮 🇬🇱 🇬🇲 🇬🇳 🇬🇵 🇬🇶 🇬🇷 🇬🇸 🇬🇹 🇬🇺 🇬🇼 🇬🇾 🇭🇰 🇭🇲 🇭🇳 🇭🇷 🇭🇹 🇭🇺 🇮🇩 🇮🇪 🇮🇱 🇮🇲 🇮🇳 🇮🇴 🇮🇶 🇮🇷 🇮🇸 🇮🇹 🇯🇪 🇯🇲 🇯🇴 🇯🇵 🇰🇪 🇰🇬 🇰🇭 🇰🇮 🇰🇲 🇰🇳 🇰🇵 🇰🇷 🇰🇼 🇰🇾 🇰🇿 🇱🇦 🇱🇧 🇱🇨 🇱🇮 🇱🇰 🇱🇷 🇱🇸 🇱🇹 🇱🇺 🇱🇻 🇱🇾 🇲🇦 🇲🇨 🇲🇩 🇲🇪 🇲🇫 🇲🇬 🇲🇭 🇲🇰 🇲🇱 🇲🇲 🇲🇳 🇲🇴 🇲🇵 🇲🇶 🇲🇷 🇲🇸 🇲🇹 🇲🇺 🇲🇻 🇲🇼 🇲🇽 🇲🇾 🇲🇿 🇳🇦 🇳🇨 🇳🇪 🇳🇫 🇳🇬 🇳🇮 🇳🇱 🇳🇴 🇳🇵 🇳🇷 🇳🇺 🇳🇿 🇴🇲 🇵🇦 🇵🇪 🇵🇫 🇵🇬 🇵🇭 🇵🇰 🇵🇱 🇵🇲 🇵🇳 🇵🇷 🇵🇸 🇵🇹 🇵🇼 🇵🇾 🇶🇦 🇷🇪 🇷🇴 🇷🇸 🇷🇺 🇷🇼 🇸🇦 🇸🇧 🇸🇨 🇸🇩 🇸🇪 🇸🇬 🇸🇭 🇸🇮 🇸🇯 🇸🇰 🇸🇱 🇸🇲 🇸🇳 🇸🇴 🇸🇷 🇸🇸 🇸🇹 🇸🇻 🇸🇽 🇸🇾 🇸🇿 🇹🇨 🇹🇩 🇹🇫 🇹🇬 🇹🇭 🇹🇯 🇹🇰 🇹🇱 🇹🇲 🇹🇳 🇹🇴 🇹🇷 🇹🇹 🇹🇻 🇹🇼 🇹🇿 🇺🇦 🇺🇬 🇺🇲 🇺🇸 🇺🇾 🇺🇿 🇻🇦 🇻🇨 🇻🇪 🇻🇬 🇻🇮 🇻🇳 🇻🇺 🇼🇫 🇼🇸 🇾🇪 🇾🇹 🇿🇦 🇿🇲 🇿🇼
{"aar": "Afar", "aa": "Afar", "abk": "Abkhazian", "ab": "Abkhazian", "ace": "Achinese", "ach": "Acoli", "ada": "Adangme", "ady": "Adygei", "afa": "Afro-Asiatic Languages", "afh": "Afrihili", "afr": "Afrikaans", "af": "Afrikaans", "ain": "Ainu", "aka": "Akan", "ak": "Akan", "akk": "Akkadian", "alb": "Albanian", "sqi": "Albanian", "sq": "Albanian", "ale": "Aleut", "alg": "Algonquian Languages", "alt": "Southern Altai", "amh": "Amharic", "am": "Amharic", "ang": "English Old (ca.450-1100)", "anp": "Angika", "apa": "Apache Languages", "ara": "Arabic", "ar": "Arabic", "arc": "Imperial Aramaic (700-300 BCE)", "arg": "Aragonese", "an": "Aragonese", "arm": "Armenian", "hye": "Armenian", "hy": "Armenian", "arn": "Mapuche", "arp": "Arapaho", "art": "Artificial Languages", "arw": "Arawak", "asm": "Assamese", "as": "Assamese", "ast": "Asturian", "ath": "Athapascan Languages", "aus": "Australian Languages", "ava": "Avaric", "av": "Avaric", "ave": "Avestan", "ae": "Avestan", "awa": "Awadhi", "aym": "Aymara", "ay": "Aymara", "aze": "Azerbaijani", "az": "Azerbaijani", "bad": "Banda Languages", "bai": "Bamileke Languages", "bak": "Bashkir", "ba": "Bashkir", "bal": "Baluchi", "bam": "Bambara", "bm": "Bambara", "ban": "Balinese", "baq": "Basque", "eus": "Basque", "eu": "Basque", "bas": "Basa", "bat": "Baltic Languages", "bej": "Bedawiyet", "bel": "Belarusian", "be": "Belarusian", "bem": "Bemba", "ben": "Bengali", "bn": "Bengali", "ber": "Berber Languages", "bho": "Bhojpuri", "bih": "Bihari Languages", "bh": "Bihari Languages", "bik": "Bikol", "bin": "Bini", "bis": "Bislama", "bi": "Bislama", "bla": "Siksika", "bnt": "Bantu (Other)", "bos": "Bosnian", "bs": "Bosnian", "bra": "Braj", "bre": "Breton", "br": "Breton", "btk": "Batak Languages", "bua": "Buriat", "bug": "Buginese", "bul": "Bulgarian", "bg": "Bulgarian", "bur": "Burmese", "mya": "Burmese", "my": "Burmese", "byn": "Bilin", "cad": "Caddo", "cai": "Central American Indian Languages", "car": "Galibi Carib", "cat": "Catalan", 
"ca": "Catalan", "cau": "Caucasian Languages", "ceb": "Cebuano", "cel": "Celtic Languages", "cha": "Chamorro", "ch": "Chamorro", "chb": "Chibcha", "che": "Chechen", "ce": "Chechen", "chg": "Chagatai", "chi": "Chinese", "zho": "Chinese", "zh": "Chinese", "chk": "Chuukese", "chm": "Mari", "chn": "Chinook Jargon", "cho": "Choctaw", "chp": "Chipewyan", "chr": "Cherokee", "chu": "Church Slavic", "cu": "Church Slavic", "chv": "Chuvash", "cv": "Chuvash", "chy": "Cheyenne", "cmc": "Chamic Languages", "cop": "Coptic", "cor": "Cornish", "kw": "Cornish", "cos": "Corsican", "co": "Corsican", "cpe": "Creoles And Pidgins", "cpf": "Creoles And Pidgins", "cpp": "Creoles And Pidgins", "cre": "Cree", "cr": "Cree", "crh": "Crimean Tatar", "crp": "Creoles And Pidgins", "csb": "Kashubian", "cus": "Cushitic Languages", "cz": "Czech", "cze": "Czech", "ces": "Czech", "cs": "Czech", "dak": "Dakota", "dan": "Danish", "da": "Danish", "dar": "Dargwa", "day": "Land Dayak Languages", "del": "Delaware", "den": "Slave (Athapascan)", "dgr": "Dogrib", "din": "Dinka", "div": "Dhivehi", "dv": "Dhivehi", "doi": "Dogri", "dra": "Dravidian Languages", "dsb": "Lower Sorbian", "dua": "Duala", "dum": "Dutch Middle (ca.1050-1350)", "dut": "Dutch", "nld": "Dutch", "nl": "Dutch", "dyu": "Dyula", "dzo": "Dzongkha", "dz": "Dzongkha", "efi": "Efik", "egy": "Egyptian (Ancient)", "eka": "Ekajuk", "elx": "Elamite", "eng": "English", "en": "English", "enm": "English Middle (1100-1500)", "epo": "Esperanto", "eo": "Esperanto", "est": "Estonian", "et": "Estonian", "ewe": "Ewe", "ee": "Ewe", "ewo": "Ewondo", "fan": "Fang", "fao": "Faroese", "fo": "Faroese", "fat": "Fanti", "fij": "Fijian", "fj": "Fijian", "fil": "Filipino", "fin": "Finnish", "fi": "Finnish", "fiu": "Finno-Ugrian Languages", "fon": "Fon", "fre": "French", "fra": "French", "fr": "French", "frm": "French Middle (ca.1400-1600)", "fro": "French Old (842-ca.1400)", "frr": "Northern Frisian", "frs": "Eastern Frisian", "fry": "Western Frisian", "fy": "Western 
Frisian", "ful": "Fulah", "ff": "Fulah", "fur": "Friulian", "gaa": "Ga", "gay": "Gayo", "gba": "Gbaya", "gem": "Germanic Languages", "geo": "Georgian", "kat": "Georgian", "ka": "Georgian", "ger": "German", "deu": "German", "de": "German", "gez": "Geez", "gil": "Gilbertese", "gla": "Gaelic", "gd": "Gaelic", "gle": "Irish", "ga": "Irish", "glg": "Galician", "gl": "Galician", "glv": "Manx", "gv": "Manx", "gmh": "German Middle High (ca.1050-1500)", "goh": "German Old High (ca.750-1050)", "gon": "Gondi", "gor": "Gorontalo", "got": "Gothic", "grb": "Grebo", "grc": "Greek Ancient (to 1453)", "gre": "Greek Modern (1453-)", "ell": "Greek Modern (1453-)", "el": "Greek Modern (1453-)", "grn": "Guarani", "gn": "Guarani", "gsw": "Alemannic", "guj": "Gujarati", "gu": "Gujarati", "gwi": "Gwich'in", "hai": "Haida", "hat": "Haitian", "ht": "Haitian", "hau": "Hausa", "ha": "Hausa", "haw": "Hawaiian", "heb": "Hebrew", "he": "Hebrew", "her": "Herero", "hz": "Herero", "hil": "Hiligaynon", "him": "Himachali Languages", "hin": "Hindi", "hi": "Hindi", "hit": "Hittite", "hmn": "Hmong", "hmo": "Hiri Motu", "ho": "Hiri Motu", "hrv": "Croatian", "hr": "Croatian", "hsb": "Upper Sorbian", "hun": "Hungarian", "hu": "Hungarian", "hup": "Hupa", "iba": "Iban", "ibo": "Igbo", "ig": "Igbo", "ice": "Icelandic", "isl": "Icelandic", "is": "Icelandic", "ido": "Ido", "io": "Ido", "iii": "Nuosu", "ii": "Nuosu", "ijo": "Ijo Languages", "iku": "Inuktitut", "iu": "Inuktitut", "ile": "Interlingue", "ie": "Interlingue", "ilo": "Iloko", "ina": "Interlingua (International Auxiliary Language Association)", "ia": "Interlingua (International Auxiliary Language Association)", "inc": "Indic Languages", "ind": "Indonesian", "id": "Indonesian", "ine": "Indo-European Languages", "inh": "Ingush", "ipk": "Inupiaq", "ik": "Inupiaq", "ira": "Iranian Languages", "iro": "Iroquoian Languages", "ita": "Italian", "it": "Italian", "jav": "Javanese", "jv": "Javanese", "jbo": "Lojban", "jpn": "Japanese", "ja": "Japanese", "jpr": 
"Judeo-Persian", "jrb": "Judeo-Arabic", "kaa": "Kara-Kalpak", "kab": "Kabyle", "kac": "Jingpho", "kal": "Greenlandic", "kl": "Greenlandic", "kam": "Kamba", "kan": "Kannada", "kn": "Kannada", "kar": "Karen Languages", "kas": "Kashmiri", "ks": "Kashmiri", "kau": "Kanuri", "kr": "Kanuri", "kaw": "Kawi", "kaz": "Kazakh", "kk": "Kazakh", "kbd": "Kabardian", "kha": "Khasi", "khi": "Khoisan Languages", "khm": "Central Khmer", "km": "Central Khmer", "kho": "Khotanese", "kik": "Gikuyu", "ki": "Gikuyu", "kin": "Kinyarwanda", "rw": "Kinyarwanda", "kir": "Kirghiz", "ky": "Kirghiz", "kmb": "Kimbundu", "kok": "Konkani", "kom": "Komi", "kv": "Komi", "kon": "Kongo", "kg": "Kongo", "kor": "Korean", "ko": "Korean", "kos": "Kosraean", "kpe": "Kpelle", "krc": "Karachay-Balkar", "krl": "Karelian", "kro": "Kru Languages", "kru": "Kurukh", "kua": "Kuanyama", "kj": "Kuanyama", "kum": "Kumyk", "kur": "Kurdish", "ku": "Kurdish", "kut": "Kutenai", "lad": "Ladino", "lah": "Lahnda", "lam": "Lamba", "lao": "Lao", "lo": "Lao", "lat": "Latin", "la": "Latin", "lav": "Latvian", "lv": "Latvian", "lez": "Lezghian", "lim": "Limburgan", "li": "Limburgan", "lin": "Lingala", "ln": "Lingala", "lit": "Lithuanian", "lt": "Lithuanian", "lol": "Mongo", "loz": "Lozi", "ltz": "Letzeburgesch", "lb": "Letzeburgesch", "lua": "Luba-Lulua", "lub": "Luba-Katanga", "lu": "Luba-Katanga", "lug": "Ganda", "lg": "Ganda", "lui": "Luiseno", "lun": "Lunda", "luo": "Luo (Kenya And Tanzania)", "lus": "Lushai", "mac": "Macedonian", "mkd": "Macedonian", "mk": "Macedonian", "mad": "Madurese", "mag": "Magahi", "mah": "Marshallese", "mh": "Marshallese", "mai": "Maithili", "mak": "Makasar", "mal": "Malayalam", "ml": "Malayalam", "man": "Mandingo", "mao": "Maori", "mri": "Maori", "mi": "Maori", "map": "Austronesian Languages", "mar": "Marathi", "mr": "Marathi", "mas": "Masai", "may": "Malay", "msa": "Malay", "ms": "Malay", "mdf": "Moksha", "mdr": "Mandar", "men": "Mende", "mga": "Irish Middle (900-1200)", "mic": "Mi'kmaq", "min": 
"Minangkabau", "mis": "Uncoded Languages", "mkh": "Mon-Khmer Languages", "mlg": "Malagasy", "mg": "Malagasy", "mlt": "Maltese", "mt": "Maltese", "mnc": "Manchu", "mni": "Manipuri", "mno": "Manobo Languages", "moh": "Mohawk", "mon": "Mongolian", "mn": "Mongolian", "mos": "Mossi", "mul": "Multiple Languages", "mun": "Munda Languages", "mus": "Creek", "mwl": "Mirandese", "mwr": "Marwari", "myn": "Mayan Languages", "myv": "Erzya", "nah": "Nahuatl Languages", "nai": "North American Indian Languages", "nap": "Neapolitan", "nau": "Nauru", "na": "Nauru", "nav": "Navaho", "nv": "Navaho", "nbl": "Ndebele", "nr": "Ndebele", "nde": "Ndebele", "nd": "Ndebele", "ndo": "Ndonga", "ng": "Ndonga", "nds": "Low", "nep": "Nepali", "ne": "Nepali", "new": "Nepal Bhasa", "nia": "Nias", "nic": "Niger-Kordofanian Languages", "niu": "Niuean", "nno": "Norwegian", "nn": "Norwegian", "nob": "Bokm\u00e5l", "nb": "Bokm\u00e5l", "nog": "Nogai", "non": "Norse", "nor": "Norwegian", "no": "Norwegian", "nqo": "N'Ko", "nso": "Northern Sotho", "nub": "Nubian Languages", "nwc": "Classical Nepal Bhasa", "nya": "Chewa", "ny": "Chewa", "nym": "Nyamwezi", "nyn": "Nyankole", "nyo": "Nyoro", "nzi": "Nzima", "oci": "Occitan (post 1500)", "oc": "Occitan (post 1500)", "oji": "Ojibwa", "oj": "Ojibwa", "ori": "Oriya", "or": "Oriya", "orm": "Oromo", "om": "Oromo", "osa": "Osage", "oss": "Ossetian", "os": "Ossetian", "ota": "Turkish Ottoman (1500-1928)", "oto": "Otomian Languages", "paa": "Papuan Languages", "pag": "Pangasinan", "pal": "Pahlavi", "pam": "Kapampangan", "pan": "Panjabi", "pa": "Panjabi", "pap": "Papiamento", "pau": "Palauan", "peo": "Persian Old (ca.600-400 B.C.)", "per": "Persian", "fas": "Persian", "fa": "Persian", "phi": "Philippine Languages", "phn": "Phoenician", "pli": "Pali", "pi": "Pali", "pol": "Polish", "pl": "Polish", "pon": "Pohnpeian", "por": "Portuguese", "pt": "Portuguese", "pra": "Prakrit Languages", "pro": "Proven\u00e7al Old (to 1500)", "pus": "Pashto", "ps": "Pashto", "qaa-qtz": 
"Reserved For Local Use", "que": "Quechua", "qu": "Quechua", "raj": "Rajasthani", "rap": "Rapanui", "rar": "Cook Islands Maori", "roa": "Romance Languages", "roh": "Romansh", "rm": "Romansh", "rom": "Romany", "rum": "Moldavian", "ron": "Moldavian", "ro": "Moldavian", "run": "Rundi", "rn": "Rundi", "rup": "Aromanian", "rus": "Russian", "ru": "Russian", "sad": "Sandawe", "sag": "Sango", "sg": "Sango", "sah": "Yakut", "sai": "South American Indian (Other)", "sal": "Salishan Languages", "sam": "Samaritan Aramaic", "san": "Sanskrit", "sa": "Sanskrit", "sas": "Sasak", "sat": "Santali", "scn": "Sicilian", "sco": "Scots", "sel": "Selkup", "sem": "Semitic Languages", "sga": "Irish Old (to 900)", "sgn": "Sign Languages", "shn": "Shan", "sid": "Sidamo", "sin": "Sinhala", "si": "Sinhala", "sio": "Siouan Languages", "sit": "Sino-Tibetan Languages", "sla": "Slavic Languages", "slo": "Slovak", "slk": "Slovak", "sk": "Slovak", "slv": "Slovenian", "sl": "Slovenian", "sma": "Southern Sami", "sme": "Northern Sami", "se": "Northern Sami", "smi": "Sami Languages", "smj": "Lule Sami", "smn": "Inari Sami", "smo": "Samoan", "sm": "Samoan", "sms": "Skolt Sami", "sna": "Shona", "sn": "Shona", "snd": "Sindhi", "sd": "Sindhi", "snk": "Soninke", "sog": "Sogdian", "som": "Somali", "so": "Somali", "son": "Songhai Languages", "sot": "Sotho", "st": "Sotho", "spa": "Spanish", "es": "Spanish", "esp": "Spanish", "srd": "Sardinian", "sc": "Sardinian", "srn": "Sranan Tongo", "srp": "Serbian", "sr": "Serbian", "srr": "Serer", "ssa": "Nilo-Saharan Languages", "ssw": "Swati", "ss": "Swati", "suk": "Sukuma", "sun": "Sundanese", "su": "Sundanese", "sus": "Susu", "sux": "Sumerian", "swa": "Swahili", "sw": "Swahili", "swe": "Swedish", "sv": "Swedish", "syc": "Classical Syriac", "syr": "Syriac", "tah": "Tahitian", "ty": "Tahitian", "tai": "Tai Languages", "tam": "Tamil", "ta": "Tamil", "tat": "Tatar", "tt": "Tatar", "tel": "Telugu", "te": "Telugu", "tem": "Timne", "ter": "Tereno", "tet": "Tetum", "tgk": 
"Tajik", "tg": "Tajik", "tgl": "Tagalog", "tl": "Tagalog", "tha": "Thai", "th": "Thai", "tib": "Tibetan", "bod": "Tibetan", "bo": "Tibetan", "tig": "Tigre", "tir": "Tigrinya", "ti": "Tigrinya", "tiv": "Tiv", "tkl": "Tokelau", "tlh": "Klingon", "tli": "Tlingit", "tmh": "Tamashek", "tog": "Tonga (Nyasa)", "ton": "Tonga (Tonga Islands)", "to": "Tonga (Tonga Islands)", "tpi": "Tok Pisin", "tsi": "Tsimshian", "tsn": "Tswana", "tn": "Tswana", "tso": "Tsonga", "ts": "Tsonga", "tuk": "Turkmen", "tk": "Turkmen", "tum": "Tumbuka", "tup": "Tupi Languages", "tur": "Turkish", "tr": "Turkish", "tut": "Altaic Languages", "tvl": "Tuvalu", "twi": "Twi", "tw": "Twi", "tyv": "Tuvinian", "udm": "Udmurt", "uga": "Ugaritic", "uig": "Uighur", "ug": "Uighur", "ukr": "Ukrainian", "uk": "Ukrainian", "umb": "Umbundu", "und": "Undetermined", "urd": "Urdu", "ur": "Urdu", "uzb": "Uzbek", "uz": "Uzbek", "vai": "Vai", "ven": "Venda", "ve": "Venda", "vie": "Vietnamese", "vi": "Vietnamese", "vol": "Volap\u00fck", "vo": "Volap\u00fck", "vot": "Votic", "wak": "Wakashan Languages", "wal": "Walamo", "war": "Waray", "was": "Washo", "wel": "Welsh", "cym": "Welsh", "cy": "Welsh", "wen": "Sorbian Languages", "wln": "Walloon", "wa": "Walloon", "wol": "Wolof", "wo": "Wolof", "xal": "Kalmyk", "xho": "Xhosa", "xh": "Xhosa", "yao": "Yao", "yap": "Yapese", "yid": "Yiddish", "yi": "Yiddish", "yor": "Yoruba", "yo": "Yoruba", "ypk": "Yupik Languages", "zap": "Zapotec", "zbl": "Bliss", "zen": "Zenaga", "zgh": "Standard Moroccan Tamazight", "zha": "Chuang", "za": "Chuang", "znd": "Zande Languages", "zul": "Zulu", "zu": "Zulu", "zun": "Zuni", "zxx": "No Linguistic Content", "zza": "Dimili"}
<style>
  /* Full-width chart area for the amCharts chord diagram */
  #chartdiv {
    width: 100%;
    height: 800px;
  }
  /* Black page background to match the amCharts dark theme */
  body {
    background-color: #000;
  }
</style>
<!-- Resources -->
<script src="https://cdn.amcharts.com/lib/4/core.js"></script>
<script src="https://cdn.amcharts.com/lib/4/charts.js"></script>
<script src="https://cdn.amcharts.com/lib/4/themes/dark.js"></script>
<!-- Chart code -->
<script>
am4core.ready(function () {
// Themes begin
am4core.useTheme(am4themes_dark);
// Themes end
var chart = am4core.create("chartdiv", am4charts.ChordDiagram);
// colors of main characters
chart.colors.saturation = 0.85;
chart.colors.step = 3;
var colors = {}
// reminder: must be run with a file server
fetch("am_formatted.json", {
headers: {
'Content-Type': 'application/json',
'Accept': 'application/json',
'mode': 'cors'
}
}).then(res => res.json()).then(json => {
chart.data = json.data;
})
chart.dataFields.fromName = "from";
chart.dataFields.toName = "to";
chart.dataFields.value = "value";
chart.nodePadding = 1;
chart.minNodeSize = 0.0025;
chart.startAngle = 135;
chart.endAngle = chart.startAngle + 360;
chart.sortBy = "value";
chart.fontSize = 14;
var nodeTemplate = chart.nodes.template;
nodeTemplate.readerTitle = "Click to show/hide or drag to rearrange";
nodeTemplate.showSystemTooltip = true;
nodeTemplate.propertyFields.fill = "color";
nodeTemplate.tooltipText = "{name} connections: {total}";
// when rolled over the node, make all the links rolled-over
nodeTemplate.events.on("over", function (event) {
var node = event.target;
node.outgoingDataItems.each(function (dataItem) {
if (dataItem.toNode) {
dataItem.link.isHover = true;
dataItem.toNode.label.isHover = true;
}
})
node.incomingDataItems.each(function (dataItem) {
if (dataItem.fromNode) {
dataItem.link.isHover = true;
dataItem.fromNode.label.isHover = true;
}
})
node.label.isHover = true;
})
// when rolled out from the node, make all the links rolled-out
nodeTemplate.events.on("out", function (event) {
var node = event.target;
node.outgoingDataItems.each(function (dataItem) {
if (dataItem.toNode) {
dataItem.link.isHover = false;
dataItem.toNode.label.isHover = false;
}
})
node.incomingDataItems.each(function (dataItem) {
if (dataItem.fromNode) {
dataItem.link.isHover = false;
dataItem.fromNode.label.isHover = false;
}
})
node.label.isHover = false;
})
var label = nodeTemplate.label;
label.relativeRotation = 90;
label.fillOpacity = 0.4;
let labelHS = label.states.create("hover");
labelHS.properties.fillOpacity = 1;
nodeTemplate.cursorOverStyle = am4core.MouseCursorStyle.pointer;
nodeTemplate.adapter.add("fill", function (fill, target) {
let node = target;
let counters = {};
let mainChar = false;
node.incomingDataItems.each(function (dataItem) {
if (colors[dataItem.toName]) {
mainChar = true;
}
if (isNaN(counters[dataItem.fromName])) {
counters[dataItem.fromName] = dataItem.value;
}
else {
counters[dataItem.fromName] += dataItem.value;
}
})
if (mainChar) {
return fill;
}
let count = 0;
let color;
let biggest = 0;
let biggestName;
for (var name in counters) {
if (counters[name] > biggest) {
biggestName = name;
biggest = counters[name];
}
}
if (colors[biggestName]) {
fill = colors[biggestName];
}
return fill;
})
// link template
var linkTemplate = chart.links.template;
linkTemplate.strokeOpacity = 0;
linkTemplate.fillOpacity = 0.2;
linkTemplate.tooltipText = "{fromName} & {toName}: {value.value}";
// linkTemplate.colorMode = "gradient";
var hoverState = linkTemplate.states.create("hover");
hoverState.properties.fillOpacity = 0.7;
hoverState.properties.strokeOpacity = 0.7;
// data credit label
var creditLabel = chart.chartContainer.createChild(am4core.TextLink);
creditLabel.text = "Compiled by: u/LAcuber";
creditLabel.url = "https://www.reddit.com/u/LAcuber";
creditLabel.y = am4core.percent(99);
creditLabel.x = am4core.percent(99);
creditLabel.fontSize = 14;
creditLabel.horizontalCenter = "right";
creditLabel.verticalCenter = "bottom";
var title = chart.chartContainer.createChild(am4core.Label);
title.text = "r/languagelearning visualized";
title.fontSize = 30;
title.align = "center";
}); // end am4core.ready()
</script>
<!-- HTML -->
<div id="chartdiv"></div>
@Destaq
Copy link
Author

Destaq commented Jan 31, 2023

After registering a Reddit script app, you can run extractor.py to gather all the data concerning user flairs. Afterwards:

  • npm i http-server -g — due to CORS issues, you cannot view the output immediately as a standalone file
  • http-server
  • Navigate to http://localhost:8080 and open show.html. Voilà!

@Destaq
Copy link
Author

Destaq commented Jan 31, 2023

Example output:

image

Interactive version: here

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment