Skip to content

Instantly share code, notes, and snippets.

# Lookup table from a two-letter ISO 639-1 language code to the English
# language name (names follow the ISO 639-2 English-name spelling).
# NOTE(review): the listing this table was taken from is cut off right after
# the 'gd' key, so every code that followed it is still missing here and has
# to be added before the table can be treated as complete.
language_short_name = {
    'aa': 'Afar',
    'ab': 'Abkhazian',
    'af': 'Afrikaans',
    'ak': 'Akan',
    'sq': 'Albanian',
    'am': 'Amharic',
    'ar': 'Arabic',
    'an': 'Aragonese',
    'hy': 'Armenian',
    'as': 'Assamese',
    'av': 'Avaric',
    'ae': 'Avestan',
    'ay': 'Aymara',
    'az': 'Azerbaijani',
    'ba': 'Bashkir',
    'bm': 'Bambara',
    'eu': 'Basque',
    'be': 'Belarusian',
    'bn': 'Bengali',
    'bh': 'Bihari languages',
    'bi': 'Bislama',
    'bo': 'Tibetan',
    'bs': 'Bosnian',
    'br': 'Breton',
    'bg': 'Bulgarian',
    'my': 'Burmese',
    'ca': 'Catalan; Valencian',
    'cs': 'Czech',
    'ch': 'Chamorro',
    'ce': 'Chechen',
    'zh': 'Chinese',
    'cu': 'Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic',
    'cv': 'Chuvash',
    'kw': 'Cornish',
    'co': 'Corsican',
    'cr': 'Cree',
    'cy': 'Welsh',
    'da': 'Danish',
    'de': 'German',
    'dv': 'Divehi; Dhivehi; Maldivian',
    'nl': 'Dutch; Flemish',
    'dz': 'Dzongkha',
    'el': 'Greek-Modern (1453-)',
    'en': 'English',
    'eo': 'Esperanto',
    'et': 'Estonian',
    'ee': 'Ewe',
    'fo': 'Faroese',
    'fa': 'Persian',
    'fj': 'Fijian',
    'fi': 'Finnish',
    'fr': 'French',
    'fy': 'Western Frisian',
    'ff': 'Fulah',
    # Previously keyed as 'Ga'; the ISO 639-1 code for Georgian is 'ka'
    # ('ga' is Irish), so a detector returning 'ka' could never match.
    'ka': 'Georgian',
    'gd': 'Gaelic; Scottish Gaelic',
}
>>> from googletrans import Translator
>>> translator = Translator()
>>> translator.detect('이 문장은 한글로 쓰여졌습니다.')
<Detected lang=ko confidence=0.27041003>
>>> from textblob import TextBlob
>>> text = "это компьютерный портал для гиков. It was a beautiful day ."
>>> lang = TextBlob(text)
>>> print(lang.detect_language())
ru
>>> text_content = "Er lebt mit seinen Eltern und seiner Schwester in Berlin. Welcome, to this world of Data Scientist. Today is a lovely day."
>>> doc = nlp(text_content)
>>> detect_language = doc._.language
>>> print(detect_language)
{'language': 'en', 'score': 0.8571372625765084}
>>> from spacy_langdetect import LanguageDetector
>>> import spacy
>>> nlp = spacy.load('en') # 1
>>> nlp.add_pipe(LanguageDetector(), name='language_detector', last=True) #2
>>> text_content = "Er lebt mit seinen Eltern und seiner Schwester in Berlin."
>>> doc = nlp(text_content) #3
>>> detect_language = doc._.language #4
>>> print(detect_language)
{'language': 'de', 'score': 0.9999958526911192}
# Horizontal bar chart of the 'literacy_rate' column against the 'name'
# column of indian_state, with the x-axis limited to the 70-100 range.
states = list(indian_state['name'])
staff = list(indian_state['literacy_rate'])
# NOTE(review): staff_color is not used by any visible statement; the original
# listing was cut off inside the barh() call, so it presumably fed a `color=`
# argument in the missing part -- confirm before removing or wiring it in.
staff_color = [i * 0.000045 for i in staff]
fig, ax = plt.subplots(figsize=(15, 8), dpi=100)
ax.set_xlim(70, 100)
# The call was left unclosed in the original, which made this snippet (and
# everything after it) a syntax error; it is closed here with only the
# arguments that were actually visible.
ax.barh(states, staff, align='center')
# Horizontal bar chart of the 'count' column against the 'trigram' column of
# reuters_comment, with each bar shaded through the Blues colormap.
states = list(reuters_comment['trigram'])
staff = list(reuters_comment['count'])
# Scale the counts down before they are turned into colormap positions below;
# the net factor applied to each count is 0.00045 / 0.006 = 0.075.
staff_color = [i * 0.00045 for i in staff]
fig, ax = plt.subplots(figsize=(15, 8), dpi=100)
# The call was left unclosed in the original listing (a syntax error); it is
# closed here with exactly the arguments that were visible.
ax.barh(
    states,
    staff,
    align='center',
    color=cm.Blues([i / 0.006 for i in staff_color]),
)
# Vertical bar chart of the 'count' column against the 'subgroup' column of
# reddit_subgroup_df.
width = 0.35  # bar width handed to ax.bar below
states = list(reddit_subgroup_df['subgroup'])
staff = list(reddit_subgroup_df['count'])
# One integer position per label; not consumed by any statement in this
# snippet, kept because code further down may rely on it.
x = np.arange(len(states))
fig, ax = plt.subplots(dpi=400, figsize=(15, 8))
# Keep the returned bar container under its original name for later use.
rects1 = ax.bar(states, staff, width=width)
# Vertical bar chart of the 'count' column against the 'subgroup' column of
# reddit_subgroup_df, with every bar tinted according to its own count.
states = list(reddit_subgroup_df['subgroup'])
staff = list(reddit_subgroup_df['count'])
# NOTE(review): a 15x8 inch figure at dpi=800 is a 12000x6400 pixel canvas.
fig, ax = plt.subplots(dpi=800, figsize=(15, 8))
ax.bar(
    states,
    staff,
    width=0.5,
    align='center',
    # count / 1000 is the position looked up in the Blues colormap; assuming
    # cm is matplotlib.cm, counts of 1000 or more all get the darkest shade.
    color=cm.Blues([count / 1000 for count in staff]),
)
states = list(indian_state['name'])
staff = list(indian_state['literacy_rate'])
staff_color = [i * 0.000045 for i in staff]
fig, ax = plt.subplots(figsize=(15, 8), dpi=100)
ax.barh(states, staff,align='center',
# width = 0.5,
color=cm.Blues([i / 0.006 for i in staff_color])