This code is a slightly modified version of the code from here.
Please read the README file there for execution instructions.
from collections import Counter | |
lang_unicodes = [['English',('\u0021','\u007F')], ['Devnagri',('\u0900','\u097F'),('\uA8E0','\uA8FF')], ['Bangla', ('\u0980','\u09FF')] | |
,['Gujarati',('\u0A80','\u0AFF')], ['Urdu/Persian/Arabic', ('\u0600','\u06FF'),('\u08A0','\u08FF')], ['Tamil',('\u0B80','\u0BFF')] | |
,['Telegu',('\u0C00','\u0C7F')], ['punjabi/gurumukhi',('\u0A00','\u0A7F')], ['malayalam',('\u0D00','\u0D7F')] | |
,['oriya',('\u0B00','\u0B7F')], ['kannada',('\u0C80','\u0CFF')] ,['Sinhala',('\u0D80','\u0DFF')] | |
,['Thai',('\u0E00','\u0E7F')], ['Lao',('\u0E80','\u0EFF')], ['Tibetan',('\u0F00','\u0FFF')] | |
,['Myanmar',('\u1000','\u109F')], ['Georgian',('\u10A0','\u10FF')], ['Ethiopic',('\u1200','\u139F')] |
This code is a slightly modified version of the code from here.
Please read the README file there for execution instructions.