Skip to content

Instantly share code, notes, and snippets.

@ehzawad
Created February 24, 2024 20:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ehzawad/5603e77ed1af52171a2ae527695be602 to your computer and use it in GitHub Desktop.
Save ehzawad/5603e77ed1af52171a2ae527695be602 to your computer and use it in GitHub Desktop.
UTF-8 print all Bengali Characters
import yaml
# Inclusive (start, end) code-point ranges taken from the Unicode Bengali
# block (U+0980-U+09FF). Both ends of every tuple are part of the range.
# The gaps between consecutive ranges are code points that are unassigned
# in that block, which is why they are left out here.
bengali_char_ranges = [
(0x0980, 0x0983), # ANJI, CANDRABINDU, ANUSVARA, VISARGA
(0x0985, 0x098C), # Independent vowels A .. VOCALIC L
(0x098F, 0x0990), # Independent vowels E, AI
(0x0993, 0x09A8), # Vowels O, AU, then consonants KA .. NA
(0x09AA, 0x09B0), # Consonants PA .. RA
(0x09B2, 0x09B2), # Consonant LA
(0x09B6, 0x09B9), # Consonants SHA .. HA
(0x09BC, 0x09C4), # NUKTA, AVAGRAHA, vowel signs AA .. VOCALIC RR
(0x09C7, 0x09C8), # Vowel signs E, AI
(0x09CB, 0x09CE), # Vowel signs O, AU, then VIRAMA and KHANDA TA
(0x09D7, 0x09D7), # AU LENGTH MARK
(0x09DC, 0x09DD), # Letters RRA, RHA
(0x09DF, 0x09E3), # Letter YYA, vocalic RR / LL letters and vowel signs
(0x09E6, 0x09FE), # Digits 0-9, currency and other signs up to SANDHI MARK
]
# Container that is later serialized to YAML; the "characters" list starts
# empty and is filled by populate_characters().
bengali_chars = {
"characters": []
}
def populate_characters(ranges, char_dict):
    """Append every character covered by *ranges* to ``char_dict["characters"]``.

    Args:
        ranges: Iterable of ``(start, end)`` integer code-point pairs. Both
            ends are inclusive.
        char_dict: Dictionary that is mutated in place. Characters are
            appended to the list stored under ``"characters"``; the list is
            created when the key is missing.

    Returns:
        None. The result is delivered by mutating ``char_dict``.
    """
    # setdefault reuses an existing list and tolerates a dict without the key.
    characters = char_dict.setdefault("characters", [])
    for start, end in ranges:
        # ``end + 1`` because range() excludes its stop value.
        characters.extend(map(chr, range(start, end + 1)))
# Fill the shared dictionary from the range table defined above
populate_characters(ranges=bengali_char_ranges, char_dict=bengali_chars)

# Write the collected characters to a UTF-8 encoded YAML file
yaml_file_path = "bengali_chars.yaml"
with open(yaml_file_path, mode="w", encoding="utf-8") as f:
    # allow_unicode=True emits the characters themselves instead of escapes
    yaml.dump(bengali_chars, stream=f, allow_unicode=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment