Skip to content

Instantly share code, notes, and snippets.

@ehzawad
Created February 22, 2024 11:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ehzawad/f8ac64d65eb96fbd93c47275d78c5e88 to your computer and use it in GitHub Desktop.
Save ehzawad/f8ac64d65eb96fbd93c47275d78c5e88 to your computer and use it in GitHub Desktop.
import collections
import csv
import re
def is_bangla_word(word: str) -> bool:
    """Return True if *word* contains at least one Bangla character.

    A character counts as Bangla when it lies in the Unicode Bengali block
    (U+0980 to U+09FF). One such character anywhere in the string is
    enough; the rest of the string may be in any script.

    Args:
        word: The text to inspect.

    Returns:
        True when a code point in U+0980..U+09FF occurs in *word*,
        otherwise False (including for the empty string).
    """
    return bool(re.search(r'[\u0980-\u09FF]', word))
def extract_bangla_words(line: str) -> list:
    """Return the Bangla words found in *line*, in order of appearance.

    A word is a maximal run of characters from the Unicode Bengali block
    (U+0980 to U+09FF). Everything else (Latin text, ASCII digits,
    whitespace, punctuation) acts as a separator and is dropped.

    Zero-width joiner / non-joiner characters (U+200D / U+200C) are kept
    when they sit between two Bangla runs: they are used inside some Bangla
    words to control how conjuncts are rendered, and treating them as
    separators would cut such words into fragments.

    Args:
        line: The text to scan.

    Returns:
        A list of the matched words; empty when *line* has no Bangla text.
    """
    # Joiners are only accepted *inside* a word, so every match still starts
    # and ends with a Bangla character.
    return re.findall(
        r'[\u0980-\u09FF]+(?:[\u200C\u200D]+[\u0980-\u09FF]+)*',
        line,
    )
def count_word_frequencies(file_path: str) -> dict:
    """Count how often each Bangla word occurs in a UTF-8 text file.

    The file is read line by line, so it does not have to fit in memory.
    Words are obtained from ``extract_bangla_words``; every token it
    returns is counted.

    Args:
        file_path: Path of the text file to read (decoded as UTF-8).

    Returns:
        A plain dict mapping each word to its number of occurrences, with
        keys in order of first appearance.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    word_counts = collections.Counter()
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            word_counts.update(extract_bangla_words(line))
    # Hand back a plain dict so callers see the same type as before.
    return dict(word_counts)
def write_frequencies_to_csv(word_counts: dict, output_file_path: str) -> None:
    """Write word frequencies to a UTF-8 CSV file.

    The file starts with the header row ``Word,Frequency`` followed by one
    row per entry of *word_counts*, in the mapping's iteration order. An
    existing file at *output_file_path* is overwritten.

    Args:
        word_counts: Mapping of word to occurrence count.
        output_file_path: Destination path of the CSV file.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(output_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Word', 'Frequency'])  # Header row
        writer.writerows(word_counts.items())
# Example usage. Guarded so that it runs only when the file is executed as a
# script; importing the module no longer tries to read the placeholder input.
if __name__ == "__main__":
    file_path = 'your_text_file.txt'  # Replace with the path to your text file
    output_file_path = 'word_frequencies.csv'
    word_counts = count_word_frequencies(file_path)
    write_frequencies_to_csv(word_counts, output_file_path)
    print(f"Word frequencies have been written to {output_file_path}.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment