# ehzawad/word_frequencies.py

## word_frequencies.py
import csv
import re
from collections import Counter

def is_bangla_word(word):
    """Tell whether ``word`` contains at least one Bangla character.

    A Bangla character is any code point in the Unicode Bengali block,
    U+0980 through U+09FF. One such character anywhere in ``word`` is
    enough; the rest of the string may be anything.

    Args:
        word: The string to inspect.

    Returns:
        ``True`` if a Bangla character is present, otherwise ``False``.
    """
    bangla_hit = re.search('[\u0980-\u09FF]', word)
    return bangla_hit is not None

def extract_bangla_words(line):
    """Collect every maximal run of Bangla characters found in ``line``.

    Anything outside the Unicode range U+0980 to U+09FF (spaces, Latin
    letters, ASCII digits, punctuation, ...) acts as a separator and is
    discarded.

    Args:
        line: The text to scan.

    Returns:
        A list of the Bangla runs in order of appearance; empty when the
        line contains no Bangla characters.
    """
    return [hit.group() for hit in re.finditer('[\u0980-\u09FF]+', line)]

def count_word_frequencies(file_path):
    """Count how often each Bangla word occurs in a UTF-8 text file.

    The file is read line by line, so it never has to fit in memory at once.

    Args:
        file_path: Path of the text file to read; it is decoded as UTF-8.

    Returns:
        A plain ``dict`` mapping each Bangla word to its number of
        occurrences, with keys in order of first appearance in the file.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    tally = Counter()
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # extract_bangla_words() only returns runs of Bangla characters,
            # so no additional per-word filtering is needed here.
            tally.update(extract_bangla_words(line))
    # Hand back a plain dict so callers see the same type as before.
    return dict(tally)

def write_frequencies_to_csv(word_counts, output_file_path):
    """Save word counts as a two-column, UTF-8 encoded CSV file.

    The first row is the header ``Word,Frequency``; each following row
    holds one word and its count, in the mapping's iteration order. An
    existing file at ``output_file_path`` is overwritten.

    Args:
        word_counts: Mapping of word to occurrence count.
        output_file_path: Destination path of the CSV file.
    """
    header = ['Word', 'Frequency']
    # newline='' lets the csv module control line endings itself.
    with open(output_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        out.writerows(word_counts.items())

# Example usage: count the Bangla words in `file_path` and save the counts
# as a CSV file at `output_file_path`.
# NOTE(review): these statements sit at module level, so they also execute
# whenever this file is imported — consider an `if __name__ == "__main__":`
# guard if the functions above are meant to be reused.
file_path = 'your_text_file.txt'  # Replace with the path to your text file
output_file_path = 'word_frequencies.csv'
word_counts = count_word_frequencies(file_path)
write_frequencies_to_csv(word_counts, output_file_path)

# Tell the user where the results were written.
print(f"Word frequencies have been written to {output_file_path}.")
	import re
	import csv

	def is_bangla_word(word):
		"""Return True if ``word`` contains at least one Bangla character.

		Bangla characters are the code points U+0980 to U+09FF.
		"""
		return bool(re.search('[\u0980-\u09FF]', word))

	def extract_bangla_words(line):
		"""Return the runs of Bangla characters (U+0980 to U+09FF) in ``line``.

		All other characters act as separators and are dropped; the result
		is an empty list when ``line`` has no Bangla characters.
		"""
		return re.findall('[\u0980-\u09FF]+', line)

	def count_word_frequencies(file_path):
		"""Count how often each Bangla word occurs in a UTF-8 text file.

		Args:
			file_path: Path of the text file to read; it is decoded as UTF-8.

		Returns:
			A plain ``dict`` mapping each Bangla word to its number of
			occurrences, with keys in order of first appearance.
		"""
		tally = Counter()
		with open(file_path, 'r', encoding='utf-8') as file:
			for line in file:
				# extract_bangla_words() only returns runs of Bangla
				# characters, so no per-word filtering is needed here.
				tally.update(extract_bangla_words(line))
		# Hand back a plain dict rather than the Counter subclass.
		return dict(tally)

	def write_frequencies_to_csv(word_counts, output_file_path):
		"""Write word counts to a UTF-8 CSV file with a ``Word,Frequency`` header.

		Args:
			word_counts: Mapping of word to occurrence count; rows follow
				the mapping's iteration order.
			output_file_path: Destination path; an existing file is overwritten.
		"""
		# newline='' lets the csv module control line endings itself.
		with open(output_file_path, 'w', newline='', encoding='utf-8') as csv_file:
			writer = csv.writer(csv_file)
			writer.writerow(['Word', 'Frequency'])  # Header row
			writer.writerows(word_counts.items())

	# Example usage: count the Bangla words in `file_path` and save the counts
	# as a CSV file at `output_file_path`.
	# NOTE(review): this section repeats the example above with a leading tab on
	# every line — it looks like a duplicated paste; confirm whether it is needed.
	file_path = 'your_text_file.txt' # Replace with the path to your text file
	output_file_path = 'word_frequencies.csv'
	word_counts = count_word_frequencies(file_path)
	write_frequencies_to_csv(word_counts, output_file_path)

	# Tell the user where the results were written.
	print(f"Word frequencies have been written to {output_file_path}.")