lppier/detect_percentage_english.py

## detect_percentage_english.py
import string
import urllib.request
from nltk.corpus import words

# Lookup set of the ASCII punctuation characters listed in string.punctuation.
punctuation = set(string.punctuation)

def remove_punc(str):
    """Return the input text with every character found in `punctuation` dropped.

    Characters outside `string.punctuation` (letters, digits, whitespace and
    any non-ASCII symbols) are kept in their original order.

    Note: the parameter name shadows the builtin `str`; it is left as-is so
    existing callers are unaffected.
    """
    kept = []
    for ch in str:
        if ch in punctuation:
            continue
        kept.append(ch)
    return ''.join(kept)

# Count how many whitespace-separated tokens of the input file appear in the
# NLTK word list, then report the totals and the share as a percentage.
total_count = 0
eng_count = 0

# Build the vocabulary once, before the loop. Calling words.words() for every
# token repeats the call and then scans the returned list linearly; a set
# gives the same membership answer with a constant-time lookup.
# NOTE(review): tokens are lower-cased below but the vocabulary is used as-is,
# so any capitalised entries in the word list can never match — confirm
# whether that is intended.
english_vocab = set(words.words())

# NOTE(review): assumes the text file is UTF-8 encoded — confirm. Without an
# explicit encoding, open() falls back to the platform's locale default.
with open('hsbc_th_supplement-pdf-page-1-text.txt', encoding='utf-8') as f:
    for line in f:
        # Strip punctuation, normalise case, then split on whitespace.
        text_words = remove_punc(line).lower().split()
        print(text_words)
        total_count += len(text_words)
        for word in text_words:
            print(f"Finding {word}")
            if word in english_vocab:
                eng_count += 1

print(f'{eng_count} English words found')
print(f'{total_count} total words found')

# Guard against an empty file so the percentage never divides by zero.
percentage_eng = 0 if total_count == 0 else eng_count / total_count * 100
print(f'{percentage_eng}% of words were English')
	import string
	import urllib.request
	from nltk.corpus import words

	# Lookup set of the ASCII punctuation characters listed in string.punctuation.
	punctuation = set(string.punctuation)

	def remove_punc(str):
		"""Return the input text with every character found in `punctuation` removed.

		All other characters are kept in their original order.

		Note: the parameter name shadows the builtin `str`; it is left as-is so
		existing callers are unaffected.
		"""
		# The body must be indented under the def; at the same level as the
		# def the function has no body and the file does not parse.
		return ''.join(c for c in str if c not in punctuation)

	# Count how many whitespace-separated tokens of the input file appear in the
	# NLTK word list, then report the totals and the share as a percentage.
	total_count = 0
	eng_count = 0

	# Build the vocabulary once, before the loop. Calling words.words() for every
	# token repeats the call and then scans the returned list linearly; a set
	# gives the same membership answer with a constant-time lookup.
	# NOTE(review): tokens are lower-cased below but the vocabulary is used as-is,
	# so any capitalised entries in the word list can never match — confirm
	# whether that is intended.
	english_vocab = set(words.words())

	# NOTE(review): assumes the text file is UTF-8 encoded — confirm. Without an
	# explicit encoding, open() falls back to the platform's locale default.
	with open('hsbc_th_supplement-pdf-page-1-text.txt', encoding='utf-8') as f:
		for line in f:
			# Strip punctuation, normalise case, then split on whitespace.
			text_words = remove_punc(line).lower().split()
			print(text_words)
			total_count += len(text_words)
			for word in text_words:
				print(f"Finding {word}")
				if word in english_vocab:
					eng_count += 1

	print(f'{eng_count} English words found')
	print(f'{total_count} total words found')

	# Guard against an empty file so the percentage never divides by zero.
	percentage_eng = 0 if total_count == 0 else eng_count / total_count * 100
	print(f'{percentage_eng}% of words were English')