dajare/ccount.py

## ccount.py
#!/usr/bin/python
# coding: utf-8
import re
import codecs
import sys

## chmod 0755 to make executable
## run with `./name.py input_file`
## source: https://unix.stackexchange.com/a/372270/99759

# Pattern matching runs of characters in the Hebrew Unicode block
# (U+0590..U+05FF).  A plain (non-raw) u'' literal is used on purpose: it
# parses on both Python 2 and Python 3.3+, whereas ur'' is a syntax error on
# Python 3 and Python 2's `re` does not expand \u escapes in raw patterns.
find_hebrew = re.compile(u'[\u0590-\u05ff]+')


def count_hebrew(text):
    """Return how many characters of *text* fall in U+0590..U+05FF.

    *text* must be an already-decoded (unicode) string.
    """
    return sum(len(run) for run in find_hebrew.findall(text))


def count_hebrew_in_file(path):
    """Return the Hebrew character count of the UTF-8 encoded file *path*.

    Raises IOError/OSError if the file cannot be opened and
    UnicodeDecodeError if it is not valid UTF-8.
    """
    # Mode 'r' instead of 'rU': the 'U' flag is rejected since Python 3.11,
    # and newline handling cannot change the count (line terminators are not
    # Hebrew characters).
    with codecs.open(path, 'r', encoding='utf-8') as f:
        # Iterate the reader directly so the file is streamed line by line
        # rather than loaded whole with readlines().
        return sum(count_hebrew(line) for line in f)


def main(argv=None):
    """Command-line entry point: print the count for the file in argv[1].

    Returns a process exit status: 0 on success, 2 when the input file
    argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write('usage: ccount.py input_file\n')
        return 2
    print(count_hebrew_in_file(argv[1]))
    return 0


if __name__ == '__main__':
    sys.exit(main())
	#!/usr/bin/python
	# coding: utf-8
	import re
	import codecs
	import sys

	## chmod 0755 to make executable
	## run with `./name.py input_file`
	## source: https://unix.stackexchange.com/a/372270/99759

	# Count the characters of a UTF-8 text file that fall in the Hebrew
	# Unicode block (U+0590..U+05FF) and print the total.
	#
	# A plain (non-raw) u'' literal is used so the pattern parses on both
	# Python 2 and Python 3.3+; ur'' is a syntax error on Python 3.
	find_hebrew = re.compile(u'[\u0590-\u05ff]+')

	# Path of the input file, taken from the first command-line argument.
	text_file = sys.argv[1]

	count = 0
	# Mode 'r' instead of 'rU': the 'U' flag is rejected since Python 3.11
	# and newline handling has no effect on the count.
	with codecs.open(text_file, 'r', encoding='utf-8') as f:
		# Stream the file line by line instead of loading it with readlines().
		for line in f:
			# Each match is a run of consecutive Hebrew characters.
			for n in find_hebrew.findall(line):
				count += len(n)
	print(count)