physacco/zwc.py

## zwc.py
#!/usr/bin/env python
# encoding: utf-8

# Name: Chinese Word Counter
# Description: A character-based word counter for Chinese text. It reads UTF-8
# text from stdin, strips whitespace (including U+3000, the full-width
# ideographic space), and counts the remaining characters. In my tests, it
# usually produces the same result as WPS. It may be useful for some writers.
# Example: zwc.py < foo.txt
# Author: physacco
# Date: 2011-02-08

import sys, re

# Runs of ASCII whitespace plus the full-width ideographic space (U+3000).
# The ASCII set is spelled out instead of using \s so the pattern means the
# same thing on Python 2 (where \s is ASCII-only unless re.UNICODE is given)
# and on Python 3 (where \s on text patterns matches all Unicode whitespace).
# A non-raw literal is used so that \u3000 becomes the real character on both
# interpreters; the Python 2 re module does not understand \uXXXX itself.
_BLANKS = re.compile(u'[ \t\n\r\f\v\u3000]+')


def count_text(raw_data):
    """Return (words, chars, bytes) for a UTF-8 encoded byte string.

    raw_data is the undecoded input. In the result:

    * words -- characters left after removing ASCII whitespace and U+3000
      (each remaining character counts as one "word").
    * chars -- characters in the decoded text, whitespace included.
    * bytes -- length of raw_data itself.

    Raises UnicodeDecodeError if raw_data is not valid UTF-8.
    """
    text = raw_data.decode('utf-8')
    visible = _BLANKS.sub(u'', text)
    return len(visible), len(text), len(raw_data)


def main():
    """Read all of stdin and print the one-line count summary."""
    # Python 3 exposes the undecoded bytes as sys.stdin.buffer; on Python 2
    # sys.stdin.read() already returns a byte string.
    stream = getattr(sys.stdin, 'buffer', sys.stdin)
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("%d words | %d chars | %d bytes" % count_text(stream.read()))


if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	# encoding: utf-8

	# Name: Chinese Word Counter
	# Description: A character-based word counter for Chinese text. It reads UTF-8
	# text from stdin, strips whitespace (including U+3000, the full-width
	# ideographic space), and counts the remaining characters. In my tests, it
	# usually produces the same result as WPS. It may be useful for some writers.
	# Example: zwc.py < foo.txt
	# Author: physacco
	# Date: 2011-02-08

	import sys, re

	# Runs of ASCII whitespace plus the full-width ideographic space (U+3000).
	# The ASCII set is spelled out instead of using \s so the pattern means the
	# same thing on Python 2 (where \s is ASCII-only unless re.UNICODE is given)
	# and on Python 3 (where \s on text patterns matches all Unicode whitespace).
	# A non-raw literal is used so that \u3000 becomes the real character on both
	# interpreters; the Python 2 re module does not understand \uXXXX itself.
	_BLANKS = re.compile(u'[ \t\n\r\f\v\u3000]+')


	def count_text(raw_data):
	    """Return (words, chars, bytes) for a UTF-8 encoded byte string.

	    raw_data is the undecoded input. In the result:

	    * words -- characters left after removing ASCII whitespace and U+3000
	      (each remaining character counts as one "word").
	    * chars -- characters in the decoded text, whitespace included.
	    * bytes -- length of raw_data itself.

	    Raises UnicodeDecodeError if raw_data is not valid UTF-8.
	    """
	    text = raw_data.decode('utf-8')
	    visible = _BLANKS.sub(u'', text)
	    return len(visible), len(text), len(raw_data)


	def main():
	    """Read all of stdin and print the one-line count summary."""
	    # Python 3 exposes the undecoded bytes as sys.stdin.buffer; on Python 2
	    # sys.stdin.read() already returns a byte string.
	    stream = getattr(sys.stdin, 'buffer', sys.stdin)
	    # Plain "|" separators: the backslash-escaped pipes were a markdown
	    # artifact and would have been printed literally.
	    print("%d words | %d chars | %d bytes" % count_text(stream.read()))


	if __name__ == '__main__':
	    main()