# edsu/wpd2txt.py

## wpd2txt.py
#!/usr/bin/env python

import sys

"""
For each character c, if its value is:
    #13: treat as newline
    #128, #160: treat as space ' '
    #169..#171, #173, #174: treat as dash '-'
    #192..#236: skip ahead and ignore all characters until another occurrence
        of character c is found; resume at the following character
    #0..#31, #129..#159, #161..#168, #172, #175..#255: ignore (control characters)
    else treat as regular text character

from http://fileformats.archiveteam.org/wiki/WordPerfect
"""

def wpd2txt(filename):
    """Extract the plain text from a WordPerfect document.

    The file is read as raw bytes and each byte value is handled as follows:

    * 13 becomes a newline;
    * 128 and 160 become a space;
    * 169-171, 173 and 174 become a dash;
    * 192-236 open a skipped span: that byte and everything up to and
      including the next occurrence of the same byte value are dropped;
    * 0-31, 129-159, 161-168, 172 and 175-255 are dropped;
    * anything else (32-127) is kept unchanged.

    If a skipped span is never closed, everything after its opening byte is
    dropped.

    Args:
        filename: path of the WordPerfect file to read.

    Returns:
        The extracted text as a ``str``.

    Raises:
        IOError / OSError: propagated from ``open``/``read`` when the file
            cannot be opened or read.
    """
    # The context manager closes the handle even when reading fails.
    with open(filename, 'rb') as fh:
        # bytearray yields integer byte values on both Python 2 and Python 3,
        # so no ord() call (which breaks on Python 3 bytes) is needed.
        data = bytearray(fh.read())

    pieces = []
    skip = None  # byte value that closes the span currently being skipped
    for n in data:
        if skip is not None:
            # Inside a skipped span: drop everything, including the closing
            # byte itself, and resume with the byte that follows it.
            if n == skip:
                skip = None
            continue
        if n == 13:
            pieces.append("\n")
        elif n in (128, 160):
            pieces.append(' ')
        elif n in (169, 170, 171, 173, 174):
            pieces.append('-')
        elif 192 <= n <= 236:
            # Must be tested before the generic 175..255 "ignore" range below.
            skip = n
        elif n <= 31 or 129 <= n <= 159 or 161 <= n <= 168 or \
                n == 172 or n >= 175:
            pass  # control / formatting byte: ignored
        else:
            # Only 32..127 reaches this branch, so chr() is safe everywhere.
            pieces.append(chr(n))
    return ''.join(pieces)


if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the path of the
    # WordPerfect file, and prints the extracted text to standard output.
    if len(sys.argv) != 2:
        # print(...) with a single argument produces the same output on
        # Python 2 and Python 3, unlike the Python 2-only print statement.
        print("usage wpd2txt.py <filename>")
        sys.exit(1)
    filename = sys.argv[1]
    print(wpd2txt(filename))
	#!/usr/bin/env python

	import sys

	"""
	For each character c, if its value is:
	#13: treat as newline
	#128, #160: treat as space ' '
	#169..#171, #173, #174: treat as dash '-'
	#192..#236: skip ahead and ignore all characters until another occurrence
	of character c is found; resume at the following character
	#0..#31, #129..#159, #161..#168, #172, #175..#255: ignore (control characters)
	else treat as regular text character

	from http://fileformats.archiveteam.org/wiki/WordPerfect
	"""

	def wpd2txt(filename):
	    """Extract the plain text from a WordPerfect document.

	    The file is read as raw bytes and each byte value is handled as follows:

	    * 13 becomes a newline;
	    * 128 and 160 become a space;
	    * 169-171, 173 and 174 become a dash;
	    * 192-236 open a skipped span: that byte and everything up to and
	      including the next occurrence of the same byte value are dropped;
	    * 0-31, 129-159, 161-168, 172 and 175-255 are dropped;
	    * anything else (32-127) is kept unchanged.

	    If a skipped span is never closed, everything after its opening byte is
	    dropped.

	    Args:
	        filename: path of the WordPerfect file to read.

	    Returns:
	        The extracted text as a ``str``.

	    Raises:
	        IOError / OSError: propagated from ``open``/``read`` when the file
	            cannot be opened or read.
	    """
	    # The context manager closes the handle even when reading fails.
	    with open(filename, 'rb') as fh:
	        # bytearray yields integer byte values on both Python 2 and Python 3,
	        # so no ord() call (which breaks on Python 3 bytes) is needed.
	        data = bytearray(fh.read())

	    pieces = []
	    skip = None  # byte value that closes the span currently being skipped
	    for n in data:
	        if skip is not None:
	            # Inside a skipped span: drop everything, including the closing
	            # byte itself, and resume with the byte that follows it.
	            if n == skip:
	                skip = None
	            continue
	        if n == 13:
	            pieces.append("\n")
	        elif n in (128, 160):
	            pieces.append(' ')
	        elif n in (169, 170, 171, 173, 174):
	            pieces.append('-')
	        elif 192 <= n <= 236:
	            # Must be tested before the generic 175..255 "ignore" range below.
	            skip = n
	        elif n <= 31 or 129 <= n <= 159 or 161 <= n <= 168 or \
	                n == 172 or n >= 175:
	            pass  # control / formatting byte: ignored
	        else:
	            # Only 32..127 reaches this branch, so chr() is safe everywhere.
	            pieces.append(chr(n))
	    return ''.join(pieces)


	if __name__ == "__main__":
	    # Command-line entry point: expects exactly one argument, the path of the
	    # WordPerfect file, and prints the extracted text to standard output.
	    if len(sys.argv) != 2:
	        # print(...) with a single argument produces the same output on
	        # Python 2 and Python 3, unlike the Python 2-only print statement.
	        print("usage wpd2txt.py <filename>")
	        sys.exit(1)
	    filename = sys.argv[1]
	    print(wpd2txt(filename))