# JGVerdugo/dotika.py

## dotika.py
import glob
import os
import subprocess

# USAGE:
# 1. Download the Tika command-line tool (tika-app-1.4.jar) from http://tika.apache.org/download.html.
# 2. Put the files you want to convert in the same directory as the jar.
# 3. Put this script in that directory too (make sure you have Python and Java).
# 4. On the command line, run "python dotika.py".
# If Tika can extract your files, a new file with the extension .new
# will be created for each file matching the "extension" filter (see
# the code below). This script does nothing but automate the extraction
# process.
#
# Here are the default values.
# If you need a different format or encoding, change these values.
# Be sure to read this first: http://tika.apache.org/1.4/gettingstarted.html
# (especially the section "Using Tika as a command line utility").

encoding = "UTF-8"        # passed to Tika as --encoding=<value>
outputformat = "--text"   # Tika output option placed before --encoding
extension = "*.doc"       # glob pattern selecting the input files

# Name of the Tika jar; it is looked up relative to the current directory.
tika_jar = "tika-app-1.4.jar"


def tika_command(path):
    """Return the argument list that runs Tika on *path*.

    The list is meant to be executed without a shell, so a file name
    containing spaces or shell metacharacters is passed to Tika verbatim
    instead of being split or interpreted.
    """
    return ["java", "-jar", tika_jar, outputformat,
            "--encoding=%s" % encoding, path]


def extract(path):
    """Run Tika on *path* and store its output in ``path + ".new"``.

    The name of the output file is printed and returned. Problems
    (Java cannot be started, the output file cannot be created, Tika
    exits with a non-zero status) are reported with a printed message
    rather than an exception, so one bad file does not stop the batch.
    """
    newfile = path + ".new"
    print(newfile)
    try:
        # Opening the file ourselves replaces the shell's "> newfile"
        # redirection; Tika's standard output goes straight into it.
        with open(newfile, "wb") as out:
            status = subprocess.call(tika_command(path), stdout=out)
    except (IOError, OSError) as err:
        # Raised, for instance, when "java" is not on PATH or the output
        # file cannot be opened for writing.
        print("Could not run Tika on %s: %s" % (path, err))
    else:
        if status != 0:
            print("Tika exited with status %d for %s" % (status, path))
    return newfile


files = glob.glob(extension)

for doc_path in files:
    extract(doc_path)