larryxiao/* - pdf

## * - pdf
libreoffice --convert-to pdf *.ppt
libreoffice --headless --convert-to pdf *.ppt

## readme
20130607

CONVERT
EXTRACT
CLEANUP

libreoffice --convert-to pdf *.ppt
pdf2txt - extracts text contents of PDF files
pdftk
pdftk 1.pdf 2.pdf 3.pdf cat output merged.pdf
To merge every PDF in the directory in alphabetical order: pdftk *.pdf cat output merged.pdf

## clean up text
#!/bin/bash
# Flatten every *.txt file in the current directory and collect the results
# in the ./out directory. For each NAME.txt:
#   - every newline becomes '|'  (tr maps single characters, so the earlier
#     two-pipe replacement set could never yield '||'; one '|' is what it
#     always produced)
#   - every form feed becomes a space
# The result is stored as ./out/NAME.txt.out.
# Returns non-zero as soon as a file cannot be written or moved.
cleanup_text_files() {
  local f
  for f in *.txt; do
    # An unmatched glob stays literal ("*.txt"); skip it instead of failing.
    [[ -f "$f" ]] || continue
    echo "Processing $f file... \"$f"
    # Without the directory, mv would rename each result to a plain file
    # called "out", every file overwriting the one before it.
    mkdir -p ./out || return 1
    # A single tr pass performs both mappings, so no .temp file is needed.
    tr '\n\f' '| ' < "$f" > "$f.out" || return 1
    mv -- "$f.out" ./out/ || return 1
  done
}

cleanup_text_files

## pdf - text
#!/bin/bash
# Extract the text of every *.pdf file in the current directory with pdf2txt.
# The text of NAME.pdf is written to outputNAME.pdf.txt.
#
# The glob is "*.pdf" rather than "./*.pdf" on purpose: the output name is
# built by prefixing "output" to the matched name, so a "./" prefix would turn
# it into "output./NAME.pdf.txt". Earlier runs recorded in these notes:
#   FILES=./*.pdf ; for f in $FILES
#     Processing ./20130604202323560.pdf file... "output./20130604202323560.pdf
#   for f in *.pdf
#     Processing 20130604202323560.pdf file... "output20130604202323560.pdf
#
# Returns non-zero if pdf2txt failed for at least one file; the remaining
# files are still processed.
convert_pdfs_to_text() {
  local f status=0
  for f in *.pdf; do
    # An unmatched glob stays literal ("*.pdf"); skip it instead of failing.
    [[ -f "$f" ]] || continue
    echo "Processing $f file... \"output$f.txt"
    # Quote the input name so that names containing spaces or glob
    # characters reach pdf2txt as one argument.
    if ! pdf2txt -o "output$f.txt" "$f"; then
      printf 'pdf2txt failed for %s\n' "$f" >&2
      status=1
    fi
  done
  return "$status"
}

convert_pdfs_to_text
	libreoffice --convert-to pdf *.ppt
	libreoffice --headless --convert-to pdf *.ppt
	20130607

	CONVERT
	EXTRACT
	CLEANUP

	libreoffice --convert-to pdf *.ppt
	pdf2txt - extracts text contents of PDF files
	pdftk
	pdftk 1.pdf 2.pdf 3.pdf cat output merged.pdf
	To merge every PDF in the directory in alphabetical order: pdftk *.pdf cat output merged.pdf
	#!/bin/bash
	# Flatten every *.txt file in the current directory and collect the results
	# in the ./out directory. For each NAME.txt:
	#   - every newline becomes '|'  (tr maps single characters, so a two-pipe
	#     replacement set can never yield '||'; a newline becomes one '|')
	#   - every form feed becomes a space
	# The result is stored as ./out/NAME.txt.out.
	# Returns non-zero as soon as a file cannot be written or moved.
	cleanup_text_files() {
	  local f
	  for f in *.txt; do
	    # An unmatched glob stays literal ("*.txt"); skip it instead of failing.
	    [[ -f "$f" ]] || continue
	    echo "Processing $f file... \"$f"
	    # Without the directory, mv would rename each result to a plain file
	    # called "out", every file overwriting the one before it.
	    mkdir -p ./out || return 1
	    # A single tr pass performs both mappings, so no .temp file is needed.
	    tr '\n\f' '| ' < "$f" > "$f.out" || return 1
	    mv -- "$f.out" ./out/ || return 1
	  done
	}

	cleanup_text_files
	#!/bin/bash
	# Extract the text of every *.pdf file in the current directory with pdf2txt.
	# The text of NAME.pdf is written to outputNAME.pdf.txt.
	#
	# The glob is "*.pdf" rather than "./*.pdf" on purpose: the output name is
	# built by prefixing "output" to the matched name, so a "./" prefix would turn
	# it into "output./NAME.pdf.txt". Earlier runs recorded in these notes:
	#   FILES=./*.pdf ; for f in $FILES
	#     Processing ./20130604202323560.pdf file... "output./20130604202323560.pdf
	#   for f in *.pdf
	#     Processing 20130604202323560.pdf file... "output20130604202323560.pdf
	#
	# Returns non-zero if pdf2txt failed for at least one file; the remaining
	# files are still processed.
	convert_pdfs_to_text() {
	  local f status=0
	  for f in *.pdf; do
	    # An unmatched glob stays literal ("*.pdf"); skip it instead of failing.
	    [[ -f "$f" ]] || continue
	    echo "Processing $f file... \"output$f.txt"
	    # Quote the input name so that names containing spaces or glob
	    # characters reach pdf2txt as one argument.
	    if ! pdf2txt -o "output$f.txt" "$f"; then
	      printf 'pdf2txt failed for %s\n' "$f" >&2
	      status=1
	    fi
	  done
	  return "$status"
	}

	convert_pdfs_to_text