Skip to content

Instantly share code, notes, and snippets.

@jasom
Created September 2, 2014 19:48
Show Gist options
  • Save jasom/a55c0574d6fb5976cfa2 to your computer and use it in GitHub Desktop.
Save jasom/a55c0574d6fb5976cfa2 to your computer and use it in GitHub Desktop.
Example of parsing a pdf
;;;; fccgparse.lisp
(in-package #:fccgparse)
;;; "fccgparse" goes here. Hacks and glory await!
;(declaim (optimize (debug 3)))
;(declaim (notinline make-instance))
(defun my-capitalize (string)
(format nil "~{~A~^ ~}" (mapcar (lambda (x) (string-upcase x :end 1)) (split-sequence #\Space string))))
(defun strip (string)
(with-output-to-string (s)
(loop with started = nil
with maybe = nil
for char across string
when started
do (if (char= char #\Space)
(push char maybe)
(progn
(when maybe (write-sequence maybe s))
(write-char char s)
(setf maybe nil)))
else when (char/= char #\Space)
do (setf started t)
(write-char char s))))
(defclass simple-pdf-device ()
((output-stream :initarg :output-stream)
(last-y :initform -1d20)))
(defmethod DEVICE-BEGIN-TAG ((device simple-pdf-device) tag &optional props)
(declare (ignore device tag props)))
(defmethod DEVICE-END-TAG ((device simple-pdf-device)))
(defmethod DEVICE-DO-TAG ((device simple-pdf-device) tag &optional props)
(declare (ignore device tag props)))
(defmethod DEVICE-BEGIN-PAGE ((device simple-pdf-device) page ctm))
(defmethod DEVICE-END-PAGE ((device simple-pdf-device) page))
(defmethod DEVICE-BEGIN-FIGURE ((device simple-pdf-device) bbox matrix))
(defmethod DEVICE-END-FIGURE ((device simple-pdf-device)))
(defmethod DEVICE-PAINT-PATH ((device simple-pdf-device) graphicstate stroke fill evenodd path))
(defmethod DEVICE-RENDER-IMAGE ((device simple-pdf-device) stream))
(defmethod DEVICE-RENDER-STRING ((device simple-pdf-device) textstate seq)
(destructuring-bind (x y) (nthcdr 4 (slot-value textstate 'pdfparse::matrix))
(declare (ignorable x))
;(when (or (< x 0) (< y 0)) (return-from device-render-string))
(when (> (abs (- (slot-value device 'last-y) y)) 3)
(write-char #\Newline (slot-value device 'output-stream)))
(setf (slot-value device 'last-y) y))
(loop with font = (slot-value textstate 'pdfparse::font)
with outs = (slot-value device 'output-stream)
for (a b . r) = seq then r
when font
do (loop for cid in (pdfparse::font-decode font a)
for ch = (handler-case (pdfparse::to-unichr font cid)
(pdfparse::key-error () (format nil "<CID ~X>" cid)))
when ch
do (format outs "~a"
ch))
;do (write-sequence
;(babel:octets-to-string
;(map-into (make-array (length a) :element-type '(unsigned-byte 8)) #'char-code a)
;:errorp nil) (slot-value device 'output-stream))
unless r return nil))
;(write-char #\Newline (slot-value device 'output-stream)))
;(format t "RENDER-STRING: ~S~%" seq))
(defclass table-pdf-device (simple-pdf-device) ())
(defmethod DEVICE-RENDER-STRING ((device table-pdf-device) textstate seq)
(let* ((fontsize (slot-value textstate 'pdfparse::fontsize))
(scaling (* .01d0 (slot-value textstate 'pdfparse::scaling)))
(charspace (* scaling (slot-value textstate 'pdfparse::charspace)))
(wordspace (if (or (not (slot-exists-p textstate 'pdfparse::font))
(pdfparse::is-multibyte (slot-value textstate 'pdfparse::font)))
0
(* scaling (slot-value textstate 'pdfparse::wordspace))))
(dxscale (* .001d0 fontsize scaling)))
;(destructuring-bind (x y) (nthcdr 4 (slot-value textstate 'pdfparse::matrix))
;(print (slot-value textstate 'pdfparse::matrix))
;(when (or (< x 0) (< y 0)) (return-from device-render-string))
(destructuring-bind (dx dy) (slot-value textstate 'pdfparse::linematrix)
(let ((y (car (last (pdfparse::translate-matrix (slot-value textstate 'pdfparse::matrix) (list dx dy))))))
(when (> (abs (- (slot-value device 'last-y) y)) 3)
(push :newline (slot-value device 'output-stream)))
(setf (slot-value device 'last-y) y))
(with-slots (output-stream) device
(loop with font = (slot-value textstate 'pdfparse::font)
;with x = x
with needcharspace = nil
for (string adj . r) = seq then r
when font
do
(push
(append
(nthcdr 4 (pdfparse::translate-matrix
(slot-value textstate 'pdfparse::matrix)
(list dx dy)))
(list
(with-output-to-string (outs)
(loop for cid in (pdfparse::font-decode font string)
for ch = (handler-case (pdfparse::to-unichr font cid)
(pdfparse::key-error () (format nil "<CID ~X>" cid)))
when ch
do (format outs "~a"
ch)
(incf dx (* (pdfparse::char-width font cid) fontsize scaling))
(when (= cid 32) (incf dx wordspace))
(when needcharspace (incf dx charspace))
(setf needcharspace t))))
(nthcdr 4 (pdfparse::translate-matrix
(slot-value textstate 'pdfparse::matrix)
(list dx dy)))) output-stream)
(when adj
(decf dx (* dxscale adj)))
;do (write-sequence
;(babel:octets-to-string
;(map-into (make-array (length a) :element-type '(unsigned-byte 8)) #'char-code a)
;:errorp nil) (slot-value device 'output-stream))
unless r return nil)
(setf (slot-value textstate 'pdfparse::linematrix) (list dx dy))
))))
(defmethod (setf device-ctm) (ctm (device simple-pdf-device)) ctm)
(defun get-page-text-positions (pdffilename pages)
(let ((device(make-instance 'table-pdf-device :output-stream nil)))
(pdfparse::process-pdf
(pdfparse::make-pdf-resource-manager)
device
(pdfparse::make-pdf-input-stream pdffilename)
:pagenos pages)
(nreverse (slot-value device 'output-stream))))
(defun get-page-text (pdffilename pages)
(with-output-to-string (f)
(pdfparse::process-pdf
(pdfparse::make-pdf-resource-manager)
(make-instance 'simple-pdf-device :output-stream f)
(pdfparse::make-pdf-input-stream pdffilename)
:pagenos pages)))
(defun get-feat-pages (pdffile)
(split-sequence #\Newline
(fccgparse::get-page-text pdffile (loop for i from 87 to 111 collect i))))
(defun preprocess-a-feat-line (line)
(let*
((line
(with-output-to-string (str)
(loop for (word . rest) =
(split-sequence #\Space
line
:remove-empty-subseqs t)
then rest
do (write-sequence word str)
while rest
do (write-char #\Space str))))
(match (cl-ppcre:scan "[A-Z ][’A-Z -]+$" line)))
(if (and match (> match 0))
(list (subseq line 0 match)
(subseq line (if (char= (char line match) #\Space)
(1+ match) match)))
(list line))))
(defun preprocess-feat-lines (lines)
(loop with current-line = (make-string-output-stream)
for line in lines
when (or (cl-ppcre:scan "^\\d+$" line)
(string= line "LORE")
(string= line "CHAPTER 2"))
do (setf line "")
when (cl-ppcre:scan "^(Benefit: )|(Prerequisites: )" line)
nconc (preprocess-a-feat-line (get-output-stream-string current-line))
do (write-sequence line current-line)
unless (or (string= line "") (char= (char line (1- (length line))) #\Space))
nconc (preprocess-a-feat-line (get-output-stream-string current-line))))
(defun extract-feat-info (preprocessed-lines)
(loop
with state = :begin
with name
with short
with prereq
with benefit
with mycat
with result = nil
with newfeat = (lambda ()
(push (list (my-capitalize name) short prereq benefit mycat) result)
(setf prereq "")
(setf benefit "")
(terpri))
for line in preprocessed-lines
do
(case state
(:begin
(when (string= line "BASIC COMBAT FEATS")
(setf mycat "BASIC COMBAT FEATS"
state :name)))
((:name :benefit)
(cond
((cl-ppcre:scan "FEATS" line)
(when name (funcall newfeat))
(setf mycat line
name nil))
((cl-ppcre:scan "^[A-Z ][’A-Z -]+$" line)
(when name
(funcall newfeat))
(setf name line
state :short))
((eql state :benefit)
(setf benefit (concatenate 'string benefit (string #\Newline) line)))))
(:short
(setf short line
state :details))
(:details
(cond
((starts-with-subseq "Benefit: " line)
(setf benefit (subseq line 9)
state :benefit))
((starts-with-subseq "Prerequisites: " line)
(setf prereq (subseq line 15)))
(t nil))))
finally (progn
(funcall newfeat)
(return (nreverse result)))))
(defun get-class-pages (pdfpath)
(split-sequence #\Newline
(fccgparse::get-page-text pdfpath (loop for i from 32 to 54 collect i))))
(defun preprocess-a-class-line (line)
(remove-if (lambda (x) (cl-ppcre:scan "^[IV]*$" x))
(preprocess-a-feat-line line)))
(defun preprocess-class-lines (lines)
(loop with current-line = (make-string-output-stream)
for line in lines
when (or (cl-ppcre:scan "^\\d+$" line)
(string= line "HERO")
(string= line "CHAPTER 1"))
do (setf line "")
when (cl-ppcre:scan "^((Class Skills: )|(Skill Points: )|(Vitality: ))" line)
nconc (preprocess-a-class-line (get-output-stream-string current-line))
do (write-sequence line current-line)
unless (or (string= line "") (char= (char line (1- (length line))) #\Space))
nconc (preprocess-a-class-line (get-output-stream-string current-line))))
(defun extract-class-info (lines)
(loop
;with state = :begin
with name
with skills
with points
with vitality
with proficiencies
for line in lines
when (and name skills points vitality proficiencies)
collect `(,(string-capitalize name) (:skills . ,(mapcar
(lambda (x) (subseq x 1))
(split-sequence #\, skills)))
(:skill-points . ,points)
(:vitality . ,vitality)
(:proficiencies . ,proficiencies))
and do
(setf name nil
skills nil
points nil
vitality nil
proficiencies nil)
do
(cond
;(format t "STATE=~A ; ~A~%" state line)
((and
(cl-ppcre:scan "^[A-Z ]+$" line)
(not (cl-ppcre:scan "(ABILIT)|(FEAT)" line)))
(setf name line))
((cl-ppcre:scan "^Class Skills: " line)
(setf skills (subseq line 13)))
((cl-ppcre:scan "^Skill Points: " line)
(setf points (parse-integer (subseq line 14) :junk-allowed t)))
((cl-ppcre:scan "^Vitality: " line)
(setf vitality (parse-integer (subseq line 10) :junk-allowed t)))
((cl-ppcre:scan "^Starting Proficiencies: " line)
(setf proficiencies (parse-integer (subseq line 24) :junk-allowed t))))))
(defun get-species-pages (pdfpath)
(split-sequence #\Newline
(fccgparse::get-page-text pdfpath (loop for i from 11 to 20 collect i))))
(defun preprocess-species-lines (lines)
(loop with current-line = (make-string-output-stream)
with running = nil
for line in lines
when (string= line "DRAKE") do (setf running t)
when (string= line "HUMAN TALENTS") do (setf running nil)
when (or
(not running)
(cl-ppcre:scan "^\\d+$" line)
(string= line "HERO")
(string= line "CHAPTER 1"))
do (setf line "")
when (cl-ppcre:scan "^•" line)
nconc (preprocess-a-class-line (get-output-stream-string current-line))
do (write-sequence line current-line)
;(format t "~a" line)
unless (or (string= line "") (string= line "•")(char= (char line (1- (length line))) #\Space))
nconc (preprocess-a-class-line (get-output-stream-string current-line))))
(defparameter +attr-convert+
'(:|Strength| :str
:|Intelligence| :int
:|Dexterity| :dex
:|Constitution| :con
:|Wisdom| :wis
:|Charisma| :cha
:|any| :any))
(defun parse-attributes (attr)
(loop
with modifier
with attribute = nil
for word in (split-sequence #\Space attr)
;if (string= word "to") do nil
;else if (starts-with-subseq "attribute" word) do nil
if (char= (char word (1- (length word))) #\,)
do (setf word (subseq word 0 (1- (length word))))
if (char= (char word 0) #\–)
when attribute collect (cons (if (cdr attribute)
attribute
(car attribute)) modifier) into result
end
and do (setf modifier (- (parse-integer (subseq word 1)))
attribute nil)
if (char= (char word 0) #\+)
when attribute collect (cons (if (cdr attribute)
attribute
(car attribute)) modifier) into result
end
and do (setf modifier (parse-integer (subseq word 1))
attribute nil)
when (member (make-keyword word) +attr-convert+ :test #'equal)
do (push (getf +attr-convert+ (make-keyword word)) attribute)
finally (return
(if attribute
(cons (cons (if (cdr attribute)
attribute
(car attribute)) modifier) result)
result))))
(defun parse-type (typestring)
(let ((words (split-sequence #\Space typestring))
(result nil))
(push (cons :size (make-keyword (string-upcase (pop words)))) result)
(push (cons :footprint
(if (ppcre:scan "^\\(" (car words))
(pop words)
"(1x1)")) result)
(push (cons :legs (if (ppcre:scan "biped" (pop words)) 2 4)) result)
(push (cons :type (make-keyword (string-upcase (pop words)))) result)
(pop words) ;with
(pop words) ;a
(pop words) ;reach
(pop words) ;of
(push (cons :reach (parse-integer (pop words) :junk-allowed t)) result)
(loop while (and words (not (ppcre:scan "score" (pop words)))))
(and words (pop words)) ;x
(cons :wound-multiplier
(if words
(parse-number (pop words))
1))
result))
(defun extract-species-info (lines)
(loop
;with state = :begin
with name
with type
with attributes
with speed
with things
for line in lines
when (and name (cl-ppcre:scan "^[A-Z][A-Z ]+$" line))
collect `(,(string-capitalize name) ,@type
(:attr .
,attributes)
(:base-speed . ,speed)
(:qualities . ,things)) into result
and do
(setf things nil
attributes nil)
do
(cond
;(format t "STATE=~A ; ~A~%" state line)
((cl-ppcre:scan "^[A-Z][A-Z ]+$" line)
(setf name line))
((cl-ppcre:scan "^• Attributes: " line)
(setf attributes (parse-attributes (subseq line 14))))
((cl-ppcre:scan "^• Base Speed:" line)
(setf speed (parse-integer (subseq line 14) :junk-allowed t))
;(format *error-output* "Speed found for ~a~%" name)
(unless speed (format *error-output* "SPEED BAD: ~s~%" (subseq line 14))))
((cl-ppcre:scan "^Type: " line)
(setf type (parse-type (subseq line 6))))
((cl-ppcre:scan "^• [^:]*:" line)
(let ((me (nth-value 1 (cl-ppcre:scan "^• [^:]*:" line))))
(push (make-keyword
(nsubstitute #\- #\Space
(string-upcase (subseq line 2 (1- me)))))
things)))
(t #+(or)(when name (format *error-output* "LINE: ~s~%" line))))
finally
(progn (setf (cdr (last result))
(list
`(,(string-capitalize name) ,@type
(:attr .
,attributes)
(:base-speed . ,speed)
(:qualities . ,things))))
(return result))))
(defun get-specialty-pages (pdfpath)
(split-sequence #\Newline
(fccgparse::get-page-text pdfpath (loop for i from 23 to 29 collect i))))
(defun preprocess-specialty-lines (lines)
(loop with current-line = (make-string-output-stream)
with running = nil
for line in lines
when (string= line "ACROBAT") do (setf running t)
when (string= line "STEP 3: ") do (setf running nil) and nconc (list "END")
when (or
(not running)
(cl-ppcre:scan "^\\d+$" line)
(string= line "HERO")
(string= line "CHAPTER 1"))
do (setf line "")
when (cl-ppcre:scan "^•" line)
nconc (preprocess-a-class-line (get-output-stream-string current-line))
do (write-sequence line current-line)
unless (or (string= line "") (char= (char line (1- (length line))) #\Space))
nconc (preprocess-a-class-line (get-output-stream-string current-line))))
(defun extract-specialty-info (lines)
(loop
;with state = :begin
with name
with training
with things
with feat
for line in lines
when (and name (cl-ppcre:scan "^[A-Z][A-Z ]+$" line))
collect `(,(string-capitalize name)
(:attr-train . ,training)
(:qualities . ,things)
(:feat . ,feat))
and do
(setf things nil)
do
(cond
;(format t "STATE=~A ; ~A~%" state line)
((cl-ppcre:scan "^[A-Z ]+$" line)
(setf name line))
((cl-ppcre:scan "^• Bonus Feat: " line)
(setf feat (make-keyword
(string-upcase (nsubstitute #\- #\Space (subseq line 14))))))
((cl-ppcre:scan "^• Attribute Training: " line)
(setf training
(loop for word in (split-sequence #\Space (subseq line 21))
when (and
(not (string= word "any"))
(getf +attr-convert+(make-keyword word)))
collect it)))
((cl-ppcre:scan "^• [^:]*:" line)
(let ((me (nth-value 1 (cl-ppcre:scan "^• [^:]*:" line))))
(push (make-keyword
(nsubstitute #\- #\Space
(string-upcase (subseq line 2 (1- me)))))
things))))))
(defun preprocess-talent-lines (lines)
(loop with current-line = (make-string-output-stream)
with running = nil
for line in lines
when (string= line "ADAPTABLE") do (setf running t)
when (string= line "SPECIALTY") do (setf running nil) and nconc (list "END")
when (or
(not running)
(cl-ppcre:scan "^\\d+$" line)
(string= line "HERO")
(string= line "CHAPTER 1"))
do (setf line "")
when (cl-ppcre:scan "^•" line)
nconc (preprocess-a-class-line (get-output-stream-string current-line))
do (write-sequence line current-line)
unless (or (string= line "") (char= (char line (1- (length line))) #\Space))
nconc (preprocess-a-class-line (get-output-stream-string current-line))))
(defun extract-talent-info (lines)
(loop
;with state = :begin
with name
with speed
with attributes
with things
for line in lines
when (and name (cl-ppcre:scan "^[A-Z][A-Z ]+$" line))
collect `(,(string-capitalize name)
(:attr . ,attributes)
(:base-speed . ,speed)
(:qualities . ,things))
and do
(setf things nil)
do
(cond
;(format t "STATE=~A ; ~A~%" state line)
((cl-ppcre:scan "^[A-Z ]+$" line)
(setf name line))
((cl-ppcre:scan "^• Attributes: " line)
(setf attributes (parse-attributes (subseq line 14))))
((cl-ppcre:scan "^• Base Speed: " line)
(setf speed (parse-integer (subseq line 14) :junk-allowed t)))
((cl-ppcre:scan "^• [^:]*:" line)
(let ((me (nth-value 1 (cl-ppcre:scan "^• [^:]*:" line))))
(push (make-keyword
(nsubstitute #\- #\Space
(string-upcase (subseq line 2 (1- me)))))
things))))))
(defun get-talent-pages (pdfpath)
(split-sequence #\Newline
(fccgparse::get-page-text pdfpath (loop for i from 20 to 23 collect i))))
(defun table-line-text (line)
(apply #'concatenate 'string (mapcar #'third line)))
(defun get-table-lines (pdfname page)
(split-sequence:split-sequence :newline
(fccgparse::get-page-text-positions pdfname page)
:remove-empty-subseqs t))
(defun get-table-lines-after (table-lines matcher n)
(loop
with sofar = nil
for line in table-lines
for rest on (cdr table-lines)
if (and sofar (< sofar n))
collect line into result and do (incf sofar)
else if (and sofar (>= sofar n)) return (list result rest)
else if (funcall matcher line)
do (setf sofar 0)
finally (return (list result nil))))
(defparameter +class-headings+
'(#\L
#\B
#\F
#\R
#\W
#\D
#\I
#\L
#\L
#\S))
(defun extract-columns (header-line headings &optional keep-headings)
(let ((headings (coerce headings 'list)))
(values
(loop with header = headings
for (x y string) in header-line
when (char= (car header) (char string 0))
collect x
and do (setf header (cdr header))
while header)
(when keep-headings
(loop
with output = (make-string-output-stream)
with any = nil
for (x y string) in header-line
with header = headings
when (and header (char= (car header) (char string 0)))
if any
collect (get-output-stream-string output) into result
else do (setf any t) end
and do (setf header (cdr header))
do (write-sequence string output)
finally (return (append result (list (get-output-stream-string output)))))))))
(defun split-columns (columns table-line &optional (fudge 0))
(loop with current = (make-string-output-stream)
with column = (cdr columns)
for (x1 y1 string x y) in table-line
when (and column (> (+ x fudge) (car column)))
collect (get-output-stream-string current) into result
and do (pop column)
do (write-string string current)
finally (return (append result (list (get-output-stream-string current))))))
(defun split-class-table (table-lines)
(loop
with columns = (extract-columns (car table-lines) +class-headings+)
for line in (cdr table-lines)
collect (split-columns columns line 1)))
(defun extract-class-tables (pdfname)
(loop with lines = (fccgparse::get-table-lines pdfname (loop for i from 32 to 54 collect i))
for (table-lines rest) = (get-table-lines-after lines #'match-class-table 23) then
(get-table-lines-after rest #'match-class-table 23)
collect (split-class-table table-lines)
while rest))
(defun postprocess-class-tables (classes)
(loop for lines in classes
collect
(loop for lne in lines
for line = (mapcar (lambda (x) (strip (substitute #\Space #\Tab x))) lne)
with level = 1
with sofar = nil
while (some (lambda (x) (position #\Tab x)) lne)
when (string= (car line) (format nil "~D" level))
if sofar collect sofar into result end and
do (setf sofar line
level (1+ level))
else do (prin1 (car line))
(setf sofar (mapcar (lambda (x y)
(if (string= y "") x
(concatenate 'string x " " y))) sofar line))
finally (return (nconc result (list sofar)))
)))
(defun match-class-table (x)
(ppcre:scan "Table.*The.*" (table-line-text x)))
(defun split-table (table-lines heading &optional include-headers)
(loop
with (columns headers) =
(multiple-value-list (extract-columns (car table-lines) heading include-headers))
for line in (cdr table-lines)
collect (split-columns columns line 1) into columnout
finally (return (if include-headers (cons headers columnout) columnout))))
(defun ensure-length (list length &key (default ""))
(if (> length (length list))
(loop for i from 1 to length
for x = list then (cdr x)
when x collect (car x)
else collect default)
list))
(defun postprocess-gear-table (table ncols skiptest)
(let ((table
(remove-if-not (lambda (x) (some (lambda (x) (position #\Tab x)) x)) table)))
(loop
with sofar = nil
for lne in table
for line = (ensure-length (mapcar
(lambda (x) (strip (substitute #\Space #\Tab x))) lne)
ncols)
;do (format t "LINE: ~A~%" line)
unless (funcall skiptest line)
when (string= (car line) "")
do (setf sofar (mapcar (lambda (x y) (if (string= y "") x
(concatenate 'string x " " y))) sofar line))
else if sofar collect sofar into result end and
do (setf sofar line)
end
finally (return (nconc result (list sofar))))))
(defun goods-skiptest (line)
(or
;(string= (car line) "Good Upgrades")
(starts-with #\* (car line))))
(defun get-goods-table (pdfname)
(let* ((lines1 (fccgparse::get-table-lines pdfname (list 160)))
(lines2 (fccgparse::get-table-lines pdfname (list 161)))
(table-lines1 (car (get-table-lines-after lines1 (lambda (x) (ppcre:scan "Table 4.6" (table-line-text x))) 100)))
(table-lines2 (car (get-table-lines-after lines2 (lambda (x) (ppcre:scan "159" (table-line-text x))) 100))))
(nconc
(split-table table-lines1 "NESCCWEC" t)
(split-table table-lines2 "NESCCWEC"))))
(defparameter *table-funcs* nil)
(defun extract-table (pdfname page startfn columns skiptest)
(let* ((lines1 (fccgparse::get-table-lines pdfname (list page)))
(table-lines1 (car (get-table-lines-after
lines1
(lambda (x) (funcall startfn (table-line-text x)))
100)))
(split-table (split-table table-lines1 columns t)))
(postprocess-gear-table split-table (length columns) skiptest)))
(defmacro define-table-extractor (table-name page-number scanner columns skiptest)
(let ((fname (intern (string-upcase (format nil "EXTRACT-~a-TABLE" table-name)))))
`(progn
(defun ,fname (pdfname)
(extract-table pdfname ,page-number ,scanner ,columns ,skiptest))
(push (cons ,table-name ',fname) *table-funcs*))))
(defun make-skiptest (matches)
(lambda (line)
(or (some 'identity
(mapcar (lambda (x) (cl-ppcre:scan
x
(apply #'concatenate 'string line))) matches))
(starts-with #\* (car line)))))
(defun extract-goods-table (pdfname)
(postprocess-gear-table (get-goods-table pdfname) 8 #'goods-skiptest))
(push (cons :goods 'extract-goods-table) *table-funcs*)
(define-table-extractor :kits 162 (curry #'ppcre:scan "Table 4.7") "NSSCCWEC"
(make-skiptest '(
;"Kit Upgrades"
)))
(define-table-extractor :locks 163 (curry #'ppcre:scan "Table 4.8") "NETCSCCWEC"
(make-skiptest '(
;"Locks"
;"Traps"
;"Lock & Trap Upgrades"
)))
(define-table-extractor :consumables 164 (curry #'ppcre:scan "Table 4.9") "NEUSCCWEC"
(make-skiptest '(
;"General Consumables"
;"Medical Supplies"
)))
(define-table-extractor :elixirs 165 (curry #'ppcre:scan "Table 4.10") "NESCCWEC"
(make-skiptest '(
;"Elixir Upgrades"
;"Potions"
;"Oils"
)))
(define-table-extractor :food 167 (curry #'ppcre:scan "Table 4.11: Food & Drink$") "NEUSCCWEC"
(make-skiptest '("or discarded.")))
(define-table-extractor :poisons 168 (curry #'ppcre:scan "Table 4.12") "NEIUCWEC"
(make-skiptest '("Poison Upgrades")))
(define-table-extractor :scrolls 169 (curry #'ppcre:scan "Table 4.13") "SSCCWEC"
(make-skiptest '(
;"Scroll Upgrades"
)))
(define-table-extractor :services 170 (curry #'ppcre:scan "Table 4.14") "SEAEC"
(make-skiptest '(
;"Community Services"
;"Hired Passage"
;"Lodging (per person or animal)"
)))
(define-table-extractor :mounts 172 (curry #'ppcre:scan "Table 4.15") "MBTWAC"
(make-skiptest '(
;"Draft Animals"
;"Flying Mounts"
;"Military Mounts"
;"Riding Mounts"
;"Swimming Mounts"
;"Mount Upgrades"
)))
(define-table-extractor :vehicles 173 (curry #'ppcre:scan "Table 4.16") "NQSTSOCCEC"
(make-skiptest '(
;"Air Vehicle"
;"Land Vehicles"
;"Water Vehicles"
;"Vehicle Upgrades"
)))
(define-table-extractor :armor 176 (curry #'ppcre:scan "Table 4.17") "TDRDASDCCWEC"
(make-skiptest '(
;"^Partial"
;"^Moderate"
;"^Fittings"
"^[a-z]"
"^Barding:"
"^Unborn:"
)))
(define-table-extractor :armor-upgrades 177 (curry #'ppcre:scan "Table 4.18") "NDEDASDCCWEC"
(make-skiptest '(
;"^Craftsmanship"
;"^Materials"
;"^Customization"
)))
(define-table-extractor :blunt-weapons 179 (curry #'ppcre:scan "Table 4.19") "NDTQSCCWEC"
(make-skiptest '(
;"^Clubs"
;"^Flails"
;"^Hammers"
;"^Shields"
;"^Staves"
;"^Whips"
)))
(define-table-extractor :edged-weapons 181 (curry #'ppcre:scan "Table 4.20") "NDTQSCCWEC"
(make-skiptest '(
;"^Axes"
;"^Fencing Blades"
;"^Knives"
;"^Swords"
;"^Greatswords"
;"^Polearms"
;"^Spears"
)))
(define-table-extractor :hurled-weapons 183 (curry #'ppcre:scan "Table 4.21") "NDTRQSCCWEC"
(make-skiptest '(
;"^Thrown"
;"^Grenades"
)))
(define-table-extractor :bows 184 (curry #'ppcre:scan "Table 4.22") "NDTRQSCCWEC"
(make-skiptest '(
;"^Arrows"
;"^Bows"
)))
(define-table-extractor :black-powder-weapons 185 (curry #'ppcre:scan "Table 4.23") "NDTRQSCCWEC"
(make-skiptest '(
;"^Powder"
;"^Sidearms"
;"^Longarms"
)))
(define-table-extractor :siege-weapons 186 (curry #'ppcre:scan "Table 4.24") "NDTRQSCCWEC"
(make-skiptest '(
)))
(define-table-extractor :weapon-upgrades 187 (curry #'ppcre:scan "Table 4.25") "NECCWEC"
(make-skiptest '(
;"^Craftsman"
;"^Material"
;"^Customiz"
"^Unborn:"
"^Improve check"
)))
(defun write-data-file (fname pdffname)
(with-open-file (outf fname :direction :output :if-exists :supersede)
(prin1 '(cl:in-package :fccg) outf)
(prin1 `(cl:defparameter fccg::+feats+
',(extract-feat-info (preprocess-feat-lines (get-feat-pages pdffname))))
outf)
(prin1 `(cl:defparameter fccg::+class+
',(extract-class-info (preprocess-class-lines (get-class-pages pdffname))))
outf)
(prin1 `(cl:defparameter fccg::+species+
',(extract-species-info (preprocess-species-lines (get-species-pages pdffname))))
outf)
(prin1 `(cl:defparameter fccg::+specialty+
',(extract-specialty-info (preprocess-specialty-lines (get-specialty-pages pdffname))))
outf)
(prin1 `(cl:defparameter fccg::+talent+
',(extract-talent-info (preprocess-talent-lines (get-talent-pages pdffname))))
outf)
(prin1 `(cl:defparameter fccg::+gear+
',(loop for (name . func) in (reverse *table-funcs*)
collect (cons name (funcall func pdffname))))
outf)
(values)))
(defun file-selector ()
(let ((retval))
(ltk:with-ltk ()
(ltk:withdraw ltk::*tk*)
(setf retval
(ltk:get-open-file :filetypes '(("Fantasy Craft Second Printing" "Fantasy_Craft_Second_Printing.pdf") ("PDF Files" "*.pdf") ("All Files" "*"))
:title "Please Select Fantasy Craft PDF File"))
(ltk::exit-wish))
retval))
(defun main ()
(write-data-file
"data.lisp"
(file-selector))
(uiop:quit))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment