utz82/plot-results.jl

## plot-results.jl
using DataFrames, CSV
using Cairo, Gadfly

data = CSV.read("results.csv")

p1 = layer(data, x=:module_size, y=:ratio_dict_enc0,
           color=[colorant"purple"], size=[0.75mm], Geom.hair, Geom.point)
p2 = layer(data, x=:module_size, y=:ratio_dict_enc1,
           color=[colorant"red"], size=[0.75mm], Geom.hair, Geom.point)
p3 = layer(data, x=:module_size, y=:ratio_dict_enc4,
           color=[colorant"orange"], size=[0.75mm], Geom.hair, Geom.point)

p = plot(p1, p2, p3, Guide.xlabel("module size (words)"),
         Guide.ylabel("packer efficiency (%)"),
         Guide.manual_color_key("encoders",
                                ["no recursion", "recursion depth=1",
                                 "recursion depth=4"],
                                [colorant"purple", colorant"red",
                                 colorant"orange"]))

img = SVG("results.svg", 20inch, 10inch)

draw(img, p)

## test-encoder.scm
;; testing dictionary encoding for module data
;;
;; Notes: For the purpose of this test, only note triggers are considered.
;; Empty pattern rows (after filtering everything but note triggers) are
;; discarded.
;; Furthermore, no module shall have more than (256 * 128) - 1 rows.
;;
;; Encoding Algorithm
;;
;; 1) Produce a dictionary of all unique rows.
;;    Encoded rows consist of a control value which has bits set depending on
;;    which tracks have a note trigger, followed by the note triggers
;;    themselves. Data of tracks which have no note trigger is discarded.
;; 2) Produce a sequence as follows:
;;    a) Replace the original module sequence with a list of the corresponding
;;       pattern data.
;;    b) Replace rows in the pattern data with a reference to the corresponding
;;       entry in the dictionary.
;; 3) Recursively optimize the new sequence as follows:
;;    a) Beginning with a window size of 3 and a nesting level of 1, find all
;;       consecutive repetitions in the sequence, replacing them with a control
;;       value which holds the window size and number of repetitions. Control
;;       value is identified as such by having bit 15 set. A dictionary pointer
;;       will never have bit 15 set, as we do not allow more than #x7fff rows of
;;       input.
;;       Substitution shall not take place inside compressed blocks. So,
;;       assuming the following sequence
;;       <ctrl window:4 len:4>1 2 3 4 5 6 7 3 4 5 6 7...
;;       the packer will not replace the second occurance of 3 4 5 6 7, because
;;       the first occurance is part of the compressed block
;;       <ctrl window:4 len:4>1 2 3 4.
;;    b) If executing step 3a replaced any chunk that already contained a
;;       control value, increment nesting level by 1.
;;    c) Increment window size by 1 and repeat from step 3a until reaching
;;       maximum window size (set to the length of the longest source pattern
;;       with empty rows discarded) or reaching maximum nesting level (set to 4
;;       for this test).

(use xmkit)

(define (drop-empty-rows triggers)
  (filter (lambda (row)
	    (any number? row))
	  triggers))

;; count the note triggers in the given xm
(define (note-trigger-count xm)
  (apply + (map (lambda (ptn)
		  (apply + (map (lambda (row)
				  (length (filter number? row)))
				(drop-empty-rows (xm:pattern-notes ptn)))))
		(xm:patterns xm))))

;; pack rows of pattern data as described in Step 1.
(define (pack-rows rows)
  (map (lambda (row)
	 (letrec ((make-ctrlval
		   (lambda (data cbit cval)
		     (if (null? data)
			 cval
			 (make-ctrlval (cdr data)
				       (* 2 cbit)
				       (if (number? (car data))
					   (+ cval cbit)
					   cval))))))
	   (cons (make-ctrlval row 1 0)
		 (filter number? row))))
       rows))

;; construct a dictionary of unique packed pattern rows from a list of
;; unpacked pattern rows. init-dict should be '() when calling this proc
;; directly.
(define (make-dict init-dict packed-rows)
  (if (null? packed-rows)
      init-dict
      (make-dict (if (member (car packed-rows) init-dict)
		     init-dict
		     (cons (car packed-rows) init-dict))
		 (cdr packed-rows))))

;; find and compress repetitions in data of the given window size
(define (pack-data data window)
  (letrec* ((count-matches
	     (lambda (head tail match-count)
	       (if (> (length head) (length tail))
		   match-count
		   (if (not (equal? head (take tail window)))
		       match-count
		       (count-matches head (drop tail window)
				      (+ 1 match-count)))))))
    (if (> (* 2 window) (length data))
	data
	(let ((matches (count-matches (take data window)
				      (drop data window)
				      0)))
	  (if (> matches 0)
	      (append (cons (+ #x8000 window (* 256 matches))
			    (take data window))
		      (pack-data (drop data (* (+ 1 matches) window)) window))
	      (if (= #x8000 (bitwise-and #x8000 (car data)))
		  ;; don't search for matches inside compressed blocks
		  (let ((blocksize (+ 1 (bitwise-and #xff (car data)))))
		    (append (take data blocksize)
			    (pack-data (drop data blocksize) window)))
		  (cons (car data)
			(pack-data (cdr data) window))))))))

;; pack a sequence as described in step 3
(define (pack-seq seq max-window max-depth)
  (letrec
      ((run-packer
	(lambda (sdata current-window current-depth)
	  (if (or (> current-depth max-depth)
		  (> current-window max-window)
		  (> current-window (round (/ (length sdata) 2))))
	      sdata
	      (let ((next-data (pack-data sdata current-window)))
		(run-packer next-data (+ 1 current-window)
			    (if (= (length sdata)
				   (length next-data))
				current-depth (+ 1 current-depth))))))))
    (run-packer seq 3 0)))

;; construct a sequence of dictionary references as described in step 2a
(define (make-seq packed-rows dict)
  (map (lambda (row)
	 (list-index (lambda (dict-word)
		       (equal? dict-word row))
		     dict))
       packed-rows))

(define (mock-encode-dict xm max-depth)
  (let* ((packed-patterns
	  (map (lambda (ptn)
		 (pack-rows (drop-empty-rows (xm:pattern-notes ptn))))
	       (xm:patterns xm)))
	 (max-ptn-len (apply max (map length packed-patterns)))
	 (dict (make-dict '() (concatenate packed-patterns)))
	 (dict-length (length (concatenate dict)))
	 (seq (make-seq
	       (concatenate
		(map (lambda (pos)
		       (list-ref packed-patterns pos))
		     (filter (lambda (seqpos)
			       (< seqpos (xm:number-of-patterns xm)))
			     (xm:sequence xm))))
	       dict)))
    (list (+ dict-length (length seq))
	  (+ dict-length (length (pack-seq seq max-ptn-len 1)))
	  (+ dict-length (length (pack-seq seq max-ptn-len max-depth))))))

;; returns the size of the module data when packed in the classic way, packing
;; rows into a control value + actual note triggers. thus, size = seq length
;; + (combined length of all patterns) + (number of note triggers)
(define (mock-encode-classic xm)
  (+ (apply + (map (lambda (ptn)
		     (length (drop-empty-rows (xm:pattern-notes ptn))))
		   (xm:patterns xm)))
     (xm:song-length xm)
     (note-trigger-count xm)))

;; generate test results for each module listed in the given file list
(define (generate-test-results files max-depth)
  (if (null? files)
      '()
      (let* ((xm (xm:file->module (car files)))
	     (dict-encode-result (mock-encode-dict xm max-depth)))
	(cons (list (car files)
		    (xm:number-of-tracks xm)
		    (mock-encode-classic xm)
		    (car dict-encode-result)
		    (cadr dict-encode-result)
		    (caddr dict-encode-result))
	      (generate-test-results (cdr files) max-depth)))))

(define (get-quota i j)
  (/ (round (* 1000 (/ (- i j) i)))
     10))

(define (run-tests filelist-file max-depth)
  (let* ((modules (filter xm:is-valid-xm-file?
			  (read-lines filelist-file)))
	 (results (generate-test-results modules max-depth))
	 (stats (list (string-append "modules parsed: "
				     (->string (length results)))
		      (string-append
		       "successful optimizations (no recursion): "
		       (->string
			(length
			 (filter positive?
				 (map (lambda (r)
					(get-quota (caddr r) (cadddr r)))
				      results)))))
		      (string-append
		       "successful optimizations (recursion depth = 1): "
		       (->string
			(length
			 (filter positive?
				 (map (lambda (r)
					(get-quota (caddr r) (list-ref r 4)))
				      results)))))
		      (string-append
		       "successful optimizations (recursion depth = 4): "
		       (->string
			(length
			 (filter positive?
				 (map (lambda (r)
					(get-quota (caddr r) (list-ref r 5)))
				      results)))))
		      (string-append
		       "average compression ratio (no recursion): "
		       (->string (/ (apply + (map (lambda (r)
						    (get-quota (caddr r)
							       (cadddr r)))
						  results))
				    (length results))))
		      (string-append
		       "average compression ratio (recursion depth = 1): "
		       (->string (/ (apply + (map (lambda (r)
						    (get-quota (caddr r)
							       (list-ref r 4)))
						  results))
				    (length results))))
		      (string-append
		       "average compression ratio (recursion depth = 4): "
		       (->string (/ (apply + (map (lambda (r)
						    (get-quota (caddr r)
							       (list-ref r 5)))
						  results))
				    (length results))))))
	 (csv (cons (string-append "channels," "module_size,"
				   "dict_enc0," "dict_enc1,"
				   "ratio_dict_enc0," "ratio_dict_enc1,"
				   "ratio_dict_enc4")
		    (map (lambda (r)
			   (string-append
			    ;; (car r) filenames may break csv plotter
			    ;; ","
			    (->string (cadr r))
			    "," (->string (caddr r))
			    "," (->string (cadddr r))
			    "," (->string (list-ref r 4))
			    "," (->string (get-quota (caddr r) (cadddr r)))
			    "," (->string (get-quota (caddr r)
						     (list-ref r 4)))
			    "," (->string (get-quota (caddr r)
						     (list-ref r 5)))))
			 results)))
	 (write-to-file (lambda (filename lines)
			  (call-with-output-file filename
			    (lambda (out)
			      (for-each (lambda (line)
					  (write-line line out))
					lines))))))
    (begin
      (write-to-file "results.csv" csv)
      (write-to-file "results.log" stats))))
	using DataFrames, CSV
	using Cairo, Gadfly

	data = CSV.read("results.csv")

	p1 = layer(data, x=:module_size, y=:ratio_dict_enc0,
	color=[colorant"purple"], size=[0.75mm], Geom.hair, Geom.point)
	p2 = layer(data, x=:module_size, y=:ratio_dict_enc1,
	color=[colorant"red"], size=[0.75mm], Geom.hair, Geom.point)
	p3 = layer(data, x=:module_size, y=:ratio_dict_enc4,
	color=[colorant"orange"], size=[0.75mm], Geom.hair, Geom.point)

	p = plot(p1, p2, p3, Guide.xlabel("module size (words)"),
	Guide.ylabel("packer efficiency (%)"),
	Guide.manual_color_key("encoders",
	["no recursion", "recursion depth=1",
	"recursion depth=4"],
	[colorant"purple", colorant"red",
	colorant"orange"]))

	img = SVG("results.svg", 20inch, 10inch)

	draw(img, p)
	;; testing dictionary encoding for module data
	;;
	;; Notes: For the purpose of this test, only note triggers are considered.
	;; Empty pattern rows (after filtering everything but note triggers) are
	;; discarded.
	;; Furthermore, no module shall have more than (256 * 128) - 1 rows.
	;;
	;; Encoding Algorithm
	;;
	;; 1) Produce a dictionary of all unique rows.
	;; Encoded rows consist of a control value which has bits set depending on
	;; which tracks have a note trigger, followed by the note triggers
	;; themselves. Data of tracks which have no note trigger is discarded.
	;; 2) Produce a sequence as follows:
	;; a) Replace the original module sequence with a list of the corresponding
	;; pattern data.
	;; b) Replace rows in the pattern data with a reference to the corresponding
	;; entry in the dictionary.
	;; 3) Recursively optimize the new sequence as follows:
	;; a) Beginning with a window size of 3 and a nesting level of 1, find all
	;; consecutive repetitions in the sequence, replacing them with a control
	;; value which holds the window size and number of repetitions. Control
	;; value is identified as such by having bit 15 set. A dictionary pointer
	;; will never have bit 15 set, as we do not allow more than #x7fff rows of
	;; input.
	;; Substitution shall not take place inside compressed blocks. So,
	;; assuming the following sequence
	;; <ctrl window:4 len:4>1 2 3 4 5 6 7 3 4 5 6 7...
	;; the packer will not replace the second occurance of 3 4 5 6 7, because
	;; the first occurance is part of the compressed block
	;; <ctrl window:4 len:4>1 2 3 4.
	;; b) If executing step 3a replaced any chunk that already contained a
	;; control value, increment nesting level by 1.
	;; c) Increment window size by 1 and repeat from step 3a until reaching
	;; maximum window size (set to the length of the longest source pattern
	;; with empty rows discarded) or reaching maximum nesting level (set to 4
	;; for this test).

	(use xmkit)

	(define (drop-empty-rows triggers)
	(filter (lambda (row)
	(any number? row))
	triggers))

	;; count the note triggers in the given xm
	(define (note-trigger-count xm)
	(apply + (map (lambda (ptn)
	(apply + (map (lambda (row)
	(length (filter number? row)))
	(drop-empty-rows (xm:pattern-notes ptn)))))
	(xm:patterns xm))))

	;; pack rows of pattern data as described in Step 1.
	(define (pack-rows rows)
	(map (lambda (row)
	(letrec ((make-ctrlval
	(lambda (data cbit cval)
	(if (null? data)
	cval
	(make-ctrlval (cdr data)
	(* 2 cbit)
	(if (number? (car data))
	(+ cval cbit)
	cval))))))
	(cons (make-ctrlval row 1 0)
	(filter number? row))))
	rows))

	;; construct a dictionary of unique packed pattern rows from a list of
	;; unpacked pattern rows. init-dict should be '() when calling this proc
	;; directly.
	(define (make-dict init-dict packed-rows)
	(if (null? packed-rows)
	init-dict
	(make-dict (if (member (car packed-rows) init-dict)
	init-dict
	(cons (car packed-rows) init-dict))
	(cdr packed-rows))))

	;; find and compress repetitions in data of the given window size
	(define (pack-data data window)
	(letrec* ((count-matches
	(lambda (head tail match-count)
	(if (> (length head) (length tail))
	match-count
	(if (not (equal? head (take tail window)))
	match-count
	(count-matches head (drop tail window)
	(+ 1 match-count)))))))
	(if (> (* 2 window) (length data))
	data
	(let ((matches (count-matches (take data window)
	(drop data window)
	0)))
	(if (> matches 0)
	(append (cons (+ #x8000 window (* 256 matches))
	(take data window))
	(pack-data (drop data (* (+ 1 matches) window)) window))
	(if (= #x8000 (bitwise-and #x8000 (car data)))
	;; don't search for matches inside compressed blocks
	(let ((blocksize (+ 1 (bitwise-and #xff (car data)))))
	(append (take data blocksize)
	(pack-data (drop data blocksize) window)))
	(cons (car data)
	(pack-data (cdr data) window))))))))

	;; pack a sequence as described in step 3
	(define (pack-seq seq max-window max-depth)
	(letrec
	((run-packer
	(lambda (sdata current-window current-depth)
	(if (or (> current-depth max-depth)
	(> current-window max-window)
	(> current-window (round (/ (length sdata) 2))))
	sdata
	(let ((next-data (pack-data sdata current-window)))
	(run-packer next-data (+ 1 current-window)
	(if (= (length sdata)
	(length next-data))
	current-depth (+ 1 current-depth))))))))
	(run-packer seq 3 0)))

	;; construct a sequence of dictionary references as described in step 2a
	(define (make-seq packed-rows dict)
	(map (lambda (row)
	(list-index (lambda (dict-word)
	(equal? dict-word row))
	dict))
	packed-rows))

	(define (mock-encode-dict xm max-depth)
	(let* ((packed-patterns
	(map (lambda (ptn)
	(pack-rows (drop-empty-rows (xm:pattern-notes ptn))))
	(xm:patterns xm)))
	(max-ptn-len (apply max (map length packed-patterns)))
	(dict (make-dict '() (concatenate packed-patterns)))
	(dict-length (length (concatenate dict)))
	(seq (make-seq
	(concatenate
	(map (lambda (pos)
	(list-ref packed-patterns pos))
	(filter (lambda (seqpos)
	(< seqpos (xm:number-of-patterns xm)))
	(xm:sequence xm))))
	dict)))
	(list (+ dict-length (length seq))
	(+ dict-length (length (pack-seq seq max-ptn-len 1)))
	(+ dict-length (length (pack-seq seq max-ptn-len max-depth))))))

	;; returns the size of the module data when packed in the classic way, packing
	;; rows into a control value + actual note triggers. thus, size = seq length
	;; + (combined length of all patterns) + (number of note triggers)
	(define (mock-encode-classic xm)
	(+ (apply + (map (lambda (ptn)
	(length (drop-empty-rows (xm:pattern-notes ptn))))
	(xm:patterns xm)))
	(xm:song-length xm)
	(note-trigger-count xm)))

	;; generate test results for each module listed in the given file list
	(define (generate-test-results files max-depth)
	(if (null? files)
	'()
	(let* ((xm (xm:file->module (car files)))
	(dict-encode-result (mock-encode-dict xm max-depth)))
	(cons (list (car files)
	(xm:number-of-tracks xm)
	(mock-encode-classic xm)
	(car dict-encode-result)
	(cadr dict-encode-result)
	(caddr dict-encode-result))
	(generate-test-results (cdr files) max-depth)))))

	(define (get-quota i j)
	(/ (round (* 1000 (/ (- i j) i)))
	10))

	(define (run-tests filelist-file max-depth)
	(let* ((modules (filter xm:is-valid-xm-file?
	(read-lines filelist-file)))
	(results (generate-test-results modules max-depth))
	(stats (list (string-append "modules parsed: "
	(->string (length results)))
	(string-append
	"successful optimizations (no recursion): "
	(->string
	(length
	(filter positive?
	(map (lambda (r)
	(get-quota (caddr r) (cadddr r)))
	results)))))
	(string-append
	"successful optimizations (recursion depth = 1): "
	(->string
	(length
	(filter positive?
	(map (lambda (r)
	(get-quota (caddr r) (list-ref r 4)))
	results)))))
	(string-append
	"successful optimizations (recursion depth = 4): "
	(->string
	(length
	(filter positive?
	(map (lambda (r)
	(get-quota (caddr r) (list-ref r 5)))
	results)))))
	(string-append
	"average compression ratio (no recursion): "
	(->string (/ (apply + (map (lambda (r)
	(get-quota (caddr r)
	(cadddr r)))
	results))
	(length results))))
	(string-append
	"average compression ratio (recursion depth = 1): "
	(->string (/ (apply + (map (lambda (r)
	(get-quota (caddr r)
	(list-ref r 4)))
	results))
	(length results))))
	(string-append
	"average compression ratio (recursion depth = 4): "
	(->string (/ (apply + (map (lambda (r)
	(get-quota (caddr r)
	(list-ref r 5)))
	results))
	(length results))))))
	(csv (cons (string-append "channels," "module_size,"
	"dict_enc0," "dict_enc1,"
	"ratio_dict_enc0," "ratio_dict_enc1,"
	"ratio_dict_enc4")
	(map (lambda (r)
	(string-append
	;; (car r) filenames may break csv plotter
	;; ","
	(->string (cadr r))
	"," (->string (caddr r))
	"," (->string (cadddr r))
	"," (->string (list-ref r 4))
	"," (->string (get-quota (caddr r) (cadddr r)))
	"," (->string (get-quota (caddr r)
	(list-ref r 4)))
	"," (->string (get-quota (caddr r)
	(list-ref r 5)))))
	results)))
	(write-to-file (lambda (filename lines)
	(call-with-output-file filename
	(lambda (out)
	(for-each (lambda (line)
	(write-line line out))
	lines))))))
	(begin
	(write-to-file "results.csv" csv)
	(write-to-file "results.log" stats))))