Skip to content

Instantly share code, notes, and snippets.

@nowl
Created September 9, 2011 03:23
Show Gist options
  • Save nowl/1205421 to your computer and use it in GitHub Desktop.
Save nowl/1205421 to your computer and use it in GitHub Desktop.
Common Lisp code to generate random words based on next-letter frequency from a predefined corpus
(defparameter *valid-chars* '(#\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z))
(defparameter *table* nil)
(defparameter *norm-table* nil)
(defun build-table ()
(setf *table* (make-hash-table))
(with-open-file (in #p"corpus.txt" :direction :input)
(loop with lower-c with c with p while (not (eql c :eof)) do
(setf c (read-char in nil :eof)
lower-c (and (characterp c) (char-downcase c)))
(when (member lower-c *valid-chars*)
(when p
(unless (gethash p *table*)
(setf (gethash p *table*) (make-hash-table)))
(let ((value (gethash lower-c (gethash p *table*))))
(setf (gethash lower-c (gethash p *table*))
(if value (1+ value) 1))))
(setf p lower-c)))))
(defun sort-and-find-probs (list)
(let ((total (loop for val being the hash-value of list summing val)))
(sort (loop with sum = 0 for char being the hash-key of list using (hash-value val) collecting
(progn (incf sum (float (/ val total)))
(list char sum)))
#'< :key #'cadr)))
(defun build-norm-table ()
(setf *norm-table*
(loop for char being the hash-key of *table* using (hash-value val) collecting
(list char (sort-and-find-probs val)))))
(defun build-case-aux (prob list)
`(cond ,@(loop for val in list collecting
`((< ,prob ,(second val)) ,(first val)))))
(defun build-case-aux2 (char prob)
`(case ,char
,@(loop for entry in *norm-table* collecting
(list (car entry) (build-case-aux prob (cadr entry))))))
(defmacro build-case (char prob)
(build-case-aux2 char prob))
(defun weighted-random-next-char (char)
(let ((p (random 1.0)))
(build-case char p)))
(defun random-choice (list)
(let ((r (random (length list))))
(nth r list)))
(defun random-word (length &key (start-char nil))
(unless start-char
(setf start-char (random-choice *valid-chars*)))
(coerce
(cons start-char
(loop for i below (1- length) collecting
(let ((next-char (weighted-random-next-char start-char)))
(setf start-char next-char))))
'string))
; (build-table)
; (build-norm-table)
; (random-word 10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment