Last active
May 11, 2021 13:50
-
-
Save krisfoster/d0f67084669f98d6811f to your computer and use it in GitHub Desktop.
Scratch attempt at a markov model in elisp. God help me.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Package-Requires: ((dash "2.10.0") (dash-functional "1.2.0") (emacs "24")) | |
;;; | |
;;; A markov model for generating rubbish text in an amusing | |
;;; and diverting number of voices. | |
;;; | |
;; | |
;; How to use: | |
;; | |
;; The steps about downloading the smaple text files aren't | |
;; required anymore. It now fetches the smaples from gists | |
;; | |
;; Download the following files, these contain the sample text | |
;; for building the models: | |
;; | |
;; * https://gist.github.com/krisfoster/4f396e99d3a5bf4885a6 | |
;; * https://gist.github.com/krisfoster/b24fa45cf1cb12a64e09 | |
;; * https://gist.github.com/krisfoster/693c4d908458789f1a91 | |
;; | |
;; These need to be saved to your home directory (whatever ~ is in your | |
;; emacs set up). This will change... | |
;; | |
;; This still needs to be done though. | |
;; | |
;; Save this file as markov.el and load into emacs or just eval it. | |
;; | |
;; Open a file and: M-x krf/dre | |
;; | |
;; Hilarity ensues. | |
(require 'cl) | |
(require 'dash) | |
(require 'url) | |
;; | |
;; Debug funcs | |
;; | |
(defun krf/kbug (msg) | |
(save-excursion | |
;;(goto-char (point-max)) | |
(insert msg) | |
(insert "\n"))) | |
(defun krf/hash-to-list (hashtable) | |
"Return a list that represent the HASHTABLE." | |
(let (myList) | |
(maphash (lambda (kk vv) (setq myList (cons (list kk vv) myList))) hashtable) | |
myList)) | |
(defun krf/prnl (l) | |
(if (first l) | |
(progn | |
(insert (format "%s\n" (car l))) | |
(krf/prnl (cdr l))) | |
(insert "<<end>>\n"))) | |
(defun krf/display-hash (h) | |
(save-excursion | |
(goto-char (point-max)) | |
(krf/prnl (krf/hash-to-list h)))) | |
;; | |
;; end | |
;; | |
;; | |
;; Partitioning funcs - stole this from somewhere | |
;; | |
(defun krf/partition-list (list step length) | |
(loop | |
while list | |
collect (subseq list 0 length) | |
do (setf list (nthcdr step list)))) | |
(defun krf/partition-vector (vector step length) | |
(loop | |
for i = 0 then (+ i length) | |
while (< i (length vector)) | |
collect (subseq vector i (+ i length)))) | |
(defun krf/partition (sequence step length) | |
(etypecase sequence | |
(list (krf/partition-list sequence step length)) | |
(string (krf/partition-vector sequence step length)) ; emacs lisp strings are not vectors! | |
(vector (krf/partition-vector sequence step length)))) | |
;; | |
;; end | |
;; | |
;; | |
;; General funcs | |
;; | |
(defun krf/keys (hh) | |
"Returns all of the keys within a hash-table" | |
(let (key-list) | |
(maphash (lambda (kk vv) (setq key-list (cons kk key-list))) hh) | |
key-list)) | |
(defun krf/random-key (hh) | |
"Returns a random key from a hash-table" | |
(let* ((ks (krf/keys hh)) | |
(len (length ks)) | |
(idx (max 0 (random len)))) | |
(nth idx ks))) | |
(defun krf/get-key (l) | |
(subseq l 0 (- (length l) 1))) | |
(defun krf/get-value (l) | |
(car (last l))) | |
(defun krf/random-element (l) | |
"Takes a list and returns a random element from that list" | |
(let* (idx (random (length l))) | |
(nth random l))) | |
(defun krf/markov-chain (pws) | |
"I take a list of lists (partioned over the input text) and generate a hashtable, which is the model, and return it" | |
(reduce (lambda (hh v) | |
(let* ((kk (krf/get-key v)) ; all but last : key | |
(vv (krf/get-value v))) ; last is the value | |
(if (gethash kk hh) | |
(puthash kk (append (gethash kk hh) (list vv)) hh) | |
(puthash kk (list vv) hh)) | |
hh)) | |
pws | |
:initial-value (make-hash-table :test 'equal))) | |
(defun krf/next-link (kk h) | |
(let* ((v (gethash kk h)) | |
(idx (max 0 (random (length v))))) | |
(nth idx v))) | |
;; Load a file into a var | |
(defun krf/get-string-from-file (filePath) | |
"Return filePath's file content." | |
(with-temp-buffer | |
(insert-file-contents filePath) | |
(buffer-string))) | |
;; Stole this | |
(defun krf/chomp (str) | |
"Chomp leading and tailing whitespace from STR." | |
(replace-regexp-in-string (rx (or (: bos (* (any " \t\n"))) | |
(: (* (any " \t\n")) eos))) | |
"" | |
str)) | |
(defun krf/remove-empty-words (l) | |
(-remove (lambda (l) (member nil l)) l)) | |
(defun krf/sanatise-and-split (txt) | |
(let* ((r-word "^[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]+[.?!]?$") | |
(clean-txt (krf/chomp (replace-regexp-in-string "--" "" | |
(replace-regexp-in-string "[\n,;\"\']" " " txt)))) | |
(words-1 (split-string clean-txt " ")) | |
(words-2 (mapcar 'krf/chomp words-1)) | |
(words-3 (-filter | |
(lambda (s) (and | |
(> (length s) 0) | |
(string-match r-word s))) | |
words-2))) | |
words-3)) | |
(defvar krf/kris-debug nil) | |
(defun krf/chain-walker (words idx starting-at acc) | |
"I walk the line... and return alist of words from the model" | |
(let* ( (next-part (krf/next-link starting-at words)) | |
(next-starting-at (append (last starting-at) (list next-part)))) | |
(progn | |
(if krf/kris-debug | |
(krf/kbug (concat "..[" (number-to-string idx) "] . " | |
(format "%s" starting-at) " . " | |
(format "%s" next-starting-at) " . " | |
(format "%s" next-part) " . " | |
(format "%s" acc)))) | |
(if (<= idx 0) | |
acc | |
(krf/chain-walker words (- idx 1) next-starting-at | |
(append acc (list next-part))))))) | |
(defun krf/load-model-text (file) | |
"I load a file and return a string" | |
(krf/get-string-from-file file)) | |
(defun krf/create-model (text &optional chain-length) | |
"Creates a markov-chain from some sample text" | |
(let* (;; Fix up the input data | |
(chain-length 2) | |
(words (krf/remove-empty-words (krf/sanatise-and-split text))) | |
;; make a hash map from the words. We partition the data into | |
(pwords (krf/remove-empty-words (krf/partition words 1 (+ chain-length 1)))) | |
(chain (krf/markov-chain pwords))) | |
chain)) | |
(defun krf/drop-the-noise (chain len) | |
(let* ((starting-at (krf/random-key chain))) | |
(krf/chain-walker chain len starting-at starting-at))) | |
(defvar krf/--lingos (make-hash-table :test 'equal)) | |
(defun krf/add-lingo (name sample) | |
"Adds a new lingo" | |
(puthash name (krf/create-model sample) krf/--lingos)) | |
(defun krf/get-lingo (name) | |
(gethash name krf/--lingos)) | |
(defun krf/talk-like-an-egyptian (name len) | |
"Generates random text, from a markov chain, for the given lingo" | |
(let ((chain (krf/get-lingo name))) | |
(mapconcat 'identity (krf/drop-the-noise chain len) " "))) | |
;; | |
;; Remote loading of text samples from URLS | |
;; | |
(defun remove-headers (resp) | |
"Removes the headers from the response retruned by url-retieve | |
/^.+\n\n/" | |
(replace-regexp-in-string "\\(.+\n\\)+\\(.+\n\n\\)" "" resp)) | |
(defun load-resources (name url) | |
"Takes a url that points to an online text sample and adds it as a lingo against the name" | |
(let ((url-request-method "GET")) | |
(url-retrieve url (lambda (status args) | |
(krf/add-lingo args (remove-headers (buffer-string)))) | |
(list name)))) | |
;; | |
;; These input files can be found at: | |
;; | |
;; * https://gist.github.com/krisfoster/4f396e99d3a5bf4885a6 | |
;; * https://gist.github.com/krisfoster/b24fa45cf1cb12a64e09 | |
;; * https://gist.github.com/krisfoster/693c4d908458789f1a91 | |
;; | |
(defun krf/init-local () | |
(progn | |
(krf/add-lingo "dre" (krf/load-model-text "~/dre.txt")) | |
(krf/add-lingo "chaucer" (krf/load-model-text "~/canterbury-tales.txt")) | |
(krf/add-lingo "lear" (krf/load-model-text "~/dong-with-luminous-nose.txt")))) | |
(defun krf/init-remote () | |
(progn | |
(load-resources "dre" "https://gist.github.com/krisfoster/4f396e99d3a5bf4885a6/raw/") | |
(load-resources "lear" "https://gist.github.com/krisfoster/b24fa45cf1cb12a64e09/raw/") | |
(load-resources "chaucer" "https://gist.github.com/krisfoster/693c4d908458789f1a91/raw/"))) | |
;; | |
;; Initialisation... | |
;; | |
(krf/init-remote) | |
(defun krf/dre () | |
(interactive) | |
(insert (krf/talk-like-an-egyptian "dre" 50))) | |
(defun krf/lear () | |
(interactive) | |
(insert (krf/talk-like-an-egyptian "lear" 50))) | |
(defun krf/chaucer () | |
(interactive) | |
(insert (krf/talk-like-an-egyptian "chaucer" 50))) | |
;; | |
;; Key bindings... for when I can find some unbound keys | |
;; | |
;; (global-set-key (kbd "") 'krf/dre) | |
;; (global-set-key (kbd "") 'krf/lear) | |
;; (global-set-key (kbd "") 'krf/chaucer) | |
;; And use it... | |
;; (krf/talk-like-an-egyptian "dre" 150) | |
;; (krf/talk-like-an-egyptian "chaucer" 50) | |
;; (krf/talk-like-an-egyptian "lear" 50) | |
;; or (krf/chaucer) | |
;; Next up... | |
;; * load the sample text in a better way - DONE. Now are loaded from gists directly | |
;; * allow the model to be easily updated with new samples | |
;; * serve it up as a restful web app using elnode | |
;;....word up! | |
(provide 'markov) | |
;;; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment