Created
February 17, 2012 16:01
-
-
Save rupa/1854147 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Really dumb Markov-chain babbler: learns word pairs into an SQLite
# database and strings them back together on request.

# SQLite database file that stores the learned word pairs.
db=~/.smirnoff.db

# Minimum number of words a line must contain before it is learned.
min=3
#######################################
# Print command-line usage and the interactive dot-commands.
# Arguments: none
# Outputs:   usage text on stdout
#######################################
help() {
  local me
  # Quoted so a script path containing spaces still yields one name.
  me=$(basename -- "$0")
  printf '%s\n' \
    "$me [ -init | -cram | -help ]" \
    " -cram - feed a file on STDIN" \
    " -help - help" \
    " -init - new brain" \
    "interactive commands" \
    " .e - exit" \
    " .h - help" \
    " .r <seed> - say something" \
    " .v - vacuum db" \
    "everything else teaches $me"
}
#######################################
# Create a fresh, empty brain, discarding any existing one.
# Globals:   db (read) - path of the SQLite database file
# Arguments: none are used (callers may pass the path; it is ignored)
# Outputs:   "<db> created" on stdout, or "fail" on stderr
# Returns:   0 when the database file exists afterwards, non-zero otherwise
#######################################
init_db() {
  # Start from scratch: remove a previous brain if there is one.
  if [[ -f $db ]]; then
    rm -f -- "$db" || return
  fi

  local status=0
  # Quoted delimiter: the schema is literal SQL, nothing to expand.
  sqlite3 "$db" <<'SQL' || status=$?
BEGIN TRANSACTION;
CREATE TABLE words(
  first TEXT NOT NULL COLLATE NOCASE,
  last TEXT NOT NULL COLLATE NOCASE,
  cnt int NOT NULL DEFAULT 1,
  PRIMARY KEY(first,last)
);
CREATE INDEX pre on words(first COLLATE NOCASE);
CREATE INDEX post on words(last COLLATE NOCASE);
COMMIT;
SQL

  if (( status == 0 )) && [[ -f $db ]]; then
    echo "$db created"
  else
    echo "fail" >&2
    return 1
  fi
}
#######################################
# Compact the brain database.
# Globals:   db (read) - path of the SQLite database file
# Returns:   exit status of sqlite3
#######################################
vacuum() {
  # Plain VACUUM: an argument to VACUUM is read as a schema name by
  # current SQLite, so naming the table would be rejected.
  sqlite3 "$db" "VACUUM;"
}
#######################################
# Bulk-teach: read lines from stdin, sanitize each one and hand its
# words to parse.
# Arguments: none (input comes from stdin)
#######################################
cram() {
  local line clean
  # -r keeps backslashes literal; the || clause also processes a final
  # line that has no trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    # Tabs separate words just like spaces do.
    clean=${line//$'\t'/ }
    # Keep only letters, digits, spaces and dots. Done with parameter
    # expansion so the raw text is never exposed to globbing.
    clean=${clean//[^A-Za-z0-9 .]/}
    # Word splitting is intentional here: each word becomes one argument.
    # Safe because no glob characters survive the filter above.
    # shellcheck disable=SC2086
    parse $clean
  done
}
#######################################
# Record one observation of the word pair (first -> last), creating the
# row with a count of 1 or incrementing the existing count.
# Globals:   db (read) - path of the SQLite database file
# Arguments: $1 - first word of the pair
#            $2 - word that followed it
# Returns:   exit status of sqlite3
#######################################
learn() {
  local q="'"
  # Double any single quote so the value cannot end the SQL literal.
  local first=${1//$q/$q$q}
  local last=${2//$q/$q$q}

  # The subquery yields NULL for a pair not seen before; COALESCE turns
  # that into 0 so a new pair starts with a count of 1.
  sqlite3 "$db" <<!
INSERT OR REPLACE INTO words VALUES(
  '$first',
  '$last',
  COALESCE((SELECT cnt FROM words WHERE first='$first' AND last='$last'), 0)+1
);
!
}
function next { | |
echo $(sqlite3 $db <<! | |
SELECT $1, cnt from words WHERE $2='$3'; | |
! | |
) | perl -e ' | |
$/=" "; | |
while( <STDIN> ) { | |
@a = split(/\|/,$_); | |
while( $a[1]-- ) { | |
push(@all, $a[0]); | |
} | |
} | |
print $all[rand @all]."\n"; | |
' | |
} | |
#######################################
# Walk the word graph from a starting word until the terminator marker
# (or a dead end) is reached, collecting the words visited.
# Arguments: $1 - column to return from each lookup (passed to next)
#            $2 - column to match in each lookup (passed to next)
#            $3 - terminator marker that ends the walk
#            $4 - starting word (not included in the output)
# Outputs:   the visited words, space separated, on stdout
#######################################
chain() {
  local want=$1 key=$2 stop=$3 word=$4
  local out=

  # Starting on the terminator: nothing to walk.
  [[ $word == "$stop" ]] && return 0

  while :; do
    word=$(next "$want" "$key" "$word")
    # Stop at the terminator or when the word has no known neighbour,
    # so neither an extra lookup nor an empty word is added.
    if [[ -z $word || $word == "$stop" ]]; then
      break
    fi
    out="${out:+$out }$word"
  done

  printf '%s\n' "$out"
}
#######################################
# Build a sentence around a seed word: the words leading up to it
# (walked backwards, then reversed), the seed, and the words after it.
# Arguments: $1 - seed word
# Outputs:   the sentence on stdout
#######################################
markov() {
  local word=$1 before after

  # The backward walk yields words nearest-first; awk flips them into
  # reading order. "%s" keeps the words from being used as a format.
  before=$(chain first last __BEGIN__ "$word" |
    awk '{ for (i = NF; i > 0; i--) printf "%s ", $i }')
  after=$(chain last first __END__ "$word")

  # Add the separating space only when there is something after the seed.
  printf '%s\n' "${before}${word}${after:+ $after}"
}
#######################################
# Say something: build a sentence around the given seed word, or around
# a randomly chosen learned word when no seed is given.
# Globals:   db (read) - path of the SQLite database file
# Arguments: $1 - optional seed word
# Outputs:   the sentence on stdout (via markov)
# Returns:   non-zero when no seed is given and none can be picked
#######################################
seed() {
  if [[ -n ${1-} ]]; then
    markov "$1"
    return
  fi

  local word
  # Let SQLite pick the random row: row ids are not guaranteed to be
  # contiguous, so indexing by a random id can miss. The __BEGIN__
  # marker is skipped because it is not a real word.
  word=$(sqlite3 "$db" \
    "SELECT first FROM words WHERE first <> '__BEGIN__' ORDER BY RANDOM() LIMIT 1;") || return

  # Nothing learned yet: nothing to say.
  [[ -n $word ]] || return 1
  markov "$word"
}
#######################################
# Handle one sanitized input line: run a dot-command, or learn the line
# as a chain of word pairs.
# Globals:   min (read) - minimum number of words worth learning
# Arguments: the words of the line, one per argument
#######################################
parse() {
  case "${1-}" in
    .e) exit ;;
    .h) help; return ;;
    .r) shift; seed "${1-}"; return ;;
    .v) vacuum; return ;;
    .*) return ;;   # unknown dot-command: ignore the line
  esac

  # Too short to be worth learning.
  if (( $# == 0 || $# < min )); then
    return 0
  fi

  local -a words=("$@")
  local last=$(( $# - 1 ))
  local i cur dots

  learn __BEGIN__ "${words[0]}"
  for (( i = 0; i < last; i++ )); do
    cur=${words[i]}
    # A word containing exactly one dot closes the sentence: it is
    # linked to __END__ and the following word starts a new sentence.
    dots=${cur//[^.]/}
    if [[ $dots == . ]]; then
      learn "$cur" __END__
      learn __BEGIN__ "${words[i + 1]}"
    else
      learn "$cur" "${words[i + 1]}"
    fi
  done
  # The final word always ends the sentence.
  learn "${words[last]}" __END__
}
# Abort with a hint unless the brain database exists.
need_db() {
  if [[ ! -f $db ]]; then
    echo "$db not found, run \"$(basename -- "$0") -init\"" >&2
    exit 1
  fi
}

# Command-line options, handled in the order given.
for opt in "$@"; do
  case "$opt" in
    -cram) need_db; cram; exit ;;   # teaching needs an existing brain
    -help) help; exit ;;
    -init) init_db "$db" ;;
  esac
done

need_db

# Interactive loop: every line is sanitized, then run as a dot-command
# or learned. -r keeps backslashes literal; the || clause also handles
# a final line that has no trailing newline.
while IFS= read -r line || [[ -n $line ]]; do
  # Tabs separate words just like spaces do.
  line=${line//$'\t'/ }
  # Keep only letters, digits, spaces and dots. Done with parameter
  # expansion so the raw text is never exposed to globbing.
  taint=${line//[^A-Za-z0-9 .]/}
  # Word splitting is intentional here: each word becomes one argument.
  # shellcheck disable=SC2086
  parse $taint
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment