Skip to content

Instantly share code, notes, and snippets.

@BenLubar
Last active December 17, 2015 20:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BenLubar/5668009 to your computer and use it in GitHub Desktop.
Save BenLubar/5668009 to your computer and use it in GitHub Desktop.
Couchbase Markov chain generator
package main
import (
"bufio"
"crypto/sha1"
"encoding/hex"
"flag"
"log"
"os"
"time"
"github.com/couchbaselabs/go-couchbase"
)
// Bucket is the Couchbase bucket handle used by the program. main assigns it
// from couchbase.GetBucket before any document is stored.
var Bucket *couchbase.Bucket
// main connects to the "markov" bucket of the local Couchbase server, installs
// the "markov" design document, and then stores every line of each file named
// on the command line as a document of Type "text". Each document is keyed by
// the hex-encoded SHA-1 of the line, so identical lines share one key.
func main() {
	flag.Parse()

	var err error
	Bucket, err = couchbase.GetBucket("http://127.0.0.1:18091/", "default", "markov")
	if err != nil {
		log.Fatalf("getting bucket: %v", err)
	}
	defer Bucket.Close()

	// The view sources below are sent to the server verbatim.
	//
	// Map: for every "text" document, emits the five preceding words as the
	// key and the following word as the value, plus a final '' value marking
	// the end of the document.
	// Reduce: counts the occurrences of each emitted word and returns a list
	// of {w, c} objects sorted by descending count.
	err = Bucket.PutDDoc("markov", couchbase.DDocJSON{
		Views: map[string]couchbase.ViewDefinition{
			"markov": {
				Map: `function( doc, meta ) {
if ( doc.Type != 'text' )
return;
var words = ['', '', '', '', ''];
var text = doc.Text.split( /\s+/g );
for ( var i = 0; i < text.length; i++ ) {
var word = text[i];
emit( words, word );
words.shift();
words.push( word );
}
emit( words, '' );
}`,
				Reduce: `function( k, v, rereduce ) {
var counts = {};
if ( rereduce )
for ( var i = 0; i < v.length; i++ )
for ( var j = 0; j < v[i].length; j++ ) {
counts[v[i][j].w] = counts[v[i][j].w] || 0;
counts[v[i][j].w] += v[i][j].c;
}
else
for ( var i = 0; i < v.length; i++ ) {
counts[v[i]] = counts[v[i]] || 0;
counts[v[i]]++;
}
var result = [];
for ( var w in counts ) {
result.push( {w: w, c: counts[w]} );
}
return result.sort( function( a, b ) {
return b.c - a.c;
} );
}`,
			},
		},
	})
	if err != nil {
		// Previously this error was dropped silently. Report it, but keep
		// going: the documents can still be loaded without the view.
		log.Printf("installing design document: %v", err)
	}

	sha := sha1.New()
	for _, fn := range flag.Args() {
		f, err := os.Open(fn)
		if err != nil {
			log.Fatalf("opening %s: %v", fn, err)
		}

		s := bufio.NewScanner(f)
		for s.Scan() {
			sha.Reset()
			sha.Write(s.Bytes())
			key := hex.EncodeToString(sha.Sum(nil))
			line := s.Text()

			// Retry the write once per second until the server accepts it.
			for {
				err := Bucket.Set(key, 0, struct {
					Type, Text string
				}{
					Type: "text",
					Text: line,
				})
				if err == nil {
					break
				}
				log.Printf("%s: %v", key, err)
				time.Sleep(time.Second)
			}
		}

		// Close the file before reporting a scan error so the descriptor is
		// released on every path.
		err = s.Err()
		f.Close()
		if err != nil {
			log.Fatalf("reading %s: %v", fn, err)
		}
	}
}
package main
import (
	"fmt"
	"math/rand"
	"sort"
	"strings"
	"sync"
)
// PrefixLength is the number of preceding words that form a chain key; it is
// the length of the Prefix array in MapResult.
const PrefixLength = 2
// Doc is a single input document handed to the map phase.
type Doc struct {
// Text is the raw text of the document; Mapper splits it into words.
Text string
}
// MapResult is one pair emitted by the map phase: a word together with the
// words that preceded it.
type MapResult struct {
// Prefix holds the preceding words, most recent first; unfilled slots are "".
Prefix [PrefixLength]string
// Suffix is the word that followed Prefix, or "" at the end of a document.
Suffix string
}
// ReduceResult is the aggregated outcome of reducing the map results for one
// prefix.
type ReduceResult struct {
// Total is the number of map results that were counted.
Total int
// Suffix maps each following word to the number of times it was counted.
Suffix map[string]int
}
// Input sends the built-in sample documents on docs, one Doc per line of
// text, and closes the channel once all of them have been delivered.
func Input(docs chan<- Doc) {
	defer close(docs)

	corpus := [...]string{
		"I am not a number!",
		"I am a free man!",
	}
	for i := 0; i < len(corpus); i++ {
		var doc Doc
		doc.Text = corpus[i]
		docs <- doc
	}
}
// Map runs the map phase: it starts four Mapper goroutines that share the
// docs and emit channels, blocks until every one of them has finished, and
// then closes emit so the consumer knows no further results will arrive.
func Map(docs <-chan Doc, emit chan<- MapResult) {
	const workers = 4

	var pool sync.WaitGroup
	for remaining := workers; remaining > 0; remaining-- {
		pool.Add(1)
		go Mapper(docs, emit, &pool)
	}

	pool.Wait()
	close(emit)
}
// Mapper is a single map worker. For every document received on docs it walks
// the whitespace-separated words and emits one MapResult per word, keyed by
// the words seen before it (most recent first). After the last word it emits
// a result with an empty Suffix to mark the end of the document. It calls
// wg.Done when docs is closed and drained.
func Mapper(docs <-chan Doc, emit chan<- MapResult, wg *sync.WaitGroup) {
	defer wg.Done()

	for doc := range docs {
		var prefix [PrefixLength]string

		for _, word := range strings.Fields(doc.Text) {
			emit <- MapResult{Prefix: prefix, Suffix: word}

			// Slide the window: shift every slot one place towards the end
			// and put the word just emitted at the front.
			for i := PrefixLength - 1; i > 0; i-- {
				prefix[i] = prefix[i-1]
			}
			prefix[0] = word
		}

		emit <- MapResult{Prefix: prefix}
	}
}
// Reduce consumes emit until it is closed, keeps only the results whose
// Prefix equals key, and returns their combined suffix counts.
//
// Matching results are handed to Reducer goroutines in batches of ten. The
// partial results those produce are merged by Rereducer goroutines, again ten
// at a time, and a final Rereducer pass combines whatever is left into the
// returned value. When nothing matches, the result has a Total of zero and an
// empty Suffix map.
func Reduce(emit <-chan MapResult, key [PrefixLength]string) ReduceResult {
	const batch = 10

	var (
		results  = make(chan ReduceResult)
		pending  []MapResult    // matching inputs not yet handed to a Reducer
		partials []ReduceResult // reduced values not yet merged
		inFlight int            // goroutines that still owe a value on results
	)

	// dispatch starts a Reducer over the buffered inputs.
	dispatch := func() {
		inFlight++
		go Reducer(pending, results)
		pending = nil
	}

	// collect records one finished partial result and starts a Rereducer
	// once a full batch of partials has accumulated.
	collect := func(part ReduceResult) {
		inFlight--
		partials = append(partials, part)
		if len(partials) >= batch {
			inFlight++
			go Rereducer(partials, results)
			partials = nil
		}
	}

	// Interleave reading inputs with collecting results so that workers
	// sending on the unbuffered results channel are never left blocked.
	for open := true; open; {
		select {
		case in, ok := <-emit:
			switch {
			case !ok:
				if len(pending) > 0 {
					dispatch()
				}
				open = false
			case in.Prefix == key:
				pending = append(pending, in)
				if len(pending) >= batch {
					dispatch()
				}
			}
		case part := <-results:
			collect(part)
		}
	}

	// The input is exhausted; wait for every outstanding worker.
	for inFlight > 0 {
		collect(<-results)
	}

	go Rereducer(partials, results)
	return <-results
}
// Reducer counts how often each Suffix occurs in one batch of map results and
// sends a single ReduceResult on output. Total is the number of results in
// the batch.
func Reducer(emit []MapResult, output chan<- ReduceResult) {
	counts := make(map[string]int)
	for i := range emit {
		counts[emit[i].Suffix]++
	}
	output <- ReduceResult{Total: len(emit), Suffix: counts}
}
// Rereducer merges several partial ReduceResults into one by summing their
// totals and their per-suffix counts, then sends the merged value on output.
func Rereducer(emit []ReduceResult, output chan<- ReduceResult) {
	merged := ReduceResult{Suffix: map[string]int{}}
	for _, part := range emit {
		merged.Total += part.Total
		for suffix, count := range part.Suffix {
			merged.Suffix[suffix] = merged.Suffix[suffix] + count
		}
	}
	output <- merged
}
// ChainLink picks the word that follows prefix. It runs the whole
// Input -> Map -> Reduce pipeline for the given prefix and then draws one of
// the observed suffixes at random, weighted by how often each was seen.
//
// It returns "" when the chain should end: either the end-of-document marker
// was drawn, or no suffix was observed for prefix at all.
func ChainLink(r *rand.Rand, prefix [PrefixLength]string) string {
	docs := make(chan Doc)
	emit := make(chan MapResult)
	go Input(docs)
	go Map(docs, emit)
	possible := Reduce(emit, prefix)

	// rand.Intn panics when its argument is not positive, which would happen
	// for a prefix that never occurs (or an empty input). End the chain
	// instead.
	if possible.Total <= 0 {
		return ""
	}

	// Map iteration order is randomized, so walk the suffixes in sorted order;
	// otherwise the same seed could yield a different word on every run.
	suffixes := make([]string, 0, len(possible.Suffix))
	for suffix := range possible.Suffix {
		suffixes = append(suffixes, suffix)
	}
	sort.Strings(suffixes)

	n := r.Intn(possible.Total)
	for _, suffix := range suffixes {
		n -= possible.Suffix[suffix]
		if n < 0 {
			return suffix
		}
	}
	return ""
}
// Chain generates one sentence. Starting from an empty prefix it repeatedly
// asks ChainLink for the next word, slides that word into the front of the
// prefix, and stops when ChainLink returns "". The collected words are
// returned joined by single spaces.
func Chain(r *rand.Rand) string {
	var (
		prefix   [PrefixLength]string
		sentence []string
	)

	for word := ChainLink(r, prefix); word != ""; word = ChainLink(r, prefix) {
		sentence = append(sentence, word)

		// Most recent word goes first; older words move one slot back.
		copy(prefix[1:], prefix[:PrefixLength-1])
		prefix[0] = word
	}

	return strings.Join(sentence, " ")
}
// main prints ten generated chains, one per line, drawing from a random
// source seeded with 0.
func main() {
	const chains = 10

	rng := rand.New(rand.NewSource(0))
	for printed := 0; printed < chains; printed++ {
		fmt.Println(Chain(rng))
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment