Skip to content

Instantly share code, notes, and snippets.

@chuyskywalker
Created September 18, 2016 02:51
Show Gist options
  • Save chuyskywalker/06e670f522b25da8c244be69b35ad4f4 to your computer and use it in GitHub Desktop.
Save chuyskywalker/06e670f522b25da8c244be69b35ad4f4 to your computer and use it in GitHub Desktop.
Count to MAXINT
package main
// second variant: splits the work among cores and creates a unique file for each. MUCH faster, but incurs a randomness penalty
import (
"fmt"
"os"
"math"
"math/rand"
"strconv"
"time"
"sync"
"runtime"
// "log"
"bufio"
// "io"
"compress/gzip"
)
// main shuffles the integers [0, N) and writes them out as gzip-compressed
// "index,value" lines, where N is the first command-line argument.
//
// The range is split into runtime.NumCPU() interleaved segments: segment k
// holds k, k+numCPU, k+2*numCPU, ... Each segment is shuffled and written by
// its own goroutine to /dev/int-rand-<k>.txt.gz, so a value only ever moves
// within its own segment.
//
// The process exits with status 2 on a missing or invalid argument and with
// status 1 if any segment could not be written.
func main() {
	// let's be random each time, eh?
	rand.Seed(time.Now().UTC().UnixNano())

	// get the cli param for how many uniques to create and shuffle
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "missing argument: number of integers to generate")
		os.Exit(2)
	}
	y, err := strconv.Atoi(os.Args[1])
	if err != nil || y < 0 {
		fmt.Fprintf(os.Stderr, "invalid count %q: want a non-negative integer\n", os.Args[1])
		os.Exit(2)
	}

	numCPU := runtime.NumCPU()
	runtime.GOMAXPROCS(numCPU)

	// One slot per goroutine so each can record its result without locking.
	errs := make([]error, numCPU)
	var wg sync.WaitGroup
	wg.Add(numCPU)
	fmt.Fprintf(os.Stderr, "starting %d threads\n", numCPU)
	for i := 0; i < numCPU; i++ {
		go func(end int, segments int, offset int) {
			defer wg.Done()
			errs[offset] = writeShuffledSegment(end, segments, offset)
		}(y, numCPU, i)
	}
	wg.Wait()

	failed := false
	for offset, err := range errs {
		if err != nil {
			fmt.Fprintf(os.Stderr, "[%d] FAIL %v\n", offset, err)
			failed = true
		}
	}
	if failed {
		os.Exit(1)
	}
}

// writeShuffledSegment builds the values offset, offset+segments,
// offset+2*segments, ... that are below end, shuffles them in place, and
// writes them to /dev/int-rand-<offset>.txt.gz as "index,value" lines, where
// index is the unshuffled value for that position. Progress is reported on
// stderr roughly every 1% of the segment.
//
// It returns the first error hit while creating, writing, or finishing the
// output file.
func writeShuffledSegment(end int, segments int, offset int) (err error) {
	outFile, err := os.Create(fmt.Sprintf("/dev/int-rand-%d.txt.gz", offset))
	if err != nil {
		return err
	}
	buf := bufio.NewWriter(outFile)
	gz := gzip.NewWriter(buf)
	// Finish the layers inside-out (gzip trailer, buffered bytes, file) on
	// every return path, keeping the first error seen.
	defer func() {
		for _, finish := range []func() error{gz.Close, buf.Flush, outFile.Close} {
			if ferr := finish(); ferr != nil && err == nil {
				err = ferr
			}
		}
	}()

	// Count the values offset + k*segments that fall below end. A plain
	// end/segments would drop the final end%segments values of the range.
	toCover := 0
	if offset < end {
		toCover = (end-offset-1)/segments + 1
	}
	// notifyEvery is 0 only when toCover is 0, in which case none of the
	// loops below run and the modulo is never evaluated.
	notifyEvery := int(math.Ceil(float64(toCover) / 100))
	fmt.Fprintf(os.Stderr, "[%d] STRT covering %d notify every %d\n", offset, toCover, notifyEvery)

	x := make([]int, toCover)
	// populate the array with this segment's values
	for k := 0; k < toCover; k++ {
		x[k] = (k * segments) + offset
		if k%notifyEvery == 0 {
			fmt.Fprintf(os.Stderr, "[%d] GEN @ %d / %d (%f%%)\n", offset, k, toCover, 100*float64(k)/float64(toCover))
		}
	}

	// and do a fancy, fancy in-place random sort!
	for k := range x {
		j := rand.Intn(k + 1)
		x[k], x[j] = x[j], x[k]
		if k%notifyEvery == 0 {
			fmt.Fprintf(os.Stderr, "[%d] FLIP @ %d / %d (%f%%)\n", offset, k, toCover, 100*float64(k)/float64(toCover))
		}
	}

	// then write it all to a nice big file
	for k, val := range x {
		if _, err := fmt.Fprintf(gz, "%d,%d\n", (k*segments)+offset, val); err != nil {
			return err
		}
		if k%notifyEvery == 0 {
			fmt.Fprintf(os.Stderr, "[%d] OUT @ %d / %d (%f%%)\n", offset, k, toCover, 100*float64(k)/float64(toCover))
		}
	}
	fmt.Fprintf(os.Stderr, "[%d] DONE\n", offset)
	return nil
}
/*
4294967295
real 0m34.090s
user 1m55.128s
sys 0m3.060s
root@11900b180fa1:/workspace# time ./bin/inter 42949672
LOAD DATA INFILE '/tmp/namedPipe'
INTO TABLE inty.ints
(rid)
CREATE TABLE `ints` (
`lid` int(10) unsigned NOT NULL auto_increment,
`rid` int(10) unsigned NOT NULL,
PRIMARY KEY (`lid`),
UNIQUE KEY `rid` (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
-- variant which synced across threads
root@11900b180fa1:/workspace# time ./bin/inter 4294967295 > progress 2>&1
real 523m59.089s
user 638m36.008s
sys 10m14.568s
--- non locking, per thread file variant
root@11900b180fa1:/workspace# time ./bin/inter2 4294967295 > progress 2>&1
real 145m24.757s
user 861m53.256s
sys 4m51.024s
*/
package main
// first variant, locking single file -- slower but SUPER random
import (
"fmt"
"os"
"math"
"math/rand"
"strconv"
"time"
"sync"
"runtime"
// "log"
"bufio"
"io"
"compress/gzip"
)
// main shuffles the integers [0, N) and writes them as gzip-compressed
// "line,value" rows to the single file /dev/int-rand.txt.gz, where N is the
// first command-line argument.
//
// The range is split into runtime.NumCPU() interleaved segments: segment k
// holds k, k+numCPU, k+2*numCPU, ... Each goroutine shuffles its own segment
// and then emits it one row at a time under a shared lock, so rows from
// different segments interleave in whatever order the goroutines acquire the
// lock. The line column is a global counter incremented under that lock.
//
// The process exits with status 2 on a missing or invalid argument and with
// status 1 if the output could not be created, written, or finished.
func main() {
	// let's be random each time, eh?
	rand.Seed(time.Now().UTC().UnixNano())

	// get the cli param for how many uniques to create and shuffle
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "missing argument: number of integers to generate")
		os.Exit(2)
	}
	y, err := strconv.Atoi(os.Args[1])
	if err != nil || y < 0 {
		fmt.Fprintf(os.Stderr, "invalid count %q: want a non-negative integer\n", os.Args[1])
		os.Exit(2)
	}

	numCPU := runtime.NumCPU()
	runtime.GOMAXPROCS(numCPU)

	outFile, err := os.Create("/dev/int-rand.txt.gz")
	if err != nil {
		fmt.Fprintf(os.Stderr, "creating output: %v\n", err)
		os.Exit(1)
	}
	buf := bufio.NewWriter(outFile)
	gz := gzip.NewWriter(buf)

	var (
		wg         sync.WaitGroup
		writerlock sync.Mutex // guards line, writeErr, and writes to gz
		line       int
		writeErr   error // first write failure; once set, all workers stop
	)
	wg.Add(numCPU)
	fmt.Fprintf(os.Stderr, "starting %d threads\n", numCPU)

	for i := 0; i < numCPU; i++ {
		go func(end int, segments int, offset int, w io.Writer) {
			defer wg.Done()

			// Count the values offset + k*segments that fall below end. A
			// plain end/segments would drop the final end%segments values.
			toCover := 0
			if offset < end {
				toCover = (end-offset-1)/segments + 1
			}
			// notifyEvery is 0 only when toCover is 0, in which case none of
			// the loops below run and the modulo is never evaluated.
			notifyEvery := int(math.Ceil(float64(toCover) / 100))
			fmt.Fprintf(os.Stderr, "[%d] STRT covering %d notify every %d\n", offset, toCover, notifyEvery)

			x := make([]int, toCover)
			// populate the array with this segment's values
			for k := 0; k < toCover; k++ {
				x[k] = (k * segments) + offset
				if k%notifyEvery == 0 {
					fmt.Fprintf(os.Stderr, "[%d] GEN @ %d / %d (%f%%)\n", offset, k, toCover, 100*float64(k)/float64(toCover))
				}
			}

			// and do a fancy, fancy in-place random sort!
			for k := range x {
				j := rand.Intn(k + 1)
				x[k], x[j] = x[j], x[k]
				if k%notifyEvery == 0 {
					fmt.Fprintf(os.Stderr, "[%d] FLIP @ %d / %d (%f%%)\n", offset, k, toCover, 100*float64(k)/float64(toCover))
				}
			}

			// then write it all to the shared file, one locked row at a time
			for k, val := range x {
				writerlock.Lock()
				if writeErr == nil {
					if _, err := fmt.Fprintf(w, "%d,%d\n", line, val); err != nil {
						writeErr = err
					} else {
						line++
					}
				}
				stop := writeErr != nil
				writerlock.Unlock()
				if stop {
					// The output is already broken; main reports the error.
					return
				}
				if k%notifyEvery == 0 {
					fmt.Fprintf(os.Stderr, "[%d] OUT @ %d / %d (%f%%)\n", offset, k, toCover, 100*float64(k)/float64(toCover))
				}
			}
			fmt.Fprintf(os.Stderr, "[%d] DONE\n", offset)
		}(y, numCPU, i, gz)
	}
	wg.Wait()

	failed := false
	if writeErr != nil {
		fmt.Fprintf(os.Stderr, "writing output: %v\n", writeErr)
		failed = true
	}
	// Finish the layers inside-out (gzip trailer, buffered bytes, file).
	// This is done explicitly rather than with defer because os.Exit below
	// would skip deferred calls.
	for _, finish := range []func() error{gz.Close, buf.Flush, outFile.Close} {
		if err := finish(); err != nil {
			fmt.Fprintf(os.Stderr, "finishing output: %v\n", err)
			failed = true
		}
	}
	if failed {
		os.Exit(1)
	}
}
/*
4294967295
real 0m34.090s
user 1m55.128s
sys 0m3.060s
root@11900b180fa1:/workspace# time ./bin/inter 42949672
LOAD DATA INFILE '/tmp/namedPipe'
INTO TABLE inty.ints
(rid)
CREATE TABLE `ints` (
`lid` int(10) unsigned NOT NULL auto_increment,
`rid` int(10) unsigned NOT NULL,
PRIMARY KEY (`lid`),
UNIQUE KEY `rid` (`rid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment