Created
September 18, 2016 02:51
-
-
Save chuyskywalker/06e670f522b25da8c244be69b35ad4f4 to your computer and use it in GitHub Desktop.
Count to MAXINT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
// second variant: splits the work among cores and creates a unique file for each. MUCH faster, but incurs a randomness penalty
import ( | |
"fmt" | |
"os" | |
"math" | |
"math/rand" | |
"strconv" | |
"time" | |
"sync" | |
"runtime" | |
// "log" | |
"bufio" | |
// "io" | |
"compress/gzip" | |
) | |
// main is the "one file per core" variant of the shuffler.
//
// It reads a count N from the first command-line argument, splits the integers
// [0, N) into runtime.NumCPU() interleaved stripes (stripe k holds k, k+P,
// k+2P, ... where P is the number of stripes), and starts one goroutine per
// stripe. Each goroutine shuffles its own stripe and writes it to its own
// gzip file, so the workers never contend on a shared writer. Values are only
// shuffled within their stripe, never across stripes.
//
// The process exits with status 2 for a missing or invalid argument and with
// status 1 if any stripe could not be written.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: <program> <count>")
		os.Exit(2)
	}
	end, err := strconv.Atoi(os.Args[1])
	if err != nil || end < 0 {
		fmt.Fprintf(os.Stderr, "invalid count %q: want a non-negative integer that fits in an int\n", os.Args[1])
		os.Exit(2)
	}

	numCPU := runtime.NumCPU()
	runtime.GOMAXPROCS(numCPU)

	// Every worker gets its own generator, derived from one time-based seed,
	// so the shuffles differ between runs without all goroutines serialising
	// on the lock inside the global math/rand source.
	seed := time.Now().UTC().UnixNano()

	// errs[k] is written only by worker k and read only after wg.Wait().
	errs := make([]error, numCPU)

	var wg sync.WaitGroup
	wg.Add(numCPU)
	fmt.Fprintf(os.Stderr, "starting %d threads\n", numCPU)
	for i := 0; i < numCPU; i++ {
		go func(offset int) {
			defer wg.Done()
			rng := rand.New(rand.NewSource(seed + int64(offset)))
			errs[offset] = writeShuffledStripe(end, numCPU, offset, rng)
		}(i)
	}
	wg.Wait()

	failed := false
	for offset, werr := range errs {
		if werr != nil {
			fmt.Fprintf(os.Stderr, "[%d] FAIL %v\n", offset, werr)
			failed = true
		}
	}
	if failed {
		os.Exit(1)
	}
}

// writeShuffledStripe generates stripe offset of [0, end) — the values
// offset, offset+segments, offset+2*segments, ... below end — shuffles it in
// place using rng, and writes it as gzip-compressed "index,value" lines to
// /dev/int-rand-<offset>.txt.gz. The index column is the value that occupied
// that slot before the shuffle. Progress is reported on stderr about every 1%
// of each phase. It returns the first error hit while creating, writing, or
// closing the output.
func writeShuffledStripe(end, segments, offset int, rng *rand.Rand) (err error) {
	// NOTE(review): the output lands under /dev, as in the original; confirm
	// that directory is the intended (writable) target in your environment.
	path := fmt.Sprintf("/dev/int-rand-%d.txt.gz", offset)
	outFile, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("creating %s: %w", path, err)
	}
	defer func() {
		if cerr := outFile.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing %s: %w", path, cerr)
		}
	}()
	buf := bufio.NewWriter(outFile)
	gz := gzip.NewWriter(buf)

	// Integer division alone would drop the last end%segments values, so the
	// first end%segments stripes each take one extra element.
	toCover := end / segments
	if offset < end%segments {
		toCover++
	}
	notifyEvery := int(math.Ceil(float64(toCover) / 100))
	if notifyEvery < 1 {
		notifyEvery = 1 // keeps the modulo below well-defined for an empty stripe
	}
	fmt.Fprintf(os.Stderr, "[%d] STRT covering %d notify every %d\n", offset, toCover, notifyEvery)

	progress := func(phase string, i int) {
		if i%notifyEvery == 0 {
			fmt.Fprintf(os.Stderr, "[%d] %s @ %d / %d (%f%%)\n", offset, phase, i, toCover, 100*float64(i)/float64(toCover))
		}
	}

	// Fill the stripe with its values in natural order.
	x := make([]int, toCover)
	for i := range x {
		x[i] = i*segments + offset
		progress("GEN", i)
	}

	// Shuffle in place: swap each element with a random one at or before it.
	for i := range x {
		j := rng.Intn(i + 1)
		x[i], x[j] = x[j], x[i]
		progress("FLIP", i)
	}

	// Emit "natural index,shuffled value" pairs.
	for i, val := range x {
		if _, werr := fmt.Fprintf(gz, "%d,%d\n", i*segments+offset, val); werr != nil {
			return fmt.Errorf("writing %s: %w", path, werr)
		}
		progress("OUT", i)
	}

	// Finish the gzip stream before flushing the buffer beneath it; the
	// deferred Close of the file runs last.
	if cerr := gz.Close(); cerr != nil {
		return fmt.Errorf("finishing gzip stream for %s: %w", path, cerr)
	}
	if ferr := buf.Flush(); ferr != nil {
		return fmt.Errorf("flushing %s: %w", path, ferr)
	}
	fmt.Fprintf(os.Stderr, "[%d] DONE\n", offset)
	return nil
}
/* | |
4294967295 | |
real 0m34.090s | |
user 1m55.128s | |
sys 0m3.060s | |
root@11900b180fa1:/workspace# time ./bin/inter 42949672 | |
LOAD DATA INFILE '/tmp/namedPipe' | |
INTO TABLE inty.ints | |
(rid) | |
CREATE TABLE `ints` ( | |
`lid` int(10) unsigned NOT NULL auto_increment, | |
`rid` int(10) unsigned NOT NULL, | |
PRIMARY KEY (`lid`), | |
UNIQUE KEY `rid` (`rid`) | |
) ENGINE=InnoDB DEFAULT CHARSET=latin1 | |
-- variant which synced across threads | |
root@11900b180fa1:/workspace# time ./bin/inter 4294967295 > progress 2>&1 | |
real 523m59.089s | |
user 638m36.008s | |
sys 10m14.568s | |
--- non locking, per thread file variant | |
root@11900b180fa1:/workspace# time ./bin/inter2 4294967295 > progress 2>&1 | |
real 145m24.757s | |
user 861m53.256s | |
sys 4m51.024s | |
*/ | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
// first variant, locking single file -- slower but SUPER random | |
import ( | |
"fmt" | |
"os" | |
"math" | |
"math/rand" | |
"strconv" | |
"time" | |
"sync" | |
"runtime" | |
// "log" | |
"bufio" | |
"io" | |
"compress/gzip" | |
) | |
// main is the "single locked file" variant of the shuffler.
//
// It reads a count N from the first command-line argument and writes a
// shuffle of the integers [0, N) to /dev/int-rand.txt.gz. The process exits
// with status 2 for a missing or invalid argument and with status 1 if the
// output could not be written.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: <program> <count>")
		os.Exit(2)
	}
	end, err := strconv.Atoi(os.Args[1])
	if err != nil || end < 0 {
		fmt.Fprintf(os.Stderr, "invalid count %q: want a non-negative integer that fits in an int\n", os.Args[1])
		os.Exit(2)
	}

	// NOTE(review): the output lands under /dev, as in the original; confirm
	// that directory is the intended (writable) target in your environment.
	if err := shuffleIntoSharedFile(end, "/dev/int-rand.txt.gz"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// shuffleIntoSharedFile splits [0, end) into runtime.NumCPU() interleaved
// stripes, lets one goroutine per stripe generate and shuffle its values, and
// has all of them write "line,value" records to a single gzip file at path.
// A mutex guards both the writer and the shared line counter, so the first
// column is a running record number and records from different stripes
// interleave in whatever order the goroutines win the lock. It returns the
// first error hit while creating, writing, or closing the output.
func shuffleIntoSharedFile(end int, path string) (err error) {
	numCPU := runtime.NumCPU()
	runtime.GOMAXPROCS(numCPU)
	fmt.Fprintf(os.Stderr, "starting %d threads\n", numCPU)

	outFile, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("creating %s: %w", path, err)
	}
	defer func() {
		if cerr := outFile.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing %s: %w", path, cerr)
		}
	}()
	buf := bufio.NewWriter(outFile)
	gz := gzip.NewWriter(buf)

	var (
		wg         sync.WaitGroup
		writerlock sync.Mutex // guards gz and line
		line       int        // next record number to write
	)

	// Every worker gets its own generator, derived from one time-based seed,
	// so the shuffles differ between runs without all goroutines serialising
	// on the lock inside the global math/rand source.
	seed := time.Now().UTC().UnixNano()

	// errs[k] is written only by worker k and read only after wg.Wait().
	errs := make([]error, numCPU)

	wg.Add(numCPU)
	for i := 0; i < numCPU; i++ {
		go func(segments int, offset int, w io.Writer) {
			defer wg.Done()
			rng := rand.New(rand.NewSource(seed + int64(offset)))

			// Integer division alone would drop the last end%segments
			// values, so the first end%segments stripes take one extra.
			toCover := end / segments
			if offset < end%segments {
				toCover++
			}
			notifyEvery := int(math.Ceil(float64(toCover) / 100))
			if notifyEvery < 1 {
				notifyEvery = 1 // keeps the modulo below well-defined for an empty stripe
			}
			fmt.Fprintf(os.Stderr, "[%d] STRT covering %d notify every %d\n", offset, toCover, notifyEvery)

			progress := func(phase string, i int) {
				if i%notifyEvery == 0 {
					fmt.Fprintf(os.Stderr, "[%d] %s @ %d / %d (%f%%)\n", offset, phase, i, toCover, 100*float64(i)/float64(toCover))
				}
			}

			// Fill the stripe with its values in natural order.
			x := make([]int, toCover)
			for i := range x {
				x[i] = i*segments + offset
				progress("GEN", i)
			}

			// Shuffle in place: swap each element with a random one at or
			// before it.
			for i := range x {
				j := rng.Intn(i + 1)
				x[i], x[j] = x[j], x[i]
				progress("FLIP", i)
			}

			// Write one record at a time under the lock so stripes mix.
			for i, val := range x {
				writerlock.Lock()
				_, werr := fmt.Fprintf(w, "%d,%d\n", line, val)
				line++
				writerlock.Unlock()
				if werr != nil {
					errs[offset] = fmt.Errorf("writing %s: %w", path, werr)
					return
				}
				progress("OUT", i)
			}
			fmt.Fprintf(os.Stderr, "[%d] DONE\n", offset)
		}(numCPU, i, gz)
	}
	wg.Wait()

	for _, werr := range errs {
		if werr != nil {
			return werr
		}
	}

	// Finish the gzip stream before flushing the buffer beneath it; the
	// deferred Close of the file runs last.
	if cerr := gz.Close(); cerr != nil {
		return fmt.Errorf("finishing gzip stream for %s: %w", path, cerr)
	}
	if ferr := buf.Flush(); ferr != nil {
		return fmt.Errorf("flushing %s: %w", path, ferr)
	}
	return nil
}

// Earlier single-threaded experiments, kept for reference.
//
// Using rand.Perm:
//
//	list := rand.Perm(y)
//	for _, x := range list {
//		fmt.Println(x)
//	}
//
// Using one big in-place shuffle:
//
//	// make an array of ints large enough to hold the whole set
//	x := make([]int, y)
//	// populate the array with values
//	for i := 0; i < y; i++ {
//		x[i] = i
//	}
//	// and do a fancy, fancy in-place random sort!
//	for i := range x {
//		j := rand.Intn(i + 1)
//		x[i], x[j] = x[j], x[i]
//	}
//	// then you are done, write it all out
//	for i := range x {
//		fmt.Println(x[i])
//	}
/* | |
4294967295 | |
real 0m34.090s | |
user 1m55.128s | |
sys 0m3.060s | |
root@11900b180fa1:/workspace# time ./bin/inter 42949672 | |
LOAD DATA INFILE '/tmp/namedPipe' | |
INTO TABLE inty.ints | |
(rid) | |
CREATE TABLE `ints` ( | |
`lid` int(10) unsigned NOT NULL auto_increment, | |
`rid` int(10) unsigned NOT NULL, | |
PRIMARY KEY (`lid`), | |
UNIQUE KEY `rid` (`rid`) | |
) ENGINE=InnoDB DEFAULT CHARSET=latin1 | |
*/ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment