Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Using Go for embarrassingly parallel scripts
package main
import (
"fmt"
"net"
"io/ioutil"
"strings"
)
// DomainMap pairs a domain name with the address it was resolved to.
type DomainMap struct {
	// Domain is the name that was looked up; IpMapping is the textual form
	// of the address chosen for it, or "" when no address was found.
	Domain, IpMapping string
}
// retrieveDomains reads domains.txt from the current working directory and
// returns its lines, one domain per entry.
//
// Every entry has its surrounding whitespace removed, so a file saved with
// CRLF line endings does not leave a stray "\r" on each domain. A read
// failure is reported on standard output instead of being silently dropped.
// The result always holds at least one entry (a single empty string when the
// file is missing or empty), exactly as before, so existing callers see no
// change in the shape of the slice.
func retrieveDomains() []string {
	contents, err := ioutil.ReadFile("domains.txt")
	if err != nil {
		// Keep going with empty contents; the caller still gets a slice.
		fmt.Println("Unable to read domains.txt:", err)
	}
	lines := strings.Split(strings.TrimSpace(string(contents)), "\n")
	for i, line := range lines {
		lines[i] = strings.TrimSpace(line)
	}
	return lines
}
// domainLookup resolves domain and sends the outcome on returnChannel.
//
// The lookup is best effort: its error is deliberately ignored, and a domain
// with no addresses is reported with an empty IP string. The send blocks
// until the receiver is ready when the channel is unbuffered.
func domainLookup(returnChannel chan DomainMap, domain string) {
	resolved, _ := net.LookupIP(domain)

	// A lookup may yield several addresses; only the first is kept.
	var first string
	if len(resolved) != 0 {
		first = resolved[0].String()
	}

	fmt.Println("Mapping: ", domain, "->", first)

	// Hand the result back to whoever is collecting on the channel.
	returnChannel <- DomainMap{Domain: domain, IpMapping: first}
}
// waitForDomains collects exactly numberOfDomains results from
// responseChannel and returns them in the order they arrive.
//
// The second set of parentheses in the signature is the return type. The
// result is given a name (domainMapping) so it can be used as an ordinary
// variable inside the body.
//
// A non-positive numberOfDomains returns immediately with no results and
// without receiving anything, rather than blocking on the channel.
func waitForDomains(responseChannel chan DomainMap, numberOfDomains int) (domainMapping []DomainMap) {
	if numberOfDomains <= 0 {
		return nil
	}
	// The final length is known, so allocate the backing array once.
	domainMapping = make([]DomainMap, 0, numberOfDomains)
	for len(domainMapping) < numberOfDomains {
		domainMapping = append(domainMapping, <-responseChannel)
	}
	return domainMapping
}
// main resolves every domain from retrieveDomains concurrently and prints
// the collected mappings.
func main() {
	names := retrieveDomains()

	// Every lookup goroutine reports back on this shared channel.
	results := make(chan DomainMap)

	// Fan out: one goroutine per domain.
	for i := range names {
		go domainLookup(results, names[i])
	}

	// Fan in: block until one result per domain has been received.
	fmt.Println(waitForDomains(results, len(names)))
}
package main
import (
"fmt"
"net"
"io/ioutil"
"strings"
)
// DomainMap records the outcome of resolving one domain name.
type DomainMap struct {
	// Domain is the name that was queried; IpMapping is the string form of
	// the address picked for it, left empty when the lookup found nothing.
	Domain, IpMapping string
}
// retrieveDomains reads domains.txt from the current working directory and
// returns the domains it lists, one per line.
//
// Surrounding whitespace (including the "\r" of CRLF line endings) is
// stripped from every line and blank lines are skipped. If the file cannot
// be read, the error is reported on standard output and an empty list is
// returned, so the caller never looks up a bogus empty domain.
func retrieveDomains() []string {
	contents, err := ioutil.ReadFile("domains.txt")
	if err != nil {
		fmt.Println("Unable to read domains.txt:", err)
		return nil
	}

	var domains []string
	for _, line := range strings.Split(string(contents), "\n") {
		if name := strings.TrimSpace(line); name != "" {
			domains = append(domains, name)
		}
	}
	return domains
}
// domainLookup resolves domain and returns it paired with one address.
//
// The lookup is best effort: its error is deliberately ignored, and a domain
// with no addresses comes back with an empty IP string.
func domainLookup(domain string) DomainMap {
	resolved, _ := net.LookupIP(domain)

	// A lookup may yield several addresses; only the first is kept.
	var first string
	if len(resolved) != 0 {
		first = resolved[0].String()
	}

	fmt.Println("Mapping: ", domain, "->", first)
	return DomainMap{Domain: domain, IpMapping: first}
}
// main resolves every domain from retrieveDomains one after another on the
// calling goroutine and prints the collected mappings.
func main() {
	domains := retrieveDomains()

	// The number of results is known up front, so size the slice once.
	domainMapping := make([]DomainMap, 0, len(domains))

	// Sequential baseline: each lookup finishes before the next one starts.
	for _, domain := range domains {
		domainMapping = append(domainMapping, domainLookup(domain))
	}
	fmt.Println(domainMapping)
}
] wc -l domains.txt
783 domains.txt
] time go run domain_lookup_parallel.go
real 0m5.743s
user 0m0.359s
sys 0m0.355s
] time go run domain_lookup_sequential.go
real 0m43.794s
user 0m0.320s
sys 0m0.200s
@pedromg

This comment has been minimized.

Copy link

@pedromg pedromg commented Dec 3, 2012

Hi, do you have any stats for memory usage on both solutions?

@SEJeff

This comment has been minimized.

Copy link

@SEJeff SEJeff commented Dec 4, 2012

Have you considered adding:
runtime.GOMAXPROCS(runtime.NumCPU())

@bentolor

This comment has been minimized.

Copy link

@bentolor bentolor commented Dec 5, 2012

As this operation is network-bound rather than CPU-bound, values much higher than runtime.NumCPU() may make sense.

@ghost

This comment has been minimized.

Copy link

@ghost ghost commented Jan 6, 2013

I received an error with parallel version:

$ ./url-go
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x14 pc=0x80848fb]

goroutine 3 [running]:
net.cgoLookupIPCNAME(0x186250f0, 0x11, 0x0, 0x0, 0x0, ...)
net/_obj/_cgo_gotypes.go:183 +0x1cc
net.cgoLookupIP(0x186250f0, 0x11, 0x0, 0x0, 0x0, ...)
net/_obj/_cgo_gotypes.go:223 +0x3d
net.lookupIP(0x186250f0, 0x11, 0x0, 0x0, 0x0, ...)
/usr/lib/go/src/pkg/net/lookup_unix.go:64 +0x3d
net.LookupIP(0x186250f0, 0x11, 0x0, 0x0, 0x0, ...)
/usr/lib/go/src/pkg/net/doc.go:16 +0x3d
main.domainLookup(0x18625360, 0x186250f0, 0x11)
/home/xan/proves/tmp/url-go.go:22 +0x2c
created by main.main
/home/xan/proves/tmp/url-go.go:61 +0xd2

goroutine 1 [chan receive]:
main.waitForDomains(0x18625360, 0x2, 0x0, 0x0)
/home/xan/proves/tmp/url-go.go:42 +0x42
main.main()
/home/xan/proves/tmp/url-go.go:65 +0xf7

goroutine 2 [syscall]:
created by runtime.main
/usr/lib/go/src/pkg/runtime/proc.c:221

goroutine 4 [syscall]:
net._C2func_getaddrinfo(0xb6100468, 0x0)
net/_obj/_cgo_defun.c:42 +0x32
net.cgoLookupIPCNAME(0x18625102, 0xd, 0x0, 0x0, 0x0, ...)
net/_obj/_cgo_gotypes.go:177 +0xe7
net.cgoLookupIP(0x18625102, 0xd, 0x0, 0x0, 0x0, ...)
net/_obj/_cgo_gotypes.go:223 +0x3d
net.lookupIP(0x18625102, 0xd, 0x0, 0x0, 0x0, ...)
/usr/lib/go/src/pkg/net/lookup_unix.go:64 +0x3d
net.LookupIP(0x18625102, 0xd, 0x0, 0x0, 0x0, ...)
/usr/lib/go/src/pkg/net/doc.go:16 +0x3d
main.domainLookup(0x18625360, 0x18625102, 0xd)
/home/xan/proves/tmp/url-go.go:22 +0x2c
created by main.main
/home/xan/proves/tmp/url-go.go:61 +0xd2
xan@gerret:/home/xan/proves/tmp$

with the content of url-go.go exactly the same as https://gist.github.com/4170926#file-domain_lookup_parallel-go

What's wrong?

Xan.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.