Skip to content

Instantly share code, notes, and snippets.

@jasonroelofs
Created November 29, 2012 18:23
Show Gist options
  • Star 30 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save jasonroelofs/4170926 to your computer and use it in GitHub Desktop.
Save jasonroelofs/4170926 to your computer and use it in GitHub Desktop.
Using Go for embarrassingly parallel scripts
package main
import (
"fmt"
"net"
"io/ioutil"
"strings"
)
// DomainMap pairs a domain name with the textual form of an IP address
// that was resolved for it.
type DomainMap struct {
	// Domain is the name that was looked up.
	Domain string
	// IpMapping is the address recorded for Domain; it is left empty when
	// the lookup produced no addresses.
	IpMapping string
}
// retrieveDomains reads domains.txt from the current working directory and
// returns the domain names it lists, one per line.
//
// Every line is trimmed of surrounding whitespace, so files with CRLF line
// endings do not leave a stray "\r" on each name, and blank lines are
// skipped. If the file cannot be read, the error is printed and an empty
// list is returned instead of silently producing a single empty domain name.
func retrieveDomains() []string {
	contents, err := ioutil.ReadFile("domains.txt")
	if err != nil {
		fmt.Println("Unable to read domains.txt:", err)
		return nil
	}

	lines := strings.Split(string(contents), "\n")
	domains := make([]string, 0, len(lines))
	for _, line := range lines {
		// TrimSpace also removes the "\r" left behind by CRLF endings.
		if domain := strings.TrimSpace(line); domain != "" {
			domains = append(domains, domain)
		}
	}
	return domains
}
// domainLookup resolves domain and sends the resulting DomainMap on
// returnChannel.
//
// Only the first address returned by the lookup is kept, for simplicity.
// The lookup error is deliberately discarded: when nothing is returned the
// mapping is sent with an empty IpMapping.
func domainLookup(returnChannel chan DomainMap, domain string) {
	result := DomainMap{Domain: domain}
	if addrs, _ := net.LookupIP(domain); len(addrs) > 0 {
		result.IpMapping = addrs[0].String()
	}

	fmt.Println("Mapping: ", domain, "->", result.IpMapping)

	// Hand the result back to whoever is receiving on the channel.
	returnChannel <- result
}
// waitForDomains receives numberOfDomains results from responseChannel and
// returns them in the order they arrive.
//
// The trailing parenthesised clause in the signature is the return type. It
// also names the result, domainMapping, which can then be used like a local
// variable inside the function body.
//
// The count is checked before each receive. A count of zero or less
// therefore returns immediately with no results instead of blocking forever
// on a receive that is never needed.
func waitForDomains(responseChannel chan DomainMap, numberOfDomains int) (domainMapping []DomainMap) {
	if numberOfDomains <= 0 {
		return nil
	}

	// The final length is known up front, so allocate it once.
	domainMapping = make([]DomainMap, 0, numberOfDomains)
	for len(domainMapping) < numberOfDomains {
		domainMapping = append(domainMapping, <-responseChannel)
	}
	return domainMapping
}
// main resolves every domain returned by retrieveDomains concurrently and
// prints the collected mappings.
func main() {
	domains := retrieveDomains()

	// Every lookup goroutine reports its result on this channel.
	results := make(chan DomainMap)

	// Start one goroutine per domain.
	for i := range domains {
		go domainLookup(results, domains[i])
	}

	// Block until one response per domain has been collected, then print
	// the whole set.
	collected := waitForDomains(results, len(domains))
	fmt.Println(collected)
}
package main
import (
"fmt"
"net"
"io/ioutil"
"strings"
)
// DomainMap pairs a domain name with the textual form of an IP address
// that was resolved for it.
type DomainMap struct {
	// Domain is the name that was looked up.
	Domain string
	// IpMapping is the address recorded for Domain; it is left empty when
	// the lookup produced no addresses.
	IpMapping string
}
// retrieveDomains reads domains.txt from the current working directory and
// returns the domain names it lists, one per line.
//
// Every line is trimmed of surrounding whitespace, so files with CRLF line
// endings do not leave a stray "\r" on each name, and blank lines are
// skipped. If the file cannot be read, the error is printed and an empty
// list is returned instead of silently producing a single empty domain name.
func retrieveDomains() []string {
	contents, err := ioutil.ReadFile("domains.txt")
	if err != nil {
		fmt.Println("Unable to read domains.txt:", err)
		return nil
	}

	lines := strings.Split(string(contents), "\n")
	domains := make([]string, 0, len(lines))
	for _, line := range lines {
		// TrimSpace also removes the "\r" left behind by CRLF endings.
		if domain := strings.TrimSpace(line); domain != "" {
			domains = append(domains, domain)
		}
	}
	return domains
}
// domainLookup resolves domain and returns the resulting DomainMap.
//
// Only the first address returned by the lookup is kept, for simplicity.
// The lookup error is deliberately discarded: when nothing is returned the
// mapping comes back with an empty IpMapping.
func domainLookup(domain string) DomainMap {
	mapping := DomainMap{Domain: domain}
	if addrs, _ := net.LookupIP(domain); len(addrs) > 0 {
		mapping.IpMapping = addrs[0].String()
	}

	fmt.Println("Mapping: ", domain, "->", mapping.IpMapping)
	return mapping
}
// main resolves every domain returned by retrieveDomains one after another
// and prints the collected mappings.
func main() {
	domains := retrieveDomains()

	// One result is appended per domain, so the final length is known.
	domainMapping := make([]DomainMap, 0, len(domains))

	// Look the domains up sequentially: each lookup finishes before the
	// next one starts. No goroutines are involved in this version.
	for _, domain := range domains {
		domainMapping = append(domainMapping, domainLookup(domain))
	}

	fmt.Println(domainMapping)
}
] wc -l domains.txt
783 domains.txt
] time go run domain_lookup_parallel.go
real 0m5.743s
user 0m0.359s
sys 0m0.355s
] time go run domain_lookup_sequential.go
real 0m43.794s
user 0m0.320s
sys 0m0.200s
@pedromg
Copy link

pedromg commented Dec 3, 2012

Hi, do you have any stats for memory usage on both solutions?

@SEJeff
Copy link

SEJeff commented Dec 4, 2012

Have you considered adding:
runtime.GOMAXPROCS(runtime.NumCPU())

@bentolor
Copy link

bentolor commented Dec 5, 2012

As this operation is network-bound rather than CPU-bound, values even much higher than runtime.NumCPU() may make sense.

Copy link

ghost commented Jan 6, 2013

I received an error with the parallel version:

$ ./url-go
panic: runtime error: invalid memory address or nil pointer dereference
[signal 0xb code=0x1 addr=0x14 pc=0x80848fb]

goroutine 3 [running]:
net.cgoLookupIPCNAME(0x186250f0, 0x11, 0x0, 0x0, 0x0, ...)
net/_obj/_cgo_gotypes.go:183 +0x1cc
net.cgoLookupIP(0x186250f0, 0x11, 0x0, 0x0, 0x0, ...)
net/_obj/_cgo_gotypes.go:223 +0x3d
net.lookupIP(0x186250f0, 0x11, 0x0, 0x0, 0x0, ...)
/usr/lib/go/src/pkg/net/lookup_unix.go:64 +0x3d
net.LookupIP(0x186250f0, 0x11, 0x0, 0x0, 0x0, ...)
/usr/lib/go/src/pkg/net/doc.go:16 +0x3d
main.domainLookup(0x18625360, 0x186250f0, 0x11)
/home/xan/proves/tmp/url-go.go:22 +0x2c
created by main.main
/home/xan/proves/tmp/url-go.go:61 +0xd2

goroutine 1 [chan receive]:
main.waitForDomains(0x18625360, 0x2, 0x0, 0x0)
/home/xan/proves/tmp/url-go.go:42 +0x42
main.main()
/home/xan/proves/tmp/url-go.go:65 +0xf7

goroutine 2 [syscall]:
created by runtime.main
/usr/lib/go/src/pkg/runtime/proc.c:221

goroutine 4 [syscall]:
net._C2func_getaddrinfo(0xb6100468, 0x0)
net/_obj/_cgo_defun.c:42 +0x32
net.cgoLookupIPCNAME(0x18625102, 0xd, 0x0, 0x0, 0x0, ...)
net/_obj/_cgo_gotypes.go:177 +0xe7
net.cgoLookupIP(0x18625102, 0xd, 0x0, 0x0, 0x0, ...)
net/_obj/_cgo_gotypes.go:223 +0x3d
net.lookupIP(0x18625102, 0xd, 0x0, 0x0, 0x0, ...)
/usr/lib/go/src/pkg/net/lookup_unix.go:64 +0x3d
net.LookupIP(0x18625102, 0xd, 0x0, 0x0, 0x0, ...)
/usr/lib/go/src/pkg/net/doc.go:16 +0x3d
main.domainLookup(0x18625360, 0x18625102, 0xd)
/home/xan/proves/tmp/url-go.go:22 +0x2c
created by main.main
/home/xan/proves/tmp/url-go.go:61 +0xd2
xan@gerret:/home/xan/proves/tmp$

with the content of url-go.go exactly the same as https://gist.github.com/4170926#file-domain_lookup_parallel-go

What's wrong?

Xan.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment