Skip to content

Instantly share code, notes, and snippets.

@moorage
Last active December 18, 2015 23:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moorage/5862214 to your computer and use it in GitHub Desktop.
Save moorage/5862214 to your computer and use it in GitHub Desktop.
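Have you ever needed to clean email addresses out of a file (like a SQL dump) so that you don't accidentally email the people in it? This script does exactly that! It rewrites every email address in the file as developers+USER_AT_DOMAIN@thrivesmart.com, i.e. the original address with its "@" replaced by "_AT_"; change the domain in the script to suit your own setup.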
package main
import (
	"bufio"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"regexp"
	"strings"
)
// readWholeLine returns the next line from r with its line terminator ("\n"
// or "\r\n") removed. Lines longer than the reader's buffer are reassembled
// from the fragments that bufio.Reader.ReadLine hands out.
//
// A final line that is not newline-terminated is returned with a nil error,
// even when its length is an exact multiple of the buffer size; the io.EOF is
// then reported by the following call. For any other error, the text read so
// far is returned together with that error.
func readWholeLine(r *bufio.Reader) (string, error) {
	var ln []byte
	for {
		frag, isPrefix, err := r.ReadLine()
		ln = append(ln, frag...)
		if err != nil {
			// ReadLine reports a bare io.EOF when a buffer-filling fragment
			// was the last thing in the input. The fragments collected so
			// far are then a complete, unterminated final line: deliver
			// them now instead of letting the caller discard them with
			// the error.
			if err == io.EOF && len(ln) > 0 {
				return string(ln), nil
			}
			return string(ln), err
		}
		if !isPrefix {
			return string(ln), nil
		}
	}
}
// ForeachLine opens the file named filename and calls withLine once for every
// line, in file order, passing the line as returned by readWholeLine. Lines
// are read one at a time so that files with many, many lines can be processed
// without holding them in memory.
//
// The file is closed before ForeachLine returns. If the file cannot be opened,
// or a read fails with anything other than end-of-file, the error is reported
// with log.Fatal, which terminates the program.
//
// Example:
//
//	ForeachLine("/Users/myuser/Desktop/longlonglong.log", func(line string) {
//		exampleFunction(line) // ... do whatever I need to with line
//	})
func ForeachLine(filename string, withLine func(string)) {
	if err := scanLines(filename, withLine); err != nil {
		log.Fatal(err)
	}
}

// scanLines does the work of ForeachLine but returns the first open or read
// error instead of exiting, so the deferred Close always runs.
func scanLines(filename string, withLine func(string)) error {
	f, err := os.Open(filename)
	if err != nil {
		return err
	}
	// The file is only read from, so a failure to close it is not actionable.
	defer f.Close()

	r := bufio.NewReader(f)
	for {
		line, err := readWholeLine(r)
		if err == io.EOF {
			return nil
		}
		if err != nil {
			// A genuine read failure: stopping quietly here would look
			// like a normal end of file and silently drop the rest.
			return err
		}
		withLine(line)
	}
}
// Command-line settings, filled in when the flags are parsed.
var (
	// InputFilename is the value of the -i flag: the path to the file with emails.
	InputFilename string
	// OutputFilename is the value of the -o flag: the path to the output file.
	OutputFilename string
)

// init registers the -i and -o flags on the default command-line flag set.
// Both default to the empty string.
func init() {
	cmd := flag.CommandLine
	cmd.StringVar(&InputFilename, "i", "", "path to file with emails")
	cmd.StringVar(&OutputFilename, "o", "", "path to output file")
}
// emailPattern matches the email-like substrings that get rewritten.
var emailPattern = regexp.MustCompile(`[A-Za-z0-9\.\_\%\-\+\!]+@[A-Za-z0-9\.\-]+\.[A-Za-z]{2,4}`)

// sanitizeEmails returns line with every match of emailPattern rewritten to
// "developers+" + the match with "@" replaced by "_AT_" + "@thrivesmart.com".
//
// Each match is replaced exactly once, at the position where it was found.
// Replacing match text across the whole line instead would mangle the output
// whenever one address is a substring of another ("a@b.com" inside
// "aa@b.com") or of an address that has already been rewritten.
func sanitizeEmails(line string) string {
	return emailPattern.ReplaceAllStringFunc(line, func(email string) string {
		return "developers+" + strings.Replace(email, "@", "_AT_", -1) + "@thrivesmart.com"
	})
}

// main copies the file named by -i to the file named by -o, one line at a
// time, rewriting every email address on the way (see sanitizeEmails). Each
// output line is terminated with "\n". Progress messages go to the log and
// the final line count is printed to standard output. Any usage or I/O
// problem ends the program via log.Fatalf.
func main() {
	flag.Parse()
	if len(InputFilename) == 0 {
		log.Fatalf("Please specify a InputFilename with -i (got: empty string)")
	}
	if len(OutputFilename) == 0 {
		log.Fatalf("Please specify a OutputFilename with -o (got: empty string)")
	}
	// os.Create truncates its target, so writing onto the input path would
	// wipe the data before a single line had been read.
	if InputFilename == OutputFilename {
		log.Fatalf("Input and output must be different files (got %q for both)", InputFilename)
	}
	log.Printf("Reading in from file `%s`\n", InputFilename)
	log.Printf("Outputting to file `%s`\n", OutputFilename)

	fo, err := os.Create(OutputFilename)
	if err != nil {
		log.Fatalf("creating output file: %v", err)
	}
	w := bufio.NewWriter(fo)

	lineCount := 0
	ForeachLine(InputFilename, func(line string) {
		if _, err := w.WriteString(sanitizeEmails(line) + "\n"); err != nil {
			fo.Close()
			log.Fatalf("writing line %d to %s: %v", lineCount, OutputFilename, err)
		}
		lineCount++
	})

	// Push whatever is still buffered to the file before closing it.
	if err := w.Flush(); err != nil {
		fo.Close()
		log.Fatalf("flushing %s: %v", OutputFilename, err)
	}
	fmt.Printf("FINISHED on line %d\n", lineCount)
	if err := fo.Close(); err != nil {
		log.Fatalf("closing %s: %v", OutputFilename, err)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment