Skip to content

Instantly share code, notes, and snippets.

@unstppbl
Created October 30, 2018 09:13
Show Gist options
  • Save unstppbl/ad97c20c2b99e76f6b1a8fbe72dc5f79 to your computer and use it in GitHub Desktop.
Save unstppbl/ad97c20c2b99e76f6b1a8fbe72dc5f79 to your computer and use it in GitHub Desktop.
Script to extract unique emails from file, assuming there is only one per line
package main
import (
	"bufio"
	"os"
	"regexp"
	"sort"
)
// checkErr panics with err when err is non-nil; a nil err is a no-op.
// It is the single error-handling path used by this script.
func checkErr(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// main reads FirstEmailsWT.rtf line by line, takes the first email-like
// match on each line, de-duplicates the matches, and writes them in sorted
// order, one per line, to sortedEmails.txt.
//
// Lines without a match are ignored. Any I/O error is passed to checkErr,
// which panics.
func main() {
	readFileName := "FirstEmailsWT.rtf"
	writeFileName := "sortedEmails.txt"

	// email extract regex
	emailReg := regexp.MustCompile(`[a-zA-Z0-9-_.]+@[a-zA-Z0-9-_.]+`)

	// Open the input before creating the output so that a missing input
	// file does not leave an empty (or truncated) output file behind.
	read, err := os.Open(readFileName)
	checkErr(err)
	defer read.Close()

	// set of unique emails
	unique := map[string]struct{}{}

	// Read the file and extract emails, assuming at most one per line.
	// NOTE: bufio.Scanner rejects lines longer than bufio.MaxScanTokenSize
	// by default; that surfaces through scanner.Err() below.
	scanner := bufio.NewScanner(read)
	for scanner.Scan() {
		// FindString returns "" when the line has no match; skip those so
		// the output does not get a blank entry.
		if email := emailReg.FindString(scanner.Text()); email != "" {
			unique[email] = struct{}{}
		}
	}
	checkErr(scanner.Err())

	// Map iteration order is random; sort for deterministic output.
	emails := make([]string, 0, len(unique))
	for em := range unique {
		emails = append(emails, em)
	}
	sort.Strings(emails)

	// file to write sorted emails
	write, err := os.Create(writeFileName)
	checkErr(err)
	// Safety net for the panic path; the explicit Close below reports errors.
	defer write.Close()

	out := bufio.NewWriter(write)
	for _, em := range emails {
		_, err := out.WriteString(em + "\n")
		checkErr(err)
	}
	checkErr(out.Flush())
	checkErr(write.Close())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment