Skip to content

Instantly share code, notes, and snippets.

@xeoncross
Last active April 11, 2018 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xeoncross/0a8e8f59c3985db0618de527ad00e85c to your computer and use it in GitHub Desktop.
Save xeoncross/0a8e8f59c3985db0618de527ad00e85c to your computer and use it in GitHub Desktop.
package main
import (
"bufio"
"bytes"
"encoding/base64"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"mime"
"mime/multipart"
"mime/quotedprintable"
"net/http"
"net/textproto"
"os"
"strings"
"unicode"
)
// myMessage is a sample email message that main feeds to NewEmailFromReader.
// It declares a multipart/alternative entity containing two base64-encoded
// parts (text/plain and text/html), plus RFC 2047 encoded-word headers.
// NOTE(review): as it appears here the literal has no blank lines between
// header blocks and bodies, and no leading whitespace on the folded header
// lines - confirm this matches the intended message before relying on it.
const myMessage = `Content-Type: multipart/alternative;
boundary="===============5769616449556512256=="
MIME-Version: 1.0
To: test@test.com
From: test@gmail.com
Cc:
Subject: =?utf-8?b?0J/RgNC40LLQtdGC?=
Date: Mon, 30 Jun 2014 18:29:38 -0000
--===============5769616449556512256==
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: base64
X-Data: =?utf-8?b?AxfhfujropadladnggnfjgwsaiubvnmkadiuhterqHJSFfuAjkfhrqpeorLA?=
=?utf-8?b?kFnjNfhgt7Fjd9dfkliodQ==?=
0K3RgtC+INC80L7RkSDRgdC+0L7QsdGJ0LXQvdC40LUu
--===============5769616449556512256==
Content-Type: text/html; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: base64
0K3RgtC+INC80L7RkSDRgdC+0L7QsdGJ0LXQvdC40LUu
--===============5769616449556512256==--`
func main() {
parts, err := NewEmailFromReader(bytes.NewBufferString(myMessage))
if err != nil {
log.Fatal(err)
}
fmt.Println(len(parts), "parts found")
}
// part is a copyable representation of a multipart.Part: it carries the
// part's headers together with its body, detached from the multipart.Reader
// that produced it.
type part struct {
// Header holds the MIME headers of this part.
Header textproto.MIMEHeader
// Body is a reader over the part's body, as assigned by parseMIMEParts.
Body io.Reader
// B can hold the body as a byte slice.
// NOTE(review): confirm which code paths populate it; it may be nil.
B []byte
}
// trimReader is a custom io.Reader that discards any Unicode whitespace found
// at the very start of the wrapped stream, as leading whitespace can cause
// email imports to fail. Once the first non-space byte has been seen, all
// further data is passed through untouched.
type trimReader struct {
	rd      io.Reader
	trimmed bool // true once the leading whitespace has been skipped
}

// Read reads from the wrapped reader into buf, dropping leading whitespace
// until the first non-space byte of the stream has been delivered.
//
// Only the start of the stream is trimmed: whitespace that merely happens to
// sit at the start of a later chunk is real message content and is preserved.
// Chunks consisting solely of leading whitespace are skipped internally so the
// caller is not handed a (0, nil) result for them.
func (tr *trimReader) Read(buf []byte) (int, error) {
	if tr.trimmed || len(buf) == 0 {
		return tr.rd.Read(buf)
	}
	for {
		n, err := tr.rd.Read(buf)
		rest := bytes.TrimLeftFunc(buf[:n], unicode.IsSpace)
		if len(rest) > 0 {
			tr.trimmed = true
			// copy handles the overlap between rest and buf.
			return copy(buf, rest), err
		}
		// Nothing but whitespace so far. Stop on error, or when the
		// underlying reader made no progress, instead of spinning.
		if err != nil || n == 0 {
			return 0, err
		}
	}
}

// NewEmailFromReader reads a stream of bytes from an io.Reader, r, and
// returns the flattened list of MIME parts found in it.
// This function expects the data in RFC 5322 format.
//
// As a debugging aid the Content-Type and decoded body bytes of every part
// are printed to standard output. Printing consumes each body, so the bytes
// are kept in part.B and part.Body is replaced with a fresh in-memory reader;
// the returned parts can therefore still be read by the caller.
//
// Any error from header parsing, MIME parsing or body reading is returned and
// no parts are returned alongside it.
func NewEmailFromReader(r io.Reader) ([]*part, error) {
	// A pointer is required: trimReader keeps state between Read calls.
	tp := textproto.NewReader(bufio.NewReader(&trimReader{rd: r}))

	// Parse the main headers
	headers, err := tp.ReadMIMEHeader()
	if err != nil {
		return nil, err
	}

	// Recursively parse the MIME parts
	parts, err := parseMIMEParts(headers, tp.R)
	if err != nil {
		return nil, err
	}

	for _, p := range parts {
		fmt.Println(p.Header.Get("Content-Type"))
		data, err := ioutil.ReadAll(p.Body)
		if err != nil {
			return nil, err
		}
		fmt.Println("Body", data)
		// The read above exhausted p.Body; restore a readable view.
		p.B = data
		p.Body = bytes.NewReader(data)
	}
	return parts, nil
}
// readAll drains r until EOF and returns everything that was read.
// A read failure is fatal: it is logged and the process exits via log.Fatal.
func readAll(r io.Reader) []byte {
	data, readErr := ioutil.ReadAll(r)
	if readErr == nil {
		return data
	}
	log.Fatal(readErr)
	return data
}
// parseMIMEParts recursively walks a MIME entity and returns every leaf part
// found, flattened into a single slice in document order.
//
// hs holds the headers of the entity and b its (still encoded) body. The
// entity's Content-Type decides how b is interpreted:
//
//   - multipart/*: b is split on the declared boundary and each sub-part is
//     processed in turn (nested multiparts recurse). Every leaf found this way
//     is additionally dumped to a temporary file under ./emails, so that
//     directory must exist.
//   - anything else: the whole body is returned as a single part.
//
// Each returned part has its transfer-decoded body buffered in memory: B holds
// the bytes and Body is a reader over them. Buffering is required because a
// multipart.Part can no longer be read once the next part is requested.
//
// Progress messages are printed to standard output. On error no parts are
// returned. It is important to note that there is no limit on the recursion
// depth, so be careful when parsing unknown MIME structures!
func parseMIMEParts(hs textproto.MIMEHeader, b io.Reader) ([]*part, error) {
	ct, params, err := mime.ParseMediaType(hs.Get("Content-Type"))
	if err != nil {
		return nil, err
	}
	fmt.Println("parseMIMEParts", ct)

	// If it is not a multipart email, parse the body content as a single "part"
	if !strings.HasPrefix(ct, "multipart/") {
		leaf, err := bufferPart(hs, contentDecoderReader(hs, b))
		if err != nil {
			return nil, err
		}
		return []*part{leaf}, nil
	}

	boundary, ok := params["boundary"]
	if !ok {
		return nil, errors.New("Missing boundary")
	}

	var parts []*part
	// multipart.Reader buffers b itself. NextPart transparently decodes
	// quoted-printable parts and closes the previously returned part.
	mr := multipart.NewReader(b, boundary)
	for {
		p, err := mr.NextPart()
		if err == io.EOF {
			return parts, nil
		}
		if err != nil {
			return nil, err
		}

		// Correctly decode the body bytes
		body := contentDecoderReader(p.Header, p)

		// A missing or unparsable Content-Type leaves subct empty, so the
		// part is deliberately handled as a plain leaf rather than rejected.
		subct, _, _ := mime.ParseMediaType(p.Header.Get("Content-Type"))
		if strings.HasPrefix(subct, "multipart/") {
			fmt.Println("\tparsing multipart?", subct)
			subparts, err := parseMIMEParts(p.Header, body)
			if err != nil {
				return nil, err
			}
			parts = append(parts, subparts...)
			continue
		}

		fmt.Println("\tparsing plain?", subct)
		leaf, err := bufferPart(p.Header, body)
		if err != nil {
			return nil, err
		}
		if err := dumpPart(leaf); err != nil {
			return nil, err
		}
		parts = append(parts, leaf)
	}
}

// bufferPart reads body to EOF and returns a part whose B and Body both expose
// the bytes that were read.
func bufferPart(header textproto.MIMEHeader, body io.Reader) (*part, error) {
	data, err := ioutil.ReadAll(body)
	if err != nil {
		return nil, err
	}
	return &part{Header: header, Body: bytes.NewReader(data), B: data}, nil
}

// dumpPart writes leaf's headers (in %#v form) followed by its buffered body
// to a new temporary file under ./emails. The file is closed before returning
// so that descriptors do not pile up while a message with many parts is parsed.
func dumpPart(leaf *part) (err error) {
	var tmpFile *os.File
	tmpFile, err = ioutil.TempFile("./emails", "mime")
	if err != nil {
		return err
	}
	defer func() {
		// Report a failed Close unless an earlier write error takes priority.
		if cerr := tmpFile.Close(); err == nil {
			err = cerr
		}
	}()

	// http.Header and textproto.MIMEHeader are both just a map[string][]string
	if _, err = fmt.Fprintf(tmpFile, "%#v\n\n\n", http.Header(leaf.Header)); err != nil {
		return err
	}
	_, err = tmpFile.Write(leaf.B) // Save body to disk
	return err
}
// func newTempFile() (os.File, err error) {
// tmpfile, err = ioutil.TempFile("", "example")
// if err != nil {
// log.Fatal(err)
// }
// // defer os.Remove(tmpfile.Name()) // clean up
//
// }
// contentDecoderReader wraps bodyReader in a decoder chosen by the
// Content-Transfer-Encoding value in headers and returns the result as a
// buffered reader.
//
// "quoted-printable" and "base64" bodies are decoded on the fly; any other
// (or missing) encoding is passed through unchanged. The encoding name is
// matched case-insensitively and with surrounding whitespace ignored, since
// RFC 2045 defines the token as case-insensitive (e.g. "BASE64").
func contentDecoderReader(headers textproto.MIMEHeader, bodyReader io.Reader) *bufio.Reader {
	encoding := strings.ToLower(strings.TrimSpace(headers.Get("Content-Transfer-Encoding")))
	switch encoding {
	case "quoted-printable":
		return bufioReader(quotedprintable.NewReader(bodyReader))
	case "base64":
		return bufioReader(base64.NewDecoder(base64.StdEncoding, bodyReader))
	}
	return bufioReader(bodyReader)
}
// bufioReader returns r as a *bufio.Reader. A reader that already is a
// *bufio.Reader is handed back as-is; anything else is wrapped in a new one.
func bufioReader(r io.Reader) *bufio.Reader {
	switch existing := r.(type) {
	case *bufio.Reader:
		return existing
	default:
		return bufio.NewReader(r)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment