Last active
April 11, 2018 14:37
-
-
Save xeoncross/0a8e8f59c3985db0618de527ad00e85c to your computer and use it in GitHub Desktop.
Simple recursive MIME body parsing, based on https://github.com/jordan-wright/email/ and https://gist.github.com/Xeoncross/85b7ccaaa537589690034c03b1108d4e
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"bytes" | |
"encoding/base64" | |
"errors" | |
"fmt" | |
"io" | |
"io/ioutil" | |
"log" | |
"mime" | |
"mime/multipart" | |
"mime/quotedprintable" | |
"net/http" | |
"net/textproto" | |
"os" | |
"strings" | |
"unicode" | |
) | |
// myMessage is the sample RFC 5322 message parsed by main: a
// multipart/alternative entity holding two base64-encoded parts (text/plain
// and text/html).
//
// The layout of the literal is significant. A blank line must separate each
// header block from the content that follows it, and a folded header
// continuation line must begin with whitespace; without them the header
// parser treats boundary and body lines as malformed headers.
const myMessage = `Content-Type: multipart/alternative;
	boundary="===============5769616449556512256=="
MIME-Version: 1.0
To: test@test.com
From: test@gmail.com
Cc:
Subject: =?utf-8?b?0J/RgNC40LLQtdGC?=
Date: Mon, 30 Jun 2014 18:29:38 -0000

--===============5769616449556512256==
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: base64
X-Data: =?utf-8?b?AxfhfujropadladnggnfjgwsaiubvnmkadiuhterqHJSFfuAjkfhrqpeorLA?=
 =?utf-8?b?kFnjNfhgt7Fjd9dfkliodQ==?=

0K3RgtC+INC80L7RkSDRgdC+0L7QsdGJ0LXQvdC40LUu

--===============5769616449556512256==
Content-Type: text/html; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: base64

0K3RgtC+INC80L7RkSDRgdC+0L7QsdGJ0LXQvdC40LUu

--===============5769616449556512256==--`
func main() { | |
parts, err := NewEmailFromReader(bytes.NewBufferString(myMessage)) | |
if err != nil { | |
log.Fatal(err) | |
} | |
fmt.Println(len(parts), "parts found") | |
} | |
// part is a copyable representation of a multipart.Part: the part's headers
// together with a reader for its content.
type part struct {
	// Header holds the MIME headers that describe this part.
	Header textproto.MIMEHeader
	// Body is the reader the part's content is read from. It is an ordinary
	// io.Reader, so it is consumed by reading and cannot be rewound.
	Body io.Reader
	// B optionally holds the part's content as a byte slice; code that builds
	// a part may leave it nil.
	B []byte
}
// trimReader is a custom io.Reader that will trim any leading | |
// whitespace, as this can cause email imports to fail. | |
type trimReader struct { | |
rd io.Reader | |
} | |
// Read trims off any unicode whitespace from the originating reader | |
func (tr trimReader) Read(buf []byte) (int, error) { | |
n, err := tr.rd.Read(buf) | |
t := bytes.TrimLeftFunc(buf[:n], unicode.IsSpace) | |
n = copy(buf, t) | |
return n, err | |
} | |
// NewEmailFromReader reads a stream of bytes from an io.Reader, r, | |
// and returns an email struct containing the parsed data. | |
// This function expects the data in RFC 5322 format. | |
func NewEmailFromReader(r io.Reader) (parts []*part, err error) { | |
var headers textproto.MIMEHeader | |
s := trimReader{rd: r} | |
tp := textproto.NewReader(bufio.NewReader(s)) | |
// Parse the main headers | |
headers, err = tp.ReadMIMEHeader() | |
if err != nil { | |
return | |
} | |
// Recursively parse the MIME parts | |
parts, err = parseMIMEParts(headers, tp.R) | |
if err != nil { | |
return | |
} | |
for _, part := range parts { | |
fmt.Println(part.Header.Get("Content-Type")) | |
fmt.Println("Body", readAll(part.Body)) | |
// fmt.Println(string(part.B)) | |
} | |
return | |
} | |
// readAll drains r and returns everything it produced. A read failure is
// treated as fatal: it is logged and the process exits.
func readAll(r io.Reader) []byte {
	data, readErr := ioutil.ReadAll(r)
	if readErr == nil {
		return data
	}
	log.Fatal(readErr)
	return nil // not reached: log.Fatal exits the process
}
// parseMIMEParts will recursively walk a MIME entity and return a []mime.Part containing | |
// each (flattened) mime.Part found. | |
// It is important to note that there are no limits to the number of recursions, so be | |
// careful when parsing unknown MIME structures! | |
func parseMIMEParts(hs textproto.MIMEHeader, b io.Reader) (parts []*part, err error) { | |
ct, params, err := mime.ParseMediaType(hs.Get("Content-Type")) | |
if err != nil { | |
return | |
} | |
fmt.Println("parseMIMEParts", ct) | |
// If it's a multipart email, recursively parse the parts | |
if strings.HasPrefix(ct, "multipart/") { | |
if _, ok := params["boundary"]; !ok { | |
return parts, errors.New("Missing boundary") | |
} | |
// Readers are buffered https://golang.org/src/mime/multipart/multipart.go#L99 | |
mr := multipart.NewReader(b, params["boundary"]) | |
var p *multipart.Part | |
for { | |
// Decodes quotedprintable: https://golang.org/src/mime/multipart/multipart.go#L128 | |
// Closes last part reader: https://golang.org/src/mime/multipart/multipart.go#L302 | |
p, err = mr.NextPart() | |
if err == io.EOF { | |
break | |
} | |
if err != nil { | |
return | |
} | |
// Correctly decode the body bytes | |
body := contentDecoderReader(p.Header, p) | |
var subct string | |
subct, _, err = mime.ParseMediaType(p.Header.Get("Content-Type")) | |
if strings.HasPrefix(subct, "multipart/") { | |
fmt.Println("\tparsing multipart?", subct) | |
var subparts []*part | |
subparts, err = parseMIMEParts(p.Header, body) | |
if err != nil { | |
return | |
} | |
parts = append(parts, subparts...) | |
} else { | |
fmt.Println("\tparsing plain?", subct) | |
var tmpFile *os.File | |
tmpFile, err = ioutil.TempFile("./emails", "mime") | |
if err != nil { | |
return | |
} | |
defer tmpFile.Close() | |
// http.Header and textproto.MIMEHeader are both just a map[string][]string | |
httpHeader := http.Header(p.Header) | |
fmt.Fprintf(tmpFile, "%#v\n\n\n", httpHeader) | |
_, err = io.Copy(tmpFile, body) // Save body disk | |
if err != nil { | |
return | |
} | |
tmpFile.Seek(0, 0) | |
// parts = append(parts, &part{Body: p, B: readAll(body), Header: p.Header}) | |
parts = append(parts, &part{Body: body, Header: p.Header}) | |
} | |
} | |
} else { | |
// If it is not a multipart email, parse the body content as a single "part" | |
// parts = append(parts, &part{Body: b, B: readAll(contentDecoderReader(hs, b)), Header: hs}) | |
parts = append(parts, &part{Body: contentDecoderReader(hs, b), Header: hs}) | |
} | |
return parts, nil | |
} | |
// func newTempFile() (os.File, err error) { | |
// tmpfile, err = ioutil.TempFile("", "example") | |
// if err != nil { | |
// log.Fatal(err) | |
// } | |
// // defer os.Remove(tmpfile.Name()) // clean up | |
// | |
// } | |
// contentDecoderReader | |
func contentDecoderReader(headers textproto.MIMEHeader, bodyReader io.Reader) *bufio.Reader { | |
if headers.Get("Content-Transfer-Encoding") == "quoted-printable" { | |
return bufioReader(quotedprintable.NewReader(bodyReader)) | |
} | |
if headers.Get("Content-Transfer-Encoding") == "base64" { | |
return bufioReader(base64.NewDecoder(base64.StdEncoding, bodyReader)) | |
} | |
return bufioReader(bodyReader) | |
} | |
// bufioReader returns r as a *bufio.Reader. A reader that is already a
// *bufio.Reader is handed back as-is, so it is never wrapped in a second
// buffer; anything else gets a new buffered reader around it.
func bufioReader(r io.Reader) *bufio.Reader {
	switch existing := r.(type) {
	case *bufio.Reader:
		return existing
	default:
		return bufio.NewReader(r)
	}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment