Skip to content

Instantly share code, notes, and snippets.

@amitavaghosh1
Last active October 27, 2022 10:58
Show Gist options
  • Save amitavaghosh1/0f1dc0347b9f0cfe6f7a50b3f87eedc7 to your computer and use it in GitHub Desktop.
Save amitavaghosh1/0f1dc0347b9f0cfe6f7a50b3f87eedc7 to your computer and use it in GitHub Desktop.
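Encoder: decodes an uploaded text file with its detected charset and normalizes line endings, rewriting both a lone \r and a \r\n pair to \n (a workaround for Microsoft-style line endings).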
package encoder
import (
"io"
"io/ioutil"
"mime/multipart"
"github.com/dimchansky/utfbom"
"github.com/sirupsen/logrus"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
)
// Normalize rewrites line endings to '\n' as bytes pass through its Transform
// method: a lone '\r' becomes '\n' and a "\r\n" pair collapses to one '\n'.
// EncodedReader hands a *Normalize to transform.NewReader.
type Normalize struct {
// prev is the most recent source byte consumed by Transform. It persists
// between calls so that a "\r\n" pair split across two calls is still
// recognized as a pair.
prev byte
}
// Transform copies src into dst while normalizing line endings: every '\r' is
// written as '\n', and a '\n' that directly follows a '\r' is dropped so that
// "\r\n" yields a single '\n'. All other bytes are copied unchanged.
//
// It returns the number of bytes written to dst and consumed from src. When
// dst fills up before src is exhausted, err is transform.ErrShortDst. atEOF is
// not consulted: no input is ever held back, so there is nothing to flush.
func (n *Normalize) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for ; nSrc < len(src) && nDst < len(dst); nSrc++ {
		cur := src[nSrc]

		// Remember the raw byte (not its replacement) before deciding what
		// to emit, so the next iteration or call can spot a "\r\n" pair.
		last := n.prev
		n.prev = cur

		if cur == '\n' && last == '\r' {
			// Second half of "\r\n": the '\r' already produced a '\n'.
			continue
		}
		if cur == '\r' {
			cur = '\n'
		}
		dst[nDst] = cur
		nDst++
	}

	if nSrc < len(src) {
		err = transform.ErrShortDst
	}
	return nDst, nSrc, err
}
// Reset returns the transformer to its initial state, forgetting the last
// byte seen so that a following '\n' is not mistaken for the tail of "\r\n".
func (n *Normalize) Reset() {
	*n = Normalize{}
}
// EncodedReader returns a reader over the contents of file, decoded from the
// charset guessed by charset.DetermineEncoding and with "\r\n" and lone "\r"
// line endings rewritten to "\n" by Normalize.
//
// The file is read once, from its current offset to EOF, with any leading
// Unicode byte-order mark skipped; those bytes are used for charset
// detection. The returned reader then streams the same BOM-less byte range
// through file.ReadAt, so it does not depend on (or disturb) the file's
// read offset. file must remain open until the caller has finished reading.
//
// Before returning, file is rewound to offset 0 on every path. If nothing
// else failed but the rewind did, the rewind error is returned.
func EncodedReader(file multipart.File) (t io.Reader, err error) {
	// Hand the file back at offset 0 whether or not detection succeeded.
	defer func() {
		if _, seekErr := file.Seek(0, io.SeekStart); seekErr != nil && err == nil {
			t, err = nil, seekErr
		}
	}()

	// Reading to EOF gives the detector its sample and also tells us how
	// long the payload is once the BOM has been stripped.
	raw, err := ioutil.ReadAll(utfbom.SkipOnly(file))
	if err != nil {
		return nil, err
	}

	// The offset is now at the end of the data, so the BOM-less payload
	// occupies the len(raw) bytes immediately before it.
	end, err := file.Seek(0, io.SeekCurrent)
	if err != nil {
		return nil, err
	}

	enc, nname, certain := charset.DetermineEncoding(raw, "text/plain")
	logrus.Println("[EncodedReader] enconding detected", nname, "certain: ", certain)

	// Serve the payload through a SectionReader that starts just past the
	// BOM. Reading through the shared file offset instead would, after the
	// rewind above, start at byte 0 and leak the BOM into the output.
	body := io.NewSectionReader(file, end-int64(len(raw)), int64(len(raw)))
	return transform.NewReader(enc.NewDecoder().Reader(body), &Normalize{}), nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment