Skip to content

Instantly share code, notes, and snippets.

@facchinm
Created August 26, 2016 16:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save facchinm/07db68e35e419bc5fef3a190ad6d7528 to your computer and use it in GitHub Desktop.
Save facchinm/07db68e35e419bc5fef3a190ad6d7528 to your computer and use it in GitHub Desktop.
From 970583587fbb9860cef9c17829f26b4c5f6170f0 Mon Sep 17 00:00:00 2001
From: Martino Facchin <m.facchin@arduino.cc>
Date: Fri, 26 Aug 2016 18:08:52 +0200
Subject: [PATCH] Treat cpp string as UTF8
Solves https://github.com/arduino/Arduino/issues/5277
---
src/arduino.cc/builder/utils/utils.go | 118 ++++++++++++++++++++++++++++------
1 file changed, 100 insertions(+), 18 deletions(-)
diff --git a/src/arduino.cc/builder/utils/utils.go b/src/arduino.cc/builder/utils/utils.go
index b57c268..d145a34 100644
--- a/src/arduino.cc/builder/utils/utils.go
+++ b/src/arduino.cc/builder/utils/utils.go
@@ -34,14 +34,18 @@ import (
"arduino.cc/builder/gohasissues"
"arduino.cc/builder/i18n"
"arduino.cc/builder/types"
+ "bytes"
"crypto/md5"
"encoding/hex"
+ "errors"
+ "fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
+ "unicode/utf8"
)
func KeysOfMapOfStringInterface(input map[string]interface{}) []string {
@@ -435,30 +439,108 @@ func ParseCppString(line string) (string, string, bool) {
return "", line, false
}
+ s, err := consumeQuotedString(line)
+ if err == nil {
+ return s, "", true
+ } else {
+ return "", line, false
+ }
+}
+
+// from package net/mail/message.go
+// consumeQuotedString parses the quoted string at the start of s and returns its unescaped contents; text after the closing quote is discarded.
+func consumeQuotedString(s string) (qs string, err error) {
+ // Assume first byte is '"'.
i := 1
- res := ""
+ qsb := make([]rune, 0, 10)
+
+ escaped := false
+
+Loop:
for {
- if i >= len(line) {
- return "", line, false
- }
+ r, size := utf8.DecodeRuneInString(s[i:])
- switch line[i] {
- // Backslash, next character is used unmodified
- case '\\':
- i++
- if i >= len(line) {
- return "", line, false
+ switch {
+ case size == 0:
+ return "", errors.New("mail: unclosed quoted-string")
+
+ case size == 1 && r == utf8.RuneError:
+ return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", s)
+
+ case escaped:
+ // quoted-pair = ("\" (VCHAR / WSP))
+
+ if !isVchar(r) && !isWSP(r) {
+ return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
}
- res += string(line[i])
- break
- // Quote, end of string
- case '"':
- return res, line[i+1:], true
+
+ qsb = append(qsb, r)
+ escaped = false
+
+ case isQtext(r) || isWSP(r):
+ // qtext (printable US-ASCII excluding " and \, plus multi-byte UTF-8), or
+ // FWS (almost; we're ignoring CRLF)
+ qsb = append(qsb, r)
+
+ case r == '"':
+ break Loop
+
+ case r == '\\':
+ escaped = true
+
default:
- res += string(line[i])
- break
+ return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
+
}
- i++
+ i += size
+ }
+ s = s[i+1:]
+ if len(qsb) == 0 {
+ return "", errors.New("mail: empty quoted-string")
+ }
+ return string(qsb), nil
+}
+
+// isQtext reports whether r is an RFC 5322 qtext character.
+func isQtext(r rune) bool {
+ // Any VCHAR (printable US-ASCII or multi-byte UTF-8), excluding backslash and quote.
+ if r == '\\' || r == '"' {
+ return false
+ }
+ return isVchar(r)
+}
+
+// quoteString renders a string as an RFC 5322 quoted-string; runes that are neither WSP nor VCHAR are dropped.
+func quoteString(s string) string {
+ var buf bytes.Buffer
+ buf.WriteByte('"')
+ for _, r := range s {
+ if isQtext(r) || isWSP(r) {
+ buf.WriteRune(r)
+ } else if isVchar(r) {
+ buf.WriteByte('\\')
+ buf.WriteRune(r)
+ }
}
+ buf.WriteByte('"')
+ return buf.String()
+}
+
+// isVchar reports whether r is an RFC 5322 VCHAR character.
+func isVchar(r rune) bool {
+ // Visible (printing) characters.
+ return '!' <= r && r <= '~' || isMultibyte(r)
+}
+
+// isMultibyte reports whether r is a multi-byte UTF-8 character
+// (any rune >= utf8.RuneSelf), as supported by RFC 6532.
+func isMultibyte(r rune) bool {
+ return r >= utf8.RuneSelf
+}
+
+// isWSP reports whether r is a WSP (white space).
+// WSP is a space or horizontal tab (RFC 5234 Appendix B).
+func isWSP(r rune) bool {
+ return r == ' ' || r == '\t'
}
--
2.9.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment